From 7c61eef522172a6ebb9591f904555dcc3d208fe2 Mon Sep 17 00:00:00 2001 From: Holger Weiss Date: Thu, 22 Oct 2015 21:52:13 +0200 Subject: [PATCH] mod_http_upload: Add support for user quotas The new mod_http_upload_quota module implements two features: - When a "hard quota" is exceeded during a file upload, old files are removed until the disk usage equals or falls below the "soft quota". - Once a day, all uploaded files (and directories) older than a configurable number of days are deleted. --- mod_http_upload/README.txt | 56 ++- mod_http_upload/src/mod_http_upload.erl | 56 +-- mod_http_upload/src/mod_http_upload_quota.erl | 353 ++++++++++++++++++ 3 files changed, 432 insertions(+), 33 deletions(-) create mode 100644 mod_http_upload/src/mod_http_upload_quota.erl diff --git a/mod_http_upload/README.txt b/mod_http_upload/README.txt index c34eed0..faad518 100644 --- a/mod_http_upload/README.txt +++ b/mod_http_upload/README.txt @@ -12,6 +12,9 @@ This module allows for requesting permissions to upload a file via HTTP. If the request is accepted, the client receives a URL to use for uploading the file and another URL from which that file can later be downloaded. +Automatic quota management can be configured by also enabling +mod_http_upload_quota. + PLEASE NOTE: This module implements an experimental protocol which may change at any time. There are already suggestions for improvements, e.g. from ProcessOne: @@ -35,11 +38,20 @@ following to your ejabberd.yml file: request_handlers: "": mod_http_upload + access: + # [...] + soft_upload_quota: + all: 1000 # MiB + hard_upload_quota: + all: 1100 # MiB + modules: # [...] mod_http_upload: docroot: "/home/xmpp/upload" put_url: "https://@HOST@:5443" + mod_http_upload_quota: + max_days: 100 The configurable mod_http_upload options are: @@ -165,15 +177,43 @@ The configurable mod_http_upload options are: when that user is unregistered. It must be set to 'false' if this is not desired. +The configurable mod_http_upload_quota options are: - REMOVING OLD FILES - ------------------ +- max_days (default: 'infinity') -You might want to use cron(8) to remove uploaded files that are older than -some number of days: + If a number larger than zero is specified, any files (and directories) + older than this number of days are removed from the subdirectories of the + 'docroot' directory, once per day. By default, this won't be done. -0 1 * * * find /home/xmpp/upload -type f -ctime +365 -exec rm -f '{}' ';' -0 2 * * * find /home/xmpp/upload -type d -exec rmdir '{}' ';' 2>/dev/null +- access_hard_quota (default: 'hard_upload_quota') -Note that /home/xmpp/upload must be replaced with your actual 'docroot' -path, and 365 with the desired number of days. + This option defines which access rule is used to specify the "hard quota" + for the matching JIDs. That rule must yield a positive number for any JID + that is supposed to have a quota limit. This is the number of megabytes a + corresponding user may upload. When this threshold is exceeded, ejabberd + deletes the oldest files uploaded by that user until their disk usage + equals or falls below the specified soft quota. + +- access_soft_quota (default: 'soft_upload_quota') + + This option defines which access rule is used to specify the "soft quota" + for the matching JIDs. That rule must yield a positive number of + megabytes for any JID that is supposed to have a quota limit. It is + strongly recommended to make sure the soft quota will be smaller than the + hard quota (but it must be non-zero if the hard quota is non-zero). See + the description of the 'access_hard_quota' option for details. + + NOTE: It's not necessary to specify the 'access_hard_quota' and + 'access_soft_quota' options in order to use the quota feature. You can + stick to the default names and just specify access rules such as the + following: + + access: + # [...] + soft_upload_quota: + all: 400 + hard_upload_quota: + all: 500 + + This sets a soft quota of 400 MiB and a hard quota of 500 MiB for all + users. diff --git a/mod_http_upload/src/mod_http_upload.erl b/mod_http_upload/src/mod_http_upload.erl index 7ea2c05..8d517b8 100644 --- a/mod_http_upload/src/mod_http_upload.erl +++ b/mod_http_upload/src/mod_http_upload.erl @@ -66,6 +66,10 @@ %% ejabberd_hooks callback. -export([remove_user/2]). +%% Utility functions. +-export([get_proc_name/2, + expand_home/1]). + -include("ejabberd.hrl"). -include("ejabberd_http.hrl"). -include("jlib.hrl"). @@ -113,7 +117,7 @@ start(ServerHost, Opts) -> remove_user, 50); false -> ok end, - Proc = get_proc_name(ServerHost), + Proc = get_proc_name(ServerHost, ?PROCNAME), Spec = {Proc, {?MODULE, start_link, [ServerHost, Proc, Opts]}, permanent, @@ -135,7 +139,7 @@ stop(ServerHost) -> remove_user, 50); false -> ok end, - Proc = get_proc_name(ServerHost), + Proc = get_proc_name(ServerHost, ?PROCNAME), ok = supervisor:terminate_child(ejabberd_sup, Proc), ok = supervisor:delete_child(ejabberd_sup, Proc). @@ -421,6 +425,31 @@ process(_LocalPath, #request{method = Method, host = Host, ip = IP}) -> [Method, ?ADDR_TO_STR(IP), Host]), http_response(Host, 405, [{<<"Allow">>, <<"OPTIONS, HEAD, GET, PUT">>}]). +%%-------------------------------------------------------------------- +%% Exported utility functions. +%%-------------------------------------------------------------------- + +-spec get_proc_name(binary(), atom()) -> atom(). + +get_proc_name(ServerHost, ModuleName) -> + PutURL = gen_mod:get_module_opt(ServerHost, ?MODULE, put_url, + fun(<<"http://", _/binary>> = URL) -> URL; + (<<"https://", _/binary>> = URL) -> URL; + (_) -> <<"http://@HOST@">> + end, + <<"http://@HOST@">>), + [_, ProcHost | _] = binary:split(expand_host(PutURL, ServerHost), + [<<"http://">>, <<"https://">>, + <<":">>, <<"/">>], [global]), + gen_mod:get_module_proc(ProcHost, ModuleName). + +-spec expand_home(binary()) -> binary(). + +expand_home(Subject) -> + {ok, [[Home]]} = init:get_argument(home), + Parts = binary:split(Subject, <<"@HOME@">>, [global]), + str:join(Parts, list_to_binary(Home)). + %%-------------------------------------------------------------------- %% Internal functions. %%-------------------------------------------------------------------- @@ -646,13 +675,6 @@ map_int_to_char(N) when N =< 9 -> N + 48; % Digit. map_int_to_char(N) when N =< 35 -> N + 55; % Upper-case character. map_int_to_char(N) when N =< 61 -> N + 61. % Lower-case character. --spec expand_home(binary()) -> binary(). - -expand_home(Subject) -> - {ok, [[Home]]} = init:get_argument(home), - Parts = binary:split(Subject, <<"@HOME@">>, [global]), - str:join(Parts, list_to_binary(Home)). - -spec expand_host(binary(), binary()) -> binary(). expand_host(Subject, Host) -> @@ -761,22 +783,6 @@ code_to_message(413) -> <<"File size doesn't match requested size.">>; code_to_message(500) -> <<"Internal server error.">>; code_to_message(_Code) -> <<"">>. -%% Miscellaneous helpers. - --spec get_proc_name(binary()) -> atom(). - -get_proc_name(ServerHost) -> - PutURL = gen_mod:get_module_opt(ServerHost, ?MODULE, put_url, - fun(<<"http://", _/binary>> = URL) -> URL; - (<<"https://", _/binary>> = URL) -> URL; - (_) -> <<"http://@HOST@">> - end, - <<"http://@HOST@">>), - [_, ProcHost | _] = binary:split(expand_host(PutURL, ServerHost), - [<<"http://">>, <<"https://">>, - <<":">>, <<"/">>], [global]), - gen_mod:get_module_proc(ProcHost, ?PROCNAME). - %%-------------------------------------------------------------------- %% Remove user. %%-------------------------------------------------------------------- diff --git a/mod_http_upload/src/mod_http_upload_quota.erl b/mod_http_upload/src/mod_http_upload_quota.erl new file mode 100644 index 0000000..8e53973 --- /dev/null +++ b/mod_http_upload/src/mod_http_upload_quota.erl @@ -0,0 +1,353 @@ +%%%------------------------------------------------------------------- +%%% File : mod_http_upload_quota.erl +%%% Author : Holger Weiss +%%% Purpose : Quota management for HTTP File Upload (XEP-0363) +%%% Created : 15 Oct 2015 by Holger Weiss +%%%------------------------------------------------------------------- + +-module(mod_http_upload_quota). +-author('holger@zedat.fu-berlin.de'). + +-define(GEN_SERVER, gen_server). +-define(PROCNAME, ?MODULE). +-define(TIMEOUT, 86400). +-define(INITIAL_TIMEOUT, 600). + +-behaviour(?GEN_SERVER). +-behaviour(gen_mod). + +%% gen_mod/supervisor callbacks. +-export([start_link/3, + start/2, + stop/1, + mod_opt_type/1]). + +%% gen_server callbacks. +-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). + +%% ejabberd_hooks callback. +-export([handle_slot_request/5]). + +-include("jlib.hrl"). +-include("logger.hrl"). +-include_lib("kernel/include/file.hrl"). + +-record(state, + {server_host :: binary(), + access_soft_quota :: atom(), + access_hard_quota :: atom(), + max_days :: pos_integer() | infinity, + docroot :: binary(), + last_sweep :: non_neg_integer(), + disk_usage = dict:new() :: term()}). + +-type state() :: #state{}. + +%%-------------------------------------------------------------------- +%% gen_mod/supervisor callbacks. +%%-------------------------------------------------------------------- + +-spec start_link(binary(), atom(), gen_mod:opts()) + -> {ok, pid()} | ignore | {error, _}. + +start_link(ServerHost, Proc, Opts) -> + ?GEN_SERVER:start_link({local, Proc}, ?MODULE, {ServerHost, Opts}, []). + +-spec start(binary(), gen_mod:opts()) -> {ok, _} | {ok, _, _} | {error, _}. + +start(ServerHost, Opts) -> + Proc = mod_http_upload:get_proc_name(ServerHost, ?PROCNAME), + Spec = {Proc, + {?MODULE, start_link, [ServerHost, Proc, Opts]}, + permanent, + 3000, + worker, + [?MODULE]}, + supervisor:start_child(ejabberd_sup, Spec). + +-spec stop(binary()) -> ok. + +stop(ServerHost) -> + Proc = mod_http_upload:get_proc_name(ServerHost, ?PROCNAME), + ok = supervisor:terminate_child(ejabberd_sup, Proc), + ok = supervisor:delete_child(ejabberd_sup, Proc). + +-spec mod_opt_type(atom()) -> fun((term()) -> term()) | [atom()]. + +mod_opt_type(access_soft_quota) -> + fun(A) when is_atom(A) -> A end; +mod_opt_type(access_hard_quota) -> + fun(A) when is_atom(A) -> A end; +mod_opt_type(max_days) -> + fun(I) when is_integer(I), I > 0 -> I; + (infinity) -> infinity + end; +mod_opt_type(_) -> + [access_soft_quota, access_hard_quota, max_days]. + +%%-------------------------------------------------------------------- +%% gen_server callbacks. +%%-------------------------------------------------------------------- + +-spec init({binary(), gen_mod:opts()}) -> {ok, state(), non_neg_integer()}. + +init({ServerHost, Opts}) -> + process_flag(trap_exit, true), + AccessSoftQuota = gen_mod:get_opt(access_soft_quota, Opts, + fun(A) when is_atom(A) -> A end, + soft_upload_quota), + AccessHardQuota = gen_mod:get_opt(access_hard_quota, Opts, + fun(A) when is_atom(A) -> A end, + hard_upload_quota), + MaxDays = gen_mod:get_opt(max_days, Opts, + fun(I) when is_integer(I), I > 0 -> I; + (infinity) -> infinity + end, + infinity), + DocRoot1 = gen_mod:get_module_opt(ServerHost, mod_http_upload, docroot, + fun iolist_to_binary/1, + <<"@HOME@/upload">>), + DocRoot2 = mod_http_upload:expand_home(str:strip(DocRoot1, right, $/)), + LastSweep = secs_since_epoch() - ?TIMEOUT + ?INITIAL_TIMEOUT, + ejabberd_hooks:add(http_upload_slot_request, ServerHost, ?MODULE, + handle_slot_request, 50), + {ok, #state{server_host = ServerHost, + access_soft_quota = AccessSoftQuota, + access_hard_quota = AccessHardQuota, + max_days = MaxDays, + docroot = DocRoot2, + last_sweep = LastSweep}, + ?INITIAL_TIMEOUT * 1000}. + +-spec handle_call(_, {pid(), _}, state()) + -> {noreply, state(), non_neg_integer()}. + +handle_call(Request, From, State) -> + ?ERROR_MSG("Got unexpected request from ~p: ~p", [From, Request]), + {noreply, State, remaining_timeout(State)}. + +-spec handle_cast(_, state()) -> {noreply, state(), non_neg_integer()}. + +handle_cast({handle_slot_request, #jid{user = U, server = S} = JID, Path, Size}, + #state{server_host = ServerHost, + access_soft_quota = AccessSoftQuota, + access_hard_quota = AccessHardQuota, + disk_usage = DiskUsage} = State) -> + HardQuota = case acl:match_rule(ServerHost, AccessHardQuota, JID) of + Hard when is_integer(Hard), Hard >= 0 -> + Hard * 1024 * 1024; + _ -> + 0 + end, + SoftQuota = case acl:match_rule(ServerHost, AccessSoftQuota, JID) of + Soft when is_integer(Soft), Soft >= 0 -> + Soft * 1024 * 1024; + _ -> + 0 + end, + OldSize = case dict:find({U, S}, DiskUsage) of + {ok, Value} -> + Value; + error -> + undefined + end, + NewSize = case {HardQuota, SoftQuota} of + {0, 0} -> + ?DEBUG("No quota specified for ~s", + [jlib:jid_to_string(JID)]), + Size; + {0, _} -> + ?WARNING_MSG("No hard quota specified for ~s", + [jlib:jid_to_string(JID)]), + enforce_quota(Path, Size, OldSize, SoftQuota, SoftQuota); + {_, 0} -> + ?WARNING_MSG("No soft quota specified for ~s", + [jlib:jid_to_string(JID)]), + enforce_quota(Path, Size, OldSize, HardQuota, HardQuota); + _ when SoftQuota > HardQuota -> + ?WARNING_MSG("Bad quota for ~s (soft: ~p, hard: ~p)", + [jlib:jid_to_string(JID), + SoftQuota, HardQuota]), + enforce_quota(Path, Size, OldSize, SoftQuota, SoftQuota); + _ -> + ?DEBUG("Enforcing quota for ~s", + [jlib:jid_to_string(JID)]), + enforce_quota(Path, Size, OldSize, SoftQuota, HardQuota) + end, + {noreply, State#state{disk_usage = dict:store({U, S}, NewSize, DiskUsage)}}; +handle_cast(Request, State) -> + ?ERROR_MSG("Got unexpected request: ~p", [Request]), + {noreply, State, remaining_timeout(State)}. + +-spec handle_info(timeout | _, state()) + -> {noreply, state(), non_neg_integer()}. + +handle_info(timeout, #state{max_days = infinity} = State) -> + {noreply, State, remaining_timeout(State)}; +handle_info(timeout, #state{docroot = DocRoot, max_days = MaxDays} = State) -> + ?DEBUG("Got timeout message", []), + Now = secs_since_epoch(), + case file:list_dir(DocRoot) of + {ok, Entries} -> + BackThen = Now - (MaxDays * 86400), + DocRootS = binary_to_list(DocRoot), + UserDirs = [DocRootS ++ "/" ++ Entry || Entry <- Entries, + filelib:is_dir(Entry)], + lists:foreach(fun(UserDir) -> delete_old_files(UserDir, BackThen) end, + UserDirs); + {error, Error} -> + ?ERROR_MSG("Cannot open document root ~s: ~p", [DocRoot, Error]) + end, + NewState = State#state{last_sweep = Now}, + {noreply, NewState, remaining_timeout(NewState)}; +handle_info(Info, State) -> + ?ERROR_MSG("Got unexpected info: ~p", [Info]), + {noreply, State, remaining_timeout(State)}. + +-spec terminate(normal | shutdown | {shutdown, _} | _, _) -> ok. + +terminate(Reason, #state{server_host = ServerHost}) -> + ?DEBUG("Stopping upload quota process for ~s: ~p", [ServerHost, Reason]), + ejabberd_hooks:delete(http_upload_slot_request, ServerHost, ?MODULE, + handle_slot_request, 50). + +-spec code_change({down, _} | _, state(), _) -> {ok, state()}. + +code_change(_OldVsn, #state{server_host = ServerHost} = State, _Extra) -> + ?DEBUG("Updating upload quota process for ~s", [ServerHost]), + {ok, State}. + +%%-------------------------------------------------------------------- +%% ejabberd_hooks callbacks. +%%-------------------------------------------------------------------- + +-spec handle_slot_request(term(), jid(), binary(), non_neg_integer(), binary()) + -> term(). + +handle_slot_request(allow, #jid{lserver = ServerHost} = JID, Path, Size, + _Lang) -> + Proc = mod_http_upload:get_proc_name(ServerHost, ?PROCNAME), + ?GEN_SERVER:cast(Proc, {handle_slot_request, JID, Path, Size}), + allow; +handle_slot_request(Acc, _JID, _Path, _Size, _Lang) -> Acc. + +%%-------------------------------------------------------------------- +%% Internal functions. +%%-------------------------------------------------------------------- + +-spec enforce_quota(file:filename_all(), non_neg_integer(), + non_neg_integer() | undefined, non_neg_integer(), + non_neg_integer()) + -> non_neg_integer(). + +enforce_quota(UserDir, SlotSize, OldSize, MinSize, MaxSize) + when is_integer(OldSize), OldSize + SlotSize =< MaxSize -> + OldSize + SlotSize; +enforce_quota(UserDir, SlotSize, _OldSize, MinSize, MaxSize) -> + Files = lists:sort(fun({_PathA, _SizeA, TimeA}, {_PathB, _SizeB, TimeB}) -> + TimeA > TimeB + end, gather_file_info(UserDir)), + {DelFiles, OldSize, NewSize} = + lists:foldl(fun({Path, Size, _Time}, {[], AccSize, AccSize}) + when AccSize + Size + SlotSize =< MinSize -> + {[], AccSize + Size, AccSize + Size}; + ({Path, Size, _Time}, {[], AccSize, AccSize}) -> + {[Path], AccSize + Size, AccSize}; + ({Path, Size, _Time}, {AccFiles, AccSize, NewSize}) -> + {[Path | AccFiles], AccSize + Size, NewSize} + end, {[], 0, 0}, Files), + if OldSize + SlotSize > MaxSize -> + lists:foreach(fun(File) -> del_file_and_dir(File) end, DelFiles), + file:del_dir(UserDir), % In case it's empty, now. + NewSize + SlotSize; + true -> + OldSize + SlotSize + end. + +-spec delete_old_files(file:filename_all(), integer()) -> ok. + +delete_old_files(UserDir, Timestamp) -> + case lists:filter(fun({_Path, _Size, Time}) -> Time < Timestamp end, + gather_file_info(UserDir)) of + [] -> + ok; + OldFiles -> + lists:foreach(fun(File) -> del_file_and_dir(File) end, OldFiles), + file:del_dir(UserDir) % In case it's empty, now. + end. + +-spec gather_file_info(file:filename_all()) + -> [{binary(), non_neg_integer(), non_neg_integer()}]. + +gather_file_info(Dir) when is_binary(Dir) -> + gather_file_info(binary_to_list(Dir)); +gather_file_info(Dir) -> + case file:list_dir(Dir) of + {ok, Entries} -> + lists:foldl(fun(Entry, Acc) -> + Path = Dir ++ "/" ++ Entry, + case file:read_file_info(Path, [{time, posix}]) of + {ok, #file_info{type = directory}} -> + gather_file_info(Path) ++ Acc; + {ok, #file_info{type = regular, + mtime = Time, + size = Size}} -> + [{Path, Size, Time} | Acc]; + {ok, _Info} -> + ?DEBUG("Won't stat(2) non-regular file ~s", + [Path]), + Acc; + {error, Error} -> + ?ERROR_MSG("Cannot stat(2) ~s: ~s", + [Path, Error]), + Acc + end + end, [], Entries); + {error, enoent} -> + ?DEBUG("Directory ~s doesn't exist", [Dir]), + []; + {error, Error} -> + ?ERROR_MSG("Cannot open directory ~s: ~p", [Dir, Error]), + [] + end. + +-spec del_file_and_dir(file:name_all()) -> ok. + +del_file_and_dir(File) -> + case file:delete(File) of + ok -> + ?INFO_MSG("Removed ~s", [File]), + Dir = filename:dirname(File), + case file:del_dir(Dir) of + ok -> + ?DEBUG("Removed ~s", [Dir]); + {error, Error} -> + ?INFO_MSG("Cannot remove ~s: ~s", [Dir, Error]) + end; + {error, Error} -> + ?WARNING_MSG("Cannot remove ~s: ~s", [File, Error]) + end. + +-spec remaining_timeout(state()) -> non_neg_integer() | infinity. + +remaining_timeout(#state{max_days = infinity}) -> + infinity; +remaining_timeout(#state{last_sweep = LastSweep}) -> + Diff = secs_since_epoch() - LastSweep, + if Diff > ?TIMEOUT; + Diff < 0 -> + 0; + true -> + (?TIMEOUT - Diff) * 1000 + end. + +-spec secs_since_epoch() -> non_neg_integer(). + +secs_since_epoch() -> + {MegaSecs, Secs, _MicroSecs} = os:timestamp(), + MegaSecs * 1000000 + Secs.