mod_spam_filter: Add "spam_dump_file" option

If the new "spam_dump_file" option is used, messages classified as spam
will be written to the specified file path.
This commit is contained in:
Holger Weiss 2019-04-24 19:19:15 +02:00
parent e26ea15d7f
commit 362c866862
2 changed files with 130 additions and 15 deletions

View File

@ -29,6 +29,16 @@ following:
The configurable mod_spam_filter options are: The configurable mod_spam_filter options are:
- spam_dump_file (default: none)
This option specifies the full path to a file that messages classified
as spam will be written to. The @HOST@ keyword will be substituted with
the name of the virtual host. Note that this module doesn't limit the
file size, so if you use this option, make sure to monitor disk
usage and to rotate the file if necessary. After rotation, the command
"ejabberdctl reopen-log" can be called to let the module reopen the spam
dump file.
- spam_jids_file (default: none) - spam_jids_file (default: none)
This option specifies the full path to a plain text file containing a This option specifies the full path to a plain text file containing a

View File

@ -47,7 +47,8 @@
%% ejabberd_hooks callbacks. %% ejabberd_hooks callbacks.
-export([s2s_in_handle_info/2, -export([s2s_in_handle_info/2,
s2s_receive_packet/1]). s2s_receive_packet/1,
reopen_log/0]).
%% ejabberd_commands callbacks. %% ejabberd_commands callbacks.
-export([get_commands_spec/0, reload_spam_filter_files/1, -export([get_commands_spec/0, reload_spam_filter_files/1,
@ -69,6 +70,7 @@
-record(state, -record(state,
{host = <<>> :: binary(), {host = <<>> :: binary(),
dump_fd = undefined :: file:io_device() | undefined,
url_set = sets:new() :: url_set(), url_set = sets:new() :: url_set(),
jid_set = sets:new() :: jid_set(), jid_set = sets:new() :: jid_set(),
jid_cache = #{} :: map(), jid_cache = #{} :: map(),
@ -104,6 +106,13 @@ depends(_Host, _Opts) ->
[]. [].
-spec mod_opt_type(atom()) -> fun((term()) -> term()) | [atom()]. -spec mod_opt_type(atom()) -> fun((term()) -> term()) | [atom()].
%% Validator for the "spam_dump_file" option: accepts 'none' or a file path
%% and returns the path as a binary.
mod_opt_type(spam_dump_file) ->
    fun(none) -> none;
       (File) ->
            Path = iolist_to_binary(File),
            case binary:match(Path, <<"@HOST@">>) of
                nomatch ->
                    %% Fixed path: make sure it can be opened for appending
                    %% (crashes with a badmatch otherwise).
                    {ok, Fd} = file:open(Path, [append, raw]),
                    ok = file:close(Fd);
                _Found ->
                    %% The @HOST@ keyword is only substituted per virtual
                    %% host in init/1. Probing the unexpanded template here
                    %% would create a stray file literally named "@HOST@",
                    %% so leave the check to init/1, which opens the
                    %% expanded path.
                    ok
            end,
            Path
    end;
mod_opt_type(spam_jids_file) -> mod_opt_type(spam_jids_file) ->
fun(none) -> none; fun(none) -> none;
(File) -> (File) ->
@ -127,7 +136,8 @@ mod_opt_type(cache_size) ->
-spec mod_options(binary()) -> [{atom(), any()}]. -spec mod_options(binary()) -> [{atom(), any()}].
mod_options(_Host) -> mod_options(_Host) ->
[{spam_jids_file, none}, [{spam_dump_file, none},
{spam_jids_file, none},
{spam_urls_file, none}, {spam_urls_file, none},
{access_spam, none}, {access_spam, none},
{cache_size, 10000}]. {cache_size, 10000}].
@ -138,6 +148,7 @@ mod_options(_Host) ->
-spec init(list()) -> {ok, state()} | {stop, term()}. -spec init(list()) -> {ok, state()} | {stop, term()}.
init([Host, Opts]) -> init([Host, Opts]) ->
process_flag(trap_exit, true), process_flag(trap_exit, true),
DumpFile = expand_host(proplists:get_value(spam_dump_file, Opts), Host),
JIDsFile = proplists:get_value(spam_jids_file, Opts), JIDsFile = proplists:get_value(spam_jids_file, Opts),
URLsFile = proplists:get_value(spam_urls_file, Opts), URLsFile = proplists:get_value(spam_urls_file, Opts),
try read_files(JIDsFile, URLsFile) of try read_files(JIDsFile, URLsFile) of
@ -146,9 +157,21 @@ init([Host, Opts]) ->
s2s_in_handle_info, 90), s2s_in_handle_info, 90),
ejabberd_hooks:add(s2s_receive_packet, Host, ?MODULE, ejabberd_hooks:add(s2s_receive_packet, Host, ?MODULE,
s2s_receive_packet, 50), s2s_receive_packet, 50),
DumpFd = if DumpFile == none ->
undefined;
true ->
Modes = [append, raw, binary, delayed_write],
case file:open(DumpFile, Modes) of
{ok, Fd} ->
Fd;
{error, Reason} ->
throw({open, DumpFile, Reason})
end
end,
{ok, #state{host = Host, {ok, #state{host = Host,
jid_set = JIDsSet, jid_set = JIDsSet,
url_set = URLsSet, url_set = URLsSet,
dump_fd = DumpFd,
max_cache_size = proplists:get_value(cache_size, Opts)}} max_cache_size = proplists:get_value(cache_size, Opts)}}
catch {Op, File, Reason} when Op == open; catch {Op, File, Reason} when Op == open;
Op == read -> Op == read ->
@ -187,20 +210,42 @@ handle_call(Request, From, State) ->
{noreply, State}. {noreply, State}.
-spec handle_cast(term(), state()) -> {noreply, state()}. -spec handle_cast(term(), state()) -> {noreply, state()}.
handle_cast({reload, NewOpts, OldOpts}, State) -> handle_cast({dump, _XML}, #state{dump_fd = undefined} = State) ->
JIDsFile = proplists:get_value(spam_jids_file, NewOpts), {noreply, State};
URLsFile = proplists:get_value(spam_urls_file, NewOpts), handle_cast({dump, XML}, #state{dump_fd = Fd} = State) ->
State1 = case {proplists:get_value(cache_size, OldOpts), case file:write(Fd, [XML, <<$\n>>]) of
proplists:get_value(cache_size, NewOpts)} of ok ->
{OldMax, NewMax} when NewMax < OldMax -> ok;
shrink_cache(State#state{max_cache_size = NewMax}); {error, Reason} ->
{OldMax, NewMax} when NewMax > OldMax -> ?ERROR_MSG("Cannot write spam to dump file: ~s",
State#state{max_cache_size = NewMax}; [file:format_error(Reason)])
{_OldMax, _NewMax} -> end,
{noreply, State};
handle_cast({reload, NewOpts, OldOpts}, #state{host = Host} = State) ->
State1 = case {proplists:get_value(spam_dump_file, OldOpts),
proplists:get_value(spam_dump_file, NewOpts)} of
{OldDumpFile, NewDumpFile} when NewDumpFile /= OldDumpFile ->
close_dump_file(expand_host(OldDumpFile, Host), State),
open_dump_file(expand_host(NewDumpFile, Host), State);
{_OldDumpFile, _NewDumpFile} ->
State State
end, end,
{_Result, State2} = reload_files(JIDsFile, URLsFile, State1), State2 = case {proplists:get_value(cache_size, OldOpts),
{noreply, State2}; proplists:get_value(cache_size, NewOpts)} of
{OldMax, NewMax} when NewMax < OldMax ->
shrink_cache(State1#state{max_cache_size = NewMax});
{OldMax, NewMax} when NewMax > OldMax ->
State1#state{max_cache_size = NewMax};
{_OldMax, _NewMax} ->
State1
end,
JIDsFile = proplists:get_value(spam_jids_file, NewOpts),
URLsFile = proplists:get_value(spam_urls_file, NewOpts),
{_Result, State3} = reload_files(JIDsFile, URLsFile, State2),
{noreply, State3};
handle_cast(reopen_log, State) ->
close_dump_file(State),
{noreply, open_dump_file(State)};
handle_cast(Request, State) -> handle_cast(Request, State) ->
?ERROR_MSG("Got unexpected request from: ~p", [Request]), ?ERROR_MSG("Got unexpected request from: ~p", [Request]),
{noreply, State}. {noreply, State}.
@ -211,8 +256,9 @@ handle_info(Info, State) ->
{noreply, State}. {noreply, State}.
-spec terminate(normal | shutdown | {shutdown, term()} | term(), state()) -> ok. -spec terminate(normal | shutdown | {shutdown, term()} | term(), state()) -> ok.
terminate(Reason, #state{host = Host}) -> terminate(Reason, #state{host = Host} = State) ->
?DEBUG("Stopping spam filter process for ~s: ~p", [Host, Reason]), ?DEBUG("Stopping spam filter process for ~s: ~p", [Host, Reason]),
close_dump_file(State),
ejabberd_hooks:delete(s2s_receive_packet, Host, ?MODULE, ejabberd_hooks:delete(s2s_receive_packet, Host, ?MODULE,
s2s_receive_packet, 50), s2s_receive_packet, 50),
ejabberd_hooks:delete(s2s_in_handle_info, Host, ?MODULE, ejabberd_hooks:delete(s2s_in_handle_info, Host, ?MODULE,
@ -279,6 +325,13 @@ s2s_in_handle_info(State, {_Ref, {spam_filter, _}}) ->
s2s_in_handle_info(State, _) -> s2s_in_handle_info(State, _) ->
State. State.
%% Asks the spam filter process of every configured virtual host to close
%% and reopen its spam dump file. The requests are asynchronous casts.
-spec reopen_log() -> ok.
reopen_log() ->
    Notify = fun(Host) ->
                     gen_server:cast(get_proc_name(Host), reopen_log)
             end,
    lists:foreach(Notify, ejabberd_config:get_myhosts()).
%%-------------------------------------------------------------------- %%--------------------------------------------------------------------
%% Internal functions. %% Internal functions.
%%-------------------------------------------------------------------- %%--------------------------------------------------------------------
@ -488,6 +541,7 @@ reject(#message{from = From, to = To, type = Type, lang = Lang} = Msg)
[jid:encode(From), jid:encode(To)]), [jid:encode(From), jid:encode(To)]),
Txt = <<"Your message is unsolicited">>, Txt = <<"Your message is unsolicited">>,
Err = xmpp:err_policy_violation(Txt, Lang), Err = xmpp:err_policy_violation(Txt, Lang),
maybe_dump_spam(Msg),
ejabberd_router:route_error(Msg, Err); ejabberd_router:route_error(Msg, Err);
reject(#presence{from = From, to = To, lang = Lang} = Presence) -> reject(#presence{from = From, to = To, lang = Lang} = Presence) ->
?INFO_MSG("Rejecting unsolicited presence from ~s to ~s", ?INFO_MSG("Rejecting unsolicited presence from ~s to ~s",
@ -498,10 +552,61 @@ reject(#presence{from = From, to = To, lang = Lang} = Presence) ->
reject(_) -> reject(_) ->
ok. ok.
%% Opens the dump file currently configured for the state's host (with
%% @HOST@ expanded) and returns the state holding the resulting descriptor.
-spec open_dump_file(state()) -> state().
open_dump_file(#state{host = Host} = State) ->
    Configured = gen_mod:get_module_opt(Host, ?MODULE, spam_dump_file),
    open_dump_file(expand_host(Configured, Host), State).
%% Opens the given dump file for appending and stores the descriptor in the
%% state. With 'none', or if the file cannot be opened (which is logged),
%% the descriptor is set to 'undefined'.
-spec open_dump_file(filename(), state()) -> state().
open_dump_file(none, State) ->
    State#state{dump_fd = undefined};
open_dump_file(Path, State) ->
    DumpFd = case file:open(Path, [append, raw, binary, delayed_write]) of
                 {ok, IoDevice} ->
                     IoDevice;
                 {error, Why} ->
                     ?ERROR_MSG("Cannot open ~s: ~s",
                                [Path, file:format_error(Why)]),
                     undefined
             end,
    State#state{dump_fd = DumpFd}.
%% Closes the state's dump file descriptor, if any. The configured file name
%% (with @HOST@ expanded) is looked up so it can be passed on to
%% close_dump_file/2.
-spec close_dump_file(state()) -> ok.
close_dump_file(#state{host = Host} = State) ->
    Configured = gen_mod:get_module_opt(Host, ?MODULE, spam_dump_file),
    close_dump_file(expand_host(Configured, Host), State).
%% Closes the dump file descriptor held in the state, if there is one.
%% Name is only used for the error message; it may be 'none' when no dump
%% file was configured. Always returns ok; a failed close is logged.
-spec close_dump_file(binary() | none, state()) -> ok.
close_dump_file(_Name, #state{dump_fd = undefined}) ->
    ok;
close_dump_file(Name, #state{dump_fd = Fd}) ->
    case file:close(Fd) of
        ok ->
            ok;
        {error, Reason} ->
            ?ERROR_MSG("Cannot close ~s: ~s", [Name, file:format_error(Reason)]),
            %% The dump file is opened with 'delayed_write'. In that mode
            %% file:close/1 may merely report an earlier write error without
            %% closing the file, so try once more to avoid leaking the
            %% descriptor. The second result is deliberately ignored: the
            %% failure has already been logged above.
            _ = file:close(Fd),
            ok
    end.
%% Serializes a rejected message, after passing it through
%% misc:add_delay_info/3 with the recipient's server JID and the current
%% time, and casts the resulting XML to the spam filter process of the
%% recipient's host. That process decides whether anything gets written.
-spec maybe_dump_spam(message()) -> ok.
maybe_dump_spam(#message{to = #jid{lserver = LServer}} = Msg) ->
    ServerJID = jid:make(<<>>, LServer),
    Stamped = misc:add_delay_info(Msg, ServerJID, erlang:timestamp()),
    Dump = fxml:element_to_binary(xmpp:encode(Stamped)),
    gen_server:cast(get_proc_name(LServer), {dump, Dump}).
-spec get_proc_name(binary()) -> atom(). -spec get_proc_name(binary()) -> atom().
get_proc_name(Host) -> get_proc_name(Host) ->
gen_mod:get_module_proc(Host, ?MODULE). gen_mod:get_module_proc(Host, ?MODULE).
%% Substitutes the @HOST@ keyword in Input with Host. The atom 'none'
%% (no file configured) is passed through unchanged.
-spec expand_host(binary() | none, binary()) -> binary() | none.
expand_host(Input, Host) ->
    case Input of
        none ->
            none;
        _Path ->
            misc:expand_keyword(<<"@HOST@">>, Input, Host)
    end.
-spec sets_equal(sets:set(), sets:set()) -> boolean(). -spec sets_equal(sets:set(), sets:set()) -> boolean().
sets_equal(A, B) -> sets_equal(A, B) ->
sets:is_subset(A, B) andalso sets:is_subset(B, A). sets:is_subset(A, B) andalso sets:is_subset(B, A).