mod_spam_filter: Also check body for listed JIDs
Also check whether the message body contains a listed JID (usually mentioned as a contact address), rather than just using the JID list to check the 'from' address of incoming stanzas.
This commit is contained in:
parent
8f63b2cbae
commit
f459a7e57c
|
@ -34,6 +34,10 @@ The configurable mod_spam_filter options are:
|
||||||
This option specifies the full path to a plain text file containing a
|
This option specifies the full path to a plain text file containing a
|
||||||
list of known spammer JIDs, one JID per line. Messages and subscription
|
list of known spammer JIDs, one JID per line. Messages and subscription
|
||||||
requests sent from one of the listed JIDs will be classified as spam.
|
requests sent from one of the listed JIDs will be classified as spam.
|
||||||
|
Messages containing at least one of the listed JIDs will be classified
|
||||||
|
as spam as well. Furthermore, the sender's JID will be cached, so that
|
||||||
|
future traffic originating from that JID will also be classified as
|
||||||
|
spam.
|
||||||
|
|
||||||
- spam_urls_file (default: none)
|
- spam_urls_file (default: none)
|
||||||
|
|
||||||
|
|
|
@ -62,12 +62,14 @@
|
||||||
|
|
||||||
-type url() :: binary().
|
-type url() :: binary().
|
||||||
-type filename() :: binary() | none.
|
-type filename() :: binary() | none.
|
||||||
|
-type jid_set() :: sets:set(ljid()).
|
||||||
|
-type url_set() :: sets:set(url()).
|
||||||
-type s2s_in_state() :: ejabberd_s2s_in:state().
|
-type s2s_in_state() :: ejabberd_s2s_in:state().
|
||||||
|
|
||||||
-record(state,
|
-record(state,
|
||||||
{host = <<>> :: binary(),
|
{host = <<>> :: binary(),
|
||||||
url_set = sets:new() :: sets:set(url()),
|
url_set = sets:new() :: url_set(),
|
||||||
jid_set = sets:new() :: sets:set(ljid()),
|
jid_set = sets:new() :: jid_set(),
|
||||||
jid_cache = #{} :: map(),
|
jid_cache = #{} :: map(),
|
||||||
max_cache_size = 0 :: non_neg_integer() | unlimited}).
|
max_cache_size = 0 :: non_neg_integer() | unlimited}).
|
||||||
|
|
||||||
|
@ -156,34 +158,18 @@ init([Host, Opts]) ->
|
||||||
-spec handle_call(term(), {pid(), term()}, state())
|
-spec handle_call(term(), {pid(), term()}, state())
|
||||||
-> {reply, {spam_filter, term()}, state()} | {noreply, state()}.
|
-> {reply, {spam_filter, term()}, state()} | {noreply, state()}.
|
||||||
handle_call({check_jid, From}, _From, #state{jid_set = JIDsSet} = State) ->
|
handle_call({check_jid, From}, _From, #state{jid_set = JIDsSet} = State) ->
|
||||||
{Result, State2} =
|
{Result, State1} = filter_jid(From, JIDsSet, State),
|
||||||
case sets:is_element(From, JIDsSet) of
|
{reply, {spam_filter, Result}, State1};
|
||||||
|
handle_call({check_body, URLs, JIDs, From}, _From,
|
||||||
|
#state{url_set = URLsSet, jid_set = JIDsSet} = State) ->
|
||||||
|
{Result1, State1} = filter_body(URLs, URLsSet, From, State),
|
||||||
|
{Result2, State2} = filter_body(JIDs, JIDsSet, From, State1),
|
||||||
|
Result = if Result1 == spam ->
|
||||||
|
Result1;
|
||||||
true ->
|
true ->
|
||||||
?DEBUG("Spam JID found: ~s", [jid:encode(From)]),
|
Result2
|
||||||
{spam, State};
|
|
||||||
false ->
|
|
||||||
case cache_lookup(From, State) of
|
|
||||||
{true, State1} ->
|
|
||||||
?DEBUG("Spam JID found: ~s", [jid:encode(From)]),
|
|
||||||
{spam, State1};
|
|
||||||
{false, State1} ->
|
|
||||||
?DEBUG("JID not listed: ~s", [jid:encode(From)]),
|
|
||||||
{ham, State1}
|
|
||||||
end
|
|
||||||
end,
|
end,
|
||||||
{reply, {spam_filter, Result}, State2};
|
{reply, {spam_filter, Result}, State2};
|
||||||
handle_call({check_urls, URLs, From}, _From,
|
|
||||||
#state{url_set = URLsSet} = State) ->
|
|
||||||
{Result, State1} =
|
|
||||||
case lists:any(fun(URL) -> sets:is_element(URL, URLsSet) end, URLs) of
|
|
||||||
true ->
|
|
||||||
?DEBUG("Spam URL(s) found: ~p", [URLs]),
|
|
||||||
{spam, cache_insert(From, State)};
|
|
||||||
false ->
|
|
||||||
?DEBUG("URL(s) not listed: ~p", [URLs]),
|
|
||||||
{ham, State}
|
|
||||||
end,
|
|
||||||
{reply, {spam_filter, Result}, State1};
|
|
||||||
handle_call({reload_files, JIDsFile, URLsFile}, _From, State) ->
|
handle_call({reload_files, JIDsFile, URLsFile}, _From, State) ->
|
||||||
{Result, State1} = reload_files(JIDsFile, URLsFile, State),
|
{Result, State1} = reload_files(JIDsFile, URLsFile, State),
|
||||||
{reply, {spam_filter, Result}, State1};
|
{reply, {spam_filter, Result}, State1};
|
||||||
|
@ -324,21 +310,20 @@ check_from(Host, From) ->
|
||||||
|
|
||||||
-spec check_body(binary(), jid(), binary()) -> ham | spam.
|
-spec check_body(binary(), jid(), binary()) -> ham | spam.
|
||||||
check_body(Host, From, Body) ->
|
check_body(Host, From, Body) ->
|
||||||
case extract_urls(Body) of
|
case {extract_urls(Body), extract_jids(Body)} of
|
||||||
{urls, URLs} ->
|
{none, none} ->
|
||||||
|
?DEBUG("No JIDs/URLs found in message", []),
|
||||||
|
ham;
|
||||||
|
{URLs, JIDs} ->
|
||||||
Proc = get_proc_name(Host),
|
Proc = get_proc_name(Host),
|
||||||
LFrom = jid:remove_resource(jid:tolower(From)),
|
LFrom = jid:remove_resource(jid:tolower(From)),
|
||||||
try gen_server:call(Proc, {check_urls, URLs, LFrom}) of
|
try gen_server:call(Proc, {check_body, URLs, JIDs, LFrom}) of
|
||||||
{spam_filter, Result} ->
|
{spam_filter, Result} ->
|
||||||
Result
|
Result
|
||||||
catch exit:{timeout, _} ->
|
catch exit:{timeout, _} ->
|
||||||
?WARNING_MSG("Timeout while checking body for spam URLs",
|
?WARNING_MSG("Timeout while checking body", []),
|
||||||
[]),
|
|
||||||
ham
|
|
||||||
end;
|
|
||||||
none ->
|
|
||||||
?DEBUG("No URL(s) found in message", []),
|
|
||||||
ham
|
ham
|
||||||
|
end
|
||||||
end.
|
end.
|
||||||
|
|
||||||
-spec extract_urls(binary()) -> {urls, [url()]} | none.
|
-spec extract_urls(binary()) -> {urls, [url()]} | none.
|
||||||
|
@ -352,6 +337,59 @@ extract_urls(Body) ->
|
||||||
none
|
none
|
||||||
end.
|
end.
|
||||||
|
|
||||||
|
%% Scan a message body for tokens that look like JIDs.
%%
%% Every run of non-whitespace characters containing an "@" is handed to
%% try_decode_jid/1; only the candidates it accepts are kept. Returns
%% `none' when the body contains no such token at all. Note that the
%% result may be `{jids, []}' when tokens matched but none of them was
%% accepted by try_decode_jid/1.
-spec extract_jids(binary()) -> {jids, [ljid()]} | none.
extract_jids(Body) ->
    Pattern = <<"\\S+@\\S+">>,
    case re:run(Body, Pattern, [global, {capture, all, binary}]) of
        nomatch ->
            none;
        {match, Matches} when is_list(Matches) ->
            %% With `global', each match is reported as its own list, so
            %% flatten to obtain a plain list of candidate binaries.
            Candidates = lists:flatten(Matches),
            {jids, lists:filtermap(fun try_decode_jid/1, Candidates)}
    end.
|
||||||
|
|
||||||
|
%% Attempt to decode a candidate binary as a JID.
%%
%% On success the result is `{true, LJID}', where LJID is the decoded JID
%% passed through jid:tolower/1 and jid:remove_resource/1. A `{bad_jid, _}'
%% exception raised by jid:decode/1 yields `false'. The return shape is the
%% one lists:filtermap/2 expects from its callback.
-spec try_decode_jid(binary()) -> {true, ljid()} | false.
try_decode_jid(Candidate) ->
    try jid:decode(Candidate) of
        #jid{} = Decoded ->
            Lowered = jid:tolower(Decoded),
            {true, jid:remove_resource(Lowered)}
    catch
        _:{bad_jid, _} ->
            false
    end.
|
||||||
|
|
||||||
|
%% Classify a sender JID as `spam' or `ham'.
%%
%% The JID is first looked up in the given set; only if it is not a member
%% is cache_lookup/2 consulted. The state returned is the one handed back
%% by cache_lookup/2 when the cache was consulted, and the unchanged input
%% state otherwise.
-spec filter_jid(ljid(), jid_set(), state()) -> {ham | spam, state()}.
filter_jid(From, Set, State) ->
    {IsSpammer, NewState} =
        case sets:is_element(From, Set) of
            true ->
                %% Listed JIDs short-circuit the cache lookup.
                {true, State};
            false ->
                cache_lookup(From, State)
        end,
    case IsSpammer of
        true ->
            ?DEBUG("Spam JID found: ~s", [jid:encode(From)]),
            {spam, NewState};
        false ->
            ?DEBUG("JID not listed: ~s", [jid:encode(From)]),
            {ham, NewState}
    end.
|
||||||
|
|
||||||
|
%% Classify a message by the addresses (URLs or JIDs) extracted from its
%% body.
%%
%% Arguments:
%%   - the extraction result: `{urls, List}', `{jids, List}', or `none';
%%   - the set of listed addresses of the matching kind;
%%   - the sender, used as the key passed to cache_insert/2;
%%   - the server state.
%%
%% If at least one extracted address is a member of the set, the message is
%% `spam' and the sender is added to the cache via cache_insert/2.
%% Otherwise, including when nothing was extracted, the result is `ham' and
%% the state is returned unchanged.
%%
%% The sender is typed as `ljid()' rather than `jid()': the caller passes
%% the output of jid:remove_resource(jid:tolower(From)), not a #jid{} record.
-spec filter_body({urls, [url()]} | {jids, [ljid()]} | none,
                  url_set() | jid_set(), ljid(), state())
      -> {ham | spam, state()}.
filter_body({_, Addrs}, Set, From, State) ->
    case lists:any(fun(Addr) -> sets:is_element(Addr, Set) end, Addrs) of
        true ->
            ?DEBUG("Spam addresses found: ~p", [Addrs]),
            {spam, cache_insert(From, State)};
        false ->
            ?DEBUG("Addresses not listed: ~p", [Addrs]),
            {ham, State}
    end;
filter_body(none, _Set, _From, State) ->
    {ham, State}.
|
||||||
|
|
||||||
-spec reload_files(filename(), filename(), state())
|
-spec reload_files(filename(), filename(), state())
|
||||||
-> {{ok | error, binary()}, state()}.
|
-> {{ok | error, binary()}, state()}.
|
||||||
reload_files(JIDsFile, URLsFile, #state{host = Host} = State) ->
|
reload_files(JIDsFile, URLsFile, #state{host = Host} = State) ->
|
||||||
|
@ -379,13 +417,13 @@ reload_files(JIDsFile, URLsFile, #state{host = Host} = State) ->
|
||||||
{{error, Txt}, State}
|
{{error, Txt}, State}
|
||||||
end.
|
end.
|
||||||
|
|
||||||
-spec read_files(filename(), filename()) -> {sets:set(ljid()), sets:set(url())}.
|
-spec read_files(filename(), filename()) -> {jid_set(), url_set()}.
|
||||||
read_files(JIDsFile, URLsFile) ->
|
read_files(JIDsFile, URLsFile) ->
|
||||||
{read_file(JIDsFile, fun parse_jid/1),
|
{read_file(JIDsFile, fun parse_jid/1),
|
||||||
read_file(URLsFile, fun parse_url/1)}.
|
read_file(URLsFile, fun parse_url/1)}.
|
||||||
|
|
||||||
-spec read_file(filename(), fun((binary()) -> ljid() | url()))
|
-spec read_file(filename(), fun((binary()) -> ljid() | url()))
|
||||||
-> sets:set(ljid()) | sets:set(url()).
|
-> jid_set() | url_set().
|
||||||
read_file(none, _ParseLine) ->
|
read_file(none, _ParseLine) ->
|
||||||
sets:new();
|
sets:new();
|
||||||
read_file(File, ParseLine) ->
|
read_file(File, ParseLine) ->
|
||||||
|
@ -400,8 +438,8 @@ read_file(File, ParseLine) ->
|
||||||
end.
|
end.
|
||||||
|
|
||||||
-spec read_line(file:io_device(), fun((binary()) -> ljid() | url()),
|
-spec read_line(file:io_device(), fun((binary()) -> ljid() | url()),
|
||||||
sets:set(ljid()) | sets:set(url()))
|
jid_set() | url_set())
|
||||||
-> sets:set(ljid()) | sets:set(url()).
|
-> jid_set() | url_set().
|
||||||
read_line(Fd, ParseLine, Set) ->
|
read_line(Fd, ParseLine, Set) ->
|
||||||
case file:read_line(Fd) of
|
case file:read_line(Fd) of
|
||||||
{ok, Line} ->
|
{ok, Line} ->
|
||||||
|
|
Loading…
Reference in New Issue