ejabberd-contrib/mod_pottymouth/src/mod_pottymouth.erl

96 lines
3.0 KiB
Erlang
Raw Normal View History

2016-06-24 17:34:37 +02:00
-module(mod_pottymouth).
-behaviour(gen_mod).
-include("logger.hrl").
2016-06-24 17:34:37 +02:00
-export([
start/2,
stop/1,
on_filter_packet/1,
mod_opt_type/1,
depends/2
2016-06-24 17:34:37 +02:00
]).
-include("ejabberd.hrl").
%% Helper servers used by this module. Call sites use fully qualified
%% module:function calls; the imports are kept for backward compatibility.
-import(bloom_gen_server, [start/0, stop/0, member/1]).
-import(normalize_leet_gen_server, [normalize/1]).
2016-06-24 17:34:37 +02:00
%% @doc Pick the language key used to select a blacklist/charmap.
%%
%% `Attrs' is a list of `{Name, Value}' binary pairs. When it contains a
%% `<<"lang">>' entry whose value names an already existing atom, that atom
%% is returned; otherwise `default' is returned.
%%
%% The lookup result is matched with `case': a tuple used directly as an
%% `if' guard never evaluates to `true', which would make the language
%% branch unreachable.
getMessageLang(Attrs) ->
    case lists:keyfind(<<"lang">>, 1, Attrs) of
        {<<"lang">>, LangBin} ->
            %% The value is sender-controlled, so never create new atoms
            %% from it (atoms are not garbage collected). Unknown languages
            %% fall back to `default'.
            %% NOTE(review): assumes configured language keys already exist
            %% as atoms when messages are filtered - confirm against the
            %% blacklists/charmaps configuration.
            try
                binary_to_existing_atom(LangBin, utf8)
            catch
                error:badarg -> default
            end;
        false ->
            default
    end.
%% Censor a single word.
%%
%% Takes a `{Lang, Word}' term, where `Word' must be acceptable to
%% list_to_binary/1. Returns "****" when the bloom filter for `Lang'
%% reports the normalized word as a member, and `Word' unchanged otherwise.
censorWord({Lang, Word}) ->
    %% Normalization works on Unicode code points.
    CodePoints = unicode:characters_to_list(list_to_binary(Word)),
    Normalized = normalize_leet_gen_server:normalize({Lang, CodePoints}),
    %% The bloom filter lookup works on the byte-wise representation.
    Bytes = binary_to_list(unicode:characters_to_binary(Normalized)),
    case bloom_gen_server:member({Lang, Bytes}) of
        true -> "****";
        _NotBlacklisted -> Word
    end.
%% Run censorWord/1 over every `{Lang, Word}' term in `L', keeping the order.
filterWords(L) ->
    [censorWord(Term) || Term <- L].
%% Censor the text of one message body.
%%
%% `MessageAttrs' is the attribute list used to determine the language and
%% `MessageText' is the body text. The text is split on spaces, each word is
%% passed through censorWord/1 together with the language, and the results
%% are joined with single spaces. string:tokens/2 drops empty tokens, so
%% runs of spaces in the input come back as a single space.
filterMessageText(MessageAttrs, MessageText) ->
    Lang = getMessageLang(MessageAttrs),
    %% Tokenize on the character list so that words are split as UTF-8 text.
    Chars = unicode:characters_to_list(MessageText, utf8),
    %% censorWord/1 hands back byte-wise words (or the "****" replacement).
    Censored = [censorWord({Lang, Word}) || Word <- string:tokens(Chars, " ")],
    string:join(Censored, " ").
2016-09-13 23:13:32 +02:00
%% Filter the child elements of a message stanza.
%%
%% `Elements' is the list of child elements still to process and
%% `MessageElements' holds the elements already processed. Returns
%% `MessageElements' followed by every element of `Elements' in its original
%% order, with matching body elements censored.
%%
%% The input is traversed once and appended once, instead of appending a
%% one-element list on every recursion step (which is quadratic).
filterMessageBodyElements(Elements, MessageElements) ->
    MessageElements ++ [filterBodyElement(El) || El <- Elements].

%% Censor one element: a `body' element holding exactly one cdata child gets
%% its text filtered; any other element is passed through untouched.
filterBodyElement({xmlel, <<"body">>, BodyAttr, [{xmlcdata, MessageText}]}) ->
    FilteredText = binary:list_to_bin(
        filterMessageText(BodyAttr, binary:bin_to_list(MessageText))
    ),
    {xmlel, <<"body">>, BodyAttr, [{xmlcdata, FilteredText}]};
filterBodyElement(Other) ->
    Other.
2016-09-01 23:00:42 +02:00
2016-06-24 17:34:37 +02:00
%% gen_mod callback: start the module.
%%
%% Starts one bloom_gen_server per entry of the `blacklists' option and one
%% normalize_leet_gen_server per entry of the `charmaps' option (both default
%% to an empty list), then registers on_filter_packet/1 on the global
%% `filter_packet' hook. Returns `ok'.
start(_Host, Opts) ->
    Identity = fun(A) -> A end,
    %% The start results are not used, so iterate for side effects only.
    lists:foreach(
        fun bloom_gen_server:start/1,
        gen_mod:get_opt(blacklists, Opts, Identity, [])
    ),
    lists:foreach(
        fun normalize_leet_gen_server:start/1,
        gen_mod:get_opt(charmaps, Opts, Identity, [])
    ),
    ejabberd_hooks:add(filter_packet, global, ?MODULE, on_filter_packet, 0),
    ok.
%% gen_mod callback: stop the module. Returns `ok'.
%%
%% Teardown runs in the reverse order of start/2: the `filter_packet' hook is
%% removed first so that on_filter_packet/1 is no longer invoked while the
%% helper servers are going down, and so that a failure while stopping a
%% server cannot leave the hook registered.
stop(_Host) ->
    ejabberd_hooks:delete(filter_packet, global, ?MODULE, on_filter_packet, 0),
    bloom_gen_server:stop(),
    normalize_leet_gen_server:stop(),
    ok.
%% `filter_packet' hook callback.
%%
%% A packet already marked `drop' stays dropped. For a `{From, To, Packet}'
%% triple whose packet is a `message' element, the child elements are run
%% through filterMessageBodyElements/2 and the triple is rebuilt with the
%% filtered children. Anything else is returned unchanged.
on_filter_packet(drop) ->
    drop;
on_filter_packet({From, To, {xmlel, <<"message">>, Attrs, Els}}) ->
    {From, To, {xmlel, <<"message">>, Attrs, filterMessageBodyElements(Els, [])}};
on_filter_packet(Other) ->
    %% Generic case: any packet that is not a message element.
    Other.
%% gen_mod callback: option validators.
%%
%% For `blacklists' and `charmaps' returns a fun that accepts a list and
%% returns it unchanged (any other value fails with `function_clause').
%% For any other argument returns the list of known option names.
mod_opt_type(Opt) when Opt =:= blacklists; Opt =:= charmaps ->
    fun(Value) when is_list(Value) -> Value end;
mod_opt_type(_) ->
    [blacklists, charmaps].
%% gen_mod callback: this module declares no dependencies on other modules.
depends(_Host, _Opts) ->
    [].