2016-06-24 17:34:37 +02:00
|
|
|
-module(mod_pottymouth).
|
|
|
|
|
|
|
|
-behaviour(gen_mod).
|
|
|
|
|
2016-08-02 16:42:37 +02:00
|
|
|
-include("logger.hrl").
|
|
|
|
|
2016-06-24 17:34:37 +02:00
|
|
|
-export([
|
|
|
|
start/2,
|
|
|
|
stop/1,
|
|
|
|
on_filter_packet/1,
|
|
|
|
mod_opt_type/1
|
|
|
|
]).
|
|
|
|
|
|
|
|
-include("ejabberd.hrl").
|
|
|
|
|
|
|
|
-import(bloom_gen_server, [start/0, stop/0, member/1]).
|
2016-07-19 23:34:04 +02:00
|
|
|
-import(nomalize_leet_gen_server, [normalize/1]).
|
2016-06-24 17:34:37 +02:00
|
|
|
|
|
|
|
getMessageLang(Attrs) ->
|
|
|
|
LangAttr = lists:keyfind(<<"lang">>, 1, Attrs),
|
|
|
|
if
|
|
|
|
LangAttr ->
|
|
|
|
{<<"lang">>, LangBin} = LangAttr,
|
2016-09-01 23:00:42 +02:00
|
|
|
Lang = list_to_atom(binary_to_list(LangBin)),
|
|
|
|
?ERROR_MSG("LANG: ~p~n", [Lang]);
|
2016-06-24 17:34:37 +02:00
|
|
|
true ->
|
2016-09-01 23:00:42 +02:00
|
|
|
Lang = default,
|
|
|
|
?ERROR_MSG("LANG DEFAULT~n", [])
|
2016-06-24 17:34:37 +02:00
|
|
|
end,
|
|
|
|
Lang.
|
|
|
|
|
2016-09-13 21:22:37 +02:00
|
|
|
censorWord({Lang, Word} = _MessageTerm) ->
|
|
|
|
% we need unicode characters to normlize the word
|
|
|
|
NormalizedWord = normalize_leet_gen_server:normalize({Lang, unicode:characters_to_list(list_to_binary(Word))}),
|
|
|
|
% we need bytewise format for bloom lookup
|
|
|
|
IsBadWord = bloom_gen_server:member({Lang, binary_to_list(unicode:characters_to_binary(NormalizedWord))}),
|
2016-06-24 17:34:37 +02:00
|
|
|
if
|
|
|
|
IsBadWord ->
|
|
|
|
"****";
|
|
|
|
true ->
|
|
|
|
Word
|
|
|
|
end.
|
|
|
|
|
|
|
|
filterWords(L) ->
|
|
|
|
lists:map(fun censorWord/1, L).
|
|
|
|
|
2016-08-02 16:42:37 +02:00
|
|
|
filterMessageText(MessageAttrs, MessageText) ->
|
|
|
|
Lang = getMessageLang(MessageAttrs),
|
2016-09-13 21:22:37 +02:00
|
|
|
% we want to token-ize utf8 'words'
|
2016-08-02 16:42:37 +02:00
|
|
|
MessageWords = string:tokens(unicode:characters_to_list(MessageText, utf8), " "),
|
|
|
|
MessageTerms = [{Lang, Word} || Word <- MessageWords],
|
2016-09-13 21:22:37 +02:00
|
|
|
% we get back bytewise format terms (rather than utf8)
|
|
|
|
list_to_binary(string:join(filterWords(MessageTerms), " ")).
|
2016-08-02 16:42:37 +02:00
|
|
|
|
2016-09-01 23:00:42 +02:00
|
|
|
filterMessageBodyElements([H|T]) ->
|
|
|
|
lists:map
|
|
|
|
filterMessageBodyElements([], Element) ->
|
|
|
|
Element.
|
|
|
|
|
2016-06-24 17:34:37 +02:00
|
|
|
start(_Host, Opts) ->
|
|
|
|
Blacklists = gen_mod:get_opt(blacklists, Opts, fun(A) -> A end, []),
|
|
|
|
lists:map(fun bloom_gen_server:start/1, Blacklists),
|
2016-07-19 23:34:04 +02:00
|
|
|
CharMaps = gen_mod:get_opt(charmaps, Opts, fun(A) -> A end, []),
|
|
|
|
lists:map(fun normalize_leet_gen_server:start/1, CharMaps),
|
2016-06-24 17:34:37 +02:00
|
|
|
ejabberd_hooks:add(filter_packet, global, ?MODULE, on_filter_packet, 0),
|
|
|
|
ok.
|
|
|
|
|
|
|
|
stop(_Host) ->
|
|
|
|
bloom_gen_server:stop(),
|
2016-07-19 23:34:04 +02:00
|
|
|
normalize_leet_gen_server:stop(),
|
2016-06-24 17:34:37 +02:00
|
|
|
ejabberd_hooks:delete(filter_packet, global, ?MODULE, on_filter_packet, 0),
|
|
|
|
ok.
|
|
|
|
|
|
|
|
on_filter_packet(drop) ->
|
|
|
|
drop;
|
|
|
|
|
|
|
|
|
2016-09-01 23:00:42 +02:00
|
|
|
% on_filter_packet({_From, _To, {xmlel, <<"message">>, _Attrs, [_chatState, {xmlel, <<"body">>, BodyAttr, [{xmlcdata, MessageText}] = _BodyCData} = _MessageBody] = _Els} = _Packet} = _Msg) ->
|
|
|
|
% FilteredMessageWords = filterMessageText(BodyAttr, binary:bin_to_list(MessageText)),
|
|
|
|
% {_From, _To, {xmlel, <<"message">>, _Attrs, [_chatState, {xmlel, <<"body">>, BodyAttr, [{xmlcdata, FilteredMessageWords}]}]}};
|
|
|
|
%
|
|
|
|
% on_filter_packet({_From, _To, {xmlel, <<"message">>, _Attrs, [{xmlel, <<"body">>, BodyAttr, [{xmlcdata, MessageText}] = _BodyCData} = _MessageBody] = _Els} = _Packet} = _Msg) ->
|
|
|
|
% FilteredMessageWords = filterMessageText(BodyAttr, binary:bin_to_list(MessageText)),
|
|
|
|
% {_From, _To, {xmlel, <<"message">>, _Attrs, [{xmlel, <<"body">>, BodyAttr, [{xmlcdata, FilteredMessageWords}]}]}};
|
|
|
|
|
|
|
|
% chat message with chat state
|
|
|
|
% on_filter_packet({_From, _To, {xmlel, <<"message">>, _Attrs, [_chatState, {xmlel, <<"body">>, BodyAttr, [{xmlcdata, MessageText}] = _BodyCData} = _MessageBody] = _Els} = _Packet} = _Msg) ->
|
|
|
|
% ?ERROR_MSG("CHAT CHAT MSG W CHAT STATE: ~p~n", _Msg),
|
|
|
|
% FilteredMessageWords = filterMessageText(BodyAttr, MessageText),
|
|
|
|
% {_From, _To, {xmlel, <<"message">>, _Attrs, [_chatState, {xmlel, <<"body">>, BodyAttr, [{xmlcdata, FilteredMessageWords}]}]}};
|
|
|
|
|
|
|
|
% chat message without chat state
|
|
|
|
on_filter_packet({_From, _To, {xmlel, <<"message">>, _Attrs, [MessageBody] = _Els} = _Packet} = _Msg) ->
|
|
|
|
?ERROR_MSG("CHAT MSG WITHOUT CHAT STATE: ~p~n", [_Msg]),
|
|
|
|
|
|
|
|
# {xmlel, <<"body">>, BodyAttr, [{xmlcdata, MessageText}] = _BodyCData} = _MessageBody
|
|
|
|
|
|
|
|
if message type chat/group && !archived
|
|
|
|
foreach body
|
|
|
|
FilteredMessageWords = filterMessageText(BodyAttr, MessageText),
|
|
|
|
{_From, _To, {xmlel, <<"message">>, _Attrs, [MessageBody]}]}};
|
2016-08-02 16:42:37 +02:00
|
|
|
|
2016-06-24 17:34:37 +02:00
|
|
|
on_filter_packet(Msg) ->
|
|
|
|
% Handle the generic case (any packet that isn't a message with a body).
|
2016-09-01 23:00:42 +02:00
|
|
|
?ERROR_MSG("FILTER PACKET MSG: ~p~n", [Msg]),
|
2016-06-24 17:34:37 +02:00
|
|
|
Msg.
|
|
|
|
|
2016-09-01 23:00:42 +02:00
|
|
|
% PSI one-to-one
|
|
|
|
% {
|
|
|
|
% {jid,<<"foo">>,<<"kalamari">>,<<"Psi">>,<<"foo">>,<<"kalamari">>,<<"Psi">>},
|
|
|
|
% {jid,<<"foo">>,<<"kalamari">>,<<>>,<<"foo">>,<<"kalamari">>,<<>>},
|
|
|
|
% {xmlel,<<"message">>,
|
|
|
|
% [{<<"xml:lang">>,<<"en">>},{<<"type">>,<<"chat">>},{<<"to">>,<<"foo@kalamari">>},{<<"id">>,<<"aacba">>}],
|
|
|
|
% [{xmlcdata,<<"\n">>},{xmlel,<<"body">>,[],[{xmlcdata,<<"hi">>}]},{xmlcdata,<<"\n">>},
|
|
|
|
% {xmlel,<<"active">>,[{<<"xmlns">>,<<"http://jabber.org/protocol/chatstates">>}],[]},{xmlcdata,<<"\n">>}]}}
|
|
|
|
|
|
|
|
% gloox muc
|
|
|
|
% {
|
|
|
|
% {jid,<<"#12345">>,<<"conference.kalamari">>,<<"bar">>,<<"#12345">>,<<"conference.kalamari">>,<<"bar">>},
|
|
|
|
% {jid,<<"bar">>,<<"kalamari">>,<<"12145048529523376186799">>,<<"bar">>,<<"kalamari">>,<<"12145048529523376186799">>},
|
|
|
|
% {xmlel,<<"message">>,
|
|
|
|
% [{<<"xml:lang">>,<<"en">>},{<<"to">>,<<"#12345@conference.kalamari">>},{<<"type">>,<<"groupchat">>},{<<"from">>,<<"bar@kalamari/12145048529523376186799">>}],
|
|
|
|
% [{xmlel,<<"archived">>,
|
|
|
|
% [{<<"by">>,<<"conference.kalamari">>},{<<"xmlns">>,<<"urn:xmpp:mam:tmp">>},{<<"id">>,<<"1471940767114309">>}],
|
|
|
|
% []
|
|
|
|
% },
|
|
|
|
% {xmlel,<<"stanza-id">>,
|
|
|
|
% [{<<"by">>,<<"conference.kalamari">>},{<<"xmlns">>,<<"urn:xmpp:sid:0">>},{<<"id">>,<<"1471940767114309">>}],
|
|
|
|
% []
|
|
|
|
% },
|
|
|
|
% {xmlel,<<"body">>,
|
|
|
|
% [],
|
|
|
|
% [{xmlcdata,<<"HELLO THERE">>}]
|
|
|
|
% }]
|
|
|
|
% }
|
|
|
|
% }
|
|
|
|
%
|
|
|
|
% {
|
|
|
|
% {jid,<<"bar">>,<<"kalamari">>,<<"12145048529523376186799">>,<<"bar">>,<<"kalamari">>,<<"12145048529523376186799">>},
|
|
|
|
% {jid,<<"#12345">>,<<"conference.kalamari">>,<<>>,<<"#12345">>,<<"conference.kalamari">>,<<>>},
|
|
|
|
% {xmlel,<<"message">>,
|
|
|
|
% [{<<"xml:lang">>,<<"en">>},{<<"to">>,<<"#12345@conference.kalamari">>},{<<"type">>,<<"groupchat">>},{<<"from">>,<<"bar@kalamari/12145048529523376186799">>}],
|
|
|
|
% [{xmlel,<<"body">>,[],[{xmlcdata,<<"HELLO THERE">>}]}]
|
|
|
|
% }
|
|
|
|
}%
|
|
|
|
|
2016-06-24 17:34:37 +02:00
|
|
|
mod_opt_type(blacklists) -> fun (A) when is_list(A) -> A end;
|
2016-07-19 23:34:04 +02:00
|
|
|
mod_opt_type(charmaps) -> fun (A) when is_list(A) -> A end;
|
|
|
|
mod_opt_type(_) -> [blacklists, charmaps].
|