initial multi-body parse

This commit is contained in:
Tom Quackenbush 2016-09-01 21:00:42 +00:00
parent 5662e1c530
commit 83b32e7e4f
No known key found for this signature in database
GPG Key ID: F08C0F59E57F9F5E
2 changed files with 77 additions and 12 deletions

View File

@ -1,8 +1,8 @@
modules: modules:
mod_pottymouth: mod_pottymouth:
blacklists: blacklists:
default: /etc/ejabberd/modules/mod_pottymouth/blacklist_en.txt default: /home/vagrant/blacklist_en.txt
en: /etc/ejabberd/modules/mod_pottymouth/blacklist_en.txt en: /home/vagrant/blacklist_en.txt
charmaps: charmaps:
default: /etc/ejabberd/modules/mod_pottymouth/charmap_en.txt default: /home/vagrant/charmap_en.txt
en: /etc/ejabberd/modules/mod_pottymouth/charmap_en.txt en: /home/vagrant/charmap_en.txt

View File

@ -21,9 +21,11 @@ getMessageLang(Attrs) ->
if if
LangAttr -> LangAttr ->
{<<"lang">>, LangBin} = LangAttr, {<<"lang">>, LangBin} = LangAttr,
Lang = list_to_atom(binary_to_list(LangBin)); Lang = list_to_atom(binary_to_list(LangBin)),
?ERROR_MSG("LANG: ~p~n", [Lang]);
true -> true ->
Lang = default Lang = default,
?ERROR_MSG("LANG DEFAULT~n", [])
end, end,
Lang. Lang.
@ -50,6 +52,11 @@ filterMessageText(MessageAttrs, MessageText) ->
% we get back bytewise format terms (rather than utf8) % we get back bytewise format terms (rather than utf8)
list_to_binary(string:join(filterWords(MessageTerms), " ")). list_to_binary(string:join(filterWords(MessageTerms), " ")).
filterMessageBodyElements([H|T]) ->
lists:map
filterMessageBodyElements([], Element) ->
Element.
start(_Host, Opts) -> start(_Host, Opts) ->
Blacklists = gen_mod:get_opt(blacklists, Opts, fun(A) -> A end, []), Blacklists = gen_mod:get_opt(blacklists, Opts, fun(A) -> A end, []),
lists:map(fun bloom_gen_server:start/1, Blacklists), lists:map(fun bloom_gen_server:start/1, Blacklists),
@ -67,18 +74,76 @@ stop(_Host) ->
on_filter_packet(drop) -> on_filter_packet(drop) ->
drop; drop;
on_filter_packet({_From, _To, {xmlel, <<"message">>, _Attrs, [_chatState, {xmlel, <<"body">>, BodyAttr, [{xmlcdata, MessageText}] = _BodyCData} = _MessageBody] = _Els} = _Packet} = _Msg) ->
FilteredMessageWords = filterMessageText(BodyAttr, binary:bin_to_list(MessageText)),
{_From, _To, {xmlel, <<"message">>, _Attrs, [_chatState, {xmlel, <<"body">>, BodyAttr, [{xmlcdata, FilteredMessageWords}]}]}};
on_filter_packet({_From, _To, {xmlel, <<"message">>, _Attrs, [{xmlel, <<"body">>, BodyAttr, [{xmlcdata, MessageText}] = _BodyCData} = _MessageBody] = _Els} = _Packet} = _Msg) -> % on_filter_packet({_From, _To, {xmlel, <<"message">>, _Attrs, [_chatState, {xmlel, <<"body">>, BodyAttr, [{xmlcdata, MessageText}] = _BodyCData} = _MessageBody] = _Els} = _Packet} = _Msg) ->
FilteredMessageWords = filterMessageText(BodyAttr, binary:bin_to_list(MessageText)), % FilteredMessageWords = filterMessageText(BodyAttr, binary:bin_to_list(MessageText)),
{_From, _To, {xmlel, <<"message">>, _Attrs, [{xmlel, <<"body">>, BodyAttr, [{xmlcdata, FilteredMessageWords}]}]}}; % {_From, _To, {xmlel, <<"message">>, _Attrs, [_chatState, {xmlel, <<"body">>, BodyAttr, [{xmlcdata, FilteredMessageWords}]}]}};
%
% on_filter_packet({_From, _To, {xmlel, <<"message">>, _Attrs, [{xmlel, <<"body">>, BodyAttr, [{xmlcdata, MessageText}] = _BodyCData} = _MessageBody] = _Els} = _Packet} = _Msg) ->
% FilteredMessageWords = filterMessageText(BodyAttr, binary:bin_to_list(MessageText)),
% {_From, _To, {xmlel, <<"message">>, _Attrs, [{xmlel, <<"body">>, BodyAttr, [{xmlcdata, FilteredMessageWords}]}]}};
% chat message with chat state
% on_filter_packet({_From, _To, {xmlel, <<"message">>, _Attrs, [_chatState, {xmlel, <<"body">>, BodyAttr, [{xmlcdata, MessageText}] = _BodyCData} = _MessageBody] = _Els} = _Packet} = _Msg) ->
% ?ERROR_MSG("CHAT CHAT MSG W CHAT STATE: ~p~n", _Msg),
% FilteredMessageWords = filterMessageText(BodyAttr, MessageText),
% {_From, _To, {xmlel, <<"message">>, _Attrs, [_chatState, {xmlel, <<"body">>, BodyAttr, [{xmlcdata, FilteredMessageWords}]}]}};
% chat message without chat state
on_filter_packet({_From, _To, {xmlel, <<"message">>, _Attrs, [MessageBody] = _Els} = _Packet} = _Msg) ->
?ERROR_MSG("CHAT MSG WITHOUT CHAT STATE: ~p~n", [_Msg]),
# {xmlel, <<"body">>, BodyAttr, [{xmlcdata, MessageText}] = _BodyCData} = _MessageBody
if message type chat/group && !archived
foreach body
FilteredMessageWords = filterMessageText(BodyAttr, MessageText),
{_From, _To, {xmlel, <<"message">>, _Attrs, [MessageBody]}]}};
on_filter_packet(Msg) -> on_filter_packet(Msg) ->
% Handle the generic case (any packet that isn't a message with a body). % Handle the generic case (any packet that isn't a message with a body).
?ERROR_MSG("FILTER PACKET MSG: ~p~n", [Msg]),
Msg. Msg.
% PSI one-to-one
% {
% {jid,<<"foo">>,<<"kalamari">>,<<"Psi">>,<<"foo">>,<<"kalamari">>,<<"Psi">>},
% {jid,<<"foo">>,<<"kalamari">>,<<>>,<<"foo">>,<<"kalamari">>,<<>>},
% {xmlel,<<"message">>,
% [{<<"xml:lang">>,<<"en">>},{<<"type">>,<<"chat">>},{<<"to">>,<<"foo@kalamari">>},{<<"id">>,<<"aacba">>}],
% [{xmlcdata,<<"\n">>},{xmlel,<<"body">>,[],[{xmlcdata,<<"hi">>}]},{xmlcdata,<<"\n">>},
% {xmlel,<<"active">>,[{<<"xmlns">>,<<"http://jabber.org/protocol/chatstates">>}],[]},{xmlcdata,<<"\n">>}]}}
% gloox muc
% {
% {jid,<<"#12345">>,<<"conference.kalamari">>,<<"bar">>,<<"#12345">>,<<"conference.kalamari">>,<<"bar">>},
% {jid,<<"bar">>,<<"kalamari">>,<<"12145048529523376186799">>,<<"bar">>,<<"kalamari">>,<<"12145048529523376186799">>},
% {xmlel,<<"message">>,
% [{<<"xml:lang">>,<<"en">>},{<<"to">>,<<"#12345@conference.kalamari">>},{<<"type">>,<<"groupchat">>},{<<"from">>,<<"bar@kalamari/12145048529523376186799">>}],
% [{xmlel,<<"archived">>,
% [{<<"by">>,<<"conference.kalamari">>},{<<"xmlns">>,<<"urn:xmpp:mam:tmp">>},{<<"id">>,<<"1471940767114309">>}],
% []
% },
% {xmlel,<<"stanza-id">>,
% [{<<"by">>,<<"conference.kalamari">>},{<<"xmlns">>,<<"urn:xmpp:sid:0">>},{<<"id">>,<<"1471940767114309">>}],
% []
% },
% {xmlel,<<"body">>,
% [],
% [{xmlcdata,<<"HELLO THERE">>}]
% }]
% }
% }
%
% {
% {jid,<<"bar">>,<<"kalamari">>,<<"12145048529523376186799">>,<<"bar">>,<<"kalamari">>,<<"12145048529523376186799">>},
% {jid,<<"#12345">>,<<"conference.kalamari">>,<<>>,<<"#12345">>,<<"conference.kalamari">>,<<>>},
% {xmlel,<<"message">>,
% [{<<"xml:lang">>,<<"en">>},{<<"to">>,<<"#12345@conference.kalamari">>},{<<"type">>,<<"groupchat">>},{<<"from">>,<<"bar@kalamari/12145048529523376186799">>}],
% [{xmlel,<<"body">>,[],[{xmlcdata,<<"HELLO THERE">>}]}]
% }
% }
mod_opt_type(blacklists) -> fun (A) when is_list(A) -> A end; mod_opt_type(blacklists) -> fun (A) when is_list(A) -> A end;
mod_opt_type(charmaps) -> fun (A) when is_list(A) -> A end; mod_opt_type(charmaps) -> fun (A) when is_list(A) -> A end;
mod_opt_type(_) -> [blacklists, charmaps]. mod_opt_type(_) -> [blacklists, charmaps].