diff --git a/mod_pottymouth/conf/mod_pottymouth.yml b/mod_pottymouth/conf/mod_pottymouth.yml index 458ad4f..228a22e 100644 --- a/mod_pottymouth/conf/mod_pottymouth.yml +++ b/mod_pottymouth/conf/mod_pottymouth.yml @@ -1,7 +1,8 @@ modules: mod_pottymouth: blacklists: - default: /home/vagrant/blacklist_en.txt - en: /home/vagrant/blacklist_en.txt - cn: /home/vagrant/blacklist_cn.txt - fr: /home/vagrant/blacklist_fr.txt + default: /etc/ejabberd/modules/mod_pottymouth/blacklist_en.txt + en: /etc/ejabberd/modules/mod_pottymouth/blacklist_en.txt + charmaps: + default: /etc/ejabberd/modules/mod_pottymouth/charmap_en.txt + en: /etc/ejabberd/modules/mod_pottymouth/charmap_en.txt diff --git a/mod_pottymouth/mod_pottymouth.spec b/mod_pottymouth/mod_pottymouth.spec index 4a269aa..f32eb21 100644 --- a/mod_pottymouth/mod_pottymouth.spec +++ b/mod_pottymouth/mod_pottymouth.spec @@ -1,5 +1,5 @@ author: "Tom Quackenbush " category: "data" summary: "Filter bad words in messages" -home: "https://github.com/madglory/mod_pottymouth/tree/master" -url: "git@github.com:madglory/mod_pottymouth.git" +home: "https://github.com/processone/ejabberd-contrib/tree/master/" +url: "git@github.com:processone/ejabberd-contrib.git" diff --git a/mod_pottymouth/src/bloom_gen_server.erl b/mod_pottymouth/src/bloom_gen_server.erl index b61ef2e..19abfc0 100644 --- a/mod_pottymouth/src/bloom_gen_server.erl +++ b/mod_pottymouth/src/bloom_gen_server.erl @@ -5,12 +5,11 @@ -include("logger.hrl"). -import(etbloom, [bloom/1, member/2]). --export([start/1]). +-export([member/1]). %% gen_server callbacks --export([init/1, handle_call/3, handle_cast/2, handle_info/2, +-export([start/1, stop/0, init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, code_change/3]). --compile(export_all). serverName(Lang) -> list_to_atom(lists:flatten([atom_to_list(?MODULE), "_", atom_to_list(Lang)])). 
@@ -21,11 +20,11 @@ member({Lang, Word} = _MessageToken) -> start({Lang, BlacklistFile} = _Opts) -> gen_server:start_link({local, serverName(Lang)}, ?MODULE, [BlacklistFile], []). -stop(_Host) -> +stop() -> ok. init([BlacklistFile]) -> - ?INFO_MSG("Building bloom", []), + ?INFO_MSG("Building bloom ~p~n", [BlacklistFile]), Bloom = etbloom:sbf(10000000), {ok, loadWordList(Bloom, BlacklistFile)}. diff --git a/mod_pottymouth/src/mod_pottymouth.erl b/mod_pottymouth/src/mod_pottymouth.erl index 49161a0..7ae8f77 100644 --- a/mod_pottymouth/src/mod_pottymouth.erl +++ b/mod_pottymouth/src/mod_pottymouth.erl @@ -2,8 +2,6 @@ -behaviour(gen_mod). --include("logger.hrl"). - -export([ start/2, stop/1, @@ -14,6 +12,7 @@ -include("ejabberd.hrl"). -import(bloom_gen_server, [start/0, stop/0, member/1]). +-import(normalize_leet_gen_server, [normalize/1]). getMessageLang(Attrs) -> LangAttr = lists:keyfind(<<"lang">>, 1, Attrs), @@ -26,8 +25,8 @@ getMessageLang(Attrs) -> end, Lang. -censorWord({_Lang, Word} = MessageTerm) -> - IsBadWord = bloom_gen_server:member(MessageTerm), +censorWord({Lang, Word} = MessageTerm) -> + IsBadWord = bloom_gen_server:member({Lang, normalize_leet_gen_server:normalize(MessageTerm)}), if IsBadWord -> "****"; @@ -41,11 +40,14 @@ filterWords(L) -> start(_Host, Opts) -> Blacklists = gen_mod:get_opt(blacklists, Opts, fun(A) -> A end, []), lists:map(fun bloom_gen_server:start/1, Blacklists), + CharMaps = gen_mod:get_opt(charmaps, Opts, fun(A) -> A end, []), + lists:map(fun normalize_leet_gen_server:start/1, CharMaps), ejabberd_hooks:add(filter_packet, global, ?MODULE, on_filter_packet, 0), ok. stop(_Host) -> bloom_gen_server:stop(), + normalize_leet_gen_server:stop(), ejabberd_hooks:delete(filter_packet, global, ?MODULE, on_filter_packet, 0), ok. 
@@ -54,9 +56,9 @@ on_filter_packet(drop) -> on_filter_packet({_From, _To, {xmlel, <<"message">>, Attrs, [_chatState, {xmlel, <<"body">>, _BodyAttr, [{xmlcdata, MessageText}] = _BodyCData} = _MessageBody] = _Els} = _Packet} = _Msg) -> Lang = getMessageLang(Attrs), - MessageWords = string:tokens(binary_to_list(MessageText), " "), + MessageWords = string:tokens(unicode:characters_to_list(MessageText, utf8), " "), MessageTerms = [{Lang, Word} || Word <- MessageWords], - FilteredMessageWords = list_to_binary(string:join(filterWords(MessageTerms), " ")), + FilteredMessageWords = unicode:characters_to_binary(string:join(filterWords(MessageTerms), " ")), {_From, _To, {xmlel, <<"message">>, Attrs, [_chatState, {xmlel, <<"body">>, _BodyAttr, [{xmlcdata, FilteredMessageWords}]}]}}; on_filter_packet(Msg) -> @@ -64,4 +66,5 @@ on_filter_packet(Msg) -> Msg. mod_opt_type(blacklists) -> fun (A) when is_list(A) -> A end; -mod_opt_type(_) -> [blacklists]. +mod_opt_type(charmaps) -> fun (A) when is_list(A) -> A end; +mod_opt_type(_) -> [blacklists, charmaps]. diff --git a/mod_pottymouth/src/normalize_leet.erl b/mod_pottymouth/src/normalize_leet.erl new file mode 100644 index 0000000..fbd9bec --- /dev/null +++ b/mod_pottymouth/src/normalize_leet.erl @@ -0,0 +1,38 @@ +-module(normalize_leet). + +-export([ + normalize/2 +]). + +distinctLetters([H|T]) -> + distinctLetters(T, [H]). + +distinctLetters([H|T], Letters) -> + distinctLetters(T, lists:umerge(Letters, [H])); +distinctLetters([], Letters) -> + Letters. + +checkMetaChar(Char) -> + MetaChars = ["\\", "^", "$", ".", "|", "?", "*", "+", "(", ")", "[", "{"], + lists:member(Char, MetaChars). + +replaceChar(true, Char, X, Word) -> + re:replace(Word, ["\\", Char], X, [global,unicode,{return,list}]); +replaceChar(false, Char, X, Word) -> + re:replace(Word, Char, X, [global,unicode,{return,list}]). 
+ +replaceLetters([H|T], CharMap, Word) -> + CurChar = [H], + NormChar = maps:get(CurChar, CharMap, skip), + if + NormChar == skip -> + replaceLetters(T, CharMap, Word); + true -> + IsMetaChar = checkMetaChar(CurChar), + replaceLetters(T, CharMap, replaceChar(IsMetaChar, CurChar, NormChar, Word)) + end; +replaceLetters([], _CharMap, Word) -> + Word. + +normalize(CharMap, Word) -> + replaceLetters(distinctLetters(Word), CharMap, Word). diff --git a/mod_pottymouth/src/normalize_leet_gen_server.erl b/mod_pottymouth/src/normalize_leet_gen_server.erl new file mode 100644 index 0000000..88818cf --- /dev/null +++ b/mod_pottymouth/src/normalize_leet_gen_server.erl @@ -0,0 +1,46 @@ +-module(normalize_leet_gen_server). + +-behaviour(gen_server). + +-include("logger.hrl"). + +-import(normalize_leet, [normalize/2]). +-export([normalize/1]). + +%% gen_server callbacks +-export([start/1, stop/0, init/1, handle_call/3, handle_cast/2, handle_info/2, + terminate/2, code_change/3]). + +serverName(Lang) -> + list_to_atom(lists:flatten([atom_to_list(?MODULE), "_", atom_to_list(Lang)])). + +normalize({Lang, Word} = _MessageToken) -> + try gen_server:call(serverName(Lang), {normalize, Word}) + catch + exit:{noproc, _Reason} -> Word + end. + +start({Lang, CharMapFile} = _Opts) -> + gen_server:start_link({local, serverName(Lang)}, ?MODULE, [CharMapFile], []). + +stop() -> + ok. + +init([CharMapFile]) -> + ?INFO_MSG("NormalizeLeet Loading: ~p~n", [CharMapFile]), + {ok, loadCharMapConfig(file:consult(CharMapFile))}. + +loadCharMapConfig({ok, [CharMapConfig]}) -> + maps:from_list(CharMapConfig); +loadCharMapConfig({error, Reason}) -> + ?INFO_MSG("NormalizeLeet Error: ~p~n", [Reason]), + maps:new(). + +handle_call({normalize, Word}, _From, CharMap) -> + Reply = normalize_leet:normalize(CharMap, Word), + {reply, Reply, CharMap}. + +handle_cast(_Msg, State) -> {noreply, State}. +handle_info(_Info, State) -> {noreply, State}. +terminate(_Reason, _State) -> ok. 
+code_change(_PrevVsn, ServerState, _Extra) -> {ok, ServerState}. % returns the given state unchanged