Use banword instead of bloom + etbloom + bitarray (thanks to ytkang)
This commit is contained in:
parent
2aea4659cc
commit
00e3fa97fc
|
@ -6,24 +6,6 @@ sizes. Using a large list (say, 87M terms) will slow down the initial server
|
|||
boot time (to about 15 minutes respectively), but once loaded lookups are very
|
||||
speedy.
|
||||
|
||||
Prerequisite bitarray lib:
|
||||
|
||||
mod_pottymouth uses a modified version of the 'etbloom' library that uses
|
||||
'bitarray' to replace 'hipe_bifs'. Ejabberd doesn't handle installing
|
||||
dependencies of dependencies quite so well (etbloom being a dep of mod_pottymouth
|
||||
and bitarray being a dep of etbloom), so bitarray needs to be installed manually
|
||||
before installation of mod_pottymouth.
|
||||
|
||||
This is how I got it to work... YMMV.
|
||||
|
||||
1. Make sure ejabberd is running
|
||||
2. Get the updated ejabberd-contrib sources:
|
||||
ejabberdctl modules_update_specs
|
||||
3. Execute this script to get and build the dependencies:
|
||||
~/.ejabberd-modules/sources/ejabberd-contrib/mod_pottymouth/make-deps.sh
|
||||
4. When you install the module, its dependencies will be installed as well:
|
||||
ejabberdctl module_install mod_pottymouth
|
||||
|
||||
Configuration file is ~/.ejabberd-modules/mod_pottymouth/conf/mod_pottymouth.yml
|
||||
|
||||
modules:
|
||||
|
@ -73,8 +55,3 @@ Blacklist helper
|
|||
Thinking of a bunch of swear words and all the permutations can be tough. We made
|
||||
a helper script to take a bare wordlist and generate permutations given a
|
||||
dictionary of substitution characters: https://github.com/madglory/permute_wordlist
|
||||
|
||||
Tip of the hat:
|
||||
|
||||
This mod makes use of the excellent 'etbloom' module:
|
||||
https://github.com/erlangtoolbox/etbloom
|
||||
|
|
|
@ -1,18 +0,0 @@
|
|||
#!/bin/sh
#
# Fetches and builds the etbloom dependency (and its bitarray/proper deps)
# for mod_pottymouth, then copies the compiled artifacts into the module
# source directory.
#
# Everything runs in a subshell so the caller's working directory is left
# untouched. The original saved the directory in PWD, but `cd` overwrites
# PWD, so the final `cd $PWD` never restored anything.
(
    # Abort on the first failing step so that `rm -rf deps/` can never run
    # in the wrong directory after a failed `cd`.
    set -e

    cd "$HOME/.ejabberd-modules/sources/ejabberd-contrib/mod_pottymouth/"
    mkdir -p deps
    cd deps

    # Re-use an existing checkout so the script can be re-run.
    [ -d etbloom ] || git clone https://github.com/madglory/etbloom.git
    cd etbloom

    rm -rf deps/
    ./rebar get-deps
    # Replace bitarray's rebar.config with the one shipped in mod_pottymouth.
    cp ../../rebar.config.bitarray deps/bitarray/rebar.config
    ./rebar compile

    cp -R deps/bitarray/ebin ../../
    cp -R deps/bitarray/priv ../../
    cp -R deps/proper/ebin ../../
)
|
|
@ -1,14 +0,0 @@
|
|||
%% Extra emulator flags passed to Common Test: sets ERL_LIBS to deps/.
{ct_extra_params, "-env ERL_LIBS deps/"}.

%% Compiler and linker flags for the port/NIF build.
{port_env, [
    {"DRV_CFLAGS", "-fPIC -O2 $ERL_CFLAGS"},
    {"ERL_LDFLAGS", " -L$ERL_EI_LIBDIR -lei"}
]}.

%% Builds priv/bitarray.so from the single C source file.
{port_specs, [
    {"priv/bitarray.so", ["c_src/bitarray_nif.c"]}
]}.

%% No runtime dependencies are listed; proper is fetched for tests.
{deps, [
    {proper, "1.2",
        {git, "https://github.com/manopapad/proper.git", {tag, "v1.2"}}}
]}.
|
|
@ -0,0 +1,77 @@
|
|||
-module(banword_gen_server).
|
||||
|
||||
-behaviour(gen_server).
|
||||
|
||||
-include("logger.hrl").
|
||||
|
||||
-export([member/1]).
|
||||
|
||||
%% gen_server callbacks
|
||||
-export([start/1, stop/0, stop/1, init/1, handle_call/3, handle_cast/2, handle_info/2,
         terminate/2, code_change/3]).

%% Builds the locally registered name of the server for one language,
%% e.g. banword_gen_server_en for Lang = en.
%% Lang is an atom; it comes from the module configuration, so the set of
%% atoms created here is bounded by the configured blacklists.
serverName(Lang) ->
    list_to_atom(lists:flatten([atom_to_list(?MODULE), "_", atom_to_list(Lang)])).

%% Asks the server registered for Lang whether Word contains a banned word.
%% Returns the server's reply (true | false). Exits if no server is
%% registered for Lang.
member({Lang, Word} = _MessageToken) ->
    gen_server:call(serverName(Lang), {member, Word}).

%% Starts (and links to) the blacklist server for Lang, loading its words
%% from BlacklistFile. Returns the result of gen_server:start_link/4.
start({Lang, BlacklistFile} = _Opts) ->
    Name = serverName(Lang),
    ?INFO_MSG("Building blacklist name ~p~n", [Name]),
    gen_server:start_link({local, Name}, ?MODULE, [BlacklistFile], []).

%% Kept for backward compatibility; does nothing.
stop() ->
    ok.

%% Stops the blacklist server for Lang. Takes the same {Lang, File} option
%% tuple as start/1 so it can be mapped over the configured blacklists.
%% mod_pottymouth calls stop/1, which previously did not exist in this module.
stop({Lang, _BlacklistFile} = _Opts) ->
    gen_server:stop(serverName(Lang)).
|
||||
|
||||
%% Reads FileName and returns its non-empty lines as strings (byte lists).
%% Both "\n" and "\r\n" line endings are accepted, so words from files
%% saved with Windows line endings do not carry a trailing "\r" that
%% would stop them from ever matching. Empty lines (including the one
%% produced by a trailing newline) are dropped.
%% Crashes with badmatch if the file cannot be read.
readlines(FileName) ->
    {ok, Data} = file:read_file(FileName),
    Lines = binary:split(Data, [<<"\r\n">>, <<"\n">>], [global, trim_all]),
    [binary_to_list(Line) || Line <- Lines].
|
||||
|
||||
%% gen_server init callback: loads the blacklist file and keeps the
%% resulting word list as the server state.
init([BlacklistFile]) ->
    ?INFO_MSG("Building blacklist ~p~n", [BlacklistFile]),
    BlackList = loadWordList(BlacklistFile),
    {ok, BlackList}.
|
||||
|
||||
%% Returns true when BlackWord occurs anywhere inside Word, false otherwise.
%% Both arguments are strings (lists); an empty BlackWord never matches.
%%
%% The previous try/catch only caught throws, which the string search never
%% raises, so it was dead code and has been removed. Searching for the first
%% occurrence (string:str/2) instead of the last (string:rstr/2) gives the
%% same answer and can stop at the first hit.
check_banword(Word, BlackWord) ->
    string:str(Word, BlackWord) > 0.
|
||||
|
||||
%% Handles {member, Word}: replies true if any blacklisted word occurs
%% inside Word, false otherwise. The state (the word list) is unchanged.
%%
%% lists:any/2 stops at the first match. The previous version got the same
%% early exit by throwing the reply tuple out of lists:foreach/2, which
%% only worked because gen_server treats a thrown value as a return value.
handle_call({member, Word}, _From, BlackList) ->
    IsBanned = lists:any(fun(BlackWord) -> check_banword(Word, BlackWord) end,
                         BlackList),
    {reply, IsBanned, BlackList}.
|
||||
|
||||
%% Casts are ignored; the state is kept as is.
handle_cast(_Request, BlackList) ->
    {noreply, BlackList}.

%% Any other message is ignored; the state is kept as is.
handle_info(_Message, BlackList) ->
    {noreply, BlackList}.

%% Nothing to clean up on shutdown.
terminate(_Why, _BlackList) ->
    ok.

%% No state conversion is needed on code upgrade.
code_change(_FromVsn, BlackList, _Extra) ->
    {ok, BlackList}.
|
||||
|
||||
%% Returns the list of words read from BlacklistFile. If the path does not
%% exist, logs an error and returns an empty list.
loadWordList(BlacklistFile) ->
    case filelib:is_file(BlacklistFile) of
        true ->
            readlines(BlacklistFile);
        false ->
            ?ERROR_MSG("Blacklist file not found: ~p~n", [BlacklistFile]),
            []
    end.
|
|
@ -1,54 +0,0 @@
|
|||
-module(bloom_gen_server).
|
||||
|
||||
-behaviour(gen_server).
|
||||
|
||||
-include("logger.hrl").
|
||||
|
||||
-import(etbloom, [bloom/1, member/2]).
|
||||
-export([member/1]).
|
||||
|
||||
%% gen_server callbacks
|
||||
-export([start/1, stop/1, init/1, handle_call/3, handle_cast/2, handle_info/2,
|
||||
terminate/2, code_change/3]).
|
||||
|
||||
%% Builds the locally registered server name for a language atom by
%% joining this module's name and Lang with an underscore.
serverName(Lang) ->
    list_to_atom(atom_to_list(?MODULE) ++ "_" ++ atom_to_list(Lang)).
|
||||
|
||||
%% Asks the server registered for Lang whether Word is in its bloom filter
%% and returns the server's reply.
member({Lang, Word} = _MessageToken) ->
    Server = serverName(Lang),
    gen_server:call(Server, {member, Word}).
|
||||
|
||||
%% Starts (and links to) the bloom server for Lang, registered locally
%% under serverName(Lang), with BlacklistFile as its init argument.
start({Lang, BlacklistFile} = _Opts) ->
    Registration = {local, serverName(Lang)},
    gen_server:start_link(Registration, ?MODULE, [BlacklistFile], []).
|
||||
|
||||
%% Stops the bloom server registered for Lang. Accepts the same option
%% tuple as start/1; the file name is not used.
stop({Lang, _BlacklistFile} = _Opts) ->
    Server = serverName(Lang),
    gen_server:stop(Server).
|
||||
|
||||
%% gen_server init callback: creates a filter with etbloom:sbf/1, fills it
%% from BlacklistFile and keeps it as the server state.
%% NOTE(review): 10000000 is presumably the filter's initial capacity;
%% confirm against the etbloom documentation.
init([BlacklistFile]) ->
    ?INFO_MSG("Building bloom ~p~n", [BlacklistFile]),
    EmptyFilter = etbloom:sbf(10000000),
    Filter = loadWordList(EmptyFilter, BlacklistFile),
    {ok, Filter}.
|
||||
|
||||
%% Handles {member, Word}: replies with the result of etbloom:member/2 for
%% Word against the filter held in the state. The state is unchanged.
handle_call({member, Word}, _From, Bloom) ->
    {reply, etbloom:member(Word, Bloom), Bloom}.
|
||||
|
||||
%% Casts are ignored; the state is kept as is.
handle_cast(_Request, Bloom) ->
    {noreply, Bloom}.

%% Any other message is ignored; the state is kept as is.
handle_info(_Message, Bloom) ->
    {noreply, Bloom}.

%% Nothing to clean up on shutdown.
terminate(_Why, _Bloom) ->
    ok.

%% No state conversion is needed on code upgrade.
code_change(_FromVsn, Bloom, _Extra) ->
    {ok, Bloom}.
|
||||
|
||||
%% Adds every line of BlacklistFile to Bloom and returns the updated filter.
%% If the path does not exist, logs an error and returns Bloom unchanged.
%% Crashes with badmatch if the file exists but cannot be opened.
loadWordList(Bloom, BlacklistFile) ->
    case filelib:is_file(BlacklistFile) of
        true ->
            {ok, S} = file:open(BlacklistFile, [read]),
            %% Always release the file handle, even if loading fails;
            %% the handle used to be left open.
            try
                loadWordList(io:get_line(S, ''), Bloom, S)
            after
                file:close(S)
            end;
        false ->
            ?ERROR_MSG("Blacklist file not found: ~p~n", [BlacklistFile]),
            Bloom
    end.

%% Reads lines from the open device S until eof, adding each one (without
%% its line ending) to the filter.
loadWordList(eof, Bloom, _S) ->
    Bloom;
loadWordList(Line, Bloom, S) when is_list(Line) ->
    loadWordList(io:get_line(S, ''), etbloom:add(stripLineEnding(Line), Bloom), S).

%% Removes one trailing "\n" or "\r\n" from Line, if present. Unlike
%% lists:droplast/1 this does not cut the last character off a final line
%% that has no trailing newline.
stripLineEnding(Line) ->
    case lists:reverse(Line) of
        [$\n, $\r | Rest] -> lists:reverse(Rest);
        [$\n | Rest] -> lists:reverse(Rest);
        _ -> Line
    end.
|
|
@ -15,7 +15,7 @@
|
|||
mod_options/1
|
||||
]).
|
||||
|
||||
-import(bloom_gen_server, [start/0, stop/0, member/1]).
|
||||
-import(banword_gen_server, [start/0, stop/0, member/1]).
|
||||
-import(nomalize_leet_gen_server, [normalize/1]).
|
||||
|
||||
getMessageLang(Msg) ->
|
||||
|
@ -31,8 +31,8 @@ getMessageLang(Msg) ->
|
|||
censorWord({Lang, Word} = _MessageTerm) ->
|
||||
% we need unicode characters to normalize the word
|
||||
NormalizedWord = normalize_leet_gen_server:normalize({Lang, unicode:characters_to_list(list_to_binary(Word))}),
|
||||
% we need bytewise format for bloom lookup
|
||||
IsBadWord = bloom_gen_server:member({Lang, binary_to_list(unicode:characters_to_binary(NormalizedWord))}),
|
||||
% we need bytewise format for banword lookup
|
||||
IsBadWord = banword_gen_server:member({Lang, binary_to_list(unicode:characters_to_binary(NormalizedWord))}),
|
||||
if
|
||||
IsBadWord ->
|
||||
"****";
|
||||
|
@ -61,7 +61,7 @@ filterMessageText2(Lang, MessageText) ->
|
|||
|
||||
start(_Host, Opts) ->
|
||||
Blacklists = gen_mod:get_opt(blacklists, Opts),
|
||||
lists:map(fun bloom_gen_server:start/1, Blacklists),
|
||||
lists:map(fun banword_gen_server:start/1, Blacklists),
|
||||
CharMaps = gen_mod:get_opt(charmaps, Opts),
|
||||
lists:map(fun normalize_leet_gen_server:start/1, CharMaps),
|
||||
ejabberd_hooks:add(filter_packet, global, ?MODULE, on_filter_packet, 0),
|
||||
|
@ -69,7 +69,7 @@ start(_Host, Opts) ->
|
|||
|
||||
stop(Host) ->
|
||||
Blacklists = gen_mod:get_module_opt(Host, ?MODULE, blacklists),
|
||||
lists:map(fun bloom_gen_server:stop/1, Blacklists),
|
||||
lists:map(fun banword_gen_server:stop/1, Blacklists),
|
||||
CharMaps = gen_mod:get_module_opt(Host, ?MODULE, charmaps),
|
||||
lists:map(fun normalize_leet_gen_server:stop/1, CharMaps),
|
||||
ejabberd_hooks:delete(filter_packet, global, ?MODULE, on_filter_packet, 0),
|
||||
|
|
Loading…
Reference in New Issue