Merge pull request #173 from madglory/master

mod_pottymouth: Arbitrary blacklist size
This commit is contained in:
badlop 2016-07-06 12:55:07 +02:00 committed by GitHub
commit b8ed8af10e
2 changed files with 84 additions and 14 deletions

62
mod_pottymouth/README.md Normal file
View File

@ -0,0 +1,62 @@
# mod_pottymouth
The 'mod_pottymouth' ejabberd module aims to fill the void left by 'mod_shit'
which has disappeared from the net. It allows individual whole words of a
message to be filtered against a blacklist. It allows multiple blacklists
sharded by language. The internal bloomfilter can support arbitrary blacklist
sizes. Using a large list (say, 87M terms) will slow down the initial server
boot (to about 15 minutes), but once the list is loaded, lookups are very
speedy.
#### Installation
On Ubuntu:
````
cd ~/.ejabberd-modules/sources
clone the git repo
cd mod_pottymouth
ejabberdctl module_install mod_pottymouth
ejabberdctl restart
````
module will be installed in: ~/.ejabberd-modules/mod_pottymouth
#### Config
The file format is as follows:
````
modules:
mod_pottymouth:
blacklists:
default: /home/your_user/blacklist_en.txt
en: /home/your_user/blacklist_en.txt
cn: /home/your_user/blacklist_cn.txt
fr: /home/your_user/blacklist_fr.txt
````
For each language (en, cn, fr, ...whatever), provide a full path to a blacklist file.
The blacklist file is a plain text file with blacklisted words listed one per
line.
#### Gotchas
The language will be looked up by whatever value is passed in the xml:lang
attribute of the xml message. So, any xml:lang value to be supported will need
a corresponding entry/blacklist in the config file. If xml:lang is missing,
the 'default' entry in config will be used.
For xml:lang attribute docs, see:
[http://wiki.xmpp.org/web/Programming_XMPP_Clients#Sending_a_message](http://wiki.xmpp.org/web/Programming_XMPP_Clients#Sending_a_message)
#### Blacklist helper
Thinking of a bunch of swear words and all the permutations can be tough. We made
a helper script to take a bare wordlist and generate permutations given a
dictionary of substitution characters:
[https://github.com/madglory/permute_wordlist](https://github.com/madglory/permute_wordlist)
#### Tip of the hat
This mod makes use of the excellent 'etbloom' module:
[https://github.com/erlangtoolbox/etbloom](https://github.com/erlangtoolbox/etbloom)

View File

@ -18,24 +18,16 @@ serverName(Lang) ->
member({Lang, Word} = _MessageToken) -> member({Lang, Word} = _MessageToken) ->
gen_server:call(serverName(Lang), {member, Word}). gen_server:call(serverName(Lang), {member, Word}).
%% @doc Read BlacklistFile and return its contents as a list of strings,
%% split on "\n". When the path does not exist an error is logged and the
%% empty list is returned. A file that exists but cannot be read crashes
%% with a badmatch on the file:read_file/1 result.
loadWordList(BlacklistFile) ->
    Contents =
        case filelib:is_file(BlacklistFile) of
            true ->
                {ok, Bin} = file:read_file(BlacklistFile),
                Bin;
            false ->
                ?ERROR_MSG("Blacklist file not found: ~p~n", [BlacklistFile]),
                <<>>
        end,
    string:tokens(binary_to_list(Contents), "\n").
start({Lang, BlacklistFile} = _Opts) -> start({Lang, BlacklistFile} = _Opts) ->
gen_server:start_link({local, serverName(Lang)}, ?MODULE, [BlacklistFile], []). gen_server:start_link({local, serverName(Lang)}, ?MODULE, [BlacklistFile], []).
%% @doc Stop callback: ignores its argument and always returns ok.
stop(_Host) -> ok.
init([BlacklistFile]) -> init([BlacklistFile]) ->
WordList = loadWordList(BlacklistFile), ?INFO_MSG("Building bloom", []),
{ok, etbloom:bloom(WordList)}. Bloom = etbloom:sbf(10000000),
{ok, loadWordList(Bloom, BlacklistFile)}.
handle_call({member, Word}, _From, Bloom) -> handle_call({member, Word}, _From, Bloom) ->
Reply = etbloom:member(Word, Bloom), Reply = etbloom:member(Word, Bloom),
@ -45,3 +37,19 @@ handle_cast(_Msg, State) -> {noreply, State}.
handle_info(_Info, State) -> {noreply, State}. handle_info(_Info, State) -> {noreply, State}.
terminate(_Reason, _State) -> ok. terminate(_Reason, _State) -> ok.
code_change(_OldVsn, State, _Extra) -> {ok, State}. code_change(_OldVsn, State, _Extra) -> {ok, State}.
%% @doc Feed every word of BlacklistFile (one word per line) into Bloom via
%% etbloom:add/2 and return the resulting filter.
%%
%% If BlacklistFile does not exist, an error is logged and Bloom is returned
%% unchanged. A file that exists but cannot be opened crashes with a badmatch,
%% and a read error raises error({blacklist_read_failed, Reason}); in every
%% case the file handle is closed before the function exits.
loadWordList(Bloom, BlacklistFile) ->
    case filelib:is_file(BlacklistFile) of
        true ->
            {ok, S} = file:open(BlacklistFile, [read]),
            %% try/after guarantees the descriptor is released even when
            %% reading or etbloom:add/2 fails part-way through the file.
            try
                loadWordList(io:get_line(S, ''), Bloom, S)
            after
                file:close(S)
            end;
        false ->
            ?ERROR_MSG("Blacklist file not found: ~p~n", [BlacklistFile]),
            Bloom
    end.

%% @doc Line-by-line loop. The first argument is the result of the most
%% recent io:get_line/2 call on the IoDevice S: `eof' ends the loop, an
%% `{error, Reason}' tuple aborts it, anything else is a line of text.
loadWordList(eof, Bloom, _S) ->
    Bloom;
loadWordList({error, Reason}, _Bloom, _S) ->
    %% Fail with a descriptive reason instead of an opaque function_clause.
    error({blacklist_read_failed, Reason});
loadWordList(Line, Bloom, S) ->
    NextBloom =
        case stripLineEnding(Line) of
            %% Blank lines carry no word; do not insert the empty string.
            [] -> Bloom;
            Word -> etbloom:add(Word, Bloom)
        end,
    loadWordList(io:get_line(S, ''), NextBloom, S).

%% Remove a single trailing "\n" or "\r\n" from Line. A line without a
%% terminator (the last line of a file that lacks a final newline) is
%% returned untouched, so its last character is preserved.
stripLineEnding(Line) ->
    case lists:reverse(Line) of
        [$\n, $\r | Rev] -> lists:reverse(Rev);
        [$\n | Rev] -> lists:reverse(Rev);
        _ -> Line
    end.