diff --git a/mod_pottymouth/README.txt b/mod_pottymouth/README.txt index 0703c96..af8c822 100644 --- a/mod_pottymouth/README.txt +++ b/mod_pottymouth/README.txt @@ -6,10 +6,32 @@ sizes. Using a large list (say, 87M terms) will slow down the initial server boot time (to about 15 minutes respectively), but once loaded lookups are very speedy. -To install in ejabberd: +Prerequisite bitarray lib: + +mod_pottymouth uses a modified version of the 'etbloom' library that uses +'bitarray' to replace 'hipe_bifs'. Ejabberd doesn't handle installing +dependencies of dependencies quite so well (etbloom being a dep of mod_pottymouth +and bitarray being a dep of etbloom), so bitarray needs to be installed manually +before installation of mod_pottymouth. + +This is how I got it to work... YMMV. +Given $EJABBERD_HOME is the base directory of your ejabberd install: + +mkdir -p $EJABBERD_HOME/erlang-lib/bitarray +cd $EJABBERD_HOME/erlang-lib/bitarray +clone https://github.com/ferd/bitarray git repo +run: /usr/lib/erlang/bin/escript rebar get-deps +run: /usr/lib/erlang/bin/escript rebar compile +run: /usr/bin/install -c -d /usr/local/lib/bitarray-1.0.0/ebin +run: /usr/bin/install -c -d /usr/local/lib/bitarray-1.0.0/priv +run: /usr/bin/install -c -m 644 ./ebin/bitarray.app /usr/local/lib/bitarray-1.0.0/ebin/bitarray.app +run: /usr/bin/install -c -m 644 ./ebin/bitarray.beam /usr/local/lib/bitarray-1.0.0/ebin/bitarray.beam +run: /usr/bin/install -c -m 644 ./priv/bitarray.so /usr/local/lib/bitarray-1.0.0/priv/bitarray.so + +To install mod_pottymouth in ejabberd: cd ~/.ejabberd-modules/sources -clone the git repo +clone the ejabberd-contrib git repo cd mod_pottymouth edit: ./conf/mod_pottymouth.yml @@ -18,14 +40,6 @@ run: ejabberdctl module_install mod_pottymouth run: ejabberdctl restart module will be installed in: ~/.ejabberd-modules/mod_pottymouth - -If you don't have Erlang HiPE available, it may throw errors that mention: - {undef,[{hipe_bifs,bitarray, -In such case, you can install 
this library: - https://github.com/ferd/bitarray -and edit etbloom.erl to call that library instead of hipe_bifs. - - Config file format: modules: diff --git a/mod_pottymouth/deps/etbloom/.gitignore b/mod_pottymouth/deps/etbloom/.gitignore new file mode 100644 index 0000000..7f18faa --- /dev/null +++ b/mod_pottymouth/deps/etbloom/.gitignore @@ -0,0 +1,2 @@ +workspace.xml +out diff --git a/mod_pottymouth/deps/etbloom/LICENSE.txt b/mod_pottymouth/deps/etbloom/LICENSE.txt new file mode 100644 index 0000000..729d139 --- /dev/null +++ b/mod_pottymouth/deps/etbloom/LICENSE.txt @@ -0,0 +1,286 @@ +ERLANG PUBLIC LICENSE +Version 1.1 + +1. Definitions. + +1.1. ``Contributor'' means each entity that creates or contributes to +the creation of Modifications. + +1.2. ``Contributor Version'' means the combination of the Original +Code, prior Modifications used by a Contributor, and the Modifications +made by that particular Contributor. + +1.3. ``Covered Code'' means the Original Code or Modifications or the +combination of the Original Code and Modifications, in each case +including portions thereof. + +1.4. ``Electronic Distribution Mechanism'' means a mechanism generally +accepted in the software development community for the electronic +transfer of data. + +1.5. ``Executable'' means Covered Code in any form other than Source +Code. + +1.6. ``Initial Developer'' means the individual or entity identified +as the Initial Developer in the Source Code notice required by Exhibit +A. + +1.7. ``Larger Work'' means a work which combines Covered Code or +portions thereof with code not governed by the terms of this License. + +1.8. ``License'' means this document. + +1.9. ``Modifications'' means any addition to or deletion from the +substance or structure of either the Original Code or any previous +Modifications. When Covered Code is released as a series of files, a +Modification is: + +A. 
Any addition to or deletion from the contents of a file containing + Original Code or previous Modifications. + +B. Any new file that contains any part of the Original Code or + previous Modifications. + +1.10. ``Original Code'' means Source Code of computer software code +which is described in the Source Code notice required by Exhibit A as +Original Code, and which, at the time of its release under this +License is not already Covered Code governed by this License. + +1.11. ``Source Code'' means the preferred form of the Covered Code for +making modifications to it, including all modules it contains, plus +any associated interface definition files, scripts used to control +compilation and installation of an Executable, or a list of source +code differential comparisons against either the Original Code or +another well known, available Covered Code of the Contributor's +choice. The Source Code can be in a compressed or archival form, +provided the appropriate decompression or de-archiving software is +widely available for no charge. + +1.12. ``You'' means an individual or a legal entity exercising rights +under, and complying with all of the terms of, this License. For legal +entities,``You'' includes any entity which controls, is controlled by, +or is under common control with You. For purposes of this definition, +``control'' means (a) the power, direct or indirect, to cause the +direction or management of such entity, whether by contract or +otherwise, or (b) ownership of fifty percent (50%) or more of the +outstanding shares or beneficial ownership of such entity. + +2. Source Code License. + +2.1. The Initial Developer Grant. 
+The Initial Developer hereby grants You a world-wide, royalty-free, +non-exclusive license, subject to third party intellectual property +claims: + +(a) to use, reproduce, modify, display, perform, sublicense and + distribute the Original Code (or portions thereof) with or without + Modifications, or as part of a Larger Work; and + +(b) under patents now or hereafter owned or controlled by Initial + Developer, to make, have made, use and sell (``Utilize'') the + Original Code (or portions thereof), but solely to the extent that + any such patent is reasonably necessary to enable You to Utilize + the Original Code (or portions thereof) and not to any greater + extent that may be necessary to Utilize further Modifications or + combinations. + +2.2. Contributor Grant. +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license, subject to third party intellectual property +claims: + +(a) to use, reproduce, modify, display, perform, sublicense and + distribute the Modifications created by such Contributor (or + portions thereof) either on an unmodified basis, with other + Modifications, as Covered Code or as part of a Larger Work; and + +(b) under patents now or hereafter owned or controlled by Contributor, + to Utilize the Contributor Version (or portions thereof), but + solely to the extent that any such patent is reasonably necessary + to enable You to Utilize the Contributor Version (or portions + thereof), and not to any greater extent that may be necessary to + Utilize further Modifications or combinations. + +3. Distribution Obligations. + +3.1. Application of License. +The Modifications which You contribute are governed by the terms of +this License, including without limitation Section 2.2. The Source +Code version of Covered Code may be distributed only under the terms +of this License, and You must include a copy of this License with +every copy of the Source Code You distribute. 
You may not offer or +impose any terms on any Source Code version that alters or restricts +the applicable version of this License or the recipients' rights +hereunder. However, You may include an additional document offering +the additional rights described in Section 3.5. + +3.2. Availability of Source Code. +Any Modification which You contribute must be made available in Source +Code form under the terms of this License either on the same media as +an Executable version or via an accepted Electronic Distribution +Mechanism to anyone to whom you made an Executable version available; +and if made available via Electronic Distribution Mechanism, must +remain available for at least twelve (12) months after the date it +initially became available, or at least six (6) months after a +subsequent version of that particular Modification has been made +available to such recipients. You are responsible for ensuring that +the Source Code version remains available even if the Electronic +Distribution Mechanism is maintained by a third party. + +3.3. Description of Modifications. +You must cause all Covered Code to which you contribute to contain a +file documenting the changes You made to create that Covered Code and +the date of any change. You must include a prominent statement that +the Modification is derived, directly or indirectly, from Original +Code provided by the Initial Developer and including the name of the +Initial Developer in (a) the Source Code, and (b) in any notice in an +Executable version or related documentation in which You describe the +origin or ownership of the Covered Code. + +3.4. Intellectual Property Matters + +(a) Third Party Claims. 
+ If You have knowledge that a party claims an intellectual property + right in particular functionality or code (or its utilization + under this License), you must include a text file with the source + code distribution titled ``LEGAL'' which describes the claim and + the party making the claim in sufficient detail that a recipient + will know whom to contact. If you obtain such knowledge after You + make Your Modification available as described in Section 3.2, You + shall promptly modify the LEGAL file in all copies You make + available thereafter and shall take other steps (such as notifying + appropriate mailing lists or newsgroups) reasonably calculated to + inform those who received the Covered Code that new knowledge has + been obtained. + +(b) Contributor APIs. + If Your Modification is an application programming interface and + You own or control patents which are reasonably necessary to + implement that API, you must also include this information in the + LEGAL file. + +3.5. Required Notices. +You must duplicate the notice in Exhibit A in each file of the Source +Code, and this License in any documentation for the Source Code, where +You describe recipients' rights relating to Covered Code. If You +created one or more Modification(s), You may add your name as a +Contributor to the notice described in Exhibit A. If it is not +possible to put such notice in a particular Source Code file due to +its structure, then you must include such notice in a location (such +as a relevant directory file) where a user would be likely to look for +such a notice. You may choose to offer, and to charge a fee for, +warranty, support, indemnity or liability obligations to one or more +recipients of Covered Code. However, You may do so only on Your own +behalf, and not on behalf of the Initial Developer or any +Contributor. 
You must make it absolutely clear than any such warranty, +support, indemnity or liability obligation is offered by You alone, +and You hereby agree to indemnify the Initial Developer and every +Contributor for any liability incurred by the Initial Developer or +such Contributor as a result of warranty, support, indemnity or +liability terms You offer. + +3.6. Distribution of Executable Versions. +You may distribute Covered Code in Executable form only if the +requirements of Section 3.1-3.5 have been met for that Covered Code, +and if You include a notice stating that the Source Code version of +the Covered Code is available under the terms of this License, +including a description of how and where You have fulfilled the +obligations of Section 3.2. The notice must be conspicuously included +in any notice in an Executable version, related documentation or +collateral in which You describe recipients' rights relating to the +Covered Code. You may distribute the Executable version of Covered +Code under a license of Your choice, which may contain terms different +from this License, provided that You are in compliance with the terms +of this License and that the license for the Executable version does +not attempt to limit or alter the recipient's rights in the Source +Code version from the rights set forth in this License. If You +distribute the Executable version under a different license You must +make it absolutely clear that any terms which differ from this License +are offered by You alone, not by the Initial Developer or any +Contributor. You hereby agree to indemnify the Initial Developer and +every Contributor for any liability incurred by the Initial Developer +or such Contributor as a result of any such terms You offer. + +3.7. Larger Works. +You may create a Larger Work by combining Covered Code with other code +not governed by the terms of this License and distribute the Larger +Work as a single product. 
In such a case, You must make sure the +requirements of this License are fulfilled for the Covered Code. + +4. Inability to Comply Due to Statute or Regulation. +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Code due to statute +or regulation then You must: (a) comply with the terms of this License +to the maximum extent possible; and (b) describe the limitations and +the code they affect. Such description must be included in the LEGAL +file described in Section 3.4 and must be included with all +distributions of the Source Code. Except to the extent prohibited by +statute or regulation, such description must be sufficiently detailed +for a recipient of ordinary skill to be able to understand it. + +5. Application of this License. + +This License applies to code to which the Initial Developer has +attached the notice in Exhibit A, and to related Covered Code. + +6. CONNECTION TO MOZILLA PUBLIC LICENSE + +This Erlang License is a derivative work of the Mozilla Public +License, Version 1.0. It contains terms which differ from the Mozilla +Public License, Version 1.0. + +7. DISCLAIMER OF WARRANTY. + +COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN ``AS IS'' BASIS, +WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +WITHOUT LIMITATION, WARRANTIES THAT THE COVERED CODE IS FREE OF +DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR +NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF +THE COVERED CODE IS WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE +IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER +CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR +CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART +OF THIS LICENSE. NO USE OF ANY COVERED CODE IS AUTHORIZED HEREUNDER +EXCEPT UNDER THIS DISCLAIMER. + +8. TERMINATION. 
+This License and the rights granted hereunder will terminate +automatically if You fail to comply with terms herein and fail to cure +such breach within 30 days of becoming aware of the breach. All +sublicenses to the Covered Code which are properly granted shall +survive any termination of this License. Provisions which, by their +nature, must remain in effect beyond the termination of this License +shall survive. + +9. DISCLAIMER OF LIABILITY +Any utilization of Covered Code shall not cause the Initial Developer +or any Contributor to be liable for any damages (neither direct nor +indirect). + +10. MISCELLANEOUS +This License represents the complete agreement concerning the subject +matter hereof. If any provision is held to be unenforceable, such +provision shall be reformed only to the extent necessary to make it +enforceable. This License shall be construed by and in accordance with +the substantive laws of Sweden. Any dispute, controversy or claim +arising out of or relating to this License, or the breach, termination +or invalidity thereof, shall be subject to the exclusive jurisdiction +of Swedish courts, with the Stockholm City Court as the first +instance. + +EXHIBIT A. + +``The contents of this file are subject to the Erlang Public License, +Version 1.1, (the "License"); you may not use this file except in +compliance with the License. You should have received a copy of the +Erlang Public License along with this software. If not, it can be +retrieved via the world wide web at http://www.erlang.org/. + +Software distributed under the License is distributed on an "AS IS" +basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +the License for the specific language governing rights and limitations +under the License. + +The Initial Developer of the Original Code is Ericsson Utvecklings AB. +Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings +AB. 
All Rights Reserved.'' diff --git a/mod_pottymouth/deps/etbloom/README.md b/mod_pottymouth/deps/etbloom/README.md new file mode 100644 index 0000000..96c99bd --- /dev/null +++ b/mod_pottymouth/deps/etbloom/README.md @@ -0,0 +1,13 @@ +Erlang Bloom Filter +======= + +Based on Scalable Bloom Filters Paulo Sérgio Almeida, Carlos Baquero, Nuno Preguiça, David Hutchison +Information Processing Letters Volume 101, Issue 6, 31 March 2007, Pages 255-261 + +Provides scalable bloom filters that can grow indefinitely while +ensuring a desired maximum false positive probability. Also provides +standard partitioned bloom filters with a maximum capacity. Bit arrays +are dimensioned as a power of 2 to enable reusing hash values across +filters through bit operations. Double hashing is used (no need for +enhanced double hashing for partitioned bloom filters). + diff --git a/mod_pottymouth/deps/etbloom/rebar b/mod_pottymouth/deps/etbloom/rebar new file mode 100755 index 0000000..97c77e1 Binary files /dev/null and b/mod_pottymouth/deps/etbloom/rebar differ diff --git a/mod_pottymouth/deps/etbloom/rebar.config b/mod_pottymouth/deps/etbloom/rebar.config new file mode 100644 index 0000000..72ab93b --- /dev/null +++ b/mod_pottymouth/deps/etbloom/rebar.config @@ -0,0 +1,5 @@ +{erl_opts, [warnings_as_errors, debug_info]}. +{deps, [ + {bitarray, "0.1", + {git, "https://github.com/madglory/bitarray", {branch, "master"}}} +]}. diff --git a/mod_pottymouth/deps/etbloom/src/etbloom.app.src b/mod_pottymouth/deps/etbloom/src/etbloom.app.src new file mode 100644 index 0000000..bed1479 --- /dev/null +++ b/mod_pottymouth/deps/etbloom/src/etbloom.app.src @@ -0,0 +1,37 @@ +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. 
If not, it can be +%% retrieved via the world wide web at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% {application, etbloom, [ +%% {description, "Erlang Bloom"}, +%% {registered, []}, +%% {applications, [ +%% kernel, +%% stdlib +%% ]}, +%% {env, []} +%% ]}. +{application, etbloom, + [{description, "Erlang Bloom"}, + {vsn, "1.1.0"}, + {registered, []}, + {mod, { etbloom_app, []}}, + {applications, + [kernel, + stdlib, + bitarray + ]}, + {env,[]}, + {modules, []}, + + {maintainers, []}, + {licenses, []}, + {links, []} + ]}. diff --git a/mod_pottymouth/src/etbloom.erl b/mod_pottymouth/deps/etbloom/src/etbloom.erl similarity index 96% rename from mod_pottymouth/src/etbloom.erl rename to mod_pottymouth/deps/etbloom/src/etbloom.erl index 113a117..d0bd148 100644 --- a/mod_pottymouth/src/etbloom.erl +++ b/mod_pottymouth/deps/etbloom/src/etbloom.erl @@ -74,7 +74,7 @@ bloom(Mode, Dim, E) -> M = 1 bsl Mb, N = trunc(log(1 - P) / log(1 - 1 / M)), #bloom{e = E, n = N, mb = Mb, size = 0, - a = [hipe_bifs:bitarray(1 bsl Mb, false) || _ <- lists:seq(1, K)]}. + a = [bitarray:new(1 bsl Mb, false) || _ <- lists:seq(1, K)]}. log2(X) -> log(X) / log(2). @@ -136,7 +136,7 @@ masked_pair(Mask, X, Y) -> {X band Mask, Y band Mask}. all_set(_Mask, _I1, _I, []) -> true; all_set(Mask, I1, I, [H | T]) -> - case hipe_bifs:bitarray_sub(H, I) of + case bitarray:sub(H, I) of true -> all_set(Mask, I1, (I + I1) band Mask, T); false -> false end. @@ -170,5 +170,5 @@ hash_add(Hashes, #bloom{mb = Mb, a = A, size = Size} = B) -> set_bits(_Mask, _I1, _I, [], Acc) -> lists:reverse(Acc); set_bits(Mask, I1, I, [H | T], Acc) -> - set_bits(Mask, I1, (I + I1) band Mask, T, [hipe_bifs:bitarray_update(H, I, true) | Acc]). 
+ set_bits(Mask, I1, (I + I1) band Mask, T, [bitarray:update(H, I, true) | Acc]). diff --git a/mod_pottymouth/deps/etbloom/test/etbloom_tests.erl b/mod_pottymouth/deps/etbloom/test/etbloom_tests.erl new file mode 100644 index 0000000..437e354 --- /dev/null +++ b/mod_pottymouth/deps/etbloom/test/etbloom_tests.erl @@ -0,0 +1,20 @@ +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved via the world wide web at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +-module(etbloom_tests). +-author("volodymyr.kyrychenko@strikead.com"). +-include_lib("eunit/include/eunit.hrl"). + +bloom_test() -> + Values = [{xxx, binary_to_atom(base64:encode(crypto:strong_rand_bytes(10)), utf8)} || _ <- lists:seq(1, 5000)], + Bloom = etbloom:bloom(Values), + ?assert(lists:all(fun(X) -> etbloom:member(X, Bloom) end, Values)), + ?assertNot(etbloom:member(wtf, Bloom)). diff --git a/mod_pottymouth/src/mod_pottymouth.erl b/mod_pottymouth/src/mod_pottymouth.erl index 0e8bc6f..7c31534 100644 --- a/mod_pottymouth/src/mod_pottymouth.erl +++ b/mod_pottymouth/src/mod_pottymouth.erl @@ -2,27 +2,27 @@ -behaviour(gen_mod). +-include("ejabberd.hrl"). -include("logger.hrl"). +-include("xmpp.hrl"). -export([ start/2, stop/1, on_filter_packet/1, mod_opt_type/1, - depends/2 + depends/2, + reload/3 ]). --include("ejabberd.hrl"). - -import(bloom_gen_server, [start/0, stop/0, member/1]). -import(nomalize_leet_gen_server, [normalize/1]). 
-getMessageLang(Attrs) -> - LangAttr = lists:keyfind(<<"lang">>, 1, Attrs), +getMessageLang(Msg) -> + LangAttr = xmpp:get_lang(Msg), if - LangAttr -> - {<<"lang">>, LangBin} = LangAttr, - Lang = list_to_atom(binary_to_list(LangBin)); + (LangAttr /= <<>>) -> + Lang = list_to_atom(binary_to_list(LangAttr)); true -> Lang = default end, @@ -43,28 +43,13 @@ censorWord({Lang, Word} = _MessageTerm) -> filterWords(L) -> lists:map(fun censorWord/1, L). -filterMessageText(MessageAttrs, MessageText) -> - Lang = getMessageLang(MessageAttrs), +filterMessageText(Lang, MessageText) -> % we want to token-ize utf8 'words' MessageWords = string:tokens(unicode:characters_to_list(MessageText, utf8), " "), MessageTerms = [{Lang, Word} || Word <- MessageWords], % we get back bytewise format terms (rather than utf8) string:join(filterWords(MessageTerms), " "). - -filterMessageBodyElements([{xmlel, <<"body">>, BodyAttr, [{xmlcdata, MessageText}]} = _H|T], MessageElements) -> - FilteredMessageWords = binary:list_to_bin(filterMessageText(BodyAttr, binary:bin_to_list(MessageText))), - FilteredBody = {xmlel, <<"body">>, BodyAttr, [{xmlcdata, FilteredMessageWords}]}, - filterMessageBodyElements(T, lists:append(MessageElements, [FilteredBody])); - -filterMessageBodyElements([H|T], MessageElements) -> - % skip this tag, but pass it on as processed - filterMessageBodyElements(T, lists:append(MessageElements, [H])); - -filterMessageBodyElements([], MessageElements) -> - MessageElements. - - start(_Host, Opts) -> Blacklists = gen_mod:get_opt(blacklists, Opts, fun(A) -> A end, []), lists:map(fun bloom_gen_server:start/1, Blacklists), @@ -82,14 +67,28 @@ stop(_Host) -> on_filter_packet(drop) -> drop; -on_filter_packet({_From, _To, {xmlel, <<"message">>, _Attrs, Els} = _Packet} = _Msg) -> - FilteredEls = filterMessageBodyElements(Els, []), - {_From, _To, {xmlel, <<"message">>, _Attrs, FilteredEls}}; on_filter_packet(Msg) -> - % Handle the generic case (any packet that isn't a message with a body). 
- Msg. + Type = xmpp:get_type(Msg), + if + (Type == chat) orelse (Type == groupchat) -> + BodyText = xmpp:get_text(Msg#message.body), + if + (BodyText /= <<>>) -> + Lang = getMessageLang(Msg), + FilteredMessageWords = binary:list_to_bin(filterMessageText(Lang, binary:bin_to_list(BodyText))), + [BodyObject|_] = Msg#message.body, + NewBodyObject = setelement(3, BodyObject, FilteredMessageWords), + NewMsg = Msg#message{body = [NewBodyObject]}, + NewMsg; + true -> + Msg + end; + true -> + Msg + end. mod_opt_type(blacklists) -> fun (A) when is_list(A) -> A end; mod_opt_type(charmaps) -> fun (A) when is_list(A) -> A end; mod_opt_type(_) -> [blacklists, charmaps]. depends(_Host, _Opts) -> []. +reload(_Host, _NewOpts, _OldOpts) -> ok.