Merge pull request #223 from madglory/master
Replace hipe_bifs/bitarray with ferd/bitarray. 17.04 compat.
This commit is contained in:
commit
91b9bd9973
|
@ -6,10 +6,32 @@ sizes. Using a large list (say, 87M terms) will slow down the initial server
|
||||||
boot time (to about 15 minutes), but once loaded lookups are very
|
boot time (to about 15 minutes), but once loaded lookups are very
|
||||||
speedy.
|
speedy.
|
||||||
|
|
||||||
To install in ejabberd:
|
Prerequisite bitarray lib:
|
||||||
|
|
||||||
|
mod_pottymouth uses a modified version of the 'etbloom' library that uses
|
||||||
|
'bitarray' to replace 'hipe_bifs'. Ejabberd doesn't handle installing
|
||||||
|
dependencies of dependencies very well (etbloom being a dep of mod_pottymouth
|
||||||
|
and bitarray being a dep of etbloom), so bitarray needs to be installed manually
|
||||||
|
before installation of mod_pottymouth.
|
||||||
|
|
||||||
|
This is how I got it to work... YMMV.
|
||||||
|
Given $EJABBERD_HOME is the base directory of your ejabberd install:
|
||||||
|
|
||||||
|
mkdir -p $EJABBERD_HOME/erlang-lib/bitarray
|
||||||
|
cd $EJABBERD_HOME/erlang-lib/bitarray
|
||||||
|
clone https://github.com/ferd/bitarray git repo
|
||||||
|
run: /usr/lib/erlang/bin/escript rebar get-deps
|
||||||
|
run: /usr/lib/erlang/bin/escript rebar compile
|
||||||
|
run: /usr/bin/install -c -d /usr/local/lib/bitarray-1.0.0/ebin
|
||||||
|
run: /usr/bin/install -c -d /usr/local/lib/bitarray-1.0.0/priv
|
||||||
|
run: /usr/bin/install -c -m 644 ./ebin/bitarray.app /usr/local/lib/bitarray-1.0.0/ebin/bitarray.app
|
||||||
|
run: /usr/bin/install -c -m 644 ./ebin/bitarray.beam /usr/local/lib/bitarray-1.0.0/ebin/bitarray.beam
|
||||||
|
run: /usr/bin/install -c -m 644 ./priv/bitarray.so /usr/local/lib/bitarray-1.0.0/priv/bitarray.so
|
||||||
|
|
||||||
|
To install mod_pottymouth in ejabberd:
|
||||||
|
|
||||||
cd ~/.ejabberd-modules/sources
|
cd ~/.ejabberd-modules/sources
|
||||||
clone the git repo
|
clone the ejabberd-contrib git repo
|
||||||
cd mod_pottymouth
|
cd mod_pottymouth
|
||||||
edit: ./conf/mod_pottymouth.yml
|
edit: ./conf/mod_pottymouth.yml
|
||||||
|
|
||||||
|
@ -18,14 +40,6 @@ run: ejabberdctl module_install mod_pottymouth
|
||||||
run: ejabberdctl restart
|
run: ejabberdctl restart
|
||||||
module will be installed in: ~/.ejabberd-modules/mod_pottymouth
|
module will be installed in: ~/.ejabberd-modules/mod_pottymouth
|
||||||
|
|
||||||
|
|
||||||
If you don't have Erlang HiPE available, it may throw errors that mention:
|
|
||||||
{undef,[{hipe_bifs,bitarray,
|
|
||||||
In such case, you can install this library:
|
|
||||||
https://github.com/ferd/bitarray
|
|
||||||
and edit etbloom.erl to call that library instead of hipe_bifs.
|
|
||||||
|
|
||||||
|
|
||||||
Config file format:
|
Config file format:
|
||||||
|
|
||||||
modules:
|
modules:
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
workspace.xml
|
||||||
|
out
|
|
@ -0,0 +1,286 @@
|
||||||
|
ERLANG PUBLIC LICENSE
|
||||||
|
Version 1.1
|
||||||
|
|
||||||
|
1. Definitions.
|
||||||
|
|
||||||
|
1.1. ``Contributor'' means each entity that creates or contributes to
|
||||||
|
the creation of Modifications.
|
||||||
|
|
||||||
|
1.2. ``Contributor Version'' means the combination of the Original
|
||||||
|
Code, prior Modifications used by a Contributor, and the Modifications
|
||||||
|
made by that particular Contributor.
|
||||||
|
|
||||||
|
1.3. ``Covered Code'' means the Original Code or Modifications or the
|
||||||
|
combination of the Original Code and Modifications, in each case
|
||||||
|
including portions thereof.
|
||||||
|
|
||||||
|
1.4. ``Electronic Distribution Mechanism'' means a mechanism generally
|
||||||
|
accepted in the software development community for the electronic
|
||||||
|
transfer of data.
|
||||||
|
|
||||||
|
1.5. ``Executable'' means Covered Code in any form other than Source
|
||||||
|
Code.
|
||||||
|
|
||||||
|
1.6. ``Initial Developer'' means the individual or entity identified
|
||||||
|
as the Initial Developer in the Source Code notice required by Exhibit
|
||||||
|
A.
|
||||||
|
|
||||||
|
1.7. ``Larger Work'' means a work which combines Covered Code or
|
||||||
|
portions thereof with code not governed by the terms of this License.
|
||||||
|
|
||||||
|
1.8. ``License'' means this document.
|
||||||
|
|
||||||
|
1.9. ``Modifications'' means any addition to or deletion from the
|
||||||
|
substance or structure of either the Original Code or any previous
|
||||||
|
Modifications. When Covered Code is released as a series of files, a
|
||||||
|
Modification is:
|
||||||
|
|
||||||
|
A. Any addition to or deletion from the contents of a file containing
|
||||||
|
Original Code or previous Modifications.
|
||||||
|
|
||||||
|
B. Any new file that contains any part of the Original Code or
|
||||||
|
previous Modifications.
|
||||||
|
|
||||||
|
1.10. ``Original Code'' means Source Code of computer software code
|
||||||
|
which is described in the Source Code notice required by Exhibit A as
|
||||||
|
Original Code, and which, at the time of its release under this
|
||||||
|
License is not already Covered Code governed by this License.
|
||||||
|
|
||||||
|
1.11. ``Source Code'' means the preferred form of the Covered Code for
|
||||||
|
making modifications to it, including all modules it contains, plus
|
||||||
|
any associated interface definition files, scripts used to control
|
||||||
|
compilation and installation of an Executable, or a list of source
|
||||||
|
code differential comparisons against either the Original Code or
|
||||||
|
another well known, available Covered Code of the Contributor's
|
||||||
|
choice. The Source Code can be in a compressed or archival form,
|
||||||
|
provided the appropriate decompression or de-archiving software is
|
||||||
|
widely available for no charge.
|
||||||
|
|
||||||
|
1.12. ``You'' means an individual or a legal entity exercising rights
|
||||||
|
under, and complying with all of the terms of, this License. For legal
|
||||||
|
entities,``You'' includes any entity which controls, is controlled by,
|
||||||
|
or is under common control with You. For purposes of this definition,
|
||||||
|
``control'' means (a) the power, direct or indirect, to cause the
|
||||||
|
direction or management of such entity, whether by contract or
|
||||||
|
otherwise, or (b) ownership of fifty percent (50%) or more of the
|
||||||
|
outstanding shares or beneficial ownership of such entity.
|
||||||
|
|
||||||
|
2. Source Code License.
|
||||||
|
|
||||||
|
2.1. The Initial Developer Grant.
|
||||||
|
The Initial Developer hereby grants You a world-wide, royalty-free,
|
||||||
|
non-exclusive license, subject to third party intellectual property
|
||||||
|
claims:
|
||||||
|
|
||||||
|
(a) to use, reproduce, modify, display, perform, sublicense and
|
||||||
|
distribute the Original Code (or portions thereof) with or without
|
||||||
|
Modifications, or as part of a Larger Work; and
|
||||||
|
|
||||||
|
(b) under patents now or hereafter owned or controlled by Initial
|
||||||
|
Developer, to make, have made, use and sell (``Utilize'') the
|
||||||
|
Original Code (or portions thereof), but solely to the extent that
|
||||||
|
any such patent is reasonably necessary to enable You to Utilize
|
||||||
|
the Original Code (or portions thereof) and not to any greater
|
||||||
|
extent that may be necessary to Utilize further Modifications or
|
||||||
|
combinations.
|
||||||
|
|
||||||
|
2.2. Contributor Grant.
|
||||||
|
Each Contributor hereby grants You a world-wide, royalty-free,
|
||||||
|
non-exclusive license, subject to third party intellectual property
|
||||||
|
claims:
|
||||||
|
|
||||||
|
(a) to use, reproduce, modify, display, perform, sublicense and
|
||||||
|
distribute the Modifications created by such Contributor (or
|
||||||
|
portions thereof) either on an unmodified basis, with other
|
||||||
|
Modifications, as Covered Code or as part of a Larger Work; and
|
||||||
|
|
||||||
|
(b) under patents now or hereafter owned or controlled by Contributor,
|
||||||
|
to Utilize the Contributor Version (or portions thereof), but
|
||||||
|
solely to the extent that any such patent is reasonably necessary
|
||||||
|
to enable You to Utilize the Contributor Version (or portions
|
||||||
|
thereof), and not to any greater extent that may be necessary to
|
||||||
|
Utilize further Modifications or combinations.
|
||||||
|
|
||||||
|
3. Distribution Obligations.
|
||||||
|
|
||||||
|
3.1. Application of License.
|
||||||
|
The Modifications which You contribute are governed by the terms of
|
||||||
|
this License, including without limitation Section 2.2. The Source
|
||||||
|
Code version of Covered Code may be distributed only under the terms
|
||||||
|
of this License, and You must include a copy of this License with
|
||||||
|
every copy of the Source Code You distribute. You may not offer or
|
||||||
|
impose any terms on any Source Code version that alters or restricts
|
||||||
|
the applicable version of this License or the recipients' rights
|
||||||
|
hereunder. However, You may include an additional document offering
|
||||||
|
the additional rights described in Section 3.5.
|
||||||
|
|
||||||
|
3.2. Availability of Source Code.
|
||||||
|
Any Modification which You contribute must be made available in Source
|
||||||
|
Code form under the terms of this License either on the same media as
|
||||||
|
an Executable version or via an accepted Electronic Distribution
|
||||||
|
Mechanism to anyone to whom you made an Executable version available;
|
||||||
|
and if made available via Electronic Distribution Mechanism, must
|
||||||
|
remain available for at least twelve (12) months after the date it
|
||||||
|
initially became available, or at least six (6) months after a
|
||||||
|
subsequent version of that particular Modification has been made
|
||||||
|
available to such recipients. You are responsible for ensuring that
|
||||||
|
the Source Code version remains available even if the Electronic
|
||||||
|
Distribution Mechanism is maintained by a third party.
|
||||||
|
|
||||||
|
3.3. Description of Modifications.
|
||||||
|
You must cause all Covered Code to which you contribute to contain a
|
||||||
|
file documenting the changes You made to create that Covered Code and
|
||||||
|
the date of any change. You must include a prominent statement that
|
||||||
|
the Modification is derived, directly or indirectly, from Original
|
||||||
|
Code provided by the Initial Developer and including the name of the
|
||||||
|
Initial Developer in (a) the Source Code, and (b) in any notice in an
|
||||||
|
Executable version or related documentation in which You describe the
|
||||||
|
origin or ownership of the Covered Code.
|
||||||
|
|
||||||
|
3.4. Intellectual Property Matters
|
||||||
|
|
||||||
|
(a) Third Party Claims.
|
||||||
|
If You have knowledge that a party claims an intellectual property
|
||||||
|
right in particular functionality or code (or its utilization
|
||||||
|
under this License), you must include a text file with the source
|
||||||
|
code distribution titled ``LEGAL'' which describes the claim and
|
||||||
|
the party making the claim in sufficient detail that a recipient
|
||||||
|
will know whom to contact. If you obtain such knowledge after You
|
||||||
|
make Your Modification available as described in Section 3.2, You
|
||||||
|
shall promptly modify the LEGAL file in all copies You make
|
||||||
|
available thereafter and shall take other steps (such as notifying
|
||||||
|
appropriate mailing lists or newsgroups) reasonably calculated to
|
||||||
|
inform those who received the Covered Code that new knowledge has
|
||||||
|
been obtained.
|
||||||
|
|
||||||
|
(b) Contributor APIs.
|
||||||
|
If Your Modification is an application programming interface and
|
||||||
|
You own or control patents which are reasonably necessary to
|
||||||
|
implement that API, you must also include this information in the
|
||||||
|
LEGAL file.
|
||||||
|
|
||||||
|
3.5. Required Notices.
|
||||||
|
You must duplicate the notice in Exhibit A in each file of the Source
|
||||||
|
Code, and this License in any documentation for the Source Code, where
|
||||||
|
You describe recipients' rights relating to Covered Code. If You
|
||||||
|
created one or more Modification(s), You may add your name as a
|
||||||
|
Contributor to the notice described in Exhibit A. If it is not
|
||||||
|
possible to put such notice in a particular Source Code file due to
|
||||||
|
its structure, then you must include such notice in a location (such
|
||||||
|
as a relevant directory file) where a user would be likely to look for
|
||||||
|
such a notice. You may choose to offer, and to charge a fee for,
|
||||||
|
warranty, support, indemnity or liability obligations to one or more
|
||||||
|
recipients of Covered Code. However, You may do so only on Your own
|
||||||
|
behalf, and not on behalf of the Initial Developer or any
|
||||||
|
Contributor. You must make it absolutely clear than any such warranty,
|
||||||
|
support, indemnity or liability obligation is offered by You alone,
|
||||||
|
and You hereby agree to indemnify the Initial Developer and every
|
||||||
|
Contributor for any liability incurred by the Initial Developer or
|
||||||
|
such Contributor as a result of warranty, support, indemnity or
|
||||||
|
liability terms You offer.
|
||||||
|
|
||||||
|
3.6. Distribution of Executable Versions.
|
||||||
|
You may distribute Covered Code in Executable form only if the
|
||||||
|
requirements of Section 3.1-3.5 have been met for that Covered Code,
|
||||||
|
and if You include a notice stating that the Source Code version of
|
||||||
|
the Covered Code is available under the terms of this License,
|
||||||
|
including a description of how and where You have fulfilled the
|
||||||
|
obligations of Section 3.2. The notice must be conspicuously included
|
||||||
|
in any notice in an Executable version, related documentation or
|
||||||
|
collateral in which You describe recipients' rights relating to the
|
||||||
|
Covered Code. You may distribute the Executable version of Covered
|
||||||
|
Code under a license of Your choice, which may contain terms different
|
||||||
|
from this License, provided that You are in compliance with the terms
|
||||||
|
of this License and that the license for the Executable version does
|
||||||
|
not attempt to limit or alter the recipient's rights in the Source
|
||||||
|
Code version from the rights set forth in this License. If You
|
||||||
|
distribute the Executable version under a different license You must
|
||||||
|
make it absolutely clear that any terms which differ from this License
|
||||||
|
are offered by You alone, not by the Initial Developer or any
|
||||||
|
Contributor. You hereby agree to indemnify the Initial Developer and
|
||||||
|
every Contributor for any liability incurred by the Initial Developer
|
||||||
|
or such Contributor as a result of any such terms You offer.
|
||||||
|
|
||||||
|
3.7. Larger Works.
|
||||||
|
You may create a Larger Work by combining Covered Code with other code
|
||||||
|
not governed by the terms of this License and distribute the Larger
|
||||||
|
Work as a single product. In such a case, You must make sure the
|
||||||
|
requirements of this License are fulfilled for the Covered Code.
|
||||||
|
|
||||||
|
4. Inability to Comply Due to Statute or Regulation.
|
||||||
|
If it is impossible for You to comply with any of the terms of this
|
||||||
|
License with respect to some or all of the Covered Code due to statute
|
||||||
|
or regulation then You must: (a) comply with the terms of this License
|
||||||
|
to the maximum extent possible; and (b) describe the limitations and
|
||||||
|
the code they affect. Such description must be included in the LEGAL
|
||||||
|
file described in Section 3.4 and must be included with all
|
||||||
|
distributions of the Source Code. Except to the extent prohibited by
|
||||||
|
statute or regulation, such description must be sufficiently detailed
|
||||||
|
for a recipient of ordinary skill to be able to understand it.
|
||||||
|
|
||||||
|
5. Application of this License.
|
||||||
|
|
||||||
|
This License applies to code to which the Initial Developer has
|
||||||
|
attached the notice in Exhibit A, and to related Covered Code.
|
||||||
|
|
||||||
|
6. CONNECTION TO MOZILLA PUBLIC LICENSE
|
||||||
|
|
||||||
|
This Erlang License is a derivative work of the Mozilla Public
|
||||||
|
License, Version 1.0. It contains terms which differ from the Mozilla
|
||||||
|
Public License, Version 1.0.
|
||||||
|
|
||||||
|
7. DISCLAIMER OF WARRANTY.
|
||||||
|
|
||||||
|
COVERED CODE IS PROVIDED UNDER THIS LICENSE ON AN ``AS IS'' BASIS,
|
||||||
|
WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
|
||||||
|
WITHOUT LIMITATION, WARRANTIES THAT THE COVERED CODE IS FREE OF
|
||||||
|
DEFECTS, MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE OR
|
||||||
|
NON-INFRINGING. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF
|
||||||
|
THE COVERED CODE IS WITH YOU. SHOULD ANY COVERED CODE PROVE DEFECTIVE
|
||||||
|
IN ANY RESPECT, YOU (NOT THE INITIAL DEVELOPER OR ANY OTHER
|
||||||
|
CONTRIBUTOR) ASSUME THE COST OF ANY NECESSARY SERVICING, REPAIR OR
|
||||||
|
CORRECTION. THIS DISCLAIMER OF WARRANTY CONSTITUTES AN ESSENTIAL PART
|
||||||
|
OF THIS LICENSE. NO USE OF ANY COVERED CODE IS AUTHORIZED HEREUNDER
|
||||||
|
EXCEPT UNDER THIS DISCLAIMER.
|
||||||
|
|
||||||
|
8. TERMINATION.
|
||||||
|
This License and the rights granted hereunder will terminate
|
||||||
|
automatically if You fail to comply with terms herein and fail to cure
|
||||||
|
such breach within 30 days of becoming aware of the breach. All
|
||||||
|
sublicenses to the Covered Code which are properly granted shall
|
||||||
|
survive any termination of this License. Provisions which, by their
|
||||||
|
nature, must remain in effect beyond the termination of this License
|
||||||
|
shall survive.
|
||||||
|
|
||||||
|
9. DISCLAIMER OF LIABILITY
|
||||||
|
Any utilization of Covered Code shall not cause the Initial Developer
|
||||||
|
or any Contributor to be liable for any damages (neither direct nor
|
||||||
|
indirect).
|
||||||
|
|
||||||
|
10. MISCELLANEOUS
|
||||||
|
This License represents the complete agreement concerning the subject
|
||||||
|
matter hereof. If any provision is held to be unenforceable, such
|
||||||
|
provision shall be reformed only to the extent necessary to make it
|
||||||
|
enforceable. This License shall be construed by and in accordance with
|
||||||
|
the substantive laws of Sweden. Any dispute, controversy or claim
|
||||||
|
arising out of or relating to this License, or the breach, termination
|
||||||
|
or invalidity thereof, shall be subject to the exclusive jurisdiction
|
||||||
|
of Swedish courts, with the Stockholm City Court as the first
|
||||||
|
instance.
|
||||||
|
|
||||||
|
EXHIBIT A.
|
||||||
|
|
||||||
|
``The contents of this file are subject to the Erlang Public License,
|
||||||
|
Version 1.1, (the "License"); you may not use this file except in
|
||||||
|
compliance with the License. You should have received a copy of the
|
||||||
|
Erlang Public License along with this software. If not, it can be
|
||||||
|
retrieved via the world wide web at http://www.erlang.org/.
|
||||||
|
|
||||||
|
Software distributed under the License is distributed on an "AS IS"
|
||||||
|
basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
|
||||||
|
the License for the specific language governing rights and limitations
|
||||||
|
under the License.
|
||||||
|
|
||||||
|
The Initial Developer of the Original Code is Ericsson Utvecklings AB.
|
||||||
|
Portions created by Ericsson are Copyright 1999, Ericsson Utvecklings
|
||||||
|
AB. All Rights Reserved.''
|
|
@ -0,0 +1,13 @@
|
||||||
|
Erlang Bloom Filter
|
||||||
|
=======
|
||||||
|
|
||||||
|
Based on "Scalable Bloom Filters" by Paulo Sérgio Almeida, Carlos Baquero, Nuno Preguiça, and David Hutchison,
|
||||||
|
Information Processing Letters Volume 101, Issue 6, 31 March 2007, Pages 255-261
|
||||||
|
|
||||||
|
Provides scalable bloom filters that can grow indefinitely while
|
||||||
|
ensuring a desired maximum false positive probability. Also provides
|
||||||
|
standard partitioned bloom filters with a maximum capacity. Bit arrays
|
||||||
|
are dimensioned as a power of 2 to enable reusing hash values across
|
||||||
|
filters through bit operations. Double hashing is used (no need for
|
||||||
|
enhanced double hashing for partitioned bloom filters).
|
||||||
|
|
Binary file not shown.
|
@ -0,0 +1,5 @@
|
||||||
|
{erl_opts, [warnings_as_errors, debug_info]}.
|
||||||
|
{deps, [
|
||||||
|
{bitarray, "0.1",
|
||||||
|
{git, "https://github.com/madglory/bitarray", {branch, "master"}}}
|
||||||
|
]}.
|
|
@ -0,0 +1,37 @@
|
||||||
|
%% The contents of this file are subject to the Erlang Public License,
|
||||||
|
%% Version 1.1, (the "License"); you may not use this file except in
|
||||||
|
%% compliance with the License. You should have received a copy of the
|
||||||
|
%% Erlang Public License along with this software. If not, it can be
|
||||||
|
%% retrieved via the world wide web at http://www.erlang.org/.
|
||||||
|
%%
|
||||||
|
%% Software distributed under the License is distributed on an "AS IS"
|
||||||
|
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
|
||||||
|
%% the License for the specific language governing rights and limitations
|
||||||
|
%% under the License.
|
||||||
|
%%
|
||||||
|
%% {application, etbloom, [
|
||||||
|
%% {description, "Erlang Bloom"},
|
||||||
|
%% {registered, []},
|
||||||
|
%% {applications, [
|
||||||
|
%% kernel,
|
||||||
|
%% stdlib
|
||||||
|
%% ]},
|
||||||
|
%% {env, []}
|
||||||
|
%% ]}.
|
||||||
|
{application, etbloom,
|
||||||
|
[{description, "Erlang Bloom"},
|
||||||
|
{vsn, "1.1.0"},
|
||||||
|
{registered, []},
|
||||||
|
{mod, { etbloom_app, []}},
|
||||||
|
{applications,
|
||||||
|
[kernel,
|
||||||
|
stdlib,
|
||||||
|
bitarray
|
||||||
|
]},
|
||||||
|
{env,[]},
|
||||||
|
{modules, []},
|
||||||
|
|
||||||
|
{maintainers, []},
|
||||||
|
{licenses, []},
|
||||||
|
{links, []}
|
||||||
|
]}.
|
|
@ -74,7 +74,7 @@ bloom(Mode, Dim, E) ->
|
||||||
M = 1 bsl Mb,
|
M = 1 bsl Mb,
|
||||||
N = trunc(log(1 - P) / log(1 - 1 / M)),
|
N = trunc(log(1 - P) / log(1 - 1 / M)),
|
||||||
#bloom{e = E, n = N, mb = Mb, size = 0,
|
#bloom{e = E, n = N, mb = Mb, size = 0,
|
||||||
a = [hipe_bifs:bitarray(1 bsl Mb, false) || _ <- lists:seq(1, K)]}.
|
a = [bitarray:new(1 bsl Mb, false) || _ <- lists:seq(1, K)]}.
|
||||||
|
|
||||||
log2(X) -> log(X) / log(2).
|
log2(X) -> log(X) / log(2).
|
||||||
|
|
||||||
|
@ -136,7 +136,7 @@ masked_pair(Mask, X, Y) -> {X band Mask, Y band Mask}.
|
||||||
|
|
||||||
all_set(_Mask, _I1, _I, []) -> true;
|
all_set(_Mask, _I1, _I, []) -> true;
|
||||||
all_set(Mask, I1, I, [H | T]) ->
|
all_set(Mask, I1, I, [H | T]) ->
|
||||||
case hipe_bifs:bitarray_sub(H, I) of
|
case bitarray:sub(H, I) of
|
||||||
true -> all_set(Mask, I1, (I + I1) band Mask, T);
|
true -> all_set(Mask, I1, (I + I1) band Mask, T);
|
||||||
false -> false
|
false -> false
|
||||||
end.
|
end.
|
||||||
|
@ -170,5 +170,5 @@ hash_add(Hashes, #bloom{mb = Mb, a = A, size = Size} = B) ->
|
||||||
|
|
||||||
set_bits(_Mask, _I1, _I, [], Acc) -> lists:reverse(Acc);
|
set_bits(_Mask, _I1, _I, [], Acc) -> lists:reverse(Acc);
|
||||||
set_bits(Mask, I1, I, [H | T], Acc) ->
|
set_bits(Mask, I1, I, [H | T], Acc) ->
|
||||||
set_bits(Mask, I1, (I + I1) band Mask, T, [hipe_bifs:bitarray_update(H, I, true) | Acc]).
|
set_bits(Mask, I1, (I + I1) band Mask, T, [bitarray:update(H, I, true) | Acc]).
|
||||||
|
|
|
@ -0,0 +1,20 @@
|
||||||
|
%% The contents of this file are subject to the Erlang Public License,
|
||||||
|
%% Version 1.1, (the "License"); you may not use this file except in
|
||||||
|
%% compliance with the License. You should have received a copy of the
|
||||||
|
%% Erlang Public License along with this software. If not, it can be
|
||||||
|
%% retrieved via the world wide web at http://www.erlang.org/.
|
||||||
|
%%
|
||||||
|
%% Software distributed under the License is distributed on an "AS IS"
|
||||||
|
%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See
|
||||||
|
%% the License for the specific language governing rights and limitations
|
||||||
|
%% under the License.
|
||||||
|
%%
|
||||||
|
-module(etbloom_tests).
|
||||||
|
-author("volodymyr.kyrychenko@strikead.com").
|
||||||
|
-include_lib("eunit/include/eunit.hrl").
|
||||||
|
|
||||||
|
bloom_test() ->
|
||||||
|
Values = [{xxx, binary_to_atom(base64:encode(crypto:strong_rand_bytes(10)), utf8)} || _ <- lists:seq(1, 5000)],
|
||||||
|
Bloom = etbloom:bloom(Values),
|
||||||
|
?assert(lists:all(fun(X) -> etbloom:member(X, Bloom) end, Values)),
|
||||||
|
?assertNot(etbloom:member(wtf, Bloom)).
|
|
@ -2,27 +2,27 @@
|
||||||
|
|
||||||
-behaviour(gen_mod).
|
-behaviour(gen_mod).
|
||||||
|
|
||||||
|
-include("ejabberd.hrl").
|
||||||
-include("logger.hrl").
|
-include("logger.hrl").
|
||||||
|
-include("xmpp.hrl").
|
||||||
|
|
||||||
-export([
|
-export([
|
||||||
start/2,
|
start/2,
|
||||||
stop/1,
|
stop/1,
|
||||||
on_filter_packet/1,
|
on_filter_packet/1,
|
||||||
mod_opt_type/1,
|
mod_opt_type/1,
|
||||||
depends/2
|
depends/2,
|
||||||
|
reload/3
|
||||||
]).
|
]).
|
||||||
|
|
||||||
-include("ejabberd.hrl").
|
|
||||||
|
|
||||||
-import(bloom_gen_server, [start/0, stop/0, member/1]).
|
-import(bloom_gen_server, [start/0, stop/0, member/1]).
|
||||||
-import(nomalize_leet_gen_server, [normalize/1]).
|
-import(nomalize_leet_gen_server, [normalize/1]).
|
||||||
|
|
||||||
getMessageLang(Attrs) ->
|
getMessageLang(Msg) ->
|
||||||
LangAttr = lists:keyfind(<<"lang">>, 1, Attrs),
|
LangAttr = xmpp:get_lang(Msg),
|
||||||
if
|
if
|
||||||
LangAttr ->
|
(LangAttr /= <<>>) ->
|
||||||
{<<"lang">>, LangBin} = LangAttr,
|
Lang = list_to_atom(binary_to_list(LangAttr));
|
||||||
Lang = list_to_atom(binary_to_list(LangBin));
|
|
||||||
true ->
|
true ->
|
||||||
Lang = default
|
Lang = default
|
||||||
end,
|
end,
|
||||||
|
@ -43,28 +43,13 @@ censorWord({Lang, Word} = _MessageTerm) ->
|
||||||
filterWords(L) ->
|
filterWords(L) ->
|
||||||
lists:map(fun censorWord/1, L).
|
lists:map(fun censorWord/1, L).
|
||||||
|
|
||||||
filterMessageText(MessageAttrs, MessageText) ->
|
filterMessageText(Lang, MessageText) ->
|
||||||
Lang = getMessageLang(MessageAttrs),
|
|
||||||
% we want to token-ize utf8 'words'
|
% we want to token-ize utf8 'words'
|
||||||
MessageWords = string:tokens(unicode:characters_to_list(MessageText, utf8), " "),
|
MessageWords = string:tokens(unicode:characters_to_list(MessageText, utf8), " "),
|
||||||
MessageTerms = [{Lang, Word} || Word <- MessageWords],
|
MessageTerms = [{Lang, Word} || Word <- MessageWords],
|
||||||
% we get back bytewise format terms (rather than utf8)
|
% we get back bytewise format terms (rather than utf8)
|
||||||
string:join(filterWords(MessageTerms), " ").
|
string:join(filterWords(MessageTerms), " ").
|
||||||
|
|
||||||
|
|
||||||
filterMessageBodyElements([{xmlel, <<"body">>, BodyAttr, [{xmlcdata, MessageText}]} = _H|T], MessageElements) ->
|
|
||||||
FilteredMessageWords = binary:list_to_bin(filterMessageText(BodyAttr, binary:bin_to_list(MessageText))),
|
|
||||||
FilteredBody = {xmlel, <<"body">>, BodyAttr, [{xmlcdata, FilteredMessageWords}]},
|
|
||||||
filterMessageBodyElements(T, lists:append(MessageElements, [FilteredBody]));
|
|
||||||
|
|
||||||
filterMessageBodyElements([H|T], MessageElements) ->
|
|
||||||
% skip this tag, but pass it on as processed
|
|
||||||
filterMessageBodyElements(T, lists:append(MessageElements, [H]));
|
|
||||||
|
|
||||||
filterMessageBodyElements([], MessageElements) ->
|
|
||||||
MessageElements.
|
|
||||||
|
|
||||||
|
|
||||||
start(_Host, Opts) ->
|
start(_Host, Opts) ->
|
||||||
Blacklists = gen_mod:get_opt(blacklists, Opts, fun(A) -> A end, []),
|
Blacklists = gen_mod:get_opt(blacklists, Opts, fun(A) -> A end, []),
|
||||||
lists:map(fun bloom_gen_server:start/1, Blacklists),
|
lists:map(fun bloom_gen_server:start/1, Blacklists),
|
||||||
|
@ -82,14 +67,28 @@ stop(_Host) ->
|
||||||
on_filter_packet(drop) ->
|
on_filter_packet(drop) ->
|
||||||
drop;
|
drop;
|
||||||
|
|
||||||
on_filter_packet({_From, _To, {xmlel, <<"message">>, _Attrs, Els} = _Packet} = _Msg) ->
|
|
||||||
FilteredEls = filterMessageBodyElements(Els, []),
|
|
||||||
{_From, _To, {xmlel, <<"message">>, _Attrs, FilteredEls}};
|
|
||||||
on_filter_packet(Msg) ->
|
on_filter_packet(Msg) ->
|
||||||
% Handle the generic case (any packet that isn't a message with a body).
|
Type = xmpp:get_type(Msg),
|
||||||
Msg.
|
if
|
||||||
|
(Type == chat) orelse (Type == groupchat) ->
|
||||||
|
BodyText = xmpp:get_text(Msg#message.body),
|
||||||
|
if
|
||||||
|
(BodyText /= <<>>) ->
|
||||||
|
Lang = getMessageLang(Msg),
|
||||||
|
FilteredMessageWords = binary:list_to_bin(filterMessageText(Lang, binary:bin_to_list(BodyText))),
|
||||||
|
[BodyObject|_] = Msg#message.body,
|
||||||
|
NewBodyObject = setelement(3, BodyObject, FilteredMessageWords),
|
||||||
|
NewMsg = Msg#message{body = [NewBodyObject]},
|
||||||
|
NewMsg;
|
||||||
|
true ->
|
||||||
|
Msg
|
||||||
|
end;
|
||||||
|
true ->
|
||||||
|
Msg
|
||||||
|
end.
|
||||||
|
|
||||||
mod_opt_type(blacklists) -> fun (A) when is_list(A) -> A end;
|
mod_opt_type(blacklists) -> fun (A) when is_list(A) -> A end;
|
||||||
mod_opt_type(charmaps) -> fun (A) when is_list(A) -> A end;
|
mod_opt_type(charmaps) -> fun (A) when is_list(A) -> A end;
|
||||||
mod_opt_type(_) -> [blacklists, charmaps].
|
mod_opt_type(_) -> [blacklists, charmaps].
|
||||||
depends(_Host, _Opts) -> [].
|
depends(_Host, _Opts) -> [].
|
||||||
|
reload(_Host, _NewOpts, _OldOpts) -> ok.
|
||||||
|
|
Loading…
Reference in New Issue