diff --git a/mod_s3_upload/COPYING b/mod_s3_upload/COPYING new file mode 100644 index 0000000..d159169 --- /dev/null +++ b/mod_s3_upload/COPYING @@ -0,0 +1,339 @@ + GNU GENERAL PUBLIC LICENSE + Version 2, June 1991 + + Copyright (C) 1989, 1991 Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The licenses for most software are designed to take away your +freedom to share and change it. By contrast, the GNU General Public +License is intended to guarantee your freedom to share and change free +software--to make sure the software is free for all its users. This +General Public License applies to most of the Free Software +Foundation's software and to any other program whose authors commit to +using it. (Some other Free Software Foundation software is covered by +the GNU Lesser General Public License instead.) You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +this service if you wish), that you receive source code or can get it +if you want it, that you can change the software or use pieces of it +in new free programs; and that you know you can do these things. + + To protect your rights, we need to make restrictions that forbid +anyone to deny you these rights or to ask you to surrender the rights. +These restrictions translate to certain responsibilities for you if you +distribute copies of the software, or if you modify it. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must give the recipients all the rights that +you have. You must make sure that they, too, receive or can get the +source code. 
And you must show them these terms so they know their +rights. + + We protect your rights with two steps: (1) copyright the software, and +(2) offer you this license which gives you legal permission to copy, +distribute and/or modify the software. + + Also, for each author's protection and ours, we want to make certain +that everyone understands that there is no warranty for this free +software. If the software is modified by someone else and passed on, we +want its recipients to know that what they have is not the original, so +that any problems introduced by others will not reflect on the original +authors' reputations. + + Finally, any free program is threatened constantly by software +patents. We wish to avoid the danger that redistributors of a free +program will individually obtain patent licenses, in effect making the +program proprietary. To prevent this, we have made it clear that any +patent must be licensed for everyone's free use or not licensed at all. + + The precise terms and conditions for copying, distribution and +modification follow. + + GNU GENERAL PUBLIC LICENSE + TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + + 0. This License applies to any program or other work which contains +a notice placed by the copyright holder saying it may be distributed +under the terms of this General Public License. The "Program", below, +refers to any such program or work, and a "work based on the Program" +means either the Program or any derivative work under copyright law: +that is to say, a work containing the Program or a portion of it, +either verbatim or with modifications and/or translated into another +language. (Hereinafter, translation is included without limitation in +the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not +covered by this License; they are outside its scope. 
The act of +running the Program is not restricted, and the output from the Program +is covered only if its contents constitute a work based on the +Program (independent of having been made by running the Program). +Whether that is true depends on what the Program does. + + 1. You may copy and distribute verbatim copies of the Program's +source code as you receive it, in any medium, provided that you +conspicuously and appropriately publish on each copy an appropriate +copyright notice and disclaimer of warranty; keep intact all the +notices that refer to this License and to the absence of any warranty; +and give any other recipients of the Program a copy of this License +along with the Program. + +You may charge a fee for the physical act of transferring a copy, and +you may at your option offer warranty protection in exchange for a fee. + + 2. You may modify your copy or copies of the Program or any portion +of it, thus forming a work based on the Program, and copy and +distribute such modifications or work under the terms of Section 1 +above, provided that you also meet all of these conditions: + + a) You must cause the modified files to carry prominent notices + stating that you changed the files and the date of any change. + + b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any + part thereof, to be licensed as a whole at no charge to all third + parties under the terms of this License. + + c) If the modified program normally reads commands interactively + when run, you must cause it, when started running for such + interactive use in the most ordinary way, to print or display an + announcement including an appropriate copyright notice and a + notice that there is no warranty (or else, saying that you provide + a warranty) and that users may redistribute the program under + these conditions, and telling the user how to view a copy of this + License. 
(Exception: if the Program itself is interactive but + does not normally print such an announcement, your work based on + the Program is not required to print an announcement.) + +These requirements apply to the modified work as a whole. If +identifiable sections of that work are not derived from the Program, +and can be reasonably considered independent and separate works in +themselves, then this License, and its terms, do not apply to those +sections when you distribute them as separate works. But when you +distribute the same sections as part of a whole which is a work based +on the Program, the distribution of the whole must be on the terms of +this License, whose permissions for other licensees extend to the +entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest +your rights to work written entirely by you; rather, the intent is to +exercise the right to control the distribution of derivative or +collective works based on the Program. + +In addition, mere aggregation of another work not based on the Program +with the Program (or with a work based on the Program) on a volume of +a storage or distribution medium does not bring the other work under +the scope of this License. + + 3. 
You may copy and distribute the Program (or a work based on it, +under Section 2) in object code or executable form under the terms of +Sections 1 and 2 above provided that you also do one of the following: + + a) Accompany it with the complete corresponding machine-readable + source code, which must be distributed under the terms of Sections + 1 and 2 above on a medium customarily used for software interchange; or, + + b) Accompany it with a written offer, valid for at least three + years, to give any third party, for a charge no more than your + cost of physically performing source distribution, a complete + machine-readable copy of the corresponding source code, to be + distributed under the terms of Sections 1 and 2 above on a medium + customarily used for software interchange; or, + + c) Accompany it with the information you received as to the offer + to distribute corresponding source code. (This alternative is + allowed only for noncommercial distribution and only if you + received the program in object code or executable form with such + an offer, in accord with Subsection b above.) + +The source code for a work means the preferred form of the work for +making modifications to it. For an executable work, complete source +code means all the source code for all modules it contains, plus any +associated interface definition files, plus the scripts used to +control compilation and installation of the executable. However, as a +special exception, the source code distributed need not include +anything that is normally distributed (in either source or binary +form) with the major components (compiler, kernel, and so on) of the +operating system on which the executable runs, unless that component +itself accompanies the executable. 
+ +If distribution of executable or object code is made by offering +access to copy from a designated place, then offering equivalent +access to copy the source code from the same place counts as +distribution of the source code, even though third parties are not +compelled to copy the source along with the object code. + + 4. You may not copy, modify, sublicense, or distribute the Program +except as expressly provided under this License. Any attempt +otherwise to copy, modify, sublicense or distribute the Program is +void, and will automatically terminate your rights under this License. +However, parties who have received copies, or rights, from you under +this License will not have their licenses terminated so long as such +parties remain in full compliance. + + 5. You are not required to accept this License, since you have not +signed it. However, nothing else grants you permission to modify or +distribute the Program or its derivative works. These actions are +prohibited by law if you do not accept this License. Therefore, by +modifying or distributing the Program (or any work based on the +Program), you indicate your acceptance of this License to do so, and +all its terms and conditions for copying, distributing or modifying +the Program or works based on it. + + 6. Each time you redistribute the Program (or any work based on the +Program), the recipient automatically receives a license from the +original licensor to copy, distribute or modify the Program subject to +these terms and conditions. You may not impose any further +restrictions on the recipients' exercise of the rights granted herein. +You are not responsible for enforcing compliance by third parties to +this License. + + 7. 
If, as a consequence of a court judgment or allegation of patent +infringement or for any other reason (not limited to patent issues), +conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot +distribute so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you +may not distribute the Program at all. For example, if a patent +license would not permit royalty-free redistribution of the Program by +all those who receive copies directly or indirectly through you, then +the only way you could satisfy both it and this License would be to +refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under +any particular circumstance, the balance of the section is intended to +apply and the section as a whole is intended to apply in other +circumstances. + +It is not the purpose of this section to induce you to infringe any +patents or other property right claims or to contest validity of any +such claims; this section has the sole purpose of protecting the +integrity of the free software distribution system, which is +implemented by public license practices. Many people have made +generous contributions to the wide range of software distributed +through that system in reliance on consistent application of that +system; it is up to the author/donor to decide if he or she is willing +to distribute software through any other system and a licensee cannot +impose that choice. + +This section is intended to make thoroughly clear what is believed to +be a consequence of the rest of this License. + + 8. 
If the distribution and/or use of the Program is restricted in +certain countries either by patents or by copyrighted interfaces, the +original copyright holder who places the Program under this License +may add an explicit geographical distribution limitation excluding +those countries, so that distribution is permitted only in or among +countries not thus excluded. In such case, this License incorporates +the limitation as if written in the body of this License. + + 9. The Free Software Foundation may publish revised and/or new versions +of the General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + +Each version is given a distinguishing version number. If the Program +specifies a version number of this License which applies to it and "any +later version", you have the option of following the terms and conditions +either of that version or of any later version published by the Free +Software Foundation. If the Program does not specify a version number of +this License, you may choose any version ever published by the Free Software +Foundation. + + 10. If you wish to incorporate parts of the Program into other free +programs whose distribution conditions are different, write to the author +to ask for permission. For software which is copyrighted by the Free +Software Foundation, write to the Free Software Foundation; we sometimes +make exceptions for this. Our decision will be guided by the two goals +of preserving the free status of all derivatives of our free software and +of promoting the sharing and reuse of software generally. + + NO WARRANTY + + 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY +FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN +OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES +PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED +OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS +TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE +PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, +REPAIR OR CORRECTION. + + 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR +REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, +INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING +OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED +TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY +YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER +PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE +POSSIBILITY OF SUCH DAMAGES. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +convey the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + + Copyright (C) + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. 
+ + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License along + with this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this +when it starts in an interactive mode: + + Gnomovision version 69, Copyright (C) year name of author + Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may +be called something other than `show w' and `show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + + Yoyodyne, Inc., hereby disclaims all copyright interest in the program + `Gnomovision' (which makes passes at compilers) written by James Hacker. + + , 1 April 1989 + Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. 
diff --git a/mod_s3_upload/README.md b/mod_s3_upload/README.md new file mode 100644 index 0000000..9964eee --- /dev/null +++ b/mod_s3_upload/README.md @@ -0,0 +1,87 @@ +mod\_s3\_upload: XEP-0363 with S3-compatible storage +==================================================== + +* Author: Roman Hargrave <roman@hargrave.info> + +Implements HTTP Upload using any S3-compatible storage service. + +# OTP Compatibility + +This module depends heavily on the `uri_string` module introduced in +OTP 21 in order to implement URL signing. + +# How it works + +The S3 API is highly compatible with XEP-0363 because it uses PUT and +GET for object placement and retrieval. What's more, a client may be +provided with a URL that may be used to upload a specific file without +having to expose API credentials. This makes for an extremely +desirable XEP-0363 storage backend. + +An outline of an XEP-0363 transaction using this module follows: + +1. A client sends a slot-request IQ to the upload service +2. The server verifies that the client may upload files, and that the + proposed file size is acceptable +3. The server generates an object URL, which will be used by clients + to download the file once it has been uploaded +4. The server then constructs an additional URL based upon the object + URL, including information about the object size and type. A TTL is + added to the URL, such that it will expire. The URL is then signed. +5. The server returns the object URL and the signed URL +6. The client submits a PUT request to the signed URL with the file + contents. +7. If the PUT request succeeds, the client sends a message stanza + containing the link and additional metadata to the intended recipient. + +# Operator considerations + +This module includes a `Content-Length` parameter in the upload URL; +however, it is the responsibility of the storage service to validate +this. Different storage services may behave differently or not respond +at all when a file is uploaded and the size does not exactly match. 
If +you intend to enforce a file size limit, make sure that your storage +service checks upload size against this parameter. + +Furthermore, it is not the responsibility of this module to manage the +lifecycle of objects once uploaded. Not all services implement +lifecycle management or advanced features like tagging. You might +therefore wish to configure an object lifecycle policy to control +costs; otherwise you might end up paying to store very old objects. +When doing so, bear in mind that moving objects to a colder storage class +(if your service supports this) as part of a lifecycle policy could +generate considerable retrieval expenses - particularly when +combined with large MUCs. + +# Known Working Services + +This has been tested with the following services: + +- **Wasabi** - which works very well. It is extremely cheap, but + **does not support lifecycle management** or custom DNS. + +It almost certainly works with Amazon S3. + +# Configuration + +The module expects a bucket URL, access key ID, secret, and region. 
+ +An example configuration follows: + +```yaml +modules: + mod_s3_upload: + # Required, characteristic values shown + access_key_id: ABCDEF1234567890 + access_key_secret: whatever + region: us-east-2 + bucket_url: https://my-bucket.whatever-service.com + # Optional, defaults shown + max_size: 1073741824 + put_ttl: 600 + set_public: true + service_name: 'S3 Upload' + access: local + hosts: + - upload.@HOST@ +``` diff --git a/mod_s3_upload/conf/mod_s3_upload.yml b/mod_s3_upload/conf/mod_s3_upload.yml new file mode 100644 index 0000000..31a03ca --- /dev/null +++ b/mod_s3_upload/conf/mod_s3_upload.yml @@ -0,0 +1,19 @@ +modules: + mod_s3_upload: + region: us-west-1 + bucket_url: https://example.s3.us-west-1.wasabisys.com + access_key_id: WBPXK3YWS457RV9P + access_key_secret: N2UC4RSLPU6VH6FYGNJ9BRNMC74XM6G9MP74RNH7D4ZG9UBZY9Z5G4ZR8T782KR7 + ## Maximum permitted object size, in bytes + # max_size: 1073741824 + ## How long, in seconds from generation, an upload URL is valid + # put_ttl: 600 + ## Whether to apply the special public-read ACL to the object + # set_public: true + ## Advertised service name + # service_name: 'S3 Upload' + ## ACL containing users permitted to request slots + # access: local + ## Hostnames that this module will receive IQs at + # hosts: + # - upload.@HOST@ diff --git a/mod_s3_upload/include/aws.hrl b/mod_s3_upload/include/aws.hrl new file mode 100644 index 0000000..4294063 --- /dev/null +++ b/mod_s3_upload/include/aws.hrl @@ -0,0 +1,28 @@ +%%%---------------------------------------------------------------------- +%%% File : aws.hrl +%%% Usage : S3 URL Generation and Signing +%%% Author : Roman Hargrave +%%% Purpose : Signing AWS Requests. Intended for S3-CS use. 
+%%% Created : 24 Aug 2022 by Roman Hargrave
+%%%
+%%%
+%%% This program is free software; you can redistribute it and/or
+%%% modify it under the terms of the GNU General Public License as
+%%% published by the Free Software Foundation; either version 2 of the
+%%% License, or (at your option) any later version.
+%%%
+%%% This program is distributed in the hope that it will be useful,
+%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
+%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+%%% General Public License for more details.
+%%%
+%%% You should have received a copy of the GNU General Public License along
+%%% with this program; if not, write to the Free Software Foundation, Inc.,
+%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+%%%----------------------------------------------------------------------
+
+% Credentials and region used by aws_util when signing requests.
+%   access_key_id - public key identifier; aws_util:credential/3 places
+%                   it at the front of the X-Amz-Credential value.
+%   access_key    - the secret key; aws_util:signing_key/3 prefixes it
+%                   with "AWS4" to seed the HMAC key-derivation chain.
+%                   Treat as sensitive and keep it out of logs.
+%   region        - region name; part of the credential scope and of
+%                   the signing-key derivation.
+-record(aws_auth, {access_key_id :: binary(),
+                   access_key :: binary(),
+                   region :: binary()}).
+
+% Service identifier for S3. NOTE(review): presumably passed as the
+% Service argument of aws_util:signed_url/7 - confirm against callers.
+-define(AWS_SERVICE_S3, <<"s3">>).
diff --git a/mod_s3_upload/mod_s3_upload.spec b/mod_s3_upload/mod_s3_upload.spec
new file mode 100644
index 0000000..9194e92
--- /dev/null
+++ b/mod_s3_upload/mod_s3_upload.spec
@@ -0,0 +1,6 @@
+# -*- mode:yaml; -*-
+author: "Roman Hargrave "
+category: "service"
+summary: "Upload files to S3-compatible storage"
+home: "https://github.com/processone/ejabberd-contrib/tree/master/"
+url: "git@github.com:processone/ejabberd-contrib.git"
diff --git a/mod_s3_upload/src/aws_util.erl b/mod_s3_upload/src/aws_util.erl
new file mode 100644
index 0000000..c24f126
--- /dev/null
+++ b/mod_s3_upload/src/aws_util.erl
@@ -0,0 +1,256 @@
+%%%----------------------------------------------------------------------
+%%% File : aws_util.erl
+%%% Usage : AWS URL Signing
+%%% Author : Roman Hargrave
+%%% Purpose : Signing AWS Requests. Intended for S3-CS use.
+%%% Created : 24 Aug 2022 by Roman Hargrave +%%% +%%% +%%% This program is free software; you can redistribute it and/or +%%% modify it under the terms of the GNU General Public License as +%%% published by the Free Software Foundation; either version 2 of the +%%% License, or (at your option) any later version. +%%% +%%% This program is distributed in the hope that it will be useful, +%%% but WITHOUT ANY WARRANTY; without even the implied warranty of +%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +%%% General Public License for more details. +%%% +%%% You should have received a copy of the GNU General Public License along +%%% with this program; if not, write to the Free Software Foundation, Inc., +%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +%%%---------------------------------------------------------------------- + +%% URL Signing. Documented at +%% https://docs.aws.amazon.com/AmazonS3/latest/API/sig-v4-authenticating-requests.html + +-module(aws_util). +-author("roman@hargrave.info"). + +-include("aws.hrl"). + +-type verb() :: get | put | post | delete. +-type headers() :: [{unicode:chardata(), unicode:chardata()}]. +-type query_list() :: [{unicode:chardata(), unicode:chardata() | true}]. +-type ttl() :: 1..604800. + +-define(AWS_SIGN_ALGO, <<"AWS4-HMAC-SHA256">>). + +-import(crypto, [mac/4]). +-import(uri_string, [compose_query/1, + dissect_query/1]). +-import(misc, [crypto_hmac/3]). + +-export([signed_url/7]). + +%%------------------------------------------------------------------------ +%% API +%%------------------------------------------------------------------------ + +-spec signed_url( + Auth :: #aws_auth{}, + Verb :: verb(), + Service :: binary(), + Url :: binary(), + ExtraHeaders :: headers(), + Time :: calendar:datetime(), + TTL :: ttl() + ) -> + SignedUrl :: binary(). 
+% Produce a pre-signed (query-string authenticated) URL.
+%
+%   Auth         - credentials and region used to sign
+%   Verb         - HTTP method the URL will be used with
+%   Service      - AWS service identifier used in the credential scope
+%   URL          - absolute URL of the target object; any query
+%                  parameters already present are kept and signed
+%   ExtraHeaders - headers (besides host) the client will send and that
+%                  must be covered by the signature
+%   Time         - signing time (UTC)
+%   TTL          - validity of the signature, in seconds
+%
+% Crashes if URL cannot be parsed or carries no host component.
+signed_url(Auth, Verb, Service, URL, ExtraHeaders, Time, TTL) ->
+    UnauthenticatedUriMap = uri_string:parse(URL),
+    % The signed host header has to equal the Host header the HTTP
+    % client will send, and clients append the port whenever it is not
+    % the default for the scheme (e.g. "storage.example.com:9000").
+    Headers = [{<<"host">>, host_header(UnauthenticatedUriMap)} | ExtraHeaders],
+    % insert authentication params.
+    QueryList = sorted_query_list(uri_query_list(UnauthenticatedUriMap)
+                                  ++ base_query_params(Auth, Time, Service, Headers, TTL)),
+    UriMap = UnauthenticatedUriMap#{query => uri_string:compose_query(QueryList)},
+    % generate and sign the message
+    StringToSign = string_to_sign(Auth, Time, Service, Verb, UriMap, Headers),
+    SigningKey = signing_key(Auth, Time, Service),
+    Signature = encode_hex(misc:crypto_hmac(sha256, SigningKey, StringToSign)),
+    % add signature to the query list and compose URI
+    SignedQueryString = uri_string:compose_query([{<<"X-Amz-Signature">>, Signature}|QueryList]),
+    uri_string:recompose(UriMap#{query => SignedQueryString}).
+
+%%------------------------------------------------------------------------
+%% Internal
+%%------------------------------------------------------------------------
+
+-spec host_header(
+        UriMap :: uri_string:uri_map()
+       ) ->
+    Host :: unicode:chardata().
+% Value of the host header to sign: the bare host when the URL has no
+% explicit port or uses the scheme's default port, "host:port" otherwise.
+host_header(#{host := Host, port := Port} = UriMap) when is_integer(Port) ->
+    Scheme = string:lowercase(maps:get(scheme, UriMap, <<>>)),
+    case is_default_port(Scheme, Port) of
+        true ->
+            Host;
+        false ->
+            iolist_to_binary([Host, $:, integer_to_binary(Port)])
+    end;
+host_header(#{host := Host}) ->
+    Host.
+
+-spec is_default_port(
+        Scheme :: unicode:chardata(),
+        Port :: non_neg_integer()
+       ) ->
+    boolean().
+% whether Port is the port implied by Scheme (http/https only)
+is_default_port(<<"http">>, 80) ->
+    true;
+is_default_port(<<"https">>, 443) ->
+    true;
+is_default_port(_, _) ->
+    false.
+
+-spec sorted_query_list(
+        QueryList :: query_list()
+       ) ->
+    SortedQueryList :: query_list().
+% sort a query parameter list by parameter name, ascending
+sorted_query_list(QueryList) ->
+    lists:sort(fun ({L, _}, {R, _}) -> L =< R end, QueryList).
+
+-spec uri_query_list(
+        UriMap :: uri_string:uri_map()
+       ) ->
+    QueryList :: query_list().
+% extract a query list from a uri_map(); a URI without a query
+% component yields the empty list.
+uri_query_list(#{query := QueryString}) ->
+    uri_string:dissect_query(QueryString);
+uri_query_list(_) ->
+    [].
+
+-spec verb(
+        Verb :: verb()
+       ) ->
+    binary().
+% convert a verb atom to the upper-case method name used in the
+% canonical request
+verb(get) ->
+    <<"GET">>;
+verb(put) ->
+    <<"PUT">>;
+verb(post) ->
+    <<"POST">>;
+verb(delete) ->
+    <<"DELETE">>.
+
+-spec encode_hex(
+        Data :: binary()
+       ) ->
+    EncodedData :: binary().
+% hex-encode Data and lower-case the result
+encode_hex(Data) ->
+    str:to_lower(str:to_hexlist(Data)).
+
+-spec iso8601_timestamp_utc(
+        DateTime :: calendar:datetime()
+       ) ->
+    Timestamp :: binary().
+% Render a datetime as the compact, separator-free timestamp
+% (YYYYMMDDTHHMMSSZ) expected in X-Amz-Date. The input is taken to be
+% UTC already; the trailing 'Z' is always emitted.
+iso8601_timestamp_utc({{Year, Month, Day}, {Hour, Minute, Second}}) ->
+    Fields = [Year, Month, Day, Hour, Minute, Second],
+    str:format("~B~2..0B~2..0BT~2..0B~2..0B~2..0BZ", Fields).
+
+-spec iso8601_date(
+        DateTime :: calendar:datetime()
+       ) ->
+    DateStr :: binary().
+% Date part of a datetime as YYYYMMDD, without separators.
+iso8601_date({{Year, Month, Day}, _TimeOfDay}) ->
+    str:format("~B~2..0B~2..0B", [Year, Month, Day]).
+
+-spec scope(
+        Auth :: #aws_auth{},
+        Time :: calendar:datetime(),
+        Service :: binary()
+       ) ->
+    Scope :: binary().
+% Credential scope "<date>/<region>/<service>/aws4_request", shared by
+% the credential field and the signature message.
+scope(#aws_auth{region = Region}, Time, Service) ->
+    Date = iso8601_date(Time),
+    str:format("~ts/~ts/~ts/aws4_request", [Date, Region, Service]).
+
+-spec credential(
+        Auth :: #aws_auth{},
+        Time :: calendar:datetime(),
+        Service :: binary()
+       ) ->
+    Auth :: binary().
+% Value of X-Amz-Credential: the access key id followed by the scope.
+credential(#aws_auth{access_key_id = KeyID} = Auth, Time, Service) ->
+    Scope = scope(Auth, Time, Service),
+    str:format("~ts/~ts", [KeyID, Scope]).
+
+-spec base_query_params(
+        Auth :: #aws_auth{},
+        Time :: calendar:datetime(),
+        Service :: binary(),
+        Headers :: headers(),
+        TTL :: ttl()
+       ) ->
+    BaseQueryParams :: [{unicode:chardata(), unicode:chardata()}].
+% The query parameters every signed request must carry: algorithm,
+% credential, signing date, expiry and the list of signed headers.
+base_query_params(Auth, Time, Service, Headers, TTL) ->
+    Credential = credential(Auth, Time, Service),
+    Timestamp = iso8601_timestamp_utc(Time),
+    Expires = integer_to_binary(TTL),
+    HeaderNames = signed_headers(Headers),
+    [{<<"X-Amz-Algorithm">>, ?AWS_SIGN_ALGO},
+     {<<"X-Amz-Credential">>, Credential},
+     {<<"X-Amz-Date">>, Timestamp},
+     {<<"X-Amz-Expires">>, Expires},
+     {<<"X-Amz-SignedHeaders">>, HeaderNames}].
+
+-spec canonical_headers(
+        Headers :: headers()
+       ) ->
+    CanonicalHeaders :: unicode:chardata().
+% generate the header list for canonical_request: one "name:value"
+% line per header, each terminated by a newline, in the order given
+canonical_headers(Headers) ->
+    str:join(lists:map(fun ({Name, Value}) ->
+                               str:format("~ts:~ts~n", [Name, Value])
+                       end, Headers),
+             <<>>).
+
+-spec signed_headers(
+        SignedHeaders :: headers()
+       ) ->
+    SignedHeaders :: unicode:chardata().
+% generate a semicolon-delimited list of headers, used to enumerate
+% signed headers in the AWSv4 canonical request
+signed_headers(SignedHeaders) ->
+    str:join(lists:map(fun ({Name, _}) ->
+                               Name
+                       end, SignedHeaders),
+             <<";">>).
+
+-spec canonical_request(
+        Verb :: verb(),
+        UriMap :: uri_string:uri_map(),
+        Headers :: headers()
+       ) ->
+    CanonicalRequest :: unicode:chardata().
+% Generate the canonical request used to compute the signature.
+% canonical_headers/1 already ends every header with a newline, so the
+% extra "\n" after it yields the blank line that separates the header
+% block from the signed-header list. The payload is never hashed; the
+% literal UNSIGNED-PAYLOAD takes its place.
+canonical_request(Verb,
+                  #{query := Query,
+                    path := Path},
+                  Headers) ->
+    <<(verb(Verb))/binary, "\n",
+      Path/binary, "\n",
+      Query/binary, "\n",
+      (canonical_headers(Headers))/binary, "\n",
+      (signed_headers(Headers))/binary, "\n",
+      "UNSIGNED-PAYLOAD">>.
+
+-spec string_to_sign(
+        Auth :: #aws_auth{},
+        Time :: calendar:datetime(),
+        Service :: binary(),
+        Verb :: verb(),
+        UriMap :: uri_string:uri_map(),
+        Headers :: headers()
+       ) ->
+    StringToSign :: unicode:chardata().
+% generate the "string to sign", as per AWS specs: the algorithm name,
+% the request timestamp, the credential scope and the hex-encoded
+% SHA-256 of the canonical request, separated by newlines
+string_to_sign(Auth, Time, Service, Verb, UriMap, Headers) ->
+    RequestHash = crypto:hash(sha256, canonical_request(Verb, UriMap, Headers)),
+    <<?AWS_SIGN_ALGO/binary, "\n",
+      (iso8601_timestamp_utc(Time))/binary, "\n",
+      (scope(Auth, Time, Service))/binary, "\n",
+      (encode_hex(RequestHash))/binary>>.
+
+-spec signing_key(
+        Auth :: #aws_auth{},
+        Time :: calendar:datetime(),
+        Service :: binary()
+       ) ->
+    SigningKey :: binary().
+% generate the signing key used in the final HMAC-SHA256 round for
+% request signing.
signing_key(#aws_auth{access_key = Secret, region = Region}, Time, Service) ->
    % The chain starts from the secret prefixed with "AWS4"; the HMAC of
    % each step becomes the key of the next one (date, region, service
    % and finally the fixed "aws4_request" terminator).
    Seed = <<"AWS4", Secret/binary>>,
    Steps = [iso8601_date(Time), Region, Service, <<"aws4_request">>],
    lists:foldl(fun (Message, Key) ->
                        crypto_hmac(sha256, Key, Message)
                end,
                Seed,
                Steps).
diff --git a/mod_s3_upload/src/mod_s3_upload.erl b/mod_s3_upload/src/mod_s3_upload.erl
new file mode 100644
index 0000000..168dce7
--- /dev/null
+++ b/mod_s3_upload/src/mod_s3_upload.erl
@@ -0,0 +1,454 @@
%%%----------------------------------------------------------------------
%%% File    : mod_s3_upload.erl
%%% Author  : Roman Hargrave <roman@hargrave.info>
%%% Purpose : An XEP-0363 Implementation using S3-compatible storage
%%% Created : 24 Aug 2022 by Roman Hargrave <roman@hargrave.info>
%%%
%%%
%%% This program is free software; you can redistribute it and/or
%%% modify it under the terms of the GNU General Public License as
%%% published by the Free Software Foundation; either version 2 of the
%%% License, or (at your option) any later version.
%%%
%%% This program is distributed in the hope that it will be useful,
%%% but WITHOUT ANY WARRANTY; without even the implied warranty of
%%% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
%%% General Public License for more details.
%%%
%%% You should have received a copy of the GNU General Public License along
%%% with this program; if not, write to the Free Software Foundation, Inc.,
%%% 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
%%%----------------------------------------------------------------------

-module(mod_s3_upload).
-author('roman@hargrave.info').

-behaviour(gen_mod).
-behaviour(gen_server).

-protocol({xep, 363, '1.1.0'}).

-include("logger.hrl").
-include("translate.hrl").
-include("aws.hrl").

-include_lib("xmpp/include/xmpp.hrl").

% gen_mod callbacks
-export([start/2,
         stop/1,
         reload/3,
         depends/2,
         mod_opt_type/1,
         mod_options/1,
         mod_doc/0]).

% gen_server callbacks
-export([init/1,
         handle_info/2,
         handle_call/3,
         handle_cast/2]).

-import(gen_mod, [get_opt/2]).

%%-----------------------------------------------------------------------
%% gen_mod callbacks and related machinery
%%-----------------------------------------------------------------------

-spec start(
        ServerHost :: binary(),
        Opts :: gen_mod:opts()
       ) ->
          Result :: {ok, pid()} | {error, term()}.
% Start the service process for ServerHost as a gen_mod child.
start(ServerHost, Opts) ->
    gen_mod:start_child(?MODULE, ServerHost, Opts).

-spec stop(
        ServerHost :: binary()
       ) ->
          Result :: any().
% Stop the service process that was started for ServerHost.
stop(ServerHost) ->
    gen_mod:stop_child(?MODULE, ServerHost).

-spec reload(
        ServerHost :: binary(),
        NewOpts :: gen_mod:opts(),
        OldOpts :: gen_mod:opts()
       ) ->
          Result :: ok.
% Apply a changed configuration to the running service. The new service
% parameters are built here, in the calling process, and handed to the
% server with a cast; the previous options are not needed.
reload(ServerHost, NewOpts, _OldOpts) ->
    ServerRef = gen_mod:get_module_proc(ServerHost, ?MODULE),
    % cast a message to the server with the new options
    gen_server:cast(ServerRef, {reload,
                                ServerHost,
                                build_service_params(ServerHost, NewOpts)}).

%%------------------------------------------------------------------------
%% Options
%%------------------------------------------------------------------------

-spec mod_opt_type(
        OptionName :: atom()
       ) ->
          OptionType :: econf:validator().
% Validator for each supported module option.
mod_opt_type(access_key_id) ->
    econf:binary();
mod_opt_type(access_key_secret) ->
    econf:binary();
mod_opt_type(region) ->
    econf:binary();
mod_opt_type(bucket_url) ->
    econf:url([http, https]);
mod_opt_type(max_size) ->
    econf:pos_int(infinity);
mod_opt_type(set_public) ->
    econf:bool();
mod_opt_type(put_ttl) ->
    % The TTL ends up in X-Amz-Expires, which has to be an integer
    % number of seconds between 1 and 604800 (seven days), so
    % 'infinity' cannot be accepted here.
    econf:int(1, 604800);
mod_opt_type(service_name) ->
    econf:binary();
mod_opt_type(hosts) ->
    econf:hosts();
mod_opt_type(access) ->
    econf:acl().

-spec mod_options(
        Host :: binary()
       ) ->
          Options :: [{atom(), term()} | atom()].
% Default value of every module option. The default service host is
% "upload." followed by the virtual host name.
mod_options(Host) ->
    [{access_key_id, undefined},
     {access_key_secret, undefined},
     {region, undefined},
     {bucket_url, undefined},
     {max_size, 1073741824},
     {set_public, true},
     {put_ttl, 600},
     {service_name, <<"S3 Upload">>},
     {hosts, [<<"upload.", Host/binary>>]},
     {access, local}].

-spec mod_doc() ->
          Doc :: #{desc => binary() | [binary()],
                   opts => [{atom(), #{value := binary(), desc := binary()}}]}.
% Module and option documentation.
mod_doc() ->
    #{desc =>
          [?T("This module implements XEP-0363 using an S3 bucket "
              "instead of an internal web server. This simplifies "
              "clustered deployments by removing the need to maintain "
              "shared storage, and is in many cases less expensive "
              "byte-for-byte than block storage. It is mutually "
              "incompatible with mod_http_upload.")],
      opts =>
          [{access_key_id,
            #{value => ?T("AccessKeyId"),
              desc => ?T("AWS Access Key ID.")}},
           {access_key_secret,
            #{value => ?T("AccessKeySecret"),
              desc => ?T("AWS Access Key Secret.")}},
           {region,
            #{value => ?T("Region"),
              desc => ?T("AWS Region")}},
           {bucket_url,
            #{value => ?T("BucketUrl"),
              desc => ?T("S3 Bucket URL.")}},
           {max_size,
            % the option is validated as a positive integer or the atom
            % 'infinity'; 0 is not an accepted value.
            #{value => ?T("MaxSize"),
              desc => ?T("Maximum file size, in bytes, or 'infinity' for no limit.")}},
           {set_public,
            #{value => ?T("SetPublic"),
              desc => ?T("Set x-amz-acl to public-read.")}},
           {put_ttl,
            #{value => ?T("PutTtl"),
              desc => ?T("How long the PUT URL will be valid for, in seconds.")}},
           {service_name,
            #{value => ?T("ServiceName"),
              desc => ?T("Name given in discovery requests.")}},
           {hosts, % named for consistency with other modules
            #{value => ?T("ServiceJids"),
              desc => ?T("JIDs used when communicating with the service")}},
           {access,
            #{value => ?T("UploadAccess"),
              desc => ?T("Access rule for JIDs that may request new URLs")}}]}.

-spec depends(
        Host :: binary(),
        Opts :: gen_mod:opts()
       ) ->
          Dependencies :: [{module(), hard | soft}].
% This module does not depend on any other module.
depends(_Host, _Opts) ->
    [].

%%------------------------------------------------------------------------
%% gen_server callbacks.
%%------------------------------------------------------------------------

% Server state: the module configuration in the form used while
% handling requests.
-record(params,
        {service_name :: binary(), % name given for the service in discovery.
         service_jids :: [binary()], % stanzas destined for these JIDs will be routed to the service.
         max_size :: integer() | infinity, % maximum upload size. sort of the honor system in this case.
         bucket_url :: binary(), % S3 bucket URL or subdomain
         set_public :: boolean(), % set the public-read ACL on the object?
         ttl :: integer(), % TTL of the signed PUT URL
         server_host :: binary(), % XMPP vhost the service belongs to
         auth :: #aws_auth{}, % credentials and region used for signing
         access :: atom()}). % access rule checked for slot requests

-spec init(
        Params :: list()
       ) ->
          Result :: {ok, #params{}}.
% Build the server state from the module options and register a route
% for every service JID.
init([ServerHost, Opts]) ->
    Params = build_service_params(ServerHost, Opts),
    update_routes(ServerHost, [], Params#params.service_jids),
    {ok, Params}.

-spec handle_info(
        Message :: any(),
        State :: #params{}
       ) ->
          Result :: {noreply, #params{}}.
% receive non-standard (gen_server) messages
handle_info({route, #iq{type = Type}}, Opts) when Type =:= result;
                                                  Type =:= error ->
    % An IQ of type result or error is itself a response and must never
    % be answered (RFC 6120, section 8.2.3). The service sends no IQ
    % requests of its own, so such stanzas are simply dropped instead of
    % being passed on to handle_iq/2, which would try to build an error
    % reply for them.
    {noreply, Opts};
handle_info({route, #iq{lang = Lang} = Packet}, Opts) ->
    % Only the decoding step is protected by the try; a failure inside
    % handle_iq/2 is deliberately not caught here.
    try xmpp:decode_els(Packet) of
        IQ ->
            ejabberd_router:route(handle_iq(IQ, Opts)),
            {noreply, Opts}
    catch _:{xmpp_codec, Why} ->
            Message = xmpp:io_format_error(Why),
            Error = xmpp:err_bad_request(Message, Lang),
            ejabberd_router:route_error(Packet, Error),
            {noreply, Opts}
    end;
handle_info(Request, Opts) ->
    ?WARNING_MSG("Unexpected info: ~p", [Request]),
    {noreply, Opts}.

-spec handle_call(
        Request:: any(),
        Sender :: gen_server:from(),
        State :: #params{}
       ) ->
          Result :: {noreply, #params{}}.
% respond to $gen_call messages. No calls are supported: the request is
% logged and left unanswered.
handle_call(Request, Sender, Opts) ->
    ?WARNING_MSG("Unexpected call from ~p: ~p", [Sender, Request]),
    {noreply, Opts}.

-spec handle_cast(
        Request :: any(),
        State :: #params{}
       ) ->
          Result :: {noreply, #params{}}.
% receive $gen_cast messages. A reload replaces the whole state with
% the new parameters after bringing the registered routes up to date.
handle_cast({reload, ServerHost, NewOpts}, OldOpts) ->
    update_routes(ServerHost,
                  OldOpts#params.service_jids,
                  NewOpts#params.service_jids),
    {noreply, NewOpts};
handle_cast(Request, Opts) ->
    ?WARNING_MSG("Unexpected cast: ~p", [Request]),
    {noreply, Opts}.

%%------------------------------------------------------------------------
%% Internal Stanza Processing
%%-----------------------------------------------------------------------

-spec update_routes(
        ServerHost :: binary(),
        OldJIDs :: [binary()],
        NewJIDs :: [binary()]
       ) ->
          Result :: _.
% maintain routing rules for JIDs owned by this service: every new JID
% is registered, then the old JIDs that are no longer listed are
% unregistered.
update_routes(ServerHost, OldJIDs, NewJIDs) ->
    lists:foreach(fun (Domain) ->
                          ejabberd_router:register_route(Domain, ServerHost)
                  end, NewJIDs),
    lists:foreach(fun ejabberd_router:unregister_route/1, OldJIDs -- NewJIDs).


-spec handle_iq(
        IQ :: iq(),
        Params :: #params{}
       ) ->
          Response :: iq().
% Build the reply to a decoded IQ addressed to the service. The clauses
% are tried in this order: discovery request, slot request for a file
% that is too large, slot request, anything else.
%
% Handle discovery requests. Produces a document such as depicted in
% XEP-0363 v1.1.0 Ex. 4.
handle_iq(#iq{type = get,
              lang = Lang,
              to = HostJID,
              sub_els = [#disco_info{}]} = IQ,
          #params{max_size = MaxSize, service_name = ServiceName}) ->
    Host = jid:encode(HostJID),
    % collect additional discovery entries, if any.
    Advice = ejabberd_hooks:run_fold(disco_info, Host, [],
                                     [Host, ?MODULE, <<"">>, Lang]),
    % if a maximum size was specified, append xdata with the limit
    XData = case MaxSize of
                infinity ->
                    Advice;
                _ ->
                    [#xdata{type = result,
                            fields = http_upload:encode(
                                       [{'max-file-size', MaxSize}],
                                       ?NS_HTTP_UPLOAD_0,
                                       Lang
                                      )}
                    | Advice]
            end,
    % build disco iq
    Query = #disco_info{identities = [#identity{category = <<"store">>,
                                                type = <<"file">>,
                                                name = translate:translate(Lang, ServiceName)}],
                        features = [?NS_HTTP_UPLOAD_0],
                        xdata = XData},
    xmpp:make_iq_result(IQ, Query); % this swaps parties for us
% handle slot request with FileSize > MaxSize. When MaxSize is the atom
% 'infinity' the guard never holds for an integer size, because numbers
% sort before atoms in the Erlang term order.
handle_iq(#iq{type = get,
              from = From,
              lang = Lang,
              sub_els = [#upload_request_0{size = FileSize,
                                           filename = FileName}]} = IQ,
          #params{max_size = MaxSize}) when FileSize > MaxSize ->
    ?WARNING_MSG("~ts tried to upload an oversize file (~ts, ~B bytes)",
                 [jid:encode(From), FileName, FileSize]),
    ErrorMessage = {?T("File larger than ~B bytes"), [MaxSize]},
    Error = xmpp:err_not_acceptable(ErrorMessage, Lang),
    Els = [#upload_file_too_large{'max-file-size' = MaxSize,
                                  xmlns = ?NS_HTTP_UPLOAD_0}
          | xmpp:get_els(Error)],
    xmpp:make_error(IQ, xmpp:set_els(Error, Els));
% Handle slot request
handle_iq(#iq{type = get,
              from = Requester,
              lang = Lang,
              sub_els = [#upload_request_0{filename = FileName,
                                           size = FileSize} = UploadRequest]} = IQ,
          #params{server_host = ServerHost,
                  access = Access,
                  bucket_url = BucketURL,
                  ttl = TTL,
                  auth = Auth} = Params) ->
    case acl:match_rule(ServerHost, Access, Requester) of
        allow ->
            ?INFO_MSG("Generating S3 Object URL Pair for ~ts to upload file ~ts (~B bytes)",
                      [jid:encode(Requester), FileName, FileSize]),
            % generate a unique object ID and url based on settings
            ObjectURL = object_url(BucketURL, FileName),
            % attach configuration- and request-specific query params to the
            % PUT url
            UnsignedPutURL = put_url(UploadRequest, Params, ObjectURL),
            % sign the PUT url
            PutURL = aws_util:signed_url(Auth, put, ?AWS_SERVICE_S3, UnsignedPutURL, [], calendar:universal_time(), TTL),
            % the unsigned object URL doubles as the download URL
            xmpp:make_iq_result(IQ, #upload_slot_0{get = ObjectURL,
                                                   put = PutURL,
                                                   xmlns = ?NS_HTTP_UPLOAD_0});
        deny ->
            ?INFO_MSG("Denied upload request from ~ts for file ~ts (~B bytes)",
                      [jid:encode(Requester), FileName, FileSize]),
            xmpp:make_error(IQ, xmpp:err_forbidden(?T("Access denied"), Lang))
    end;
% handle unexpected IQ
handle_iq(IQ, _Params) ->
    xmpp:make_error(IQ, xmpp:err_bad_request()).

%%------------------------------------------------------------------------
%% Internal Helpers
%%------------------------------------------------------------------------

-spec expanded_jids(
        ServerHost :: binary(),
        JIDs :: [binary()]
       ) ->
          ExpandedJIDs :: [binary()].
% expand @HOST@ in JIDs
expanded_jids(ServerHost, JIDs) ->
    [misc:expand_keyword(<<"@HOST@">>, JID, ServerHost) || JID <- JIDs].

-spec build_service_params(
        ServerHost :: binary(),
        Opts :: gen_mod:opts()
       ) ->
          Params :: #params{}.
% create a service params record from module config
build_service_params(ServerHost, Opts) ->
    Auth = #aws_auth{access_key_id = get_opt(access_key_id, Opts),
                     access_key = get_opt(access_key_secret, Opts),
                     region = get_opt(region, Opts)},
    #params{service_name = get_opt(service_name, Opts),
            service_jids = expanded_jids(ServerHost, get_opt(hosts, Opts)),
            max_size = get_opt(max_size, Opts),
            bucket_url = get_opt(bucket_url, Opts),
            set_public = get_opt(set_public, Opts),
            ttl = get_opt(put_ttl, Opts),
            server_host = ServerHost,
            auth = Auth,
            access = get_opt(access, Opts)}.

-spec url_service_parameters(
        Params :: #params{}
       ) ->
          ServiceParameters :: [{binary(), binary() | true}].
% additional URL parameters from module config: the public-read canned
% ACL when set_public is enabled, nothing otherwise.
url_service_parameters(#params{set_public = true}) ->
    [{<<"X-Amz-Acl">>, <<"public-read">>}];
url_service_parameters(_) ->
    [].

-spec upload_parameters(
        UploadRequest :: #upload_request_0{},
        Params :: #params{}
       ) ->
          UploadParameters :: [{binary(), binary() | true}].
% Parameters describing the upload (content type and length), followed
% by the parameters derived from the module configuration. put_url/3
% adds them to the query string of the PUT URL.
upload_parameters(#upload_request_0{size = FileSize,
                                    'content-type' = ContentType},
                  ServiceParams) ->
    [{<<"Content-Type">>, <<ContentType/binary>>},
     {<<"Content-Length">>, erlang:integer_to_binary(FileSize)}
    | url_service_parameters(ServiceParams)].

-spec put_url(
        UploadRequest :: #upload_request_0{},
        Params :: #params{},
        URL :: binary()
       ) ->
          PutURL :: binary().
% attach additional query parameters (to the PUT URL), specifically canned ACL.
% Query parameters already present in URL are kept and placed after the
% added ones.
put_url(UploadRequest, ServiceParams, URL) ->
    UriMap = uri_string:parse(URL),
    QueryList = case UriMap of
                    #{query := QueryString} ->
                        uri_string:dissect_query(QueryString);
                    _ ->
                        []
                end,
    Params = upload_parameters(UploadRequest, ServiceParams),
    WithOpts = uri_string:compose_query(Params ++ QueryList),
    uri_string:recompose(UriMap#{query => WithOpts}).

-spec object_url(
        BucketURL :: binary(),
        FileName :: binary()
       ) ->
          ObjectURL :: binary().
% generate a unique random object URL for the given filename: the
% object name is appended to the path of the bucket URL, separated by a
% slash.
% NOTE(review): the object name, which embeds FileName, is inserted into
% the path verbatim -- confirm how file names containing characters
% that need percent-encoding are expected to be handled.
object_url(BucketURL, FileName) ->
    #{path := BasePath} = UriMap = uri_string:parse(BucketURL),
    ObjectName = object_name(FileName),
    uri_string:recompose(UriMap#{path => <<BasePath/binary, "/", ObjectName/binary>>}).

-spec object_name(
        FileName :: binary()
       ) ->
          ObjectName :: binary().
% generate reasonably unique sortable (by time first) object name: the
% system time in microseconds and a hash of the node name, both in base
% 36, followed by a dash and the file name.
object_name(FileName) ->
    % ~ts (not ~s) so that a UTF-8 encoded file name is read as Unicode
    % text rather than as a sequence of Latin-1 bytes, which would be
    % encoded a second time when the result is turned into a binary.
    str:format("~.36B~.36B-~ts", [os:system_time(microsecond),
                                  erlang:phash2(node()),
                                  FileName]).