pax_global_header00006660000000000000000000000064135471424140014517gustar00rootroot0000000000000052 comment=4861b145488e3146f0c3b32b4dd56149d4acbad4 s3backer-1.5.4/000077500000000000000000000000001354714241400132235ustar00rootroot00000000000000s3backer-1.5.4/.gitignore000066400000000000000000000003261354714241400152140ustar00rootroot00000000000000aclocal.m4 autom4te.cache config.h config.h.in config.log config.status configure debian/Makefile debian/Makefile.in .deps gitrev.c Makefile Makefile.in *.o s3backer s3backer.spec scripts stamp-h1 tags TAGS tester s3backer-1.5.4/CHANGES000066400000000000000000000222561354714241400142250ustar00rootroot00000000000000Version 1.5.4 released October 8, 2019 - Only set "x-amz-server-side-encryption" header with PUT requests (issue #116) - Don't kill IAM thread unless actually started (issue #115). Version 1.5.3 released August 9, 2019 - Fixed bug where IAM update thread was killed after fork (issue #115) - Fixed use-after-free bug in block_cache_verified() (issue #113) - Fixed use-after-free bug when updating IAM credentials (pr #114) - Fixed bug in test mode that was causing bogus I/O errors Version 1.5.2 released July 9, 2019 - Fixed bug where block cache would not work when run in the background (issue #112) - Fixed bug where we were not parsing HTTP headers case-insensitively (pr #11) - Bail out during `--listBlocks' if we see an object name past our block range - Added `--blockHashPrefix' flag (issue #80) Version 1.5.1 released April 15, 2019 - Fixed a few places where fixed-sized buffers were too small (issue #108) - Don't claim cache hit if partial write required reading the block (pr #103) - Exit process with error code if s3backer store setup fails at startup - Reset statistics if stats file is unlinked (issue #106) Version 1.5.0 released June 9, 2018 - Add support for recovering dirty blocks in the disk cache (issue #87) - Replaced boolean 'mounted' flag with a unique 32-bit mount token (issue #87) - Wait for 
min_write_delay before access after write error (issue #76) - Configure TCP keep-alive on HTTP connections (issue #78) - Added support for server side encryption (pull #81) Version 1.4.4 released February 1, 2017 - Added `--defaultContentEncoding' for non-compliant backends (issue #68) - Fixed auth bug when prefix contains URL-encodable char (issue #69) - Remove restriction preventing streaming encryption modes (issue #70) Version 1.4.3 released July 25, 2016 - Add support for STANDARD_IA storage class (issue #59) - Set "Accept-Encoding" header appropriately (issue #48) - Fix build issue with OpenSSL 1.1.0 (issue #64) Version 1.4.2 released September 1, 2015 - Update license to fix OpenSSL vs. GPL conflict - Remove obsolete Debian files - Fix typos in help output Version 1.4.1 released May 4, 2015 - Fix use-after-free bug configuring base URL (github issue #44) Version 1.4.0 released April 17, 2015 - Added support for authentication version 4 (issue #51) - Added support for credentials via IAM role from EC2 meta-data (issue #48) - Fixed bug where `--erase' did not clear the mounted flag - Moved from Google project hosting to GitHub - Fixed compile problem on FreeBSD Version 1.3.7 (r496) released 18 July 2013 - Add `--keyLength' for overriding generated encryption key length Version 1.3.6 (r493) released 16 July 2013 - Fix use of MAX_HOST_NAME in http_io.c (issue #42) - Fix encryption key generation bug (on some systems) Version 1.3.5 (r485) released 29 May 2013 - Check for duplicate mount at startup (issue #10) - Remove obsolete backward-compatibility block size check Version 1.3.4 (r476) released 2 Apr 2013 - Support FUSE fallocate() call to zero unused blocks Version 1.3.3 (r463) released 7 Apr 2012 - Fix bug in validation of --baseURL parameter (issue #34) - Accept 404 Not Found as a valid response to a DELETE (issue #35) - Added a fix for building on Mac OS X (issue #32) Version 1.3.2 (r451) released 14 May 2011 - Added `--directIO' flag to disable kernel 
caching of the backed file. - Fixed bug where the stats file was not up to date (issue #26). - Fixed bug with `--blockCacheMaxDirty' not working (issue #25). - Added automatic block cache disk file resizing (issue #23). - Added `--maxUploadSpeed' and `--maxDownloadSpeed' flags. - Added `-rrs' flag to support Reduced Redundancy Storage. - Fixed missing warning for `--baseURL' when missing trailing slash. Version 1.3.1 (r413) released 19 Oct 2009 - Added `--blockCacheMaxDirty' flag. - Fixed cURL handle leak when cancelling in-progress writes. - Updated Mac OS X build instructions and added Snow Leopard support. Version 1.3.0 (r392) released 27 Sep 2009 - Added support for local cache files that can persist across restarts. - Added built-in support for encryption and authentication. - In-progress writes are now cancelled when a duplicate write occurs. - Changed default for `--blockCacheWriteDelay' from zero to 250ms. - Fix obscure and unlikely deadlock bug in ec_protect.c. - Allow configurable compression level via --compress=LEVEL. - Fix bug that caused spurious "impossible expected MD5" log messages. Version 1.2.3 (r333) released 15 May 2009 - Added `--vhost' flag for virtual hosted style URLs in all requests. - Don't send LOG_DEBUG messages to syslog unless --debug flag given. - Fix race condition when generating HTTP Date: headers. - Allow command line flags to be specified in /etc/fstab. Version 1.2.2 (r316) released 20 Dec 2008 - Added `--compress' flag enabling compression of file blocks. Note: compressed blocks are not compatible with versions < 1.2.2. - Disable the MD5 cache when the `--readOnly' flag is given. - Make `--md5CacheTime=0' really mean `infinite' as promised in man page. - Added `--debug-http' flag for debugging HTTP headers. - Don't let block and MD5 caches be configured larger than necessary. - Fixed a few minor issues with statistics reporting. Version 1.2.1 (r300) released 23 Oct 2008 - Added `--erase' and `--quiet' command line flags. 
- Added `--blockCacheSync' command line flag. - Fixed extra copying slowdown when using large block sizes (issue #5). - Eliminate extra copy of blocks when written by block_cache worker threads. - Fixed bug in EC layer where dirty data might not be flushed at shutdown. - Fixed bug where 'http' was shown instead of 'https' in mount(8) output when the --ssl flag was given. Version 1.2.0 (r248) released 12 Sep 2008 - Use new custom hash table implementation; this removes glib dependency. - Replaced `--assumeEmpty' flag with safer and more useful `--listBlocks'. - Fixed bug where the zero block optimization got disabled when the MD5 cache was disabled. - Supply `-o allow_other' option by default, since default mode is 0600. - Fixed bug where cp(1)'ing the backed file gave `Illegal seek' error. - Use FUSE version 25 API so code builds on older O/S distributions. Version 1.1.1 (r202) released 5 Aug 2008 - Added `--ssl' as an alias for `--baseURL https://s3.amazonaws.com/'. - Added `--insecure' and `--cacert' flags to configure cURL SSL checks. - Implemented `--blockCacheWriteDelay' and `--blockCacheTimeout' flags. - Implemented read-ahead using `--readAhead' and `--readAheadTrigger' flags. - Set FUSE max_readahead option to zero by default since we do it too now. - Added new `--test' flag which turns on local test mode. - Display the URL, bucket, and prefix in the output of mount(8). - Fixed bug where an error during auto-detection would cause a segfault. - Fixed bug where read errors from the underlying store were being ignored by the block cache layer. Version 1.1.0 (r150) released 26 July 2008 - Added a block cache with parallel writes which vastly improves performance. - Added a new `stats' file to the filesystem containing various statistics. - Added `--noAutoDetect' flag to disable auto-detection at startup. - Fixed a few small race conditions and memory leaks. - Return zeroes for unwritten blocks with `assumeEmpty'. 
Version 1.0.5 (r111) released 15 July 2008 - Avoid reuse of CURL instance after receiving any HTTP error (issue #3) - On MacOS, prevent kernel timeouts prior to our own timeout (issue #2) - Replaced `--connectTimeout' and `--ioTimeout' with `--timeout' because CURL's I/O timeout includes in it the connection time as well. Version 1.0.4 (r82) released 9 July 2008 - Retry on all HTTP error codes, not just 500 or greater. Tests show that a valid request can return a 4xx response due to network issues. - Added `--fileMode' and `--readOnly' flags. - Added `--assumeEmpty' flag. - Support 'E' for 'exabytes'. - Port to Mac OS (issue #1) Version 1.0.3 (r39) released 30 June 2008 - Implement exponential backoff: replace ``--maxRetry'' and ``--retryPause'' with ``--initialRetryPause'' and ``--maxRetryPause''. - Fix `--accessType' flag which was not being properly handled. - Improvements to the man page. Version 1.0.2 (r25) released 20 June 2008 - Fix bug in setting User-Agent HTTP header. - Fix glitch in man page. Version 1.0.1 (r18) released 20 June 2008 - Store filesystem size in meta-data associated with the first block and use it to auto-detect filesystem block and file sizes if not specified. As a result, `--size' flag is now optional. - Log a warning and zero remaining bytes when we encounter a short read. - Add User-Agent HTTP header to all HTTP requests. - Include SVN revision in version string. - Don't log every HTTP operation unless `-d' is passed. - Added `--force' flag. Version 1.0.0 released 19 June 2008 - Initial release s3backer-1.5.4/CHECKLIST000066400000000000000000000012351354714241400144600ustar00rootroot00000000000000Checklist for releasing version VERSION --------------------------------------- Final check make distcheck test tarball builds and works on Linux, MacOS... 
Tag release and release tarball sh cleanup.sh verify everything is clean update CHANGES with today's date and VERSION edit configure.ac and update with VERSION git commit git tag -a -m 'Tagging release VERSION' VERSION sh autogen.sh && ./configure && make distcheck upload tarball to Amazon S3 s3backer project update wikified man page update wiki Download page send email to s3backer-devel google group OBS update s3backer.spec update OBS project s3backer-1.5.4/COPYING000066400000000000000000000446271354714241400142730ustar00rootroot00000000000000 In addition to the license below, as a special exception, the copyright holders give permission to link the code of portions of this program with the OpenSSL library under certain conditions as described in each individual source file, and distribute linked combinations including the two. You must obey the GNU General Public License in all respects for all of the code used other than OpenSSL. If you modify file(s) with this exception, you may extend this exception to your version of the file(s), but you are not obligated to do so. If you do not wish to do so, delete this exception statement from your version. If you delete this exception statement from all source files in the program, then also delete it here. ---------------------- GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. 
(Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. 
The precise terms and conditions for copying, distribution and modification follow. GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. 
You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. 
Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. 
However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. 
If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. 
If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. 
s3backer-1.5.4/Dockerfile000066400000000000000000000004151354714241400152150ustar00rootroot00000000000000FROM ubuntu:14.04 RUN apt-get update && apt-get install -y \ build-essential \ autoconf \ libcurl4-openssl-dev \ libfuse-dev \ libexpat1-dev ADD . s3backer WORKDIR "./s3backer" RUN ["./autogen.sh"] RUN ["./configure"] RUN ["make"] RUN ["make", "install"] s3backer-1.5.4/INSTALL000066400000000000000000000006051354714241400142550ustar00rootroot00000000000000 Simplified instructions: 1. Ensure you have the following software packages installed: fuse-devel libcurl-devel libexpat-devel libopenssl-devel pkg-config zlib-devel 2. ./configure && make && sudo make install Please see https://github.com/archiecobbs/s3backer/wiki/BuildAndInstall for more build and install information. s3backer-1.5.4/Makefile.am000066400000000000000000000051731354714241400152650ustar00rootroot00000000000000 # # s3backer - FUSE-based single file backing store via Amazon S3 # # Copyright 2008-2011 Archie L. Cobbs # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # # In addition, as a special exception, the copyright holders give # permission to link the code of portions of this program with the # OpenSSL library under certain conditions as described in each # individual source file, and distribute linked combinations including # the two. 
# # You must obey the GNU General Public License in all respects for all # of the code used other than OpenSSL. If you modify file(s) with this # exception, you may extend this exception to your version of the # file(s), but you are not obligated to do so. If you do not wish to do # so, delete this exception statement from your version. If you delete # this exception statement from all source files in the program, then # also delete it here. bin_PROGRAMS= s3backer noinst_PROGRAMS= tester noinst_HEADERS= s3backer.h \ block_cache.h \ block_part.h \ dcache.h \ ec_protect.h \ erase.h \ fuse_ops.h \ hash.h \ http_io.h \ reset.h \ test_io.h \ s3b_config.h man_MANS= s3backer.1 docdir= $(datadir)/doc/packages/$(PACKAGE) doc_DATA= CHANGES COPYING README INSTALL TODO EXTRA_DIST= CHANGES s3backer.1 s3backer.spec s3backer_SOURCES= main.c \ block_cache.c \ block_part.c \ dcache.c \ ec_protect.c \ erase.c \ fuse_ops.c \ hash.c \ http_io.c \ reset.c \ s3b_config.c \ test_io.c \ sslcompat.c \ gitrev.c tester_SOURCES= tester.c \ block_cache.c \ block_part.c \ dcache.c \ ec_protect.c \ erase.c \ hash.c \ http_io.c \ reset.c \ s3b_config.c \ test_io.c \ sslcompat.c \ gitrev.c AM_CFLAGS= $(FUSE_CFLAGS) gitrev.c: printf 'const char *const s3backer_version = "%s";\n' "`git describe`" > gitrev.c s3backer-1.5.4/README000066400000000000000000000206211354714241400141040ustar00rootroot00000000000000s3backer - FUSE-based single file backing store via Amazon S3 Overview s3backer is a filesystem that contains a single file backed by the Amazon Simple Storage Service (Amazon S3). As a filesystem, it is very simple: it provides a single normal file having a fixed size. Underneath, the file is divided up into blocks, and the content of each block is stored in a unique Amazon S3 object. In other words, what s3backer provides is really more like an S3-backed virtual hard disk device, rather than a filesystem. 
In typical usage, a `normal' filesystem is mounted on top of the file exported by the s3backer filesystem using a loopback mount (or disk image mount on Mac OS X). This arrangement has several benefits compared to more complete S3 filesystem implementations: o By not attempting to implement a complete filesystem, which is a com- plex undertaking and difficult to get right, s3backer can stay very lightweight and simple. Only three HTTP operations are used: GET, PUT, and DELETE. All of the experience and knowledge about how to properly implement filesystems that already exists can be reused. o By utilizing existing filesystems, you get full UNIX filesystem semantics. Subtle bugs or missing functionality relating to hard links, extended attributes, POSIX locking, etc. are avoided. o The gap between normal filesystem semantics and Amazon S3 ``eventual consistency'' is more easily and simply solved when one can interpret S3 objects as simple device blocks rather than filesystem objects (see below). o When storing your data on Amazon S3 servers, which are not under your control, the ability to encrypt and authenticate data becomes a crit- ical issue. s3backer supports secure encryption and authentication. Alternately, the encryption capability built into the Linux loopback device can be used. o Since S3 data is accessed over the network, local caching is also very important for performance reasons. Since s3backer presents the equivalent of a virtual hard disk to the kernel, most of the filesys- tem caching can be done where it should be: in the kernel, via the kernel's page cache. However s3backer also includes its own internal block cache for increased performance, using asynchronous worker threads to take advantage of the parallelism inherent in the network. Consistency Guarantees Amazon S3 makes relatively weak guarantees relating to the timing and consistency of reads vs. writes (collectively known as ``eventual consis- tency''). 
s3backer includes logic and configuration parameters to work around these limitations, allowing the user to guarantee consistency to whatever level desired, up to and including 100% detection and avoidance of incorrect data. These are: 1. s3backer enforces a minimum delay between consecutive PUT or DELETE operations on the same block. This ensures that Amazon S3 doesn't receive these operations out of order. 2. s3backer maintains an internal block MD5 checksum cache, which enables automatic detection and rejection of `stale' blocks returned by GET operations. This logic is configured by the following command line options: --md5CacheSize, --md5CacheTime, and --minWriteDelay. Zeroed Block Optimization As a simple optimization, s3backer does not store blocks containing all zeroes; instead, they are simply deleted. Conversely, reads of non-exis- tent blocks will contain all zeroes. In other words, the backed file is always maximally sparse. As a result, blocks do not need to be created before being used and no special initialization is necessary when creating a new filesystem. When the --listBlocks flag is given, s3backer will list all existing blocks at startup so it knows ahead of time exactly which blocks are empty. File and Block Size Auto-Detection As a convenience, whenever the first block of the backed file is written, s3backer includes as meta-data (in the ``x-amz-meta-s3backer-filesize'' header) the total size of the file. Along with the size of the block itself, this value can be checked and/or auto-detected later when the filesystem is remounted, eliminating the need for the --blockSize or --size flags to be explicitly provided and avoiding accidental mis-inter- pretation of an existing filesystem. Block Cache s3backer includes support for an internal block cache to increase perfor- mance. The block cache cache is completely separate from the MD5 cache which only stores MD5 checksums transiently and whose sole purpose is to mitigate ``eventual consistency''. 
The block cache is a traditional cache containing cached data blocks. When full, clean blocks are evicted as necessary in LRU order. Reads of cached blocks will return immediately with no network traffic. Writes to the cache also return immediately and trigger an asynchronous write operation to the network via a separate worker thread. Because the kernel typically writes blocks through FUSE filesystems one at a time, performing writes asynchronously allows s3backer to take advantage of the parallelism inherent in the network, vastly improving write performance. The block cache can be configured to store the cached data in a local file instead of in memory. This permits larger cache sizes and allows s3backer to reload cached data after a restart. Reloaded data is veri- fied before reuse. The block cache is configured by the following command line options: --blockCacheFile, --blockCacheNoVerify, --blockCacheSize, --blockCacheSync, --blockCacheThreads, --blockCacheTimeout, and --blockCacheWriteDelay. Read Ahead s3backer implements a simple read-ahead algorithm in the block cache. When a configurable number of blocks are read in order, block cache worker threads are awoken to begin reading subsequent blocks into the block cache. Read ahead continues as long as the kernel continues read- ing blocks sequentially. The kernel typically requests blocks one at a time, so having multiple worker threads already reading the next few blocks improves read performance by taking advantage of the parallelism inherent in the network. Note that the kernel implements a read ahead algorithm as well; its behavior should be taken into consideration. By default, s3backer passes the -o max_readahead=0 option to FUSE. Read ahead is configured by the --readAhead and --readAheadTrigger com- mand line options. Encryption and Authentication s3backer supports encryption via the --encrypt, --password, and --passwordFile flags. 
When encryption is enabled, SHA1 HMAC authentica- tion is also automatically enabled, and s3backer rejects any blocks that are not properly encrypted and signed. Encrypting at the s3backer layer is preferable to encrypting at an upper layer (e.g., at the loopback device layer), because if the data s3backer sees is already encrypted it can't optimize away zeroed blocks or do meaningful compression. Read-Only Access An Amazon S3 account is not required in order to use s3backer. The filesystem must already exist and have S3 objects with ACL's configured for public read access (see --accessType below); users should perform the looback mount with the read-only flag (see mount(8)) and provide the --readOnly flag to s3backer. This mode of operation facilitates the cre- ation of public, read-only filesystems. Simultaneous Mounts Although it functions over the network, the s3backer filesystem is not a distributed filesystem and does not support simultaneous read/write mounts. (This is not something you would normally do with a hard-disk partition either.) s3backer does not detect this situation; it is up to the user to ensure that it doesn't happen. Statistics File s3backer populates the filesystem with a human-readable statistics file. See --statsFilename below. Logging In normal operation s3backer will log via syslog(3). When run with the -d or -f flags, s3backer will log to standard error. ------------------------------------------------------------------------ Home page: https://github.com/archiecobbs/s3backer ------------------------------------------------------------------------ See INSTALL for installation instructions. After installing, see the s3backer(1) man page for how to run it. See COPYING for license. See CHANGES for change history. Enjoy! 
s3backer-1.5.4/README.md000066400000000000000000000207611354714241400145100ustar00rootroot00000000000000**s3backer** is a filesystem that contains a single file backed by the [Amazon Simple Storage Service](http://aws.amazon.com/s3) (Amazon S3). As a filesystem, it is very simple: it provides a single normal file having a fixed size. Underneath, the file is divided up into blocks, and the content of each block is stored in a unique Amazon S3 object. In other words, what **s3backer** provides is really more like an S3-backed virtual hard disk device, rather than a filesystem. In typical usage, a normal filesystem is mounted on top of the file exported by the **s3backer** filesystem using a loopback mount (or disk image mount on Mac OS X). This arrangement has several benefits compared to more complete S3 filesystem implementations: * By not attempting to implement a complete filesystem, which is a complex undertaking and difficult to get right, **s3backer** can stay very lightweight and simple. Only three HTTP operations are used: GET, PUT, and DELETE. All of the experience and knowledge about how to properly implement filesystems that already exists can be reused. * By utilizing existing filesystems, you get full UNIX filesystem semantics. Subtle bugs or missing functionality relating to hard links, extended attributes, POSIX locking, etc. are avoided. * The gap between normal filesystem semantics and Amazon S3 ``eventual consistency'' is more easily and simply solved when one can interpret S3 objects as simple device blocks rather than filesystem objects (see below). * When storing your data on Amazon S3 servers, which are not under your control, the ability to encrypt data becomes a critical issue. **s3backer** supports secure encryption and authentication. Alternately, the encryption capability built into the Linux loopback device can be used. * Since S3 data is accessed over the network, local caching is also very important for performance reasons. 
Since **s3backer** presents the equivalent of a virtual hard disk to the kernel, most of the filesystem caching can be done where it should be: in the kernel, via the kernel's page cache. However **s3backer** also includes its own internal block cache for increased performance, using asynchronous worker threads to take advantage of the parallelism inherent in the network. ### Consistency Guarantees Amazon S3 makes relatively weak guarantees relating to the timing and consistency of reads vs. writes (collectively known as "eventual consistency"). **s3backer** includes logic and configuration parameters to work around these limitations, allowing the user to guarantee consistency to whatever level desired, up to and including 100% detection and avoidance of incorrect data. These are: 1. **s3backer** enforces a minimum delay between consecutive PUT or DELETE operations on the same block. This ensures that Amazon S3 doesn't receive these operations out of order. 1. **s3backer** maintains an internal block MD5 checksum cache, which enables automatic detection and rejection of `stale' blocks returned by GET operations. This logic is configured by the following command line options: `--md5CacheSize`, `--md5CacheTime`, and `--minWriteDelay`. ### Zeroed Block Optimization As a simple optimization, **s3backer** does not store blocks containing all zeroes; instead, they are simply deleted. Conversely, reads of non-existent blocks will contain all zeroes. In other words, the backed file is always maximally sparse. As a result, blocks do not need to be created before being used and no special initialization is necessary when creating a new filesystem. When the `--listBlocks` flag is given, s3backer will list all existing blocks at startup so it knows ahead of time exactly which blocks are empty. 
### File and Block Size Auto-Detection As a convenience, whenever the first block of the backed file is written, **s3backer** includes as meta-data (in the `x-amz-meta-s3backer-filesize` header) the total size of the file. Along with the size of the block itself, this value can be checked and/or auto-detected later when the filesystem is remounted, eliminating the need for the `--blockSize` or `--size` flags to be explicitly provided and avoiding accidental mis-interpretation of an existing filesystem. ### Block Cache **s3backer** includes support for an internal block cache to increase performance. The block cache cache is completely separate from the MD5 cache which only stores MD5 checksums transiently and whose sole purpose is to mitigate ``eventual consistency''. The block cache is a traditional cache containing cached data blocks. When full, clean blocks are evicted as necessary in LRU order. Reads of cached blocks will return immediately with no network traffic. Writes to the cache also return immediately and trigger an asynchronous write operation to the network via a separate worker thread. Because the kernel typically writes blocks through FUSE filesystems one at a time, performing writes asynchronously allows **s3backer** to take advantage of the parallelism inherent in the network, vastly improving write performance. The block cache can be configured to store the cached data in a local file instead of in memory. This permits larger cache sizes and allows **s3backer** to reload cached data after a restart. Reloaded data is verified via MD5 checksum with Amazon S3 before reuse. The block cache is configured by the following command line options: `--blockCacheFile`, `--blockCacheNoVerify`, `--blockCacheSize`, `--blockCacheThreads` and `--blockCacheWriteDelay`. ### Read Ahead **s3backer** implements a simple read-ahead algorithm in the block cache. 
When a configurable number of blocks are read in order, block cache worker threads are awoken to begin reading subsequent blocks into the block cache. Read ahead continues as long as the kernel continues reading blocks sequentially. The kernel typically requests blocks one at a time, so having multiple worker threads already reading the next few blocks improves read performance by taking advantage of the parallelism inherent in the network. Note that the kernel implements a read ahead algorithm as well; its behavior should be taken into consideration. By default, **s3backer** passes the `-o max_readahead=0` option to FUSE. Read ahead is configured by the `--readAhead` and `--readAheadTrigger` command line options. ### Encryption and Authentication **s3backer** supports encryption via the `--encrypt`, `--password`, and `--passwordFile` flags. When encryption is enabled, SHA1 HMAC authentication is also automatically enabled, and s3backer rejects any blocks that are not properly encrypted and signed. Encrypting at the s3backer layer is preferable to encrypting at an upper layer (e.g., at the loopback device layer), because if the data s3backer sees is already encrypted it can't optimize away zeroed blocks or do meaningful compression. ### Compression **s3backer** supports block-level compression, which minimizes transfer time and storage costs. Compression is configured via the`--compress` flag. Compression is automatically enabled when encryption is enabled. ### Read-Only Access An Amazon S3 account is not required in order to use **s3backer**. Of course a filesystem must already exist and have S3 objects with ACL's configured for public read access (see `--accessType` below); users should perform the looback mount with the read-only flag (see mount(8)) and provide the `--readOnly` flag to **s3backer**. This mode of operation facilitates the creation of public, read-only filesystems. 
### Simultaneous Mounts Although it functions over the network, the **s3backer** filesystem is not a distributed filesystem and does not support simultaneous read/write mounts. (This is not something you would normally do with a hard-disk partition either.) **s3backer** does not detect this situation; it is up to the user to ensure that it doesn't happen. ### Statistics File **s3backer** populates the filesystem with a human-readable statistics file. See `--statsFilename` below. ### Logging In normal operation **s3backer** will log via `syslog(3)`. When run with the `-d` or `-f` flags, **s3backer** will log to standard error. ### OK, Where to Next? **[Try it out!](https://github.com/archiecobbs/s3backer/wiki/Running-the-Demo)** No Amazon S3 account is required. See the [ManPage](https://github.com/archiecobbs/s3backer/wiki/ManPage) for further documentation and the [CHANGES](https://github.com/archiecobbs/s3backer/blob/master/CHANGES) file for release notes. Join the [s3backer-devel](http://groups.google.com/group/s3backer-devel) group to participate in discussion and development of **s3backer**. s3backer-1.5.4/TODO000066400000000000000000000001261354714241400137120ustar00rootroot00000000000000TODO - support alternate backends, generalize `--test' to `--backend=localfs', etc. s3backer-1.5.4/autogen.sh000077500000000000000000000010341354714241400152220ustar00rootroot00000000000000#!/usr/bin/env bash # # Script to regenerate all the GNU auto* gunk. # Run this from the top directory of the source tree. # # If it looks like I don't know what I'm doing here, you're right. # set -e . ./cleanup.sh mkdir -p scripts ACLOCAL="aclocal" AUTOHEADER="autoheader" AUTOMAKE="automake" AUTOCONF="autoconf" echo "running aclocal" ${ACLOCAL} ${ACLOCAL_ARGS} -I scripts -I . echo "running autoheader" ${AUTOHEADER} echo "running automake" ${AUTOMAKE} --add-missing -c --foreign echo "running autoconf" ${AUTOCONF} -I . 
-f -i s3backer-1.5.4/block_cache.c000066400000000000000000001551101354714241400156070ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. */ #include "s3backer.h" #include "block_cache.h" #include "dcache.h" #include "hash.h" /* * This file implements a simple block cache that acts as a "layer" on top * of an underlying s3backer_store. 
* * Blocks in the cache are in one of these states: * * CLEAN Data is consistent with underlying s3backer_store * CLEAN2 Data is belived consistent with underlying s3backer_store, but need to verify MD5 * DIRTY Data is inconsistent with underlying s3backer_store (needs writing) * READING Data is being read from the underlying s3backer_store * READING2 Data is being read/verified from the underlying s3backer_store * WRITING Data is being written to underlying s3backer_store * WRITING2 Same as WRITING, but a subsequent write has stored new data * * Blocks in the CLEAN and CLEAN2 states are linked in a list in order from least recently * used to most recently used (where 'used' means either read or written). CLEAN2 is the * same as CLEAN except that the data must be MD5 verified before being used. * * Blocks in the DIRTY state are linked in a list in the order they should be written. * A pool of worker threads picks them off and writes them through to the underlying * s3backer_store; while being written they are in state WRITING, or WRITING2 if another * write to the same block happens during that time. If the write is unsuccessful, the * block goes back to DIRTY and to the head of the DIRTY list: the result is that failed * writes of DIRTY blocks will retry indefinitely. If the write is successful, the * block moves to CLEAN if still in state WRITING, or DIRTY if in WRITING2. * * Because we allow writes to update the data in a block while that block is being * written, the worker threads always write from the original buffer, and a new buffer * will get created on demand when a block moves to state WRITING2. When it completes * its write attempt, the worker thread then checks for this condition and, if indeed * the block has changed to WRITING2, it knows to free the original buffer. * * Blocks in the READING, WRITING and WRITING2 states are not in either list. * * Only CLEAN and CLEAN2 blocks are eligible to be evicted from the cache. 
We evict entries * either when they timeout or the cache is full and we need to add a new entry to it. */ /* Cache entry states */ #define CLEAN 0 #define CLEAN2 1 #define DIRTY 2 #define READING 3 #define READING2 4 #define WRITING 5 #define WRITING2 6 /* * One cache entry. In order to keep this structure as small as possible, we do * two size optimizations: * * 1. We use the low-order bit of '_data' as the dirty flag (we assume all valid * pointers are aligned to an even address). * 2. When not linked into either list (i.e., in WRITING state), we set link.tqe_prev * to NULL to indicate this; this is safe because link.tqe_prev is always non-NULL * when the structure is linked into a list. * * Invariants: * * State ENTRY_IN_LIST()? dirty? timeout == -1 verify dcache * ----- ---------------- ------ ------------- ------ ------ * * CLEAN YES: priv->cleans NO ? 0 recorded * CLEAN2 YES: priv->cleans NO ? 1 recorded * READING NO NO YES 0 allocated * READING2 NO NO YES 1 allocated * DIRTY YES: priv->dirties YES ? ? allocated * WRITING NO NO NO ? allocated * WRITING2 NO YES NO ? allocated * * Timeouts: we track time in units of TIME_UNIT_MILLIS milliseconds from when we start. * This is so we can jam them into 30 bits instead of 64. It's possible for the time value * to wrap after about two years; the effect would be mis-timed writes and evictions. * * In state CLEAN2 only, the MD5 to verify immediately follows the structure. 
*/ struct cache_entry { s3b_block_t block_num; // block number - MUST BE FIRST u_int dirty:1; // indicates state DIRTY or WRITING2 u_int verify:1; // data should be verified first uint32_t timeout:30; // when to evict (CLEAN[2]) or write (DIRTY) TAILQ_ENTRY(cache_entry) link; // next in list (cleans or dirties) union { void *data; // data buffer in memory u_int dslot; // disk cache data slot } u; u_char md5[0]; // MD5 checksum (CLEAN2) }; #define ENTRY_IN_LIST(entry) ((entry)->link.tqe_prev != NULL) #define ENTRY_RESET_LINK(entry) do { (entry)->link.tqe_prev = NULL; } while (0) #define ENTRY_GET_STATE(entry) (ENTRY_IN_LIST(entry) ? \ ((entry)->dirty ? DIRTY : \ ((entry)->verify ? CLEAN2 : CLEAN)) : \ ((entry)->timeout == READING_TIMEOUT ? \ ((entry)->verify ? READING2 : READING) : \ (entry)->dirty ? WRITING2 : WRITING)) /* One time unit in milliseconds */ #define TIME_UNIT_MILLIS 64 /* The dirty ratio at which we want to be writing out dirty blocks immediately */ #define DIRTY_RATIO_WRITE_ASAP 0.90 // 90% /* Special timeout value for entries in state READING and READING2 */ #define READING_TIMEOUT ((uint32_t)0x3fffffff) /* Private data */ struct block_cache_private { struct block_cache_conf *config; // configuration struct s3backer_store *inner; // underlying s3backer store struct block_cache_stats stats; // statistics TAILQ_HEAD(, cache_entry) cleans; // list of clean blocks (LRU order) TAILQ_HEAD(, cache_entry) dirties; // list of dirty blocks (write order) struct s3b_hash *hashtable; // hashtable of all cached blocks struct s3b_dcache *dcache; // on-disk persistent cache u_int num_cleans; // length of the 'cleans' list u_int num_dirties; // # blocks that are DIRTY, WRITING, or WRITING2 u_int64_t start_time; // when we started u_int32_t clean_timeout; // timeout for clean entries in time units u_int32_t dirty_timeout; // timeout for dirty entries in time units double max_dirty_ratio;// dirty ratio at which we write immediately s3b_block_t seq_last; // last block 
read in sequence by upper layer u_int seq_count; // # of blocks read in sequence by upper layer u_int ra_count; // # of blocks of read-ahead initiated u_int thread_id; // next thread id u_int num_threads; // number of alive worker threads int stopping; // signals worker threads to exit pthread_mutex_t mutex; // my mutex pthread_cond_t space_avail; // there is new space available in cache pthread_cond_t end_reading; // some entry in state READING[2] changed state pthread_cond_t worker_work; // there is new work for worker thread(s) pthread_cond_t worker_exit; // a worker thread has exited pthread_cond_t write_complete; // a write has completed }; /* Callback info */ struct cbinfo { block_list_func_t *callback; void *arg; }; /* s3backer_store functions */ static int block_cache_create_threads(struct s3backer_store *s3b); static int block_cache_meta_data(struct s3backer_store *s3b, off_t *file_sizep, u_int *block_sizep); static int block_cache_set_mount_token(struct s3backer_store *s3b, int32_t *old_valuep, int32_t new_value); static int block_cache_read_block(struct s3backer_store *s3b, s3b_block_t block_num, void *dest, u_char *actual_md5, const u_char *expect_md5, int strict); static int block_cache_write_block(struct s3backer_store *s3b, s3b_block_t block_num, const void *src, u_char *md5, check_cancel_t *check_cancel, void *check_cancel_arg); static int block_cache_read_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int off, u_int len, void *dest); static int block_cache_write_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int off, u_int len, const void *src); static int block_cache_list_blocks(struct s3backer_store *s3b, block_list_func_t *callback, void *arg); static int block_cache_flush(struct s3backer_store *s3b); static void block_cache_destroy(struct s3backer_store *s3b); /* Other functions */ static s3b_dcache_visit_t block_cache_dcache_load; static int block_cache_read(struct block_cache_private *priv, s3b_block_t 
block_num, u_int off, u_int len, void *dest); static int block_cache_do_read(struct block_cache_private *priv, s3b_block_t block_num, u_int off, u_int len, void *dest, int stats); static int block_cache_write(struct block_cache_private *priv, s3b_block_t block_num, u_int off, u_int len, const void *src); static void *block_cache_worker_main(void *arg); static int block_cache_check_cancel(void *arg, s3b_block_t block_num); static int block_cache_get_entry(struct block_cache_private *priv, struct cache_entry **entryp, void **datap); static void block_cache_free_entry(struct block_cache_private *priv, struct cache_entry **entryp); static void block_cache_free_one(void *arg, void *value); static struct cache_entry *block_cache_verified(struct block_cache_private *priv, struct cache_entry *entry); static void block_cache_dirty_callback(void *arg, void *value); static double block_cache_dirty_ratio(struct block_cache_private *priv); static void block_cache_worker_wait(struct block_cache_private *priv, struct cache_entry *entry); static uint32_t block_cache_get_time(struct block_cache_private *priv); static uint64_t block_cache_get_time_millis(void); static int block_cache_read_data(struct block_cache_private *priv, struct cache_entry *entry, void *dest, u_int off, u_int len); static int block_cache_write_data(struct block_cache_private *priv, struct cache_entry *entry, const void *src, u_int off, u_int len); /* Invariants checking */ #ifndef NDEBUG static void block_cache_check_invariants(struct block_cache_private *priv); static void block_cache_check_one(void *arg, void *value); #define S3BCACHE_CHECK_INVARIANTS(priv) block_cache_check_invariants(priv) #else #define S3BCACHE_CHECK_INVARIANTS(priv) do { } while (0) #endif /* * Wrap an underlying s3backer store with a block cache. Invoking the * destroy method will destroy both this and the inner s3backer store. * * Returns NULL and sets errno on failure. 
*/ struct s3backer_store * block_cache_create(struct block_cache_conf *config, struct s3backer_store *inner) { struct s3backer_store *s3b; struct block_cache_private *priv; struct cache_entry *entry; int r; /* Initialize s3backer_store structure */ if ((s3b = calloc(1, sizeof(*s3b))) == NULL) { r = errno; (*config->log)(LOG_ERR, "calloc(): %s", strerror(r)); goto fail0; } s3b->create_threads = block_cache_create_threads; s3b->meta_data = block_cache_meta_data; s3b->set_mount_token = block_cache_set_mount_token; s3b->read_block = block_cache_read_block; s3b->write_block = block_cache_write_block; s3b->read_block_part = block_cache_read_block_part; s3b->write_block_part = block_cache_write_block_part; s3b->list_blocks = block_cache_list_blocks; s3b->flush = block_cache_flush; s3b->destroy = block_cache_destroy; /* Initialize block_cache_private structure */ if ((priv = calloc(1, sizeof(*priv))) == NULL) { r = errno; (*config->log)(LOG_ERR, "calloc(): %s", strerror(r)); goto fail1; } priv->config = config; priv->inner = inner; priv->start_time = block_cache_get_time_millis(); priv->clean_timeout = (config->timeout + TIME_UNIT_MILLIS - 1) / TIME_UNIT_MILLIS; priv->dirty_timeout = (config->write_delay + TIME_UNIT_MILLIS - 1) / TIME_UNIT_MILLIS; if ((r = pthread_mutex_init(&priv->mutex, NULL)) != 0) goto fail2; if ((r = pthread_cond_init(&priv->space_avail, NULL)) != 0) goto fail3; if ((r = pthread_cond_init(&priv->end_reading, NULL)) != 0) goto fail4; if ((r = pthread_cond_init(&priv->worker_work, NULL)) != 0) goto fail5; if ((r = pthread_cond_init(&priv->worker_exit, NULL)) != 0) goto fail6; if ((r = pthread_cond_init(&priv->write_complete, NULL)) != 0) goto fail7; TAILQ_INIT(&priv->cleans); TAILQ_INIT(&priv->dirties); if ((r = s3b_hash_create(&priv->hashtable, config->cache_size)) != 0) goto fail8; s3b->data = priv; /* Compute dirty ratio at which we will be writing immediately */ priv->max_dirty_ratio = (double)(config->max_dirty != 0 ? 
config->max_dirty : config->cache_size) / (double)config->cache_size; if (priv->max_dirty_ratio > DIRTY_RATIO_WRITE_ASAP) priv->max_dirty_ratio = DIRTY_RATIO_WRITE_ASAP; /* Initialize on-disk cache and read in directory */ if (config->cache_file != NULL) { if ((r = s3b_dcache_open(&priv->dcache, config->log, config->cache_file, config->block_size, config->cache_size, block_cache_dcache_load, priv, config->perform_flush)) != 0) goto fail9; if (config->perform_flush && priv->num_dirties > 0) (*config->log)(LOG_INFO, "%u dirty blocks in cache file `%s' will be recovered", priv->num_dirties, config->cache_file); priv->stats.initial_size = priv->num_cleans + priv->num_dirties; } /* Grab lock */ pthread_mutex_lock(&priv->mutex); S3BCACHE_CHECK_INVARIANTS(priv); /* Done */ pthread_mutex_unlock(&priv->mutex); return s3b; fail9: if (config->cache_file != NULL) { while ((entry = TAILQ_FIRST(&priv->cleans)) != NULL) { TAILQ_REMOVE(&priv->cleans, entry, link); free(entry); } s3b_dcache_close(priv->dcache); } s3b_hash_destroy(priv->hashtable); fail8: pthread_cond_destroy(&priv->write_complete); fail7: pthread_cond_destroy(&priv->worker_exit); fail6: pthread_cond_destroy(&priv->worker_work); fail5: pthread_cond_destroy(&priv->end_reading); fail4: pthread_cond_destroy(&priv->space_avail); fail3: pthread_mutex_destroy(&priv->mutex); fail2: free(priv); fail1: free(s3b); fail0: (*config->log)(LOG_ERR, "block_cache creation failed: %s", strerror(r)); errno = r; return NULL; } /* * Callback function to pre-load the cache from a pre-existing cache file. 
*/
static int
block_cache_dcache_load(void *arg, s3b_block_t dslot, s3b_block_t block_num, const u_char *md5)
{
    const u_int dirty = md5 == NULL;                /* the dcache reports dirty blocks with a NULL MD5 */
    struct block_cache_private *const priv = arg;
    struct block_cache_conf *const config = priv->config;
    struct cache_entry *entry;
    int r;

    /* Sanity check */
    assert(config->cache_file != NULL);
    assert(!dirty || config->perform_flush);        /* we should never see dirty blocks unless we asked for them */

    /* Sanity check a block is not listed twice */
    if ((entry = s3b_hash_get(priv->hashtable, block_num)) != NULL) {
        (*config->log)(LOG_ERR, "corrupted cache file: block 0x%0*jx listed twice (in dslots %ju and %ju)",
          S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num, (uintmax_t)entry->u.dslot, (uintmax_t)dslot);
        return EINVAL;
    }

    /* Create a new cache entry; when verification is enabled, allocate extra trailing bytes for the expected MD5 */
    assert(config->cache_file != NULL);
    if ((entry = calloc(1, sizeof(*entry) + (!config->no_verify ? MD5_DIGEST_LENGTH : 0))) == NULL) {
        r = errno;
        (*config->log)(LOG_ERR, "can't allocate block cache entry: %s", strerror(r));
        priv->stats.out_of_memory_errors++;
        return r;
    }
    entry->block_num = block_num;
    entry->timeout = block_cache_get_time(priv) + priv->clean_timeout;
    entry->u.dslot = dslot;

    /* Mark as clean or dirty accordingly */
    if (dirty) {
        entry->dirty = 1;
        TAILQ_INSERT_TAIL(&priv->dirties, entry, link);
        priv->num_dirties++;
        assert(ENTRY_GET_STATE(entry) == DIRTY);
    } else {
        entry->verify = !config->no_verify;
        if (entry->verify)
            memcpy(&entry->md5, md5, MD5_DIGEST_LENGTH);
        TAILQ_INSERT_TAIL(&priv->cleans, entry, link);
        priv->num_cleans++;
        assert(ENTRY_GET_STATE(entry) == (config->no_verify ? CLEAN : CLEAN2));
    }

    /* Index the entry; the duplicate check above guarantees the block is not already present */
    s3b_hash_put_new(priv->hashtable, entry);
    return 0;
}

/*
 * Start the configured number of worker threads, after starting any threads in the lower layer.
 */
static int
block_cache_create_threads(struct s3backer_store *s3b)
{
    struct block_cache_private *const priv = s3b->data;
    struct block_cache_conf *const config = priv->config;
    pthread_t thread;
    int r;

    /* Create threads in lower layer */
    if ((r = (*priv->inner->create_threads)(priv->inner)) != 0)
        return r;

    /* Grab lock */
    pthread_mutex_lock(&priv->mutex);
    S3BCACHE_CHECK_INVARIANTS(priv);

    /* Create threads */
    while (priv->num_threads < config->num_threads) {
        if ((r = pthread_create(&thread, NULL, block_cache_worker_main, priv)) != 0)
            goto fail;
        priv->num_threads++;
    }

fail:
    /* "fail" is also the success path: r is zero here when the loop completes normally */
    pthread_mutex_unlock(&priv->mutex);
    return r;
}

/*
 * Pass a meta-data query straight through to the lower layer.
 */
static int
block_cache_meta_data(struct s3backer_store *s3b, off_t *file_sizep, u_int *block_sizep)
{
    struct block_cache_private *const priv = s3b->data;

    return (*priv->inner->meta_data)(priv->inner, file_sizep, block_sizep);
}

/*
 * Set the mount token in the lower layer and mirror it into the disk cache file (if any).
 */
static int
block_cache_set_mount_token(struct s3backer_store *s3b, int32_t *old_valuep, int32_t new_value)
{
    struct block_cache_private *const priv = s3b->data;
    int r;

    /* Set flag in lower layer */
    if ((r = (*priv->inner->set_mount_token)(priv->inner, old_valuep, new_value)) != 0)
        return r;

    /* Update the disk cache file as well, if the value was changed */
    if (priv->dcache != NULL && new_value >= 0)
        r = s3b_dcache_set_mount_token(priv->dcache, NULL, new_value);
    /* NOTE(review): any error from s3b_dcache_set_mount_token() is assigned to "r" but discarded
     * below; as written this function always succeeds once the lower layer does -- confirm intended. */

    /* Done */
    return 0;
}

/*
 * Flush all dirty blocks down to the lower layer.
 *
 * Sets the "stopping" flag so workers drain the dirty list and then exit.
 * NOTE(review): this leaves "stopping" set and all worker threads exited, so it appears
 * only safe to call at shutdown -- confirm against callers.
 */
static int
block_cache_flush(struct s3backer_store *const s3b)
{
    struct block_cache_private *const priv = s3b->data;

    /* Grab lock and sanity check */
    pthread_mutex_lock(&priv->mutex);
    S3BCACHE_CHECK_INVARIANTS(priv);

    /* Wait for all dirty blocks to be written and all worker threads to exit */
    priv->stopping = 1;
    while (TAILQ_FIRST(&priv->dirties) != NULL || priv->num_threads > 0) {
        pthread_cond_broadcast(&priv->worker_work);
        pthread_cond_wait(&priv->worker_exit, &priv->mutex);
    }

    /* Release lock */
    pthread_mutex_unlock(&priv->mutex);
    return 0;
}

/*
 * Shut down this layer, destroy the inner store, and free all resources.
 */
static void
block_cache_destroy(struct s3backer_store *const s3b) { struct block_cache_private *const priv = s3b->data; struct block_cache_conf *const config = priv->config; /* Grab lock and sanity check */ pthread_mutex_lock(&priv->mutex); S3BCACHE_CHECK_INVARIANTS(priv); /* Wait for all dirty blocks to be written and all worker threads to exit */ priv->stopping = 1; while (TAILQ_FIRST(&priv->dirties) != NULL || priv->num_threads > 0) { pthread_cond_broadcast(&priv->worker_work); pthread_cond_wait(&priv->worker_exit, &priv->mutex); } /* Destroy inner store */ (*priv->inner->destroy)(priv->inner); /* Free structures */ if (config->cache_file != NULL) s3b_dcache_close(priv->dcache); s3b_hash_foreach(priv->hashtable, block_cache_free_one, priv); s3b_hash_destroy(priv->hashtable); pthread_cond_destroy(&priv->write_complete); pthread_cond_destroy(&priv->worker_exit); pthread_cond_destroy(&priv->worker_work); pthread_cond_destroy(&priv->end_reading); pthread_cond_destroy(&priv->space_avail); pthread_mutex_destroy(&priv->mutex); free(priv); free(s3b); } void block_cache_get_stats(struct s3backer_store *s3b, struct block_cache_stats *stats) { struct block_cache_private *const priv = s3b->data; pthread_mutex_lock(&priv->mutex); memcpy(stats, &priv->stats, sizeof(*stats)); stats->current_size = s3b_hash_size(priv->hashtable); stats->dirty_ratio = block_cache_dirty_ratio(priv); pthread_mutex_unlock(&priv->mutex); } void block_cache_clear_stats(struct s3backer_store *s3b) { struct block_cache_private *const priv = s3b->data; pthread_mutex_lock(&priv->mutex); memset(&priv->stats, 0, sizeof(priv->stats)); pthread_mutex_unlock(&priv->mutex); } static int block_cache_list_blocks(struct s3backer_store *s3b, block_list_func_t *callback, void *arg) { struct block_cache_private *const priv = s3b->data; struct cbinfo cbinfo; int r; if ((r = (*priv->inner->list_blocks)(priv->inner, callback, arg)) != 0) return r; cbinfo.callback = callback; cbinfo.arg = arg; pthread_mutex_lock(&priv->mutex); 
s3b_hash_foreach(priv->hashtable, block_cache_dirty_callback, &cbinfo); pthread_mutex_unlock(&priv->mutex); return 0; } static int block_cache_read_block(struct s3backer_store *const s3b, s3b_block_t block_num, void *dest, u_char *actual_md5, const u_char *expect_md5, int strict) { struct block_cache_private *const priv = s3b->data; struct block_cache_conf *const config = priv->config; assert(expect_md5 == NULL); assert(actual_md5 == NULL); return block_cache_read(priv, block_num, 0, config->block_size, dest); } static int block_cache_read_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int off, u_int len, void *dest) { struct block_cache_private *const priv = s3b->data; return block_cache_read(priv, block_num, off, len, dest); } /* * Read a block, and trigger read-ahead if necessary. */ static int block_cache_read(struct block_cache_private *const priv, s3b_block_t block_num, u_int off, u_int len, void *dest) { struct block_cache_conf *const config = priv->config; int r = 0; /* Grab lock */ pthread_mutex_lock(&priv->mutex); S3BCACHE_CHECK_INVARIANTS(priv); /* Sanity check */ if (priv->num_threads == 0) { (*config->log)(LOG_ERR, "block_cache_read(): no threads created yet"); return ENOTCONN; } /* Update count of block(s) read sequentially by the upper layer */ if (block_num == priv->seq_last + 1) { priv->seq_count++; if (priv->ra_count > 0) priv->ra_count--; } else if (block_num != priv->seq_last) { priv->seq_count = 1; priv->ra_count = 0; } priv->seq_last = block_num; /* Wakeup a worker thread to read the next read-ahead block if needed */ if (priv->seq_count >= config->read_ahead_trigger && priv->ra_count < config->read_ahead) pthread_cond_signal(&priv->worker_work); /* Peform the read */ r = block_cache_do_read(priv, block_num, off, len, dest, 1); /* Release lock */ pthread_mutex_unlock(&priv->mutex); return r; } /* * Read a block or a portion thereof. * * Assumes the mutex is held. 
*/ static int block_cache_do_read(struct block_cache_private *const priv, s3b_block_t block_num, u_int off, u_int len, void *dest, int stats) { struct block_cache_conf *const config = priv->config; struct cache_entry *entry; u_char md5[MD5_DIGEST_LENGTH]; int verified_but_not_read = 0; void *data = NULL; int r; /* Sanity check */ assert(off <= priv->config->block_size); assert(len <= priv->config->block_size); assert(off + len <= priv->config->block_size); again: /* Check to see if a cache entry already exists */ if ((entry = s3b_hash_get(priv->hashtable, block_num)) != NULL) { assert(entry->block_num == block_num); switch (ENTRY_GET_STATE(entry)) { case READING: /* Wait for other thread already reading this block to finish */ case READING2: pthread_cond_wait(&priv->end_reading, &priv->mutex); goto again; case CLEAN2: /* Go into READING2 state to read/verify the data */ /* Allocate temporary buffer for reading the data if necessary */ if (config->cache_file != NULL) { if ((data = malloc(config->block_size)) == NULL) { r = errno; (*config->log)(LOG_ERR, "can't allocate block cache buffer: %s", strerror(r)); return r; } } else data = entry->u.data; /* Change from CLEAN2 to READING2 */ if (config->cache_file != NULL) { if ((r = s3b_dcache_erase_block(priv->dcache, entry->u.dslot)) != 0) (*config->log)(LOG_ERR, "can't erase cached block! 
%s", strerror(r)); } TAILQ_REMOVE(&priv->cleans, entry, link); ENTRY_RESET_LINK(entry); priv->num_cleans--; entry->timeout = READING_TIMEOUT; assert(entry->verify); assert(ENTRY_GET_STATE(entry) == READING2); /* Now go read/verify the data */ goto read; case CLEAN: /* Update timestamp and move to the end of the list to maintain LRU ordering */ TAILQ_REMOVE(&priv->cleans, entry, link); TAILQ_INSERT_TAIL(&priv->cleans, entry, link); entry->timeout = block_cache_get_time(priv) + priv->clean_timeout; // FALLTHROUGH case DIRTY: /* Copy the cached data */ case WRITING: case WRITING2: if ((r = block_cache_read_data(priv, entry, dest, off, len)) != 0) return r; break; default: assert(0); break; } if (stats) priv->stats.read_hits++; return 0; } /* Create a new cache entry in state READING */ if ((r = block_cache_get_entry(priv, &entry, &data)) != 0) return r; if (entry == NULL) { /* no free entries right now */ pthread_cond_wait(&priv->space_avail, &priv->mutex); goto again; } entry->block_num = block_num; entry->dirty = 0; entry->verify = 0; entry->timeout = READING_TIMEOUT; ENTRY_RESET_LINK(entry); s3b_hash_put_new(priv->hashtable, entry); assert(ENTRY_GET_STATE(entry) == READING); /* Update stats */ if (stats) priv->stats.read_misses++; read: /* Read the block from the underlying s3backer_store */ assert(ENTRY_GET_STATE(entry) == READING || ENTRY_GET_STATE(entry) == READING2); pthread_mutex_unlock(&priv->mutex); r = (*priv->inner->read_block)(priv->inner, block_num, data, md5, entry->verify ? 
entry->md5 : NULL, 0); pthread_mutex_lock(&priv->mutex); S3BCACHE_CHECK_INVARIANTS(priv); /* The entry should still exist and be in state READING[2] */ assert(s3b_hash_get(priv->hashtable, block_num) == entry); assert(ENTRY_GET_STATE(entry) == READING || ENTRY_GET_STATE(entry) == READING2); assert(config->cache_file != NULL || entry->u.data == data); /* * We know two things at this point: the state is going to * change from READING[2] and we will create new available space * in the cache. Wake up any threads waiting on those events. */ pthread_cond_broadcast(&priv->end_reading); pthread_cond_signal(&priv->space_avail); /* Check for unexpected error from underlying s3backer_store */ if (r != 0 && !(entry->verify && r == EEXIST)) goto fail; /* Handle READING2 blocks that were verified (revert to READING) */ if (entry->verify) { if (r == EEXIST) { /* MD5 matched our expectation, download avoided */ priv->stats.read_hits++; priv->stats.verified++; verified_but_not_read = 1; r = 0; } else { assert(r == 0); priv->stats.read_misses++; priv->stats.mismatch++; } entry = block_cache_verified(priv, entry); assert(ENTRY_GET_STATE(entry) == READING); } /* Copy the block data's into the destination buffer */ if (!verified_but_not_read) memcpy(dest, (char *)data + off, len); /* Copy data into the disk cache and free temporary buffer (if necessary) */ if (config->cache_file != NULL) { if (!verified_but_not_read) { if ((r = s3b_dcache_write_block(priv->dcache, entry->u.dslot, data, 0, config->block_size)) != 0) goto fail; } free(data); } /* Change entry from READING to CLEAN */ assert(ENTRY_GET_STATE(entry) == READING); assert(!entry->verify); if (config->cache_file != NULL) { if ((r = s3b_dcache_record_block(priv->dcache, entry->u.dslot, entry->block_num, md5)) != 0) (*config->log)(LOG_ERR, "can't record cached block! 
%s", strerror(r)); } entry->timeout = block_cache_get_time(priv) + priv->clean_timeout; TAILQ_INSERT_TAIL(&priv->cleans, entry, link); priv->num_cleans++; assert(ENTRY_GET_STATE(entry) == CLEAN); /* If data was only verified, we have to actually go read it now */ if (verified_but_not_read) goto again; /* Done */ return 0; fail: assert(r != 0); assert(ENTRY_GET_STATE(entry) == READING || ENTRY_GET_STATE(entry) == READING2); if (config->cache_file != NULL) s3b_dcache_free_block(priv->dcache, entry->u.dslot); s3b_hash_remove(priv->hashtable, entry->block_num); free(data); free(entry); return r; } static int block_cache_write_block(struct s3backer_store *const s3b, s3b_block_t block_num, const void *src, u_char *md5, check_cancel_t *check_cancel, void *check_cancel_arg) { struct block_cache_private *const priv = s3b->data; struct block_cache_conf *const config = priv->config; assert(md5 == NULL); return block_cache_write(priv, block_num, 0, config->block_size, src); } static int block_cache_write_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int off, u_int len, const void *src) { struct block_cache_private *const priv = s3b->data; return block_cache_write(priv, block_num, off, len, src); } /* * Write a block or a portion thereof. 
*/ static int block_cache_write(struct block_cache_private *const priv, s3b_block_t block_num, u_int off, u_int len, const void *src) { struct block_cache_conf *const config = priv->config; struct cache_entry *entry; int partial_miss = 0; int r; /* Sanity check */ assert(off <= config->block_size); assert(len <= config->block_size); assert(off + len <= config->block_size); /* Grab lock */ pthread_mutex_lock(&priv->mutex); again: /* Sanity check */ S3BCACHE_CHECK_INVARIANTS(priv); if (priv->num_threads == 0) { (*config->log)(LOG_ERR, "block_cache_write(): no threads created yet"); r = ENOTCONN; goto fail; } /* Find cache entry */ if ((entry = s3b_hash_get(priv->hashtable, block_num)) != NULL) { assert(entry->block_num == block_num); switch (ENTRY_GET_STATE(entry)) { case READING: /* wait for entry to leave READING */ case READING2: pthread_cond_wait(&priv->end_reading, &priv->mutex); goto again; case CLEAN2: /* convert to CLEAN, then proceed */ entry = block_cache_verified(priv, entry); // FALLTHROUGH case CLEAN: /* update data, move to state DIRTY */ /* If there are too many dirty blocks, we have to wait */ if (config->max_dirty != 0 && priv->num_dirties >= config->max_dirty) { pthread_cond_signal(&priv->worker_work); pthread_cond_wait(&priv->write_complete, &priv->mutex); goto again; } /* Record dirty disk cache entry */ if (config->cache_file != NULL) { if ((r = s3b_dcache_record_block(priv->dcache, entry->u.dslot, entry->block_num, NULL)) != 0) (*config->log)(LOG_ERR, "can't dirty cached block %u! 
%s", block_num, strerror(r)); } /* Change from CLEAN to DIRTY */ TAILQ_REMOVE(&priv->cleans, entry, link); priv->num_cleans--; TAILQ_INSERT_TAIL(&priv->dirties, entry, link); priv->num_dirties++; entry->timeout = block_cache_get_time(priv) + priv->dirty_timeout; pthread_cond_signal(&priv->worker_work); // FALLTHROUGH case WRITING2: /* update data, stay in state WRITING2 */ case WRITING: /* update data, move to state WRITING2 */ case DIRTY: /* update data, stay in state DIRTY */ if ((r = block_cache_write_data(priv, entry, src, off, len)) != 0) (*config->log)(LOG_ERR, "error updating dirty block! %s", strerror(r)); entry->dirty = 1; if (!partial_miss) priv->stats.write_hits++; break; default: assert(0); break; } goto success; } /* * The block is not in the cache. If we're writing a partial block, * we have to read it into the cache first. */ if (off != 0 || len != config->block_size) { if ((r = block_cache_do_read(priv, block_num, 0, 0, NULL, 0)) != 0) goto fail; if (partial_miss++ == 0) priv->stats.write_misses++; goto again; } /* If there are too many dirty blocks, we have to wait */ if (config->max_dirty != 0 && priv->num_dirties >= config->max_dirty) { pthread_cond_signal(&priv->worker_work); pthread_cond_wait(&priv->write_complete, &priv->mutex); goto again; } /* Get a cache entry, evicting a CLEAN[2] entry if necessary */ if ((r = block_cache_get_entry(priv, &entry, NULL)) != 0) goto fail; /* If cache is full, wait for an entry to go CLEAN[2] so we can evict it */ if (entry == NULL) { pthread_cond_wait(&priv->space_avail, &priv->mutex); goto again; } /* Record block data */ if ((r = block_cache_write_data(priv, entry, src, off, len)) != 0) (*config->log)(LOG_ERR, "error updating dirty block! 
%s", strerror(r)); /* Initialize a new DIRTY cache entry */ priv->stats.write_misses++; entry->block_num = block_num; entry->timeout = block_cache_get_time(priv) + priv->dirty_timeout; entry->dirty = 1; assert(off == 0 && len == config->block_size); s3b_hash_put_new(priv->hashtable, entry); TAILQ_INSERT_TAIL(&priv->dirties, entry, link); priv->num_dirties++; assert(ENTRY_GET_STATE(entry) == DIRTY); /* Record dirty disk cache entry */ if (config->cache_file != NULL) { if ((r = s3b_dcache_record_block(priv->dcache, entry->u.dslot, entry->block_num, NULL)) != 0) (*config->log)(LOG_ERR, "can't dirty cached block %u! %s", block_num, strerror(r)); } /* Wake up a worker thread to go write it */ pthread_cond_signal(&priv->worker_work); success: /* If doing synchronous writes, wait for write to complete */ if (config->synchronous) { while (1) { int state; /* Wait for notification */ pthread_cond_wait(&priv->write_complete, &priv->mutex); /* Sanity check */ S3BCACHE_CHECK_INVARIANTS(priv); /* Find cache entry */ if ((entry = s3b_hash_get(priv->hashtable, block_num)) == NULL) break; /* See if it is now clean */ state = ENTRY_GET_STATE(entry); if (state == CLEAN || state == CLEAN2 || state == READING || state == READING2) break; /* Not written yet, go back to sleep */ continue; } } r = 0; fail: /* Done */ pthread_mutex_unlock(&priv->mutex); return r; } /* * Acquire a new cache entry. If the cache is full, and there is at least one * CLEAN[2] entry, evict and return it (uninitialized). Otherwise, return NULL entry. * * On successful return, *datap will point to a malloc'd buffer for the data. If using * the disk cache, this will be a temporary buffer, otherwise it's the in-memory buffer. * If datap == NULL, then in the case of the disk cache only, no buffer is allocated. * * This assumes the mutex is held. * * Returns non-zero on error. 
*/
static int
block_cache_get_entry(struct block_cache_private *priv, struct cache_entry **entryp, void **datap)
{
    struct block_cache_conf *const config = priv->config;
    struct cache_entry *entry;
    void *data = NULL;
    int r;

again:
    /*
     * If cache is not full, allocate a new entry. We allocate the structure
     * and the data separately in hopes that the malloc() implementation will
     * put the data into its own page of virtual memory.
     *
     * If the cache is full, try to evict a clean entry.
     */
    if (s3b_hash_size(priv->hashtable) < config->cache_size) {
        if ((entry = calloc(1, sizeof(*entry))) == NULL) {
            r = errno;
            (*config->log)(LOG_ERR, "can't allocate block cache entry: %s", strerror(r));
            priv->stats.out_of_memory_errors++;
            return r;
        }
    } else if ((entry = TAILQ_FIRST(&priv->cleans)) != NULL) {
        /* Evict the oldest clean entry, then retry; the retry will take the "not full" branch */
        block_cache_free_entry(priv, &entry);
        goto again;
    } else
        goto done;                              /* cache is full of non-evictable entries: report *entryp == NULL */

    /* Get associated data buffer */
    if (datap != NULL || config->cache_file == NULL) {
        if ((data = malloc(config->block_size)) == NULL) {
            r = errno;
            (*config->log)(LOG_ERR, "can't allocate block cache buffer: %s", strerror(r));
            priv->stats.out_of_memory_errors++;
            free(entry);
            return r;
        }
    }

    /* Get permanent data buffer */
    if (config->cache_file == NULL)
        entry->u.data = data;                   /* in-memory cache: the malloc'd buffer is the permanent storage */
    else if ((r = s3b_dcache_alloc_block(priv->dcache, &entry->u.dslot)) != 0) {    /* should not happen */
        (*config->log)(LOG_ERR, "can't alloc cached block! %s", strerror(r));
        free(data);                             /* OK if NULL */
        data = NULL;
        free(entry);
        entry = NULL;                           /* reported to caller the same as "cache full" */
        goto done;
    }

done:
    /* Return what we got */
    *entryp = entry;
    if (datap != NULL)
        *datap = data;
    return 0;
}

/*
 * Evict a CLEAN[2] entry.
*/ static void block_cache_free_entry(struct block_cache_private *priv, struct cache_entry **entryp) { struct block_cache_conf *const config = priv->config; struct cache_entry *const entry = *entryp; int r; /* Sanity check */ assert(ENTRY_GET_STATE(entry) == CLEAN || ENTRY_GET_STATE(entry) == CLEAN2); /* Invalidate caller's pointer */ *entryp = NULL; /* Free the data */ if (config->cache_file != NULL) { if ((r = s3b_dcache_erase_block(priv->dcache, entry->u.dslot)) != 0) (*config->log)(LOG_ERR, "can't erase cached block! %s", strerror(r)); if ((r = s3b_dcache_free_block(priv->dcache, entry->u.dslot)) != 0) (*config->log)(LOG_ERR, "can't free cached block! %s", strerror(r)); } else free(entry->u.data); /* Remove entry from the clean list */ TAILQ_REMOVE(&priv->cleans, entry, link); s3b_hash_remove(priv->hashtable, entry->block_num); priv->num_cleans--; /* Free the entry */ free(entry); } /* * Worker thread main entry point. */ static void * block_cache_worker_main(void *arg) { struct block_cache_private *const priv = arg; struct block_cache_conf *const config = priv->config; struct cache_entry *entry; struct cache_entry *clean_entry = NULL; u_char md5[MD5_DIGEST_LENGTH]; uint32_t adjusted_now; uint32_t now; u_int thread_id; void *buf; int r; /* Grab lock */ pthread_mutex_lock(&priv->mutex); /* Assign myself a thread ID (for debugging purposes) */ thread_id = priv->thread_id++; /* * Allocate buffer for outgoing block data. We have to copy it before we send it in case * another write to this block comes in and updates the data associated with the cache entry. 
*/ if ((buf = malloc(config->block_size)) == NULL) { (*config->log)(LOG_ERR, "block_cache worker %u can't alloc buffer, exiting: %s", thread_id, strerror(errno)); goto done; } /* Repeatedly do stuff until told to stop */ while (1) { /* Sanity check */ S3BCACHE_CHECK_INVARIANTS(priv); /* Get current time */ now = block_cache_get_time(priv); /* Evict any CLEAN[2] blocks that have timed out (if enabled) */ if (priv->clean_timeout != 0) { while ((clean_entry = TAILQ_FIRST(&priv->cleans)) != NULL && now >= clean_entry->timeout) { block_cache_free_entry(priv, &clean_entry); pthread_cond_signal(&priv->space_avail); } } /* As we approach our maximum dirty block limit, force earlier than planned writes */ adjusted_now = now + (uint32_t)(priv->dirty_timeout * (block_cache_dirty_ratio(priv) / priv->max_dirty_ratio)); /* See if there is a block that needs writing */ if ((entry = TAILQ_FIRST(&priv->dirties)) != NULL && (priv->stopping || adjusted_now >= entry->timeout)) { /* If we are also supposed to do read-ahead, wake up a sibling to handle it */ if (priv->seq_count >= config->read_ahead_trigger && priv->ra_count < config->read_ahead) pthread_cond_signal(&priv->worker_work); /* Copy data to our private buffer; it may change while we're writing */ if ((r = block_cache_read_data(priv, entry, buf, 0, config->block_size)) != 0) { (*config->log)(LOG_ERR, "error reading cached block! 
%s", strerror(r)); sleep(5); continue; } /* Move to WRITING state */ assert(ENTRY_GET_STATE(entry) == DIRTY); TAILQ_REMOVE(&priv->dirties, entry, link); ENTRY_RESET_LINK(entry); entry->dirty = 0; entry->timeout = 0; assert(ENTRY_GET_STATE(entry) == WRITING); /* Attempt to write the block */ pthread_mutex_unlock(&priv->mutex); r = (*priv->inner->write_block)(priv->inner, entry->block_num, buf, md5, block_cache_check_cancel, priv); pthread_mutex_lock(&priv->mutex); S3BCACHE_CHECK_INVARIANTS(priv); /* Sanity checks */ assert(ENTRY_GET_STATE(entry) == WRITING || ENTRY_GET_STATE(entry) == WRITING2); /* If write attempt failed (or we canceled it), go back to the DIRTY state and try again later */ if (r != 0) { entry->dirty = 1; TAILQ_INSERT_HEAD(&priv->dirties, entry, link); continue; } /* If block was not modified while being written (WRITING), it is now CLEAN */ if (!entry->dirty) { if (config->cache_file != NULL) { if ((r = s3b_dcache_record_block(priv->dcache, entry->u.dslot, entry->block_num, md5)) != 0) (*config->log)(LOG_ERR, "can't record cached block! %s", strerror(r)); } priv->num_dirties--; TAILQ_INSERT_TAIL(&priv->cleans, entry, link); entry->verify = 0; entry->timeout = block_cache_get_time(priv) + priv->clean_timeout; priv->num_cleans++; assert(ENTRY_GET_STATE(entry) == CLEAN); pthread_cond_signal(&priv->space_avail); pthread_cond_broadcast(&priv->write_complete); continue; } /* Block was modified while being written (WRITING2), so it stays DIRTY */ TAILQ_INSERT_TAIL(&priv->dirties, entry, link); entry->timeout = now + priv->dirty_timeout; /* update for 2nd write timing conservatively */ continue; } /* Are we supposed to stop? 
*/ if (priv->stopping != 0) break; /* See if there is a read-ahead block that needs to be read */ if (priv->seq_count >= config->read_ahead_trigger && priv->ra_count < config->read_ahead) { while (priv->ra_count < config->read_ahead) { s3b_block_t ra_block; /* We will handle read-ahead for the next read-ahead block; claim it now */ ra_block = priv->seq_last + ++priv->ra_count; /* If block already exists in the cache, nothing needs to be done */ if (s3b_hash_get(priv->hashtable, ra_block) != NULL) continue; /* Perform a speculative read of the block so it will get stored in the cache */ (void)block_cache_do_read(priv, ra_block, 0, 0, NULL, 0); break; } continue; } /* There is nothing to do at this time; sleep until there is something to do */ if (entry == NULL || (clean_entry != NULL && clean_entry->timeout < entry->timeout)) entry = clean_entry; block_cache_worker_wait(priv, entry); } /* Decrement live worker thread count */ priv->num_threads--; pthread_cond_signal(&priv->worker_exit); done: /* Done */ pthread_mutex_unlock(&priv->mutex); free(buf); return NULL; } /* * See if we want to cancel the current write for the given block. */ static int block_cache_check_cancel(void *arg, s3b_block_t block_num) { struct block_cache_private *const priv = arg; struct cache_entry *entry; int r; /* Lock mutex */ pthread_mutex_lock(&priv->mutex); S3BCACHE_CHECK_INVARIANTS(priv); /* Find cache entry */ entry = s3b_hash_get(priv->hashtable, block_num); /* Sanity check */ assert(entry != NULL); assert(entry->block_num == block_num); assert(ENTRY_GET_STATE(entry) == WRITING || ENTRY_GET_STATE(entry) == WRITING2); /* If block is in the WRITING2 state, cancel the current (obsolete) write operation */ r = entry->dirty; /* Unlock mutex */ pthread_mutex_unlock(&priv->mutex); return r; } /* * Sleep until either the 'worker_work' condition becomes true, or the * entry (if any) times out. * * This assumes the mutex is held. 
*/ static void block_cache_worker_wait(struct block_cache_private *priv, struct cache_entry *entry) { uint64_t wake_time_millis; struct timespec wake_time; if (entry == NULL) { pthread_cond_wait(&priv->worker_work, &priv->mutex); return; } wake_time_millis = priv->start_time + ((uint64_t)entry->timeout * TIME_UNIT_MILLIS); wake_time.tv_sec = wake_time_millis / 1000; wake_time.tv_nsec = (wake_time_millis % 1000) * 1000000; pthread_cond_timedwait(&priv->worker_work, &priv->mutex, &wake_time); } /* * Return current time in units of TIME_UNIT_MILLIS milliseconds since startup. */ static uint32_t block_cache_get_time(struct block_cache_private *priv) { uint64_t since_start; since_start = block_cache_get_time_millis() - priv->start_time; return (uint32_t)(since_start / TIME_UNIT_MILLIS); } /* * Return current time in milliseconds. */ static uint64_t block_cache_get_time_millis(void) { struct timeval tv; gettimeofday(&tv, NULL); return (uint64_t)tv.tv_sec * 1000 + (uint64_t)tv.tv_usec / 1000; } static void block_cache_free_one(void *arg, void *value) { struct block_cache_private *const priv = arg; struct block_cache_conf *const config = priv->config; struct cache_entry *const entry = value; if (config->cache_file == NULL) free(entry->u.data); free(entry); } /* * Mark an entry verified and free the extra bytes we allocated for the MD5 checksum. 
*/
static struct cache_entry *
block_cache_verified(struct block_cache_private *priv, struct cache_entry *entry)
{
    struct cache_entry *new_entry;

    /* Sanity check */
    assert(entry->verify);
    assert(ENTRY_GET_STATE(entry) == CLEAN2 || ENTRY_GET_STATE(entry) == READING2);

    /* Allocate new, smaller entry; if we can't no big deal */
    if ((new_entry = malloc(sizeof(*entry))) == NULL)
        goto done;                              /* just clear the flag on the oversized entry instead */
    memcpy(new_entry, entry, sizeof(*entry));   /* the trailing MD5 bytes are deliberately not copied */

    /* Update all references that point to the entry */
    s3b_hash_put(priv->hashtable, new_entry);
    if (ENTRY_IN_LIST(entry)) {                 /* CLEAN2 entries live on the cleans list; READING2 do not */
        TAILQ_REMOVE(&priv->cleans, entry, link);
        TAILQ_INSERT_TAIL(&priv->cleans, new_entry, link);
    }
    free(entry);
    entry = new_entry;

done:
    /* Mark entry as verified */
    entry->verify = 0;
    return entry;                               /* caller must use the returned pointer; the old one may be freed */
}

/*
 * Read the data from a cached block into a buffer.
 */
static int
block_cache_read_data(struct block_cache_private *priv, struct cache_entry *entry, void *dest, u_int off, u_int len)
{
    struct block_cache_conf *const config = priv->config;

    /* Sanity check */
    assert(off <= config->block_size);
    assert(len <= config->block_size);
    assert(off + len <= config->block_size);

    /* Handle easy in-memory case */
    if (config->cache_file == NULL) {
        memcpy(dest, (char *)entry->u.data + off, len);
        return 0;
    }

    /* Handle on-disk case */
    return s3b_dcache_read_block(priv->dcache, entry->u.dslot, dest, off, len);
}

/*
 * Write the data in a buffer to a cached block.
*/ static int block_cache_write_data(struct block_cache_private *priv, struct cache_entry *entry, const void *src, u_int off, u_int len) { struct block_cache_conf *const config = priv->config; /* Sanity check */ assert(off <= config->block_size); assert(len <= config->block_size); assert(off + len <= config->block_size); /* Handle easy in-memory case */ if (config->cache_file == NULL) { if (src == NULL) memset((char *)entry->u.data + off, 0, len); else memcpy((char *)entry->u.data + off, src, len); return 0; } /* Handle on-disk case */ return s3b_dcache_write_block(priv->dcache, entry->u.dslot, src, off, len); } /* * Compute dirty ratio, i.e., percent of total cache space occupied by entries * that are not CLEAN[2] or READING[2]. */ static double block_cache_dirty_ratio(struct block_cache_private *priv) { struct block_cache_conf *const config = priv->config; return (double)priv->num_dirties / (double)config->cache_size; } static void block_cache_dirty_callback(void *arg, void *value) { struct cbinfo *const cbinfo = arg; struct cache_entry *const entry = value; switch (ENTRY_GET_STATE(entry)) { case CLEAN: case CLEAN2: case READING: case READING2: break; case WRITING2: case WRITING: case DIRTY: (*cbinfo->callback)(cbinfo->arg, entry->block_num); break; default: assert(0); break; } } #ifndef NDEBUG /* Accounting structure */ struct check_info { u_int num_clean; u_int num_dirty; u_int num_reading; u_int num_writing; u_int num_writing2; }; static void block_cache_check_invariants(struct block_cache_private *priv) { struct block_cache_conf *const config = priv->config; struct cache_entry *entry; struct check_info info; int clean_len = 0; int dirty_len = 0; /* Check CLEANs and CLEAN2s */ for (entry = TAILQ_FIRST(&priv->cleans); entry != NULL; entry = TAILQ_NEXT(entry, link)) { assert(ENTRY_GET_STATE(entry) == CLEAN || ENTRY_GET_STATE(entry) == CLEAN2); assert(s3b_hash_get(priv->hashtable, entry->block_num) == entry); clean_len++; } assert(clean_len == priv->num_cleans); 
/* Check DIRTYs */ for (entry = TAILQ_FIRST(&priv->dirties); entry != NULL; entry = TAILQ_NEXT(entry, link)) { assert(ENTRY_GET_STATE(entry) == DIRTY); assert(s3b_hash_get(priv->hashtable, entry->block_num) == entry); dirty_len++; } /* Check hash table size */ assert(s3b_hash_size(priv->hashtable) <= config->cache_size); /* Check hash table entries */ memset(&info, 0, sizeof(info)); s3b_hash_foreach(priv->hashtable, block_cache_check_one, &info); /* Check agreement */ assert(info.num_clean == clean_len); assert(info.num_dirty == dirty_len); assert(info.num_clean + info.num_dirty + info.num_reading + info.num_writing + info.num_writing2 == s3b_hash_size(priv->hashtable)); assert(priv->num_dirties == info.num_dirty + info.num_writing + info.num_writing2); /* Check read-ahead */ assert(priv->ra_count <= config->read_ahead); } static void block_cache_check_one(void *arg, void *value) { struct cache_entry *const entry = value; struct check_info *const info = arg; assert(entry != NULL); switch (ENTRY_GET_STATE(entry)) { case CLEAN: case CLEAN2: info->num_clean++; break; case DIRTY: info->num_dirty++; break; case READING: case READING2: assert(!entry->dirty); info->num_reading++; break; case WRITING: info->num_writing++; break; case WRITING2: info->num_writing2++; break; default: assert(0); break; } } #endif s3backer-1.5.4/block_cache.h000066400000000000000000000056041354714241400156160ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. */ /* Configuration info structure for block_cache */ struct block_cache_conf { u_int block_size; u_int cache_size; u_int write_delay; u_int max_dirty; u_int synchronous; u_int timeout; u_int num_threads; u_int read_ahead; u_int read_ahead_trigger; u_int no_verify; u_int recover_dirty_blocks; u_int perform_flush; const char *cache_file; log_func_t *log; }; /* Statistics structure for block_cache */ struct block_cache_stats { u_int initial_size; u_int current_size; double dirty_ratio; u_int read_hits; u_int read_misses; u_int write_hits; u_int write_misses; u_int verified; u_int mismatch; u_int out_of_memory_errors; }; /* block_cache.c */ extern struct s3backer_store *block_cache_create(struct block_cache_conf *config, struct s3backer_store *inner); extern void block_cache_get_stats(struct s3backer_store *s3b, struct block_cache_stats *stats); extern void block_cache_clear_stats(struct s3backer_store *s3b); 
s3backer-1.5.4/block_part.c000066400000000000000000000070731354714241400155160ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. */ #include "s3backer.h" #include "block_part.h" /* * Generic "dumb" implementation of the read_block_part function. 
*/ int block_part_read_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int block_size, u_int off, u_int len, void *dest) { u_char *buf; int r; /* Sanity check */ assert(off <= block_size); assert(len <= block_size); assert(off + len <= block_size); /* Check for degenerate cases */ if (len == 0) return 0; if (off == 0 && len == block_size) return (*s3b->read_block)(s3b, block_num, dest, NULL, NULL, 0); /* Allocate buffer */ if ((buf = malloc(block_size)) == NULL) return errno; /* Read entire block */ if ((r = (*s3b->read_block)(s3b, block_num, buf, NULL, NULL, 0)) != 0) { free(buf); return r; } /* Copy out desired fragment */ memcpy(dest, buf + off, len); /* Done */ free(buf); return 0; } /* * Generic "dumb" implementation of the write_block_part function. */ int block_part_write_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int block_size, u_int off, u_int len, const void *src) { u_char *buf; int r; /* Sanity check */ assert(off <= block_size); assert(len <= block_size); assert(off + len <= block_size); /* Check for degenerate cases */ if (len == 0) return 0; if (off == 0 && len == block_size) return (*s3b->write_block)(s3b, block_num, src, NULL, NULL, NULL); /* Allocate buffer */ if ((buf = malloc(block_size)) == NULL) return errno; /* Read entire block */ if ((r = (*s3b->read_block)(s3b, block_num, buf, NULL, NULL, 0)) != 0) { free(buf); return r; } /* Write in supplied fragment */ memcpy(buf + off, src, len); /* Write back entire block */ if ((r = (*s3b->write_block)(s3b, block_num, buf, NULL, NULL, NULL)) != 0) { free(buf); return r; } /* Done */ free(buf); return 0; } s3backer-1.5.4/block_part.h000066400000000000000000000036071354714241400155220ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. 
Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. */ /* block_part.c */ extern int block_part_read_block_part(struct s3backer_store *inner, s3b_block_t block_num, u_int block_size, u_int off, u_int len, void *dest); extern int block_part_write_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int block_size, u_int off, u_int len, const void *src); s3backer-1.5.4/cleanup.sh000077500000000000000000000006201354714241400152070ustar00rootroot00000000000000#!/bin/sh # # Script to clean out generated GNU auto* gunk. 
# set -e echo "cleaning up" rm -rf autom4te*.cache scripts aclocal.m4 configure config.log config.status .deps stamp-h1 rm -f config.h.in config.h.in~ config.h rm -f scripts TAGS find . \( -name Makefile -o -name Makefile.in \) -print0 | xargs -0 rm -f rm -f gitrev.c s3backer.spec rm -f *.o s3backer tester rm -f s3backer-?.?.?.tar.gz s3backer-1.5.4/configure.ac000066400000000000000000000126511354714241400155160ustar00rootroot00000000000000# # s3backer - FUSE-based single file backing store via Amazon S3 # # Copyright 2008-2011 Archie L. Cobbs # # This program is free software; you can redistribute it and/or # modify it under the terms of the GNU General Public License # as published by the Free Software Foundation; either version 2 # of the License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA # 02110-1301, USA. # # In addition, as a special exception, the copyright holders give # permission to link the code of portions of this program with the # OpenSSL library under certain conditions as described in each # individual source file, and distribute linked combinations including # the two. # # You must obey the GNU General Public License in all respects for all # of the code used other than OpenSSL. If you modify file(s) with this # exception, you may extend this exception to your version of the # file(s), but you are not obligated to do so. If you do not wish to do # so, delete this exception statement from your version. If you delete # this exception statement from all source files in the program, then # also delete it here. 
AC_INIT([s3backer FUSE filesystem backed by Amazon S3], [1.5.4], [https://github.com/archiecobbs/s3backer], [s3backer]) AC_CONFIG_AUX_DIR(scripts) AM_INIT_AUTOMAKE(foreign) dnl AM_MAINTAINER_MODE AC_PREREQ(2.59) AC_PREFIX_DEFAULT(/usr) AC_PROG_MAKE_SET [CFLAGS="-g -O3 -pipe -Wall -Waggregate-return -Wcast-align -Wchar-subscripts -Wcomment -Wformat -Wimplicit -Wmissing-declarations -Wmissing-prototypes -Wnested-externs -Wno-long-long -Wparentheses -Wpointer-arith -Wredundant-decls -Wreturn-type -Wswitch -Wtrigraphs -Wuninitialized -Wunused -Wwrite-strings -Wshadow -Wstrict-prototypes -Wcast-qual $CFLAGS"] AC_SUBST(CFLAGS) # Compile flags for Linux. See https://stackoverflow.com/a/29201732 AC_DEFINE(_GNU_SOURCE, 1, GNU functions) AC_DEFINE(_BSD_SOURCE, 1, BSD functions) AC_DEFINE(_DEFAULT_SOURCE, 1, Default functions) # Compile flags for Mac OS AC_DEFINE(_DARWIN_C_SOURCE, 1, MacOS functions) # Compile flags for FUSE AC_DEFINE(FUSE_USE_VERSION, 26, FUSE API version) AC_DEFINE(FUSE_FALLOCATE, 0, FUSE fallocate() support) # Check for required programs AC_PROG_INSTALL AC_PROG_CC # Check for required pkg-config'd stuff PKG_PROG_PKG_CONFIG(0.19) PKG_CHECK_MODULES(FUSE, fuse, [CFLAGS="${CFLAGS} ${FUSE_CFLAGS}" LDFLAGS="${LDFLAGS} ${FUSE_LIBS}"], [AC_MSG_ERROR(["fuse" not found in pkg-config])]) # Check for required libraries AC_CHECK_LIB(curl, curl_version,, [AC_MSG_ERROR([required library libcurl missing])]) AC_CHECK_LIB(crypto, BIO_new,, [AC_MSG_ERROR([required library libcrypto missing])]) AC_CHECK_LIB(expat, XML_ParserCreate,, [AC_MSG_ERROR([required library expat missing])]) AC_CHECK_LIB(fuse, fuse_version,, [AC_MSG_ERROR([required library libfuse missing])]) AC_CHECK_LIB(z, compressBound,, [AC_MSG_ERROR([required library zlib missing])]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ #include long x = CURLOPT_TCP_KEEPALIVE; ]])],, [AC_MSG_ERROR([unable to compile with curl, or curl version is < 7.25.0])]) # See if FUSE version is 2.9.2 or later AC_MSG_CHECKING([for fallocate() 
support in fuse]) AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[ #include struct fuse_operations x = { fallocate: (void*)1 }; ]])],[AC_MSG_RESULT([yes]); AC_DEFINE(FUSE_FALLOCATE)],AC_MSG_RESULT([no])) # Set some O/S specific stuff case `uname -s` in Darwin|FreeBSD) AC_CHECK_LIB(pthread, pthread_create,, [AC_MSG_ERROR([required library libpthread missing])]) ;; Linux) LDFLAGS="${LDFLAGS} -pthread" ;; *) ;; esac # Check for some O/S specific functions AC_CHECK_DECLS(fdatasync) # Check for required header files AC_HEADER_STDC AC_CHECK_HEADERS(assert.h ctype.h curl/curl.h err.h errno.h expat.h pthread.h stdarg.h stddef.h stdint.h stdio.h stdlib.h string.h syslog.h time.h unistd.h sys/queue.h openssl/bio.h openssl/buffer.h openssl/evp.h openssl/hmac.h openssl/md5.h zlib.h, [], [AC_MSG_ERROR([required header file '$ac_header' missing])]) # Optional features AC_ARG_ENABLE(assertions, AC_HELP_STRING([--enable-assertions], [enable debugging sanity checks (default NO)]), [test x"$enableval" = "xyes" || AC_DEFINE(NDEBUG, 1, [disable assertions])], [AC_DEFINE(NDEBUG, 1, [disable assertions])]) AC_ARG_ENABLE(gprof, AC_HELP_STRING([--enable-gprof], [Compile and link with gprof(1) support (default NO)]), [test x"$enableval" = "xyes" && CFLAGS="${CFLAGS} -pg"]) AC_ARG_ENABLE(Werror, AC_HELP_STRING([--enable-Werror], [enable compilation with -Werror flag (default NO)]), [test x"$enableval" = "xyes" && CFLAGS="${CFLAGS} -Werror"]) AC_ARG_ENABLE(sanitize, AC_HELP_STRING([--enable-sanitize], [enable compilation with -fsanitize=address and -fsanitize=undefined (default NO)]), [test x"$enableval" = "xyes" && CFLAGS="${CFLAGS} -fsanitize=address -fsanitize=undefined"]) # Generated files AC_CONFIG_FILES(Makefile) AC_CONFIG_FILES(s3backer.spec) AM_CONFIG_HEADER(config.h) # Go AC_OUTPUT s3backer-1.5.4/dcache.c000066400000000000000000001012221354714241400145740ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. 
Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. */ #include "s3backer.h" #include "dcache.h" /* * This file implements a simple on-disk storage area for cached blocks. * The file contains a header, a directory, and a data area. Each directory * entry indicates which block is stored in the corresponding "data slot" * in the data area and that block's MD5 checksum. Note the MD5 checksum is * the checksum of the stored data, which will differ from the actual block * data's MD5 if the block was compressed, encrypted, etc. when stored. 
* * File format: * * [ struct file_header ] * directory entry for data slot #0 * directory entry for data slot #1 * ... * directory entry for data slot #N-1 * padding up to getpagesize() * data slot #0 * data slot #1 * ... * data slot #N-1 */ /* Definitions */ #define DCACHE_SIGNATURE 0xe496f17b #define ROUNDUP2(x, y) (((x) + (y) - 1) & ~((y) - 1)) #define DIRECTORY_READ_CHUNK 1024 #define HDR_SIZE(flags) (((flags) & HDRFLG_NEW_FORMAT) == 0 ? sizeof(struct ofile_header) : sizeof(struct file_header)) #define DIR_ENTSIZE(flags) (((flags) & HDRFLG_NEW_FORMAT) == 0 ? sizeof(struct odir_entry) : sizeof(struct dir_entry)) #define DIR_OFFSET(flags, dslot) ((off_t)HDR_SIZE(flags) + (off_t)(dslot) * DIR_ENTSIZE(flags)) #define DATA_OFFSET(priv, dslot) ((off_t)(priv)->data + (off_t)(dslot) * (priv)->block_size) /* Bits for file_header.flags */ #define HDRFLG_NEW_FORMAT 0x00000001 #define HDRFLG_MASK 0x00000001 /* Bits for dir_entry.flags */ #define ENTFLG_DIRTY 0x00000001 #define ENTFLG_MASK 0x00000001 /* File header (old format) */ struct ofile_header { uint32_t signature; uint32_t header_size; uint32_t u_int_size; uint32_t s3b_block_t_size; uint32_t block_size; uint32_t data_align; uint32_t flags; u_int max_blocks; } __attribute__ ((packed)); /* File header */ struct file_header { uint32_t signature; uint32_t header_size; uint32_t u_int_size; uint32_t s3b_block_t_size; uint32_t block_size; uint32_t data_align; uint32_t flags; u_int max_blocks; int32_t mount_token; uint32_t spare[7]; /* future expansion */ } __attribute__ ((packed)); /* One directory entry (old format) */ struct odir_entry { s3b_block_t block_num; u_char md5[MD5_DIGEST_LENGTH]; } __attribute__ ((packed)); /* One directory entry (new format) */ struct dir_entry { s3b_block_t block_num; u_char md5[MD5_DIGEST_LENGTH]; uint32_t flags; } __attribute__ ((packed)); /* Private structure */ struct s3b_dcache { int fd; log_func_t *log; char *filename; void *zero_block; u_int block_size; u_int max_blocks; u_int 
num_alloc; uint32_t flags; /* copy of file_header.flags */ off_t data; u_int free_list_len; u_int free_list_alloc; s3b_block_t *free_list; }; /* Internal functions */ static int s3b_dcache_write_entry(struct s3b_dcache *priv, u_int dslot, const struct dir_entry *entry); #ifndef NDEBUG static int s3b_dcache_entry_is_empty(struct s3b_dcache *priv, u_int dslot); static int s3b_dcache_entry_write_ok(struct s3b_dcache *priv, u_int dslot, s3b_block_t block_num, u_int dirty); static int s3b_dcache_read_entry(struct s3b_dcache *priv, u_int dslot, struct dir_entry *entryp); #endif static int s3b_dcache_create_file(struct s3b_dcache *priv, int *fdp, const char *filename, u_int max_blocks, struct file_header *headerp); static int s3b_dcache_resize_file(struct s3b_dcache *priv, const struct file_header *header); static int s3b_dcache_init_free_list(struct s3b_dcache *priv, s3b_dcache_visit_t *visitor, void *arg, u_int visit_dirty); static int s3b_dcache_push(struct s3b_dcache *priv, u_int dslot); static void s3b_dcache_pop(struct s3b_dcache *priv, u_int *dslotp); static int s3b_dcache_read(struct s3b_dcache *priv, off_t offset, void *data, size_t len); static int s3b_dcache_write(struct s3b_dcache *priv, off_t offset, const void *data, size_t len); static int s3b_dcache_write2(struct s3b_dcache *priv, int fd, const char *filename, off_t offset, const void *data, size_t len); /* Internal variables */ static const struct dir_entry zero_entry; /* Public functions */ int s3b_dcache_open(struct s3b_dcache **dcachep, log_func_t *log, const char *filename, u_int block_size, u_int max_blocks, s3b_dcache_visit_t *visitor, void *arg, u_int visit_dirty) { struct ofile_header oheader; struct file_header header; struct s3b_dcache *priv; struct stat sb; int r; /* Sanity check */ if (max_blocks == 0) return EINVAL; /* Initialize private structure */ if ((priv = malloc(sizeof(*priv))) == NULL) return errno; memset(priv, 0, sizeof(*priv)); priv->fd = -1; priv->log = log; priv->block_size = 
block_size; priv->max_blocks = max_blocks; if ((priv->filename = strdup(filename)) == NULL) { r = errno; goto fail1; } if ((priv->zero_block = calloc(1, block_size)) == NULL) { r = errno; goto fail2; } /* Create cache file if it doesn't already exist */ if (stat(priv->filename, &sb) == -1 && errno == ENOENT) { (*priv->log)(LOG_NOTICE, "creating new cache file `%s' with capacity %u blocks", priv->filename, priv->max_blocks); if ((r = s3b_dcache_create_file(priv, &priv->fd, priv->filename, priv->max_blocks, NULL)) != 0) goto fail3; (void)close(priv->fd); priv->fd = -1; } retry: /* Open cache file */ assert(priv->fd == -1); if ((priv->fd = open(priv->filename, O_RDWR, 0)) == -1) { r = errno; (*priv->log)(LOG_ERR, "can't open cache file `%s': %s", priv->filename, strerror(r)); goto fail3; } /* Get file info */ if (fstat(priv->fd, &sb) == -1) { r = errno; goto fail4; } /* Read in header with backward compatible support for older header format */ if (sb.st_size < sizeof(oheader)) { (*priv->log)(LOG_ERR, "invalid cache file `%s': file is truncated (size %ju < %u)", priv->filename, (uintmax_t)sb.st_size, (u_int)sizeof(oheader)); r = EINVAL; goto fail4; } if ((r = s3b_dcache_read(priv, (off_t)0, &oheader, sizeof(oheader))) != 0) { (*priv->log)(LOG_ERR, "can't read cache file `%s' header: %s", priv->filename, strerror(r)); goto fail4; } switch (oheader.header_size) { case sizeof(oheader): /* old format */ memset(&header, 0, sizeof(header)); memcpy(&header, &oheader, sizeof(oheader)); break; case sizeof(header): /* new format */ if ((r = s3b_dcache_read(priv, (off_t)0, &header, sizeof(header))) != 0) { (*priv->log)(LOG_ERR, "can't read cache file `%s' header: %s", priv->filename, strerror(r)); goto fail4; } break; default: (*priv->log)(LOG_ERR, "invalid cache file `%s': %s %d", priv->filename, "invalid header size", (int)oheader.header_size); r = EINVAL; goto fail4; } /* Verify header - all but number of blocks */ r = EINVAL; if (header.signature != DCACHE_SIGNATURE) { 
(*priv->log)(LOG_ERR, "invalid cache file `%s': wrong signature %08x != %08x", priv->filename, header.signature, DCACHE_SIGNATURE); goto fail4; } if (header.header_size != HDR_SIZE(header.flags)) { (*priv->log)(LOG_ERR, "invalid cache file `%s': %s %d != %d", priv->filename, "invalid header size", (int)header.header_size, (int)HDR_SIZE(header.flags)); goto fail4; } if (header.u_int_size != sizeof(u_int)) { (*priv->log)(LOG_ERR, "invalid cache file `%s': created with sizeof(u_int) %u != %u", priv->filename, header.u_int_size, (u_int)sizeof(u_int)); goto fail4; } if (header.s3b_block_t_size != sizeof(s3b_block_t)) { (*priv->log)(LOG_ERR, "invalid cache file `%s': created with sizeof(s3b_block_t) %u != %u", priv->filename, header.s3b_block_t_size, (u_int)sizeof(s3b_block_t)); goto fail4; } if (header.block_size != priv->block_size) { (*priv->log)(LOG_ERR, "invalid cache file `%s': created with block size %u != %u", priv->filename, header.block_size, priv->block_size); goto fail4; } if (header.data_align != getpagesize()) { (*priv->log)(LOG_ERR, "invalid cache file `%s': created with alignment %u != %u", priv->filename, header.data_align, getpagesize()); goto fail4; } if ((header.flags & ~HDRFLG_MASK) != 0) { (*priv->log)(LOG_ERR, "invalid cache file `%s': %s", priv->filename, "unrecognized flags present"); goto fail4; } priv->flags = header.flags; /* Check number of blocks, shrinking or expanding if necessary */ if (header.max_blocks != priv->max_blocks) { (*priv->log)(LOG_NOTICE, "cache file `%s' was created with capacity %u != %u blocks, automatically %s", priv->filename, header.max_blocks, priv->max_blocks, header.max_blocks < priv->max_blocks ? 
"expanding" : "shrinking"); if ((r = s3b_dcache_resize_file(priv, &header)) != 0) goto fail4; (*priv->log)(LOG_INFO, "successfully resized cache file `%s' from %u to %u blocks", priv->filename, header.max_blocks, priv->max_blocks); goto retry; } /* Verify file's directory is not truncated */ if (sb.st_size < DIR_OFFSET(priv->flags, priv->max_blocks)) { (*priv->log)(LOG_ERR, "invalid cache file `%s': file is truncated (size %ju < %ju)", priv->filename, (uintmax_t)sb.st_size, (uintmax_t)DIR_OFFSET(priv->flags, priv->max_blocks)); goto fail4; } /* Compute offset of first data block */ priv->data = ROUNDUP2(DIR_OFFSET(priv->flags, priv->max_blocks), header.data_align); /* Read the directory to build the free list and visit allocated blocks */ if (visitor != NULL && (r = s3b_dcache_init_free_list(priv, visitor, arg, visit_dirty)) != 0) goto fail4; /* Done */ *dcachep = priv; return 0; fail4: close(priv->fd); fail3: free(priv->zero_block); fail2: free(priv->filename); fail1: free(priv->free_list); free(priv); return r; } int s3b_dcache_has_mount_token(struct s3b_dcache *priv) { return (priv->flags & HDRFLG_NEW_FORMAT) != 0; } int s3b_dcache_set_mount_token(struct s3b_dcache *priv, int32_t *old_valuep, int32_t new_value) { int r; /* Read old value */ if (old_valuep != NULL) { if ((r = s3b_dcache_read(priv, offsetof(struct file_header, mount_token), old_valuep, sizeof(*old_valuep))) != 0) return r; } /* Write new value */ if (new_value >= 0) { /* Update file */ if ((r = s3b_dcache_write(priv, offsetof(struct file_header, mount_token), &new_value, sizeof(new_value))) != 0) return r; /* Sync to disk */ s3b_dcache_fsync(priv); } /* Done */ return 0; } void s3b_dcache_close(struct s3b_dcache *priv) { close(priv->fd); free(priv->zero_block); free(priv->filename); free(priv->free_list); free(priv); } u_int s3b_dcache_size(struct s3b_dcache *priv) { return priv->num_alloc; } /* * Allocate a dslot for a block's data. 
We don't record this block in the directory yet; * that is done by s3b_dcache_record_block(). */ int s3b_dcache_alloc_block(struct s3b_dcache *priv, u_int *dslotp) { /* Any free dslots? */ if (priv->free_list_len == 0) return ENOMEM; /* Pop off the next free dslot */ s3b_dcache_pop(priv, dslotp); /* Directory entry should be empty */ assert(s3b_dcache_entry_is_empty(priv, *dslotp)); /* Done */ priv->num_alloc++; return 0; } /* * Record a block's dslot in the directory. After this function is called, the block will * be visible in the directory and picked up after a restart. * * If md5 != NULL, the block is CLEAN; if md5 == NULL, the block is DIRTY. * * This should be called AFTER the data for the block has already been written. * * There MUST NOT be a directory entry for the block. */ int s3b_dcache_record_block(struct s3b_dcache *priv, u_int dslot, s3b_block_t block_num, const u_char *md5) { const u_int dirty = md5 == NULL; struct dir_entry entry; int r; /* Sanity check */ assert(dslot < priv->max_blocks); /* Directory entry should be writable */ assert(s3b_dcache_entry_write_ok(priv, dslot, block_num, dirty)); /* If cache file is older format, it doesn't store dirty blocks, so just erase it instead (prior behavior) */ if (dirty && (priv->flags & HDRFLG_NEW_FORMAT) == 0) { s3b_dcache_erase_block(priv, dslot); return 0; } /* Make sure any new data is written to disk before updating the directory */ if ((r = s3b_dcache_fsync(priv)) != 0) return r; /* Update directory */ memset(&entry, 0, sizeof(entry)); entry.block_num = block_num; entry.flags = dirty ? ENTFLG_DIRTY : 0; if (!dirty) memcpy(&entry.md5, md5, MD5_DIGEST_LENGTH); if ((r = s3b_dcache_write_entry(priv, dslot, &entry)) != 0) return r; /* Done */ return 0; } /* * Erase the directory entry for a dslot. After this function is called, the block will * no longer be visible in the directory after a restart. * * This should be called BEFORE any new data for the block is written. 
* * There MUST be a directory entry for the block. */ int s3b_dcache_erase_block(struct s3b_dcache *priv, u_int dslot) { int r; /* Sanity check */ assert(dslot < priv->max_blocks); /* Update directory */ if ((r = s3b_dcache_write_entry(priv, dslot, &zero_entry)) != 0) return r; /* Make sure directory entry is written to disk before any new data is written */ if ((r = s3b_dcache_fsync(priv)) != 0) return r; /* Done */ return 0; } /* * Free a no-longer used dslot. * * There MUST NOT be a directory entry for the block. */ int s3b_dcache_free_block(struct s3b_dcache *priv, u_int dslot) { int r; /* Sanity check */ assert(dslot < priv->max_blocks); /* Directory entry should be empty */ assert(s3b_dcache_entry_is_empty(priv, dslot)); /* Push dslot onto free list */ if ((r = s3b_dcache_push(priv, dslot)) != 0) return r; /* Done */ priv->num_alloc--; return 0; } /* * Read data from one dslot. */ int s3b_dcache_read_block(struct s3b_dcache *priv, u_int dslot, void *dest, u_int off, u_int len) { /* Sanity check */ assert(dslot < priv->max_blocks); assert(off <= priv->block_size); assert(len <= priv->block_size); assert(off + len <= priv->block_size); /* Read data */ return s3b_dcache_read(priv, DATA_OFFSET(priv, dslot) + off, dest, len); } /* * Write data into one dslot. */ int s3b_dcache_write_block(struct s3b_dcache *priv, u_int dslot, const void *src, u_int off, u_int len) { /* Sanity check */ assert(dslot < priv->max_blocks); assert(off <= priv->block_size); assert(len <= priv->block_size); assert(off + len <= priv->block_size); /* Write data */ return s3b_dcache_write(priv, DATA_OFFSET(priv, dslot) + off, src != NULL ? src : priv->zero_block, len); } /* * Synchronize outstanding changes to persistent storage. 
*/ int s3b_dcache_fsync(struct s3b_dcache *priv) { int r; #if HAVE_DECL_FDATASYNC r = fdatasync(priv->fd); #else r = fsync(priv->fd); #endif if (r == -1) { r = errno; (*priv->log)(LOG_ERR, "error fsync'ing cache file `%s': %s", priv->filename, strerror(r)); } return 0; } /* Internal functions */ #ifndef NDEBUG static int s3b_dcache_entry_is_empty(struct s3b_dcache *priv, u_int dslot) { struct dir_entry entry; (void)s3b_dcache_read_entry(priv, dslot, &entry); return memcmp(&entry, &zero_entry, sizeof(entry)) == 0; } static int s3b_dcache_entry_write_ok(struct s3b_dcache *priv, u_int dslot, s3b_block_t block_num, u_int dirty) { struct dir_entry entry; u_int old_dirty; if (s3b_dcache_entry_is_empty(priv, dslot)) return 1; (void)s3b_dcache_read_entry(priv, dslot, &entry); old_dirty = (entry.flags & ENTFLG_DIRTY) != 0; return entry.block_num == block_num && old_dirty != dirty; } static int s3b_dcache_read_entry(struct s3b_dcache *priv, u_int dslot, struct dir_entry *entry) { assert(dslot < priv->max_blocks); memset(entry, 0, sizeof(*entry)); return s3b_dcache_read(priv, DIR_OFFSET(priv->flags, dslot), entry, DIR_ENTSIZE(priv->flags)); } #endif /* * Write a directory entry. */ static int s3b_dcache_write_entry(struct s3b_dcache *priv, u_int dslot, const struct dir_entry *entry) { assert(dslot < priv->max_blocks); assert((entry->flags & ~((priv->flags & HDRFLG_NEW_FORMAT) != 0 ? ENTFLG_MASK : 0)) == 0); return s3b_dcache_write(priv, DIR_OFFSET(priv->flags, dslot), entry, DIR_ENTSIZE(priv->flags)); } /* * Resize (and compress) an existing cache file. Upon successful return, priv->fd is closed * and the cache file must be re-opened. 
*/ static int s3b_dcache_resize_file(struct s3b_dcache *priv, const struct file_header *old_header) { const u_int old_max_blocks = old_header->max_blocks; const u_int new_max_blocks = priv->max_blocks; struct file_header new_header; off_t old_data_base; off_t new_data_base; u_int base_old_dslot; u_int new_dslot = 0; u_int num_entries; u_char *block_buf = NULL; char *tempfile = NULL; int new_fd = -1; int r; /* Create new temporary cache file */ if (asprintf(&tempfile, "%s.new", priv->filename) == -1) { r = errno; tempfile = NULL; (*priv->log)(LOG_ERR, "can't allocate string: %s", strerror(r)); goto fail; } if ((r = s3b_dcache_create_file(priv, &new_fd, tempfile, new_max_blocks, &new_header)) != 0) goto fail; /* Allocate block data buffer */ if ((block_buf = malloc(priv->block_size)) == NULL) { r = errno; (*priv->log)(LOG_ERR, "can't allocate buffer: %s", strerror(r)); goto fail; } /* Copy non-empty cache entries from old file to new file */ old_data_base = ROUNDUP2(DIR_OFFSET(old_header->flags, old_max_blocks), old_header->data_align); new_data_base = ROUNDUP2(DIR_OFFSET(new_header.flags, new_max_blocks), new_header.data_align); for (base_old_dslot = 0; base_old_dslot < old_max_blocks; base_old_dslot += num_entries) { char buffer[DIRECTORY_READ_CHUNK * DIR_ENTSIZE(old_header->flags)]; int i; /* Read in the next chunk of old directory entries */ num_entries = old_max_blocks - base_old_dslot; if (num_entries > DIRECTORY_READ_CHUNK) num_entries = DIRECTORY_READ_CHUNK; if ((r = s3b_dcache_read(priv, DIR_OFFSET(old_header->flags, base_old_dslot), buffer, num_entries * DIR_ENTSIZE(old_header->flags))) != 0) { (*priv->log)(LOG_ERR, "error reading cache file `%s' directory: %s", priv->filename, strerror(r)); goto fail; } /* For each dslot: if not free, copy it to the next slot in the new file */ for (i = 0; i < num_entries; i++) { const u_int old_dslot = base_old_dslot + i; struct dir_entry entry; off_t old_data; off_t new_data; /* Read old entry */ memset(&entry, 0, 
sizeof(entry)); memcpy(&entry, buffer + i * DIR_ENTSIZE(old_header->flags), DIR_ENTSIZE(old_header->flags)); /* Is this entry non-empty? */ if (memcmp(&entry, &zero_entry, sizeof(entry)) == 0) continue; /* Any more space? */ if (new_dslot == new_max_blocks) { (*priv->log)(LOG_INFO, "cache file `%s' contains more than %u blocks; some will be discarded", priv->filename, new_max_blocks); goto done; } /* Copy the directory entry */ assert(DIR_ENTSIZE(new_header.flags) == sizeof(entry)); if ((r = s3b_dcache_write2(priv, new_fd, tempfile, DIR_OFFSET(new_header.flags, new_dslot), &entry, sizeof(entry))) != 0) goto fail; /* Copy the data block */ old_data = old_data_base + (off_t)old_dslot * priv->block_size; new_data = new_data_base + (off_t)new_dslot * priv->block_size; if ((r = s3b_dcache_read(priv, old_data, block_buf, priv->block_size)) != 0) goto fail; if ((r = s3b_dcache_write2(priv, new_fd, tempfile, new_data, block_buf, priv->block_size)) != 0) goto fail; /* Advance to the next slot */ new_dslot++; } } done: /* Close the new file */ if (close(new_fd) == -1) { (*priv->log)(LOG_ERR, "error closing temporary cache file `%s': %s", tempfile, strerror(r)); goto fail; } new_fd = -1; /* Replace old cache file with new cache file */ if (rename(tempfile, priv->filename) == -1) { r = errno; (*priv->log)(LOG_ERR, "error renaming `%s' to `%s': %s", tempfile, priv->filename, strerror(r)); goto fail; } free(tempfile); tempfile = NULL; /* Update flags */ priv->flags = new_header.flags; /* Close old file to release it and we're done */ close(priv->fd); priv->fd = -1; r = 0; fail: /* Clean up */ if (block_buf != NULL) free(block_buf); if (new_fd != -1) (void)close(new_fd); if (tempfile != NULL) { (void)unlink(tempfile); free(tempfile); } return r; } static int s3b_dcache_create_file(struct s3b_dcache *priv, int *fdp, const char *filename, u_int max_blocks, struct file_header *headerp) { struct file_header header; int r; /* Initialize header */ memset(&header, 0, sizeof(header)); 
header.signature = DCACHE_SIGNATURE; header.flags = HDRFLG_NEW_FORMAT; header.header_size = HDR_SIZE(header.flags); header.u_int_size = sizeof(u_int); header.s3b_block_t_size = sizeof(s3b_block_t); header.block_size = priv->block_size; header.max_blocks = priv->max_blocks; header.data_align = getpagesize(); /* Create file */ if ((*fdp = open(filename, O_RDWR|O_CREAT|O_EXCL, 0644)) == -1) { r = errno; (*priv->log)(LOG_ERR, "can't create file `%s': %s", filename, strerror(r)); return r; } /* Write header */ if ((r = s3b_dcache_write2(priv, *fdp, filename, (off_t)0, &header, sizeof(header))) != 0) { (*priv->log)(LOG_ERR, "error initializing cache file `%s': %s", filename, strerror(r)); goto fail; } /* Extend the file to the required length; the directory will be filled with zeroes */ if (ftruncate(*fdp, sizeof(header)) == -1 || ftruncate(*fdp, DIR_OFFSET(header.flags, max_blocks)) == -1) { r = errno; (*priv->log)(LOG_ERR, "error initializing cache file `%s': %s", filename, strerror(r)); goto fail; } /* Done */ if (headerp != NULL) *headerp = header; return 0; fail: (void)unlink(filename); (void)close(*fdp); *fdp = -1; return r; } static int s3b_dcache_init_free_list(struct s3b_dcache *priv, s3b_dcache_visit_t *visitor, void *arg, u_int visit_dirty) { off_t required_size; struct stat sb; u_int num_entries; u_int num_dslots_used; u_int base_dslot; u_int i; int r; /* Logging */ (*priv->log)(LOG_INFO, "reading meta-data from cache file `%s'", priv->filename); assert(visitor != NULL); /* Inspect all directory entries */ for (num_dslots_used = base_dslot = 0; base_dslot < priv->max_blocks; base_dslot += num_entries) { char buffer[DIRECTORY_READ_CHUNK * DIR_ENTSIZE(priv->flags)]; /* Read in the next chunk of directory entries */ num_entries = priv->max_blocks - base_dslot; if (num_entries > DIRECTORY_READ_CHUNK) num_entries = DIRECTORY_READ_CHUNK; if ((r = s3b_dcache_read(priv, DIR_OFFSET(priv->flags, base_dslot), buffer, num_entries * DIR_ENTSIZE(priv->flags))) != 0) { 
(*priv->log)(LOG_ERR, "error reading cache file `%s' directory: %s", priv->filename, strerror(r)); return r; } /* For each dslot: if free, add to the free list, else let visitor decide what to do */ for (i = 0; i < num_entries; i++) { const u_int dslot = base_dslot + i; struct dir_entry entry; memset(&entry, 0, sizeof(entry)); memcpy(&entry, buffer + i * DIR_ENTSIZE(priv->flags), DIR_ENTSIZE(priv->flags)); if (memcmp(&entry, &zero_entry, sizeof(entry)) == 0) { if ((r = s3b_dcache_push(priv, dslot)) != 0) return r; } else if ((entry.flags & ENTFLG_DIRTY) != 0 && !visit_dirty) { /* visitor doesn't want dirties, so just nuke it */ if ((r = s3b_dcache_write_entry(priv, dslot, &zero_entry)) != 0) return r; if ((r = s3b_dcache_push(priv, dslot)) != 0) return r; } else { priv->num_alloc++; if (dslot + 1 > num_dslots_used) /* keep track of the number of dslots in use */ num_dslots_used = dslot + 1; if ((r = (*visitor)(arg, dslot, entry.block_num, (entry.flags & ENTFLG_DIRTY) == 0 ? entry.md5 : NULL)) != 0) return r; } } } /* Reverse the free list so we allocate lower numbered slots first */ for (i = 0; i < priv->free_list_len / 2; i++) { const s3b_block_t temp = priv->free_list[i]; priv->free_list[i] = priv->free_list[priv->free_list_len - i - 1]; priv->free_list[priv->free_list_len - i - 1] = temp; } /* Verify the cache file is not truncated */ required_size = DIR_OFFSET(priv->flags, priv->max_blocks); if (num_dslots_used > 0) { if (required_size < DATA_OFFSET(priv, num_dslots_used)) required_size = DATA_OFFSET(priv, num_dslots_used); } if (fstat(priv->fd, &sb) == -1) { r = errno; (*priv->log)(LOG_ERR, "error reading cache file `%s' length: %s", priv->filename, strerror(r)); return r; } if (sb.st_size < required_size) { (*priv->log)(LOG_ERR, "cache file `%s' is truncated (has size %ju < %ju bytes)", priv->filename, (uintmax_t)sb.st_size, (uintmax_t)required_size); return EINVAL; } /* Discard any unreferenced data beyond the last entry */ if (sb.st_size > required_size && 
ftruncate(priv->fd, required_size) == -1) { r = errno; (*priv->log)(LOG_ERR, "error trimming cache file `%s' to %ju bytes: %s", priv->filename, (uintmax_t)required_size, strerror(r)); return EINVAL; } /* Report results */ (*priv->log)(LOG_INFO, "loaded cache file `%s' with %u free and %u used blocks (max index %u)", priv->filename, priv->free_list_len, priv->max_blocks - priv->free_list_len, num_dslots_used); /* Done */ return 0; } /* * Push a dslot onto the free list. */ static int s3b_dcache_push(struct s3b_dcache *priv, u_int dslot) { /* Sanity check */ assert(dslot < priv->max_blocks); assert(priv->free_list_len < priv->max_blocks); /* Grow the free list array if necessary */ if (priv->free_list_alloc == priv->free_list_len) { s3b_block_t *new_free_list; s3b_block_t new_free_list_alloc; int r; new_free_list_alloc = priv->free_list_alloc == 0 ? 1024 : 2 * priv->free_list_alloc; if ((new_free_list = realloc(priv->free_list, new_free_list_alloc * sizeof(*new_free_list))) == NULL) { r = errno; (*priv->log)(LOG_ERR, "realloc: %s", strerror(r)); return r; } priv->free_list = new_free_list; priv->free_list_alloc = new_free_list_alloc; } /* Add new dslot */ assert(priv->free_list_len < priv->free_list_alloc); priv->free_list[priv->free_list_len++] = dslot; return 0; } /* * Pop the next dslot off of the free list. There must be one. 
*/ static void s3b_dcache_pop(struct s3b_dcache *priv, u_int *dslotp) { /* Sanity check */ assert(priv->free_list_len > 0); /* Pop off dslot at the head of the list */ *dslotp = priv->free_list[--priv->free_list_len]; assert(*dslotp < priv->max_blocks); /* See if we can give back some memory */ if (priv->free_list_alloc > 1024 && priv->free_list_len <= priv->free_list_alloc / 4) { s3b_block_t *new_free_list; s3b_block_t new_free_list_alloc; new_free_list_alloc = priv->free_list_alloc / 4; if ((new_free_list = realloc(priv->free_list, new_free_list_alloc * sizeof(*new_free_list))) == NULL) (*priv->log)(LOG_ERR, "can't shrink dcache free list: realloc: %s (ignored)", strerror(errno)); else { priv->free_list = new_free_list; priv->free_list_alloc = new_free_list_alloc; } } assert(priv->free_list_len <= priv->free_list_alloc); } static int s3b_dcache_read(struct s3b_dcache *priv, off_t offset, void *data, size_t len) { size_t sofar; ssize_t r; for (sofar = 0; sofar < len; sofar += r) { const off_t posn = offset + sofar; if ((r = pread(priv->fd, (char *)data + sofar, len - sofar, offset + sofar)) == -1) { (*priv->log)(LOG_ERR, "error reading cache file `%s' at offset %ju: %s", priv->filename, (uintmax_t)posn, strerror(r)); return r; } if (r == 0) { /* truncated input */ (*priv->log)(LOG_ERR, "error reading cache file `%s' at offset %ju: file is truncated", priv->filename, (uintmax_t)posn); return EINVAL; } } return 0; } static int s3b_dcache_write(struct s3b_dcache *priv, off_t offset, const void *data, size_t len) { return s3b_dcache_write2(priv, priv->fd, priv->filename, offset, data, len); } static int s3b_dcache_write2(struct s3b_dcache *priv, int fd, const char *filename, off_t offset, const void *data, size_t len) { size_t sofar; ssize_t r; for (sofar = 0; sofar < len; sofar += r) { const off_t posn = offset + sofar; if ((r = pwrite(fd, (const char *)data + sofar, len - sofar, offset + sofar)) == -1) { (*priv->log)(LOG_ERR, "error writing cache file `%s' at offset 
%ju: %s", filename, (uintmax_t)posn, strerror(r)); return r; } } return 0; } s3backer-1.5.4/dcache.h000066400000000000000000000057751354714241400146210ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. */ /* * Simple on-disk persistent cache. */ /* Declarations */ struct s3b_dcache; /* * Startup visitor callback. Each non-empty slot in the disk cache is visited. * * The "md5" pointer is NULL for dirty blocks, and not NULL for clean blocks. 
*/ typedef int s3b_dcache_visit_t(void *arg, s3b_block_t dslot, s3b_block_t block_num, const u_char *md5); /* dcache.c */ extern int s3b_dcache_open(struct s3b_dcache **dcachep, log_func_t *log, const char *filename, u_int block_size, u_int max_blocks, s3b_dcache_visit_t *visitor, void *arg, u_int visit_dirty); extern void s3b_dcache_close(struct s3b_dcache *dcache); extern u_int s3b_dcache_size(struct s3b_dcache *dcache); extern int s3b_dcache_alloc_block(struct s3b_dcache *priv, u_int *dslotp); extern int s3b_dcache_record_block(struct s3b_dcache *priv, u_int dslot, s3b_block_t block_num, const u_char *md5); extern int s3b_dcache_erase_block(struct s3b_dcache *priv, u_int dslot); extern int s3b_dcache_free_block(struct s3b_dcache *dcache, u_int dslot); extern int s3b_dcache_read_block(struct s3b_dcache *dcache, u_int dslot, void *dest, u_int off, u_int len); extern int s3b_dcache_write_block(struct s3b_dcache *dcache, u_int dslot, const void *src, u_int off, u_int len); extern int s3b_dcache_fsync(struct s3b_dcache *dcache); extern int s3b_dcache_has_mount_token(struct s3b_dcache *priv); extern int s3b_dcache_set_mount_token(struct s3b_dcache *priv, int32_t *old_valuep, int32_t new_value); s3backer-1.5.4/ec_protect.c000066400000000000000000000614171354714241400155270ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. */ #include "s3backer.h" #include "ec_protect.h" #include "block_part.h" #include "hash.h" /* * Written block information caching. * * The purpose of this is to minimize problems from the weak guarantees provided * by S3's "eventual consistency". We do this by: * * (a) Enforcing a minimum delay between the completion of one PUT/DELETE * of a block and the initiation of the next PUT/DELETE of the same block * (b) Caching the MD5 checksum of every block written for some minimum time * and verifying that data returned from subsequent GETs is correct. * * These are the relevant configuration parameters: * * min_write_delay * Minimum time delay after a PUT/DELETE completes before the next PUT/DELETE * can be initiated. * cache_time * How long after writing a block we'll remember its MD5 checksum. This * must be at least as long as min_write_delay. Zero means infinity. * cache_size * Maximum number of blocks we'll track at one time. When table * is full, additional writes will block. 
* * Blocks we are currently tracking can be in the following states: * * State Meaning Hash table List Other invariants * ----- ------- ---------- ---- ---------------- * * CLEAN initial state No No * WRITING currently being written Yes No timestamp == 0, u.data valid * WRITTEN written and MD5 cached Yes Yes timestamp != 0, u.md5 valid * * The steady state for a block is CLEAN. WRITING means the block is currently * being sent; concurrent attempts to write will simply sleep until the first one * finishes. WRITTEN is where you go after successfully writing a block. The WRITTEN * state will timeout (and the entry revert to CLEAN) after cache_time. * * If another attempt to write a block in the WRITTEN state occurs occurs before * min_write_delay has elapsed, the second attempt will sleep. * * In the WRITING state, we have the data still so any reads are local. In the WRITTEN * state we don't have the data but we do know its MD5, so therefore we can verify what * comes back; if it doesn't verify, we retry as we would with any other error. * * There is a special case that occurs when we get an error while WRITING: in this case, * we don't know whether the block was successfully written or not, so we transition to * WRITTEN but with an all zeroes MD5 indicating "don't know". * * If we hit the 'cache_size' limit, we sleep a little while and then try again. * * We keep track of blocks in 'struct block_info' structures. These structures * are themselves tracked in both (a) a linked list and (b) a hash table. * * The hash table contains all structures, and is keyed by block number. This * is simply so we can quickly find the structure associated with a specific block. * * The linked list contains WRITTEN blocks, and is sorted in increasing order by timestamp, * so the entries that will expire first are at the front of the list. 
 */
struct block_info {
    s3b_block_t             block_num;              // block number - MUST BE FIRST
    uint64_t                timestamp;              // time PUT/DELETE completed (if WRITTEN); zero while WRITING
    TAILQ_ENTRY(block_info) link;                   // entry on the WRITTEN list, sorted by timestamp
    union {
        const void  *data;                          // block's content (if WRITING); NULL means all zeroes
        u_char      md5[MD5_DIGEST_LENGTH];         // block's content MD5 (if WRITTEN)
    } u;
};

/* Internal state for the eventual-consistency protection layer */
struct ec_protect_private {
    struct ec_protect_conf      *config;            // configuration (not owned)
    struct s3backer_store       *inner;             // wrapped lower-layer store
    struct ec_protect_stats     stats;              // statistics; protected by `mutex'
    struct s3b_hash             *hashtable;         // all tracked blocks, keyed by block number
    u_int                       num_sleepers;       // count of sleeping threads
    TAILQ_HEAD(, block_info)    list;               // WRITTEN blocks in increasing timestamp order
    pthread_mutex_t             mutex;
    pthread_cond_t              space_cond;         // signaled when cache space available
    pthread_cond_t              sleepers_cond;      // signaled when no more threads are sleeping
    pthread_cond_t              never_cond;         // never signaled; used for sleeping only
};

/* Callback info passed through s3b_hash_foreach() during list_blocks */
struct cbinfo {
    block_list_func_t   *callback;
    void                *arg;
};

/* s3backer_store functions */
static int ec_protect_create_threads(struct s3backer_store *s3b);
static int ec_protect_meta_data(struct s3backer_store *s3b, off_t *file_sizep, u_int *block_sizep);
static int ec_protect_set_mount_token(struct s3backer_store *s3b, int32_t *old_valuep, int32_t new_value);
static int ec_protect_read_block(struct s3backer_store *s3b, s3b_block_t block_num, void *dest,
            u_char *actual_md5, const u_char *expect_md5, int strict);
static int ec_protect_write_block(struct s3backer_store *s3b, s3b_block_t block_num, const void *src, u_char *md5,
            check_cancel_t *check_cancel, void *check_cancel_arg);
static int ec_protect_read_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int off, u_int len, void *dest);
static int ec_protect_write_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int off, u_int len, const void *src);
static int ec_protect_flush(struct s3backer_store *s3b);
static void ec_protect_destroy(struct s3backer_store *s3b);

/* Misc */
static uint64_t ec_protect_sleep_until(struct ec_protect_private *priv, pthread_cond_t *cond,
            uint64_t wake_time_millis);
static void ec_protect_scrub_expired_writtens(struct ec_protect_private *priv, uint64_t current_time);
static uint64_t ec_protect_get_time(void);
static int ec_protect_list_blocks(struct s3backer_store *s3b, block_list_func_t *callback, void *arg);
static void ec_protect_dirty_callback(void *arg, void *value);
static void ec_protect_free_one(void *arg, void *value);

/* Invariants checking (debug builds only) */
#ifndef NDEBUG
static void ec_protect_check_one(void *arg, void *value);
static void ec_protect_check_invariants(struct ec_protect_private *priv);
#define EC_PROTECT_CHECK_INVARIANTS(priv)   ec_protect_check_invariants(priv)
#else
#define EC_PROTECT_CHECK_INVARIANTS(priv)   do { } while (0)
#endif

/* Special all-zeroes MD5 value signifying a zeroed block */
static const u_char zero_md5[MD5_DIGEST_LENGTH];

/* Special all-ones MD5 value signifying a just-written block whose content is unknown
   (initialized to 0xff bytes in ec_protect_create()) */
static u_char unknown_md5[MD5_DIGEST_LENGTH];

/*
 * Constructor
 *
 * On error, returns NULL and sets `errno'.
*/ struct s3backer_store * ec_protect_create(struct ec_protect_conf *config, struct s3backer_store *inner) { struct s3backer_store *s3b; struct ec_protect_private *priv; int r; /* Initialize structures */ if ((s3b = calloc(1, sizeof(*s3b))) == NULL) { r = errno; (*config->log)(LOG_ERR, "calloc(): %s", strerror(r)); goto fail0; } s3b->create_threads = ec_protect_create_threads; s3b->meta_data = ec_protect_meta_data; s3b->set_mount_token = ec_protect_set_mount_token; s3b->read_block = ec_protect_read_block; s3b->write_block = ec_protect_write_block; s3b->read_block_part = ec_protect_read_block_part; s3b->write_block_part = ec_protect_write_block_part; s3b->list_blocks = ec_protect_list_blocks; s3b->flush = ec_protect_flush; s3b->destroy = ec_protect_destroy; if ((priv = calloc(1, sizeof(*priv))) == NULL) { r = errno; (*config->log)(LOG_ERR, "calloc(): %s", strerror(r)); goto fail1; } priv->config = config; priv->inner = inner; if ((r = pthread_mutex_init(&priv->mutex, NULL)) != 0) goto fail2; if ((r = pthread_cond_init(&priv->space_cond, NULL)) != 0) goto fail3; if ((r = pthread_cond_init(&priv->sleepers_cond, NULL)) != 0) goto fail4; if ((r = pthread_cond_init(&priv->never_cond, NULL)) != 0) goto fail5; TAILQ_INIT(&priv->list); if ((r = s3b_hash_create(&priv->hashtable, config->cache_size)) != 0) goto fail6; s3b->data = priv; memset(unknown_md5, 0xff, sizeof(unknown_md5)); /* Done */ EC_PROTECT_CHECK_INVARIANTS(priv); return s3b; fail6: pthread_cond_destroy(&priv->never_cond); fail5: pthread_cond_destroy(&priv->sleepers_cond); fail4: pthread_cond_destroy(&priv->space_cond); fail3: pthread_mutex_destroy(&priv->mutex); fail2: free(priv); fail1: free(s3b); fail0: (*config->log)(LOG_ERR, "ec_protect creation failed: %s", strerror(r)); errno = r; return NULL; } static int ec_protect_create_threads(struct s3backer_store *s3b) { struct ec_protect_private *const priv = s3b->data; return (*priv->inner->create_threads)(priv->inner); } static int ec_protect_meta_data(struct 
s3backer_store *s3b, off_t *file_sizep, u_int *block_sizep) { struct ec_protect_private *const priv = s3b->data; return (*priv->inner->meta_data)(priv->inner, file_sizep, block_sizep); } static int ec_protect_set_mount_token(struct s3backer_store *s3b, int32_t *old_valuep, int32_t new_value) { struct ec_protect_private *const priv = s3b->data; return (*priv->inner->set_mount_token)(priv->inner, old_valuep, new_value); } static int ec_protect_flush(struct s3backer_store *const s3b) { struct ec_protect_private *const priv = s3b->data; /* Grab lock and sanity check */ pthread_mutex_lock(&priv->mutex); EC_PROTECT_CHECK_INVARIANTS(priv); /* Wait for all sleeping writers to finish */ while (priv->num_sleepers > 0) pthread_cond_wait(&priv->sleepers_cond, &priv->mutex); /* Release lock */ pthread_mutex_unlock(&priv->mutex); return 0; } static void ec_protect_destroy(struct s3backer_store *const s3b) { struct ec_protect_private *const priv = s3b->data; /* Grab lock and sanity check */ pthread_mutex_lock(&priv->mutex); EC_PROTECT_CHECK_INVARIANTS(priv); /* Wait for all sleeping writers to finish */ while (priv->num_sleepers > 0) pthread_cond_wait(&priv->sleepers_cond, &priv->mutex); /* Destroy inner store */ (*priv->inner->destroy)(priv->inner); /* Free structures */ pthread_mutex_destroy(&priv->mutex); pthread_cond_destroy(&priv->space_cond); pthread_cond_destroy(&priv->sleepers_cond); pthread_cond_destroy(&priv->never_cond); s3b_hash_foreach(priv->hashtable, ec_protect_free_one, NULL); s3b_hash_destroy(priv->hashtable); free(priv); free(s3b); } void ec_protect_get_stats(struct s3backer_store *s3b, struct ec_protect_stats *stats) { struct ec_protect_private *const priv = s3b->data; pthread_mutex_lock(&priv->mutex); memcpy(stats, &priv->stats, sizeof(*stats)); stats->current_cache_size = s3b_hash_size(priv->hashtable); pthread_mutex_unlock(&priv->mutex); } void ec_protect_clear_stats(struct s3backer_store *s3b) { struct ec_protect_private *const priv = s3b->data; 
pthread_mutex_lock(&priv->mutex); memset(&priv->stats, 0, sizeof(priv->stats)); pthread_mutex_unlock(&priv->mutex); } static int ec_protect_list_blocks(struct s3backer_store *s3b, block_list_func_t *callback, void *arg) { struct ec_protect_private *const priv = s3b->data; struct cbinfo cbinfo; int r; if ((r = (*priv->inner->list_blocks)(priv->inner, callback, arg)) != 0) return r; cbinfo.callback = callback; cbinfo.arg = arg; pthread_mutex_lock(&priv->mutex); s3b_hash_foreach(priv->hashtable, ec_protect_dirty_callback, &cbinfo); pthread_mutex_unlock(&priv->mutex); return 0; } static int ec_protect_read_block(struct s3backer_store *const s3b, s3b_block_t block_num, void *dest, u_char *actual_md5, const u_char *expect_md5, int strict) { struct ec_protect_private *const priv = s3b->data; struct ec_protect_conf *const config = priv->config; u_char md5[MD5_DIGEST_LENGTH]; struct block_info *binfo; /* Sanity check */ if (config->block_size == 0) return EINVAL; /* Grab lock and sanity check */ pthread_mutex_lock(&priv->mutex); EC_PROTECT_CHECK_INVARIANTS(priv); again: /* Scrub the list of WRITTENs */ ec_protect_scrub_expired_writtens(priv, ec_protect_get_time()); /* Find info for this block */ if ((binfo = s3b_hash_get(priv->hashtable, block_num)) != NULL) { /* In WRITING state: we have the data already! */ if (binfo->timestamp == 0) { if (binfo->u.data == NULL) memset(dest, 0, config->block_size); else memcpy(dest, binfo->u.data, config->block_size); if (actual_md5 != NULL) memset(actual_md5, 0, MD5_DIGEST_LENGTH); // we don't know it yet! priv->stats.cache_data_hits++; pthread_mutex_unlock(&priv->mutex); return 0; } /* In WRITTEN state: special case: unknown MD5. Wait for settle time, then try again */ if (memcmp(binfo->u.md5, unknown_md5, MD5_DIGEST_LENGTH) == 0) { /* Have we waited long enough already? 
If so, reset block and try again */ if (ec_protect_get_time() >= binfo->timestamp + config->min_write_delay) { TAILQ_REMOVE(&priv->list, binfo, link); s3b_hash_remove(priv->hashtable, binfo->block_num); free(binfo); goto again; } /* Sleep to allow previous failed write to resolve, and then try again */ ec_protect_sleep_until(priv, NULL, binfo->timestamp + config->min_write_delay); goto again; } /* In WRITTEN state: special case: zero block */ if (memcmp(binfo->u.md5, zero_md5, MD5_DIGEST_LENGTH) == 0) { if (expect_md5 != NULL && strict && memcmp(expect_md5, zero_md5, MD5_DIGEST_LENGTH) != 0) (*config->log)(LOG_ERR, "ec_protect_read_block(): impossible expected MD5?"); memset(dest, 0, config->block_size); if (actual_md5 != NULL) memset(actual_md5, 0, MD5_DIGEST_LENGTH); priv->stats.cache_data_hits++; pthread_mutex_unlock(&priv->mutex); return 0; } /* In WRITTEN state: we know the expected MD5 */ memcpy(md5, binfo->u.md5, MD5_DIGEST_LENGTH); if (expect_md5 != NULL && strict && memcmp(md5, expect_md5, MD5_DIGEST_LENGTH) != 0) (*config->log)(LOG_ERR, "ec_protect_read_block(): impossible expected MD5?"); expect_md5 = md5; strict = 1; } /* Release lock */ pthread_mutex_unlock(&priv->mutex); /* Read block normally */ return (*priv->inner->read_block)(priv->inner, block_num, dest, actual_md5, expect_md5, strict); } static int ec_protect_write_block(struct s3backer_store *const s3b, s3b_block_t block_num, const void *src, u_char *caller_md5, check_cancel_t *check_cancel, void *check_cancel_arg) { struct ec_protect_private *const priv = s3b->data; struct ec_protect_conf *const config = priv->config; u_char md5[MD5_DIGEST_LENGTH]; struct block_info *binfo; uint64_t current_time; uint64_t delay; int r; /* Sanity check */ if (config->block_size == 0) return EINVAL; /* Grab lock */ pthread_mutex_lock(&priv->mutex); again: /* Sanity check */ EC_PROTECT_CHECK_INVARIANTS(priv); /* Scrub the list of WRITTENs */ current_time = ec_protect_get_time(); 
ec_protect_scrub_expired_writtens(priv, current_time); /* Find info for this block */ binfo = s3b_hash_get(priv->hashtable, block_num); /* CLEAN case: add new entry in state WRITING and write the block */ if (binfo == NULL) { /* If we have reached max cache capacity, wait until there's more room */ if (s3b_hash_size(priv->hashtable) >= config->cache_size) { /* Report deadlock situation */ if (config->cache_time == 0) (*config->log)(LOG_ERR, "md5 cache is full, but timeout is infinite: you have write deadlock!"); /* Sleep until space becomes available */ if ((binfo = TAILQ_FIRST(&priv->list)) != NULL && config->cache_time > 0) delay = ec_protect_sleep_until(priv, &priv->space_cond, binfo->timestamp + config->cache_time); else delay = ec_protect_sleep_until(priv, &priv->space_cond, 0); /* sleep indefinitely... */ priv->stats.cache_full_delay += delay; goto again; } /* Create new entry in WRITING state */ if ((binfo = calloc(1, sizeof(*binfo))) == NULL) { r = errno; (*config->log)(LOG_ERR, "can't alloc new MD5 cache entry: %s", strerror(r)); priv->stats.out_of_memory_errors++; pthread_mutex_unlock(&priv->mutex); return r; } binfo->block_num = block_num; binfo->u.data = src; s3b_hash_put_new(priv->hashtable, binfo); writeit: /* Write the block */ pthread_mutex_unlock(&priv->mutex); r = (*priv->inner->write_block)(priv->inner, block_num, src, md5, check_cancel, check_cancel_arg); pthread_mutex_lock(&priv->mutex); EC_PROTECT_CHECK_INVARIANTS(priv); /* * Wake up at least one thread that might be sleeping indefinitely (see above). This handles an obscure * case where the cache is full and every entry is in the WRITING state. The next thread that attempts * to write could be stuck waiting indefinitely unless we wake it up here. */ pthread_cond_signal(&priv->space_cond); /* * Move to state WRITTEN. * * If there was an error, we can't assume we know whether the write succeeded or not, * so mark the block as WRITTEN but with a special MD5 value meaning "unknown". 
* We have to wait for min_write_delay before trying to read the block again. */ binfo->timestamp = ec_protect_get_time(); memcpy(binfo->u.md5, r == 0 ? md5 : unknown_md5, MD5_DIGEST_LENGTH); TAILQ_INSERT_TAIL(&priv->list, binfo, link); pthread_mutex_unlock(&priv->mutex); /* Copy expected MD5 for caller */ if (r == 0 && caller_md5 != NULL) memcpy(caller_md5, md5, MD5_DIGEST_LENGTH); return r; } /* * WRITING case: wait until current write completes (hmm, why is kernel doing overlapping writes?). * Since we know after current write completes we'll have to wait another 'min_write_time' milliseconds * anyway, we conservatively just wait exactly that long now. There may be an extra wakeup or two, * but that's OK. */ if (binfo->timestamp == 0) { delay = ec_protect_sleep_until(priv, NULL, current_time + config->min_write_delay); priv->stats.repeated_write_delay += delay; goto again; } /* * WRITTEN case: wait until at least 'min_write_time' milliseconds has passed since previous write. */ if (current_time < binfo->timestamp + config->min_write_delay) { delay = ec_protect_sleep_until(priv, NULL, binfo->timestamp + config->min_write_delay); priv->stats.repeated_write_delay += delay; goto again; } /* * WRITTEN case: 'min_write_time' milliseconds have indeed passed, so go back to WRITING. 
*/ binfo->timestamp = 0; binfo->u.data = src; TAILQ_REMOVE(&priv->list, binfo, link); goto writeit; } static int ec_protect_read_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int off, u_int len, void *dest) { struct ec_protect_private *const priv = s3b->data; struct ec_protect_conf *const config = priv->config; return block_part_read_block_part(s3b, block_num, config->block_size, off, len, dest); } static int ec_protect_write_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int off, u_int len, const void *src) { struct ec_protect_private *const priv = s3b->data; struct ec_protect_conf *const config = priv->config; return block_part_write_block_part(s3b, block_num, config->block_size, off, len, src); } /* * Return current time in milliseconds. */ static uint64_t ec_protect_get_time(void) { struct timeval tv; gettimeofday(&tv, NULL); return (uint64_t)tv.tv_sec * 1000 + (uint64_t)tv.tv_usec / 1000; } /* * Remove expired WRITTEN entries from the list. * This assumes the mutex is held. */ static void ec_protect_scrub_expired_writtens(struct ec_protect_private *priv, uint64_t current_time) { struct ec_protect_conf *const config = priv->config; struct block_info *binfo; int num_removed = 0; if (config->cache_time > 0) { while ((binfo = TAILQ_FIRST(&priv->list)) != NULL && current_time >= binfo->timestamp + config->cache_time) { TAILQ_REMOVE(&priv->list, binfo, link); s3b_hash_remove(priv->hashtable, binfo->block_num); free(binfo); num_removed++; } } switch (num_removed) { case 0: break; case 1: pthread_cond_signal(&priv->space_cond); break; default: pthread_cond_broadcast(&priv->space_cond); break; } } /* * Sleep until specified time (if non-zero) or condition (if non-NULL). * Note: in rare cases there can be spurious early wakeups. * Returns number of milliseconds slept. * * This assumes the mutex is locked. 
 */
static uint64_t
ec_protect_sleep_until(struct ec_protect_private *priv, pthread_cond_t *cond, uint64_t wake_time_millis)
{
    uint64_t time_before;
    uint64_t time_after;

    /* Must have either a deadline or a condition to wait on */
    assert(cond != NULL || wake_time_millis != 0);
    if (cond == NULL)
        cond = &priv->never_cond;               // never signaled: sleep purely on the timeout
    time_before = ec_protect_get_time();
    priv->num_sleepers++;                       // tracked so flush/destroy can wait for us
    if (wake_time_millis != 0) {
        struct timespec wake_time;

        /* Convert absolute millisecond deadline to a timespec (same epoch as ec_protect_get_time()) */
        wake_time.tv_sec = wake_time_millis / 1000;
        wake_time.tv_nsec = (wake_time_millis % 1000) * 1000000;
        if (pthread_cond_timedwait(cond, &priv->mutex, &wake_time) == ETIMEDOUT)
            time_after = wake_time_millis;      // timed out: we know exactly when we woke up
        else
            time_after = ec_protect_get_time();
    } else {
        pthread_cond_wait(cond, &priv->mutex);
        time_after = ec_protect_get_time();
    }
    assert(priv->num_sleepers > 0);
    if (--priv->num_sleepers == 0)
        pthread_cond_broadcast(&priv->sleepers_cond);       // wake anyone in flush/destroy
    return time_after - time_before;
}

/* s3b_hash_foreach() callback: free one cache entry */
static void
ec_protect_free_one(void *arg, void *value)
{
    free(value);
}

/*
 * s3b_hash_foreach() callback: report tracked blocks that are known non-zero
 * (WRITING with data, or WRITTEN with a non-zero MD5) to the block-list callback.
 */
static void
ec_protect_dirty_callback(void *arg, void *value)
{
    struct cbinfo *const cbinfo = arg;
    struct block_info *const binfo = value;

    if (binfo->timestamp == 0 ?
binfo->u.data != NULL : memcmp(binfo->u.md5, zero_md5, MD5_DIGEST_LENGTH) != 0) (*cbinfo->callback)(cbinfo->arg, binfo->block_num); } #ifndef NDEBUG /* Accounting structure */ struct check_info { u_int num_in_list; u_int written; u_int writing; }; static void ec_protect_check_one(void *arg, void *value) { struct block_info *const binfo = value; struct check_info *const info = arg; if (binfo->timestamp == 0) info->writing++; else info->written++; } static void ec_protect_check_invariants(struct ec_protect_private *priv) { struct block_info *binfo; struct check_info info; memset(&info, 0, sizeof(info)); for (binfo = TAILQ_FIRST(&priv->list); binfo != NULL; binfo = TAILQ_NEXT(binfo, link)) { assert(binfo->timestamp != 0); assert(s3b_hash_get(priv->hashtable, binfo->block_num) == binfo); info.num_in_list++; } s3b_hash_foreach(priv->hashtable, ec_protect_check_one, &info); assert(info.written == info.num_in_list); assert(info.written + info.writing == s3b_hash_size(priv->hashtable)); } #endif s3backer-1.5.4/ec_protect.h000066400000000000000000000046311354714241400155270ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. 
 *
 * In addition, as a special exception, the copyright holders give
 * permission to link the code of portions of this program with the
 * OpenSSL library under certain conditions as described in each
 * individual source file, and distribute linked combinations including
 * the two.
 *
 * You must obey the GNU General Public License in all respects for all
 * of the code used other than OpenSSL. If you modify file(s) with this
 * exception, you may extend this exception to your version of the
 * file(s), but you are not obligated to do so. If you do not wish to do
 * so, delete this exception statement from your version. If you delete
 * this exception statement from all source files in the program, then
 * also delete it here.
 */

/* Configuration info structure for ec_protect store */
struct ec_protect_conf {
    u_int               block_size;             /* block size in bytes */
    u_int               min_write_delay;        /* min delay between writes to the same block */
    u_int               cache_time;             /* how long to remember written blocks */
    u_int               cache_size;             /* max number of cached entries */
    log_func_t          *log;                   /* logging callback */
};

/* Statistics structure for ec_protect store */
struct ec_protect_stats {
    u_int               current_cache_size;     /* current number of cached entries */
    u_int               cache_data_hits;        /* reads satisfied from cached data */
    uint64_t            cache_full_delay;       /* total ms spent waiting for cache space */
    uint64_t            repeated_write_delay;   /* total ms spent waiting to re-write a block */
    u_int               out_of_memory_errors;   /* allocation failures */
};

/* ec_protect.c */
extern struct s3backer_store *ec_protect_create(struct ec_protect_conf *config, struct s3backer_store *inner);
extern void ec_protect_get_stats(struct s3backer_store *s3b, struct ec_protect_stats *stats);
extern void ec_protect_clear_stats(struct s3backer_store *s3b);
s3backer-1.5.4/erase.c000066400000000000000000000165221354714241400144740ustar00rootroot00000000000000
/*
 * s3backer - FUSE-based single file backing store via Amazon S3
 *
 * Copyright 2008-2011 Archie L. Cobbs
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. 
*/ #include "s3backer.h" #include "block_cache.h" #include "ec_protect.h" #include "fuse_ops.h" #include "http_io.h" #include "test_io.h" #include "s3b_config.h" #include "erase.h" #define BLOCKS_PER_DOT 0x100 #define MAX_QUEUE_LENGTH 1000 #define NUM_ERASURE_THREADS 25 /* Erasure state */ struct erase_state { struct s3backer_store *s3b; s3b_block_t queue[MAX_QUEUE_LENGTH]; u_int qlen; pthread_t threads[NUM_ERASURE_THREADS]; int quiet; int stopping; uintmax_t count; pthread_mutex_t mutex; pthread_cond_t thread_wakeup; pthread_cond_t queue_not_full; }; /* Internal functions */ static void erase_list_callback(void *arg, s3b_block_t block_num); static void *erase_thread_main(void *arg); int s3backer_erase(struct s3b_config *config) { struct erase_state state; struct erase_state *const priv = &state; char response[10]; int ok = 0; int i; int r; /* Double check with user */ if (!config->force) { warnx("`--erase' flag given: erasing all blocks in %s", config->description); fprintf(stderr, "s3backer: is this correct? 
[y/N] "); *response = '\0'; if (fgets(response, sizeof(response), stdin) != NULL) { while (*response && isspace(response[strlen(response) - 1])) response[strlen(response) - 1] = '\0'; } if (strcasecmp(response, "y") != 0 && strcasecmp(response, "yes") != 0) { warnx("not confirmed"); goto fail0; } } /* Initialize state */ memset(priv, 0, sizeof(*priv)); priv->quiet = config->quiet; if ((r = pthread_mutex_init(&priv->mutex, NULL)) != 0) { warnx("pthread_mutex_init: %s", strerror(r)); goto fail0; } if ((r = pthread_cond_init(&priv->thread_wakeup, NULL)) != 0) { warnx("pthread_cond_init: %s", strerror(r)); goto fail1; } if ((r = pthread_cond_init(&priv->queue_not_full, NULL)) != 0) { warnx("pthread_cond_init: %s", strerror(r)); goto fail2; } for (i = 0; i < NUM_ERASURE_THREADS; i++) { if ((r = pthread_create(&priv->threads[i], NULL, erase_thread_main, priv)) != 0) goto fail3; } /* Logging */ if (!config->quiet) { fprintf(stderr, "s3backer: erasing non-zero blocks..."); fflush(stderr); } /* Create temporary lower layer */ if ((priv->s3b = config->test ? test_io_create(&config->http_io) : http_io_create(&config->http_io)) == NULL) { warnx(config->test ? 
"test_io_create" : "http_io_create"); goto fail3; } /* Iterate over non-zero blocks */ if ((r = (*priv->s3b->list_blocks)(priv->s3b, erase_list_callback, priv)) != 0) { warnx("can't list blocks: %s", strerror(r)); goto fail3; } /* Clear mount token */ if ((r = (*priv->s3b->set_mount_token)(priv->s3b, NULL, 0)) != 0) { warnx("can't clear mount token: %s", strerror(r)); goto fail3; } /* Success */ ok = 1; /* Clean up */ fail3: pthread_mutex_lock(&priv->mutex); priv->stopping = 1; pthread_cond_broadcast(&priv->thread_wakeup); pthread_mutex_unlock(&priv->mutex); for (i = 0; i < NUM_ERASURE_THREADS; i++) { if (priv->threads[i] == (pthread_t)0) continue; if ((r = pthread_join(priv->threads[i], NULL)) != 0) warnx("pthread_join: %s", strerror(r)); } if (priv->s3b != NULL) { if (ok && !config->quiet) { fprintf(stderr, "done\n"); warnx("erased %ju non-zero blocks", priv->count); } (*priv->s3b->destroy)(priv->s3b); } pthread_cond_destroy(&priv->queue_not_full); fail2: pthread_cond_destroy(&priv->thread_wakeup); fail1: pthread_mutex_destroy(&priv->mutex); fail0: return ok ? 0 : -1; } static void erase_list_callback(void *arg, s3b_block_t block_num) { struct erase_state *const priv = arg; pthread_mutex_lock(&priv->mutex); while (priv->qlen == MAX_QUEUE_LENGTH) pthread_cond_wait(&priv->queue_not_full, &priv->mutex); priv->queue[priv->qlen++] = block_num; pthread_cond_signal(&priv->thread_wakeup); pthread_mutex_unlock(&priv->mutex); } static void * erase_thread_main(void *arg) { struct erase_state *const priv = arg; s3b_block_t block_num; int r; /* Acquire lock */ pthread_mutex_lock(&priv->mutex); /* Erase blocks until there are no more */ while (1) { /* Is there a block to erase? 
*/ if (priv->qlen > 0) { /* Grab next bock */ if (priv->qlen == MAX_QUEUE_LENGTH) pthread_cond_signal(&priv->queue_not_full); block_num = priv->queue[--priv->qlen]; /* Do block deletion */ pthread_mutex_unlock(&priv->mutex); r = (*priv->s3b->write_block)(priv->s3b, block_num, NULL, NULL, NULL, NULL); pthread_mutex_lock(&priv->mutex); /* Check for error */ if (r != 0) { warnx("can't delete block %0*jx: %s", S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num, strerror(r)); continue; } /* Update count and output a dot */ if ((++priv->count % BLOCKS_PER_DOT) == 0 && !priv->quiet) { fprintf(stderr, "."); fflush(stderr); } /* Spin again */ continue; } /* Are we done? */ if (priv->stopping) break; /* Wait for something to do */ pthread_cond_wait(&priv->thread_wakeup, &priv->mutex); } /* Done */ pthread_mutex_unlock(&priv->mutex); return NULL; } s3backer-1.5.4/erase.h000066400000000000000000000032141354714241400144730ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. 
 *
 * In addition, as a special exception, the copyright holders give
 * permission to link the code of portions of this program with the
 * OpenSSL library under certain conditions as described in each
 * individual source file, and distribute linked combinations including
 * the two.
 *
 * You must obey the GNU General Public License in all respects for all
 * of the code used other than OpenSSL. If you modify file(s) with this
 * exception, you may extend this exception to your version of the
 * file(s), but you are not obligated to do so. If you do not wish to do
 * so, delete this exception statement from your version. If you delete
 * this exception statement from all source files in the program, then
 * also delete it here.
 */

/* erase.c */
extern int s3backer_erase(struct s3b_config *config);
s3backer-1.5.4/fuse_ops.c000066400000000000000000000532201354714241400152140ustar00rootroot00000000000000
/*
 * s3backer - FUSE-based single file backing store via Amazon S3
 *
 * Copyright 2008-2011 Archie L. Cobbs
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
 *
 * In addition, as a special exception, the copyright holders give
 * permission to link the code of portions of this program with the
 * OpenSSL library under certain conditions as described in each
 * individual source file, and distribute linked combinations including
 * the two.
 *
 * You must obey the GNU General Public License in all respects for all
 * of the code used other than OpenSSL. If you modify file(s) with this
 * exception, you may extend this exception to your version of the
 * file(s), but you are not obligated to do so. If you do not wish to do
 * so, delete this exception statement from your version. If you delete
 * this exception statement from all source files in the program, then
 * also delete it here.
 */

#include "s3backer.h"
#include "block_cache.h"
#include "ec_protect.h"
#include "fuse_ops.h"
#include "http_io.h"
#include "s3b_config.h"

/****************************************************************************
 *                              DEFINITIONS                                 *
 ****************************************************************************/

/* Fixed inode numbers for the three filesystem entries */
#define ROOT_INODE      1
#define FILE_INODE      2
#define STATS_INODE     3

/* Represents an open 'stats' file */
struct stat_file {
    char                *buf;           // note: not necessarily nul-terminated
    size_t              len;            // length of string in 'buf'
    size_t              bufsiz;         // size allocated for 'buf'
    int                 memerr;         // we got a memory error
};

/* Private information */
struct fuse_ops_private {
    struct s3backer_store   *s3b;           /* underlying store */
    u_int                   block_bits;     /* log2(block_size) */
    off_t                   file_size;      /* size of the backed file */
    time_t                  start_time;     /* time of mount */
    time_t                  file_atime;     /* backed file access time */
    time_t                  file_mtime;     /* backed file modification time */
    time_t                  stats_atime;    /* stats file access time */
};

/****************************************************************************
 *                          FUNCTION DECLARATIONS                           *
 ****************************************************************************/

/* FUSE functions */
static void *fuse_op_init(struct fuse_conn_info *conn);
static void fuse_op_destroy(void *data);
static int fuse_op_getattr(const char *path, struct stat *st);
static int fuse_op_fgetattr(const char *path, struct stat *st, struct fuse_file_info *);
static int fuse_op_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset, struct fuse_file_info *fi);
static int fuse_op_open(const char *path, struct fuse_file_info *fi);
static int fuse_op_release(const char *path, struct fuse_file_info *fi);
static int fuse_op_read(const char *path, char *buf, size_t size, off_t offset, struct fuse_file_info *fi);
static int fuse_op_write(const char *path, const char *buf, size_t size, off_t offset, struct fuse_file_info *fi);
static int fuse_op_statfs(const char *path, struct statvfs *st);
static int fuse_op_truncate(const char *path, off_t size);
static int fuse_op_flush(const char *path, struct fuse_file_info *fi);
static int fuse_op_fsync(const char *path, int isdatasync, struct fuse_file_info *fi);
static int fuse_op_unlink(const char *path);
#if FUSE_FALLOCATE
static int fuse_op_fallocate(const char *path, int mode, off_t offset, off_t len, struct fuse_file_info *fi);
#endif

/* Attribute functions */
static void fuse_op_getattr_file(struct fuse_ops_private *priv, struct stat *st);
static void fuse_op_getattr_stats(struct fuse_ops_private *priv, struct stat_file *sfile, struct stat *st);

/* Stats functions */
static struct stat_file *fuse_op_stats_create(struct fuse_ops_private *priv);
static void fuse_op_stats_destroy(struct stat_file *sfile);
static printer_t fuse_op_stats_printer;

/****************************************************************************
 *                          VARIABLE DEFINITIONS                            *
 ****************************************************************************/

/* FUSE operations */
const struct fuse_operations s3backer_fuse_ops = {
    .init       = fuse_op_init,
    .destroy    = fuse_op_destroy,
    .getattr    = fuse_op_getattr,
    .fgetattr   = fuse_op_fgetattr,
    .readdir    = fuse_op_readdir,
    .open       = fuse_op_open,
    .read       = fuse_op_read,
    .write      = fuse_op_write,
    .statfs     = fuse_op_statfs,
    .truncate   = fuse_op_truncate,
    .flush      = fuse_op_flush,
    .fsync      = fuse_op_fsync,
    .release    = fuse_op_release,
    .unlink     = fuse_op_unlink,
#if FUSE_FALLOCATE
    .fallocate  = fuse_op_fallocate,
#endif
};

/* Configuration and underlying s3backer_store */
static struct fuse_ops_conf *config;
static struct fuse_ops_private *the_priv;

/****************************************************************************
 *                      PUBLIC FUNCTION DEFINITIONS                         *
****************************************************************************/ const struct fuse_operations * fuse_ops_create(struct fuse_ops_conf *config0, struct s3backer_store *s3b) { /* Sanity check */ assert(config0 != NULL); assert(s3b != NULL); /* Prevent duplicate invocation */ if (config != NULL || the_priv != NULL) { (*config0->log)(LOG_ERR, "fuse_ops_create(): duplicate invocation"); return NULL; } /* Create private structure */ if ((the_priv = calloc(1, sizeof(*the_priv))) == NULL) { (*config->log)(LOG_ERR, "fuse_ops_create(): %s", strerror(errno)); return NULL; } the_priv->s3b = s3b; /* Now we're ready */ config = config0; return &s3backer_fuse_ops; } /**************************************************************************** * FUSE OPERATION FUNCTIONS * ****************************************************************************/ static void * fuse_op_init(struct fuse_conn_info *conn) { struct s3b_config *const s3bconf = config->s3bconf; struct fuse_ops_private *const priv = the_priv; int r; /* Sanity check */ assert(priv != NULL); assert(priv->s3b != NULL); /* Initialize */ priv->block_bits = ffs(config->block_size) - 1; priv->start_time = time(NULL); priv->file_atime = priv->start_time; priv->file_mtime = priv->start_time; priv->stats_atime = priv->start_time; priv->file_size = config->num_blocks * config->block_size; /* Startup background threads now that we have fork()'d */ if ((r = (*priv->s3b->create_threads)(priv->s3b)) != 0) { (*config->log)(LOG_ERR, "fuse_op_init(): can't create threads: %s", strerror(errno)); return NULL; } /* Done */ (*config->log)(LOG_INFO, "mounting %s", s3bconf->mount); return priv; } static void fuse_op_destroy(void *data) { struct fuse_ops_private *const priv = data; struct s3backer_store *const s3b = priv != NULL ? 
priv->s3b : NULL; struct s3b_config *const s3bconf = config->s3bconf; int r; /* Sanity check */ if (priv == NULL || s3b == NULL) return; (*config->log)(LOG_INFO, "unmount %s: initiated", s3bconf->mount); /* Flush dirty data */ if (!config->read_only) { (*config->log)(LOG_INFO, "unmount %s: flushing dirty data", s3bconf->mount); if ((r = (*s3b->flush)(s3b)) != 0) (*config->log)(LOG_ERR, "unmount %s: flushing filesystem failed: %s", s3bconf->mount, strerror(r)); } /* Clear mount token */ if (!config->read_only) { (*config->log)(LOG_INFO, "unmount %s: clearing mount token", s3bconf->mount); if ((r = (*s3b->set_mount_token)(s3b, NULL, 0)) != 0) (*config->log)(LOG_ERR, "unmount %s: clearing mount token failed: %s", s3bconf->mount, strerror(r)); } /* Shutdown */ (*s3b->destroy)(s3b); (*config->log)(LOG_INFO, "unmount %s: completed", s3bconf->mount); free(priv); } static int fuse_op_getattr(const char *path, struct stat *st) { struct fuse_ops_private *const priv = (struct fuse_ops_private *)fuse_get_context()->private_data; memset(st, 0, sizeof(*st)); if (strcmp(path, "/") == 0) { st->st_mode = S_IFDIR | 0755; st->st_nlink = 2; st->st_ino = ROOT_INODE; st->st_uid = config->uid; st->st_gid = config->gid; if (priv != NULL) { st->st_atime = priv->start_time; st->st_mtime = priv->start_time; st->st_ctime = priv->start_time; } return 0; } if (priv == NULL) return -ENOENT; if (*path == '/' && strcmp(path + 1, config->filename) == 0) { fuse_op_getattr_file(priv, st); return 0; } if (*path == '/' && config->print_stats != NULL && strcmp(path + 1, config->stats_filename) == 0) { struct stat_file *sfile; if ((sfile = fuse_op_stats_create(priv)) == NULL) return -ENOMEM; fuse_op_getattr_stats(priv, sfile, st); fuse_op_stats_destroy(sfile); return 0; } return -ENOENT; } static int fuse_op_fgetattr(const char *path, struct stat *st, struct fuse_file_info *fi) { struct fuse_ops_private *const priv = (struct fuse_ops_private *)fuse_get_context()->private_data; if (fi->fh != 0) { struct 
stat_file *const sfile = (struct stat_file *)(uintptr_t)fi->fh; fuse_op_getattr_stats(priv, sfile, st); } else fuse_op_getattr_file(priv, st); return 0; } static void fuse_op_getattr_file(struct fuse_ops_private *priv, struct stat *st) { st->st_mode = S_IFREG | config->file_mode; st->st_nlink = 1; st->st_ino = FILE_INODE; st->st_uid = config->uid; st->st_gid = config->gid; st->st_size = priv->file_size; st->st_blksize = config->block_size; st->st_blocks = config->num_blocks; st->st_atime = priv->file_atime; st->st_mtime = priv->file_mtime; st->st_ctime = priv->start_time; } static void fuse_op_getattr_stats(struct fuse_ops_private *priv, struct stat_file *sfile, struct stat *st) { st->st_mode = S_IFREG | S_IRUSR | S_IRGRP | S_IROTH; st->st_nlink = 1; st->st_ino = STATS_INODE; st->st_uid = config->uid; st->st_gid = config->gid; st->st_size = sfile->len; st->st_blksize = config->block_size; st->st_blocks = 0; st->st_atime = priv->stats_atime; st->st_mtime = time(NULL); st->st_ctime = priv->start_time; } static int fuse_op_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset, struct fuse_file_info *fi) { struct fuse_ops_private *const priv = (struct fuse_ops_private *)fuse_get_context()->private_data; (void)offset; (void)fi; if (strcmp(path, "/") != 0) return -ENOENT; if (filler(buf, ".", NULL, 0) != 0) return -ENOMEM; if (filler(buf, "..", NULL, 0) != 0) return -ENOMEM; if (priv != NULL) { if (filler(buf, config->filename, NULL, 0) != 0) return -ENOMEM; if (config->print_stats != NULL && config->stats_filename != NULL) { if (filler(buf, config->stats_filename, NULL, 0) != 0) return -ENOMEM; } } return 0; } static int fuse_op_open(const char *path, struct fuse_file_info *fi) { struct fuse_ops_private *const priv = (struct fuse_ops_private *)fuse_get_context()->private_data; /* Sanity check */ if (priv == NULL) return -ENOENT; /* Backed file */ if (*path == '/' && strcmp(path + 1, config->filename) == 0) { fi->fh = 0; priv->file_atime = time(NULL); 
if (config->direct_io) fi->direct_io = 1; return 0; } /* Stats file */ if (*path == '/' && config->print_stats != NULL && strcmp(path + 1, config->stats_filename) == 0) { struct stat_file *sfile; if ((sfile = fuse_op_stats_create(priv)) == NULL) return -ENOMEM; fi->fh = (uint64_t)(uintptr_t)sfile; priv->stats_atime = time(NULL); fi->direct_io = 1; return 0; } /* Unknown file */ return -ENOENT; } static int fuse_op_release(const char *path, struct fuse_file_info *fi) { if (fi->fh != 0) { struct stat_file *const sfile = (struct stat_file *)(uintptr_t)fi->fh; fuse_op_stats_destroy(sfile); } return 0; } static int fuse_op_read(const char *path, char *buf, size_t size, off_t offset, struct fuse_file_info *fi) { struct fuse_ops_private *const priv = (struct fuse_ops_private *)fuse_get_context()->private_data; const u_int mask = config->block_size - 1; size_t orig_size = size; s3b_block_t block_num; size_t num_blocks; int r; /* Handle stats file */ if (fi->fh != 0) { struct stat_file *const sfile = (struct stat_file *)(uintptr_t)fi->fh; if (offset > sfile->len) return 0; if (offset + size > sfile->len) size = sfile->len - offset; memcpy(buf, sfile->buf + offset, size); priv->stats_atime = time(NULL); return size; } /* Check for end of file */ if (offset > priv->file_size) { (*config->log)(LOG_ERR, "read offset=0x%jx size=0x%jx out of range", (uintmax_t)offset, (uintmax_t)size); return -ESPIPE; } if (offset + size > priv->file_size) { size = priv->file_size - offset; orig_size = size; } /* Read first block fragment (if any) */ if ((offset & mask) != 0) { size_t fragoff = (size_t)(offset & mask); size_t fraglen = (size_t)config->block_size - fragoff; if (fraglen > size) fraglen = size; block_num = offset >> priv->block_bits; if ((r = (*priv->s3b->read_block_part)(priv->s3b, block_num, fragoff, fraglen, buf)) != 0) return -r; buf += fraglen; offset += fraglen; size -= fraglen; } /* Get block number and count */ block_num = offset >> priv->block_bits; num_blocks = size >> 
priv->block_bits; /* Read intermediate complete blocks */ while (num_blocks-- > 0) { if ((r = (*priv->s3b->read_block)(priv->s3b, block_num++, buf, NULL, NULL, 0)) != 0) return -r; buf += config->block_size; } /* Read last block fragment (if any) */ if ((size & mask) != 0) { const size_t fraglen = size & mask; if ((r = (*priv->s3b->read_block_part)(priv->s3b, block_num, 0, fraglen, buf)) != 0) return -r; } /* Done */ priv->file_atime = time(NULL); return orig_size; } static int fuse_op_write(const char *path, const char *buf, size_t size, off_t offset, struct fuse_file_info *fi) { struct fuse_ops_private *const priv = (struct fuse_ops_private *)fuse_get_context()->private_data; const u_int mask = config->block_size - 1; size_t orig_size = size; s3b_block_t block_num; size_t num_blocks; int r; /* Handle read-only flag */ if (config->read_only) return -EROFS; /* Handle stats file */ if (fi->fh != 0) return -EINVAL; /* Check for end of file */ if (offset > priv->file_size) { (*config->log)(LOG_ERR, "write offset=0x%jx size=0x%jx out of range", (uintmax_t)offset, (uintmax_t)size); return -ESPIPE; } if (offset + size > priv->file_size) { size = priv->file_size - offset; orig_size = size; } /* Handle request to write nothing */ if (size == 0) return 0; /* Write first block fragment (if any) */ if ((offset & mask) != 0) { size_t fragoff = (size_t)(offset & mask); size_t fraglen = (size_t)config->block_size - fragoff; if (fraglen > size) fraglen = size; block_num = offset >> priv->block_bits; if ((r = (*priv->s3b->write_block_part)(priv->s3b, block_num, fragoff, fraglen, buf)) != 0) return -r; buf += fraglen; offset += fraglen; size -= fraglen; } /* Get block number and count */ block_num = offset >> priv->block_bits; num_blocks = size >> priv->block_bits; /* Write intermediate complete blocks */ while (num_blocks-- > 0) { if ((r = (*priv->s3b->write_block)(priv->s3b, block_num++, buf, NULL, NULL, NULL)) != 0) return -r; buf += config->block_size; } /* Write last block 
fragment (if any) */ if ((size & mask) != 0) { const size_t fraglen = size & mask; if ((r = (*priv->s3b->write_block_part)(priv->s3b, block_num, 0, fraglen, buf)) != 0) return -r; } /* Done */ priv->file_mtime = time(NULL); return orig_size; } static int fuse_op_statfs(const char *path, struct statvfs *st) { st->f_bsize = config->block_size; st->f_frsize = config->block_size; st->f_blocks = config->num_blocks; st->f_bfree = 0; st->f_bavail = 0; st->f_files = 3; st->f_ffree = 0; st->f_favail = 0; return 0; } static int fuse_op_truncate(const char *path, off_t size) { return 0; } static int fuse_op_flush(const char *path, struct fuse_file_info *fi) { return 0; } static int fuse_op_fsync(const char *path, int isdatasync, struct fuse_file_info *fi) { return 0; } static int fuse_op_unlink(const char *path) { /* Handle stats file */ if (*path == '/' && strcmp(path + 1, config->stats_filename) == 0) { if (config->clear_stats == NULL) return -EOPNOTSUPP; (*config->clear_stats)(); return 0; } /* Not supported */ return -EOPNOTSUPP; } #if FUSE_FALLOCATE static int fuse_op_fallocate(const char *path, int mode, off_t offset, off_t len, struct fuse_file_info *fi) { struct fuse_ops_private *const priv = (struct fuse_ops_private *)fuse_get_context()->private_data; const u_int mask = config->block_size - 1; size_t size = (size_t)len; s3b_block_t block_num; void *zero_block; size_t num_blocks; int r; /* Handle stats file */ if (fi->fh != 0) return -EOPNOTSUPP; /* Sanity check */ if (offset < 0 || len <= 0) return -EINVAL; if (offset + len > priv->file_size) return -ENOSPC; /* Handle request */ if ((mode & FALLOC_FL_PUNCH_HOLE) == 0) return 0; /* if ((mode & FALLOC_FL_KEEP_SIZE) == 0) return -EINVAL; */ /* Create an empty block */ if ((zero_block = calloc(1, config->block_size)) == NULL) return -ENOMEM; /* Write first block fragment (if any) */ if ((offset & mask) != 0) { size_t fragoff = (size_t)(offset & mask); size_t fraglen = (size_t)config->block_size - fragoff; if (fraglen > 
size) fraglen = size; block_num = offset >> priv->block_bits; if ((r = (*priv->s3b->write_block_part)(priv->s3b, block_num, fragoff, fraglen, zero_block)) != 0) { free(zero_block); return -r; } offset += fraglen; size -= fraglen; } /* Get block number and count */ block_num = offset >> priv->block_bits; num_blocks = size >> priv->block_bits; /* Write intermediate complete blocks */ while (num_blocks-- > 0) { if ((r = (*priv->s3b->write_block)(priv->s3b, block_num++, NULL, NULL, NULL, NULL)) != 0) { free(zero_block); return -r; } } /* Write last block fragment (if any) */ if ((size & mask) != 0) { const size_t fraglen = size & mask; if ((r = (*priv->s3b->write_block_part)(priv->s3b, block_num, 0, fraglen, zero_block)) != 0) { free(zero_block); return -r; } } /* Done */ priv->file_mtime = time(NULL); free(zero_block); return 0; } #endif /**************************************************************************** * OTHER INTERNAL FUNCTIONS * ****************************************************************************/ static struct stat_file * fuse_op_stats_create(struct fuse_ops_private *priv) { struct stat_file *sfile; if ((sfile = calloc(1, sizeof(*sfile))) == NULL) return NULL; (*config->print_stats)(sfile, fuse_op_stats_printer); if (sfile->memerr != 0) { fuse_op_stats_destroy(sfile); return NULL; } return sfile; } static void fuse_op_stats_destroy(struct stat_file *sfile) { free(sfile->buf); free(sfile); } static void fuse_op_stats_printer(void *prarg, const char *fmt, ...) 
{ struct stat_file *const sfile = prarg; va_list args; char *new_buf; size_t new_bufsiz; size_t remain; int added; /* Bail if no memory */ if (sfile->memerr) return; again: /* Append to string buffer */ remain = sfile->bufsiz - sfile->len; va_start(args, fmt); added = vsnprintf(sfile->buf + sfile->len, sfile->bufsiz - sfile->len, fmt, args); va_end(args); if (added + 1 <= remain) { sfile->len += added; return; } /* We need a bigger buffer */ new_bufsiz = ((sfile->bufsiz + added + 1023) / 1024) * 1024; if ((new_buf = realloc(sfile->buf, new_bufsiz)) == NULL) { sfile->memerr = 1; return; } sfile->buf = new_buf; sfile->bufsiz = new_bufsiz; goto again; } s3backer-1.5.4/fuse_ops.h000066400000000000000000000050441354714241400152220ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. 
If you modify file(s) with this
 * exception, you may extend this exception to your version of the
 * file(s), but you are not obligated to do so. If you do not wish to do
 * so, delete this exception statement from your version. If you delete
 * this exception statement from all source files in the program, then
 * also delete it here.
 */

/* Forward decl's */
struct s3b_config;
struct s3backer_store;

/* Function types */
typedef void printer_t(void *prarg, const char *fmt, ...) __attribute__ ((__format__ (__printf__, 2, 3)));
typedef void print_stats_t(void *prarg, printer_t *printer);
typedef void clear_stats_t(void);

/* Configuration info structure for fuse_ops */
struct fuse_ops_conf {
    struct s3b_config   *s3bconf;           /* overall s3backer configuration */
    print_stats_t       *print_stats;       /* stats rendering callback (NULL disables stats file) */
    clear_stats_t       *clear_stats;       /* stats reset callback */
    int                 read_only;          /* refuse writes */
    int                 direct_io;          /* open backed file with direct I/O */
    const char          *filename;          /* name of the backed file */
    const char          *stats_filename;    /* name of the stats file */
    uid_t               uid;                /* owner uid for files */
    gid_t               gid;                /* owner gid for files */
    u_int               block_size;         /* block size in bytes */
    off_t               num_blocks;         /* total number of blocks */
    int                 file_mode;          /* permission bits for backed file */
    log_func_t          *log;               /* logging callback */
};

/* fuse_ops.c */
const struct fuse_operations *fuse_ops_create(struct fuse_ops_conf *config, struct s3backer_store *s3b);
s3backer-1.5.4/hash.c000066400000000000000000000140531354714241400143150ustar00rootroot00000000000000
/*
 * s3backer - FUSE-based single file backing store via Amazon S3
 *
 * Copyright 2008-2011 Archie L. Cobbs
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
 * 02110-1301, USA.
* * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. */ /* * This is a simple closed hash table implementation with linear probing. * We pre-allocate the hash array based on the expected maximum size. */ #include "s3backer.h" #include "hash.h" /* Definitions */ #define LOAD_FACTOR 0.666666 #define FIRST(hash, key) (s3b_hash_index((hash), (key))) #define NEXT(hash, index) ((index) + 1 < (hash)->alen ? 
(index) + 1 : 0) #define EMPTY(value) ((value) == NULL) #define VALUE(hash, index) ((hash)->array[(index)]) #define KEY(value) (*(s3b_block_t *)(value)) /* Hash table structure */ struct s3b_hash { u_int maxkeys; /* max capacity */ u_int numkeys; /* number of keys in table */ u_int alen; /* hash array length */ void *array[0]; /* hash array */ }; /* Declarations */ static u_int s3b_hash_index(struct s3b_hash *hash, s3b_block_t key); /* Public functions */ int s3b_hash_create(struct s3b_hash **hashp, u_int maxkeys) { struct s3b_hash *hash; u_int alen; if (maxkeys >= (u_int)(UINT_MAX * LOAD_FACTOR) - 1) return EINVAL; alen = (u_int)(maxkeys / LOAD_FACTOR) + 1; if ((hash = calloc(1, sizeof(*hash) + alen * sizeof(*hash->array))) == NULL) return ENOMEM; hash->maxkeys = maxkeys; hash->alen = alen; *hashp = hash; return 0; } void s3b_hash_destroy(struct s3b_hash *hash) { free(hash); } u_int s3b_hash_size(struct s3b_hash *hash) { return hash->numkeys; } void * s3b_hash_get(struct s3b_hash *hash, s3b_block_t key) { u_int i; for (i = FIRST(hash, key); 1; i = NEXT(hash, i)) { void *const value = VALUE(hash, i); if (EMPTY(value)) return NULL; if (KEY(value) == key) return value; } } /* * Add/replace entry. * * Note that the value being replaced (if any) is referenced by this function, * so it should not be free'd until after this function returns. */ void * s3b_hash_put(struct s3b_hash *hash, void *value) { const s3b_block_t key = KEY(value); u_int i; for (i = FIRST(hash, key); 1; i = NEXT(hash, i)) { void *const value2 = VALUE(hash, i); if (EMPTY(value)) break; if (KEY(value2) == key) { VALUE(hash, i) = value; /* replace existing value having the same key with new value */ return value2; } } assert(hash->numkeys < hash->maxkeys); VALUE(hash, i) = value; hash->numkeys++; return NULL; } /* * Optimization of s3b_hash_put() for when it is known that no matching entry exists. 
*/
void
s3b_hash_put_new(struct s3b_hash *hash, void *value)
{
    const s3b_block_t key = KEY(value);
    u_int i;

    /* Probe until an empty slot is found; asserts verify the key really is absent */
    for (i = FIRST(hash, key); 1; i = NEXT(hash, i)) {
        void *const value2 = VALUE(hash, i);

        if (EMPTY(value2))
            break;
        assert(KEY(value2) != key);
    }
    assert(hash->numkeys < hash->maxkeys);
    VALUE(hash, i) = value;
    hash->numkeys++;
}

/*
 * Remove the entry with the given key, if any.
 *
 * Because this is a linear-probing table, simply emptying the slot would break
 * lookups for later entries in the same probe run, so subsequent entries are
 * relocated backward as needed ("Algorithm R"-style deletion repair).
 */
void
s3b_hash_remove(struct s3b_hash *hash, s3b_block_t key)
{
    u_int i;
    u_int j;
    u_int k;

    /* Find entry */
    for (i = FIRST(hash, key); 1; i = NEXT(hash, i)) {
        void *const value = VALUE(hash, i);

        if (EMPTY(value))           /* no such entry */
            return;
        if (KEY(value) == key)      /* entry found */
            break;
    }

    /* Repair subsequent entries as necessary */
    for (j = NEXT(hash, i); 1; j = NEXT(hash, j)) {
        void *const value = VALUE(hash, j);

        if (value == NULL)
            break;
        /* k is where this entry's probe run starts; move it into the hole at i
           unless its home slot lies strictly between i (exclusive) and j (inclusive),
           with both comparisons written to handle wraparound of the probe run */
        k = FIRST(hash, KEY(value));
        if (j > i ? (k <= i || k > j) : (k <= i && k > j)) {
            VALUE(hash, i) = value;
            i = j;
        }
    }

    /* Remove entry */
    assert(VALUE(hash, i) != NULL);
    VALUE(hash, i) = NULL;
    hash->numkeys--;
}

/* Invoke "visitor" on every entry in the table (iteration order is unspecified) */
void
s3b_hash_foreach(struct s3b_hash *hash, s3b_hash_visit_t *visitor, void *arg)
{
    u_int i;

    for (i = 0; i < hash->alen; i++) {
        void *const value = VALUE(hash, i);

        if (value != NULL)
            (*visitor)(arg, value);
    }
}

/*
 * Jenkins one-at-a-time hash
 */
static u_int
s3b_hash_index(struct s3b_hash *hash, s3b_block_t key)
{
    u_int value = 0;
    int i;

    /* Mix each byte of the key, then apply the final avalanche steps */
    for (i = 0; i < sizeof(key); i++) {
        value += ((u_char *)&key)[i];
        value += (value << 10);
        value ^= (value >> 6);
    }
    value += (value << 3);
    value ^= (value >> 11);
    value += (value << 15);
    return value % hash->alen;
}
s3backer-1.5.4/hash.h000066400000000000000000000047211354714241400143230ustar00rootroot00000000000000
/*
 * s3backer - FUSE-based single file backing store via Amazon S3
 *
 * Copyright 2008-2011 Archie L.
Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. */ /* * Our hash table implementation. * * We make the following simplifying assumptions: * * 1. Keys are of type s3b_block_t * 2. Values are structures in which the first field is the key * 3. 
No attempts will be made to overload the table */ /* Definitions */ typedef void s3b_hash_visit_t(void *arg, void *value); /* Declarations */ struct s3b_hash; /* hash.c */ extern int s3b_hash_create(struct s3b_hash **hashp, u_int maxkeys); extern void s3b_hash_destroy(struct s3b_hash *hash); extern u_int s3b_hash_size(struct s3b_hash *hash); extern void *s3b_hash_get(struct s3b_hash *hash, s3b_block_t key); extern void *s3b_hash_put(struct s3b_hash *hash, void *value); extern void s3b_hash_put_new(struct s3b_hash *hash, void *value); extern void s3b_hash_remove(struct s3b_hash *hash, s3b_block_t key); extern void s3b_hash_foreach(struct s3b_hash *hash, s3b_hash_visit_t *visitor, void *arg); s3backer-1.5.4/http_io.c000066400000000000000000003162301354714241400150420ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. 
* * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. */ #include "s3backer.h" #include "block_part.h" #include "http_io.h" /* HTTP definitions */ #define HTTP_GET "GET" #define HTTP_PUT "PUT" #define HTTP_DELETE "DELETE" #define HTTP_HEAD "HEAD" #define HTTP_NOT_MODIFIED 304 #define HTTP_UNAUTHORIZED 401 #define HTTP_FORBIDDEN 403 #define HTTP_NOT_FOUND 404 #define HTTP_PRECONDITION_FAILED 412 #define AUTH_HEADER "Authorization" #define CTYPE_HEADER "Content-Type" #define CONTENT_ENCODING_HEADER "Content-Encoding" #define ACCEPT_ENCODING_HEADER "Accept-Encoding" #define ETAG_HEADER "ETag" #define CONTENT_ENCODING_DEFLATE "deflate" #define CONTENT_ENCODING_ENCRYPT "encrypt" #define MD5_HEADER "Content-MD5" #define ACL_HEADER "x-amz-acl" #define CONTENT_SHA256_HEADER "x-amz-content-sha256" #define SSE_HEADER "x-amz-server-side-encryption" #define STORAGE_CLASS_HEADER "x-amz-storage-class" #define FILE_SIZE_HEADER "x-amz-meta-s3backer-filesize" #define BLOCK_SIZE_HEADER "x-amz-meta-s3backer-blocksize" #define MOUNT_TOKEN_HEADER "x-amz-meta-s3backer-mount-token" #define HMAC_HEADER "x-amz-meta-s3backer-hmac" #define IF_MATCH_HEADER "If-Match" #define IF_NONE_MATCH_HEADER "If-None-Match" /* MIME type for blocks */ #define CONTENT_TYPE "application/x-s3backer-block" /* Mount token file */ #define MOUNT_TOKEN_FILE "s3backer-mounted" #define MOUNT_TOKEN_FILE_MIME_TYPE "text/plain" /* HTTP `Date' and `x-amz-date' header formats */ #define HTTP_DATE_HEADER "Date" #define AWS_DATE_HEADER "x-amz-date" #define HTTP_DATE_BUF_FMT "%a, %d %b %Y %H:%M:%S GMT" #define AWS_DATE_BUF_FMT 
"%Y%m%dT%H%M%SZ" #define DATE_BUF_SIZE 64 /* Size required for URL buffer */ #define URL_BUF_SIZE(config) (strlen((config)->baseURL) \ + strlen((config)->bucket) + 1 \ + strlen((config)->prefix) \ + S3B_BLOCK_NUM_DIGITS + 1 \ + S3B_BLOCK_NUM_DIGITS + 2) /* Bucket listing API constants */ #define LIST_PARAM_MARKER "marker" #define LIST_PARAM_PREFIX "prefix" #define LIST_PARAM_MAX_KEYS "max-keys" #define LIST_ELEM_LIST_BUCKET_RESLT "ListBucketResult" #define LIST_ELEM_IS_TRUNCATED "IsTruncated" #define LIST_ELEM_CONTENTS "Contents" #define LIST_ELEM_KEY "Key" #define LIST_TRUE "true" #define LIST_MAX_PATH (sizeof(LIST_ELEM_LIST_BUCKET_RESLT) \ + sizeof(LIST_ELEM_CONTENTS) \ + sizeof(LIST_ELEM_KEY) + 1) /* How many blocks to list at a time */ #define LIST_BLOCKS_CHUNK 0x100 /* PBKDF2 key generation iterations */ #define PBKDF2_ITERATIONS 5000 /* Enable to debug encryption key stuff */ #define DEBUG_ENCRYPTION 0 /* Enable to debug authentication stuff */ #define DEBUG_AUTHENTICATION 0 /* Enable to debug parsing block list response */ #define DEBUG_BLOCK_LIST 0 /* Version 4 authentication stuff */ #define SIGNATURE_ALGORITHM "AWS4-HMAC-SHA256" #define ACCESS_KEY_PREFIX "AWS4" #define S3_SERVICE_NAME "s3" #define SIGNATURE_TERMINATOR "aws4_request" #define SECURITY_TOKEN_HEADER "x-amz-security-token" /* EC2 IAM info URL */ #define EC2_IAM_META_DATA_URLBASE "http://169.254.169.254/latest/meta-data/iam/security-credentials/" #define EC2_IAM_META_DATA_ACCESSID "AccessKeyId" #define EC2_IAM_META_DATA_ACCESSKEY "SecretAccessKey" #define EC2_IAM_META_DATA_TOKEN "Token" /* TCP keep-alive */ #define TCP_KEEP_ALIVE_IDLE 200 #define TCP_KEEP_ALIVE_INTERVAL 60 /* Misc */ #define WHITESPACE " \t\v\f\r\n" /* * HTTP-based implementation of s3backer_store. * * This implementation does no caching or consistency checking. 
*/ /* Internal definitions */ struct curl_holder { CURL *curl; LIST_ENTRY(curl_holder) link; }; /* Internal state */ struct http_io_private { struct http_io_conf *config; struct http_io_stats stats; LIST_HEAD(, curl_holder) curls; pthread_mutex_t mutex; u_int *non_zero; // config->nonzero_bitmap is moved to here pthread_t iam_thread; // IAM credentials refresh thread u_char iam_thread_alive; // IAM thread was successfully created u_char iam_thread_shutdown; // Flag to the IAM thread telling it to exit /* Encryption info */ const EVP_CIPHER *cipher; u_int keylen; // length of key and ivkey u_char key[EVP_MAX_KEY_LENGTH]; // key used to encrypt data u_char ivkey[EVP_MAX_KEY_LENGTH]; // key used to encrypt block number to get IV for data }; /* I/O buffers */ struct http_io_bufs { size_t rdremain; size_t wrremain; char *rddata; const char *wrdata; }; /* I/O state when reading/writing a block */ struct http_io { // I/O buffers struct http_io_bufs bufs; // XML parser and bucket listing info XML_Parser xml; // XML parser int xml_error; // XML parse error (if any) int xml_error_line; // XML parse error line int xml_error_column; // XML parse error column char *xml_path; // Current XML path char *xml_text; // Current XML text int xml_text_len; // # chars in 'xml_text' buffer int xml_text_max; // max chars in 'xml_text' buffer int list_truncated; // returned list was truncated s3b_block_t last_block; // last dirty block listed block_list_func_t *callback_func; // callback func for listing blocks void *callback_arg; // callback arg for listing blocks struct http_io_conf *config; // configuration // Other info that needs to be passed around const char *method; // HTTP method const char *url; // HTTP URL struct curl_slist *headers; // HTTP headers const char *sse; // Server Side Encryption void *dest; // Block data (when reading) const void *src; // Block data (when writing) s3b_block_t block_num; // The block we're reading/writing u_int buf_size; // Size of data buffer u_int 
*content_lengthp; // Returned Content-Length uintmax_t file_size; // file size from "x-amz-meta-s3backer-filesize" u_int block_size; // block size from "x-amz-meta-s3backer-blocksize" int32_t mount_token; // mount_token from "x-amz-meta-s3backer-mount-token" u_int expect_304; // a verify request; expect a 304 response u_char md5[MD5_DIGEST_LENGTH]; // parsed ETag header u_char hmac[SHA_DIGEST_LENGTH];// parsed "x-amz-meta-s3backer-hmac" header char content_encoding[32]; // received content encoding check_cancel_t *check_cancel; // write check-for-cancel callback void *check_cancel_arg; // write check-for-cancel callback argument }; /* CURL prepper function type */ typedef void http_io_curl_prepper_t(CURL *curl, struct http_io *io); /* s3backer_store functions */ static int http_io_create_threads(struct s3backer_store *s3b); static int http_io_meta_data(struct s3backer_store *s3b, off_t *file_sizep, u_int *block_sizep); static int http_io_set_mount_token(struct s3backer_store *s3b, int32_t *old_valuep, int32_t new_value); static int http_io_read_block(struct s3backer_store *s3b, s3b_block_t block_num, void *dest, u_char *actual_md5, const u_char *expect_md5, int strict); static int http_io_write_block(struct s3backer_store *s3b, s3b_block_t block_num, const void *src, u_char *md5, check_cancel_t *check_cancel, void *check_cancel_arg); static int http_io_read_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int off, u_int len, void *dest); static int http_io_write_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int off, u_int len, const void *src); static int http_io_list_blocks(struct s3backer_store *s3b, block_list_func_t *callback, void *arg); static int http_io_flush(struct s3backer_store *s3b); static void http_io_destroy(struct s3backer_store *s3b); /* Other functions */ static http_io_curl_prepper_t http_io_head_prepper; static http_io_curl_prepper_t http_io_read_prepper; static http_io_curl_prepper_t http_io_write_prepper; 
static http_io_curl_prepper_t http_io_list_prepper; static http_io_curl_prepper_t http_io_iamcreds_prepper; /* S3 REST API functions */ static void http_io_get_block_url(char *buf, size_t bufsiz, struct http_io_conf *config, s3b_block_t block_num); static void http_io_get_mount_token_file_url(char *buf, size_t bufsiz, struct http_io_conf *config); static int http_io_add_auth(struct http_io_private *priv, struct http_io *io, time_t now, const void *payload, size_t plen); static int http_io_add_auth2(struct http_io_private *priv, struct http_io *io, time_t now, const void *payload, size_t plen); static int http_io_add_auth4(struct http_io_private *priv, struct http_io *io, time_t now, const void *payload, size_t plen); static size_t url_encode(const char *src, size_t len, char *dst, size_t buflen, int encode_slash); static void digest_url_encoded(EVP_MD_CTX* hash_ctx, const char *data, size_t len, int encode_slash); /* EC2 IAM thread */ static void *update_iam_credentials_main(void *arg); static int update_iam_credentials(struct http_io_private *priv); static char *parse_json_field(struct http_io_private *priv, const char *json, const char *field); /* Bucket listing functions */ static size_t http_io_curl_list_reader(const void *ptr, size_t size, size_t nmemb, void *stream); static void http_io_list_elem_start(void *arg, const XML_Char *name, const XML_Char **atts); static void http_io_list_elem_end(void *arg, const XML_Char *name); static void http_io_list_text(void *arg, const XML_Char *s, int len); /* HTTP and curl functions */ static int http_io_perform_io(struct http_io_private *priv, struct http_io *io, http_io_curl_prepper_t *prepper); static size_t http_io_curl_reader(const void *ptr, size_t size, size_t nmemb, void *stream); static size_t http_io_curl_writer(void *ptr, size_t size, size_t nmemb, void *stream); static size_t http_io_curl_header(void *ptr, size_t size, size_t nmemb, void *stream); static struct curl_slist *http_io_add_header(struct 
http_io_private *priv, struct curl_slist *headers, const char *fmt, ...) __attribute__ ((__format__ (__printf__, 3, 4))); static void http_io_add_date(struct http_io_private *priv, struct http_io *const io, time_t now); static CURL *http_io_acquire_curl(struct http_io_private *priv, struct http_io *io); static void http_io_release_curl(struct http_io_private *priv, CURL **curlp, int may_cache); /* Misc */ static void http_io_openssl_locker(int mode, int i, const char *file, int line); static u_long http_io_openssl_ider(void); static void http_io_base64_encode(char *buf, size_t bufsiz, const void *data, size_t len); static u_int http_io_crypt(struct http_io_private *priv, s3b_block_t block_num, int enc, const u_char *src, u_int len, u_char *dst); static void http_io_authsig(struct http_io_private *priv, s3b_block_t block_num, const u_char *src, u_int len, u_char *hmac); static void update_hmac_from_header(HMAC_CTX *ctx, struct http_io *io, const char *name, int value_only, char *sigbuf, size_t sigbuflen); static int http_io_is_zero_block(const void *data, u_int block_size); static s3b_block_t http_io_block_hash_prefix(s3b_block_t block_num); static int http_io_parse_hex(const char *str, u_char *buf, u_int nbytes); static int http_io_parse_hex_block_num(const char *string, s3b_block_t *block_nump); static void http_io_prhex(char *buf, const u_char *data, size_t len); static int http_io_strcasecmp_ptr(const void *ptr1, const void *ptr2); static int http_io_parse_header(const char *input, const char *header, const char *fmt, ...); /* Internal variables */ static pthread_mutex_t *openssl_locks; static int num_openssl_locks; static u_char zero_md5[MD5_DIGEST_LENGTH]; static u_char zero_hmac[SHA_DIGEST_LENGTH]; static const s3b_block_t last_possible_block = (s3b_block_t)~0L; /* * Constructor * * On error, returns NULL and sets `errno'. 
*/ struct s3backer_store * http_io_create(struct http_io_conf *config) { struct s3backer_store *s3b; struct http_io_private *priv; struct curl_holder *holder; int nlocks; int r; /* Sanity check: we can really only handle one instance */ if (openssl_locks != NULL) { (*config->log)(LOG_ERR, "http_io_create() called twice"); r = EALREADY; goto fail0; } /* Initialize structures */ if ((s3b = calloc(1, sizeof(*s3b))) == NULL) { r = errno; goto fail0; } s3b->create_threads = http_io_create_threads; s3b->meta_data = http_io_meta_data; s3b->set_mount_token = http_io_set_mount_token; s3b->read_block = http_io_read_block; s3b->write_block = http_io_write_block; s3b->read_block_part = http_io_read_block_part; s3b->write_block_part = http_io_write_block_part; s3b->list_blocks = http_io_list_blocks; s3b->flush = http_io_flush; s3b->destroy = http_io_destroy; if ((priv = calloc(1, sizeof(*priv))) == NULL) { r = errno; goto fail1; } priv->config = config; if ((r = pthread_mutex_init(&priv->mutex, NULL)) != 0) goto fail2; LIST_INIT(&priv->curls); s3b->data = priv; /* Initialize openssl */ num_openssl_locks = CRYPTO_num_locks(); if ((openssl_locks = malloc(num_openssl_locks * sizeof(*openssl_locks))) == NULL) { r = errno; goto fail3; } for (nlocks = 0; nlocks < num_openssl_locks; nlocks++) { if ((r = pthread_mutex_init(&openssl_locks[nlocks], NULL)) != 0) goto fail4; } CRYPTO_set_locking_callback(http_io_openssl_locker); CRYPTO_set_id_callback(http_io_openssl_ider); /* Avoid GCC unused-function warnings */ (void)http_io_openssl_locker; (void)http_io_openssl_ider; /* Initialize encryption */ if (config->encryption != NULL) { char saltbuf[strlen(config->bucket) + 1 + strlen(config->prefix) + 1]; u_int cipher_key_len; /* Sanity checks */ assert(config->password != NULL); assert(config->block_size % EVP_MAX_IV_LENGTH == 0); /* Find encryption algorithm */ OpenSSL_add_all_ciphers(); if ((priv->cipher = EVP_get_cipherbyname(config->encryption)) == NULL) { (*config->log)(LOG_ERR, "unknown 
encryption cipher `%s'", config->encryption); r = EINVAL; goto fail4; } cipher_key_len = EVP_CIPHER_key_length(priv->cipher); priv->keylen = config->key_length > 0 ? config->key_length : cipher_key_len; if (priv->keylen < cipher_key_len || priv->keylen > sizeof(priv->key)) { (*config->log)(LOG_ERR, "key length %u for cipher `%s' is out of range", priv->keylen, config->encryption); r = EINVAL; goto fail4; } /* Hash password to get bulk data encryption key */ snprintf(saltbuf, sizeof(saltbuf), "%s/%s", config->bucket, config->prefix); if ((r = PKCS5_PBKDF2_HMAC_SHA1(config->password, strlen(config->password), (u_char *)saltbuf, strlen(saltbuf), PBKDF2_ITERATIONS, priv->keylen, priv->key)) != 1) { (*config->log)(LOG_ERR, "failed to create encryption key"); r = EINVAL; goto fail4; } /* Hash the bulk encryption key to get the IV encryption key */ if ((r = PKCS5_PBKDF2_HMAC_SHA1((char *)priv->key, priv->keylen, priv->key, priv->keylen, PBKDF2_ITERATIONS, priv->keylen, priv->ivkey)) != 1) { (*config->log)(LOG_ERR, "failed to create encryption key"); r = EINVAL; goto fail4; } /* Encryption debug */ #if DEBUG_ENCRYPTION { char keybuf[priv->keylen * 2 + 1]; char ivkeybuf[priv->keylen * 2 + 1]; http_io_prhex(keybuf, priv->key, priv->keylen); http_io_prhex(ivkeybuf, priv->ivkey, priv->keylen); (*config->log)(LOG_DEBUG, "ENCRYPTION INIT: cipher=\"%s\" pass=\"%s\" salt=\"%s\" key=0x%s ivkey=0x%s", config->encryption, config->password, saltbuf, keybuf, ivkeybuf); } #endif } /* Initialize cURL */ curl_global_init(CURL_GLOBAL_ALL); /* Initialize IAM credentials */ if (config->ec2iam_role != NULL && (r = update_iam_credentials(priv)) != 0) goto fail5; /* Take ownership of non-zero block bitmap */ priv->non_zero = config->nonzero_bitmap; config->nonzero_bitmap = NULL; /* Done */ return s3b; fail5: while ((holder = LIST_FIRST(&priv->curls)) != NULL) { curl_easy_cleanup(holder->curl); LIST_REMOVE(holder, link); free(holder); } curl_global_cleanup(); fail4: 
CRYPTO_set_locking_callback(NULL); CRYPTO_set_id_callback(NULL); while (nlocks > 0) pthread_mutex_destroy(&openssl_locks[--nlocks]); free(openssl_locks); openssl_locks = NULL; num_openssl_locks = 0; fail3: pthread_mutex_destroy(&priv->mutex); fail2: free(priv); fail1: free(s3b); fail0: (*config->log)(LOG_ERR, "http_io creation failed: %s", strerror(r)); errno = r; return NULL; } /* * Destructor */ static void http_io_destroy(struct s3backer_store *const s3b) { struct http_io_private *const priv = s3b->data; struct http_io_conf *const config = priv->config; struct curl_holder *holder; int r; /* Shut down IAM thread */ if (priv->iam_thread_alive) { (*config->log)(LOG_DEBUG, "waiting for EC2 IAM thread to shutdown"); priv->iam_thread_shutdown = 1; if ((r = pthread_cancel(priv->iam_thread)) != 0) (*config->log)(LOG_ERR, "pthread_cancel: %s", strerror(r)); if ((r = pthread_join(priv->iam_thread, NULL)) != 0) (*config->log)(LOG_ERR, "pthread_join: %s", strerror(r)); else (*config->log)(LOG_DEBUG, "EC2 IAM thread successfully shutdown"); priv->iam_thread_alive = 0; } /* Clean up openssl */ while (num_openssl_locks > 0) pthread_mutex_destroy(&openssl_locks[--num_openssl_locks]); free(openssl_locks); openssl_locks = NULL; CRYPTO_set_locking_callback(NULL); CRYPTO_set_id_callback(NULL); /* Clean up cURL */ while ((holder = LIST_FIRST(&priv->curls)) != NULL) { curl_easy_cleanup(holder->curl); LIST_REMOVE(holder, link); free(holder); } curl_global_cleanup(); /* Free structures */ pthread_mutex_destroy(&priv->mutex); free(priv->non_zero); free(priv); free(s3b); } static int http_io_flush(struct s3backer_store *const s3b) { return 0; } void http_io_get_stats(struct s3backer_store *s3b, struct http_io_stats *stats) { struct http_io_private *const priv = s3b->data; pthread_mutex_lock(&priv->mutex); memcpy(stats, &priv->stats, sizeof(*stats)); pthread_mutex_unlock(&priv->mutex); } void http_io_clear_stats(struct s3backer_store *s3b) { struct http_io_private *const priv = s3b->data; 
pthread_mutex_lock(&priv->mutex); memset(&priv->stats, 0, sizeof(priv->stats)); pthread_mutex_unlock(&priv->mutex); } static int http_io_list_blocks(struct s3backer_store *s3b, block_list_func_t *callback, void *arg) { struct http_io_private *const priv = s3b->data; struct http_io_conf *const config = priv->config; char url_encoded_prefix[strlen(config->prefix) * 3 + 1]; char urlbuf[URL_BUF_SIZE(config) + sizeof("&marker=") + sizeof(url_encoded_prefix) + (S3B_BLOCK_NUM_DIGITS * 2) + 36]; struct http_io io; int r; /* Initialize I/O info */ memset(&io, 0, sizeof(io)); io.url = urlbuf; io.method = HTTP_GET; io.config = config; io.xml_error = XML_ERROR_NONE; io.callback_func = callback; io.callback_arg = arg; /* Create XML parser */ if ((io.xml = XML_ParserCreate(NULL)) == NULL) { (*config->log)(LOG_ERR, "failed to create XML parser"); return ENOMEM; } /* Allocate buffers for XML path and tag text content */ io.xml_text_max = strlen(config->prefix) + (S3B_BLOCK_NUM_DIGITS * 2) + 16; if ((io.xml_text = malloc(io.xml_text_max + 1)) == NULL) { (*config->log)(LOG_ERR, "malloc: %s", strerror(errno)); goto oom; } if ((io.xml_path = calloc(1, 1)) == NULL) { (*config->log)(LOG_ERR, "calloc: %s", strerror(errno)); goto oom; } /* List blocks */ do { const time_t now = time(NULL); /* Reset XML parser state */ XML_ParserReset(io.xml, NULL); XML_SetUserData(io.xml, &io); XML_SetElementHandler(io.xml, http_io_list_elem_start, http_io_list_elem_end); XML_SetCharacterDataHandler(io.xml, http_io_list_text); /* URL-encode prefix */ url_encode(config->prefix, strlen(config->prefix), url_encoded_prefix, sizeof(url_encoded_prefix), 1); /* Format URL */ snprintf(urlbuf, sizeof(urlbuf), "%s%s?", config->baseURL, config->vhost ? 
"" : config->bucket); /* Add URL parameters (note: must be in "canonical query string" format for proper authentication) */ if (io.list_truncated) { char block_hash_buf[S3B_BLOCK_NUM_DIGITS + 2]; http_io_format_block_hash(config, block_hash_buf, sizeof(block_hash_buf), io.last_block); snprintf(urlbuf + strlen(urlbuf), sizeof(urlbuf) - strlen(urlbuf), "%s=%s%s%0*jx&", LIST_PARAM_MARKER, url_encoded_prefix, block_hash_buf, S3B_BLOCK_NUM_DIGITS, (uintmax_t)io.last_block); } snprintf(urlbuf + strlen(urlbuf), sizeof(urlbuf) - strlen(urlbuf), "%s=%u", LIST_PARAM_MAX_KEYS, LIST_BLOCKS_CHUNK); snprintf(urlbuf + strlen(urlbuf), sizeof(urlbuf) - strlen(urlbuf), "&%s=%s", LIST_PARAM_PREFIX, url_encoded_prefix); /* Add Date header */ http_io_add_date(priv, &io, now); /* Add Authorization header */ if ((r = http_io_add_auth(priv, &io, now, NULL, 0)) != 0) goto fail; /* Perform operation */ r = http_io_perform_io(priv, &io, http_io_list_prepper); /* Clean up headers */ curl_slist_free_all(io.headers); io.headers = NULL; /* Check for error */ if (r != 0) goto fail; /* Finalize parse */ if (XML_Parse(io.xml, NULL, 0, 1) != XML_STATUS_OK) { io.xml_error = XML_GetErrorCode(io.xml); io.xml_error_line = XML_GetCurrentLineNumber(io.xml); io.xml_error_column = XML_GetCurrentColumnNumber(io.xml); } /* Check for XML error */ if (io.xml_error != XML_ERROR_NONE) { (*config->log)(LOG_ERR, "XML parse error: line %d col %d: %s", io.xml_error_line, io.xml_error_column, XML_ErrorString(io.xml_error)); r = EIO; goto fail; } } while (io.list_truncated); /* Done */ XML_ParserFree(io.xml); free(io.xml_path); free(io.xml_text); return 0; oom: /* Update stats */ pthread_mutex_lock(&priv->mutex); priv->stats.out_of_memory_errors++; pthread_mutex_unlock(&priv->mutex); r = ENOMEM; fail: /* Clean up after failure */ if (io.xml != NULL) XML_ParserFree(io.xml); free(io.xml_path); free(io.xml_text); return r; } static void http_io_list_prepper(CURL *curl, struct http_io *io) { curl_easy_setopt(curl, 
CURLOPT_WRITEFUNCTION, http_io_curl_list_reader); curl_easy_setopt(curl, CURLOPT_WRITEDATA, io); curl_easy_setopt(curl, CURLOPT_HTTPHEADER, io->headers); curl_easy_setopt(curl, CURLOPT_ENCODING, ""); curl_easy_setopt(curl, CURLOPT_HTTP_CONTENT_DECODING, (long)1); } static size_t http_io_curl_list_reader(const void *ptr, size_t size, size_t nmemb, void *stream) { struct http_io *const io = (struct http_io *)stream; size_t total = size * nmemb; if (io->xml_error != XML_ERROR_NONE) return total; if (XML_Parse(io->xml, ptr, total, 0) != XML_STATUS_OK) { io->xml_error = XML_GetErrorCode(io->xml); io->xml_error_line = XML_GetCurrentLineNumber(io->xml); io->xml_error_column = XML_GetCurrentColumnNumber(io->xml); } return total; } static void http_io_list_elem_start(void *arg, const XML_Char *name, const XML_Char **atts) { struct http_io *const io = (struct http_io *)arg; const size_t plen = strlen(io->xml_path); char *newbuf; /* Update current path */ if ((newbuf = realloc(io->xml_path, plen + 1 + strlen(name) + 1)) == NULL) { (*io->config->log)(LOG_DEBUG, "realloc: %s", strerror(errno)); io->xml_error = XML_ERROR_NO_MEMORY; return; } io->xml_path = newbuf; io->xml_path[plen] = '/'; strcpy(io->xml_path + plen + 1, name); /* Reset buffer */ io->xml_text_len = 0; io->xml_text[0] = '\0'; #if DEBUG_BLOCK_LIST /* Debug */ (*io->config->log)(LOG_DEBUG, "list: new path: \"%s\"", io->xml_path); #endif } static void http_io_list_elem_end(void *arg, const XML_Char *name) { struct http_io *const io = (struct http_io *)arg; struct http_io_conf *const config = io->config; s3b_block_t block_num; /* Handle tag */ if (strcmp(io->xml_path, "/" LIST_ELEM_LIST_BUCKET_RESLT "/" LIST_ELEM_IS_TRUNCATED) == 0) { io->list_truncated = strcmp(io->xml_text, LIST_TRUE) == 0; #if DEBUG_BLOCK_LIST (*config->log)(LOG_DEBUG, "list: parsed truncated=%d", io->list_truncated); #endif } /* Handle tag */ else if (strcmp(io->xml_path, "/" LIST_ELEM_LIST_BUCKET_RESLT "/" LIST_ELEM_CONTENTS "/" LIST_ELEM_KEY) 
== 0) { #if DEBUG_BLOCK_LIST (*config->log)(LOG_DEBUG, "list: key=\"%s\"", io->xml_text); #endif /* Attempt to parse key as a block's object name */ if (http_io_parse_block(config, io->xml_text, &block_num) == 0) { #if DEBUG_BLOCK_LIST (*config->log)(LOG_DEBUG, "list: parsed key=\"%s\" -> block=%0*jx", io->xml_text, S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num); #endif (*io->callback_func)(io->callback_arg, block_num); io->last_block = block_num; } else { /* object is some unrelated junk that we can ignore */ char last_block_path[strlen(config->prefix) + S3B_BLOCK_NUM_DIGITS + 1]; #if DEBUG_BLOCK_LIST (*config->log)(LOG_DEBUG, "list: can't parse key=\"%s\"", io->xml_text); #endif /* * If the object name is lexicographically after our last possible block name, we are done. * Note that this works whether or not --blockHashPrefix is being used, because the block hash * prefix is in the same format as the block number (i.e., 32 bit unsigned hexadecimal value). */ snprintf(last_block_path, sizeof(last_block_path), "%s%0*jx", config->prefix, S3B_BLOCK_NUM_DIGITS, (uintmax_t)last_possible_block); if (strcmp(io->xml_text, last_block_path) > 0) { #if DEBUG_BLOCK_LIST (*config->log)(LOG_DEBUG, "list: key=\"%s\" > last block \"%s\" -> we're done", io->xml_text, last_block_path); #endif io->list_truncated = 0; } } } /* Update current XML path */ assert(strrchr(io->xml_path, '/') != NULL); *strrchr(io->xml_path, '/') = '\0'; /* Reset buffer */ io->xml_text_len = 0; io->xml_text[0] = '\0'; } static void http_io_list_text(void *arg, const XML_Char *s, int len) { struct http_io *const io = (struct http_io *)arg; int avail; /* Append text to buffer */ avail = io->xml_text_max - io->xml_text_len; if (len > avail) len = avail; memcpy(io->xml_text + io->xml_text_len, s, len); io->xml_text_len += len; io->xml_text[io->xml_text_len] = '\0'; } /* * Parse a block's item name (including prefix and block hash prefix if any) and returns the result in *block_nump. 
*/
int
http_io_parse_block(struct http_io_conf *config, const char *name, s3b_block_t *block_nump)
{
    const size_t prefix_len = strlen(config->prefix);
    s3b_block_t parsed_hash = 0;
    s3b_block_t parsed_num = 0;
    const char *s = name;

    /* The object name must begin with the configured prefix */
    if (strncmp(s, config->prefix, prefix_len) != 0)
        return -1;
    s += prefix_len;

    /* With --blockHashPrefix, a hash value and a dash precede the block number */
    if (config->blockHashPrefix) {
        if (http_io_parse_hex_block_num(s, &parsed_hash) == -1)
            return -1;
        s += S3B_BLOCK_NUM_DIGITS;
        if (*s != '-')
            return -1;
        s++;
    }

    /* The block number itself must be the final component, and in range */
    if (http_io_parse_hex_block_num(s, &parsed_num) == -1)
        return -1;
    s += S3B_BLOCK_NUM_DIGITS;
    if (*s != '\0')
        return -1;
    if (parsed_num >= config->num_blocks)
        return -1;

    /* The hash prefix (if any) must agree with the one we would generate */
    if (config->blockHashPrefix && parsed_hash != http_io_block_hash_prefix(parsed_num))
        return -1;

    /* Success */
    *block_nump = parsed_num;
    return 0;
}

/*
 * Parse a hexadecimal block number value, which should be S3B_BLOCK_NUM_DIGITS lowercase digits.
 *
 * Returns zero on success, -1 on failure.
 */
static int
http_io_parse_hex_block_num(const char *string, s3b_block_t *valuep)
{
    s3b_block_t accum = 0;
    int i;

    for (i = 0; i < S3B_BLOCK_NUM_DIGITS; i++) {
        const char digit = string[i];
        int nibble;

        if (digit >= '0' && digit <= '9')
            nibble = digit - '0';
        else if (digit >= 'a' && digit <= 'f')
            nibble = digit - 'a' + 10;
        else
            return -1;                          /* uppercase, non-hex, or premature end of string */
        accum = (accum << 4) | nibble;
    }

    /* Success */
    *valuep = accum;
    return 0;
}

/*
 * Append deterministic hash value based on block number for even name distribution.
* Ref: https://github.com/archiecobbs/s3backer/issues/80
 * Ref: https://crypto.stackexchange.com/questions/16219/cryptographic-hash-function-for-32-bit-length-input-keys
 */
void
http_io_format_block_hash(const struct http_io_conf *const config, char *buf, size_t bufsiz, s3b_block_t block_num)
{
    assert(bufsiz >= S3B_BLOCK_NUM_DIGITS + 2);     /* hash digits + '-' + NUL */
    if (config->blockHashPrefix)
        snprintf(buf, bufsiz, "%0*jx-", S3B_BLOCK_NUM_DIGITS, (uintmax_t)http_io_block_hash_prefix(block_num));
    else
        *buf = '\0';                                /* no hash prefix configured -> empty string */
}

/*
 * Calculate deterministic hash value based on block number for even name distribution.
 *
 * Ref: https://github.com/archiecobbs/s3backer/issues/80
 * Ref: https://crypto.stackexchange.com/questions/16219/cryptographic-hash-function-for-32-bit-length-input-keys
 */
s3b_block_t
http_io_block_hash_prefix(s3b_block_t block_num)
{
    s3b_block_t hash;
    int n;

    /* 12 rounds of a simple xor/multiply/add mixing function */
    hash = block_num;
    for (n = 12; n > 0; n--)
        hash = ((hash >> 8) ^ hash) * 0x6b + n;
    return hash;
}

/*
 * Start background threads for this layer. Currently this only starts the
 * IAM credential refresh thread, and only when an EC2 IAM role is configured.
 * Returns zero on success or a pthread_create() error code.
 */
static int
http_io_create_threads(struct s3backer_store *s3b)
{
    struct http_io_private *const priv = s3b->data;
    struct http_io_conf *const config = priv->config;
    int r;

    /* Start IAM updater thread if appropriate */
    if (config->ec2iam_role != NULL) {
        assert(!priv->iam_thread_alive);
        if ((r = pthread_create(&priv->iam_thread, NULL, update_iam_credentials_main, priv)) != 0)
            return r;
        priv->iam_thread_alive = 1;             /* only set on success (see issue #115) */
    }

    /* Done */
    return 0;
}

/*
 * Read the filesystem size meta-data stored with block #0 via an HTTP HEAD
 * request. On success, *file_sizep and *block_sizep are filled in; returns
 * ENOENT if the sizing headers were absent, or another errno on failure.
 */
static int
http_io_meta_data(struct s3backer_store *s3b, off_t *file_sizep, u_int *block_sizep)
{
    struct http_io_private *const priv = s3b->data;
    struct http_io_conf *const config = priv->config;
    char urlbuf[URL_BUF_SIZE(config)];
    const time_t now = time(NULL);
    struct http_io io;
    int r;

    /* Initialize I/O info */
    memset(&io, 0, sizeof(io));
    io.url = urlbuf;
    io.method = HTTP_HEAD;

    /* Construct URL for the first block */
    http_io_get_block_url(urlbuf, sizeof(urlbuf), config, 0);

    /* Add Date header */
    http_io_add_date(priv, &io, now);

    /* Add Authorization header */
    if ((r = http_io_add_auth(priv, &io, now, NULL, 0)) != 0)
        goto done;

    /* Perform operation */
    if ((r = http_io_perform_io(priv, &io, http_io_head_prepper)) != 0)
        goto done;

    /* Extract filesystem sizing information (parsed from headers by the header callback) */
    if (io.file_size == 0 || io.block_size == 0) {
        r = ENOENT;
        goto done;
    }
    *file_sizep = (off_t)io.file_size;
    *block_sizep = io.block_size;

done:
    /* Clean up */
    curl_slist_free_all(io.headers);
    return r;
}

/*
 * cURL prepper for HEAD requests: no body, capture response headers.
 */
static void
http_io_head_prepper(CURL *curl, struct http_io *io)
{
    memset(&io->bufs, 0, sizeof(io->bufs));
    curl_easy_setopt(curl, CURLOPT_NOBODY, 1);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, http_io_curl_reader);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, io);
    curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, http_io_curl_header);
    curl_easy_setopt(curl, CURLOPT_HEADERDATA, io);
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, io->headers);
}

/*
 * Read and/or update the mount token file.
 *
 * If old_valuep != NULL, the current token is read via HEAD (zero if the
 * file doesn't exist; one for pre-token versions that stored no value).
 * If new_value >= 0, the file is then rewritten: a positive value PUTs a
 * small hostname+timestamp object carrying the token in a header, while
 * zero DELETEs the file. A negative new_value leaves the file untouched.
 */
static int
http_io_set_mount_token(struct s3backer_store *s3b, int32_t *old_valuep, int32_t new_value)
{
    struct http_io_private *const priv = s3b->data;
    struct http_io_conf *const config = priv->config;
    char urlbuf[URL_BUF_SIZE(config) + sizeof(MOUNT_TOKEN_FILE)];
    const time_t now = time(NULL);
    struct http_io io;
    int r = 0;

    /* Initialize I/O info */
    memset(&io, 0, sizeof(io));
    io.url = urlbuf;
    io.method = HTTP_HEAD;

    /* Construct URL for the mount token file */
    http_io_get_mount_token_file_url(urlbuf, sizeof(urlbuf), config);

    /* Get old value */
    if (old_valuep != NULL) {

        /* Add Date header */
        http_io_add_date(priv, &io, now);

        /* Add Authorization header */
        if ((r = http_io_add_auth(priv, &io, now, NULL, 0)) != 0)
            goto done;

        /* See if object exists */
        switch ((r = http_io_perform_io(priv, &io, http_io_head_prepper))) {
        case ENOENT:
            *old_valuep = 0;
            r = 0;
            break;
        case 0:
            *old_valuep = io.mount_token;
            if (*old_valuep == 0)               // backward compatibility
                *old_valuep = 1;
            break;
        default:
            goto done;
        }
    }

    /* Set new value */
    if (new_value >= 0) {
        char content[_POSIX_HOST_NAME_MAX + DATE_BUF_SIZE + 32];
        u_char md5[MD5_DIGEST_LENGTH];
        char md5buf[MD5_DIGEST_LENGTH * 2 + 1];
        const char *storage_class;
        MD5_CTX ctx;

        /* Reset I/O info (discard headers from the HEAD request above) */
        curl_slist_free_all(io.headers);
        memset(&io, 0, sizeof(io));
        io.url = urlbuf;
        io.method = new_value != 0 ? HTTP_PUT : HTTP_DELETE;

        /* Add Date header */
        http_io_add_date(priv, &io, now);

        /* To set the flag PUT some content containing current date */
        if (new_value != 0) {
            struct tm tm;

            /* Create content for the mount token file (hostname + timestamp) */
            gethostname(content, sizeof(content) - 1);
            content[sizeof(content) - 1] = '\0';        /* gethostname() may not NUL-terminate on truncation */
            strftime(content + strlen(content), sizeof(content) - strlen(content), "\n" AWS_DATE_BUF_FMT "\n", gmtime_r(&now, &tm));
            io.src = content;
            io.buf_size = strlen(content);
            MD5_Init(&ctx);
            MD5_Update(&ctx, content, strlen(content));
            MD5_Final(md5, &ctx);

            /* Add Content-Type header */
            io.headers = http_io_add_header(priv, io.headers, "%s: %s", CTYPE_HEADER, MOUNT_TOKEN_FILE_MIME_TYPE);

            /* Add Content-MD5 header */
            http_io_base64_encode(md5buf, sizeof(md5buf), md5, MD5_DIGEST_LENGTH);
            io.headers = http_io_add_header(priv, io.headers, "%s: %s", MD5_HEADER, md5buf);

            /* Add Mount-Token header */
            io.headers = http_io_add_header(priv, io.headers, "%s: %08x", MOUNT_TOKEN_HEADER, (int)new_value);

            /* Add ACL header */
            io.headers = http_io_add_header(priv, io.headers, "%s: %s", ACL_HEADER, config->accessType);
        }

        /* Add Server Side Encryption value (if needed; PUT only — see issue #116) */
        if (config->sse != NULL && new_value != 0)
            io.headers = http_io_add_header(priv, io.headers, "%s: %s", SSE_HEADER, config->sse);

        /* Add storage class header (if needed) */
        storage_class = config->storage_class != NULL ? config->storage_class :
          config->rrs ? STORAGE_CLASS_REDUCED_REDUNDANCY : NULL;
        if (storage_class != NULL)
            io.headers = http_io_add_header(priv, io.headers, "%s: %s", STORAGE_CLASS_HEADER, storage_class);

        /* Add Authorization header */
        if ((r = http_io_add_auth(priv, &io, now, io.src, io.buf_size)) != 0)
            goto done;

        /* Perform operation to set or clear mount token */
        r = http_io_perform_io(priv, &io, http_io_write_prepper);
    }

done:
    /* Clean up */
    curl_slist_free_all(io.headers);
    return r;
}

/*
 * Fetch fresh credentials from the EC2 instance meta-data service for the
 * configured IAM role and swap them into the config under the mutex.
 * Returns zero on success or an errno value.
 */
static int
update_iam_credentials(struct http_io_private *const priv)
{
    struct http_io_conf *const config = priv->config;
    char *urlbuf;
    struct http_io io;
    char buf[2048] = { '\0' };
    char *access_id = NULL;
    char *access_key = NULL;
    char *iam_token = NULL;
    size_t buflen;
    int r;

    /* Build URL */
    if (asprintf(&urlbuf, "%s%s", EC2_IAM_META_DATA_URLBASE, config->ec2iam_role) == -1) {
        (*config->log)(LOG_ERR, "%s: asprintf() failed: %s", "update_iam_credentials", strerror(ENOMEM));
        return ENOMEM;
    }

    /* Initialize I/O info */
    memset(&io, 0, sizeof(io));
    io.url = urlbuf;
    io.method = HTTP_GET;
    io.dest = buf;
    io.buf_size = sizeof(buf);

    /* Perform operation */
    (*config->log)(LOG_INFO, "acquiring EC2 IAM credentials from %s", io.url);
    if ((r = http_io_perform_io(priv, &io, http_io_iamcreds_prepper)) != 0) {
        (*config->log)(LOG_ERR, "failed to acquire EC2 IAM credentials from %s: %s", io.url, strerror(r));
        free(urlbuf);
        return r;
    }

    /* Determine how many bytes we read */
    buflen = io.buf_size - io.bufs.rdremain;
    if (buflen > sizeof(buf) - 1)
        buflen = sizeof(buf) - 1;
    buf[buflen] = '\0';

    /* Find credentials in JSON response (any NULL result aborts; earlier strings are freed) */
    if ((access_id = parse_json_field(priv, buf, EC2_IAM_META_DATA_ACCESSID)) == NULL
      || (access_key = parse_json_field(priv, buf, EC2_IAM_META_DATA_ACCESSKEY)) == NULL
      || (iam_token = parse_json_field(priv, buf, EC2_IAM_META_DATA_TOKEN)) == NULL) {
        (*config->log)(LOG_ERR, "failed to extract EC2 IAM credentials from response: %s", strerror(errno));
        free(access_id);
        free(access_key);
        free(urlbuf);
        return EINVAL;
    }

    /* Update credentials (swap pointers under the mutex; old strings are freed) */
    pthread_mutex_lock(&priv->mutex);
    free(config->accessId);
    free(config->accessKey);
    free(config->iam_token);
    config->accessId = access_id;
    config->accessKey = access_key;
    config->iam_token = iam_token;
    pthread_mutex_unlock(&priv->mutex);
    (*config->log)(LOG_INFO, "successfully updated EC2 IAM credentials from %s", io.url);
    free(urlbuf);

    /* Done */
    return 0;
}

/*
 * IAM credential refresh thread: wake every five minutes and re-fetch
 * credentials until the shutdown flag is set.
 */
static void *
update_iam_credentials_main(void *arg)
{
    struct http_io_private *const priv = arg;

    while (!priv->iam_thread_shutdown) {

        // Sleep for five minutes, or until woken up by pthread_cancel()
        sleep(300);

        // Shutting down?
        if (priv->iam_thread_shutdown)
            break;

        // Attempt to update credentials
        update_iam_credentials(priv);
    }

    // Done
    return NULL;
}

/*
 * Extract the string value of the named field from a flat JSON document
 * using a POSIX extended regex. Returns a malloc'd copy of the value, or
 * NULL with errno set (EINVAL on parse failure, ENOMEM on malloc failure).
 */
static char *
parse_json_field(struct http_io_private *priv, const char *json, const char *field)
{
    struct http_io_conf *const config = priv->config;
    regmatch_t match[2];
    regex_t regex;
    char buf[128];
    char *value;
    size_t vlen;
    int r;

    /* Match `"field" : "value"' with arbitrary whitespace around the colon */
    snprintf(buf, sizeof(buf), "\"%s\"[[:space:]]*:[[:space:]]*\"([^\"]+)\"", field);
    memset(&regex, 0, sizeof(regex));
    if ((r = regcomp(&regex, buf, REG_EXTENDED)) != 0) {
        regerror(r, &regex, buf, sizeof(buf));
        (*config->log)(LOG_INFO, "regex compilation failed: %s", buf);
        errno = EINVAL;
        return NULL;
    }
    if ((r = regexec(&regex, json, sizeof(match) / sizeof(*match), match, 0)) != 0) {
        regerror(r, &regex, buf, sizeof(buf));
        (*config->log)(LOG_INFO, "failed to find JSON field \"%s\" in credentials response: %s", field, buf);
        regfree(&regex);
        errno = EINVAL;
        return NULL;
    }
    regfree(&regex);

    /* Copy out the first capture group */
    vlen = match[1].rm_eo - match[1].rm_so;
    if ((value = malloc(vlen + 1)) == NULL) {
        r = errno;
        (*config->log)(LOG_INFO, "malloc: %s", strerror(r));
        errno = r;
        return NULL;
    }
    memcpy(value, json + match[1].rm_so, vlen);
    value[vlen] = '\0';
    return value;
}

/*
 * cURL prepper for the IAM credentials GET: read body into io->dest,
 * capped at io->buf_size, with no content decoding.
 */
static void
http_io_iamcreds_prepper(CURL *curl, struct http_io *io)
{
    memset(&io->bufs, 0, sizeof(io->bufs));
    io->bufs.rdremain = io->buf_size;
    io->bufs.rddata = io->dest;
    curl_easy_setopt(curl,
CURLOPT_WRITEFUNCTION, http_io_curl_reader);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, io);
    curl_easy_setopt(curl, CURLOPT_MAXFILESIZE_LARGE, (curl_off_t)io->buf_size);
    curl_easy_setopt(curl, CURLOPT_ENCODING, "");
    curl_easy_setopt(curl, CURLOPT_HTTP_CONTENT_DECODING, (long)0);     /* we decode Content-Encoding ourselves */
}

/*
 * Read one block via HTTP GET into `dest' (config->block_size bytes).
 *
 * Handles the empty-block bitmap shortcut, If-Match/If-None-Match MD5
 * verification, and layered Content-Encoding decode (decrypt and/or
 * inflate). A 404 is returned as an all-zeroes block. If actual_md5 is
 * non-NULL the block's MD5 (from the response) is copied out.
 */
static int
http_io_read_block(struct s3backer_store *const s3b, s3b_block_t block_num, void *dest,
    u_char *actual_md5, const u_char *expect_md5, int strict)
{
    struct http_io_private *const priv = s3b->data;
    struct http_io_conf *const config = priv->config;
    char urlbuf[URL_BUF_SIZE(config)];
    char accepted_encodings[64];
    const time_t now = time(NULL);
    int encrypted = 0;
    struct http_io io;
    u_int did_read;
    char *layer;
    int r;

    /* Sanity check */
    if (config->block_size == 0 || block_num >= config->num_blocks)
        return EINVAL;

    /* Read zero blocks when bitmap indicates empty until non-zero content is written */
    if (priv->non_zero != NULL) {
        const int bits_per_word = sizeof(*priv->non_zero) * 8;
        const int word = block_num / bits_per_word;
        const int bit = 1 << (block_num % bits_per_word);

        pthread_mutex_lock(&priv->mutex);
        if ((priv->non_zero[word] & bit) == 0) {
            priv->stats.empty_blocks_read++;
            pthread_mutex_unlock(&priv->mutex);
            memset(dest, 0, config->block_size);
            if (actual_md5 != NULL)
                memset(actual_md5, 0, MD5_DIGEST_LENGTH);
            return 0;
        }
        pthread_mutex_unlock(&priv->mutex);
    }

    /* Initialize I/O info */
    memset(&io, 0, sizeof(io));
    io.url = urlbuf;
    io.method = HTTP_GET;
    io.block_num = block_num;

    /* Allocate a buffer in case compressed and/or encrypted data is larger */
    io.buf_size = compressBound(config->block_size) + EVP_MAX_IV_LENGTH;
    if ((io.dest = malloc(io.buf_size)) == NULL) {
        (*config->log)(LOG_ERR, "malloc: %s", strerror(errno));
        pthread_mutex_lock(&priv->mutex);
        priv->stats.out_of_memory_errors++;
        pthread_mutex_unlock(&priv->mutex);
        return ENOMEM;
    }

    /* Construct URL for this block */
    http_io_get_block_url(urlbuf, sizeof(urlbuf), config, block_num);

    /* Add Date header */
    http_io_add_date(priv, &io, now);

    /* Add If-Match or If-None-Match header as required */
    if (expect_md5 != NULL && memcmp(expect_md5, zero_md5, MD5_DIGEST_LENGTH) != 0) {
        char md5buf[MD5_DIGEST_LENGTH * 2 + 1];
        const char *header;

        if (strict)
            header = IF_MATCH_HEADER;
        else {
            header = IF_NONE_MATCH_HEADER;
            io.expect_304 = 1;                  /* a 304 means the caller's copy is still valid */
        }
        http_io_prhex(md5buf, expect_md5, MD5_DIGEST_LENGTH);
        io.headers = http_io_add_header(priv, io.headers, "%s: \"%s\"", header, md5buf);
    }

    /* Set Accept-Encoding header */
    snprintf(accepted_encodings, sizeof(accepted_encodings), "%s", CONTENT_ENCODING_DEFLATE);
    if (config->encryption != NULL) {
        snprintf(accepted_encodings + strlen(accepted_encodings),
          sizeof(accepted_encodings) - strlen(accepted_encodings), ", %s-%s", CONTENT_ENCODING_ENCRYPT, config->encryption);
    }
    io.headers = http_io_add_header(priv, io.headers, "%s: %s", ACCEPT_ENCODING_HEADER, accepted_encodings);

    /* Add Authorization header */
    if ((r = http_io_add_auth(priv, &io, now, NULL, 0)) != 0)
        goto fail;

    /* Perform operation */
    r = http_io_perform_io(priv, &io, http_io_read_prepper);

    /* Determine how many bytes we read */
    did_read = io.buf_size - io.bufs.rdremain;

    /*
     * Check Content-Encoding and decode if necessary. Encodings are listed
     * outermost-last, so we peel them off from the right; the loop's
     * `*layer = '\0'' increment truncates the list at the layer just handled.
     */
    if (*io.content_encoding == '\0' && config->default_ce != NULL)
        snprintf(io.content_encoding, sizeof(io.content_encoding), "%s", config->default_ce);
    for ( ; r == 0 && *io.content_encoding != '\0'; *layer = '\0') {

        /* Find next encoding layer */
        if ((layer = strrchr(io.content_encoding, ',')) != NULL)
            *layer++ = '\0';
        else
            layer = io.content_encoding;

        /* Sanity check (io.dest is NULL once data has been inflated directly into `dest') */
        if (io.dest == NULL)
            goto bad_encoding;

        /* Check for encryption (which must have been applied after compression) */
        if (strncasecmp(layer, CONTENT_ENCODING_ENCRYPT "-", sizeof(CONTENT_ENCODING_ENCRYPT)) == 0) {
            const char *const block_cipher = layer + sizeof(CONTENT_ENCODING_ENCRYPT);
            u_char hmac[SHA_DIGEST_LENGTH];
            u_char *buf;

            /* Encryption must be enabled */
            if (config->encryption == NULL) {
                (*config->log)(LOG_ERR, "block %0*jx is encrypted with `%s' but `--encrypt' was not specified",
                  S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num, block_cipher);
                r = EIO;
                break;
            }

            /* Verify encryption type */
            if (strcasecmp(block_cipher, EVP_CIPHER_name(priv->cipher)) != 0) {
                (*config->log)(LOG_ERR, "block %0*jx was encrypted using `%s' but `%s' encryption is configured",
                  S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num, block_cipher, EVP_CIPHER_name(priv->cipher));
                r = EIO;
                break;
            }

            /* Verify block's signature */
            if (memcmp(io.hmac, zero_hmac, sizeof(io.hmac)) == 0) {
                (*config->log)(LOG_ERR, "block %0*jx is encrypted, but no signature was found",
                  S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num);
                r = EIO;
                break;
            }
            http_io_authsig(priv, block_num, io.dest, did_read, hmac);
            if (memcmp(io.hmac, hmac, sizeof(hmac)) != 0) {
                (*config->log)(LOG_ERR, "block %0*jx has an incorrect signature (did you provide the right password?)",
                  S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num);
                r = EIO;
                break;
            }

            /* Allocate buffer for the decrypted data */
            if ((buf = malloc(did_read + EVP_MAX_IV_LENGTH)) == NULL) {
                (*config->log)(LOG_ERR, "malloc: %s", strerror(errno));
                pthread_mutex_lock(&priv->mutex);
                priv->stats.out_of_memory_errors++;
                pthread_mutex_unlock(&priv->mutex);
                r = ENOMEM;
                break;
            }

            /* Decrypt the block */
            did_read = http_io_crypt(priv, block_num, 0, io.dest, did_read, buf);
            memcpy(io.dest, buf, did_read);
            free(buf);

            /* Proceed */
            encrypted = 1;
            continue;
        }

        /* Check for compression */
        if (strcasecmp(layer, CONTENT_ENCODING_DEFLATE) == 0) {
            u_long uclen = config->block_size;

            switch (uncompress(dest, &uclen, io.dest, did_read)) {
            case Z_OK:
                did_read = uclen;
                free(io.dest);
                io.dest = NULL;         /* compression should have been first */
                r = 0;
                break;
            case Z_MEM_ERROR:
                (*config->log)(LOG_ERR, "zlib uncompress: %s", strerror(ENOMEM));
                pthread_mutex_lock(&priv->mutex);
                priv->stats.out_of_memory_errors++;
                pthread_mutex_unlock(&priv->mutex);
                r = ENOMEM;
                break;
            case Z_BUF_ERROR:
                (*config->log)(LOG_ERR, "zlib uncompress: %s", "decompressed block is oversize");
                r = EIO;
                break;
            case Z_DATA_ERROR:
                (*config->log)(LOG_ERR, "zlib uncompress: %s", "data is corrupted or truncated");
                r = EIO;
                break;
            default:
                /* NOTE(review): `r' here is the pre-switch value, not the uncompress() return code,
                   so the logged error number looks wrong — confirm and capture the switch value */
                (*config->log)(LOG_ERR, "unknown zlib compress2() error %d", r);
                r = EIO;
                break;
            }

            /* Proceed */
            continue;
        }

bad_encoding:
        /* It was something we don't recognize */
        (*config->log)(LOG_ERR, "read of block %0*jx returned unexpected encoding \"%s\"",
          S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num, layer);
        r = EIO;
        break;
    }

    /* Check for required encryption */
    if (r == 0 && config->encryption != NULL && !encrypted) {
        (*config->log)(LOG_ERR, "block %0*jx was supposed to be encrypted but wasn't",
          S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num);
        r = EIO;
    }

    /* Check for wrong length read */
    if (r == 0 && did_read != config->block_size) {
        (*config->log)(LOG_ERR, "read of block %0*jx returned %lu != %lu bytes",
          S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num, (u_long)did_read, (u_long)config->block_size);
        r = EIO;
    }

    /* Copy the data to the desination buffer (if we haven't already) */
    if (r == 0 && io.dest != NULL)
        memcpy(dest, io.dest, config->block_size);

    /* Update stats */
    pthread_mutex_lock(&priv->mutex);
    switch (r) {
    case 0:
        priv->stats.normal_blocks_read++;
        break;
    case ENOENT:
        priv->stats.zero_blocks_read++;
        break;
    default:
        break;
    }
    pthread_mutex_unlock(&priv->mutex);

    /* Check expected MD5 */
    if (expect_md5 != NULL) {
        const int expected_not_found = memcmp(expect_md5, zero_md5, MD5_DIGEST_LENGTH) == 0;

        /* Compare result with expectation */
        switch (r) {
        case 0:
            if (expected_not_found)
                r = strict ? EIO : 0;
            break;
        case ENOENT:
            if (expected_not_found)
                r = strict ? 0 : EEXIST;
            break;
        default:
            break;
        }

        /* Update stats */
        if (!strict) {
            switch (r) {
            case 0:
                pthread_mutex_lock(&priv->mutex);
                priv->stats.http_mismatch++;
                pthread_mutex_unlock(&priv->mutex);
                break;
            case EEXIST:
                pthread_mutex_lock(&priv->mutex);
                priv->stats.http_verified++;
                pthread_mutex_unlock(&priv->mutex);
                break;
            default:
                break;
            }
        }
    }

    /* Treat `404 Not Found' all zeroes */
    if (r == ENOENT) {
        memset(dest, 0, config->block_size);
        r = 0;
    }

    /* Copy actual MD5 */
    if (actual_md5 != NULL)
        memcpy(actual_md5, io.md5, MD5_DIGEST_LENGTH);

fail:
    /* Clean up */
    if (io.dest != NULL)
        free(io.dest);
    curl_slist_free_all(io.headers);
    return r;
}

/*
 * cURL prepper for block GET requests: read body into io->dest, capture
 * response headers, decode Content-Encoding ourselves.
 */
static void
http_io_read_prepper(CURL *curl, struct http_io *io)
{
    memset(&io->bufs, 0, sizeof(io->bufs));
    io->bufs.rdremain = io->buf_size;
    io->bufs.rddata = io->dest;
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, http_io_curl_reader);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, io);
    curl_easy_setopt(curl, CURLOPT_MAXFILESIZE_LARGE, (curl_off_t)io->buf_size);
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, io->headers);
    curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, http_io_curl_header);
    curl_easy_setopt(curl, CURLOPT_HEADERDATA, io);
    curl_easy_setopt(curl, CURLOPT_ENCODING, "");
    curl_easy_setopt(curl, CURLOPT_HTTP_CONTENT_DECODING, (long)0);
}

/*
 * Write block if src != NULL, otherwise delete block.
*/
static int
http_io_write_block(struct s3backer_store *const s3b, s3b_block_t block_num, const void *src,
    u_char *caller_md5, check_cancel_t *check_cancel, void *check_cancel_arg)
{
    struct http_io_private *const priv = s3b->data;
    struct http_io_conf *const config = priv->config;
    char urlbuf[URL_BUF_SIZE(config)];
    char md5buf[(MD5_DIGEST_LENGTH * 4) / 3 + 4];       /* base64-encoded MD5 + NUL */
    char hmacbuf[SHA_DIGEST_LENGTH * 2 + 1];
    u_char hmac[SHA_DIGEST_LENGTH];
    u_char md5[MD5_DIGEST_LENGTH];
    const time_t now = time(NULL);
    void *encoded_buf = NULL;
    const char *storage_class;
    struct http_io io;
    int compressed = 0;
    int encrypted = 0;
    int r;

    /* Sanity check */
    if (config->block_size == 0 || block_num >= config->num_blocks)
        return EINVAL;

    /* Detect zero blocks (if not done already by upper layer); zero blocks are DELETEd, not PUT */
    if (src != NULL) {
        if (http_io_is_zero_block(src, config->block_size))
            src = NULL;
    }

    /* Don't write zero blocks when bitmap indicates empty until non-zero content is written */
    if (priv->non_zero != NULL) {
        const int bits_per_word = sizeof(*priv->non_zero) * 8;
        const int word = block_num / bits_per_word;
        const int bit = 1 << (block_num % bits_per_word);

        pthread_mutex_lock(&priv->mutex);
        if (src == NULL) {
            if ((priv->non_zero[word] & bit) == 0) {
                priv->stats.empty_blocks_written++;
                pthread_mutex_unlock(&priv->mutex);
                return 0;                       /* already empty -> nothing to delete */
            }
        } else
            priv->non_zero[word] |= bit;
        pthread_mutex_unlock(&priv->mutex);
    }

    /* Initialize I/O info */
    memset(&io, 0, sizeof(io));
    io.url = urlbuf;
    io.method = src != NULL ? HTTP_PUT : HTTP_DELETE;
    io.src = src;
    io.buf_size = config->block_size;
    io.block_num = block_num;
    io.check_cancel = check_cancel;
    io.check_cancel_arg = check_cancel_arg;

    /* Compress block if desired (must happen before encryption) */
    if (src != NULL && config->compress != Z_NO_COMPRESSION) {
        u_long compress_len;

        /* Allocate buffer */
        compress_len = compressBound(io.buf_size);
        if ((encoded_buf = malloc(compress_len)) == NULL) {
            (*config->log)(LOG_ERR, "malloc: %s", strerror(errno));
            pthread_mutex_lock(&priv->mutex);
            priv->stats.out_of_memory_errors++;
            pthread_mutex_unlock(&priv->mutex);
            r = ENOMEM;
            goto fail;
        }

        /* Compress data */
        r = compress2(encoded_buf, &compress_len, io.src, io.buf_size, config->compress);
        switch (r) {
        case Z_OK:
            break;
        case Z_MEM_ERROR:
            (*config->log)(LOG_ERR, "zlib compress: %s", strerror(ENOMEM));
            pthread_mutex_lock(&priv->mutex);
            priv->stats.out_of_memory_errors++;
            pthread_mutex_unlock(&priv->mutex);
            r = ENOMEM;
            goto fail;
        default:
            (*config->log)(LOG_ERR, "unknown zlib compress2() error %d", r);
            r = EIO;
            goto fail;
        }

        /* Update POST data */
        io.src = encoded_buf;
        io.buf_size = compress_len;
        compressed = 1;
    }

    /* Encrypt data if desired */
    if (src != NULL && config->encryption != NULL) {
        void *encrypt_buf;
        u_int encrypt_len;

        /* Allocate buffer */
        if ((encrypt_buf = malloc(io.buf_size + EVP_MAX_IV_LENGTH)) == NULL) {
            (*config->log)(LOG_ERR, "malloc: %s", strerror(errno));
            pthread_mutex_lock(&priv->mutex);
            priv->stats.out_of_memory_errors++;
            pthread_mutex_unlock(&priv->mutex);
            r = ENOMEM;
            goto fail;
        }

        /* Encrypt the block */
        encrypt_len = http_io_crypt(priv, block_num, 1, io.src, io.buf_size, encrypt_buf);

        /* Compute block signature over the ciphertext */
        http_io_authsig(priv, block_num, encrypt_buf, encrypt_len, hmac);
        http_io_prhex(hmacbuf, hmac, SHA_DIGEST_LENGTH);

        /* Update POST data */
        io.src = encrypt_buf;
        io.buf_size = encrypt_len;
        free(encoded_buf);              /* OK if NULL */
        encoded_buf = encrypt_buf;
        encrypted = 1;
    }

    /* Set Content-Encoding HTTP header (deflate first, then encrypt, matching decode order in read) */
    if (compressed || encrypted) {
        char ebuf[128];

        snprintf(ebuf, sizeof(ebuf), "%s: ", CONTENT_ENCODING_HEADER);
        if (compressed)
            snprintf(ebuf + strlen(ebuf), sizeof(ebuf) - strlen(ebuf), "%s", CONTENT_ENCODING_DEFLATE);
        if (encrypted) {
            snprintf(ebuf + strlen(ebuf), sizeof(ebuf) - strlen(ebuf), "%s%s-%s",
              compressed ? ", " : "", CONTENT_ENCODING_ENCRYPT, config->encryption);
        }
        io.headers = http_io_add_header(priv, io.headers, "%s", ebuf);
    }

    /* Compute MD5 checksum (over the encoded payload; all-zero MD5 for a DELETE) */
    if (src != NULL)
        MD5(io.src, io.buf_size, md5);
    else
        memset(md5, 0, MD5_DIGEST_LENGTH);

    /* Report MD5 back to caller */
    if (caller_md5 != NULL)
        memcpy(caller_md5, md5, MD5_DIGEST_LENGTH);

    /* Construct URL for this block */
    http_io_get_block_url(urlbuf, sizeof(urlbuf), config, block_num);

    /* Add Date header */
    http_io_add_date(priv, &io, now);

    /* Add PUT-only headers */
    if (src != NULL) {

        /* Add Content-Type header */
        io.headers = http_io_add_header(priv, io.headers, "%s: %s", CTYPE_HEADER, CONTENT_TYPE);

        /* Add Content-MD5 header */
        http_io_base64_encode(md5buf, sizeof(md5buf), md5, MD5_DIGEST_LENGTH);
        io.headers = http_io_add_header(priv, io.headers, "%s: %s", MD5_HEADER, md5buf);
    }

    /* Add ACL header (PUT only) */
    if (src != NULL)
        io.headers = http_io_add_header(priv, io.headers, "%s: %s", ACL_HEADER, config->accessType);

    /* Add file size meta-data to zero'th block */
    if (src != NULL && block_num == 0) {
        io.headers = http_io_add_header(priv, io.headers, "%s: %u", BLOCK_SIZE_HEADER, config->block_size);
        io.headers = http_io_add_header(priv, io.headers, "%s: %ju",
          FILE_SIZE_HEADER, (uintmax_t)(config->block_size * config->num_blocks));
    }

    /* Add signature header (if encrypting) */
    if (src != NULL && config->encryption != NULL)
        io.headers = http_io_add_header(priv, io.headers, "%s: \"%s\"", HMAC_HEADER, hmacbuf);

    /* Add Server Side Encryption header (if needed; PUT only — see issue #116) */
    if (config->sse != NULL && src != NULL)
        io.headers = http_io_add_header(priv, io.headers, "%s: %s", SSE_HEADER, config->sse);

    /* Add storage class header (if needed) */
    storage_class = config->storage_class != NULL ? config->storage_class :
      config->rrs ? STORAGE_CLASS_REDUCED_REDUNDANCY : NULL;
    if (storage_class != NULL)
        io.headers = http_io_add_header(priv, io.headers, "%s: %s", STORAGE_CLASS_HEADER, storage_class);

    /* Add Authorization header */
    if ((r = http_io_add_auth(priv, &io, now, io.src, io.buf_size)) != 0)
        goto fail;

    /* Perform operation */
    r = http_io_perform_io(priv, &io, http_io_write_prepper);

    /* Update stats */
    if (r == 0) {
        pthread_mutex_lock(&priv->mutex);
        if (src == NULL)
            priv->stats.zero_blocks_written++;
        else
            priv->stats.normal_blocks_written++;
        pthread_mutex_unlock(&priv->mutex);
    }

fail:
    /* Clean up */
    curl_slist_free_all(io.headers);
    if (encoded_buf != NULL)
        free(encoded_buf);
    return r;
}

/*
 * cURL prepper for PUT/DELETE requests: upload from io->src when present.
 */
static void
http_io_write_prepper(CURL *curl, struct http_io *io)
{
    memset(&io->bufs, 0, sizeof(io->bufs));
    if (io->src != NULL) {
        io->bufs.wrremain = io->buf_size;
        io->bufs.wrdata = io->src;
    }
    curl_easy_setopt(curl, CURLOPT_READFUNCTION, http_io_curl_writer);
    curl_easy_setopt(curl, CURLOPT_READDATA, io);
    curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, http_io_curl_reader);
    curl_easy_setopt(curl, CURLOPT_WRITEDATA, io);
    if (io->src != NULL) {
        curl_easy_setopt(curl, CURLOPT_UPLOAD, 1);
        curl_easy_setopt(curl, CURLOPT_INFILESIZE_LARGE, (curl_off_t)io->buf_size);
    }
    curl_easy_setopt(curl, CURLOPT_CUSTOMREQUEST, io->method);
    curl_easy_setopt(curl, CURLOPT_HTTPHEADER, io->headers);
}

/*
 * Partial-block read: delegate to the generic read-modify helper layer.
 */
static int
http_io_read_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int off, u_int len, void *dest)
{
    struct http_io_private *const priv = s3b->data;
    struct http_io_conf *const config = priv->config;

    return block_part_read_block_part(s3b, block_num, config->block_size, off, len, dest);
}

/*
 * Partial-block write: delegate to the generic read-modify-write helper layer.
 */
static int
http_io_write_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int off, u_int len, const void *src)
{
    struct http_io_private *const priv = s3b->data;
    struct http_io_conf *const config = priv->config;

    return
block_part_write_block_part(s3b, block_num, config->block_size, off, len, src); } /* * Perform HTTP operation. */ static int http_io_perform_io(struct http_io_private *priv, struct http_io *io, http_io_curl_prepper_t *prepper) { struct http_io_conf *const config = priv->config; struct timespec delay; CURLcode curl_code; u_int retry_pause = 0; u_int total_pause; long http_code; double clen; int attempt; CURL *curl; /* Debug */ if (config->debug) (*config->log)(LOG_DEBUG, "%s %s", io->method, io->url); /* Make attempts */ for (attempt = 0, total_pause = 0; 1; attempt++, total_pause += retry_pause) { /* Acquire and initialize CURL instance */ if ((curl = http_io_acquire_curl(priv, io)) == NULL) return EIO; (*prepper)(curl, io); /* Perform HTTP operation and check result */ if (attempt > 0) (*config->log)(LOG_INFO, "retrying query (attempt #%d): %s %s", attempt + 1, io->method, io->url); curl_code = curl_easy_perform(curl); /* Find out what the HTTP result code was (if any) */ switch (curl_code) { case CURLE_HTTP_RETURNED_ERROR: case 0: if (curl_easy_getinfo(curl, CURLINFO_RESPONSE_CODE, &http_code) != 0) http_code = 999; /* this should never happen */ break; default: http_code = -1; break; } /* Work around the fact that libcurl converts a 304 HTTP code as success */ if (curl_code == 0 && http_code == HTTP_NOT_MODIFIED) curl_code = CURLE_HTTP_RETURNED_ERROR; /* In the case of a DELETE, treat an HTTP_NOT_FOUND error as successful */ if (curl_code == CURLE_HTTP_RETURNED_ERROR && http_code == HTTP_NOT_FOUND && strcmp(io->method, HTTP_DELETE) == 0) curl_code = 0; /* Handle success */ if (curl_code == 0) { double curl_time; int r = 0; /* Extra debug logging */ if (config->debug) (*config->log)(LOG_DEBUG, "success: %s %s", io->method, io->url); /* Extract timing info */ if ((curl_code = curl_easy_getinfo(curl, CURLINFO_TOTAL_TIME, &curl_time)) != CURLE_OK) { (*config->log)(LOG_ERR, "can't get cURL timing: %s", curl_easy_strerror(curl_code)); curl_time = 0.0; } /* Extract 
content-length (if required) */ if (io->content_lengthp != NULL) { if ((curl_code = curl_easy_getinfo(curl, CURLINFO_CONTENT_LENGTH_DOWNLOAD, &clen)) == CURLE_OK) *io->content_lengthp = (u_int)clen; else { (*config->log)(LOG_ERR, "can't get content-length: %s", curl_easy_strerror(curl_code)); r = ENXIO; } } /* Update stats */ pthread_mutex_lock(&priv->mutex); if (strcmp(io->method, HTTP_GET) == 0) { priv->stats.http_gets.count++; priv->stats.http_gets.time += curl_time; } else if (strcmp(io->method, HTTP_PUT) == 0) { priv->stats.http_puts.count++; priv->stats.http_puts.time += curl_time; } else if (strcmp(io->method, HTTP_DELETE) == 0) { priv->stats.http_deletes.count++; priv->stats.http_deletes.time += curl_time; } else if (strcmp(io->method, HTTP_HEAD) == 0) { priv->stats.http_heads.count++; priv->stats.http_heads.time += curl_time; } pthread_mutex_unlock(&priv->mutex); /* Done */ http_io_release_curl(priv, &curl, r == 0); return r; } /* Free the curl handle (and ensure we don't try to re-use it) */ http_io_release_curl(priv, &curl, 0); /* Handle errors */ switch (curl_code) { case CURLE_ABORTED_BY_CALLBACK: if (config->debug) (*config->log)(LOG_DEBUG, "write aborted: %s %s", io->method, io->url); pthread_mutex_lock(&priv->mutex); priv->stats.http_canceled_writes++; pthread_mutex_unlock(&priv->mutex); return ECONNABORTED; case CURLE_OPERATION_TIMEDOUT: (*config->log)(LOG_NOTICE, "operation timeout: %s %s", io->method, io->url); pthread_mutex_lock(&priv->mutex); priv->stats.curl_timeouts++; pthread_mutex_unlock(&priv->mutex); break; case CURLE_HTTP_RETURNED_ERROR: /* special handling for some specific HTTP codes */ switch (http_code) { case HTTP_NOT_FOUND: if (config->debug) (*config->log)(LOG_DEBUG, "rec'd %ld response: %s %s", http_code, io->method, io->url); return ENOENT; case HTTP_UNAUTHORIZED: (*config->log)(LOG_ERR, "rec'd %ld response: %s %s", http_code, io->method, io->url); pthread_mutex_lock(&priv->mutex); priv->stats.http_unauthorized++; 
pthread_mutex_unlock(&priv->mutex); return EACCES; case HTTP_FORBIDDEN: (*config->log)(LOG_ERR, "rec'd %ld response: %s %s", http_code, io->method, io->url); pthread_mutex_lock(&priv->mutex); priv->stats.http_forbidden++; pthread_mutex_unlock(&priv->mutex); return EPERM; case HTTP_PRECONDITION_FAILED: (*config->log)(LOG_INFO, "rec'd stale content: %s %s", io->method, io->url); pthread_mutex_lock(&priv->mutex); priv->stats.http_stale++; pthread_mutex_unlock(&priv->mutex); break; case HTTP_NOT_MODIFIED: if (io->expect_304) { if (config->debug) (*config->log)(LOG_DEBUG, "rec'd %ld response: %s %s", http_code, io->method, io->url); return EEXIST; } /* FALLTHROUGH */ default: (*config->log)(LOG_ERR, "rec'd %ld response: %s %s", http_code, io->method, io->url); pthread_mutex_lock(&priv->mutex); switch (http_code / 100) { case 4: priv->stats.http_4xx_error++; break; case 5: priv->stats.http_5xx_error++; break; default: priv->stats.http_other_error++; break; } pthread_mutex_unlock(&priv->mutex); break; } break; default: (*config->log)(LOG_ERR, "operation failed: %s (%s)", curl_easy_strerror(curl_code), total_pause >= config->max_retry_pause ? "final attempt" : "will retry"); pthread_mutex_lock(&priv->mutex); switch (curl_code) { case CURLE_OUT_OF_MEMORY: priv->stats.curl_out_of_memory++; break; case CURLE_COULDNT_CONNECT: priv->stats.curl_connect_failed++; break; case CURLE_COULDNT_RESOLVE_HOST: priv->stats.curl_host_unknown++; break; default: priv->stats.curl_other_error++; break; } pthread_mutex_unlock(&priv->mutex); break; } /* Retry with exponential backoff up to max total pause limit */ if (total_pause >= config->max_retry_pause) break; retry_pause = retry_pause > 0 ? 
retry_pause * 2 : config->initial_retry_pause;
        if (total_pause + retry_pause > config->max_retry_pause)
            retry_pause = config->max_retry_pause - total_pause;
        delay.tv_sec = retry_pause / 1000;
        delay.tv_nsec = (retry_pause % 1000) * 1000000;
        nanosleep(&delay, NULL);            // TODO: check for EINTR

        /* Update retry stats */
        pthread_mutex_lock(&priv->mutex);
        priv->stats.num_retries++;
        priv->stats.retry_delay += retry_pause;
        pthread_mutex_unlock(&priv->mutex);
    }

    /* Give up */
    (*config->log)(LOG_ERR, "giving up on: %s %s", io->method, io->url);
    return EIO;
}

/*
 * Compute S3 authorization hash using secret access key and add Authorization and SHA256 hash headers.
 *
 * Dispatches to the AWS signature version selected by config->authVersion
 * ("aws2" or "aws4"); a no-op (returns zero) when no access ID is configured,
 * i.e., anonymous access. Returns EINVAL for an unrecognized auth version.
 *
 * Note: headers must be unique and not wrapped.
 */
static int
http_io_add_auth(struct http_io_private *priv, struct http_io *const io, time_t now, const void *payload, size_t plen)
{
    const struct http_io_conf *const config = priv->config;

    /* Anything to do? */
    if (config->accessId == NULL)
        return 0;

    /* Which auth version? */
    if (strcmp(config->authVersion, AUTH_VERSION_AWS2) == 0)
        return http_io_add_auth2(priv, io, now, payload, plen);
    if (strcmp(config->authVersion, AUTH_VERSION_AWS4) == 0)
        return http_io_add_auth4(priv, io, now, payload, plen);

    /* Oops */
    return EINVAL;
}

/**
 * AWS version 2 authentication.
 *
 * Builds the legacy AWS2 "string to sign" (method, MD5/Content-Type/Date
 * headers, sorted x-amz-* headers, and the canonical resource), signs it with
 * HMAC-SHA1 keyed by the secret access key, and appends the resulting
 * "Authorization: AWS <id>:<base64 hmac>" header to io->headers.
 *
 * Returns zero on success or a system error code (e.g., ENOMEM from malloc).
 */
static int
http_io_add_auth2(struct http_io_private *priv, struct http_io *const io, time_t now, const void *payload, size_t plen)
{
    const struct http_io_conf *const config = priv->config;
    const struct curl_slist *header;
    u_char hmac[SHA_DIGEST_LENGTH];
    const char *resource;
    char **amz_hdrs = NULL;
    char access_id[128];                    /* NOTE(review): credentials longer than 127 chars are silently truncated by snprintf below */
    char access_key[128];
    char authbuf[200];
#if DEBUG_AUTHENTICATION
    char sigbuf[1024];
    char hmac_buf[EVP_MAX_MD_SIZE * 2 + 1];
#else
    char sigbuf[1];
#endif
    int num_amz_hdrs;
    const char *qmark;
    size_t resource_len;
    u_int hmac_len;
    HMAC_CTX* hmac_ctx = NULL;
    int i;
    int r;

    /* Snapshot current credentials (they may be updated concurrently by the IAM refresh thread) */
    pthread_mutex_lock(&priv->mutex);
    snprintf(access_id, sizeof(access_id), "%s", config->accessId);
    snprintf(access_key, sizeof(access_key), "%s", config->accessKey);
    pthread_mutex_unlock(&priv->mutex);

    /* Initialize HMAC */
    hmac_ctx = HMAC_CTX_new();
    assert(NULL != hmac_ctx);
    HMAC_Init_ex(hmac_ctx, access_key, strlen(access_key), EVP_sha1(), NULL);

#if DEBUG_AUTHENTICATION
    *sigbuf = '\0';
#endif

    /* Sign initial stuff: method, then the MD5, Content-Type, and Date header values (empty line if absent) */
    HMAC_Update(hmac_ctx, (const u_char *)io->method, strlen(io->method));
    HMAC_Update(hmac_ctx, (const u_char *)"\n", 1);
#if DEBUG_AUTHENTICATION
    snprintf(sigbuf + strlen(sigbuf), sizeof(sigbuf) - strlen(sigbuf), "%s\n", io->method);
#endif
    update_hmac_from_header(hmac_ctx, io, MD5_HEADER, 1, sigbuf, sizeof(sigbuf));
    update_hmac_from_header(hmac_ctx, io, CTYPE_HEADER, 1, sigbuf, sizeof(sigbuf));
    update_hmac_from_header(hmac_ctx, io, HTTP_DATE_HEADER, 1, sigbuf, sizeof(sigbuf));

    /* Get x-amz headers sorted by name */
    for (header = io->headers, num_amz_hdrs = 0; header != NULL; header = header->next) {
        if (strncmp(header->data, "x-amz", 5) == 0)
            num_amz_hdrs++;
    }
    if ((amz_hdrs = malloc(num_amz_hdrs * sizeof(*amz_hdrs))) == NULL) {
        r = errno;
        goto fail;
    }
    for (header = io->headers, i = 0; header != NULL; header = header->next) {
        if (strncmp(header->data, "x-amz", 5) == 0)
            amz_hdrs[i++] = header->data;
    }
    assert(i == num_amz_hdrs);
    qsort(amz_hdrs, num_amz_hdrs, sizeof(*amz_hdrs), http_io_strcasecmp_ptr);

    /* Sign x-amz headers (in sorted order), name and value both */
    for (i = 0; i < num_amz_hdrs; i++)
        update_hmac_from_header(hmac_ctx, io, amz_hdrs[i], 0, sigbuf, sizeof(sigbuf));

    /* Get resource: strip scheme+host (and "/bucket" in path-style URLs); query string excluded below */
    resource = config->vhost ? io->url + strlen(config->baseURL) - 1 : io->url + strlen(config->baseURL) + strlen(config->bucket);
    resource_len = (qmark = strchr(resource, '?')) != NULL ? qmark - resource : strlen(resource);

    /* Sign final stuff: canonical resource is always "/<bucket><path>" */
    HMAC_Update(hmac_ctx, (const u_char *)"/", 1);
    HMAC_Update(hmac_ctx, (const u_char *)config->bucket, strlen(config->bucket));
    HMAC_Update(hmac_ctx, (const u_char *)resource, resource_len);
#if DEBUG_AUTHENTICATION
    snprintf(sigbuf + strlen(sigbuf), sizeof(sigbuf) - strlen(sigbuf), "/%s%.*s", config->bucket, (int)resource_len, resource);
#endif

    /* Finish up */
    HMAC_Final(hmac_ctx, hmac, &hmac_len);
    assert(hmac_len == SHA_DIGEST_LENGTH);

    /* Base64-encode result */
    http_io_base64_encode(authbuf, sizeof(authbuf), hmac, hmac_len);

#if DEBUG_AUTHENTICATION
    (*config->log)(LOG_DEBUG, "auth: string to sign:\n%s", sigbuf);
    http_io_prhex(hmac_buf, hmac, hmac_len);
    (*config->log)(LOG_DEBUG, "auth: signature hmac = %s", hmac_buf);
    (*config->log)(LOG_DEBUG, "auth: signature hmac base64 = %s", authbuf);
#endif

    /* Add auth header */
    io->headers = http_io_add_header(priv, io->headers, "%s: AWS %s:%s", AUTH_HEADER, access_id, authbuf);

    /* Done */
    r = 0;

fail:
    /* Clean up */
    if (amz_hdrs != NULL)
        free(amz_hdrs);
    HMAC_CTX_free(hmac_ctx);
    return r;
}

/**
 * AWS version 4 authentication.
 *
 * Implements AWS Signature Version 4: hashes the payload, builds the hashed
 * canonical request from the (sorted) headers and URL components, derives the
 * per-day signing key via nested HMAC-SHA256's, signs the "string to sign",
 * and appends the Authorization header. Returns zero or a system error code.
 */
static int
http_io_add_auth4(struct http_io_private *priv, struct http_io *const io, time_t now, const void *payload, size_t plen)
{
    const struct http_io_conf *const config = priv->config;
    u_char payload_hash[EVP_MAX_MD_SIZE];
    u_char creq_hash[EVP_MAX_MD_SIZE];
    u_char hmac[EVP_MAX_MD_SIZE];
    u_int payload_hash_len;
    u_int creq_hash_len;
    u_int hmac_len;
    char payload_hash_buf[EVP_MAX_MD_SIZE * 2 + 1];
    char creq_hash_buf[EVP_MAX_MD_SIZE * 2 + 1];
    char hmac_buf[EVP_MAX_MD_SIZE * 2 + 1];
    const struct curl_slist *hdr;
    char **sorted_hdrs = NULL;
    char *header_names = NULL;
    const char *host;
    size_t host_len;
    const char *uripath;
    size_t uripath_len;
    const char *query_params;
    size_t query_params_len;
    u_int header_names_length;
    u_int num_sorted_hdrs;
    EVP_MD_CTX* hash_ctx;
    HMAC_CTX* hmac_ctx = NULL;
#if DEBUG_AUTHENTICATION
    char sigbuf[1024];
#endif
    char
hosthdr[128]; char datebuf[DATE_BUF_SIZE]; char access_id[128]; char access_key[128]; char *iam_token = NULL; struct tm tm; char *p; int r; int i; /* Initialize */ hash_ctx = EVP_MD_CTX_new(); /* Snapshot current credentials */ pthread_mutex_lock(&priv->mutex); snprintf(access_id, sizeof(access_id), "%s", config->accessId); snprintf(access_key, sizeof(access_key), "%s%s", ACCESS_KEY_PREFIX, config->accessKey); if (config->iam_token != NULL && (iam_token = strdup(config->iam_token)) == NULL) { r = errno; pthread_mutex_unlock(&priv->mutex); (*config->log)(LOG_ERR, "%s: strdup: %s", "http_io_add_auth4", strerror(r)); goto fail; } pthread_mutex_unlock(&priv->mutex); /* Extract host, URI path, and query parameters from URL */ if ((p = strchr(io->url, ':')) == NULL || *++p != '/' || *++p != '/' || (host = p + 1) == NULL || (uripath = strchr(host, '/')) == NULL) { r = EINVAL; free(iam_token); goto fail; } host_len = uripath - host; if ((p = strchr(uripath, '?')) != NULL) { uripath_len = p - uripath; query_params = p + 1; query_params_len = strlen(query_params); } else { uripath_len = strlen(uripath); query_params = NULL; query_params_len = 0; } /* Format date */ strftime(datebuf, sizeof(datebuf), AWS_DATE_BUF_FMT, gmtime_r(&now, &tm)); /****** Hash Payload and Add Header ******/ EVP_DigestInit_ex(hash_ctx, EVP_sha256(), NULL); if (payload != NULL) EVP_DigestUpdate(hash_ctx, payload, plen); EVP_DigestFinal_ex(hash_ctx, payload_hash, &payload_hash_len); http_io_prhex(payload_hash_buf, payload_hash, payload_hash_len); io->headers = http_io_add_header(priv, io->headers, "%s: %s", CONTENT_SHA256_HEADER, payload_hash_buf); /****** Add IAM security token header (if any) ******/ if (iam_token != NULL && *iam_token != '\0') { io->headers = http_io_add_header(priv, io->headers, "%s: %s", SECURITY_TOKEN_HEADER, iam_token); free(iam_token); } /****** Create Hashed Canonical Request ******/ #if DEBUG_AUTHENTICATION *sigbuf = '\0'; #endif /* Reset hash */ EVP_DigestInit_ex(hash_ctx, 
EVP_sha256(), NULL); /* Sort headers by (lowercase) name; add "Host" header manually - special case because cURL adds it, not us */ snprintf(hosthdr, sizeof(hosthdr), "host:%.*s", (int)host_len, host); for (num_sorted_hdrs = 1, hdr = io->headers; hdr != NULL; hdr = hdr->next) num_sorted_hdrs++; if ((sorted_hdrs = malloc(num_sorted_hdrs * sizeof(*sorted_hdrs))) == NULL) { r = errno; goto fail; } sorted_hdrs[0] = hosthdr; for (i = 1, hdr = io->headers; hdr != NULL; hdr = hdr->next) sorted_hdrs[i++] = hdr->data; assert(i == num_sorted_hdrs); qsort(sorted_hdrs, num_sorted_hdrs, sizeof(*sorted_hdrs), http_io_strcasecmp_ptr); /* Request method */ EVP_DigestUpdate(hash_ctx, (const u_char *)io->method, strlen(io->method)); EVP_DigestUpdate(hash_ctx, (const u_char *)"\n", 1); #if DEBUG_AUTHENTICATION snprintf(sigbuf + strlen(sigbuf), sizeof(sigbuf) - strlen(sigbuf), "%s\n", io->method); #endif /* Canonical URI */ digest_url_encoded(hash_ctx, uripath, uripath_len, 0); EVP_DigestUpdate(hash_ctx, (const u_char *)"\n", 1); #if DEBUG_AUTHENTICATION snprintf(sigbuf + strlen(sigbuf), sizeof(sigbuf) - strlen(sigbuf), "%.*s\n", (int)uripath_len, uripath); #endif /* Canonical query string */ EVP_DigestUpdate(hash_ctx, (const u_char *)query_params, query_params_len); EVP_DigestUpdate(hash_ctx, (const u_char *)"\n", 1); #if DEBUG_AUTHENTICATION snprintf(sigbuf + strlen(sigbuf), sizeof(sigbuf) - strlen(sigbuf), "%.*s\n", (int)query_params_len, query_params); #endif /* Canonical headers */ header_names_length = 0; for (i = 0; i < num_sorted_hdrs; i++) { const char *value = sorted_hdrs[i]; const char *s; char lcase; s = value; do { if (*s == '\0') { r = EINVAL; goto fail; } lcase = tolower(*s); EVP_DigestUpdate(hash_ctx, (const u_char *)&lcase, 1); #if DEBUG_AUTHENTICATION snprintf(sigbuf + strlen(sigbuf), sizeof(sigbuf) - strlen(sigbuf), "%c", lcase); #endif header_names_length++; } while (*s++ != ':'); while (isspace(*s)) s++; EVP_DigestUpdate(hash_ctx, (const u_char *)s, strlen(s)); 
EVP_DigestUpdate(hash_ctx, (const u_char *)"\n", 1); #if DEBUG_AUTHENTICATION snprintf(sigbuf + strlen(sigbuf), sizeof(sigbuf) - strlen(sigbuf), "%s\n", s); #endif } EVP_DigestUpdate(hash_ctx, (const u_char *)"\n", 1); #if DEBUG_AUTHENTICATION snprintf(sigbuf + strlen(sigbuf), sizeof(sigbuf) - strlen(sigbuf), "\n"); #endif /* Signed headers */ if ((header_names = malloc(header_names_length)) == NULL) { r = errno; goto fail; } p = header_names; for (i = 0; i < num_sorted_hdrs; i++) { const char *value = sorted_hdrs[i]; const char *s; if (p > header_names) *p++ = ';'; for (s = value; *s != '\0' && *s != ':'; s++) *p++ = tolower(*s); } *p++ = '\0'; assert(p <= header_names + header_names_length); EVP_DigestUpdate(hash_ctx, (const u_char *)header_names, strlen(header_names)); EVP_DigestUpdate(hash_ctx, (const u_char *)"\n", 1); #if DEBUG_AUTHENTICATION snprintf(sigbuf + strlen(sigbuf), sizeof(sigbuf) - strlen(sigbuf), "%s\n", header_names); #endif /* Hashed payload */ EVP_DigestUpdate(hash_ctx, (const u_char *)payload_hash_buf, strlen(payload_hash_buf)); #if DEBUG_AUTHENTICATION snprintf(sigbuf + strlen(sigbuf), sizeof(sigbuf) - strlen(sigbuf), "%s", payload_hash_buf); #endif /* Get canonical request hash as a string */ EVP_DigestFinal_ex(hash_ctx, creq_hash, &creq_hash_len); http_io_prhex(creq_hash_buf, creq_hash, creq_hash_len); #if DEBUG_AUTHENTICATION (*config->log)(LOG_DEBUG, "auth: canonical request:\n%s", sigbuf); (*config->log)(LOG_DEBUG, "auth: canonical request hash = %s", creq_hash_buf); #endif /****** Derive Signing Key ******/ /* Do nested HMAC's */ hmac_ctx = HMAC_CTX_new(); assert(NULL != hmac_ctx); HMAC_Init_ex(hmac_ctx, access_key, strlen(access_key), EVP_sha256(), NULL); #if DEBUG_AUTHENTICATION (*config->log)(LOG_DEBUG, "auth: access_key = \"%s\"", access_key); #endif HMAC_Update(hmac_ctx, (const u_char *)datebuf, 8); HMAC_Final(hmac_ctx, hmac, &hmac_len); assert(hmac_len <= sizeof(hmac)); #if DEBUG_AUTHENTICATION http_io_prhex(hmac_buf, hmac, 
hmac_len); (*config->log)(LOG_DEBUG, "auth: HMAC[%.8s] = %s", datebuf, hmac_buf); #endif HMAC_Init_ex(hmac_ctx, hmac, hmac_len, EVP_sha256(), NULL); HMAC_Update(hmac_ctx, (const u_char *)config->region, strlen(config->region)); HMAC_Final(hmac_ctx, hmac, &hmac_len); #if DEBUG_AUTHENTICATION http_io_prhex(hmac_buf, hmac, hmac_len); (*config->log)(LOG_DEBUG, "auth: HMAC[%s] = %s", config->region, hmac_buf); #endif HMAC_Init_ex(hmac_ctx, hmac, hmac_len, EVP_sha256(), NULL); HMAC_Update(hmac_ctx, (const u_char *)S3_SERVICE_NAME, strlen(S3_SERVICE_NAME)); HMAC_Final(hmac_ctx, hmac, &hmac_len); #if DEBUG_AUTHENTICATION http_io_prhex(hmac_buf, hmac, hmac_len); (*config->log)(LOG_DEBUG, "auth: HMAC[%s] = %sn", S3_SERVICE_NAME, hmac_buf); #endif HMAC_Init_ex(hmac_ctx, hmac, hmac_len, EVP_sha256(), NULL); HMAC_Update(hmac_ctx, (const u_char *)SIGNATURE_TERMINATOR, strlen(SIGNATURE_TERMINATOR)); HMAC_Final(hmac_ctx, hmac, &hmac_len); #if DEBUG_AUTHENTICATION http_io_prhex(hmac_buf, hmac, hmac_len); (*config->log)(LOG_DEBUG, "auth: HMAC[%s] = %s", SIGNATURE_TERMINATOR, hmac_buf); #endif /****** Sign the String To Sign ******/ #if DEBUG_AUTHENTICATION *sigbuf = '\0'; #endif HMAC_Init_ex(hmac_ctx, hmac, hmac_len, EVP_sha256(), NULL); HMAC_Update(hmac_ctx, (const u_char *)SIGNATURE_ALGORITHM, strlen(SIGNATURE_ALGORITHM)); HMAC_Update(hmac_ctx, (const u_char *)"\n", 1); #if DEBUG_AUTHENTICATION snprintf(sigbuf + strlen(sigbuf), sizeof(sigbuf) - strlen(sigbuf), "%s\n", SIGNATURE_ALGORITHM); #endif HMAC_Update(hmac_ctx, (const u_char *)datebuf, strlen(datebuf)); HMAC_Update(hmac_ctx, (const u_char *)"\n", 1); #if DEBUG_AUTHENTICATION snprintf(sigbuf + strlen(sigbuf), sizeof(sigbuf) - strlen(sigbuf), "%s\n", datebuf); #endif HMAC_Update(hmac_ctx, (const u_char *)datebuf, 8); HMAC_Update(hmac_ctx, (const u_char *)"/", 1); HMAC_Update(hmac_ctx, (const u_char *)config->region, strlen(config->region)); HMAC_Update(hmac_ctx, (const u_char *)"/", 1); HMAC_Update(hmac_ctx, (const u_char 
*)S3_SERVICE_NAME, strlen(S3_SERVICE_NAME)); HMAC_Update(hmac_ctx, (const u_char *)"/", 1); HMAC_Update(hmac_ctx, (const u_char *)SIGNATURE_TERMINATOR, strlen(SIGNATURE_TERMINATOR)); HMAC_Update(hmac_ctx, (const u_char *)"\n", 1); #if DEBUG_AUTHENTICATION snprintf(sigbuf + strlen(sigbuf), sizeof(sigbuf) - strlen(sigbuf), "%.8s/%s/%s/%s\n", datebuf, config->region, S3_SERVICE_NAME, SIGNATURE_TERMINATOR); #endif HMAC_Update(hmac_ctx, (const u_char *)creq_hash_buf, strlen(creq_hash_buf)); #if DEBUG_AUTHENTICATION snprintf(sigbuf + strlen(sigbuf), sizeof(sigbuf) - strlen(sigbuf), "%s", creq_hash_buf); #endif HMAC_Final(hmac_ctx, hmac, &hmac_len); http_io_prhex(hmac_buf, hmac, hmac_len); #if DEBUG_AUTHENTICATION (*config->log)(LOG_DEBUG, "auth: key to sign:\n%s", sigbuf); (*config->log)(LOG_DEBUG, "auth: signature hmac = %s", hmac_buf); #endif /****** Add Authorization Header ******/ io->headers = http_io_add_header(priv, io->headers, "%s: %s Credential=%s/%.8s/%s/%s/%s, SignedHeaders=%s, Signature=%s", AUTH_HEADER, SIGNATURE_ALGORITHM, access_id, datebuf, config->region, S3_SERVICE_NAME, SIGNATURE_TERMINATOR, header_names, hmac_buf); /* Done */ r = 0; fail: /* Clean up */ if (sorted_hdrs != NULL) free(sorted_hdrs); free(header_names); EVP_MD_CTX_free(hash_ctx); HMAC_CTX_free(hmac_ctx); return r; } /* * Add data to digest, but in URL-encoded form. */ static void digest_url_encoded(EVP_MD_CTX* hash_ctx, const char *data, size_t len, int encode_slash) { char buf[len * 3 + 1]; len = url_encode(data, len, buf, sizeof(buf), encode_slash); EVP_DigestUpdate(hash_ctx, (const u_char *)buf, len); } /* * URL-encode the given input. */ static size_t url_encode(const char *src, size_t len, char *dst, size_t buflen, int encode_slash) { char *const dst_base = dst; size_t elen; while (len-- > 0) { const char ch = *src++; if (isalnum(ch) || ch == '_' || ch == '-' || ch == '~' || ch == '.' 
|| (ch == '/' && !encode_slash)) {
            snprintf(dst, buflen, "%c", ch);
            elen = 1;
        } else {
            snprintf(dst, buflen, "%%%02X", (int)ch & 0xff);
            elen = 3;
        }
        /* NOTE(review): if buflen < elen here, "buflen -= elen" wraps (size_t) and dst runs past
           the truncation point; callers must size dst >= len * 3 + 1 as digest_url_encoded() does */
        dst += elen;
        buflen -= elen;
    }
    if (buflen > 0)
        *dst = '\0';
    return dst - dst_base;
}

/*
 * Create URL for a block, and return pointer to the URL's URI path.
 *
 * Path-style URLs include "/<bucket>"; virtual-host style URLs do not.
 * The object name is "<prefix>[<hash>]<block number in zero-padded hex>".
 */
static void
http_io_get_block_url(char *buf, size_t bufsiz, struct http_io_conf *config, s3b_block_t block_num)
{
    char block_hash_buf[S3B_BLOCK_NUM_DIGITS + 2];
    int len;

    http_io_format_block_hash(config, block_hash_buf, sizeof(block_hash_buf), block_num);
    if (config->vhost) {
        len = snprintf(buf, bufsiz, "%s%s%s%0*jx", config->baseURL,
          config->prefix, block_hash_buf, S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num);
    } else {
        len = snprintf(buf, bufsiz, "%s%s/%s%s%0*jx", config->baseURL,
          config->bucket, config->prefix, block_hash_buf, S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num);
    }
    (void)len;                              /* avoid compiler warning when NDEBUG defined */
    assert(len < bufsiz);
}

/*
 * Create URL for the mount token file, and return pointer to the URL's path not including any "/bucket" prefix.
 */
static void
http_io_get_mount_token_file_url(char *buf, size_t bufsiz, struct http_io_conf *config)
{
    int len;

    if (config->vhost)
        len = snprintf(buf, bufsiz, "%s%s%s", config->baseURL, config->prefix, MOUNT_TOKEN_FILE);
    else
        len = snprintf(buf, bufsiz, "%s%s/%s%s", config->baseURL, config->bucket, config->prefix, MOUNT_TOKEN_FILE);
    (void)len;                              /* avoid compiler warning when NDEBUG defined */
    assert(len < bufsiz);
}

/*
 * Add date header based on supplied time.
 *
 * AWS2 uses the classic HTTP "Date" header; AWS4 uses the "x-amz-date" header
 * with its own timestamp format. Both are formatted in UTC via gmtime_r().
 */
static void
http_io_add_date(struct http_io_private *const priv, struct http_io *const io, time_t now)
{
    struct http_io_conf *const config = priv->config;
    char buf[DATE_BUF_SIZE];
    struct tm tm;

    if (strcmp(config->authVersion, AUTH_VERSION_AWS2) == 0) {
        strftime(buf, sizeof(buf), HTTP_DATE_BUF_FMT, gmtime_r(&now, &tm));
        io->headers = http_io_add_header(priv, io->headers, "%s: %s", HTTP_DATE_HEADER, buf);
    } else {
        strftime(buf, sizeof(buf), AWS_DATE_BUF_FMT, gmtime_r(&now, &tm));
        io->headers = http_io_add_header(priv, io->headers, "%s: %s", AWS_DATE_HEADER, buf);
    }
}

/*
 * Append a printf(3)-formatted header to the given cURL header list.
 * On vasprintf() failure the header is silently dropped (after logging)
 * and the list is returned unchanged.
 */
static struct curl_slist *
http_io_add_header(struct http_io_private *priv, struct curl_slist *headers, const char *fmt, ...)
{
    char *buf;
    va_list args;

    va_start(args, fmt);
    if (vasprintf(&buf, fmt, args) == -1)
        (*priv->config->log)(LOG_ERR, "%s: vasprintf() failed: %s", "http_io_add_header", strerror(ENOMEM));
    else {
        headers = curl_slist_append(headers, buf);      /* curl_slist_append() copies buf */
        free(buf);
    }
    va_end(args);
    return headers;
}

/*
 * Get a cURL easy handle for the given request, reusing a cached handle if
 * one is available, and configure it with the standard options.
 * Returns NULL if a new handle cannot be created.
 */
static CURL *
http_io_acquire_curl(struct http_io_private *priv, struct http_io *io)
{
    struct http_io_conf *const config = priv->config;
    struct curl_holder *holder;
    CURL *curl;

    pthread_mutex_lock(&priv->mutex);
    if ((holder = LIST_FIRST(&priv->curls)) != NULL) {
        curl = holder->curl;
        LIST_REMOVE(holder, link);
        priv->stats.curl_handles_reused++;
        pthread_mutex_unlock(&priv->mutex);
        free(holder);
        curl_easy_reset(curl);
    } else {
        priv->stats.curl_handles_created++;             // optimistic
        pthread_mutex_unlock(&priv->mutex);
        if ((curl = curl_easy_init()) == NULL) {
            pthread_mutex_lock(&priv->mutex);
            priv->stats.curl_handles_created--;         // undo optimistic
            priv->stats.curl_other_error++;
            pthread_mutex_unlock(&priv->mutex);
            (*config->log)(LOG_ERR, "curl_easy_init() failed");
            return NULL;
        }
    }
    curl_easy_setopt(curl, CURLOPT_URL, io->url);
    curl_easy_setopt(curl, CURLOPT_FAILONERROR, 1);
    curl_easy_setopt(curl, CURLOPT_FOLLOWLOCATION, 1);
    curl_easy_setopt(curl, CURLOPT_NOSIGNAL, (long)1);
    curl_easy_setopt(curl,
CURLOPT_TCP_KEEPALIVE, (long)1); curl_easy_setopt(curl, CURLOPT_TCP_KEEPIDLE, (long)TCP_KEEP_ALIVE_IDLE); curl_easy_setopt(curl, CURLOPT_TCP_KEEPINTVL, (long)TCP_KEEP_ALIVE_INTERVAL); curl_easy_setopt(curl, CURLOPT_TIMEOUT, (long)config->timeout); curl_easy_setopt(curl, CURLOPT_NOPROGRESS, 1); curl_easy_setopt(curl, CURLOPT_USERAGENT, config->user_agent); if (config->max_speed[HTTP_UPLOAD] != 0) curl_easy_setopt(curl, CURLOPT_MAX_SEND_SPEED_LARGE, (curl_off_t)(config->max_speed[HTTP_UPLOAD] / 8)); if (config->max_speed[HTTP_DOWNLOAD] != 0) curl_easy_setopt(curl, CURLOPT_MAX_RECV_SPEED_LARGE, (curl_off_t)(config->max_speed[HTTP_DOWNLOAD] / 8)); if (strncmp(io->url, "https", 5) == 0) { if (config->insecure) curl_easy_setopt(curl, CURLOPT_SSL_VERIFYPEER, (long)0); if (config->cacert != NULL) curl_easy_setopt(curl, CURLOPT_CAINFO, config->cacert); } if (config->debug_http) curl_easy_setopt(curl, CURLOPT_VERBOSE, 1); return curl; } static size_t http_io_curl_reader(const void *ptr, size_t size, size_t nmemb, void *stream) { struct http_io *const io = (struct http_io *)stream; struct http_io_bufs *const bufs = &io->bufs; size_t total = size * nmemb; if (total > bufs->rdremain) /* should never happen */ total = bufs->rdremain; memcpy(bufs->rddata, ptr, total); bufs->rddata += total; bufs->rdremain -= total; return total; } static size_t http_io_curl_writer(void *ptr, size_t size, size_t nmemb, void *stream) { struct http_io *const io = (struct http_io *)stream; struct http_io_bufs *const bufs = &io->bufs; size_t total = size * nmemb; /* Check for canceled write */ if (io->check_cancel != NULL && (*io->check_cancel)(io->check_cancel_arg, io->block_num) != 0) return CURL_READFUNC_ABORT; /* Copy out data */ if (total > bufs->wrremain) /* should never happen */ total = bufs->wrremain; memcpy(ptr, bufs->wrdata, total); bufs->wrdata += total; bufs->wrremain -= total; return total; } static size_t http_io_curl_header(void *ptr, size_t size, size_t nmemb, void *stream) { struct 
http_io *const io = (struct http_io *)stream; const size_t total = size * nmemb; char hashbuf[64]; char buf[1024]; u_int mtoken; /* Null-terminate header */ if (total > sizeof(buf) - 1) return total; memcpy(buf, ptr, total); buf[total] = '\0'; /* Check for interesting headers */ (void)http_io_parse_header(buf, FILE_SIZE_HEADER, "%ju", &io->file_size); (void)http_io_parse_header(buf, BLOCK_SIZE_HEADER, "%u", &io->block_size); if (http_io_parse_header(buf, MOUNT_TOKEN_HEADER, "%x", &mtoken) == 1) io->mount_token = (int32_t)mtoken; /* ETag header */ #if MD5_DIGEST_LENGTH != 16 #error unexpected MD5_DIGEST_LENGTH #endif if (http_io_parse_header(buf, ETAG_HEADER, "\"%32c\"", hashbuf) == 1) http_io_parse_hex(hashbuf, io->md5, MD5_DIGEST_LENGTH); /* "x-amz-meta-s3backer-hmac" header */ #if SHA_DIGEST_LENGTH != 20 #error unexpected MD5_DIGEST_LENGTH #endif if (http_io_parse_header(buf, HMAC_HEADER, "\"%40c\"", hashbuf) == 1) http_io_parse_hex(hashbuf, io->hmac, SHA_DIGEST_LENGTH); /* Content encoding(s) */ if (strncasecmp(buf, CONTENT_ENCODING_HEADER ":", sizeof(CONTENT_ENCODING_HEADER)) == 0) { size_t celen; char *state; char *s; *io->content_encoding = '\0'; for (s = strtok_r(buf + sizeof(CONTENT_ENCODING_HEADER), WHITESPACE ",", &state); s != NULL; s = strtok_r(NULL, WHITESPACE ",", &state)) { celen = strlen(io->content_encoding); snprintf(io->content_encoding + celen, sizeof(io->content_encoding) - celen, "%s%s", celen > 0 ? 
"," : "", s); } } /* Done */ return total; } static void http_io_release_curl(struct http_io_private *priv, CURL **curlp, int may_cache) { struct curl_holder *holder; CURL *const curl = *curlp; *curlp = NULL; assert(curl != NULL); if (!may_cache) { curl_easy_cleanup(curl); return; } if ((holder = calloc(1, sizeof(*holder))) == NULL) { curl_easy_cleanup(curl); pthread_mutex_lock(&priv->mutex); priv->stats.out_of_memory_errors++; pthread_mutex_unlock(&priv->mutex); return; } holder->curl = curl; pthread_mutex_lock(&priv->mutex); LIST_INSERT_HEAD(&priv->curls, holder, link); pthread_mutex_unlock(&priv->mutex); } static void http_io_openssl_locker(int mode, int i, const char *file, int line) { if ((mode & CRYPTO_LOCK) != 0) pthread_mutex_lock(&openssl_locks[i]); else pthread_mutex_unlock(&openssl_locks[i]); } static u_long http_io_openssl_ider(void) { return (u_long)pthread_self(); } static void http_io_base64_encode(char *buf, size_t bufsiz, const void *data, size_t len) { BUF_MEM *bptr; BIO* bmem; BIO* b64; b64 = BIO_new(BIO_f_base64()); bmem = BIO_new(BIO_s_mem()); b64 = BIO_push(b64, bmem); BIO_write(b64, data, len); (void)BIO_flush(b64); BIO_get_mem_ptr(b64, &bptr); snprintf(buf, bufsiz, "%.*s", (int)bptr->length - 1, (char *)bptr->data); BIO_free_all(b64); } static int http_io_is_zero_block(const void *data, u_int block_size) { static const u_long zero; const u_int *ptr; int i; if (block_size <= sizeof(zero)) return memcmp(data, &zero, block_size) == 0; ptr = (const u_int *)data; for (i = 0; i < block_size / sizeof(*ptr); i++) { if (*ptr++ != 0) return 0; } return 1; } /* * Encrypt or decrypt one block */ static u_int http_io_crypt(struct http_io_private *priv, s3b_block_t block_num, int enc, const u_char *src, u_int len, u_char *dest) { u_char ivec[EVP_MAX_IV_LENGTH]; EVP_CIPHER_CTX* ctx; u_int total_len; char blockbuf[EVP_MAX_IV_LENGTH]; int clen; int r; #ifdef NDEBUG /* Avoid unused variable warning */ (void)r; #endif /* Sanity check */ 
assert(EVP_MAX_IV_LENGTH >= MD5_DIGEST_LENGTH); /* Initialize cipher context */ ctx = EVP_CIPHER_CTX_new(); EVP_CIPHER_CTX_init(ctx); /* Generate initialization vector by encrypting the block number using previously generated IV */ memset(blockbuf, 0, sizeof(blockbuf)); snprintf(blockbuf, sizeof(blockbuf), "%0*jx", S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num); /* Initialize cipher for IV generation */ r = EVP_EncryptInit_ex(ctx, priv->cipher, NULL, priv->ivkey, priv->ivkey); assert(r == 1); EVP_CIPHER_CTX_set_padding(ctx, 0); /* Encrypt block number to get IV for bulk encryption */ r = EVP_EncryptUpdate(ctx, ivec, &clen, (const u_char *)blockbuf, EVP_CIPHER_CTX_block_size(ctx)); assert(r == 1 && clen == EVP_CIPHER_CTX_block_size(ctx)); r = EVP_EncryptFinal_ex(ctx, NULL, &clen); assert(r == 1 && clen == 0); /* Re-initialize cipher for bulk data encryption */ assert(EVP_CIPHER_CTX_block_size(ctx) == EVP_CIPHER_CTX_iv_length(ctx)); r = EVP_CipherInit_ex(ctx, priv->cipher, NULL, priv->key, ivec, enc); assert(r == 1); EVP_CIPHER_CTX_set_padding(ctx, 1); /* Encrypt/decrypt */ r = EVP_CipherUpdate(ctx, dest, &clen, src, (int)len); assert(r == 1 && clen >= 0); total_len = (u_int)clen; r = EVP_CipherFinal_ex(ctx, dest + total_len, &clen); assert(r == 1 && clen >= 0); total_len += (u_int)clen; /* Encryption debug */ #if DEBUG_ENCRYPTION { struct http_io_conf *const config = priv->config; char ivecbuf[sizeof(ivec) * 2 + 1]; http_io_prhex(ivecbuf, ivec, sizeof(ivec)); (*config->log)(LOG_DEBUG, "%sCRYPT: block=%s ivec=0x%s len: %d -> %d", (enc ? 
"EN" : "DE"), blockbuf, ivecbuf, len, total_len); } #endif /* Done */ EVP_CIPHER_CTX_free(ctx); return total_len; } static void http_io_authsig(struct http_io_private *priv, s3b_block_t block_num, const u_char *src, u_int len, u_char *hmac) { const char *const ciphername = EVP_CIPHER_name(priv->cipher); char blockbuf[64]; u_int hmac_len; HMAC_CTX* ctx; /* Sign the block number, the name of the encryption algorithm, and the block data */ snprintf(blockbuf, sizeof(blockbuf), "%0*jx", S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num); ctx = HMAC_CTX_new(); assert(NULL != ctx); HMAC_Init_ex(ctx, (const u_char *)priv->key, priv->keylen, EVP_sha1(), NULL); HMAC_Update(ctx, (const u_char *)blockbuf, strlen(blockbuf)); HMAC_Update(ctx, (const u_char *)ciphername, strlen(ciphername)); HMAC_Update(ctx, (const u_char *)src, len); HMAC_Final(ctx, (u_char *)hmac, &hmac_len); assert(hmac_len == SHA_DIGEST_LENGTH); HMAC_CTX_free(ctx); } static void update_hmac_from_header(HMAC_CTX *const ctx, struct http_io *const io, const char *name, int value_only, char *sigbuf, size_t sigbuflen) { const struct curl_slist *header; const char *colon; const char *value; size_t name_len; /* Find and add header */ name_len = (colon = strchr(name, ':')) != NULL ? 
colon - name : strlen(name); for (header = io->headers; header != NULL; header = header->next) { if (strncasecmp(header->data, name, name_len) == 0 && header->data[name_len] == ':') { if (!value_only) { HMAC_Update(ctx, (const u_char *)header->data, name_len + 1); #if DEBUG_AUTHENTICATION snprintf(sigbuf + strlen(sigbuf), sigbuflen - strlen(sigbuf), "%.*s", (int)name_len + 1, header->data); #endif } for (value = header->data + name_len + 1; isspace(*value); value++) ; HMAC_Update(ctx, (const u_char *)value, strlen(value)); #if DEBUG_AUTHENTICATION snprintf(sigbuf + strlen(sigbuf), sigbuflen - strlen(sigbuf), "%s", value); #endif break; } } /* Add newline whether or not header was found */ HMAC_Update(ctx, (const u_char *)"\n", 1); #if DEBUG_AUTHENTICATION snprintf(sigbuf + strlen(sigbuf), sigbuflen - strlen(sigbuf), "\n"); #endif } /* * Parse exactly "nbytes" contiguous 2-digit hex bytes. * On failure, zero out the buffer and return -1. */ static int http_io_parse_hex(const char *str, u_char *buf, u_int nbytes) { int i; /* Parse hex string */ for (i = 0; i < nbytes; i++) { int byte; int j; for (byte = j = 0; j < 2; j++) { const char ch = str[2 * i + j]; if (!isxdigit(ch)) { memset(buf, 0, nbytes); return -1; } byte <<= 4; byte |= ch <= '9' ? ch - '0' : tolower(ch) - 'a' + 10; } buf[i] = byte; } /* Done */ return 0; } static void http_io_prhex(char *buf, const u_char *data, size_t len) { static const char *hexdig = "0123456789abcdef"; int i; for (i = 0; i < len; i++) { buf[i * 2 + 0] = hexdig[data[i] >> 4]; buf[i * 2 + 1] = hexdig[data[i] & 0x0f]; } buf[i * 2] = '\0'; } static int http_io_strcasecmp_ptr(const void *const ptr1, const void *const ptr2) { const char *const str1 = *(const char *const *)ptr1; const char *const str2 = *(const char *const *)ptr2; return strcasecmp(str1, str2); } static int http_io_parse_header(const char *const input, const char *const header, const char *const fmt, ...) 
{ va_list args; size_t offset; int rtn; /* Initialize */ va_start(args, fmt); /* Match header (case-insensitively) followed by ':' and optional whitespace */ offset = strlen(header); if (strncasecmp(input, header, offset) != 0 || input[offset++] != ':') return -1; while (isspace(input[offset])) offset++; /* Parse header value */ rtn = vsscanf(input + offset, fmt, args); /* Done */ va_end(args); return rtn; } s3backer-1.5.4/http_io.h000066400000000000000000000133411354714241400150440ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. 
If you delete
 * this exception statement from all source files in the program, then
 * also delete it here.
 */

/* Upload/download indexes (into http_io_conf.max_speed[]) */
#define HTTP_DOWNLOAD       0
#define HTTP_UPLOAD         1

/* Authentication types */
#define AUTH_VERSION_AWS2   "aws2"
#define AUTH_VERSION_AWS4   "aws4"

/* Storage classes */
#define STORAGE_CLASS_STANDARD              "STANDARD"
#define STORAGE_CLASS_STANDARD_IA           "STANDARD_IA"
#define STORAGE_CLASS_REDUCED_REDUNDANCY    "REDUCED_REDUNDANCY"

/* Server side encryption types */
#define REQUIRED_SSE_VALUE  "AES256"

/* Configuration info structure for http_io store */
struct http_io_conf {
    char                *accessId;
    char                *accessKey;
    char                *iam_token;
    const char          *accessType;
    const char          *ec2iam_role;
    const char          *storage_class;
    const char          *authVersion;
    const char          *baseURL;
    const char          *region;
    const char          *bucket;
    const char          *prefix;
    const char          *user_agent;
    const char          *cacert;
    const char          *password;
    const char          *encryption;
    const char          *default_ce;
    u_int               key_length;
    int                 debug;
    int                 debug_http;
    int                 quiet;
    int                 rrs;                // reduced redundancy storage (backward compat.)
    int                 compress;           // zlib compression level
    int                 vhost;              // use virtual host style URL
    u_int               *nonzero_bitmap;    // is set to NULL by http_io_create()
    int                 blockHashPrefix;
    int                 insecure;
    u_int               block_size;
    off_t               num_blocks;
    u_int               timeout;
    u_int               initial_retry_pause;
    u_int               max_retry_pause;
    uintmax_t           max_speed[2];       // bits per second, indexed by HTTP_UPLOAD/HTTP_DOWNLOAD
    log_func_t          *log;
    const char          *sse;
};

/* Statistics structure for http_io store */
struct http_io_evst {
    u_int               count;              // number of occurrences
    double              time;               // total time taken
};

struct http_io_stats {

    /* Block stats */
    u_int               normal_blocks_read;
    u_int               normal_blocks_written;
    u_int               zero_blocks_read;
    u_int               zero_blocks_written;
    u_int               empty_blocks_read;      // only when nonzero_bitmap != NULL
    u_int               empty_blocks_written;   // only when nonzero_bitmap != NULL

    /* HTTP transfer stats */
    struct http_io_evst http_heads;         // total successful
    struct http_io_evst http_gets;          // total successful
    struct http_io_evst http_puts;          // total successful
    struct http_io_evst http_deletes;       // total successful
    u_int               http_unauthorized;
    u_int               http_forbidden;
    u_int               http_stale;
    u_int               http_verified;
    u_int               http_mismatch;
    u_int               http_5xx_error;
    u_int               http_4xx_error;
    u_int               http_other_error;
    u_int               http_canceled_writes;

    /* CURL stats */
    u_int               curl_handles_created;
    u_int               curl_handles_reused;
    u_int               curl_timeouts;
    u_int               curl_connect_failed;
    u_int               curl_host_unknown;
    u_int               curl_out_of_memory;
    u_int               curl_other_error;

    /* Retry stats */
    u_int               num_retries;
    uint64_t            retry_delay;

    /* Misc */
    u_int               out_of_memory_errors;
};

/* http_io.c */
extern struct s3backer_store *http_io_create(struct http_io_conf *config);
extern void http_io_get_stats(struct s3backer_store *s3b, struct http_io_stats *stats);
extern void http_io_clear_stats(struct s3backer_store *s3b);
extern int http_io_parse_block(struct http_io_conf *config, const char *name, s3b_block_t *block_num);
extern void http_io_format_block_hash(const struct http_io_conf *config, char *block_hash_buf, size_t bufsiz, s3b_block_t block_num);
s3backer-1.5.4/main.c000066400000000000000000000055161354714241400143220ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. 
 */

#include "s3backer.h"
#include "block_cache.h"
#include "ec_protect.h"
#include "fuse_ops.h"
#include "http_io.h"
#include "s3b_config.h"
#include "erase.h"
#include "reset.h"

/*
 * Program entry point.
 *
 * Parses configuration, handles the one-shot `--erase' and `--reset' modes,
 * otherwise builds the layered s3backer_store and hands control to FUSE.
 * Returns zero on success, non-zero on any failure.
 */
int
main(int argc, char **argv)
{
    const struct fuse_operations *fuse_ops;
    struct s3backer_store *s3b;
    struct s3b_config *config;

    /* Get configuration; errors have already been reported by the parser */
    if ((config = s3backer_get_config(argc, argv)) == NULL)
        return 1;

    /* Handle `--erase' flag: one-shot operation, then exit */
    if (config->erase) {
        if (s3backer_erase(config) != 0)
            return 1;
        return 0;
    }

    /* Handle `--reset' flag: one-shot operation, then exit */
    if (config->reset) {
        if (s3backer_reset(config) != 0)
            return 1;
        return 0;
    }

    /* Create backing store (exit with error code if store setup fails; see CHANGES 1.5.1) */
    if ((s3b = s3backer_create_store(config)) == NULL) {
        (*config->log)(LOG_ERR, "error creating s3backer_store: %s", strerror(errno));
        return 1;
    }

    /* Setup FUSE operation hooks; on failure, tear down the store we just built */
    if ((fuse_ops = fuse_ops_create(&config->fuse_ops, s3b)) == NULL) {
        (*s3b->destroy)(s3b);
        return 1;
    }

    /* Start: fuse_main() runs until unmount and owns process lifetime from here */
    (*config->log)(LOG_INFO, "s3backer process %lu for %s started", (u_long)getpid(), config->mount);
    return fuse_main(config->fuse_args.argc, config->fuse_args.argv, fuse_ops, NULL);
}
s3backer-1.5.4/reset.c000066400000000000000000000070721354714241400145170ustar00rootroot00000000000000
/*
 * s3backer - FUSE-based single file backing store via Amazon S3
 *
 * Copyright 2008-2011 Archie L. Cobbs
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. */ #include "s3backer.h" #include "block_cache.h" #include "ec_protect.h" #include "fuse_ops.h" #include "http_io.h" #include "test_io.h" #include "s3b_config.h" #include "reset.h" #include "dcache.h" int s3backer_reset(struct s3b_config *config) { struct s3backer_store *s3b = NULL; struct s3b_dcache *dcache = NULL; struct stat cache_file_stat; int ok = 0; int r; /* Logging */ if (!config->quiet) warnx("resetting mount token for %s", config->description); /* Create temporary lower layer */ if ((s3b = config->test ? test_io_create(&config->http_io) : http_io_create(&config->http_io)) == NULL) { warnx(config->test ? 
"test_io_create" : "http_io_create"); goto fail; } /* Clear mount token */ if ((r = (*s3b->set_mount_token)(s3b, NULL, 0)) != 0) { warnx("error clearing s3 mount token: %s", strerror(r)); goto fail; } /* Open disk cache file, if any, and clear the mount token there too */ if (config->block_cache.cache_file != NULL) { if (stat(config->block_cache.cache_file, &cache_file_stat) == -1) { if (errno != ENOENT) { warnx("error opening cache file `%s'", config->block_cache.cache_file); goto fail; } } else { if ((r = s3b_dcache_open(&dcache, config->log, config->block_cache.cache_file, config->block_cache.block_size, config->block_cache.cache_size, NULL, NULL, 0)) != 0) warnx("error opening cache file `%s': %s", config->block_cache.cache_file, strerror(r)); if ((r = s3b_dcache_set_mount_token(dcache, NULL, 0)) != 0) warnx("error reading mount token from `%s': %s", config->block_cache.cache_file, strerror(r)); } } /* Success */ if (!config->quiet) warnx("done"); ok = 1; fail: /* Clean up */ if (dcache != NULL) s3b_dcache_close(dcache); if (s3b != NULL) (*s3b->destroy)(s3b); return ok ? 0 : -1; } s3backer-1.5.4/reset.h000066400000000000000000000032141354714241400145160ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. */ /* reset.c */ extern int s3backer_reset(struct s3b_config *config); s3backer-1.5.4/s3b_config.c000066400000000000000000002316251354714241400154140ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. 
* * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. */ #include "s3backer.h" #include "block_cache.h" #include "ec_protect.h" #include "fuse_ops.h" #include "http_io.h" #include "test_io.h" #include "s3b_config.h" #include "dcache.h" /**************************************************************************** * DEFINITIONS * ****************************************************************************/ /* S3 URL */ #define S3_DOMAIN "amazonaws.com" /* S3 access permission strings */ #define S3_ACCESS_PRIVATE "private" #define S3_ACCESS_PUBLIC_READ "public-read" #define S3_ACCESS_PUBLIC_READ_WRITE "public-read-write" #define S3_ACCESS_AUTHENTICATED_READ "authenticated-read" /* Default values for some configuration parameters */ #define S3BACKER_DEFAULT_ACCESS_TYPE S3_ACCESS_PRIVATE #define S3BACKER_DEFAULT_AUTH_VERSION AUTH_VERSION_AWS4 #define S3BACKER_DEFAULT_REGION "us-east-1" #define S3BACKER_DEFAULT_PWD_FILE ".s3backer_passwd" #define S3BACKER_DEFAULT_PREFIX "" #define S3BACKER_DEFAULT_FILENAME "file" #define S3BACKER_DEFAULT_STATS_FILENAME "stats" #define S3BACKER_DEFAULT_BLOCKSIZE 4096 #define S3BACKER_DEFAULT_TIMEOUT 30 // 30s #define S3BACKER_DEFAULT_FILE_MODE 0600 #define S3BACKER_DEFAULT_FILE_MODE_READ_ONLY 0400 #define S3BACKER_DEFAULT_INITIAL_RETRY_PAUSE 200 // 200ms #define 
S3BACKER_DEFAULT_MAX_RETRY_PAUSE 30000 // 30s #define S3BACKER_DEFAULT_MIN_WRITE_DELAY 500 // 500ms #define S3BACKER_DEFAULT_MD5_CACHE_TIME 10000 // 10s #define S3BACKER_DEFAULT_MD5_CACHE_SIZE 10000 #define S3BACKER_DEFAULT_BLOCK_CACHE_SIZE 1000 #define S3BACKER_DEFAULT_BLOCK_CACHE_NUM_THREADS 20 #define S3BACKER_DEFAULT_BLOCK_CACHE_WRITE_DELAY 250 // 250ms #define S3BACKER_DEFAULT_BLOCK_CACHE_TIMEOUT 0 #define S3BACKER_DEFAULT_BLOCK_CACHE_MAX_DIRTY 0 #define S3BACKER_DEFAULT_READ_AHEAD 4 #define S3BACKER_DEFAULT_READ_AHEAD_TRIGGER 2 #define S3BACKER_DEFAULT_COMPRESSION Z_NO_COMPRESSION #define S3BACKER_DEFAULT_ENCRYPTION "AES-128-CBC" /* MacFUSE setting for kernel daemon timeout */ #ifdef __APPLE__ #ifndef FUSE_MAX_DAEMON_TIMEOUT #define FUSE_MAX_DAEMON_TIMEOUT 600 #endif #define s3bquote0(x) #x #define s3bquote(x) s3bquote0(x) #define FUSE_MAX_DAEMON_TIMEOUT_STRING s3bquote(FUSE_MAX_DAEMON_TIMEOUT) #endif /* __APPLE__ */ /* Block counting info */ struct list_blocks { u_int *bitmap; int print_dots; uintmax_t count; }; #define BLOCKS_PER_DOT 0x100 /**************************************************************************** * FUNCTION DECLARATIONS * ****************************************************************************/ static print_stats_t s3b_config_print_stats; static clear_stats_t s3b_config_clear_stats; static int parse_size_string(const char *s, uintmax_t *valp); static void unparse_size_string(char *buf, size_t bmax, uintmax_t value); static int search_access_for(const char *file, const char *accessId, char **idptr, char **pwptr); static int handle_unknown_option(void *data, const char *arg, int key, struct fuse_args *outargs); static void syslog_logger(int level, const char *fmt, ...) __attribute__ ((__format__ (__printf__, 2, 3))); static void stderr_logger(int level, const char *fmt, ...) 
__attribute__ ((__format__ (__printf__, 2, 3))); static int validate_config(void); static void list_blocks_callback(void *arg, s3b_block_t block_num); static void dump_config(void); static void usage(void); /**************************************************************************** * VARIABLE DEFINITIONS * ****************************************************************************/ /* Upload/download strings */ static const char *const upload_download_names[] = { "download", "upload" }; /* Valid S3 access values */ static const char *const s3_acls[] = { S3_ACCESS_PRIVATE, S3_ACCESS_PUBLIC_READ, S3_ACCESS_PUBLIC_READ_WRITE, S3_ACCESS_AUTHENTICATED_READ }; /* Valid S3 authentication types */ static const char *const s3_auth_types[] = { AUTH_VERSION_AWS2, AUTH_VERSION_AWS4, }; /* Configuration structure */ static char user_agent_buf[64]; static struct s3b_config config = { /* HTTP config */ .http_io= { .accessId= NULL, .accessKey= NULL, .baseURL= NULL, .region= NULL, .bucket= NULL, .sse= NULL, .blockHashPrefix= 0, .prefix= S3BACKER_DEFAULT_PREFIX, .accessType= S3BACKER_DEFAULT_ACCESS_TYPE, .authVersion= S3BACKER_DEFAULT_AUTH_VERSION, .user_agent= user_agent_buf, .compress= S3BACKER_DEFAULT_COMPRESSION, .timeout= S3BACKER_DEFAULT_TIMEOUT, .initial_retry_pause= S3BACKER_DEFAULT_INITIAL_RETRY_PAUSE, .max_retry_pause= S3BACKER_DEFAULT_MAX_RETRY_PAUSE, }, /* "Eventual consistency" protection config */ .ec_protect= { .min_write_delay= S3BACKER_DEFAULT_MIN_WRITE_DELAY, .cache_time= S3BACKER_DEFAULT_MD5_CACHE_TIME, .cache_size= S3BACKER_DEFAULT_MD5_CACHE_SIZE, }, /* Block cache config */ .block_cache= { .cache_size= S3BACKER_DEFAULT_BLOCK_CACHE_SIZE, .num_threads= S3BACKER_DEFAULT_BLOCK_CACHE_NUM_THREADS, .write_delay= S3BACKER_DEFAULT_BLOCK_CACHE_WRITE_DELAY, .max_dirty= S3BACKER_DEFAULT_BLOCK_CACHE_MAX_DIRTY, .timeout= S3BACKER_DEFAULT_BLOCK_CACHE_TIMEOUT, .read_ahead= S3BACKER_DEFAULT_READ_AHEAD, .read_ahead_trigger= S3BACKER_DEFAULT_READ_AHEAD_TRIGGER, }, /* FUSE 
operations config */ .fuse_ops= { .filename= S3BACKER_DEFAULT_FILENAME, .stats_filename= S3BACKER_DEFAULT_STATS_FILENAME, .file_mode= -1, /* default depends on 'read_only' */ }, /* Common stuff */ .block_size= 0, .file_size= 0, .quiet= 0, .erase= 0, .no_auto_detect= 0, .reset= 0, .log= syslog_logger }; /* * Command line flags * * Note: each entry here is listed twice, so both version "--foo=X" and "-o foo=X" work. * See http://code.google.com/p/s3backer/issues/detail?id=7 */ static const struct fuse_opt option_list[] = { { .templ= "--accessFile=%s", .offset= offsetof(struct s3b_config, accessFile), }, { .templ= "--accessId=%s", .offset= offsetof(struct s3b_config, http_io.accessId), }, { .templ= "--accessKey=%s", .offset= offsetof(struct s3b_config, http_io.accessKey), }, { .templ= "--accessType=%s", .offset= offsetof(struct s3b_config, http_io.accessType), }, { .templ= "--accessEC2IAM=%s", .offset= offsetof(struct s3b_config, http_io.ec2iam_role), }, { .templ= "--authVersion=%s", .offset= offsetof(struct s3b_config, http_io.authVersion), }, { .templ= "--listBlocks", .offset= offsetof(struct s3b_config, list_blocks), .value= 1 }, { .templ= "--baseURL=%s", .offset= offsetof(struct s3b_config, http_io.baseURL), }, { .templ= "--region=%s", .offset= offsetof(struct s3b_config, http_io.region), }, { .templ= "--sse=%s", .offset= offsetof(struct s3b_config, http_io.sse), }, { .templ= "--blockCacheSize=%u", .offset= offsetof(struct s3b_config, block_cache.cache_size), }, { .templ= "--blockCacheSync", .offset= offsetof(struct s3b_config, block_cache.synchronous), .value= 1 }, { .templ= "--blockCacheThreads=%u", .offset= offsetof(struct s3b_config, block_cache.num_threads), }, { .templ= "--blockCacheTimeout=%u", .offset= offsetof(struct s3b_config, block_cache.timeout), }, { .templ= "--blockCacheWriteDelay=%u", .offset= offsetof(struct s3b_config, block_cache.write_delay), }, { .templ= "--blockCacheMaxDirty=%u", .offset= offsetof(struct s3b_config, block_cache.max_dirty), }, 
{ .templ= "--blockCacheRecoverDirtyBlocks", .offset= offsetof(struct s3b_config, block_cache.recover_dirty_blocks), .value= 1 }, { .templ= "--readAhead=%u", .offset= offsetof(struct s3b_config, block_cache.read_ahead), }, { .templ= "--readAheadTrigger=%u", .offset= offsetof(struct s3b_config, block_cache.read_ahead_trigger), }, { .templ= "--blockCacheFile=%s", .offset= offsetof(struct s3b_config, block_cache.cache_file), }, { .templ= "--blockCacheNoVerify", .offset= offsetof(struct s3b_config, block_cache.no_verify), .value= 1 }, { .templ= "--blockSize=%s", .offset= offsetof(struct s3b_config, block_size_str), }, { .templ= "--maxUploadSpeed=%s", .offset= offsetof(struct s3b_config, max_speed_str[HTTP_UPLOAD]), }, { .templ= "--maxDownloadSpeed=%s", .offset= offsetof(struct s3b_config, max_speed_str[HTTP_DOWNLOAD]), }, { .templ= "--md5CacheSize=%u", .offset= offsetof(struct s3b_config, ec_protect.cache_size), }, { .templ= "--md5CacheTime=%u", .offset= offsetof(struct s3b_config, ec_protect.cache_time), }, { .templ= "--debug", .offset= offsetof(struct s3b_config, debug), .value= 1 }, { .templ= "--debug-http", .offset= offsetof(struct s3b_config, http_io.debug_http), .value= 1 }, { .templ= "--quiet", .offset= offsetof(struct s3b_config, quiet), .value= 1 }, { .templ= "--erase", .offset= offsetof(struct s3b_config, erase), .value= 1 }, { .templ= "--reset-mounted-flag", .offset= offsetof(struct s3b_config, reset), .value= 1 }, { .templ= "--vhost", .offset= offsetof(struct s3b_config, http_io.vhost), .value= 1 }, { .templ= "--fileMode=%o", .offset= offsetof(struct s3b_config, fuse_ops.file_mode), }, { .templ= "--filename=%s", .offset= offsetof(struct s3b_config, fuse_ops.filename), }, { .templ= "--force", .offset= offsetof(struct s3b_config, force), .value= 1 }, { .templ= "--noAutoDetect", .offset= offsetof(struct s3b_config, no_auto_detect), .value= 1 }, { .templ= "--initialRetryPause=%u", .offset= offsetof(struct s3b_config, http_io.initial_retry_pause), }, { .templ= 
"--maxRetryPause=%u", .offset= offsetof(struct s3b_config, http_io.max_retry_pause), }, { .templ= "--minWriteDelay=%u", .offset= offsetof(struct s3b_config, ec_protect.min_write_delay), }, { .templ= "--blockHashPrefix", .offset= offsetof(struct s3b_config, http_io.blockHashPrefix), .value= 1 }, { .templ= "--prefix=%s", .offset= offsetof(struct s3b_config, http_io.prefix), }, { .templ= "--defaultContentEncoding=%s", .offset= offsetof(struct s3b_config, http_io.default_ce), }, { .templ= "--readOnly", .offset= offsetof(struct s3b_config, fuse_ops.read_only), .value= 1 }, { .templ= "--size=%s", .offset= offsetof(struct s3b_config, file_size_str), }, { .templ= "--statsFilename=%s", .offset= offsetof(struct s3b_config, fuse_ops.stats_filename), }, { .templ= "--rrs", .offset= offsetof(struct s3b_config, http_io.rrs), .value= 1 }, { .templ= "--storageClass=%s", .offset= offsetof(struct s3b_config, http_io.storage_class), }, { .templ= "--ssl", .offset= offsetof(struct s3b_config, ssl), .value= 1 }, { .templ= "--cacert=%s", .offset= offsetof(struct s3b_config, http_io.cacert), }, { .templ= "--insecure", .offset= offsetof(struct s3b_config, http_io.insecure), .value= 1 }, { .templ= "--compress", .offset= offsetof(struct s3b_config, http_io.compress), .value= Z_DEFAULT_COMPRESSION }, { .templ= "--compress=%d", .offset= offsetof(struct s3b_config, http_io.compress), }, { .templ= "--encrypt", .offset= offsetof(struct s3b_config, encrypt), .value= 1 }, { .templ= "--encrypt=%s", .offset= offsetof(struct s3b_config, http_io.encryption), }, { .templ= "--keyLength=%u", .offset= offsetof(struct s3b_config, http_io.key_length), }, { .templ= "--password=%s", .offset= offsetof(struct s3b_config, http_io.password), }, { .templ= "--passwordFile=%s", .offset= offsetof(struct s3b_config, password_file), }, { .templ= "--test", .offset= offsetof(struct s3b_config, test), .value= 1 }, { .templ= "--timeout=%u", .offset= offsetof(struct s3b_config, http_io.timeout), }, { .templ= "--directIO", 
.offset= offsetof(struct s3b_config, fuse_ops.direct_io), .value= 1 }, }; /* Default flags we send to FUSE */ static const char *const s3backer_fuse_defaults[] = { "-okernel_cache", "-oallow_other", "-ouse_ino", "-omax_readahead=0", "-osubtype=s3backer", "-oentry_timeout=31536000", "-onegative_timeout=31536000", "-oattr_timeout=0", // because statistics file length changes "-odefault_permissions", #ifndef __FreeBSD__ "-onodev", #endif "-onosuid", #ifdef __APPLE__ "-odaemon_timeout=" FUSE_MAX_DAEMON_TIMEOUT_STRING, #endif /* "-ointr", */ }; /* Size suffixes */ struct size_suffix { const char *suffix; int bits; }; static const struct size_suffix size_suffixes[] = { { .suffix= "k", .bits= 10 }, { .suffix= "m", .bits= 20 }, { .suffix= "g", .bits= 30 }, { .suffix= "t", .bits= 40 }, { .suffix= "p", .bits= 50 }, { .suffix= "e", .bits= 60 }, { .suffix= "z", .bits= 70 }, { .suffix= "y", .bits= 80 }, }; /* s3backer_store layers */ struct s3backer_store *block_cache_store; struct s3backer_store *ec_protect_store; struct s3backer_store *http_io_store; struct s3backer_store *test_io_store; /**************************************************************************** * PUBLIC FUNCTION DEFINITIONS * ****************************************************************************/ struct s3b_config * s3backer_get_config(int argc, char **argv) { const int num_options = sizeof(option_list) / sizeof(*option_list); struct fuse_opt dup_option_list[2 * sizeof(option_list) + 1]; char buf[1024]; int i; /* Remember user creds */ config.fuse_ops.uid = getuid(); config.fuse_ops.gid = getgid(); /* Set user-agent */ snprintf(user_agent_buf, sizeof(user_agent_buf), "%s/%s/%s", PACKAGE, VERSION, s3backer_version); /* Copy passed args */ memset(&config.fuse_args, 0, sizeof(config.fuse_args)); for (i = 0; i < argc; i++) { if (fuse_opt_insert_arg(&config.fuse_args, i, argv[i]) != 0) err(1, "fuse_opt_insert_arg"); } /* Insert our default FUSE options */ for (i = 0; i < sizeof(s3backer_fuse_defaults) / 
sizeof(*s3backer_fuse_defaults); i++) { if (fuse_opt_insert_arg(&config.fuse_args, i + 1, s3backer_fuse_defaults[i]) != 0) err(1, "fuse_opt_insert_arg"); } /* Create the equivalent fstab options (without the "--") for each option in the option list */ memcpy(dup_option_list, option_list, sizeof(option_list)); memcpy(dup_option_list + num_options, option_list, sizeof(option_list)); for (i = num_options; i < 2 * num_options; i++) dup_option_list[i].templ += 2; dup_option_list[2 * num_options].templ = NULL; /* Parse command line flags */ if (fuse_opt_parse(&config.fuse_args, &config, dup_option_list, handle_unknown_option) != 0) return NULL; /* Validate configuration */ if (validate_config() != 0) return NULL; /* Set fsname based on configuration */ snprintf(buf, sizeof(buf), "-ofsname=%s", config.description); if (fuse_opt_insert_arg(&config.fuse_args, 1, buf) != 0) err(1, "fuse_opt_insert_arg"); /* Set up fuse_ops callbacks */ config.fuse_ops.print_stats = s3b_config_print_stats; config.fuse_ops.clear_stats = s3b_config_clear_stats; config.fuse_ops.s3bconf = &config; /* Debug */ if (config.debug) dump_config(); /* Done */ return &config; } /* * Create the s3backer_store used at runtime. 
 */
struct s3backer_store *
s3backer_create_store(struct s3b_config *conf)
{
    struct s3backer_store *store;
    int32_t old_mount_token;
    int32_t new_mount_token;
    int r;

    /* Sanity check: this must only be invoked once per process (the layer globals are singletons) */
    if (http_io_store != NULL || test_io_store != NULL) {
        errno = EINVAL;
        return NULL;
    }

    /* Create HTTP (or test) layer */
    if (conf->test) {
        if ((test_io_store = test_io_create(&conf->http_io)) == NULL)
            return NULL;
        store = test_io_store;
    } else {
        if ((http_io_store = http_io_create(&conf->http_io)) == NULL)
            return NULL;
        store = http_io_store;
    }

    /* Create eventual consistency protection layer (if desired); stacks on top of the HTTP layer */
    if (conf->ec_protect.cache_size > 0) {
        if ((ec_protect_store = ec_protect_create(&conf->ec_protect, store)) == NULL)
            goto fail_with_errno;
        store = ec_protect_store;
    }

    /* Create block cache layer (if desired); always the topmost layer */
    if (conf->block_cache.cache_size > 0) {
        if ((block_cache_store = block_cache_create(&conf->block_cache, store)) == NULL)
            goto fail_with_errno;
        store = block_cache_store;
    }

    /*
     * Set mount token and check previous value one last time.
     * A strictly positive random token marks a read-write mount; -1 means
     * read-only, i.e. leave any stored token untouched.
     */
    new_mount_token = -1;
    if (!conf->fuse_ops.read_only) {
        srandom((long)time(NULL) ^ (long)&old_mount_token);     /* mix in stack address for extra entropy */
        do
            new_mount_token = random();
        while (new_mount_token <= 0);
    }
    if ((r = (*store->set_mount_token)(store, &old_mount_token, new_mount_token)) != 0) {
        (*conf->log)(LOG_ERR, "error reading mount token on %s: %s", conf->description, strerror(r));
        goto fail;
    }
    if (old_mount_token != 0) {
        /* Another mount holds the token; proceed only when forced or when recovering dirty cached blocks */
        if (!conf->force && !conf->block_cache.perform_flush) {
            /* NOTE(review): uses global `config.description` rather than `conf->description` here —
               same object in practice since conf always points at the global, but inconsistent; confirm */
            (*conf->log)(LOG_ERR, "%s appears to be mounted by another s3backer process (using mount token 0x%08x)",
              config.description, (int)old_mount_token);
            r = EBUSY;
            goto fail;
        }
    }
    if (new_mount_token != -1)
        (*conf->log)(LOG_INFO, "established new mount token 0x%08x", (int)new_mount_token);

    /* Done */
    return store;

fail_with_errno:
    r = errno;
fail:
    /* Destroying the top layer destroys the layers beneath it; then reset the singletons */
    if (store != NULL)
        (*store->destroy)(store);
    block_cache_store = NULL;
    ec_protect_store = NULL;
    http_io_store = NULL;
    test_io_store = NULL;
    errno = r;
    return NULL;
}
/**************************************************************************** * INTERNAL FUNCTION DEFINITIONS * ****************************************************************************/ static void s3b_config_print_stats(void *prarg, printer_t *printer) { struct http_io_stats http_io_stats; struct ec_protect_stats ec_protect_stats; struct block_cache_stats block_cache_stats; double curl_reuse_ratio = 0.0; u_int total_oom = 0; u_int total_curls; /* Get HTTP stats */ if (http_io_store != NULL) http_io_get_stats(http_io_store, &http_io_stats); /* Get EC protection stats */ if (ec_protect_store != NULL) ec_protect_get_stats(ec_protect_store, &ec_protect_stats); /* Get block cache stats */ if (block_cache_store != NULL) block_cache_get_stats(block_cache_store, &block_cache_stats); /* Print stats in human-readable form */ if (http_io_store != NULL) { (*printer)(prarg, "%-28s %u\n", "http_normal_blocks_read", http_io_stats.normal_blocks_read); (*printer)(prarg, "%-28s %u\n", "http_normal_blocks_written", http_io_stats.normal_blocks_written); (*printer)(prarg, "%-28s %u\n", "http_zero_blocks_read", http_io_stats.zero_blocks_read); (*printer)(prarg, "%-28s %u\n", "http_zero_blocks_written", http_io_stats.zero_blocks_written); if (config.list_blocks) { (*printer)(prarg, "%-28s %u\n", "http_empty_blocks_read", http_io_stats.empty_blocks_read); (*printer)(prarg, "%-28s %u\n", "http_empty_blocks_written", http_io_stats.empty_blocks_written); } (*printer)(prarg, "%-28s %u\n", "http_gets", http_io_stats.http_gets.count); (*printer)(prarg, "%-28s %u\n", "http_puts", http_io_stats.http_puts.count); (*printer)(prarg, "%-28s %u\n", "http_deletes", http_io_stats.http_deletes.count); (*printer)(prarg, "%-28s %.3f sec\n", "http_avg_get_time", http_io_stats.http_gets.count > 0 ? http_io_stats.http_gets.time / http_io_stats.http_gets.count : 0.0); (*printer)(prarg, "%-28s %.3f sec\n", "http_avg_put_time", http_io_stats.http_puts.count > 0 ? 
http_io_stats.http_puts.time / http_io_stats.http_puts.count : 0.0); (*printer)(prarg, "%-28s %.3f sec\n", "http_avg_delete_time", http_io_stats.http_deletes.count > 0 ? http_io_stats.http_deletes.time / http_io_stats.http_deletes.count : 0.0); (*printer)(prarg, "%-28s %u\n", "http_unauthorized", http_io_stats.http_unauthorized); (*printer)(prarg, "%-28s %u\n", "http_forbidden", http_io_stats.http_forbidden); (*printer)(prarg, "%-28s %u\n", "http_stale", http_io_stats.http_stale); (*printer)(prarg, "%-28s %u\n", "http_verified", http_io_stats.http_verified); (*printer)(prarg, "%-28s %u\n", "http_mismatch", http_io_stats.http_mismatch); (*printer)(prarg, "%-28s %u\n", "http_5xx_error", http_io_stats.http_5xx_error); (*printer)(prarg, "%-28s %u\n", "http_4xx_error", http_io_stats.http_4xx_error); (*printer)(prarg, "%-28s %u\n", "http_other_error", http_io_stats.http_other_error); (*printer)(prarg, "%-28s %u\n", "http_canceled_writes", http_io_stats.http_canceled_writes); (*printer)(prarg, "%-28s %u\n", "http_num_retries", http_io_stats.num_retries); (*printer)(prarg, "%-28s %ju.%03u sec\n", "http_total_retry_delay", (uintmax_t)(http_io_stats.retry_delay / 1000), (u_int)(http_io_stats.retry_delay % 1000)); total_curls = http_io_stats.curl_handles_created + http_io_stats.curl_handles_reused; if (total_curls > 0) curl_reuse_ratio = (double)http_io_stats.curl_handles_reused / (double)total_curls; (*printer)(prarg, "%-28s %.4f\n", "curl_handle_reuse_ratio", curl_reuse_ratio); (*printer)(prarg, "%-28s %u\n", "curl_timeouts", http_io_stats.curl_timeouts); (*printer)(prarg, "%-28s %u\n", "curl_connect_failed", http_io_stats.curl_connect_failed); (*printer)(prarg, "%-28s %u\n", "curl_host_unknown", http_io_stats.curl_host_unknown); (*printer)(prarg, "%-28s %u\n", "curl_out_of_memory", http_io_stats.curl_out_of_memory); (*printer)(prarg, "%-28s %u\n", "curl_other_error", http_io_stats.curl_other_error); total_oom += http_io_stats.out_of_memory_errors; } if (block_cache_store 
!= NULL) { double read_hit_ratio = 0.0; double write_hit_ratio = 0.0; u_int total_reads; u_int total_writes; total_reads = block_cache_stats.read_hits + block_cache_stats.read_misses; if (total_reads != 0) read_hit_ratio = (double)block_cache_stats.read_hits / (double)total_reads; total_writes = block_cache_stats.write_hits + block_cache_stats.write_misses; if (total_writes != 0) write_hit_ratio = (double)block_cache_stats.write_hits / (double)total_writes; (*printer)(prarg, "%-28s %u blocks\n", "block_cache_current_size", block_cache_stats.current_size); (*printer)(prarg, "%-28s %u blocks\n", "block_cache_initial_size", block_cache_stats.initial_size); (*printer)(prarg, "%-28s %.4f\n", "block_cache_dirty_ratio", block_cache_stats.dirty_ratio); (*printer)(prarg, "%-28s %u\n", "block_cache_read_hits", block_cache_stats.read_hits); (*printer)(prarg, "%-28s %u\n", "block_cache_read_misses", block_cache_stats.read_misses); (*printer)(prarg, "%-28s %.4f\n", "block_cache_read_hit_ratio", read_hit_ratio); (*printer)(prarg, "%-28s %u\n", "block_cache_write_hits", block_cache_stats.write_hits); (*printer)(prarg, "%-28s %u\n", "block_cache_write_misses", block_cache_stats.write_misses); (*printer)(prarg, "%-28s %.4f\n", "block_cache_write_hit_ratio", write_hit_ratio); (*printer)(prarg, "%-28s %u\n", "block_cache_verified", block_cache_stats.verified); (*printer)(prarg, "%-28s %u\n", "block_cache_mismatch", block_cache_stats.mismatch); total_oom += block_cache_stats.out_of_memory_errors; } if (ec_protect_store != NULL) { (*printer)(prarg, "%-28s %u blocks\n", "md5_cache_current_size", ec_protect_stats.current_cache_size); (*printer)(prarg, "%-28s %u\n", "md5_cache_data_hits", ec_protect_stats.cache_data_hits); (*printer)(prarg, "%-28s %ju.%03u sec\n", "md5_cache_full_delays", (uintmax_t)(ec_protect_stats.cache_full_delay / 1000), (u_int)(ec_protect_stats.cache_full_delay % 1000)); (*printer)(prarg, "%-28s %ju.%03u sec\n", "md5_cache_write_delays", 
(uintmax_t)(ec_protect_stats.repeated_write_delay / 1000), (u_int)(ec_protect_stats.repeated_write_delay % 1000));
        total_oom += ec_protect_stats.out_of_memory_errors;
    }

    /* Out-of-memory errors are accumulated across all of the layers above */
    (*printer)(prarg, "%-28s %u\n", "out_of_memory_errors", total_oom);
}

/*
 * Reset statistics in whichever store layers are currently present.
 */
static void
s3b_config_clear_stats(void)
{
    /* Clear HTTP stats */
    if (http_io_store != NULL)
        http_io_clear_stats(http_io_store);

    /* Clear EC protection stats */
    if (ec_protect_store != NULL)
        ec_protect_clear_stats(ec_protect_store);

    /* Clear block cache stats */
    if (block_cache_store != NULL)
        block_cache_clear_stats(block_cache_store);
}

/*
 * Parse a size string with an optional suffix (e.g. "128K", "1G") into a byte count.
 *
 * Returns 0 on success with *valp set, or -1 if the string or suffix is invalid.
 * Suffixes whose shift would exceed the width of off_t are rejected.
 */
static int
parse_size_string(const char *s, uintmax_t *valp)
{
    char suffix[3] = { '\0' };
    int nconv;

    nconv = sscanf(s, "%ju%2s", valp, suffix);
    if (nconv < 1)
        return -1;
    if (nconv >= 2) {
        int found = 0;
        int i;

        /* Look up the suffix; table is assumed ordered by increasing shift — TODO confirm */
        for (i = 0; i < sizeof(size_suffixes) / sizeof(*size_suffixes); i++) {
            const struct size_suffix *const ss = &size_suffixes[i];

            if (ss->bits >= sizeof(off_t) * 8)
                break;
            if (strcasecmp(suffix, ss->suffix) == 0) {
                *valp <<= ss->bits;
                found = 1;
                break;
            }
        }
        if (!found)
            return -1;
    }
    return 0;
}

/*
 * Render a byte count into buf using the largest suffix that divides it evenly,
 * falling back to a plain decimal number.
 */
static void
unparse_size_string(char *buf, size_t bmax, uintmax_t value)
{
    uintmax_t unit;
    int i;

    if (value == 0) {
        snprintf(buf, bmax, "0");
        return;
    }
    /* Try suffixes from largest to smallest */
    for (i = sizeof(size_suffixes) / sizeof(*size_suffixes); i-- > 0; ) {
        const struct size_suffix *const ss = &size_suffixes[i];

        if (ss->bits >= sizeof(off_t) * 8)
            continue;
        unit = (uintmax_t)1 << ss->bits;
        if (value % unit == 0) {
            snprintf(buf, bmax, "%ju%s", value / unit, ss->suffix);
            return;
        }
    }
    snprintf(buf, bmax, "%ju", value);
}

/**
 * Handle command-line flag.
*/ static int handle_unknown_option(void *data, const char *arg, int key, struct fuse_args *outargs) { /* Check options */ if (key == FUSE_OPT_KEY_OPT) { /* Debug flags */ if (strcmp(arg, "-d") == 0) config.debug = 1; if (strcmp(arg, "-d") == 0 || strcmp(arg, "-f") == 0) config.log = stderr_logger; /* Version */ if (strcmp(arg, "--version") == 0 || strcmp(arg, "-v") == 0) { fprintf(stderr, "%s version %s (%s)\n", PACKAGE, VERSION, s3backer_version); fprintf(stderr, "Copyright (C) 2008-2011 Archie L. Cobbs.\n"); fprintf(stderr, "This is free software; see the source for copying conditions. There is NO\n"); fprintf(stderr, "warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"); exit(0); } /* Help */ if (strcmp(arg, "--help") == 0 || strcmp(arg, "-h") == 0 || strcmp(arg, "-?") == 0) { usage(); exit(0); } /* Unknown; pass it through to fuse_main() */ return 1; } /* Get bucket parameter */ if (config.http_io.bucket == NULL) { if ((config.http_io.bucket = strdup(arg)) == NULL) err(1, "strdup"); return 0; } /* Copy mount point */ if (config.mount == NULL) { if ((config.mount = strdup(arg)) == NULL) err(1, "strdup"); return 1; } /* Pass subsequent paramters on to fuse_main() */ return 1; } static int search_access_for(const char *file, const char *accessId, char **idptr, char **pwptr) { char buf[1024]; FILE *fp; if (idptr != NULL) *idptr = NULL; if (pwptr != NULL) *pwptr = NULL; if ((fp = fopen(file, "r")) == NULL) return 0; while (fgets(buf, sizeof(buf), fp) != NULL) { char *colon; if (*buf == '#' || *buf == '\0' || isspace(*buf) || (colon = strchr(buf, ':')) == NULL) continue; while (*buf != '\0' && isspace(buf[strlen(buf) - 1])) buf[strlen(buf) - 1] = '\0'; *colon = '\0'; if (accessId != NULL && strcmp(buf, accessId) != 0) continue; if (idptr != NULL && (*idptr = strdup(buf)) == NULL) err(1, "strdup"); if (pwptr != NULL && (*pwptr = strdup(colon + 1)) == NULL) err(1, "strdup"); fclose(fp); return 1; } fclose(fp); return 0; } static int 
validate_config(void)
{
    struct s3backer_store *s3b;
    const int customBaseURL = config.http_io.baseURL != NULL;
    const int customRegion = config.http_io.region != NULL;
    off_t auto_file_size;
    u_int auto_block_size;
    uintmax_t value;
    const char *s;
    char blockSizeBuf[64];
    char fileSizeBuf[64];
    struct stat sb;
    char urlbuf[512];
    int i;
    int r;

    /* Default to $HOME/.s3backer for accessFile */
    if (config.http_io.ec2iam_role == NULL && config.accessFile == NULL) {
        const char *home = getenv("HOME");
        char buf[PATH_MAX];

        if (home != NULL) {
            snprintf(buf, sizeof(buf), "%s/%s", home, S3BACKER_DEFAULT_PWD_FILE);
            if ((config.accessFile = strdup(buf)) == NULL)
                err(1, "strdup");
        }
    }

    /* Auto-set file mode in read_only if not explicitly set */
    if (config.fuse_ops.file_mode == -1) {
        config.fuse_ops.file_mode = config.fuse_ops.read_only ? S3BACKER_DEFAULT_FILE_MODE_READ_ONLY : S3BACKER_DEFAULT_FILE_MODE;
    }

    /* If no accessId specified, default to first in accessFile */
    if (config.http_io.accessId == NULL && config.accessFile != NULL)
        search_access_for(config.accessFile, NULL, &config.http_io.accessId, NULL);
    if (config.http_io.accessId != NULL && *config.http_io.accessId == '\0')
        config.http_io.accessId = NULL;

    /* If no accessId, only read operations will succeed */
    if (!config.test && config.http_io.accessId == NULL && !config.fuse_ops.read_only && !customBaseURL && config.http_io.ec2iam_role == NULL) {
        warnx("warning: no `accessId' specified; only read operations will succeed");
        warnx("you can eliminate this warning by providing the `--readOnly' flag");
    }

    /* Find key in file if not specified explicitly */
    if (config.http_io.accessId == NULL && config.http_io.accessKey != NULL) {
        warnx("an `accessKey' was specified but no `accessId' was specified");
        return -1;
    }
    if (config.http_io.accessId != NULL) {
        if (config.http_io.accessKey == NULL && config.accessFile != NULL)
            search_access_for(config.accessFile, config.http_io.accessId, NULL, &config.http_io.accessKey);
        if (config.http_io.accessKey == NULL) {
            warnx("no `accessKey' specified");
            return -1;
        }
    }

    /* Check for conflict between explicit accessId and EC2 IAM role */
    if (config.http_io.accessId != NULL && config.http_io.ec2iam_role != NULL) {
        warnx("an `accessKey' must not be specified when an `accessEC2IAM' role is specified");
        return -1;
    }

    /* Check auth version */
    for (i = 0; i < sizeof(s3_auth_types) / sizeof(*s3_auth_types); i++) {
        if (strcmp(config.http_io.authVersion, s3_auth_types[i]) == 0)
            break;
    }
    if (i == sizeof(s3_auth_types) / sizeof(*s3_auth_types)) {
        warnx("illegal authentication version `%s'", config.http_io.authVersion);
        return -1;
    }

    /* Check bucket/testdir; in test mode "bucket" is actually a local directory */
    if (!config.test) {
        if (config.http_io.bucket == NULL) {
            warnx("no S3 bucket specified");
            return -1;
        }
        if (*config.http_io.bucket == '\0' || *config.http_io.bucket == '/' || strchr(config.http_io.bucket, '/') != 0) {
            warnx("invalid S3 bucket `%s'", config.http_io.bucket);
            return -1;
        }
    } else {
        if (config.http_io.bucket == NULL) {
            warnx("no test directory specified");
            return -1;
        }
        if (stat(config.http_io.bucket, &sb) == -1) {
            warn("%s", config.http_io.bucket);
            return -1;
        }
        if (!S_ISDIR(sb.st_mode)) {
            errno = ENOTDIR;
            warn("%s", config.http_io.bucket);
            return -1;
        }
    }

    /* Check storage class */
    if (config.http_io.storage_class != NULL
      && strcmp(config.http_io.storage_class, STORAGE_CLASS_STANDARD) != 0
      && strcmp(config.http_io.storage_class, STORAGE_CLASS_STANDARD_IA) != 0
      && strcmp(config.http_io.storage_class, STORAGE_CLASS_REDUCED_REDUNDANCY) != 0) {
        warnx("invalid storage class `%s'", config.http_io.storage_class);
        return -1;
    }

    /* Check server side encryption type */
    if (config.http_io.sse != NULL && strcmp(config.http_io.sse, REQUIRED_SSE_VALUE) != 0) {
        warnx("invalid sse type `%s' (only `%s' is supported)", config.http_io.sse, REQUIRED_SSE_VALUE);
        return -1;
    }

    /* Set default or custom region; a custom region implies virtual-host style URLs */
    if (config.http_io.region == NULL)
        config.http_io.region = S3BACKER_DEFAULT_REGION;
    if (customRegion)
        config.http_io.vhost = 1;

    /* Set default base URL */
    if (config.http_io.baseURL == NULL) {
        if (customRegion && strcmp(config.http_io.region, S3BACKER_DEFAULT_REGION) != 0)
            snprintf(urlbuf, sizeof(urlbuf), "http%s://s3-%s.%s/", config.ssl ? "s" : "", config.http_io.region, S3_DOMAIN);
        else
            snprintf(urlbuf, sizeof(urlbuf), "http%s://s3.%s/", config.ssl ? "s" : "", S3_DOMAIN);
        if ((config.http_io.baseURL = strdup(urlbuf)) == NULL) {
            warn("malloc");
            return -1;
        }
    }

    /* Check base URL: must be http(s)://host.../ and end with a slash */
    s = NULL;
    if (strncmp(config.http_io.baseURL, "http://", 7) == 0)
        s = config.http_io.baseURL + 7;
    else if (strncmp(config.http_io.baseURL, "https://", 8) == 0)
        s = config.http_io.baseURL + 8;
    if (s != NULL && (*s == '/' || *s == '\0'))
        s = NULL;
    if (s != NULL && (s = strrchr(s, '/')) == NULL) {
        warnx("base URL must end with a '/'");
        s = NULL;
    }
    if (s != NULL && s[1] != '\0') {
        warnx("base URL must end with a '/' not '%c'", s[1]);
        s = NULL;
    }
    if (s == NULL) {
        warnx("invalid base URL `%s'", config.http_io.baseURL);
        return -1;
    }
    if (config.ssl && customBaseURL && strncmp(config.http_io.baseURL, "https", 5) != 0) {
        warnx("non-SSL `--baseURL' conflicts with `--ssl'");
        return -1;
    }

    /* Handle virtual host style URL (prefix hostname with bucket name) */
    if (config.http_io.vhost) {
        size_t buflen;
        int schemelen;
        char *buf;

        schemelen = strchr(config.http_io.baseURL, ':') - config.http_io.baseURL + 3;   /* length of "scheme://" */
        buflen = strlen(config.http_io.bucket) + 1 + strlen(config.http_io.baseURL) + 1;
        if ((buf = malloc(buflen)) == NULL)
            err(1, "malloc(%u)", (u_int)buflen);
        snprintf(buf, buflen, "%.*s%s.%s", schemelen, config.http_io.baseURL, config.http_io.bucket, config.http_io.baseURL + schemelen);
        config.http_io.baseURL = buf;
    }

    /* Check S3 access privilege */
    for (i = 0; i < sizeof(s3_acls) / sizeof(*s3_acls); i++) {
        if (strcmp(config.http_io.accessType, s3_acls[i]) == 0)
            break;
    }
    if (i == sizeof(s3_acls) / sizeof(*s3_acls)) {
        warnx("illegal access type `%s'", config.http_io.accessType);
        return -1;
    }

    /* Check filenames */
    if (strchr(config.fuse_ops.filename, '/') != NULL || *config.fuse_ops.filename == '\0') {
        warnx("illegal filename `%s'", config.fuse_ops.filename);
        return -1;
    }
    if (strchr(config.fuse_ops.stats_filename, '/') != NULL) {
        warnx("illegal stats filename `%s'", config.fuse_ops.stats_filename);
        return -1;
    }

    /* Apply default encryption */
    if (config.http_io.encryption == NULL && config.encrypt)
        config.http_io.encryption = strdup(S3BACKER_DEFAULT_ENCRYPTION);

    /* Uppercase encryption name for consistency */
    if (config.http_io.encryption != NULL) {
        char *t;

        if ((t = strdup(config.http_io.encryption)) == NULL)
            err(1, "strdup()");
        for (i = 0; t[i] != '\0'; i++)
            t[i] = toupper(t[i]);
        config.http_io.encryption = t;
    }

    /* Check encryption and get key (from --password, --passwordFile, or interactively) */
    if (config.http_io.encryption != NULL) {
        char pwbuf[1024];
        FILE *fp;

        if (config.password_file != NULL && config.http_io.password != NULL) {
            warnx("specify only one of `--password' or `--passwordFile'");
            return -1;
        }
        if (config.password_file == NULL && config.http_io.password == NULL) {
            if ((s = getpass("Password: ")) == NULL)
                err(1, "getpass()");
        }
        if (config.password_file != NULL) {
            assert(config.http_io.password == NULL);
            if ((fp = fopen(config.password_file, "r")) == NULL) {
                warn("can't open encryption key file `%s'", config.password_file);
                return -1;
            }
            if (fgets(pwbuf, sizeof(pwbuf), fp) == NULL || *pwbuf == '\0') {
                warnx("can't read encryption key from file `%s'", config.password_file);
                fclose(fp);
                return -1;
            }
            if (pwbuf[strlen(pwbuf) - 1] == '\n')
                pwbuf[strlen(pwbuf) - 1] = '\0';
            fclose(fp);
            s = pwbuf;
        }
        if (config.http_io.password == NULL && (config.http_io.password = strdup(s)) == NULL)
            err(1, "strdup()");
        if (config.http_io.key_length > EVP_MAX_KEY_LENGTH) {
            warnx("`--keyLength' value must be positive and at most %u", EVP_MAX_KEY_LENGTH);
            return -1;
        }
    } else {
        if (config.http_io.password != NULL)
            warnx("unexpected flag `%s' (`--encrypt' was not specified)", "--password");
        else if (config.password_file != NULL)
            warnx("unexpected flag `%s' (`--encrypt' was not specified)", "--passwordFile");
        if (config.http_io.key_length != 0)
            warnx("unexpected flag `%s' (`--encrypt' was not specified)", "--keyLength");
    }

    /* We always want to compress if we are encrypting */
    if (config.http_io.encryption != NULL && config.http_io.compress == Z_NO_COMPRESSION)
        config.http_io.compress = Z_DEFAULT_COMPRESSION;

    /* Check compression level */
    switch (config.http_io.compress) {
    case Z_DEFAULT_COMPRESSION:
    case Z_NO_COMPRESSION:
        break;
    default:
        if (config.http_io.compress < Z_BEST_SPEED || config.http_io.compress > Z_BEST_COMPRESSION) {
            warnx("illegal compression level `%d'", config.http_io.compress);
            return -1;
        }
        break;
    }

    /* Disable md5 cache when in read only mode */
    if (config.fuse_ops.read_only) {
        config.ec_protect.cache_size = 0;
        config.ec_protect.cache_time = 0;
        config.ec_protect.min_write_delay = 0;
    }

    /* Check time/cache values */
    if (config.ec_protect.cache_size == 0 && config.ec_protect.cache_time > 0) {
        warnx("`md5CacheTime' must zero when MD5 cache is disabled");
        return -1;
    }
    if (config.ec_protect.cache_size == 0 && config.ec_protect.min_write_delay > 0) {
        warnx("`minWriteDelay' must zero when MD5 cache is disabled");
        return -1;
    }
    if (config.ec_protect.cache_time > 0 && config.ec_protect.cache_time < config.ec_protect.min_write_delay) {
        warnx("`md5CacheTime' must be at least `minWriteDelay'");
        return -1;
    }
    if (config.http_io.initial_retry_pause > config.http_io.max_retry_pause) {
        warnx("`maxRetryPause' must be at least `initialRetryPause'");
        return -1;
    }

    /* Parse block and file sizes */
    if (config.block_size_str != NULL) {
        if (parse_size_string(config.block_size_str, &value) == -1 || value == 0) {
            warnx("invalid block size `%s'", config.block_size_str);
            return -1;
        }
        if ((u_int)value != value) {
            warnx("block size `%s' is too big", config.block_size_str);
            return -1;
        }
        config.block_size = value;
    }
    if (config.file_size_str != NULL) {
        if (parse_size_string(config.file_size_str, &value) == -1 || value == 0) {
            warnx("invalid file size `%s'", config.file_size_str);
            return -1;
        }
        config.file_size = value;
    }

    /* Parse upload/download speeds (in bits per second) */
    for (i = 0; i < 2; i++) {
        if (config.max_speed_str[i] != NULL) {
            if (parse_size_string(config.max_speed_str[i], &value) == -1 || value == 0) {
                warnx("invalid max %s speed `%s'", upload_download_names[i], config.max_speed_str[i]);
                return -1;
            }
            if ((curl_off_t)(value / 8) != (value / 8)) {
                warnx("max %s speed `%s' is too big", upload_download_names[i], config.max_speed_str[i]);
                return -1;
            }
            config.http_io.max_speed[i] = value;
        }
        if (config.http_io.max_speed[i] != 0 && config.block_size / (config.http_io.max_speed[i] / 8) >= config.http_io.timeout) {
            warnx("configured timeout of %us is too short for block size of %u bytes and max %s speed %s bps",
              config.http_io.timeout, config.block_size, upload_download_names[i], config.max_speed_str[i]);
            return -1;
        }
    }

    /* Check block cache config */
    if (config.block_cache.cache_size > 0 && config.block_cache.num_threads <= 0) {
        warnx("invalid block cache thread pool size %u", config.block_cache.num_threads);
        return -1;
    }
    if (config.block_cache.write_delay > 0 && config.block_cache.synchronous) {
        warnx("`--blockCacheSync' requires setting `--blockCacheWriteDelay=0'");
        return -1;
    }
    if (config.block_cache.cache_size > 0 && config.block_cache.cache_file != NULL) {
        int bs_bits = ffs(config.block_size) - 1;
        int cs_bits = ffs(config.block_cache.cache_size);

        if (bs_bits + cs_bits >= sizeof(off_t) * 8 - 1) {
            warnx("the block cache is too big to fit within a single file (%u blocks x %u bytes)", config.block_cache.cache_size, config.block_size);
            return -1;
        }
    }
    if (config.block_cache.cache_file == NULL && config.block_cache.recover_dirty_blocks) {
        warnx("`--blockCacheRecoverDirtyBlocks' requires specifying `--blockCacheFile'");
        return -1;
    }

    /* Check mount point */
    if (config.erase || config.reset) {
        if (config.mount != NULL) {
            warnx("no mount point should be specified with `--erase' or `--reset-mounted-flag'");
            return -1;
        }
    } else {
        if (config.mount == NULL) {
            warnx("no mount point specified");
            return -1;
        }
    }

    /* Format descriptive string of what we're mounting */
    if (config.test) {
        snprintf(config.description, sizeof(config.description), "%s%s/%s", "file://", config.http_io.bucket, config.http_io.prefix);
    } else if (config.http_io.vhost)
        snprintf(config.description, sizeof(config.description), "%s%s", config.http_io.baseURL, config.http_io.prefix);
    else {
        snprintf(config.description, sizeof(config.description), "%s%s/%s", config.http_io.baseURL, config.http_io.bucket, config.http_io.prefix);
    }

    /*
     * Read the first block (if any) to determine existing file and block size,
     * and compare with configured sizes (if given).
     */
    if (config.test)
        config.no_auto_detect = 1;
    if (config.no_auto_detect)
        r = ENOENT;
    else {
        config.http_io.debug = config.debug;
        config.http_io.quiet = config.quiet;
        config.http_io.log = config.log;
        if ((s3b = http_io_create(&config.http_io)) == NULL)
            err(1, "http_io_create");
        if (!config.quiet)
            warnx("auto-detecting block size and total file size...");
        r = (*s3b->meta_data)(s3b, &auto_file_size, &auto_block_size);
        (*s3b->destroy)(s3b);
    }

    /* Check result */
    switch (r) {
    case 0:
        unparse_size_string(blockSizeBuf, sizeof(blockSizeBuf), (uintmax_t)auto_block_size);
        unparse_size_string(fileSizeBuf, sizeof(fileSizeBuf), (uintmax_t)auto_file_size);
        if (!config.quiet)
            warnx("auto-detected block size=%s and total size=%s", blockSizeBuf, fileSizeBuf);
        if (config.block_size == 0)
            config.block_size = auto_block_size;
        else if (auto_block_size != config.block_size) {
            char buf[64];

            unparse_size_string(buf, sizeof(buf), (uintmax_t)config.block_size);
            if (config.force) {
                if (!config.quiet) {
                    warnx("warning: configured block size %s != filesystem block size %s,\n"
                      "but you said `--force' so I'll proceed anyway even though your data will\n"
                      "probably not read back correctly.", buf, blockSizeBuf);
                }
            } else
                errx(1, "error: configured block size %s != filesystem block size %s", buf, blockSizeBuf);
        }
        if (config.file_size == 0)
            config.file_size = auto_file_size;
        else if (auto_file_size != config.file_size) {
            char buf[64];

            unparse_size_string(buf, sizeof(buf), (uintmax_t)config.file_size);
            if (config.force) {
                if (!config.quiet) {
                    warnx("warning: configured file size %s != filesystem file size %s,\n"
                      "but you said `--force' so I'll proceed anyway even though your data will\n"
                      "probably not read back correctly.", buf, fileSizeBuf);
                }
            } else
                errx(1, "error: configured file size %s != filesystem file size %s", buf, fileSizeBuf);
        }
        break;
    case ENOENT:
    {
        const char *why = config.no_auto_detect ? "disabled" : "failed";
        int config_block_size = config.block_size;

        if (config.file_size == 0)
            errx(1, "error: auto-detection of filesystem size %s; please specify `--size'", why);
        if (config.block_size == 0)
            config.block_size = S3BACKER_DEFAULT_BLOCKSIZE;
        unparse_size_string(blockSizeBuf, sizeof(blockSizeBuf), (uintmax_t)config.block_size);
        unparse_size_string(fileSizeBuf, sizeof(fileSizeBuf), (uintmax_t)config.file_size);
        if (!config.quiet) {
            warnx("auto-detection %s; using %s block size %s and file size %s", why, config_block_size == 0 ? "default" : "configured", blockSizeBuf, fileSizeBuf);
        }
        break;
    }
    default:
        errno = r;
        err(1, "can't read data store meta-data");
        break;
    }

    /* Check computed block and file sizes */
    if (config.block_size != (1 << (ffs(config.block_size) - 1))) {
        warnx("block size must be a power of 2");
        return -1;
    }
    if (config.file_size % config.block_size != 0) {
        warnx("file size must be a multiple of block size");
        return -1;
    }
    config.num_blocks = config.file_size / config.block_size;
    if (sizeof(s3b_block_t) < sizeof(config.num_blocks) && config.num_blocks > ((off_t)1 << (sizeof(s3b_block_t) * 8))) {
        warnx("more than 2^%d blocks: decrease file size or increase block size", (int)(sizeof(s3b_block_t) * 8));
        return -1;
    }

    /* Check block size vs. encryption block size */
    if (config.http_io.encryption != NULL && config.block_size % EVP_MAX_IV_LENGTH != 0) {
        warnx("block size must be at least %u when encryption is enabled", EVP_MAX_IV_LENGTH);
        return -1;
    }

    /* Check that MD5 cache won't eventually deadlock */
    if (config.ec_protect.cache_size > 0
      && config.ec_protect.cache_time == 0
      && config.ec_protect.cache_size < config.num_blocks) {
        warnx("`md5CacheTime' is infinite but `md5CacheSize' is less than the number of blocks, so eventual deadlock will result");
        return -1;
    }

    /* No point in the caches being bigger than necessary */
    if (config.ec_protect.cache_size > config.num_blocks) {
        warnx("MD5 cache size (%ju) is greater that the total number of blocks (%ju); automatically reducing",
          (uintmax_t)config.ec_protect.cache_size, (uintmax_t)config.num_blocks);
        config.ec_protect.cache_size = config.num_blocks;
    }
    if (config.block_cache.cache_size > config.num_blocks) {
        warnx("block cache size (%ju) is greater that the total number of blocks (%ju); automatically reducing",
          (uintmax_t)config.block_cache.cache_size, (uintmax_t)config.num_blocks);
        config.block_cache.cache_size = config.num_blocks;
    }

#ifdef __APPLE__
    /* On MacOS, warn if kernel timeouts can happen prior to our own timeout */
    {
        u_int total_time = 0;
        u_int retry_pause = 0;
        u_int total_pause;

        /*
         * Determine how much total time an operation can take including retries.
         * We have to use the same exponential backoff algorithm.
         */
        for (total_pause = 0; 1; total_pause += retry_pause) {
            total_time += config.http_io.timeout * 1000;
            if (total_pause >= config.http_io.max_retry_pause)
                break;
            retry_pause = retry_pause > 0 ? retry_pause * 2 : config.http_io.initial_retry_pause;
            if (total_pause + retry_pause > config.http_io.max_retry_pause)
                retry_pause = config.http_io.max_retry_pause - total_pause;
            total_time += retry_pause;
        }

        /* Convert from milliseconds to seconds */
        total_time = (total_time + 999) / 1000;

        /* Warn if exceeding MacFUSE limit */
        if (total_time >= FUSE_MAX_DAEMON_TIMEOUT && !config.quiet) {
            warnx("warning: maximum possible I/O delay (%us) >= MacFUSE limit (%us);", total_time, FUSE_MAX_DAEMON_TIMEOUT);
            warnx("consider lower settings for `--maxRetryPause' and/or `--timeout'.");
        }
    }
#endif  /* __APPLE__ */

    /* Copy common stuff into sub-module configs */
    config.block_cache.block_size = config.block_size;
    config.block_cache.log = config.log;
    config.http_io.debug = config.debug;
    config.http_io.quiet = config.quiet;
    config.http_io.block_size = config.block_size;
    config.http_io.num_blocks = config.num_blocks;
    config.http_io.log = config.log;
    config.ec_protect.block_size = config.block_size;
    config.ec_protect.log = config.log;
    config.fuse_ops.block_size = config.block_size;
    config.fuse_ops.num_blocks = config.num_blocks;
    config.fuse_ops.log = config.log;

    /* Check whether already mounted, and if so, compare mount token against on-disk cache (if any) */
    if (!config.test && !config.erase && !config.reset) {
        int32_t mount_token;
        int conflict;

        /* Read s3 mount token */
        config.http_io.debug = config.debug;
        config.http_io.quiet = config.quiet;
        config.http_io.log = config.log;
        if ((s3b = http_io_create(&config.http_io)) == NULL)
            err(1, "http_io_create");
        r = (*s3b->set_mount_token)(s3b, &mount_token, -1);
        (*s3b->destroy)(s3b);
        if (r != 0) {
            errno = r;
            err(1, "error reading mount token");
        }
        conflict = mount_token != 0;

        /*
         * The disk cache also has a mount token, so we need to do some extra checking.
         * Either token can be 0 (i.e., not present -> not mounted) or != 0 (mounted).
         *
         * If neither token is present, proceed with mount. Note: there should not be
         * any dirty blocks in the disk cache in this case, because this represents a
         * clean unmount situation.
         *
         * If the cache has a token, but S3 has none, that means someone must have used
         * `--reset-mounted-flag' to clear it from S3 since the last time the disk cache was
         * used. In that case, `--force' is required to continue using the disk cache,
         * or `--reset-mounted-flag' must be used to clear the disk cache flag as well.
         *
         * If --blockCacheRecoverDirtyBlocks is specified and the tokens match, we
         * have the corresponding cache file for the last mount. Proceed with mount and,
         * if configured, enable cache writeback of dirty blocks.
         */
        if (config.block_cache.cache_file != NULL) {
            int32_t cache_mount_token = -1;
            struct stat cache_file_stat;
            struct s3b_dcache *dcache;

            /* Open disk cache file, if any, and read the mount token therein, if any */
            if (stat(config.block_cache.cache_file, &cache_file_stat) == -1) {
                if (errno != ENOENT)
                    err(1, "can't open cache file `%s'", config.block_cache.cache_file);
            } else {
                if ((r = s3b_dcache_open(&dcache, config.log, config.block_cache.cache_file,
                  config.block_cache.block_size, config.block_cache.cache_size, NULL, NULL, 0)) != 0)
                    errx(1, "error opening cache file `%s': %s", config.block_cache.cache_file, strerror(r));
                if (s3b_dcache_has_mount_token(dcache) && (r = s3b_dcache_set_mount_token(dcache, &cache_mount_token, -1)) != 0)
                    errx(1, "error reading mount token from `%s': %s", config.block_cache.cache_file, strerror(r));
                s3b_dcache_close(dcache);
            }

            /* If cache file is older format, then cache_mount_token will be -1, otherwise >= 0 */
            if (cache_mount_token > 0) {

                /* If tokens do not agree, bail out, otherwise enable write-back of dirty blocks if tokens are non-zero */
                if (cache_mount_token != mount_token) {
                    warnx("cache file `%s' mount token mismatch (disk:0x%08x != s3:0x%08x)",
                      config.block_cache.cache_file, cache_mount_token, mount_token);
                } else if (config.block_cache.recover_dirty_blocks) {
                    if (!config.quiet)
                        warnx("recovering from unclean shutdown: dirty blocks in cache file will be written back to S3");
                    config.block_cache.perform_flush = 1;
                    conflict = 0;
                }
            }
        }

        /* If there is a conflicting mount, additional `--force' is required */
        if (conflict) {
            if (!config.force) {
                warnx("%s appears to be already mounted (using mount token 0x%08x)", config.description, (int)mount_token);
                errx(1, "reset mount token with `--reset-mounted-flag', or use `--force' to override");
            }
            if (!config.quiet) {
                warnx("warning: filesystem appears already mounted but you said `--force'\n"
                  "           so I'll proceed anyway even though your data may get corrupted.\n");
            }
        }
    }

    /* If `--listBlocks' was given, build non-empty block bitmap */
    if (config.erase || config.reset)
        config.list_blocks = 0;
    if (config.list_blocks) {
        struct s3backer_store *temp_store;
        struct list_blocks lb;
        size_t nwords;

        /* Logging */
        if (!config.quiet) {
            fprintf(stderr, "s3backer: listing non-zero blocks...");
            fflush(stderr);
        }

        /* Create temporary lower layer */
        if ((temp_store = config.test ? test_io_create(&config.http_io) : http_io_create(&config.http_io)) == NULL)
            err(1, config.test ? "test_io_create" : "http_io_create");

        /* Initialize bitmap */
        nwords = (config.num_blocks + (sizeof(*lb.bitmap) * 8) - 1) / (sizeof(*lb.bitmap) * 8);
        if ((lb.bitmap = calloc(nwords, sizeof(*lb.bitmap))) == NULL)
            err(1, "calloc");
        lb.print_dots = !config.quiet;
        lb.count = 0;

        /* Generate non-zero block bitmap */
        assert(config.http_io.nonzero_bitmap == NULL);
        if ((r = (*temp_store->list_blocks)(temp_store, list_blocks_callback, &lb)) != 0)
            errx(1, "can't list blocks: %s", strerror(r));

        /* Close temporary store */
        (*temp_store->destroy)(temp_store);

        /* Save generated bitmap */
        config.http_io.nonzero_bitmap = lb.bitmap;

        /* Logging */
        if (!config.quiet) {
            fprintf(stderr, "done\n");
            warnx("found %ju non-zero blocks", lb.count);
        }
    }

    /* Done */
    return 0;
}

/*
 * list_blocks() callback: record a non-zero block in the bitmap and
 * emit a progress dot every BLOCKS_PER_DOT blocks (when enabled).
 */
static void
list_blocks_callback(void *arg, s3b_block_t block_num)
{
    struct list_blocks *const lb = arg;
    const int bits_per_word = sizeof(*lb->bitmap) * 8;

    lb->bitmap[block_num / bits_per_word] |= 1 << (block_num % bits_per_word);
    lb->count++;
    if (lb->print_dots && (lb->count % BLOCKS_PER_DOT) == 0) {
        fprintf(stderr, ".");
        fflush(stderr);
    }
}

/*
 * Log the complete effective configuration at LOG_DEBUG level.
 */
static void
dump_config(void)
{
    int i;

    (*config.log)(LOG_DEBUG, "s3backer config:");
    (*config.log)(LOG_DEBUG, "%24s: %s", "test mode", config.test ? "true" : "false");
    (*config.log)(LOG_DEBUG, "%24s: %s", "directIO", config.fuse_ops.direct_io ? "true" : "false");
    (*config.log)(LOG_DEBUG, "%24s: \"%s\"", "accessId", config.http_io.accessId != NULL ? config.http_io.accessId : "");
    (*config.log)(LOG_DEBUG, "%24s: \"%s\"", "accessKey", config.http_io.accessKey != NULL ? "****" : "");
    (*config.log)(LOG_DEBUG, "%24s: \"%s\"", "accessFile", config.accessFile);
    (*config.log)(LOG_DEBUG, "%24s: %s", "accessType", config.http_io.accessType);
    (*config.log)(LOG_DEBUG, "%24s: \"%s\"", "ec2iam_role", config.http_io.ec2iam_role != NULL ?
config.http_io.ec2iam_role : "");
    (*config.log)(LOG_DEBUG, "%24s: %s", "authVersion", config.http_io.authVersion);
    (*config.log)(LOG_DEBUG, "%24s: \"%s\"", "baseURL", config.http_io.baseURL);
    (*config.log)(LOG_DEBUG, "%24s: \"%s\"", "region", config.http_io.region);
    (*config.log)(LOG_DEBUG, "%24s: \"%s\"", config.test ? "testdir" : "bucket", config.http_io.bucket);
    (*config.log)(LOG_DEBUG, "%24s: \"%s\"", "prefix", config.http_io.prefix);
    (*config.log)(LOG_DEBUG, "%24s: %s", "blockHashPrefix", config.http_io.blockHashPrefix ? "true" : "false");
    (*config.log)(LOG_DEBUG, "%24s: \"%s\"", "defaultContentEncoding", config.http_io.default_ce != NULL ? config.http_io.default_ce : "(none)");
    (*config.log)(LOG_DEBUG, "%24s: %s", "list_blocks", config.list_blocks ? "true" : "false");
    (*config.log)(LOG_DEBUG, "%24s: \"%s\"", "mount", config.mount);
    (*config.log)(LOG_DEBUG, "%24s: \"%s\"", "filename", config.fuse_ops.filename);
    (*config.log)(LOG_DEBUG, "%24s: \"%s\"", "stats_filename", config.fuse_ops.stats_filename);
    /* Sizes are shown both as the original string (if given) and the parsed value */
    (*config.log)(LOG_DEBUG, "%24s: %s (%u)", "block_size", config.block_size_str != NULL ? config.block_size_str : "-", config.block_size);
    (*config.log)(LOG_DEBUG, "%24s: %s (%jd)", "file_size", config.file_size_str != NULL ? config.file_size_str : "-", (intmax_t)config.file_size);
    (*config.log)(LOG_DEBUG, "%24s: %jd", "num_blocks", (intmax_t)config.num_blocks);
    (*config.log)(LOG_DEBUG, "%24s: 0%o", "file_mode", config.fuse_ops.file_mode);
    (*config.log)(LOG_DEBUG, "%24s: %s", "read_only", config.fuse_ops.read_only ? "true" : "false");
    (*config.log)(LOG_DEBUG, "%24s: %d", "compress", config.http_io.compress);
    (*config.log)(LOG_DEBUG, "%24s: %s", "encryption", config.http_io.encryption != NULL ? config.http_io.encryption : "(none)");
    (*config.log)(LOG_DEBUG, "%24s: %u", "key_length", config.http_io.key_length);
    /* Never log the actual password */
    (*config.log)(LOG_DEBUG, "%24s: \"%s\"", "password", config.http_io.password != NULL ? "****" : "");
    (*config.log)(LOG_DEBUG, "%24s: %s bps (%ju)", "max_upload", config.max_speed_str[HTTP_UPLOAD] != NULL ? config.max_speed_str[HTTP_UPLOAD] : "-", config.http_io.max_speed[HTTP_UPLOAD]);
    (*config.log)(LOG_DEBUG, "%24s: %s bps (%ju)", "max_download", config.max_speed_str[HTTP_DOWNLOAD] != NULL ? config.max_speed_str[HTTP_DOWNLOAD] : "-", config.http_io.max_speed[HTTP_DOWNLOAD]);
    (*config.log)(LOG_DEBUG, "%24s: %us", "timeout", config.http_io.timeout);
    (*config.log)(LOG_DEBUG, "%24s: \"%s\"", "sse", config.http_io.sse);
    (*config.log)(LOG_DEBUG, "%24s: %ums", "initial_retry_pause", config.http_io.initial_retry_pause);
    (*config.log)(LOG_DEBUG, "%24s: %ums", "max_retry_pause", config.http_io.max_retry_pause);
    (*config.log)(LOG_DEBUG, "%24s: %ums", "min_write_delay", config.ec_protect.min_write_delay);
    (*config.log)(LOG_DEBUG, "%24s: %ums", "md5_cache_time", config.ec_protect.cache_time);
    (*config.log)(LOG_DEBUG, "%24s: %u entries", "md5_cache_size", config.ec_protect.cache_size);
    (*config.log)(LOG_DEBUG, "%24s: %u entries", "block_cache_size", config.block_cache.cache_size);
    (*config.log)(LOG_DEBUG, "%24s: %u threads", "block_cache_threads", config.block_cache.num_threads);
    (*config.log)(LOG_DEBUG, "%24s: %ums", "block_cache_timeout", config.block_cache.timeout);
    (*config.log)(LOG_DEBUG, "%24s: %ums", "block_cache_write_delay", config.block_cache.write_delay);
    (*config.log)(LOG_DEBUG, "%24s: %u blocks", "block_cache_max_dirty", config.block_cache.max_dirty);
    (*config.log)(LOG_DEBUG, "%24s: %s", "block_cache_sync", config.block_cache.synchronous ? "true" : "false");
    (*config.log)(LOG_DEBUG, "%24s: %s", "recover_dirty_blocks", config.block_cache.recover_dirty_blocks ?
"true" : "false");
    (*config.log)(LOG_DEBUG, "%24s: %u blocks", "read_ahead", config.block_cache.read_ahead);
    (*config.log)(LOG_DEBUG, "%24s: %u blocks", "read_ahead_trigger", config.block_cache.read_ahead_trigger);
    (*config.log)(LOG_DEBUG, "%24s: \"%s\"", "block_cache_cache_file", config.block_cache.cache_file != NULL ? config.block_cache.cache_file : "");
    (*config.log)(LOG_DEBUG, "%24s: %s", "block_cache_no_verify", config.block_cache.no_verify ? "true" : "false");
    (*config.log)(LOG_DEBUG, "fuse_main arguments:");
    for (i = 0; i < config.fuse_args.argc; i++)
        (*config.log)(LOG_DEBUG, "  [%d] = \"%s\"", i, config.fuse_args.argv[i]);
}

/*
 * Logging callback that sends messages to syslog(3).
 * Debug messages are suppressed unless `--debug' was given.
 */
static void
syslog_logger(int level, const char *fmt, ...)
{
    va_list args;

    /* Filter debug messages */
    if (!config.debug && level == LOG_DEBUG)
        return;

    /* Send message to syslog */
    va_start(args, fmt);
    vsyslog(level, fmt, args);
    va_end(args);
}

/*
 * Logging callback that writes a timestamped, level-tagged line to standard error.
 * Debug messages are suppressed unless `--debug' was given.
 */
static void
stderr_logger(int level, const char *fmt, ...)
{
    const char *levelstr;
    char timebuf[32];
    va_list args;
    struct tm tm;
    time_t now;

    /* Filter debug messages */
    if (!config.debug && level == LOG_DEBUG)
        return;

    /* Get level descriptor */
    switch (level) {
    case LOG_ERR:
        levelstr = "ERROR";
        break;
    case LOG_WARNING:
        levelstr = "WARNING";
        break;
    case LOG_NOTICE:
        levelstr = "NOTICE";
        break;
    case LOG_INFO:
        levelstr = "INFO";
        break;
    case LOG_DEBUG:
        levelstr = "DEBUG";
        break;
    default:
        levelstr = "";
        break;
    }

    /* Format and print log message; localtime_r() is the thread-safe variant */
    time(&now);
    strftime(timebuf, sizeof(timebuf), "%F %T", localtime_r(&now, &tm));
    va_start(args, fmt);
    fprintf(stderr, "%s %s: ", timebuf, levelstr);
    vfprintf(stderr, fmt, args);
    fprintf(stderr, "\n");
    va_end(args);
}

/*
 * Print command-line usage summary to standard error.
 */
static void
usage(void)
{
    int i;

    fprintf(stderr, "Usage:\n");
    fprintf(stderr, "\ts3backer [options] bucket /mount/point\n");
    fprintf(stderr, "\ts3backer --test [options] directory /mount/point\n");
    fprintf(stderr, "\ts3backer --erase [options] bucket\n");
    fprintf(stderr, "\ts3backer --reset-mounted-flag [options] bucket\n");
    fprintf(stderr,
"Options:\n"); fprintf(stderr, "\t--%-27s %s\n", "accessFile=FILE", "File containing `accessID:accessKey' pairs"); fprintf(stderr, "\t--%-27s %s\n", "accessId=ID", "S3 access key ID"); fprintf(stderr, "\t--%-27s %s\n", "accessKey=KEY", "S3 secret access key"); fprintf(stderr, "\t--%-27s %s\n", "accessType=TYPE", "S3 ACL used when creating new items; one of:"); fprintf(stderr, "\t %-27s ", ""); for (i = 0; i < sizeof(s3_acls) / sizeof(*s3_acls); i++) fprintf(stderr, "%s%s", i > 0 ? ", " : " ", s3_acls[i]); fprintf(stderr, "\n"); fprintf(stderr, "\t--%-27s %s\n", "authVersion=TYPE", "Specify S3 authentication style; one of:"); fprintf(stderr, "\t %-27s ", ""); for (i = 0; i < sizeof(s3_auth_types) / sizeof(*s3_auth_types); i++) fprintf(stderr, "%s%s", i > 0 ? ", " : " ", s3_auth_types[i]); fprintf(stderr, "\n"); fprintf(stderr, "\t--%-27s %s\n", "accessEC2IAM=ROLE", "Acquire S3 credentials from EC2 machine via IAM role"); fprintf(stderr, "\t--%-27s %s\n", "baseURL=URL", "Base URL for all requests"); fprintf(stderr, "\t--%-27s %s\n", "blockCacheFile=FILE", "Block cache persistent file"); fprintf(stderr, "\t--%-27s %s\n", "blockCacheMaxDirty=NUM", "Block cache maximum number of dirty blocks"); fprintf(stderr, "\t--%-27s %s\n", "blockCacheNoVerify", "Disable verification of data loaded from cache file"); fprintf(stderr, "\t--%-27s %s\n", "blockCacheSize=NUM", "Block cache size (in number of blocks)"); fprintf(stderr, "\t--%-27s %s\n", "blockCacheSync", "Block cache performs all writes synchronously"); fprintf(stderr, "\t--%-27s %s\n", "blockCacheRecoverDirtyBlocks", "Recover dirty cache file blocks on startup"); fprintf(stderr, "\t--%-27s %s\n", "blockCacheThreads=NUM", "Block cache write-back thread pool size"); fprintf(stderr, "\t--%-27s %s\n", "blockCacheTimeout=MILLIS", "Block cache entry timeout (zero = infinite)"); fprintf(stderr, "\t--%-27s %s\n", "blockCacheWriteDelay=MILLIS", "Block cache maximum write-back delay"); fprintf(stderr, "\t--%-27s %s\n", 
"blockSize=SIZE", "Block size (with optional suffix 'K', 'M', 'G', etc.)"); fprintf(stderr, "\t--%-27s %s\n", "blockHashPrefix", "Prepend hash to block names for even distribution"); fprintf(stderr, "\t--%-27s %s\n", "cacert=FILE", "Specify SSL certificate authority file"); fprintf(stderr, "\t--%-27s %s\n", "compress[=LEVEL]", "Enable block compression, with 1=fast up to 9=small"); fprintf(stderr, "\t--%-27s %s\n", "debug", "Enable logging of debug messages"); fprintf(stderr, "\t--%-27s %s\n", "debug-http", "Print HTTP headers to standard output"); fprintf(stderr, "\t--%-27s %s\n", "directIO", "Disable kernel caching of the backed file"); fprintf(stderr, "\t--%-27s %s\n", "encrypt[=CIPHER]", "Enable encryption (implies `--compress')"); fprintf(stderr, "\t--%-27s %s\n", "erase", "Erase all blocks in the filesystem"); fprintf(stderr, "\t--%-27s %s\n", "fileMode=MODE", "Permissions of backed file in filesystem"); fprintf(stderr, "\t--%-27s %s\n", "filename=NAME", "Name of backed file in filesystem"); fprintf(stderr, "\t--%-27s %s\n", "force", "Ignore different auto-detected block and file sizes"); fprintf(stderr, "\t--%-27s %s\n", "help", "Show this information and exit"); fprintf(stderr, "\t--%-27s %s\n", "initialRetryPause=MILLIS", "Initial retry pause after stale data or server error"); fprintf(stderr, "\t--%-27s %s\n", "insecure", "Don't verify SSL server identity"); fprintf(stderr, "\t--%-27s %s\n", "keyLength", "Override generated cipher key length"); fprintf(stderr, "\t--%-27s %s\n", "listBlocks", "Auto-detect non-empty blocks at startup"); fprintf(stderr, "\t--%-27s %s\n", "maxDownloadSpeed=BITSPERSEC", "Max download bandwidth for a single read"); fprintf(stderr, "\t--%-27s %s\n", "maxRetryPause=MILLIS", "Max total pause after stale data or server error"); fprintf(stderr, "\t--%-27s %s\n", "maxUploadSpeed=BITSPERSEC", "Max upload bandwidth for a single write"); fprintf(stderr, "\t--%-27s %s\n", "md5CacheSize=NUM", "Max size of MD5 cache (zero = disabled)"); 
fprintf(stderr, "\t--%-27s %s\n", "md5CacheTime=MILLIS", "Expire time for MD5 cache (zero = infinite)"); fprintf(stderr, "\t--%-27s %s\n", "minWriteDelay=MILLIS", "Minimum time between same block writes"); fprintf(stderr, "\t--%-27s %s\n", "password=PASSWORD", "Encrypt using PASSWORD"); fprintf(stderr, "\t--%-27s %s\n", "passwordFile=FILE", "Encrypt using password read from FILE"); fprintf(stderr, "\t--%-27s %s\n", "prefix=STRING", "Prefix for resource names within bucket"); fprintf(stderr, "\t--%-27s %s\n", "defaultContentEncoding=STRING", "Default HTTP Content-Encoding if none given"); fprintf(stderr, "\t--%-27s %s\n", "quiet", "Omit progress output at startup"); fprintf(stderr, "\t--%-27s %s\n", "readAhead=NUM", "Number of blocks to read-ahead"); fprintf(stderr, "\t--%-27s %s\n", "readAheadTrigger=NUM", "# of sequentially read blocks to trigger read-ahead"); fprintf(stderr, "\t--%-27s %s\n", "readOnly", "Return `Read-only file system' error for write attempts"); fprintf(stderr, "\t--%-27s %s\n", "region=region", "Specify AWS region"); fprintf(stderr, "\t--%-27s %s\n", "reset-mounted-flag", "Reset `already mounted' flag in the filesystem"); fprintf(stderr, "\t--%-27s %s\n", "rrs", "Target written blocks for Reduced Redundancy Storage (deprecated)"); fprintf(stderr, "\t--%-27s %s\n", "size=SIZE", "File size (with optional suffix 'K', 'M', 'G', etc.)"); fprintf(stderr, "\t--%-27s %s\n", "sse=" REQUIRED_SSE_VALUE, "Specify server side encryption"); fprintf(stderr, "\t--%-27s %s\n", "ssl", "Enable SSL"); fprintf(stderr, "\t--%-27s %s\n", "statsFilename=NAME", "Name of statistics file in filesystem"); fprintf(stderr, "\t--%-27s %s\n", "storageClass=TYPE", "Specify storage class for written blocks"); fprintf(stderr, "\t--%-27s %s\n", "test", "Run in local test mode (bucket is a directory)"); fprintf(stderr, "\t--%-27s %s\n", "timeout=SECONDS", "Max time allowed for one HTTP operation"); fprintf(stderr, "\t--%-27s %s\n", "timeout=SECONDS", "Specify HTTP operation 
timeout"); fprintf(stderr, "\t--%-27s %s\n", "version", "Show version information and exit"); fprintf(stderr, "\t--%-27s %s\n", "vhost", "Use virtual host bucket style URL for all requests"); fprintf(stderr, "Default values:\n"); fprintf(stderr, "\t--%-27s \"%s\"\n", "accessFile", "$HOME/" S3BACKER_DEFAULT_PWD_FILE); fprintf(stderr, "\t--%-27s %s\n", "accessId", "The first one listed in `accessFile'"); fprintf(stderr, "\t--%-27s \"%s\"\n", "accessType", S3BACKER_DEFAULT_ACCESS_TYPE); fprintf(stderr, "\t--%-27s \"%s\"\n", "authVersion", S3BACKER_DEFAULT_AUTH_VERSION); fprintf(stderr, "\t--%-27s \"%s\"\n", "baseURL", "http://s3." S3_DOMAIN "/"); fprintf(stderr, "\t--%-27s %u\n", "blockCacheSize", S3BACKER_DEFAULT_BLOCK_CACHE_SIZE); fprintf(stderr, "\t--%-27s %u\n", "blockCacheThreads", S3BACKER_DEFAULT_BLOCK_CACHE_NUM_THREADS); fprintf(stderr, "\t--%-27s %u\n", "blockCacheTimeout", S3BACKER_DEFAULT_BLOCK_CACHE_TIMEOUT); fprintf(stderr, "\t--%-27s %u\n", "blockCacheWriteDelay", S3BACKER_DEFAULT_BLOCK_CACHE_WRITE_DELAY); fprintf(stderr, "\t--%-27s %d\n", "blockSize", S3BACKER_DEFAULT_BLOCKSIZE); fprintf(stderr, "\t--%-27s \"%s\"\n", "filename", S3BACKER_DEFAULT_FILENAME); fprintf(stderr, "\t--%-27s %u\n", "initialRetryPause", S3BACKER_DEFAULT_INITIAL_RETRY_PAUSE); fprintf(stderr, "\t--%-27s %u\n", "md5CacheSize", S3BACKER_DEFAULT_MD5_CACHE_SIZE); fprintf(stderr, "\t--%-27s %u\n", "md5CacheTime", S3BACKER_DEFAULT_MD5_CACHE_TIME); fprintf(stderr, "\t--%-27s 0%03o (0%03o if `--readOnly')\n", "fileMode", S3BACKER_DEFAULT_FILE_MODE, S3BACKER_DEFAULT_FILE_MODE_READ_ONLY); fprintf(stderr, "\t--%-27s %u\n", "maxRetryPause", S3BACKER_DEFAULT_MAX_RETRY_PAUSE); fprintf(stderr, "\t--%-27s %u\n", "minWriteDelay", S3BACKER_DEFAULT_MIN_WRITE_DELAY); fprintf(stderr, "\t--%-27s \"%s\"\n", "prefix", S3BACKER_DEFAULT_PREFIX); fprintf(stderr, "\t--%-27s %u\n", "readAhead", S3BACKER_DEFAULT_READ_AHEAD); fprintf(stderr, "\t--%-27s %u\n", "readAheadTrigger", 
S3BACKER_DEFAULT_READ_AHEAD_TRIGGER); fprintf(stderr, "\t--%-27s \"%s\"\n", "region", S3BACKER_DEFAULT_REGION); fprintf(stderr, "\t--%-27s \"%s\"\n", "statsFilename", S3BACKER_DEFAULT_STATS_FILENAME); fprintf(stderr, "\t--%-27s %u\n", "timeout", S3BACKER_DEFAULT_TIMEOUT); fprintf(stderr, "FUSE options (partial list):\n"); fprintf(stderr, "\t%-29s %s\n", "-o nonempty", "Allows mount over a non-empty directory"); fprintf(stderr, "\t%-29s %s\n", "-o uid=UID", "Set user ID"); fprintf(stderr, "\t%-29s %s\n", "-o gid=GID", "Set group ID"); fprintf(stderr, "\t%-29s %s\n", "-o sync_read", "Do synchronous reads"); fprintf(stderr, "\t%-29s %s\n", "-o max_readahead=NUM", "Set maximum read-ahead (bytes)"); fprintf(stderr, "\t%-29s %s\n", "-f", "Run in the foreground (do not fork)"); fprintf(stderr, "\t%-29s %s\n", "-d", "Debug mode (implies -f)"); fprintf(stderr, "\t%-29s %s\n", "-s", "Run in single-threaded mode"); } s3backer-1.5.4/s3b_config.h000066400000000000000000000060101354714241400154050ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. 
* * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. */ /* Overal application configuration info */ struct s3b_config { /* Various sub-module configurations */ struct block_cache_conf block_cache; struct fuse_ops_conf fuse_ops; struct ec_protect_conf ec_protect; struct http_io_conf http_io; /* Common/global stuff */ const char *accessFile; const char *mount; char description[768]; u_int block_size; off_t file_size; off_t num_blocks; int debug; int erase; int reset; int quiet; int force; int test; int ssl; int no_auto_detect; int list_blocks; struct fuse_args fuse_args; log_func_t *log; /* These are only used during command line parsing */ const char *file_size_str; const char *block_size_str; const char *password_file; const char *max_speed_str[2]; int encrypt; }; extern struct s3b_config *s3backer_get_config(int argc, char **argv); extern struct s3backer_store *s3backer_create_store(struct s3b_config *config); s3backer-1.5.4/s3backer.1000066400000000000000000001076341354714241400150150ustar00rootroot00000000000000.\" -*- nroff -*- .\" .\" s3backer - FUSE-based single file backing store via Amazon S3 .\" .\" Copyright 2008-2011 Archie L. 
Cobbs .\" .\" This program is free software; you can redistribute it and/or .\" modify it under the terms of the GNU General Public License .\" as published by the Free Software Foundation; either version 2 .\" of the License, or (at your option) any later version. .\" .\" This program is distributed in the hope that it will be useful, .\" but WITHOUT ANY WARRANTY; without even the implied warranty of .\" MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the .\" GNU General Public License for more details. .\" .\" You should have received a copy of the GNU General Public License .\" along with this program; if not, write to the Free Software .\" Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA .\" 02110-1301, USA. .\" .\" In addition, as a special exception, the copyright holders give .\" permission to link the code of portions of this program with the .\" OpenSSL library under certain conditions as described in each .\" individual source file, and distribute linked combinations including .\" the two. .\" .\" You must obey the GNU General Public License in all respects for all .\" of the code used other than OpenSSL. If you modify file(s) with this .\" exception, you may extend this exception to your version of the .\" file(s), but you are not obligated to do so. If you do not wish to do .\" so, delete this exception statement from your version. If you delete .\" this exception statement from all source files in the program, then .\" also delete it here. 
.\" .Dd September 7, 2009 .Dt S3BACKER 1 .Os .Sh NAME .Nm s3backer .Nd FUSE-based single file backing store via Amazon S3 .Sh SYNOPSIS .Nm s3backer .Bk -words .Op options .Ar bucket .Ar /mount/point .Ek .Pp .Nm s3backer .Bk -words .Fl \-test .Op options .Ar dir .Ar /mount/point .Ek .Pp .Nm s3backer .Bk -words .Fl \-erase .Op options .Ar bucket .Ek .Pp .Nm s3backer .Bk -words .Fl \-reset-mounted-flag .Op options .Ar bucket .Ek .Sh DESCRIPTION .Nm is a filesystem that contains a single file backed by the Amazon Simple Storage Service (Amazon S3). As a filesystem, it is very simple: it provides a single normal file having a fixed size. Underneath, the file is divided up into blocks, and the content of each block is stored in a unique Amazon S3 object. In other words, what .Nm provides is really more like an S3-backed virtual hard disk device, rather than a filesystem. .Pp In typical usage, a `normal' filesystem is mounted on top of the file exported by the .Nm filesystem using a loopback mount (or disk image mount on Mac OS X). .Pp This arrangement has several benefits compared to more complete S3 filesystem implementations: .Bl -tag -width xx .It o By not attempting to implement a complete filesystem, which is a complex undertaking and difficult to get right, .Nm can stay very lightweight and simple. Only three HTTP operations are used: GET, PUT, and DELETE. All of the experience and knowledge about how to properly implement filesystems that already exists can be reused. .It o By utilizing existing filesystems, you get full UNIX filesystem semantics. Subtle bugs or missing functionality relating to hard links, extended attributes, POSIX locking, etc. are avoided. .It o The gap between normal filesystem semantics and Amazon S3 ``eventual consistency'' is more easily and simply solved when one can interpret S3 objects as simple device blocks rather than filesystem objects (see below). 
.It o When storing your data on Amazon S3 servers, which are not under your control, the ability to encrypt and authenticate data becomes a critical issue. .Nm supports secure encryption and authentication. Alternately, the encryption capability built into the Linux loopback device can be used. .It o Since S3 data is accessed over the network, local caching is also very important for performance reasons. Since .Nm presents the equivalent of a virtual hard disk to the kernel, most of the filesystem caching can be done where it should be: in the kernel, via the kernel's page cache. However .Nm also includes its own internal block cache for increased performance, using asynchronous worker threads to take advantage of the parallelism inherent in the network. .El .Ss Consistency Guarantees Amazon S3 makes relatively weak guarantees relating to the timing and consistency of reads vs. writes (collectively known as ``eventual consistency''). .Nm includes logic and configuration parameters to work around these limitations, allowing the user to guarantee consistency to whatever level desired, up to and including 100% detection and avoidance of incorrect data. These are: .Bl -tag -width xx .It 1. .Nm enforces a minimum delay between consecutive PUT or DELETE operations on the same block. This ensures that Amazon S3 doesn't receive these operations out of order. .It 2. .Nm maintains an internal block MD5 checksum cache, which enables automatic detection and rejection of `stale' blocks returned by GET operations. .El .Pp This logic is configured by the following command line options: .Fl \-md5CacheSize , .Fl \-md5CacheTime , and .Fl \-minWriteDelay . .Ss Zeroed Block Optimization As a simple optimization, .Nm does not store blocks containing all zeroes; instead, they are simply deleted. Conversely, reads of non-existent blocks will contain all zeroes. In other words, the backed file is always maximally sparse. 
.Pp As a result, blocks do not need to be created before being used and no special initialization is necessary when creating a new filesystem. .Ss File and Block Size Auto-Detection As a convenience, whenever the first block of the backed file is written, .Nm includes as meta-data (in the ``x-amz-meta-s3backer-filesize'' header) the total size of the file. Along with the size of the block itself, this value can be checked and/or auto-detected later when the filesystem is remounted, eliminating the need for the .Fl \-blockSize or .Fl \-size flags to be explicitly provided and avoiding accidental mis-interpretation of an existing filesystem. .Ss Block Cache .Nm includes support for an internal block cache to increase performance. The block cache is completely separate from the MD5 cache which only stores MD5 checksums transiently and whose sole purpose is to mitigate ``eventual consistency''. The block cache is a traditional cache containing cached data blocks. When full, clean blocks are evicted as necessary in LRU order. .Pp Reads of cached blocks will return immediately with no network traffic. Writes to the cache also return immediately and trigger an asynchronous write operation to the network via a separate worker thread. Because the kernel typically writes blocks through FUSE filesystems one at a time, performing writes asynchronously allows .Nm to take advantage of the parallelism inherent in the network, vastly improving write performance. .Pp The block cache can be configured to store the cached data in a local file instead of in memory. This permits larger cache sizes and allows .Nm to reload cached data after a restart. Reloaded data is verified via MD5 checksum with Amazon S3 before reuse. 
.Pp The block cache is configured by the following command line options: .Fl \-blockCacheFile , .Fl \-blockCacheMaxDirty , .Fl \-blockCacheNoVerify , .Fl \-blockCacheSize , .Fl \-blockCacheSync , .Fl \-blockCacheThreads , .Fl \-blockCacheTimeout , .Fl \-blockCacheWriteDelay , and .Fl \-blockCacheRecoverDirtyBlocks . .Ss Read Ahead .Nm implements a simple read-ahead algorithm in the block cache. When a configurable number of blocks are read in order, block cache worker threads are awoken to begin reading subsequent blocks into the block cache. Read ahead continues as long as the kernel continues reading blocks sequentially. The kernel typically requests blocks one at a time, so having multiple worker threads already reading the next few blocks improves read performance by taking advantage of the parallelism inherent in the network. .Pp Note that the kernel implements a read ahead algorithm as well; its behavior should be taken into consideration. By default, .Nm passes the .Fl o Ar max_readahead=0 option to FUSE. .Pp Read ahead is configured by the .Fl \-readAhead and .Fl \-readAheadTrigger command line options. .Ss Encryption and Authentication .Nm supports encryption via the .Fl \-encrypt , .Fl \-password , and .Fl \-passwordFile flags. When encryption is enabled, SHA1 HMAC authentication is also automatically enabled, and .Nm rejects any blocks that are not properly encrypted and signed. .Pp Encrypting at the .Nm layer is preferable to encrypting at an upper layer (e.g., at the loopback device layer), because if the data .Nm sees is already encrypted it can't optimize away zeroed blocks or do meaningful compression. .Ss Compression .Nm supports block-level compression, which minimizes transfer time and storage costs. .Pp Compression is configured via the .Fl \-compress flag. Compression is automatically enabled when encryption is enabled. .Ss Server Side Encryption .Nm supports server side encryption via the .Fl \-sse flag. 
.Ss Read-Only Access An Amazon S3 account is not required in order to use .Nm . The filesystem must already exist and have S3 objects with ACL's configured for public read access (see .Fl \-accessType below); users should perform the loopback mount with the read-only flag (see .Xr mount 8 ) and provide the .Fl \-readOnly flag to .Nm . This mode of operation facilitates the creation of public, read-only filesystems. .Ss Simultaneous Mounts Although it functions over the network, the .Nm filesystem is not a distributed filesystem and does not support simultaneous read/write mounts. (This is not something you would normally do with a hard-disk partition either.) As a safety measure, .Nm attempts to detect this situation using an 'already mounted' flag in the data store, and will fail to start if it does. .Pp This detection may produce a false positive if a former .Nm process was not shut down cleanly; if so, the .Fl \-reset-mounted-flag flag can be used to reset the 'already mounted' flag. But see also BUGS below. .Ss Statistics File .Nm populates the filesystem with a human-readable statistics file. Use .Fl \-statsFilename to change the name of this file (default `stats'). The statistics can be reset to zero by attempting to remove the file. .Ss Logging In normal operation .Nm will log via .Xr syslog 3 . When run with the .Fl d or .Fl f flags, .Nm will log to standard error. .Sh OPTIONS Each command line flag has two forms, for example .Fl \-accessFile=FILE and .Fl o Ar accessFile=FILE . Only the first form is shown below. Either form may be used; both are equivalent. The second form allows mount options to be specified directly in .Pa /etc/fstab and passed seamlessly to .Nm by FUSE. .Bl -tag -width Ds .It Fl \-accessFile=FILE Specify a file containing `accessID:accessKey' pairs, one per-line. Blank lines and lines beginning with a `#' are ignored. 
If no .Fl \-accessKey is specified, this file will be searched for the entry matching the access ID specified via .Fl \-accessId; if neither .Fl \-accessKey nor .Fl \-accessId is specified, the first entry in this file will be used. Default value is .Pa $HOME/.s3backer_passwd . .It Fl \-accessId=ID Specify Amazon S3 access ID. Specify an empty string to force no access ID. If no access ID is specified (and none is found in the access file) then .Nm will still function, but only reads of publicly available filesystems will work. .It Fl \-accessKey=KEY Specify Amazon S3 access key. To avoid publicizing this secret via the command line, use .Fl \-accessFile instead of this flag. .It Fl \-accessType=TYPE Specify the Amazon S3 access privilege ACL type for newly written blocks. The value must be one of `private', `public-read', `public-read-write', or `authenticated-read'. Default is `private'. .It Fl \-accessEC2IAM=ROLE Download access credentials and security token in JSON document form from .Bk -words .Ar http://169.254.169.254/latest/meta-data/iam/security-credentials/ROLE .Ek every five minutes. .Pp This option allows S3 credentials to be provided automatically via the specified IAM role to .Nm when running on an Amazon EC2 instance. .It Fl \-authVersion=TYPE Specify how to authenticate requests. There are two supported authentication methods: .Ar aws2 is the original AWS authentication scheme. .Ar aws4 is the newer, recommended authentication scheme. .Pp .Ar aws4 is the default setting starting in version 1.4, and is required for certain non-US regions, while .Ar aws2 may still be required by some non-Amazon S3 providers. .It Fl \-baseURL=URL Specify the base URL, which must end in a forward slash. Default is `http://s3.amazonaws.com/'. .It Fl \-blockCacheFile=FILE Specify a file in which to store cached data blocks. Without this flag, the block cache lives entirely in process memory and the cached data disappears when .Nm is stopped. 
The file will be created if it doesn't exist. .Pp Cache files that have been created by previous invocations of .Nm are reusable as long as they were created with the same configured block size (if not, startup will fail). This is true even if .Nm was stopped abruptly, e.g., due to a system crash; however, this guarantee rests on the assumption that the filesystem containing the cache file will not reorder writes across calls to .Xr fsync 2 . .Pp If an existing cache is used but was created with a different size, .Nm will automatically expand or shrink the file at startup. When shrinking, blocks that don't fit in the new, smaller cache are discarded. This process also compacts the cache file to the extent possible. .Pp By default, only clean cache blocks are recoverable after a restart. This means a system crash will cause dirty blocks in the cache to be lost (of course, that is the case with an in-memory cache as well). .Pp With the newer cache file format introduced in release 1.5.0, you can recover these dirty blocks by specifying the .Fl \-blockCacheRecoverDirtyBlocks option. This will cause any dirty blocks in the cache file to be made writable again on startup. If your cache file was created with a prior release of .Nm or you do not specify this option, dirty blocks in the cache file are discarded on startup. The window of this data loss can be limited by .Fl \-blockCacheWriteDelay . .Pp By default, when having reloaded the cache from a cache file, .Nm will verify the MD5 checksum of each reloaded block with Amazon S3 before its first use. This verify operation does not require actually reading the block's data, and therefore is relatively quick. This guards against the cached data having unknowingly gotten out of sync since the cache file was last used, a situation that is otherwise impossible for .Nm to detect. .It Fl \-blockCacheMaxDirty=NUM Specify a limit on the number of dirty blocks in the block cache. 
When this limit is reached, subsequent write attempts will block until an existing dirty block is successfully written (and therefore becomes no longer dirty). This flag limits the amount of inconsistency there can be with respect to the underlying S3 data store. .Pp The default value is zero, which means no limit. .It Fl \-blockCacheNoVerify Disable the MD5 verification of blocks loaded from a cache file specified via .Fl \-blockCacheFile . Using this flag is dangerous; use only when you are sure the cached file is uncorrupted and the data it contains is up to date. .It Fl \-blockCacheSize=SIZE Specify the block cache size (in number of blocks). Each entry in the cache will consume approximately block size plus 20 bytes. A value of zero disables the block cache. Default value is 1000. .It Fl \-blockCacheThreads=NUM Set the size of the thread pool associated with the block cache (if enabled). This bounds the number of simultaneous writes that can occur to the network. Default value is 20. .It Fl \-blockCacheTimeout=MILLIS Specify the maximum time a clean entry can remain in the block cache before it will be forcibly evicted and its associated memory freed. A value of zero means there is no timeout; in this case, the number of entries in the block cache will never decrease, eventually reaching the maximum size configured by .Fl \-blockCacheSize and staying there. Configure a non-zero value if the memory usage of the block cache is a concern. Default value is zero (no timeout). .It Fl \-blockCacheWriteDelay=MILLIS Specify the maximum time a dirty block can remain in the block cache before it must be written out to the network. Blocks may be written sooner when there is cache pressure. A value of zero configures a ``write-through'' policy; greater values configure a ``write-back'' policy. Larger values increase performance when a small number of blocks are accessed repeatedly, at the cost of greater inconsistency with the underlying S3 data store. 
Default value is 250 milliseconds. .It Fl \-blockCacheSync Forces synchronous writes in the block cache layer. Instead of returning immediately and scheduling the actual write operation to happen later, write requests will not return until the write has completed. This flag is a stricter requirement than .Fl \-blockCacheWriteDelay=0 , which merely causes the writes to be initiated as soon as possible (but still after the write request returns). .Pp This flag requires .Fl \-blockCacheWriteDelay to be zero. Using this flag is likely to drastically reduce write performance. .It Fl \-blockCacheRecoverDirtyBlocks An unclean dismount may leave dirty blocks (blocks written to the local cache file, but not yet flushed to S3) in the cache file. .Pp If this option is set, .Nm will recover any such dirty blocks and eventually write them back to S3. If this option is not specified, all dirty data in the cache file are discarded on startup. .Pp If the filesystem has been mounted since the cache file was last used, .Nm will refuse to mount. This is verified by checking a unique 32-bit mount token in the cache file against the 'already mounted' flag in the data store. .Pp This flag requires .Fl \-blockCacheFile to be set. .It Fl \-blockHashPrefix Prepend random prefixes (generated deterministically from the block number) to block object names. This spreads requests more evenly across the namespace, and prevents heavy access to a narrow range of blocks from all being directed to the same backend server. .Pp As with .Fl \-prefix , this flag must be used consistently once a disk image is established. .It Fl \-blockSize=SIZE Specify the block size. This must be a power of two and should be a multiple of the kernel's native page size. The size may have an optional suffix 'K' for kilobytes, 'M' for megabytes, etc. 
.Pp .Nm supports partial block operations, though this forces a read before each write; use of the block cache and proper alignment of the .Nm block size with the intended use (e.g., the block size of the `upper' filesystem) will help minimize the extra reads. Note that even when filesystems are configured for large block sizes, the kernel will often still write page-sized blocks. .Pp .Nm will attempt to auto-detect the block size by reading block number zero at startup. If this option is not specified, the auto-detected value will be used. If this option is specified but disagrees with the auto-detected value, .Nm will exit with an error unless .Fl \-force is also given. If auto-detection fails because block number zero does not exist, and this option is not specified, then the default value of 4K (4096) is used. .It Fl \-cacert=FILE Specify SSL certificate file to be used when verifying the remote server's identity when operating over SSL connections. Equivalent to the .Fl \-cacert flag documented in .Xr curl 1 . .It Fl \-compress[=LEVEL] Compress blocks before sending them over the network. This should result in less network traffic (in both directions) and lower storage costs. .Pp The compression level is optional; if given, it must be between 1 (fast compression) and 9 (most compression), inclusive. If omitted, the default compression level is used. .Pp This flag only enables compression of newly written blocks; decompression is always enabled and applied when appropriate. Therefore, it is safe to switch this flag on or off between different invocations of .Nm on the same filesystem. .Pp This flag is automatically enabled when .Fl \-encrypt is used, though you may also specify .Fl \-compress=LEVEL to set a non-default compression level. .Pp When using an encrypted upper layer filesystem, this flag adds no value because the data will not be compressible. .It Fl \-directIO Disable kernel caching of the backed file. 
This will force the kernel to always pass reads and writes directly to .Nm . This reduces performance but also eliminates one source of inconsistency. .It Fl \-debug Enable logging of debug messages. Note that this flag is different from .Fl d , which is a flag to FUSE; however, the .Fl d FUSE flag implies this flag. .It Fl \-debug-http Enable printing of HTTP headers to standard output. .It Fl \-defaultContentEncoding=VALUE Use this to workaround S3 backends that fail to send back the .Pa "Content-Encoding" header that was sent to them by .Nm . If a block read response contains no .Pa "Content-Encoding" header, this value will be substituted. .Pp If you get errors complaining that the content was expected to be encrypted, try setting this to .Pa deflate,encrypt-AES-128-CBC . .It Fl \-encrypt[=CIPHER] Enable encryption and authentication of block data. See your OpenSSL documentation for a list of supported ciphers; the default if no cipher is specified is AES-128 CBC. .Pp The encryption password may be supplied via one of .Fl \-password or .Fl \-passwordFile . If neither flag is given, .Nm will ask for the password at startup. .Pp Note: the actual key used is derived by hashing the password, the bucket name, the prefix name (if any), and the block number. Therefore, encrypted data cannot be ported to different buckets or prefixes. .Pp This flag implies .Fl \-compress . .It Fl \-erase Completely erase the file system by deleting all non-zero blocks, clear the 'already mounted' flag, and then exit. User confirmation is required unless the .Fl \-force flag is also given. Note, no simultaneous mount detection is performed in this case. .Pp This option implies .Fl \-listBlocks . .It Fl \-filename=NAME Specify the name of the backed file that appears in the .Nm filesystem. Default is `file'. .It Fl \-fileMode=MODE Specify the UNIX permission bits for the backed file that appears in the .Nm filesystem. 
Default is 0600, unless .Fl \-readOnly is specified, in which case the default is 0400. .It Fl \-force Proceed even if the value specified by .Fl \-blockSize or .Fl \-size disagrees with the auto-detected value, or .Nm detects that another .Nm instance is still mounted on top of the same S3 bucket (and prefix). In any of these cases, proceeding will lead to corrupted data, so the .Fl \-force flag should be avoided for normal use. .Pp The simultaneous mount detection can produce a false positive when a previous .Nm instance was not shut down cleanly. In this case, don't use .Fl \-force but rather run .Nm once with the .Fl \-reset-mounted-flag flag. .Pp If .Fl \-erase is given, .Fl \-force causes .Nm to proceed without user confirmation. .It Fl h Fl \-help Print a help message and exit. .It Fl \-initialRetryPause=MILLIS Specify the initial pause time in milliseconds before the first retry attempt after failed HTTP operations. Failures include network failures and timeouts, HTTP errors, and reads of stale data (i.e., MD5 mismatch); .Nm will make multiple retry attempts using an exponential backoff algorithm, starting with this initial retry pause time. Default value is 200ms. See also .Fl \-maxRetryPause . .It Fl \-insecure Do not verify the remote server's identity when operating over SSL connections. Equivalent to the .Fl \-insecure flag documented in .Xr curl 1 . .It Fl \-keyLength Override the length of the generated block encryption key. .Pp Versions of .Nm prior to 1.3.6 contained a bug where the length of the generated encryption key was fixed but system-dependent, causing it to be possibly incompatible on different systems for some ciphers. In version 1.3.6, this bug was corrected; however, in some cases this changed the generated key length, making the encryption no longer compatible with previously written data. This flag can be used to force the older, fixed key length. 
The value you want to use is whatever is defined for .Pa EVP_MAX_KEY_LENGTH on your system, typically 64. .Pp It is an error to specify a value smaller than the cipher's natural key length; however, a value of zero is allowed and is equivalent to not specifying anything. .It Fl \-listBlocks Perform a query at startup to determine which blocks already exist. This enables optimizations whereby, for each block that does not yet exist, reads return zeroes and zeroed writes are omitted, thereby eliminating any network access. This flag is useful when creating a new backed file, or any time it is expected that a large number of zeroed blocks will be read or written, such as when initializing a new filesystem. .Pp This flag will slow down startup in direct proportion to the number of blocks that already exist. .It Fl \-maxUploadSpeed=BITSPERSEC .It Fl \-maxDownloadSpeed=BITSPERSEC These flags set a limit on the bandwidth utilized for individual block uploads and downloads (i.e., the setting applies on a per-thread basis). The limits only apply to HTTP payload data and do not include any additional overhead from HTTP or TCP headers, etc. .Pp The value is measured in bits per second, and abbreviations like `256k', `1m', etc. may be used. By default, there is no fixed limit. .Pp Use of these flags may also require setting the .Fl \-timeout flag to a higher value. .It Fl \-maxRetryPause=MILLIS Specify the total amount of time in milliseconds .Nm should pause when retrying failed HTTP operations before giving up. Failures include network failures and timeouts, HTTP errors, and reads of stale data (i.e., MD5 mismatch); .Nm will make multiple retry attempts using an exponential backoff algorithm, up to this maximum total retry pause time. This value does not include the time it takes to perform the HTTP operations themselves (use .Fl \-timeout for that). Default value is 30000 (30 seconds). See also .Fl \-initialRetryPause . 
.It Fl \-minWriteDelay=MILLIS Specify a minimum time in milliseconds between the successful completion of a write and the initiation of another write to the same block. This delay ensures that S3 doesn't receive the writes out of order. This value must be set to zero when .Fl \-md5CacheSize is set to zero (MD5 cache disabled). Default value is 500ms. .It Fl \-md5CacheSize=SIZE Specify the size of the MD5 checksum cache (in number of blocks). If the cache is full when a new block is written, the write will block until there is room. Therefore, it is important to configure .Fl \-md5CacheTime and .Fl \-md5CacheSize according to the frequency of writes to the filesystem overall and to the same block repeatedly. Alternately, a value equal to the number of blocks in the filesystem eliminates this problem but consumes the most memory when full (each entry in the cache is approximately 40 bytes). A value of zero disables the MD5 cache. Default value is 1000. .It Fl \-md5CacheTime=MILLIS Specify in milliseconds the time after a block has been successfully written for which the MD5 checksum of the block's contents should be cached, for the purpose of detecting stale data during subsequent reads. A value of zero means `infinite' and provides a guarantee against reading stale data; however, you should only do this when .Fl \-md5CacheSize is configured to be equal to the number of blocks; otherwise deadlock will (eventually) occur. This value must be at least as big as .Fl \-minWriteDelay. This value must be set to zero when .Fl \-md5CacheSize is set to zero (MD5 cache disabled). Default value is 10 seconds. .Pp The MD5 checksum cache is not persisted across restarts. Therefore, to ensure the same eventual consistency protection while .Nm is not running, you must delay at least .Fl \-md5CacheTime milliseconds between stopping and restarting .Nm . .It Fl \-noAutoDetect Disable block and file size auto-detection at startup. 
If this flag is given, then the block size defaults to 4096 and the .Fl \-size flag is required. .It Fl \-password=PASSWORD Supply the password for encryption and authentication as a command-line parameter. .It Fl \-passwordFile=FILE Read the password for encryption and authentication from (the first line of) the specified file. .It Fl \-prefix=STRING Specify a prefix to prepend to the resource names within bucket that identify each block. By using different prefixes, multiple independent .Nm disks can live in the same S3 bucket. .Pp The default prefix is the empty string. .It Fl \-quiet Suppress progress output during initial startup. .It Fl \-readAhead=NUM Configure the number of blocks of read ahead. This determines how many blocks will be read into the block cache ahead of the last block read by the kernel when read ahead is active. This option has no effect if the block cache is disabled. Default value is 4. .It Fl \-readAheadTrigger=NUM Configure the number of blocks that must be read consecutively before the read ahead algorithm is triggered. Once triggered, read ahead will continue as long as the kernel continues reading blocks sequentially. This option has no effect if the block cache is disabled. Default value is 2. .It Fl \-readOnly Assume the filesystem is going to be mounted read-only, and return .Er EROFS in response to any attempt to write. This flag also changes the default mode of the backed file from 0600 to 0400 and disables the MD5 checksum cache. .It Fl \-region=REGION Specify an AWS region. This flag changes the default base URL to include the region name and automatically sets the .Fl \-vhost flag. .It Fl \-reset-mounted-flag Reset the 'already mounted' flag on the underlying S3 data store. .Pp .Nm detects simultaneous mounts by checking a special flag. If a previous invocation of .Nm was not shut down cleanly, the flag may not have been cleared. Running .Nm .Fl \-erase will clear it manually. But see also BUGS below. 
.Pp .It Fl \-rrs Deprecated; equivalent to .Fl \-storageClass=REDUCED_REDUNDANCY . .It Fl \-size=SIZE Specify the size (in bytes) of the backed file to be exported by the filesystem. The size may have an optional suffix 'K' for kilobytes, 'M' for megabytes, 'G' for gigabytes, 'T' for terabytes, 'E' for exabytes, 'Z' for zettabytes, or 'Y' for yottabytes. .Nm will attempt to auto-detect the size of the backed file by reading block number zero. If this option is not specified, the auto-detected value will be used. If this option is specified but disagrees with the auto-detected value, .Nm will exit with an error unless .Fl \-force is also given. .It Fl \-sse=ALGORITHM Enable server side encryption. This adds the .Pa x-amz-server-side-encryption header to all PUT requests. .Pp Currently the only supported encryption algorithm is .Pa AES256 . .It Fl \-ssl Equivalent to .Bk -words .Fl \-baseURL .Ar https://s3.amazonaws.com/ .Ek .It Fl \-statsFilename=NAME Specify the name of the human-readable statistics file that appears in the .Nm filesystem. A value of empty string disables the appearance of this file. Default is `stats'. .It Fl \-storageClass=TYPE Specify storage class. .Pp Valid values are: .Pa STANDARD , .Pa STANDARD_IA , and .Pa REDUCED_REDUNDANCY . .Pp The default is .Pa STANDARD . .It Fl \-test Operate in local test mode. Filesystem blocks are stored as regular files in the directory .Ar dir . No network traffic occurs. .Pp Note if .Ar dir is a relative pathname (and .Fl f is not given) it will be resolved relative to the root directory. .It Fl \-timeout=SECONDS Specify a time limit in seconds for one HTTP operation attempt. This limits the entire operation including connection time (if not already connected) and data transfer time. The default is 30 seconds; this value may need to be adjusted upwards to avoid premature timeouts on slower links and/or when using a large number of block cache worker threads. .Pp See also .Fl \-maxRetryPause . 
.It Fl \-version Output version and exit. .It Fl \-vhost Force virtual hosted style requests. For example, this will cause .Nm to use the URL .Pa http://mybucket.s3.amazonaws.com/path/uri instead of .Pa http://s3.amazonaws.com/mybucket/path/uri . .Pp This flag is required when S3 buckets have been created with location constraints (for example `EU buckets'). Put another way, this flag is required for buckets defined outside of the US region. This flag is automatically set when the .Fl \-region flag is used. .El .Pp In addition, .Nm accepts all of the generic FUSE options as well. Here is a partial list: .Bl -tag -width Ds .It Fl o Ar uid=UID Override the user ID of the backed file, which defaults to the current user ID. .It Fl o Ar gid=GID Override the group ID of the backed file, which defaults to the current group ID. .It Fl o Ar sync_read Do synchronous reads. .It Fl o Ar max_readahead=NUM Set maximum read-ahead (in bytes). .It Fl f Run in the foreground (do not fork). Causes logging to be sent to standard error. .It Fl d Enable FUSE debug mode. Implies .Fl f . .It Fl s Run in single-threaded mode. .El .Pp In addition, .Nm passes the following flags which are optimized for .Nm to FUSE (unless overridden by the user on the command line): .Pp .Bl -tag -width Ds -compact .It Fl o Ar kernel_cache .It Fl o Ar fsname=/ .It Fl o Ar subtype=s3backer .It Fl o Ar use_ino .It Fl o Ar entry_timeout=31536000 .It Fl o Ar negative_timeout=31536000 .It Fl o Ar max_readahead=0 .It Fl o Ar attr_timeout=0 .It Fl o Ar default_permissions .It Fl o Ar allow_other .It Fl o Ar nodev .It Fl o Ar nosuid .El .Sh FILES .Bl -tag -compact -width Ds .It Pa $HOME/.s3backer_passwd Contains Amazon S3 `accessID:accessKey' pairs. .El .Sh SEE ALSO .Xr curl 1 , .Xr losetup 8 , .Xr mount 8 , .Xr umount 8 , .Xr fusermount 8 . 
.Rs .%T "s3backer: FUSE-based single file backing store via Amazon S3" .%O https://github.com/archiecobbs/s3backer .Re .Rs .%T "Amazon Simple Storage Service (Amazon S3)" .%O http://aws.amazon.com/s3 .Re .Rs .%T "FUSE: Filesystem in Userspace" .%O http://fuse.sourceforge.net/ .Re .Rs .%T "MacFUSE: A User-Space File System Implementation Mechanism for Mac OS X" .%O http://code.google.com/p/macfuse/ .Re .Rs .%T "FUSE for OS X" .%O https://osxfuse.github.io/ .Re .Rs .%T "Google Search for `linux page cache'" .%O http://www.google.com/search?q=linux+page+cache .Re .Sh BUGS Due to a design flaw in FUSE, an unmount of the .Nm filesystem will complete successfully before .Nm has finished writing back all dirty blocks. Therefore, when using the block cache, attempts to remount the same bucket and prefix may fail with an 'already mounted' error while the former .Nm process finishes flushing its cache. Before assuming a false positive and using .Fl \-reset-mounted-flag, ensure that any previous .Nm process attached to the same bucket and prefix has exited. See issue #40 for details. .Pp For cache space efficiency, .Nm uses 32 bit values to index individual blocks. Therefore, the block size must be increased beyond the default 4K when very large filesystems (greater than 16 terabytes) are created. .Pp .Nm should really be implemented as a device rather than a filesystem. However, this would require writing a kernel module instead of a simple user-space daemon, because Linux does not provide a user-space API for devices like it does for filesystems with FUSE. Implementing .Nm as a filesystem and then using the loopback mount is a simple workaround. .Pp On Mac OS X, the kernel imposes its own timeout (600 seconds) on FUSE operations, and automatically unmounts the filesystem when this limit is reached. This can happen when a combination of .Fl \-maxRetryPause and/or .Fl \-timeout settings allow HTTP retries to take longer than this value. 
A warning is emitted on startup in this case. .Pp Filesystem size is limited by the maximum allowable size of a single file. .Pp The default block size of 4k is non-optimal from a compression and cost perspective. Typically, users will want a larger value to maximize compression and minimize transaction costs, e.g., 1m. .Sh AUTHOR .An Archie L. Cobbs Aq archie@dellroad.org s3backer-1.5.4/s3backer.h000066400000000000000000000210161354714241400150710ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. 
If you delete * this exception statement from all source files in the program, then * also delete it here. */ #include "config.h" #include #include #include /* Add some queue.h definitions missing on Linux */ #ifndef LIST_FIRST #define LIST_FIRST(head) ((head)->lh_first) #endif #ifndef LIST_NEXT #define LIST_NEXT(item, field) ((item)->field.le_next) #endif #ifndef TAILQ_FIRST #define TAILQ_FIRST(head) ((head)->tqh_first) #endif #ifndef TAILQ_NEXT #define TAILQ_NEXT(item, field) ((item)->field.tqe_next) #endif #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #ifndef FUSE_OPT_KEY_DISCARD #define FUSE_OPT_KEY_DISCARD -4 #endif /* In case we don't have glibc >= 2.18 */ #ifndef FALLOC_FL_KEEP_SIZE #define FALLOC_FL_KEEP_SIZE 0x01 #endif #ifndef FALLOC_FL_PUNCH_HOLE #define FALLOC_FL_PUNCH_HOLE 0x02 #endif /* * Integral type for holding a block number. */ typedef uint32_t s3b_block_t; /* * How many hex digits we will use to print a block number. */ #define S3B_BLOCK_NUM_DIGITS ((int)(sizeof(s3b_block_t) * 2)) /* Logging function type */ typedef void log_func_t(int level, const char *fmt, ...) __attribute__ ((__format__ (__printf__, 2, 3))); /* Block list callback function type */ typedef void block_list_func_t(void *arg, s3b_block_t block_num); /* Block write cancel check function type */ typedef int check_cancel_t(void *arg, s3b_block_t block_num); /* Backing store instance structure */ struct s3backer_store { /* * Create any background pthreads that may be required. * * This must be invoked prior to any of the following functions: * * o block_read * o block_read_part * o block_write * o block_write_part * * It should be invoked after the initial process fork() because it may create pthreads. 
* * Returns: * * 0 Success * Other Other error */ int (*create_threads)(struct s3backer_store *s3b); /* * Get meta-data associated with the underlying store. * * The information we acquire is: * o Block size * o Total size * * Returns: * * 0 Success * ENOENT Information not found * Other Other error */ int (*meta_data)(struct s3backer_store *s3b, off_t *file_sizep, u_int *block_sizep); /* * Read and (optionally) set the mount token. The mount token is any 32 bit integer value greater than zero. * * Previous value, if any, is returned in *old_valuep (if not NULL). A returned value of zero means there was * no previous value. * * new_value can be: * < 0 Don't change anything, just read the existing value, if any * = 0 Clear the flag * > 0 Set flag to new_value * * Returns zero on success or a (positive) errno value on error. */ int (*set_mount_token)(struct s3backer_store *s3b, int32_t *old_valuep, int32_t new_value); /* * Read one block. Never-written-to blocks will return all zeroes. * * If not NULL, 'actual_md5' should be filled in with a value suitable for the 'expect_md5' parameter, * or all zeroes if unknown. * * If 'expect_md5' is not NULL: * - expect_md5 should be the value returned from a previous call to read_block() or write_block(). * - If strict != 0, expect_md5 must be the value returned from the most recent call to write_block() * and the data must match it or else an error is returned. Aside from this check, read normally. * - If strict == 0: * - If block's MD5 does not match expect_md5, expect_md5 is ignored and the block is read normally * - If block's MD5 matches expect_md5, the implementation may either: * - Ignore expect_md5 and read the block normally; OR * - Return EEXIST; the block may or may not also be read normally into *dest * * Returns zero on success or a (positive) errno value on error. * May return ENOTCONN if create_threads() has not yet been invoked. 
*/ int (*read_block)(struct s3backer_store *s3b, s3b_block_t block_num, void *dest, u_char *actual_md5, const u_char *expect_md5, int strict); /* * Read part of one block. * * Returns zero on success or a (positive) errno value on error. * May return ENOTCONN if create_threads() has not yet been invoked. */ int (*read_block_part)(struct s3backer_store *s3b, s3b_block_t block_num, u_int off, u_int len, void *dest); /* * Write one block. * * Passing src == NULL is equivalent to passing a block containing all zeroes. * * If check_cancel != NULL, then it may be invoked periodically during the write. If so, and it ever * returns a non-zero value, then this function may choose to abort the write and return ECONNABORTED. * * Upon successful return, md5 (if not NULL) will get updated with a value suitable for the 'expect_md5' * parameter of read_block(); if the block is all zeroes, md5 will be zeroed. * * Returns zero on success or a (positive) errno value on error. * May return ENOTCONN if create_threads() has not yet been invoked. */ int (*write_block)(struct s3backer_store *s3b, s3b_block_t block_num, const void *src, u_char *md5, check_cancel_t *check_cancel, void *arg); /* * Write part of one block. * * Returns zero on success or a (positive) errno value on error. * May return ENOTCONN if create_threads() has not yet been invoked. */ int (*write_block_part)(struct s3backer_store *s3b, s3b_block_t block_num, u_int off, u_int len, const void *src); /* * Identify all non-zero blocks. * * Returns zero on success or a (positive) errno value on error. */ int (*list_blocks)(struct s3backer_store *s3b, block_list_func_t *callback, void *arg); /* * Sync any dirty data to the underlying data store. */ int (*flush)(struct s3backer_store *s3b); /* * Destroy this instance. 
*/ void (*destroy)(struct s3backer_store *s3b); /* * Implementation private data */ void *data; }; /* gitrev.c */ extern const char *const s3backer_version; /* Issue #64 OpenSSL 1.1.0 compatibility - sslcompat.c */ #if OPENSSL_VERSION_NUMBER < 0x10100000L HMAC_CTX *HMAC_CTX_new(void); void HMAC_CTX_free(HMAC_CTX *ctx); EVP_MD_CTX *EVP_MD_CTX_new(void); void EVP_MD_CTX_free(EVP_MD_CTX *ctx); #endif s3backer-1.5.4/s3backer.spec.in000066400000000000000000000061031354714241400162010ustar00rootroot00000000000000# # Copyright 2008 Archie L. Cobbs. # # Licensed under the Apache License, Version 2.0 (the "License"); you may # not use this file except in compliance with the License. You may obtain # a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, WITHOUT # WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the # License for the specific language governing permissions and limitations # under the License. # # In addition, as a special exception, the copyright holders give # permission to link the code of portions of this program with the # OpenSSL library under certain conditions as described in each # individual source file, and distribute linked combinations including # the two. # # You must obey the GNU General Public License in all respects for all # of the code used other than OpenSSL. If you modify file(s) with this # exception, you may extend this exception to your version of the # file(s), but you are not obligated to do so. If you do not wish to do # so, delete this exception statement from your version. If you delete # this exception statement from all source files in the program, then # also delete it here. 
# Name: s3backer Version: @PACKAGE_VERSION@ Release: 1 License: GNU General Public License, Version 2 Summary: FUSE-based single file backing store via Amazon S3 Group: System/Filesystems Source: https://s3.amazonaws.com/archie-public/%{name}/%{name}-%{version}.tar.gz URL: https://github.com/archiecobbs/%{name} BuildRoot: %{_tmppath}/%{name}-%{version}-root %if 0%{?suse_version} >= 1100 BuildRequires: libcurl-devel >= 7.16.2 BuildRequires: libopenssl-devel %else BuildRequires: curl-devel >= 7.16.2 BuildRequires: openssl-devel %endif BuildRequires: fuse-devel >= 2.5 BuildRequires: zlib-devel %if 0%{?suse_version} < 1000 || 0%{?fedora_version} != 0 || 0%{?centos_version} != 0 BuildRequires: expat %else BuildRequires: libexpat-devel %endif BuildRequires: pkgconfig %description s3backer is a filesystem that contains a single file backed by the Amazon Simple Storage Service (Amazon S3). As a filesystem, it is very simple: it provides a single normal file having a fixed size. Underneath, the file is divided up into blocks, and the content of each block is stored in a unique Amazon S3 object. In other words, what s3backer provides is really more like an S3-backed virtual hard disk device, rather than a filesystem. In typical usage, a `normal' filesystem is mounted on top of the file exported by the s3backer filesystem using a loopback mount (or disk image mount on Mac OS X). %prep %setup -q %build %{configure} make %install rm -rf ${RPM_BUILD_ROOT} %{makeinstall} %files %attr(0755,root,root) %{_bindir}/%{name} %if 0%{?mandriva_version} %attr(0644,root,root) %{_mandir}/man1/%{name}.1 %else %attr(0644,root,root) %{_mandir}/man1/%{name}.1.gz %endif %defattr(0644,root,root,0755) %doc %{_datadir}/doc/packages/%{name} s3backer-1.5.4/sslcompat.c000066400000000000000000000051001354714241400153700ustar00rootroot00000000000000/* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. 
Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. */ #include "s3backer.h" /* Issue #64 OpenSSL 1.1.0 compatibility */ #if OPENSSL_VERSION_NUMBER < 0x10100000L /* * OpenSSL does not allow for HMAC_CTX or EVP_MD_CTX to be allocated on the * stack. Instead it provides a set of _new and _free functions for dynamic * allocation that do not exist in the older versions of the library. For * older OpenSSL versions we provide our own implementations of these missing * functions. 
*/ HMAC_CTX *HMAC_CTX_new(void) { HMAC_CTX *ctx = OPENSSL_malloc(sizeof(*ctx)); if (ctx != NULL) { HMAC_CTX_init(ctx); } return ctx; } void HMAC_CTX_free(HMAC_CTX *ctx) { if (ctx != NULL) { HMAC_CTX_cleanup(ctx); OPENSSL_free(ctx); } } EVP_MD_CTX *EVP_MD_CTX_new(void) { EVP_MD_CTX *ctx = OPENSSL_malloc(sizeof(*ctx)); if (NULL != ctx) { EVP_MD_CTX_init(ctx); } return ctx; } void EVP_MD_CTX_free(EVP_MD_CTX *ctx) { if (ctx != NULL) { EVP_MD_CTX_cleanup(ctx); OPENSSL_free(ctx); } } #endif s3backer-1.5.4/test_io.c000066400000000000000000000324071354714241400150430ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. 
If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. */ #include "s3backer.h" #include "http_io.h" #include "block_part.h" #include "test_io.h" /* Do we want random errors? */ #define RANDOM_ERROR_PERCENT 0 /* Internal state */ struct test_io_private { struct http_io_conf *config; u_char zero_block[0]; }; /* s3backer_store functions */ static int test_io_create_threads(struct s3backer_store *s3b); static int test_io_meta_data(struct s3backer_store *s3b, off_t *file_sizep, u_int *block_sizep); static int test_io_set_mount_token(struct s3backer_store *s3b, int32_t *old_valuep, int32_t new_value); static int test_io_read_block(struct s3backer_store *s3b, s3b_block_t block_num, void *dest, u_char *actual_md5, const u_char *expect_md5, int strict); static int test_io_write_block(struct s3backer_store *s3b, s3b_block_t block_num, const void *src, u_char *md5, check_cancel_t *check_cancel, void *check_cancel_arg); static int test_io_read_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int off, u_int len, void *dest); static int test_io_write_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int off, u_int len, const void *src); static int test_io_list_blocks(struct s3backer_store *s3b, block_list_func_t *callback, void *arg); static int test_io_flush(struct s3backer_store *s3b); static void test_io_destroy(struct s3backer_store *s3b); /* * Constructor * * On error, returns NULL and sets `errno'. 
*/ struct s3backer_store * test_io_create(struct http_io_conf *config) { struct s3backer_store *s3b; struct test_io_private *priv; /* Initialize structures */ if ((s3b = calloc(1, sizeof(*s3b))) == NULL) return NULL; s3b->create_threads = test_io_create_threads; s3b->meta_data = test_io_meta_data; s3b->set_mount_token = test_io_set_mount_token; s3b->read_block = test_io_read_block; s3b->write_block = test_io_write_block; s3b->read_block_part = test_io_read_block_part; s3b->write_block_part = test_io_write_block_part; s3b->list_blocks = test_io_list_blocks; s3b->flush = test_io_flush; s3b->destroy = test_io_destroy; if ((priv = calloc(1, sizeof(*priv) + config->block_size)) == NULL) { free(s3b); errno = ENOMEM; return NULL; } priv->config = config; s3b->data = priv; /* Random initialization */ srandom((u_int)time(NULL)); /* Done */ return s3b; } static int test_io_create_threads(struct s3backer_store *s3b) { return 0; } static int test_io_meta_data(struct s3backer_store *s3b, off_t *file_sizep, u_int *block_sizep) { return 0; } static int test_io_set_mount_token(struct s3backer_store *s3b, int32_t *old_valuep, int32_t new_value) { if (old_valuep != NULL) *old_valuep = 0; return 0; } static int test_io_flush(struct s3backer_store *const s3b) { return 0; } static void test_io_destroy(struct s3backer_store *const s3b) { struct test_io_private *const priv = s3b->data; free(priv); free(s3b); } static int test_io_read_block(struct s3backer_store *const s3b, s3b_block_t block_num, void *dest, u_char *actual_md5, const u_char *expect_md5, int strict) { struct test_io_private *const priv = s3b->data; struct http_io_conf *const config = priv->config; u_char md5[MD5_DIGEST_LENGTH]; char path[PATH_MAX]; int zero_block; MD5_CTX ctx; int fd; int r; /* Logging */ if (config->debug) (*config->log)(LOG_DEBUG, "test_io: read %0*jx started", S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num); /* Random delay */ usleep((random() % 200) * 1000); /* Random error */ if ((random() % 100) < 
RANDOM_ERROR_PERCENT) { (*config->log)(LOG_ERR, "test_io: random failure reading %0*jx", S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num); return EAGAIN; } /* Generate path */ snprintf(path, sizeof(path), "%s/%s%0*jx", config->bucket, config->prefix, S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num); /* Read block */ if ((fd = open(path, O_RDONLY)) != -1) { int total; /* Read file */ for (total = 0; total < config->block_size; total += r) { if ((r = read(fd, (char *)dest + total, config->block_size - total)) == -1) { r = errno; (*config->log)(LOG_ERR, "can't read %s: %s", path, strerror(r)); close(fd); return r; } if (r == 0) break; } close(fd); /* Check for short read */ if (total != config->block_size) { (*config->log)(LOG_ERR, "%s: file is truncated (only read %d out of %u bytes)", path, total, config->block_size); return EIO; } /* Done */ r = 0; } else r = errno; /* Convert ENOENT into a read of all zeroes */ if ((zero_block = (r == ENOENT))) { memset(dest, 0, config->block_size); r = 0; } /* Check for other error */ if (r != 0) { (*config->log)(LOG_ERR, "can't open %s: %s", path, strerror(r)); return r; } /* Compute MD5 */ if (zero_block) memset(md5, 0, MD5_DIGEST_LENGTH); else { MD5_Init(&ctx); MD5_Update(&ctx, dest, config->block_size); MD5_Final(md5, &ctx); } if (actual_md5 != NULL) memcpy(actual_md5, md5, MD5_DIGEST_LENGTH); /* Check expected MD5 */ if (expect_md5 != NULL) { const int match = memcmp(md5, expect_md5, MD5_DIGEST_LENGTH) == 0; if (strict) { if (!match) { (*config->log)(LOG_ERR, "%s: wrong MD5 checksum?! 
%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x" " != %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x", path, (u_int)md5[0], (u_int)md5[1], (u_int)md5[2], (u_int)md5[3], (u_int)md5[4], (u_int)md5[5], (u_int)md5[6], (u_int)md5[7], (u_int)md5[8], (u_int)md5[9], (u_int)md5[10], (u_int)md5[11], (u_int)md5[12], (u_int)md5[13], (u_int)md5[14], (u_int)md5[15], (u_int)expect_md5[0], (u_int)expect_md5[1], (u_int)expect_md5[2], (u_int)expect_md5[3], (u_int)expect_md5[4], (u_int)expect_md5[5], (u_int)expect_md5[6], (u_int)expect_md5[7], (u_int)expect_md5[8], (u_int)expect_md5[9], (u_int)expect_md5[10], (u_int)expect_md5[11], (u_int)expect_md5[12], (u_int)expect_md5[13], (u_int)expect_md5[14], (u_int)expect_md5[15]); return EINVAL; } } else if (match) r = EEXIST; } /* Logging */ if (config->debug) { (*config->log)(LOG_DEBUG, "test_io: read %0*jx complete, MD5 %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%s%s", S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num, (u_int)md5[0], (u_int)md5[1], (u_int)md5[2], (u_int)md5[3], (u_int)md5[4], (u_int)md5[5], (u_int)md5[6], (u_int)md5[7], (u_int)md5[8], (u_int)md5[9], (u_int)md5[10], (u_int)md5[11], (u_int)md5[12], (u_int)md5[13], (u_int)md5[14], (u_int)md5[15], zero_block ? " (zero)" : "", r == EEXIST ? 
" (expected md5 match)" : ""); } /* Done */ return r; } static int test_io_write_block(struct s3backer_store *const s3b, s3b_block_t block_num, const void *src, u_char *caller_md5, check_cancel_t *check_cancel, void *check_cancel_arg) { struct test_io_private *const priv = s3b->data; struct http_io_conf *const config = priv->config; char block_hash_buf[S3B_BLOCK_NUM_DIGITS + 2]; u_char md5[MD5_DIGEST_LENGTH]; char temp[PATH_MAX]; char path[PATH_MAX]; MD5_CTX ctx; int total; int fd; int r; /* Check for zero block */ if (src != NULL && memcmp(src, priv->zero_block, config->block_size) == 0) src = NULL; /* Compute MD5 */ if (src != NULL) { MD5_Init(&ctx); MD5_Update(&ctx, src, config->block_size); MD5_Final(md5, &ctx); } else memset(md5, 0, MD5_DIGEST_LENGTH); /* Return MD5 to caller */ if (caller_md5 != NULL) memcpy(caller_md5, md5, MD5_DIGEST_LENGTH); /* Logging */ if (config->debug) { (*config->log)(LOG_DEBUG, "test_io: write %0*jx started, MD5 %02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%s", S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num, (u_int)md5[0], (u_int)md5[1], (u_int)md5[2], (u_int)md5[3], (u_int)md5[4], (u_int)md5[5], (u_int)md5[6], (u_int)md5[7], (u_int)md5[8], (u_int)md5[9], (u_int)md5[10], (u_int)md5[11], (u_int)md5[12], (u_int)md5[13], (u_int)md5[14], (u_int)md5[15], src == NULL ? 
" (zero block)" : ""); } /* Random delay */ usleep((random() % 200) * 1000); /* Random error */ if ((random() % 100) < RANDOM_ERROR_PERCENT) { (*config->log)(LOG_ERR, "test_io: random failure writing %0*jx", S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num); return EAGAIN; } /* Generate path */ http_io_format_block_hash(config, block_hash_buf, sizeof(block_hash_buf), block_num); snprintf(path, sizeof(path), "%s/%s%s%0*jx", config->bucket, config->prefix, block_hash_buf, S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num); /* Delete zero blocks */ if (src == NULL) { if (unlink(path) == -1 && errno != ENOENT) { r = errno; (*config->log)(LOG_ERR, "can't unlink %s: %s", path, strerror(r)); return r; } return 0; } /* Write into temporary file */ snprintf(temp, sizeof(temp), "%s.XXXXXX", path); if ((fd = mkstemp(temp)) == -1) { r = errno; (*config->log)(LOG_ERR, "%s: %s", temp, strerror(r)); return r; } for (total = 0; total < config->block_size; total += r) { if ((r = write(fd, (const char *)src + total, config->block_size - total)) == -1) { r = errno; (*config->log)(LOG_ERR, "can't write %s: %s", temp, strerror(r)); close(fd); (void)unlink(temp); return r; } } close(fd); /* Rename file */ if (rename(temp, path) == -1) { r = errno; (*config->log)(LOG_ERR, "can't rename %s: %s", temp, strerror(r)); (void)unlink(temp); return r; } /* Logging */ if (config->debug) (*config->log)(LOG_DEBUG, "test_io: write %0*jx complete", S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num); /* Done */ return 0; } static int test_io_read_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int off, u_int len, void *dest) { struct test_io_private *const priv = s3b->data; struct http_io_conf *const config = priv->config; return block_part_read_block_part(s3b, block_num, config->block_size, off, len, dest); } static int test_io_write_block_part(struct s3backer_store *s3b, s3b_block_t block_num, u_int off, u_int len, const void *src) { struct test_io_private *const priv = s3b->data; struct http_io_conf 
*const config = priv->config; return block_part_write_block_part(s3b, block_num, config->block_size, off, len, src); } static int test_io_list_blocks(struct s3backer_store *s3b, block_list_func_t *callback, void *arg) { struct test_io_private *const priv = s3b->data; struct http_io_conf *const config = priv->config; s3b_block_t block_num; struct dirent *dent; DIR *dir; int i; /* Open directory */ if ((dir = opendir(config->bucket)) == NULL) return errno; /* Scan directory */ for (i = 0; (dent = readdir(dir)) != NULL; i++) { if (http_io_parse_block(config, dent->d_name, &block_num) == 0) (*callback)(arg, block_num); } /* Close directory */ closedir(dir); /* Done */ return 0; } s3backer-1.5.4/test_io.h000066400000000000000000000032431354714241400150440ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. 
If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. If you delete * this exception statement from all source files in the program, then * also delete it here. */ /* test_io.c */ extern struct s3backer_store *test_io_create(struct http_io_conf *config); s3backer-1.5.4/tester.c000066400000000000000000000167321354714241400147060ustar00rootroot00000000000000 /* * s3backer - FUSE-based single file backing store via Amazon S3 * * Copyright 2008-2011 Archie L. Cobbs * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA * 02110-1301, USA. * * In addition, as a special exception, the copyright holders give * permission to link the code of portions of this program with the * OpenSSL library under certain conditions as described in each * individual source file, and distribute linked combinations including * the two. * * You must obey the GNU General Public License in all respects for all * of the code used other than OpenSSL. If you modify file(s) with this * exception, you may extend this exception to your version of the * file(s), but you are not obligated to do so. If you do not wish to do * so, delete this exception statement from your version. 
If you delete * this exception statement from all source files in the program, then * also delete it here. */

#include "s3backer.h"
#include "block_cache.h"
#include "ec_protect.h"
#include "fuse_ops.h"
#include "http_io.h"
#include "s3b_config.h"

/* Definitions */
#define NUM_THREADS     10      /* number of concurrent reader/writer threads */
#define DELAY_BASE      0       /* minimum per-iteration sleep (milliseconds) */
#define DELAY_RANGE     50      /* extra random sleep added to DELAY_BASE (milliseconds) */
#define READ_FACTOR     2       /* an iteration reads unless random() % READ_FACTOR == 0, i.e. ~1/2 read */
#define ZERO_FACTOR     3       /* a write is a zero block unless random() % ZERO_FACTOR == 0, i.e. ~2/3 zero */

/* Block states */
struct block_state {
    u_int   writing;            // block is currently being written by a thread
    u_int   counter;            // counts writes to the block
    u_int   content;            // most recently written content
};

/* Internal functions */
static void *thread_main(void *arg);
static void logit(int id, const char *fmt, ...) __attribute__ ((__format__ (__printf__, 2, 3)));
static uint64_t get_time(void);

/* Internal variables */
static void *zero_block;                    /* one block_size buffer of zeroes */
static pthread_mutex_t mutex;               /* protects the blocks[] state array */
static struct s3b_config *config;
static struct s3backer_store *store;
static struct block_state *blocks;          /* per-block expected state, indexed by block number */
static uint64_t start_time;                 /* start timestamp (ms) used for relative log times */

/*
 * Stress tester entry point: parses the s3backer config from argv, opens
 * the store, zeroes every block, then spawns NUM_THREADS threads that
 * randomly read and write blocks while verifying content, for 24 hours.
 */
int
main(int argc, char **argv)
{
    pthread_t thread;
    int i;
    int r;

    /* Get configuration */
    if ((config = s3backer_get_config(argc, argv)) == NULL)
        exit(1);
    /* a block must at least hold the u_int content tag written by thread_main() */
    if (config->block_size < sizeof(u_int))
        err(1, "block size too small");

    /* Open store */
    if ((store = s3backer_create_store(config)) == NULL)
        err(1, "s3backer_create_store");

    /* Allocate block states */
    if ((blocks = calloc(config->num_blocks, sizeof(*blocks))) == NULL)
        err(1, "calloc");

    /* Create zero block */
    if ((zero_block = calloc(1, config->block_size)) == NULL)
        err(1, "calloc");

    /* Random initialization */
    srandom((u_int)time(NULL));
    pthread_mutex_init(&mutex, NULL);
    start_time = get_time();

    /* Zero all blocks so the on-disk state matches the zeroed blocks[] array */
    for (i = 0; i < config->num_blocks; i++) {
        printf("zeroing block %0*jx\n", S3B_BLOCK_NUM_DIGITS, (uintmax_t)i);
        if ((r = (*store->write_block)(store, i, zero_block, NULL, NULL, NULL)) != 0)
            err(1, "write error");
    }

    /* Create threads (handles are not kept; threads run until the process exits) */
    for (i = 0; i < NUM_THREADS; i++)
        pthread_create(&thread, NULL, thread_main, (void *)(intptr_t)i);

    /* Run for a day */
    sleep(24 * 60 * 60);
    return 0;
}

/*
 * Worker thread: loop forever, sleeping a random interval, then either
 * reading a random block (verifying its content against the recorded
 * state when no concurrent write could have interfered) or writing a
 * random block (mostly zeroes, sometimes a random u_int tag in the first
 * four bytes) and recording what was written. `arg' is the thread id,
 * used only for log output.
 */
static void *
thread_main(void *arg)
{
    const int id = (int)(intptr_t)arg;
    u_char data[config->block_size];
    s3b_block_t block_num;
    int millis;
    int r;

    /* Loop */
    while (1) {

        // Sleep
        millis = DELAY_BASE + (random() % DELAY_RANGE);
        usleep(millis * 1000);

        // Pick a random block
        block_num = random() % config->num_blocks;

        // Randomly read or write it
        if ((random() % READ_FACTOR) != 0) {
            struct block_state *const state = &blocks[block_num];
            struct block_state before;
            struct block_state after;

            // Snapshot block state
            pthread_mutex_lock(&mutex);
            memcpy(&before, state, sizeof(before));
            pthread_mutex_unlock(&mutex);

            // Do the read
            logit(id, "rd %0*jx START\n", S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num);
            if ((r = (*store->read_block)(store, block_num, data, NULL, NULL, 0)) != 0) {
                logit(id, "****** READ ERROR: %s", strerror(r));
                continue;
            }

            // Snapshot block state again
            pthread_mutex_lock(&mutex);
            memcpy(&after, state, sizeof(before));
            pthread_mutex_unlock(&mutex);

            // Verify content, but only if no write occurred while we were reading
            // (only the leading sizeof(u_int) content tag is compared; the rest of
            // the block is always written as zeroes)
            if (before.writing == 0 && after.writing == 0 && before.counter == after.counter) {
                if (memcmp(data, &before.content, sizeof(before.content)) != 0) {
                    logit(id, "got wrong content block %0*jx", S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num);
                    exit(1);
                }
            }
            logit(id, "rd %0*jx content=0x%02x%02x%02x%02x COMPLETE\n",
              S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num, data[0], data[1], data[2], data[3]);
        } else {
            struct block_state *const state = &blocks[block_num];
            u_int content;

            // Update block state
            pthread_mutex_lock(&mutex);
            if (state->writing) {                   // only one writer at a time
                pthread_mutex_unlock(&mutex);
                continue;
            }
            state->writing = 1;
            pthread_mutex_unlock(&mutex);

            // Write block: mostly zero blocks, sometimes a random content tag
            content = (random() % ZERO_FACTOR) != 0 ? 0 : (u_int)random();
            memcpy(data, &content, sizeof(content));
            memset(data + sizeof(content), 0, config->block_size - sizeof(content));
            logit(id, "wr %0*jx content=0x%02x%02x%02x%02x START\n",
              S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num, data[0], data[1], data[2], data[3]);
            if ((r = (*store->write_block)(store, block_num, data, NULL, NULL, NULL)) != 0)
                logit(id, "****** WRITE ERROR: %s", strerror(r));
            logit(id, "wr %0*jx content=0x%02x%02x%02x%02x %s%s\n",
              S3B_BLOCK_NUM_DIGITS, (uintmax_t)block_num, data[0], data[1], data[2], data[3],
              r != 0 ? "FAILED: " : "COMPLETE", r != 0 ? strerror(r) : "");

            // Update block state (content/counter only advance on successful writes)
            pthread_mutex_lock(&mutex);
            if (r == 0) {
                state->counter++;
                state->content = content;
            }
            state->writing = 0;
            pthread_mutex_unlock(&mutex);
        }
    }
}

/*
 * Log one message to stdout, prefixed with seconds.millis since start
 * and the thread id. Caller supplies any trailing newline.
 */
static void
logit(int id, const char *fmt, ...)
{
    uint64_t timestamp = get_time() - start_time;
    va_list args;

    printf("%u.%03u [%02d] ", (u_int)(timestamp / 1000), (u_int)(timestamp % 1000), id);
    va_start(args, fmt);
    vfprintf(stdout, fmt, args);
    va_end(args);
}

/* Return the current wall-clock time in milliseconds since the epoch. */
static uint64_t
get_time(void)
{
    struct timeval tv;

    gettimeofday(&tv, NULL);
    return (uint64_t)tv.tv_sec * 1000 + (uint64_t)tv.tv_usec / 1000;
}