pax_global_header00006660000000000000000000000064132411424630014512gustar00rootroot0000000000000052 comment=5c2cb51f9e29f7ac1ae509681e5a4b4014404c5b s3transfer-0.1.13/000077500000000000000000000000001324114246300136665ustar00rootroot00000000000000s3transfer-0.1.13/.changes/000077500000000000000000000000001324114246300153545ustar00rootroot00000000000000s3transfer-0.1.13/.changes/0.0.1.json000066400000000000000000000002671324114246300167100ustar00rootroot00000000000000[ { "category": "manager", "description": "Add boto3 s3 transfer logic to package. (`issue 2 `__)", "type": "feature" } ] s3transfer-0.1.13/.changes/0.1.0.json000066400000000000000000000015271324114246300167100ustar00rootroot00000000000000[ { "category": "copy", "description": "Add support for managed copies.", "type": "feature" }, { "category": "download", "description": "Add support for downloading to a filename, seekable file-like object, and nonseekable file-like object.", "type": "feature" }, { "category": "general", "description": "Add ``TransferManager`` class. All public functionality for ``s3transfer`` is exposed through this class.", "type": "feature" }, { "category": "subscribers", "description": "Add subscriber interface. Currently supports on_queued, on_progress, and on_done status changes.", "type": "feature" }, { "category": "upload", "description": "Add support for uploading a filename, seekable file-like object, and nonseekable file-like object.", "type": "feature" } ]s3transfer-0.1.13/.changes/0.1.1.json000066400000000000000000000002751324114246300167100ustar00rootroot00000000000000[ { "category": "deadlock", "description": "Fix deadlock issue described here: https://bugs.python.org/issue20319 with using concurrent.futures.wait", "type": "bugfix" } ]s3transfer-0.1.13/.changes/0.1.10.json000066400000000000000000000002411324114246300167610ustar00rootroot00000000000000[ { "category": "``TransferManager``", "description": "Expose ability to use own executor class for ``TransferManager``", "type": "feature" } ]s3transfer-0.1.13/.changes/0.1.11.json000066400000000000000000000003161324114246300167650ustar00rootroot00000000000000[ { "category": "TransferManager", "description": "Properly handle unicode exceptions in the context manager. 
Fixes `#85 `__", "type": "bugfix" } ]s3transfer-0.1.13/.changes/0.1.12.json000066400000000000000000000002751324114246300167720ustar00rootroot00000000000000[ { "category": "``max_bandwidth``", "description": "Add ability to set maximum bandwidth consumption for streaming of S3 uploads and downloads", "type": "enhancement" } ]s3transfer-0.1.13/.changes/0.1.13.json000066400000000000000000000003431324114246300167670ustar00rootroot00000000000000[ { "category": "``RequestPayer``", "description": "Plumb ``RequestPayer` argument to the ``CompleteMultipartUpload` operation (`#103 `__).", "type": "bugfix" } ]s3transfer-0.1.13/.changes/0.1.2.json000066400000000000000000000002431324114246300167040ustar00rootroot00000000000000[ { "category": "download", "description": "Patch memory leak related to unnecessarily holding onto futures for downloads.", "type": "bugfix" } ]s3transfer-0.1.13/.changes/0.1.3.json000066400000000000000000000005161324114246300167100ustar00rootroot00000000000000[ { "category": "delete", "description": "Add a ``.delete()`` method to the transfer manager.", "type": "feature" }, { "category": "seekable upload", "description": "Fix issue where seeked position of seekable file for a nonmultipart upload was not being taken into account.", "type": "bugfix" } ]s3transfer-0.1.13/.changes/0.1.4.json000066400000000000000000000004521324114246300167100ustar00rootroot00000000000000[ { "category": "chunksize", "description": "Automatically adjust the chunksize if it doesn't meet S3s requirements.", "type": "feature" }, { "category": "Download", "description": "Add support for downloading to special UNIX file by name", "type": "bugfix" } ]s3transfer-0.1.13/.changes/0.1.5.json000066400000000000000000000004031324114246300167050ustar00rootroot00000000000000[ { "category": "Cntrl-C", "description": "Fix issue of hangs when Cntrl-C happens for many queued transfers", "type": "bugfix" }, { "category": "cancel", "description": "Expose messages for cancels", "type": "feature" } ]s3transfer-0.1.13/.changes/0.1.6.json000066400000000000000000000002241324114246300167070ustar00rootroot00000000000000[ { "category": "download", "description": "Fix issue where S3 Object was not downloaded to disk when empty", "type": "bugfix" } ]s3transfer-0.1.13/.changes/0.1.7.json000066400000000000000000000002461324114246300167140ustar00rootroot00000000000000[ { "category": "TransferManager", "description": "Fix memory leak when using same client to create multiple TransferManagers", "type": "bugfix" } ]s3transfer-0.1.13/.changes/0.1.8.json000066400000000000000000000001631324114246300167130ustar00rootroot00000000000000[ { "category": "download", "description": "Support downloading to FIFOs.", "type": "feature" } ]s3transfer-0.1.13/.changes/0.1.9.json000066400000000000000000000002261324114246300167140ustar00rootroot00000000000000[ { "category": "``TransferFuture``", "description": "Add support for setting exceptions on transfer future", "type": "feature" } ] s3transfer-0.1.13/.coveragerc000066400000000000000000000000571324114246300160110ustar00rootroot00000000000000[run] branch = True include = s3transfer/* s3transfer-0.1.13/.gitignore000066400000000000000000000005641324114246300156630ustar00rootroot00000000000000*.py[co] *.DS_Store # Packages *.egg *.egg-info dist build eggs parts var sdist develop-eggs .installed.cfg # Installer logs pip-log.txt # Unit test / coverage reports .coverage .tox .cache #Translations *.mo #Mr Developer .mr.developer.cfg # Emacs backup files *~ # Eclipse IDE /.project /.pydevproject # IDEA IDE 
.idea* src/ # Completions Index completions.idx s3transfer-0.1.13/.travis.yml000066400000000000000000000002541324114246300160000ustar00rootroot00000000000000language: python python: - "2.6" - "2.7" - "3.3" - "3.4" - "3.5" - "3.6" sudo: false install: - python scripts/ci/install script: python scripts/ci/run-tests s3transfer-0.1.13/ACCEPTANCE_TESTS.rst000066400000000000000000000141111324114246300167660ustar00rootroot00000000000000S3 Acceptance Tests =================== List of all of the various scenarios that need to be handled in implementing a S3 transfer manager. Upload Tests ------------ General ~~~~~~~ * [x] Upload single nonmultipart file * [x] Upload single multipart file * [x] Upload multiple nonmultipart files * [x] Upload multiple multipart files * [x] Failed/cancelled multipart upload is aborted and leaves no orphaned parts especially for: * [x] Failure of ``UploadPart`` * [x] Failure of ``CompleteMultipartUpload`` * [x] Failure unrelated to making an API call during upload such as read failure * [ ] Ctrl-C of any upload does not hang and the wait time is ``avg(transfer_time_iter_chunk) * some_margin`` * [ ] Upload empty file * [ ] Upload nonseekable nonmultipart binary stream * [ ] Upload nonseekable multipart binary stream Region ~~~~~~ * [ ] Provide no or incorrect region for sig4 and be able to redirect request in fewest amount of calls as possible for multipart upload. Validation ~~~~~~~~~~ * [ ] Before upload, validate upload size of file is less than 5 TB. * [ ] Before upload, modify chunksize to an acceptable size when needed: * [ ] Make chunksize 5 MB when the provided chunksize is less * [ ] Make chunksize 5 GB when the provided chunksize is more * [ ] Increase chunksize till the maximum number of parts for multipart upload is less than or equal to 10,000 parts * [ ] Before upload, ensure upload is nonmultipart if the file size is less than 5 MB no matter the provided multipart threshold. Extra Parameters ~~~~~~~~~~~~~~~~ * [ ] Upload multipart and nonmultipart file with any of the following properties: * [x] ACL's * [x] CacheControl * [x] ContentDisposition * [x] ContentEncoding * [x] ContentLanguage * [x] ContentType * [x] Expires * [x] Metadata * [x] Grants * [x] StorageClass * [x] SSE (including KMS) * [ ] Website Redirect * [x] Upload multipart and nonmultipart file with a sse-c key * [x] Upload multipart and nonmultipart file with requester pays Performance ~~~~~~~~~~~ * [ ] Maximum memory usage does not grow linearly with linearly increasing file size for any upload. * [ ] Maximum memory usage does not grow linearly with linearly increasing number of uploads. Download Tests -------------- General ~~~~~~~ * [x] Download single nonmultipart object * [x] Download single multipart object * [x] Download multiple nonmultipart objects * [x] Download multiple multipart objects * [x] Download of any object is written to temporary file and renamed to final filename once the object is completely downloaded * [x] Failed downloads of any object cleans up temporary file * [x] Provide a transfer size for any download in lieu of using HeadObject * [ ] Ctrl-C of any download does not hang and the wait time is ``avg(transfer_time_iter_chunk) * some_margin`` * [ ] Download nonmultipart object as nonseekable binary stream * [ ] Download multipart object as nonseekable binary stream Region ~~~~~~ * [ ] Provide no or incorrect region for sig4 and be able to redirect request in fewest amount of calls as possible for multipart download. 
Retry Logic ~~~~~~~~~~~ * [x] Retry on connection related errors when downloading data * [ ] Compare MD5 to ``ETag`` and retry for mismatches if all following scenarios are met: * If MD5 is available * Response does not have a ``ServerSideEncryption`` header equal to ``aws:kms`` * Response does not have ``SSECustomerAlgorithm`` * ``ETag`` does not have ``-`` in its value indicating a multipart transfer Extra Parameters ~~~~~~~~~~~~~~~~ * [x] Download an object of a specific version * [x] Download an object encrypted with sse-c * [x] Download an object using requester pays Performance ~~~~~~~~~~~ * [ ] Maximum memory usage does not grow linearly with linearly increasing file size for any download. * [ ] Maximum memory usage does not grow linearly with linearly increasing number of downloads. Copy Tests ---------- General ~~~~~~~ * [x] Copy single nonmultipart object * [x] Copy single multipart object * [x] Copy multiple nonmultipart objects * [x] Copy multiple multipart objects * [x] Provide a transfer size for any copy in lieu of using HeadObject. * [x] Failed/cancelled multipart copy is aborted and leaves no orphaned parts * [ ] Ctrl-C of any copy does not hang and the wait time is ``avg(transfer_time_iter_chunk) * some_margin`` Region ~~~~~~ * [ ] Provide no or incorrect region for sig4 and be able to redirect request in fewest amount of calls as possible for multipart copy. Validation ~~~~~~~~~~ * [ ] Before copy, modify chunksize to an acceptable size when needed: * [ ] Make chunksize 5 MB when the provided chunksize is less * [ ] Make chunksize 5 GB when the provided chunksize is more * [ ] Increase chunksize till the maximum number of parts for multipart copy is less than or equal to 10,000 parts * [ ] Before copy, ensure copy is nonmultipart if the file size is less than 5 MB no matter the provided multipart threshold. Extra Parameters ~~~~~~~~~~~~~~~~ * [ ] Copy multipart and nonmultipart file with any of the following properties: * [x] ACL's * [x] CacheControl * [x] ContentDisposition * [x] ContentEncoding * [x] ContentLanguage * [x] ContentType * [x] Expires * [x] Metadata * [x] Grants * [x] StorageClass * [x] SSE (including KMS) * [ ] Website Redirect * [x] Copy multipart and nonmultipart copies with copy source parameters: * [x] CopySourceIfMatch * [x] CopySourceIfModifiedSince * [x] CopySourceIfNoneMatch * [x] CopySourceIfUnmodifiedSince * [x] Copy nonmultipart object with metadata directive and do not use metadata directive for multipart object * [x] Copy multipart and nonmultipart objects of a specific version * [x] Copy multipart and nonmultipart objects using requester pays * [x] Copy multipart and nonmultipart objects using a sse-c key * [x] Copy multipart and nonmultipart objects using a copy source sse-c key * [x] Copy multipart and nonmultipart objects using a copy source sse-c key and sse-c key Cross-Bucket ~~~~~~~~~~~~ * [ ] Copy single nonmultipart object across sigv4 regions * [ ] Copy single multipart object across sigv4 regions s3transfer-0.1.13/CHANGELOG.rst000066400000000000000000000046611324114246300157160ustar00rootroot00000000000000========= CHANGELOG ========= 0.1.13 ====== * bugfix:``RequestPayer``: Plumb ``RequestPayer` argument to the ``CompleteMultipartUpload` operation (`#103 `__). 0.1.12 ====== * enhancement:``max_bandwidth``: Add ability to set maximum bandwidth consumption for streaming of S3 uploads and downloads 0.1.11 ====== * bugfix:TransferManager: Properly handle unicode exceptions in the context manager. 
Fixes `#85 `__ 0.1.10 ====== * feature:``TransferManager``: Expose ability to use own executor class for ``TransferManager`` 0.1.9 ===== * feature:``TransferFuture``: Add support for setting exceptions on transfer future 0.1.8 ===== * feature:download: Support downloading to FIFOs. 0.1.7 ===== * bugfix:TransferManager: Fix memory leak when using same client to create multiple TransferManagers 0.1.6 ===== * bugfix:download: Fix issue where S3 Object was not downloaded to disk when empty 0.1.5 ===== * bugfix:Cntrl-C: Fix issue of hangs when Cntrl-C happens for many queued transfers * feature:cancel: Expose messages for cancels 0.1.4 ===== * feature:chunksize: Automatically adjust the chunksize if it doesn't meet S3s requirements. * bugfix:Download: Add support for downloading to special UNIX file by name 0.1.3 ===== * feature:delete: Add a ``.delete()`` method to the transfer manager. * bugfix:seekable upload: Fix issue where seeked position of seekable file for a nonmultipart upload was not being taken into account. 0.1.2 ===== * bugfix:download: Patch memory leak related to unnecessarily holding onto futures for downloads. 0.1.1 ===== * bugfix:deadlock: Fix deadlock issue described here: https://bugs.python.org/issue20319 with using concurrent.futures.wait 0.1.0 ===== * feature:copy: Add support for managed copies. * feature:download: Add support for downloading to a filename, seekable file-like object, and nonseekable file-like object. * feature:general: Add ``TransferManager`` class. All public functionality for ``s3transfer`` is exposed through this class. * feature:subscribers: Add subscriber interface. Currently supports on_queued, on_progress, and on_done status changes. * feature:upload: Add support for uploading a filename, seekable file-like object, and nonseekable file-like object. 0.0.1 ===== * feature:manager: Add boto3 s3 transfer logic to package. (`issue 2 `__) s3transfer-0.1.13/LICENSE.txt000066400000000000000000000261371324114246300155220ustar00rootroot00000000000000 Apache License Version 2.0, January 2004 http://www.apache.org/licenses/ TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 1. Definitions. "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 
"Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: (a) You must give any other recipients of the Work or Derivative Works a copy of this License; and (b) You must cause any modified files to carry prominent notices stating that You changed the files; and (c) You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and (d) If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. END OF TERMS AND CONDITIONS APPENDIX: How to apply the Apache License to your work. To apply the Apache License to your work, attach the following boilerplate notice, with the fields enclosed by brackets "[]" replaced with your own identifying information. (Don't include the brackets!) The text should be enclosed in the appropriate comment syntax for the file format. We also recommend that a file or class name and description of purpose be included on the same "printed page" as the copyright notice for easier identification within third-party archives. Copyright [yyyy] [name of copyright owner] Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0 Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. s3transfer-0.1.13/MANIFEST.in000066400000000000000000000001211324114246300154160ustar00rootroot00000000000000include README.rst include LICENSE.txt include requirements-test.txt graft tests s3transfer-0.1.13/NOTICE.txt000066400000000000000000000001231324114246300154040ustar00rootroot00000000000000s3transfer Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. s3transfer-0.1.13/README.rst000066400000000000000000000011271324114246300153560ustar00rootroot00000000000000===================================================== s3transfer - An Amazon S3 Transfer Manager for Python ===================================================== S3transfer is a Python library for managing Amazon S3 transfers. .. note:: This project is not currently GA. If you are planning to use this code in production, make sure to lock to a minor version as interfaces may break from minor version to minor version. 
For a basic, stable interface of s3transfer, try the interfaces exposed in `boto3 `__ s3transfer-0.1.13/requirements-dev.txt000066400000000000000000000000761324114246300177310ustar00rootroot00000000000000-r requirements-test.txt psutil>=4.1.0,<5.0.0 tabulate==0.7.5 s3transfer-0.1.13/requirements-test.txt000066400000000000000000000004051324114246300201260ustar00rootroot00000000000000-e git://github.com/boto/botocore.git@develop#egg=botocore nose==1.3.3 mock==1.3.0 coverage==4.0.1 wheel==0.24.0 # Note you need at least pip --version of 6.0 or # higher to be able to pick on these version specifiers. unittest2==0.5.1; python_version == '2.6' s3transfer-0.1.13/s3transfer/000077500000000000000000000000001324114246300157605ustar00rootroot00000000000000s3transfer-0.1.13/s3transfer/__init__.py000066400000000000000000000670001324114246300200740ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. """Abstractions over S3's upload/download operations. This module provides high level abstractions for efficient uploads/downloads. It handles several things for the user: * Automatically switching to multipart transfers when a file is over a specific size threshold * Uploading/downloading a file in parallel * Throttling based on max bandwidth * Progress callbacks to monitor transfers * Retries. While botocore handles retries for streaming uploads, it is not possible for it to handle retries for streaming downloads. This module handles retries for both cases so you don't need to implement any retry logic yourself. This module has a reasonable set of defaults. It also allows you to configure many aspects of the transfer process including: * Multipart threshold size * Max parallel downloads * Max bandwidth * Socket timeouts * Retry amounts There is no support for s3->s3 multipart copies at this time. .. _ref_s3transfer_usage: Usage ===== The simplest way to use this module is: .. code-block:: python client = boto3.client('s3', 'us-west-2') transfer = S3Transfer(client) # Upload /tmp/myfile to s3://bucket/key transfer.upload_file('/tmp/myfile', 'bucket', 'key') # Download s3://bucket/key to /tmp/myfile transfer.download_file('bucket', 'key', '/tmp/myfile') The ``upload_file`` and ``download_file`` methods also accept ``**kwargs``, which will be forwarded through to the corresponding client operation. Here are a few examples using ``upload_file``:: # Making the object public transfer.upload_file('/tmp/myfile', 'bucket', 'key', extra_args={'ACL': 'public-read'}) # Setting metadata transfer.upload_file('/tmp/myfile', 'bucket', 'key', extra_args={'Metadata': {'a': 'b', 'c': 'd'}}) # Setting content type transfer.upload_file('/tmp/myfile.json', 'bucket', 'key', extra_args={'ContentType': "application/json"}) The ``S3Transfer`` clas also supports progress callbacks so you can provide transfer progress to users. Both the ``upload_file`` and ``download_file`` methods take an optional ``callback`` parameter. Here's an example of how to print a simple progress percentage to the user: .. 
code-block:: python class ProgressPercentage(object): def __init__(self, filename): self._filename = filename self._size = float(os.path.getsize(filename)) self._seen_so_far = 0 self._lock = threading.Lock() def __call__(self, bytes_amount): # To simplify we'll assume this is hooked up # to a single filename. with self._lock: self._seen_so_far += bytes_amount percentage = (self._seen_so_far / self._size) * 100 sys.stdout.write( "\r%s %s / %s (%.2f%%)" % (self._filename, self._seen_so_far, self._size, percentage)) sys.stdout.flush() transfer = S3Transfer(boto3.client('s3', 'us-west-2')) # Upload /tmp/myfile to s3://bucket/key and print upload progress. transfer.upload_file('/tmp/myfile', 'bucket', 'key', callback=ProgressPercentage('/tmp/myfile')) You can also provide a TransferConfig object to the S3Transfer object that gives you more fine grained control over the transfer. For example: .. code-block:: python client = boto3.client('s3', 'us-west-2') config = TransferConfig( multipart_threshold=8 * 1024 * 1024, max_concurrency=10, num_download_attempts=10, ) transfer = S3Transfer(client, config) transfer.upload_file('/tmp/foo', 'bucket', 'key') """ import os import math import functools import logging import socket import threading import random import string import concurrent.futures from botocore.compat import six from botocore.vendored.requests.packages.urllib3.exceptions import \ ReadTimeoutError from botocore.exceptions import IncompleteReadError import s3transfer.compat from s3transfer.exceptions import RetriesExceededError, S3UploadFailedError __author__ = 'Amazon Web Services' __version__ = '0.1.13' class NullHandler(logging.Handler): def emit(self, record): pass logger = logging.getLogger(__name__) logger.addHandler(NullHandler()) queue = six.moves.queue MB = 1024 * 1024 SHUTDOWN_SENTINEL = object() def random_file_extension(num_digits=8): return ''.join(random.choice(string.hexdigits) for _ in range(num_digits)) def disable_upload_callbacks(request, operation_name, **kwargs): if operation_name in ['PutObject', 'UploadPart'] and \ hasattr(request.body, 'disable_callback'): request.body.disable_callback() def enable_upload_callbacks(request, operation_name, **kwargs): if operation_name in ['PutObject', 'UploadPart'] and \ hasattr(request.body, 'enable_callback'): request.body.enable_callback() class QueueShutdownError(Exception): pass class ReadFileChunk(object): def __init__(self, fileobj, start_byte, chunk_size, full_file_size, callback=None, enable_callback=True): """ Given a file object shown below: |___________________________________________________| 0 | | full_file_size |----chunk_size---| start_byte :type fileobj: file :param fileobj: File like object :type start_byte: int :param start_byte: The first byte from which to start reading. :type chunk_size: int :param chunk_size: The max chunk size to read. Trying to read pass the end of the chunk size will behave like you've reached the end of the file. :type full_file_size: int :param full_file_size: The entire content length associated with ``fileobj``. :type callback: function(amount_read) :param callback: Called whenever data is read from this object. 
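        :type enable_callback: bool
        :param enable_callback: Indicate whether to invoke callback
            during read() calls.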
""" self._fileobj = fileobj self._start_byte = start_byte self._size = self._calculate_file_size( self._fileobj, requested_size=chunk_size, start_byte=start_byte, actual_file_size=full_file_size) self._fileobj.seek(self._start_byte) self._amount_read = 0 self._callback = callback self._callback_enabled = enable_callback @classmethod def from_filename(cls, filename, start_byte, chunk_size, callback=None, enable_callback=True): """Convenience factory function to create from a filename. :type start_byte: int :param start_byte: The first byte from which to start reading. :type chunk_size: int :param chunk_size: The max chunk size to read. Trying to read pass the end of the chunk size will behave like you've reached the end of the file. :type full_file_size: int :param full_file_size: The entire content length associated with ``fileobj``. :type callback: function(amount_read) :param callback: Called whenever data is read from this object. :type enable_callback: bool :param enable_callback: Indicate whether to invoke callback during read() calls. :rtype: ``ReadFileChunk`` :return: A new instance of ``ReadFileChunk`` """ f = open(filename, 'rb') file_size = os.fstat(f.fileno()).st_size return cls(f, start_byte, chunk_size, file_size, callback, enable_callback) def _calculate_file_size(self, fileobj, requested_size, start_byte, actual_file_size): max_chunk_size = actual_file_size - start_byte return min(max_chunk_size, requested_size) def read(self, amount=None): if amount is None: amount_to_read = self._size - self._amount_read else: amount_to_read = min(self._size - self._amount_read, amount) data = self._fileobj.read(amount_to_read) self._amount_read += len(data) if self._callback is not None and self._callback_enabled: self._callback(len(data)) return data def enable_callback(self): self._callback_enabled = True def disable_callback(self): self._callback_enabled = False def seek(self, where): self._fileobj.seek(self._start_byte + where) if self._callback is not None and self._callback_enabled: # To also rewind the callback() for an accurate progress report self._callback(where - self._amount_read) self._amount_read = where def close(self): self._fileobj.close() def tell(self): return self._amount_read def __len__(self): # __len__ is defined because requests will try to determine the length # of the stream to set a content length. In the normal case # of the file it will just stat the file, but we need to change that # behavior. By providing a __len__, requests will use that instead # of stat'ing the file. return self._size def __enter__(self): return self def __exit__(self, *args, **kwargs): self.close() def __iter__(self): # This is a workaround for http://bugs.python.org/issue17575 # Basically httplib will try to iterate over the contents, even # if its a file like object. This wasn't noticed because we've # already exhausted the stream so iterating over the file immediately # stops, which is what we're simulating here. 
return iter([]) class StreamReaderProgress(object): """Wrapper for a read only stream that adds progress callbacks.""" def __init__(self, stream, callback=None): self._stream = stream self._callback = callback def read(self, *args, **kwargs): value = self._stream.read(*args, **kwargs) if self._callback is not None: self._callback(len(value)) return value class OSUtils(object): def get_file_size(self, filename): return os.path.getsize(filename) def open_file_chunk_reader(self, filename, start_byte, size, callback): return ReadFileChunk.from_filename(filename, start_byte, size, callback, enable_callback=False) def open(self, filename, mode): return open(filename, mode) def remove_file(self, filename): """Remove a file, noop if file does not exist.""" # Unlike os.remove, if the file does not exist, # then this method does nothing. try: os.remove(filename) except OSError: pass def rename_file(self, current_filename, new_filename): s3transfer.compat.rename_file(current_filename, new_filename) class MultipartUploader(object): # These are the extra_args that need to be forwarded onto # subsequent upload_parts. UPLOAD_PART_ARGS = [ 'SSECustomerKey', 'SSECustomerAlgorithm', 'SSECustomerKeyMD5', 'RequestPayer', ] def __init__(self, client, config, osutil, executor_cls=concurrent.futures.ThreadPoolExecutor): self._client = client self._config = config self._os = osutil self._executor_cls = executor_cls def _extra_upload_part_args(self, extra_args): # Only the args in UPLOAD_PART_ARGS actually need to be passed # onto the upload_part calls. upload_parts_args = {} for key, value in extra_args.items(): if key in self.UPLOAD_PART_ARGS: upload_parts_args[key] = value return upload_parts_args def upload_file(self, filename, bucket, key, callback, extra_args): response = self._client.create_multipart_upload(Bucket=bucket, Key=key, **extra_args) upload_id = response['UploadId'] try: parts = self._upload_parts(upload_id, filename, bucket, key, callback, extra_args) except Exception as e: logger.debug("Exception raised while uploading parts, " "aborting multipart upload.", exc_info=True) self._client.abort_multipart_upload( Bucket=bucket, Key=key, UploadId=upload_id) raise S3UploadFailedError( "Failed to upload %s to %s: %s" % ( filename, '/'.join([bucket, key]), e)) self._client.complete_multipart_upload( Bucket=bucket, Key=key, UploadId=upload_id, MultipartUpload={'Parts': parts}) def _upload_parts(self, upload_id, filename, bucket, key, callback, extra_args): upload_parts_extra_args = self._extra_upload_part_args(extra_args) parts = [] part_size = self._config.multipart_chunksize num_parts = int( math.ceil(self._os.get_file_size(filename) / float(part_size))) max_workers = self._config.max_concurrency with self._executor_cls(max_workers=max_workers) as executor: upload_partial = functools.partial( self._upload_one_part, filename, bucket, key, upload_id, part_size, upload_parts_extra_args, callback) for part in executor.map(upload_partial, range(1, num_parts + 1)): parts.append(part) return parts def _upload_one_part(self, filename, bucket, key, upload_id, part_size, extra_args, callback, part_number): open_chunk_reader = self._os.open_file_chunk_reader with open_chunk_reader(filename, part_size * (part_number - 1), part_size, callback) as body: response = self._client.upload_part( Bucket=bucket, Key=key, UploadId=upload_id, PartNumber=part_number, Body=body, **extra_args) etag = response['ETag'] return {'ETag': etag, 'PartNumber': part_number} class ShutdownQueue(queue.Queue): """A queue implementation that 
can be shutdown. Shutting down a queue means that this class adds a trigger_shutdown method that will trigger all subsequent calls to put() to fail with a ``QueueShutdownError``. It purposefully deviates from queue.Queue, and is *not* meant to be a drop in replacement for ``queue.Queue``. """ def _init(self, maxsize): self._shutdown = False self._shutdown_lock = threading.Lock() # queue.Queue is an old style class so we don't use super(). return queue.Queue._init(self, maxsize) def trigger_shutdown(self): with self._shutdown_lock: self._shutdown = True logger.debug("The IO queue is now shutdown.") def put(self, item): # Note: this is not sufficient, it's still possible to deadlock! # Need to hook into the condition vars used by this class. with self._shutdown_lock: if self._shutdown: raise QueueShutdownError("Cannot put item to queue when " "queue has been shutdown.") return queue.Queue.put(self, item) class MultipartDownloader(object): def __init__(self, client, config, osutil, executor_cls=concurrent.futures.ThreadPoolExecutor): self._client = client self._config = config self._os = osutil self._executor_cls = executor_cls self._ioqueue = ShutdownQueue(self._config.max_io_queue) def download_file(self, bucket, key, filename, object_size, extra_args, callback=None): with self._executor_cls(max_workers=2) as controller: # 1 thread for the future that manages the uploading of files # 1 thread for the future that manages IO writes. download_parts_handler = functools.partial( self._download_file_as_future, bucket, key, filename, object_size, callback) parts_future = controller.submit(download_parts_handler) io_writes_handler = functools.partial( self._perform_io_writes, filename) io_future = controller.submit(io_writes_handler) results = concurrent.futures.wait( [parts_future, io_future], return_when=concurrent.futures.FIRST_EXCEPTION) self._process_future_results(results) def _process_future_results(self, futures): finished, unfinished = futures for future in finished: future.result() def _download_file_as_future(self, bucket, key, filename, object_size, callback): part_size = self._config.multipart_chunksize num_parts = int(math.ceil(object_size / float(part_size))) max_workers = self._config.max_concurrency download_partial = functools.partial( self._download_range, bucket, key, filename, part_size, num_parts, callback) try: with self._executor_cls(max_workers=max_workers) as executor: list(executor.map(download_partial, range(num_parts))) finally: self._ioqueue.put(SHUTDOWN_SENTINEL) def _calculate_range_param(self, part_size, part_index, num_parts): start_range = part_index * part_size if part_index == num_parts - 1: end_range = '' else: end_range = start_range + part_size - 1 range_param = 'bytes=%s-%s' % (start_range, end_range) return range_param def _download_range(self, bucket, key, filename, part_size, num_parts, callback, part_index): try: range_param = self._calculate_range_param( part_size, part_index, num_parts) max_attempts = self._config.num_download_attempts last_exception = None for i in range(max_attempts): try: logger.debug("Making get_object call.") response = self._client.get_object( Bucket=bucket, Key=key, Range=range_param) streaming_body = StreamReaderProgress( response['Body'], callback) buffer_size = 1024 * 16 current_index = part_size * part_index for chunk in iter(lambda: streaming_body.read(buffer_size), b''): self._ioqueue.put((current_index, chunk)) current_index += len(chunk) return except (socket.timeout, socket.error, ReadTimeoutError, IncompleteReadError) 
as e: logger.debug("Retrying exception caught (%s), " "retrying request, (attempt %s / %s)", e, i, max_attempts, exc_info=True) last_exception = e continue raise RetriesExceededError(last_exception) finally: logger.debug("EXITING _download_range for part: %s", part_index) def _perform_io_writes(self, filename): with self._os.open(filename, 'wb') as f: while True: task = self._ioqueue.get() if task is SHUTDOWN_SENTINEL: logger.debug("Shutdown sentinel received in IO handler, " "shutting down IO handler.") return else: try: offset, data = task f.seek(offset) f.write(data) except Exception as e: logger.debug("Caught exception in IO thread: %s", e, exc_info=True) self._ioqueue.trigger_shutdown() raise class TransferConfig(object): def __init__(self, multipart_threshold=8 * MB, max_concurrency=10, multipart_chunksize=8 * MB, num_download_attempts=5, max_io_queue=100): self.multipart_threshold = multipart_threshold self.max_concurrency = max_concurrency self.multipart_chunksize = multipart_chunksize self.num_download_attempts = num_download_attempts self.max_io_queue = max_io_queue class S3Transfer(object): ALLOWED_DOWNLOAD_ARGS = [ 'VersionId', 'SSECustomerAlgorithm', 'SSECustomerKey', 'SSECustomerKeyMD5', 'RequestPayer', ] ALLOWED_UPLOAD_ARGS = [ 'ACL', 'CacheControl', 'ContentDisposition', 'ContentEncoding', 'ContentLanguage', 'ContentType', 'Expires', 'GrantFullControl', 'GrantRead', 'GrantReadACP', 'GrantWriteACL', 'Metadata', 'RequestPayer', 'ServerSideEncryption', 'StorageClass', 'SSECustomerAlgorithm', 'SSECustomerKey', 'SSECustomerKeyMD5', 'SSEKMSKeyId', ] def __init__(self, client, config=None, osutil=None): self._client = client if config is None: config = TransferConfig() self._config = config if osutil is None: osutil = OSUtils() self._osutil = osutil def upload_file(self, filename, bucket, key, callback=None, extra_args=None): """Upload a file to an S3 object. Variants have also been injected into S3 client, Bucket and Object. You don't have to use S3Transfer.upload_file() directly. """ if extra_args is None: extra_args = {} self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS) events = self._client.meta.events events.register_first('request-created.s3', disable_upload_callbacks, unique_id='s3upload-callback-disable') events.register_last('request-created.s3', enable_upload_callbacks, unique_id='s3upload-callback-enable') if self._osutil.get_file_size(filename) >= \ self._config.multipart_threshold: self._multipart_upload(filename, bucket, key, callback, extra_args) else: self._put_object(filename, bucket, key, callback, extra_args) def _put_object(self, filename, bucket, key, callback, extra_args): # We're using open_file_chunk_reader so we can take advantage of the # progress callback functionality. open_chunk_reader = self._osutil.open_file_chunk_reader with open_chunk_reader(filename, 0, self._osutil.get_file_size(filename), callback=callback) as body: self._client.put_object(Bucket=bucket, Key=key, Body=body, **extra_args) def download_file(self, bucket, key, filename, extra_args=None, callback=None): """Download an S3 object to a file. Variants have also been injected into S3 client, Bucket and Object. You don't have to use S3Transfer.download_file() directly. """ # This method will issue a ``head_object`` request to determine # the size of the S3 object. This is used to determine if the # object is downloaded in parallel. 
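        # Note: the parallel (ranged) download path is only taken when the
        # object size is at least ``multipart_threshold``; otherwise a plain
        # get_object is used (see ``_download_file`` below).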
if extra_args is None: extra_args = {} self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS) object_size = self._object_size(bucket, key, extra_args) temp_filename = filename + os.extsep + random_file_extension() try: self._download_file(bucket, key, temp_filename, object_size, extra_args, callback) except Exception: logger.debug("Exception caught in download_file, removing partial " "file: %s", temp_filename, exc_info=True) self._osutil.remove_file(temp_filename) raise else: self._osutil.rename_file(temp_filename, filename) def _download_file(self, bucket, key, filename, object_size, extra_args, callback): if object_size >= self._config.multipart_threshold: self._ranged_download(bucket, key, filename, object_size, extra_args, callback) else: self._get_object(bucket, key, filename, extra_args, callback) def _validate_all_known_args(self, actual, allowed): for kwarg in actual: if kwarg not in allowed: raise ValueError( "Invalid extra_args key '%s', " "must be one of: %s" % ( kwarg, ', '.join(allowed))) def _ranged_download(self, bucket, key, filename, object_size, extra_args, callback): downloader = MultipartDownloader(self._client, self._config, self._osutil) downloader.download_file(bucket, key, filename, object_size, extra_args, callback) def _get_object(self, bucket, key, filename, extra_args, callback): # precondition: num_download_attempts > 0 max_attempts = self._config.num_download_attempts last_exception = None for i in range(max_attempts): try: return self._do_get_object(bucket, key, filename, extra_args, callback) except (socket.timeout, socket.error, ReadTimeoutError, IncompleteReadError) as e: # TODO: we need a way to reset the callback if the # download failed. logger.debug("Retrying exception caught (%s), " "retrying request, (attempt %s / %s)", e, i, max_attempts, exc_info=True) last_exception = e continue raise RetriesExceededError(last_exception) def _do_get_object(self, bucket, key, filename, extra_args, callback): response = self._client.get_object(Bucket=bucket, Key=key, **extra_args) streaming_body = StreamReaderProgress( response['Body'], callback) with self._osutil.open(filename, 'wb') as f: for chunk in iter(lambda: streaming_body.read(8192), b''): f.write(chunk) def _object_size(self, bucket, key, extra_args): return self._client.head_object( Bucket=bucket, Key=key, **extra_args)['ContentLength'] def _multipart_upload(self, filename, bucket, key, callback, extra_args): uploader = MultipartUploader(self._client, self._config, self._osutil) uploader.upload_file(filename, bucket, key, callback, extra_args) s3transfer-0.1.13/s3transfer/bandwidth.py000066400000000000000000000364331324114246300203070ustar00rootroot00000000000000# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. import time import threading class RequestExceededException(Exception): def __init__(self, requested_amt, retry_time): """Error when requested amount exceeds what is allowed The request that raised this error should be retried after waiting the time specified by ``retry_time``. 
:type requested_amt: int :param requested_amt: The originally requested byte amount :type retry_time: float :param retry_time: The length in time to wait to retry for the requested amount """ self.requested_amt = requested_amt self.retry_time = retry_time msg = ( 'Request amount %s exceeded the amount available. Retry in %s' % ( requested_amt, retry_time) ) super(RequestExceededException, self).__init__(msg) class RequestToken(object): """A token to pass as an identifier when consuming from the LeakyBucket""" pass class TimeUtils(object): def time(self): """Get the current time back :rtype: float :returns: The current time in seconds """ return time.time() def sleep(self, value): """Sleep for a designated time :type value: float :param value: The time to sleep for in seconds """ return time.sleep(value) class BandwidthLimiter(object): def __init__(self, leaky_bucket, time_utils=None): """Limits bandwidth for shared S3 transfers :type leaky_bucket: LeakyBucket :param leaky_bucket: The leaky bucket to use limit bandwidth :type time_utils: TimeUtils :param time_utils: Time utility to use for interacting with time. """ self._leaky_bucket = leaky_bucket self._time_utils = time_utils if time_utils is None: self._time_utils = TimeUtils() def get_bandwith_limited_stream(self, fileobj, transfer_coordinator, enabled=True): """Wraps a fileobj in a bandwidth limited stream wrapper :type fileobj: file-like obj :param fileobj: The file-like obj to wrap :type transfer_coordinator: s3transfer.futures.TransferCoordinator param transfer_coordinator: The coordinator for the general transfer that the wrapped stream is a part of :type enabled: boolean :param enabled: Whether bandwidth limiting should be enabled to start """ stream = BandwidthLimitedStream( fileobj, self._leaky_bucket, transfer_coordinator, self._time_utils) if not enabled: stream.disable_bandwidth_limiting() return stream class BandwidthLimitedStream(object): def __init__(self, fileobj, leaky_bucket, transfer_coordinator, time_utils=None, bytes_threshold=256 * 1024): """Limits bandwidth for reads on a wrapped stream :type fileobj: file-like object :param fileobj: The file like object to wrap :type leaky_bucket: LeakyBucket :param leaky_bucket: The leaky bucket to use to throttle reads on the stream :type transfer_coordinator: s3transfer.futures.TransferCoordinator param transfer_coordinator: The coordinator for the general transfer that the wrapped stream is a part of :type time_utils: TimeUtils :param time_utils: The time utility to use for interacting with time """ self._fileobj = fileobj self._leaky_bucket = leaky_bucket self._transfer_coordinator = transfer_coordinator self._time_utils = time_utils if time_utils is None: self._time_utils = TimeUtils() self._bandwidth_limiting_enabled = True self._request_token = RequestToken() self._bytes_seen = 0 self._bytes_threshold = bytes_threshold def enable_bandwidth_limiting(self): """Enable bandwidth limiting on reads to the stream""" self._bandwidth_limiting_enabled = True def disable_bandwidth_limiting(self): """Disable bandwidth limiting on reads to the stream""" self._bandwidth_limiting_enabled = False def read(self, amount): """Read a specified amount Reads will only be throttled if bandwidth limiting is enabled. """ if not self._bandwidth_limiting_enabled: return self._fileobj.read(amount) # We do not want to be calling consume on every read as the read # amounts can be small causing the lock of the leaky bucket to # introduce noticeable overhead. 
So instead we keep track of # how many bytes we have seen and only call consume once we pass a # certain threshold. self._bytes_seen += amount if self._bytes_seen < self._bytes_threshold: return self._fileobj.read(amount) self._consume_through_leaky_bucket() return self._fileobj.read(amount) def _consume_through_leaky_bucket(self): # NOTE: If the read amonut on the stream are high, it will result # in large bursty behavior as there is not an interface for partial # reads. However given the read's on this abstraction are at most 256KB # (via downloads), it reduces the burstiness to be small KB bursts at # worst. while not self._transfer_coordinator.exception: try: self._leaky_bucket.consume( self._bytes_seen, self._request_token) self._bytes_seen = 0 return except RequestExceededException as e: self._time_utils.sleep(e.retry_time) else: raise self._transfer_coordinator.exception def signal_transferring(self): """Signal that data being read is being transferred to S3""" self.enable_bandwidth_limiting() def signal_not_transferring(self): """Signal that data being read is not being transferred to S3""" self.disable_bandwidth_limiting() def seek(self, where): self._fileobj.seek(where) def tell(self): return self._fileobj.tell() def close(self): if self._bandwidth_limiting_enabled and self._bytes_seen: # This handles the case where the file is small enough to never # trigger the threshold and thus is never subjugated to the # leaky bucket on read(). This specifically happens for small # uploads. So instead to account for those bytes, have # it go through the leaky bucket when the file gets closed. self._consume_through_leaky_bucket() self._fileobj.close() def __enter__(self): return self def __exit__(self, *args, **kwargs): self.close() class LeakyBucket(object): def __init__(self, max_rate, time_utils=None, rate_tracker=None, consumption_scheduler=None): """A leaky bucket abstraction to limit bandwidth consumption :type rate: int :type rate: The maximum rate to allow. This rate is in terms of bytes per second. :type time_utils: TimeUtils :param time_utils: The time utility to use for interacting with time :type rate_tracker: BandwidthRateTracker :param rate_tracker: Tracks bandwidth consumption :type consumption_scheduler: ConsumptionScheduler :param consumption_scheduler: Schedules consumption retries when necessary """ self._max_rate = float(max_rate) self._time_utils = time_utils if time_utils is None: self._time_utils = TimeUtils() self._lock = threading.Lock() self._rate_tracker = rate_tracker if rate_tracker is None: self._rate_tracker = BandwidthRateTracker() self._consumption_scheduler = consumption_scheduler if consumption_scheduler is None: self._consumption_scheduler = ConsumptionScheduler() def consume(self, amt, request_token): """Consume an a requested amount :type amt: int :param amt: The amount of bytes to request to consume :type request_token: RequestToken :param request_token: The token associated to the consumption request that is used to identify the request. So if a RequestExceededException is raised the token should be used in subsequent retry consume() request. 
:raises RequestExceededException: If the consumption amount would exceed the maximum allocated bandwidth :rtype: int :returns: The amount consumed """ with self._lock: time_now = self._time_utils.time() if self._consumption_scheduler.is_scheduled(request_token): return self._release_requested_amt_for_scheduled_request( amt, request_token, time_now) elif self._projected_to_exceed_max_rate(amt, time_now): self._raise_request_exceeded_exception( amt, request_token, time_now) else: return self._release_requested_amt(amt, time_now) def _projected_to_exceed_max_rate(self, amt, time_now): projected_rate = self._rate_tracker.get_projected_rate(amt, time_now) return projected_rate > self._max_rate def _release_requested_amt_for_scheduled_request(self, amt, request_token, time_now): self._consumption_scheduler.process_scheduled_consumption( request_token) return self._release_requested_amt(amt, time_now) def _raise_request_exceeded_exception(self, amt, request_token, time_now): allocated_time = amt/float(self._max_rate) retry_time = self._consumption_scheduler.schedule_consumption( amt, request_token, allocated_time) raise RequestExceededException( requested_amt=amt, retry_time=retry_time) def _release_requested_amt(self, amt, time_now): self._rate_tracker.record_consumption_rate(amt, time_now) return amt class ConsumptionScheduler(object): def __init__(self): """Schedules when to consume a desired amount""" self._tokens_to_scheduled_consumption = {} self._total_wait = 0 def is_scheduled(self, token): """Indicates if a consumption request has been scheduled :type token: RequestToken :param token: The token associated to the consumption request that is used to identify the request. """ return token in self._tokens_to_scheduled_consumption def schedule_consumption(self, amt, token, time_to_consume): """Schedules a wait time to be able to consume an amount :type amt: int :param amt: The amount of bytes scheduled to be consumed :type token: RequestToken :param token: The token associated to the consumption request that is used to identify the request. :type time_to_consume: float :param time_to_consume: The desired time it should take for that specific request amount to be consumed in regardless of previously scheduled consumption requests :rtype: float :returns: The amount of time to wait for the specific request before actually consuming the specified amount. """ self._total_wait += time_to_consume self._tokens_to_scheduled_consumption[token] = { 'wait_duration': self._total_wait, 'time_to_consume': time_to_consume, } return self._total_wait def process_scheduled_consumption(self, token): """Processes a scheduled consumption request that has completed :type token: RequestToken :param token: The token associated to the consumption request that is used to identify the request. """ scheduled_retry = self._tokens_to_scheduled_consumption.pop(token) self._total_wait = max( self._total_wait - scheduled_retry['time_to_consume'], 0) class BandwidthRateTracker(object): def __init__(self, alpha=0.8): """Tracks the rate of bandwidth consumption :type a: float :param a: The constant to use in calculating the exponentional moving average of the bandwidth rate. Specifically it is used in the following calculation: current_rate = alpha * new_rate + (1 - alpha) * current_rate This value of this constant should be between 0 and 1. 
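        As a rough worked example (illustrative numbers only): with
        alpha = 0.8, a current_rate of 2 bytes/sec and a new_rate sample
        of 10 bytes/sec, the updated rate is 0.8 * 10 + 0.2 * 2 = 8.4
        bytes/sec, so recent samples dominate the tracked rate.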
""" self._alpha = alpha self._last_time = None self._current_rate = None @property def current_rate(self): """The current transfer rate :rtype: float :returns: The current tracked transfer rate """ if self._last_time is None: return 0.0 return self._current_rate def get_projected_rate(self, amt, time_at_consumption): """Get the projected rate using a provided amount and time :type amt: int :param amt: The proposed amount to consume :type time_at_consumption: float :param time_at_consumption: The proposed time to consume at :rtype: float :returns: The consumption rate if that amt and time were consumed """ if self._last_time is None: return 0.0 return self._calculate_exponential_moving_average_rate( amt, time_at_consumption) def record_consumption_rate(self, amt, time_at_consumption): """Record the consumption rate based off amount and time point :type amt: int :param amt: The amount that got consumed :type time_at_consumption: float :param time_at_consumption: The time at which the amount was consumed """ if self._last_time is None: self._last_time = time_at_consumption self._current_rate = 0.0 return self._current_rate = self._calculate_exponential_moving_average_rate( amt, time_at_consumption) self._last_time = time_at_consumption def _calculate_rate(self, amt, time_at_consumption): time_delta = time_at_consumption - self._last_time if time_delta <= 0: # While it is really unlikley to see this in an actual transfer, # we do not want to be returning back a negative rate or try to # divide the amount by zero. So instead return back an infinite # rate as the time delta is infinitesimally small. return float('inf') return amt / (time_delta) def _calculate_exponential_moving_average_rate(self, amt, time_at_consumption): new_rate = self._calculate_rate(amt, time_at_consumption) return self._alpha * new_rate + (1 - self._alpha) * self._current_rate s3transfer-0.1.13/s3transfer/compat.py000066400000000000000000000056471324114246300176310ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. import inspect import sys import os import errno import socket from botocore.compat import six if sys.platform.startswith('win'): def rename_file(current_filename, new_filename): try: os.remove(new_filename) except OSError as e: if not e.errno == errno.ENOENT: # We only want to a ignore trying to remove # a file that does not exist. If it fails # for any other reason we should be propagating # that exception. raise os.rename(current_filename, new_filename) else: rename_file = os.rename if six.PY3: def accepts_kwargs(func): # In python3.4.1, there's backwards incompatible # changes when using getargspec with functools.partials. return inspect.getfullargspec(func)[2] # In python3, socket.error is OSError, which is too general # for what we want (i.e FileNotFoundError is a subclass of OSError). 
# In py3 all the socket related errors are in a newly created # ConnectionError SOCKET_ERROR = ConnectionError MAXINT = None else: def accepts_kwargs(func): return inspect.getargspec(func)[2] SOCKET_ERROR = socket.error MAXINT = sys.maxint def seekable(fileobj): """Backwards compat function to determine if a fileobj is seekable :param fileobj: The file-like object to determine if seekable :returns: True, if seekable. False, otherwise. """ # If the fileobj has a seekable attr, try calling the seekable() # method on it. if hasattr(fileobj, 'seekable'): return fileobj.seekable() # If there is no seekable attr, check if the object can be seeked # or telled. If it can, try to seek to the current position. elif hasattr(fileobj, 'seek') and hasattr(fileobj, 'tell'): try: fileobj.seek(0, 1) return True except (OSError, IOError): # If an io related error was thrown then it is not seekable. return False # Else, the fileobj is not seekable return False def readable(fileobj): """Determines whether or not a file-like object is readable. :param fileobj: The file-like object to determine if readable :returns: True, if readable. False otherwise. """ if hasattr(fileobj, 'readable'): return fileobj.readable() return hasattr(fileobj, 'read') s3transfer-0.1.13/s3transfer/copies.py000066400000000000000000000317121324114246300176200ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
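# --- Illustrative sketch, not part of the original module ---------------
# Behaviour of the compat helpers in s3transfer/compat.py above: a regular
# file object is both seekable and readable, while a pipe-style stream is
# readable at best. The file name in the usage comment is hypothetical.
from s3transfer.compat import seekable, readable

def describe_fileobj(fileobj):
    # Returns (is_seekable, is_readable) for any file-like object.
    return seekable(fileobj), readable(fileobj)

# with open('example.bin', 'rb') as f:
#     describe_fileobj(f)              # (True, True)
# import sys
# describe_fileobj(sys.stdin.buffer)   # typically (False, True) for a pipe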
import copy import math from s3transfer.tasks import Task from s3transfer.tasks import SubmissionTask from s3transfer.tasks import CreateMultipartUploadTask from s3transfer.tasks import CompleteMultipartUploadTask from s3transfer.utils import get_callbacks from s3transfer.utils import calculate_range_parameter from s3transfer.utils import get_filtered_dict from s3transfer.utils import ChunksizeAdjuster class CopySubmissionTask(SubmissionTask): """Task for submitting tasks to execute a copy""" EXTRA_ARGS_TO_HEAD_ARGS_MAPPING = { 'CopySourceIfMatch': 'IfMatch', 'CopySourceIfModifiedSince': 'IfModifiedSince', 'CopySourceIfNoneMatch': 'IfNoneMatch', 'CopySourceIfUnmodifiedSince': 'IfUnmodifiedSince', 'CopySourceSSECustomerKey': 'SSECustomerKey', 'CopySourceSSECustomerAlgorithm': 'SSECustomerAlgorithm', 'CopySourceSSECustomerKeyMD5': 'SSECustomerKeyMD5', 'RequestPayer': 'RequestPayer' } UPLOAD_PART_COPY_ARGS = [ 'CopySourceIfMatch', 'CopySourceIfModifiedSince', 'CopySourceIfNoneMatch', 'CopySourceIfUnmodifiedSince', 'CopySourceSSECustomerKey', 'CopySourceSSECustomerAlgorithm', 'CopySourceSSECustomerKeyMD5', 'SSECustomerKey', 'SSECustomerAlgorithm', 'SSECustomerKeyMD5', 'RequestPayer', ] CREATE_MULTIPART_ARGS_BLACKLIST = [ 'CopySourceIfMatch', 'CopySourceIfModifiedSince', 'CopySourceIfNoneMatch', 'CopySourceIfUnmodifiedSince', 'CopySourceSSECustomerKey', 'CopySourceSSECustomerAlgorithm', 'CopySourceSSECustomerKeyMD5', 'MetadataDirective' ] COMPLETE_MULTIPART_ARGS = [ 'RequestPayer' ] def _submit(self, client, config, osutil, request_executor, transfer_future): """ :param client: The client associated with the transfer manager :type config: s3transfer.manager.TransferConfig :param config: The transfer config associated with the transfer manager :type osutil: s3transfer.utils.OSUtil :param osutil: The os utility associated to the transfer manager :type request_executor: s3transfer.futures.BoundedExecutor :param request_executor: The request executor associated with the transfer manager :type transfer_future: s3transfer.futures.TransferFuture :param transfer_future: The transfer future associated with the transfer request that tasks are being submitted for """ # Determine the size if it was not provided if transfer_future.meta.size is None: # If a size was not provided figure out the size for the # user. Note that we will only use the client provided to # the TransferManager. If the object is outside of the region # of the client, they may have to provide the file size themselves # with a completely new client. call_args = transfer_future.meta.call_args head_object_request = \ self._get_head_object_request_from_copy_source( call_args.copy_source) extra_args = call_args.extra_args # Map any values that may be used in the head object that is # used in the copy object for param, value in extra_args.items(): if param in self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING: head_object_request[ self.EXTRA_ARGS_TO_HEAD_ARGS_MAPPING[param]] = value response = call_args.source_client.head_object( **head_object_request) transfer_future.meta.provide_transfer_size( response['ContentLength']) # If it is greater than threshold do a multipart copy, otherwise # do a regular copy object. 
if transfer_future.meta.size < config.multipart_threshold: self._submit_copy_request( client, config, osutil, request_executor, transfer_future) else: self._submit_multipart_request( client, config, osutil, request_executor, transfer_future) def _submit_copy_request(self, client, config, osutil, request_executor, transfer_future): call_args = transfer_future.meta.call_args # Get the needed progress callbacks for the task progress_callbacks = get_callbacks(transfer_future, 'progress') # Submit the request of a single copy. self._transfer_coordinator.submit( request_executor, CopyObjectTask( transfer_coordinator=self._transfer_coordinator, main_kwargs={ 'client': client, 'copy_source': call_args.copy_source, 'bucket': call_args.bucket, 'key': call_args.key, 'extra_args': call_args.extra_args, 'callbacks': progress_callbacks, 'size': transfer_future.meta.size }, is_final=True ) ) def _submit_multipart_request(self, client, config, osutil, request_executor, transfer_future): call_args = transfer_future.meta.call_args # Submit the request to create a multipart upload and make sure it # does not include any of the arguments used for copy part. create_multipart_extra_args = {} for param, val in call_args.extra_args.items(): if param not in self.CREATE_MULTIPART_ARGS_BLACKLIST: create_multipart_extra_args[param] = val create_multipart_future = self._transfer_coordinator.submit( request_executor, CreateMultipartUploadTask( transfer_coordinator=self._transfer_coordinator, main_kwargs={ 'client': client, 'bucket': call_args.bucket, 'key': call_args.key, 'extra_args': create_multipart_extra_args, } ) ) # Determine how many parts are needed based on filesize and # desired chunksize. part_size = config.multipart_chunksize adjuster = ChunksizeAdjuster() part_size = adjuster.adjust_chunksize( part_size, transfer_future.meta.size) num_parts = int( math.ceil(transfer_future.meta.size / float(part_size))) # Submit requests to upload the parts of the file. part_futures = [] progress_callbacks = get_callbacks(transfer_future, 'progress') for part_number in range(1, num_parts + 1): extra_part_args = self._extra_upload_part_args( call_args.extra_args) # The part number for upload part starts at 1 while the # range parameter starts at zero, so just subtract 1 off of # the part number extra_part_args['CopySourceRange'] = calculate_range_parameter( part_size, part_number-1, num_parts, transfer_future.meta.size) # Get the size of the part copy as well for the progress # callbacks. size = self._get_transfer_size( part_size, part_number-1, num_parts, transfer_future.meta.size ) part_futures.append( self._transfer_coordinator.submit( request_executor, CopyPartTask( transfer_coordinator=self._transfer_coordinator, main_kwargs={ 'client': client, 'copy_source': call_args.copy_source, 'bucket': call_args.bucket, 'key': call_args.key, 'part_number': part_number, 'extra_args': extra_part_args, 'callbacks': progress_callbacks, 'size': size }, pending_main_kwargs={ 'upload_id': create_multipart_future } ) ) ) complete_multipart_extra_args = self._extra_complete_multipart_args( call_args.extra_args) # Submit the request to complete the multipart upload. 
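# --- Illustrative aside, not part of the original module ----------------
# For the completion step submitted below: each CopyPartTask returns
# {'ETag': ..., 'PartNumber': n}, and CompleteMultipartUploadTask receives
# that list through its pending ``parts`` kwarg once every part future
# resolves. With made-up numbers (a 20 MB source, the default 8 MB part
# size), calculate_range_parameter() would produce:
#
#   part 1 -> CopySourceRange='bytes=0-8388607'
#   part 2 -> CopySourceRange='bytes=8388608-16777215'
#   part 3 -> CopySourceRange='bytes=16777216-20971519'   (the remainder)
#
# and the assembled parts list would look like:
#   [{'ETag': '"etag-1"', 'PartNumber': 1},
#    {'ETag': '"etag-2"', 'PartNumber': 2},
#    {'ETag': '"etag-3"', 'PartNumber': 3}]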
self._transfer_coordinator.submit( request_executor, CompleteMultipartUploadTask( transfer_coordinator=self._transfer_coordinator, main_kwargs={ 'client': client, 'bucket': call_args.bucket, 'key': call_args.key, 'extra_args': complete_multipart_extra_args, }, pending_main_kwargs={ 'upload_id': create_multipart_future, 'parts': part_futures }, is_final=True ) ) def _get_head_object_request_from_copy_source(self, copy_source): if isinstance(copy_source, dict): return copy.copy(copy_source) else: raise TypeError( 'Expecting dictionary formatted: ' '{"Bucket": bucket_name, "Key": key} ' 'but got %s or type %s.' % (copy_source, type(copy_source)) ) def _extra_upload_part_args(self, extra_args): # Only the args in COPY_PART_ARGS actually need to be passed # onto the upload_part_copy calls. return get_filtered_dict(extra_args, self.UPLOAD_PART_COPY_ARGS) def _extra_complete_multipart_args(self, extra_args): return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS) def _get_transfer_size(self, part_size, part_index, num_parts, total_transfer_size): if part_index == num_parts - 1: # The last part may be different in size then the rest of the # parts. return total_transfer_size - (part_index * part_size) return part_size class CopyObjectTask(Task): """Task to do a nonmultipart copy""" def _main(self, client, copy_source, bucket, key, extra_args, callbacks, size): """ :param client: The client to use when calling PutObject :param copy_source: The CopySource parameter to use :param bucket: The name of the bucket to copy to :param key: The name of the key to copy to :param extra_args: A dictionary of any extra arguments that may be used in the upload. :param callbacks: List of callbacks to call after copy :param size: The size of the transfer. This value is passed into the callbacks """ client.copy_object( CopySource=copy_source, Bucket=bucket, Key=key, **extra_args) for callback in callbacks: callback(bytes_transferred=size) class CopyPartTask(Task): """Task to upload a part in a multipart copy""" def _main(self, client, copy_source, bucket, key, upload_id, part_number, extra_args, callbacks, size): """ :param client: The client to use when calling PutObject :param copy_source: The CopySource parameter to use :param bucket: The name of the bucket to upload to :param key: The name of the key to upload to :param upload_id: The id of the upload :param part_number: The number representing the part of the multipart upload :param extra_args: A dictionary of any extra arguments that may be used in the upload. :param callbacks: List of callbacks to call after copy part :param size: The size of the transfer. This value is passed into the callbacks :rtype: dict :returns: A dictionary representing a part:: {'Etag': etag_value, 'PartNumber': part_number} This value can be appended to a list to be used to complete the multipart upload. """ response = client.upload_part_copy( CopySource=copy_source, Bucket=bucket, Key=key, UploadId=upload_id, PartNumber=part_number, **extra_args) for callback in callbacks: callback(bytes_transferred=size) etag = response['CopyPartResult']['ETag'] return {'ETag': etag, 'PartNumber': part_number} s3transfer-0.1.13/s3transfer/delete.py000066400000000000000000000050271324114246300176000ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. 
A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. from s3transfer.tasks import Task from s3transfer.tasks import SubmissionTask class DeleteSubmissionTask(SubmissionTask): """Task for submitting tasks to execute an object deletion.""" def _submit(self, client, request_executor, transfer_future, **kwargs): """ :param client: The client associated with the transfer manager :type config: s3transfer.manager.TransferConfig :param config: The transfer config associated with the transfer manager :type osutil: s3transfer.utils.OSUtil :param osutil: The os utility associated to the transfer manager :type request_executor: s3transfer.futures.BoundedExecutor :param request_executor: The request executor associated with the transfer manager :type transfer_future: s3transfer.futures.TransferFuture :param transfer_future: The transfer future associated with the transfer request that tasks are being submitted for """ call_args = transfer_future.meta.call_args self._transfer_coordinator.submit( request_executor, DeleteObjectTask( transfer_coordinator=self._transfer_coordinator, main_kwargs={ 'client': client, 'bucket': call_args.bucket, 'key': call_args.key, 'extra_args': call_args.extra_args, }, is_final=True ) ) class DeleteObjectTask(Task): def _main(self, client, bucket, key, extra_args): """ :param client: The S3 client to use when calling DeleteObject :type bucket: str :param bucket: The name of the bucket. :type key: str :param key: The name of the object to delete. :type extra_args: dict :param extra_args: Extra arguments to pass to the DeleteObject call. """ client.delete_object(Bucket=bucket, Key=key, **extra_args) s3transfer-0.1.13/s3transfer/download.py000066400000000000000000000672221324114246300201520ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
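# --- Illustrative sketch, not part of the original module ---------------
# The DeleteObjectTask above boils down to a single DeleteObject call; the
# extra_args it forwards are the ones the manager allows for deletes
# (e.g. VersionId, RequestPayer). Bucket and key names are made up.
def delete_like_the_task(client, bucket='example-bucket',
                         key='example/key', extra_args=None):
    # Equivalent to DeleteObjectTask._main(client, bucket, key, extra_args)
    return client.delete_object(Bucket=bucket, Key=key, **(extra_args or {}))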
import logging import os import socket import math import threading import heapq from botocore.compat import six from botocore.exceptions import IncompleteReadError from botocore.vendored.requests.packages.urllib3.exceptions import \ ReadTimeoutError from s3transfer.compat import SOCKET_ERROR from s3transfer.compat import seekable from s3transfer.exceptions import RetriesExceededError from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG from s3transfer.utils import random_file_extension from s3transfer.utils import get_callbacks from s3transfer.utils import invoke_progress_callbacks from s3transfer.utils import calculate_range_parameter from s3transfer.utils import FunctionContainer from s3transfer.utils import CountCallbackInvoker from s3transfer.utils import StreamReaderProgress from s3transfer.utils import DeferredOpenFile from s3transfer.tasks import Task from s3transfer.tasks import SubmissionTask logger = logging.getLogger(__name__) S3_RETRYABLE_ERRORS = ( socket.timeout, SOCKET_ERROR, ReadTimeoutError, IncompleteReadError ) class DownloadOutputManager(object): """Base manager class for handling various types of files for downloads This class is typically used for the DownloadSubmissionTask class to help determine the following: * Provides the fileobj to write to downloads to * Get a task to complete once everything downloaded has been written The answers/implementations differ for the various types of file outputs that may be accepted. All implementations must subclass and override public methods from this class. """ def __init__(self, osutil, transfer_coordinator, io_executor): self._osutil = osutil self._transfer_coordinator = transfer_coordinator self._io_executor = io_executor @classmethod def is_compatible(cls, download_target, osutil): """Determines if the target for the download is compatible with manager :param download_target: The target for which the upload will write data to. :param osutil: The os utility to be used for the transfer :returns: True if the manager can handle the type of target specified otherwise returns False. """ raise NotImplementedError('must implement is_compatible()') def get_download_task_tag(self): """Get the tag (if any) to associate all GetObjectTasks :rtype: s3transfer.futures.TaskTag :returns: The tag to associate all GetObjectTasks with """ return None def get_fileobj_for_io_writes(self, transfer_future): """Get file-like object to use for io writes in the io executor :type transfer_future: s3transfer.futures.TransferFuture :param transfer_future: The future associated with upload request returns: A file-like object to write to """ raise NotImplementedError('must implement get_fileobj_for_io_writes()') def queue_file_io_task(self, fileobj, data, offset): """Queue IO write for submission to the IO executor. This method accepts an IO executor and information about the downloaded data, and handles submitting this to the IO executor. This method may defer submission to the IO executor if necessary. """ self._transfer_coordinator.submit( self._io_executor, self.get_io_write_task(fileobj, data, offset) ) def get_io_write_task(self, fileobj, data, offset): """Get an IO write task for the requested set of data This task can be ran immediately or be submitted to the IO executor for it to run. 
:type fileobj: file-like object :param fileobj: The file-like object to write to :type data: bytes :param data: The data to write out :type offset: integer :param offset: The offset to write the data to in the file-like object :returns: An IO task to be used to write data to a file-like object """ return IOWriteTask( self._transfer_coordinator, main_kwargs={ 'fileobj': fileobj, 'data': data, 'offset': offset, } ) def get_final_io_task(self): """Get the final io task to complete the download This is needed because based on the architecture of the TransferManager the final tasks will be sent to the IO executor, but the executor needs a final task for it to signal that the transfer is done and all done callbacks can be run. :rtype: s3transfer.tasks.Task :returns: A final task to completed in the io executor """ raise NotImplementedError( 'must implement get_final_io_task()') def _get_fileobj_from_filename(self, filename): f = DeferredOpenFile( filename, mode='wb', open_function=self._osutil.open) # Make sure the file gets closed and we remove the temporary file # if anything goes wrong during the process. self._transfer_coordinator.add_failure_cleanup(f.close) return f class DownloadFilenameOutputManager(DownloadOutputManager): def __init__(self, osutil, transfer_coordinator, io_executor): super(DownloadFilenameOutputManager, self).__init__( osutil, transfer_coordinator, io_executor) self._final_filename = None self._temp_filename = None self._temp_fileobj = None @classmethod def is_compatible(cls, download_target, osutil): return isinstance(download_target, six.string_types) def get_fileobj_for_io_writes(self, transfer_future): fileobj = transfer_future.meta.call_args.fileobj self._final_filename = fileobj self._temp_filename = fileobj + os.extsep + random_file_extension() self._temp_fileobj = self._get_temp_fileobj() return self._temp_fileobj def get_final_io_task(self): # A task to rename the file from the temporary file to its final # location is needed. This should be the last task needed to complete # the download. return IORenameFileTask( transfer_coordinator=self._transfer_coordinator, main_kwargs={ 'fileobj': self._temp_fileobj, 'final_filename': self._final_filename, 'osutil': self._osutil }, is_final=True ) def _get_temp_fileobj(self): f = self._get_fileobj_from_filename(self._temp_filename) self._transfer_coordinator.add_failure_cleanup( self._osutil.remove_file, self._temp_filename) return f class DownloadSeekableOutputManager(DownloadOutputManager): @classmethod def is_compatible(cls, download_target, osutil): return seekable(download_target) def get_fileobj_for_io_writes(self, transfer_future): # Return the fileobj provided to the future. return transfer_future.meta.call_args.fileobj def get_final_io_task(self): # This task will serve the purpose of signaling when all of the io # writes have finished so done callbacks can be called. 
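# --- Illustrative aside, not part of the original module ----------------
# The managers above finish a download differently: the filename manager
# writes to '<name>.<random suffix>' and renames it into place as its
# final IO task, while the seekable manager only needs the NOOP task
# returned below to signal that every queued write has completed. A
# simplified version of the compatibility check (the real resolver chain
# in DownloadSubmissionTask also handles special files and non-seekable
# streams):
from s3transfer.utils import OSUtils

def pick_output_manager_kind(download_target, osutil=None):
    osutil = osutil or OSUtils()
    if DownloadFilenameOutputManager.is_compatible(download_target, osutil):
        return 'filename'
    if DownloadSeekableOutputManager.is_compatible(download_target, osutil):
        return 'seekable'
    return 'other'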
return CompleteDownloadNOOPTask( transfer_coordinator=self._transfer_coordinator) class DownloadNonSeekableOutputManager(DownloadOutputManager): def __init__(self, osutil, transfer_coordinator, io_executor, defer_queue=None): super(DownloadNonSeekableOutputManager, self).__init__( osutil, transfer_coordinator, io_executor) if defer_queue is None: defer_queue = DeferQueue() self._defer_queue = defer_queue self._io_submit_lock = threading.Lock() @classmethod def is_compatible(cls, download_target, osutil): return hasattr(download_target, 'write') def get_download_task_tag(self): return IN_MEMORY_DOWNLOAD_TAG def get_fileobj_for_io_writes(self, transfer_future): return transfer_future.meta.call_args.fileobj def get_final_io_task(self): return CompleteDownloadNOOPTask( transfer_coordinator=self._transfer_coordinator) def queue_file_io_task(self, fileobj, data, offset): with self._io_submit_lock: writes = self._defer_queue.request_writes(offset, data) for write in writes: data = write['data'] logger.debug("Queueing IO offset %s for fileobj: %s", write['offset'], fileobj) super( DownloadNonSeekableOutputManager, self).queue_file_io_task( fileobj, data, offset) def get_io_write_task(self, fileobj, data, offset): return IOStreamingWriteTask( self._transfer_coordinator, main_kwargs={ 'fileobj': fileobj, 'data': data, } ) class DownloadSpecialFilenameOutputManager(DownloadNonSeekableOutputManager): def __init__(self, osutil, transfer_coordinator, io_executor, defer_queue=None): super(DownloadSpecialFilenameOutputManager, self).__init__( osutil, transfer_coordinator, io_executor, defer_queue) self._fileobj = None @classmethod def is_compatible(cls, download_target, osutil): return isinstance(download_target, six.string_types) and \ osutil.is_special_file(download_target) def get_fileobj_for_io_writes(self, transfer_future): filename = transfer_future.meta.call_args.fileobj self._fileobj = self._get_fileobj_from_filename(filename) return self._fileobj def get_final_io_task(self): # Make sure the file gets closed once the transfer is done. return IOCloseTask( transfer_coordinator=self._transfer_coordinator, is_final=True, main_kwargs={'fileobj': self._fileobj}) class DownloadSubmissionTask(SubmissionTask): """Task for submitting tasks to execute a download""" def _get_download_output_manager_cls(self, transfer_future, osutil): """Retrieves a class for managing output for a download :type transfer_future: s3transfer.futures.TransferFuture :param transfer_future: The transfer future for the request :type osutil: s3transfer.utils.OSUtils :param osutil: The os utility associated to the transfer :rtype: class of DownloadOutputManager :returns: The appropriate class to use for managing a specific type of input for downloads. """ download_manager_resolver_chain = [ DownloadSpecialFilenameOutputManager, DownloadFilenameOutputManager, DownloadSeekableOutputManager, DownloadNonSeekableOutputManager, ] fileobj = transfer_future.meta.call_args.fileobj for download_manager_cls in download_manager_resolver_chain: if download_manager_cls.is_compatible(fileobj, osutil): return download_manager_cls raise RuntimeError( 'Output %s of type: %s is not supported.' 
% ( fileobj, type(fileobj))) def _submit(self, client, config, osutil, request_executor, io_executor, transfer_future, bandwidth_limiter=None): """ :param client: The client associated with the transfer manager :type config: s3transfer.manager.TransferConfig :param config: The transfer config associated with the transfer manager :type osutil: s3transfer.utils.OSUtil :param osutil: The os utility associated to the transfer manager :type request_executor: s3transfer.futures.BoundedExecutor :param request_executor: The request executor associated with the transfer manager :type io_executor: s3transfer.futures.BoundedExecutor :param io_executor: The io executor associated with the transfer manager :type transfer_future: s3transfer.futures.TransferFuture :param transfer_future: The transfer future associated with the transfer request that tasks are being submitted for :type bandwidth_limiter: s3transfer.bandwidth.BandwidthLimiter :param bandwidth_limiter: The bandwidth limiter to use when downloading streams """ if transfer_future.meta.size is None: # If a size was not provided figure out the size for the # user. response = client.head_object( Bucket=transfer_future.meta.call_args.bucket, Key=transfer_future.meta.call_args.key, **transfer_future.meta.call_args.extra_args ) transfer_future.meta.provide_transfer_size( response['ContentLength']) download_output_manager = self._get_download_output_manager_cls( transfer_future, osutil)(osutil, self._transfer_coordinator, io_executor) # If it is greater than threshold do a ranged download, otherwise # do a regular GetObject download. if transfer_future.meta.size < config.multipart_threshold: self._submit_download_request( client, config, osutil, request_executor, io_executor, download_output_manager, transfer_future, bandwidth_limiter) else: self._submit_ranged_download_request( client, config, osutil, request_executor, io_executor, download_output_manager, transfer_future, bandwidth_limiter) def _submit_download_request(self, client, config, osutil, request_executor, io_executor, download_output_manager, transfer_future, bandwidth_limiter): call_args = transfer_future.meta.call_args # Get a handle to the file that will be used for writing downloaded # contents fileobj = download_output_manager.get_fileobj_for_io_writes( transfer_future) # Get the needed callbacks for the task progress_callbacks = get_callbacks(transfer_future, 'progress') # Get any associated tags for the get object task. get_object_tag = download_output_manager.get_download_task_tag() # Get the final io task to run once the download is complete. final_task = download_output_manager.get_final_io_task() # Submit the task to download the object. 
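# --- Illustrative aside, not part of the original module ----------------
# The decision made just above, with made-up numbers: sizes below
# multipart_threshold go through the single GetObject path submitted
# below; anything larger is split into ranged GetObject calls.
def uses_ranged_download(size, config):
    # Mirrors ``transfer_future.meta.size < config.multipart_threshold``
    # in _submit(); ``size`` may have come from a HeadObject ContentLength.
    return size >= config.multipart_threshold

# With the default 8 MB threshold from s3transfer.manager.TransferConfig:
#   uses_ranged_download(5 * 1024 * 1024, TransferConfig())  -> False
#   uses_ranged_download(32 * 1024 * 1024, TransferConfig()) -> True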
self._transfer_coordinator.submit( request_executor, ImmediatelyWriteIOGetObjectTask( transfer_coordinator=self._transfer_coordinator, main_kwargs={ 'client': client, 'bucket': call_args.bucket, 'key': call_args.key, 'fileobj': fileobj, 'extra_args': call_args.extra_args, 'callbacks': progress_callbacks, 'max_attempts': config.num_download_attempts, 'download_output_manager': download_output_manager, 'io_chunksize': config.io_chunksize, 'bandwidth_limiter': bandwidth_limiter }, done_callbacks=[final_task] ), tag=get_object_tag ) def _submit_ranged_download_request(self, client, config, osutil, request_executor, io_executor, download_output_manager, transfer_future, bandwidth_limiter): call_args = transfer_future.meta.call_args # Get the needed progress callbacks for the task progress_callbacks = get_callbacks(transfer_future, 'progress') # Get a handle to the file that will be used for writing downloaded # contents fileobj = download_output_manager.get_fileobj_for_io_writes( transfer_future) # Determine the number of parts part_size = config.multipart_chunksize num_parts = int( math.ceil(transfer_future.meta.size / float(part_size))) # Get any associated tags for the get object task. get_object_tag = download_output_manager.get_download_task_tag() # Callback invoker to submit the final io task once all downloads # are complete. finalize_download_invoker = CountCallbackInvoker( self._get_final_io_task_submission_callback( download_output_manager, io_executor ) ) for i in range(num_parts): # Calculate the range parameter range_parameter = calculate_range_parameter( part_size, i, num_parts) # Inject the Range parameter to the parameters to be passed in # as extra args extra_args = {'Range': range_parameter} extra_args.update(call_args.extra_args) finalize_download_invoker.increment() # Submit the ranged downloads self._transfer_coordinator.submit( request_executor, GetObjectTask( transfer_coordinator=self._transfer_coordinator, main_kwargs={ 'client': client, 'bucket': call_args.bucket, 'key': call_args.key, 'fileobj': fileobj, 'extra_args': extra_args, 'callbacks': progress_callbacks, 'max_attempts': config.num_download_attempts, 'start_index': i * part_size, 'download_output_manager': download_output_manager, 'io_chunksize': config.io_chunksize, 'bandwidth_limiter': bandwidth_limiter }, done_callbacks=[finalize_download_invoker.decrement] ), tag=get_object_tag ) finalize_download_invoker.finalize() def _get_final_io_task_submission_callback(self, download_manager, io_executor): final_task = download_manager.get_final_io_task() return FunctionContainer( self._transfer_coordinator.submit, io_executor, final_task) def _calculate_range_param(self, part_size, part_index, num_parts): # Used to calculate the Range parameter start_range = part_index * part_size if part_index == num_parts - 1: end_range = '' else: end_range = start_range + part_size - 1 range_param = 'bytes=%s-%s' % (start_range, end_range) return range_param class GetObjectTask(Task): def _main(self, client, bucket, key, fileobj, extra_args, callbacks, max_attempts, download_output_manager, io_chunksize, start_index=0, bandwidth_limiter=None): """Downloads an object and places content into io queue :param client: The client to use when calling GetObject :param bucket: The bucket to download from :param key: The key to download from :param fileobj: The file handle to write content to :param exta_args: Any extra arguements to include in GetObject request :param callbacks: List of progress callbacks to invoke on download :param 
max_attempts: The number of retries to do when downloading :param download_output_manager: The download output manager associated with the current download. :param io_chunksize: The size of each io chunk to read from the download stream and queue in the io queue. :param start_index: The location in the file to start writing the content of the key to. :param bandwidth_limiter: The bandwidth limiter to use when throttling the downloading of data in streams. """ last_exception = None for i in range(max_attempts): try: response = client.get_object( Bucket=bucket, Key=key, **extra_args) streaming_body = StreamReaderProgress( response['Body'], callbacks) if bandwidth_limiter: streaming_body = \ bandwidth_limiter.get_bandwith_limited_stream( streaming_body, self._transfer_coordinator) current_index = start_index chunks = DownloadChunkIterator(streaming_body, io_chunksize) for chunk in chunks: # If the transfer is done because of a cancellation # or error somewhere else, stop trying to submit more # data to be written and break out of the download. if not self._transfer_coordinator.done(): self._handle_io( download_output_manager, fileobj, chunk, current_index ) current_index += len(chunk) else: return return except S3_RETRYABLE_ERRORS as e: logger.debug("Retrying exception caught (%s), " "retrying request, (attempt %s / %s)", e, i, max_attempts, exc_info=True) last_exception = e # Also invoke the progress callbacks to indicate that we # are trying to download the stream again and all progress # for this GetObject has been lost. invoke_progress_callbacks( callbacks, start_index - current_index) continue raise RetriesExceededError(last_exception) def _handle_io(self, download_output_manager, fileobj, chunk, index): download_output_manager.queue_file_io_task(fileobj, chunk, index) class ImmediatelyWriteIOGetObjectTask(GetObjectTask): """GetObjectTask that immediately writes to the provided file object This is useful for downloads where it is known only one thread is downloading the object so there is no reason to go through the overhead of using an IO queue and executor. """ def _handle_io(self, download_output_manager, fileobj, chunk, index): task = download_output_manager.get_io_write_task(fileobj, chunk, index) task() class IOWriteTask(Task): def _main(self, fileobj, data, offset): """Pulls off an io queue to write contents to a file :param f: The file handle to write content to :param data: The data to write :param offset: The offset to write the data to. """ fileobj.seek(offset) fileobj.write(data) class IOStreamingWriteTask(Task): """Task for writing data to a non-seekable stream.""" def _main(self, fileobj, data): """Write data to a fileobj. Data will be written directly to the fileboj without any prior seeking. :param fileobj: The fileobj to write content to :param data: The data to write """ fileobj.write(data) class IORenameFileTask(Task): """A task to rename a temporary file to its final filename :param f: The file handle that content was written to. :param final_filename: The final name of the file to rename to upon completion of writing the contents. :param osutil: OS utility """ def _main(self, fileobj, final_filename, osutil): fileobj.close() osutil.rename_file(fileobj.name, final_filename) class IOCloseTask(Task): """A task to close out a file once the download is complete. :param fileobj: The fileobj to close. 
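# --- Illustrative aside, not part of the original module ----------------
# The retry loop in GetObjectTask above retries only the errors named in
# S3_RETRYABLE_ERRORS (socket timeouts, connection errors, read timeouts,
# incomplete reads) and, before each retry, reports negative progress so
# subscribers see the bytes from the failed attempt subtracted again. A
# stripped-down version of that pattern, where ``attempt`` and
# ``on_rollback`` are hypothetical callables standing in for one streaming
# download and the progress rollback:
def retry_streaming(attempt, max_attempts, on_rollback):
    last_exception = None
    for _ in range(max_attempts):
        try:
            return attempt()
        except S3_RETRYABLE_ERRORS as e:
            last_exception = e
            on_rollback()   # e.g. invoke_progress_callbacks(callbacks, -n)
    raise RetriesExceededError(last_exception)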
""" def _main(self, fileobj): fileobj.close() class CompleteDownloadNOOPTask(Task): """A NOOP task to serve as an indicator that the download is complete Note that the default for is_final is set to True because this should always be the last task. """ def __init__(self, transfer_coordinator, main_kwargs=None, pending_main_kwargs=None, done_callbacks=None, is_final=True): super(CompleteDownloadNOOPTask, self).__init__( transfer_coordinator=transfer_coordinator, main_kwargs=main_kwargs, pending_main_kwargs=pending_main_kwargs, done_callbacks=done_callbacks, is_final=is_final ) def _main(self): pass class DownloadChunkIterator(object): def __init__(self, body, chunksize): """Iterator to chunk out a downloaded S3 stream :param body: A readable file-like object :param chunksize: The amount to read each time """ self._body = body self._chunksize = chunksize self._num_reads = 0 def __iter__(self): return self def __next__(self): chunk = self._body.read(self._chunksize) self._num_reads += 1 if chunk: return chunk elif self._num_reads == 1: # Even though the response may have not had any # content, we still want to account for an empty object's # existance so return the empty chunk for that initial # read. return chunk raise StopIteration() next = __next__ class DeferQueue(object): """IO queue that defers write requests until they are queued sequentially. This class is used to track IO data for a *single* fileobj. You can send data to this queue, and it will defer any IO write requests until it has the next contiguous block available (starting at 0). """ def __init__(self): self._writes = [] self._pending_offsets = set() self._next_offset = 0 def request_writes(self, offset, data): """Request any available writes given new incoming data. You call this method by providing new data along with the offset associated with the data. If that new data unlocks any contiguous writes that can now be submitted, this method will return all applicable writes. This is done with 1 method call so you don't have to make two method calls (put(), get()) which acquires a lock each method call. """ if offset < self._next_offset: # This is a request for a write that we've already # seen. This can happen in the event of a retry # where if we retry at at offset N/2, we'll requeue # offsets 0-N/2 again. return [] writes = [] if offset in self._pending_offsets: # We've already queued this offset so this request is # a duplicate. In this case we should ignore # this request and prefer what's already queued. return [] heapq.heappush(self._writes, (offset, data)) self._pending_offsets.add(offset) while self._writes and self._writes[0][0] == self._next_offset: next_write = heapq.heappop(self._writes) writes.append({'offset': next_write[0], 'data': next_write[1]}) self._pending_offsets.remove(next_write[0]) self._next_offset += len(next_write[1]) return writes s3transfer-0.1.13/s3transfer/exceptions.py000066400000000000000000000021021324114246300205060ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. 
See the License for the specific # language governing permissions and limitations under the License. from concurrent.futures import CancelledError class RetriesExceededError(Exception): def __init__(self, last_exception, msg='Max Retries Exceeded'): super(RetriesExceededError, self).__init__(msg) self.last_exception = last_exception class S3UploadFailedError(Exception): pass class InvalidSubscriberMethodError(Exception): pass class TransferNotDoneError(Exception): pass class FatalError(CancelledError): """A CancelledError raised from an error in the TransferManager""" pass s3transfer-0.1.13/s3transfer/futures.py000066400000000000000000000476301324114246300200410ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. from concurrent import futures from collections import namedtuple import copy import logging import sys import threading from s3transfer.compat import MAXINT from s3transfer.compat import six from s3transfer.exceptions import CancelledError, TransferNotDoneError from s3transfer.utils import FunctionContainer from s3transfer.utils import TaskSemaphore logger = logging.getLogger(__name__) class TransferFuture(object): def __init__(self, meta=None, coordinator=None): """The future associated to a submitted transfer request :type meta: TransferMeta :param meta: The metadata associated to the request. This object is visible to the requester. :type coordinator: TransferCoordinator :param coordinator: The coordinator associated to the request. This object is not visible to the requester. """ self._meta = meta if meta is None: self._meta = TransferMeta() self._coordinator = coordinator if coordinator is None: self._coordinator = TransferCoordinator() @property def meta(self): """The metadata associated tio the TransferFuture""" return self._meta def done(self): """Determines if a TransferFuture has completed :returns: True if completed. False, otherwise. """ return self._coordinator.done() def result(self): """Waits until TransferFuture is done and returns the result If the TransferFuture succeeded, it will return the result. If the TransferFuture failed, it will raise the exception associated to the failure. """ try: # Usually the result() method blocks until the transfer is done, # however if a KeyboardInterrupt is raised we want want to exit # out of this and propogate the exception. 
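# --- Illustrative aside, not part of the original module ----------------
# Typical caller-side handling of the behaviour implemented below:
# result() blocks until the transfer finishes, re-raises the transfer's
# exception on failure, and on Ctrl-C cancels the future before letting
# the KeyboardInterrupt propagate. ``future`` is assumed to come from a
# TransferManager request.
def wait_for_transfer(future):
    try:
        return future.result()
    except KeyboardInterrupt:
        # result() has already called cancel() by this point; re-raise so
        # the application can shut down cleanly.
        raise
    except CancelledError as e:
        return 'cancelled: %s' % e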
return self._coordinator.result() except KeyboardInterrupt as e: self.cancel() raise e def cancel(self): """Cancels the request associated with the TransferFuture""" self._coordinator.cancel() def set_exception(self, exception): """Sets the exception on the future.""" if not self.done(): raise TransferNotDoneError( 'set_exception can only be called once the transfer is ' 'complete.') self._coordinator.set_exception(exception, override=True) class TransferMeta(object): """Holds metadata about the TransferFuture""" def __init__(self, call_args=None, transfer_id=None): self._call_args = call_args self._transfer_id = transfer_id self._size = None self._user_context = {} @property def call_args(self): """The call args used in the transfer request""" return self._call_args @property def transfer_id(self): """The unique id of the transfer""" return self._transfer_id @property def size(self): """The size of the transfer request if known""" return self._size @property def user_context(self): """A dictionary that requesters can store data in""" return self._user_context def provide_transfer_size(self, size): """A method to provide the size of a transfer request By providing this value, the TransferManager will not try to call HeadObject or use the use OS to determine the size of the transfer. """ self._size = size class TransferCoordinator(object): """A helper class for managing TransferFuture""" def __init__(self, transfer_id=None): self.transfer_id = transfer_id self._status = 'not-started' self._result = None self._exception = None self._associated_futures = set() self._failure_cleanups = [] self._done_callbacks = [] self._done_event = threading.Event() self._lock = threading.Lock() self._associated_futures_lock = threading.Lock() self._done_callbacks_lock = threading.Lock() self._failure_cleanups_lock = threading.Lock() def __repr__(self): return '%s(transfer_id=%s)' % ( self.__class__.__name__, self.transfer_id) @property def exception(self): return self._exception @property def associated_futures(self): """The list of futures associated to the inprogress TransferFuture Once the transfer finishes this list becomes empty as the transfer is considered done and there should be no running futures left. """ with self._associated_futures_lock: # We return a copy of the list because we do not want to # processing the returned list while another thread is adding # more futures to the actual list. return copy.copy(self._associated_futures) @property def failure_cleanups(self): """The list of callbacks to call when the TransferFuture fails""" return self._failure_cleanups @property def status(self): """The status of the TransferFuture The currently supported states are: * not-started - Has yet to start. If in this state, a transfer can be canceled immediately and nothing will happen. * queued - SubmissionTask is about to submit tasks * running - Is inprogress. In-progress as of now means that the SubmissionTask that runs the transfer is being executed. So there is no guarantee any transfer requests had been made to S3 if this state is reached. * cancelled - Was cancelled * failed - An exception other than CancelledError was thrown * success - No exceptions were thrown and is done. """ return self._status def set_result(self, result): """Set a result for the TransferFuture Implies that the TransferFuture succeeded. This will always set a result because it is invoked on the final task where there is only ever one final task and it is ran at the very end of a transfer process. 
So if a result is being set for this final task, the transfer succeeded even if something came a long and canceled the transfer on the final task. """ with self._lock: self._exception = None self._result = result self._status = 'success' def set_exception(self, exception, override=False): """Set an exception for the TransferFuture Implies the TransferFuture failed. :param exception: The exception that cause the transfer to fail. :param override: If True, override any existing state. """ with self._lock: if not self.done() or override: self._exception = exception self._status = 'failed' def result(self): """Waits until TransferFuture is done and returns the result If the TransferFuture succeeded, it will return the result. If the TransferFuture failed, it will raise the exception associated to the failure. """ # Doing a wait() with no timeout cannot be interrupted in python2 but # can be interrupted in python3 so we just wait with the largest # possible value integer value, which is on the scale of billions of # years... self._done_event.wait(MAXINT) # Once done waiting, raise an exception if present or return the # final result. if self._exception: raise self._exception return self._result def cancel(self, msg='', exc_type=CancelledError): """Cancels the TransferFuture :param msg: The message to attach to the cancellation :param exc_type: The type of exception to set for the cancellation """ with self._lock: if not self.done(): should_announce_done = False logger.debug('%s cancel(%s) called', self, msg) self._exception = exc_type(msg) if self._status == 'not-started': should_announce_done = True self._status = 'cancelled' if should_announce_done: self.announce_done() def set_status_to_queued(self): """Sets the TransferFutrue's status to running""" self._transition_to_non_done_state('queued') def set_status_to_running(self): """Sets the TransferFuture's status to running""" self._transition_to_non_done_state('running') def _transition_to_non_done_state(self, desired_state): with self._lock: if self.done(): raise RuntimeError( 'Unable to transition from done state %s to non-done ' 'state %s.' % (self.status, desired_state)) self._status = desired_state def submit(self, executor, task, tag=None): """Submits a task to a provided executor :type executor: s3transfer.futures.BoundedExecutor :param executor: The executor to submit the callable to :type task: s3transfer.tasks.Task :param task: The task to submit to the executor :type tag: s3transfer.futures.TaskTag :param tag: A tag to associate to the submitted task :rtype: concurrent.futures.Future :returns: A future representing the submitted task """ logger.debug( "Submitting task %s to executor %s for transfer request: %s." % ( task, executor, self.transfer_id) ) future = executor.submit(task, tag=tag) # Add this created future to the list of associated future just # in case it is needed during cleanups. self.add_associated_future(future) future.add_done_callback( FunctionContainer(self.remove_associated_future, future)) return future def done(self): """Determines if a TransferFuture has completed :returns: False if status is equal to 'failed', 'cancelled', or 'success'. 
True, otherwise """ return self.status in ['failed', 'cancelled', 'success'] def add_associated_future(self, future): """Adds a future to be associated with the TransferFuture""" with self._associated_futures_lock: self._associated_futures.add(future) def remove_associated_future(self, future): """Removes a future's association to the TransferFuture""" with self._associated_futures_lock: self._associated_futures.remove(future) def add_done_callback(self, function, *args, **kwargs): """Add a done callback to be invoked when transfer is done""" with self._done_callbacks_lock: self._done_callbacks.append( FunctionContainer(function, *args, **kwargs) ) def add_failure_cleanup(self, function, *args, **kwargs): """Adds a callback to call upon failure""" with self._failure_cleanups_lock: self._failure_cleanups.append( FunctionContainer(function, *args, **kwargs)) def announce_done(self): """Announce that future is done running and run associated callbacks This will run any failure cleanups if the transfer failed if not they have not been run, allows the result() to be unblocked, and will run any done callbacks associated to the TransferFuture if they have not already been ran. """ if self.status != 'success': self._run_failure_cleanups() self._done_event.set() self._run_done_callbacks() def _run_done_callbacks(self): # Run the callbacks and remove the callbacks from the internal # list so they do not get ran again if done is announced more than # once. with self._done_callbacks_lock: self._run_callbacks(self._done_callbacks) self._done_callbacks = [] def _run_failure_cleanups(self): # Run the cleanup callbacks and remove the callbacks from the internal # list so they do not get ran again if done is announced more than # once. with self._failure_cleanups_lock: self._run_callbacks(self.failure_cleanups) self._failure_cleanups = [] def _run_callbacks(self, callbacks): for callback in callbacks: self._run_callback(callback) def _run_callback(self, callback): try: callback() # We do not want a callback interrupting the process, especially # in the failure cleanups. So log and catch, the excpetion. except Exception: logger.debug("Exception raised in %s." % callback, exc_info=True) class BoundedExecutor(object): EXECUTOR_CLS = futures.ThreadPoolExecutor def __init__(self, max_size, max_num_threads, tag_semaphores=None, executor_cls=None): """An executor implentation that has a maximum queued up tasks The executor will block if the number of tasks that have been submitted and is currently working on is past its maximum. :params max_size: The maximum number of inflight futures. An inflight future means that the task is either queued up or is currently being executed. A size of None or 0 means that the executor will have no bound in terms of the number of inflight futures. :params max_num_threads: The maximum number of threads the executor uses. :type tag_semaphores: dict :params tag_semaphores: A dictionary where the key is the name of the tag and the value is the semaphore to use when limiting the number of tasks the executor is processing at a time. :type executor_cls: BaseExecutor :param underlying_executor_cls: The executor class that get bounded by this executor. If None is provided, the concurrent.futures.ThreadPoolExecutor class is used. 
""" self._max_num_threads = max_num_threads if executor_cls is None: executor_cls = self.EXECUTOR_CLS self._executor = executor_cls(max_workers=self._max_num_threads) self._semaphore = TaskSemaphore(max_size) self._tag_semaphores = tag_semaphores def submit(self, task, tag=None, block=True): """Submit a task to complete :type task: s3transfer.tasks.Task :param task: The task to run __call__ on :type tag: s3transfer.futures.TaskTag :param tag: An optional tag to associate to the task. This is used to override which semaphore to use. :type block: boolean :param block: True if to wait till it is possible to submit a task. False, if not to wait and raise an error if not able to submit a task. :returns: The future assocaited to the submitted task """ semaphore = self._semaphore # If a tag was provided, use the semaphore associated to that # tag. if tag: semaphore = self._tag_semaphores[tag] # Call acquire on the semaphore. acquire_token = semaphore.acquire(task.transfer_id, block) # Create a callback to invoke when task is done in order to call # release on the semaphore. release_callback = FunctionContainer( semaphore.release, task.transfer_id, acquire_token) # Submit the task to the underlying executor. future = ExecutorFuture(self._executor.submit(task)) # Add the Semaphore.release() callback to the future such that # it is invoked once the future completes. future.add_done_callback(release_callback) return future def shutdown(self, wait=True): self._executor.shutdown(wait) class ExecutorFuture(object): def __init__(self, future): """A future returned from the executor Currently, it is just a wrapper around a concurrent.futures.Future. However, this can eventually grow to implement the needed functionality of concurrent.futures.Future if we move off of the library and not affect the rest of the codebase. :type future: concurrent.futures.Future :param future: The underlying future """ self._future = future def result(self): return self._future.result() def add_done_callback(self, fn): """Adds a callback to be completed once future is done :parm fn: A callable that takes no arguments. Note that is different than concurrent.futures.Future.add_done_callback that requires a single argument for the future. """ # The done callback for concurrent.futures.Future will always pass a # the future in as the only argument. So we need to create the # proper signature wrapper that will invoke the callback provided. def done_callback(future_passed_to_callback): return fn() self._future.add_done_callback(done_callback) def done(self): return self._future.done() class BaseExecutor(object): """Base Executor class implementation needed to work with s3transfer""" def __init__(self, max_workers=None): pass def submit(self, fn, *args, **kwargs): raise NotImplementedError('submit()') def shutdown(self, wait=True): raise NotImplementedError('shutdown()') class NonThreadedExecutor(BaseExecutor): """A drop-in replacement non-threaded version of ThreadPoolExecutor""" def submit(self, fn, *args, **kwargs): future = NonThreadedExecutorFuture() try: result = fn(*args, **kwargs) future.set_result(result) except Exception: e, tb = sys.exc_info()[1:] logger.debug( 'Setting exception for %s to %s with traceback %s', future, e, tb ) future.set_exception_info(e, tb) return future def shutdown(self, wait=True): pass class NonThreadedExecutorFuture(object): """The Future returned from NonThreadedExecutor Note that this future is **not** thread-safe as it is being used from the context of a non-threaded environment. 
""" def __init__(self): self._result = None self._exception = None self._traceback = None self._done = False self._done_callbacks = [] def set_result(self, result): self._result = result self._set_done() def set_exception_info(self, exception, traceback): self._exception = exception self._traceback = traceback self._set_done() def result(self, timeout=None): if self._exception: six.reraise( type(self._exception), self._exception, self._traceback) return self._result def _set_done(self): self._done = True for done_callback in self._done_callbacks: self._invoke_done_callback(done_callback) self._done_callbacks = [] def _invoke_done_callback(self, done_callback): return done_callback(self) def done(self): return self._done def add_done_callback(self, fn): if self._done: self._invoke_done_callback(fn) else: self._done_callbacks.append(fn) TaskTag = namedtuple('TaskTag', ['name']) IN_MEMORY_UPLOAD_TAG = TaskTag('in_memory_upload') IN_MEMORY_DOWNLOAD_TAG = TaskTag('in_memory_download') s3transfer-0.1.13/s3transfer/manager.py000066400000000000000000000644421324114246300177560ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. import copy import logging import threading from botocore.compat import six from s3transfer.utils import get_callbacks from s3transfer.utils import signal_transferring from s3transfer.utils import signal_not_transferring from s3transfer.utils import CallArgs from s3transfer.utils import OSUtils from s3transfer.utils import TaskSemaphore from s3transfer.utils import SlidingWindowSemaphore from s3transfer.exceptions import CancelledError from s3transfer.exceptions import FatalError from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG from s3transfer.futures import IN_MEMORY_UPLOAD_TAG from s3transfer.futures import BoundedExecutor from s3transfer.futures import TransferFuture from s3transfer.futures import TransferMeta from s3transfer.futures import TransferCoordinator from s3transfer.download import DownloadSubmissionTask from s3transfer.upload import UploadSubmissionTask from s3transfer.copies import CopySubmissionTask from s3transfer.delete import DeleteSubmissionTask from s3transfer.bandwidth import LeakyBucket from s3transfer.bandwidth import BandwidthLimiter KB = 1024 MB = KB * KB logger = logging.getLogger(__name__) class TransferConfig(object): def __init__(self, multipart_threshold=8 * MB, multipart_chunksize=8 * MB, max_request_concurrency=10, max_submission_concurrency=5, max_request_queue_size=1000, max_submission_queue_size=1000, max_io_queue_size=1000, io_chunksize=256 * KB, num_download_attempts=5, max_in_memory_upload_chunks=10, max_in_memory_download_chunks=10, max_bandwidth=None): """Configurations for the transfer mangager :param multipart_threshold: The threshold for which multipart transfers occur. :param max_request_concurrency: The maximum number of S3 API transfer-related requests that can happen at a time. :param max_submission_concurrency: The maximum number of threads processing a call to a TransferManager method. 
Processing a call usually entails determining which S3 API requests need to be enqueued, but does **not** entail making any of the S3 API data transferring requests needed to perform the transfer. The threads controlled by ``max_request_concurrency`` are responsible for that. :param multipart_chunksize: The size of each transfer if a request becomes a multipart transfer. :param max_request_queue_size: The maximum amount of S3 API requests that can be queued at a time. A value of zero means that there is no maximum. :param max_submission_queue_size: The maximum amount of TransferManager method calls that can be queued at a time. A value of zero means that there is no maximum. :param max_io_queue_size: The maximum amount of read parts that can be queued to be written to disk per download. A value of zero means that there is no maximum. The default size for each element in this queue is 8 KB. :param io_chunksize: The max size of each chunk in the io queue. Currently, this is the size used when reading from the downloaded stream as well. :param num_download_attempts: The number of download attempts that will be tried upon errors with downloading an object in S3. Note that these retries account for errors that occur when streaming down the data from s3 (i.e. socket errors and read timeouts that occur after receiving an OK response from s3). Other retryable exceptions such as throttling errors and 5xx errors are already retried by botocore (this default is 5). The ``num_download_attempts`` does not take into account the number of exceptions retried by botocore. :param max_in_memory_upload_chunks: The number of chunks that can be stored in memory at a time for all ongoing upload requests. This pertains to chunks of data that need to be stored in memory during an upload if the data is sourced from a file-like object. The total maximum memory footprint due to in-memory upload chunks is roughly equal to: max_in_memory_upload_chunks * multipart_chunksize + max_submission_concurrency * multipart_chunksize ``max_submission_concurrency`` has an effect on this value because each thread pulling data off of a file-like object may be waiting with a single read chunk to be submitted for upload because the ``max_in_memory_upload_chunks`` value has been reached by the threads making the upload request. :param max_in_memory_download_chunks: The number of chunks that can be buffered in memory and **not** in the io queue at a time for all ongoing download requests. This pertains specifically to file-like objects that cannot seek. The total maximum memory footprint due to in-memory download chunks is roughly equal to: max_in_memory_download_chunks * multipart_chunksize :param max_bandwidth: The maximum bandwidth that will be consumed in uploading and downloading file content. The value is in terms of bytes per second.
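    As an illustrative sketch only (the values below are arbitrary
    placeholders, not recommendations), a config that lowers concurrency and
    caps bandwidth might be constructed as::

        config = TransferConfig(
            multipart_threshold=16 * MB,
            multipart_chunksize=16 * MB,
            max_request_concurrency=4,
            max_bandwidth=2 * MB,  # bytes per second
        )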
""" self.multipart_threshold = multipart_threshold self.multipart_chunksize = multipart_chunksize self.max_request_concurrency = max_request_concurrency self.max_submission_concurrency = max_submission_concurrency self.max_request_queue_size = max_request_queue_size self.max_submission_queue_size = max_submission_queue_size self.max_io_queue_size = max_io_queue_size self.io_chunksize = io_chunksize self.num_download_attempts = num_download_attempts self.max_in_memory_upload_chunks = max_in_memory_upload_chunks self.max_in_memory_download_chunks = max_in_memory_download_chunks self.max_bandwidth = max_bandwidth self._validate_attrs_are_nonzero() def _validate_attrs_are_nonzero(self): for attr, attr_val, in self.__dict__.items(): if attr_val is not None and attr_val <= 0: raise ValueError( 'Provided parameter %s of value %s must be greater than ' '0.' % (attr, attr_val)) class TransferManager(object): ALLOWED_DOWNLOAD_ARGS = [ 'VersionId', 'SSECustomerAlgorithm', 'SSECustomerKey', 'SSECustomerKeyMD5', 'RequestPayer', ] ALLOWED_UPLOAD_ARGS = [ 'ACL', 'CacheControl', 'ContentDisposition', 'ContentEncoding', 'ContentLanguage', 'ContentType', 'Expires', 'GrantFullControl', 'GrantRead', 'GrantReadACP', 'GrantWriteACP', 'Metadata', 'RequestPayer', 'ServerSideEncryption', 'StorageClass', 'SSECustomerAlgorithm', 'SSECustomerKey', 'SSECustomerKeyMD5', 'SSEKMSKeyId', 'WebsiteRedirectLocation' ] ALLOWED_COPY_ARGS = ALLOWED_UPLOAD_ARGS + [ 'CopySourceIfMatch', 'CopySourceIfModifiedSince', 'CopySourceIfNoneMatch', 'CopySourceIfUnmodifiedSince', 'CopySourceSSECustomerAlgorithm', 'CopySourceSSECustomerKey', 'CopySourceSSECustomerKeyMD5', 'MetadataDirective' ] ALLOWED_DELETE_ARGS = [ 'MFA', 'VersionId', 'RequestPayer', ] def __init__(self, client, config=None, osutil=None, executor_cls=None): """A transfer manager interface for Amazon S3 :param client: Client to be used by the manager :param config: TransferConfig to associate specific configurations :param osutil: OSUtils object to use for os-related behavior when using with transfer manager. :type executor_cls: s3transfer.futures.BaseExecutor :param executor_cls: The class of executor to use with the transfer manager. By default, concurrent.futures.ThreadPoolExecutor is used. """ self._client = client self._config = config if config is None: self._config = TransferConfig() self._osutil = osutil if osutil is None: self._osutil = OSUtils() self._coordinator_controller = TransferCoordinatorController() # A counter to create unique id's for each transfer submitted. self._id_counter = 0 # The executor responsible for making S3 API transfer requests self._request_executor = BoundedExecutor( max_size=self._config.max_request_queue_size, max_num_threads=self._config.max_request_concurrency, tag_semaphores={ IN_MEMORY_UPLOAD_TAG: TaskSemaphore( self._config.max_in_memory_upload_chunks), IN_MEMORY_DOWNLOAD_TAG: SlidingWindowSemaphore( self._config.max_in_memory_download_chunks) }, executor_cls=executor_cls ) # The executor responsible for submitting the necessary tasks to # perform the desired transfer self._submission_executor = BoundedExecutor( max_size=self._config.max_submission_queue_size, max_num_threads=self._config.max_submission_concurrency, executor_cls=executor_cls ) # There is one thread available for writing to disk. It will handle # downloads for all files. 
self._io_executor = BoundedExecutor( max_size=self._config.max_io_queue_size, max_num_threads=1, executor_cls=executor_cls ) # The component responsible for limiting bandwidth usage if it # is configured. self._bandwidth_limiter = None if self._config.max_bandwidth is not None: logger.debug( 'Setting max_bandwidth to %s', self._config.max_bandwidth) leaky_bucket = LeakyBucket(self._config.max_bandwidth) self._bandwidth_limiter = BandwidthLimiter(leaky_bucket) self._register_handlers() def upload(self, fileobj, bucket, key, extra_args=None, subscribers=None): """Uploads a file to S3 :type fileobj: str or seekable file-like object :param fileobj: The name of a file to upload or a seekable file-like object to upload. It is recommended to use a filename because file-like objects may result in higher memory usage. :type bucket: str :param bucket: The name of the bucket to upload to :type key: str :param key: The name of the key to upload to :type extra_args: dict :param extra_args: Extra arguments that may be passed to the client operation :type subscribers: list(s3transfer.subscribers.BaseSubscriber) :param subscribers: The list of subscribers to be invoked in the order provided based on the event emit during the process of the transfer request. :rtype: s3transfer.futures.TransferFuture :returns: Transfer future representing the upload """ if extra_args is None: extra_args = {} if subscribers is None: subscribers = [] self._validate_all_known_args(extra_args, self.ALLOWED_UPLOAD_ARGS) call_args = CallArgs( fileobj=fileobj, bucket=bucket, key=key, extra_args=extra_args, subscribers=subscribers ) extra_main_kwargs = {} if self._bandwidth_limiter: extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter return self._submit_transfer( call_args, UploadSubmissionTask, extra_main_kwargs) def download(self, bucket, key, fileobj, extra_args=None, subscribers=None): """Downloads a file from S3 :type bucket: str :param bucket: The name of the bucket to download from :type key: str :param key: The name of the key to download from :type fileobj: str :param fileobj: The name of a file to download to. :type extra_args: dict :param extra_args: Extra arguments that may be passed to the client operation :type subscribers: list(s3transfer.subscribers.BaseSubscriber) :param subscribers: The list of subscribers to be invoked in the order provided based on the event emit during the process of the transfer request. :rtype: s3transfer.futures.TransferFuture :returns: Transfer future representing the download """ if extra_args is None: extra_args = {} if subscribers is None: subscribers = [] self._validate_all_known_args(extra_args, self.ALLOWED_DOWNLOAD_ARGS) call_args = CallArgs( bucket=bucket, key=key, fileobj=fileobj, extra_args=extra_args, subscribers=subscribers ) extra_main_kwargs = {'io_executor': self._io_executor} if self._bandwidth_limiter: extra_main_kwargs['bandwidth_limiter'] = self._bandwidth_limiter return self._submit_transfer( call_args, DownloadSubmissionTask, extra_main_kwargs) def copy(self, copy_source, bucket, key, extra_args=None, subscribers=None, source_client=None): """Copies a file in S3 :type copy_source: dict :param copy_source: The name of the source bucket, key name of the source object, and optional version ID of the source object. The dictionary format is: ``{'Bucket': 'bucket', 'Key': 'key', 'VersionId': 'id'}``. Note that the ``VersionId`` key is optional and may be omitted. 
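        A minimal call might look like the following (an illustrative sketch;
        the bucket and key names are placeholders)::

            manager.copy(
                copy_source={'Bucket': 'source-bucket', 'Key': 'source-key'},
                bucket='dest-bucket',
                key='dest-key',
            )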
:type bucket: str :param bucket: The name of the bucket to copy to :type key: str :param key: The name of the key to copy to :type extra_args: dict :param extra_args: Extra arguments that may be passed to the client operation :type subscribers: a list of subscribers :param subscribers: The list of subscribers to be invoked in the order provided based on the event emit during the process of the transfer request. :type source_client: botocore or boto3 Client :param source_client: The client to be used for operation that may happen at the source object. For example, this client is used for the head_object that determines the size of the copy. If no client is provided, the transfer manager's client is used as the client for the source object. :rtype: s3transfer.futures.TransferFuture :returns: Transfer future representing the copy """ if extra_args is None: extra_args = {} if subscribers is None: subscribers = [] if source_client is None: source_client = self._client self._validate_all_known_args(extra_args, self.ALLOWED_COPY_ARGS) call_args = CallArgs( copy_source=copy_source, bucket=bucket, key=key, extra_args=extra_args, subscribers=subscribers, source_client=source_client ) return self._submit_transfer(call_args, CopySubmissionTask) def delete(self, bucket, key, extra_args=None, subscribers=None): """Delete an S3 object. :type bucket: str :param bucket: The name of the bucket. :type key: str :param key: The name of the S3 object to delete. :type extra_args: dict :param extra_args: Extra arguments that may be passed to the DeleteObject call. :type subscribers: list :param subscribers: A list of subscribers to be invoked during the process of the transfer request. Note that the ``on_progress`` callback is not invoked during object deletion. :rtype: s3transfer.futures.TransferFuture :return: Transfer future representing the deletion. """ if extra_args is None: extra_args = {} if subscribers is None: subscribers = [] self._validate_all_known_args(extra_args, self.ALLOWED_DELETE_ARGS) call_args = CallArgs( bucket=bucket, key=key, extra_args=extra_args, subscribers=subscribers ) return self._submit_transfer(call_args, DeleteSubmissionTask) def _validate_all_known_args(self, actual, allowed): for kwarg in actual: if kwarg not in allowed: raise ValueError( "Invalid extra_args key '%s', " "must be one of: %s" % ( kwarg, ', '.join(allowed))) def _submit_transfer(self, call_args, submission_task_cls, extra_main_kwargs=None): if not extra_main_kwargs: extra_main_kwargs = {} # Create a TransferFuture to return back to the user transfer_future, components = self._get_future_with_components( call_args) # Add any provided done callbacks to the created transfer future # to be invoked on the transfer future being complete. for callback in get_callbacks(transfer_future, 'done'): components['coordinator'].add_done_callback(callback) # Get the main kwargs needed to instantiate the submission task main_kwargs = self._get_submission_task_main_kwargs( transfer_future, extra_main_kwargs) # Submit a SubmissionTask that will submit all of the necessary # tasks needed to complete the S3 transfer. 
self._submission_executor.submit( submission_task_cls( transfer_coordinator=components['coordinator'], main_kwargs=main_kwargs ) ) # Increment the unique id counter for future transfer requests self._id_counter += 1 return transfer_future def _get_future_with_components(self, call_args): transfer_id = self._id_counter # Creates a new transfer future along with its components transfer_coordinator = TransferCoordinator(transfer_id=transfer_id) # Track the transfer coordinator for transfers to manage. self._coordinator_controller.add_transfer_coordinator( transfer_coordinator) # Also make sure that the transfer coordinator is removed once # the transfer completes so it does not stick around in memory. transfer_coordinator.add_done_callback( self._coordinator_controller.remove_transfer_coordinator, transfer_coordinator) components = { 'meta': TransferMeta(call_args, transfer_id=transfer_id), 'coordinator': transfer_coordinator } transfer_future = TransferFuture(**components) return transfer_future, components def _get_submission_task_main_kwargs( self, transfer_future, extra_main_kwargs): main_kwargs = { 'client': self._client, 'config': self._config, 'osutil': self._osutil, 'request_executor': self._request_executor, 'transfer_future': transfer_future } main_kwargs.update(extra_main_kwargs) return main_kwargs def _register_handlers(self): # Register handlers to enable/disable callbacks on uploads. event_name = 'request-created.s3' self._client.meta.events.register_first( event_name, signal_not_transferring, unique_id='s3upload-not-transferring') self._client.meta.events.register_last( event_name, signal_transferring, unique_id='s3upload-transferring') def __enter__(self): return self def __exit__(self, exc_type, exc_value, *args): cancel = False cancel_msg = '' cancel_exc_type = FatalError # If an exception was raised in the context handler, signal to cancel # all of the inprogress futures in the shutdown. if exc_type: cancel = True cancel_msg = six.text_type(exc_value) if not cancel_msg: cancel_msg = repr(exc_value) # If it was a KeyboardInterrupt, the cancellation was initiated # by the user. if isinstance(exc_value, KeyboardInterrupt): cancel_exc_type = CancelledError self._shutdown(cancel, cancel_msg, cancel_exc_type) def shutdown(self, cancel=False, cancel_msg=''): """Shutdown the TransferManager It will wait until all transfers complete before it completely shuts down. :type cancel: boolean :param cancel: If True, calls TransferFuture.cancel() for all in-progress transfers. This is useful if you want the shutdown to happen quicker. :type cancel_msg: str :param cancel_msg: The message to specify if canceling all in-progress transfers. """ self._shutdown(cancel, cancel_msg) def _shutdown(self, cancel, cancel_msg, exc_type=CancelledError): if cancel: # Cancel all in-flight transfers if requested, before waiting # for them to complete. self._coordinator_controller.cancel(cancel_msg, exc_type) try: # Wait until there are no more in-progress transfers. This is # wrapped in a try statement because this can be interrupted # with a KeyboardInterrupt that needs to be caught. self._coordinator_controller.wait() except KeyboardInterrupt: # If no errors were raised in the try block, the cancel should # have no coordinators it needs to run cancel on. If there was # an error raised in the try statement we want to cancel all of # the inflight transfers before shutting down to speed that # process up.
self._coordinator_controller.cancel('KeyboardInterrupt()') raise finally: # Shutdown all of the executors. self._submission_executor.shutdown() self._request_executor.shutdown() self._io_executor.shutdown() class TransferCoordinatorController(object): def __init__(self): """Abstraction to control all transfer coordinators This abstraction allows the manager to wait for inprogress transfers to complete and cancel all inprogress transfers. """ self._lock = threading.Lock() self._tracked_transfer_coordinators = set() @property def tracked_transfer_coordinators(self): """The set of transfer coordinators being tracked""" with self._lock: # We return a copy because the set is mutable and if you were to # iterate over the set, it may be changing in length due to # additions and removals of transfer coordinators. return copy.copy(self._tracked_transfer_coordinators) def add_transfer_coordinator(self, transfer_coordinator): """Adds a transfer coordinator of a transfer to be canceled if needed :type transfer_coordinator: s3transfer.futures.TransferCoordinator :param transfer_coordinator: The transfer coordinator for the particular transfer """ with self._lock: self._tracked_transfer_coordinators.add(transfer_coordinator) def remove_transfer_coordinator(self, transfer_coordinator): """Remove a transfer coordinator from cancelation consideration Typically, this method is invoked by the transfer coordinator itself to remove its self when it completes its transfer. :type transfer_coordinator: s3transfer.futures.TransferCoordinator :param transfer_coordinator: The transfer coordinator for the particular transfer """ with self._lock: self._tracked_transfer_coordinators.remove(transfer_coordinator) def cancel(self, msg='', exc_type=CancelledError): """Cancels all inprogress transfers This cancels the inprogress transfers by calling cancel() on all tracked transfer coordinators. :param msg: The message to pass on to each transfer coordinator that gets cancelled. :param exc_type: The type of exception to set for the cancellation """ for transfer_coordinator in self.tracked_transfer_coordinators: transfer_coordinator.cancel(msg, exc_type) def wait(self): """Wait until there are no more inprogress transfers This will not stop when failures are encountered and not propogate any of these errors from failed transfers, but it can be interrupted with a KeyboardInterrupt. """ try: transfer_coordinator = None for transfer_coordinator in self.tracked_transfer_coordinators: transfer_coordinator.result() except KeyboardInterrupt: logger.debug('Received KeyboardInterrupt in wait()') # If Keyboard interrupt is raised while waiting for # the result, then exit out of the wait and raise the # exception if transfer_coordinator: logger.debug( 'On KeyboardInterrupt was waiting for %s', transfer_coordinator) raise except Exception: # A general exception could have been thrown because # of result(). We just want to ignore this and continue # because we at least know that the transfer coordinator # has completed. pass s3transfer-0.1.13/s3transfer/subscribers.py000066400000000000000000000070541324114246300206660ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. 
This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. from botocore.compat import six from s3transfer.compat import accepts_kwargs from s3transfer.exceptions import InvalidSubscriberMethodError class BaseSubscriber(object): """The base subscriber class It is recommended that all subscriber implementations subclass and then override the subscription methods (i.e. on_{subsribe_type}() methods). """ VALID_SUBSCRIBER_TYPES = [ 'queued', 'progress', 'done' ] def __new__(cls, *args, **kwargs): cls._validate_subscriber_methods() return super(BaseSubscriber, cls).__new__(cls) @classmethod def _validate_subscriber_methods(cls): for subscriber_type in cls.VALID_SUBSCRIBER_TYPES: subscriber_method = getattr(cls, 'on_' + subscriber_type) if not six.callable(subscriber_method): raise InvalidSubscriberMethodError( 'Subscriber method %s must be callable.' % subscriber_method) if not accepts_kwargs(subscriber_method): raise InvalidSubscriberMethodError( 'Subscriber method %s must accept keyword ' 'arguments (**kwargs)' % subscriber_method) def on_queued(self, future, **kwargs): """Callback to be invoked when transfer request gets queued This callback can be useful for: * Keeping track of how many transfers have been requested * Providing the expected transfer size through future.meta.provide_transfer_size() so a HeadObject would not need to be made for copies and downloads. :type future: s3transfer.futures.TransferFuture :param future: The TransferFuture representing the requested transfer. """ pass def on_progress(self, future, bytes_transferred, **kwargs): """Callback to be invoked when progress is made on transfer This callback can be useful for: * Recording and displaying progress :type future: s3transfer.futures.TransferFuture :param future: The TransferFuture representing the requested transfer. :type bytes_transferred: int :param bytes_transferred: The number of bytes transferred for that invocation of the callback. Note that a negative amount can be provided, which usually indicates that an in-progress request needed to be retried and thus progress was rewound. """ pass def on_done(self, future, **kwargs): """Callback to be invoked once a transfer is done This callback can be useful for: * Recording and displaying whether the transfer succeeded or failed using future.result() * Running some task after the transfer completed like changing the last modified time of a downloaded file. :type future: s3transfer.futures.TransferFuture :param future: The TransferFuture representing the requested transfer. """ pass s3transfer-0.1.13/s3transfer/tasks.py000066400000000000000000000372021324114246300174630ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
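# ---------------------------------------------------------------------------
# Illustrative sketch only (not part of this module): the subscriber
# interface defined above in s3transfer/subscribers.py is normally used by
# subclassing BaseSubscriber and overriding the on_* hooks; every hook must
# accept **kwargs. The class name below is hypothetical.
# ---------------------------------------------------------------------------
from s3transfer.subscribers import BaseSubscriber


class _ExamplePrintingSubscriber(BaseSubscriber):
    """Hypothetical subscriber that prints transfer lifecycle events."""

    def on_queued(self, future, **kwargs):
        print('queued: %s' % future.meta.call_args.key)

    def on_progress(self, future, bytes_transferred, **kwargs):
        # Note: bytes_transferred can be negative when a retry rewinds
        # previously reported progress.
        print('progress: %s bytes' % bytes_transferred)

    def on_done(self, future, **kwargs):
        print('done: %s' % future.meta.call_args.key)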
import copy import logging from s3transfer.utils import get_callbacks logger = logging.getLogger(__name__) class Task(object): """A task associated to a TransferFuture request This is a base class for other classes to subclass from. All subclassed classes must implement the main() method. """ def __init__(self, transfer_coordinator, main_kwargs=None, pending_main_kwargs=None, done_callbacks=None, is_final=False): """ :type transfer_coordinator: s3transfer.futures.TransferCoordinator :param transfer_coordinator: The context associated to the TransferFuture for which this Task is associated with. :type main_kwargs: dict :param main_kwargs: The keyword args that can be immediately supplied to the _main() method of the task :type pending_main_kwargs: dict :param pending_main_kwargs: The keyword args that are depended upon by the result from a dependent future(s). The result returned by the future(s) will be used as the value for the keyword argument when _main() is called. The values for each key can be: * a single future - Once completed, its value will be the result of that single future * a list of futures - Once all of the futures complete, the value used will be a list of each completed future result value in order of when they were originally supplied. :type done_callbacks: list of callbacks :param done_callbacks: A list of callbacks to call once the task is done completing. Each callback will be called with no arguments and will be called no matter if the task succeeds or an exception is raised. :type is_final: boolean :param is_final: True, to indicate that this task is the final task for the TransferFuture request. By setting this value to True, it will set the result of the entire TransferFuture to the result returned by this task's main() method. """ self._transfer_coordinator = transfer_coordinator self._main_kwargs = main_kwargs if self._main_kwargs is None: self._main_kwargs = {} self._pending_main_kwargs = pending_main_kwargs if pending_main_kwargs is None: self._pending_main_kwargs = {} self._done_callbacks = done_callbacks if self._done_callbacks is None: self._done_callbacks = [] self._is_final = is_final def __repr__(self): # These are the general main_kwarg parameters that we want to # display in the repr. params_to_display = [ 'bucket', 'key', 'part_number', 'final_filename', 'transfer_future', 'offset', 'extra_args' ] main_kwargs_to_display = self._get_kwargs_with_params_to_include( self._main_kwargs, params_to_display) return '%s(transfer_id=%s, %s)' % ( self.__class__.__name__, self._transfer_coordinator.transfer_id, main_kwargs_to_display) @property def transfer_id(self): """The id for the transfer request that the task belongs to""" return self._transfer_coordinator.transfer_id def _get_kwargs_with_params_to_include(self, kwargs, include): filtered_kwargs = {} for param in include: if param in kwargs: filtered_kwargs[param] = kwargs[param] return filtered_kwargs def _get_kwargs_with_params_to_exclude(self, kwargs, exclude): filtered_kwargs = {} for param, value in kwargs.items(): if param in exclude: continue filtered_kwargs[param] = value return filtered_kwargs def __call__(self): """The callable to use when submitting a Task to an executor""" try: # Wait for all of futures this task depends on. self._wait_on_dependent_futures() # Gather up all of the main keyword arguments for main(). # This includes the immediately provided main_kwargs and # the values for pending_main_kwargs that source from the return # values from the task's depenent futures. 
kwargs = self._get_all_main_kwargs() # If the task is not done (really only if some other task related # to the TransferFuture had failed) then execute the task's # main() method. if not self._transfer_coordinator.done(): return self._execute_main(kwargs) except Exception as e: self._log_and_set_exception(e) finally: # Run any done callbacks associated to the task no matter what. for done_callback in self._done_callbacks: done_callback() if self._is_final: # If this is the final task announce that it is done if results # are waiting on its completion. self._transfer_coordinator.announce_done() def _execute_main(self, kwargs): # Do not display keyword args that should not be printed, especially # if they are going to make the logs hard to follow. params_to_exclude = ['data'] kwargs_to_display = self._get_kwargs_with_params_to_exclude( kwargs, params_to_exclude) # Log what is about to be executed. logger.debug( "Executing task %s with kwargs %s" % (self, kwargs_to_display) ) return_value = self._main(**kwargs) # If the task is the final task, then set the TransferFuture's # value to the return value from main(). if self._is_final: self._transfer_coordinator.set_result(return_value) return return_value def _log_and_set_exception(self, exception): # If an exception is ever thrown then set the exception for the # entire TransferFuture. logger.debug("Exception raised.", exc_info=True) self._transfer_coordinator.set_exception(exception) def _main(self, **kwargs): """The method that will be run in the executor This method must be implemented by subclasses from Task. main() can be implemented with any arguments decided upon by the subclass. """ raise NotImplementedError('_main() must be implemented') def _wait_on_dependent_futures(self): # Gather all of the futures that main() depends on. futures_to_wait_on = [] for _, future in self._pending_main_kwargs.items(): # If the pending main keyword arg is a list then extend the list. if isinstance(future, list): futures_to_wait_on.extend(future) # If the pending main keyword arg is a future append it to the list. else: futures_to_wait_on.append(future) # Now wait for all of the futures to complete. self._wait_until_all_complete(futures_to_wait_on) def _wait_until_all_complete(self, futures): # This is a basic implementation of the concurrent.futures.wait() # # concurrent.futures.wait() is not used because of this # reported issue: https://bugs.python.org/issue20319. # The issue would occasionally cause multipart uploads to hang # when wait() was called. With this approach, it avoids the # concurrency bug by removing any association with concurrent.futures # implementation of waiters. logger.debug( '%s about to wait for the following futures %s', self, futures) for future in futures: try: logger.debug('%s about to wait for %s', self, future) future.result() except Exception: # result() can also produce exceptions. We want to ignore # these to be deferred to error handling down the road. pass logger.debug('%s done waiting for dependent futures', self) def _get_all_main_kwargs(self): # Copy over all of the kwargs that we know are available. kwargs = copy.copy(self._main_kwargs) # Iterate through the kwargs whose values are pending on the result # of a future. for key, pending_value in self._pending_main_kwargs.items(): # If the value is a list of futures, iterate through the list # appending on the result from each future.
if isinstance(pending_value, list): result = [] for future in pending_value: result.append(future.result()) # Otherwise if the pending_value is a future, just wait for it. else: result = pending_value.result() # Add the retrieved value to the kwargs to be sent to the # main() call. kwargs[key] = result return kwargs class SubmissionTask(Task): """A base class for any submission task Submission tasks are the top-level tasks used to submit a series of tasks to execute a particular transfer. """ def _main(self, transfer_future, **kwargs): """ :type transfer_future: s3transfer.futures.TransferFuture :param transfer_future: The transfer future associated with the transfer request that tasks are being submitted for :param kwargs: Any additional kwargs that you may want to pass to the _submit() method """ try: self._transfer_coordinator.set_status_to_queued() # Before submitting any tasks, run all of the on_queued callbacks on_queued_callbacks = get_callbacks(transfer_future, 'queued') for on_queued_callback in on_queued_callbacks: on_queued_callback() # Once callbacks have been run, set the status to running. self._transfer_coordinator.set_status_to_running() # Call the submit method to start submitting tasks to execute the # transfer. self._submit(transfer_future=transfer_future, **kwargs) except BaseException as e: # If there was an exception raised during the submission of tasks, # there is a chance that the final task that signals if a transfer # is done and to run the cleanup may never have been submitted in # the first place so we need to account accordingly. # # Note that BaseException is caught, instead of Exception, because # for some implementations of executors, specifically the serial # implementation, the SubmissionTask is directly exposed to # KeyboardInterrupts and so needs to clean up and signal done # for those as well. # Set the exception that caused the process to fail. self._log_and_set_exception(e) # Wait for all possibly associated futures that may have spawned # from this submission task to finish before we announce the # transfer done. self._wait_for_all_submitted_futures_to_complete() # Announce the transfer as done, which will run any cleanups # and done callbacks as well. self._transfer_coordinator.announce_done() def _submit(self, transfer_future, **kwargs): """The submission method to be implemented :type transfer_future: s3transfer.futures.TransferFuture :param transfer_future: The transfer future associated with the transfer request that tasks are being submitted for :param kwargs: Any additional keyword arguments you want to be passed in """ raise NotImplementedError('_submit() must be implemented') def _wait_for_all_submitted_futures_to_complete(self): # We want to wait for all futures that were submitted to # complete as we do not want the cleanup callbacks or done callbacks # to be called too early. The main problem is any task that was # submitted may have submitted even more tasks while it was running # and so we need to account accordingly. # First get all of the futures that were submitted up to this point. submitted_futures = self._transfer_coordinator.associated_futures while submitted_futures: # Wait for those futures to complete. self._wait_until_all_complete(submitted_futures) # However, more futures may have been submitted as we waited so # we need to check again for any more associated futures.
possibly_more_submitted_futures = \ self._transfer_coordinator.associated_futures # If the current list of submitted futures is equal to the # the list of associated futures for when after the wait completes, # we can ensure no more futures were submitted in waiting on # the current list of futures to complete ultimately meaning all # futures that may have spawned from the original submission task # have completed. if submitted_futures == possibly_more_submitted_futures: break submitted_futures = possibly_more_submitted_futures class CreateMultipartUploadTask(Task): """Task to initiate a multipart upload""" def _main(self, client, bucket, key, extra_args): """ :param client: The client to use when calling CreateMultipartUpload :param bucket: The name of the bucket to upload to :param key: The name of the key to upload to :param extra_args: A dictionary of any extra arguments that may be used in the intialization. :returns: The upload id of the multipart upload """ # Create the multipart upload. response = client.create_multipart_upload( Bucket=bucket, Key=key, **extra_args) upload_id = response['UploadId'] # Add a cleanup if the multipart upload fails at any point. self._transfer_coordinator.add_failure_cleanup( client.abort_multipart_upload, Bucket=bucket, Key=key, UploadId=upload_id ) return upload_id class CompleteMultipartUploadTask(Task): """Task to complete a multipart upload""" def _main(self, client, bucket, key, upload_id, parts, extra_args): """ :param client: The client to use when calling CompleteMultipartUpload :param bucket: The name of the bucket to upload to :param key: The name of the key to upload to :param upload_id: The id of the upload :param parts: A list of parts to use to complete the multipart upload:: [{'Etag': etag_value, 'PartNumber': part_number}, ...] Each element in the list consists of a return value from ``UploadPartTask.main()``. :param extra_args: A dictionary of any extra arguments that may be used in completing the multipart transfer. """ client.complete_multipart_upload( Bucket=bucket, Key=key, UploadId=upload_id, MultipartUpload={'Parts': parts}, **extra_args) s3transfer-0.1.13/s3transfer/upload.py000066400000000000000000000715661324114246300176350ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
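# ---------------------------------------------------------------------------
# Illustrative sketch only (not part of the library): the multipart tasks
# defined above (CreateMultipartUploadTask, UploadPartTask,
# CompleteMultipartUploadTask) orchestrate roughly the client-level sequence
# below. The function and variable names here are hypothetical; ``client`` is
# assumed to be an S3 client exposing the standard multipart operations.
# ---------------------------------------------------------------------------
def _sketch_multipart_upload(client, bucket, key, chunks):
    """Upload an iterable of byte chunks as a single multipart object."""
    upload_id = client.create_multipart_upload(
        Bucket=bucket, Key=key)['UploadId']
    try:
        parts = []
        for part_number, body in enumerate(chunks, start=1):
            response = client.upload_part(
                Bucket=bucket, Key=key, UploadId=upload_id,
                PartNumber=part_number, Body=body)
            # Each entry mirrors the dict returned by UploadPartTask._main().
            parts.append({'ETag': response['ETag'],
                          'PartNumber': part_number})
        client.complete_multipart_upload(
            Bucket=bucket, Key=key, UploadId=upload_id,
            MultipartUpload={'Parts': parts})
    except Exception:
        # Mirrors the failure cleanup registered by CreateMultipartUploadTask.
        client.abort_multipart_upload(
            Bucket=bucket, Key=key, UploadId=upload_id)
        raise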
import math from botocore.compat import six from s3transfer.compat import seekable, readable from s3transfer.futures import IN_MEMORY_UPLOAD_TAG from s3transfer.tasks import Task from s3transfer.tasks import SubmissionTask from s3transfer.tasks import CreateMultipartUploadTask from s3transfer.tasks import CompleteMultipartUploadTask from s3transfer.utils import get_callbacks from s3transfer.utils import get_filtered_dict from s3transfer.utils import DeferredOpenFile, ChunksizeAdjuster class AggregatedProgressCallback(object): def __init__(self, callbacks, threshold=1024 * 256): """Aggregates progress updates for every provided progress callback :type callbacks: A list of functions that accepts bytes_transferred as a single argument :param callbacks: The callbacks to invoke when threshold is reached :type threshold: int :param threshold: The progress threshold in which to take the aggregated progress and invoke the progress callback with that aggregated progress total """ self._callbacks = callbacks self._threshold = threshold self._bytes_seen = 0 def __call__(self, bytes_transferred): self._bytes_seen += bytes_transferred if self._bytes_seen >= self._threshold: self._trigger_callbacks() def flush(self): """Flushes out any progress that has not been sent to its callbacks""" if self._bytes_seen > 0: self._trigger_callbacks() def _trigger_callbacks(self): for callback in self._callbacks: callback(bytes_transferred=self._bytes_seen) self._bytes_seen = 0 class InterruptReader(object): """Wrapper that can interrupt reading using an error It uses a transfer coordinator to propogate an error if it notices that a read is being made while the file is being read from. :type fileobj: file-like obj :param fileobj: The file-like object to read from :type transfer_coordinator: s3transfer.futures.TransferCoordinator :param transfer_coordinator: The transfer coordinator to use if the reader needs to be interrupted. """ def __init__(self, fileobj, transfer_coordinator): self._fileobj = fileobj self._transfer_coordinator = transfer_coordinator def read(self, amount=None): # If there is an exception, then raise the exception. # We raise an error instead of returning no bytes because for # requests where the content length and md5 was sent, it will # cause md5 mismatches and retries as there was no indication that # the stream being read from encountered any issues. if self._transfer_coordinator.exception: raise self._transfer_coordinator.exception return self._fileobj.read(amount) def seek(self, where): self._fileobj.seek(where) def tell(self): return self._fileobj.tell() def close(self): self._fileobj.close() def __enter__(self): return self def __exit__(self, *args, **kwargs): self.close() class UploadInputManager(object): """Base manager class for handling various types of files for uploads This class is typically used for the UploadSubmissionTask class to help determine the following: * How to determine the size of the file * How to determine if a multipart upload is required * How to retrieve the body for a PutObject * How to retrieve the bodies for a set of UploadParts The answers/implementations differ for the various types of file inputs that may be accepted. All implementations must subclass and override public methods from this class. 
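    As a rough illustration of which concrete manager handles which kind of
    input (the actual resolution order lives in UploadSubmissionTask further
    below in this module; paths shown are placeholders)::

        UploadFilenameInputManager.is_compatible('/tmp/data')              # a filename
        UploadSeekableInputManager.is_compatible(open('/tmp/data', 'rb'))  # a seekable file object
        UploadNonSeekableInputManager.is_compatible(sys.stdin.buffer)      # a non-seekable stream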
""" def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None): self._osutil = osutil self._transfer_coordinator = transfer_coordinator self._bandwidth_limiter = bandwidth_limiter @classmethod def is_compatible(cls, upload_source): """Determines if the source for the upload is compatible with manager :param upload_source: The source for which the upload will pull data from. :returns: True if the manager can handle the type of source specified otherwise returns False. """ raise NotImplementedError('must implement _is_compatible()') def stores_body_in_memory(self, operation_name): """Whether the body it provides are stored in-memory :type operation_name: str :param operation_name: The name of the client operation that the body is being used for. Valid operation_names are ``put_object`` and ``upload_part``. :rtype: boolean :returns: True if the body returned by the manager will be stored in memory. False if the manager will not directly store the body in memory. """ raise NotImplemented('must implement store_body_in_memory()') def provide_transfer_size(self, transfer_future): """Provides the transfer size of an upload :type transfer_future: s3transfer.futures.TransferFuture :param transfer_future: The future associated with upload request """ raise NotImplementedError('must implement provide_transfer_size()') def requires_multipart_upload(self, transfer_future, config): """Determines where a multipart upload is required :type transfer_future: s3transfer.futures.TransferFuture :param transfer_future: The future associated with upload request :type config: s3transfer.manager.TransferConfig :param config: The config associated to the transfer manager :rtype: boolean :returns: True, if the upload should be multipart based on configuartion and size. False, otherwise. """ raise NotImplementedError('must implement requires_multipart_upload()') def get_put_object_body(self, transfer_future): """Returns the body to use for PutObject :type transfer_future: s3transfer.futures.TransferFuture :param transfer_future: The future associated with upload request :type config: s3transfer.manager.TransferConfig :param config: The config associated to the transfer manager :rtype: s3transfer.utils.ReadFileChunk :returns: A ReadFileChunk including all progress callbacks associated with the transfer future. """ raise NotImplementedError('must implement get_put_object_body()') def yield_upload_part_bodies(self, transfer_future, chunksize): """Yields the part number and body to use for each UploadPart :type transfer_future: s3transfer.futures.TransferFuture :param transfer_future: The future associated with upload request :type chunksize: int :param chunksize: The chunksize to use for this upload. :rtype: int, s3transfer.utils.ReadFileChunk :returns: Yields the part number and the ReadFileChunk including all progress callbacks associated with the transfer future for that specific yielded part. """ raise NotImplementedError('must implement yield_upload_part_bodies()') def _wrap_fileobj(self, fileobj): fileobj = InterruptReader(fileobj, self._transfer_coordinator) if self._bandwidth_limiter: fileobj = self._bandwidth_limiter.get_bandwith_limited_stream( fileobj, self._transfer_coordinator, enabled=False) return fileobj def _get_progress_callbacks(self, transfer_future): callbacks = get_callbacks(transfer_future, 'progress') # We only want to be wrapping the callbacks if there are callbacks to # invoke because we do not want to be doing any unnecessary work if # there are no callbacks to invoke. 
if callbacks: return [AggregatedProgressCallback(callbacks)] return [] def _get_close_callbacks(self, aggregated_progress_callbacks): return [callback.flush for callback in aggregated_progress_callbacks] class UploadFilenameInputManager(UploadInputManager): """Upload utility for filenames""" @classmethod def is_compatible(cls, upload_source): return isinstance(upload_source, six.string_types) def stores_body_in_memory(self, operation_name): return False def provide_transfer_size(self, transfer_future): transfer_future.meta.provide_transfer_size( self._osutil.get_file_size( transfer_future.meta.call_args.fileobj)) def requires_multipart_upload(self, transfer_future, config): return transfer_future.meta.size >= config.multipart_threshold def get_put_object_body(self, transfer_future): # Get a file-like object for the given input fileobj, full_size = self._get_put_object_fileobj_with_full_size( transfer_future) # Wrap fileobj with interrupt reader that will quickly cancel # uploads if needed instead of having to wait for the socket # to completely read all of the data. fileobj = self._wrap_fileobj(fileobj) callbacks = self._get_progress_callbacks(transfer_future) close_callbacks = self._get_close_callbacks(callbacks) size = transfer_future.meta.size # Return the file-like object wrapped into a ReadFileChunk to get # progress. return self._osutil.open_file_chunk_reader_from_fileobj( fileobj=fileobj, chunk_size=size, full_file_size=full_size, callbacks=callbacks, close_callbacks=close_callbacks) def yield_upload_part_bodies(self, transfer_future, chunksize): full_file_size = transfer_future.meta.size num_parts = self._get_num_parts(transfer_future, chunksize) for part_number in range(1, num_parts + 1): callbacks = self._get_progress_callbacks(transfer_future) close_callbacks = self._get_close_callbacks(callbacks) start_byte = chunksize * (part_number - 1) # Get a file-like object for that part and the size of the full # file size for the associated file-like object for that part. fileobj, full_size = self._get_upload_part_fileobj_with_full_size( transfer_future.meta.call_args.fileobj, start_byte=start_byte, part_size=chunksize, full_file_size=full_file_size) # Wrap fileobj with interrupt reader that will quickly cancel # uploads if needed instead of having to wait for the socket # to completely read all of the data. fileobj = self._wrap_fileobj(fileobj) # Wrap the file-like object into a ReadFileChunk to get progress. 
read_file_chunk = self._osutil.open_file_chunk_reader_from_fileobj( fileobj=fileobj, chunk_size=chunksize, full_file_size=full_size, callbacks=callbacks, close_callbacks=close_callbacks) yield part_number, read_file_chunk def _get_deferred_open_file(self, fileobj, start_byte): fileobj = DeferredOpenFile( fileobj, start_byte, open_function=self._osutil.open) return fileobj def _get_put_object_fileobj_with_full_size(self, transfer_future): fileobj = transfer_future.meta.call_args.fileobj size = transfer_future.meta.size return self._get_deferred_open_file(fileobj, 0), size def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs): start_byte = kwargs['start_byte'] full_size = kwargs['full_file_size'] return self._get_deferred_open_file(fileobj, start_byte), full_size def _get_num_parts(self, transfer_future, part_size): return int( math.ceil(transfer_future.meta.size / float(part_size))) class UploadSeekableInputManager(UploadFilenameInputManager): """Upload utility for an open file object""" @classmethod def is_compatible(cls, upload_source): return readable(upload_source) and seekable(upload_source) def stores_body_in_memory(self, operation_name): if operation_name == 'put_object': return False else: return True def provide_transfer_size(self, transfer_future): fileobj = transfer_future.meta.call_args.fileobj # To determine size, first determine the starting position # Seek to the end and then find the difference in the length # between the end and start positions. start_position = fileobj.tell() fileobj.seek(0, 2) end_position = fileobj.tell() fileobj.seek(start_position) transfer_future.meta.provide_transfer_size( end_position - start_position) def _get_upload_part_fileobj_with_full_size(self, fileobj, **kwargs): # Note: It is unfortunate that in order to do a multithreaded # multipart upload we cannot simply copy the filelike object # since there is not really a mechanism in python (i.e. os.dup # points to the same OS filehandle which causes concurrency # issues). So instead we need to read from the fileobj and # chunk the data out to seperate file-like objects in memory. data = fileobj.read(kwargs['part_size']) # We return the length of the data instead of the full_file_size # because we partitioned the data into seperate BytesIO objects # meaning the BytesIO object has no knowledge of its start position # relative the input source nor access to the rest of the input # source. So we must treat it as its own standalone file. return six.BytesIO(data), len(data) def _get_put_object_fileobj_with_full_size(self, transfer_future): fileobj = transfer_future.meta.call_args.fileobj # The current position needs to be taken into account when retrieving # the full size of the file. size = fileobj.tell() + transfer_future.meta.size return fileobj, size class UploadNonSeekableInputManager(UploadInputManager): """Upload utility for a file-like object that cannot seek.""" def __init__(self, osutil, transfer_coordinator, bandwidth_limiter=None): super(UploadNonSeekableInputManager, self).__init__( osutil, transfer_coordinator, bandwidth_limiter) self._initial_data = b'' @classmethod def is_compatible(cls, upload_source): return readable(upload_source) def stores_body_in_memory(self, operation_name): return True def provide_transfer_size(self, transfer_future): # No-op because there is no way to do this short of reading the entire # body into memory. return def requires_multipart_upload(self, transfer_future, config): # If the user has set the size, we can use that. 
if transfer_future.meta.size is not None: return transfer_future.meta.size >= config.multipart_threshold # This is tricky to determine in this case because we can't know how # large the input is. So to figure it out, we read data into memory # up until the threshold and compare how much data was actually read # against the threshold. fileobj = transfer_future.meta.call_args.fileobj threshold = config.multipart_threshold self._initial_data = self._read(fileobj, threshold, False) if len(self._initial_data) < threshold: return False else: return True def get_put_object_body(self, transfer_future): callbacks = self._get_progress_callbacks(transfer_future) close_callbacks = self._get_close_callbacks(callbacks) fileobj = transfer_future.meta.call_args.fileobj body = self._wrap_data( self._initial_data + fileobj.read(), callbacks, close_callbacks) # Zero out the stored data so we don't have additional copies # hanging around in memory. self._initial_data = None return body def yield_upload_part_bodies(self, transfer_future, chunksize): file_object = transfer_future.meta.call_args.fileobj part_number = 0 # Continue reading parts from the file-like object until it is empty. while True: callbacks = self._get_progress_callbacks(transfer_future) close_callbacks = self._get_close_callbacks(callbacks) part_number += 1 part_content = self._read(file_object, chunksize) if not part_content: break part_object = self._wrap_data( part_content, callbacks, close_callbacks) # Zero out part_content to avoid hanging on to additional data. part_content = None yield part_number, part_object def _read(self, fileobj, amount, truncate=True): """ Reads a specific amount of data from a stream and returns it. If there is any data in initial_data, that will be popped out first. :type fileobj: A file-like object that implements read :param fileobj: The stream to read from. :type amount: int :param amount: The number of bytes to read from the stream. :type truncate: bool :param truncate: Whether or not to truncate initial_data after reading from it. :return: Generator which generates part bodies from the initial data. """ # If the the initial data is empty, we simply read from the fileobj if len(self._initial_data) == 0: return fileobj.read(amount) # If the requested number of bytes is less thant the amount of # initial data, pull entirely from initial data. if amount <= len(self._initial_data): data = self._initial_data[:amount] # Truncate initial data so we don't hang onto the data longer # than we need. if truncate: self._initial_data = self._initial_data[amount:] return data # At this point there is some initial data left, but not enough to # satisfy the number of bytes requested. Pull out the remaining # initial data and read the rest from the fileobj. amount_to_read = amount - len(self._initial_data) data = self._initial_data + fileobj.read(amount_to_read) # Zero out initial data so we don't hang onto the data any more. if truncate: self._initial_data = b'' return data def _wrap_data(self, data, callbacks, close_callbacks): """ Wraps data with the interrupt reader and the file chunk reader. :type data: bytes :param data: The data to wrap. :type callbacks: list :param callbacks: The callbacks associated with the transfer future. :type close_callbacks: list :param close_callbacks: The callbacks to be called when closing the wrapper for the data. :return: Fully wrapped data. 
""" fileobj = self._wrap_fileobj(six.BytesIO(data)) return self._osutil.open_file_chunk_reader_from_fileobj( fileobj=fileobj, chunk_size=len(data), full_file_size=len(data), callbacks=callbacks, close_callbacks=close_callbacks) class UploadSubmissionTask(SubmissionTask): """Task for submitting tasks to execute an upload""" UPLOAD_PART_ARGS = [ 'SSECustomerKey', 'SSECustomerAlgorithm', 'SSECustomerKeyMD5', 'RequestPayer', ] COMPLETE_MULTIPART_ARGS = [ 'RequestPayer' ] def _get_upload_input_manager_cls(self, transfer_future): """Retieves a class for managing input for an upload based on file type :type transfer_future: s3transfer.futures.TransferFuture :param transfer_future: The transfer future for the request :rtype: class of UploadInputManager :returns: The appropriate class to use for managing a specific type of input for uploads. """ upload_manager_resolver_chain = [ UploadFilenameInputManager, UploadSeekableInputManager, UploadNonSeekableInputManager ] fileobj = transfer_future.meta.call_args.fileobj for upload_manager_cls in upload_manager_resolver_chain: if upload_manager_cls.is_compatible(fileobj): return upload_manager_cls raise RuntimeError( 'Input %s of type: %s is not supported.' % ( fileobj, type(fileobj))) def _submit(self, client, config, osutil, request_executor, transfer_future, bandwidth_limiter=None): """ :param client: The client associated with the transfer manager :type config: s3transfer.manager.TransferConfig :param config: The transfer config associated with the transfer manager :type osutil: s3transfer.utils.OSUtil :param osutil: The os utility associated to the transfer manager :type request_executor: s3transfer.futures.BoundedExecutor :param request_executor: The request executor associated with the transfer manager :type transfer_future: s3transfer.futures.TransferFuture :param transfer_future: The transfer future associated with the transfer request that tasks are being submitted for """ upload_input_manager = self._get_upload_input_manager_cls( transfer_future)( osutil, self._transfer_coordinator, bandwidth_limiter) # Determine the size if it was not provided if transfer_future.meta.size is None: upload_input_manager.provide_transfer_size(transfer_future) # Do a multipart upload if needed, otherwise do a regular put object. if not upload_input_manager.requires_multipart_upload( transfer_future, config): self._submit_upload_request( client, config, osutil, request_executor, transfer_future, upload_input_manager) else: self._submit_multipart_request( client, config, osutil, request_executor, transfer_future, upload_input_manager) def _submit_upload_request(self, client, config, osutil, request_executor, transfer_future, upload_input_manager): call_args = transfer_future.meta.call_args # Get any tags that need to be associated to the put object task put_object_tag = self._get_upload_task_tag( upload_input_manager, 'put_object') # Submit the request of a single upload. self._transfer_coordinator.submit( request_executor, PutObjectTask( transfer_coordinator=self._transfer_coordinator, main_kwargs={ 'client': client, 'fileobj': upload_input_manager.get_put_object_body( transfer_future), 'bucket': call_args.bucket, 'key': call_args.key, 'extra_args': call_args.extra_args }, is_final=True ), tag=put_object_tag ) def _submit_multipart_request(self, client, config, osutil, request_executor, transfer_future, upload_input_manager): call_args = transfer_future.meta.call_args # Submit the request to create a multipart upload. 
create_multipart_future = self._transfer_coordinator.submit( request_executor, CreateMultipartUploadTask( transfer_coordinator=self._transfer_coordinator, main_kwargs={ 'client': client, 'bucket': call_args.bucket, 'key': call_args.key, 'extra_args': call_args.extra_args, } ) ) # Submit requests to upload the parts of the file. part_futures = [] extra_part_args = self._extra_upload_part_args(call_args.extra_args) # Get any tags that need to be associated to the submitted task # for upload the data upload_part_tag = self._get_upload_task_tag( upload_input_manager, 'upload_part') size = transfer_future.meta.size adjuster = ChunksizeAdjuster() chunksize = adjuster.adjust_chunksize(config.multipart_chunksize, size) part_iterator = upload_input_manager.yield_upload_part_bodies( transfer_future, chunksize) for part_number, fileobj in part_iterator: part_futures.append( self._transfer_coordinator.submit( request_executor, UploadPartTask( transfer_coordinator=self._transfer_coordinator, main_kwargs={ 'client': client, 'fileobj': fileobj, 'bucket': call_args.bucket, 'key': call_args.key, 'part_number': part_number, 'extra_args': extra_part_args }, pending_main_kwargs={ 'upload_id': create_multipart_future } ), tag=upload_part_tag ) ) complete_multipart_extra_args = self._extra_complete_multipart_args( call_args.extra_args) # Submit the request to complete the multipart upload. self._transfer_coordinator.submit( request_executor, CompleteMultipartUploadTask( transfer_coordinator=self._transfer_coordinator, main_kwargs={ 'client': client, 'bucket': call_args.bucket, 'key': call_args.key, 'extra_args': complete_multipart_extra_args, }, pending_main_kwargs={ 'upload_id': create_multipart_future, 'parts': part_futures }, is_final=True ) ) def _extra_upload_part_args(self, extra_args): # Only the args in UPLOAD_PART_ARGS actually need to be passed # onto the upload_part calls. return get_filtered_dict(extra_args, self.UPLOAD_PART_ARGS) def _extra_complete_multipart_args(self, extra_args): return get_filtered_dict(extra_args, self.COMPLETE_MULTIPART_ARGS) def _get_upload_task_tag(self, upload_input_manager, operation_name): tag = None if upload_input_manager.stores_body_in_memory(operation_name): tag = IN_MEMORY_UPLOAD_TAG return tag class PutObjectTask(Task): """Task to do a nonmultipart upload""" def _main(self, client, fileobj, bucket, key, extra_args): """ :param client: The client to use when calling PutObject :param fileobj: The file to upload. :param bucket: The name of the bucket to upload to :param key: The name of the key to upload to :param extra_args: A dictionary of any extra arguments that may be used in the upload. """ with fileobj as body: client.put_object(Bucket=bucket, Key=key, Body=body, **extra_args) class UploadPartTask(Task): """Task to upload a part in a multipart upload""" def _main(self, client, fileobj, bucket, key, upload_id, part_number, extra_args): """ :param client: The client to use when calling PutObject :param fileobj: The file to upload. :param bucket: The name of the bucket to upload to :param key: The name of the key to upload to :param upload_id: The id of the upload :param part_number: The number representing the part of the multipart upload :param extra_args: A dictionary of any extra arguments that may be used in the upload. :rtype: dict :returns: A dictionary representing a part:: {'Etag': etag_value, 'PartNumber': part_number} This value can be appended to a list to be used to complete the multipart upload. 
""" with fileobj as body: response = client.upload_part( Bucket=bucket, Key=key, UploadId=upload_id, PartNumber=part_number, Body=body, **extra_args) etag = response['ETag'] return {'ETag': etag, 'PartNumber': part_number} s3transfer-0.1.13/s3transfer/utils.py000066400000000000000000000616771324114246300175130ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. import random import time import functools import math import os import stat import string import logging import threading import io from collections import defaultdict from s3transfer.compat import rename_file from s3transfer.compat import seekable MAX_PARTS = 10000 # The maximum file size you can upload via S3 per request. # See: http://docs.aws.amazon.com/AmazonS3/latest/dev/UploadingObjects.html # and: http://docs.aws.amazon.com/AmazonS3/latest/dev/qfacts.html MAX_SINGLE_UPLOAD_SIZE = 5 * (1024 ** 3) MIN_UPLOAD_CHUNKSIZE = 5 * (1024 ** 2) logger = logging.getLogger(__name__) def random_file_extension(num_digits=8): return ''.join(random.choice(string.hexdigits) for _ in range(num_digits)) def signal_not_transferring(request, operation_name, **kwargs): if operation_name in ['PutObject', 'UploadPart'] and \ hasattr(request.body, 'signal_not_transferring'): request.body.signal_not_transferring() def signal_transferring(request, operation_name, **kwargs): if operation_name in ['PutObject', 'UploadPart'] and \ hasattr(request.body, 'signal_transferring'): request.body.signal_transferring() def calculate_range_parameter(part_size, part_index, num_parts, total_size=None): """Calculate the range parameter for multipart downloads/copies :type part_size: int :param part_size: The size of the part :type part_index: int :param part_index: The index for which this parts starts. This index starts at zero :type num_parts: int :param num_parts: The total number of parts in the transfer :returns: The value to use for Range parameter on downloads or the CopySourceRange parameter for copies """ # Used to calculate the Range parameter start_range = part_index * part_size if part_index == num_parts - 1: end_range = '' if total_size is not None: end_range = str(total_size - 1) else: end_range = start_range + part_size - 1 range_param = 'bytes=%s-%s' % (start_range, end_range) return range_param def get_callbacks(transfer_future, callback_type): """Retrieves callbacks from a subscriber :type transfer_future: s3transfer.futures.TransferFuture :param transfer_future: The transfer future the subscriber is associated to. :type callback_type: str :param callback_type: The type of callback to retrieve from the subscriber. Valid types include: * 'queued' * 'progress' * 'done' :returns: A list of callbacks for the type specified. All callbacks are preinjected with the transfer future. 
""" callbacks = [] for subscriber in transfer_future.meta.call_args.subscribers: callback_name = 'on_' + callback_type if hasattr(subscriber, callback_name): callbacks.append( functools.partial( getattr(subscriber, callback_name), future=transfer_future ) ) return callbacks def invoke_progress_callbacks(callbacks, bytes_transferred): """Calls all progress callbacks :param callbacks: A list of progress callbacks to invoke :param bytes_transferred: The number of bytes transferred. This is passed to the callbacks. If no bytes were transferred the callbacks will not be invoked because no progress was achieved. It is also possible to receive a negative amount which comes from retrying a transfer request. """ # Only invoke the callbacks if bytes were actually transferred. if bytes_transferred: for callback in callbacks: callback(bytes_transferred=bytes_transferred) def get_filtered_dict(original_dict, whitelisted_keys): """Gets a dictionary filtered by whitelisted keys :param original_dict: The original dictionary of arguments to source keys and values. :param whitelisted_key: A list of keys to include in the filtered dictionary. :returns: A dictionary containing key/values from the original dictionary whose key was included in the whitelist """ filtered_dict = {} for key, value in original_dict.items(): if key in whitelisted_keys: filtered_dict[key] = value return filtered_dict class CallArgs(object): def __init__(self, **kwargs): """A class that records call arguments The call arguments must be passed as keyword arguments. It will set each keyword argument as an attribute of the object along with its associated value. """ for arg, value in kwargs.items(): setattr(self, arg, value) class FunctionContainer(object): """An object that contains a function and any args or kwargs to call it When called the provided function will be called with provided args and kwargs. """ def __init__(self, func, *args, **kwargs): self._func = func self._args = args self._kwargs = kwargs def __repr__(self): return 'Function: %s with args %s and kwargs %s' % ( self._func, self._args, self._kwargs) def __call__(self): return self._func(*self._args, **self._kwargs) class CountCallbackInvoker(object): """An abstraction to invoke a callback when a shared count reaches zero :param callback: Callback invoke when finalized count reaches zero """ def __init__(self, callback): self._lock = threading.Lock() self._callback = callback self._count = 0 self._is_finalized = False @property def current_count(self): with self._lock: return self._count def increment(self): """Increment the count by one""" with self._lock: if self._is_finalized: raise RuntimeError( 'Counter has been finalized it can no longer be ' 'incremented.' ) self._count += 1 def decrement(self): """Decrement the count by one""" with self._lock: if self._count == 0: raise RuntimeError( 'Counter is at zero. 
It cannot dip below zero') self._count -= 1 if self._is_finalized and self._count == 0: self._callback() def finalize(self): """Finalize the counter Once finalized, the counter never be incremented and the callback can be invoked once the count reaches zero """ with self._lock: self._is_finalized = True if self._count == 0: self._callback() class OSUtils(object): def get_file_size(self, filename): return os.path.getsize(filename) def open_file_chunk_reader(self, filename, start_byte, size, callbacks): return ReadFileChunk.from_filename(filename, start_byte, size, callbacks, enable_callbacks=False) def open_file_chunk_reader_from_fileobj(self, fileobj, chunk_size, full_file_size, callbacks, close_callbacks=None): return ReadFileChunk( fileobj, chunk_size, full_file_size, callbacks=callbacks, enable_callbacks=False, close_callbacks=close_callbacks) def open(self, filename, mode): return open(filename, mode) def remove_file(self, filename): """Remove a file, noop if file does not exist.""" # Unlike os.remove, if the file does not exist, # then this method does nothing. try: os.remove(filename) except OSError: pass def rename_file(self, current_filename, new_filename): rename_file(current_filename, new_filename) def is_special_file(cls, filename): """Checks to see if a file is a special UNIX file. It checks if the file is a character special device, block special device, FIFO, or socket. :param filename: Name of the file :returns: True if the file is a special file. False, if is not. """ # If it does not exist, it must be a new file so it cannot be # a special file. if not os.path.exists(filename): return False mode = os.stat(filename).st_mode # Character special device. if stat.S_ISCHR(mode): return True # Block special device if stat.S_ISBLK(mode): return True # Named pipe / FIFO if stat.S_ISFIFO(mode): return True # Socket. if stat.S_ISSOCK(mode): return True return False class DeferredOpenFile(object): def __init__(self, filename, start_byte=0, mode='rb', open_function=open): """A class that defers the opening of a file till needed This is useful for deferring opening of a file till it is needed in a separate thread, as there is a limit of how many open files there can be in a single thread for most operating systems. The file gets opened in the following methods: ``read()``, ``seek()``, and ``__enter__()`` :type filename: str :param filename: The name of the file to open :type start_byte: int :param start_byte: The byte to seek to when the file is opened. 
:type mode: str :param mode: The mode to use to open the file :type open_function: function :param open_function: The function to use to open the file """ self._filename = filename self._fileobj = None self._start_byte = start_byte self._mode = mode self._open_function = open_function def _open_if_needed(self): if self._fileobj is None: self._fileobj = self._open_function(self._filename, self._mode) if self._start_byte != 0: self._fileobj.seek(self._start_byte) @property def name(self): return self._filename def read(self, amount=None): self._open_if_needed() return self._fileobj.read(amount) def write(self, data): self._open_if_needed() self._fileobj.write(data) def seek(self, where): self._open_if_needed() self._fileobj.seek(where) def tell(self): if self._fileobj is None: return self._start_byte return self._fileobj.tell() def close(self): if self._fileobj: self._fileobj.close() def __enter__(self): self._open_if_needed() return self def __exit__(self, *args, **kwargs): self.close() class ReadFileChunk(object): def __init__(self, fileobj, chunk_size, full_file_size, callbacks=None, enable_callbacks=True, close_callbacks=None): """ Given a file object shown below:: |___________________________________________________| 0 | | full_file_size |----chunk_size---| f.tell() :type fileobj: file :param fileobj: File like object :type chunk_size: int :param chunk_size: The max chunk size to read. Trying to read pass the end of the chunk size will behave like you've reached the end of the file. :type full_file_size: int :param full_file_size: The entire content length associated with ``fileobj``. :type callbacks: A list of function(amount_read) :param callbacks: Called whenever data is read from this object in the order provided. :type enable_callbacks: boolean :param enable_callbacks: True if to run callbacks. Otherwise, do not run callbacks :type close_callbacks: A list of function() :param close_callbacks: Called when close is called. The function should take no arguments. """ self._fileobj = fileobj self._start_byte = self._fileobj.tell() self._size = self._calculate_file_size( self._fileobj, requested_size=chunk_size, start_byte=self._start_byte, actual_file_size=full_file_size) self._amount_read = 0 self._callbacks = callbacks if callbacks is None: self._callbacks = [] self._callbacks_enabled = enable_callbacks self._close_callbacks = close_callbacks if close_callbacks is None: self._close_callbacks = close_callbacks @classmethod def from_filename(cls, filename, start_byte, chunk_size, callbacks=None, enable_callbacks=True): """Convenience factory function to create from a filename. :type start_byte: int :param start_byte: The first byte from which to start reading. :type chunk_size: int :param chunk_size: The max chunk size to read. Trying to read pass the end of the chunk size will behave like you've reached the end of the file. :type full_file_size: int :param full_file_size: The entire content length associated with ``fileobj``. :type callbacks: function(amount_read) :param callbacks: Called whenever data is read from this object. :type enable_callbacks: bool :param enable_callbacks: Indicate whether to invoke callback during read() calls. 
:rtype: ``ReadFileChunk`` :return: A new instance of ``ReadFileChunk`` """ f = open(filename, 'rb') f.seek(start_byte) file_size = os.fstat(f.fileno()).st_size return cls(f, chunk_size, file_size, callbacks, enable_callbacks) def _calculate_file_size(self, fileobj, requested_size, start_byte, actual_file_size): max_chunk_size = actual_file_size - start_byte return min(max_chunk_size, requested_size) def read(self, amount=None): if amount is None: amount_to_read = self._size - self._amount_read else: amount_to_read = min(self._size - self._amount_read, amount) data = self._fileobj.read(amount_to_read) self._amount_read += len(data) if self._callbacks is not None and self._callbacks_enabled: invoke_progress_callbacks(self._callbacks, len(data)) return data def signal_transferring(self): self.enable_callback() if hasattr(self._fileobj, 'signal_transferring'): self._fileobj.signal_transferring() def signal_not_transferring(self): self.disable_callback() if hasattr(self._fileobj, 'signal_not_transferring'): self._fileobj.signal_not_transferring() def enable_callback(self): self._callbacks_enabled = True def disable_callback(self): self._callbacks_enabled = False def seek(self, where): self._fileobj.seek(self._start_byte + where) if self._callbacks is not None and self._callbacks_enabled: # To also rewind the callback() for an accurate progress report invoke_progress_callbacks( self._callbacks, bytes_transferred=where - self._amount_read) self._amount_read = where def close(self): if self._close_callbacks is not None and self._callbacks_enabled: for callback in self._close_callbacks: callback() self._fileobj.close() def tell(self): return self._amount_read def __len__(self): # __len__ is defined because requests will try to determine the length # of the stream to set a content length. In the normal case # of the file it will just stat the file, but we need to change that # behavior. By providing a __len__, requests will use that instead # of stat'ing the file. return self._size def __enter__(self): return self def __exit__(self, *args, **kwargs): self.close() def __iter__(self): # This is a workaround for http://bugs.python.org/issue17575 # Basically httplib will try to iterate over the contents, even # if its a file like object. This wasn't noticed because we've # already exhausted the stream so iterating over the file immediately # stops, which is what we're simulating here. return iter([]) class StreamReaderProgress(object): """Wrapper for a read only stream that adds progress callbacks.""" def __init__(self, stream, callbacks=None): self._stream = stream self._callbacks = callbacks if callbacks is None: self._callbacks = [] def read(self, *args, **kwargs): value = self._stream.read(*args, **kwargs) invoke_progress_callbacks(self._callbacks, len(value)) return value class NoResourcesAvailable(Exception): pass class TaskSemaphore(object): def __init__(self, count): """A semaphore for the purpose of limiting the number of tasks :param count: The size of semaphore """ self._semaphore = threading.Semaphore(count) def acquire(self, tag, blocking=True): """Acquire the semaphore :param tag: A tag identifying what is acquiring the semaphore. Note that this is not really needed to directly use this class but is needed for API compatibility with the SlidingWindowSemaphore implementation. :param block: If True, block until it can be acquired. If False, do not block and raise an exception if cannot be aquired. 
:returns: A token (can be None) to use when releasing the semaphore """ logger.debug("Acquiring %s", tag) if not self._semaphore.acquire(blocking): raise NoResourcesAvailable("Cannot acquire tag '%s'" % tag) def release(self, tag, acquire_token): """Release the semaphore :param tag: A tag identifying what is releasing the semaphore :param acquire_token: The token returned from when the semaphore was acquired. Note that this is not really needed to directly use this class but is needed for API compatibility with the SlidingWindowSemaphore implementation. """ logger.debug("Releasing acquire %s/%s" % (tag, acquire_token)) self._semaphore.release() class SlidingWindowSemaphore(TaskSemaphore): """A semaphore used to coordinate sequential resource access. This class is similar to the stdlib BoundedSemaphore: * It's initialized with a count. * Each call to ``acquire()`` decrements the counter. * If the count is at zero, then ``acquire()`` will either block until the count increases, or if ``blocking=False``, then it will raise a NoResourcesAvailable exception indicating that it failed to acquire the semaphore. The main difference is that this semaphore is used to limit access to a resource that requires sequential access. For example, if I want to access resource R that has 20 subresources R_0 - R_19, this semaphore can also enforce that you only have a max range of 10 at any given point in time. You must also specify a tag name when you acquire the semaphore. The sliding window semantics apply on a per tag basis. The internal count will only be incremented when the minimum sequence number for a tag is released. """ def __init__(self, count): self._count = count # Dict[tag, next_sequence_number]. self._tag_sequences = defaultdict(int) self._lowest_sequence = {} self._lock = threading.Lock() self._condition = threading.Condition(self._lock) # Dict[tag, List[sequence_number]] self._pending_release = {} def current_count(self): with self._lock: return self._count def acquire(self, tag, blocking=True): logger.debug("Acquiring %s", tag) self._condition.acquire() try: if self._count == 0: if not blocking: raise NoResourcesAvailable("Cannot acquire tag '%s'" % tag) else: while self._count == 0: self._condition.wait() # self._count is no longer zero. # First, check if this is the first time we're seeing this tag. sequence_number = self._tag_sequences[tag] if sequence_number == 0: # First time seeing the tag, so record we're at 0. self._lowest_sequence[tag] = sequence_number self._tag_sequences[tag] += 1 self._count -= 1 return sequence_number finally: self._condition.release() def release(self, tag, acquire_token): sequence_number = acquire_token logger.debug("Releasing acquire %s/%s", tag, sequence_number) self._condition.acquire() try: if tag not in self._tag_sequences: raise ValueError("Attempted to release unknown tag: %s" % tag) max_sequence = self._tag_sequences[tag] if self._lowest_sequence[tag] == sequence_number: # We can immediately process this request and free up # resources. self._lowest_sequence[tag] += 1 self._count += 1 self._condition.notify() queued = self._pending_release.get(tag, []) while queued: if self._lowest_sequence[tag] == queued[-1]: queued.pop() self._lowest_sequence[tag] += 1 self._count += 1 else: break elif self._lowest_sequence[tag] < sequence_number < max_sequence: # We can't do anything right now because we're still waiting # for the min sequence for the tag to be released. We have # to queue this for pending release. 
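                # The per-tag pending list is kept sorted in descending order
                # so the smallest outstanding sequence number sits at the tail;
                # the release path above can then cheaply pop matching entries
                # off the end as the window's minimum sequence number advances.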
self._pending_release.setdefault( tag, []).append(sequence_number) self._pending_release[tag].sort(reverse=True) else: raise ValueError( "Attempted to release unknown sequence number " "%s for tag: %s" % (sequence_number, tag)) finally: self._condition.release() class ChunksizeAdjuster(object): def __init__(self, max_size=MAX_SINGLE_UPLOAD_SIZE, min_size=MIN_UPLOAD_CHUNKSIZE, max_parts=MAX_PARTS): self.max_size = max_size self.min_size = min_size self.max_parts = max_parts def adjust_chunksize(self, current_chunksize, file_size=None): """Get a chunksize close to current that fits within all S3 limits. :type current_chunksize: int :param current_chunksize: The currently configured chunksize. :type file_size: int or None :param file_size: The size of the file to upload. This might be None if the object being transferred has an unknown size. :returns: A valid chunksize that fits within configured limits. """ chunksize = current_chunksize if file_size is not None: chunksize = self._adjust_for_max_parts(chunksize, file_size) return self._adjust_for_chunksize_limits(chunksize) def _adjust_for_chunksize_limits(self, current_chunksize): if current_chunksize > self.max_size: logger.debug( "Chunksize greater than maximum chunksize. " "Setting to %s from %s." % (self.max_size, current_chunksize)) return self.max_size elif current_chunksize < self.min_size: logger.debug( "Chunksize less than minimum chunksize. " "Setting to %s from %s." % (self.min_size, current_chunksize)) return self.min_size else: return current_chunksize def _adjust_for_max_parts(self, current_chunksize, file_size): chunksize = current_chunksize num_parts = int(math.ceil(file_size / float(chunksize))) while num_parts > self.max_parts: chunksize *= 2 num_parts = int(math.ceil(file_size / float(chunksize))) if chunksize != current_chunksize: logger.debug( "Chunksize would result in the number of parts exceeding the " "maximum. Setting to %s from %s." % (chunksize, current_chunksize)) return chunksize s3transfer-0.1.13/scripts/000077500000000000000000000000001324114246300153555ustar00rootroot00000000000000s3transfer-0.1.13/scripts/ci/000077500000000000000000000000001324114246300157505ustar00rootroot00000000000000s3transfer-0.1.13/scripts/ci/install000077500000000000000000000013151324114246300173440ustar00rootroot00000000000000#!/usr/bin/env python import os import sys from subprocess import check_call import shutil _dname = os.path.dirname REPO_ROOT = _dname(_dname(_dname(os.path.abspath(__file__)))) os.chdir(REPO_ROOT) def run(command): return check_call(command, shell=True) try: # Has the form "major.minor" python_version = os.environ['PYTHON_VERSION'] except KeyError: python_version = '.'.join([str(i) for i in sys.version_info[:2]]) run('pip install -r requirements-test.txt') run('pip install coverage') if os.path.isdir('dist') and os.listdir('dist'): shutil.rmtree('dist') run('python setup.py bdist_wheel') wheel_dist = os.listdir('dist')[0] run('pip install %s' % (os.path.join('dist', wheel_dist))) s3transfer-0.1.13/scripts/ci/run-integ-tests000077500000000000000000000012421324114246300207450ustar00rootroot00000000000000#!/usr/bin/env python # Don't run tests from the root repo dir. # We want to ensure we're importing from the installed # binary package not from the CWD. 
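# Typical usage (assumed): run ./scripts/ci/install first so the built wheel
# is installed into the environment, then invoke this script; it deliberately
# chdirs into tests/ so the installed package is the one being imported.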
import os from subprocess import check_call _dname = os.path.dirname REPO_ROOT = _dname(_dname(_dname(os.path.abspath(__file__)))) os.chdir(os.path.join(REPO_ROOT, 'tests')) def run(command, env=None): return check_call(command, shell=True, env=env) run('nosetests --with-xunit --cover-erase --with-coverage ' '--cover-package s3transfer --cover-xml -v integration') # Run the serial implementation of s3transfer os.environ['USE_SERIAL_EXECUTOR'] = 'True' run('nosetests -v integration', env=os.environ) s3transfer-0.1.13/scripts/ci/run-tests000077500000000000000000000012501324114246300176400ustar00rootroot00000000000000#!/usr/bin/env python # Don't run tests from the root repo dir. # We want to ensure we're importing from the installed # binary package not from the CWD. import os from subprocess import check_call _dname = os.path.dirname REPO_ROOT = _dname(_dname(_dname(os.path.abspath(__file__)))) os.chdir(os.path.join(REPO_ROOT, 'tests')) def run(command, env=None): return check_call(command, shell=True, env=env) run('nosetests --with-coverage --cover-erase --cover-package s3transfer ' '--with-xunit --cover-xml -v unit/ functional/') # Run the serial implementation of s3transfer os.environ['USE_SERIAL_EXECUTOR'] = 'True' run('nosetests -v functional/', env=os.environ) s3transfer-0.1.13/scripts/new-change000077500000000000000000000155671324114246300173350ustar00rootroot00000000000000#!/usr/bin/env python """Generate a new changelog entry. Usage ===== To generate a new changelog entry:: scripts/new-change This will open up a file in your editor (via the ``EDITOR`` env var). You'll see this template:: # Type should be one of: feature, bugfix type: # Category is the high level feature area. # This can be a service identifier (e.g ``s3``), # or something like: Paginator. category: # A brief description of the change. You can # use github style references to issues such as # "fixes #489", "boto/boto3#100", etc. These # will get automatically replaced with the correct # link. description: Fill in the appropriate values, save and exit the editor. Make sure to commit these changes as part of your pull request. If, when your editor is open, you decide don't don't want to add a changelog entry, save an empty file and no entry will be generated. You can then use the ``scripts/gen-changelog`` to generate the CHANGELOG.rst file. """ import os import re import sys import json import string import random import tempfile import subprocess import argparse VALID_CHARS = set(string.ascii_letters + string.digits) CHANGES_DIR = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), '.changes' ) TEMPLATE = """\ # Type should be one of: feature, bugfix, enhancement, api-change # feature: A larger feature or change in behavior, usually resulting in a # minor version bump. # bugfix: Fixing a bug in an existing code path. # enhancment: Small change to an underlying implementation detail. # api-change: Changes to a modeled API. type: {change_type} # Category is the high level feature area. # This can be a service identifier (e.g ``s3``), # or something like: Paginator. category: {category} # A brief description of the change. You can # use github style references to issues such as # "fixes #489", "boto/boto3#100", etc. These # will get automatically replaced with the correct # link. description: {description} """ def new_changelog_entry(args): # Changelog values come from one of two places. 
# Either all values are provided on the command line, # or we open a text editor and let the user provide # enter their values. if all_values_provided(args): parsed_values = { 'type': args.change_type, 'category': args.category, 'description': args.description, } else: parsed_values = get_values_from_editor(args) if has_empty_values(parsed_values): sys.stderr.write( "Empty changelog values received, skipping entry creation.\n") return 1 replace_issue_references(parsed_values, args.repo) write_new_change(parsed_values) return 0 def has_empty_values(parsed_values): return not (parsed_values.get('type') and parsed_values.get('category') and parsed_values.get('description')) def all_values_provided(args): return args.change_type and args.category and args.description def get_values_from_editor(args): with tempfile.NamedTemporaryFile('w') as f: contents = TEMPLATE.format( change_type=args.change_type, category=args.category, description=args.description, ) f.write(contents) f.flush() env = os.environ editor = env.get('VISUAL', env.get('EDITOR', 'vim')) p = subprocess.Popen('%s %s' % (editor, f.name), shell=True) p.communicate() with open(f.name) as f: filled_in_contents = f.read() parsed_values = parse_filled_in_contents(filled_in_contents) return parsed_values def replace_issue_references(parsed, repo_name): description = parsed['description'] def linkify(match): number = match.group()[1:] return ( '`%s `__' % ( match.group(), repo_name, number)) new_description = re.sub('#\d+', linkify, description) parsed['description'] = new_description def write_new_change(parsed_values): if not os.path.isdir(CHANGES_DIR): os.makedirs(CHANGES_DIR) # Assume that new changes go into the next release. dirname = os.path.join(CHANGES_DIR, 'next-release') if not os.path.isdir(dirname): os.makedirs(dirname) # Need to generate a unique filename for this change. # We'll try a couple things until we get a unique match. category = parsed_values['category'] short_summary = ''.join(filter(lambda x: x in VALID_CHARS, category)) filename = '{type_name}-{summary}'.format( type_name=parsed_values['type'], summary=short_summary) possible_filename = os.path.join( dirname, '%s-%s.json' % (filename, str(random.randint(1, 100000)))) while os.path.isfile(possible_filename): possible_filename = os.path.join( dirname, '%s-%s.json' % (filename, str(random.randint(1, 100000)))) with open(possible_filename, 'w') as f: f.write(json.dumps(parsed_values, indent=2) + "\n") def parse_filled_in_contents(contents): """Parse filled in file contents and returns parsed dict. Return value will be:: { "type": "bugfix", "category": "category", "description": "This is a description" } """ if not contents.strip(): return {} parsed = {} lines = iter(contents.splitlines()) for line in lines: line = line.strip() if line.startswith('#'): continue if 'type' not in parsed and line.startswith('type:'): parsed['type'] = line.split(':')[1].strip() elif 'category' not in parsed and line.startswith('category:'): parsed['category'] = line.split(':')[1].strip() elif 'description' not in parsed and line.startswith('description:'): # Assume that everything until the end of the file is part # of the description, so we can break once we pull in the # remaining lines. 
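            # For example (hypothetical contents), a template filled in with
            #   description: Fix hang when many transfers are queued
            #   and then cancelled
            # yields {'description': 'Fix hang when many transfers are
            # queued\nand then cancelled'}, since every remaining line is
            # folded into the description.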
first_line = line.split(':')[1].strip() full_description = '\n'.join([first_line] + list(lines)) parsed['description'] = full_description.strip() break return parsed def main(): parser = argparse.ArgumentParser() parser.add_argument('-t', '--type', dest='change_type', default='', choices=('bugfix', 'feature', 'enhancement', 'api-change')) parser.add_argument('-c', '--category', dest='category', default='') parser.add_argument('-d', '--description', dest='description', default='') parser.add_argument('-r', '--repo', default='boto/boto3', help='Optional repo name, e.g: boto/boto3') args = parser.parse_args() sys.exit(new_changelog_entry(args)) if __name__ == '__main__': main() s3transfer-0.1.13/scripts/performance/000077500000000000000000000000001324114246300176565ustar00rootroot00000000000000s3transfer-0.1.13/scripts/performance/benchmark000077500000000000000000000101451324114246300215370ustar00rootroot00000000000000#!/usr/bin/env python """ Use for benchmarking performance of other scripts. Provides data about time, memory use, cpu usage, network in, network out about the script ran in the form of a csv. Usage ===== NOTE: Make sure you run ``pip install -r requirements-dev.txt`` before running. To use the script, run:: ./benchmark "./my-script-to-run" If no ``--output-file`` was provided, the data will be saved to ``performance.csv`` """ import argparse import os import sys import subprocess import time import psutil # Determine the interface to track network IO depending on the platform. if sys.platform.startswith('linux'): INTERFACE = 'eth0' elif sys.platform == 'darwin': INTERFACE = 'en0' else: # TODO: Add support for windows. This would require figuring out what # interface to use on windows. raise RuntimeError('Script cannot be run on %s' % sys.platform) def benchmark(args): parent_pid = os.getpid() child_p = run_script(args) try: # Benchmark the process where the script is being ran. return run_benchmark(child_p.pid, args.output_file, args.data_interval) except KeyboardInterrupt: # If there is an interrupt, then try to clean everything up. proc = psutil.Process(parent_pid) procs = proc.children(recursive=True) for child in procs: child.terminate() gone, alive = psutil.wait_procs(procs, timeout=1) for child in alive: child.kill() return 1 def run_script(args): return subprocess.Popen(args.script, shell=True) def run_benchmark(pid, output_file, data_interval): p = psutil.Process(pid) previous_net = psutil.net_io_counters(pernic=True)[INTERFACE] previous_time = time.time() with open(output_file, 'w') as f: while p.is_running(): if p.status() == psutil.STATUS_ZOMBIE: p.kill() break time.sleep(data_interval) process_to_measure = _get_underlying_python_process(p) try: # Collect the memory and cpu usage. memory_used = process_to_measure.memory_info().rss cpu_percent = process_to_measure.cpu_percent() current_net = psutil.net_io_counters(pernic=True)[INTERFACE] except psutil.AccessDenied: # Trying to get process information from a closed process will # result in AccessDenied. break # Collect data on the in/out network io. sent_delta = current_net.bytes_sent - previous_net.bytes_sent recv_delta = current_net.bytes_recv - previous_net.bytes_recv # Determine the lapsed time to determine the network io rate. current_time = time.time() dt = current_time - previous_time previous_time = current_time sent_rate = sent_delta / dt recv_rate = recv_delta / dt # Save all of the data into a CSV file. 
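            # Each row written below is: timestamp, RSS memory in bytes,
            # CPU percent, send rate in bytes/sec, receive rate in bytes/sec.
            # The ``summarize`` script consumes the first three columns
            # (time, memory, cpu).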
f.write(','.join(str(val) for val in [ current_time, memory_used, cpu_percent, sent_rate, recv_rate]) + '\n') f.flush() return 0 def _get_underlying_python_process(process): # For some scripts such as the streaming CLI commands, the process is # nested under a shell script that does not account for the python process. # We want to always be measuring the python process. children = process.children(recursive=True) for child_process in children: if 'python' in child_process.name().lower(): return child_process return process def main(): parser = argparse.ArgumentParser(usage=__doc__) parser.add_argument( 'script', help='The script to run for benchmarking') parser.add_argument( '--data-interval', default=1, type=float, help='The interval in seconds to poll for data points') parser.add_argument( '--output-file', default='performance.csv', help='The file to output the data collected to') args = parser.parse_args() return benchmark(args) if __name__ == '__main__': sys.exit(main()) s3transfer-0.1.13/scripts/performance/benchmark-download000077500000000000000000000114731324114246300233510ustar00rootroot00000000000000#!/usr/bin/env python """ Benchmark the downloading of a file using s3transfer. You can also chose how type of file that is downloaded (i.e. filename, seekable, nonseekable). Usage ===== NOTE: Make sure you run ``pip install -r requirements-dev.txt`` before running. To benchmark with using a temporary file and key that is generated for you:: ./benchmark-download --file-size 10MB --file-type filename \\ --s3-bucket mybucket To benchmark with your own s3 key: ./benchmark-upload --existing-s3-key mykey --file-type filename \\ --s3-bucket mybucket """ import argparse import os import tempfile import shutil import subprocess from botocore.session import get_session from s3transfer.manager import TransferManager TEMP_FILE = 'temp' TEMP_KEY = 'temp' KB = 1024 SIZE_SUFFIX = { 'kb': 1024, 'mb': 1024 ** 2, 'gb': 1024 ** 3, 'tb': 1024 ** 4, 'kib': 1024, 'mib': 1024 ** 2, 'gib': 1024 ** 3, 'tib': 1024 ** 4, } def human_readable_to_bytes(value): """Converts a human readable size to bytes. :param value: A string such as "10MB". If a suffix is not included, then the value is assumed to be an integer representing the size in bytes. :returns: The converted value in bytes as an integer """ value = value.lower() if value[-2:] == 'ib': # Assume IEC suffix. suffix = value[-3:].lower() else: suffix = value[-2:].lower() has_size_identifier = ( len(value) >= 2 and suffix in SIZE_SUFFIX) if not has_size_identifier: try: return int(value) except ValueError: raise ValueError("Invalid size value: %s" % value) else: multiplier = SIZE_SUFFIX[suffix] return int(value[:-len(suffix)]) * multiplier def create_file(filename, file_size): with open(filename, 'wb') as f: for i in range(0, file_size, KB): f.write(b'a' * i) def benchmark_download(args): # Create a temporary directory to use for scratch work. tempdir = tempfile.mkdtemp() temp_file = os.path.join(tempdir, TEMP_FILE) if args.target_download: temp_file = os.path.abspath( os.path.expanduser(args.target_download)) session = get_session() client = session.create_client('s3') s3_key = args.existing_s3_key try: # If an existing s3 key was not specified, then create a temporary # file of that size for the user and upload it. if not args.existing_s3_key: # Create the temporary file. 
create_file(temp_file, args.file_size) # Create the temporary s3 key s3_key = TEMP_KEY upload_file(client, temp_file, args.s3_bucket) download_file_script = ( './download-file --file-name %s --file-type %s --s3-bucket %s ' '--s3-key %s' % ( temp_file, args.file_type, args.s3_bucket, s3_key) ) benchmark_args = ['./benchmark', download_file_script] if args.output_file: benchmark_args.extend(['--output-file', args.output_file]) subprocess.check_call(benchmark_args) finally: shutil.rmtree(tempdir) if not args.existing_s3_key: client.delete_object(Bucket=args.s3_bucket, Key=s3_key) def upload_file(client, filename, bucket): with TransferManager(client) as manager: manager.upload(filename, bucket, TEMP_KEY) def main(): parser = argparse.ArgumentParser() file_group = parser.add_mutually_exclusive_group(required=True) file_group.add_argument( '--file-size', type=human_readable_to_bytes, help=( 'The size of the temporary file to create and then upload to s3. ' 'You can also specify your own key with --existing-s3-key to ' 'avoid going through this setup step.' ) ) parser.add_argument( '--file-type', choices=['filename', 'seekable', 'nonseekable'], required=True, help='The way to represent the file when downloading') parser.add_argument( '--s3-bucket', required=True, help='The S3 bucket to download the file to') file_group.add_argument( '--existing-s3-key', help=( 'The existing s3 key to download from. You can also use ' '--file-size to create a temporary file and key to download from.' ) ) parser.add_argument( '--target-download', help=( 'The filename to download to. Note that this file will ' 'always be cleaned up for you.' ) ) parser.add_argument( '--output-file', help=( 'The file to output the data collected to. The default ' 'location performace.csv' ) ) args = parser.parse_args() benchmark_download(args) if __name__ == '__main__': main() s3transfer-0.1.13/scripts/performance/benchmark-upload000077500000000000000000000100121324114246300230120ustar00rootroot00000000000000#!/usr/bin/env python """ Benchmark the uploading of a file using s3transfer. You can also chose how type of file that is uploaded (i.e. filename, seekable, nonseekable). Usage ===== NOTE: Make sure you run ``pip install -r requirements-dev.txt`` before running. To benchmark with using a temporary file that is generated for you:: ./benchmark-upload --file-size 10MB --file-type filename \\ --s3-bucket mybucket To benchmark with your own local file:: ./benchmark-upload --source-file myfile --file-type filename \\ --s3-bucket mybucket """ import argparse import os import tempfile import shutil import subprocess from botocore.session import get_session TEMP_FILE = 'temp' TEMP_KEY = 'temp' KB = 1024 SIZE_SUFFIX = { 'kb': 1024, 'mb': 1024 ** 2, 'gb': 1024 ** 3, 'tb': 1024 ** 4, 'kib': 1024, 'mib': 1024 ** 2, 'gib': 1024 ** 3, 'tib': 1024 ** 4, } def human_readable_to_bytes(value): """Converts a human readable size to bytes. :param value: A string such as "10MB". If a suffix is not included, then the value is assumed to be an integer representing the size in bytes. :returns: The converted value in bytes as an integer """ value = value.lower() if value[-2:] == 'ib': # Assume IEC suffix. 
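        # Worked examples for this conversion: '10MB' -> 10 * 1024 ** 2,
        # '1GiB' -> 1024 ** 3, and a bare '123' -> 123.  Note that the
        # decimal-looking suffixes ('kb', 'mb', ...) map to binary multiples
        # in SIZE_SUFFIX, so '10MB' and '10MiB' convert to the same value.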
suffix = value[-3:].lower() else: suffix = value[-2:].lower() has_size_identifier = ( len(value) >= 2 and suffix in SIZE_SUFFIX) if not has_size_identifier: try: return int(value) except ValueError: raise ValueError("Invalid size value: %s" % value) else: multiplier = SIZE_SUFFIX[suffix] return int(value[:-len(suffix)]) * multiplier def create_file(filename, file_size): with open(filename, 'wb') as f: for i in range(0, file_size, KB): f.write(b'a' * i) def benchmark_upload(args): source_file = args.source_file session = get_session() client = session.create_client('s3') tempdir = None try: # If a source file was not specified, then create a temporary file of # that size for the user. if not source_file: tempdir = tempfile.mkdtemp() source_file = os.path.join(tempdir, TEMP_FILE) create_file(source_file, args.file_size) upload_file_script = ( './upload-file --file-name %s --file-type %s --s3-bucket %s ' '--s3-key %s' % ( source_file, args.file_type, args.s3_bucket, TEMP_KEY) ) benchmark_args = ['./benchmark', upload_file_script] if args.output_file: benchmark_args.extend(['--output-file', args.output_file]) subprocess.check_call(benchmark_args) finally: if tempdir: shutil.rmtree(tempdir) client.delete_object(Bucket=args.s3_bucket, Key=TEMP_KEY) def main(): parser = argparse.ArgumentParser(usage=__doc__) source_file_group = parser.add_mutually_exclusive_group(required=True) source_file_group.add_argument( '--source-file', help=( 'The local file to upload. Note this is optional. You can also ' 'use --file-size which will create a temporary file for you.' ) ) source_file_group.add_argument( '--file-size', type=human_readable_to_bytes, help=( 'The size of the temporary file to create. You can also specify ' 'your own file with --source-file' ) ) parser.add_argument( '--file-type', choices=['filename', 'seekable', 'nonseekable'], required=True, help='The way to represent the file when uploading') parser.add_argument( '--s3-bucket', required=True, help='The S3 bucket to upload the file to') parser.add_argument( '--output-file', help=( 'The file to output the data collected to. The default ' 'location performace.csv' ) ) args = parser.parse_args() benchmark_upload(args) if __name__ == '__main__': main() s3transfer-0.1.13/scripts/performance/download-file000077500000000000000000000046151324114246300223360ustar00rootroot00000000000000#!/usr/bin/env python """ Downloads a file using s3transfer. You can also chose how type of file that is downloaded (i.e. filename, seekable, nonseekable). Usage ===== NOTE: Make sure you run ``pip install -r requirements-dev.txt`` before running. 
To download a file:: ./download-file --file-name myfilename --file-type filename \\ --s3-bucket mybucket --s3-key mykey """ import argparse from botocore.session import get_session from s3transfer.manager import TransferManager class NonSeekableWriter(object): """A wrapper to hide the ability to seek for a fileobj""" def __init__(self, fileobj): self._fileobj = fileobj def write(self, b): return self._fileobj.write(b) class Downloader(object): def download(self, args): session = get_session() client = session.create_client('s3') file_type = args.file_type if args.debug: session.set_debug_logger('') with TransferManager(client) as manager: getattr(self, 'download_' + file_type)( manager, args.file_name, args.s3_bucket, args.s3_key) def download_filename(self, manager, filename, bucket, s3_key): manager.download(bucket, s3_key, filename) def download_seekable(self, manager, filename, bucket, s3_key): with open(filename, 'wb') as f: future = manager.download(bucket, s3_key, f) future.result() def download_nonseekable(self, manager, filename, bucket, s3_key): with open(filename, 'wb') as f: future = manager.download( bucket, s3_key, NonSeekableWriter(f)) future.result() def main(): parser = argparse.ArgumentParser(usage=__doc__) parser.add_argument('--file-name', required=True, help='The name of file') parser.add_argument( '--file-type', choices=['filename', 'seekable', 'nonseekable'], required=True, help='The way to represent the file when downloading') parser.add_argument( '--s3-bucket', required=True, help='The S3 bucket to download the file to') parser.add_argument( '--s3-key', required=True, help='The key to download to') parser.add_argument( '--debug', action='store_true', help='Whether to turn debugging on. This will get printed to stderr') args = parser.parse_args() Downloader().download(args) if __name__ == '__main__': main() s3transfer-0.1.13/scripts/performance/summarize000077500000000000000000000243031324114246300216220ustar00rootroot00000000000000#!/usr/bin/env python """ Summarizes results of benchmarking. 
Usage ===== Run this script with:: ./summarize performance.csv And that should output:: +------------------------+----------+----------------------+ | Metric over 1 run(s) | Mean | Standard Deviation | +========================+==========+======================+ | Total Time (seconds) | 1.200 | 0.0 | +------------------------+----------+----------------------+ | Maximum Memory | 42.3 MiB | 0 Bytes | +------------------------+----------+----------------------+ | Maximum CPU (percent) | 88.1 | 0.0 | +------------------------+----------+----------------------+ | Average Memory | 33.9 MiB | 0 Bytes | +------------------------+----------+----------------------+ | Average CPU (percent) | 30.5 | 0.0 | +------------------------+----------+----------------------+ The script can also be ran with multiple files: ./summarize performance.csv performance-2.csv And will have a similar output: +------------------------+----------+----------------------+ | Metric over 2 run(s) | Mean | Standard Deviation | +========================+==========+======================+ | Total Time (seconds) | 1.155 | 0.0449999570847 | +------------------------+----------+----------------------+ | Maximum Memory | 42.5 MiB | 110.0 KiB | +------------------------+----------+----------------------+ | Maximum CPU (percent) | 94.5 | 6.45 | +------------------------+----------+----------------------+ | Average Memory | 35.6 MiB | 1.7 MiB | +------------------------+----------+----------------------+ | Average CPU (percent) | 27.5 | 3.03068181818 | +------------------------+----------+----------------------+ You can also specify the ``--output-format json`` option to print the summary as JSON instead of a pretty printed table:: { "total_time": 72.76999998092651, "std_dev_average_memory": 0.0, "std_dev_total_time": 0.0, "average_memory": 56884518.57534247, "std_dev_average_cpu": 0.0, "std_dev_max_memory": 0.0, "average_cpu": 61.19315068493151, "max_memory": 58331136.0 } """ import argparse import csv import json from math import sqrt from tabulate import tabulate def human_readable_size(value): """Converts integer values in bytes to human readable values""" hummanize_suffixes = ('KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB') base = 1024 bytes_int = float(value) if bytes_int == 1: return '1 Byte' elif bytes_int < base: return '%d Bytes' % bytes_int for i, suffix in enumerate(hummanize_suffixes): unit = base ** (i+2) if round((bytes_int / unit) * base) < base: return '%.1f %s' % ((base * bytes_int / unit), suffix) class Summarizer(object): DATA_INDEX_IN_ROW = { 'time': 0, 'memory': 1, 'cpu': 2 } def __init__(self): self.total_files = 0 self._num_rows = 0 self._start_time = None self._end_time = None self._totals = { 'time': [], 'average_memory': [], 'average_cpu': [], 'max_memory': [], 'max_cpu': [], } self._averages = { 'memory': 0.0, 'cpu': 0.0, } self._maximums = { 'memory': 0.0, 'cpu': 0.0 } @property def total_time(self): return self._average_across_all_files('time') @property def max_cpu(self): return self._average_across_all_files('max_cpu') @property def max_memory(self): return self._average_across_all_files('max_memory') @property def average_cpu(self): return self._average_across_all_files('average_cpu') @property def average_memory(self): return self._average_across_all_files('average_memory') @property def std_dev_total_time(self): return self._standard_deviation_across_all_files('time') @property def std_dev_max_cpu(self): return self._standard_deviation_across_all_files('max_cpu') @property def std_dev_max_memory(self): 
return self._standard_deviation_across_all_files('max_memory') @property def std_dev_average_cpu(self): return self._standard_deviation_across_all_files('average_cpu') @property def std_dev_average_memory(self): return self._standard_deviation_across_all_files('average_memory') def _average_across_all_files(self, name): return sum(self._totals[name])/len(self._totals[name]) def _standard_deviation_across_all_files(self, name): mean = self._average_across_all_files(name) differences = [total - mean for total in self._totals[name]] sq_differences = [difference ** 2 for difference in differences] return sqrt(sum(sq_differences)/len(self._totals[name])) def summarize_as_table(self): """Formats the processed data as pretty printed table. :return: str of formatted table """ h = human_readable_size table = [ ['Total Time (seconds)', '%.3f' % self.total_time, self.std_dev_total_time], ['Maximum Memory', h(self.max_memory), h(self.std_dev_max_memory)], ['Maximum CPU (percent)', '%.1f' % self.max_cpu, self.std_dev_max_cpu], ['Average Memory', h(self.average_memory), h(self.std_dev_average_memory)], ['Average CPU (percent)', '%.1f' % self.average_cpu, self.std_dev_average_cpu], ] return tabulate( table, headers=[ 'Metric over %s run(s)' % (self.total_files), 'Mean', 'Standard Deviation' ], tablefmt="grid" ) def summarize_as_json(self): """Return JSON summary of processed data. :return: str of formatted JSON """ return json.dumps({ 'total_time': self.total_time, 'std_dev_total_time': self.std_dev_total_time, 'max_memory': self.max_memory, 'std_dev_max_memory': self.std_dev_max_memory, 'average_memory': self.average_memory, 'std_dev_average_memory': self.std_dev_average_memory, 'average_cpu': self.average_cpu, 'std_dev_average_cpu': self.std_dev_average_cpu, }, indent=2) def process(self, args): """Processes the data from the CSV file""" for benchmark_file in args.benchmark_files: self.process_individual_file(benchmark_file) self.total_files += 1 def process_individual_file(self, benchmark_file): with open(benchmark_file, 'r') as f: reader = csv.reader(f) # Process each row from the CSV file row = None for row in reader: self._validate_row(row, benchmark_file) self.process_data_row(row) self._validate_row(row, benchmark_file) self._end_time = self._get_time(row) self._finalize_processed_data_for_file() def _validate_row(self, row, filename): if not row: raise RuntimeError( 'Row: %s could not be processed. The CSV file (%s) may be ' 'empty.' % (row, filename)) def process_data_row(self, row): # If the row is the first row collect the start time. if self._num_rows == 0: self._start_time = self._get_time(row) self._num_rows += 1 self.process_data_point(row, 'memory') self.process_data_point(row, 'cpu') def process_data_point(self, row, name): # Determine where in the CSV row the requested data is located. index = self.DATA_INDEX_IN_ROW[name] # Get the data point. data_point = float(row[index]) self._add_to_average(name, data_point) self._account_for_maximum(name, data_point) def _finalize_processed_data_for_file(self): # Add numbers to the total, which keeps track of data over # all files provided. self._totals['time'].append(self._end_time - self._start_time) self._totals['max_cpu'].append(self._maximums['cpu']) self._totals['max_memory'].append(self._maximums['memory']) self._totals['average_cpu'].append( self._averages['cpu']/self._num_rows) self._totals['average_memory'].append( self._averages['memory']/self._num_rows) # Reset some of the data needed to be tracked for each specific # file. 
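        # Only the per-file accumulators are cleared here; ``self._totals``
        # keeps the values appended above so that means and standard
        # deviations can be computed across every input file.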
self._num_rows = 0 self._maximums = self._maximums.fromkeys(self._maximums, 0.0) self._averages = self._averages.fromkeys(self._averages, 0.0) def _get_time(self, row): return float(row[self.DATA_INDEX_IN_ROW['time']]) def _add_to_average(self, name, data_point): self._averages[name] += data_point def _account_for_maximum(self, name, data_point): if data_point > self._maximums[name]: self._maximums[name] = data_point def main(): parser = argparse.ArgumentParser(usage=__doc__) parser.add_argument( 'benchmark_files', nargs='+', help=( 'The CSV output file from the benchmark script. If you provide' 'more than one of these files, it will give you the average ' 'across all of the files for each metric.' ) ) parser.add_argument( '-f', '--output-format', default='table', choices=['table', 'json'], help=( 'Specify what output format to use for displaying results. ' 'By default, a pretty printed table is used, but you can also ' 'specify "json" to display pretty printed JSON.' ) ) args = parser.parse_args() summarizer = Summarizer() summarizer.process(args) if args.output_format == 'table': result = summarizer.summarize_as_table() else: result = summarizer.summarize_as_json() print(result) if __name__ == '__main__': main() s3transfer-0.1.13/scripts/performance/upload-file000077500000000000000000000045451324114246300220150ustar00rootroot00000000000000#!/usr/bin/env python """ Uploads a file using s3transfer. You can also chose how type of file that is uploaded (i.e. filename, seekable, nonseekable). Usage ===== NOTE: Make sure you run ``pip install -r requirements-dev.txt`` before running. To upload a file:: ./upload-file --file-name myfilename --file-type filename \\ --s3-bucket mybucket --s3-key mykey """ import argparse from botocore.session import get_session from s3transfer.manager import TransferManager class NonSeekableReader(object): """A wrapper to hide the ability to seek for a fileobj""" def __init__(self, fileobj): self._fileobj = fileobj def read(self, amt=-1): return self._fileobj.read(amt) class Uploader(object): def upload(self, args): session = get_session() client = session.create_client('s3') file_type = args.file_type if args.debug: session.set_debug_logger('') with TransferManager(client) as manager: getattr(self, 'upload_' + file_type)( manager, args.file_name, args.s3_bucket, args.s3_key) def upload_filename(self, manager, filename, bucket, s3_key): manager.upload(filename, bucket, s3_key) def upload_seekable(self, manager, filename, bucket, s3_key): with open(filename, 'rb') as f: future = manager.upload(f, bucket, s3_key) future.result() def upload_nonseekable(self, manager, filename, bucket, s3_key): with open(filename, 'rb') as f: future = manager.upload( NonSeekableReader(f), bucket, s3_key) future.result() def main(): parser = argparse.ArgumentParser(usage=__doc__) parser.add_argument('--file-name', required=True, help='The name of file') parser.add_argument( '--file-type', choices=['filename', 'seekable', 'nonseekable'], required=True, help='The way to represent the file when uploading') parser.add_argument( '--s3-bucket', required=True, help='The S3 bucket to upload the file to') parser.add_argument('--s3-key', required=True, help='The key to upload to') parser.add_argument( '--debug', action='store_true', help='Whether to turn debugging on. 
This will get printed to stderr') args = parser.parse_args() Uploader().upload(args) if __name__ == '__main__': main() s3transfer-0.1.13/scripts/stress/000077500000000000000000000000001324114246300167005ustar00rootroot00000000000000s3transfer-0.1.13/scripts/stress/timeout000077500000000000000000000040541324114246300203170ustar00rootroot00000000000000#!/usr/bin/env python """ Use to put a timeout on the length of time a script can run. This is especially useful for checking for scripts that hang. Usage ===== NOTE: Make sure you run ``pip install -r requirements-dev.txt`` before running. To use the script, run:: ./timeout "./my-script-to-run" --timeout-after 5 """ import argparse import os import sys import subprocess import time import psutil class TimeoutException(Exception): def __init__(self, timeout_len): msg = 'Script failed to complete within %s seconds' % timeout_len Exception.__init__(self, msg) def timeout(args): parent_pid = os.getpid() child_p = run_script(args) try: run_timeout(child_p.pid, args.timeout_after) except (TimeoutException, KeyboardInterrupt) as e: proc = psutil.Process(parent_pid) procs = proc.children(recursive=True) for child in procs: child.terminate() gone, alive = psutil.wait_procs(procs, timeout=1) for child in alive: child.kill() raise e def run_timeout(pid, timeout_len): p = psutil.Process(pid) start_time = time.time() while p.is_running(): if p.status() == psutil.STATUS_ZOMBIE: p.kill() break current_time = time.time() # Raise a timeout if the duration of the process is longer than # the desired timeout. if current_time - start_time > timeout_len: raise TimeoutException(timeout_len) time.sleep(1) def run_script(args): return subprocess.Popen(args.script, shell=True) def main(): parser = argparse.ArgumentParser(usage=__doc__) parser.add_argument( 'script', help='The script to run for benchmarking') parser.add_argument( '--timeout-after', required=True, type=float, help=( 'The length of time in seconds allowed for the script to run ' 'before it time\'s out.' ) ) args = parser.parse_args() return timeout(args) if __name__ == '__main__': sys.exit(main()) s3transfer-0.1.13/setup.cfg000066400000000000000000000002271324114246300155100ustar00rootroot00000000000000[bdist_wheel] universal = 1 [metadata] requires-dist = botocore>=1.3.0,<2.0.0 futures>=2.2.0,<4.0.0; python_version=="2.6" or python_version=="2.7" s3transfer-0.1.13/setup.py000066400000000000000000000032421324114246300154010ustar00rootroot00000000000000#!/usr/bin/env python import os import re import sys from setuptools import setup, find_packages ROOT = os.path.dirname(__file__) VERSION_RE = re.compile(r'''__version__ = ['"]([0-9.]+)['"]''') requires = [ 'botocore>=1.3.0,<2.0.0', ] if sys.version_info[0] == 2: # concurrent.futures is only in python3, so for # python2 we need to install the backport. 
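    # Note: the same conditional dependency is also declared via the
    # environment-marker extras below and in setup.cfg, so the backport is
    # captured in wheel metadata as well as by this runtime check.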
requires.append('futures>=2.2.0,<4.0.0') def get_version(): init = open(os.path.join(ROOT, 's3transfer', '__init__.py')).read() return VERSION_RE.search(init).group(1) setup( name='s3transfer', version=get_version(), description='An Amazon S3 Transfer Manager', long_description=open('README.rst').read(), author='Amazon Web Services', author_email='kyknapp1@gmail.com', url='https://github.com/boto/s3transfer', packages=find_packages(exclude=['tests*']), include_package_data=True, install_requires=requires, extras_require={ ':python_version=="2.6" or python_version=="2.7"': [ 'futures>=2.2.0,<4.0.0'] }, license="Apache License 2.0", classifiers=( 'Development Status :: 3 - Alpha', 'Intended Audience :: Developers', 'Natural Language :: English', 'License :: OSI Approved :: Apache Software License', 'Programming Language :: Python', 'Programming Language :: Python :: 2.6', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', ), ) s3transfer-0.1.13/tests/000077500000000000000000000000001324114246300150305ustar00rootroot00000000000000s3transfer-0.1.13/tests/__init__.py000066400000000000000000000415651324114246300171540ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the 'License'). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the 'license' file accompanying this file. This file is # distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
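# Shared helpers for the s3transfer test suite: package-level setup that can
# swap in the NonThreadedExecutor for serial runs, file-comparison assertions,
# skip decorators, and utilities such as StreamWithError, FileSizeProvider,
# and FileCreator.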
import io import hashlib import math import os import platform import shutil import string import tempfile try: import unittest2 as unittest except ImportError: import unittest import botocore.session from botocore.stub import Stubber from botocore.compat import six from s3transfer.manager import TransferConfig from s3transfer.futures import IN_MEMORY_UPLOAD_TAG from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG from s3transfer.futures import TransferCoordinator from s3transfer.futures import TransferMeta from s3transfer.futures import TransferFuture from s3transfer.futures import BoundedExecutor from s3transfer.futures import NonThreadedExecutor from s3transfer.subscribers import BaseSubscriber from s3transfer.utils import OSUtils from s3transfer.utils import CallArgs from s3transfer.utils import TaskSemaphore from s3transfer.utils import SlidingWindowSemaphore ORIGINAL_EXECUTOR_CLS = BoundedExecutor.EXECUTOR_CLS def setup_package(): if is_serial_implementation(): BoundedExecutor.EXECUTOR_CLS = NonThreadedExecutor def teardown_package(): BoundedExecutor.EXECUTOR_CLS = ORIGINAL_EXECUTOR_CLS def is_serial_implementation(): return os.environ.get('USE_SERIAL_EXECUTOR', False) def assert_files_equal(first, second): if os.path.getsize(first) != os.path.getsize(second): raise AssertionError("Files are not equal: %s, %s" % (first, second)) first_md5 = md5_checksum(first) second_md5 = md5_checksum(second) if first_md5 != second_md5: raise AssertionError( "Files are not equal: %s(md5=%s) != %s(md5=%s)" % ( first, first_md5, second, second_md5)) def md5_checksum(filename): checksum = hashlib.md5() with open(filename, 'rb') as f: for chunk in iter(lambda: f.read(8192), b''): checksum.update(chunk) return checksum.hexdigest() def random_bucket_name(prefix='s3transfer', num_chars=10): base = string.ascii_lowercase + string.digits random_bytes = bytearray(os.urandom(num_chars)) return prefix + ''.join([base[b % len(base)] for b in random_bytes]) def skip_if_windows(reason): """Decorator to skip tests that should not be run on windows. Example usage: @skip_if_windows("Not valid") def test_some_non_windows_stuff(self): self.assertEqual(...) """ def decorator(func): return unittest.skipIf( platform.system() not in ['Darwin', 'Linux'], reason)(func) return decorator def skip_if_using_serial_implementation(reason): """Decorator to skip tests when running as the serial implementation""" def decorator(func): return unittest.skipIf( is_serial_implementation(), reason)(func) return decorator class StreamWithError(object): """A wrapper to simulate errors while reading from a stream :param stream: The underlying stream to read from :param exception_type: The exception type to throw :param num_reads: The number of times to allow a read before raising the exception. A value of zero indicates to raise the error on the first read. 
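
    Example (an illustrative sketch; the wrapped stream and the exception
    type here are arbitrary choices)::

        stream = StreamWithError(six.BytesIO(b'foo'), ValueError, num_reads=1)
        stream.read(1)  # first read succeeds
        stream.read(1)  # second read raises ValueError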
""" def __init__(self, stream, exception_type, num_reads=0): self._stream = stream self._exception_type = exception_type self._num_reads = num_reads self._count = 0 def read(self, n=-1): if self._count == self._num_reads: raise self._exception_type self._count += 1 return self._stream.read(n) class FileSizeProvider(object): def __init__(self, file_size): self.file_size = file_size def on_queued(self, future, **kwargs): future.meta.provide_transfer_size(self.file_size) class FileCreator(object): def __init__(self): self.rootdir = tempfile.mkdtemp() def remove_all(self): shutil.rmtree(self.rootdir) def create_file(self, filename, contents, mode='w'): """Creates a file in a tmpdir ``filename`` should be a relative path, e.g. "foo/bar/baz.txt" It will be translated into a full path in a tmp dir. ``mode`` is the mode the file should be opened either as ``w`` or `wb``. Returns the full path to the file. """ full_path = os.path.join(self.rootdir, filename) if not os.path.isdir(os.path.dirname(full_path)): os.makedirs(os.path.dirname(full_path)) with open(full_path, mode) as f: f.write(contents) return full_path def create_file_with_size(self, filename, filesize): filename = self.create_file(filename, contents='') chunksize = 8192 with open(filename, 'wb') as f: for i in range(int(math.ceil(filesize / float(chunksize)))): f.write(b'a' * chunksize) return filename def append_file(self, filename, contents): """Append contents to a file ``filename`` should be a relative path, e.g. "foo/bar/baz.txt" It will be translated into a full path in a tmp dir. Returns the full path to the file. """ full_path = os.path.join(self.rootdir, filename) if not os.path.isdir(os.path.dirname(full_path)): os.makedirs(os.path.dirname(full_path)) with open(full_path, 'a') as f: f.write(contents) return full_path def full_path(self, filename): """Translate relative path to full path in temp dir. 
f.full_path('foo/bar.txt') -> /tmp/asdfasd/foo/bar.txt """ return os.path.join(self.rootdir, filename) class RecordingOSUtils(OSUtils): """An OSUtil abstraction that records openings and renamings""" def __init__(self): super(RecordingOSUtils, self).__init__() self.open_records = [] self.rename_records = [] def open(self, filename, mode): self.open_records.append((filename, mode)) return super(RecordingOSUtils, self).open(filename, mode) def rename_file(self, current_filename, new_filename): self.rename_records.append((current_filename, new_filename)) super(RecordingOSUtils, self).rename_file( current_filename, new_filename) class RecordingSubscriber(BaseSubscriber): def __init__(self): self.on_queued_calls = [] self.on_progress_calls = [] self.on_done_calls = [] def on_queued(self, **kwargs): self.on_queued_calls.append(kwargs) def on_progress(self, **kwargs): self.on_progress_calls.append(kwargs) def on_done(self, **kwargs): self.on_done_calls.append(kwargs) def calculate_bytes_seen(self, **kwargs): amount_seen = 0 for call in self.on_progress_calls: amount_seen += call['bytes_transferred'] return amount_seen class TransferCoordinatorWithInterrupt(TransferCoordinator): """Used to inject keyboard interrupts""" def result(self): raise KeyboardInterrupt() class RecordingExecutor(object): """A wrapper on an executor to record calls made to submit() You can access the submissions property to receive a list of dictionaries that represents all submissions where the dictionary is formatted:: { 'fn': function 'args': positional args (as tuple) 'kwargs': keyword args (as dict) } """ def __init__(self, executor): self._executor = executor self.submissions = [] def submit(self, task, tag=None, block=True): future = self._executor.submit(task, tag, block) self.submissions.append( { 'task': task, 'tag': tag, 'block': block } ) return future def shutdown(self): self._executor.shutdown() class StubbedClientTest(unittest.TestCase): def setUp(self): self.session = botocore.session.get_session() self.region = 'us-west-2' self.client = self.session.create_client( 's3', self.region, aws_access_key_id='foo', aws_secret_access_key='bar') self.stubber = Stubber(self.client) self.stubber.activate() def tearDown(self): self.stubber.deactivate() def reset_stubber_with_new_client(self, override_client_kwargs): client_kwargs = { 'service_name': 's3', 'region_name': self.region, 'aws_access_key_id': 'foo', 'aws_secret_access_key': 'bar' } client_kwargs.update(override_client_kwargs) self.client = self.session.create_client(**client_kwargs) self.stubber = Stubber(self.client) self.stubber.activate() class BaseTaskTest(StubbedClientTest): def setUp(self): super(BaseTaskTest, self).setUp() self.transfer_coordinator = TransferCoordinator() def get_task(self, task_cls, **kwargs): if 'transfer_coordinator' not in kwargs: kwargs['transfer_coordinator'] = self.transfer_coordinator return task_cls(**kwargs) def get_transfer_future(self, call_args=None): return TransferFuture( meta=TransferMeta(call_args), coordinator=self.transfer_coordinator ) class BaseSubmissionTaskTest(BaseTaskTest): def setUp(self): super(BaseSubmissionTaskTest, self).setUp() self.config = TransferConfig() self.osutil = OSUtils() self.executor = BoundedExecutor( 1000, 1, { IN_MEMORY_UPLOAD_TAG: TaskSemaphore(10), IN_MEMORY_DOWNLOAD_TAG: SlidingWindowSemaphore(10) } ) def tearDown(self): super(BaseSubmissionTaskTest, self).tearDown() self.executor.shutdown() class BaseGeneralInterfaceTest(StubbedClientTest): """A general test class to ensure consistency 
across TransferManger methods This test should never be called and should be subclassed from to pick up the various tests that all TransferManager method must pass from a functionality standpoint. """ __test__ = False def manager(self): """The transfer manager to use""" raise NotImplementedError('method is not implemented') @property def method(self): """The transfer manager method to invoke i.e. upload()""" raise NotImplementedError('method is not implemented') def create_call_kwargs(self): """The kwargs to be passed to the transfer manager method""" raise NotImplementedError('create_call_kwargs is not implemented') def create_invalid_extra_args(self): """A value for extra_args that will cause validation errors""" raise NotImplementedError( 'create_invalid_extra_args is not implemented') def create_stubbed_responses(self): """A list of stubbed responses that will cause the request to succeed The elements of this list is a dictionary that will be used as key word arguments to botocore.Stubber.add_response(). For example:: [{'method': 'put_object', 'service_response': {}}] """ raise NotImplementedError( 'create_stubbed_responses is not implemented') def create_expected_progress_callback_info(self): """A list of kwargs expected to be passed to each progress callback Note that the future kwargs does not need to be added to each dictionary provided in the list. This is injected for you. An example is:: [ {'bytes_transferred': 4}, {'bytes_transferred': 4}, {'bytes_transferred': 2} ] This indicates that the progress callback will be called three times and pass along the specified keyword arguments and corresponding values. """ raise NotImplementedError( 'create_expected_progress_callback_info is not implemented') def _setup_default_stubbed_responses(self): for stubbed_response in self.create_stubbed_responses(): self.stubber.add_response(**stubbed_response) def test_returns_future_with_meta(self): self._setup_default_stubbed_responses() future = self.method(**self.create_call_kwargs()) # The result is called so we ensure that the entire process executes # before we try to clean up resources in the tearDown. future.result() # Assert the return value is a future with metadata associated to it. self.assertIsInstance(future, TransferFuture) self.assertIsInstance(future.meta, TransferMeta) def test_returns_correct_call_args(self): self._setup_default_stubbed_responses() call_kwargs = self.create_call_kwargs() future = self.method(**call_kwargs) # The result is called so we ensure that the entire process executes # before we try to clean up resources in the tearDown. future.result() # Assert that there are call args associated to the metadata self.assertIsInstance(future.meta.call_args, CallArgs) # Assert that all of the arguments passed to the method exist and # are of the correct value in call_args. for param, value in call_kwargs.items(): self.assertEqual(value, getattr(future.meta.call_args, param)) def test_has_transfer_id_associated_to_future(self): self._setup_default_stubbed_responses() call_kwargs = self.create_call_kwargs() future = self.method(**call_kwargs) # The result is called so we ensure that the entire process executes # before we try to clean up resources in the tearDown. future.result() # Assert that an transfer id was associated to the future. # Since there is only one transfer request is made for that transfer # manager the id will be zero since it will be the first transfer # request made for that transfer manager. 
self.assertEqual(future.meta.transfer_id, 0) # If we make a second request, the transfer id should have incremented # by one for that new TransferFuture. self._setup_default_stubbed_responses() future = self.method(**call_kwargs) future.result() self.assertEqual(future.meta.transfer_id, 1) def test_invalid_extra_args(self): with self.assertRaisesRegexp(ValueError, 'Invalid extra_args'): self.method( extra_args=self.create_invalid_extra_args(), **self.create_call_kwargs() ) def test_for_callback_kwargs_correctness(self): # Add the stubbed responses before invoking the method self._setup_default_stubbed_responses() subscriber = RecordingSubscriber() future = self.method( subscribers=[subscriber], **self.create_call_kwargs()) # We call shutdown instead of result on future because the future # could be finished but the done callback could still be going. # The manager's shutdown method ensures everything completes. self.manager.shutdown() # Assert the various subscribers were called with the # expected kwargs expected_progress_calls = self.create_expected_progress_callback_info() for expected_progress_call in expected_progress_calls: expected_progress_call['future'] = future self.assertEqual(subscriber.on_queued_calls, [{'future': future}]) self.assertEqual(subscriber.on_progress_calls, expected_progress_calls) self.assertEqual(subscriber.on_done_calls, [{'future': future}]) class NonSeekableReader(io.RawIOBase): def __init__(self, b=b''): super(NonSeekableReader, self).__init__() self._data = six.BytesIO(b) def seekable(self): return False def writable(self): return False def readable(self): return True def write(self, b): # This is needed because python will not always return the correct # kind of error even though writeable returns False. raise io.UnsupportedOperation("write") def read(self, n=-1): return self._data.read(n) class NonSeekableWriter(io.RawIOBase): def __init__(self, fileobj): super(NonSeekableWriter, self).__init__() self._fileobj = fileobj def seekable(self): return False def writable(self): return True def readable(self): return False def write(self, b): self._fileobj.write(b) def read(self, n=-1): raise io.UnsupportedOperation("read") s3transfer-0.1.13/tests/functional/000077500000000000000000000000001324114246300171725ustar00rootroot00000000000000s3transfer-0.1.13/tests/functional/__init__.py000066400000000000000000000010611324114246300213010ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. s3transfer-0.1.13/tests/functional/test_copy.py000066400000000000000000000440661324114246300215670ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. 
This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. from botocore.exceptions import ClientError from botocore.stub import Stubber from tests import BaseGeneralInterfaceTest from tests import FileSizeProvider from s3transfer.manager import TransferManager from s3transfer.manager import TransferConfig from s3transfer.utils import MIN_UPLOAD_CHUNKSIZE class BaseCopyTest(BaseGeneralInterfaceTest): def setUp(self): super(BaseCopyTest, self).setUp() self.config = TransferConfig( max_request_concurrency=1, multipart_chunksize=MIN_UPLOAD_CHUNKSIZE, multipart_threshold=MIN_UPLOAD_CHUNKSIZE * 4 ) self._manager = TransferManager(self.client, self.config) # Initialize some default arguments self.bucket = 'mybucket' self.key = 'mykey' self.copy_source = { 'Bucket': 'mysourcebucket', 'Key': 'mysourcekey' } self.extra_args = {} self.subscribers = [] self.half_chunksize = int(MIN_UPLOAD_CHUNKSIZE / 2) self.content = b'0' * (2 * MIN_UPLOAD_CHUNKSIZE + self.half_chunksize) @property def manager(self): return self._manager @property def method(self): return self.manager.copy def create_call_kwargs(self): return { 'copy_source': self.copy_source, 'bucket': self.bucket, 'key': self.key, } def create_invalid_extra_args(self): return { 'Foo': 'bar' } def create_stubbed_responses(self): return [ { 'method': 'head_object', 'service_response': { 'ContentLength': len(self.content) } }, { 'method': 'copy_object', 'service_response': {} } ] def create_expected_progress_callback_info(self): return [ {'bytes_transferred': len(self.content)}, ] def add_head_object_response(self, expected_params=None, stubber=None): if not stubber: stubber = self.stubber head_response = self.create_stubbed_responses()[0] if expected_params: head_response['expected_params'] = expected_params stubber.add_response(**head_response) def add_successful_copy_responses( self, expected_copy_params=None, expected_create_mpu_params=None, expected_complete_mpu_params=None): # Add all responses needed to do the copy of the object. # Should account for both ranged and nonranged downloads. stubbed_responses = self.create_stubbed_responses()[1:] # If the length of copy responses is greater than one then it is # a multipart copy. copy_responses = stubbed_responses[0:1] if len(stubbed_responses) > 1: copy_responses = stubbed_responses[1:-1] # Add the expected create multipart upload params. if expected_create_mpu_params: stubbed_responses[0][ 'expected_params'] = expected_create_mpu_params # Add any expected copy parameters. if expected_copy_params: for i, copy_response in enumerate(copy_responses): if isinstance(expected_copy_params, list): copy_response['expected_params'] = expected_copy_params[i] else: copy_response['expected_params'] = expected_copy_params # Add the expected complete multipart upload params. if expected_complete_mpu_params: stubbed_responses[-1][ 'expected_params'] = expected_complete_mpu_params # Add the responses to the stubber. for stubbed_response in stubbed_responses: self.stubber.add_response(**stubbed_response) def test_can_provide_file_size(self): self.add_successful_copy_responses() call_kwargs = self.create_call_kwargs() call_kwargs['subscribers'] = [FileSizeProvider(len(self.content))] future = self.manager.copy(**call_kwargs) future.result() # The HeadObject should have not happened and should have been able # to successfully copy the file. 
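        # (FileSizeProvider, defined in tests/__init__.py, implements
        # on_queued() to call future.meta.provide_transfer_size(), which is
        # why no head_object response has to be stubbed for this test.)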
self.stubber.assert_no_pending_responses() def test_provide_copy_source_as_dict(self): self.copy_source['VersionId'] = 'mysourceversionid' expected_params = { 'Bucket': 'mysourcebucket', 'Key': 'mysourcekey', 'VersionId': 'mysourceversionid' } self.add_head_object_response(expected_params=expected_params) self.add_successful_copy_responses() future = self.manager.copy(**self.create_call_kwargs()) future.result() self.stubber.assert_no_pending_responses() def test_invalid_copy_source(self): self.copy_source = ['bucket', 'key'] future = self.manager.copy(**self.create_call_kwargs()) with self.assertRaises(TypeError): future.result() def test_provide_copy_source_client(self): source_client = self.session.create_client( 's3', 'eu-central-1', aws_access_key_id='foo', aws_secret_access_key='bar') source_stubber = Stubber(source_client) source_stubber.activate() self.addCleanup(source_stubber.deactivate) self.add_head_object_response(stubber=source_stubber) self.add_successful_copy_responses() call_kwargs = self.create_call_kwargs() call_kwargs['source_client'] = source_client future = self.manager.copy(**call_kwargs) future.result() # Make sure that all of the responses were properly # used for both clients. source_stubber.assert_no_pending_responses() self.stubber.assert_no_pending_responses() class TestNonMultipartCopy(BaseCopyTest): __test__ = True def test_copy(self): expected_head_params = { 'Bucket': 'mysourcebucket', 'Key': 'mysourcekey' } expected_copy_object = { 'Bucket': self.bucket, 'Key': self.key, 'CopySource': self.copy_source } self.add_head_object_response(expected_params=expected_head_params) self.add_successful_copy_responses( expected_copy_params=expected_copy_object) future = self.manager.copy(**self.create_call_kwargs()) future.result() self.stubber.assert_no_pending_responses() def test_copy_with_extra_args(self): self.extra_args['MetadataDirective'] = 'REPLACE' expected_head_params = { 'Bucket': 'mysourcebucket', 'Key': 'mysourcekey' } expected_copy_object = { 'Bucket': self.bucket, 'Key': self.key, 'CopySource': self.copy_source, 'MetadataDirective': 'REPLACE' } self.add_head_object_response(expected_params=expected_head_params) self.add_successful_copy_responses( expected_copy_params=expected_copy_object) call_kwargs = self.create_call_kwargs() call_kwargs['extra_args'] = self.extra_args future = self.manager.copy(**call_kwargs) future.result() self.stubber.assert_no_pending_responses() def test_copy_maps_extra_args_to_head_object(self): self.extra_args['CopySourceSSECustomerAlgorithm'] = 'AES256' expected_head_params = { 'Bucket': 'mysourcebucket', 'Key': 'mysourcekey', 'SSECustomerAlgorithm': 'AES256' } expected_copy_object = { 'Bucket': self.bucket, 'Key': self.key, 'CopySource': self.copy_source, 'CopySourceSSECustomerAlgorithm': 'AES256' } self.add_head_object_response(expected_params=expected_head_params) self.add_successful_copy_responses( expected_copy_params=expected_copy_object) call_kwargs = self.create_call_kwargs() call_kwargs['extra_args'] = self.extra_args future = self.manager.copy(**call_kwargs) future.result() self.stubber.assert_no_pending_responses() def test_allowed_copy_params_are_valid(self): op_model = self.client.meta.service_model.operation_model('CopyObject') for allowed_upload_arg in self._manager.ALLOWED_COPY_ARGS: self.assertIn(allowed_upload_arg, op_model.input_shape.members) class TestMultipartCopy(BaseCopyTest): __test__ = True def setUp(self): super(TestMultipartCopy, self).setUp() self.config = TransferConfig( max_request_concurrency=1, 
multipart_threshold=1, multipart_chunksize=4) self._manager = TransferManager(self.client, self.config) def create_stubbed_responses(self): return [ { 'method': 'head_object', 'service_response': { 'ContentLength': len(self.content) } }, { 'method': 'create_multipart_upload', 'service_response': { 'UploadId': 'my-upload-id' } }, { 'method': 'upload_part_copy', 'service_response': { 'CopyPartResult': { 'ETag': 'etag-1' } } }, { 'method': 'upload_part_copy', 'service_response': { 'CopyPartResult': { 'ETag': 'etag-2' } } }, { 'method': 'upload_part_copy', 'service_response': { 'CopyPartResult': { 'ETag': 'etag-3' } } }, { 'method': 'complete_multipart_upload', 'service_response': {} } ] def create_expected_progress_callback_info(self): # Note that last read is from the empty sentinel indicating # that the stream is done. return [ {'bytes_transferred': MIN_UPLOAD_CHUNKSIZE}, {'bytes_transferred': MIN_UPLOAD_CHUNKSIZE}, {'bytes_transferred': self.half_chunksize} ] def add_create_multipart_upload_response(self): self.stubber.add_response(**self.create_stubbed_responses()[1]) def _get_expected_params(self): upload_id = 'my-upload-id' # Add expected parameters to the head object expected_head_params = { 'Bucket': 'mysourcebucket', 'Key': 'mysourcekey', } # Add expected parameters for the create multipart expected_create_mpu_params = { 'Bucket': self.bucket, 'Key': self.key, } expected_copy_params = [] # Add expected parameters to the copy part ranges = ['bytes=0-5242879', 'bytes=5242880-10485759', 'bytes=10485760-13107199'] for i, range_val in enumerate(ranges): expected_copy_params.append( { 'Bucket': self.bucket, 'Key': self.key, 'CopySource': self.copy_source, 'UploadId': upload_id, 'PartNumber': i + 1, 'CopySourceRange': range_val } ) # Add expected parameters for the complete multipart expected_complete_mpu_params = { 'Bucket': self.bucket, 'Key': self.key, 'UploadId': upload_id, 'MultipartUpload': { 'Parts': [ {'ETag': 'etag-1', 'PartNumber': 1}, {'ETag': 'etag-2', 'PartNumber': 2}, {'ETag': 'etag-3', 'PartNumber': 3} ] } } return expected_head_params, { 'expected_create_mpu_params': expected_create_mpu_params, 'expected_copy_params': expected_copy_params, 'expected_complete_mpu_params': expected_complete_mpu_params, } def _add_params_to_expected_params( self, add_copy_kwargs, operation_types, new_params): expected_params_to_update = [] for operation_type in operation_types: add_copy_kwargs_key = 'expected_' + operation_type + '_params' expected_params = add_copy_kwargs[add_copy_kwargs_key] if isinstance(expected_params, list): expected_params_to_update.extend(expected_params) else: expected_params_to_update.append(expected_params) for expected_params in expected_params_to_update: expected_params.update(new_params) def test_copy(self): head_params, add_copy_kwargs = self._get_expected_params() self.add_head_object_response(expected_params=head_params) self.add_successful_copy_responses(**add_copy_kwargs) future = self.manager.copy(**self.create_call_kwargs()) future.result() self.stubber.assert_no_pending_responses() def test_copy_with_extra_args(self): # This extra argument should be added to the head object, # the create multipart upload, and upload part copy. 
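        # (It is also expected on the complete multipart upload request, which
        # is why 'complete_mpu' appears in the operation types passed below.)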
self.extra_args['RequestPayer'] = 'requester' head_params, add_copy_kwargs = self._get_expected_params() head_params.update(self.extra_args) self.add_head_object_response(expected_params=head_params) self._add_params_to_expected_params( add_copy_kwargs, ['create_mpu', 'copy', 'complete_mpu'], self.extra_args) self.add_successful_copy_responses(**add_copy_kwargs) call_kwargs = self.create_call_kwargs() call_kwargs['extra_args'] = self.extra_args future = self.manager.copy(**call_kwargs) future.result() self.stubber.assert_no_pending_responses() def test_copy_blacklists_args_to_create_multipart(self): # This argument can never be used for multipart uploads self.extra_args['MetadataDirective'] = 'COPY' head_params, add_copy_kwargs = self._get_expected_params() self.add_head_object_response(expected_params=head_params) self.add_successful_copy_responses(**add_copy_kwargs) call_kwargs = self.create_call_kwargs() call_kwargs['extra_args'] = self.extra_args future = self.manager.copy(**call_kwargs) future.result() self.stubber.assert_no_pending_responses() def test_copy_args_to_only_create_multipart(self): self.extra_args['ACL'] = 'private' head_params, add_copy_kwargs = self._get_expected_params() self.add_head_object_response(expected_params=head_params) self._add_params_to_expected_params( add_copy_kwargs, ['create_mpu'], self.extra_args) self.add_successful_copy_responses(**add_copy_kwargs) call_kwargs = self.create_call_kwargs() call_kwargs['extra_args'] = self.extra_args future = self.manager.copy(**call_kwargs) future.result() self.stubber.assert_no_pending_responses() def test_copy_passes_args_to_create_multipart_and_upload_part(self): # This will only be used for the complete multipart upload # and upload part. self.extra_args['SSECustomerAlgorithm'] = 'AES256' head_params, add_copy_kwargs = self._get_expected_params() self.add_head_object_response(expected_params=head_params) self._add_params_to_expected_params( add_copy_kwargs, ['create_mpu', 'copy'], self.extra_args) self.add_successful_copy_responses(**add_copy_kwargs) call_kwargs = self.create_call_kwargs() call_kwargs['extra_args'] = self.extra_args future = self.manager.copy(**call_kwargs) future.result() self.stubber.assert_no_pending_responses() def test_copy_maps_extra_args_to_head_object(self): self.extra_args['CopySourceSSECustomerAlgorithm'] = 'AES256' head_params, add_copy_kwargs = self._get_expected_params() # The CopySourceSSECustomerAlgorithm needs to get mapped to # SSECustomerAlgorithm for HeadObject head_params['SSECustomerAlgorithm'] = 'AES256' self.add_head_object_response(expected_params=head_params) # However, it needs to remain the same for UploadPartCopy. 
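        # (Hence only the 'copy' operation type receives the extra args below;
        # the create/complete multipart upload expectations stay untouched.)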
self._add_params_to_expected_params( add_copy_kwargs, ['copy'], self.extra_args) self.add_successful_copy_responses(**add_copy_kwargs) call_kwargs = self.create_call_kwargs() call_kwargs['extra_args'] = self.extra_args future = self.manager.copy(**call_kwargs) future.result() self.stubber.assert_no_pending_responses() def test_abort_on_failure(self): # First add the head object and create multipart upload self.add_head_object_response() self.add_create_multipart_upload_response() # Cause an error on upload_part_copy self.stubber.add_client_error('upload_part_copy', 'ArbitraryFailure') # Add the abort multipart to ensure it gets cleaned up on failure self.stubber.add_response( 'abort_multipart_upload', service_response={}, expected_params={ 'Bucket': self.bucket, 'Key': self.key, 'UploadId': 'my-upload-id' } ) future = self.manager.copy(**self.create_call_kwargs()) with self.assertRaisesRegexp(ClientError, 'ArbitraryFailure'): future.result() self.stubber.assert_no_pending_responses() s3transfer-0.1.13/tests/functional/test_delete.py000066400000000000000000000042431324114246300220500ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. from tests import BaseGeneralInterfaceTest from s3transfer.manager import TransferManager class TestDeleteObject(BaseGeneralInterfaceTest): __test__ = True def setUp(self): super(TestDeleteObject, self).setUp() self.bucket = 'mybucket' self.key = 'mykey' self.manager = TransferManager(self.client) @property def method(self): """The transfer manager method to invoke i.e. upload()""" return self.manager.delete def create_call_kwargs(self): """The kwargs to be passed to the transfer manager method""" return { 'bucket': self.bucket, 'key': self.key, } def create_invalid_extra_args(self): return { 'BadKwargs': True, } def create_stubbed_responses(self): """A list of stubbed responses that will cause the request to succeed The elements of this list is a dictionary that will be used as key word arguments to botocore.Stubber.add_response(). For example:: [{'method': 'put_object', 'service_response': {}}] """ return [{ 'method': 'delete_object', 'service_response': {}, 'expected_params': {'Bucket': self.bucket, 'Key': self.key}, }] def create_expected_progress_callback_info(self): return [] def test_known_allowed_args_in_input_shape(self): op_model = self.client.meta.service_model.operation_model( 'DeleteObject') for allowed_arg in self.manager.ALLOWED_DELETE_ARGS: self.assertIn(allowed_arg, op_model.input_shape.members) s3transfer-0.1.13/tests/functional/test_download.py000066400000000000000000000434241324114246300224210ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. 
This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. import copy import os import tempfile import time import shutil import glob from botocore.exceptions import ClientError from tests import StreamWithError from tests import FileSizeProvider from tests import RecordingSubscriber from tests import RecordingOSUtils from tests import NonSeekableWriter from tests import BaseGeneralInterfaceTest from tests import skip_if_windows from tests import skip_if_using_serial_implementation from s3transfer.compat import six from s3transfer.compat import SOCKET_ERROR from s3transfer.exceptions import RetriesExceededError from s3transfer.manager import TransferManager from s3transfer.manager import TransferConfig from s3transfer.download import GetObjectTask class BaseDownloadTest(BaseGeneralInterfaceTest): def setUp(self): super(BaseDownloadTest, self).setUp() self.config = TransferConfig(max_request_concurrency=1) self._manager = TransferManager(self.client, self.config) # Create a temporary directory to write to self.tempdir = tempfile.mkdtemp() self.filename = os.path.join(self.tempdir, 'myfile') # Initialize some default arguments self.bucket = 'mybucket' self.key = 'mykey' self.extra_args = {} self.subscribers = [] # Create a stream to read from self.content = b'my content' self.stream = six.BytesIO(self.content) def tearDown(self): super(BaseDownloadTest, self).tearDown() shutil.rmtree(self.tempdir) @property def manager(self): return self._manager @property def method(self): return self.manager.download def create_call_kwargs(self): return { 'bucket': self.bucket, 'key': self.key, 'fileobj': self.filename } def create_invalid_extra_args(self): return { 'Foo': 'bar' } def create_stubbed_responses(self): # We want to make sure the beginning of the stream is always used # incase this gets called twice. self.stream.seek(0) return [ { 'method': 'head_object', 'service_response': { 'ContentLength': len(self.content) } }, { 'method': 'get_object', 'service_response': { 'Body': self.stream } } ] def create_expected_progress_callback_info(self): # Note that last read is from the empty sentinel indicating # that the stream is done. return [ {'bytes_transferred': 10} ] def add_head_object_response(self, expected_params=None): head_response = self.create_stubbed_responses()[0] if expected_params: head_response['expected_params'] = expected_params self.stubber.add_response(**head_response) def add_successful_get_object_responses( self, expected_params=None, expected_ranges=None): # Add all get_object responses needed to complete the download. # Should account for both ranged and nonranged downloads. 
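        # For example (mirroring TestRangedDownload below), downloading
        # b'my content' in 4-byte chunks pairs the stubbed get_object
        # responses with expected_ranges=['bytes=0-3', 'bytes=4-7', 'bytes=8-'],
        # one range per response.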
for i, stubbed_response in enumerate( self.create_stubbed_responses()[1:]): if expected_params: stubbed_response['expected_params'] = copy.deepcopy( expected_params) if expected_ranges: stubbed_response['expected_params'][ 'Range'] = expected_ranges[i] self.stubber.add_response(**stubbed_response) def add_n_retryable_get_object_responses(self, n, num_reads=0): for _ in range(n): self.stubber.add_response( method='get_object', service_response={ 'Body': StreamWithError( copy.deepcopy(self.stream), SOCKET_ERROR, num_reads) } ) def test_download_temporary_file_does_not_exist(self): self.add_head_object_response() self.add_successful_get_object_responses() future = self.manager.download(**self.create_call_kwargs()) future.result() # Make sure the file exists self.assertTrue(os.path.exists(self.filename)) # Make sure the random temporary file does not exist possible_matches = glob.glob('%s*' % self.filename + os.extsep) self.assertEqual(possible_matches, []) def test_download_for_fileobj(self): self.add_head_object_response() self.add_successful_get_object_responses() with open(self.filename, 'wb') as f: future = self.manager.download( self.bucket, self.key, f, self.extra_args) future.result() # Ensure that the contents are correct with open(self.filename, 'rb') as f: self.assertEqual(self.content, f.read()) def test_download_for_seekable_filelike_obj(self): self.add_head_object_response() self.add_successful_get_object_responses() # Create a file-like object to test. In this case, it is a BytesIO # object. bytes_io = six.BytesIO() future = self.manager.download( self.bucket, self.key, bytes_io, self.extra_args) future.result() # Ensure that the contents are correct bytes_io.seek(0) self.assertEqual(self.content, bytes_io.read()) def test_download_for_nonseekable_filelike_obj(self): self.add_head_object_response() self.add_successful_get_object_responses() with open(self.filename, 'wb') as f: future = self.manager.download( self.bucket, self.key, NonSeekableWriter(f), self.extra_args) future.result() # Ensure that the contents are correct with open(self.filename, 'rb') as f: self.assertEqual(self.content, f.read()) def test_download_cleanup_on_failure(self): self.add_head_object_response() # Throw an error on the download self.stubber.add_client_error('get_object') future = self.manager.download(**self.create_call_kwargs()) with self.assertRaises(ClientError): future.result() # Make sure the actual file and the temporary do not exist # by globbing for the file and any of its extensions possible_matches = glob.glob('%s*' % self.filename) self.assertEqual(possible_matches, []) def test_download_with_nonexistent_directory(self): self.add_head_object_response() self.add_successful_get_object_responses() call_kwargs = self.create_call_kwargs() call_kwargs['fileobj'] = os.path.join( self.tempdir, 'missing-directory', 'myfile') future = self.manager.download(**call_kwargs) with self.assertRaises(IOError): future.result() def test_retries_and_succeeds(self): self.add_head_object_response() # Insert a response that will trigger a retry. self.add_n_retryable_get_object_responses(1) # Add the normal responses to simulate the download proceeding # as normal after the retry. self.add_successful_get_object_responses() future = self.manager.download(**self.create_call_kwargs()) future.result() # The retry should have been consumed and the process should have # continued using the successful responses. 
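        # (Each retryable response wraps the stream in StreamWithError with
        # SOCKET_ERROR, so its first read raises and the downloader moves on
        # to the next stubbed get_object response.)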
self.stubber.assert_no_pending_responses() with open(self.filename, 'rb') as f: self.assertEqual(self.content, f.read()) def test_retry_failure(self): self.add_head_object_response() max_retries = 3 self.config.num_download_attempts = max_retries self._manager = TransferManager(self.client, self.config) # Add responses that fill up the maximum number of retries. self.add_n_retryable_get_object_responses(max_retries) future = self.manager.download(**self.create_call_kwargs()) # A retry exceeded error should have happened. with self.assertRaises(RetriesExceededError): future.result() # All of the retries should have been used up. self.stubber.assert_no_pending_responses() def test_retry_rewinds_callbacks(self): self.add_head_object_response() # Insert a response that will trigger a retry after one read of the # stream has been made. self.add_n_retryable_get_object_responses(1, num_reads=1) # Add the normal responses to simulate the download proceeding # as normal after the retry. self.add_successful_get_object_responses() recorder_subscriber = RecordingSubscriber() # Set the streaming to a size that is smaller than the data we # currently provide to it to simulate rewinds of callbacks. self.config.io_chunksize = 3 future = self.manager.download( subscribers=[recorder_subscriber], **self.create_call_kwargs()) future.result() # Ensure that there is no more remaining responses and that contents # are correct. self.stubber.assert_no_pending_responses() with open(self.filename, 'rb') as f: self.assertEqual(self.content, f.read()) # Assert that the number of bytes seen is equal to the length of # downloaded content. self.assertEqual( recorder_subscriber.calculate_bytes_seen(), len(self.content)) # Also ensure that the second progress invocation was negative three # becasue a retry happened on the second read of the stream and we # know that the chunk size for each read is 3. progress_byte_amts = [ call['bytes_transferred'] for call in recorder_subscriber.on_progress_calls ] self.assertEqual(-3, progress_byte_amts[1]) def test_can_provide_file_size(self): self.add_successful_get_object_responses() call_kwargs = self.create_call_kwargs() call_kwargs['subscribers'] = [FileSizeProvider(len(self.content))] future = self.manager.download(**call_kwargs) future.result() # The HeadObject should have not happened and should have been able # to successfully download the file. self.stubber.assert_no_pending_responses() with open(self.filename, 'rb') as f: self.assertEqual(self.content, f.read()) def test_uses_provided_osutil(self): osutil = RecordingOSUtils() # Use the recording os utility for the transfer manager self._manager = TransferManager(self.client, self.config, osutil) self.add_head_object_response() self.add_successful_get_object_responses() future = self.manager.download(**self.create_call_kwargs()) future.result() # The osutil should have had its open() method invoked when opening # a temporary file and its rename_file() method invoked when the # the temporary file was moved to its final location. 
self.assertEqual(len(osutil.open_records), 1) self.assertEqual(len(osutil.rename_records), 1) @skip_if_windows('Windows does not support UNIX special files') @skip_if_using_serial_implementation( 'A seperate thread is needed to read from the fifo') def test_download_for_fifo_file(self): self.add_head_object_response() self.add_successful_get_object_responses() # Create the fifo file os.mkfifo(self.filename) future = self.manager.download( self.bucket, self.key, self.filename, self.extra_args) # The call to open a fifo will block until there is both a reader # and a writer, so we need to open it for reading after we've # started the transfer. with open(self.filename, 'rb') as fifo: future.result() self.assertEqual(fifo.read(), self.content) class TestNonRangedDownload(BaseDownloadTest): # TODO: If you want to add tests outside of this test class and still # subclass from BaseDownloadTest you need to set ``__test__ = True``. If # you do not, your tests will not get picked up by the test runner! This # needs to be done until we find a better way to ignore running test cases # from the general test base class, which we do not want ran. __test__ = True def test_download(self): self.extra_args['RequestPayer'] = 'requester' expected_params = { 'Bucket': self.bucket, 'Key': self.key, 'RequestPayer': 'requester' } self.add_head_object_response(expected_params) self.add_successful_get_object_responses(expected_params) future = self.manager.download( self.bucket, self.key, self.filename, self.extra_args) future.result() # Ensure that the contents are correct with open(self.filename, 'rb') as f: self.assertEqual(self.content, f.read()) def test_allowed_copy_params_are_valid(self): op_model = self.client.meta.service_model.operation_model('GetObject') for allowed_upload_arg in self._manager.ALLOWED_DOWNLOAD_ARGS: self.assertIn(allowed_upload_arg, op_model.input_shape.members) def test_download_empty_object(self): self.content = b'' self.stream = six.BytesIO(self.content) self.add_head_object_response() self.add_successful_get_object_responses() future = self.manager.download( self.bucket, self.key, self.filename, self.extra_args) future.result() # Ensure that the empty file exists with open(self.filename, 'rb') as f: self.assertEqual(b'', f.read()) def test_uses_bandwidth_limiter(self): self.content = b'a' * 1024 * 1024 self.stream = six.BytesIO(self.content) self.config = TransferConfig( max_request_concurrency=1, max_bandwidth=len(self.content)/2) self._manager = TransferManager(self.client, self.config) self.add_head_object_response() self.add_successful_get_object_responses() start = time.time() future = self.manager.download( self.bucket, self.key, self.filename, self.extra_args) future.result() # This is just a smoke test to make sure that the limiter is # being used and not necessary its exactness. So we set the maximum # bandwidth to len(content)/2 per sec and make sure that it is # noticeably slower. Ideally it will take more than two seconds, but # given tracking at the beginning of transfers are not entirely # accurate setting at the initial start of a transfer, we give us # some flexibility by setting the expected time to half of the # theoretical time to take. 
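        # Concretely: the object is 1 MiB and max_bandwidth is len(content)/2
        # bytes per second, so the transfer should take roughly 2 seconds at
        # that cap; asserting only >= 1 second leaves room for the start-up
        # inaccuracy described above.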
self.assertGreaterEqual(time.time() - start, 1) # Ensure that the contents are correct with open(self.filename, 'rb') as f: self.assertEqual(self.content, f.read()) class TestRangedDownload(BaseDownloadTest): # TODO: If you want to add tests outside of this test class and still # subclass from BaseDownloadTest you need to set ``__test__ = True``. If # you do not, your tests will not get picked up by the test runner! This # needs to be done until we find a better way to ignore running test cases # from the general test base class, which we do not want ran. __test__ = True def setUp(self): super(TestRangedDownload, self).setUp() self.config = TransferConfig( max_request_concurrency=1, multipart_threshold=1, multipart_chunksize=4) self._manager = TransferManager(self.client, self.config) def create_stubbed_responses(self): return [ { 'method': 'head_object', 'service_response': { 'ContentLength': len(self.content) } }, { 'method': 'get_object', 'service_response': { 'Body': six.BytesIO(self.content[0:4]) } }, { 'method': 'get_object', 'service_response': { 'Body': six.BytesIO(self.content[4:8]) } }, { 'method': 'get_object', 'service_response': { 'Body': six.BytesIO(self.content[8:]) } } ] def create_expected_progress_callback_info(self): return [ {'bytes_transferred': 4}, {'bytes_transferred': 4}, {'bytes_transferred': 2}, ] def test_download(self): self.extra_args['RequestPayer'] = 'requester' expected_params = { 'Bucket': self.bucket, 'Key': self.key, 'RequestPayer': 'requester' } expected_ranges = ['bytes=0-3', 'bytes=4-7', 'bytes=8-'] self.add_head_object_response(expected_params) self.add_successful_get_object_responses( expected_params, expected_ranges) future = self.manager.download( self.bucket, self.key, self.filename, self.extra_args) future.result() # Ensure that the contents are correct with open(self.filename, 'rb') as f: self.assertEqual(self.content, f.read()) s3transfer-0.1.13/tests/functional/test_manager.py000066400000000000000000000147231324114246300222240ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the 'License'). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the 'license' file accompanying this file. This file is # distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
from io import RawIOBase from botocore.awsrequest import create_request_object import mock from tests import skip_if_using_serial_implementation from tests import StubbedClientTest from s3transfer.exceptions import CancelledError from s3transfer.exceptions import FatalError from s3transfer.futures import BaseExecutor from s3transfer.manager import TransferManager from s3transfer.manager import TransferConfig class ArbitraryException(Exception): pass class SignalTransferringBody(RawIOBase): """A mocked body with the ability to signal when transfers occur""" def __init__(self): super(SignalTransferringBody, self).__init__() self.signal_transferring_call_count = 0 self.signal_not_transferring_call_count = 0 def signal_transferring(self): self.signal_transferring_call_count += 1 def signal_not_transferring(self): self.signal_not_transferring_call_count += 1 def seek(self, where): pass def tell(self): return 0 def read(self, amount=0): return b'' class TestTransferManager(StubbedClientTest): @skip_if_using_serial_implementation( 'Exception is thrown once all transfers are submitted. ' 'However for the serial implementation, transfers are performed ' 'in main thread meaning all transfers will complete before the ' 'exception being thrown.' ) def test_error_in_context_manager_cancels_incomplete_transfers(self): # The purpose of this test is to make sure if an error is raised # in the body of the context manager, incomplete transfers will # be cancelled with value of the exception wrapped by a CancelledError # NOTE: The fact that delete() was chosen to test this is arbitrary # other than it is the easiet to set up for the stubber. # The specific operation is not important to the purpose of this test. num_transfers = 100 futures = [] ref_exception_msg = 'arbitrary exception' for _ in range(num_transfers): self.stubber.add_response('delete_object', {}) manager = TransferManager( self.client, TransferConfig( max_request_concurrency=1, max_submission_concurrency=1) ) try: with manager: for i in range(num_transfers): futures.append(manager.delete('mybucket', 'mykey')) raise ArbitraryException(ref_exception_msg) except ArbitraryException: # At least one of the submitted futures should have been # cancelled. with self.assertRaisesRegexp(FatalError, ref_exception_msg): for future in futures: future.result() @skip_if_using_serial_implementation( 'Exception is thrown once all transfers are submitted. ' 'However for the serial implementation, transfers are performed ' 'in main thread meaning all transfers will complete before the ' 'exception being thrown.' ) def test_cntrl_c_in_context_manager_cancels_incomplete_transfers(self): # The purpose of this test is to make sure if an error is raised # in the body of the context manager, incomplete transfers will # be cancelled with value of the exception wrapped by a CancelledError # NOTE: The fact that delete() was chosen to test this is arbitrary # other than it is the easiet to set up for the stubber. # The specific operation is not important to the purpose of this test. num_transfers = 100 futures = [] for _ in range(num_transfers): self.stubber.add_response('delete_object', {}) manager = TransferManager( self.client, TransferConfig( max_request_concurrency=1, max_submission_concurrency=1) ) try: with manager: for i in range(num_transfers): futures.append(manager.delete('mybucket', 'mykey')) raise KeyboardInterrupt() except KeyboardInterrupt: # At least one of the submitted futures should have been # cancelled. 
with self.assertRaisesRegexp( CancelledError, 'KeyboardInterrupt()'): for future in futures: future.result() def test_enable_disable_callbacks_only_ever_registered_once(self): body = SignalTransferringBody() request = create_request_object({ 'method': 'PUT', 'url': 'https://s3.amazonaws.com', 'body': body, 'headers': {}, 'context': {} }) # Create two TransferManager's using the same client TransferManager(self.client) TransferManager(self.client) self.client.meta.events.emit( 'request-created.s3', request=request, operation_name='PutObject') # The client should have only have the enable/disable callback # handlers registered once depite being used for two different # TransferManagers. self.assertEqual( body.signal_transferring_call_count, 1, 'The enable_callback() should have only ever been registered once') self.assertEqual( body.signal_not_transferring_call_count, 1, 'The disable_callback() should have only ever been registered ' 'once') def test_use_custom_executor_implementation(self): mocked_executor_cls = mock.Mock(BaseExecutor) transfer_manager = TransferManager( self.client, executor_cls=mocked_executor_cls) transfer_manager.delete('bucket', 'key') self.assertTrue(mocked_executor_cls.return_value.submit.called) def test_unicode_exception_in_context_manager(self): with self.assertRaises(ArbitraryException): with TransferManager(self.client): raise ArbitraryException(u'\u2713') s3transfer-0.1.13/tests/functional/test_upload.py000066400000000000000000000514731324114246300221010ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. import os import time import tempfile import shutil import mock from botocore.client import Config from botocore.exceptions import ClientError from botocore.awsrequest import AWSRequest from botocore.stub import ANY from tests import BaseGeneralInterfaceTest from tests import RecordingSubscriber from tests import RecordingOSUtils from tests import NonSeekableReader from s3transfer.compat import six from s3transfer.manager import TransferManager from s3transfer.manager import TransferConfig from s3transfer.utils import ChunksizeAdjuster class BaseUploadTest(BaseGeneralInterfaceTest): def setUp(self): super(BaseUploadTest, self).setUp() # TODO: We do not want to use the real MIN_UPLOAD_CHUNKSIZE # when we're adjusting parts. # This is really wasteful and fails CI builds because self.contents # would normally use 10MB+ of memory. # Until there's an API to configure this, we're patching this with # a min size of 1. We can't patch MIN_UPLOAD_CHUNKSIZE directly # because it's already bound to a default value in the # chunksize adjuster. Instead we need to patch out the # chunksize adjuster class. 
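        # (The patch below swaps s3transfer.upload.ChunksizeAdjuster for a
        # factory that returns ChunksizeAdjuster(min_size=1), so the tiny
        # multipart_chunksize values used in these tests survive adjustment.)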
self.adjuster_patch = mock.patch( 's3transfer.upload.ChunksizeAdjuster', lambda: ChunksizeAdjuster(min_size=1)) self.adjuster_patch.start() self.config = TransferConfig(max_request_concurrency=1) self._manager = TransferManager(self.client, self.config) # Create a temporary directory with files to read from self.tempdir = tempfile.mkdtemp() self.filename = os.path.join(self.tempdir, 'myfile') self.content = b'my content' with open(self.filename, 'wb') as f: f.write(self.content) # Initialize some default arguments self.bucket = 'mybucket' self.key = 'mykey' self.extra_args = {} self.subscribers = [] # A list to keep track of all of the bodies sent over the wire # and their order. self.sent_bodies = [] self.client.meta.events.register( 'before-parameter-build.s3.*', self.collect_body) def tearDown(self): super(BaseUploadTest, self).tearDown() shutil.rmtree(self.tempdir) self.adjuster_patch.stop() def collect_body(self, params, model, **kwargs): # A handler to simulate the reading of the body including the # request-created event that signals to simulate the progress # callbacks if 'Body' in params: # TODO: This is not ideal. Need to figure out a better idea of # simulating reading of the request across the wire to trigger # progress callbacks request = AWSRequest( method='PUT', url='https://s3.amazonaws.com', data=params['Body'] ) self.client.meta.events.emit( 'request-created.s3.%s' % model.name, request=request, operation_name=model.name ) self.sent_bodies.append(self._stream_body(params['Body'])) def _stream_body(self, body): read_amt = 8 * 1024 data = body.read(read_amt) collected_body = data while data: data = body.read(read_amt) collected_body += data return collected_body @property def manager(self): return self._manager @property def method(self): return self.manager.upload def create_call_kwargs(self): return { 'fileobj': self.filename, 'bucket': self.bucket, 'key': self.key } def create_invalid_extra_args(self): return { 'Foo': 'bar' } def create_stubbed_responses(self): return [{'method': 'put_object', 'service_response': {}}] def create_expected_progress_callback_info(self): return [{'bytes_transferred': 10}] def assert_expected_client_calls_were_correct(self): # We assert that expected client calls were made by ensuring that # there are no more pending responses. If there are no more pending # responses, then all stubbed responses were consumed. 
self.stubber.assert_no_pending_responses() class TestNonMultipartUpload(BaseUploadTest): __test__ = True def add_put_object_response_with_default_expected_params( self, extra_expected_params=None): expected_params = { 'Body': ANY, 'Bucket': self.bucket, 'Key': self.key } if extra_expected_params: expected_params.update(extra_expected_params) upload_response = self.create_stubbed_responses()[0] upload_response['expected_params'] = expected_params self.stubber.add_response(**upload_response) def assert_put_object_body_was_correct(self): self.assertEqual(self.sent_bodies, [self.content]) def test_upload(self): self.extra_args['RequestPayer'] = 'requester' self.add_put_object_response_with_default_expected_params( extra_expected_params={'RequestPayer': 'requester'} ) future = self.manager.upload( self.filename, self.bucket, self.key, self.extra_args) future.result() self.assert_expected_client_calls_were_correct() self.assert_put_object_body_was_correct() def test_upload_for_fileobj(self): self.add_put_object_response_with_default_expected_params() with open(self.filename, 'rb') as f: future = self.manager.upload( f, self.bucket, self.key, self.extra_args) future.result() self.assert_expected_client_calls_were_correct() self.assert_put_object_body_was_correct() def test_upload_for_seekable_filelike_obj(self): self.add_put_object_response_with_default_expected_params() bytes_io = six.BytesIO(self.content) future = self.manager.upload( bytes_io, self.bucket, self.key, self.extra_args) future.result() self.assert_expected_client_calls_were_correct() self.assert_put_object_body_was_correct() def test_upload_for_seekable_filelike_obj_that_has_been_seeked(self): self.add_put_object_response_with_default_expected_params() bytes_io = six.BytesIO(self.content) seek_pos = 5 bytes_io.seek(seek_pos) future = self.manager.upload( bytes_io, self.bucket, self.key, self.extra_args) future.result() self.assert_expected_client_calls_were_correct() self.assertEqual(b''.join(self.sent_bodies), self.content[seek_pos:]) def test_upload_for_non_seekable_filelike_obj(self): self.add_put_object_response_with_default_expected_params() body = NonSeekableReader(self.content) future = self.manager.upload( body, self.bucket, self.key, self.extra_args) future.result() self.assert_expected_client_calls_were_correct() self.assert_put_object_body_was_correct() def test_sigv4_progress_callbacks_invoked_once(self): # Reset the client and manager to use sigv4 self.reset_stubber_with_new_client( {'config': Config(signature_version='s3v4')}) self.client.meta.events.register( 'before-parameter-build.s3.*', self.collect_body) self._manager = TransferManager(self.client, self.config) # Add the stubbed response. self.add_put_object_response_with_default_expected_params() subscriber = RecordingSubscriber() future = self.manager.upload( self.filename, self.bucket, self.key, subscribers=[subscriber]) future.result() self.assert_expected_client_calls_were_correct() # The amount of bytes seen should be the same as the file size self.assertEqual(subscriber.calculate_bytes_seen(), len(self.content)) def test_uses_provided_osutil(self): osutil = RecordingOSUtils() # Use the recording os utility for the transfer manager self._manager = TransferManager(self.client, self.config, osutil) self.add_put_object_response_with_default_expected_params() future = self.manager.upload(self.filename, self.bucket, self.key) future.result() # The upload should have used the os utility. We check this by making # sure that the recorded opens are as expected. 
        expected_opens = [(self.filename, 'rb')]
        self.assertEqual(osutil.open_records, expected_opens)

    def test_allowed_upload_params_are_valid(self):
        op_model = self.client.meta.service_model.operation_model('PutObject')
        for allowed_upload_arg in self._manager.ALLOWED_UPLOAD_ARGS:
            self.assertIn(allowed_upload_arg, op_model.input_shape.members)

    def test_upload_with_bandwidth_limiter(self):
        self.content = b'a' * 1024 * 1024
        with open(self.filename, 'wb') as f:
            f.write(self.content)
        self.config = TransferConfig(
            max_request_concurrency=1,
            max_bandwidth=len(self.content)/2)
        self._manager = TransferManager(self.client, self.config)

        self.add_put_object_response_with_default_expected_params()
        start = time.time()
        future = self.manager.upload(self.filename, self.bucket, self.key)
        future.result()
        # This is just a smoke test to make sure that the limiter is being
        # used; it does not check the limiter's exactness. We set the maximum
        # bandwidth to len(content)/2 per second and make sure the upload is
        # noticeably slower. Ideally it would take more than two seconds, but
        # because rate tracking at the very start of a transfer is not
        # entirely accurate, we give ourselves some flexibility by only
        # expecting half of the theoretical time.
        self.assertGreaterEqual(time.time() - start, 1)
        self.assert_expected_client_calls_were_correct()
        self.assert_put_object_body_was_correct()


class TestMultipartUpload(BaseUploadTest):
    __test__ = True

    def setUp(self):
        super(TestMultipartUpload, self).setUp()
        self.chunksize = 4
        self.config = TransferConfig(
            max_request_concurrency=1, multipart_threshold=1,
            multipart_chunksize=self.chunksize)
        self._manager = TransferManager(self.client, self.config)
        self.multipart_id = 'my-upload-id'

    def create_stubbed_responses(self):
        return [
            {'method': 'create_multipart_upload',
             'service_response': {'UploadId': self.multipart_id}},
            {'method': 'upload_part',
             'service_response': {'ETag': 'etag-1'}},
            {'method': 'upload_part',
             'service_response': {'ETag': 'etag-2'}},
            {'method': 'upload_part',
             'service_response': {'ETag': 'etag-3'}},
            {'method': 'complete_multipart_upload',
             'service_response': {}}
        ]

    def create_expected_progress_callback_info(self):
        return [
            {'bytes_transferred': 4},
            {'bytes_transferred': 4},
            {'bytes_transferred': 2}
        ]

    def assert_upload_part_bodies_were_correct(self):
        expected_contents = []
        for i in range(0, len(self.content), self.chunksize):
            end_i = i + self.chunksize
            if end_i > len(self.content):
                expected_contents.append(self.content[i:])
            else:
                expected_contents.append(self.content[i:end_i])
        self.assertEqual(self.sent_bodies, expected_contents)

    def add_create_multipart_response_with_default_expected_params(
            self, extra_expected_params=None):
        expected_params = {'Bucket': self.bucket, 'Key': self.key}
        if extra_expected_params:
            expected_params.update(extra_expected_params)
        response = self.create_stubbed_responses()[0]
        response['expected_params'] = expected_params
        self.stubber.add_response(**response)

    def add_upload_part_responses_with_default_expected_params(
            self, extra_expected_params=None):
        num_parts = 3
        upload_part_responses = self.create_stubbed_responses()[1:-1]
        for i in range(num_parts):
            upload_part_response = upload_part_responses[i]
            expected_params = {
                'Bucket': self.bucket, 'Key': self.key,
                'UploadId': self.multipart_id, 'Body': ANY,
                'PartNumber': i + 1,
            }
            if extra_expected_params:
                expected_params.update(extra_expected_params)
            upload_part_response['expected_params'] = expected_params
            self.stubber.add_response(**upload_part_response)

    def 
add_complete_multipart_response_with_default_expected_params( self, extra_expected_params=None): expected_params = { 'Bucket': self.bucket, 'Key': self.key, 'UploadId': self.multipart_id, 'MultipartUpload': { 'Parts': [ {'ETag': 'etag-1', 'PartNumber': 1}, {'ETag': 'etag-2', 'PartNumber': 2}, {'ETag': 'etag-3', 'PartNumber': 3} ] } } if extra_expected_params: expected_params.update(extra_expected_params) response = self.create_stubbed_responses()[-1] response['expected_params'] = expected_params self.stubber.add_response(**response) def test_upload(self): self.extra_args['RequestPayer'] = 'requester' # Add requester pays to the create multipart upload and upload parts. self.add_create_multipart_response_with_default_expected_params( extra_expected_params={'RequestPayer': 'requester'}) self.add_upload_part_responses_with_default_expected_params( extra_expected_params={'RequestPayer': 'requester'}) self.add_complete_multipart_response_with_default_expected_params( extra_expected_params={'RequestPayer': 'requester'}) future = self.manager.upload( self.filename, self.bucket, self.key, self.extra_args) future.result() self.assert_expected_client_calls_were_correct() def test_upload_for_fileobj(self): self.add_create_multipart_response_with_default_expected_params() self.add_upload_part_responses_with_default_expected_params() self.add_complete_multipart_response_with_default_expected_params() with open(self.filename, 'rb') as f: future = self.manager.upload( f, self.bucket, self.key, self.extra_args) future.result() self.assert_expected_client_calls_were_correct() self.assert_upload_part_bodies_were_correct() def test_upload_for_seekable_filelike_obj(self): self.add_create_multipart_response_with_default_expected_params() self.add_upload_part_responses_with_default_expected_params() self.add_complete_multipart_response_with_default_expected_params() bytes_io = six.BytesIO(self.content) future = self.manager.upload( bytes_io, self.bucket, self.key, self.extra_args) future.result() self.assert_expected_client_calls_were_correct() self.assert_upload_part_bodies_were_correct() def test_upload_for_seekable_filelike_obj_that_has_been_seeked(self): self.add_create_multipart_response_with_default_expected_params() self.add_upload_part_responses_with_default_expected_params() self.add_complete_multipart_response_with_default_expected_params() bytes_io = six.BytesIO(self.content) seek_pos = 1 bytes_io.seek(seek_pos) future = self.manager.upload( bytes_io, self.bucket, self.key, self.extra_args) future.result() self.assert_expected_client_calls_were_correct() self.assertEqual(b''.join(self.sent_bodies), self.content[seek_pos:]) def test_upload_for_non_seekable_filelike_obj(self): self.add_create_multipart_response_with_default_expected_params() self.add_upload_part_responses_with_default_expected_params() self.add_complete_multipart_response_with_default_expected_params() stream = NonSeekableReader(self.content) future = self.manager.upload( stream, self.bucket, self.key, self.extra_args) future.result() self.assert_expected_client_calls_were_correct() self.assert_upload_part_bodies_were_correct() def test_limits_in_memory_chunks_for_fileobj(self): # Limit the maximum in memory chunks to one but make number of # threads more than one. This means that the upload will have to # happen sequentially despite having many threads available because # data is sequentially partitioned into chunks in memory and since # there can only every be one in memory chunk, each upload part will # have to happen one at a time. 
        self.config.max_request_concurrency = 10
        self.config.max_in_memory_upload_chunks = 1
        self._manager = TransferManager(self.client, self.config)

        # Add some default stubbed responses.
        # These responses are added in order of part number, so if the
        # multipart upload is not done sequentially, which it should be
        # because we limit the in-memory upload chunks to one, the stubber
        # will raise exceptions for mismatching PartNumber parameters once
        # the upload() method is called on the transfer manager.
        # If there is a mismatch, the stubber error will propagate on
        # the future.result()
        self.add_create_multipart_response_with_default_expected_params()
        self.add_upload_part_responses_with_default_expected_params()
        self.add_complete_multipart_response_with_default_expected_params()
        with open(self.filename, 'rb') as f:
            future = self.manager.upload(
                f, self.bucket, self.key, self.extra_args)
            future.result()

        # Make sure that the stubber had all of its stubbed responses consumed.
        self.assert_expected_client_calls_were_correct()
        # Ensure the contents were uploaded in sequential order by checking
        # that the sent contents were in order.
        self.assert_upload_part_bodies_were_correct()

    def test_upload_failure_invokes_abort(self):
        self.stubber.add_response(
            method='create_multipart_upload',
            service_response={
                'UploadId': self.multipart_id
            },
            expected_params={
                'Bucket': self.bucket,
                'Key': self.key
            }
        )
        self.stubber.add_response(
            method='upload_part',
            service_response={
                'ETag': 'etag-1'
            },
            expected_params={
                'Bucket': self.bucket,
                'Body': ANY,
                'Key': self.key,
                'UploadId': self.multipart_id,
                'PartNumber': 1
            }
        )
        # With the upload part failing, this should immediately initiate
        # an abort multipart upload with no more upload parts called.
        self.stubber.add_client_error(method='upload_part')
        self.stubber.add_response(
            method='abort_multipart_upload',
            service_response={},
            expected_params={
                'Bucket': self.bucket,
                'Key': self.key,
                'UploadId': self.multipart_id
            }
        )

        future = self.manager.upload(self.filename, self.bucket, self.key)

        # The exception should get propagated to the future and not be
        # a cancelled error or something.
        with self.assertRaises(ClientError):
            future.result()
        self.assert_expected_client_calls_were_correct()

    def test_upload_passes_select_extra_args(self):
        self.extra_args['Metadata'] = {'foo': 'bar'}
        # Add metadata to the expected create multipart upload call
        self.add_create_multipart_response_with_default_expected_params(
            extra_expected_params={'Metadata': {'foo': 'bar'}})
        self.add_upload_part_responses_with_default_expected_params()
        self.add_complete_multipart_response_with_default_expected_params()

        future = self.manager.upload(
            self.filename, self.bucket, self.key, self.extra_args)
        future.result()
        self.assert_expected_client_calls_were_correct()
s3transfer-0.1.13/tests/functional/test_utils.py000066400000000000000000000026321324114246300217460ustar00rootroot00000000000000
# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"). You
# may not use this file except in compliance with the License. A copy of
# the License is located at
#
# http://aws.amazon.com/apache2.0/
#
# or in the "license" file accompanying this file. This file is
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
# ANY KIND, either express or implied. See the License for the specific
# language governing permissions and limitations under the License.
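# The tests below exercise OSUtils.is_special_file(), which reports whether a
# path refers to a special UNIX file such as a character device, FIFO, or
# socket. The sketch that follows is purely illustrative: the helper name and
# the strategy strings are assumptions made for this example and are not part
# of s3transfer's API. It shows how a caller might use the check to decide
# whether a download can be staged in a temporary file or should presumably be
# written to the destination directly.
def _example_pick_download_strategy(filename):
    from s3transfer.utils import OSUtils
    # Special files generally should not be replaced via a temporary file and
    # rename, so a download to them would be written directly instead.
    if OSUtils().is_special_file(filename):
        return 'write-directly'
    return 'stage-in-temporary-file'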
import os import shutil import socket import tempfile from tests import unittest from tests import skip_if_windows from s3transfer.utils import OSUtils @skip_if_windows('Windows does not support UNIX special files') class TestOSUtilsSpecialFiles(unittest.TestCase): def setUp(self): self.tempdir = tempfile.mkdtemp() self.filename = os.path.join(self.tempdir, 'myfile') def tearDown(self): shutil.rmtree(self.tempdir) def test_character_device(self): self.assertTrue(OSUtils().is_special_file('/dev/null')) def test_fifo(self): os.mkfifo(self.filename) self.assertTrue(OSUtils().is_special_file(self.filename)) def test_socket(self): sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) sock.bind(self.filename) self.assertTrue(OSUtils().is_special_file(self.filename)) s3transfer-0.1.13/tests/integration/000077500000000000000000000000001324114246300173535ustar00rootroot00000000000000s3transfer-0.1.13/tests/integration/__init__.py000066400000000000000000000050151324114246300214650ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the 'License'). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the 'license' file accompanying this file. This file is # distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. import botocore.session from botocore.exceptions import ClientError from tests import unittest from tests import FileCreator from tests import random_bucket_name from s3transfer.manager import TransferManager def recursive_delete(client, bucket_name): # Recursively deletes a bucket and all of its contents. objects = client.get_paginator('list_objects').paginate( Bucket=bucket_name) for key in objects.search('Contents[].Key || `[]`'): if key: client.delete_object(Bucket=bucket_name, Key=key) client.delete_bucket(Bucket=bucket_name) class BaseTransferManagerIntegTest(unittest.TestCase): """Tests for the high level s3transfer module.""" @classmethod def setUpClass(cls): cls.region = 'us-west-2' cls.session = botocore.session.get_session() cls.client = cls.session.create_client('s3', cls.region) cls.bucket_name = random_bucket_name() cls.client.create_bucket( Bucket=cls.bucket_name, CreateBucketConfiguration={'LocationConstraint': cls.region}) def setUp(self): self.files = FileCreator() def tearDown(self): self.files.remove_all() @classmethod def tearDownClass(cls): recursive_delete(cls.client, cls.bucket_name) def delete_object(self, key): self.client.delete_object( Bucket=self.bucket_name, Key=key) def object_exists(self, key): try: self.client.head_object(Bucket=self.bucket_name, Key=key) return True except ClientError: return False def create_transfer_manager(self, config=None): return TransferManager(self.client, config=config) def upload_file(self, filename, key): with open(filename, 'rb') as f: self.client.put_object(Bucket=self.bucket_name, Key=key, Body=f) self.addCleanup(self.delete_object, key) s3transfer-0.1.13/tests/integration/test_copy.py000066400000000000000000000056521324114246300217460ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. 
A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. from tests import RecordingSubscriber from tests.integration import BaseTransferManagerIntegTest from s3transfer.manager import TransferConfig class TestCopy(BaseTransferManagerIntegTest): def setUp(self): super(TestCopy, self).setUp() self.multipart_threshold = 5 * 1024 * 1024 self.config = TransferConfig( multipart_threshold=self.multipart_threshold) def test_copy_below_threshold(self): transfer_manager = self.create_transfer_manager(self.config) key = '1mb.txt' new_key = '1mb-copy.txt' filename = self.files.create_file_with_size( key, filesize=1024 * 1024) self.upload_file(filename, key) future = transfer_manager.copy( copy_source={'Bucket': self.bucket_name, 'Key': key}, bucket=self.bucket_name, key=new_key ) future.result() self.assertTrue(self.object_exists(new_key)) def test_copy_above_threshold(self): transfer_manager = self.create_transfer_manager(self.config) key = '20mb.txt' new_key = '20mb-copy.txt' filename = self.files.create_file_with_size( key, filesize=20 * 1024 * 1024) self.upload_file(filename, key) future = transfer_manager.copy( copy_source={'Bucket': self.bucket_name, 'Key': key}, bucket=self.bucket_name, key=new_key ) future.result() self.assertTrue(self.object_exists(new_key)) def test_progress_subscribers_on_copy(self): subscriber = RecordingSubscriber() transfer_manager = self.create_transfer_manager(self.config) key = '20mb.txt' new_key = '20mb-copy.txt' filename = self.files.create_file_with_size( key, filesize=20 * 1024 * 1024) self.upload_file(filename, key) future = transfer_manager.copy( copy_source={'Bucket': self.bucket_name, 'Key': key}, bucket=self.bucket_name, key=new_key, subscribers=[subscriber] ) future.result() # The callback should have been called enough times such that # the total amount of bytes we've seen (via the "amount" # arg to the callback function) should be the size # of the file we uploaded. self.assertEqual(subscriber.calculate_bytes_seen(), 20 * 1024 * 1024) s3transfer-0.1.13/tests/integration/test_delete.py000066400000000000000000000023561324114246300222340ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
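# The integration test below drives TransferManager.delete() end to end. As a
# minimal usage sketch (the bucket and key arguments are placeholders, and the
# client setup simply mirrors the surrounding integration tests):
def _example_delete(client, bucket, key):
    from s3transfer.manager import TransferManager
    # delete() returns a transfer future; result() blocks until the underlying
    # DeleteObject call has completed or re-raises the error it hit.
    with TransferManager(client) as manager:
        manager.delete(bucket=bucket, key=key).result()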
from tests import RecordingSubscriber from tests.integration import BaseTransferManagerIntegTest from s3transfer.manager import TransferConfig class TestDeleteObject(BaseTransferManagerIntegTest): def test_can_delete_object(self): key_name = 'mykey' self.client.put_object(Bucket=self.bucket_name, Key=key_name, Body=b'hello world') self.assertTrue(self.object_exists(key_name)) transfer_manager = self.create_transfer_manager() future = transfer_manager.delete(bucket=self.bucket_name, key=key_name) future.result() self.assertFalse(self.object_exists(key_name)) s3transfer-0.1.13/tests/integration/test_download.py000066400000000000000000000240231324114246300225740ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. import glob import os import time from concurrent.futures import CancelledError from tests import assert_files_equal from tests import skip_if_windows from tests import skip_if_using_serial_implementation from tests import RecordingSubscriber from tests import NonSeekableWriter from tests.integration import BaseTransferManagerIntegTest from s3transfer.manager import TransferConfig class TestDownload(BaseTransferManagerIntegTest): def setUp(self): super(TestDownload, self).setUp() self.multipart_threshold = 5 * 1024 * 1024 self.config = TransferConfig( multipart_threshold=self.multipart_threshold ) def test_below_threshold(self): transfer_manager = self.create_transfer_manager(self.config) filename = self.files.create_file_with_size( 'foo.txt', filesize=1024 * 1024) self.upload_file(filename, '1mb.txt') download_path = os.path.join(self.files.rootdir, '1mb.txt') future = transfer_manager.download( self.bucket_name, '1mb.txt', download_path) future.result() assert_files_equal(filename, download_path) def test_above_threshold(self): transfer_manager = self.create_transfer_manager(self.config) filename = self.files.create_file_with_size( 'foo.txt', filesize=20 * 1024 * 1024) self.upload_file(filename, '20mb.txt') download_path = os.path.join(self.files.rootdir, '20mb.txt') future = transfer_manager.download( self.bucket_name, '20mb.txt', download_path) future.result() assert_files_equal(filename, download_path) @skip_if_using_serial_implementation( 'Exception is thrown once the transfer is submitted. ' 'However for the serial implementation, transfers are performed ' 'in main thread meaning the transfer will complete before the ' 'KeyboardInterrupt being thrown.' 
    )
    def test_large_download_exits_quicky_on_exception(self):
        transfer_manager = self.create_transfer_manager(self.config)

        filename = self.files.create_file_with_size(
            'foo.txt', filesize=60 * 1024 * 1024)
        self.upload_file(filename, '60mb.txt')

        download_path = os.path.join(self.files.rootdir, '60mb.txt')
        sleep_time = 0.5
        try:
            with transfer_manager:
                start_time = time.time()
                future = transfer_manager.download(
                    self.bucket_name, '60mb.txt', download_path)
                # Sleep for a little to get the transfer process going
                time.sleep(sleep_time)
                # Raise an exception which should cause the preceding
                # download to cancel and exit quickly
                raise KeyboardInterrupt()
        except KeyboardInterrupt:
            pass
        end_time = time.time()
        # The maximum time allowed for the transfer manager to exit.
        # This means that it should take less than a couple of seconds after
        # sleeping to exit.
        max_allowed_exit_time = sleep_time + 4
        self.assertLess(
            end_time - start_time, max_allowed_exit_time,
            "Failed to exit under %s. Instead exited in %s." % (
                max_allowed_exit_time, end_time - start_time)
        )

        # Make sure the future was cancelled because of the KeyboardInterrupt
        with self.assertRaisesRegexp(CancelledError, 'KeyboardInterrupt()'):
            future.result()

        # Make sure the actual file and the temporary file do not exist
        # by globbing for the file and any of its extensions
        possible_matches = glob.glob('%s*' % download_path)
        self.assertEqual(possible_matches, [])

    @skip_if_using_serial_implementation(
        'Exception is thrown once the transfer is submitted. '
        'However for the serial implementation, transfers are performed '
        'in main thread meaning the transfer will complete before the '
        'KeyboardInterrupt being thrown.'
    )
    def test_many_files_exits_quicky_on_exception(self):
        # Set the max request queue size and number of submission threads
        # to something small to simulate having a large, backed-up queue
        # of transfer requests to complete.
        self.config.max_request_queue_size = 1
        self.config.max_submission_concurrency = 1
        transfer_manager = self.create_transfer_manager(self.config)

        filename = self.files.create_file_with_size(
            'foo.txt', filesize=1024 * 1024)
        self.upload_file(filename, '1mb.txt')

        filenames = []
        futures = []
        for i in range(10):
            filenames.append(
                os.path.join(self.files.rootdir, 'file'+str(i)))

        try:
            with transfer_manager:
                start_time = time.time()
                for filename in filenames:
                    futures.append(transfer_manager.download(
                        self.bucket_name, '1mb.txt', filename))
                # Raise an exception which should cause the preceding
                # transfers to cancel and exit quickly
                raise KeyboardInterrupt()
        except KeyboardInterrupt:
            pass
        end_time = time.time()

        # The maximum time allowed for the transfer manager to exit.
        # This means that it should take less than a couple of seconds to
        # exit.
        max_allowed_exit_time = 5
        self.assertLess(
            end_time - start_time, max_allowed_exit_time,
            "Failed to exit under %s. Instead exited in %s." % (
                max_allowed_exit_time, end_time - start_time)
        )

        # Make sure at least one of the futures got cancelled
        with self.assertRaisesRegexp(CancelledError, 'KeyboardInterrupt()'):
            for future in futures:
                future.result()
        # For the transfer that did get cancelled, make sure the temporary
        # file got removed.
possible_matches = glob.glob('%s*' % future.meta.call_args.fileobj) self.assertEqual(possible_matches, []) def test_progress_subscribers_on_download(self): subscriber = RecordingSubscriber() transfer_manager = self.create_transfer_manager(self.config) filename = self.files.create_file_with_size( 'foo.txt', filesize=20 * 1024 * 1024) self.upload_file(filename, '20mb.txt') download_path = os.path.join(self.files.rootdir, '20mb.txt') future = transfer_manager.download( self.bucket_name, '20mb.txt', download_path, subscribers=[subscriber]) future.result() self.assertEqual(subscriber.calculate_bytes_seen(), 20 * 1024 * 1024) def test_below_threshold_for_fileobj(self): transfer_manager = self.create_transfer_manager(self.config) filename = self.files.create_file_with_size( 'foo.txt', filesize=1024 * 1024) self.upload_file(filename, '1mb.txt') download_path = os.path.join(self.files.rootdir, '1mb.txt') with open(download_path, 'wb') as f: future = transfer_manager.download( self.bucket_name, '1mb.txt', f) future.result() assert_files_equal(filename, download_path) def test_above_threshold_for_fileobj(self): transfer_manager = self.create_transfer_manager(self.config) filename = self.files.create_file_with_size( 'foo.txt', filesize=20 * 1024 * 1024) self.upload_file(filename, '20mb.txt') download_path = os.path.join(self.files.rootdir, '20mb.txt') with open(download_path, 'wb') as f: future = transfer_manager.download( self.bucket_name, '20mb.txt', f) future.result() assert_files_equal(filename, download_path) def test_below_threshold_for_nonseekable_fileobj(self): transfer_manager = self.create_transfer_manager(self.config) filename = self.files.create_file_with_size( 'foo.txt', filesize=1024 * 1024) self.upload_file(filename, '1mb.txt') download_path = os.path.join(self.files.rootdir, '1mb.txt') with open(download_path, 'wb') as f: future = transfer_manager.download( self.bucket_name, '1mb.txt', NonSeekableWriter(f)) future.result() assert_files_equal(filename, download_path) def test_above_threshold_for_nonseekable_fileobj(self): transfer_manager = self.create_transfer_manager(self.config) filename = self.files.create_file_with_size( 'foo.txt', filesize=20 * 1024 * 1024) self.upload_file(filename, '20mb.txt') download_path = os.path.join(self.files.rootdir, '20mb.txt') with open(download_path, 'wb') as f: future = transfer_manager.download( self.bucket_name, '20mb.txt', NonSeekableWriter(f)) future.result() assert_files_equal(filename, download_path) @skip_if_windows('Windows does not support UNIX special files') def test_download_to_special_file(self): transfer_manager = self.create_transfer_manager(self.config) filename = self.files.create_file_with_size( 'foo.txt', filesize=1024 * 1024) self.upload_file(filename, '1mb.txt') future = transfer_manager.download( self.bucket_name, '1mb.txt', '/dev/null') try: future.result() except Exception as e: self.fail( 'Should have been able to download to /dev/null but received ' 'following exception %s' % e) s3transfer-0.1.13/tests/integration/test_s3transfer.py000066400000000000000000000324721324114246300230660ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. 
This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. import os import threading import math import tempfile import shutil import hashlib import string from tests import unittest import botocore.session from botocore.compat import six from botocore.client import Config import s3transfer urlopen = six.moves.urllib.request.urlopen def assert_files_equal(first, second): if os.path.getsize(first) != os.path.getsize(second): raise AssertionError("Files are not equal: %s, %s" % (first, second)) first_md5 = md5_checksum(first) second_md5 = md5_checksum(second) if first_md5 != second_md5: raise AssertionError( "Files are not equal: %s(md5=%s) != %s(md5=%s)" % ( first, first_md5, second, second_md5)) def md5_checksum(filename): checksum = hashlib.md5() with open(filename, 'rb') as f: for chunk in iter(lambda: f.read(8192), b''): checksum.update(chunk) return checksum.hexdigest() def random_bucket_name(prefix='boto3-transfer', num_chars=10): base = string.ascii_lowercase + string.digits random_bytes = bytearray(os.urandom(num_chars)) return prefix + ''.join([base[b % len(base)] for b in random_bytes]) class FileCreator(object): def __init__(self): self.rootdir = tempfile.mkdtemp() def remove_all(self): shutil.rmtree(self.rootdir) def create_file(self, filename, contents, mode='w'): """Creates a file in a tmpdir ``filename`` should be a relative path, e.g. "foo/bar/baz.txt" It will be translated into a full path in a tmp dir. ``mode`` is the mode the file should be opened either as ``w`` or `wb``. Returns the full path to the file. """ full_path = os.path.join(self.rootdir, filename) if not os.path.isdir(os.path.dirname(full_path)): os.makedirs(os.path.dirname(full_path)) with open(full_path, mode) as f: f.write(contents) return full_path def create_file_with_size(self, filename, filesize): filename = self.create_file(filename, contents='') chunksize = 8192 with open(filename, 'wb') as f: for i in range(int(math.ceil(filesize / float(chunksize)))): f.write(b'a' * chunksize) return filename def append_file(self, filename, contents): """Append contents to a file ``filename`` should be a relative path, e.g. "foo/bar/baz.txt" It will be translated into a full path in a tmp dir. Returns the full path to the file. """ full_path = os.path.join(self.rootdir, filename) if not os.path.isdir(os.path.dirname(full_path)): os.makedirs(os.path.dirname(full_path)) with open(full_path, 'a') as f: f.write(contents) return full_path def full_path(self, filename): """Translate relative path to full path in temp dir. 
f.full_path('foo/bar.txt') -> /tmp/asdfasd/foo/bar.txt """ return os.path.join(self.rootdir, filename) class TestS3Transfers(unittest.TestCase): """Tests for the high level s3transfer module.""" @classmethod def setUpClass(cls): cls.region = 'us-west-2' cls.session = botocore.session.get_session() cls.client = cls.session.create_client('s3', cls.region) cls.bucket_name = random_bucket_name() cls.client.create_bucket( Bucket=cls.bucket_name, CreateBucketConfiguration={'LocationConstraint': cls.region}) def setUp(self): self.files = FileCreator() def tearDown(self): self.files.remove_all() @classmethod def tearDownClass(cls): cls.client.delete_bucket(Bucket=cls.bucket_name) def delete_object(self, key): self.client.delete_object( Bucket=self.bucket_name, Key=key) def object_exists(self, key): self.client.head_object(Bucket=self.bucket_name, Key=key) return True def create_s3_transfer(self, config=None): return s3transfer.S3Transfer(self.client, config=config) def assert_has_public_read_acl(self, response): grants = response['Grants'] public_read = [g['Grantee'].get('URI', '') for g in grants if g['Permission'] == 'READ'] self.assertIn('groups/global/AllUsers', public_read[0]) def test_upload_below_threshold(self): config = s3transfer.TransferConfig( multipart_threshold=2 * 1024 * 1024) transfer = self.create_s3_transfer(config) filename = self.files.create_file_with_size( 'foo.txt', filesize=1024 * 1024) transfer.upload_file(filename, self.bucket_name, 'foo.txt') self.addCleanup(self.delete_object, 'foo.txt') self.assertTrue(self.object_exists('foo.txt')) def test_upload_above_threshold(self): config = s3transfer.TransferConfig( multipart_threshold=2 * 1024 * 1024) transfer = self.create_s3_transfer(config) filename = self.files.create_file_with_size( '20mb.txt', filesize=20 * 1024 * 1024) transfer.upload_file(filename, self.bucket_name, '20mb.txt') self.addCleanup(self.delete_object, '20mb.txt') self.assertTrue(self.object_exists('20mb.txt')) def test_upload_file_above_threshold_with_acl(self): config = s3transfer.TransferConfig( multipart_threshold=5 * 1024 * 1024) transfer = self.create_s3_transfer(config) filename = self.files.create_file_with_size( '6mb.txt', filesize=6 * 1024 * 1024) extra_args = {'ACL': 'public-read'} transfer.upload_file(filename, self.bucket_name, '6mb.txt', extra_args=extra_args) self.addCleanup(self.delete_object, '6mb.txt') self.assertTrue(self.object_exists('6mb.txt')) response = self.client.get_object_acl( Bucket=self.bucket_name, Key='6mb.txt') self.assert_has_public_read_acl(response) def test_upload_file_above_threshold_with_ssec(self): key_bytes = os.urandom(32) extra_args = { 'SSECustomerKey': key_bytes, 'SSECustomerAlgorithm': 'AES256', } config = s3transfer.TransferConfig( multipart_threshold=5 * 1024 * 1024) transfer = self.create_s3_transfer(config) filename = self.files.create_file_with_size( '6mb.txt', filesize=6 * 1024 * 1024) transfer.upload_file(filename, self.bucket_name, '6mb.txt', extra_args=extra_args) self.addCleanup(self.delete_object, '6mb.txt') # A head object will fail if it has a customer key # associated with it and it's not provided in the HeadObject # request so we can use this to verify our functionality. 
response = self.client.head_object( Bucket=self.bucket_name, Key='6mb.txt', **extra_args) self.assertEqual(response['SSECustomerAlgorithm'], 'AES256') def test_progress_callback_on_upload(self): self.amount_seen = 0 lock = threading.Lock() def progress_callback(amount): with lock: self.amount_seen += amount transfer = self.create_s3_transfer() filename = self.files.create_file_with_size( '20mb.txt', filesize=20 * 1024 * 1024) transfer.upload_file(filename, self.bucket_name, '20mb.txt', callback=progress_callback) self.addCleanup(self.delete_object, '20mb.txt') # The callback should have been called enough times such that # the total amount of bytes we've seen (via the "amount" # arg to the callback function) should be the size # of the file we uploaded. self.assertEqual(self.amount_seen, 20 * 1024 * 1024) def test_callback_called_once_with_sigv4(self): # Verify #98, where the callback was being invoked # twice when using signature version 4. self.amount_seen = 0 lock = threading.Lock() def progress_callback(amount): with lock: self.amount_seen += amount client = self.session.create_client( 's3', self.region, config=Config(signature_version='s3v4')) transfer = s3transfer.S3Transfer(client) filename = self.files.create_file_with_size( '10mb.txt', filesize=10 * 1024 * 1024) transfer.upload_file(filename, self.bucket_name, '10mb.txt', callback=progress_callback) self.addCleanup(self.delete_object, '10mb.txt') self.assertEqual(self.amount_seen, 10 * 1024 * 1024) def test_can_send_extra_params_on_upload(self): transfer = self.create_s3_transfer() filename = self.files.create_file_with_size('foo.txt', filesize=1024) transfer.upload_file(filename, self.bucket_name, 'foo.txt', extra_args={'ACL': 'public-read'}) self.addCleanup(self.delete_object, 'foo.txt') response = self.client.get_object_acl( Bucket=self.bucket_name, Key='foo.txt') self.assert_has_public_read_acl(response) def test_can_configure_threshold(self): config = s3transfer.TransferConfig( multipart_threshold=6 * 1024 * 1024 ) transfer = self.create_s3_transfer(config) filename = self.files.create_file_with_size( 'foo.txt', filesize=8 * 1024 * 1024) transfer.upload_file(filename, self.bucket_name, 'foo.txt') self.addCleanup(self.delete_object, 'foo.txt') self.assertTrue(self.object_exists('foo.txt')) def test_can_send_extra_params_on_download(self): # We're picking the customer provided sse feature # of S3 to test the extra_args functionality of # S3. 
key_bytes = os.urandom(32) extra_args = { 'SSECustomerKey': key_bytes, 'SSECustomerAlgorithm': 'AES256', } self.client.put_object(Bucket=self.bucket_name, Key='foo.txt', Body=b'hello world', **extra_args) self.addCleanup(self.delete_object, 'foo.txt') transfer = self.create_s3_transfer() download_path = os.path.join(self.files.rootdir, 'downloaded.txt') transfer.download_file(self.bucket_name, 'foo.txt', download_path, extra_args=extra_args) with open(download_path, 'rb') as f: self.assertEqual(f.read(), b'hello world') def test_progress_callback_on_download(self): self.amount_seen = 0 lock = threading.Lock() def progress_callback(amount): with lock: self.amount_seen += amount transfer = self.create_s3_transfer() filename = self.files.create_file_with_size( '20mb.txt', filesize=20 * 1024 * 1024) with open(filename, 'rb') as f: self.client.put_object(Bucket=self.bucket_name, Key='20mb.txt', Body=f) self.addCleanup(self.delete_object, '20mb.txt') download_path = os.path.join(self.files.rootdir, 'downloaded.txt') transfer.download_file(self.bucket_name, '20mb.txt', download_path, callback=progress_callback) self.assertEqual(self.amount_seen, 20 * 1024 * 1024) def test_download_below_threshold(self): transfer = self.create_s3_transfer() filename = self.files.create_file_with_size( 'foo.txt', filesize=1024 * 1024) with open(filename, 'rb') as f: self.client.put_object(Bucket=self.bucket_name, Key='foo.txt', Body=f) self.addCleanup(self.delete_object, 'foo.txt') download_path = os.path.join(self.files.rootdir, 'downloaded.txt') transfer.download_file(self.bucket_name, 'foo.txt', download_path) assert_files_equal(filename, download_path) def test_download_above_threshold(self): transfer = self.create_s3_transfer() filename = self.files.create_file_with_size( 'foo.txt', filesize=20 * 1024 * 1024) with open(filename, 'rb') as f: self.client.put_object(Bucket=self.bucket_name, Key='foo.txt', Body=f) self.addCleanup(self.delete_object, 'foo.txt') download_path = os.path.join(self.files.rootdir, 'downloaded.txt') transfer.download_file(self.bucket_name, 'foo.txt', download_path) assert_files_equal(filename, download_path) s3transfer-0.1.13/tests/integration/test_upload.py000066400000000000000000000163111324114246300222520ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
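# The tests below exercise TransferManager.upload() against a real bucket. As
# a minimal usage sketch (the bucket name, key, and threshold value are
# placeholders; the pattern simply mirrors how these tests configure the
# manager, not a required setup):
def _example_upload(client, filename):
    from s3transfer.manager import TransferConfig, TransferManager
    # Uploads larger than multipart_threshold go through the multipart API;
    # the tests below rely on this by uploading files above the threshold.
    config = TransferConfig(multipart_threshold=5 * 1024 * 1024)
    with TransferManager(client, config) as manager:
        future = manager.upload(filename, 'mybucket', 'mykey')
        # result() blocks until the upload completes and re-raises any error
        # encountered while transferring.
        future.result()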
import time from concurrent.futures import CancelledError from botocore.compat import six from tests import skip_if_using_serial_implementation from tests import RecordingSubscriber, NonSeekableReader from tests.integration import BaseTransferManagerIntegTest from s3transfer.manager import TransferConfig class TestUpload(BaseTransferManagerIntegTest): def setUp(self): super(TestUpload, self).setUp() self.multipart_threshold = 5 * 1024 * 1024 self.config = TransferConfig( multipart_threshold=self.multipart_threshold) def get_input_fileobj(self, size, name=''): return self.files.create_file_with_size(name, size) def test_upload_below_threshold(self): transfer_manager = self.create_transfer_manager(self.config) file = self.get_input_fileobj(size=1024 * 1024, name='1mb.txt') future = transfer_manager.upload(file, self.bucket_name, '1mb.txt') self.addCleanup(self.delete_object, '1mb.txt') future.result() self.assertTrue(self.object_exists('1mb.txt')) def test_upload_above_threshold(self): transfer_manager = self.create_transfer_manager(self.config) file = self.get_input_fileobj(size=20 * 1024 * 1024, name='20mb.txt') future = transfer_manager.upload( file, self.bucket_name, '20mb.txt') self.addCleanup(self.delete_object, '20mb.txt') future.result() self.assertTrue(self.object_exists('20mb.txt')) @skip_if_using_serial_implementation( 'Exception is thrown once the transfer is submitted. ' 'However for the serial implementation, transfers are performed ' 'in main thread meaning the transfer will complete before the ' 'KeyboardInterrupt being thrown.' ) def test_large_upload_exits_quicky_on_exception(self): transfer_manager = self.create_transfer_manager(self.config) filename = self.get_input_fileobj( name='foo.txt', size=20 * 1024 * 1024) sleep_time = 0.25 try: with transfer_manager: start_time = time.time() future = transfer_manager.upload( filename, self.bucket_name, '20mb.txt') # Sleep for a little to get the transfer process going time.sleep(sleep_time) # Raise an exception which should cause the preceeding # download to cancel and exit quickly raise KeyboardInterrupt() except KeyboardInterrupt: pass end_time = time.time() # The maximum time allowed for the transfer manager to exit. # This means that it should take less than a couple second after # sleeping to exit. max_allowed_exit_time = sleep_time + 5 self.assertLess( end_time - start_time, max_allowed_exit_time, "Failed to exit under %s. Instead exited in %s." % ( max_allowed_exit_time, end_time - start_time) ) try: future.result() self.skipTest( 'Upload completed before interrupted and therefore ' 'could not cancel the upload') except CancelledError as e: self.assertEqual(str(e), 'KeyboardInterrupt()') # If the transfer did get cancelled, # make sure the object does not exist. self.assertFalse(self.object_exists('20mb.txt')) @skip_if_using_serial_implementation( 'Exception is thrown once the transfers are submitted. ' 'However for the serial implementation, transfers are performed ' 'in main thread meaning the transfers will complete before the ' 'KeyboardInterrupt being thrown.' ) def test_many_files_exits_quicky_on_exception(self): # Set the max request queue size and number of submission threads # to something small to simulate having a large queue # of transfer requests to complete and it is backed up. 
self.config.max_request_queue_size = 1 self.config.max_submission_concurrency = 1 transfer_manager = self.create_transfer_manager(self.config) fileobjs = [] keynames = [] futures = [] for i in range(10): filename = 'file' + str(i) keynames.append(filename) fileobjs.append( self.get_input_fileobj(name=filename, size=1024 * 1024)) try: with transfer_manager: start_time = time.time() for i, fileobj in enumerate(fileobjs): futures.append(transfer_manager.upload( fileobj, self.bucket_name, keynames[i])) # Raise an exception which should cause the preceeding # transfer to cancel and exit quickly raise KeyboardInterrupt() except KeyboardInterrupt: pass end_time = time.time() # The maximum time allowed for the transfer manager to exit. # This means that it should take less than a couple seconds to exit. max_allowed_exit_time = 5 self.assertLess( end_time - start_time, max_allowed_exit_time, "Failed to exit under %s. Instead exited in %s." % ( max_allowed_exit_time, end_time - start_time) ) # Make sure at least one of the futures got cancelled with self.assertRaisesRegexp(CancelledError, 'KeyboardInterrupt()'): for future in futures: future.result() # For the transfer that did get cancelled, make sure the object # does not exist. self.assertFalse(self.object_exists(future.meta.call_args.key)) def test_progress_subscribers_on_upload(self): subscriber = RecordingSubscriber() transfer_manager = self.create_transfer_manager(self.config) file = self.get_input_fileobj(size=20 * 1024 * 1024, name='20mb.txt') future = transfer_manager.upload( file, self.bucket_name, '20mb.txt', subscribers=[subscriber]) self.addCleanup(self.delete_object, '20mb.txt') future.result() # The callback should have been called enough times such that # the total amount of bytes we've seen (via the "amount" # arg to the callback function) should be the size # of the file we uploaded. self.assertEqual(subscriber.calculate_bytes_seen(), 20 * 1024 * 1024) class TestUploadSeekableStream(TestUpload): def get_input_fileobj(self, size, name=''): return six.BytesIO(b'0' * size) class TestUploadNonSeekableStream(TestUpload): def get_input_fileobj(self, size, name=''): return NonSeekableReader(b'0' * size) s3transfer-0.1.13/tests/unit/000077500000000000000000000000001324114246300160075ustar00rootroot00000000000000s3transfer-0.1.13/tests/unit/__init__.py000066400000000000000000000010611324114246300201160ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the 'License'). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the 'license' file accompanying this file. This file is # distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. s3transfer-0.1.13/tests/unit/test_bandwidth.py000066400000000000000000000426621324114246300213760ustar00rootroot00000000000000# Copyright 2017 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. 
This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. import os import shutil import tempfile import mock from tests import unittest from s3transfer.bandwidth import RequestExceededException from s3transfer.bandwidth import RequestToken from s3transfer.bandwidth import TimeUtils from s3transfer.bandwidth import BandwidthLimiter from s3transfer.bandwidth import BandwidthLimitedStream from s3transfer.bandwidth import LeakyBucket from s3transfer.bandwidth import ConsumptionScheduler from s3transfer.bandwidth import BandwidthRateTracker from s3transfer.futures import TransferCoordinator class FixedIncrementalTickTimeUtils(TimeUtils): def __init__(self, seconds_per_tick=1.0): self._count = 0 self._seconds_per_tick = seconds_per_tick def time(self): current_count = self._count self._count += self._seconds_per_tick return current_count class TestTimeUtils(unittest.TestCase): @mock.patch('time.time') def test_time(self, mock_time): mock_return_val = 1 mock_time.return_value = mock_return_val time_utils = TimeUtils() self.assertEqual(time_utils.time(), mock_return_val) @mock.patch('time.sleep') def test_sleep(self, mock_sleep): time_utils = TimeUtils() time_utils.sleep(1) self.assertEqual( mock_sleep.call_args_list, [mock.call(1)] ) class BaseBandwidthLimitTest(unittest.TestCase): def setUp(self): self.leaky_bucket = mock.Mock(LeakyBucket) self.time_utils = mock.Mock(TimeUtils) self.tempdir = tempfile.mkdtemp() self.content = b'a' * 1024 * 1024 self.filename = os.path.join(self.tempdir, 'myfile') with open(self.filename, 'wb') as f: f.write(self.content) self.coordinator = TransferCoordinator() def tearDown(self): shutil.rmtree(self.tempdir) def assert_consume_calls(self, amts): expected_consume_args = [ mock.call(amt, mock.ANY) for amt in amts ] self.assertEqual( self.leaky_bucket.consume.call_args_list, expected_consume_args ) class TestBandwidthLimiter(BaseBandwidthLimitTest): def setUp(self): super(TestBandwidthLimiter, self).setUp() self.bandwidth_limiter = BandwidthLimiter(self.leaky_bucket) def test_get_bandwidth_limited_stream(self): with open(self.filename, 'rb') as f: stream = self.bandwidth_limiter.get_bandwith_limited_stream( f, self.coordinator) self.assertIsInstance(stream, BandwidthLimitedStream) self.assertEqual(stream.read(len(self.content)), self.content) self.assert_consume_calls(amts=[len(self.content)]) def test_get_disabled_bandwidth_limited_stream(self): with open(self.filename, 'rb') as f: stream = self.bandwidth_limiter.get_bandwith_limited_stream( f, self.coordinator, enabled=False) self.assertIsInstance(stream, BandwidthLimitedStream) self.assertEqual(stream.read(len(self.content)), self.content) self.leaky_bucket.consume.assert_not_called() class TestBandwidthLimitedStream(BaseBandwidthLimitTest): def setUp(self): super(TestBandwidthLimitedStream, self).setUp() self.bytes_threshold = 1 def tearDown(self): shutil.rmtree(self.tempdir) def get_bandwidth_limited_stream(self, f): return BandwidthLimitedStream( f, self.leaky_bucket, self.coordinator, self.time_utils, self.bytes_threshold) def assert_sleep_calls(self, amts): expected_sleep_args_list = [ mock.call(amt) for amt in amts ] self.assertEqual( self.time_utils.sleep.call_args_list, expected_sleep_args_list ) def get_unique_consume_request_tokens(self): return set( call_args[0][1] for call_args in self.leaky_bucket.consume.call_args_list ) def 
test_read(self): with open(self.filename, 'rb') as f: stream = self.get_bandwidth_limited_stream(f) data = stream.read(len(self.content)) self.assertEqual(self.content, data) self.assert_consume_calls(amts=[len(self.content)]) self.assert_sleep_calls(amts=[]) def test_retries_on_request_exceeded(self): with open(self.filename, 'rb') as f: stream = self.get_bandwidth_limited_stream(f) retry_time = 1 amt_requested = len(self.content) self.leaky_bucket.consume.side_effect = [ RequestExceededException(amt_requested, retry_time), len(self.content) ] data = stream.read(len(self.content)) self.assertEqual(self.content, data) self.assert_consume_calls(amts=[amt_requested, amt_requested]) self.assert_sleep_calls(amts=[retry_time]) def test_with_transfer_coordinator_exception(self): self.coordinator.set_exception(ValueError()) with open(self.filename, 'rb') as f: stream = self.get_bandwidth_limited_stream(f) with self.assertRaises(ValueError): stream.read(len(self.content)) def test_read_when_bandwidth_limiting_disabled(self): with open(self.filename, 'rb') as f: stream = self.get_bandwidth_limited_stream(f) stream.disable_bandwidth_limiting() data = stream.read(len(self.content)) self.assertEqual(self.content, data) self.assertFalse(self.leaky_bucket.consume.called) def test_read_toggle_disable_enable_bandwidth_limiting(self): with open(self.filename, 'rb') as f: stream = self.get_bandwidth_limited_stream(f) stream.disable_bandwidth_limiting() data = stream.read(1) self.assertEqual(self.content[:1], data) self.assert_consume_calls(amts=[]) stream.enable_bandwidth_limiting() data = stream.read(len(self.content) - 1) self.assertEqual(self.content[1:], data) self.assert_consume_calls(amts=[len(self.content) - 1]) def test_seek(self): mock_fileobj = mock.Mock() stream = self.get_bandwidth_limited_stream(mock_fileobj) stream.seek(1) self.assertEqual( mock_fileobj.seek.call_args_list, [mock.call(1)] ) def test_tell(self): mock_fileobj = mock.Mock() stream = self.get_bandwidth_limited_stream(mock_fileobj) stream.tell() self.assertEqual( mock_fileobj.tell.call_args_list, [mock.call()] ) def test_close(self): mock_fileobj = mock.Mock() stream = self.get_bandwidth_limited_stream(mock_fileobj) stream.close() self.assertEqual( mock_fileobj.close.call_args_list, [mock.call()] ) def test_context_manager(self): mock_fileobj = mock.Mock() stream = self.get_bandwidth_limited_stream(mock_fileobj) with stream as stream_handle: self.assertIs(stream_handle, stream) self.assertEqual( mock_fileobj.close.call_args_list, [mock.call()] ) def test_reuses_request_token(self): with open(self.filename, 'rb') as f: stream = self.get_bandwidth_limited_stream(f) stream.read(1) stream.read(1) self.assertEqual(len(self.get_unique_consume_request_tokens()), 1) def test_request_tokens_unique_per_stream(self): with open(self.filename, 'rb') as f: stream = self.get_bandwidth_limited_stream(f) stream.read(1) with open(self.filename, 'rb') as f: stream = self.get_bandwidth_limited_stream(f) stream.read(1) self.assertEqual(len(self.get_unique_consume_request_tokens()), 2) def test_call_consume_after_reaching_threshold(self): self.bytes_threshold = 2 with open(self.filename, 'rb') as f: stream = self.get_bandwidth_limited_stream(f) self.assertEqual(stream.read(1), self.content[:1]) self.assert_consume_calls(amts=[]) self.assertEqual(stream.read(1), self.content[1:2]) self.assert_consume_calls(amts=[2]) def test_resets_after_reaching_threshold(self): self.bytes_threshold = 2 with open(self.filename, 'rb') as f: stream = 
self.get_bandwidth_limited_stream(f) self.assertEqual(stream.read(2), self.content[:2]) self.assert_consume_calls(amts=[2]) self.assertEqual(stream.read(1), self.content[2:3]) self.assert_consume_calls(amts=[2]) def test_pending_bytes_seen_on_close(self): self.bytes_threshold = 2 with open(self.filename, 'rb') as f: stream = self.get_bandwidth_limited_stream(f) self.assertEqual(stream.read(1), self.content[:1]) self.assert_consume_calls(amts=[]) stream.close() self.assert_consume_calls(amts=[1]) def test_no_bytes_remaining_on(self): self.bytes_threshold = 2 with open(self.filename, 'rb') as f: stream = self.get_bandwidth_limited_stream(f) self.assertEqual(stream.read(2), self.content[:2]) self.assert_consume_calls(amts=[2]) stream.close() # There should have been no more consume() calls made # as all bytes have been accounted for in the previous # consume() call. self.assert_consume_calls(amts=[2]) def test_disable_bandwidth_limiting_with_pending_bytes_seen_on_close(self): self.bytes_threshold = 2 with open(self.filename, 'rb') as f: stream = self.get_bandwidth_limited_stream(f) self.assertEqual(stream.read(1), self.content[:1]) self.assert_consume_calls(amts=[]) stream.disable_bandwidth_limiting() stream.close() self.assert_consume_calls(amts=[]) def test_signal_transferring(self): with open(self.filename, 'rb') as f: stream = self.get_bandwidth_limited_stream(f) stream.signal_not_transferring() data = stream.read(1) self.assertEqual(self.content[:1], data) self.assert_consume_calls(amts=[]) stream.signal_transferring() data = stream.read(len(self.content) - 1) self.assertEqual(self.content[1:], data) self.assert_consume_calls(amts=[len(self.content) - 1]) class TestLeakyBucket(unittest.TestCase): def setUp(self): self.max_rate = 1 self.time_now = 1.0 self.time_utils = mock.Mock(TimeUtils) self.time_utils.time.return_value = self.time_now self.scheduler = mock.Mock(ConsumptionScheduler) self.scheduler.is_scheduled.return_value = False self.rate_tracker = mock.Mock(BandwidthRateTracker) self.leaky_bucket = LeakyBucket( self.max_rate, self.time_utils, self.rate_tracker, self.scheduler ) def set_projected_rate(self, rate): self.rate_tracker.get_projected_rate.return_value = rate def set_retry_time(self, retry_time): self.scheduler.schedule_consumption.return_value = retry_time def assert_recorded_consumed_amt(self, expected_amt): self.assertEqual( self.rate_tracker.record_consumption_rate.call_args, mock.call(expected_amt, self.time_utils.time.return_value)) def assert_was_scheduled(self, amt, token): self.assertEqual( self.scheduler.schedule_consumption.call_args, mock.call(amt, token, amt/(self.max_rate)) ) def assert_nothing_scheduled(self): self.assertFalse(self.scheduler.schedule_consumption.called) def assert_processed_request_token(self, request_token): self.assertEqual( self.scheduler.process_scheduled_consumption.call_args, mock.call(request_token) ) def test_consume_under_max_rate(self): amt = 1 self.set_projected_rate(self.max_rate/2) self.assertEqual(self.leaky_bucket.consume(amt, RequestToken()), amt) self.assert_recorded_consumed_amt(amt) self.assert_nothing_scheduled() def test_consume_at_max_rate(self): amt = 1 self.set_projected_rate(self.max_rate) self.assertEqual(self.leaky_bucket.consume(amt, RequestToken()), amt) self.assert_recorded_consumed_amt(amt) self.assert_nothing_scheduled() def test_consume_over_max_rate(self): amt = 1 retry_time = 2.0 self.set_projected_rate(self.max_rate + 1) self.set_retry_time(retry_time) request_token = RequestToken() try: 
            self.leaky_bucket.consume(amt, request_token)
            self.fail('A RequestExceededException should have been thrown')
        except RequestExceededException as e:
            self.assertEqual(e.requested_amt, amt)
            self.assertEqual(e.retry_time, retry_time)
            self.assert_was_scheduled(amt, request_token)

    def test_consume_with_scheduled_retry(self):
        amt = 1
        self.set_projected_rate(self.max_rate + 1)
        self.scheduler.is_scheduled.return_value = True
        request_token = RequestToken()
        self.assertEqual(self.leaky_bucket.consume(amt, request_token), amt)
        # Nothing new should have been scheduled but the request token
        # should have been processed.
        self.assert_nothing_scheduled()
        self.assert_processed_request_token(request_token)


class TestConsumptionScheduler(unittest.TestCase):
    def setUp(self):
        self.scheduler = ConsumptionScheduler()

    def test_schedule_consumption(self):
        token = RequestToken()
        consume_time = 5
        actual_wait_time = self.scheduler.schedule_consumption(
            1, token, consume_time)
        self.assertEqual(consume_time, actual_wait_time)

    def test_schedule_consumption_for_multiple_requests(self):
        token = RequestToken()
        consume_time = 5
        actual_wait_time = self.scheduler.schedule_consumption(
            1, token, consume_time)
        self.assertEqual(consume_time, actual_wait_time)

        other_consume_time = 3
        other_token = RequestToken()
        next_wait_time = self.scheduler.schedule_consumption(
            1, other_token, other_consume_time)

        # This wait time should be the previous wait time plus its own
        # desired wait time.
        self.assertEqual(next_wait_time, consume_time + other_consume_time)

    def test_is_scheduled(self):
        token = RequestToken()
        consume_time = 5
        self.scheduler.schedule_consumption(1, token, consume_time)
        self.assertTrue(self.scheduler.is_scheduled(token))

    def test_is_not_scheduled(self):
        self.assertFalse(self.scheduler.is_scheduled(RequestToken()))

    def test_process_scheduled_consumption(self):
        token = RequestToken()
        consume_time = 5
        self.scheduler.schedule_consumption(1, token, consume_time)
        self.scheduler.process_scheduled_consumption(token)
        self.assertFalse(self.scheduler.is_scheduled(token))
        different_time = 7
        # The previous consume time should have no effect on the next wait
        # time, as it has been completed.
self.assertEqual( self.scheduler.schedule_consumption(1, token, different_time), different_time ) class TestBandwidthRateTracker(unittest.TestCase): def setUp(self): self.alpha = 0.8 self.rate_tracker = BandwidthRateTracker(self.alpha) def test_current_rate_at_initilizations(self): self.assertEqual(self.rate_tracker.current_rate, 0.0) def test_current_rate_after_one_recorded_point(self): self.rate_tracker.record_consumption_rate(1, 1) # There is no last time point to do a diff against so return a # current rate of 0.0 self.assertEqual(self.rate_tracker.current_rate, 0.0) def test_current_rate(self): self.rate_tracker.record_consumption_rate(1, 1) self.rate_tracker.record_consumption_rate(1, 2) self.rate_tracker.record_consumption_rate(1, 3) self.assertEqual(self.rate_tracker.current_rate, 0.96) def test_get_projected_rate_at_initilizations(self): self.assertEqual(self.rate_tracker.get_projected_rate(1, 1), 0.0) def test_get_projected_rate(self): self.rate_tracker.record_consumption_rate(1, 1) self.rate_tracker.record_consumption_rate(1, 2) projected_rate = self.rate_tracker.get_projected_rate(1, 3) self.assertEqual(projected_rate, 0.96) self.rate_tracker.record_consumption_rate(1, 3) self.assertEqual(self.rate_tracker.current_rate, projected_rate) def test_get_projected_rate_for_same_timestamp(self): self.rate_tracker.record_consumption_rate(1, 1) self.assertEqual( self.rate_tracker.get_projected_rate(1, 1), float('inf') ) s3transfer-0.1.13/tests/unit/test_compat.py000066400000000000000000000046031324114246300207060ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. import os import tempfile import shutil from botocore.compat import six from tests import unittest from s3transfer.compat import seekable, readable class ErrorRaisingSeekWrapper(object): """An object wrapper that throws an error when seeked on :param fileobj: The fileobj that it wraps :param execption: The exception to raise when seeked on. """ def __init__(self, fileobj, exception): self._fileobj = fileobj self._exception = exception def seek(self, offset, whence=0): raise self._exception def tell(self): return self._fileobj.tell() class TestSeekable(unittest.TestCase): def setUp(self): self.tempdir = tempfile.mkdtemp() self.filename = os.path.join(self.tempdir, 'foo') def tearDown(self): shutil.rmtree(self.tempdir) def test_seekable_fileobj(self): with open(self.filename, 'w') as f: self.assertTrue(seekable(f)) def test_non_file_like_obj(self): # Fails becase there is no seekable(), seek(), nor tell() self.assertFalse(seekable(object())) def test_non_seekable_ioerror(self): # Should return False if IOError is thrown. with open(self.filename, 'w') as f: self.assertFalse(seekable(ErrorRaisingSeekWrapper(f, IOError()))) def test_non_seekable_oserror(self): # Should return False if OSError is thrown. 
with open(self.filename, 'w') as f: self.assertFalse(seekable(ErrorRaisingSeekWrapper(f, OSError()))) class TestReadable(unittest.TestCase): def test_readable_fileobj(self): with tempfile.TemporaryFile() as f: self.assertTrue(readable(f)) def test_readable_file_like_obj(self): self.assertTrue(readable(six.BytesIO())) def test_non_file_like_obj(self): self.assertFalse(readable(object())) s3transfer-0.1.13/tests/unit/test_copies.py000066400000000000000000000137431324114246300207120ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. from tests import BaseTaskTest from tests import RecordingSubscriber from s3transfer.copies import CopyObjectTask from s3transfer.copies import CopyPartTask class BaseCopyTaskTest(BaseTaskTest): def setUp(self): super(BaseCopyTaskTest, self).setUp() self.bucket = 'mybucket' self.key = 'mykey' self.copy_source = { 'Bucket': 'mysourcebucket', 'Key': 'mysourcekey' } self.extra_args = {} self.callbacks = [] self.size = 5 class TestCopyObjectTask(BaseCopyTaskTest): def get_copy_task(self, **kwargs): default_kwargs = { 'client': self.client, 'copy_source': self.copy_source, 'bucket': self.bucket, 'key': self.key, 'extra_args': self.extra_args, 'callbacks': self.callbacks, 'size': self.size } default_kwargs.update(kwargs) return self.get_task(CopyObjectTask, main_kwargs=default_kwargs) def test_main(self): self.stubber.add_response( 'copy_object', service_response={}, expected_params={ 'Bucket': self.bucket, 'Key': self.key, 'CopySource': self.copy_source } ) task = self.get_copy_task() task() self.stubber.assert_no_pending_responses() def test_extra_args(self): self.extra_args['ACL'] = 'private' self.stubber.add_response( 'copy_object', service_response={}, expected_params={ 'Bucket': self.bucket, 'Key': self.key, 'CopySource': self.copy_source, 'ACL': 'private' } ) task = self.get_copy_task() task() self.stubber.assert_no_pending_responses() def test_callbacks_invoked(self): subscriber = RecordingSubscriber() self.callbacks.append(subscriber.on_progress) self.stubber.add_response( 'copy_object', service_response={}, expected_params={ 'Bucket': self.bucket, 'Key': self.key, 'CopySource': self.copy_source } ) task = self.get_copy_task() task() self.stubber.assert_no_pending_responses() self.assertEqual(subscriber.calculate_bytes_seen(), self.size) class TestCopyPartTask(BaseCopyTaskTest): def setUp(self): super(TestCopyPartTask, self).setUp() self.copy_source_range = 'bytes=5-9' self.extra_args['CopySourceRange'] = self.copy_source_range self.upload_id = 'myuploadid' self.part_number = 1 self.result_etag = 'my-etag' def get_copy_task(self, **kwargs): default_kwargs = { 'client': self.client, 'copy_source': self.copy_source, 'bucket': self.bucket, 'key': self.key, 'upload_id': self.upload_id, 'part_number': self.part_number, 'extra_args': self.extra_args, 'callbacks': self.callbacks, 'size': self.size } default_kwargs.update(kwargs) return self.get_task(CopyPartTask, main_kwargs=default_kwargs) def test_main(self): self.stubber.add_response( 
'upload_part_copy', service_response={ 'CopyPartResult': { 'ETag': self.result_etag } }, expected_params={ 'Bucket': self.bucket, 'Key': self.key, 'CopySource': self.copy_source, 'UploadId': self.upload_id, 'PartNumber': self.part_number, 'CopySourceRange': self.copy_source_range } ) task = self.get_copy_task() self.assertEqual( task(), {'PartNumber': self.part_number, 'ETag': self.result_etag}) self.stubber.assert_no_pending_responses() def test_extra_args(self): self.extra_args['RequestPayer'] = 'requester' self.stubber.add_response( 'upload_part_copy', service_response={ 'CopyPartResult': { 'ETag': self.result_etag } }, expected_params={ 'Bucket': self.bucket, 'Key': self.key, 'CopySource': self.copy_source, 'UploadId': self.upload_id, 'PartNumber': self.part_number, 'CopySourceRange': self.copy_source_range, 'RequestPayer': 'requester' } ) task = self.get_copy_task() self.assertEqual( task(), {'PartNumber': self.part_number, 'ETag': self.result_etag}) self.stubber.assert_no_pending_responses() def test_callbacks_invoked(self): subscriber = RecordingSubscriber() self.callbacks.append(subscriber.on_progress) self.stubber.add_response( 'upload_part_copy', service_response={ 'CopyPartResult': { 'ETag': self.result_etag } }, expected_params={ 'Bucket': self.bucket, 'Key': self.key, 'CopySource': self.copy_source, 'UploadId': self.upload_id, 'PartNumber': self.part_number, 'CopySourceRange': self.copy_source_range } ) task = self.get_copy_task() self.assertEqual( task(), {'PartNumber': self.part_number, 'ETag': self.result_etag}) self.stubber.assert_no_pending_responses() self.assertEqual(subscriber.calculate_bytes_seen(), self.size) s3transfer-0.1.13/tests/unit/test_delete.py000066400000000000000000000041201324114246300206570ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. from tests import BaseTaskTest from s3transfer.delete import DeleteObjectTask class TestDeleteObjectTask(BaseTaskTest): def setUp(self): super(TestDeleteObjectTask, self).setUp() self.bucket = 'mybucket' self.key = 'mykey' self.extra_args = {} self.callbacks = [] def get_delete_task(self, **kwargs): default_kwargs = { 'client': self.client, 'bucket': self.bucket, 'key': self.key, 'extra_args': self.extra_args, } default_kwargs.update(kwargs) return self.get_task(DeleteObjectTask, main_kwargs=default_kwargs) def test_main(self): self.stubber.add_response( 'delete_object', service_response={}, expected_params={ 'Bucket': self.bucket, 'Key': self.key, } ) task = self.get_delete_task() task() self.stubber.assert_no_pending_responses() def test_extra_args(self): self.extra_args['MFA'] = 'mfa-code' self.extra_args['VersionId'] = '12345' self.stubber.add_response( 'delete_object', service_response={}, expected_params={ 'Bucket': self.bucket, 'Key': self.key, # These extra_args should be injected into the # expected params for the delete_object call. 
'MFA': 'mfa-code', 'VersionId': '12345', } ) task = self.get_delete_task() task() self.stubber.assert_no_pending_responses() s3transfer-0.1.13/tests/unit/test_download.py000066400000000000000000001052511324114246300212330ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. import copy import os import shutil import tempfile import mock from tests import BaseTaskTest from tests import BaseSubmissionTaskTest from tests import StreamWithError from tests import FileCreator from tests import unittest from tests import RecordingExecutor from tests import NonSeekableWriter from s3transfer.compat import six from s3transfer.compat import SOCKET_ERROR from s3transfer.exceptions import RetriesExceededError from s3transfer.bandwidth import BandwidthLimiter from s3transfer.download import DownloadFilenameOutputManager from s3transfer.download import DownloadSpecialFilenameOutputManager from s3transfer.download import DownloadSeekableOutputManager from s3transfer.download import DownloadNonSeekableOutputManager from s3transfer.download import DownloadSubmissionTask from s3transfer.download import GetObjectTask from s3transfer.download import ImmediatelyWriteIOGetObjectTask from s3transfer.download import IOWriteTask from s3transfer.download import IOStreamingWriteTask from s3transfer.download import IORenameFileTask from s3transfer.download import IOCloseTask from s3transfer.download import CompleteDownloadNOOPTask from s3transfer.download import DownloadChunkIterator from s3transfer.download import DeferQueue from s3transfer.futures import IN_MEMORY_DOWNLOAD_TAG from s3transfer.futures import BoundedExecutor from s3transfer.utils import OSUtils from s3transfer.utils import CallArgs class DownloadException(Exception): pass class WriteCollector(object): """A utility to collect information about writes and seeks""" def __init__(self): self._pos = 0 self.writes = [] def seek(self, pos): self._pos = pos def write(self, data): self.writes.append((self._pos, data)) self._pos += len(data) class AlwaysIndicatesSpecialFileOSUtils(OSUtils): """OSUtil that always returns True for is_special_file""" def is_special_file(self, filename): return True class CancelledStreamWrapper(object): """A wrapper to trigger a cancellation while stream reading Forces the transfer coordinator to cancel after a certain amount of reads :param stream: The underlying stream to read from :param transfer_coordinator: The coordinator for the transfer :param num_reads: On which read to sigal a cancellation. 0 is the first read. 
""" def __init__(self, stream, transfer_coordinator, num_reads=0): self._stream = stream self._transfer_coordinator = transfer_coordinator self._num_reads = num_reads self._count = 0 def read(self, *args, **kwargs): if self._num_reads == self._count: self._transfer_coordinator.cancel() self._stream.read(*args, **kwargs) self._count += 1 class BaseDownloadOutputManagerTest(BaseTaskTest): def setUp(self): super(BaseDownloadOutputManagerTest, self).setUp() self.osutil = OSUtils() # Create a file to write to self.tempdir = tempfile.mkdtemp() self.filename = os.path.join(self.tempdir, 'myfile') self.call_args = CallArgs(fileobj=self.filename) self.future = self.get_transfer_future(self.call_args) self.io_executor = BoundedExecutor(1000, 1) def tearDown(self): super(BaseDownloadOutputManagerTest, self).tearDown() shutil.rmtree(self.tempdir) class TestDownloadFilenameOutputManager(BaseDownloadOutputManagerTest): def setUp(self): super(TestDownloadFilenameOutputManager, self).setUp() self.download_output_manager = DownloadFilenameOutputManager( self.osutil, self.transfer_coordinator, io_executor=self.io_executor) def test_is_compatible(self): self.assertTrue( self.download_output_manager.is_compatible( self.filename, self.osutil) ) def test_get_download_task_tag(self): self.assertIsNone(self.download_output_manager.get_download_task_tag()) def test_get_fileobj_for_io_writes(self): with self.download_output_manager.get_fileobj_for_io_writes( self.future) as f: # Ensure it is a file like object returned self.assertTrue(hasattr(f, 'read')) self.assertTrue(hasattr(f, 'seek')) # Make sure the name of the file returned is not the same as the # final filename as we should be writing to a temporary file. self.assertNotEqual(f.name, self.filename) def test_get_final_io_task(self): ref_contents = b'my_contents' with self.download_output_manager.get_fileobj_for_io_writes( self.future) as f: temp_filename = f.name # Write some data to test that the data gets moved over to the # final location. f.write(ref_contents) final_task = self.download_output_manager.get_final_io_task() # Make sure it is the appropriate task. 
self.assertIsInstance(final_task, IORenameFileTask) final_task() # Make sure the temp_file gets removed self.assertFalse(os.path.exists(temp_filename)) # Make sure what ever was written to the temp file got moved to # the final filename with open(self.filename, 'rb') as f: self.assertEqual(f.read(), ref_contents) def test_can_queue_file_io_task(self): fileobj = WriteCollector() self.download_output_manager.queue_file_io_task( fileobj=fileobj, data='foo', offset=0) self.download_output_manager.queue_file_io_task( fileobj=fileobj, data='bar', offset=3) self.io_executor.shutdown() self.assertEqual(fileobj.writes, [(0, 'foo'), (3, 'bar')]) def test_get_file_io_write_task(self): fileobj = WriteCollector() io_write_task = self.download_output_manager.get_io_write_task( fileobj=fileobj, data='foo', offset=3) self.assertIsInstance(io_write_task, IOWriteTask) io_write_task() self.assertEqual(fileobj.writes, [(3, 'foo')]) class TestDownloadSpecialFilenameOutputManager(BaseDownloadOutputManagerTest): def setUp(self): super(TestDownloadSpecialFilenameOutputManager, self).setUp() self.osutil = AlwaysIndicatesSpecialFileOSUtils() self.download_output_manager = DownloadSpecialFilenameOutputManager( self.osutil, self.transfer_coordinator, io_executor=self.io_executor) def test_is_compatible_for_special_file(self): self.assertTrue( self.download_output_manager.is_compatible( self.filename, AlwaysIndicatesSpecialFileOSUtils())) def test_is_not_compatible_for_non_special_file(self): self.assertFalse( self.download_output_manager.is_compatible( self.filename, OSUtils())) def test_get_fileobj_for_io_writes(self): with self.download_output_manager.get_fileobj_for_io_writes( self.future) as f: # Ensure it is a file like object returned self.assertTrue(hasattr(f, 'read')) # Make sure the name of the file returned is the same as the # final filename as we should not be writing to a temporary file. 
self.assertEqual(f.name, self.filename) def test_get_final_io_task(self): self.assertIsInstance( self.download_output_manager.get_final_io_task(), IOCloseTask) def test_can_queue_file_io_task(self): fileobj = WriteCollector() self.download_output_manager.queue_file_io_task( fileobj=fileobj, data='foo', offset=0) self.download_output_manager.queue_file_io_task( fileobj=fileobj, data='bar', offset=3) self.io_executor.shutdown() self.assertEqual(fileobj.writes, [(0, 'foo'), (3, 'bar')]) class TestDownloadSeekableOutputManager(BaseDownloadOutputManagerTest): def setUp(self): super(TestDownloadSeekableOutputManager, self).setUp() self.download_output_manager = DownloadSeekableOutputManager( self.osutil, self.transfer_coordinator, io_executor=self.io_executor) # Create a fileobj to write to self.fileobj = open(self.filename, 'wb') self.call_args = CallArgs(fileobj=self.fileobj) self.future = self.get_transfer_future(self.call_args) def tearDown(self): self.fileobj.close() super(TestDownloadSeekableOutputManager, self).tearDown() def test_is_compatible(self): self.assertTrue( self.download_output_manager.is_compatible( self.fileobj, self.osutil) ) def test_is_compatible_bytes_io(self): self.assertTrue( self.download_output_manager.is_compatible( six.BytesIO(), self.osutil) ) def test_not_compatible_for_non_filelike_obj(self): self.assertFalse(self.download_output_manager.is_compatible( object(), self.osutil) ) def test_get_download_task_tag(self): self.assertIsNone(self.download_output_manager.get_download_task_tag()) def test_get_fileobj_for_io_writes(self): self.assertIs( self.download_output_manager.get_fileobj_for_io_writes( self.future), self.fileobj ) def test_get_final_io_task(self): self.assertIsInstance( self.download_output_manager.get_final_io_task(), CompleteDownloadNOOPTask ) def test_can_queue_file_io_task(self): fileobj = WriteCollector() self.download_output_manager.queue_file_io_task( fileobj=fileobj, data='foo', offset=0) self.download_output_manager.queue_file_io_task( fileobj=fileobj, data='bar', offset=3) self.io_executor.shutdown() self.assertEqual(fileobj.writes, [(0, 'foo'), (3, 'bar')]) def test_get_file_io_write_task(self): fileobj = WriteCollector() io_write_task = self.download_output_manager.get_io_write_task( fileobj=fileobj, data='foo', offset=3) self.assertIsInstance(io_write_task, IOWriteTask) io_write_task() self.assertEqual(fileobj.writes, [(3, 'foo')]) class TestDownloadNonSeekableOutputManager(BaseDownloadOutputManagerTest): def setUp(self): super(TestDownloadNonSeekableOutputManager, self).setUp() self.download_output_manager = DownloadNonSeekableOutputManager( self.osutil, self.transfer_coordinator, io_executor=None) def test_is_compatible_with_seekable_stream(self): with open(self.filename, 'wb') as f: self.assertTrue(self.download_output_manager.is_compatible( f, self.osutil) ) def test_not_compatible_with_filename(self): self.assertFalse(self.download_output_manager.is_compatible( self.filename, self.osutil)) def test_compatible_with_non_seekable_stream(self): class NonSeekable(object): def write(self, data): pass f = NonSeekable() self.assertTrue(self.download_output_manager.is_compatible( f, self.osutil) ) def test_is_compatible_with_bytesio(self): self.assertTrue( self.download_output_manager.is_compatible( six.BytesIO(), self.osutil) ) def test_get_download_task_tag(self): self.assertIs( self.download_output_manager.get_download_task_tag(), IN_MEMORY_DOWNLOAD_TAG) def test_submit_writes_from_internal_queue(self): class FakeQueue(object): def 
request_writes(self, offset, data): return [ {'offset': 0, 'data': 'foo'}, {'offset': 3, 'data': 'bar'}, ] q = FakeQueue() io_executor = BoundedExecutor(1000, 1) manager = DownloadNonSeekableOutputManager( self.osutil, self.transfer_coordinator, io_executor=io_executor, defer_queue=q) fileobj = WriteCollector() manager.queue_file_io_task( fileobj=fileobj, data='foo', offset=1) io_executor.shutdown() self.assertEqual(fileobj.writes, [(0, 'foo'), (3, 'bar')]) def test_get_file_io_write_task(self): fileobj = WriteCollector() io_write_task = self.download_output_manager.get_io_write_task( fileobj=fileobj, data='foo', offset=1) self.assertIsInstance(io_write_task, IOStreamingWriteTask) io_write_task() self.assertEqual(fileobj.writes, [(0, 'foo')]) class TestDownloadSubmissionTask(BaseSubmissionTaskTest): def setUp(self): super(TestDownloadSubmissionTask, self).setUp() self.tempdir = tempfile.mkdtemp() self.filename = os.path.join(self.tempdir, 'myfile') self.bucket = 'mybucket' self.key = 'mykey' self.extra_args = {} self.subscribers = [] # Create a stream to read from self.content = b'my content' self.stream = six.BytesIO(self.content) # A list to keep track of all of the bodies sent over the wire # and their order. self.call_args = self.get_call_args() self.transfer_future = self.get_transfer_future(self.call_args) self.io_executor = BoundedExecutor(1000, 1) self.submission_main_kwargs = { 'client': self.client, 'config': self.config, 'osutil': self.osutil, 'request_executor': self.executor, 'io_executor': self.io_executor, 'transfer_future': self.transfer_future } self.submission_task = self.get_download_submission_task() def tearDown(self): super(TestDownloadSubmissionTask, self).tearDown() shutil.rmtree(self.tempdir) def get_call_args(self, **kwargs): default_call_args = { 'fileobj': self.filename, 'bucket': self.bucket, 'key': self.key, 'extra_args': self.extra_args, 'subscribers': self.subscribers } default_call_args.update(kwargs) return CallArgs(**default_call_args) def wrap_executor_in_recorder(self): self.executor = RecordingExecutor(self.executor) self.submission_main_kwargs['request_executor'] = self.executor def use_fileobj_in_call_args(self, fileobj): self.call_args = self.get_call_args(fileobj=fileobj) self.transfer_future = self.get_transfer_future(self.call_args) self.submission_main_kwargs['transfer_future'] = self.transfer_future def assert_tag_for_get_object(self, tag_value): submissions_to_compare = self.executor.submissions if len(submissions_to_compare) > 1: # If it was ranged get, make sure we do not include the join task. 
submissions_to_compare = submissions_to_compare[:-1] for submission in submissions_to_compare: self.assertEqual( submission['tag'], tag_value) def add_head_object_response(self): self.stubber.add_response( 'head_object', {'ContentLength': len(self.content)}) def add_get_responses(self): chunksize = self.config.multipart_chunksize for i in range(0, len(self.content), chunksize): if i + chunksize > len(self.content): stream = six.BytesIO(self.content[i:]) self.stubber.add_response('get_object', {'Body': stream}) else: stream = six.BytesIO(self.content[i:i+chunksize]) self.stubber.add_response('get_object', {'Body': stream}) def configure_for_ranged_get(self): self.config.multipart_threshold = 1 self.config.multipart_chunksize = 4 def get_download_submission_task(self): return self.get_task( DownloadSubmissionTask, main_kwargs=self.submission_main_kwargs) def wait_and_assert_completed_successfully(self, submission_task): submission_task() self.transfer_future.result() self.stubber.assert_no_pending_responses() def test_submits_no_tag_for_get_object_filename(self): self.wrap_executor_in_recorder() self.add_head_object_response() self.add_get_responses() self.submission_task = self.get_download_submission_task() self.wait_and_assert_completed_successfully(self.submission_task) # Make sure no tag to limit that task specifically was not associated # to that task submission. self.assert_tag_for_get_object(None) def test_submits_no_tag_for_ranged_get_filename(self): self.wrap_executor_in_recorder() self.configure_for_ranged_get() self.add_head_object_response() self.add_get_responses() self.submission_task = self.get_download_submission_task() self.wait_and_assert_completed_successfully(self.submission_task) # Make sure no tag to limit that task specifically was not associated # to that task submission. self.assert_tag_for_get_object(None) def test_submits_no_tag_for_get_object_fileobj(self): self.wrap_executor_in_recorder() self.add_head_object_response() self.add_get_responses() with open(self.filename, 'wb') as f: self.use_fileobj_in_call_args(f) self.submission_task = self.get_download_submission_task() self.wait_and_assert_completed_successfully(self.submission_task) # Make sure no tag to limit that task specifically was not associated # to that task submission. self.assert_tag_for_get_object(None) def test_submits_no_tag_for_ranged_get_object_fileobj(self): self.wrap_executor_in_recorder() self.configure_for_ranged_get() self.add_head_object_response() self.add_get_responses() with open(self.filename, 'wb') as f: self.use_fileobj_in_call_args(f) self.submission_task = self.get_download_submission_task() self.wait_and_assert_completed_successfully(self.submission_task) # Make sure no tag to limit that task specifically was not associated # to that task submission. self.assert_tag_for_get_object(None) def tests_submits_tag_for_get_object_nonseekable_fileobj(self): self.wrap_executor_in_recorder() self.add_head_object_response() self.add_get_responses() with open(self.filename, 'wb') as f: self.use_fileobj_in_call_args(NonSeekableWriter(f)) self.submission_task = self.get_download_submission_task() self.wait_and_assert_completed_successfully(self.submission_task) # Make sure no tag to limit that task specifically was not associated # to that task submission. 
self.assert_tag_for_get_object(IN_MEMORY_DOWNLOAD_TAG) def tests_submits_tag_for_ranged_get_object_nonseekable_fileobj(self): self.wrap_executor_in_recorder() self.configure_for_ranged_get() self.add_head_object_response() self.add_get_responses() with open(self.filename, 'wb') as f: self.use_fileobj_in_call_args(NonSeekableWriter(f)) self.submission_task = self.get_download_submission_task() self.wait_and_assert_completed_successfully(self.submission_task) # Make sure no tag to limit that task specifically was not associated # to that task submission. self.assert_tag_for_get_object(IN_MEMORY_DOWNLOAD_TAG) class TestGetObjectTask(BaseTaskTest): def setUp(self): super(TestGetObjectTask, self).setUp() self.bucket = 'mybucket' self.key = 'mykey' self.extra_args = {} self.callbacks = [] self.max_attempts = 5 self.io_executor = BoundedExecutor(1000, 1) self.content = b'my content' self.stream = six.BytesIO(self.content) self.fileobj = WriteCollector() self.osutil = OSUtils() self.io_chunksize = 64 * (1024 ** 2) self.task_cls = GetObjectTask self.download_output_manager = DownloadSeekableOutputManager( self.osutil, self.transfer_coordinator, self.io_executor) def get_download_task(self, **kwargs): default_kwargs = { 'client': self.client, 'bucket': self.bucket, 'key': self.key, 'fileobj': self.fileobj, 'extra_args': self.extra_args, 'callbacks': self.callbacks, 'max_attempts': self.max_attempts, 'download_output_manager': self.download_output_manager, 'io_chunksize': self.io_chunksize, } default_kwargs.update(kwargs) self.transfer_coordinator.set_status_to_queued() return self.get_task(self.task_cls, main_kwargs=default_kwargs) def assert_io_writes(self, expected_writes): # Let the io executor process all of the writes before checking # what writes were sent to it. 
self.io_executor.shutdown() self.assertEqual(self.fileobj.writes, expected_writes) def test_main(self): self.stubber.add_response( 'get_object', service_response={'Body': self.stream}, expected_params={'Bucket': self.bucket, 'Key': self.key} ) task = self.get_download_task() task() self.stubber.assert_no_pending_responses() self.assert_io_writes([(0, self.content)]) def test_extra_args(self): self.stubber.add_response( 'get_object', service_response={'Body': self.stream}, expected_params={ 'Bucket': self.bucket, 'Key': self.key, 'Range': 'bytes=0-' } ) self.extra_args['Range'] = 'bytes=0-' task = self.get_download_task() task() self.stubber.assert_no_pending_responses() self.assert_io_writes([(0, self.content)]) def test_control_chunk_size(self): self.stubber.add_response( 'get_object', service_response={'Body': self.stream}, expected_params={'Bucket': self.bucket, 'Key': self.key} ) task = self.get_download_task(io_chunksize=1) task() self.stubber.assert_no_pending_responses() expected_contents = [] for i in range(len(self.content)): expected_contents.append((i, bytes(self.content[i:i+1]))) self.assert_io_writes(expected_contents) def test_start_index(self): self.stubber.add_response( 'get_object', service_response={'Body': self.stream}, expected_params={'Bucket': self.bucket, 'Key': self.key} ) task = self.get_download_task(start_index=5) task() self.stubber.assert_no_pending_responses() self.assert_io_writes([(5, self.content)]) def test_uses_bandwidth_limiter(self): bandwidth_limiter = mock.Mock(BandwidthLimiter) self.stubber.add_response( 'get_object', service_response={'Body': self.stream}, expected_params={'Bucket': self.bucket, 'Key': self.key} ) task = self.get_download_task(bandwidth_limiter=bandwidth_limiter) task() self.stubber.assert_no_pending_responses() self.assertEqual( bandwidth_limiter.get_bandwith_limited_stream.call_args_list, [mock.call(mock.ANY, self.transfer_coordinator)] ) def test_retries_succeeds(self): self.stubber.add_response( 'get_object', service_response={ 'Body': StreamWithError(self.stream, SOCKET_ERROR) }, expected_params={'Bucket': self.bucket, 'Key': self.key} ) self.stubber.add_response( 'get_object', service_response={'Body': self.stream}, expected_params={'Bucket': self.bucket, 'Key': self.key} ) task = self.get_download_task() task() # Retryable error should have not affected the bytes placed into # the io queue. 
self.stubber.assert_no_pending_responses() self.assert_io_writes([(0, self.content)]) def test_retries_failure(self): for _ in range(self.max_attempts): self.stubber.add_response( 'get_object', service_response={ 'Body': StreamWithError(self.stream, SOCKET_ERROR) }, expected_params={'Bucket': self.bucket, 'Key': self.key} ) task = self.get_download_task() task() self.transfer_coordinator.announce_done() # Should have failed out on a RetriesExceededError with self.assertRaises(RetriesExceededError): self.transfer_coordinator.result() self.stubber.assert_no_pending_responses() def test_retries_in_middle_of_streaming(self): # After the first read a retryable error will be thrown self.stubber.add_response( 'get_object', service_response={ 'Body': StreamWithError( copy.deepcopy(self.stream), SOCKET_ERROR, 1) }, expected_params={'Bucket': self.bucket, 'Key': self.key} ) self.stubber.add_response( 'get_object', service_response={'Body': self.stream}, expected_params={'Bucket': self.bucket, 'Key': self.key} ) task = self.get_download_task(io_chunksize=1) task() self.stubber.assert_no_pending_responses() expected_contents = [] # This is the content intially read in before the retry hit on the # second read() expected_contents.append((0, bytes(self.content[0:1]))) # The rest of the content should be the entire set of data partitioned # out based on the one byte stream chunk size. Note the second # element in the list should be a copy of the first element since # a retryable exception happened in between. for i in range(len(self.content)): expected_contents.append((i, bytes(self.content[i:i+1]))) self.assert_io_writes(expected_contents) def test_cancels_out_of_queueing(self): self.stubber.add_response( 'get_object', service_response={ 'Body': CancelledStreamWrapper( self.stream, self.transfer_coordinator) }, expected_params={'Bucket': self.bucket, 'Key': self.key} ) task = self.get_download_task() task() self.stubber.assert_no_pending_responses() # Make sure that no contents were added to the queue because the task # should have been canceled before trying to add the contents to the # io queue. self.assert_io_writes([]) class TestImmediatelyWriteIOGetObjectTask(TestGetObjectTask): def setUp(self): super(TestImmediatelyWriteIOGetObjectTask, self).setUp() self.task_cls = ImmediatelyWriteIOGetObjectTask # When data is written out, it should not use the io executor at all # if it does use the io executor that is a deviation from expected # behavior as the data should be written immediately to the file # object once downloaded. 
self.io_executor = None self.download_output_manager = DownloadSeekableOutputManager( self.osutil, self.transfer_coordinator, self.io_executor) def assert_io_writes(self, expected_writes): self.assertEqual(self.fileobj.writes, expected_writes) class BaseIOTaskTest(BaseTaskTest): def setUp(self): super(BaseIOTaskTest, self).setUp() self.files = FileCreator() self.osutil = OSUtils() self.temp_filename = os.path.join(self.files.rootdir, 'mytempfile') self.final_filename = os.path.join(self.files.rootdir, 'myfile') def tearDown(self): super(BaseIOTaskTest, self).tearDown() self.files.remove_all() class TestIOStreamingWriteTask(BaseIOTaskTest): def test_main(self): with open(self.temp_filename, 'wb') as f: task = self.get_task( IOStreamingWriteTask, main_kwargs={ 'fileobj': f, 'data': b'foobar' } ) task() task2 = self.get_task( IOStreamingWriteTask, main_kwargs={ 'fileobj': f, 'data': b'baz' } ) task2() with open(self.temp_filename, 'rb') as f: # We should just have written to the file in the order # the tasks were executed. self.assertEqual(f.read(), b'foobarbaz') class TestIOWriteTask(BaseIOTaskTest): def test_main(self): with open(self.temp_filename, 'wb') as f: # Write once to the file task = self.get_task( IOWriteTask, main_kwargs={ 'fileobj': f, 'data': b'foo', 'offset': 0 } ) task() # Write again to the file task = self.get_task( IOWriteTask, main_kwargs={ 'fileobj': f, 'data': b'bar', 'offset': 3 } ) task() with open(self.temp_filename, 'rb') as f: self.assertEqual(f.read(), b'foobar') class TestIORenameFileTask(BaseIOTaskTest): def test_main(self): with open(self.temp_filename, 'wb') as f: task = self.get_task( IORenameFileTask, main_kwargs={ 'fileobj': f, 'final_filename': self.final_filename, 'osutil': self.osutil } ) task() self.assertTrue(os.path.exists(self.final_filename)) self.assertFalse(os.path.exists(self.temp_filename)) class TestIOCloseTask(BaseIOTaskTest): def test_main(self): with open(self.temp_filename, 'w') as f: task = self.get_task(IOCloseTask, main_kwargs={'fileobj': f}) task() self.assertTrue(f.closed) class TestDownloadChunkIterator(unittest.TestCase): def test_iter(self): content = b'my content' body = six.BytesIO(content) ref_chunks = [] for chunk in DownloadChunkIterator(body, len(content)): ref_chunks.append(chunk) self.assertEqual(ref_chunks, [b'my content']) def test_iter_chunksize(self): content = b'1234' body = six.BytesIO(content) ref_chunks = [] for chunk in DownloadChunkIterator(body, 3): ref_chunks.append(chunk) self.assertEqual(ref_chunks, [b'123', b'4']) def test_empty_content(self): body = six.BytesIO(b'') ref_chunks = [] for chunk in DownloadChunkIterator(body, 3): ref_chunks.append(chunk) self.assertEqual(ref_chunks, [b'']) class TestDeferQueue(unittest.TestCase): def setUp(self): self.q = DeferQueue() def test_no_writes_when_not_lowest_block(self): writes = self.q.request_writes(offset=1, data='bar') self.assertEqual(writes, []) def test_writes_returned_in_order(self): self.assertEqual(self.q.request_writes(offset=3, data='d'), []) self.assertEqual(self.q.request_writes(offset=2, data='c'), []) self.assertEqual(self.q.request_writes(offset=1, data='b'), []) # Everything at this point has been deferred, but as soon as we # send offset=0, that will unlock offsets 0-3. 
writes = self.q.request_writes(offset=0, data='a') self.assertEqual( writes, [ {'offset': 0, 'data': 'a'}, {'offset': 1, 'data': 'b'}, {'offset': 2, 'data': 'c'}, {'offset': 3, 'data': 'd'} ] ) def test_unlocks_partial_range(self): self.assertEqual(self.q.request_writes(offset=5, data='f'), []) self.assertEqual(self.q.request_writes(offset=1, data='b'), []) # offset=0 unlocks 0-1, but offset=5 still needs to see 2-4 first. writes = self.q.request_writes(offset=0, data='a') self.assertEqual( writes, [ {'offset': 0, 'data': 'a'}, {'offset': 1, 'data': 'b'}, ] ) def test_data_can_be_any_size(self): self.q.request_writes(offset=5, data='hello world') writes = self.q.request_writes(offset=0, data='abcde') self.assertEqual( writes, [ {'offset': 0, 'data': 'abcde'}, {'offset': 5, 'data': 'hello world'}, ] ) def test_data_queued_in_order(self): # This immediately gets returned because offset=0 is the # next range we're waiting on. writes = self.q.request_writes(offset=0, data='hello world') self.assertEqual(writes, [{'offset': 0, 'data': 'hello world'}]) # Same thing here but with offset writes = self.q.request_writes(offset=11, data='hello again') self.assertEqual(writes, [{'offset': 11, 'data': 'hello again'}]) def test_writes_below_min_offset_are_ignored(self): self.q.request_writes(offset=0, data='a') self.q.request_writes(offset=1, data='b') self.q.request_writes(offset=2, data='c') # At this point we're expecting offset=3, so if a write # comes in below 3, we ignore it. self.assertEqual(self.q.request_writes(offset=0, data='a'), []) self.assertEqual(self.q.request_writes(offset=1, data='b'), []) self.assertEqual( self.q.request_writes(offset=3, data='d'), [{'offset': 3, 'data': 'd'}] ) def test_duplicate_writes_are_ignored(self): self.q.request_writes(offset=2, data='c') self.q.request_writes(offset=1, data='b') # We're still waiting for offset=0, but if # a duplicate write comes in for offset=2/offset=1 # it's ignored. This gives "first one wins" behavior. self.assertEqual(self.q.request_writes(offset=2, data='X'), []) self.assertEqual(self.q.request_writes(offset=1, data='Y'), []) self.assertEqual( self.q.request_writes(offset=0, data='a'), [ {'offset': 0, 'data': 'a'}, # Note we're seeing 'b' 'c', and not 'X', 'Y'. {'offset': 1, 'data': 'b'}, {'offset': 2, 'data': 'c'}, ] ) s3transfer-0.1.13/tests/unit/test_futures.py000066400000000000000000000630651324114246300211270ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
import sys import time import traceback import mock from concurrent.futures import ThreadPoolExecutor from tests import unittest from tests import RecordingExecutor from tests import TransferCoordinatorWithInterrupt from s3transfer.exceptions import CancelledError from s3transfer.exceptions import FatalError from s3transfer.exceptions import TransferNotDoneError from s3transfer.futures import TransferFuture from s3transfer.futures import TransferMeta from s3transfer.futures import TransferCoordinator from s3transfer.futures import BoundedExecutor from s3transfer.futures import ExecutorFuture from s3transfer.futures import BaseExecutor from s3transfer.futures import NonThreadedExecutor from s3transfer.futures import NonThreadedExecutorFuture from s3transfer.tasks import Task from s3transfer.utils import FunctionContainer from s3transfer.utils import TaskSemaphore from s3transfer.utils import NoResourcesAvailable def return_call_args(*args, **kwargs): return args, kwargs def raise_exception(exception): raise exception def get_exc_info(exception): try: raise_exception(exception) except: return sys.exc_info() class RecordingTransferCoordinator(TransferCoordinator): def __init__(self): self.all_transfer_futures_ever_associated = set() super(RecordingTransferCoordinator, self).__init__() def add_associated_future(self, future): self.all_transfer_futures_ever_associated.add(future) super(RecordingTransferCoordinator, self).add_associated_future(future) class ReturnFooTask(Task): def _main(self, **kwargs): return 'foo' class SleepTask(Task): def _main(self, sleep_time, **kwargs): time.sleep(sleep_time) class TestTransferFuture(unittest.TestCase): def setUp(self): self.meta = TransferMeta() self.coordinator = TransferCoordinator() self.future = self._get_transfer_future() def _get_transfer_future(self, **kwargs): components = { 'meta': self.meta, 'coordinator': self.coordinator, } for component_name, component in kwargs.items(): components[component_name] = component return TransferFuture(**components) def test_meta(self): self.assertIs(self.future.meta, self.meta) def test_done(self): self.assertFalse(self.future.done()) self.coordinator.set_result(None) self.assertTrue(self.future.done()) def test_result(self): result = 'foo' self.coordinator.set_result(result) self.coordinator.announce_done() self.assertEqual(self.future.result(), result) def test_keyboard_interrupt_on_result_does_not_block(self): # This should raise a KeyboardInterrupt when result is called on it. self.coordinator = TransferCoordinatorWithInterrupt() self.future = self._get_transfer_future() # result() should not block and immediately raise the keyboard # interrupt exception. 
with self.assertRaises(KeyboardInterrupt): self.future.result() def test_cancel(self): self.future.cancel() self.assertTrue(self.future.done()) self.assertEqual(self.coordinator.status, 'cancelled') def test_set_exception(self): # Set the result such that there is no exception self.coordinator.set_result('result') self.coordinator.announce_done() self.assertEqual(self.future.result(), 'result') self.future.set_exception(ValueError()) with self.assertRaises(ValueError): self.future.result() def test_set_exception_only_after_done(self): with self.assertRaises(TransferNotDoneError): self.future.set_exception(ValueError()) self.coordinator.set_result('result') self.coordinator.announce_done() self.future.set_exception(ValueError()) with self.assertRaises(ValueError): self.future.result() class TestTransferMeta(unittest.TestCase): def setUp(self): self.transfer_meta = TransferMeta() def test_size(self): self.assertEqual(self.transfer_meta.size, None) self.transfer_meta.provide_transfer_size(5) self.assertEqual(self.transfer_meta.size, 5) def test_call_args(self): call_args = object() transfer_meta = TransferMeta(call_args) # Assert the that call args provided is the same as is returned self.assertIs(transfer_meta.call_args, call_args) def test_transfer_id(self): transfer_meta = TransferMeta(transfer_id=1) self.assertEqual(transfer_meta.transfer_id, 1) def test_user_context(self): self.transfer_meta.user_context['foo'] = 'bar' self.assertEqual(self.transfer_meta.user_context, {'foo': 'bar'}) class TestTransferCoordinator(unittest.TestCase): def setUp(self): self.transfer_coordinator = TransferCoordinator() def test_transfer_id(self): transfer_coordinator = TransferCoordinator(transfer_id=1) self.assertEqual(transfer_coordinator.transfer_id, 1) def test_repr(self): transfer_coordinator = TransferCoordinator(transfer_id=1) self.assertEqual( repr(transfer_coordinator), 'TransferCoordinator(transfer_id=1)') def test_initial_status(self): # A TransferCoordinator with no progress should have the status # of not-started self.assertEqual(self.transfer_coordinator.status, 'not-started') def test_set_status_to_queued(self): self.transfer_coordinator.set_status_to_queued() self.assertEqual(self.transfer_coordinator.status, 'queued') def test_cannot_set_status_to_queued_from_done_state(self): self.transfer_coordinator.set_exception(RuntimeError) with self.assertRaises(RuntimeError): self.transfer_coordinator.set_status_to_queued() def test_status_running(self): self.transfer_coordinator.set_status_to_running() self.assertEqual(self.transfer_coordinator.status, 'running') def test_cannot_set_status_to_running_from_done_state(self): self.transfer_coordinator.set_exception(RuntimeError) with self.assertRaises(RuntimeError): self.transfer_coordinator.set_status_to_running() def test_set_result(self): success_result = 'foo' self.transfer_coordinator.set_result(success_result) self.transfer_coordinator.announce_done() # Setting result should result in a success state and the return value # that was set. 
        self.assertEqual(self.transfer_coordinator.status, 'success')
        self.assertEqual(self.transfer_coordinator.result(), success_result)

    def test_set_exception(self):
        exception_result = RuntimeError
        self.transfer_coordinator.set_exception(exception_result)
        self.transfer_coordinator.announce_done()
        # Setting an exception should result in a failed state and the return
        # value should be the raised exception
        self.assertEqual(self.transfer_coordinator.status, 'failed')
        self.assertEqual(self.transfer_coordinator.exception, exception_result)
        with self.assertRaises(exception_result):
            self.transfer_coordinator.result()

    def test_exception_cannot_override_done_state(self):
        self.transfer_coordinator.set_result('foo')
        self.transfer_coordinator.set_exception(RuntimeError)
        # Its status should be success even after the exception is set because
        # success is a done state.
        self.assertEqual(self.transfer_coordinator.status, 'success')

    def test_exception_can_override_done_state_with_override_flag(self):
        self.transfer_coordinator.set_result('foo')
        self.transfer_coordinator.set_exception(RuntimeError, override=True)
        self.assertEqual(self.transfer_coordinator.status, 'failed')

    def test_cancel(self):
        self.assertEqual(self.transfer_coordinator.status, 'not-started')
        self.transfer_coordinator.cancel()
        # This should set the state to cancelled and raise the CancelledError
        # exception and should have also set the done event so that result()
        # is no longer set.
        self.assertEqual(self.transfer_coordinator.status, 'cancelled')
        with self.assertRaises(CancelledError):
            self.transfer_coordinator.result()

    def test_cancel_can_run_done_callbacks_that_uses_result(self):
        exceptions = []

        def capture_exception(transfer_coordinator, captured_exceptions):
            try:
                transfer_coordinator.result()
            except Exception as e:
                captured_exceptions.append(e)

        self.assertEqual(self.transfer_coordinator.status, 'not-started')
        self.transfer_coordinator.add_done_callback(
            capture_exception, self.transfer_coordinator, exceptions)
        self.transfer_coordinator.cancel()

        self.assertEqual(len(exceptions), 1)
        self.assertIsInstance(exceptions[0], CancelledError)

    def test_cancel_with_message(self):
        message = 'my message'
        self.transfer_coordinator.cancel(message)
        self.transfer_coordinator.announce_done()
        with self.assertRaisesRegexp(CancelledError, message):
            self.transfer_coordinator.result()

    def test_cancel_with_provided_exception(self):
        message = 'my message'
        self.transfer_coordinator.cancel(message, exc_type=FatalError)
        self.transfer_coordinator.announce_done()
        with self.assertRaisesRegexp(FatalError, message):
            self.transfer_coordinator.result()

    def test_cancel_cannot_override_done_state(self):
        self.transfer_coordinator.set_result('foo')
        self.transfer_coordinator.cancel()
        # Its status should be success even after cancel is called because
        # success is a done state.
        self.assertEqual(self.transfer_coordinator.status, 'success')

    def test_set_result_can_override_cancel(self):
        self.transfer_coordinator.cancel()
        # Result setting should override any cancel or set exception as this
        # is always invoked by the final task.
        self.transfer_coordinator.set_result('foo')
        self.transfer_coordinator.announce_done()
        self.assertEqual(self.transfer_coordinator.status, 'success')

    def test_submit(self):
        # Submit a callable to the transfer coordinator. It should submit it
        # to the executor.
executor = RecordingExecutor( BoundedExecutor(1, 1, {'my-tag': TaskSemaphore(1)})) task = ReturnFooTask(self.transfer_coordinator) future = self.transfer_coordinator.submit(executor, task, tag='my-tag') executor.shutdown() # Make sure the future got submit and executed as well by checking its # result value which should include the provided future tag. self.assertEqual( executor.submissions, [{'block': True, 'tag': 'my-tag', 'task': task}] ) self.assertEqual(future.result(), 'foo') def test_association_and_disassociation_on_submit(self): self.transfer_coordinator = RecordingTransferCoordinator() # Submit a callable to the transfer coordinator. executor = BoundedExecutor(1, 1) task = ReturnFooTask(self.transfer_coordinator) future = self.transfer_coordinator.submit(executor, task) executor.shutdown() # Make sure the future that got submitted was associated to the # transfer future at some point. self.assertEqual( self.transfer_coordinator.all_transfer_futures_ever_associated, set([future]) ) # Make sure the future got disassociated once the future is now done # by looking at the currently associated futures. self.assertEqual( self.transfer_coordinator.associated_futures, set([])) def test_done(self): # These should result in not done state: # queued self.assertFalse(self.transfer_coordinator.done()) # running self.transfer_coordinator.set_status_to_running() self.assertFalse(self.transfer_coordinator.done()) # These should result in done state: # failed self.transfer_coordinator.set_exception(Exception) self.assertTrue(self.transfer_coordinator.done()) # success self.transfer_coordinator.set_result('foo') self.assertTrue(self.transfer_coordinator.done()) # cancelled self.transfer_coordinator.cancel() self.assertTrue(self.transfer_coordinator.done()) def test_result_waits_until_done(self): execution_order = [] def sleep_then_set_result(transfer_coordinator, execution_order): time.sleep(0.05) execution_order.append('setting_result') transfer_coordinator.set_result(None) self.transfer_coordinator.announce_done() with ThreadPoolExecutor(max_workers=1) as executor: executor.submit( sleep_then_set_result, self.transfer_coordinator, execution_order) self.transfer_coordinator.result() execution_order.append('after_result') # The result() call should have waited until the other thread set # the result after sleeping for 0.05 seconds. self.assertTrue(execution_order, ['setting_result', 'after_result']) def test_failure_cleanups(self): args = (1, 2) kwargs = {'foo': 'bar'} second_args = (2, 4) second_kwargs = {'biz': 'baz'} self.transfer_coordinator.add_failure_cleanup( return_call_args, *args, **kwargs) self.transfer_coordinator.add_failure_cleanup( return_call_args, *second_args, **second_kwargs) # Ensure the callbacks got added. self.assertEqual(len(self.transfer_coordinator.failure_cleanups), 2) result_list = [] # Ensure they will get called in the correct order. for cleanup in self.transfer_coordinator.failure_cleanups: result_list.append(cleanup()) self.assertEqual( result_list, [(args, kwargs), (second_args, second_kwargs)]) def test_associated_futures(self): first_future = object() # Associate one future to the transfer self.transfer_coordinator.add_associated_future(first_future) associated_futures = self.transfer_coordinator.associated_futures # The first future should be in the returned list of futures. self.assertEqual(associated_futures, set([first_future])) second_future = object() # Associate another future to the transfer. 
self.transfer_coordinator.add_associated_future(second_future) # The association should not have mutated the returned list from # before. self.assertEqual(associated_futures, set([first_future])) # Both futures should be in the returned list. self.assertEqual( self.transfer_coordinator.associated_futures, set([first_future, second_future])) def test_done_callbacks_on_done(self): done_callback_invocations = [] callback = FunctionContainer( done_callback_invocations.append, 'done callback called') # Add the done callback to the transfer. self.transfer_coordinator.add_done_callback(callback) # Announce that the transfer is done. This should invoke the done # callback. self.transfer_coordinator.announce_done() self.assertEqual(done_callback_invocations, ['done callback called']) # If done is announced again, we should not invoke the callback again # because done has already been announced and thus the callback has # been ran as well. self.transfer_coordinator.announce_done() self.assertEqual(done_callback_invocations, ['done callback called']) def test_failure_cleanups_on_done(self): cleanup_invocations = [] callback = FunctionContainer( cleanup_invocations.append, 'cleanup called') # Add the failure cleanup to the transfer. self.transfer_coordinator.add_failure_cleanup(callback) # Announce that the transfer is done. This should invoke the failure # cleanup. self.transfer_coordinator.announce_done() self.assertEqual(cleanup_invocations, ['cleanup called']) # If done is announced again, we should not invoke the cleanup again # because done has already been announced and thus the cleanup has # been ran as well. self.transfer_coordinator.announce_done() self.assertEqual(cleanup_invocations, ['cleanup called']) class TestBoundedExecutor(unittest.TestCase): def setUp(self): self.coordinator = TransferCoordinator() self.tag_semaphores = {} self.executor = self.get_executor() def get_executor(self, max_size=1, max_num_threads=1): return BoundedExecutor(max_size, max_num_threads, self.tag_semaphores) def get_task(self, task_cls, main_kwargs=None): return task_cls(self.coordinator, main_kwargs=main_kwargs) def get_sleep_task(self, sleep_time=0.01): return self.get_task(SleepTask, main_kwargs={'sleep_time': sleep_time}) def add_semaphore(self, task_tag, count): self.tag_semaphores[task_tag] = TaskSemaphore(count) def assert_submit_would_block(self, task, tag=None): with self.assertRaises(NoResourcesAvailable): self.executor.submit(task, tag=tag, block=False) def assert_submit_would_not_block(self, task, tag=None, **kwargs): try: self.executor.submit(task, tag=tag, block=False) except NoResourcesAvailable: self.fail('Task %s should not have been blocked' % task) def add_done_callback_to_future(self, future, fn, *args, **kwargs): callback_for_future = FunctionContainer(fn, *args, **kwargs) future.add_done_callback(callback_for_future) def test_submit_single_task(self): # Ensure we can submit a task to the executor task = self.get_task(ReturnFooTask) future = self.executor.submit(task) # Ensure what we get back is a Future self.assertIsInstance(future, ExecutorFuture) # Ensure the callable got executed. 
        self.assertEqual(future.result(), 'foo')

    def test_executor_blocks_on_full_capacity(self):
        first_task = self.get_sleep_task()
        second_task = self.get_sleep_task()
        self.executor.submit(first_task)
        # The first task should be sleeping for a substantial period of
        # time such that on the submission of the second task, it will
        # raise an error saying that it cannot be submitted as the max
        # capacity of the semaphore is one.
        self.assert_submit_would_block(second_task)

    def test_executor_clears_capacity_on_done_tasks(self):
        first_task = self.get_task(ReturnFooTask)
        second_task = self.get_task(ReturnFooTask)

        # Submit a task.
        future = self.executor.submit(first_task)

        # Submit a new task when the first task finishes. This should not get
        # blocked because the first task should have finished clearing up
        # capacity.
        self.add_done_callback_to_future(
            future, self.assert_submit_would_not_block, second_task)

        # Wait for it to complete.
        self.executor.shutdown()

    def test_would_not_block_when_full_capacity_in_other_semaphore(self):
        first_task = self.get_sleep_task()

        # Now let's create a new task with a tag so it uses a different
        # semaphore.
        task_tag = 'other'
        other_task = self.get_sleep_task()
        self.add_semaphore(task_tag, 1)

        # Submit the normal first task
        self.executor.submit(first_task)

        # Even though the first task should be sleeping for a substantial
        # period of time, the submission of the second task should not
        # raise an error because it should use a different semaphore
        self.assert_submit_would_not_block(other_task, task_tag)

        # Another submission of the other task though should raise
        # an exception as the capacity is equal to one for that tag.
        self.assert_submit_would_block(other_task, task_tag)

    def test_shutdown(self):
        slow_task = self.get_sleep_task()
        future = self.executor.submit(slow_task)
        self.executor.shutdown()
        # Ensure that the shutdown waits until the task is done
        self.assertTrue(future.done())

    def test_shutdown_no_wait(self):
        slow_task = self.get_sleep_task()
        future = self.executor.submit(slow_task)
        self.executor.shutdown(False)
        # Ensure that the shutdown returns immediately even if the task is
        # not done, which it should not be because it is slow.
self.assertFalse(future.done()) def test_replace_underlying_executor(self): mocked_executor_cls = mock.Mock(BaseExecutor) executor = BoundedExecutor(10, 1, {}, mocked_executor_cls) executor.submit(self.get_task(ReturnFooTask)) self.assertTrue(mocked_executor_cls.return_value.submit.called) class TestExecutorFuture(unittest.TestCase): def test_result(self): with ThreadPoolExecutor(max_workers=1) as executor: future = executor.submit(return_call_args, 'foo', biz='baz') wrapped_future = ExecutorFuture(future) self.assertEqual(wrapped_future.result(), (('foo',), {'biz': 'baz'})) def test_done(self): with ThreadPoolExecutor(max_workers=1) as executor: future = executor.submit(return_call_args, 'foo', biz='baz') wrapped_future = ExecutorFuture(future) self.assertTrue(wrapped_future.done()) def test_add_done_callback(self): done_callbacks = [] with ThreadPoolExecutor(max_workers=1) as executor: future = executor.submit(return_call_args, 'foo', biz='baz') wrapped_future = ExecutorFuture(future) wrapped_future.add_done_callback( FunctionContainer(done_callbacks.append, 'called')) self.assertEqual(done_callbacks, ['called']) class TestNonThreadedExecutor(unittest.TestCase): def test_submit(self): executor = NonThreadedExecutor() future = executor.submit(return_call_args, 1, 2, foo='bar') self.assertIsInstance(future, NonThreadedExecutorFuture) self.assertEqual(future.result(), ((1, 2), {'foo': 'bar'})) def test_submit_with_exception(self): executor = NonThreadedExecutor() future = executor.submit(raise_exception, RuntimeError()) self.assertIsInstance(future, NonThreadedExecutorFuture) with self.assertRaises(RuntimeError): future.result() def test_submit_with_exception_and_captures_info(self): exception = ValueError('message') tb = get_exc_info(exception)[2] future = NonThreadedExecutor().submit(raise_exception, exception) try: future.result() # An exception should have been raised self.fail('Future should have raised a ValueError') except ValueError: actual_tb = sys.exc_info()[2] last_frame = traceback.extract_tb(actual_tb)[-1] last_expected_frame = traceback.extract_tb(tb)[-1] self.assertEqual(last_frame, last_expected_frame) class TestNonThreadedExecutorFuture(unittest.TestCase): def setUp(self): self.future = NonThreadedExecutorFuture() def test_done_starts_false(self): self.assertFalse(self.future.done()) def test_done_after_setting_result(self): self.future.set_result('result') self.assertTrue(self.future.done()) def test_done_after_setting_exception(self): self.future.set_exception_info(Exception(), None) self.assertTrue(self.future.done()) def test_result(self): self.future.set_result('result') self.assertEqual(self.future.result(), 'result') def test_exception_result(self): exception = ValueError('message') self.future.set_exception_info(exception, None) with self.assertRaisesRegexp(ValueError, 'message'): self.future.result() def test_exception_result_doesnt_modify_last_frame(self): exception = ValueError('message') tb = get_exc_info(exception)[2] self.future.set_exception_info(exception, tb) try: self.future.result() # An exception should have been raised self.fail() except ValueError: actual_tb = sys.exc_info()[2] last_frame = traceback.extract_tb(actual_tb)[-1] last_expected_frame = traceback.extract_tb(tb)[-1] self.assertEqual(last_frame, last_expected_frame) def test_done_callback(self): done_futures = [] self.future.add_done_callback(done_futures.append) self.assertEqual(done_futures, []) self.future.set_result('result') self.assertEqual(done_futures, [self.future]) def 
test_done_callback_after_done(self): self.future.set_result('result') done_futures = [] self.future.add_done_callback(done_futures.append) self.assertEqual(done_futures, [self.future]) s3transfer-0.1.13/tests/unit/test_manager.py000066400000000000000000000131751324114246300210410ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. import time from concurrent.futures import ThreadPoolExecutor from tests import unittest from tests import TransferCoordinatorWithInterrupt from s3transfer.exceptions import CancelledError from s3transfer.exceptions import FatalError from s3transfer.futures import TransferCoordinator from s3transfer.manager import TransferConfig from s3transfer.manager import TransferCoordinatorController class FutureResultException(Exception): pass class TestTransferConfig(unittest.TestCase): def test_exception_on_zero_attr_value(self): with self.assertRaises(ValueError): TransferConfig(max_request_queue_size=0) class TestTransferCoordinatorController(unittest.TestCase): def setUp(self): self.coordinator_controller = TransferCoordinatorController() def sleep_then_announce_done(self, transfer_coordinator, sleep_time): time.sleep(sleep_time) transfer_coordinator.set_result('done') transfer_coordinator.announce_done() def assert_coordinator_is_cancelled(self, transfer_coordinator): self.assertEqual(transfer_coordinator.status, 'cancelled') def test_add_transfer_coordinator(self): transfer_coordinator = TransferCoordinator() # Add the transfer coordinator self.coordinator_controller.add_transfer_coordinator( transfer_coordinator) # Ensure that is tracked. self.assertEqual( self.coordinator_controller.tracked_transfer_coordinators, set([transfer_coordinator])) def test_remove_transfer_coordinator(self): transfer_coordinator = TransferCoordinator() # Add the coordinator self.coordinator_controller.add_transfer_coordinator( transfer_coordinator) # Now remove the coordinator self.coordinator_controller.remove_transfer_coordinator( transfer_coordinator) # Make sure that it is no longer getting tracked. 
self.assertEqual( self.coordinator_controller.tracked_transfer_coordinators, set()) def test_cancel(self): transfer_coordinator = TransferCoordinator() # Add the transfer coordinator self.coordinator_controller.add_transfer_coordinator( transfer_coordinator) # Cancel with the canceler self.coordinator_controller.cancel() # Check that coordinator got canceled self.assert_coordinator_is_cancelled(transfer_coordinator) def test_cancel_with_message(self): message = 'my cancel message' transfer_coordinator = TransferCoordinator() self.coordinator_controller.add_transfer_coordinator( transfer_coordinator) self.coordinator_controller.cancel(message) transfer_coordinator.announce_done() with self.assertRaisesRegexp(CancelledError, message): transfer_coordinator.result() def test_cancel_with_provided_exception(self): message = 'my cancel message' transfer_coordinator = TransferCoordinator() self.coordinator_controller.add_transfer_coordinator( transfer_coordinator) self.coordinator_controller.cancel(message, exc_type=FatalError) transfer_coordinator.announce_done() with self.assertRaisesRegexp(FatalError, message): transfer_coordinator.result() def test_wait_for_done_transfer_coordinators(self): # Create a coordinator and add it to the canceler transfer_coordinator = TransferCoordinator() self.coordinator_controller.add_transfer_coordinator( transfer_coordinator) sleep_time = 0.02 with ThreadPoolExecutor(max_workers=1) as executor: # In a seperate thread sleep and then set the transfer coordinator # to done after sleeping. start_time = time.time() executor.submit( self.sleep_then_announce_done, transfer_coordinator, sleep_time) # Now call wait to wait for the transfer coordinator to be done. self.coordinator_controller.wait() end_time = time.time() wait_time = end_time - start_time # The time waited should not be less than the time it took to sleep in # the seperate thread because the wait ending should be dependent on # the sleeping thread announcing that the transfer coordinator is done. self.assertTrue(sleep_time <= wait_time) def test_wait_does_not_propogate_exceptions_from_result(self): transfer_coordinator = TransferCoordinator() transfer_coordinator.set_exception(FutureResultException()) transfer_coordinator.announce_done() try: self.coordinator_controller.wait() except FutureResultException as e: self.fail('%s should not have been raised.' % e) def test_wait_can_be_interrupted(self): inject_interrupt_coordinator = TransferCoordinatorWithInterrupt() self.coordinator_controller.add_transfer_coordinator( inject_interrupt_coordinator) with self.assertRaises(KeyboardInterrupt): self.coordinator_controller.wait() s3transfer-0.1.13/tests/unit/test_s3transfer.py000066400000000000000000000716001324114246300215160ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the 'License'). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the 'license' file accompanying this file. This file is # distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
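# The tests that follow exercise the legacy high-level ``S3Transfer`` interface
# against a mocked client and an in-memory OS layer. A rough usage sketch of the
# interface under test, kept entirely in comments; the ``boto3.client('s3')`` call
# and the concrete sizes are assumptions -- these tests only ever use a mocked
# client:
#
#     import boto3
#     from s3transfer import S3Transfer, TransferConfig
#
#     client = boto3.client('s3')
#     config = TransferConfig(multipart_threshold=8 * 1024 * 1024,
#                             multipart_chunksize=8 * 1024 * 1024)
#     transfer = S3Transfer(client, config=config)
#     transfer.upload_file('/tmp/myfile', 'mybucket', 'mykey',
#                          extra_args={'ACL': 'public-read'})
#     transfer.download_file('mybucket', 'mykey', '/tmp/myfile.copy')
#
# Transfers over the multipart threshold are handed to MultipartUploader /
# MultipartDownloader, while smaller ones go through single PutObject / GetObject
# calls -- which is what the tests below assert.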
import os import tempfile import shutil import socket from tests import unittest from contextlib import closing import mock from botocore.vendored import six from concurrent import futures from s3transfer.exceptions import RetriesExceededError from s3transfer.exceptions import S3UploadFailedError from s3transfer import ReadFileChunk, StreamReaderProgress from s3transfer import S3Transfer from s3transfer import OSUtils, TransferConfig from s3transfer import MultipartDownloader, MultipartUploader from s3transfer import ShutdownQueue from s3transfer import QueueShutdownError from s3transfer import random_file_extension from s3transfer import disable_upload_callbacks, enable_upload_callbacks class InMemoryOSLayer(OSUtils): def __init__(self, filemap): self.filemap = filemap def get_file_size(self, filename): return len(self.filemap[filename]) def open_file_chunk_reader(self, filename, start_byte, size, callback): return closing(six.BytesIO(self.filemap[filename])) def open(self, filename, mode): if 'wb' in mode: fileobj = six.BytesIO() self.filemap[filename] = fileobj return closing(fileobj) else: return closing(self.filemap[filename]) def remove_file(self, filename): if filename in self.filemap: del self.filemap[filename] def rename_file(self, current_filename, new_filename): if current_filename in self.filemap: self.filemap[new_filename] = self.filemap.pop( current_filename) class SequentialExecutor(object): def __init__(self, max_workers): pass def __enter__(self): return self def __exit__(self, *args, **kwargs): pass # The real map() interface actually takes *args, but we specifically do # _not_ use this interface. def map(self, function, args): results = [] for arg in args: results.append(function(arg)) return results def submit(self, function): future = futures.Future() future.set_result(function()) return future class TestOSUtils(unittest.TestCase): def setUp(self): self.tempdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tempdir) def test_get_file_size(self): with mock.patch('os.path.getsize') as m: OSUtils().get_file_size('myfile') m.assert_called_with('myfile') def test_open_file_chunk_reader(self): with mock.patch('s3transfer.ReadFileChunk') as m: OSUtils().open_file_chunk_reader('myfile', 0, 100, None) m.from_filename.assert_called_with('myfile', 0, 100, None, enable_callback=False) def test_open_file(self): fileobj = OSUtils().open(os.path.join(self.tempdir, 'foo'), 'w') self.assertTrue(hasattr(fileobj, 'write')) def test_remove_file_ignores_errors(self): with mock.patch('os.remove') as remove: remove.side_effect = OSError('fake error') OSUtils().remove_file('foo') remove.assert_called_with('foo') def test_remove_file_proxies_remove_file(self): with mock.patch('os.remove') as remove: OSUtils().remove_file('foo') remove.assert_called_with('foo') def test_rename_file(self): with mock.patch('s3transfer.compat.rename_file') as rename_file: OSUtils().rename_file('foo', 'newfoo') rename_file.assert_called_with('foo', 'newfoo') class TestReadFileChunk(unittest.TestCase): def setUp(self): self.tempdir = tempfile.mkdtemp() def tearDown(self): shutil.rmtree(self.tempdir) def test_read_entire_chunk(self): filename = os.path.join(self.tempdir, 'foo') with open(filename, 'wb') as f: f.write(b'onetwothreefourfivesixseveneightnineten') chunk = ReadFileChunk.from_filename( filename, start_byte=0, chunk_size=3) self.assertEqual(chunk.read(), b'one') self.assertEqual(chunk.read(), b'') def test_read_with_amount_size(self): filename = os.path.join(self.tempdir, 'foo') with 
open(filename, 'wb') as f: f.write(b'onetwothreefourfivesixseveneightnineten') chunk = ReadFileChunk.from_filename( filename, start_byte=11, chunk_size=4) self.assertEqual(chunk.read(1), b'f') self.assertEqual(chunk.read(1), b'o') self.assertEqual(chunk.read(1), b'u') self.assertEqual(chunk.read(1), b'r') self.assertEqual(chunk.read(1), b'') def test_reset_stream_emulation(self): filename = os.path.join(self.tempdir, 'foo') with open(filename, 'wb') as f: f.write(b'onetwothreefourfivesixseveneightnineten') chunk = ReadFileChunk.from_filename( filename, start_byte=11, chunk_size=4) self.assertEqual(chunk.read(), b'four') chunk.seek(0) self.assertEqual(chunk.read(), b'four') def test_read_past_end_of_file(self): filename = os.path.join(self.tempdir, 'foo') with open(filename, 'wb') as f: f.write(b'onetwothreefourfivesixseveneightnineten') chunk = ReadFileChunk.from_filename( filename, start_byte=36, chunk_size=100000) self.assertEqual(chunk.read(), b'ten') self.assertEqual(chunk.read(), b'') self.assertEqual(len(chunk), 3) def test_tell_and_seek(self): filename = os.path.join(self.tempdir, 'foo') with open(filename, 'wb') as f: f.write(b'onetwothreefourfivesixseveneightnineten') chunk = ReadFileChunk.from_filename( filename, start_byte=36, chunk_size=100000) self.assertEqual(chunk.tell(), 0) self.assertEqual(chunk.read(), b'ten') self.assertEqual(chunk.tell(), 3) chunk.seek(0) self.assertEqual(chunk.tell(), 0) def test_file_chunk_supports_context_manager(self): filename = os.path.join(self.tempdir, 'foo') with open(filename, 'wb') as f: f.write(b'abc') with ReadFileChunk.from_filename(filename, start_byte=0, chunk_size=2) as chunk: val = chunk.read() self.assertEqual(val, b'ab') def test_iter_is_always_empty(self): # This tests the workaround for the httplib bug (see # the source for more info). filename = os.path.join(self.tempdir, 'foo') open(filename, 'wb').close() chunk = ReadFileChunk.from_filename( filename, start_byte=0, chunk_size=10) self.assertEqual(list(chunk), []) class TestReadFileChunkWithCallback(TestReadFileChunk): def setUp(self): super(TestReadFileChunkWithCallback, self).setUp() self.filename = os.path.join(self.tempdir, 'foo') with open(self.filename, 'wb') as f: f.write(b'abc') self.amounts_seen = [] def callback(self, amount): self.amounts_seen.append(amount) def test_callback_is_invoked_on_read(self): chunk = ReadFileChunk.from_filename( self.filename, start_byte=0, chunk_size=3, callback=self.callback) chunk.read(1) chunk.read(1) chunk.read(1) self.assertEqual(self.amounts_seen, [1, 1, 1]) def test_callback_can_be_disabled(self): chunk = ReadFileChunk.from_filename( self.filename, start_byte=0, chunk_size=3, callback=self.callback) chunk.disable_callback() # Now reading from the ReadFileChunk should not invoke # the callback. 
chunk.read() self.assertEqual(self.amounts_seen, []) def test_callback_will_also_be_triggered_by_seek(self): chunk = ReadFileChunk.from_filename( self.filename, start_byte=0, chunk_size=3, callback=self.callback) chunk.read(2) chunk.seek(0) chunk.read(2) chunk.seek(1) chunk.read(2) self.assertEqual(self.amounts_seen, [2, -2, 2, -1, 2]) class TestStreamReaderProgress(unittest.TestCase): def test_proxies_to_wrapped_stream(self): original_stream = six.StringIO('foobarbaz') wrapped = StreamReaderProgress(original_stream) self.assertEqual(wrapped.read(), 'foobarbaz') def test_callback_invoked(self): amounts_seen = [] def callback(amount): amounts_seen.append(amount) original_stream = six.StringIO('foobarbaz') wrapped = StreamReaderProgress(original_stream, callback) self.assertEqual(wrapped.read(), 'foobarbaz') self.assertEqual(amounts_seen, [9]) class TestMultipartUploader(unittest.TestCase): def test_multipart_upload_uses_correct_client_calls(self): client = mock.Mock() uploader = MultipartUploader( client, TransferConfig(), InMemoryOSLayer({'filename': b'foobar'}), SequentialExecutor) client.create_multipart_upload.return_value = {'UploadId': 'upload_id'} client.upload_part.return_value = {'ETag': 'first'} uploader.upload_file('filename', 'bucket', 'key', None, {}) # We need to check both the sequence of calls (create/upload/complete) # as well as the params passed between the calls, including # 1. The upload_id was plumbed through # 2. The collected etags were added to the complete call. client.create_multipart_upload.assert_called_with( Bucket='bucket', Key='key') # Should be two parts. client.upload_part.assert_called_with( Body=mock.ANY, Bucket='bucket', UploadId='upload_id', Key='key', PartNumber=1) client.complete_multipart_upload.assert_called_with( MultipartUpload={'Parts': [{'PartNumber': 1, 'ETag': 'first'}]}, Bucket='bucket', UploadId='upload_id', Key='key') def test_multipart_upload_injects_proper_kwargs(self): client = mock.Mock() uploader = MultipartUploader( client, TransferConfig(), InMemoryOSLayer({'filename': b'foobar'}), SequentialExecutor) client.create_multipart_upload.return_value = {'UploadId': 'upload_id'} client.upload_part.return_value = {'ETag': 'first'} extra_args = { 'SSECustomerKey': 'fakekey', 'SSECustomerAlgorithm': 'AES256', 'StorageClass': 'REDUCED_REDUNDANCY' } uploader.upload_file('filename', 'bucket', 'key', None, extra_args) client.create_multipart_upload.assert_called_with( Bucket='bucket', Key='key', # The initial call should inject all the storage class params. SSECustomerKey='fakekey', SSECustomerAlgorithm='AES256', StorageClass='REDUCED_REDUNDANCY') client.upload_part.assert_called_with( Body=mock.ANY, Bucket='bucket', UploadId='upload_id', Key='key', PartNumber=1, # We only have to forward certain **extra_args in subsequent # UploadPart calls. SSECustomerKey='fakekey', SSECustomerAlgorithm='AES256', ) client.complete_multipart_upload.assert_called_with( MultipartUpload={'Parts': [{'PartNumber': 1, 'ETag': 'first'}]}, Bucket='bucket', UploadId='upload_id', Key='key') def test_multipart_upload_is_aborted_on_error(self): # If the create_multipart_upload succeeds and any upload_part # fails, then abort_multipart_upload will be called. 
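        # Concretely, the expected call sequence is
        # create_multipart_upload -> upload_part (raises) -> abort_multipart_upload,
        # with the failure surfacing to the caller as S3UploadFailedError, which is
        # what the assertions below verify.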
client = mock.Mock() uploader = MultipartUploader( client, TransferConfig(), InMemoryOSLayer({'filename': b'foobar'}), SequentialExecutor) client.create_multipart_upload.return_value = {'UploadId': 'upload_id'} client.upload_part.side_effect = Exception( "Some kind of error occurred.") with self.assertRaises(S3UploadFailedError): uploader.upload_file('filename', 'bucket', 'key', None, {}) client.abort_multipart_upload.assert_called_with( Bucket='bucket', Key='key', UploadId='upload_id') class TestMultipartDownloader(unittest.TestCase): maxDiff = None def test_multipart_download_uses_correct_client_calls(self): client = mock.Mock() response_body = b'foobarbaz' client.get_object.return_value = {'Body': six.BytesIO(response_body)} downloader = MultipartDownloader(client, TransferConfig(), InMemoryOSLayer({}), SequentialExecutor) downloader.download_file('bucket', 'key', 'filename', len(response_body), {}) client.get_object.assert_called_with( Range='bytes=0-', Bucket='bucket', Key='key' ) def test_multipart_download_with_multiple_parts(self): client = mock.Mock() response_body = b'foobarbaz' client.get_object.return_value = {'Body': six.BytesIO(response_body)} # For testing purposes, we're testing with a multipart threshold # of 4 bytes and a chunksize of 4 bytes. Given b'foobarbaz', # this should result in 3 calls. In python slices this would be: # r[0:4], r[4:8], r[8:9]. But the Range param will be slightly # different because they use inclusive ranges. config = TransferConfig(multipart_threshold=4, multipart_chunksize=4) downloader = MultipartDownloader(client, config, InMemoryOSLayer({}), SequentialExecutor) downloader.download_file('bucket', 'key', 'filename', len(response_body), {}) # We're storing these in **extra because the assertEqual # below is really about verifying we have the correct value # for the Range param. extra = {'Bucket': 'bucket', 'Key': 'key'} self.assertEqual(client.get_object.call_args_list, # Note these are inclusive ranges. [mock.call(Range='bytes=0-3', **extra), mock.call(Range='bytes=4-7', **extra), mock.call(Range='bytes=8-', **extra)]) def test_retry_on_failures_from_stream_reads(self): # If we get an exception during a call to the response body's .read() # method, we should retry the request. client = mock.Mock() response_body = b'foobarbaz' stream_with_errors = mock.Mock() stream_with_errors.read.side_effect = [ socket.error("fake error"), response_body ] client.get_object.return_value = {'Body': stream_with_errors} config = TransferConfig(multipart_threshold=4, multipart_chunksize=4) downloader = MultipartDownloader(client, config, InMemoryOSLayer({}), SequentialExecutor) downloader.download_file('bucket', 'key', 'filename', len(response_body), {}) # We're storing these in **extra because the assertEqual # below is really about verifying we have the correct value # for the Range param. extra = {'Bucket': 'bucket', 'Key': 'key'} self.assertEqual(client.get_object.call_args_list, # The first call to range=0-3 fails because of the # side_effect above where we make the .read() raise a # socket.error. # The second call to range=0-3 then succeeds. 
[mock.call(Range='bytes=0-3', **extra), mock.call(Range='bytes=0-3', **extra), mock.call(Range='bytes=4-7', **extra), mock.call(Range='bytes=8-', **extra)]) def test_exception_raised_on_exceeded_retries(self): client = mock.Mock() response_body = b'foobarbaz' stream_with_errors = mock.Mock() stream_with_errors.read.side_effect = socket.error("fake error") client.get_object.return_value = {'Body': stream_with_errors} config = TransferConfig(multipart_threshold=4, multipart_chunksize=4) downloader = MultipartDownloader(client, config, InMemoryOSLayer({}), SequentialExecutor) with self.assertRaises(RetriesExceededError): downloader.download_file('bucket', 'key', 'filename', len(response_body), {}) def test_io_thread_failure_triggers_shutdown(self): client = mock.Mock() response_body = b'foobarbaz' client.get_object.return_value = {'Body': six.BytesIO(response_body)} os_layer = mock.Mock() mock_fileobj = mock.MagicMock() mock_fileobj.__enter__.return_value = mock_fileobj mock_fileobj.write.side_effect = Exception("fake IO error") os_layer.open.return_value = mock_fileobj downloader = MultipartDownloader(client, TransferConfig(), os_layer, SequentialExecutor) # We're verifying that the exception raised from the IO future # propogates back up via download_file(). with self.assertRaisesRegexp(Exception, "fake IO error"): downloader.download_file('bucket', 'key', 'filename', len(response_body), {}) def test_download_futures_fail_triggers_shutdown(self): class FailedDownloadParts(SequentialExecutor): def __init__(self, max_workers): self.is_first = True def submit(self, function): future = super(FailedDownloadParts, self).submit(function) if self.is_first: # This is the download_parts_thread. future.set_exception( Exception("fake download parts error")) self.is_first = False return future client = mock.Mock() response_body = b'foobarbaz' client.get_object.return_value = {'Body': six.BytesIO(response_body)} downloader = MultipartDownloader(client, TransferConfig(), InMemoryOSLayer({}), FailedDownloadParts) with self.assertRaisesRegexp(Exception, "fake download parts error"): downloader.download_file('bucket', 'key', 'filename', len(response_body), {}) class TestS3Transfer(unittest.TestCase): def setUp(self): self.client = mock.Mock() self.random_file_patch = mock.patch( 's3transfer.random_file_extension') self.random_file = self.random_file_patch.start() self.random_file.return_value = 'RANDOM' def tearDown(self): self.random_file_patch.stop() def test_callback_handlers_register_on_put_item(self): osutil = InMemoryOSLayer({'smallfile': b'foobar'}) transfer = S3Transfer(self.client, osutil=osutil) transfer.upload_file('smallfile', 'bucket', 'key') events = self.client.meta.events events.register_first.assert_called_with( 'request-created.s3', disable_upload_callbacks, unique_id='s3upload-callback-disable', ) events.register_last.assert_called_with( 'request-created.s3', enable_upload_callbacks, unique_id='s3upload-callback-enable', ) def test_upload_below_multipart_threshold_uses_put_object(self): fake_files = { 'smallfile': b'foobar', } osutil = InMemoryOSLayer(fake_files) transfer = S3Transfer(self.client, osutil=osutil) transfer.upload_file('smallfile', 'bucket', 'key') self.client.put_object.assert_called_with( Bucket='bucket', Key='key', Body=mock.ANY ) def test_extra_args_on_uploaded_passed_to_api_call(self): extra_args = {'ACL': 'public-read'} fake_files = { 'smallfile': b'hello world' } osutil = InMemoryOSLayer(fake_files) transfer = S3Transfer(self.client, osutil=osutil) 
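        # Each entry in extra_args should be forwarded verbatim as a keyword
        # argument on the underlying PutObject call, as asserted below.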
transfer.upload_file('smallfile', 'bucket', 'key', extra_args=extra_args) self.client.put_object.assert_called_with( Bucket='bucket', Key='key', Body=mock.ANY, ACL='public-read' ) def test_uses_multipart_upload_when_over_threshold(self): with mock.patch('s3transfer.MultipartUploader') as uploader: fake_files = { 'smallfile': b'foobar', } osutil = InMemoryOSLayer(fake_files) config = TransferConfig(multipart_threshold=2, multipart_chunksize=2) transfer = S3Transfer(self.client, osutil=osutil, config=config) transfer.upload_file('smallfile', 'bucket', 'key') uploader.return_value.upload_file.assert_called_with( 'smallfile', 'bucket', 'key', None, {}) def test_uses_multipart_download_when_over_threshold(self): with mock.patch('s3transfer.MultipartDownloader') as downloader: osutil = InMemoryOSLayer({}) over_multipart_threshold = 100 * 1024 * 1024 transfer = S3Transfer(self.client, osutil=osutil) callback = mock.sentinel.CALLBACK self.client.head_object.return_value = { 'ContentLength': over_multipart_threshold, } transfer.download_file('bucket', 'key', 'filename', callback=callback) downloader.return_value.download_file.assert_called_with( # Note how we're downloading to a temorary random file. 'bucket', 'key', 'filename.RANDOM', over_multipart_threshold, {}, callback) def test_download_file_with_invalid_extra_args(self): below_threshold = 20 osutil = InMemoryOSLayer({}) transfer = S3Transfer(self.client, osutil=osutil) self.client.head_object.return_value = { 'ContentLength': below_threshold} with self.assertRaises(ValueError): transfer.download_file('bucket', 'key', '/tmp/smallfile', extra_args={'BadValue': 'foo'}) def test_upload_file_with_invalid_extra_args(self): osutil = InMemoryOSLayer({}) transfer = S3Transfer(self.client, osutil=osutil) bad_args = {"WebsiteRedirectLocation": "/foo"} with self.assertRaises(ValueError): transfer.upload_file('bucket', 'key', '/tmp/smallfile', extra_args=bad_args) def test_download_file_fowards_extra_args(self): extra_args = { 'SSECustomerKey': 'foo', 'SSECustomerAlgorithm': 'AES256', } below_threshold = 20 osutil = InMemoryOSLayer({'smallfile': b'hello world'}) transfer = S3Transfer(self.client, osutil=osutil) self.client.head_object.return_value = { 'ContentLength': below_threshold} self.client.get_object.return_value = { 'Body': six.BytesIO(b'foobar') } transfer.download_file('bucket', 'key', '/tmp/smallfile', extra_args=extra_args) # Note that we need to invoke the HeadObject call # and the PutObject call with the extra_args. # This is necessary. Trying to HeadObject an SSE object # will return a 400 if you don't provide the required # params. self.client.get_object.assert_called_with( Bucket='bucket', Key='key', SSECustomerAlgorithm='AES256', SSECustomerKey='foo') def test_get_object_stream_is_retried_and_succeeds(self): below_threshold = 20 osutil = InMemoryOSLayer({'smallfile': b'hello world'}) transfer = S3Transfer(self.client, osutil=osutil) self.client.head_object.return_value = { 'ContentLength': below_threshold} self.client.get_object.side_effect = [ # First request fails. socket.error("fake error"), # Second succeeds. 
{'Body': six.BytesIO(b'foobar')} ] transfer.download_file('bucket', 'key', '/tmp/smallfile') self.assertEqual(self.client.get_object.call_count, 2) def test_get_object_stream_uses_all_retries_and_errors_out(self): below_threshold = 20 osutil = InMemoryOSLayer({}) transfer = S3Transfer(self.client, osutil=osutil) self.client.head_object.return_value = { 'ContentLength': below_threshold} # Here we're raising an exception every single time, which # will exhaust our retry count and propogate a # RetriesExceededError. self.client.get_object.side_effect = socket.error("fake error") with self.assertRaises(RetriesExceededError): transfer.download_file('bucket', 'key', 'smallfile') self.assertEqual(self.client.get_object.call_count, 5) # We should have also cleaned up the in progress file # we were downloading to. self.assertEqual(osutil.filemap, {}) def test_download_below_multipart_threshold(self): below_threshold = 20 osutil = InMemoryOSLayer({'smallfile': b'hello world'}) transfer = S3Transfer(self.client, osutil=osutil) self.client.head_object.return_value = { 'ContentLength': below_threshold} self.client.get_object.return_value = { 'Body': six.BytesIO(b'foobar') } transfer.download_file('bucket', 'key', 'smallfile') self.client.get_object.assert_called_with(Bucket='bucket', Key='key') def test_can_create_with_just_client(self): transfer = S3Transfer(client=mock.Mock()) self.assertIsInstance(transfer, S3Transfer) class TestShutdownQueue(unittest.TestCase): def test_handles_normal_put_get_requests(self): q = ShutdownQueue() q.put('foo') self.assertEqual(q.get(), 'foo') def test_put_raises_error_on_shutdown(self): q = ShutdownQueue() q.trigger_shutdown() with self.assertRaises(QueueShutdownError): q.put('foo') class TestRandomFileExtension(unittest.TestCase): def test_has_proper_length(self): self.assertEqual( len(random_file_extension(num_digits=4)), 4) class TestCallbackHandlers(unittest.TestCase): def setUp(self): self.request = mock.Mock() def test_disable_request_on_put_object(self): disable_upload_callbacks(self.request, 'PutObject') self.request.body.disable_callback.assert_called_with() def test_disable_request_on_upload_part(self): disable_upload_callbacks(self.request, 'UploadPart') self.request.body.disable_callback.assert_called_with() def test_enable_object_on_put_object(self): enable_upload_callbacks(self.request, 'PutObject') self.request.body.enable_callback.assert_called_with() def test_enable_object_on_upload_part(self): enable_upload_callbacks(self.request, 'UploadPart') self.request.body.enable_callback.assert_called_with() def test_dont_disable_if_missing_interface(self): del self.request.body.disable_callback disable_upload_callbacks(self.request, 'PutObject') self.assertEqual(self.request.body.method_calls, []) def test_dont_enable_if_missing_interface(self): del self.request.body.enable_callback enable_upload_callbacks(self.request, 'PutObject') self.assertEqual(self.request.body.method_calls, []) def test_dont_disable_if_wrong_operation(self): disable_upload_callbacks(self.request, 'OtherOperation') self.assertFalse( self.request.body.disable_callback.called) def test_dont_enable_if_wrong_operation(self): enable_upload_callbacks(self.request, 'OtherOperation') self.assertFalse( self.request.body.enable_callback.called) s3transfer-0.1.13/tests/unit/test_subscribers.py000066400000000000000000000061471324114246300217560ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. 
# # Licensed under the Apache License, Version 2.0 (the 'License'). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the 'license' file accompanying this file. This file is # distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. from tests import unittest from s3transfer.exceptions import InvalidSubscriberMethodError from s3transfer.subscribers import BaseSubscriber class ExtraMethodsSubscriber(BaseSubscriber): def extra_method(self): return 'called extra method' class NotCallableSubscriber(BaseSubscriber): on_done = 'foo' class NoKwargsSubscriber(BaseSubscriber): def on_done(self): pass class OverrideMethodSubscriber(BaseSubscriber): def on_queued(self, **kwargs): return kwargs class OverrideConstructorSubscriber(BaseSubscriber): def __init__(self, arg1, arg2): self.arg1 = arg1 self.arg2 = arg2 class TestSubscribers(unittest.TestCase): def test_can_instantiate_base_subscriber(self): try: BaseSubscriber() except InvalidSubscriberMethodError: self.fail('BaseSubscriber should be instantiable') def test_can_call_base_subscriber_method(self): subscriber = BaseSubscriber() try: subscriber.on_done(future=None) except Exception as e: self.fail( 'Should be able to call base class subscriber method. ' 'instead got: %s' % e) def test_subclass_can_have_and_call_additional_methods(self): subscriber = ExtraMethodsSubscriber() self.assertEqual(subscriber.extra_method(), 'called extra method') def test_can_subclass_and_override_method_from_base_subscriber(self): subscriber = OverrideMethodSubscriber() # Make sure that the overriden method is called self.assertEqual(subscriber.on_queued(foo='bar'), {'foo': 'bar'}) def test_can_subclass_and_override_constructor_from_base_class(self): subscriber = OverrideConstructorSubscriber('foo', arg2='bar') # Make sure you can create a custom constructor. self.assertEqual(subscriber.arg1, 'foo') self.assertEqual(subscriber.arg2, 'bar') def test_invalid_arguments_in_constructor_of_subclass_subscriber(self): # The override constructor should still have validation of # constructor args. with self.assertRaises(TypeError): OverrideConstructorSubscriber() def test_not_callable_in_subclass_subscriber_method(self): with self.assertRaisesRegexp( InvalidSubscriberMethodError, 'must be callable'): NotCallableSubscriber() def test_no_kwargs_in_subclass_subscriber_method(self): with self.assertRaisesRegexp( InvalidSubscriberMethodError, 'must accept keyword'): NoKwargsSubscriber() s3transfer-0.1.13/tests/unit/test_tasks.py000066400000000000000000000732741324114246300205620ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. 
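# The helper tasks defined in this module all follow the same pattern: subclass
# ``Task``, implement ``_main()``, bind the task to a ``TransferCoordinator``, and
# invoke it as a callable. A rough sketch of that pattern, kept in comments; the
# ``UppercaseTask`` name is invented for illustration and the coordinator usage
# mirrors the tests below:
#
#     from s3transfer.futures import TransferCoordinator
#     from s3transfer.tasks import Task
#
#     class UppercaseTask(Task):
#         def _main(self, value):
#             return value.upper()
#
#     coordinator = TransferCoordinator()
#     UppercaseTask(coordinator, main_kwargs={'value': 'foo'}, is_final=True)()
#     assert coordinator.result() == 'FOO'  # the final task's return value
#                                           # becomes the coordinator's result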
from concurrent import futures from functools import partial from threading import Event from tests import unittest from tests import RecordingSubscriber from tests import BaseTaskTest from tests import BaseSubmissionTaskTest from s3transfer.futures import TransferCoordinator from s3transfer.futures import BoundedExecutor from s3transfer.subscribers import BaseSubscriber from s3transfer.tasks import Task from s3transfer.tasks import SubmissionTask from s3transfer.tasks import CreateMultipartUploadTask from s3transfer.tasks import CompleteMultipartUploadTask from s3transfer.utils import get_callbacks from s3transfer.utils import CallArgs from s3transfer.utils import FunctionContainer class TaskFailureException(Exception): pass class SuccessTask(Task): def _main(self, return_value='success', callbacks=None, failure_cleanups=None): if callbacks: for callback in callbacks: callback() if failure_cleanups: for failure_cleanup in failure_cleanups: self._transfer_coordinator.add_failure_cleanup(failure_cleanup) return return_value class FailureTask(Task): def _main(self, exception=TaskFailureException): raise exception() class ReturnKwargsTask(Task): def _main(self, **kwargs): return kwargs class SubmitMoreTasksTask(Task): def _main(self, executor, tasks_to_submit): for task_to_submit in tasks_to_submit: self._transfer_coordinator.submit(executor, task_to_submit) class NOOPSubmissionTask(SubmissionTask): def _submit(self, transfer_future, **kwargs): pass class ExceptionSubmissionTask(SubmissionTask): def _submit(self, transfer_future, executor=None, tasks_to_submit=None, additional_callbacks=None, exception=TaskFailureException): if executor and tasks_to_submit: for task_to_submit in tasks_to_submit: self._transfer_coordinator.submit(executor, task_to_submit) if additional_callbacks: for callback in additional_callbacks: callback() raise exception() class StatusRecordingTransferCoordinator(TransferCoordinator): def __init__(self, transfer_id=None): super(StatusRecordingTransferCoordinator, self).__init__(transfer_id) self.status_changes = [self._status] def set_status_to_queued(self): super(StatusRecordingTransferCoordinator, self).set_status_to_queued() self._record_status_change() def set_status_to_running(self): super(StatusRecordingTransferCoordinator, self).set_status_to_running() self._record_status_change() def _record_status_change(self): self.status_changes.append(self._status) class RecordingStateSubscriber(BaseSubscriber): def __init__(self, transfer_coordinator): self._transfer_coordinator = transfer_coordinator self.status_during_on_queued = None def on_queued(self, **kwargs): self.status_during_on_queued = self._transfer_coordinator.status class TestSubmissionTask(BaseSubmissionTaskTest): def setUp(self): super(TestSubmissionTask, self).setUp() self.executor = BoundedExecutor(1000, 5) self.call_args = CallArgs(subscribers=[]) self.transfer_future = self.get_transfer_future(self.call_args) self.main_kwargs = {'transfer_future': self.transfer_future} def test_transitions_from_not_started_to_queued_to_running(self): self.transfer_coordinator = StatusRecordingTransferCoordinator() submission_task = self.get_task( NOOPSubmissionTask, main_kwargs=self.main_kwargs) # Status should be queued until submission task has been ran. self.assertEqual(self.transfer_coordinator.status, 'not-started') submission_task() # Once submission task has been ran, the status should now be running. self.assertEqual(self.transfer_coordinator.status, 'running') # Ensure the transitions were as expected as well. 
self.assertEqual( self.transfer_coordinator.status_changes, ['not-started', 'queued', 'running'] ) def test_on_queued_callbacks(self): submission_task = self.get_task( NOOPSubmissionTask, main_kwargs=self.main_kwargs) subscriber = RecordingSubscriber() self.call_args.subscribers.append(subscriber) submission_task() # Make sure the on_queued callback of the subscriber is called. self.assertEqual( subscriber.on_queued_calls, [{'future': self.transfer_future}]) def test_on_queued_status_in_callbacks(self): submission_task = self.get_task( NOOPSubmissionTask, main_kwargs=self.main_kwargs) subscriber = RecordingStateSubscriber(self.transfer_coordinator) self.call_args.subscribers.append(subscriber) submission_task() # Make sure the status was queued during on_queued callback. self.assertEqual(subscriber.status_during_on_queued, 'queued') def test_sets_exception_from_submit(self): submission_task = self.get_task( ExceptionSubmissionTask, main_kwargs=self.main_kwargs) submission_task() # Make sure the status of the future is failed self.assertEqual(self.transfer_coordinator.status, 'failed') # Make sure the future propogates the exception encountered in the # submission task. with self.assertRaises(TaskFailureException): self.transfer_future.result() def test_catches_and_sets_keyboard_interrupt_exception_from_submit(self): self.main_kwargs['exception'] = KeyboardInterrupt submission_task = self.get_task( ExceptionSubmissionTask, main_kwargs=self.main_kwargs) submission_task() self.assertEqual(self.transfer_coordinator.status, 'failed') with self.assertRaises(KeyboardInterrupt): self.transfer_future.result() def test_calls_done_callbacks_on_exception(self): submission_task = self.get_task( ExceptionSubmissionTask, main_kwargs=self.main_kwargs) subscriber = RecordingSubscriber() self.call_args.subscribers.append(subscriber) # Add the done callback to the callbacks to be invoked when the # transfer is done. done_callbacks = get_callbacks(self.transfer_future, 'done') for done_callback in done_callbacks: self.transfer_coordinator.add_done_callback(done_callback) submission_task() # Make sure the task failed to start self.assertEqual(self.transfer_coordinator.status, 'failed') # Make sure the on_done callback of the subscriber is called. self.assertEqual( subscriber.on_done_calls, [{'future': self.transfer_future}]) def test_calls_failure_cleanups_on_exception(self): submission_task = self.get_task( ExceptionSubmissionTask, main_kwargs=self.main_kwargs) # Add the callback to the callbacks to be invoked when the # transfer fails. invocations_of_cleanup = [] cleanup_callback = FunctionContainer( invocations_of_cleanup.append, 'cleanup happened') self.transfer_coordinator.add_failure_cleanup(cleanup_callback) submission_task() # Make sure the task failed to start self.assertEqual(self.transfer_coordinator.status, 'failed') # Make sure the cleanup was called. self.assertEqual(invocations_of_cleanup, ['cleanup happened']) def test_cleanups_only_ran_once_on_exception(self): # We want to be able to handle the case where the final task completes # and anounces done but there is an error in the submission task # which will cause it to need to anounce done as well. In this case, # we do not want the done callbacks to be invoke more than once. 
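        # Concretely: the FailureTask below is submitted as a final task, so it
        # announces done when it runs; the ExceptionSubmissionTask that submitted
        # it then fails as well and announces done a second time. The assertions
        # below check that this does not re-invoke the on_done callbacks.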
final_task = self.get_task(FailureTask, is_final=True) self.main_kwargs['executor'] = self.executor self.main_kwargs['tasks_to_submit'] = [final_task] submission_task = self.get_task( ExceptionSubmissionTask, main_kwargs=self.main_kwargs) subscriber = RecordingSubscriber() self.call_args.subscribers.append(subscriber) # Add the done callback to the callbacks to be invoked when the # transfer is done. done_callbacks = get_callbacks(self.transfer_future, 'done') for done_callback in done_callbacks: self.transfer_coordinator.add_done_callback(done_callback) submission_task() # Make sure the task failed to start self.assertEqual(self.transfer_coordinator.status, 'failed') # Make sure the on_done callback of the subscriber is called only once. self.assertEqual( subscriber.on_done_calls, [{'future': self.transfer_future}]) def test_done_callbacks_only_ran_once_on_exception(self): # We want to be able to handle the case where the final task completes # and anounces done but there is an error in the submission task # which will cause it to need to anounce done as well. In this case, # we do not want the failure cleanups to be invoked more than once. final_task = self.get_task(FailureTask, is_final=True) self.main_kwargs['executor'] = self.executor self.main_kwargs['tasks_to_submit'] = [final_task] submission_task = self.get_task( ExceptionSubmissionTask, main_kwargs=self.main_kwargs) # Add the callback to the callbacks to be invoked when the # transfer fails. invocations_of_cleanup = [] cleanup_callback = FunctionContainer( invocations_of_cleanup.append, 'cleanup happened') self.transfer_coordinator.add_failure_cleanup(cleanup_callback) submission_task() # Make sure the task failed to start self.assertEqual(self.transfer_coordinator.status, 'failed') # Make sure the cleanup was called only onece. self.assertEqual(invocations_of_cleanup, ['cleanup happened']) def test_handles_cleanups_submitted_in_other_tasks(self): invocations_of_cleanup = [] event = Event() cleanup_callback = FunctionContainer( invocations_of_cleanup.append, 'cleanup happened') # We want the cleanup to be added in the execution of the task and # still be executed by the submission task when it fails. task = self.get_task( SuccessTask, main_kwargs={ 'callbacks': [event.set], 'failure_cleanups': [cleanup_callback] } ) self.main_kwargs['executor'] = self.executor self.main_kwargs['tasks_to_submit'] = [task] self.main_kwargs['additional_callbacks'] = [event.wait] submission_task = self.get_task( ExceptionSubmissionTask, main_kwargs=self.main_kwargs) submission_task() self.assertEqual(self.transfer_coordinator.status, 'failed') # Make sure the cleanup was called even though the callback got # added in a completely different task. self.assertEqual(invocations_of_cleanup, ['cleanup happened']) def test_waits_for_tasks_submitted_by_other_tasks_on_exception(self): # In this test, we want to make sure that any tasks that may be # submitted in another task complete before we start performing # cleanups. # # This is tested by doing the following: # # ExecutionSubmissionTask # | # +--submits-->SubmitMoreTasksTask # | # +--submits-->SuccessTask # | # +-->sleeps-->adds failure cleanup # # In the end, the failure cleanup of the SuccessTask should be ran # when the ExecutionSubmissionTask fails. 
If the # ExeceptionSubmissionTask did not run the failure cleanup it is most # likely that it did not wait for the SuccessTask to complete, which # it needs to because the ExeceptionSubmissionTask does not know # what failure cleanups it needs to run until all spawned tasks have # completed. invocations_of_cleanup = [] event = Event() cleanup_callback = FunctionContainer( invocations_of_cleanup.append, 'cleanup happened') cleanup_task = self.get_task( SuccessTask, main_kwargs={ 'callbacks': [event.set], 'failure_cleanups': [cleanup_callback] } ) task_for_submitting_cleanup_task = self.get_task( SubmitMoreTasksTask, main_kwargs={ 'executor': self.executor, 'tasks_to_submit': [cleanup_task] } ) self.main_kwargs['executor'] = self.executor self.main_kwargs['tasks_to_submit'] = [ task_for_submitting_cleanup_task] self.main_kwargs['additional_callbacks'] = [event.wait] submission_task = self.get_task( ExceptionSubmissionTask, main_kwargs=self.main_kwargs) submission_task() self.assertEqual(self.transfer_coordinator.status, 'failed') self.assertEqual(invocations_of_cleanup, ['cleanup happened']) def test_submission_task_announces_done_if_cancelled_before_main(self): invocations_of_done = [] done_callback = FunctionContainer( invocations_of_done.append, 'done announced') self.transfer_coordinator.add_done_callback(done_callback) self.transfer_coordinator.cancel() submission_task = self.get_task( NOOPSubmissionTask, main_kwargs=self.main_kwargs) submission_task() # Because the submission task was cancelled before being run # it did not submit any extra tasks so a result it is responsible # for making sure it announces done as nothing else will. self.assertEqual(invocations_of_done, ['done announced']) class TestTask(unittest.TestCase): def setUp(self): self.transfer_id = 1 self.transfer_coordinator = TransferCoordinator( transfer_id=self.transfer_id) def test_repr(self): main_kwargs = { 'bucket': 'mybucket', 'param_to_not_include': 'foo' } task = ReturnKwargsTask( self.transfer_coordinator, main_kwargs=main_kwargs) # The repr should not include the other parameter because it is not # a desired parameter to include. self.assertEqual( repr(task), 'ReturnKwargsTask(transfer_id=%s, %s)' % ( self.transfer_id, {'bucket': 'mybucket'}) ) def test_transfer_id(self): task = SuccessTask(self.transfer_coordinator) # Make sure that the id is the one provided to the id associated # to the transfer coordinator. self.assertEqual(task.transfer_id, self.transfer_id) def test_context_status_transitioning_success(self): # The status should be set to running. self.transfer_coordinator.set_status_to_running() self.assertEqual(self.transfer_coordinator.status, 'running') # If a task is called, the status still should be running. SuccessTask(self.transfer_coordinator)() self.assertEqual(self.transfer_coordinator.status, 'running') # Once the final task is called, the status should be set to success. SuccessTask(self.transfer_coordinator, is_final=True)() self.assertEqual(self.transfer_coordinator.status, 'success') def test_context_status_transitioning_failed(self): self.transfer_coordinator.set_status_to_running() SuccessTask(self.transfer_coordinator)() self.assertEqual(self.transfer_coordinator.status, 'running') # A failure task should result in the failed status FailureTask(self.transfer_coordinator)() self.assertEqual(self.transfer_coordinator.status, 'failed') # Even if the final task comes in and succeeds, it should stay failed. 
SuccessTask(self.transfer_coordinator, is_final=True)() self.assertEqual(self.transfer_coordinator.status, 'failed') def test_result_setting_for_success(self): override_return = 'foo' SuccessTask(self.transfer_coordinator)() SuccessTask(self.transfer_coordinator, main_kwargs={ 'return_value': override_return}, is_final=True)() # The return value for the transfer future should be of the final # task. self.assertEqual(self.transfer_coordinator.result(), override_return) def test_result_setting_for_error(self): FailureTask(self.transfer_coordinator)() # If another failure comes in, the result should still throw the # original exception when result() is eventually called. FailureTask(self.transfer_coordinator, main_kwargs={ 'exception': Exception})() # Even if a success task comes along, the result of the future # should be the original exception SuccessTask(self.transfer_coordinator, is_final=True)() with self.assertRaises(TaskFailureException): self.transfer_coordinator.result() def test_done_callbacks_success(self): callback_results = [] SuccessTask(self.transfer_coordinator, done_callbacks=[ partial(callback_results.append, 'first'), partial(callback_results.append, 'second') ])() # For successful tasks, the done callbacks should get called. self.assertEqual(callback_results, ['first', 'second']) def test_done_callbacks_failure(self): callback_results = [] FailureTask(self.transfer_coordinator, done_callbacks=[ partial(callback_results.append, 'first'), partial(callback_results.append, 'second') ])() # For even failed tasks, the done callbacks should get called. self.assertEqual(callback_results, ['first', 'second']) # Callbacks should continue to be called even after a related failure SuccessTask(self.transfer_coordinator, done_callbacks=[ partial(callback_results.append, 'third'), partial(callback_results.append, 'fourth') ])() self.assertEqual( callback_results, ['first', 'second', 'third', 'fourth']) def test_failure_cleanups_on_failure(self): callback_results = [] self.transfer_coordinator.add_failure_cleanup( callback_results.append, 'first') self.transfer_coordinator.add_failure_cleanup( callback_results.append, 'second') FailureTask(self.transfer_coordinator)() # The failure callbacks should have not been called yet because it # is not the last task self.assertEqual(callback_results, []) # Now the failure callbacks should get called. SuccessTask(self.transfer_coordinator, is_final=True)() self.assertEqual(callback_results, ['first', 'second']) def test_no_failure_cleanups_on_success(self): callback_results = [] self.transfer_coordinator.add_failure_cleanup( callback_results.append, 'first') self.transfer_coordinator.add_failure_cleanup( callback_results.append, 'second') SuccessTask(self.transfer_coordinator, is_final=True)() # The failure cleanups should not have been called because no task # failed for the transfer context. 
self.assertEqual(callback_results, []) def test_passing_main_kwargs(self): main_kwargs = {'foo': 'bar', 'baz': 'biz'} ReturnKwargsTask( self.transfer_coordinator, main_kwargs=main_kwargs, is_final=True)() # The kwargs should have been passed to the main() self.assertEqual(self.transfer_coordinator.result(), main_kwargs) def test_passing_pending_kwargs_single_futures(self): pending_kwargs = {} ref_main_kwargs = {'foo': 'bar', 'baz': 'biz'} # Pass some tasks to an executor with futures.ThreadPoolExecutor(1) as executor: pending_kwargs['foo'] = executor.submit( SuccessTask( self.transfer_coordinator, main_kwargs={'return_value': ref_main_kwargs['foo']} ) ) pending_kwargs['baz'] = executor.submit( SuccessTask( self.transfer_coordinator, main_kwargs={'return_value': ref_main_kwargs['baz']} ) ) # Create a task that depends on the tasks passed to the executor ReturnKwargsTask( self.transfer_coordinator, pending_main_kwargs=pending_kwargs, is_final=True)() # The result should have the pending keyword arg values flushed # out. self.assertEqual(self.transfer_coordinator.result(), ref_main_kwargs) def test_passing_pending_kwargs_list_of_futures(self): pending_kwargs = {} ref_main_kwargs = {'foo': ['first', 'second']} # Pass some tasks to an executor with futures.ThreadPoolExecutor(1) as executor: first_future = executor.submit( SuccessTask( self.transfer_coordinator, main_kwargs={'return_value': ref_main_kwargs['foo'][0]} ) ) second_future = executor.submit( SuccessTask( self.transfer_coordinator, main_kwargs={'return_value': ref_main_kwargs['foo'][1]} ) ) # Make the pending keyword arg value a list pending_kwargs['foo'] = [first_future, second_future] # Create a task that depends on the tasks passed to the executor ReturnKwargsTask( self.transfer_coordinator, pending_main_kwargs=pending_kwargs, is_final=True)() # The result should have the pending keyword arg values flushed # out in the expected order. self.assertEqual(self.transfer_coordinator.result(), ref_main_kwargs) def test_passing_pending_and_non_pending_kwargs(self): main_kwargs = {'nonpending_value': 'foo'} pending_kwargs = {} ref_main_kwargs = { 'nonpending_value': 'foo', 'pending_value': 'bar', 'pending_list': ['first', 'second'] } # Create the pending tasks with futures.ThreadPoolExecutor(1) as executor: pending_kwargs['pending_value'] = executor.submit( SuccessTask( self.transfer_coordinator, main_kwargs={'return_value': ref_main_kwargs['pending_value']} ) ) first_future = executor.submit( SuccessTask( self.transfer_coordinator, main_kwargs={'return_value': ref_main_kwargs['pending_list'][0]} ) ) second_future = executor.submit( SuccessTask( self.transfer_coordinator, main_kwargs={'return_value': ref_main_kwargs['pending_list'][1]} ) ) # Make the pending keyword arg value a list pending_kwargs['pending_list'] = [first_future, second_future] # Create a task that depends on the tasks passed to the executor # and just regular nonpending kwargs. ReturnKwargsTask( self.transfer_coordinator, main_kwargs=main_kwargs, pending_main_kwargs=pending_kwargs, is_final=True)() # The result should have all of the kwargs (both pending and # nonpending) self.assertEqual(self.transfer_coordinator.result(), ref_main_kwargs) def test_single_failed_pending_future(self): pending_kwargs = {} # Pass some tasks to an executor. Make one successful and the other # a failure. 
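        # pending_main_kwargs maps keyword names to futures; the dependent task
        # resolves every future before running, so the failure injected here is
        # what ends up raised from the coordinator's result() below.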
with futures.ThreadPoolExecutor(1) as executor: pending_kwargs['foo'] = executor.submit( SuccessTask( self.transfer_coordinator, main_kwargs={'return_value': 'bar'} ) ) pending_kwargs['baz'] = executor.submit( FailureTask(self.transfer_coordinator)) # Create a task that depends on the tasks passed to the executor ReturnKwargsTask( self.transfer_coordinator, pending_main_kwargs=pending_kwargs, is_final=True)() # The end result should raise the exception from the initial # pending future value with self.assertRaises(TaskFailureException): self.transfer_coordinator.result() def test_single_failed_pending_future_in_list(self): pending_kwargs = {} # Pass some tasks to an executor. Make one successful and the other # a failure. with futures.ThreadPoolExecutor(1) as executor: first_future = executor.submit( SuccessTask( self.transfer_coordinator, main_kwargs={'return_value': 'bar'} ) ) second_future = executor.submit( FailureTask(self.transfer_coordinator)) pending_kwargs['pending_list'] = [first_future, second_future] # Create a task that depends on the tasks passed to the executor ReturnKwargsTask( self.transfer_coordinator, pending_main_kwargs=pending_kwargs, is_final=True)() # The end result should raise the exception from the initial # pending future value in the list with self.assertRaises(TaskFailureException): self.transfer_coordinator.result() class BaseMultipartTaskTest(BaseTaskTest): def setUp(self): super(BaseMultipartTaskTest, self).setUp() self.bucket = 'mybucket' self.key = 'foo' class TestCreateMultipartUploadTask(BaseMultipartTaskTest): def test_main(self): upload_id = 'foo' extra_args = {'Metadata': {'foo': 'bar'}} response = {'UploadId': upload_id} task = self.get_task( CreateMultipartUploadTask, main_kwargs={ 'client': self.client, 'bucket': self.bucket, 'key': self.key, 'extra_args': extra_args } ) self.stubber.add_response( method='create_multipart_upload', service_response=response, expected_params={ 'Bucket': self.bucket, 'Key': self.key, 'Metadata': {'foo': 'bar'} } ) result_id = task() self.stubber.assert_no_pending_responses() # Ensure the upload id returned is correct self.assertEqual(upload_id, result_id) # Make sure that the abort was added as a cleanup failure self.assertEqual(len(self.transfer_coordinator.failure_cleanups), 1) # Make sure if it is called, it will abort correctly self.stubber.add_response( method='abort_multipart_upload', service_response={}, expected_params={ 'Bucket': self.bucket, 'Key': self.key, 'UploadId': upload_id } ) self.transfer_coordinator.failure_cleanups[0]() self.stubber.assert_no_pending_responses() class TestCompleteMultipartUploadTask(BaseMultipartTaskTest): def test_main(self): upload_id = 'my-id' parts = [{'ETag': 'etag', 'PartNumber': 0}] task = self.get_task( CompleteMultipartUploadTask, main_kwargs={ 'client': self.client, 'bucket': self.bucket, 'key': self.key, 'upload_id': upload_id, 'parts': parts, 'extra_args': {} } ) self.stubber.add_response( method='complete_multipart_upload', service_response={}, expected_params={ 'Bucket': self.bucket, 'Key': self.key, 'UploadId': upload_id, 'MultipartUpload': { 'Parts': parts } } ) task() self.stubber.assert_no_pending_responses() def test_includes_extra_args(self): upload_id = 'my-id' parts = [{'ETag': 'etag', 'PartNumber': 0}] task = self.get_task( CompleteMultipartUploadTask, main_kwargs={ 'client': self.client, 'bucket': self.bucket, 'key': self.key, 'upload_id': upload_id, 'parts': parts, 'extra_args': {'RequestPayer': 'requester'} } ) self.stubber.add_response( 
method='complete_multipart_upload', service_response={}, expected_params={ 'Bucket': self.bucket, 'Key': self.key, 'UploadId': upload_id, 'MultipartUpload': { 'Parts': parts }, 'RequestPayer': 'requester' } ) task() self.stubber.assert_no_pending_responses() s3transfer-0.1.13/tests/unit/test_upload.py000066400000000000000000000631241324114246300207120ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the 'License'). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the 'license' file accompanying this file. This file is # distributed on an 'AS IS' BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. from __future__ import division import os import tempfile import shutil import math from botocore.stub import ANY from tests import unittest from tests import BaseTaskTest from tests import BaseSubmissionTaskTest from tests import FileSizeProvider from tests import RecordingSubscriber from tests import RecordingExecutor from tests import NonSeekableReader from s3transfer.compat import six from s3transfer.futures import IN_MEMORY_UPLOAD_TAG from s3transfer.manager import TransferConfig from s3transfer.upload import AggregatedProgressCallback from s3transfer.upload import InterruptReader from s3transfer.upload import UploadFilenameInputManager from s3transfer.upload import UploadSeekableInputManager from s3transfer.upload import UploadNonSeekableInputManager from s3transfer.upload import UploadSubmissionTask from s3transfer.upload import PutObjectTask from s3transfer.upload import UploadPartTask from s3transfer.utils import CallArgs from s3transfer.utils import OSUtils from s3transfer.utils import MIN_UPLOAD_CHUNKSIZE class InterruptionError(Exception): pass class OSUtilsExceptionOnFileSize(OSUtils): def get_file_size(self, filename): raise AssertionError( "The file %s should not have been stated" % filename) class BaseUploadTest(BaseTaskTest): def setUp(self): super(BaseUploadTest, self).setUp() self.bucket = 'mybucket' self.key = 'foo' self.osutil = OSUtils() self.tempdir = tempfile.mkdtemp() self.filename = os.path.join(self.tempdir, 'myfile') self.content = b'my content' self.subscribers = [] with open(self.filename, 'wb') as f: f.write(self.content) # A list to keep track of all of the bodies sent over the wire # and their order. 
self.sent_bodies = [] self.client.meta.events.register( 'before-parameter-build.s3.*', self.collect_body) def tearDown(self): super(BaseUploadTest, self).tearDown() shutil.rmtree(self.tempdir) def collect_body(self, params, **kwargs): if 'Body' in params: self.sent_bodies.append(params['Body'].read()) class TestAggregatedProgressCallback(unittest.TestCase): def setUp(self): self.aggregated_amounts = [] self.threshold = 3 self.aggregated_progress_callback = AggregatedProgressCallback( [self.callback], self.threshold) def callback(self, bytes_transferred): self.aggregated_amounts.append(bytes_transferred) def test_under_threshold(self): one_under_threshold_amount = self.threshold - 1 self.aggregated_progress_callback(one_under_threshold_amount) self.assertEqual(self.aggregated_amounts, []) self.aggregated_progress_callback(1) self.assertEqual(self.aggregated_amounts, [self.threshold]) def test_at_threshold(self): self.aggregated_progress_callback(self.threshold) self.assertEqual(self.aggregated_amounts, [self.threshold]) def test_over_threshold(self): over_threshold_amount = self.threshold + 1 self.aggregated_progress_callback(over_threshold_amount) self.assertEqual(self.aggregated_amounts, [over_threshold_amount]) def test_flush(self): under_threshold_amount = self.threshold - 1 self.aggregated_progress_callback(under_threshold_amount) self.assertEqual(self.aggregated_amounts, []) self.aggregated_progress_callback.flush() self.assertEqual(self.aggregated_amounts, [under_threshold_amount]) def test_flush_with_nothing_to_flush(self): under_threshold_amount = self.threshold - 1 self.aggregated_progress_callback(under_threshold_amount) self.assertEqual(self.aggregated_amounts, []) self.aggregated_progress_callback.flush() self.assertEqual(self.aggregated_amounts, [under_threshold_amount]) # Flushing again should do nothing as it was just flushed self.aggregated_progress_callback.flush() self.assertEqual(self.aggregated_amounts, [under_threshold_amount]) class TestInterruptReader(BaseUploadTest): def test_read_raises_exception(self): with open(self.filename, 'rb') as f: reader = InterruptReader(f, self.transfer_coordinator) # Read some bytes to show it can be read. 
self.assertEqual(reader.read(1), self.content[0:1]) # Then set an exception in the transfer coordinator self.transfer_coordinator.set_exception(InterruptionError()) # The next read should have the exception propograte with self.assertRaises(InterruptionError): reader.read() def test_seek(self): with open(self.filename, 'rb') as f: reader = InterruptReader(f, self.transfer_coordinator) # Ensure it can seek correctly reader.seek(1) self.assertEqual(reader.read(1), self.content[1:2]) def test_tell(self): with open(self.filename, 'rb') as f: reader = InterruptReader(f, self.transfer_coordinator) # Ensure it can tell correctly reader.seek(1) self.assertEqual(reader.tell(), 1) class BaseUploadInputManagerTest(BaseUploadTest): def setUp(self): super(BaseUploadInputManagerTest, self).setUp() self.osutil = OSUtils() self.config = TransferConfig() self.recording_subscriber = RecordingSubscriber() self.subscribers.append(self.recording_subscriber) def _get_expected_body_for_part(self, part_number): # A helper method for retrieving the expected body for a specific # part number of the data total_size = len(self.content) chunk_size = self.config.multipart_chunksize start_index = (part_number - 1) * chunk_size end_index = part_number * chunk_size if end_index >= total_size: return self.content[start_index:] return self.content[start_index:end_index] class TestUploadFilenameInputManager(BaseUploadInputManagerTest): def setUp(self): super(TestUploadFilenameInputManager, self).setUp() self.upload_input_manager = UploadFilenameInputManager( self.osutil, self.transfer_coordinator) self.call_args = CallArgs( fileobj=self.filename, subscribers=self.subscribers) self.future = self.get_transfer_future(self.call_args) def test_is_compatible(self): self.assertTrue( self.upload_input_manager.is_compatible( self.future.meta.call_args.fileobj) ) def test_stores_bodies_in_memory_put_object(self): self.assertFalse( self.upload_input_manager.stores_body_in_memory('put_object')) def test_stores_bodies_in_memory_upload_part(self): self.assertFalse( self.upload_input_manager.stores_body_in_memory('upload_part')) def test_provide_transfer_size(self): self.upload_input_manager.provide_transfer_size(self.future) # The provided file size should be equal to size of the contents of # the file. self.assertEqual(self.future.meta.size, len(self.content)) def test_requires_multipart_upload(self): self.future.meta.provide_transfer_size(len(self.content)) # With the default multipart threshold, the length of the content # should be smaller than the threshold thus not requiring a multipart # transfer. self.assertFalse( self.upload_input_manager.requires_multipart_upload( self.future, self.config)) # Decreasing the threshold to that of the length of the content of # the file should trigger the need for a multipart upload. self.config.multipart_threshold = len(self.content) self.assertTrue( self.upload_input_manager.requires_multipart_upload( self.future, self.config)) def test_get_put_object_body(self): self.future.meta.provide_transfer_size(len(self.content)) read_file_chunk = self.upload_input_manager.get_put_object_body( self.future) read_file_chunk.enable_callback() # The file-like object provided back should be the same as the content # of the file. with read_file_chunk: self.assertEqual(read_file_chunk.read(), self.content) # The file-like object should also have been wrapped with the # on_queued callbacks to track the amount of bytes being transferred. 
self.assertEqual( self.recording_subscriber.calculate_bytes_seen(), len(self.content)) def test_get_put_object_body_is_interruptable(self): self.future.meta.provide_transfer_size(len(self.content)) read_file_chunk = self.upload_input_manager.get_put_object_body( self.future) # Set an exception in the transfer coordinator self.transfer_coordinator.set_exception(InterruptionError) # Ensure the returned read file chunk can be interrupted with that # error. with self.assertRaises(InterruptionError): read_file_chunk.read() def test_yield_upload_part_bodies(self): # Adjust the chunk size to something more grainular for testing. self.config.multipart_chunksize = 4 self.future.meta.provide_transfer_size(len(self.content)) # Get an iterator that will yield all of the bodies and their # respective part number. part_iterator = self.upload_input_manager.yield_upload_part_bodies( self.future, self.config.multipart_chunksize) expected_part_number = 1 for part_number, read_file_chunk in part_iterator: # Ensure that the part number is as expected self.assertEqual(part_number, expected_part_number) read_file_chunk.enable_callback() # Ensure that the body is correct for that part. with read_file_chunk: self.assertEqual( read_file_chunk.read(), self._get_expected_body_for_part(part_number)) expected_part_number += 1 # All of the file-like object should also have been wrapped with the # on_queued callbacks to track the amount of bytes being transferred. self.assertEqual( self.recording_subscriber.calculate_bytes_seen(), len(self.content)) def test_yield_upload_part_bodies_are_interruptable(self): # Adjust the chunk size to something more grainular for testing. self.config.multipart_chunksize = 4 self.future.meta.provide_transfer_size(len(self.content)) # Get an iterator that will yield all of the bodies and their # respective part number. part_iterator = self.upload_input_manager.yield_upload_part_bodies( self.future, self.config.multipart_chunksize) # Set an exception in the transfer coordinator self.transfer_coordinator.set_exception(InterruptionError) for _, read_file_chunk in part_iterator: # Ensure that each read file chunk yielded can be interrupted # with that error. with self.assertRaises(InterruptionError): read_file_chunk.read() class TestUploadSeekableInputManager(TestUploadFilenameInputManager): def setUp(self): super(TestUploadSeekableInputManager, self).setUp() self.upload_input_manager = UploadSeekableInputManager( self.osutil, self.transfer_coordinator) self.fileobj = open(self.filename, 'rb') self.call_args = CallArgs( fileobj=self.fileobj, subscribers=self.subscribers) self.future = self.get_transfer_future(self.call_args) def tearDown(self): self.fileobj.close() super(TestUploadSeekableInputManager, self).tearDown() def test_is_compatible_bytes_io(self): self.assertTrue( self.upload_input_manager.is_compatible(six.BytesIO())) def test_not_compatible_for_non_filelike_obj(self): self.assertFalse(self.upload_input_manager.is_compatible(object())) def test_stores_bodies_in_memory_upload_part(self): self.assertTrue( self.upload_input_manager.stores_body_in_memory('upload_part')) def test_get_put_object_body(self): start_pos = 3 self.fileobj.seek(start_pos) adjusted_size = len(self.content) - start_pos self.future.meta.provide_transfer_size(adjusted_size) read_file_chunk = self.upload_input_manager.get_put_object_body( self.future) read_file_chunk.enable_callback() # The fact that the file was seeked to start should be taken into # account in length and content for the read file chunk. 
with read_file_chunk: self.assertEqual(len(read_file_chunk), adjusted_size) self.assertEqual(read_file_chunk.read(), self.content[start_pos:]) self.assertEqual( self.recording_subscriber.calculate_bytes_seen(), adjusted_size) class TestUploadNonSeekableInputManager(TestUploadFilenameInputManager): def setUp(self): super(TestUploadNonSeekableInputManager, self).setUp() self.upload_input_manager = UploadNonSeekableInputManager( self.osutil, self.transfer_coordinator) self.fileobj = NonSeekableReader(self.content) self.call_args = CallArgs( fileobj=self.fileobj, subscribers=self.subscribers) self.future = self.get_transfer_future(self.call_args) def assert_multipart_parts(self): """ Asserts that the input manager will generate a multipart upload and that each part is in order and the correct size. """ # Assert that a multipart upload is required. self.assertTrue( self.upload_input_manager.requires_multipart_upload( self.future, self.config)) # Get a list of all the parts that would be sent. parts = list( self.upload_input_manager.yield_upload_part_bodies( self.future, self.config.multipart_chunksize)) # Assert that the actual number of parts is what we would expect # based on the configuration. size = self.config.multipart_chunksize num_parts = math.ceil(len(self.content) / size) self.assertEqual(len(parts), num_parts) # Run for every part but the last part. for i, part in enumerate(parts[:-1]): # Assert the part number is correct. self.assertEqual(part[0], i + 1) # Assert the part contains the right amount of data. data = part[1].read() self.assertEqual(len(data), size) # Assert that the last part is the correct size. expected_final_size = len(self.content) - ((num_parts - 1) * size) final_part = parts[-1] self.assertEqual(len(final_part[1].read()), expected_final_size) # Assert that the last part has the correct part number. self.assertEqual(final_part[0], len(parts)) def test_provide_transfer_size(self): self.upload_input_manager.provide_transfer_size(self.future) # There is no way to get the size without reading the entire body. self.assertEqual(self.future.meta.size, None) def test_stores_bodies_in_memory_upload_part(self): self.assertTrue( self.upload_input_manager.stores_body_in_memory('upload_part')) def test_stores_bodies_in_memory_put_object(self): self.assertTrue( self.upload_input_manager.stores_body_in_memory('put_object')) def test_initial_data_parts_threshold_lesser(self): # threshold < size self.config.multipart_chunksize = 4 self.config.multipart_threshold = 2 self.assert_multipart_parts() def test_initial_data_parts_threshold_equal(self): # threshold == size self.config.multipart_chunksize = 4 self.config.multipart_threshold = 4 self.assert_multipart_parts() def test_initial_data_parts_threshold_greater(self): # threshold > size self.config.multipart_chunksize = 4 self.config.multipart_threshold = 8 self.assert_multipart_parts() class TestUploadSubmissionTask(BaseSubmissionTaskTest): def setUp(self): super(TestUploadSubmissionTask, self).setUp() self.tempdir = tempfile.mkdtemp() self.filename = os.path.join(self.tempdir, 'myfile') self.content = b'0' * (MIN_UPLOAD_CHUNKSIZE * 3) self.config.multipart_chunksize = MIN_UPLOAD_CHUNKSIZE self.config.multipart_threshold = MIN_UPLOAD_CHUNKSIZE * 5 with open(self.filename, 'wb') as f: f.write(self.content) self.bucket = 'mybucket' self.key = 'mykey' self.extra_args = {} self.subscribers = [] # A list to keep track of all of the bodies sent over the wire # and their order. 
self.sent_bodies = [] self.client.meta.events.register( 'before-parameter-build.s3.*', self.collect_body) self.call_args = self.get_call_args() self.transfer_future = self.get_transfer_future(self.call_args) self.submission_main_kwargs = { 'client': self.client, 'config': self.config, 'osutil': self.osutil, 'request_executor': self.executor, 'transfer_future': self.transfer_future } self.submission_task = self.get_task( UploadSubmissionTask, main_kwargs=self.submission_main_kwargs) def tearDown(self): super(TestUploadSubmissionTask, self).tearDown() shutil.rmtree(self.tempdir) def collect_body(self, params, **kwargs): if 'Body' in params: self.sent_bodies.append(params['Body'].read()) def get_call_args(self, **kwargs): default_call_args = { 'fileobj': self.filename, 'bucket': self.bucket, 'key': self.key, 'extra_args': self.extra_args, 'subscribers': self.subscribers } default_call_args.update(kwargs) return CallArgs(**default_call_args) def add_multipart_upload_stubbed_responses(self): self.stubber.add_response( method='create_multipart_upload', service_response={'UploadId': 'my-id'} ) self.stubber.add_response( method='upload_part', service_response={'ETag': 'etag-1'} ) self.stubber.add_response( method='upload_part', service_response={'ETag': 'etag-2'} ) self.stubber.add_response( method='upload_part', service_response={'ETag': 'etag-3'} ) self.stubber.add_response( method='complete_multipart_upload', service_response={} ) def wrap_executor_in_recorder(self): self.executor = RecordingExecutor(self.executor) self.submission_main_kwargs['request_executor'] = self.executor def use_fileobj_in_call_args(self, fileobj): self.call_args = self.get_call_args(fileobj=fileobj) self.transfer_future = self.get_transfer_future(self.call_args) self.submission_main_kwargs['transfer_future'] = self.transfer_future def assert_tag_value_for_put_object(self, tag_value): self.assertEqual( self.executor.submissions[0]['tag'], tag_value) def assert_tag_value_for_upload_parts(self, tag_value): for submission in self.executor.submissions[1:-1]: self.assertEqual( submission['tag'], tag_value) def test_provide_file_size_on_put(self): self.call_args.subscribers.append(FileSizeProvider(len(self.content))) self.stubber.add_response( method='put_object', service_response={}, expected_params={ 'Body': ANY, 'Bucket': self.bucket, 'Key': self.key } ) # With this submitter, it will fail to stat the file if a transfer # size is not provided. self.submission_main_kwargs['osutil'] = OSUtilsExceptionOnFileSize() self.submission_task = self.get_task( UploadSubmissionTask, main_kwargs=self.submission_main_kwargs) self.submission_task() self.transfer_future.result() self.stubber.assert_no_pending_responses() self.assertEqual(self.sent_bodies, [self.content]) def test_submits_no_tag_for_put_object_filename(self): self.wrap_executor_in_recorder() self.stubber.add_response('put_object', {}) self.submission_task = self.get_task( UploadSubmissionTask, main_kwargs=self.submission_main_kwargs) self.submission_task() self.transfer_future.result() self.stubber.assert_no_pending_responses() # Make sure no tag to limit that task specifically was not associated # to that task submission. self.assert_tag_value_for_put_object(None) def test_submits_no_tag_for_multipart_filename(self): self.wrap_executor_in_recorder() # Set up for a multipart upload. 
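        # The threshold is dropped to 1 below so this upload is forced down
        # the multipart path and consumes every stubbed
        # create/upload_part/complete response.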
self.add_multipart_upload_stubbed_responses() self.config.multipart_threshold = 1 self.submission_task = self.get_task( UploadSubmissionTask, main_kwargs=self.submission_main_kwargs) self.submission_task() self.transfer_future.result() self.stubber.assert_no_pending_responses() # Make sure no tag to limit any of the upload part tasks were # were associated when submitted to the executor self.assert_tag_value_for_upload_parts(None) def test_submits_no_tag_for_put_object_fileobj(self): self.wrap_executor_in_recorder() self.stubber.add_response('put_object', {}) with open(self.filename, 'rb') as f: self.use_fileobj_in_call_args(f) self.submission_task = self.get_task( UploadSubmissionTask, main_kwargs=self.submission_main_kwargs) self.submission_task() self.transfer_future.result() self.stubber.assert_no_pending_responses() # Make sure no tag to limit that task specifically was not associated # to that task submission. self.assert_tag_value_for_put_object(None) def test_submits_tag_for_multipart_fileobj(self): self.wrap_executor_in_recorder() # Set up for a multipart upload. self.add_multipart_upload_stubbed_responses() self.config.multipart_threshold = 1 with open(self.filename, 'rb') as f: self.use_fileobj_in_call_args(f) self.submission_task = self.get_task( UploadSubmissionTask, main_kwargs=self.submission_main_kwargs) self.submission_task() self.transfer_future.result() self.stubber.assert_no_pending_responses() # Make sure tags to limit all of the upload part tasks were # were associated when submitted to the executor as these tasks will # have chunks of data stored with them in memory. self.assert_tag_value_for_upload_parts(IN_MEMORY_UPLOAD_TAG) class TestPutObjectTask(BaseUploadTest): def test_main(self): extra_args = {'Metadata': {'foo': 'bar'}} with open(self.filename, 'rb') as fileobj: task = self.get_task( PutObjectTask, main_kwargs={ 'client': self.client, 'fileobj': fileobj, 'bucket': self.bucket, 'key': self.key, 'extra_args': extra_args } ) self.stubber.add_response( method='put_object', service_response={}, expected_params={ 'Body': ANY, 'Bucket': self.bucket, 'Key': self.key, 'Metadata': {'foo': 'bar'} } ) task() self.stubber.assert_no_pending_responses() self.assertEqual(self.sent_bodies, [self.content]) class TestUploadPartTask(BaseUploadTest): def test_main(self): extra_args = {'RequestPayer': 'requester'} upload_id = 'my-id' part_number = 1 etag = 'foo' with open(self.filename, 'rb') as fileobj: task = self.get_task( UploadPartTask, main_kwargs={ 'client': self.client, 'fileobj': fileobj, 'bucket': self.bucket, 'key': self.key, 'upload_id': upload_id, 'part_number': part_number, 'extra_args': extra_args } ) self.stubber.add_response( method='upload_part', service_response={'ETag': etag}, expected_params={ 'Body': ANY, 'Bucket': self.bucket, 'Key': self.key, 'UploadId': upload_id, 'PartNumber': part_number, 'RequestPayer': 'requester' } ) rval = task() self.stubber.assert_no_pending_responses() self.assertEqual(rval, {'ETag': etag, 'PartNumber': part_number}) self.assertEqual(self.sent_bodies, [self.content]) s3transfer-0.1.13/tests/unit/test_utils.py000066400000000000000000001021401324114246300205560ustar00rootroot00000000000000# Copyright 2016 Amazon.com, Inc. or its affiliates. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"). You # may not use this file except in compliance with the License. A copy of # the License is located at # # http://aws.amazon.com/apache2.0/ # # or in the "license" file accompanying this file. 
This file is # distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF # ANY KIND, either express or implied. See the License for the specific # language governing permissions and limitations under the License. import os.path import shutil import tempfile import threading import random import time import io import mock from tests import unittest from tests import RecordingSubscriber from tests import NonSeekableWriter from s3transfer.compat import six from s3transfer.futures import TransferFuture from s3transfer.futures import TransferMeta from s3transfer.utils import get_callbacks from s3transfer.utils import random_file_extension from s3transfer.utils import invoke_progress_callbacks from s3transfer.utils import calculate_range_parameter from s3transfer.utils import get_filtered_dict from s3transfer.utils import CallArgs from s3transfer.utils import FunctionContainer from s3transfer.utils import CountCallbackInvoker from s3transfer.utils import OSUtils from s3transfer.utils import DeferredOpenFile from s3transfer.utils import ReadFileChunk from s3transfer.utils import StreamReaderProgress from s3transfer.utils import TaskSemaphore from s3transfer.utils import SlidingWindowSemaphore from s3transfer.utils import NoResourcesAvailable from s3transfer.utils import ChunksizeAdjuster from s3transfer.utils import MIN_UPLOAD_CHUNKSIZE, MAX_SINGLE_UPLOAD_SIZE from s3transfer.utils import MAX_PARTS class TestGetCallbacks(unittest.TestCase): def setUp(self): self.subscriber = RecordingSubscriber() self.second_subscriber = RecordingSubscriber() self.call_args = CallArgs(subscribers=[ self.subscriber, self.second_subscriber] ) self.transfer_meta = TransferMeta(self.call_args) self.transfer_future = TransferFuture(self.transfer_meta) def test_get_callbacks(self): callbacks = get_callbacks(self.transfer_future, 'queued') # Make sure two callbacks were added as both subscribers had # an on_queued method. self.assertEqual(len(callbacks), 2) # Ensure that the callback was injected with the future by calling # one of them and checking that the future was used in the call. 
callbacks[0]() self.assertEqual( self.subscriber.on_queued_calls, [{'future': self.transfer_future}] ) def test_get_callbacks_for_missing_type(self): callbacks = get_callbacks(self.transfer_future, 'fake_state') # There should be no callbacks as the subscribers will not have the # on_fake_state method self.assertEqual(len(callbacks), 0) class TestGetFilteredDict(unittest.TestCase): def test_get_filtered_dict(self): original = { 'Include': 'IncludeValue', 'NotInlude': 'NotIncludeValue' } whitelist = ['Include'] self.assertEqual( get_filtered_dict(original, whitelist), {'Include': 'IncludeValue'} ) class TestCallArgs(unittest.TestCase): def test_call_args(self): call_args = CallArgs(foo='bar', biz='baz') self.assertEqual(call_args.foo, 'bar') self.assertEqual(call_args.biz, 'baz') class TestFunctionContainer(unittest.TestCase): def get_args_kwargs(self, *args, **kwargs): return args, kwargs def test_call(self): func_container = FunctionContainer( self.get_args_kwargs, 'foo', bar='baz') self.assertEqual(func_container(), (('foo',), {'bar': 'baz'})) def test_repr(self): func_container = FunctionContainer( self.get_args_kwargs, 'foo', bar='baz') self.assertEqual( str(func_container), 'Function: %s with args %s and kwargs %s' % ( self.get_args_kwargs, ('foo',), {'bar': 'baz'})) class TestCountCallbackInvoker(unittest.TestCase): def invoke_callback(self): self.ref_results.append('callback invoked') def assert_callback_invoked(self): self.assertEqual(self.ref_results, ['callback invoked']) def assert_callback_not_invoked(self): self.assertEqual(self.ref_results, []) def setUp(self): self.ref_results = [] self.invoker = CountCallbackInvoker(self.invoke_callback) def test_increment(self): self.invoker.increment() self.assertEqual(self.invoker.current_count, 1) def test_decrement(self): self.invoker.increment() self.invoker.increment() self.invoker.decrement() self.assertEqual(self.invoker.current_count, 1) def test_count_cannot_go_below_zero(self): with self.assertRaises(RuntimeError): self.invoker.decrement() def test_callback_invoked_only_once_finalized(self): self.invoker.increment() self.invoker.decrement() self.assert_callback_not_invoked() self.invoker.finalize() # Callback should only be invoked once finalized self.assert_callback_invoked() def test_callback_invoked_after_finalizing_and_count_reaching_zero(self): self.invoker.increment() self.invoker.finalize() # Make sure that it does not get invoked immediately after # finalizing as the count is currently one self.assert_callback_not_invoked() self.invoker.decrement() self.assert_callback_invoked() def test_cannot_increment_after_finalization(self): self.invoker.finalize() with self.assertRaises(RuntimeError): self.invoker.increment() class TestRandomFileExtension(unittest.TestCase): def test_has_proper_length(self): self.assertEqual( len(random_file_extension(num_digits=4)), 4) class TestInvokeProgressCallbacks(unittest.TestCase): def test_invoke_progress_callbacks(self): recording_subscriber = RecordingSubscriber() invoke_progress_callbacks([recording_subscriber.on_progress], 2) self.assertEqual(recording_subscriber.calculate_bytes_seen(), 2) def test_invoke_progress_callbacks_with_no_progress(self): recording_subscriber = RecordingSubscriber() invoke_progress_callbacks([recording_subscriber.on_progress], 0) self.assertEqual(len(recording_subscriber.on_progress_calls), 0) class TestCalculateRangeParameter(unittest.TestCase): def setUp(self): self.part_size = 5 self.part_index = 1 self.num_parts = 3 def test_calculate_range_paramter(self): 
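        # With part_size=5 and part_index=1 (the second part of three), the
        # expected range covers bytes 5 through 9 inclusive.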
range_val = calculate_range_parameter( self.part_size, self.part_index, self.num_parts) self.assertEqual(range_val, 'bytes=5-9') def test_last_part_with_no_total_size(self): range_val = calculate_range_parameter( self.part_size, self.part_index, num_parts=2) self.assertEqual(range_val, 'bytes=5-') def test_last_part_with_total_size(self): range_val = calculate_range_parameter( self.part_size, self.part_index, num_parts=2, total_size=8) self.assertEqual(range_val, 'bytes=5-7') class BaseUtilsTest(unittest.TestCase): def setUp(self): self.tempdir = tempfile.mkdtemp() self.filename = os.path.join(self.tempdir, 'foo') self.content = b'abc' with open(self.filename, 'wb') as f: f.write(self.content) self.amounts_seen = [] self.num_close_callback_calls = 0 def tearDown(self): shutil.rmtree(self.tempdir) def callback(self, bytes_transferred): self.amounts_seen.append(bytes_transferred) def close_callback(self): self.num_close_callback_calls += 1 class TestOSUtils(BaseUtilsTest): def test_get_file_size(self): self.assertEqual( OSUtils().get_file_size(self.filename), len(self.content)) def test_open_file_chunk_reader(self): reader = OSUtils().open_file_chunk_reader( self.filename, 0, 3, [self.callback]) # The returned reader should be a ReadFileChunk. self.assertIsInstance(reader, ReadFileChunk) # The content of the reader should be correct. self.assertEqual(reader.read(), self.content) # Callbacks should be disabled depspite being passed in. self.assertEqual(self.amounts_seen, []) def test_open_file_chunk_reader_from_fileobj(self): with open(self.filename, 'rb') as f: reader = OSUtils().open_file_chunk_reader_from_fileobj( f, len(self.content), len(self.content), [self.callback]) # The returned reader should be a ReadFileChunk. self.assertIsInstance(reader, ReadFileChunk) # The content of the reader should be correct. self.assertEqual(reader.read(), self.content) reader.close() # Callbacks should be disabled depspite being passed in. self.assertEqual(self.amounts_seen, []) self.assertEqual(self.num_close_callback_calls, 0) def test_open_file(self): fileobj = OSUtils().open(os.path.join(self.tempdir, 'foo'), 'w') self.assertTrue(hasattr(fileobj, 'write')) def test_remove_file_ignores_errors(self): non_existent_file = os.path.join(self.tempdir, 'no-exist') # This should not exist to start. 
self.assertFalse(os.path.exists(non_existent_file)) try: OSUtils().remove_file(non_existent_file) except OSError as e: self.fail('OSError should have been caught: %s' % e) def test_remove_file_proxies_remove_file(self): OSUtils().remove_file(self.filename) self.assertFalse(os.path.exists(self.filename)) def test_rename_file(self): new_filename = os.path.join(self.tempdir, 'newfoo') OSUtils().rename_file(self.filename, new_filename) self.assertFalse(os.path.exists(self.filename)) self.assertTrue(os.path.exists(new_filename)) def test_is_special_file_for_normal_file(self): self.assertFalse(OSUtils().is_special_file(self.filename)) def test_is_special_file_for_non_existant_file(self): non_existant_filename = os.path.join(self.tempdir, 'no-exist') self.assertFalse(os.path.exists(non_existant_filename)) self.assertFalse(OSUtils().is_special_file(non_existant_filename)) class TestDeferredOpenFile(BaseUtilsTest): def setUp(self): super(TestDeferredOpenFile, self).setUp() self.filename = os.path.join(self.tempdir, 'foo') self.contents = b'my contents' with open(self.filename, 'wb') as f: f.write(self.contents) self.deferred_open_file = DeferredOpenFile( self.filename, open_function=self.recording_open_function) self.open_call_args = [] def tearDown(self): self.deferred_open_file.close() super(TestDeferredOpenFile, self).tearDown() def recording_open_function(self, filename, mode): self.open_call_args.append((filename, mode)) return open(filename, mode) def open_nonseekable(self, filename, mode): self.open_call_args.append((filename, mode)) return NonSeekableWriter(six.BytesIO(self.content)) def test_instantiation_does_not_open_file(self): DeferredOpenFile( self.filename, open_function=self.recording_open_function) self.assertEqual(len(self.open_call_args), 0) def test_name(self): self.assertEqual(self.deferred_open_file.name, self.filename) def test_read(self): content = self.deferred_open_file.read(2) self.assertEqual(content, self.contents[0:2]) content = self.deferred_open_file.read(2) self.assertEqual(content, self.contents[2:4]) self.assertEqual(len(self.open_call_args), 1) def test_write(self): self.deferred_open_file = DeferredOpenFile( self.filename, mode='wb', open_function=self.recording_open_function) write_content = b'foo' self.deferred_open_file.write(write_content) self.deferred_open_file.write(write_content) self.deferred_open_file.close() # Both of the writes should now be in the file. with open(self.filename, 'rb') as f: self.assertEqual(f.read(), write_content*2) # Open should have only been called once. self.assertEqual(len(self.open_call_args), 1) def test_seek(self): self.deferred_open_file.seek(2) content = self.deferred_open_file.read(2) self.assertEqual(content, self.contents[2:4]) self.assertEqual(len(self.open_call_args), 1) def test_open_does_not_seek_with_zero_start_byte(self): self.deferred_open_file = DeferredOpenFile( self.filename, mode='wb', start_byte=0, open_function=self.open_nonseekable) try: # If this seeks, an UnsupportedOperation error will be raised. self.deferred_open_file.write(b'data') except io.UnsupportedOperation: self.fail('DeferredOpenFile seeked upon opening') def test_open_seeks_with_nonzero_start_byte(self): self.deferred_open_file = DeferredOpenFile( self.filename, mode='wb', start_byte=5, open_function=self.open_nonseekable) # Since a non-seekable file is being opened, calling Seek will raise # an UnsupportedOperation error. 
with self.assertRaises(io.UnsupportedOperation): self.deferred_open_file.write(b'data') def test_tell(self): self.deferred_open_file.tell() # tell() should not have opened the file if it has not been seeked # or read because we know the start bytes upfront. self.assertEqual(len(self.open_call_args), 0) self.deferred_open_file.seek(2) self.assertEqual(self.deferred_open_file.tell(), 2) self.assertEqual(len(self.open_call_args), 1) def test_open_args(self): self.deferred_open_file = DeferredOpenFile( self.filename, mode='ab+', open_function=self.recording_open_function) # Force an open self.deferred_open_file.write(b'data') self.assertEqual(len(self.open_call_args), 1) self.assertEqual(self.open_call_args[0], (self.filename, 'ab+')) def test_context_handler(self): with self.deferred_open_file: self.assertEqual(len(self.open_call_args), 1) class TestReadFileChunk(BaseUtilsTest): def test_read_entire_chunk(self): filename = os.path.join(self.tempdir, 'foo') with open(filename, 'wb') as f: f.write(b'onetwothreefourfivesixseveneightnineten') chunk = ReadFileChunk.from_filename( filename, start_byte=0, chunk_size=3) self.assertEqual(chunk.read(), b'one') self.assertEqual(chunk.read(), b'') def test_read_with_amount_size(self): filename = os.path.join(self.tempdir, 'foo') with open(filename, 'wb') as f: f.write(b'onetwothreefourfivesixseveneightnineten') chunk = ReadFileChunk.from_filename( filename, start_byte=11, chunk_size=4) self.assertEqual(chunk.read(1), b'f') self.assertEqual(chunk.read(1), b'o') self.assertEqual(chunk.read(1), b'u') self.assertEqual(chunk.read(1), b'r') self.assertEqual(chunk.read(1), b'') def test_reset_stream_emulation(self): filename = os.path.join(self.tempdir, 'foo') with open(filename, 'wb') as f: f.write(b'onetwothreefourfivesixseveneightnineten') chunk = ReadFileChunk.from_filename( filename, start_byte=11, chunk_size=4) self.assertEqual(chunk.read(), b'four') chunk.seek(0) self.assertEqual(chunk.read(), b'four') def test_read_past_end_of_file(self): filename = os.path.join(self.tempdir, 'foo') with open(filename, 'wb') as f: f.write(b'onetwothreefourfivesixseveneightnineten') chunk = ReadFileChunk.from_filename( filename, start_byte=36, chunk_size=100000) self.assertEqual(chunk.read(), b'ten') self.assertEqual(chunk.read(), b'') self.assertEqual(len(chunk), 3) def test_tell_and_seek(self): filename = os.path.join(self.tempdir, 'foo') with open(filename, 'wb') as f: f.write(b'onetwothreefourfivesixseveneightnineten') chunk = ReadFileChunk.from_filename( filename, start_byte=36, chunk_size=100000) self.assertEqual(chunk.tell(), 0) self.assertEqual(chunk.read(), b'ten') self.assertEqual(chunk.tell(), 3) chunk.seek(0) self.assertEqual(chunk.tell(), 0) def test_file_chunk_supports_context_manager(self): filename = os.path.join(self.tempdir, 'foo') with open(filename, 'wb') as f: f.write(b'abc') with ReadFileChunk.from_filename(filename, start_byte=0, chunk_size=2) as chunk: val = chunk.read() self.assertEqual(val, b'ab') def test_iter_is_always_empty(self): # This tests the workaround for the httplib bug (see # the source for more info). 
filename = os.path.join(self.tempdir, 'foo') open(filename, 'wb').close() chunk = ReadFileChunk.from_filename( filename, start_byte=0, chunk_size=10) self.assertEqual(list(chunk), []) def test_callback_is_invoked_on_read(self): chunk = ReadFileChunk.from_filename( self.filename, start_byte=0, chunk_size=3, callbacks=[self.callback]) chunk.read(1) chunk.read(1) chunk.read(1) self.assertEqual(self.amounts_seen, [1, 1, 1]) def test_all_callbacks_invoked_on_read(self): chunk = ReadFileChunk.from_filename( self.filename, start_byte=0, chunk_size=3, callbacks=[self.callback, self.callback]) chunk.read(1) chunk.read(1) chunk.read(1) # The list should be twice as long because there are two callbacks # recording the amount read. self.assertEqual(self.amounts_seen, [1, 1, 1, 1, 1, 1]) def test_callback_can_be_disabled(self): chunk = ReadFileChunk.from_filename( self.filename, start_byte=0, chunk_size=3, callbacks=[self.callback]) chunk.disable_callback() # Now reading from the ReadFileChunk should not invoke # the callback. chunk.read() self.assertEqual(self.amounts_seen, []) def test_callback_will_also_be_triggered_by_seek(self): chunk = ReadFileChunk.from_filename( self.filename, start_byte=0, chunk_size=3, callbacks=[self.callback]) chunk.read(2) chunk.seek(0) chunk.read(2) chunk.seek(1) chunk.read(2) self.assertEqual(self.amounts_seen, [2, -2, 2, -1, 2]) def test_close_callbacks(self): with open(self.filename) as f: chunk = ReadFileChunk(f, chunk_size=1, full_file_size=3, close_callbacks=[self.close_callback]) chunk.close() self.assertEqual(self.num_close_callback_calls, 1) def test_close_callbacks_when_not_enabled(self): with open(self.filename) as f: chunk = ReadFileChunk(f, chunk_size=1, full_file_size=3, enable_callbacks=False, close_callbacks=[self.close_callback]) chunk.close() self.assertEqual(self.num_close_callback_calls, 0) def test_close_callbacks_when_context_handler_is_used(self): with open(self.filename) as f: with ReadFileChunk(f, chunk_size=1, full_file_size=3, close_callbacks=[self.close_callback]) as chunk: chunk.read(1) self.assertEqual(self.num_close_callback_calls, 1) def test_signal_transferring(self): chunk = ReadFileChunk.from_filename( self.filename, start_byte=0, chunk_size=3, callbacks=[self.callback]) chunk.signal_not_transferring() chunk.read(1) self.assertEqual(self.amounts_seen, []) chunk.signal_transferring() chunk.read(1) self.assertEqual(self.amounts_seen, [1]) def test_signal_transferring_to_underlying_fileobj(self): underlying_stream = mock.Mock() underlying_stream.tell.return_value = 0 chunk = ReadFileChunk(underlying_stream, 3, 3) chunk.signal_transferring() self.assertTrue(underlying_stream.signal_transferring.called) def test_no_call_signal_transferring_to_underlying_fileobj(self): underlying_stream = mock.Mock(io.RawIOBase) underlying_stream.tell.return_value = 0 chunk = ReadFileChunk(underlying_stream, 3, 3) try: chunk.signal_transferring() except AttributeError: self.fail( 'The stream should not have tried to call signal_transferring ' 'to the underlying stream.' 
) def test_signal_not_transferring_to_underlying_fileobj(self): underlying_stream = mock.Mock() underlying_stream.tell.return_value = 0 chunk = ReadFileChunk(underlying_stream, 3, 3) chunk.signal_not_transferring() self.assertTrue(underlying_stream.signal_not_transferring.called) def test_no_call_signal_not_transferring_to_underlying_fileobj(self): underlying_stream = mock.Mock(io.RawIOBase) underlying_stream.tell.return_value = 0 chunk = ReadFileChunk(underlying_stream, 3, 3) try: chunk.signal_not_transferring() except AttributeError: self.fail( 'The stream should not have tried to call ' 'signal_not_transferring to the underlying stream.' ) class TestStreamReaderProgress(BaseUtilsTest): def test_proxies_to_wrapped_stream(self): original_stream = six.StringIO('foobarbaz') wrapped = StreamReaderProgress(original_stream) self.assertEqual(wrapped.read(), 'foobarbaz') def test_callback_invoked(self): original_stream = six.StringIO('foobarbaz') wrapped = StreamReaderProgress( original_stream, [self.callback, self.callback]) self.assertEqual(wrapped.read(), 'foobarbaz') self.assertEqual(self.amounts_seen, [9, 9]) class TestTaskSemaphore(unittest.TestCase): def setUp(self): self.semaphore = TaskSemaphore(1) def test_should_block_at_max_capacity(self): self.semaphore.acquire('a', blocking=False) with self.assertRaises(NoResourcesAvailable): self.semaphore.acquire('a', blocking=False) def test_release_capacity(self): acquire_token = self.semaphore.acquire('a', blocking=False) self.semaphore.release('a', acquire_token) try: self.semaphore.acquire('a', blocking=False) except NoResourcesAvailable: self.fail( 'The release of the semaphore should have allowed for ' 'the second acquire to not be blocked' ) class TestSlidingWindowSemaphore(unittest.TestCase): # These tests use block=False to tests will fail # instead of hang the test runner in the case of x # incorrect behavior. def test_acquire_release_basic_case(self): sem = SlidingWindowSemaphore(1) # Count is 1 num = sem.acquire('a', blocking=False) self.assertEqual(num, 0) sem.release('a', 0) # Count now back to 1. def test_can_acquire_release_multiple_times(self): sem = SlidingWindowSemaphore(1) num = sem.acquire('a', blocking=False) self.assertEqual(num, 0) sem.release('a', num) num = sem.acquire('a', blocking=False) self.assertEqual(num, 1) sem.release('a', num) def test_can_acquire_a_range(self): sem = SlidingWindowSemaphore(3) self.assertEqual(sem.acquire('a', blocking=False), 0) self.assertEqual(sem.acquire('a', blocking=False), 1) self.assertEqual(sem.acquire('a', blocking=False), 2) sem.release('a', 0) sem.release('a', 1) sem.release('a', 2) # Now we're reset so we should be able to acquire the same # sequence again. self.assertEqual(sem.acquire('a', blocking=False), 3) self.assertEqual(sem.acquire('a', blocking=False), 4) self.assertEqual(sem.acquire('a', blocking=False), 5) self.assertEqual(sem.current_count(), 0) def test_counter_release_only_on_min_element(self): sem = SlidingWindowSemaphore(3) sem.acquire('a', blocking=False) sem.acquire('a', blocking=False) sem.acquire('a', blocking=False) # The count only increases when we free the min # element. This means if we're currently failing to # acquire now: with self.assertRaises(NoResourcesAvailable): sem.acquire('a', blocking=False) # Then freeing a non-min element: sem.release('a', 1) # doesn't change anything. We still fail to acquire. 
with self.assertRaises(NoResourcesAvailable): sem.acquire('a', blocking=False) self.assertEqual(sem.current_count(), 0) def test_raises_error_when_count_is_zero(self): sem = SlidingWindowSemaphore(3) sem.acquire('a', blocking=False) sem.acquire('a', blocking=False) sem.acquire('a', blocking=False) # Count is now 0 so trying to acquire should fail. with self.assertRaises(NoResourcesAvailable): sem.acquire('a', blocking=False) def test_release_counters_can_increment_counter_repeatedly(self): sem = SlidingWindowSemaphore(3) sem.acquire('a', blocking=False) sem.acquire('a', blocking=False) sem.acquire('a', blocking=False) # These two releases don't increment the counter # because we're waiting on 0. sem.release('a', 1) sem.release('a', 2) self.assertEqual(sem.current_count(), 0) # But as soon as we release 0, we free up 0, 1, and 2. sem.release('a', 0) self.assertEqual(sem.current_count(), 3) sem.acquire('a', blocking=False) sem.acquire('a', blocking=False) sem.acquire('a', blocking=False) def test_error_to_release_unknown_tag(self): sem = SlidingWindowSemaphore(3) with self.assertRaises(ValueError): sem.release('a', 0) def test_can_track_multiple_tags(self): sem = SlidingWindowSemaphore(3) self.assertEqual(sem.acquire('a', blocking=False), 0) self.assertEqual(sem.acquire('b', blocking=False), 0) self.assertEqual(sem.acquire('a', blocking=False), 1) # We're at our max of 3 even though 2 are for A and 1 is for B. with self.assertRaises(NoResourcesAvailable): sem.acquire('a', blocking=False) with self.assertRaises(NoResourcesAvailable): sem.acquire('b', blocking=False) def test_can_handle_multiple_tags_released(self): sem = SlidingWindowSemaphore(4) sem.acquire('a', blocking=False) sem.acquire('a', blocking=False) sem.acquire('b', blocking=False) sem.acquire('b', blocking=False) sem.release('b', 1) sem.release('a', 1) self.assertEqual(sem.current_count(), 0) sem.release('b', 0) self.assertEqual(sem.acquire('a', blocking=False), 2) sem.release('a', 0) self.assertEqual(sem.acquire('b', blocking=False), 2) def test_is_error_to_release_unknown_sequence_number(self): sem = SlidingWindowSemaphore(3) sem.acquire('a', blocking=False) with self.assertRaises(ValueError): sem.release('a', 1) def test_is_error_to_double_release(self): # This is different than other error tests because # we're verifying we can reset the state after an # acquire/release cycle. sem = SlidingWindowSemaphore(2) sem.acquire('a', blocking=False) sem.acquire('a', blocking=False) sem.release('a', 0) sem.release('a', 1) self.assertEqual(sem.current_count(), 2) with self.assertRaises(ValueError): sem.release('a', 0) def test_can_check_in_partial_range(self): sem = SlidingWindowSemaphore(4) sem.acquire('a', blocking=False) sem.acquire('a', blocking=False) sem.acquire('a', blocking=False) sem.acquire('a', blocking=False) sem.release('a', 1) sem.release('a', 3) sem.release('a', 0) self.assertEqual(sem.current_count(), 2) class TestThreadingPropertiesForSlidingWindowSemaphore(unittest.TestCase): # These tests focus on mutithreaded properties of the range # semaphore. Basic functionality is tested in TestSlidingWindowSemaphore. 
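    # Worker threads are appended to self.threads so tearDown can always
    # join them, even if an assertion fails partway through a test.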
def setUp(self): self.threads = [] def tearDown(self): self.join_threads() def join_threads(self): for thread in self.threads: thread.join() self.threads = [] def start_threads(self): for thread in self.threads: thread.start() def test_acquire_blocks_until_release_is_called(self): sem = SlidingWindowSemaphore(2) sem.acquire('a', blocking=False) sem.acquire('a', blocking=False) def acquire(): # This next call to acquire will block. self.assertEqual(sem.acquire('a', blocking=True), 2) t = threading.Thread(target=acquire) self.threads.append(t) # Starting the thread will block the sem.acquire() # in the acquire function above. t.start() # This still will keep the thread blocked. sem.release('a', 1) # Releasing the min element will unblock the thread. sem.release('a', 0) t.join() sem.release('a', 2) def test_stress_invariants_random_order(self): sem = SlidingWindowSemaphore(100) for _ in range(10): recorded = [] for _ in range(100): recorded.append(sem.acquire('a', blocking=False)) # Release them in randomized order. As long as we # eventually free all 100, we should have all the # resources released. random.shuffle(recorded) for i in recorded: sem.release('a', i) # Everything's freed so should be back at count == 100 self.assertEqual(sem.current_count(), 100) def test_blocking_stress(self): sem = SlidingWindowSemaphore(5) num_threads = 10 num_iterations = 50 def acquire(): for _ in range(num_iterations): num = sem.acquire('a', blocking=True) time.sleep(0.001) sem.release('a', num) for i in range(num_threads): t = threading.Thread(target=acquire) self.threads.append(t) self.start_threads() self.join_threads() # Should have all the available resources freed. self.assertEqual(sem.current_count(), 5) # Should have acquired num_threads * num_iterations self.assertEqual(sem.acquire('a', blocking=False), num_threads * num_iterations) class TestAdjustChunksize(unittest.TestCase): def setUp(self): self.adjuster = ChunksizeAdjuster() def test_valid_chunksize(self): chunksize = 7 * (1024 ** 2) file_size = 8 * (1024 ** 2) new_size = self.adjuster.adjust_chunksize(chunksize, file_size) self.assertEqual(new_size, chunksize) def test_chunksize_below_minimum(self): chunksize = MIN_UPLOAD_CHUNKSIZE - 1 file_size = 3 * MIN_UPLOAD_CHUNKSIZE new_size = self.adjuster.adjust_chunksize(chunksize, file_size) self.assertEqual(new_size, MIN_UPLOAD_CHUNKSIZE) def test_chunksize_above_maximum(self): chunksize = MAX_SINGLE_UPLOAD_SIZE + 1 file_size = MAX_SINGLE_UPLOAD_SIZE * 2 new_size = self.adjuster.adjust_chunksize(chunksize, file_size) self.assertEqual(new_size, MAX_SINGLE_UPLOAD_SIZE) def test_chunksize_too_small(self): chunksize = 7 * (1024 ** 2) file_size = 5 * (1024 ** 4) # If we try to upload a 5TB file, we'll need to use 896MB part # sizes. 
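        # 896 MiB is 7 MiB doubled seven times (7 * 2**7); with S3's
        # 10,000-part limit a 5 TiB upload needs parts of at least ~524 MiB,
        # and 896 MiB is the first doubling that clears that bar.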
        new_size = self.adjuster.adjust_chunksize(chunksize, file_size)
        self.assertEqual(new_size, 896 * (1024 ** 2))
        num_parts = file_size / new_size
        self.assertLessEqual(num_parts, MAX_PARTS)

    def test_unknown_file_size_with_valid_chunksize(self):
        chunksize = 7 * (1024 ** 2)
        new_size = self.adjuster.adjust_chunksize(chunksize)
        self.assertEqual(new_size, chunksize)

    def test_unknown_file_size_below_minimum(self):
        chunksize = MIN_UPLOAD_CHUNKSIZE - 1
        new_size = self.adjuster.adjust_chunksize(chunksize)
        self.assertEqual(new_size, MIN_UPLOAD_CHUNKSIZE)

    def test_unknown_file_size_above_maximum(self):
        chunksize = MAX_SINGLE_UPLOAD_SIZE + 1
        new_size = self.adjuster.adjust_chunksize(chunksize)
        self.assertEqual(new_size, MAX_SINGLE_UPLOAD_SIZE)
s3transfer-0.1.13/tox.ini000066400000000000000000000004171324114246300152030ustar00rootroot00000000000000[tox]
envlist = py26,py27,py33,py34,py35,py36

# Comment to build sdist and install into virtualenv
# This is helpful to test installation but takes extra time
skipsdist = True

[testenv]
commands =
    {toxinidir}/scripts/ci/install
    {toxinidir}/scripts/ci/run-tests