pax_global_header00006660000000000000000000000064152015001320014477gustar00rootroot0000000000000052 comment=d80135635b4df207574689c16e3f67d33e0cbf3a dromedary-0.1.5/000077500000000000000000000000001520150013200134705ustar00rootroot00000000000000dromedary-0.1.5/.bzrignore000066400000000000000000000024521520150013200154750ustar00rootroot00000000000000*.py[oc] # These are created as byproducts of our test suite ./test*.tmp ./.python-eggs ./breezy.egg-info ./.bzr.log # Generated files CHANGELOG # generated documents brz.1 MANIFEST ./doc/*.html ./doc/*/_build/ ./doc/*/Makefile ./doc/*/make.bat ./tutorial.html ./build_doc_website ./html_docs ./pretty_docs ./api ./doc/**/*.html ./doc/developers/performance.png ./doc/en/user-reference/*.txt ./doc/en/release-notes/index.txt ./doc/en/release-notes/NEWS.txt BRANCH-INFO # setup.py working directory ./build ./build-win32 ./breezy/locale # Editor temporary/working/backup files *$ .*.sw[nop] .sw[nop] *~ [#]*# .#* ./tags ./breezy/tags ./TAGS # The shelf plugin uses this dir ./.shelf # Mac droppings .DS_Store # win32 installer generated files ./doc/*.html ./doc/brz_man.txt ./py2exe.log ./tools/win32/bzr.iss ./dist # performance history data file ./.perf_history # Pyrex breezy/_bencode_pyx.c breezy/bzr/_btree_serializer_pyx.c breezy/_chunks_to_lines_pyx.c breezy/bzr/_dirstate_helpers_pyx.c breezy/bzr/_groupcompress_pyx.c breezy/bzr/_knit_load_data_pyx.c breezy/_readdir_pyx.c # built extension modules breezy/_*.dll breezy/_*.so breezy/_*.pyd ./.ccache .testrepository selftest.log .coverage doc/developers/api/*.txt __pycache__ .mypy_cache # rust bits ./target ./brz *.so .pytest_cache .ruff_cache locale .ruff_cache dromedary-0.1.5/.coveragerc000066400000000000000000000001341520150013200156070ustar00rootroot00000000000000[run] branch = True source = breezy [report] exclude_lines = raise NotImplementedError dromedary-0.1.5/.dockerignore000066400000000000000000000000161520150013200161410ustar00rootroot00000000000000Dockerfile *~ 
dromedary-0.1.5/.github/000077500000000000000000000000001520150013200150305ustar00rootroot00000000000000dromedary-0.1.5/.github/dependabot.yaml000066400000000000000000000012221520150013200200160ustar00rootroot00000000000000# Keep GitHub Actions up to date with GitHub's Dependabot... # https://docs.github.com/en/code-security/dependabot/working-with-dependabot/keeping-your-actions-up-to-date-with-dependabot # https://docs.github.com/en/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file#package-ecosystem version: 2 updates: - package-ecosystem: "cargo" directory: "/" schedule: interval: "weekly" rebase-strategy: "disabled" - package-ecosystem: "github-actions" directory: "/" schedule: interval: weekly - package-ecosystem: "pip" directory: "/" schedule: interval: weekly dromedary-0.1.5/.github/workflows/000077500000000000000000000000001520150013200170655ustar00rootroot00000000000000dromedary-0.1.5/.github/workflows/breezy-tests.yml000066400000000000000000000032701520150013200222520ustar00rootroot00000000000000--- name: Breezy test suite "on": [push, pull_request] env: PYO3_USE_ABI3_FORWARD_COMPATIBILITY: "1" jobs: breezy: name: Run breezy 3.4 tests against this dromedary runs-on: ubuntu-latest steps: - name: Check out dromedary uses: actions/checkout@v6 with: path: dromedary - name: Check out breezy 3.4 uses: actions/checkout@v6 with: repository: breezy-team/breezy ref: '3.4' path: breezy - name: Set up Python uses: actions/setup-python@v6 with: python-version: '3.12' - name: Set up Rust uses: dtolnay/rust-toolchain@stable - name: Install apt dependencies run: | sudo apt-get update sudo apt-get install -y quilt - name: Install build tooling run: | python -m pip install --upgrade pip python -m pip install -U "setuptools>=60" setuptools-gettext \ setuptools-rust cython - name: Install dromedary from this checkout working-directory: dromedary run: | pip install ".[dev,testing,paramiko]" - name: Install breezy and its 
test dependencies working-directory: breezy run: | pip install \ ".[dev,paramiko,doc,launchpad,git,fastimport,workspace]" \ pyinotify - name: Build breezy extensions working-directory: breezy run: | make extensions PYTHON=python - name: Run breezy test suite working-directory: breezy env: PYTHONHASHSEED: random BRZ_PLUGIN_PATH: -site:-user PYTHONPATH: . run: | ./brz selftest dromedary-0.1.5/.github/workflows/pythonpackage.yml000066400000000000000000000051501520150013200224460ustar00rootroot00000000000000--- name: Python package "on": [push, pull_request] env: PYO3_USE_ABI3_FORWARD_COMPATIBILITY: "1" jobs: build: runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] python-version: ["3.10", "3.11", "3.12", "3.13", "3.14"] fail-fast: false steps: - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Set up Rust uses: dtolnay/rust-toolchain@stable - name: Install dependencies shell: bash run: | python -m pip install --upgrade pip python -m pip install -U "setuptools>=60" setuptools-rust if [[ "${{ matrix.python-version }}" == "3.14"* ]]; then python -m pip install -e ".[dev,testing]" else python -m pip install -e ".[dev,testing,paramiko]" fi - name: Run ruff if: matrix.os == 'ubuntu-latest' run: | ruff check . ruff format --check . - name: Run mypy if: matrix.os == 'ubuntu-latest' run: | python -m pip install mypy types-paramiko typing-extensions python -m mypy dromedary continue-on-error: true - name: Test suite run: | python -m unittest discover -s dromedary/tests -t . 
env: PYTHONHASHSEED: random - name: Rust tests run: | cargo test --manifest-path Cargo.toml build-gio: runs-on: ubuntu-latest strategy: matrix: python-version: ["3.12"] fail-fast: false steps: - uses: actions/checkout@v6 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} - name: Set up Rust uses: dtolnay/rust-toolchain@stable - name: Install GIO system libraries run: | sudo apt-get update sudo apt-get install -y libglib2.0-dev pkg-config - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install -U "setuptools>=60" setuptools-rust DROMEDARY_GIO=1 python -m pip install -e ".[dev,testing,paramiko]" - name: Rust tests (gio feature) run: | cargo test --manifest-path Cargo.toml --features gio - name: Python test suite (gio enabled) run: | python -m unittest discover -s dromedary/tests -t . env: PYTHONHASHSEED: random dromedary-0.1.5/.github/workflows/wheels.yaml000066400000000000000000000140451520150013200212440ustar00rootroot00000000000000name: Build Python Wheels on: push: pull_request: schedule: - cron: "0 6 * * *" # Daily 6AM UTC build env: PYO3_USE_ABI3_FORWARD_COMPATIBILITY: "1" jobs: define-matrix: runs-on: ubuntu-latest outputs: matrix: ${{ steps.merged-identifiers.outputs.merged-identifiers }} steps: - uses: actions/checkout@v6 - uses: actions/setup-python@v6 with: python-version: '3.11' cache: pip - name: Install jq run: sudo apt-get update && sudo apt-get install -y jq - name: Install cibuildwheel run: pip install 'cibuildwheel>=3.0' - name: Find build identifiers using cibuildwheel --print-build-identifiers id: all-build-identifiers run: | echo "linux=$(cibuildwheel --platform linux --print-build-identifiers | tr '\n' ' ')" >> $GITHUB_OUTPUT echo "macos=$(cibuildwheel --platform macos --print-build-identifiers | tr '\n' ' ')" >> $GITHUB_OUTPUT echo "windows=$(cibuildwheel --platform windows --print-build-identifiers | tr '\n' ' ')" >> 
$GITHUB_OUTPUT - name: Select build identifiers id: select-build-identifiers run: | if [[ "$GITHUB_REF" = "refs/heads/main" ]] || [[ "$GITHUB_REF" = "refs/heads/master" ]] || [[ "$GITHUB_REF" = "refs/tags/"* ]]; then echo 'linux=${{ steps.all-build-identifiers.outputs.linux }}' >> $GITHUB_OUTPUT echo 'windows=${{ steps.all-build-identifiers.outputs.windows }}' >> $GITHUB_OUTPUT echo 'macos=${{ steps.all-build-identifiers.outputs.macos }}' >> $GITHUB_OUTPUT else echo "linux=$(echo -n '${{ steps.all-build-identifiers.outputs.linux }}' | awk '{print $NF}')" >> $GITHUB_OUTPUT echo "macos=$(echo -n '${{ steps.all-build-identifiers.outputs.macos }}' | awk '{print $NF}')" >> $GITHUB_OUTPUT echo "windows=$(echo -n '${{ steps.all-build-identifiers.outputs.windows }}' | awk '{print $NF}')" >> $GITHUB_OUTPUT fi - name: Output build identifiers id: json-identifiers run: | echo "linux=$(echo -n '${{ steps.select-build-identifiers.outputs.linux }}' | jq -R -s -c 'split(" ") | map(select(length > 0)) | [.[] | {os: "ubuntu-latest", "build-identifier": .}]')" >> $GITHUB_OUTPUT echo "macos=$(echo -n '${{ steps.select-build-identifiers.outputs.macos }}' | jq -R -s -c 'split(" ") | map(select(length > 0)) | [.[] | {os: "macos-latest", "build-identifier": .}]')" >> $GITHUB_OUTPUT echo "windows=$(echo -n '${{ steps.select-build-identifiers.outputs.windows }}' | jq -R -s -c 'split(" ") | map(select(length > 0)) | [.[] | {os: "windows-latest", "build-identifier": .}]')" >> $GITHUB_OUTPUT - name: Merge build identifiers id: merged-identifiers run: | echo merged-identifiers=$(echo -n '${{ steps.json-identifiers.outputs.linux }} ${{ steps.json-identifiers.outputs.macos }} ${{ steps.json-identifiers.outputs.windows }}' | jq -c -s 'add') >> $GITHUB_OUTPUT build-wheels: runs-on: ${{ matrix.os }} needs: define-matrix strategy: matrix: include: ${{ fromJSON(needs.define-matrix.outputs.matrix ) }} fail-fast: ${{ !startsWith(github.ref, 'refs/tags/') }} steps: - uses: actions/checkout@v6 - uses: 
actions/setup-python@v6 with: python-version: '3.11' - name: set up rust if: matrix.os != 'ubuntu' uses: actions-rs/toolchain@v1 with: profile: minimal toolchain: stable override: true - name: Install dependencies run: | python -m pip install --upgrade pip pip install setuptools wheel 'cibuildwheel>=3.0' - name: Set up QEMU uses: docker/setup-qemu-action@v4 if: "matrix.os == 'ubuntu-latest'" - name: Build wheels run: python -m cibuildwheel --only "${{ matrix.build-identifier }}" --output-dir wheelhouse env: CIBW_ENVIRONMENT: 'PATH="$HOME/.cargo/bin:$PATH" PYO3_USE_ABI3_FORWARD_COMPATIBILITY="1" MACOSX_DEPLOYMENT_TARGET="10.15"' CIBW_BEFORE_BUILD: > pip install -U setuptools-rust && curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain=stable --profile=minimal -y && rustup show CIBW_BEFORE_BUILD_LINUX: > pip install -U setuptools-rust && curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain=stable --profile=minimal -y && rustup show CIBW_REPAIR_WHEEL_COMMAND_MACOS: > MACOSX_DEPLOYMENT_TARGET=10.15 delocate-wheel --require-archs {delocate_archs} -w {dest_dir} -v {wheel} CIBW_BEFORE_BUILD_MACOS: > pip install -U setuptools-rust && curl https://sh.rustup.rs -sSf | sh -s -- --default-toolchain=stable --profile=minimal -y && rustup target add x86_64-apple-darwin && rustup show - name: Upload wheels uses: actions/upload-artifact@v7 with: name: artifact-${{ matrix.build-identifier }} path: ./wheelhouse/*.whl build-sdist: runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 - uses: actions/setup-python@v6 with: python-version: '3.11' - name: Build sdist run: | python -m pip install --upgrade build python -m build --sdist - name: Upload sdist uses: actions/upload-artifact@v7 with: name: artifact-sdist path: ./dist/*.tar.gz publish: runs-on: ubuntu-latest needs: [build-wheels, build-sdist] if: github.event_name == 'push' && startsWith(github.ref, 'refs/tags/v') permissions: id-token: write environment: name: pypi url: https://pypi.org/p/dromedary steps: - 
name: Download distributions uses: actions/download-artifact@v8 with: path: dist pattern: artifact-* merge-multiple: true - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: packages-dir: dist dromedary-0.1.5/.gitignore000066400000000000000000000006371520150013200154660ustar00rootroot00000000000000__pycache__ *.pyc build/ *_pyx.so *_pyx.c *_pyx.h *_pyx_api.h *_pyx.cpython-*.so *_c.cpython-*.so *_c.so *_pyx.cpython-*.c *~ /target /brz /Cargo.lock *.cpython-*.so .testrepository/ *.swp *.swo *.swn .*.swp .mypy_cache/ breezy/locale/ .claude/settings.local.json selftest.log doc/en/user-reference/*.txt doc/en/_build/ doc/developers/_build/ doc/developers/Makefile doc/developers/make.bat *.so dromedary.egg-info dromedary-0.1.5/.mailmap000066400000000000000000000004531520150013200151130ustar00rootroot00000000000000Jelmer Vernooij Jelmer Vernooij Jelmer Vernooij INADA Naoki Martin Packman dromedary-0.1.5/.rsyncexclude000066400000000000000000000003131520150013200161760ustar00rootroot00000000000000*.pyc *.pyo *~ # arch can bite me {arch} .arch-ids ,,* ++* /doc/*.html *.tmp bzr-test.log [#]*# .#* testrev.* /tmp # do want this after all + CHANGELOG /build test*.tmp .*.swp *.orig .*.orig .bzr-shelf* dromedary-0.1.5/.testr.conf000066400000000000000000000003161520150013200155560ustar00rootroot00000000000000[DEFAULT] test_command=PYTHONPATH=`pwd`:$PYTHONPATH BRZ_PLUGIN_PATH=-site:-user python3 -m subunit.run discover dromedary.tests $IDOPTION $LISTOPT test_id_option=--load-list $IDFILE test_list_option=--list dromedary-0.1.5/CODE_OF_CONDUCT.md000066400000000000000000000064271520150013200163000ustar00rootroot00000000000000# Contributor Covenant Code of Conduct ## Our Pledge In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, sex characteristics, gender 
identity and expression, level of experience, education, socio-economic status, nationality, personal appearance, race, religion, or sexual identity and orientation. ## Our Standards Examples of behavior that contributes to creating a positive environment include: * Using welcoming and inclusive language * Being respectful of differing viewpoints and experiences * Gracefully accepting constructive criticism * Focusing on what is best for the community * Showing empathy towards other community members Examples of unacceptable behavior by participants include: * The use of sexualized language or imagery and unwelcome sexual attention or advances * Trolling, insulting/derogatory comments, and personal or political attacks * Public or private harassment * Publishing others' private information, such as a physical or electronic address, without explicit permission * Other conduct which could reasonably be considered inappropriate in a professional setting ## Our Responsibilities Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. ## Scope This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 
## Enforcement Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at core@breezy-vcs.org. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. ## Attribution This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html [homepage]: https://www.contributor-covenant.org For answers to common questions about this code of conduct, see https://www.contributor-covenant.org/faq dromedary-0.1.5/COPYING.txt000066400000000000000000000432541520150013200153510ustar00rootroot00000000000000 GNU GENERAL PUBLIC LICENSE Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. Preamble The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. 
When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. The precise terms and conditions for copying, distribution and modification follow. 
GNU GENERAL PUBLIC LICENSE TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION 0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. 1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. 2. 
You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. 
Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. 3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. 
However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. 4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. 5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. 6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. 7. 
If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. 8. 
If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. 9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. 10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. NO WARRANTY 11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. 
EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS How to Apply These Terms to Your New Programs If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. Copyright (C) This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. 
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. Also add information on how to contact you by electronic and paper mail. If the program is interactive, make it output a short notice like this when it starts in an interactive mode: Gnomovision version 69, Copyright (C) year name of author Gnomovision comes with ABSOLUTELY NO WARRANTY; for details type `show w'. This is free software, and you are welcome to redistribute it under certain conditions; type `show c' for details. The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: Yoyodyne, Inc., hereby disclaims all copyright interest in the program `Gnomovision' (which makes passes at compilers) written by James Hacker. , 1 April 1989 Ty Coon, President of Vice This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. 
dromedary-0.1.5/Cargo.toml000066400000000000000000000051211520150013200154170ustar00rootroot00000000000000[workspace] resolver = "2" members = [ "_transport_rs", ] [workspace.package] version = "0.1.5" [workspace.dependencies] nix = "0.31" pyo3 = "0.28" pyo3-filelike = "0.5.2" url = "2" log = "0.4" [package] name = "dromedary" version = { workspace = true } edition = "2021" description = "Transport layer for Breezy" license = "GPL-2.0+" repository = "https://github.com/breezy-team/dromedary" homepage = "https://www.breezy-vcs.org/" documentation = "https://www.breezy-vcs.org/doc/" authors = [ "Martin Packman ", "Jelmer Vernooij "] [lib] [dependencies] url = { workspace = true } tempfile = "3" pyo3 = { workspace = true, optional = true } pyo3-filelike = { workspace = true, optional = true } path-clean = "1" walkdir = "2.3" lazy_static = "1" log = { workspace = true } libc = "0.2" regex = "1.5.4" percent-encoding = "2.1.0" md-5 = "0.10" sha1 = "0.10" hex = "0.4" rand = "0.8" rustls-native-certs = "0.8" base64 = "0.22" # reqwest replaces ureq: ureq-proto (transitive) hardcodes a method # whitelist that rejects WebDAV verbs (MKCOL, MOVE, COPY, PROPFIND). # reqwest is hyper-based and accepts arbitrary methods. `blocking` # keeps our sync API shape; `rustls-tls-native-roots` wires rustls # with the platform's native trust store. reqwest = { version = "0.12", default-features = false, features = ["rustls-tls-native-roots", "gzip", "blocking"] } # Lightweight dep used only for typed error classification — we # downcast reqwest's error chain to `hyper::Error` so we can # distinguish protocol-level parse failures (bad HTTP version, bad # status line) from connection-level ones (DNS/TCP/TLS) without # string-matching the error message. 
hyper = { version = "1", default-features = false } http = "1" quick-xml = { version = "0.37", optional = true } gio = { version = "0.22", optional = true } glib = { version = "0.22", optional = true } # SFTP client used by the optional sftp transport. Construction takes # any Read+Write channel so we don't pull in an SSH library here. sftp = { version = "0.2.3", optional = true } [features] default = ["pyo3"] pyo3 = ["dep:pyo3", "dep:pyo3-filelike"] gio = ["dep:gio", "dep:glib"] # WebDAV transport on top of HTTP. Pulls in quick-xml for parsing # multi-status PROPFIND responses; opt-in because callers that only # need HTTP shouldn't pay the XML-parser cost. webdav = ["dep:quick-xml"] # SFTP transport. Caller supplies the SSH byte-stream channel — no # SSH-library dep is added here so embedders pick their own backend. sftp = ["dep:sftp"] [target.'cfg(unix)'.dependencies] nix = { workspace = true, features = ["fs", "uio"] } dromedary-0.1.5/MANIFEST.in000066400000000000000000000004571520150013200152340ustar00rootroot00000000000000include README.md setup.py COPYING.txt include Cargo.toml Cargo.lock recursive-include dromedary *.py *.rs *.toml recursive-include dromedary/tests/ssl_certs * recursive-include crates Cargo.toml *.rs recursive-include _transport_rs Cargo.toml *.rs recursive-include src *.rs include dromedary/py.typed dromedary-0.1.5/Makefile000066400000000000000000000010501520150013200151240ustar00rootroot00000000000000PYTHON ?= python3 CARGO ?= cargo .PHONY: all build test test-python test-rust check fmt clippy clean install all: build build: $(PYTHON) -m pip install -e . --no-build-isolation install: $(PYTHON) -m pip install . test: test-python test-rust test-python: $(PYTHON) -m unittest discover -t . 
-s dromedary/tests -p 'test_*.py' test-rust: $(CARGO) test --workspace check: fmt clippy test fmt: $(CARGO) fmt --all ruff format dromedary clippy: $(CARGO) clippy --workspace --all-targets clean: $(CARGO) clean rm -rf build dist *.egg-info dromedary-0.1.5/README.md000066400000000000000000000024441520150013200147530ustar00rootroot00000000000000# Dromedary Dromedary is a transport layer abstraction for version control systems, extracted from the [Breezy](https://www.breezy-vcs.org/) version control system. ## Overview Dromedary provides a uniform interface for accessing files and directories across different protocols and storage backends. It supports: - Local filesystem access - HTTP/HTTPS for web-based repositories - SFTP for secure remote access - Memory-based transport for testing - Various transport decorators for additional functionality ## Features - Protocol abstraction layer - Support for multiple transport protocols (file, http, https, sftp, memory) - Transport decorators for logging, readonly access, etc. 
- Comprehensive test coverage - Well-documented API ## Installation ```bash pip install dromedary ``` ## Usage ```python from dromedary import get_transport # Get a transport for a local directory transport = get_transport('/path/to/directory') # Get a transport for an HTTP URL transport = get_transport('http://example.com/repo') # Use the transport files = transport.list_dir('.') content = transport.get('filename').read() ``` ## Requirements - Python 3.8+ Optional dependencies: - paramiko (for SFTP support) - pygobject (for GIO transport support) ## License GNU General Public License v2 or later (GPLv2+) dromedary-0.1.5/_transport_rs/000077500000000000000000000000001520150013200163675ustar00rootroot00000000000000dromedary-0.1.5/_transport_rs/Cargo.toml000066400000000000000000000016501520150013200203210ustar00rootroot00000000000000[package] name = "dromedary-transport-rs" version = { workspace = true } edition = "2021" [lib] crate-type = ["cdylib"] [dependencies] dromedary = { path = "..", version = "0.1.3" } pyo3 = { workspace = true, features = ["extension-module"]} pyo3-filelike = { workspace = true } sftp = "0.3.0" url = { workspace = true } log = { workspace = true } russh = { version = "0.54", optional = true } tokio = { version = "1", features = ["rt", "io-util", "net", "sync", "macros"], optional = true } lazy_static = "1" [features] default = ["russh", "webdav"] gio = ["dromedary/gio"] russh = ["dep:russh", "dep:tokio"] # WebDAV bindings. Forwards to dromedary/webdav which pulls in # quick-xml for parsing multi-status PROPFIND responses. webdav = ["dromedary/webdav"] # TODO: add `libssh2` and `ssh2-rs` features when those backends are implemented. 
[target.'cfg(unix)'.dependencies] nix = { workspace = true, features = ["fs", "socket"] } dromedary-0.1.5/_transport_rs/src/000077500000000000000000000000001520150013200171565ustar00rootroot00000000000000dromedary-0.1.5/_transport_rs/src/brokenrename.rs000066400000000000000000000060731520150013200222020ustar00rootroot00000000000000use crate::{Transport, TransportDecorator}; use dromedary::pyo3::PyTransport; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; const PREFIX: &str = "brokenrename+"; fn resolve_inner(py: Python, url: &str, decorated: Option>) -> PyResult> { if let Some(d) = decorated { return Ok(d); } if !url.starts_with(PREFIX) { return Err(PyValueError::new_err(format!( "url {:?} doesn't start with decorator prefix {:?}", url, PREFIX ))); } let rest = &url[PREFIX.len()..]; let dromedary = py.import("dromedary")?; let urlutils = py.import("dromedary.urlutils")?; let is_url: bool = urlutils.call_method1("is_url", (rest,))?.extract()?; let func = if is_url { dromedary.getattr("get_transport_from_url")? } else { dromedary.getattr("get_transport_from_path")? 
}; Ok(func.call1((rest,))?.unbind()) } fn wrap_inner(decorated: &Py, py: Python) -> Transport { let py_inner: Box = Box::new(PyTransport::from(decorated.clone_ref(py))); Transport(Box::new( dromedary::brokenrename::BrokenRenameTransport::new(py_inner), )) } #[pyclass(extends=TransportDecorator, subclass)] pub(crate) struct BrokenRenameTransportDecorator; #[pymethods] impl BrokenRenameTransportDecorator { #[new] #[pyo3(signature = (url, _decorated=None, _from_transport=None))] fn new( py: Python, url: &str, _decorated: Option>, _from_transport: Option>, ) -> PyResult> { let _ = _from_transport; let decorated = resolve_inner(py, url, _decorated)?; let wrapped = wrap_inner(&decorated, py); Ok(PyClassInitializer::from(wrapped) .add_subclass(TransportDecorator { decorated, prefix: PREFIX, }) .add_subclass(BrokenRenameTransportDecorator)) } #[classmethod] fn _get_url_prefix(_cls: &Bound<'_, pyo3::types::PyType>) -> &'static str { PREFIX } #[pyo3(signature = (offset=None))] fn clone<'a>( slf: PyRef<'a, Self>, py: Python<'a>, offset: Option>, ) -> PyResult> { let decorator: &TransportDecorator = slf.as_super(); let decorated = decorator.decorated.clone_ref(py); let decorated_clone = match offset { Some(o) => decorated.call_method1(py, "clone", (o,))?, None => decorated.call_method0(py, "clone")?, }; let wrapped = wrap_inner(&decorated_clone, py); let init = PyClassInitializer::from(wrapped) .add_subclass(TransportDecorator { decorated: decorated_clone, prefix: PREFIX, }) .add_subclass(BrokenRenameTransportDecorator); Bound::new(py, init) } } pub(crate) fn register(_py: Python, m: &Bound) -> PyResult<()> { m.add_class::()?; Ok(()) } dromedary-0.1.5/_transport_rs/src/fakenfs.rs000066400000000000000000000060161520150013200211440ustar00rootroot00000000000000use crate::{Transport, TransportDecorator}; use dromedary::pyo3::PyTransport; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; const PREFIX: &str = "fakenfs+"; fn resolve_inner(py: Python, url: &str, decorated: 
Option>) -> PyResult> { if let Some(d) = decorated { return Ok(d); } if !url.starts_with(PREFIX) { return Err(PyValueError::new_err(format!( "url {:?} doesn't start with decorator prefix {:?}", url, PREFIX ))); } let rest = &url[PREFIX.len()..]; let dromedary = py.import("dromedary")?; let urlutils = py.import("dromedary.urlutils")?; let is_url: bool = urlutils.call_method1("is_url", (rest,))?.extract()?; let func = if is_url { dromedary.getattr("get_transport_from_url")? } else { dromedary.getattr("get_transport_from_path")? }; Ok(func.call1((rest,))?.unbind()) } fn wrap_inner(decorated: &Py, py: Python) -> Transport { let py_inner: Box = Box::new(PyTransport::from(decorated.clone_ref(py))); Transport(Box::new(dromedary::fakenfs::FakeNfsTransport::new( py_inner, ))) } #[pyclass(extends=TransportDecorator, subclass)] pub(crate) struct FakeNFSTransportDecorator; #[pymethods] impl FakeNFSTransportDecorator { #[new] #[pyo3(signature = (url, _decorated=None, _from_transport=None))] fn new( py: Python, url: &str, _decorated: Option>, _from_transport: Option>, ) -> PyResult> { let _ = _from_transport; let decorated = resolve_inner(py, url, _decorated)?; let wrapped = wrap_inner(&decorated, py); Ok(PyClassInitializer::from(wrapped) .add_subclass(TransportDecorator { decorated, prefix: PREFIX, }) .add_subclass(FakeNFSTransportDecorator)) } #[classmethod] fn _get_url_prefix(_cls: &Bound<'_, pyo3::types::PyType>) -> &'static str { PREFIX } #[pyo3(signature = (offset=None))] fn clone<'a>( slf: PyRef<'a, Self>, py: Python<'a>, offset: Option>, ) -> PyResult> { let decorator: &TransportDecorator = slf.as_super(); let decorated = decorator.decorated.clone_ref(py); let decorated_clone = match offset { Some(o) => decorated.call_method1(py, "clone", (o,))?, None => decorated.call_method0(py, "clone")?, }; let wrapped = wrap_inner(&decorated_clone, py); let init = PyClassInitializer::from(wrapped) .add_subclass(TransportDecorator { decorated: decorated_clone, prefix: PREFIX, }) 
.add_subclass(FakeNFSTransportDecorator); Bound::new(py, init) } } pub(crate) fn register(_py: Python, m: &Bound) -> PyResult<()> { m.add_class::()?; Ok(()) } dromedary-0.1.5/_transport_rs/src/fakevfat.rs000066400000000000000000000060231520150013200213140ustar00rootroot00000000000000use crate::{Transport, TransportDecorator}; use dromedary::pyo3::PyTransport; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; const PREFIX: &str = "vfat+"; fn resolve_inner(py: Python, url: &str, decorated: Option>) -> PyResult> { if let Some(d) = decorated { return Ok(d); } if !url.starts_with(PREFIX) { return Err(PyValueError::new_err(format!( "url {:?} doesn't start with decorator prefix {:?}", url, PREFIX ))); } let rest = &url[PREFIX.len()..]; let dromedary = py.import("dromedary")?; let urlutils = py.import("dromedary.urlutils")?; let is_url: bool = urlutils.call_method1("is_url", (rest,))?.extract()?; let func = if is_url { dromedary.getattr("get_transport_from_url")? } else { dromedary.getattr("get_transport_from_path")? 
}; Ok(func.call1((rest,))?.unbind()) } fn wrap_inner(decorated: &Py, py: Python) -> Transport { let py_inner: Box = Box::new(PyTransport::from(decorated.clone_ref(py))); Transport(Box::new(dromedary::fakevfat::FakeVfatTransport::new( py_inner, ))) } #[pyclass(extends=TransportDecorator, subclass)] pub(crate) struct FakeVFATTransportDecorator; #[pymethods] impl FakeVFATTransportDecorator { #[new] #[pyo3(signature = (url, _decorated=None, _from_transport=None))] fn new( py: Python, url: &str, _decorated: Option>, _from_transport: Option>, ) -> PyResult> { let _ = _from_transport; let decorated = resolve_inner(py, url, _decorated)?; let wrapped = wrap_inner(&decorated, py); Ok(PyClassInitializer::from(wrapped) .add_subclass(TransportDecorator { decorated, prefix: PREFIX, }) .add_subclass(FakeVFATTransportDecorator)) } #[classmethod] fn _get_url_prefix(_cls: &Bound<'_, pyo3::types::PyType>) -> &'static str { PREFIX } #[pyo3(signature = (offset=None))] fn clone<'a>( slf: PyRef<'a, Self>, py: Python<'a>, offset: Option>, ) -> PyResult> { let decorator: &TransportDecorator = slf.as_super(); let decorated = decorator.decorated.clone_ref(py); let decorated_clone = match offset { Some(o) => decorated.call_method1(py, "clone", (o,))?, None => decorated.call_method0(py, "clone")?, }; let wrapped = wrap_inner(&decorated_clone, py); let init = PyClassInitializer::from(wrapped) .add_subclass(TransportDecorator { decorated: decorated_clone, prefix: PREFIX, }) .add_subclass(FakeVFATTransportDecorator); Bound::new(py, init) } } pub(crate) fn register(_py: Python, m: &Bound) -> PyResult<()> { m.add_class::()?; Ok(()) } dromedary-0.1.5/_transport_rs/src/gio.rs000066400000000000000000000022551520150013200203060ustar00rootroot00000000000000use crate::{map_transport_err_to_py_err, Transport}; use pyo3::prelude::*; #[pyclass(extends=Transport, subclass)] pub(crate) struct GioTransport {} #[pymethods] impl GioTransport { #[new] #[pyo3(signature = (base, _from_transport=None))] fn new(base: 
&str, _from_transport: Option>) -> PyResult<(Self, Transport)> { let _ = _from_transport; let rust = dromedary::gio::GioTransport::new(base) .map_err(|e| map_transport_err_to_py_err(e, None, None))?; Ok((GioTransport {}, Transport(Box::new(rust)))) } #[pyo3(signature = (offset=None))] fn clone<'a>( slf: PyRef<'a, Self>, py: Python<'a>, offset: Option, ) -> PyResult> { let super_ = slf.as_ref(); let inner = super_ .0 .clone(offset.as_deref()) .map_err(|e| map_transport_err_to_py_err(e, None, None))?; let init = PyClassInitializer::from(Transport(inner)).add_subclass(GioTransport {}); Bound::new(py, init) } } pub(crate) fn register(_py: Python, m: &Bound) -> PyResult<()> { m.add_class::()?; Ok(()) } dromedary-0.1.5/_transport_rs/src/http/000077500000000000000000000000001520150013200201355ustar00rootroot00000000000000dromedary-0.1.5/_transport_rs/src/http/client.rs000066400000000000000000000440621520150013200217670ustar00rootroot00000000000000//! Python bindings for `dromedary::http::client`. //! //! Exposes `HttpClient` as a `#[pyclass]`. The Python `HttpTransport` //! in `dromedary/http/urllib.py` uses this instead of the legacy //! urllib.request handler stack. //! //! The returned `HttpResponse` pyclass mirrors the shape the Python //! code previously got from its `Urllib3LikeResponse` adapter — //! `.status`, `.reason`, `.getheader()`, `.getheaders()`, `.data`, //! `.text`, `.read()`, `.readline()`, `.readlines()` — so existing //! callers (and the still-Python `HttpTransport.request` wrapper) see //! no breaking change. 
use std::path::PathBuf; use std::sync::Mutex; use std::time::Duration; use dromedary::http::client::{ ActivityCallback, ActivityDirection, ClientError, CredentialProvider, HttpClient as RsHttpClient, HttpClientConfig, HttpResponse as RsHttpResponse, NegotiateProvider, RequestOptions, TokenProvider, }; use pyo3::exceptions::{PyIOError, PyValueError}; use pyo3::import_exception; use pyo3::prelude::*; use pyo3::types::{PyBytes, PyList, PyString, PyTuple}; import_exception!(dromedary.errors, ConnectionError); import_exception!(dromedary.errors, InvalidHttpResponse); /// Turn a [`ClientError`] into an appropriate Python exception. /// /// Transport-level failures (DNS, TCP, TLS) become /// `dromedary.errors.ConnectionError`; malformed URLs / methods /// become `ValueError`; IO errors after the response started /// become `OSError`. This matches how the Python urllib-handler /// stack used to funnel errors. pub(super) fn client_err_to_py(err: ClientError) -> PyErr { match err { ClientError::InvalidRequest(msg) => PyValueError::new_err(msg), ClientError::Io(e) => PyIOError::new_err(e.to_string()), ClientError::Transport(e) => { // `ureq::Error`'s Display carries the useful context // (hostname, port, TLS reason). Wrap as ConnectionError // so breezy's retry loop triggers on it the way it did // for the old handler-layer errors. ConnectionError::new_err(e.to_string()) } } } /// CredentialProvider impl that delegates to the Python callback /// registered via `set_credential_lookup`. All state lives in the /// parent module's `CREDENTIAL_LOOKUP` so multiple clients share /// the same callback. 
pub(crate) struct PythonCredentialProvider; impl CredentialProvider for PythonCredentialProvider { fn lookup( &self, protocol: &str, host: &str, port: Option, realm: Option<&str>, user_hint: Option<&str>, is_proxy: bool, ) -> (Option, Option) { super::invoke_credential_lookup(protocol, host, port, realm, user_hint, is_proxy) } } /// NegotiateProvider that delegates to the Python callback /// registered via `set_negotiate_provider`. Dromedary ships a /// default implementation in `dromedary.http` that uses the /// Python `kerberos` module. pub(crate) struct PythonNegotiateProvider; impl NegotiateProvider for PythonNegotiateProvider { fn initial_token(&self, host: &str) -> Option { super::invoke_negotiate_provider(host) } } /// TokenProvider that delegates to the Python callback registered /// via `set_token_provider`. Breezy installs one that pulls /// preemptive bearer tokens from `authentication.conf`. pub(crate) struct PythonTokenProvider; impl TokenProvider for PythonTokenProvider { fn lookup( &self, protocol: &str, host: &str, port: Option, path: Option<&str>, ) -> Option<(String, String)> { super::invoke_token_provider(protocol, host, port, path) } } #[pyclass(module = "dromedary._transport_rs.http", frozen)] pub(crate) struct HttpClient { inner: RsHttpClient, // Default options applied when Python callers don't pass an // override. Wrapped in a Mutex because pyclass(frozen) forbids // &mut self; callers interact via dedicated setters below. defaults: Mutex, } #[pymethods] impl HttpClient { /// Construct a new client. /// /// `ca_certs` — optional path to a PEM bundle. /// `disable_verification` — matches Python's `ssl.CERT_NONE`. /// `user_agent` — if omitted, inherits the module-level default. /// `read_timeout_ms` — 0 or negative means "no timeout". 
#[new] #[pyo3(signature = ( ca_certs=None, disable_verification=false, user_agent=None, read_timeout_ms=0, ))] fn new( ca_certs: Option, disable_verification: bool, user_agent: Option, read_timeout_ms: i64, ) -> PyResult { let timeout = if read_timeout_ms > 0 { Some(Duration::from_millis(read_timeout_ms as u64)) } else { None }; let cfg = HttpClientConfig { ca_certs_path: ca_certs, disable_verification, user_agent, read_timeout: timeout, }; let mut inner = RsHttpClient::with_full_providers( cfg, Box::new(PythonCredentialProvider), Box::new(PythonNegotiateProvider), Box::new(PythonTokenProvider), ) .map_err(client_err_to_py)?; inner.set_auth_trace(Some(std::sync::Arc::new(|header: &str| { super::invoke_auth_header_trace(header); }))); Ok(Self { inner, defaults: Mutex::new(RequestOptions::default()), }) } /// Set the default `follow_redirects` flag for subsequent calls /// that don't pass an explicit `follow_redirects` argument. /// /// Exposed so breezy's `HttpTransport.request(..., retries=N)` /// can toggle following once per transport rather than threading /// the flag through every call site. fn set_default_follow_redirects(&self, follow: bool) { self.defaults.lock().unwrap().follow_redirects = follow; } /// Issue an HTTP request. /// /// `headers` is an iterable of `(name, value)` pairs (matching /// the stdlib pattern). `report_activity`, when provided, is a /// callable invoked as `report_activity(byte_count, direction)` /// where direction is `"read"` or `"write"` — matches the Python /// `Transport._report_activity` signature so breezy's progress /// bar integration works unchanged. 
#[pyo3(signature = ( method, url, headers=None, body=None, follow_redirects=None, report_activity=None, ))] fn request( &self, py: Python, method: &str, url: &str, headers: Option>, body: Option>, follow_redirects: Option, report_activity: Option>, ) -> PyResult { let header_pairs = match headers { Some(h) => extract_headers(py, &h)?, None => Vec::new(), }; let body_bytes = match body { Some(b) => extract_body(py, &b)?, None => Vec::new(), }; let opts = { let mut o = self.defaults.lock().unwrap().clone(); if let Some(f) = follow_redirects { o.follow_redirects = f; } o }; let activity: Option = report_activity.map(make_activity_callback); // `Python::detach` releases the GIL while the HTTP call is // in flight so other Python threads can run (matching the // behaviour of the old urllib-based transport, which did the // actual socket read under the GIL-released `ssl` module). // // The activity callback reacquires the GIL inside its // closure body — safe because we're handing ownership of // the callback to `request_with` via a reference. let resp = py.detach(|| { self.inner.request_with( method, url, &header_pairs, &body_bytes, &opts, activity.as_ref(), ) }); let resp = resp.map_err(client_err_to_py)?; Ok(HttpResponse::new(resp)) } } /// Wrap a Python callable as an [`ActivityCallback`]. The Python /// callable receives `(byte_count, direction_str)` where direction /// is `"read"` or `"write"` — matching the /// `Transport._report_activity` signature breezy's UI expects. /// /// Errors inside the callback are silently swallowed. Activity /// reporting is advisory; a broken progress-bar hook shouldn't fail /// the actual HTTP request. pub(super) fn make_activity_callback(cb: Py) -> ActivityCallback { std::sync::Arc::new(move |bytes: usize, dir: ActivityDirection| { Python::attach(|py| { // `call1` can raise; ignore the result so a buggy hook // doesn't propagate into the HTTP path. 
let _ = cb.bind(py).call1((bytes, dir.as_str())); }); }) } /// Urllib3-shaped response returned by [`HttpClient::request`]. /// /// Intentionally offers both the Python-file protocol (`read`, /// `readline`, `readlines`) and the urllib3 property set (`status`, /// `reason`, `data`, `text`, `getheader`, `getheaders`) so existing /// callers pulled from `Urllib3LikeResponse` work unchanged. /// /// The body is streamed on demand from ureq rather than buffered /// eagerly: `status` / `reason` / `getheader` / `getheaders` read /// purely from metadata, `data` / `text` / `read(None)` force a /// full drain, and `read(n)` / `readline()` pull incrementally. /// First call to a "full drain" method transitions the underlying /// body to a Buffered state so repeat reads are cheap. #[pyclass(module = "dromedary._transport_rs.http")] pub(crate) struct HttpResponse { inner: Mutex, } impl HttpResponse { pub(super) fn new(raw: RsHttpResponse) -> Self { Self { inner: Mutex::new(raw), } } } #[pymethods] impl HttpResponse { #[getter] fn status(&self) -> u16 { self.inner.lock().unwrap().status } #[getter] fn reason(&self) -> String { self.inner.lock().unwrap().reason.clone() } /// Final URL after any redirect following. For requests that /// weren't redirected this equals the request URL. #[getter] fn final_url(&self) -> String { self.inner.lock().unwrap().final_url.clone() } /// Set when the server returned a 3xx that the client didn't /// auto-follow (`follow_redirects=False`). Callers use this to /// raise `RedirectRequested`. #[getter] fn redirected_to(&self) -> Option { self.inner.lock().unwrap().redirected_to.clone() } /// Case-insensitive header lookup. `default` is returned if /// no header matches. 
#[pyo3(signature = (name, default=None))] fn getheader(&self, py: Python, name: &str, default: Option>) -> Py { let inner = self.inner.lock().unwrap(); match inner.header(name) { Some(v) => PyString::new(py, v).into(), None => default.unwrap_or_else(|| py.None()), } } /// Return all headers as a list of `(name, value)` tuples. fn getheaders<'py>(&self, py: Python<'py>) -> PyResult> { let inner = self.inner.lock().unwrap(); let items: Vec> = inner .headers .iter() .map(|(k, v)| { let tup = PyTuple::new(py, [PyString::new(py, k), PyString::new(py, v)])?; Ok::<_, PyErr>(tup.into_any()) }) .collect::>>()?; PyList::new(py, items) } /// Full response body as bytes. Forces a drain on first access; /// subsequent reads through this property return the same /// buffer without re-reading. #[getter] fn data<'py>(&self, py: Python<'py>) -> PyResult> { let mut inner = self.inner.lock().unwrap(); let body = inner.body().map_err(py_io_err)?; Ok(PyBytes::new(py, body)) } /// Decoded body as str, using the Content-Type charset when /// present. Matches the Python `Urllib3LikeResponse.text` shim — /// returns `None` on a 204 No Content response. #[getter] fn text(&self, py: Python) -> PyResult> { let mut inner = self.inner.lock().unwrap(); if inner.status == 204 { return Ok(py.None()); } // Read the charset out of the Content-Type header before // borrowing the body; the two &self borrows otherwise // overlap because `body()` takes &mut. let _charset = inner .header("content-type") .and_then(|v| { v.split(';').find_map(|piece| { let piece = piece.trim(); piece .strip_prefix("charset=") .map(|c| c.trim_matches('"').to_string()) }) }) .unwrap_or_else(|| "utf-8".to_string()); // Only UTF-8 is handled natively; everything else falls back // to replacing invalid bytes. Real non-UTF-8 payloads are // vanishingly rare on the Bazaar smart-protocol path this // is aimed at, and breezy didn't support them either. 
let body = inner.body().map_err(py_io_err)?; let text = String::from_utf8_lossy(body).into_owned(); Ok(PyString::new(py, &text).into()) } /// File-like read. `size=None` (or negative) reads all remaining /// bytes — forces a full drain. Positive `size` pulls up to that /// many bytes from the current position (streamed), leaving the /// rest available for subsequent reads. #[pyo3(signature = (size=None))] fn read<'py>(&self, py: Python<'py>, size: Option) -> PyResult> { let mut inner = self.inner.lock().unwrap(); let n = match size { None | Some(-1) => None, Some(n) if n < 0 => None, Some(n) => Some(n as usize), }; let data = inner.read(n).map_err(py_io_err)?; Ok(PyBytes::new(py, &data)) } /// Read up to the next newline (inclusive) or EOF. Forces the /// body to be buffered on first call — line splitting across a /// live stream would require a BufRead wrapper we don't have /// yet, and the callers that use readline() (handle_response /// for multipart responses) typically consume the whole body /// anyway. #[pyo3(signature = (_size=-1))] fn readline<'py>(&self, py: Python<'py>, _size: i64) -> PyResult> { let mut inner = self.inner.lock().unwrap(); // Drain the body into the buffer so we can scan for '\n' // without losing the rest of the stream. let _ = inner.body().map_err(py_io_err)?; // Now read one byte at a time until '\n' or EOF. This works // because the BodyState is Buffered after body(). let mut line: Vec = Vec::new(); loop { let chunk = inner.read(Some(1)).map_err(py_io_err)?; if chunk.is_empty() { break; } let b = chunk[0]; line.push(b); if b == b'\n' { break; } } Ok(PyBytes::new(py, &line)) } fn readlines<'py>(&self, py: Python<'py>) -> PyResult> { let mut out: Vec> = Vec::new(); loop { let line = self.readline(py, -1)?; if line.as_bytes().is_empty() { break; } out.push(line); } PyList::new(py, out) } /// Close the response by discarding any unread body, returning /// the underlying socket to ureq's pool. Mirrors the file-like /// `close()` contract. 
fn close(&self) -> PyResult<()> { self.inner.lock().unwrap().discard_body().map_err(py_io_err) } } /// Map an `io::Error` from the streaming body read into `OSError`. fn py_io_err(e: std::io::Error) -> PyErr { PyIOError::new_err(e.to_string()) } /// Coerce whatever Python hands us into `(name, value)` pairs. We /// accept either a dict or any iterable of two-tuples, matching the /// old `request` signature where callers could pass either. pub(super) fn extract_headers(py: Python, obj: &Py) -> PyResult> { let bound = obj.bind(py); if let Ok(d) = bound.cast::() { let mut out = Vec::with_capacity(d.len()); for (k, v) in d.iter() { out.push((k.extract::()?, v.extract::()?)); } return Ok(out); } let mut out = Vec::new(); for item in bound.try_iter()? { let pair = item?; let tup = pair.cast::().map_err(|_| { PyValueError::new_err("headers must be a dict or iterable of (name, value) tuples") })?; if tup.len() != 2 { return Err(PyValueError::new_err( "header tuples must have exactly two elements", )); } let k: String = tup.get_item(0)?.extract()?; let v: String = tup.get_item(1)?.extract()?; out.push((k, v)); } Ok(out) } /// Accept `bytes`, `bytearray`, `memoryview`, or `str` (encoded as /// UTF-8) as the request body. Matches how the Python side /// previously passed data to `connection._send_request`. pub(super) fn extract_body(py: Python, obj: &Py) -> PyResult> { let bound = obj.bind(py); if let Ok(b) = bound.cast::() { return Ok(b.as_bytes().to_vec()); } if let Ok(s) = bound.extract::() { return Ok(s.into_bytes()); } // Fallback: coerce via `bytes()` (handles bytearray, memoryview). 
let as_bytes = py .get_type::() .call1((bound.clone(),)) .map_err(|e| PyValueError::new_err(format!("can't interpret body as bytes: {}", e)))?; let b = as_bytes .cast::() .map_err(|e| PyValueError::new_err(format!("body coercion failed: {}", e)))?; Ok(b.as_bytes().to_vec()) } pub(crate) fn register(m: &Bound) -> PyResult<()> { m.add_class::()?; m.add_class::()?; Ok(()) } dromedary-0.1.5/_transport_rs/src/http/mod.rs000066400000000000000000000434541520150013200212740ustar00rootroot00000000000000//! Python bindings for the HTTP helper functions in `dromedary::http`. pub(crate) mod client; mod response; pub(crate) mod transport; use std::sync::Mutex; use lazy_static::lazy_static; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use pyo3::types::{PyList, PyTuple}; use pyo3::IntoPyObjectExt; // --------------------------------------------------------------------------- // Credential-lookup callback. Breezy registers a Python callable that // maps (protocol, host, port, path, realm) to (user, password). We // store the Py here so the hot-path auth code in urllib.py (and // eventually the Rust HTTP client) can call into it without round- // tripping through module-attribute lookup. // --------------------------------------------------------------------------- lazy_static! { /// Registered credential-lookup callback. `None` means no /// callback has been set and `get_credentials` should return /// `(None, None)`. static ref CREDENTIAL_LOOKUP: Mutex>> = Mutex::new(None); /// Registered Negotiate (Kerberos) initial-token callback. /// Called as `cb(host) -> Optional[str]`; the returned string /// goes after `Negotiate ` in the Authorization header. static ref NEGOTIATE_PROVIDER: Mutex>> = Mutex::new(None); /// Registered preemptive token-provider callback. Called as /// `cb(protocol, host, port=None, path=None) -> (token, scheme)` /// or `(None, None)` when no token is configured. 
The HTTP /// client uses the result to attach `Authorization: /// ` before the request goes on the wire — no server /// challenge required. static ref TOKEN_PROVIDER: Mutex>> = Mutex::new(None); /// Registered auth-header-sent callback. The Rust client calls /// this just before sending a request carrying an Authorization /// or Proxy-Authorization header; breezy uses it to emit a /// `trace.mutter("> %s: ", header_name)` line when the /// `http` debug flag is on, so users can confirm auth happened /// without leaking the credential value into logs. static ref AUTH_HEADER_TRACE: Mutex>> = Mutex::new(None); } /// Invoke the registered auth-header-trace callback. No-op if no /// callback is set. Errors from the callback are swallowed — this /// is a tracing hook, not a control-flow one, so a broken logger /// mustn't break HTTP auth. pub(crate) fn invoke_auth_header_trace(header_name: &str) { Python::attach(|py| { let cb = { let guard = AUTH_HEADER_TRACE.lock().unwrap(); guard.as_ref().map(|p| p.clone_ref(py)) }; if let Some(cb) = cb { let _ = cb.bind(py).call1((header_name,)); } }); } /// Invoke the registered token-provider callback. Returns `None` /// if no callback is set, the callback raised, or it returned /// `(None, _)` / `(_, None)`. The breezy callback maps this to an /// `authentication.conf` lookup against the request URL. 
pub(super) fn invoke_token_provider( protocol: &str, host: &str, port: Option, path: Option<&str>, ) -> Option<(String, String)> { Python::attach(|py| { let cb = { let guard = TOKEN_PROVIDER.lock().unwrap(); guard.as_ref().map(|p| p.clone_ref(py)) }; let cb = cb?; let kwargs = pyo3::types::PyDict::new(py); kwargs.set_item("port", port).ok()?; kwargs.set_item("path", path).ok()?; let result = cb.bind(py).call((protocol, host), Some(&kwargs)).ok()?; let tup = result.cast::().ok()?; if tup.len() != 2 { return None; } let token: Option = tup.get_item(0).ok()?.extract().ok()?; let scheme: Option = tup.get_item(1).ok()?.extract().ok()?; match (token, scheme) { (Some(t), Some(s)) => Some((s, t)), _ => None, } }) } /// Invoke the registered Negotiate callback. Returns `None` if no /// callback is set, the callback returned a non-string, or it /// raised. Matches the behaviour of the old Python /// `_auth_match_kerberos` which quietly returned `None` on any /// GSSAPI error. pub(super) fn invoke_negotiate_provider(host: &str) -> Option { Python::attach(|py| { let cb = { let guard = NEGOTIATE_PROVIDER.lock().unwrap(); guard.as_ref().map(|p| p.clone_ref(py)) }; let cb = cb?; let result = cb.bind(py).call1((host,)).ok()?; if result.is_none() { return None; } result.extract::().ok() }) } /// Invoke the registered credential-lookup callback with the given /// arguments. Returns `(None, None)` if no callback is set, or if /// the callback raises — we don't surface those errors because the /// auth layer treats them as "no credentials available". 
pub(super) fn invoke_credential_lookup( protocol: &str, host: &str, port: Option, realm: Option<&str>, user: Option<&str>, is_proxy: bool, ) -> (Option, Option) { Python::attach(|py| { let cb = { let guard = CREDENTIAL_LOOKUP.lock().unwrap(); guard.as_ref().map(|p| p.clone_ref(py)) }; let Some(cb) = cb else { return (None, None); }; let kwargs = pyo3::types::PyDict::new(py); // The Python callback signature is // `(protocol, host, port=None, path=None, realm=None, user=None, // is_proxy=False)`; // we leave `path` as None because the Rust client doesn't track // it per-request (breezy's urllib version did, but the value was // rarely used by downstream credential stores). `user` is the // URL-embedded username hint — breezy's AuthenticationConfig // uses it to skip its own user prompt when the URL already // names one. `is_proxy` tells the callback that the credentials // are for a proxy (407) rather than the origin (401), so it can // label interactive prompts accordingly. let _ = kwargs.set_item("port", port); let _ = kwargs.set_item("path", py.None()); let _ = kwargs.set_item("realm", realm); if let Some(u) = user { let _ = kwargs.set_item("user", u); } if is_proxy { let _ = kwargs.set_item("is_proxy", true); } let mut result = cb.bind(py).call((protocol, host), Some(&kwargs)); // Older callbacks may not accept the `user` / `is_proxy` kwargs. // If that's the cause of a TypeError, drop them progressively so // we don't regress on callers that haven't been updated. 
if result.is_err() && is_proxy { let _ = kwargs.del_item("is_proxy"); result = cb.bind(py).call((protocol, host), Some(&kwargs)); } if result.is_err() && user.is_some() { let _ = kwargs.del_item("user"); result = cb.bind(py).call((protocol, host), Some(&kwargs)); } match result { Ok(obj) => { let tup = match obj.cast::() { Ok(t) => t, Err(_) => return (None, None), }; if tup.len() != 2 { return (None, None); } let user = tup .get_item(0) .ok() .and_then(|v| v.extract::>().ok()) .flatten(); let password = tup .get_item(1) .ok() .and_then(|v| v.extract::>().ok()) .flatten(); (user, password) } Err(_) => (None, None), } }) } #[pyfunction] #[pyo3(signature = (use_cache=true))] fn get_ca_path(use_cache: bool) -> String { dromedary::http::get_ca_path(use_cache) } #[pyfunction] fn clear_ca_path_cache() { dromedary::http::clear_ca_path_cache(); } #[pyfunction] fn default_ca_certs() -> String { dromedary::http::default_ca_certs() } /// Split a `host[:port]` string. Returns `(host, port_or_none)`. #[pyfunction] fn splitport(py: Python, host: &str) -> PyResult> { let (h, p) = dromedary::http::splitport(host); let tup = PyTuple::new(py, [h.into_py_any(py)?, p.into_py_any(py)?])?; Ok(tup.into()) } /// Split a WWW-Authenticate header into `(scheme_lower, remainder_or_none)`. #[pyfunction] fn parse_auth_header(py: Python, header: &str) -> PyResult> { let (scheme, rest) = dromedary::http::parse_auth_header(header); let tup = PyTuple::new(py, [scheme.into_py_any(py)?, rest.into_py_any(py)?])?; Ok(tup.into()) } /// Parse an RFC 2068 §2 comma-separated list honouring quoted strings. #[pyfunction] fn parse_http_list(s: &str) -> Vec { dromedary::http::parse_http_list(s) } /// Parse a list of `key=value` pairs (typically produced by /// [`parse_http_list`]) into a dict. #[pyfunction] fn parse_keqv_list(items: Vec) -> std::collections::HashMap { dromedary::http::parse_keqv_list(&items) } /// Compute the HTTP Digest `H(x)` function for the given algorithm. 
The /// algorithm name is the value of the server's `algorithm=` parameter /// (`"MD5"` or `"SHA"`). Raises `ValueError` for unsupported algorithms /// so callers get a clear error rather than a silent mismatch. #[pyfunction] fn digest_h(algorithm: &str, data: &[u8]) -> PyResult { let algo = dromedary::http::DigestAlgorithm::parse(algorithm) .ok_or_else(|| PyValueError::new_err(format!("unsupported algorithm: {}", algorithm)))?; Ok(algo.h(data)) } /// Compute the HTTP Digest `KD(secret, data) = H(secret ":" data)`. #[pyfunction] fn digest_kd(algorithm: &str, secret: &str, data: &str) -> PyResult { let algo = dromedary::http::DigestAlgorithm::parse(algorithm) .ok_or_else(|| PyValueError::new_err(format!("unsupported algorithm: {}", algorithm)))?; Ok(algo.kd(secret, data)) } /// Check whether an `algorithm=` name is one we can compute. #[pyfunction] fn digest_algorithm_supported(algorithm: &str) -> bool { dromedary::http::DigestAlgorithm::parse(algorithm).is_some() } /// Generate a client nonce for HTTP Digest authentication. #[pyfunction] fn get_new_cnonce(nonce: &str, nonce_count: u64) -> String { dromedary::http::new_cnonce(nonce, nonce_count) } /// Check a host against a `no_proxy` list. Returns `True` to bypass /// the proxy, `False` to use it, or `None` if the caller should fall /// back to the platform-specific proxy-bypass logic (Python's /// `urllib.request.proxy_bypass`). /// /// This preserves the Python `ProxyHandler.evaluate_proxy_bypass` /// contract byte-for-byte, including the surprising prefix-only /// match that lets `example.com` in `no_proxy` match /// `example.com.evil.com`. 
#[pyfunction] #[pyo3(signature = (host, no_proxy))] fn evaluate_proxy_bypass(py: Python, host: &str, no_proxy: Option<&str>) -> Py { use dromedary::http::ProxyBypass; match dromedary::http::evaluate_proxy_bypass(host, no_proxy) { ProxyBypass::Bypass => true.into_py_any(py).unwrap(), ProxyBypass::UseProxy => false.into_py_any(py).unwrap(), ProxyBypass::Undecided => py.None(), } } /// Replace the global User-Agent prefix. #[pyfunction] fn set_user_agent(prefix: String) { dromedary::http::set_user_agent(prefix); } /// Return the current User-Agent prefix. #[pyfunction] fn default_user_agent() -> String { dromedary::http::default_user_agent() } /// Platform-default certificate verification requirement. Returns an /// integer matching `ssl.CERT_NONE` / `ssl.CERT_REQUIRED` so the /// Python side can compare against `ssl.*` constants directly. #[pyfunction] fn default_cert_reqs() -> u8 { dromedary::http::default_cert_reqs().to_int() } /// Register a credential-lookup callable. The callable is invoked as /// `func(protocol, host, port=None, path=None, realm=None)` and /// should return `(user, password)` (either may be `None`). /// /// Passing `None` clears any previously-registered callback so /// subsequent [`get_credentials`] calls fall back to the `(None, /// None)` default. #[pyfunction] fn set_credential_lookup(py: Python, func: Py) { let mut slot = CREDENTIAL_LOOKUP.lock().unwrap(); *slot = if func.bind(py).is_none() { None } else { Some(func) }; } /// Return the currently-registered credential-lookup callable, or /// `None` if none is set. Mainly useful for tests that want to save /// and restore the callback around assertions. #[pyfunction] fn get_credential_lookup(py: Python) -> Py { CREDENTIAL_LOOKUP .lock() .unwrap() .as_ref() .map(|p| p.clone_ref(py)) .unwrap_or_else(|| py.None()) } /// Register a Negotiate (Kerberos) initial-token callback. 
The /// callable is invoked as `func(host)` and should return the /// base64-encoded token to send after `Negotiate ` in the /// Authorization header, or `None` if no token is available (no /// ticket / library missing / wrong realm). /// /// Passing `None` clears any previously-registered callback. #[pyfunction] fn set_negotiate_provider(py: Python, func: Py) { let mut slot = NEGOTIATE_PROVIDER.lock().unwrap(); *slot = if func.bind(py).is_none() { None } else { Some(func) }; } /// Register a callback invoked when the HTTP client is about to /// send an Authorization or Proxy-Authorization header. The /// callable is invoked as `func(header_name)` — breezy uses this /// for debug tracing so users can confirm auth credentials were /// sent without exposing the values themselves in logs. /// /// Passing `None` clears any previously-registered callback. #[pyfunction] fn set_auth_header_trace(py: Python, func: Py) { let mut slot = AUTH_HEADER_TRACE.lock().unwrap(); *slot = if func.bind(py).is_none() { None } else { Some(func) }; } /// Return the currently-registered Negotiate callback, or `None`. #[pyfunction] fn get_negotiate_provider(py: Python) -> Py { NEGOTIATE_PROVIDER .lock() .unwrap() .as_ref() .map(|p| p.clone_ref(py)) .unwrap_or_else(|| py.None()) } /// Register a preemptive token-provider callback. The callable is /// invoked as `func(protocol, host, port=None, path=None)` and /// should return `(token, scheme)`, with either field `None` when /// no token applies. The HTTP client attaches `Authorization: /// ` before the request goes out — caller-supplied /// `Authorization` headers always win. /// /// Passing `None` clears any previously-registered callback. #[pyfunction] fn set_token_provider(py: Python, func: Py) { let mut slot = TOKEN_PROVIDER.lock().unwrap(); *slot = if func.bind(py).is_none() { None } else { Some(func) }; } /// Return the currently-registered token-provider callback, or `None`. 
#[pyfunction] fn get_token_provider(py: Python) -> Py { TOKEN_PROVIDER .lock() .unwrap() .as_ref() .map(|p| p.clone_ref(py)) .unwrap_or_else(|| py.None()) } /// Look up credentials via the registered callback. Returns /// `(None, None)` if no callback is set (the historical default). #[pyfunction] #[pyo3(signature = (protocol, host, port=None, path=None, realm=None))] fn get_credentials( py: Python, protocol: &str, host: &str, port: Option>, path: Option>, realm: Option>, ) -> PyResult> { let cb = { let guard = CREDENTIAL_LOOKUP.lock().unwrap(); guard.as_ref().map(|p| p.clone_ref(py)) }; match cb { Some(cb) => { let kwargs = pyo3::types::PyDict::new(py); kwargs.set_item("port", port.unwrap_or_else(|| py.None()))?; kwargs.set_item("path", path.unwrap_or_else(|| py.None()))?; kwargs.set_item("realm", realm.unwrap_or_else(|| py.None()))?; let result = cb.bind(py).call((protocol, host), Some(&kwargs))?; Ok(result.unbind()) } None => { let tup = PyTuple::new(py, [py.None(), py.None()])?; Ok(tup.into()) } } } pub(crate) fn register(py: Python, m: &Bound) -> PyResult<()> { m.add_function(wrap_pyfunction!(get_ca_path, m)?)?; m.add_function(wrap_pyfunction!(clear_ca_path_cache, m)?)?; m.add_function(wrap_pyfunction!(default_ca_certs, m)?)?; m.add_function(wrap_pyfunction!(splitport, m)?)?; m.add_function(wrap_pyfunction!(parse_auth_header, m)?)?; m.add_function(wrap_pyfunction!(parse_http_list, m)?)?; m.add_function(wrap_pyfunction!(parse_keqv_list, m)?)?; m.add_function(wrap_pyfunction!(digest_h, m)?)?; m.add_function(wrap_pyfunction!(digest_kd, m)?)?; m.add_function(wrap_pyfunction!(digest_algorithm_supported, m)?)?; m.add_function(wrap_pyfunction!(get_new_cnonce, m)?)?; m.add_function(wrap_pyfunction!(set_user_agent, m)?)?; m.add_function(wrap_pyfunction!(default_user_agent, m)?)?; m.add_function(wrap_pyfunction!(default_cert_reqs, m)?)?; m.add_function(wrap_pyfunction!(set_credential_lookup, m)?)?; m.add_function(wrap_pyfunction!(get_credential_lookup, m)?)?; 
m.add_function(wrap_pyfunction!(get_credentials, m)?)?; m.add_function(wrap_pyfunction!(set_negotiate_provider, m)?)?; m.add_function(wrap_pyfunction!(get_negotiate_provider, m)?)?; m.add_function(wrap_pyfunction!(set_token_provider, m)?)?; m.add_function(wrap_pyfunction!(get_token_provider, m)?)?; m.add_function(wrap_pyfunction!(set_auth_header_trace, m)?)?; m.add_function(wrap_pyfunction!(evaluate_proxy_bypass, m)?)?; client::register(m)?; response::register(m)?; transport::register(m)?; let locations = PyList::new(py, dromedary::http::SSL_CA_CERTS_KNOWN_LOCATIONS)?; m.add("SSL_CA_CERTS_KNOWN_LOCATIONS", locations)?; Ok(()) } dromedary-0.1.5/_transport_rs/src/http/response.rs000066400000000000000000000326771520150013200223600ustar00rootroot00000000000000//! Python bindings for `dromedary::http::response`. //! //! Exposes `ResponseFile`, `RangeFile`, and `handle_response` so that //! the Python side of `dromedary.http` can drop its hand-rolled //! implementation and delegate here. //! //! The two classes each hold their Rust counterpart. The underlying //! input file is an arbitrary Python file-like: we don't require //! `pyo3-filelike` because the trait we need (`read(n)` and //! `readline()` only) is simpler than the full file protocol. use std::io; use pyo3::exceptions::PyTypeError; use pyo3::import_exception; use pyo3::prelude::*; use pyo3::types::PyBytes; use dromedary::http::response::{ handle_response as rs_handle_response, InFile, RangeFile as RsRangeFile, ResponseError, ResponseFile as RsResponseFile, ResponseKind, }; import_exception!(dromedary.errors, InvalidHttpResponse); import_exception!(dromedary.errors, InvalidHttpRange); import_exception!(dromedary.errors, HttpBoundaryMissing); import_exception!(dromedary.errors, ShortReadvError); import_exception!(dromedary.errors, InvalidRange); import_exception!(dromedary.errors, UnexpectedHttpStatus); /// Adapter: a Python file-like object accessed via Python attribute /// calls. 
We do **not** wrap with `pyo3_filelike::PyBinaryFile` /// because we need the file-like's own `readline()` semantics to /// match what socket / BytesIO objects already provide — stdlib /// `BufReader::read_until` splits differently on non-terminated /// last lines. struct PyInFile { obj: Py, } impl PyInFile { fn new(obj: Py) -> Self { Self { obj } } } fn py_err_to_io(e: PyErr) -> io::Error { io::Error::other(e.to_string()) } fn pyany_to_bytes(py: Python, any: &Bound) -> io::Result> { if let Ok(b) = any.cast::() { return Ok(b.as_bytes().to_vec()); } // Some file-likes (e.g. text-mode objects, or `StringIO`) return // `str` instead of `bytes`. Convert with UTF-8 encoding; matches // the Python-side behaviour where a caller using a StringIO for // tests would get the same bytes out. if let Ok(s) = any.extract::() { return Ok(s.into_bytes()); } // As a last resort, anything buffer-protocol-compatible (memoryview, // bytearray) can go through `bytes()`. let as_bytes = py .get_type::() .call1((any,)) .map_err(py_err_to_io)?; let b = as_bytes .cast::() .map_err(|e| io::Error::other(format!("file-like returned non-bytes: {}", e)))?; Ok(b.as_bytes().to_vec()) } impl InFile for PyInFile { fn read(&mut self, n: usize) -> io::Result> { Python::attach(|py| { let r = self .obj .bind(py) .call_method1("read", (n,)) .map_err(py_err_to_io)?; pyany_to_bytes(py, &r) }) } fn readline(&mut self) -> io::Result> { Python::attach(|py| { let r = self .obj .bind(py) .call_method0("readline") .map_err(py_err_to_io)?; pyany_to_bytes(py, &r) }) } } fn response_err_to_py(err: ResponseError) -> PyErr { match err { ResponseError::InvalidResponse { path, msg } => InvalidHttpResponse::new_err((path, msg)), ResponseError::InvalidHttpRange { path, range, msg } => { InvalidHttpRange::new_err((path, range, msg)) } ResponseError::BoundaryMissing { path, boundary } => { // The Python side passes the raw boundary bytes as the // `msg` argument; mirror that. 
Python::attach(|py| { HttpBoundaryMissing::new_err((path, PyBytes::new(py, &boundary).unbind())) }) } ResponseError::ShortReadv { path, offset, length, actual, } => ShortReadvError::new_err((path, offset, length, actual)), ResponseError::InvalidRange { path, offset, msg } => { InvalidRange::new_err((path, offset, msg)) } ResponseError::UnexpectedStatus { path, code } => { UnexpectedHttpStatus::new_err((path, code)) } ResponseError::Io(e) => pyo3::exceptions::PyIOError::new_err(e.to_string()), ResponseError::InvalidWhence(w) => { pyo3::exceptions::PyValueError::new_err(format!("Invalid value {} for whence.", w)) } ResponseError::BackwardSeek { path, pos, offset } => { pyo3::exceptions::PyAssertionError::new_err(format!( "{}: can't seek backwards, pos: {}, offset: {}", path, pos, offset )) } } } /// Python binding: `dromedary._transport_rs.http.ResponseFile`. /// /// Constructor: `ResponseFile(path, infile)`. The `infile` must /// expose at least `read(n)` and `readline()` — standard file-like /// duck typing, the same as what the Python original needed. #[pyclass(module = "dromedary._transport_rs.http", subclass)] pub(crate) struct ResponseFile { inner: RsResponseFile, } #[pymethods] impl ResponseFile { #[new] fn new(path: String, infile: Py) -> Self { Self { inner: RsResponseFile::new(path, PyInFile::new(infile)), } } /// No-op; matches the Python API for file-like compatibility. fn close(&self) {} fn __enter__(slf: Py) -> Py { slf } #[pyo3(signature = (_exc_type=None, _exc_val=None, _exc_tb=None))] fn __exit__( &self, _exc_type: Option>, _exc_val: Option>, _exc_tb: Option>, ) -> bool { false } /// Read up to `size` bytes; `None` (the default) means "read to /// EOF". `-1` is accepted as an alias for `None`, matching the /// Python convention. 
#[pyo3(signature = (size=None))] fn read<'py>(&mut self, py: Python<'py>, size: Option) -> PyResult> { let sz = match size { None | Some(-1) => None, Some(n) if n < 0 => None, Some(n) => Some(n as usize), }; let data = self.inner.read(sz).map_err(response_err_to_py)?; Ok(PyBytes::new(py, &data)) } fn readline<'py>(&mut self, py: Python<'py>) -> PyResult> { let data = self.inner.readline().map_err(response_err_to_py)?; Ok(PyBytes::new(py, &data)) } /// Read all remaining lines. The optional `size` argument is /// accepted for Python-file-like compatibility but ignored, /// matching the Python version. #[pyo3(signature = (_size=None))] fn readlines<'py>( &mut self, py: Python<'py>, _size: Option, ) -> PyResult>> { let lines = self.inner.readlines().map_err(response_err_to_py)?; Ok(lines.into_iter().map(|l| PyBytes::new(py, &l)).collect()) } fn __iter__(slf: Py) -> Py { slf } fn __next__<'py>(&mut self, py: Python<'py>) -> PyResult>> { let line = self.inner.readline().map_err(response_err_to_py)?; if line.is_empty() { Ok(None) } else { Ok(Some(PyBytes::new(py, &line))) } } fn tell(&self) -> u64 { self.inner.tell() } #[pyo3(signature = (offset, whence=0))] fn seek(&mut self, offset: i64, whence: u32) -> PyResult<()> { self.inner.seek(offset, whence).map_err(response_err_to_py) } } /// Python binding: `dromedary._transport_rs.http.RangeFile`. /// /// Not a subclass of `ResponseFile` in the Rust bindings — nothing /// in the Python code base does `isinstance(rf, ResponseFile)`, so /// keeping them independent avoids the PyO3 subclass boilerplate. 
#[pyclass(module = "dromedary._transport_rs.http")]
pub(crate) struct RangeFile {
    inner: RsRangeFile,
}

#[pymethods]
impl RangeFile {
    /// Constructor: `RangeFile(path, infile)`, wrapping `infile` in
    /// the `PyInFile` adapter.
    #[new]
    fn new(path: String, infile: Py<PyAny>) -> Self {
        let inner = RsRangeFile::new(path, PyInFile::new(infile));
        Self { inner }
    }

    /// No-op, present for file-like compatibility.
    fn close(&self) {}

    /// Context-manager entry: returns the object itself.
    fn __enter__(slf: Py<Self>) -> Py<Self> {
        slf
    }

    /// Context-manager exit: always returns `False`, so exceptions
    /// raised inside the `with` block are not suppressed.
    #[pyo3(signature = (_exc_type=None, _exc_val=None, _exc_tb=None))]
    fn __exit__(
        &self,
        _exc_type: Option<Py<PyAny>>,
        _exc_val: Option<Py<PyAny>>,
        _exc_tb: Option<Py<PyAny>>,
    ) -> bool {
        false
    }

    /// Read up to `size` bytes from the current range. `-1` means
    /// "read to end of range" (matches the Python default).
    #[pyo3(signature = (size=-1))]
    fn read<'py>(&mut self, py: Python<'py>, size: i64) -> PyResult<Bound<'py, PyBytes>> {
        match self.inner.read(size) {
            Ok(data) => Ok(PyBytes::new(py, &data)),
            Err(e) => Err(response_err_to_py(e)),
        }
    }

    /// Current position as reported by the wrapped Rust object.
    fn tell(&self) -> u64 {
        self.inner.tell()
    }

    /// Seek on the wrapped Rust object; `whence` defaults to 0.
    #[pyo3(signature = (offset, whence=0))]
    fn seek(&mut self, offset: i64, whence: u32) -> PyResult<()> {
        self.inner.seek(offset, whence).map_err(response_err_to_py)
    }

    /// Set the current range. A negative `size` is passed on as
    /// `None`.
    fn set_range(&mut self, start: u64, size: i64) {
        let size = u64::try_from(size).ok();
        self.inner.set_range(start, size);
    }

    /// Multipart boundary; passed as bytes by the caller. A non-bytes
    /// argument raises `TypeError` to match the Python
    /// `isinstance(boundary, bytes)` check.
fn set_boundary(&mut self, py: Python, boundary: Py) -> PyResult<()> { let bound = boundary.bind(py); let bytes = bound .cast::() .map_err(|_| PyTypeError::new_err("boundary must be bytes"))?; self.inner .set_boundary(bytes.as_bytes().to_vec()) .map_err(response_err_to_py) } fn read_boundary(&mut self) -> PyResult<()> { self.inner.read_boundary().map_err(response_err_to_py) } fn read_range_definition(&mut self) -> PyResult<()> { self.inner .read_range_definition() .map_err(response_err_to_py) } fn set_range_from_header(&mut self, content_range: &str) -> PyResult<()> { self.inner .set_range_from_header(content_range) .map_err(response_err_to_py) } // Python-compatibility properties — the original pure-Python // implementation exposed `_start`, `_size`, `_pos`, and // `_boundary` as ordinary instance attributes. Several tests // (and occasionally user code that bypassed the normal seek // machinery) poke them directly, so we mirror them here. The // setter methods use the `set_` naming that PyO3 expects // for `#[setter]` on properties called `_start` / `_size` / // `_pos`; that's what puts the underscore in the middle of the // Rust name. #[getter(_start)] fn py_start(&self) -> u64 { self.inner.rs_start() } #[setter(_start)] fn py_set_start(&mut self, value: u64) { self.inner.rs_set_start(value); } /// `-1` means "size unknown" (matches the Python convention). 
#[getter(_size)]
    fn py_size(&self) -> i64 {
        self.inner.rs_size().map_or(-1, |n| n as i64)
    }

    /// Setter for `_size`; a negative value is stored as "unknown".
    #[setter(_size)]
    fn py_set_size(&mut self, value: i64) {
        let size = u64::try_from(value).ok();
        self.inner.rs_set_size(size);
    }

    /// Getter for `_pos`; same value as `tell()`.
    #[getter(_pos)]
    fn py_pos(&self) -> u64 {
        self.inner.tell()
    }

    /// Setter for `_pos`.
    #[setter(_pos)]
    fn py_set_pos(&mut self, value: u64) {
        self.inner.rs_set_pos(value);
    }

    /// Getter for `_boundary`: the boundary as `bytes`, or `None`
    /// when no boundary is set.
    #[getter(_boundary)]
    fn py_boundary<'py>(&self, py: Python<'py>) -> Option<Bound<'py, PyBytes>> {
        let boundary = self.inner.rs_boundary()?;
        Some(PyBytes::new(py, boundary))
    }

    /// Getter for `_discarded_buf_size`.
    #[getter(_discarded_buf_size)]
    fn py_discarded_buf_size(&self) -> usize {
        self.inner.rs_discarded_buf_size()
    }

    /// Setter for `_discarded_buf_size`.
    #[setter(_discarded_buf_size)]
    fn py_set_discarded_buf_size(&mut self, value: usize) {
        self.inner.rs_set_discarded_buf_size(value);
    }
}

/// Factory mirroring `dromedary.http.response.handle_response`.
///
/// Like the Python version, this takes a `getheader` callable rather
/// than a headers mapping. It is invoked as `getheader(name, None)`;
/// a `None` result (or a failing call) is treated as "header
/// absent". `data` is the file-like object holding the response
/// body.
#[pyfunction]
pub(crate) fn handle_response(
    py: Python,
    url: String,
    code: u16,
    getheader: Py<PyAny>,
    data: Py<PyAny>,
) -> PyResult<Py<PyAny>> {
    // Bridge the Python getheader(name, default=None) callable to a
    // Rust closure returning Option<String>.
let get = |name: &str| -> Option { Python::attach(|py| { let res = getheader.bind(py).call1((name, py.None())).ok()?; if res.is_none() { None } else { res.extract::().ok() } }) }; let kind = rs_handle_response(url, code, &get, PyInFile::new(data)).map_err(response_err_to_py)?; match kind { ResponseKind::Plain(inner) => { let cls = Py::new(py, ResponseFile { inner })?; Ok(cls.into_any()) } ResponseKind::Range(inner) => { let cls = Py::new(py, RangeFile { inner })?; Ok(cls.into_any()) } } } pub(crate) fn register(m: &Bound) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_function(wrap_pyfunction!(handle_response, m)?)?; Ok(()) } dromedary-0.1.5/_transport_rs/src/http/transport.rs000066400000000000000000000364131520150013200225460ustar00rootroot00000000000000//! Python bindings for `dromedary::http::transport::HttpTransport`. //! //! Exposes the Rust HTTP transport as //! `dromedary._transport_rs.http.HttpTransport`, a pyclass that //! extends `Transport`. The Python `HttpTransport` in //! `dromedary/http/urllib.py` becomes a thin subclass that adds //! breezy-side hooks (`_medium`, `_report_activity` override, //! ssl-config resolution, redirect fix-up) without re-implementing //! the transport itself. use std::path::PathBuf; use std::sync::Arc; use std::time::Duration; use dromedary::http::client::{ ActivityCallback, HttpClientConfig, NegotiateProvider, RequestOptions, }; use dromedary::http::{HttpClient, HttpTransport as RsHttpTransport}; use dromedary::Transport as RsTransport; use pyo3::prelude::*; use pyo3::types::PyBytes; use crate::map_transport_err_to_py_err; use crate::{ConnectedTransport, Transport}; use super::client::{ client_err_to_py, extract_body, extract_headers, make_activity_callback, HttpResponse, PythonCredentialProvider, PythonNegotiateProvider, PythonTokenProvider, }; /// Python-bound Rust HTTP transport. /// /// Constructor: `HttpTransport(base, ca_certs=None, /// disable_verification=False, user_agent=None, /// read_timeout_ms=0)`. 
Each construction builds a fresh /// `HttpClient`; call `clone()` to get a sibling transport that /// shares the underlying agent and auth cache. /// /// We stash a concrete `Arc` alongside the base /// `Transport` so helper methods like `_post` can reach the /// HttpTransport-specific API without going through a /// `dyn Transport` downcast. #[pyclass(extends=ConnectedTransport, subclass, module = "dromedary._transport_rs.http")] pub(crate) struct HttpTransport { // `pub(crate)` rather than private so sibling modules (notably // `webdav`) can construct an HttpTransport pyclass parent that // points at their own transport's embedded HttpTransport — // necessary for `extends=HttpTransport` pyclasses to share the // inherited `request` / `_post` / ... methods. pub(crate) inner: Arc, } #[pymethods] impl HttpTransport { #[new] #[pyo3(signature = ( base, ca_certs=None, disable_verification=false, user_agent=None, read_timeout_ms=0, ))] fn new( base: &str, ca_certs: Option, disable_verification: bool, user_agent: Option, read_timeout_ms: i64, ) -> PyResult> { let timeout = if read_timeout_ms > 0 { Some(Duration::from_millis(read_timeout_ms as u64)) } else { None }; let cfg = HttpClientConfig { ca_certs_path: ca_certs, disable_verification, user_agent, read_timeout: timeout, }; let mut client = HttpClient::with_full_providers( cfg, Box::new(PythonCredentialProvider), Box::new(PythonNegotiateProvider) as Box, Box::new(PythonTokenProvider), ) .map_err(|e| { map_transport_err_to_py_err( dromedary::Error::Io(std::io::Error::other(format!("{}", e))), None, Some(base), ) })?; // Route auth-header traces back through the Python callback // set via `set_auth_header_trace`. Breezy registers that // callback at import time so debug-flag-controlled // "> Authorization: " lines reach `trace.mutter`. 
client.set_auth_trace(Some(std::sync::Arc::new(|header: &str| { super::invoke_auth_header_trace(header); }))); let rust = RsHttpTransport::new(base, Arc::new(client)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(base)))?; let inner = Arc::new(rust); Ok(http_transport_initializer(inner)) } /// Clone this transport at an optional offset, sharing the /// underlying HttpClient. /// /// Returns an instance of the base Rust ``HttpTransport``. /// Python subclasses that want ``type(self)``-preserving cloning /// should override ``clone`` and use /// ``_clone_from`` to graft the shared state onto /// a freshly-constructed subclass instance. #[pyo3(signature = (offset=None))] fn clone<'a>( slf: PyRef<'a, Self>, py: Python<'a>, offset: Option<&str>, ) -> PyResult> { let cloned = slf .inner .clone_concrete(offset) .map_err(|e| map_transport_err_to_py_err(e, None, offset))?; Bound::new(py, http_transport_initializer(Arc::new(cloned))) } /// Replace this transport's inner state with the state of /// ``source``, effectively turning ``self`` into a clone of /// ``source`` at the current base URL. Used by the Python /// subclass override of ``clone`` to achieve shared-state /// cloning while preserving the subclass identity: the subclass /// builds a fresh instance with its desired base, then calls /// this method to inherit the source transport's HttpClient, /// auth cache, and range-hint state. /// /// After calling this, the two transports share the same /// underlying Rust state exactly as if the receiver had been /// produced by ``source.clone(offset)``. fn _clone_from( mut slf: PyRefMut, source: PyRef, offset: Option<&str>, ) -> PyResult<()> { // When no offset is supplied, take the source's inner // directly rather than cloning — clone_concrete's // (deliberate) segment-parameter and raw_base stripping // loses information that matters for ``__init__``-time // TLS-config rebuilds (see // ``dromedary.http.urllib.HttpTransport.__init__``). 
let new_inner = match offset { None => source.inner.clone(), Some(_) => { let cloned = source .inner .clone_concrete(offset) .map_err(|e| map_transport_err_to_py_err(e, None, offset))?; Arc::new(cloned) } }; // Replace the base Transport(Box) too so // calls routed through the dyn vtable see the cloned state. let base_box: Box = Box::new(Clone::clone(&*new_inner)); // Walk up through the ConnectedTransport layer to reach the // Transport pyclass where the dyn handle actually lives. let connected = slf.as_super(); connected.as_super().0 = base_box; slf.inner = new_inner; Ok(()) } /// Install (or clear) the transport-level activity callback. /// Python subclasses invoke this with a bound method reference /// (``self._report_activity``) so internal get / has / post / /// readv calls funnel byte-count updates into breezy's progress /// UI the same way the explicit ``request()`` path already does. /// Pass ``None`` to clear. #[pyo3(signature = (callback))] fn _set_activity_callback(&self, callback: Option>) { let cb = callback.map(super::client::make_activity_callback); self.inner.set_activity(cb); } /// Current range hint as `"multi"`, `"single"`, or `None`. Part /// of the public-ish transport interface so the breezy test /// suite can observe the client's fallback state. #[getter] fn _range_hint(&self) -> Option<&'static str> { self.inner.range_hint_str() } /// Step the range hint down one rung. Returns True if we /// stepped, False if we were already at the floor. fn _degrade_range_hint(&self) -> bool { self.inner.degrade_range_hint() } // Readv-tuning getters / setters. These mirror the Python // urllib HttpTransport's instance attributes that breezy's // tests poke at to force specific batching behaviour: // `_max_readv_combine`, `_bytes_to_read_before_seek`, // `_get_max_size`, `_max_get_ranges`. Each setter swaps the // single named field while leaving the others untouched, so // tests that override one don't reset the others. 
#[getter]
fn _max_readv_combine(&self) -> usize {
    // Report the `max_readv_combine` field of the inner transport's tuning.
    self.inner.readv_tuning().max_readv_combine
}

#[setter]
fn set__max_readv_combine(&self, v: usize) {
    // Read-modify-write: fetch the current tuning, change one field,
    // store it back, so the other three knobs keep their values.
    let mut tuning = self.inner.readv_tuning();
    tuning.max_readv_combine = v;
    self.inner.set_readv_tuning(tuning);
}

#[getter]
fn _bytes_to_read_before_seek(&self) -> usize {
    // Report the `bytes_to_read_before_seek` field of the tuning.
    self.inner.readv_tuning().bytes_to_read_before_seek
}

#[setter]
fn set__bytes_to_read_before_seek(&self, v: usize) {
    // Same read-modify-write shape as the other tuning setters.
    let mut tuning = self.inner.readv_tuning();
    tuning.bytes_to_read_before_seek = v;
    self.inner.set_readv_tuning(tuning);
}

#[getter]
fn _get_max_size(&self) -> usize {
    // Report the `get_max_size` field of the tuning.
    self.inner.readv_tuning().get_max_size
}

#[setter]
fn set__get_max_size(&self, v: usize) {
    // Same read-modify-write shape as the other tuning setters.
    let mut tuning = self.inner.readv_tuning();
    tuning.get_max_size = v;
    self.inner.set_readv_tuning(tuning);
}

#[getter]
fn _max_get_ranges(&self) -> usize {
    // Report the `max_get_ranges` field of the tuning.
    self.inner.readv_tuning().max_get_ranges
}

#[setter]
fn set__max_get_ranges(&self, v: usize) {
    // Same read-modify-write shape as the other tuning setters.
    let mut tuning = self.inner.readv_tuning();
    tuning.max_get_ranges = v;
    self.inner.set_readv_tuning(tuning);
}

/// Unqualified HTTP scheme (`"http"` or `"https"`) — strips any
/// `+impl` qualifier present in the base URL. Exposed for
/// breezy's test harness which reads it directly.
#[getter]
fn _unqualified_scheme(&self) -> PyResult<String> {
    // Base URL's scheme is always the unqualified form after
    // normalise_http_url(); use that rather than re-parsing.
    // The trait method is named explicitly (`RsTransport::base`) and
    // applied to the transport behind the `Arc`.
    let base_url = RsTransport::base(&*self.inner);
    Ok(base_url.scheme().to_string())
}

/// Build the remote URL for `relpath`: absolute URL with any
/// embedded user/password stripped and the scheme reduced to
/// its unqualified form. Auth goes through headers, not URL.
fn _remote_path(&self, relpath: &str) -> PyResult<String> {
    // Any transport error is translated to the matching Python
    // exception, with `relpath` supplied as the fallback path.
    self.inner
        .remote_url(relpath)
        .map(|url| url.to_string())
        .map_err(|e| map_transport_err_to_py_err(e, None, Some(relpath)))
}

/// Drop any cached connection state. A no-op on the Rust client
/// (ureq's pool is per-agent and released lazily), but kept for
/// API compatibility with the urllib handler stack.
fn disconnect(&self) {} /// Issue an HTTP request through the transport's shared client. /// /// Returns the same `HttpResponse` pyclass the standalone /// `HttpClient.request` hands back, so existing callers that /// previously did `self._client.request(...)` don't need to /// change response-handling code. #[pyo3(signature = ( method, url, headers=None, body=None, follow_redirects=false, report_activity=None, ))] fn request( &self, py: Python, method: &str, url: &str, headers: Option>, body: Option>, follow_redirects: bool, report_activity: Option>, ) -> PyResult { let header_pairs = match headers { Some(h) => extract_headers(py, &h)?, None => Vec::new(), }; let body_bytes = match body { Some(b) => extract_body(py, &b)?, None => Vec::new(), }; let opts = RequestOptions { follow_redirects, ..RequestOptions::default() }; let activity: Option = report_activity.map(make_activity_callback); let resp = py.detach(|| { self.inner.client().request_with( method, url, &header_pairs, &body_bytes, &opts, activity.as_ref(), ) }); let resp = resp.map_err(client_err_to_py)?; Ok(HttpResponse::new(resp)) } /// HEAD `relpath`. Returns the response for 200 / 404 and /// raises for everything else, matching the Python `_head`. fn _head(&self, py: Python, relpath: &str) -> PyResult { let resp = py .detach(|| self.inner.head(relpath)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(relpath)))?; Ok(HttpResponse::new(resp)) } /// OPTIONS `relpath`. Returns the response headers as a list of /// `(name, value)` tuples on 2xx; raises `NoSuchFile` on 404 or /// `InvalidHttpResponse` on 403/405. fn _options(&self, py: Python, relpath: &str) -> PyResult> { py.detach(|| self.inner.options(relpath)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(relpath))) } /// POST `body` to `relpath` and return `(status, response_body)`. /// Mirrors the Python `HttpTransport._post` used by breezy's /// smart HTTP medium. 
fn _post<'py>( &self, py: Python<'py>, relpath: &str, body: &[u8], ) -> PyResult<(u16, Bound<'py, PyBytes>)> { // Release the GIL around the blocking HTTP exchange: the // in-process breezy test HTTP server runs in a Python thread // and can't accept connections while we hold the GIL, which // otherwise deadlocks POST-over-loopback tests. let (status, buf) = py .detach(|| -> Result<(u16, Vec), dromedary::Error> { let (status, mut rf) = self.inner.post(relpath, body)?; let mut buf = Vec::new(); std::io::Read::read_to_end(&mut rf, &mut buf).map_err(dromedary::Error::Io)?; Ok((status, buf)) }) .map_err(|e| map_transport_err_to_py_err(e, None, Some(relpath)))?; Ok((status, PyBytes::new(py, &buf))) } } /// Build the three-layer `Transport → ConnectedTransport → /// HttpTransport` initializer PyO3 needs for pyclass construction. /// /// `pub(crate)` so `webdav::transport` can extend the chain with a /// fourth `add_subclass(HttpDavTransport {...})` layer, reusing the /// HttpTransport parent's `inner` pointer to share the HTTP client /// and range-hint state with the DAV transport above it. pub(crate) fn http_transport_initializer( inner: Arc, ) -> PyClassInitializer { let base_box: Box = Box::new(Clone::clone(&*inner)); http_transport_initializer_with_base(inner, base_box) } /// Like `http_transport_initializer` but lets the caller supply a /// specific `dyn Transport` to install at the base `Transport` /// pyclass layer. Used by the DAV subclass so default-method /// dispatch (e.g. `Transport::move` in Python, which goes to the /// Rust `dyn Transport::move` in turn) reaches the DAV impls (stat /// via PROPFIND, native MOVE) rather than the HTTP parent that /// has no stat support. 
pub(crate) fn http_transport_initializer_with_base( inner: Arc, base_box: Box, ) -> PyClassInitializer { PyClassInitializer::from(Transport(base_box)) .add_subclass(ConnectedTransport) .add_subclass(HttpTransport { inner }) } pub(crate) fn register(m: &Bound) -> PyResult<()> { m.add_class::()?; Ok(()) } dromedary-0.1.5/_transport_rs/src/lib.rs000066400000000000000000001632631520150013200203050ustar00rootroot00000000000000use ::log::debug; use dromedary::lock::{FileLock, Lock as LockTrait, LockError}; use dromedary::{Error, ReadStream, Transport as TransportTrait, UrlFragment, WriteStream}; use pyo3::exceptions::{PyRuntimeError, PyValueError}; use pyo3::import_exception; use pyo3::prelude::*; use pyo3::types::{PyBytes, PyIterator, PyList, PyType}; use pyo3_filelike::PyBinaryFile; use std::collections::HashMap; use std::fs::Permissions; use std::io::{BufRead, BufReader, Read, Seek, Write}; #[cfg(unix)] use std::os::unix::fs::PermissionsExt; use std::path::{Path, PathBuf}; use std::sync::Mutex; use url::Url; import_exception!(dromedary.errors, TransportError); import_exception!(dromedary.errors, NotLocalUrl); import_exception!(dromedary.errors, InProcessTransport); import_exception!(dromedary.errors, NoSuchFile); import_exception!(dromedary.errors, FileExists); import_exception!(dromedary.errors, PathNotChild); import_exception!(dromedary.errors, PermissionDenied); import_exception!(dromedary.errors, TransportNotPossible); import_exception!(dromedary.errors, ShortReadvError); import_exception!(dromedary.errors, LockContention); import_exception!(dromedary.errors, LockFailed); import_exception!(dromedary.errors, ReadError); import_exception!(dromedary.errors, PathError); import_exception!(dromedary.errors, DirectoryNotEmpty); import_exception!(dromedary.errors, NotADirectory); import_exception!(dromedary.errors, ResourceBusy); import_exception!(dromedary.errors, InvalidHttpResponse); import_exception!(dromedary.errors, UnexpectedHttpStatus); 
import_exception!(dromedary.errors, InvalidHttpRange); import_exception!(dromedary.errors, BadHttpRequest); import_exception!(dromedary.errors, RedirectRequested); import_exception!(dromedary.errors, UnusableRedirect); import_exception!(dromedary.errors, ConnectionError); import_exception!(dromedary.urlutils, InvalidURL); #[pyclass(subclass)] pub(crate) struct Transport(pub(crate) Box); /// Python-visible iterator wrapping the Rust-side `readv` iterator. /// /// Holds a strong reference to the parent Transport pyclass /// (`Py`) so the `'static`-transmuted inner iterator /// never outlives the transport it borrows from. Each `__next__` /// pulls one element from the Rust iterator under `py.detach` so /// HTTP calls in the iterator body can block without holding the /// GIL — the in-process Python test HTTP server lives in another /// thread and needs the GIL to serve requests. #[pyclass] pub(crate) struct ReadvIter { /// Keeps the underlying Transport (and therefore the `dyn /// TransportTrait` + client state the iterator closed over) /// alive for as long as the iterator is live. Python holds a /// strong reference via this `Py`. #[allow(dead_code)] parent: Py, /// The Rust iterator. `Option` so `__next__` can hand out the /// final `None` cleanly and drop the iterator early if a /// caller leaks the iterator pyclass. `Mutex` because PyO3's /// `#[pymethods]` takes `&self`, but iterator advancement /// needs mutation. iter: Mutex), dromedary::Error>> + Send>>>, /// Path string used for error-mapping; carried alongside the /// iterator for easy access on `__next__`'s Err path. path: String, } #[pymethods] impl ReadvIter { fn __iter__(slf: PyRef) -> PyRef { slf } fn __next__<'py>(&self, py: Python<'py>) -> PyResult)>> { let result = py.detach(|| { let mut guard = self.iter.lock().unwrap(); guard.as_mut().and_then(|it| it.next()) }); match result { None => { // Drop the iterator so cached HTTP state releases // as soon as the caller stops consuming. 
*self.iter.lock().unwrap() = None; Ok(None) } Some(Err(e)) => { *self.iter.lock().unwrap() = None; Err(map_transport_err_to_py_err(e, None, Some(&self.path))) } Some(Ok((offset, data))) => Ok(Some((offset, PyBytes::new(py, &data)))), } } } /// Python-visible base class for Rust-backed transports that talk to /// a remote server. Mirrors `dromedary.ConnectedTransport` on the /// Python side, providing the `_user` / `_password` / `_host` / /// `_port` / `_path` / `_scheme` getters and `disconnect()` / /// `_reuse_for()` that `get_transport_from_url(possible_transports=…)` /// relies on for connection pooling. #[pyclass(extends=Transport, subclass)] pub(crate) struct ConnectedTransport; #[pymethods] impl ConnectedTransport { #[getter] fn _scheme(slf: PyRef<'_, Self>) -> String { let base = slf.as_super().0.base(); dromedary::connected_url_scheme(&base) } #[getter] fn _host(slf: PyRef<'_, Self>) -> Option { let base = slf.as_super().0.base(); dromedary::connected_url_host(&base) } #[getter] fn _port(slf: PyRef<'_, Self>) -> Option { let base = slf.as_super().0.base(); dromedary::connected_url_port(&base) } #[getter] fn _user(slf: PyRef<'_, Self>) -> Option { let base = slf.as_super().0.base(); dromedary::connected_url_user(&base) } #[getter] fn _password(slf: PyRef<'_, Self>) -> Option { let base = slf.as_super().0.base(); dromedary::connected_url_password(&base) } #[getter] fn _path(slf: PyRef<'_, Self>) -> String { let base = slf.as_super().0.base(); dromedary::connected_url_path(&base) } /// Parsed form of the base URL as a `dromedary.urlutils.URL`. /// Historically set on the Python `ConnectedTransport` at /// construction; breezy's `transport/remote.py` reads it for /// host / port / user / password access. We build one on /// demand so Rust-backed transports don't have to carry extra /// state. 
#[getter] fn _parsed_url<'py>(slf: PyRef<'py, Self>, py: Python<'py>) -> PyResult> { let base = slf.as_super().0.base(); let base_str = base.to_string(); let module = py.import("dromedary.urlutils")?; let cls = module.getattr("URL")?; let url = cls.call_method1("from_string", (base_str.as_str(),))?; Ok(url.unbind()) } /// Shared-connection handle. Breezy compares /// `_get_connection()` results across clones to decide whether /// two transports share an underlying connection. We can't /// expose the Rust `Arc` identity directly across /// the FFI boundary, so we use the next best signal: the /// `(scheme, host, port)` tuple. Two transports with the same /// origin share the same `HttpClient` pool entry and from /// breezy's perspective are "the same connection"; transports /// at different paths under the same origin (e.g. after a /// redirect from `/foo/` to `/foo/subdir/`) are also "the same /// connection" and should compare equal here. fn _get_connection<'py>(slf: PyRef<'py, Self>, py: Python<'py>) -> PyResult> { let base = slf.as_super().0.base(); let key = format!( "{}://{}:{}", base.scheme(), base.host_str().unwrap_or(""), base.port_or_known_default().unwrap_or(0), ); Ok(pyo3::types::PyString::new(py, &key).into()) } /// Default `disconnect` — a no-op. Concrete transports with an /// explicit connection handle override this in Python (or extend /// the pyclass and override the pymethod). fn disconnect(&self) {} /// Return a transport for `other_base` sharing this transport's /// connection state, or `None` if the URLs point at different /// origins. /// /// The comparison logic lives in pure Rust (`classify_reuse_for`) /// so pure-Rust callers use the same rules. This layer just /// dispatches on the result: same-origin-same-path returns /// `self`, same-origin-different-path calls back into Python to /// let subclass `__init__` flow through with its extra kwargs /// (e.g. HTTPS_transport's `ca_certs`). 
fn _reuse_for<'py>( slf: PyRef<'py, Self>, py: Python<'py>, other_base: &str, ) -> PyResult> { let base = slf.as_super().0.base(); match dromedary::classify_reuse_for(&base, other_base) { dromedary::ReuseMatch::None => Ok(py.None()), dromedary::ReuseMatch::Same => Ok(slf.into_pyobject(py)?.into_any().unbind()), dromedary::ReuseMatch::Sibling => { let bound = slf.into_pyobject(py)?; let cls = bound.get_type(); let kwargs = pyo3::types::PyDict::new(py); kwargs.set_item("_from_transport", bound.clone())?; let sibling = cls.call((other_base,), Some(&kwargs))?; Ok(sibling.unbind()) } } } } /// Shared base class for all Rust-backed Transport decorators. /// /// Stores a reference to the wrapped Python transport and provides the /// common decorator machinery (`_decorated` accessor, `__getattr__` /// fallback, redirect re-wrapping). Concrete decorators extend this class /// and only need to supply their URL prefix and their `__new__` / `clone` /// (which construct the decorator-specific wrapped Rust transport). #[pyclass(extends=Transport, subclass)] pub(crate) struct TransportDecorator { pub(crate) decorated: Py, pub(crate) prefix: &'static str, } #[pymethods] impl TransportDecorator { #[getter] fn _decorated(&self, py: Python) -> Py { self.decorated.clone_ref(py) } #[classmethod] fn _get_url_prefix(_cls: &Bound<'_, pyo3::types::PyType>) -> PyResult<&'static str> { // Subclasses override this to return their static prefix. Called on // the class itself, not the instance, so we can't consult `prefix` // here — but since this is the abstract base, we shouldn't be called // on it directly. Return empty to make the failure obvious. Ok("") } /// Forward unrecognised attribute access to the wrapped transport. /// /// Decorators mix Rust (base Transport methods) with Python (extras like /// HttpTransport.request). Anything not bound on the decorator itself /// should pass through to the inner transport. 
fn __getattr__(&self, py: Python, name: &str) -> PyResult> { self.decorated.bind(py).getattr(name).map(|b| b.unbind()) } /// Re-wrap the redirected transport so the decorator qualifier is /// preserved across HTTP redirects. fn _redirected_to<'a>( slf: PyRef<'a, Self>, py: Python<'a>, source: &str, target: &str, ) -> PyResult>> { let inner = slf.decorated.bind(py); let redirected = inner.call_method1("_redirected_to", (source, target))?; if redirected.is_none() { return Ok(None); } let base: String = redirected.getattr("base")?.extract()?; let new_url = format!("{}{}", slf.prefix, base); let cls = slf.into_pyobject(py)?.get_type(); let instance = cls.call1((new_url, redirected.clone().unbind()))?; Ok(Some(instance.unbind())) } } pub(crate) fn map_transport_err_to_py_err( e: Error, t: Option>, p: Option<&UrlFragment>, ) -> PyErr { let pick_path = |n: Option| n.or_else(|| p.map(|p| p.to_string())); match e { Error::InProcessTransport => InProcessTransport::new_err((t,)), Error::NotLocalUrl(url) => NotLocalUrl::new_err((url,)), Error::NoSuchFile(name) => NoSuchFile::new_err((pick_path(name),)), Error::FileExists(name) => FileExists::new_err((pick_path(name),)), Error::TransportNotPossible(msg) => TransportNotPossible::new_err((msg,)), Error::UrlError(_e) => InvalidURL::new_err((p.map(|p| p.to_string()),)), Error::PermissionDenied(name) => PermissionDenied::new_err((pick_path(name),)), Error::PathNotChild => { PathNotChild::new_err((p.map(|p| p.to_string()).unwrap_or_default(), "".to_string())) } Error::UrlutilsError(_e) => InvalidURL::new_err((p.map(|p| p.to_string()),)), Error::Io(e) => e.into(), Error::UnexpectedEof => PyValueError::new_err("Unexpected EOF"), Error::LockContention(name) => LockContention::new_err((name,)), Error::LockFailed(name, error) => LockFailed::new_err((name, error)), Error::NotADirectoryError(name) => NoSuchFile::new_err((pick_path(name),)), Error::IsADirectoryError(name) => ReadError::new_err((pick_path(name), "is a directory")), 
Error::DirectoryNotEmptyError(name) => DirectoryNotEmpty::new_err((pick_path(name),)), Error::ShortReadvError(path, offset, expected, got) => { ShortReadvError::new_err((path, offset, expected, got)) } Error::ResourceBusy(name) => ResourceBusy::new_err((pick_path(name),)), Error::InvalidHttpResponse { path, msg } => InvalidHttpResponse::new_err((path, msg)), Error::UnexpectedHttpStatus { path, code, extra } => { UnexpectedHttpStatus::new_err((path, code, extra)) } Error::InvalidHttpRange { path, range, msg } => { InvalidHttpRange::new_err((path, range, msg)) } Error::BadHttpRequest { path, reason } => BadHttpRequest::new_err((path, reason)), Error::RedirectRequested { source, target, is_permanent, } => RedirectRequested::new_err((source, target, is_permanent)), Error::UnusableRedirect { source, target, reason, } => UnusableRedirect::new_err((source, target, reason)), Error::ConnectionError(msg) => ConnectionError::new_err((msg,)), } } /// Convert a Python mode int to Rust `Option`. /// /// On Unix we translate the int into `Permissions` via `PermissionsExt`. /// On Windows there is no meaningful mapping — Python's own transport layer /// ignores the mode argument there — so we discard it and return `None`. #[cfg(unix)] fn perms_from_py_object(obj: Py) -> Option { Python::attach(|py| { let mode = obj.extract::(py).ok()?; Some(Permissions::from_mode(mode)) }) } #[cfg(not(unix))] fn perms_from_py_object(_obj: Py) -> Option { None } /// Default permissions to apply when Python passes no mode. /// On Unix this mirrors the process umask; on Windows we have nothing /// meaningful to set, so we return `None`. 
#[cfg(unix)] fn default_perms() -> Option { use nix::sys::stat::{umask, Mode}; let mask = umask(Mode::empty()); umask(mask); let mode = 0o666 & !mask.bits(); Some(Permissions::from_mode(mode as u32)) } #[cfg(not(unix))] fn default_perms() -> Option { None } #[pyclass] struct PyStat { #[pyo3(get)] st_mode: u32, #[pyo3(get)] st_size: usize, #[pyo3(get)] st_mtime: Option, } trait BufReadStream: BufRead + Seek {} impl BufReadStream for BufReader> {} #[pyclass] struct PyBufReadStream { f: Box, path: PathBuf, } #[pyclass] struct PyWriteStream(Option>); impl PyWriteStream { fn inner_mut(&mut self) -> PyResult<&mut (dyn WriteStream + Sync + Send)> { match self.0.as_deref_mut() { Some(s) => Ok(s), None => Err(pyo3::exceptions::PyValueError::new_err( "I/O operation on closed file", )), } } } #[pymethods] impl PyWriteStream { fn write(&mut self, py: Python, data: &[u8]) -> PyResult { let inner = self.inner_mut()?; py.detach(|| inner.write(data)).map_err(|e| e.into()) } #[pyo3(signature = (want_fdatasync=None))] fn close(&mut self, py: Python, want_fdatasync: Option) -> PyResult<()> { if want_fdatasync.unwrap_or(false) { if let Err(err) = self.fdatasync(py) { if !err.is_instance_of::(py) { return Err(err); } } } // Drop the underlying writer so its file descriptor is released // immediately — Python's FileStream.close() contract promises // the file is no longer in use after this returns. 
self.0 = None; Ok(()) } fn fdatasync(&mut self, py: Python) -> PyResult<()> { let inner = self.inner_mut()?; py.detach(|| inner.sync_data()).map_err(|e| e.into()) } fn __enter__(slf: PyRef) -> Py { slf.into() } fn __exit__( &mut self, py: Python<'_>, _exc_type: Option<&Bound>, _exc_val: Option<&Bound>, _exc_tb: Option<&Bound>, ) -> PyResult { self.close(py, None)?; Ok(false) } fn flush(&mut self, py: Python) -> PyResult<()> { let inner = self.inner_mut()?; py.detach(|| inner.flush()).map_err(|e| e.into()) } fn writelines(&mut self, py: Python, lines: &Bound) -> PyResult<()> { for line in lines.iter() { self.write(py, line.extract::<&[u8]>().unwrap())?; } Ok(()) } } impl PyBufReadStream { fn new(read: Box, path: &Path) -> Self { Self { f: Box::new(BufReader::new(read)), path: path.to_path_buf(), } } fn map_io_err_to_py_err(&self, e: std::io::Error) -> PyErr { let transport_err = dromedary::map_io_err_to_transport_err(e, Some(&self.path.as_path().to_string_lossy())); map_transport_err_to_py_err( transport_err, None, Some(self.path.as_path().to_string_lossy().as_ref()), ) } } #[pymethods] impl PyBufReadStream { #[pyo3(signature = (size=None))] fn read<'a>(&mut self, py: Python<'a>, size: Option) -> PyResult> { if let Some(size) = size { let mut buf = vec![0; size]; let ret = py .detach(|| self.f.read(&mut buf)) .map_err(|e| self.map_io_err_to_py_err(e))?; Ok(PyBytes::new(py, &buf[..ret])) } else { let mut buf = Vec::new(); py.detach(|| self.f.read_to_end(&mut buf)) .map_err(|e| self.map_io_err_to_py_err(e))?; Ok(PyBytes::new(py, &buf)) } } fn seekable(&self) -> bool { true } #[pyo3(signature = (offset, whence=None))] fn seek(&mut self, py: Python, offset: i64, whence: Option) -> PyResult { let seekfrom = match whence.unwrap_or(0) { 0 => std::io::SeekFrom::Start(offset as u64), 1 => std::io::SeekFrom::Current(offset), 2 => std::io::SeekFrom::End(offset), _ => return Err(PyValueError::new_err("Invalid whence")), }; py.detach(|| self.f.seek(seekfrom)) .map_err(|e| 
self.map_io_err_to_py_err(e)) } fn tell(&mut self, py: Python) -> PyResult { py.detach(|| self.f.stream_position()) .map_err(|e| self.map_io_err_to_py_err(e)) } fn readline<'a>(&mut self, py: Python<'a>) -> PyResult> { let mut buf = vec![]; let ret = py .detach(|| self.f.read_until(b'\n', &mut buf)) .map_err(|e| self.map_io_err_to_py_err(e))?; buf.truncate(ret); Ok(PyBytes::new(py, &buf)) } fn __iter__(slf: PyRef) -> Py { slf.into() } fn __next__<'a>(&mut self, py: Python<'a>) -> PyResult>> { let mut buf = vec![]; let ret = py .detach(|| self.f.read_until(b'\n', &mut buf)) .map_err(|e| self.map_io_err_to_py_err(e))?; if ret == 0 { return Ok(None); } buf.truncate(ret); Ok(Some(PyBytes::new(py, &buf))) } fn close(&mut self) -> PyResult<()> { Ok(()) } fn __enter__(slf: PyRef) -> Py { slf.into() } fn __exit__( &self, _exc_type: Option<&Bound>, _exc_val: Option<&Bound>, _exc_tb: Option<&Bound>, ) -> PyResult { Ok(false) } fn readlines<'a>(&mut self, py: Python<'a>) -> PyResult> { let ret = PyList::empty(py); while let Some(line) = self.__next__(py)? { ret.append(line)?; } Ok(ret) } } impl Transport { fn map_to_py_err(slf: PyRef, py: Python, e: Error, p: Option<&str>) -> PyErr { let obj = slf.into_pyobject(py).unwrap(); map_transport_err_to_py_err(e, Some(obj.into()), p) } } #[pymethods] impl Transport { fn external_url(slf: PyRef, py: Python) -> PyResult { match slf.0.external_url() { Ok(url) => Ok(url.to_string()), Err(e) => Err(Self::map_to_py_err(slf, py, e, None)), } } fn __repr__(&self) -> PyResult { Ok(format!("{:?}", self.0)) } /// Coalesce a list of `(start, length)` offsets into the /// fewest number of reads that still covers every byte of the /// originals, returning `_CoalescedOffset`-shaped tuples. /// /// Historically lived on the Python `Transport` base class. Breezy /// reaches into it from `transport/remote.py` and from a few /// HTTP tests, so we expose it as a staticmethod on the Rust /// base pyclass too. 
Keep the signature (name and arg order) /// bit-compatible with the Python original — callers pass 0 for /// "no limit" rather than None. #[staticmethod] #[pyo3(signature = (offsets, limit=None, fudge_factor=None, max_size=None))] fn _coalesce_offsets( py: Python, offsets: Vec<(usize, usize)>, limit: Option, fudge_factor: Option, max_size: Option, ) -> PyResult> { let raw = coalesce_offsets(offsets, limit, fudge_factor, max_size)?; // The Python version returns `_CoalescedOffset` namedtuples // (defined in `dromedary.__init__`); wrap each raw tuple so // callers that use attribute access (`coal.start`, // `coal.length`, `coal.ranges`) keep working. let module = py.import("dromedary")?; let cls = module.getattr("_CoalescedOffset")?; let out = pyo3::types::PyList::empty(py); for (start, length, ranges) in raw { let tuple = cls.call1((start, length, ranges))?; out.append(tuple)?; } Ok(out.into()) } fn get_bytes<'a>( slf: Bound<'a, Self>, py: Python<'a>, path: &'a str, ) -> PyResult> { let t = &slf.borrow().0; let ret = py.detach(|| t.get_bytes(path)).map_err(|e| match e { Error::IsADirectoryError(_) => { ReadError::new_err((path.to_string(), "Is a directory".to_string())) } Error::NotADirectoryError(_) => { NoSuchFile::new_err((path.to_string(), "Not a directory".to_string())) } e => { let obj = slf.unbind().into_any(); map_transport_err_to_py_err(e, Some(obj), Some(path)) } })?; Ok(PyBytes::new(py, &ret)) } #[getter] fn base(&self) -> PyResult { Ok(self.0.base().to_string()) } fn has(&self, py: Python, path: &str) -> PyResult { py.detach(|| self.0.has(path)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(path))) } fn has_any(&self, py: Python, paths: Vec) -> PyResult { let paths = paths.iter().map(|p| p.as_str()).collect::>(); py.detach(|| self.0.has_any(paths.as_slice())) .map_err(|e| map_transport_err_to_py_err(e, None, None)) } #[pyo3(signature = (path, mode=None))] fn mkdir(slf: &Bound, py: Python, path: &str, mode: Option>) -> PyResult<()> { let mode = 
mode.and_then(perms_from_py_object); let t = &slf.borrow().0; py.detach(|| t.mkdir(path, mode)).map_err(|e| { let obj = slf.clone().unbind().into(); map_transport_err_to_py_err(e, Some(obj), Some(path)) })?; Ok(()) } #[pyo3(signature = (mode=None))] fn ensure_base(&self, py: Python, mode: Option>) -> PyResult { let mode = mode.and_then(perms_from_py_object); py.detach(|| self.0.ensure_base(mode)) .map_err(|e| map_transport_err_to_py_err(e, None, None)) } fn local_abspath(&self, py: Python, path: &str) -> PyResult { let path = py .detach(|| self.0.local_abspath(path)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(path)))?; path.to_str() .map(|s| s.to_string()) .ok_or_else(|| PyValueError::new_err(format!("Invalid path: {}", path.display()))) } fn get<'a>( slf: PyRef<'a, Self>, py: Python<'a>, path: &'a str, ) -> PyResult> { let t = &slf.0; let ret = py.detach(|| t.get(path)).map_err(|e| match e { Error::IsADirectoryError(_) => { ReadError::new_err((path.to_string(), "Is a directory".to_string())) } Error::NotADirectoryError(_) => { NoSuchFile::new_err((path.to_string(), "Not a directory".to_string())) } e => { let obj = slf.into_pyobject(py).unwrap().unbind().into_any(); map_transport_err_to_py_err(e, Some(obj), Some(path)) } })?; Bound::new(py, PyBufReadStream::new(ret, Path::new(path))) } fn stat<'a>(&self, py: Python<'a>, path: &str) -> PyResult> { let t = &self.0; let stat = py .detach(|| t.stat(path)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(path)))?; // On Unix we report the real mode bits; on Windows the `Stat` struct // carries no mode (see `project_windows_port.md`) so we synthesise one // from the file kind, mirroring what Python's own `os.stat_result` // exposes on Windows. 
#[cfg(unix)] let st_mode = stat.mode; #[cfg(not(unix))] let st_mode = { use dromedary::FileKind; const S_IFDIR: u32 = 0o040000; const S_IFREG: u32 = 0o100000; const S_IFLNK: u32 = 0o120000; let kind_bits = match stat.kind { FileKind::Dir => S_IFDIR, FileKind::File => S_IFREG, FileKind::Symlink => S_IFLNK, FileKind::Other => 0, }; kind_bits | 0o777 }; Bound::new( py, PyStat { st_size: stat.size, st_mode, st_mtime: stat.mtime, }, ) } #[pyo3(signature = (path=None))] fn relpath(&self, path: Option<&str>) -> PyResult { let path = path.unwrap_or("."); let url = Url::parse(path).map_err(|_| PyValueError::new_err((path.to_string(),)))?; self.0 .relpath(&url) .map_err(|e| map_transport_err_to_py_err(e, None, Some(path))) } fn abspath(&self, path: &str) -> PyResult { Ok(self .0 .abspath(path) .map_err(|e| map_transport_err_to_py_err(e, None, Some(path)))? .to_string()) } #[pyo3(signature = (path, data, mode=None))] fn put_bytes( slf: &Bound, py: Python, path: &str, data: &[u8], mode: Option>, ) -> PyResult<()> { let mode = mode.and_then(perms_from_py_object).or_else(default_perms); let t = &slf.borrow().0; py.detach(|| t.put_bytes(path, data, mode)).map_err(|e| { let obj = slf.clone().unbind().into(); map_transport_err_to_py_err(e, Some(obj), Some(path)) })?; Ok(()) } #[pyo3(signature = (path, data, mode=None, create_parent_dir=None, dir_mode=None))] fn put_bytes_non_atomic( slf: &Bound, py: Python, path: &str, data: &[u8], mode: Option>, create_parent_dir: Option, dir_mode: Option>, ) -> PyResult<()> { let t = &slf.borrow().0; py.detach(|| { t.put_bytes_non_atomic( path, data, mode.and_then(perms_from_py_object).or_else(default_perms), create_parent_dir, dir_mode.and_then(perms_from_py_object), ) }) .map_err(|e| { let obj = slf.clone().unbind().into(); map_transport_err_to_py_err(e, Some(obj), Some(path)) })?; Ok(()) } #[pyo3(signature = (path, file, mode=None))] fn put_file( slf: &Bound, py: Python, path: &str, file: Py, mode: Option>, ) -> PyResult { let t = 
&slf.borrow().0; let mut file = PyBinaryFile::from(file); let ret = py .detach(|| { t.put_file( path, &mut file, mode.and_then(perms_from_py_object).or_else(default_perms), ) }) .map_err(|e| { let obj = slf.clone().unbind().into(); map_transport_err_to_py_err(e, Some(obj), Some(path)) })?; Ok(ret) } #[pyo3(signature = (path, file, mode=None, create_parent_dir=None, dir_mode=None))] fn put_file_non_atomic( slf: &Bound, py: Python, path: &str, file: Py, mode: Option>, create_parent_dir: Option, dir_mode: Option>, ) -> PyResult<()> { let t = &slf.borrow().0; let mut file = PyBinaryFile::from(file); py.detach(|| { t.put_file_non_atomic( path, &mut file, mode.and_then(perms_from_py_object).or_else(default_perms), create_parent_dir, dir_mode.and_then(perms_from_py_object), ) }) .map_err(|e| { let obj = slf.clone().unbind().into(); map_transport_err_to_py_err(e, Some(obj), Some(path)) })?; Ok(()) } fn delete(&self, py: Python, path: &str) -> PyResult<()> { let t = &self.0; py.detach(|| t.delete(path)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(path)))?; Ok(()) } fn rmdir(&self, py: Python, path: &str) -> PyResult<()> { let t = &self.0; py.detach(|| t.rmdir(path)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(path)))?; Ok(()) } fn rename(&self, py: Python, from: &str, to: &str) -> PyResult<()> { let t = &self.0; py.detach(|| t.rename(from, to)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(from)))?; Ok(()) } #[pyo3(signature = (name, value=None))] fn set_segment_parameter( &mut self, py: Python, name: &str, value: Option<&str>, ) -> PyResult<()> { py.detach(|| self.0.set_segment_parameter(name, value)) .map_err(|e| map_transport_err_to_py_err(e, None, None))?; Ok(()) } fn get_segment_parameters(&self, py: Python) -> PyResult> { let t = &self.0; py.detach(|| t.get_segment_parameters()) .map_err(|e| map_transport_err_to_py_err(e, None, None)) } #[pyo3(signature = (mode=None))] fn create_prefix(&self, py: Python, mode: Option>) -> PyResult<()> { 
let t = &self.0; py.detach(|| t.create_prefix(mode.and_then(perms_from_py_object))) .map_err(|e| map_transport_err_to_py_err(e, None, None)) } fn lock_write(&self, py: Python, path: &str) -> PyResult { let t = &self.0; py.detach(|| t.lock_write(path)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(path))) .map(Lock::from) } fn lock_read(&self, py: Python, path: &str) -> PyResult { let t = &self.0; py.detach(|| t.lock_read(path)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(path))) .map(Lock::from) } fn recommended_page_size(&self, py: Python) -> usize { py.detach(|| self.0.recommended_page_size()) } fn is_readonly(&self, py: Python) -> bool { py.detach(|| self.0.is_readonly()) } #[pyo3(signature = (path, offsets, max_readv_combine=None, bytes_to_read_before_seek=None))] fn _readv<'a>( slf: &Bound<'a, Self>, py: Python<'a>, path: &str, offsets: Vec<(usize, usize)>, max_readv_combine: Option, bytes_to_read_before_seek: Option, ) -> PyResult> { if offsets.is_empty() { return Ok(PyList::empty(py).into_any()); } let t = &slf.borrow().0; let ret = py.detach(|| t.get(path)).map_err(|e| match e { Error::IsADirectoryError(_) => { ReadError::new_err((path.to_string(), "Is a directory".to_string())) } Error::NotADirectoryError(_) => { ReadError::new_err((path.to_string(), "Not a directory".to_string())) } e => { let obj = slf.clone().unbind().into(); map_transport_err_to_py_err(e, Some(obj), Some(path)) } })?; let f = Bound::new(py, PyBufReadStream::new(ret, Path::new(path)))?; let buffered = seek_and_read( py, f.into_any(), offsets, max_readv_combine, bytes_to_read_before_seek, Some(path), )?; let list = PyList::new(py, &buffered)?; Ok(PyIterator::from_object(&list.into_any())?.into_any()) } #[pyo3(signature = (path, offsets, adjust_for_latency=None, upper_limit=None))] fn readv<'a>( slf: PyRef<'a, Self>, py: Python<'a>, path: String, offsets: Vec<(u64, usize)>, adjust_for_latency: Option, upper_limit: Option, ) -> PyResult> { // Construct the Rust iterator 
up front — that step only // does coalescing, no HTTP, so running it with the GIL // held is fine. The returned iterator borrows the // transport's `dyn TransportTrait` but our HttpTransport // readv constructs a LazyReadv that owns its state (no // borrow of `self`); promote to `'static` via transmute // and keep the parent transport alive via a Python // reference held inside `ReadvIter`. Subsequent `__next__` // calls then pull elements under `py.detach` so blocking // HTTP work doesn't hold the GIL. let boxed: Box + Send + '_> = slf.0.readv( &path, offsets, adjust_for_latency.unwrap_or(false), upper_limit, ); let mut iter: Box), dromedary::Error>> + Send> = unsafe { std::mem::transmute::< Box + Send + '_>, Box + Send + 'static>, >(boxed) }; // Peek the first element synchronously so errors that the // transport raises up-front (NoSuchFile for a missing // file, PermissionDenied, &c) surface from the `readv()` // *call* itself, not from the first `__next__` on the // returned iterator. Python's Transport.readv contract // historically raised there — breezy's pack-repo autopack // (see `test_autopack_reloads_and_stops`) catches those // around `make_readv_reader` construction to translate // deleted-pack races into RetryWithNewPacks. 
let first = py.detach(|| iter.next()); if let Some(Err(e)) = first { return Err(map_transport_err_to_py_err(e, None, Some(&path))); } let prefixed: Box), dromedary::Error>> + Send> = Box::new(first.into_iter().chain(iter)); Py::new( py, ReadvIter { parent: slf.into_pyobject(py)?.unbind(), iter: Mutex::new(Some(prefixed)), path, }, ) } fn listable(&self, py: Python) -> bool { py.detach(|| self.0.listable()) } fn list_dir(&self, py: Python, path: &str) -> PyResult> { py.detach(|| { self.0 .list_dir(path) .map(|r| r.map_err(|e| map_transport_err_to_py_err(e, None, Some(path)))) .collect::>>() }) } #[pyo3(signature = (path, bytes, mode=None))] fn append_bytes( &self, py: Python, path: &str, bytes: &[u8], mode: Option>, ) -> PyResult { let mode = mode.and_then(perms_from_py_object); py.detach(|| self.0.append_bytes(path, bytes, mode)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(path))) } #[pyo3(signature = (path, file, mode=None))] fn append_file( &self, py: Python, path: &str, file: Py, mode: Option>, ) -> PyResult { let mut file = PyBinaryFile::from(file); let mode = mode.and_then(perms_from_py_object); py.detach(|| self.0.append_file(path, &mut file, mode)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(path))) } fn iter_files_recursive<'a>(&self, py: Python<'a>) -> PyResult> { self.0 .iter_files_recursive() .map(|r| { r.map_err(|e| map_transport_err_to_py_err(e, None, Some("."))) .map(|o| o.to_string()) }) .collect::>>() .and_then(move |v| PyList::new(py, &v)) } #[pyo3(signature = (path, mode=None))] fn open_write_stream( slf: &Bound, py: Python, path: &str, mode: Option>, ) -> PyResult { let t = &slf.borrow().0; py.detach(|| t.open_write_stream(path, mode.and_then(perms_from_py_object))) .map_err(|e| Transport::map_to_py_err(slf.borrow(), py, e, Some(path))) .map(|w| PyWriteStream(Some(w))) } fn delete_tree(&self, py: Python, path: &str) -> PyResult<()> { py.detach(|| self.0.delete_tree(path)) .map_err(|e| map_transport_err_to_py_err(e, None, 
Some(path))) } fn r#move(&self, py: Python, from: &str, to: &str) -> PyResult<()> { py.detach(|| self.0.r#move(from, to)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(from))) } fn copy_tree(&self, py: Python, from: &str, to: &str) -> PyResult<()> { py.detach(|| self.0.copy_tree(from, to)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(from))) } fn copy_tree_to_transport(&self, py: Python, to_transport: Py) -> PyResult<()> { if let Ok(t) = to_transport.clone_ref(py).extract::>(py) { let t = t.0.as_ref(); py.detach(|| self.0.copy_tree_to_transport(t)) .map_err(|e| map_transport_err_to_py_err(e, None, Some("."))) } else { let t = Box::new(dromedary::pyo3::PyTransport::from(to_transport)); py.detach(|| self.0.copy_tree_to_transport(t.as_ref())) .map_err(|e| map_transport_err_to_py_err(e, None, Some("."))) } } fn hardlink(&self, py: Python, from: &str, to: &str) -> PyResult<()> { py.detach(|| self.0.hardlink(from, to)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(from))) } fn symlink(&self, py: Python, from: &str, to: &str) -> PyResult<()> { py.detach(|| self.0.symlink(from, to)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(from))) } fn readlink(&self, py: Python, path: &str) -> PyResult { py.detach(|| self.0.readlink(path)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(path))) } #[pyo3(signature = (relpaths, to_transport, mode=None))] fn copy_to( &self, py: Python, relpaths: Py, to_transport: Py, mode: Option>, ) -> PyResult { let relpaths = relpaths .bind(py) .try_iter()? 
.map(|o| o?.extract::()) .collect::>>()?; let relpaths_ref = relpaths.iter().map(|s| s.as_str()).collect::>(); if let Ok(t) = to_transport.clone_ref(py).cast_bound::(py) { let t = &t.borrow().0; py.detach(|| { self.0.copy_to( relpaths_ref.as_slice(), t.as_ref(), mode.and_then(perms_from_py_object), ) }) .map_err(|e| map_transport_err_to_py_err(e, None, None)) } else { let t = Box::new(dromedary::pyo3::PyTransport::from(to_transport)); py.detach(|| { self.0 .copy_to( relpaths_ref.as_slice(), t.as_ref(), mode.and_then(perms_from_py_object), ) .map_err(|e| map_transport_err_to_py_err(e, None, None)) }) } } fn _can_roundtrip_unix_modebits(&self) -> bool { self.0.can_roundtrip_unix_modebits() } fn copy(&self, py: Python, from: &str, to: &str) -> PyResult<()> { py.detach(|| self.0.copy(from, to)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(from))) } } #[pyclass] struct Lock(Box); impl From> for Lock { fn from(lock: Box) -> Self { Lock(lock) } } #[pymethods] impl Lock { fn unlock(&mut self) -> PyResult<()> { self.0.unlock().map_err(map_lock_err_to_py_err) } } #[pyclass(extends=Transport,subclass)] struct LocalTransport {} #[pymethods] impl LocalTransport { #[new] fn new(url: &str) -> PyResult<(Self, Transport)> { Ok(( LocalTransport {}, Transport(Box::new( dromedary::local::LocalTransport::new(url) .map_err(|e| map_transport_err_to_py_err(e, None, None))?, )), )) } #[pyo3(signature = (abspath,))] #[classmethod] fn from_abspath<'a>(cls: &'a Bound<'a, PyType>, abspath: &'a str) -> PyResult> { let ret = dromedary::local::LocalTransport::from_abspath(Path::new(abspath)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(abspath)))?; let init = PyClassInitializer::from(Transport(Box::new(ret))); let init = init.add_subclass(Self {}); Bound::new(cls.py(), init) } #[pyo3(signature = (offset=None))] fn clone<'a>( slf: PyRef<'a, Self>, py: Python<'a>, offset: Option>, ) -> PyResult> { let super_ = slf.as_ref(); let inner = if let Some(offset) = offset { let offset = 
offset.extract::(py)?; super_.0.clone(Some(&offset)) } else { super_.0.clone(None) } .map_err(|e| map_transport_err_to_py_err(e, None, None))?; let init = PyClassInitializer::from(Transport(inner)); let init = init.add_subclass(Self {}); Bound::new(py, init) } } #[pyfunction] fn get_test_permutations(py: Python) -> PyResult> { let test_server_module = py.import("dromedary.tests.test_server")?; let local_url_server = test_server_module.getattr("LocalURLServer")?; let local_transport = py.import("dromedary.local")?.getattr("LocalTransport")?; let ret = PyList::empty(py); ret.append((local_transport, local_url_server))?; Ok(ret) } #[pyfunction] #[pyo3(signature = (offsets, limit=None, fudge_factor=None, max_size=None))] fn coalesce_offsets( offsets: Vec<(usize, usize)>, mut limit: Option, mut fudge_factor: Option, mut max_size: Option, ) -> PyResult)>> { if limit == Some(0) { limit = None; } if fudge_factor == Some(0) { fudge_factor = None; } if max_size == Some(0) { max_size = None; } dromedary::readv::coalesce_offsets(offsets.as_slice(), limit, fudge_factor, max_size) .map_err(|e| PyValueError::new_err(format!("{}", e))) } const DEFAULT_MAX_READV_COMBINE: usize = 50; const DEFAULT_BYTES_TO_READ_BEFORE_SEEK: usize = 0; #[pyfunction] #[pyo3(signature = (file, offsets, max_readv_combine=None, bytes_to_read_before_seek=None, path=None))] fn seek_and_read( py: Python, file: Bound, offsets: Vec<(usize, usize)>, max_readv_combine: Option, bytes_to_read_before_seek: Option, path: Option<&str>, ) -> PyResult)>> { let f = PyBinaryFile::from(file); let mut data = py .detach(|| { dromedary::readv::seek_and_read( f, offsets, max_readv_combine.unwrap_or(DEFAULT_MAX_READV_COMBINE), bytes_to_read_before_seek.unwrap_or(DEFAULT_BYTES_TO_READ_BEFORE_SEEK), ) }) .map_err(|e| -> PyErr { e.into() })?; std::iter::from_fn(move || py.detach(|| data.next())) .map(|e| { e.map(|(offset, data)| (offset, PyBytes::new(py, data.as_slice()).into())) .map_err(|(e, offset, length, actual)| match 
e.kind() { std::io::ErrorKind::UnexpectedEof => ShortReadvError::new_err(( path.map(|p| p.to_string()), offset, length, actual, )), _ => e.into(), }) }) .collect::>>() } #[pyfunction] #[pyo3(signature = (offsets, upper_limit=None, recommended_page_size=None))] fn sort_expand_and_combine( offsets: Vec<(u64, usize)>, upper_limit: Option, recommended_page_size: Option, ) -> Vec<(u64, usize)> { dromedary::readv::sort_expand_and_combine( offsets, upper_limit, recommended_page_size.unwrap_or(4 * 1024), ) } #[pyclass] struct PyFile(BufReader>); impl PyFile { fn new(f: Box, _path: &Path) -> Self { Self(BufReader::new(f)) } } #[pymethods] impl PyFile { fn seekable(&self) -> bool { true } #[pyo3(signature = (size=None))] fn read<'a>(&mut self, py: Python<'a>, size: Option) -> PyResult> { if let Some(size) = size { let mut buf = vec![0; size]; let ret = py .detach(|| self.0.read(&mut buf)) .map_err(|e| -> PyErr { e.into() })?; Ok(PyBytes::new(py, &buf[..ret])) } else { let mut buf = Vec::new(); py.detach(|| self.0.read_to_end(&mut buf)) .map_err(|e| -> PyErr { e.into() })?; Ok(PyBytes::new(py, &buf)) } } fn write(&mut self, py: Python, data: &[u8]) -> PyResult { py.detach(|| self.0.get_mut().write(data)) .map_err(|e| e.into()) } fn readline<'a>(&mut self, py: Python<'a>) -> PyResult> { let mut buf = vec![]; let ret = py.detach(|| self.0.read_until(b'\n', &mut buf))?; buf.truncate(ret); Ok(PyBytes::new(py, &buf)) } fn __iter__(slf: PyRef) -> Py { slf.into() } fn __next__<'a>(&mut self, py: Python<'a>) -> PyResult>> { let mut buf = vec![]; let ret = py .detach(|| self.0.read_until(b'\n', &mut buf)) .map_err(|e| -> PyErr { e.into() })?; if ret == 0 { return Ok(None); } buf.truncate(ret); Ok(Some(PyBytes::new(py, &buf))) } fn readlines<'a>(&mut self, py: Python<'a>) -> PyResult> { let ret = PyList::empty(py); while let Some(line) = self.__next__(py)? 
{ ret.append(line)?; } Ok(ret) } #[pyo3(signature = (offset, whence=None))] fn seek(&mut self, offset: i64, whence: Option) -> PyResult { let seekfrom = match whence.unwrap_or(0) { 0 => std::io::SeekFrom::Start(offset as u64), 1 => std::io::SeekFrom::Current(offset), 2 => std::io::SeekFrom::End(offset), _ => return Err(PyValueError::new_err("Invalid whence")), }; self.0.seek(seekfrom).map_err(|e| e.into()) } fn tell(&mut self) -> PyResult { self.0.stream_position().map_err(|e| e.into()) } fn __enter__(slf: PyRef) -> Py { slf.into() } fn __exit__( &self, _exc_type: Option<&Bound>, _exc_val: Option<&Bound>, _exc_tb: Option<&Bound>, ) -> PyResult { Ok(false) } fn flush(&mut self) -> PyResult<()> { self.0.get_mut().flush().map_err(|e| e.into()) } fn writelines(&mut self, py: Python, lines: &Bound) -> PyResult<()> { for line in lines.iter() { self.write(py, line.extract::<&[u8]>()?)?; } Ok(()) } #[pyo3(signature = (size=None))] fn truncate(&mut self, py: Python, size: Option) -> PyResult<()> { let size = size.map_or_else(|| py.detach(|| self.tell()), Ok)?; py.detach(|| self.0.get_mut().set_len(size)) .map_err(|e| e.into()) } #[cfg(unix)] fn fileno(&self, py: Python) -> PyResult { use std::os::unix::io::AsRawFd; Ok(py.detach(|| self.0.get_ref().as_raw_fd())) } } fn map_lock_err_to_py_err(err: LockError) -> PyErr { match err { LockError::Contention(p) => LockContention::new_err((p,)), LockError::Failed(p, w) => LockFailed::new_err((p, w)), LockError::IoError(e) => e.into(), } } #[pyclass] struct ReadLock(Option); #[pyclass] struct WriteLock(dromedary::filelock::WriteLock); #[pymethods] impl ReadLock { fn unlock(&mut self) -> PyResult<()> { if let Some(mut read_lock) = self.0.take() { read_lock.unlock().map_err(map_lock_err_to_py_err) } else { debug!("ReadLock already unlocked"); Ok(()) } } #[new] #[pyo3(signature = (filename, strict_locks=None))] fn new(filename: PathBuf, strict_locks: Option) -> PyResult { Ok(Self(Some( dromedary::filelock::ReadLock::new(&filename, 
strict_locks.unwrap_or(false)) .map_err(map_lock_err_to_py_err)?, ))) } fn temporary_write_lock<'a>( slf: Bound<'a, Self>, py: Python<'a>, ) -> PyResult<(bool, Bound<'a, PyAny>)> { let mut m = slf.borrow_mut(); if let Some(read_lock) = m.0.take() { match read_lock.temporary_write_lock() { Ok(twl) => Ok(( true, Bound::new(py, TemporaryWriteLock(Some(twl)))?.into_any(), )), Err((rl, LockError::Contention(_))) => { m.0 = Some(rl); Ok((false, slf.into_any())) } Err((_rl, LockError::Failed(p, w))) => Err(LockFailed::new_err((p, w))), Err((_rl, LockError::IoError(e))) => Err(e.into()), } } else { Err(PyRuntimeError::new_err("ReadLock already unlocked")) } } #[getter] fn f(&self) -> PyResult { if let Some(read_lock) = &self.0 { Ok(PyFile::new(read_lock.file()?, read_lock.path())) } else { Err(PyRuntimeError::new_err("ReadLock already unlocked")) } } } #[pyclass] struct TemporaryWriteLock(Option); #[pymethods] impl TemporaryWriteLock { fn restore_read_lock(&mut self) -> PyResult { if let Some(lock) = self.0.take() { let rl = lock.restore_read_lock(); Ok(ReadLock(Some(rl))) } else { Err(PyRuntimeError::new_err( "TemporaryWriteLock already unlocked", )) } } #[getter] fn f(&self) -> PyResult { if let Some(lock) = &self.0 { Ok(PyFile::new(lock.file()?, lock.path())) } else { Err(PyRuntimeError::new_err( "TemporaryWriteLock already unlocked", )) } } } #[pymethods] impl WriteLock { fn unlock(&mut self) -> PyResult<()> { self.0.unlock().map_err(map_lock_err_to_py_err) } #[new] #[pyo3(signature = (filename, strict_locks=None))] fn new(filename: PathBuf, strict_locks: Option) -> PyResult { Ok(Self( dromedary::filelock::WriteLock::new(&filename, strict_locks.unwrap_or(false)) .map_err(map_lock_err_to_py_err)?, )) } #[getter] fn f(&self) -> PyResult { Ok(PyFile::new(self.0.file()?, self.0.path())) } } mod brokenrename; mod fakenfs; mod fakevfat; #[cfg(feature = "gio")] mod gio; mod http; mod log; mod memory; mod pathfilter; mod readonly; mod sftp; mod ssh; mod unlistable; mod 
urlutils; #[cfg(feature = "webdav")] mod webdav; #[pymodule] fn _transport_rs(py: Python, m: &Bound) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; let localm = PyModule::new(py, "local")?; localm.add_class::()?; m.add_submodule(&localm)?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_function(wrap_pyfunction!(get_test_permutations, m)?)?; m.add_wrapped(wrap_pyfunction!(seek_and_read))?; m.add_wrapped(wrap_pyfunction!(coalesce_offsets))?; m.add_wrapped(wrap_pyfunction!(sort_expand_and_combine))?; let sftpm = PyModule::new(py, "sftp")?; sftp::_sftp_rs(py, &sftpm)?; m.add_submodule(&sftpm)?; let sshm = PyModule::new(py, "ssh")?; ssh::register(py, &sshm)?; m.add_submodule(&sshm)?; m.add_class::()?; m.add_class::()?; m.add_class::()?; let urlutilsm = PyModule::new(py, "urlutils")?; urlutils::_urlutils_rs(py, &urlutilsm)?; m.add_submodule(&urlutilsm)?; let unlistablem = PyModule::new(py, "unlistable")?; unlistable::register(py, &unlistablem)?; m.add_submodule(&unlistablem)?; let readonlym = PyModule::new(py, "readonly")?; readonly::register(py, &readonlym)?; m.add_submodule(&readonlym)?; let brokenrenamem = PyModule::new(py, "brokenrename")?; brokenrename::register(py, &brokenrenamem)?; m.add_submodule(&brokenrenamem)?; let fakevfatm = PyModule::new(py, "fakevfat")?; fakevfat::register(py, &fakevfatm)?; m.add_submodule(&fakevfatm)?; let fakenfsm = PyModule::new(py, "fakenfs")?; fakenfs::register(py, &fakenfsm)?; m.add_submodule(&fakenfsm)?; let pathfilterm = PyModule::new(py, "pathfilter")?; pathfilter::register(py, &pathfilterm)?; m.add_submodule(&pathfilterm)?; let memorym = PyModule::new(py, "memory")?; memory::register(py, &memorym)?; m.add_submodule(&memorym)?; let httpm = PyModule::new(py, "http")?; http::register(py, &httpm)?; m.add_submodule(&httpm)?; #[cfg(feature = "webdav")] let webdavm = { let webdavm = PyModule::new(py, "webdav")?; webdav::register(&webdavm)?; m.add_submodule(&webdavm)?; webdavm }; 
let logm = PyModule::new(py, "log")?; log::register(py, &logm)?; m.add_submodule(&logm)?; #[cfg(feature = "gio")] let giom = { let giom = PyModule::new(py, "gio")?; gio::register(py, &giom)?; m.add_submodule(&giom)?; giom }; // PyO3 submodule hack for proper import support let sys = py.import("sys")?; let modules = sys.getattr("modules")?; let module_name = m.name()?; // Register submodules in sys.modules for dotted import support modules.set_item(format!("{}.local", module_name), &localm)?; modules.set_item(format!("{}.sftp", module_name), &sftpm)?; modules.set_item(format!("{}.ssh", module_name), &sshm)?; modules.set_item(format!("{}.urlutils", module_name), &urlutilsm)?; modules.set_item(format!("{}.unlistable", module_name), &unlistablem)?; modules.set_item(format!("{}.readonly", module_name), &readonlym)?; modules.set_item(format!("{}.brokenrename", module_name), &brokenrenamem)?; modules.set_item(format!("{}.fakevfat", module_name), &fakevfatm)?; modules.set_item(format!("{}.fakenfs", module_name), &fakenfsm)?; modules.set_item(format!("{}.pathfilter", module_name), &pathfilterm)?; modules.set_item(format!("{}.memory", module_name), &memorym)?; modules.set_item(format!("{}.http", module_name), &httpm)?; #[cfg(feature = "webdav")] modules.set_item(format!("{}.webdav", module_name), &webdavm)?; modules.set_item(format!("{}.log", module_name), &logm)?; #[cfg(feature = "gio")] modules.set_item(format!("{}.gio", module_name), &giom)?; Ok(()) } dromedary-0.1.5/_transport_rs/src/log.rs000066400000000000000000000077701520150013200203200ustar00rootroot00000000000000//! PyO3 bindings for `dromedary::log::LogTransport`. 
use crate::{Transport, TransportDecorator}; use dromedary::log::{LogSink, LogTransport}; use dromedary::pyo3::PyTransport; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; use std::sync::Arc; const PREFIX: &str = "log+"; fn resolve_inner(py: Python, url: &str, decorated: Option>) -> PyResult> { if let Some(d) = decorated { return Ok(d); } if !url.starts_with(PREFIX) { return Err(PyValueError::new_err(format!( "url {:?} doesn't start with decorator prefix {:?}", url, PREFIX ))); } let rest = &url[PREFIX.len()..]; let dromedary = py.import("dromedary")?; let urlutils = py.import("dromedary.urlutils")?; let is_url: bool = urlutils.call_method1("is_url", (rest,))?.extract()?; let func = if is_url { dromedary.getattr("get_transport_from_url")? } else { dromedary.getattr("get_transport_from_path")? }; Ok(func.call1((rest,))?.unbind()) } /// Build a sink that forwards formatted messages to the named Python logger /// at DEBUG level. Wrapped in `Arc` so the sink can be cheaply cloned — each /// LogTransport instance keeps its own handle but they share the underlying /// Python object. fn python_debug_sink(py: Python, logger_name: &str) -> PyResult { let logging = py.import("logging")?; let logger = logging.call_method1("getLogger", (logger_name,))?.unbind(); let logger = Arc::new(logger); Ok(Arc::new(move |msg: &str| { Python::attach(|py| { // Ignore errors — logging failures should not propagate and break // the transport method. If the logger is gone we silently drop // the message, matching Python's best-effort logging semantics. 
let _ = logger.bind(py).call_method1("debug", (msg,)); }); })) } fn wrap_inner(decorated: &Py, py: Python) -> PyResult { let py_inner: Box = Box::new(PyTransport::from(decorated.clone_ref(py))); let sink = python_debug_sink(py, "dromedary.log")?; Ok(Transport(Box::new(LogTransport::new(py_inner, sink)))) } #[pyclass(extends=TransportDecorator, subclass)] pub(crate) struct TransportLogDecorator; #[pymethods] impl TransportLogDecorator { #[new] #[pyo3(signature = (url, _decorated=None, _from_transport=None))] fn new( py: Python, url: &str, _decorated: Option>, _from_transport: Option>, ) -> PyResult> { let _ = _from_transport; let decorated = resolve_inner(py, url, _decorated)?; let wrapped = wrap_inner(&decorated, py)?; Ok(PyClassInitializer::from(wrapped) .add_subclass(TransportDecorator { decorated, prefix: PREFIX, }) .add_subclass(TransportLogDecorator)) } #[classmethod] fn _get_url_prefix(_cls: &Bound<'_, pyo3::types::PyType>) -> &'static str { PREFIX } #[pyo3(signature = (offset=None))] fn clone<'a>( slf: PyRef<'a, Self>, py: Python<'a>, offset: Option>, ) -> PyResult> { let decorator: &TransportDecorator = slf.as_super(); let decorated = decorator.decorated.clone_ref(py); let decorated_clone = match offset { Some(o) => decorated.call_method1(py, "clone", (o,))?, None => decorated.call_method0(py, "clone")?, }; let wrapped = wrap_inner(&decorated_clone, py)?; let init = PyClassInitializer::from(wrapped) .add_subclass(TransportDecorator { decorated: decorated_clone, prefix: PREFIX, }) .add_subclass(TransportLogDecorator); Bound::new(py, init) } } pub(crate) fn register(_py: Python, m: &Bound) -> PyResult<()> { m.add_class::()?; Ok(()) } dromedary-0.1.5/_transport_rs/src/memory.rs000066400000000000000000000063241520150013200210410ustar00rootroot00000000000000use crate::{map_transport_err_to_py_err, Transport}; use dromedary::memory::{MemoryStore, MemoryTransport as RustMemoryTransport}; use pyo3::prelude::*; use std::sync::{Arc, Mutex}; /// Opaque handle to a 
shared `MemoryStore`. Used by `MemoryServer` (in Python) /// to ensure every `MemoryTransport` it hands out shares the same backing /// store, matching the semantics of the original Python implementation where /// the server re-assigned `_dirs`/`_files`/... on each constructed transport. #[pyclass] pub(crate) struct MemoryStoreHandle { inner: Arc>, } #[pymethods] impl MemoryStoreHandle { #[new] fn new() -> Self { // Construct a throwaway transport just to get a fresh MemoryStore Arc. let t = RustMemoryTransport::new("memory:///").expect("fresh memory transport"); MemoryStoreHandle { inner: t.shared_store(), } } } #[pyclass(extends=Transport, subclass)] pub(crate) struct MemoryTransport { #[pyo3(get)] _scheme: String, #[pyo3(get)] _cwd: String, } fn split_url(url: &str) -> (String, String, String) { let mut normalised = if url.is_empty() { "memory:///".to_string() } else { url.to_string() }; if !normalised.ends_with('/') { normalised.push('/'); } let split = normalised .find(':') .map(|i| i + 3) .unwrap_or(normalised.len()); let scheme = normalised[..split].to_string(); let cwd = normalised[split..].to_string(); (normalised, scheme, cwd) } #[pymethods] impl MemoryTransport { #[new] #[pyo3(signature = (url="", _shared_store=None))] fn new(url: &str, _shared_store: Option>) -> PyResult<(Self, Transport)> { let (normalised, scheme, cwd) = split_url(url); let rust = match _shared_store { Some(handle) => { let arc = Python::attach(|py| handle.borrow(py).inner.clone()); RustMemoryTransport::with_shared_store(&normalised, arc) .map_err(|e| map_transport_err_to_py_err(e, None, None))? 
} None => RustMemoryTransport::new(&normalised) .map_err(|e| map_transport_err_to_py_err(e, None, None))?, }; Ok(( MemoryTransport { _scheme: scheme, _cwd: cwd, }, Transport(Box::new(rust)), )) } #[pyo3(signature = (offset=None))] fn clone<'a>( slf: PyRef<'a, Self>, py: Python<'a>, offset: Option, ) -> PyResult> { let super_ = slf.as_ref(); let inner = super_ .0 .clone(offset.as_deref()) .map_err(|e| map_transport_err_to_py_err(e, None, None))?; let new_base = inner.base().to_string(); let (_n, scheme, cwd) = split_url(&new_base); let init = PyClassInitializer::from(Transport(inner)).add_subclass(MemoryTransport { _scheme: scheme, _cwd: cwd, }); Bound::new(py, init) } } pub(crate) fn register(_py: Python, m: &Bound) -> PyResult<()> { m.add_class::()?; m.add_class::()?; Ok(()) } dromedary-0.1.5/_transport_rs/src/pathfilter.rs000066400000000000000000000127211520150013200216710ustar00rootroot00000000000000use crate::{map_transport_err_to_py_err, Transport}; use dromedary::pathfilter::FilterFunc; use dromedary::pyo3::PyTransport; use pyo3::prelude::*; use std::sync::Arc; fn make_filter_func(filter_py: Option>) -> Option { let f = filter_py?; Some(Arc::new(move |p: &str| -> dromedary::Result { Python::attach(|py| match f.call1(py, (p,)) { Ok(r) => r .extract::(py) .map_err(|e| dromedary::Error::from(e)), Err(e) => Err(dromedary::Error::from(e)), }) })) } fn build_rust_transport( py: Python, server: &Py, base: &str, ) -> PyResult { let backing = server.getattr(py, "backing_transport")?; let scheme: String = server.getattr(py, "scheme")?.extract(py)?; let filter_func_py: Option> = { let obj = server.getattr(py, "filter_func")?; if obj.is_none(py) { None } else { Some(obj) } }; let mut full_base = base.to_string(); if !full_base.ends_with('/') { full_base.push('/'); } // base_path is the path portion of the base URL, derived the same way // Python does: self.base[len(self.server.scheme) - 1:] let base_path = if full_base.len() + 1 >= scheme.len() { full_base[scheme.len() - 
1..].to_string() } else { "/".to_string() }; let backing_rust: Box = Box::new(PyTransport::from(backing)); let filter = make_filter_func(filter_func_py); dromedary::pathfilter::PathFilteringTransport::new(backing_rust, scheme, base_path, filter) .map_err(|e| map_transport_err_to_py_err(e, None, None)) } #[pyclass(extends=Transport, subclass)] pub(crate) struct PathFilteringTransport { server: Py, base: String, } #[pymethods] impl PathFilteringTransport { #[new] fn new(py: Python, server: Py, base: String) -> PyResult<(Self, Transport)> { let rust = build_rust_transport(py, &server, &base)?; let mut stored_base = base.clone(); if !stored_base.ends_with('/') { stored_base.push('/'); } Ok(( PathFilteringTransport { server, base: stored_base, }, Transport(Box::new(rust)), )) } #[getter] fn server(&self, py: Python) -> Py { self.server.clone_ref(py) } #[getter] fn scheme(&self, py: Python) -> PyResult { self.server.getattr(py, "scheme")?.extract(py) } #[getter] fn base_path(&self, py: Python) -> PyResult { let scheme: String = self.server.getattr(py, "scheme")?.extract(py)?; if self.base.len() + 1 >= scheme.len() { Ok(self.base[scheme.len() - 1..].to_string()) } else { Ok("/".to_string()) } } fn _relpath_from_server_root(&self, py: Python, relpath: &str) -> PyResult { let base_path = self.base_path(py)?; let urlutils = py.import("dromedary.urlutils")?; let combined: String = urlutils .call_method1("combine_paths", (base_path, relpath))? 
.extract()?; if !combined.starts_with('/') { return Err(pyo3::exceptions::PyValueError::new_err(combined)); } Ok(combined[1..].to_string()) } fn _filter(&self, py: Python, relpath: &str) -> PyResult { let rebased = self._relpath_from_server_root(py, relpath)?; let filter_func = self.server.getattr(py, "filter_func")?; if filter_func.is_none(py) { return Ok(rebased); } filter_func.call1(py, (rebased,))?.extract(py) } #[pyo3(signature = (offset=None))] fn clone<'a>( slf: PyRef<'a, Self>, py: Python<'a>, offset: Option>, ) -> PyResult> { let super_ = slf.as_ref(); let new_base_url = match offset { Some(o) => { let o_str: String = o.extract(py)?; super_ .0 .abspath(&o_str) .map_err(|e| map_transport_err_to_py_err(e, None, Some(&o_str)))? .to_string() } None => super_.0.base().to_string(), }; let server = slf.server.clone_ref(py); let rust = build_rust_transport(py, &server, &new_base_url)?; let mut stored_base = new_base_url; if !stored_base.ends_with('/') { stored_base.push('/'); } let init = PyClassInitializer::from(Transport(Box::new(rust))).add_subclass( PathFilteringTransport { server, base: stored_base, }, ); Bound::new(py, init) } } #[pyclass(extends=PathFilteringTransport, subclass)] pub(crate) struct ChrootTransport {} #[pymethods] impl ChrootTransport { #[new] fn new(py: Python, server: Py, base: String) -> PyResult> { let (parent, t) = PathFilteringTransport::new(py, server, base)?; let init = PyClassInitializer::from(t) .add_subclass(parent) .add_subclass(ChrootTransport {}); Ok(init) } } pub(crate) fn register(_py: Python, m: &Bound) -> PyResult<()> { m.add_class::()?; m.add_class::()?; Ok(()) } dromedary-0.1.5/_transport_rs/src/readonly.rs000066400000000000000000000060271520150013200213460ustar00rootroot00000000000000use crate::{Transport, TransportDecorator}; use dromedary::pyo3::PyTransport; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; const PREFIX: &str = "readonly+"; fn resolve_inner(py: Python, url: &str, decorated: Option>) -> PyResult> 
{ if let Some(d) = decorated { return Ok(d); } if !url.starts_with(PREFIX) { return Err(PyValueError::new_err(format!( "url {:?} doesn't start with decorator prefix {:?}", url, PREFIX ))); } let rest = &url[PREFIX.len()..]; let dromedary = py.import("dromedary")?; let urlutils = py.import("dromedary.urlutils")?; let is_url: bool = urlutils.call_method1("is_url", (rest,))?.extract()?; let func = if is_url { dromedary.getattr("get_transport_from_url")? } else { dromedary.getattr("get_transport_from_path")? }; Ok(func.call1((rest,))?.unbind()) } fn wrap_inner(decorated: &Py, py: Python) -> Transport { let py_inner: Box = Box::new(PyTransport::from(decorated.clone_ref(py))); Transport(Box::new(dromedary::readonly::ReadonlyTransport::new( py_inner, ))) } #[pyclass(extends=TransportDecorator, subclass)] pub(crate) struct ReadonlyTransportDecorator; #[pymethods] impl ReadonlyTransportDecorator { #[new] #[pyo3(signature = (url, _decorated=None, _from_transport=None))] fn new( py: Python, url: &str, _decorated: Option>, _from_transport: Option>, ) -> PyResult> { let _ = _from_transport; let decorated = resolve_inner(py, url, _decorated)?; let wrapped = wrap_inner(&decorated, py); Ok(PyClassInitializer::from(wrapped) .add_subclass(TransportDecorator { decorated, prefix: PREFIX, }) .add_subclass(ReadonlyTransportDecorator)) } #[classmethod] fn _get_url_prefix(_cls: &Bound<'_, pyo3::types::PyType>) -> &'static str { PREFIX } #[pyo3(signature = (offset=None))] fn clone<'a>( slf: PyRef<'a, Self>, py: Python<'a>, offset: Option>, ) -> PyResult> { let decorator: &TransportDecorator = slf.as_super(); let decorated = decorator.decorated.clone_ref(py); let decorated_clone = match offset { Some(o) => decorated.call_method1(py, "clone", (o,))?, None => decorated.call_method0(py, "clone")?, }; let wrapped = wrap_inner(&decorated_clone, py); let init = PyClassInitializer::from(wrapped) .add_subclass(TransportDecorator { decorated: decorated_clone, prefix: PREFIX, }) 
.add_subclass(ReadonlyTransportDecorator); Bound::new(py, init) } } pub(crate) fn register(_py: Python, m: &Bound) -> PyResult<()> { m.add_class::()?; Ok(()) } dromedary-0.1.5/_transport_rs/src/sftp.rs000066400000000000000000000445761520150013200205200ustar00rootroot00000000000000use pyo3::create_exception; use pyo3::exceptions::{PyException, PyValueError}; use pyo3::import_exception; use pyo3::prelude::*; use pyo3::types::{PyBytes, PyType}; use std::collections::VecDeque; use std::io::{Read, Write}; use std::sync::Arc; /// Synchronous bidirectional byte stream the SFTP client can run over. /// Either a `std::fs::File` (subprocess vendors hand us an fd) or the /// russh blocking adapter. Boxed so `SFTPClient` has a single concrete /// generic instantiation regardless of backend. pub(crate) trait SshChannel: Read + Write + Send {} impl SshChannel for T {} pub(crate) type BoxedChannel = Box; create_exception!(dromedary._transport_rs, SFTPError, PyException); import_exception!(dromedary.errors, NoSuchFile); import_exception!(dromedary.errors, PermissionDenied); import_exception!(dromedary.errors, SocketConnectionError); #[pyclass] struct SFTPAttributes(sftp::Attributes); #[pymethods] impl SFTPAttributes { #[new] fn new() -> Self { Self(sftp::Attributes::new()) } #[getter] fn get_st_mode(&self) -> Option { self.0.permissions } #[setter] fn set_st_mode(&mut self, mode: Option) { self.0.permissions = mode; } #[getter] fn get_st_size(&self) -> Option { self.0.size } #[setter] fn set_st_size(&mut self, size: Option) { self.0.size = size; } } #[pyclass] pub(crate) struct SFTPClient { sftp: Arc>, cwd: Option, } impl SFTPClient { /// Construct an SFTP client over an arbitrary sync byte-stream channel. /// Used by library-backed SSH vendors (russh etc.) that produce a stream /// rather than a kernel fd. 
#[cfg(feature = "russh")] pub(crate) fn from_channel(channel: BoxedChannel) -> std::io::Result { let session = sftp::SftpClient::new(channel)?; Ok(Self { sftp: Arc::new(session), cwd: None, }) } } fn sftp_error_to_py_err(e: sftp::Error, path: Option<&str>) -> PyErr { match e { sftp::Error::Io(e) => e.into(), sftp::Error::Eof(_, _) => std::io::Error::from(std::io::ErrorKind::UnexpectedEof).into(), sftp::Error::NoSuchFile(msg, _lang) => { NoSuchFile::new_err((path.map(|p| p.to_string()), msg)) } sftp::Error::PermissionDenied(msg, _) => { PermissionDenied::new_err((path.map(|p| p.to_string()), msg)) } sftp::Error::Failure(msg, _lang) => SFTPError::new_err(msg), _ => SFTPError::new_err(format!("{:?}", e)), } } #[pyclass] struct SFTPFile { sftp: Arc>, file: sftp::File, offset: u64, } impl SFTPClient { fn _adjust_cwd(&self, path: &str) -> String { if self.cwd.is_none() { return path.to_string(); } if path.starts_with('/') { return path.to_string(); } if self.cwd == Some("/".to_owned()) { return format!("/{}", path); } format!("{}/{}", self.cwd.as_ref().unwrap(), path) } } #[pymethods] impl SFTPFile { fn block(&mut self, py: Python, offset: u64, length: u64, lockmask: u32) -> PyResult<()> { py.detach(|| self.sftp.block(&self.file, offset, length, lockmask)) .map_err(|e| sftp_error_to_py_err(e, None)) } fn unblock(&mut self, py: Python, offset: u64, length: u64) -> PyResult<()> { py.detach(|| self.sftp.unblock(&self.file, offset, length)) .map_err(|e| sftp_error_to_py_err(e, None)) } fn setstat(&mut self, py: Python, attr: &SFTPAttributes) -> PyResult<()> { py.detach(|| self.sftp.fsetstat(&self.file, &attr.0)) .map_err(|e| sftp_error_to_py_err(e, None)) } #[pyo3(signature = (flags = None))] fn stat(&mut self, py: Python, flags: Option) -> PyResult { py.detach(|| self.sftp.fstat(&self.file, flags).map(SFTPAttributes)) .map_err(|e| sftp_error_to_py_err(e, None)) } fn flush(&mut self, _py: Python) -> PyResult<()> { Ok(()) } fn pwrite(&mut self, py: Python, offset: u64, data: 
&[u8]) -> PyResult<()> { py.detach(|| self.sftp.pwrite(&self.file, offset, data)) .map_err(|e| sftp_error_to_py_err(e, None)) } fn pread(&mut self, py: Python, offset: u64, length: u32) -> PyResult> { py.detach(|| self.sftp.pread(&self.file, offset, length)) .map_err(|e| sftp_error_to_py_err(e, None)) .map(|b| PyBytes::new(py, &b).into()) } fn close(&mut self, py: Python) -> PyResult<()> { py.detach(|| self.sftp.fclose(&self.file)) .map_err(|e| sftp_error_to_py_err(e, None)) } fn seekable(&self) -> bool { true } fn tell(&self) -> u64 { self.offset } fn seek(&mut self, py: Python, offset: i64, whence: u32) -> PyResult { let size = self.stat(py, None)?.0.size.unwrap(); let new_offset = match whence { // SEEK_SET 0 => offset, // SEEK_CUR 1 => self.offset as i64 + offset, // SEEK_END 2 => size as i64 - offset, _ => { return Err(PyValueError::new_err(("Invalid whence",))); } }; if new_offset < 0 { return Err(PyValueError::new_err((format!( "Negative offset: {}", new_offset ),))); } self.offset = new_offset as u64; Ok(self.offset) } fn readv<'a>( &mut self, py: Python<'a>, offsets: Vec<(u64, u32)>, ) -> PyResult> { #[pyclass] struct ReadvIter { offsets: VecDeque<(u64, u32)>, sftp: Arc>, file: sftp::File, } #[pymethods] impl ReadvIter { fn __iter__(slf: PyRef) -> Py { slf.into() } fn __next__(&mut self, py: Python) -> PyResult>> { if let Some((offset, length)) = self.offsets.pop_front() { match py.detach(|| self.sftp.pread(&self.file, offset, length)) { Ok(data) => Ok(Some(PyBytes::new(py, &data).into())), Err(sftp::Error::Eof(_, _)) => Ok(Some(PyBytes::new(py, &[]).into())), Err(e) => Err(sftp_error_to_py_err(e, None)), } } else { Ok(None) } } } Ok(Bound::new( py, ReadvIter { offsets: VecDeque::from(offsets), sftp: Arc::clone(&self.sftp), file: self.file.clone(), }, )? 
.into_any()) } #[pyo3(signature = (length = None))] fn read<'a>(&mut self, py: Python<'a>, length: Option) -> PyResult> { let ret = if let Some(length) = length { py.detach(|| self.sftp.pread(&self.file, self.offset, length)) } else { let length = self.stat(py, None)?.0.size.unwrap(); if length == 0 { return Ok(PyBytes::new(py, &[])); } py.detach(|| { self.sftp .pread(&self.file, self.offset, (length - self.offset) as u32) }) }; match ret { Ok(data) => { self.offset += data.len() as u64; Ok(PyBytes::new(py, data.as_slice())) } Err(sftp::Error::Eof(_, _)) => Ok(PyBytes::new(py, &[])), Err(e) => Err(sftp_error_to_py_err(e, None)), } } fn write(&mut self, py: Python, data: &[u8]) -> PyResult<()> { py.detach(|| self.sftp.pwrite(&self.file, self.offset, data)) .map_err(|e| sftp_error_to_py_err(e, None))?; self.offset += data.len() as u64; Ok(()) } fn __enter__(slf: PyRef) -> Py { slf.into() } fn __exit__( &mut self, py: Python, _exc_type: Option<&Bound>, _exc_val: Option<&Bound>, _exc_tb: Option<&Bound>, ) -> PyResult { self.close(py)?; Ok(false) } } #[pyclass] struct SFTPDir(Arc>, sftp::Directory); #[pymethods] impl SFTPDir { fn readdir(&mut self, py: Python) -> PyResult>> { match py.detach(|| { self.0.readdir(&self.1).map(|e| { e.into_iter() .map(|(k, l, v)| (k, l, SFTPAttributes(v))) .collect::>() }) }) { Ok(v) => Ok(Some(v)), Err(sftp::Error::Eof(_, _)) => Ok(None), Err(e) => Err(sftp_error_to_py_err(e, None)), } } fn close(&mut self, py: Python) -> PyResult<()> { py.detach(|| self.0.closedir(&self.1)) .map_err(|e| sftp_error_to_py_err(e, None)) } } #[pymethods] impl SFTPClient { #[new] fn new(py: Python, fd: isize) -> PyResult { let session = py .detach(|| { #[cfg(unix)] let channel: BoxedChannel = { use std::os::fd::FromRawFd; // SAFETY: `fd` was produced by a vendor that transferred // ownership via `detach_fd`; wrapping it in `File` makes // us the sole owner, closed on `Drop`. 
Box::new(unsafe { std::fs::File::from_raw_fd(fd as i32) }) }; #[cfg(windows)] let channel: BoxedChannel = { use std::os::windows::io::{FromRawHandle, RawHandle}; // SAFETY: same detach-and-transfer contract as unix above. Box::new(unsafe { std::fs::File::from_raw_handle(fd as RawHandle) }) }; sftp::SftpClient::new(channel) }) .map_err(|e| { // An IO error during the opening SFTP handshake means the // transport (spawned ssh subprocess or plain TCP socket) // died before we could speak SFTP to it. Report it as a // transport-level connection failure rather than letting a // raw BrokenPipeError / EOF surface at the caller. SocketConnectionError::new_err(( "".to_string(), "".to_string(), "Failed to open SFTP session", e.to_string(), )) })?; Ok(Self { sftp: Arc::new(session), cwd: None, }) } #[pyo3(signature = (path, mode = None))] fn mkdir(&mut self, py: Python, path: &str, mode: Option) -> PyResult<()> { let path = self._adjust_cwd(path); let mut attr = sftp::Attributes::new(); attr.permissions = Some(mode.unwrap_or(0o777) | 0o40000); py.detach(|| self.sftp.mkdir(path.as_str(), &attr)) .map_err(|e| sftp_error_to_py_err(e, Some(path.as_str()))) } fn extended(&mut self, py: Python, extension: &str, data: &[u8]) -> PyResult>> { py.detach(|| self.sftp.extended(extension, data)) .map_err(|e| sftp_error_to_py_err(e, None)) } #[pyo3(signature = (path, flags = None))] fn lstat(&mut self, py: Python, path: &str, flags: Option) -> PyResult { let path = self._adjust_cwd(path); py.detach(|| self.sftp.lstat(path.as_str(), flags)) .map_err(|e| sftp_error_to_py_err(e, Some(path.as_str()))) .map(SFTPAttributes) } #[pyo3(signature = (path, flags = None))] fn stat(&mut self, py: Python, path: &str, flags: Option) -> PyResult { let path = self._adjust_cwd(path); py.detach(|| self.sftp.stat(path.as_str(), flags)) .map_err(|e| sftp_error_to_py_err(e, Some(path.as_str()))) .map(SFTPAttributes) } fn chmod(&mut self, py: Python, path: &str, mode: u32) -> PyResult<()> { let path = 
self._adjust_cwd(path); let attr = sftp::Attributes { permissions: Some(mode), ..Default::default() }; py.detach(|| self.sftp.setstat(path.as_str(), &attr)) .map_err(|e| sftp_error_to_py_err(e, Some(path.as_str()))) } fn setstat(&mut self, py: Python, path: &str, attr: &SFTPAttributes) -> PyResult<()> { let path = self._adjust_cwd(path); py.detach(|| self.sftp.setstat(path.as_str(), &attr.0)) .map_err(|e| sftp_error_to_py_err(e, Some(path.as_str()))) } fn hardlink(&mut self, py: Python, oldpath: &str, newpath: &str) -> PyResult<()> { let newpath = self._adjust_cwd(newpath); py.detach(|| self.sftp.hardlink(oldpath, newpath.as_str())) .map_err(|e| sftp_error_to_py_err(e, Some(newpath.as_str()))) } #[pyo3(signature = (path, control_byte = None, compose_path = None))] fn realpath( &mut self, py: Python, path: &str, control_byte: Option, compose_path: Option<&str>, ) -> PyResult { let path = self._adjust_cwd(path); py.detach(|| { self.sftp .realpath(path.as_str(), control_byte, compose_path) }) .map_err(|e| sftp_error_to_py_err(e, Some(path.as_str()))) } fn symlink(&mut self, py: Python, oldpath: &str, newpath: &str) -> PyResult<()> { let newpath = self._adjust_cwd(newpath); py.detach(|| self.sftp.symlink(oldpath, newpath.as_str())) .map_err(|e| sftp_error_to_py_err(e, Some(newpath.as_str()))) } fn readlink(&mut self, py: Python, path: &str) -> PyResult { let path = self._adjust_cwd(path); py.detach(|| self.sftp.readlink(path.as_str())) .map_err(|e| sftp_error_to_py_err(e, Some(path.as_str()))) } #[pyo3(signature = (oldpath, newpath, flags = None))] fn rename( &mut self, py: Python, oldpath: &str, newpath: &str, flags: Option, ) -> PyResult<()> { let newpath = self._adjust_cwd(newpath); let oldpath = self._adjust_cwd(oldpath); py.detach(|| self.sftp.rename(oldpath.as_str(), newpath.as_str(), flags)) .map_err(|e| sftp_error_to_py_err(e, Some(newpath.as_str()))) } fn remove(&mut self, py: Python, path: &str) -> PyResult<()> { let path = self._adjust_cwd(path); 
py.detach(|| self.sftp.remove(path.as_str())) .map_err(|e| sftp_error_to_py_err(e, Some(path.as_str()))) } fn rmdir(&mut self, py: Python, path: &str) -> PyResult<()> { let path = self._adjust_cwd(path); py.detach(|| self.sftp.rmdir(path.as_str())) .map_err(|e| sftp_error_to_py_err(e, Some(path.as_str()))) } fn close(&mut self) -> PyResult<()> { Ok(()) } #[pyo3(signature = (path, attr, *, read=false, write=false, append=false, create=false, truncate=false, excl=false))] #[allow(clippy::too_many_arguments)] fn open( &mut self, py: Python, path: &str, attr: &SFTPAttributes, read: bool, write: bool, append: bool, create: bool, truncate: bool, excl: bool, ) -> PyResult { let path = self._adjust_cwd(path); let options = sftp::OpenOptions::new() .read(read) .write(write) .append(append) .create(create) .truncate(truncate) .excl(excl); let h = py .detach(|| self.sftp.open(path.as_str(), options, &attr.0)) .map_err(|e| sftp_error_to_py_err(e, Some(path.as_str())))?; Ok(SFTPFile { sftp: Arc::clone(&self.sftp), file: h, offset: 0, }) } #[pyo3(signature = (path, mode = None, create_mode = None))] fn file( &mut self, py: Python, path: &str, mode: Option<&str>, create_mode: Option, ) -> PyResult { let path = self._adjust_cwd(path); let offset = 0; let mode = mode.unwrap_or("rt"); let options = match mode { "rt" | "rb" => sftp::OpenOptions::new().read(true), "ab" => sftp::OpenOptions::new() .write(true) .create(true) .append(true), "wb" => sftp::OpenOptions::new() .read(true) .write(true) .create(true) .truncate(true), "r+" | "rb+" | "b+" => sftp::OpenOptions::new().read(true).write(true).create(true), "a+" | "ab+" => sftp::OpenOptions::new() .read(true) .write(true) .create(true) .append(true), mode => panic!("Unsupported mode: {}", mode), }; let attr = sftp::Attributes { permissions: create_mode, ..Default::default() }; let h = py .detach(|| self.sftp.open(path.as_str(), options, &attr)) .map_err(|e| sftp_error_to_py_err(e, Some(path.as_str())))?; let mut ret = SFTPFile { 
sftp: Arc::clone(&self.sftp), file: h, offset, }; if mode.contains('a') { ret.seek(py, 0, 2)?; } Ok(ret) } fn opendir(&mut self, py: Python, path: &str) -> PyResult { let path = self._adjust_cwd(path); let h = py .detach(|| self.sftp.opendir(path.as_str())) .map_err(|e| sftp_error_to_py_err(e, Some(path.as_str())))?; Ok(SFTPDir(Arc::clone(&self.sftp), h)) } fn listdir(&mut self, py: Python, path: &str) -> PyResult> { let path = self._adjust_cwd(path); let mut dir = self.opendir(py, path.as_str())?; let mut entries = Vec::new(); while let Some(extra_entries) = dir.readdir(py)? { for (name, _, _) in extra_entries { entries.push(name); } } Ok(entries) } } pub fn _sftp_rs(py: Python, m: &Bound) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add("SFTPError", py.get_type::())?; Ok(()) } dromedary-0.1.5/_transport_rs/src/ssh/000077500000000000000000000000001520150013200177535ustar00rootroot00000000000000dromedary-0.1.5/_transport_rs/src/ssh/mod.rs000066400000000000000000000061301520150013200211000ustar00rootroot00000000000000//! SSH connection support for the Rust transport layer. //! //! This module mirrors `dromedary/ssh/__init__.py`. It exposes vendor //! implementations (subprocess-based and library-based) that produce an //! [`SSHConnection`] or an SFTP channel usable by the `sftp` submodule. //! //! The `SshLibrary` / `SshSession` traits below are an internal abstraction //! that lets us plug in different crypto backends (russh today; libssh2 or //! ssh2-rs in the future) without rewriting the PyO3-facing vendor layer. //! They are deliberately not exposed to Python. use pyo3::prelude::*; use std::ffi::OsString; #[cfg(feature = "russh")] mod russh_vendor; mod subprocess; // TODO: add `libssh2` backend module gated on a future `libssh2` feature. // TODO: add `ssh2-rs` backend module gated on a future `ssh2-rs` feature. /// Parameters used to establish an SSH connection. 
#[allow(dead_code)] pub(crate) struct ConnectConfig { pub host: String, pub port: u16, pub username: Option, pub password: Option, } /// Backend-agnostic SSH session. Each crypto library (russh, libssh2, …) /// provides its own implementation. #[allow(dead_code)] pub(crate) trait SshSession: Send { /// Open the `sftp` subsystem and return a bidirectional stream suitable /// for feeding into `sftp::SftpClient::from_stream` (to be added). fn open_sftp(&mut self) -> std::io::Result>; /// Execute a command on the remote host, returning its stdio as a stream. fn exec(&mut self, command: &str) -> std::io::Result>; } /// Marker trait combining `Read + Write + Send` so we can hand a trait object /// to the SFTP client. pub(crate) trait ReadWrite: std::io::Read + std::io::Write + Send {} impl ReadWrite for T {} /// Library-level entry point. Each backend implements this to hand back a /// fresh [`SshSession`] for the given connection parameters. #[allow(dead_code)] pub(crate) trait SshLibrary { fn connect(cfg: &ConnectConfig) -> std::io::Result>; } /// Classify an `ssh -V` version string into a vendor registry key. /// Mirrors `dromedary.ssh.SSHVendorManager._get_vendor_by_version_string`. #[pyfunction] #[pyo3(signature = (version, progname))] fn classify_ssh_version(version: &str, progname: &str) -> Option<&'static str> { dromedary::ssh::classify_ssh_version(version, progname) } /// Run `executable -V` and return the vendor registry key, or `None` if /// the binary can't be run or the output isn't recognized. Mirrors the /// combination of `_get_ssh_version_string` + `_get_vendor_from_path`. 
#[pyfunction] #[pyo3(signature = (executable))] fn detect_ssh_vendor(py: Python, executable: OsString) -> Option<&'static str> { py.detach(|| dromedary::ssh::detect_ssh_vendor(&executable)) } pub(crate) fn register(py: Python, m: &Bound) -> PyResult<()> { subprocess::register(py, m)?; #[cfg(feature = "russh")] russh_vendor::register(py, m)?; m.add_function(wrap_pyfunction!(classify_ssh_version, m)?)?; m.add_function(wrap_pyfunction!(detect_ssh_vendor, m)?)?; Ok(()) } dromedary-0.1.5/_transport_rs/src/ssh/russh_vendor.rs000066400000000000000000001050671520150013200230530ustar00rootroot00000000000000//! russh-backed SSH vendor. //! //! Replaces `dromedary/ssh/paramiko.py`. Gated behind the `russh` Cargo //! feature (on by default). //! //! Progress through the migration sub-steps: //! * 5a ✓ — TCP, password auth, trust-on-first-use host-key acceptance, //! SFTP subsystem + `exec` command channel. //! * 5b ✓ — SSH agent authentication via `$SSH_AUTH_SOCK`. //! * 5c ✓ — `~/.ssh/id_rsa` / `id_dsa` key-file auth with passphrase prompt. //! * 5d ✓ — `known_hosts` load/save and host-key mismatch rejection. //! * 5e ✓ — `auth_none` probe + `_config.get_auth_password` fallback + //! keyboard-interactive with the password as sole response. 
use crate::sftp::{BoxedChannel, SFTPClient}; use pyo3::exceptions::PyRuntimeError; use pyo3::import_exception; use pyo3::prelude::*; use pyo3::types::PyDict; use std::io::{Read, Write}; use std::path::{Path, PathBuf}; use std::sync::{Arc, Mutex}; use std::time::Duration; use tokio::io::{AsyncReadExt, AsyncWriteExt}; use tokio::runtime::Runtime; import_exception!(dromedary.errors, SocketConnectionError); import_exception!(dromedary.errors, TransportError); // --------------------------------------------------------------------------- // Host-key verification handler // --------------------------------------------------------------------------- /// Records *why* `check_server_key` returned `Ok(false)` so the caller can /// raise the right Python exception after russh collapses the rejection /// into `Error::UnknownKey`. #[derive(Default)] struct HostKeyVerdict { mismatch: Option, } struct HostKeyMismatch { host: String, expected_fp: String, actual_fp: String, system_path: PathBuf, dromedary_path: PathBuf, } /// Client-side handler that verifies the remote server key against the /// user's `~/.ssh/known_hosts` **and** dromedary's `/ /// ssh_host_keys`, trust-on-first-use into the dromedary file. Mirrors /// `ParamikoVendor._connect` at paramiko.py:240 so both stores stay in /// sync between the two backends during the migration. struct VerifyingHandler { host: String, port: u16, system_path: PathBuf, dromedary_path: PathBuf, verdict: Arc>, } impl russh::client::Handler for VerifyingHandler { type Error = russh::Error; async fn check_server_key( &mut self, server_public_key: &russh::keys::ssh_key::PublicKey, ) -> Result { match check_host_key( &self.host, self.port, server_public_key, &self.system_path, &self.dromedary_path, ) { HostKeyCheck::Match => Ok(true), HostKeyCheck::Unknown => { // Trust-on-first-use: store into dromedary's file only // (same as paramiko's `BRZ_HOSTKEYS.add` + `save_host_keys`). 
log::warn!( "Adding {} host key for {}: {}", server_public_key.algorithm(), self.host, server_public_key.fingerprint(Default::default()) ); if let Err(e) = russh::keys::known_hosts::learn_known_hosts_path( &self.host, self.port, server_public_key, &self.dromedary_path, ) { log::debug!( "failed to save host key to {}: {}", self.dromedary_path.display(), e ); } Ok(true) } HostKeyCheck::Mismatch { expected_fp } => { self.verdict.lock().unwrap().mismatch = Some(HostKeyMismatch { host: self.host.clone(), expected_fp, actual_fp: server_public_key .fingerprint(Default::default()) .to_string(), system_path: self.system_path.clone(), dromedary_path: self.dromedary_path.clone(), }); Ok(false) } } } } enum HostKeyCheck { Match, Unknown, Mismatch { expected_fp: String }, } /// Check both the system file and dromedary's file. A mismatch in either /// file is fatal (it shadows a prior trusted entry). An entry only in the /// system file is acceptable without writing to the dromedary file. fn check_host_key( host: &str, port: u16, key: &russh::keys::ssh_key::PublicKey, system_path: &Path, dromedary_path: &Path, ) -> HostKeyCheck { for path in [system_path, dromedary_path] { match russh::keys::check_known_hosts_path(host, port, key, path) { Ok(true) => return HostKeyCheck::Match, Ok(false) => continue, Err(russh::keys::Error::KeyChanged { line }) => { // Surface the recorded key so we can include its fingerprint // in the error message. let expected_fp = lookup_recorded_fingerprint(host, port, path, line); return HostKeyCheck::Mismatch { expected_fp }; } Err(e) => { log::debug!("reading {}: {}", path.display(), e); } } } HostKeyCheck::Unknown } /// Pull the fingerprint of the *recorded* key on a specific line so a /// mismatch error can show "expected X, got Y". Returns `` if the /// file can't be reread — the caller still raises `TransportError`. 
fn lookup_recorded_fingerprint(host: &str, port: u16, path: &Path, line: usize) -> String { match russh::keys::known_hosts::known_host_keys_path(host, port, path) { Ok(entries) => entries .into_iter() .find(|(l, _)| *l == line) .map(|(_, k)| k.fingerprint(Default::default()).to_string()) .unwrap_or_else(|| "".to_string()), Err(_) => "".to_string(), } } // --------------------------------------------------------------------------- // Blocking bridge: async russh ChannelStream -> sync Read + Write // --------------------------------------------------------------------------- /// Wraps an async `ChannelStream` so it can be driven by synchronous SFTP /// code. Each `read` / `write` call `block_on`s the owned runtime. /// /// The `Runtime` is held in an `Arc` so the same runtime that performed the /// SSH handshake also services subsequent channel I/O — otherwise the inner /// tokio tasks spawned by russh would be orphaned. struct BlockingChannel { runtime: Arc, stream: russh::ChannelStream, } impl Read for BlockingChannel { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { self.runtime.block_on(self.stream.read(buf)) } } impl Write for BlockingChannel { fn write(&mut self, buf: &[u8]) -> std::io::Result { self.runtime.block_on(self.stream.write(buf)) } fn flush(&mut self) -> std::io::Result<()> { self.runtime.block_on(self.stream.flush()) } } // --------------------------------------------------------------------------- // Error mapping // --------------------------------------------------------------------------- fn connect_err(host: &str, port: Option, e: impl std::fmt::Display) -> PyErr { SocketConnectionError::new_err(( host.to_string(), port.map(|p| format!(":{p}")).unwrap_or_default(), "Failed to connect to", e.to_string(), )) } // --------------------------------------------------------------------------- // Connection wrapper for `exec`-style SSH sessions // --------------------------------------------------------------------------- /// Counterpart to 
`SSHSubprocessConnection` for the russh transport. Unlike /// the subprocess variant it doesn't expose a raw fd — the remote command's /// stdio is read/written through the same async runtime that performed the /// handshake. #[pyclass(module = "dromedary._transport_rs.ssh", name = "RusshSSHConnection")] pub(crate) struct RusshSSHConnection { inner: Mutex>, } #[pymethods] impl RusshSSHConnection { fn send(&self, py: Python, data: &[u8]) -> PyResult { py.detach(|| { let mut guard = self.inner.lock().unwrap(); let ch = guard .as_mut() .ok_or_else(|| PyRuntimeError::new_err("connection closed"))?; ch.write(data) .map_err(|e| PyRuntimeError::new_err(format!("send failed: {e}"))) }) } fn recv(&self, py: Python, count: usize) -> PyResult> { py.detach(|| { let mut guard = self.inner.lock().unwrap(); let ch = guard .as_mut() .ok_or_else(|| PyRuntimeError::new_err("connection closed"))?; let mut buf = vec![0u8; count]; let n = ch .read(&mut buf) .map_err(|e| PyRuntimeError::new_err(format!("recv failed: {e}")))?; buf.truncate(n); Ok(buf) }) } fn close(&self) -> PyResult<()> { // Drops the underlying stream; russh will send channel_close when // the Channel's write half is dropped. let _ = self.inner.lock().unwrap().take(); Ok(()) } } // --------------------------------------------------------------------------- // Vendor // --------------------------------------------------------------------------- #[pyclass(module = "dromedary._transport_rs.ssh", name = "RusshVendor")] pub(crate) struct RusshVendor; impl RusshVendor { /// Shared connection path: TCP connect, SSH handshake, password auth, /// return an open session handle plus the runtime that owns it. /// /// `key_files` is loaded (with a GIL-requiring passphrase prompt on /// encrypted keys) *before* we enter async, so the passphrase prompt /// never races the tokio runtime. 
fn connect( py: Python, username: Option<&str>, password: Option<&str>, host: &str, port: Option, ) -> PyResult<(Arc, russh::client::Handle)> { let user = username .map(str::to_string) .unwrap_or_else(|| resolve_username(py, host, port)); // Load key files while we still hold the GIL: decoding an // encrypted key needs to call back into Python's `_ui.get_password`. let key_files = load_default_keyfiles(py); // Resolve host-key paths while holding the GIL too: // `dromedary._bedding.config_dir` is an embedder-overridable // function on the Python side. let system_path = system_known_hosts_path(); let dromedary_path = dromedary_host_keys_path(py); let port_nr = port.unwrap_or(22); let host_owned = host.to_string(); let verdict = Arc::new(Mutex::new(HostKeyVerdict::default())); let password = password.map(str::to_string); // Phase 1: connect + agent/keys/probe/supplied-password. Runs with // GIL released. type Phase1 = ( Arc, russh::client::Handle, AuthPhaseOutcome, ); let phase1: Result = py.detach({ let verdict = verdict.clone(); let host_owned = host_owned.clone(); let user = user.clone(); move || { let runtime = Arc::new( tokio::runtime::Builder::new_current_thread() .enable_all() .build() .map_err(|e| { AuthError::Russh(russh::Error::IO(std::io::Error::other(format!( "runtime build: {e}" )))) })?, ); let addr = (host_owned.clone(), port_nr); let config = Arc::new(russh::client::Config { inactivity_timeout: Some(Duration::from_secs(3600)), ..Default::default() }); let handler = VerifyingHandler { host: host_owned, port: port_nr, system_path, dromedary_path, verdict, }; runtime .block_on(async move { let mut session = russh::client::connect(config, addr, handler) .await .map_err(AuthError::Russh)?; let outcome = authenticate_pre_prompt( &mut session, &user, password.as_deref(), &key_files, ) .await?; Ok::<_, AuthError>((session, outcome)) }) .map(|(session, outcome)| (runtime, session, outcome)) } }); let (runtime, handle, outcome) = phase1.map_err(|e| 
connect_or_hostkey_err(&host_owned, Some(port_nr), &verdict, e))?; if matches!(outcome, AuthPhaseOutcome::Authenticated) { return Ok((runtime, handle)); } // Phase 2 (GIL held): ask Python for a password. `None` means the // user cancelled — paramiko.py:138 treats that as a hard failure. let Some(prompt_pw) = prompt_auth_password(py, host, port, &user) else { return Err(connect_or_hostkey_err( &host_owned, Some(port_nr), &verdict, AuthError::NoMethodsSucceeded, )); }; // Phase 3 (GIL released): try the prompted password. The handle // moves into the closure and comes back out with the result so // the caller can still use it. let (handle, result) = py.detach({ let runtime = runtime.clone(); move || { let mut handle = handle; let r = runtime.block_on(try_password_phase(&mut handle, &user, &prompt_pw)); (handle, r) } }); match result { Ok(()) => Ok((runtime, handle)), Err(e) => Err(connect_or_hostkey_err( &host_owned, Some(port_nr), &verdict, e, )), } } } /// Translate an `AuthError` into the right Python exception. `UnknownKey` /// paired with a recorded mismatch becomes a `TransportError`; everything /// else stays a `SocketConnectionError` (consistent with paramiko.py). fn connect_or_hostkey_err( host: &str, port: Option, verdict: &Mutex, e: AuthError, ) -> PyErr { if let AuthError::Russh(russh::Error::UnknownKey) = &e { if let Some(m) = verdict.lock().unwrap().mismatch.take() { return TransportError::new_err(( format!( "Host keys for {} do not match! {} != {}", m.host, m.expected_fp, m.actual_fp ), format!( "Try editing {} or {}", m.system_path.display(), m.dromedary_path.display() ), )); } } connect_err(host, port, e) } // --------------------------------------------------------------------------- // Authentication // --------------------------------------------------------------------------- #[derive(Debug)] enum AuthError { Russh(russh::Error), /// All authentication methods we tried were rejected by the server. 
NoMethodsSucceeded, } impl std::fmt::Display for AuthError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { AuthError::Russh(e) => write!(f, "{e}"), AuthError::NoMethodsSucceeded => f.write_str("no SSH authentication method succeeded"), } } } impl From for AuthError { fn from(e: russh::Error) -> Self { AuthError::Russh(e) } } /// Outcome of the first authentication phase (agent → keys → auth_none /// probe → supplied password). If we return `NeedsPrompt`, the caller /// must re-acquire the GIL, call `_config.get_auth_password`, and invoke /// [`try_password_phase`] to finish authenticating. enum AuthPhaseOutcome { Authenticated, NeedsPrompt, } /// Phase 1: agent → key files → `auth_none` probe → supplied password. /// /// Matches paramiko.py:68-126. Returns `NeedsPrompt` when the server /// advertises `password` / `keyboard-interactive` but we haven't /// authenticated yet, so the caller can solicit a password. async fn authenticate_pre_prompt( session: &mut russh::client::Handle, user: &str, password: Option<&str>, key_files: &[russh::keys::PrivateKey], ) -> Result { if try_agent_auth(session, user).await? { return Ok(AuthPhaseOutcome::Authenticated); } if try_keyfile_auth(session, user, key_files).await? { return Ok(AuthPhaseOutcome::Authenticated); } // auth_none probe: unlikely to succeed, but its `remaining_methods` // tells us whether password-style auth is even accepted. If it's not, // paramiko.py:116 raises ConnectionError — we mirror that by bailing // out with `NoMethodsSucceeded`. let remaining = match session.authenticate_none(user.to_string()).await? { russh::client::AuthResult::Success => return Ok(AuthPhaseOutcome::Authenticated), russh::client::AuthResult::Failure { remaining_methods, .. 
} => remaining_methods, }; if !password_style_accepted(&remaining) { log::debug!( "server does not accept password or keyboard-interactive; remaining: {:?}", remaining ); return Err(AuthError::NoMethodsSucceeded); } // Try the explicitly-supplied password first, so a caller that passes // one doesn't also get a prompt. if let Some(pw) = password { if try_password_or_interactive(session, user, pw).await? { return Ok(AuthPhaseOutcome::Authenticated); } } Ok(AuthPhaseOutcome::NeedsPrompt) } /// Phase 3: try a password obtained from `_config.get_auth_password`. async fn try_password_phase( session: &mut russh::client::Handle, user: &str, password: &str, ) -> Result<(), AuthError> { if try_password_or_interactive(session, user, password).await? { Ok(()) } else { Err(AuthError::NoMethodsSucceeded) } } fn password_style_accepted(methods: &russh::MethodSet) -> bool { methods.iter().any(|m| { matches!( m, russh::MethodKind::Password | russh::MethodKind::KeyboardInteractive ) }) } /// Try `authenticate_password`; on failure, fall back to /// `keyboard-interactive` with the password as the sole response. This /// mirrors paramiko's `auth_password` which transparently does the same. async fn try_password_or_interactive( session: &mut russh::client::Handle, user: &str, password: &str, ) -> Result { match session .authenticate_password(user.to_string(), password) .await? { russh::client::AuthResult::Success => return Ok(true), russh::client::AuthResult::Failure { .. } => {} } // Keyboard-interactive fallback. We blindly respond to the first // InfoRequest with one copy of the password per prompt. Servers that // ask anything beyond a password aren't supported (paramiko has the // same limitation — see the XXX at paramiko.py:109). match session .authenticate_keyboard_interactive_start(user.to_string(), None) .await? { russh::client::KeyboardInteractiveAuthResponse::Success => Ok(true), russh::client::KeyboardInteractiveAuthResponse::Failure { .. 
} => Ok(false), russh::client::KeyboardInteractiveAuthResponse::InfoRequest { prompts, .. } => { let responses = vec![password.to_string(); prompts.len()]; match session .authenticate_keyboard_interactive_respond(responses) .await? { russh::client::KeyboardInteractiveAuthResponse::Success => Ok(true), _ => Ok(false), } } } } /// Enumerate keys from the SSH agent and try each in turn. Silent no-op /// when the agent is unreachable — matches paramiko's behavior (agent /// failures never block other auth methods). On Unix this uses /// `$SSH_AUTH_SOCK`; on Windows it talks to Pageant. #[cfg(unix)] async fn try_agent_auth( session: &mut russh::client::Handle, user: &str, ) -> Result { let agent = match russh::keys::agent::client::AgentClient::connect_env().await { Ok(a) => a, Err(e) => { log::debug!("SSH agent unavailable: {e}"); return Ok(false); } }; try_agent_auth_with(session, user, agent).await } #[cfg(windows)] async fn try_agent_auth( session: &mut russh::client::Handle, user: &str, ) -> Result { // `connect_pageant` in russh 0.54 returns the client directly (no // `Result`); a missing/unreachable Pageant surfaces later when we // actually request identities, where it's already handled as a silent // no-op below. 
let agent = russh::keys::agent::client::AgentClient::connect_pageant().await; try_agent_auth_with(session, user, agent).await } async fn try_agent_auth_with( session: &mut russh::client::Handle, user: &str, mut agent: russh::keys::agent::client::AgentClient, ) -> Result where S: russh::keys::agent::client::AgentStream + Unpin + Send + 'static, { let identities = match agent.request_identities().await { Ok(ids) => ids, Err(e) => { log::debug!("SSH agent request_identities failed: {e}"); return Ok(false); } }; for key in identities { log::debug!( "Trying SSH agent key {} ({})", key.fingerprint(Default::default()), key.algorithm() ); match session .authenticate_publickey_with(user.to_string(), key, None, &mut agent) .await { Ok(auth) if auth.success() => return Ok(true), Ok(_) => continue, Err(e) => { // `authenticate_publickey_with` can fail for signer // problems even while other agent keys might still work. log::debug!("agent key auth attempt failed: {e}"); continue; } } } Ok(false) } /// Try each preloaded key file in turn. RSA keys negotiate the strongest /// hash algorithm the server advertises (falling back to SHA-1 when the /// server doesn't send `server-sig-algs`). async fn try_keyfile_auth( session: &mut russh::client::Handle, user: &str, key_files: &[russh::keys::PrivateKey], ) -> Result { if key_files.is_empty() { return Ok(false); } // `Some(Some(alg))` = server wants this hash; `Some(None)` = server // only supports SHA-1; `None` = server didn't advertise, try SHA-1. 
let rsa_hash = session .best_supported_rsa_hash() .await .ok() .flatten() .flatten(); for key in key_files { let with_hash = russh::keys::PrivateKeyWithHashAlg::new(Arc::new(key.clone()), rsa_hash); log::debug!( "Trying key file ({}) fingerprint {}", key.algorithm(), key.fingerprint(Default::default()) ); match session .authenticate_publickey(user.to_string(), with_hash) .await { Ok(auth) if auth.success() => return Ok(true), Ok(_) => continue, Err(e) => { log::debug!("key file auth attempt failed: {e}"); continue; } } } Ok(false) } /// Synchronous helper that loads `~/.ssh/id_rsa` and `~/.ssh/id_dsa`, /// prompting the user via `dromedary._ui.get_password` for a passphrase /// when a key is encrypted. Missing files are silently skipped. /// /// Runs with the GIL held because the passphrase prompt is Python-side. fn load_default_keyfiles(py: Python) -> Vec { let Some(home) = home_dir() else { log::debug!("no home directory; skipping key-file auth"); return Vec::new(); }; let mut keys = Vec::new(); for name in ["id_rsa", "id_dsa"] { let path = home.join(".ssh").join(name); if !path.exists() { continue; } match load_one_keyfile(py, &path) { Ok(Some(k)) => keys.push(k), Ok(None) => {} Err(e) => log::debug!("load key {}: {}", path.display(), e), } } keys } /// Load a single private key file, prompting once for a passphrase on /// `KeyIsEncrypted`. Returns `Ok(None)` if the user cancels the prompt or /// the second attempt also fails. 
fn load_one_keyfile( py: Python, path: &Path, ) -> Result, russh::keys::Error> { match russh::keys::load_secret_key(path, None) { Ok(k) => Ok(Some(k)), Err(russh::keys::Error::KeyIsEncrypted) => { let Some(pw) = prompt_key_password(py, path) else { log::debug!( "encrypted key {} skipped (no passphrase provided)", path.display() ); return Ok(None); }; match russh::keys::load_secret_key(path, Some(&pw)) { Ok(k) => Ok(Some(k)), Err(e) => { log::debug!( "decrypting {} with supplied passphrase failed: {}", path.display(), e ); Ok(None) } } } Err(e) => Err(e), } } /// Call `dromedary._ui.get_password(prompt, filename=)`, matching /// paramiko.py's prompt format. Returns `None` if the call fails or the /// result is `None`. fn prompt_key_password(py: Python, path: &Path) -> Option { let ui = match py.import("dromedary._ui") { Ok(m) => m, Err(e) => { log::debug!("importing dromedary._ui failed: {e}"); return None; } }; let kwargs = PyDict::new(py); if let Err(e) = kwargs.set_item("filename", path.display().to_string()) { log::debug!("building get_password kwargs failed: {e}"); return None; } let result = ui.call_method( "get_password", ("SSH %(filename)s password",), Some(&kwargs), ); match result { Ok(v) if v.is_none() => None, Ok(v) => v.extract::().ok(), Err(e) => { log::debug!("_ui.get_password raised: {e}"); None } } } /// Resolve a default username via `dromedary._config.get_auth_user("ssh", /// host, port=port, default=getpass.getuser())`, falling back to `$USER` /// if the Python layer is unavailable. Mirrors paramiko.py:64-67. 
fn resolve_username(py: Python, host: &str, port: Option) -> String { let fallback = whoami_or_default(); let cfg = match py.import("dromedary._config") { Ok(m) => m, Err(e) => { log::debug!("importing dromedary._config failed: {e}"); return fallback; } }; let kwargs = PyDict::new(py); if let Err(e) = kwargs.set_item("default", &fallback) { log::debug!("building get_auth_user kwargs failed: {e}"); return fallback; } if let Some(p) = port { if let Err(e) = kwargs.set_item("port", p) { log::debug!("building get_auth_user kwargs failed: {e}"); return fallback; } } match cfg.call_method("get_auth_user", ("ssh", host), Some(&kwargs)) { Ok(v) if v.is_none() => fallback, Ok(v) => v.extract::().unwrap_or(fallback), Err(e) => { log::debug!("_config.get_auth_user raised: {e}"); fallback } } } /// Prompt for an auth password via `dromedary._config.get_auth_password( /// "ssh", host, user, port=port)`. Returns `None` if the call fails or /// the user cancels. fn prompt_auth_password(py: Python, host: &str, port: Option, user: &str) -> Option { let cfg = match py.import("dromedary._config") { Ok(m) => m, Err(e) => { log::debug!("importing dromedary._config failed: {e}"); return None; } }; let kwargs = PyDict::new(py); if let Some(p) = port { if let Err(e) = kwargs.set_item("port", p) { log::debug!("building get_auth_password kwargs failed: {e}"); return None; } } match cfg.call_method("get_auth_password", ("ssh", host, user), Some(&kwargs)) { Ok(v) if v.is_none() => None, Ok(v) => v.extract::().ok(), Err(e) => { log::debug!("_config.get_auth_password raised: {e}"); None } } } fn home_dir() -> Option { std::env::var_os("HOME").map(PathBuf::from) } /// `~/.ssh/known_hosts` (on Windows: `~/ssh/known_hosts` — matches /// russh's own `known_hosts_path`). Falls back to a sentinel path inside /// a non-existent temp dir when `$HOME` is unset; `check_known_hosts_path` /// returns `Ok(false)` on missing files so this is harmless. 
fn system_known_hosts_path() -> PathBuf { match home_dir() { Some(h) => { if cfg!(windows) { h.join("ssh").join("known_hosts") } else { h.join(".ssh").join("known_hosts") } } None => PathBuf::from("/nonexistent/known_hosts"), } } /// Resolve `/ssh_host_keys` via the /// Python side so embedder overrides are honored. Also calls /// `_bedding.ensure_config_dir_exists()` so the later `learn_*` write /// doesn't fail on a missing parent directory. fn dromedary_host_keys_path(py: Python) -> PathBuf { let fallback = || { let xdg = std::env::var_os("XDG_CONFIG_HOME") .map(PathBuf::from) .or_else(|| home_dir().map(|h| h.join(".config"))) .unwrap_or_else(|| PathBuf::from(".")); xdg.join("breezy").join("ssh_host_keys") }; let bedding = match py.import("dromedary._bedding") { Ok(m) => m, Err(e) => { log::debug!("importing dromedary._bedding failed: {e}"); return fallback(); } }; if let Err(e) = bedding.call_method0("ensure_config_dir_exists") { log::debug!("ensure_config_dir_exists failed: {e}"); } match bedding.call_method0("config_dir") { Ok(v) => match v.extract::() { Ok(s) => PathBuf::from(s).join("ssh_host_keys"), Err(e) => { log::debug!("config_dir() returned non-string: {e}"); fallback() } }, Err(e) => { log::debug!("_bedding.config_dir() raised: {e}"); fallback() } } } #[pymethods] impl RusshVendor { #[new] fn new() -> Self { Self } /// Open an SFTP session. Returns a fully-constructed `SFTPClient` that /// the caller can use directly (same object as `_transport_rs.sftp. /// SFTPClient`). 
#[pyo3(signature = (username, password, host, port=None))] fn connect_sftp( &self, py: Python, username: Option<&str>, password: Option<&str>, host: &str, port: Option, ) -> PyResult { let (runtime, handle) = Self::connect(py, username, password, host, port)?; py.detach(move || { let (runtime_for_stream, stream) = runtime.block_on({ let runtime = runtime.clone(); async move { let channel = handle .channel_open_session() .await .map_err(|e| PyRuntimeError::new_err(format!("open session: {e}")))?; channel.request_subsystem(true, "sftp").await.map_err(|e| { PyRuntimeError::new_err(format!("request sftp subsystem: {e}")) })?; Ok::<_, PyErr>((runtime, channel.into_stream())) } })?; let channel: BoxedChannel = Box::new(BlockingChannel { runtime: runtime_for_stream, stream, }); SFTPClient::from_channel(channel) .map_err(|e| PyRuntimeError::new_err(format!("sftp init: {e}"))) }) } /// Execute a remote command and return a connection whose `send` / `recv` /// drive the command's stdio. Parallels paramiko's /// `_ParamikoSSHConnection`. #[pyo3(signature = (username, password, host, command, port=None))] fn connect_ssh( &self, py: Python, username: Option<&str>, password: Option<&str>, host: &str, command: Vec, port: Option, ) -> PyResult { let (runtime, handle) = Self::connect(py, username, password, host, port)?; py.detach(move || { let (runtime_for_stream, stream) = runtime.block_on({ let runtime = runtime.clone(); async move { let channel = handle .channel_open_session() .await .map_err(|e| PyRuntimeError::new_err(format!("open session: {e}")))?; let cmdline = command.join(" "); channel .exec(true, cmdline.as_str()) .await .map_err(|e| PyRuntimeError::new_err(format!("exec: {e}")))?; Ok::<_, PyErr>((runtime, channel.into_stream())) } })?; Ok(RusshSSHConnection { inner: Mutex::new(Some(BlockingChannel { runtime: runtime_for_stream, stream, })), }) }) } } /// Minimal fallback when no username is supplied. 
`_config.get_auth_user` /// on the Python side gives a richer lookup; 5e wires that in. For now use /// the `$USER` env var or "unknown". fn whoami_or_default() -> String { std::env::var("USER").unwrap_or_else(|_| "unknown".to_string()) } pub(crate) fn register(_py: Python, m: &Bound) -> PyResult<()> { m.add_class::()?; m.add_class::()?; Ok(()) } dromedary-0.1.5/_transport_rs/src/ssh/subprocess.rs000066400000000000000000000330051520150013200225120ustar00rootroot00000000000000//! Subprocess-based SSH vendors (OpenSSH, PLink, LSH) plus the loopback //! "vendor" (plain TCP) and the connection wrapper they produce. //! //! These wrap an external `ssh` binary, exposing its stdio as a file //! descriptor that the SFTP client can consume directly. No crypto library //! involved; always compiled. //! //! The Rust surface is deliberately minimal: vendors hand back either an //! fd (SFTP) or an [`SSHSubprocessConnection`] (command exec). Wrapping the //! fd in `_transport_rs.sftp.SFTPClient` happens on the Python side — //! mirrors how `SFTPClient(sock._sock.detach())` is called today and keeps //! the `ssh` and `sftp` Rust submodules decoupled. 
use dromedary::ssh::{build_argv, ArgvError, Flavor}; use pyo3::exceptions::PyRuntimeError; use pyo3::import_exception; use pyo3::prelude::*; #[cfg(unix)] use std::os::fd::{IntoRawFd, OwnedFd, RawFd}; use std::process::{Child, Command, Stdio}; use std::sync::Mutex; import_exception!(dromedary.errors, StrangeHostname); import_exception!(dromedary.errors, SocketConnectionError); fn argv_err_to_py(err: ArgvError) -> PyErr { match err { ArgvError::StrangeHostname(h) => StrangeHostname::new_err((h,)), ArgvError::InvalidArguments => PyRuntimeError::new_err(err.to_string()), } } // --------------------------------------------------------------------------- // Connection wrapper // --------------------------------------------------------------------------- struct Inner { child: Option, #[cfg(unix)] sock_fd: Option, } /// Rust port of `dromedary.ssh.SSHSubprocessConnection`. /// /// Does not expose `get_sock_or_pipes()` — that method was paramiko-only and /// goes away with the russh migration. Callers use [`detach_fd`] for the /// SFTP path and [`wait`] / [`close`] for command execution. #[pyclass( module = "dromedary._transport_rs.ssh", name = "SSHSubprocessConnection" )] pub(crate) struct SSHSubprocessConnection { inner: Mutex, } impl SSHSubprocessConnection { #[cfg(unix)] fn from_parts(child: Child, sock_fd: Option) -> Self { Self { inner: Mutex::new(Inner { child: Some(child), sock_fd, }), } } #[cfg(not(unix))] fn from_parts(child: Child) -> Self { Self { inner: Mutex::new(Inner { child: Some(child) }), } } } #[pymethods] impl SSHSubprocessConnection { /// Take ownership of the raw fd underlying this connection. /// /// Returns the socketpair fd when one was used, otherwise the child's /// stdout pipe fd. After this call the connection no longer owns the /// fd; `close` / `wait` still manage the child process itself. 
#[cfg(unix)] fn detach_fd(&self) -> PyResult { let mut inner = self.inner.lock().unwrap(); if let Some(fd) = inner.sock_fd.take() { return Ok(fd); } let child = inner .child .as_mut() .ok_or_else(|| PyRuntimeError::new_err("connection already closed"))?; let stdout = child .stdout .take() .ok_or_else(|| PyRuntimeError::new_err("no stdout fd available"))?; Ok(stdout.into_raw_fd()) } fn wait(&self) -> PyResult> { let mut inner = self.inner.lock().unwrap(); match inner.child.as_mut() { Some(child) => child .wait() .map(|s| s.code()) .map_err(|e| PyRuntimeError::new_err(format!("wait failed: {e}"))), None => Ok(None), } } fn close(&self) -> PyResult<()> { let mut inner = self.inner.lock().unwrap(); #[cfg(unix)] if let Some(fd) = inner.sock_fd.take() { // SAFETY: we owned this fd; `nix::unistd::close` consumes it, // so it's closed exactly once. Errors are ignored to match the // best-effort semantics of Python's `_close_ssh_proc`. let _ = nix::unistd::close(fd); } if let Some(mut child) = inner.child.take() { drop(child.stdin.take()); drop(child.stdout.take()); let _ = child.wait(); } Ok(()) } } impl Drop for SSHSubprocessConnection { fn drop(&mut self) { let _ = self.close(); } } // --------------------------------------------------------------------------- // Spawn helper // --------------------------------------------------------------------------- /// Spawn the ssh subprocess, preferring a `socketpair` for its stdio (matches /// the Python comment: "we prefer sockets to pipes because they support /// non-blocking short reads"). #[cfg(unix)] fn spawn(argv: &[String], host: &str, port: Option) -> PyResult { // SOCK_CLOEXEC isn't available as a `SockFlag` on macOS in nix; fall back // to `SockFlag::empty()` and set `FD_CLOEXEC` via fcntl on the parent's // half after creation. 
#[cfg(any( target_os = "linux", target_os = "android", target_os = "freebsd", target_os = "dragonfly", target_os = "netbsd", target_os = "openbsd", target_os = "illumos", target_os = "solaris", ))] let sock_flags = nix::sys::socket::SockFlag::SOCK_CLOEXEC; #[cfg(not(any( target_os = "linux", target_os = "android", target_os = "freebsd", target_os = "dragonfly", target_os = "netbsd", target_os = "openbsd", target_os = "illumos", target_os = "solaris", )))] let sock_flags = nix::sys::socket::SockFlag::empty(); let pair = nix::sys::socket::socketpair( nix::sys::socket::AddressFamily::Unix, nix::sys::socket::SockType::Stream, None, sock_flags, ) .ok(); let mut cmd = Command::new(&argv[0]); cmd.args(&argv[1..]); let my_sock: Option = if let Some((mine, theirs)) = pair { // Dup twice: once for child stdin, once for stdout. The child // inherits them via `Command::stdin` / `stdout`; the parent keeps // its half (`mine`) with CLOEXEC set. let dup_in = nix::unistd::dup(&theirs) .map_err(|e| PyRuntimeError::new_err(format!("dup failed: {e}")))?; let dup_out = nix::unistd::dup(&theirs) .map_err(|e| PyRuntimeError::new_err(format!("dup failed: {e}")))?; cmd.stdin(Stdio::from(dup_in)); cmd.stdout(Stdio::from(dup_out)); // `theirs` is closed in the parent when this `OwnedFd` drops. drop(theirs); // Ensure CLOEXEC on the parent's retained half on platforms where // `SOCK_CLOEXEC` couldn't be requested at creation time. 
#[cfg(not(any( target_os = "linux", target_os = "android", target_os = "freebsd", target_os = "dragonfly", target_os = "netbsd", target_os = "openbsd", target_os = "illumos", target_os = "solaris", )))] nix::fcntl::fcntl( &mine, nix::fcntl::FcntlArg::F_SETFD(nix::fcntl::FdFlag::FD_CLOEXEC), ) .map_err(|e| PyRuntimeError::new_err(format!("fcntl FD_CLOEXEC failed: {e}")))?; Some(mine) } else { cmd.stdin(Stdio::piped()).stdout(Stdio::piped()); None }; cmd.stderr(Stdio::inherit()); let child = cmd.spawn().map_err(|e| { SocketConnectionError::new_err(( host.to_string(), port.map(|p| format!(":{p}")).unwrap_or_default(), "Failed to connect to", e.to_string(), )) })?; let my_raw: Option = my_sock.map(|fd| fd.into_raw_fd()); Ok(SSHSubprocessConnection::from_parts(child, my_raw)) } #[cfg(not(unix))] fn spawn(argv: &[String], host: &str, port: Option) -> PyResult { let mut cmd = Command::new(&argv[0]); cmd.args(&argv[1..]) .stdin(Stdio::piped()) .stdout(Stdio::piped()) .stderr(Stdio::inherit()); let child = cmd.spawn().map_err(|e| { SocketConnectionError::new_err(( host.to_string(), port.map(|p| format!(":{p}")).unwrap_or_default(), "Failed to connect to", e.to_string(), )) })?; Ok(SSHSubprocessConnection::from_parts(child)) } // --------------------------------------------------------------------------- // Vendor classes exposed to Python // --------------------------------------------------------------------------- macro_rules! subprocess_vendor { ($name:ident, $flavor:expr, $pyname:literal) => { #[pyclass(module = "dromedary._transport_rs.ssh", name = $pyname)] pub(crate) struct $name { executable_path: Mutex>, } #[pymethods] impl $name { #[new] fn new() -> Self { Self { executable_path: Mutex::new(None), } } /// Override the ssh binary. Matches the Python /// `SSHVendorManager._get_vendor_from_path` flow, where /// `BRZ_SSH=/path/to/ssh` assigns `vendor.executable_path` on /// the detected vendor. 
#[setter] fn set_executable_path(&self, path: Option) { *self.executable_path.lock().unwrap() = path; } #[getter] fn executable_path(&self) -> Option { self.executable_path.lock().unwrap().clone() } /// Spawn the ssh binary with the "sftp" subsystem and return /// the raw fd to use with `_transport_rs.sftp.SFTPClient(fd)`. /// The vendor no longer owns the fd after this call. #[cfg(unix)] #[pyo3(signature = (username, host, port=None))] fn spawn_sftp( &self, username: Option<&str>, host: &str, port: Option, ) -> PyResult { let exe = self.executable_path.lock().unwrap().clone(); let argv = build_argv( $flavor, exe.as_deref(), username, host, port, Some("sftp"), None, ) .map_err(argv_err_to_py)?; let conn = spawn(&argv, host, port)?; conn.detach_fd() } /// Spawn the ssh binary to execute `command` on the remote /// host, returning an [`SSHSubprocessConnection`] for the /// caller to drive. #[pyo3(signature = (username, host, command, port=None))] fn connect_ssh( &self, username: Option<&str>, host: &str, command: Vec, port: Option, ) -> PyResult { let exe = self.executable_path.lock().unwrap().clone(); let argv = build_argv( $flavor, exe.as_deref(), username, host, port, None, Some(&command), ) .map_err(argv_err_to_py)?; spawn(&argv, host, port) } } }; } subprocess_vendor!( OpenSSHSubprocessVendor, Flavor::OpenSSH, "OpenSSHSubprocessVendor" ); subprocess_vendor!(LSHSubprocessVendor, Flavor::Lsh, "LSHSubprocessVendor"); subprocess_vendor!( PLinkSubprocessVendor, Flavor::PLink, "PLinkSubprocessVendor" ); // --------------------------------------------------------------------------- // Loopback "vendor" (plain TCP, no ssh) // --------------------------------------------------------------------------- /// Rust port of `dromedary.ssh.LoopbackVendor`. Used by the test suite via /// `stub_sftp.py` to talk to a local SFTP server over a TCP socket with no /// SSH transport in between. 
#[pyclass(module = "dromedary._transport_rs.ssh", name = "LoopbackVendor")] pub(crate) struct LoopbackVendor; #[pymethods] impl LoopbackVendor { #[new] fn new() -> Self { Self } /// Open a TCP connection and return the raw fd. Caller wraps it in /// `_transport_rs.sftp.SFTPClient(fd)`. #[cfg(unix)] #[pyo3(signature = (host, port))] fn spawn_sftp(&self, host: &str, port: u16) -> PyResult { let sock = std::net::TcpStream::connect((host, port)).map_err(|e| { SocketConnectionError::new_err(( host.to_string(), format!(":{port}"), "Failed to connect to", e.to_string(), )) })?; // Convert to OwnedFd so we can release ownership cleanly. let owned: OwnedFd = sock.into(); Ok(owned.into_raw_fd()) } } // --------------------------------------------------------------------------- // Registration // --------------------------------------------------------------------------- pub(crate) fn register(_py: Python, m: &Bound) -> PyResult<()> { m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; m.add_class::()?; Ok(()) } // Argv construction and tests live in the top-level `dromedary::ssh` // module so they're unit-testable without needing the Python extension // linker symbols (which block `cargo test` on this `cdylib` crate). 
dromedary-0.1.5/_transport_rs/src/unlistable.rs000066400000000000000000000060511520150013200216700ustar00rootroot00000000000000use crate::{Transport, TransportDecorator}; use dromedary::pyo3::PyTransport; use pyo3::exceptions::PyValueError; use pyo3::prelude::*; const PREFIX: &str = "unlistable+"; fn resolve_inner(py: Python, url: &str, decorated: Option>) -> PyResult> { if let Some(d) = decorated { return Ok(d); } if !url.starts_with(PREFIX) { return Err(PyValueError::new_err(format!( "url {:?} doesn't start with decorator prefix {:?}", url, PREFIX ))); } let rest = &url[PREFIX.len()..]; let dromedary = py.import("dromedary")?; let urlutils = py.import("dromedary.urlutils")?; let is_url: bool = urlutils.call_method1("is_url", (rest,))?.extract()?; let func = if is_url { dromedary.getattr("get_transport_from_url")? } else { dromedary.getattr("get_transport_from_path")? }; Ok(func.call1((rest,))?.unbind()) } fn wrap_inner(decorated: &Py, py: Python) -> Transport { let py_inner: Box = Box::new(PyTransport::from(decorated.clone_ref(py))); Transport(Box::new(dromedary::unlistable::UnlistableTransport::new( py_inner, ))) } #[pyclass(extends=TransportDecorator, subclass)] pub(crate) struct UnlistableTransportDecorator; #[pymethods] impl UnlistableTransportDecorator { #[new] #[pyo3(signature = (url, _decorated=None, _from_transport=None))] fn new( py: Python, url: &str, _decorated: Option>, _from_transport: Option>, ) -> PyResult> { let _ = _from_transport; let decorated = resolve_inner(py, url, _decorated)?; let wrapped = wrap_inner(&decorated, py); Ok(PyClassInitializer::from(wrapped) .add_subclass(TransportDecorator { decorated, prefix: PREFIX, }) .add_subclass(UnlistableTransportDecorator)) } #[classmethod] fn _get_url_prefix(_cls: &Bound<'_, pyo3::types::PyType>) -> &'static str { PREFIX } #[pyo3(signature = (offset=None))] fn clone<'a>( slf: PyRef<'a, Self>, py: Python<'a>, offset: Option>, ) -> PyResult> { let decorator: &TransportDecorator = slf.as_super(); let 
decorated = decorator.decorated.clone_ref(py); let decorated_clone = match offset { Some(o) => decorated.call_method1(py, "clone", (o,))?, None => decorated.call_method0(py, "clone")?, }; let wrapped = wrap_inner(&decorated_clone, py); let init = PyClassInitializer::from(wrapped) .add_subclass(TransportDecorator { decorated: decorated_clone, prefix: PREFIX, }) .add_subclass(UnlistableTransportDecorator); Bound::new(py, init) } } pub(crate) fn register(_py: Python, m: &Bound) -> PyResult<()> { m.add_class::()?; Ok(()) } dromedary-0.1.5/_transport_rs/src/urlutils.rs000066400000000000000000000512661520150013200214210ustar00rootroot00000000000000use pyo3::exceptions::PyTypeError; use pyo3::exceptions::PyValueError; use pyo3::import_exception; use pyo3::prelude::*; use pyo3::types::PyTuple; use std::collections::HashMap; use std::path::PathBuf; import_exception!(dromedary.urlutils, InvalidURLJoin); import_exception!(dromedary.urlutils, InvalidURL); import_exception!(dromedary.errors, PathNotChild); #[pyfunction] fn is_url(url: &str) -> bool { dromedary::urlutils::is_url(url) } /// On Windows, `file://` URLs without a drive letter are invalid. The /// `split`/`basename`/`dirname`/`strip_trailing_slash` helpers historically /// raised `InvalidURL` for such URLs (matching breezy's behaviour). The /// underlying Rust `split` is platform-agnostic and just splits at the last /// path separator, so do the validation here in the Python boundary. #[cfg(target_os = "windows")] fn validate_win32_file_url(url: &str) -> PyResult<()> { if !is_win32_drive_letter_url(url) && url.starts_with("file:///") && url.len() > "file:///".len() { return Err(InvalidURL::new_err(( "Invalid Win32 local URL".to_string(), url.to_string(), ))); } Ok(()) } #[cfg(not(target_os = "windows"))] #[inline] fn validate_win32_file_url(_url: &str) -> PyResult<()> { Ok(()) } /// Whether `url` is a `file:///:|...` or `file:///|...` URL. 
#[cfg(target_os = "windows")] fn is_win32_drive_letter_url(url: &str) -> bool { let Some(rest) = url.strip_prefix("file:///") else { return false; }; let mut chars = rest.chars(); matches!(chars.next(), Some(c) if c.is_ascii_alphabetic()) && matches!(chars.next(), Some(':') | Some('|')) } #[pyfunction] #[pyo3(signature = (url, exclude_trailing_slash = true))] fn split(url: &str, exclude_trailing_slash: Option) -> PyResult<(String, String)> { validate_win32_file_url(url)?; Ok(split_impl(url, exclude_trailing_slash.unwrap_or(true))) } #[cfg(not(target_os = "windows"))] #[inline] fn split_impl(url: &str, exclude_trailing_slash: bool) -> (String, String) { dromedary::urlutils::split(url, exclude_trailing_slash) } /// On Windows, drive-letter local URLs split as `file:///C:` + `/path`. The /// generic split treats the URL as `file://` + `/C:/path` and produces the /// wrong head/tail; mirror breezy's `_win32_split` here. #[cfg(target_os = "windows")] fn split_impl(url: &str, exclude_trailing_slash: bool) -> (String, String) { if is_win32_drive_letter_url(url) { let url_base = "file://"; // SAFETY: validated above that the URL has shape `file:///<:|>...` let path = &url["file://".len()..]; // starts with `/<:|>...` if let Ok((url_base, path)) = dromedary::urlutils::win32::extract_drive_letter(url_base, path) { // `path` is now the part after the drive (e.g. "/foo/bar"). 
return split_after_drive(&url_base, &path, exclude_trailing_slash); } } dromedary::urlutils::split(url, exclude_trailing_slash) } #[cfg(target_os = "windows")] fn split_after_drive(url_base: &str, path: &str, exclude_trailing_slash: bool) -> (String, String) { let mut p = path; if exclude_trailing_slash && p.len() > 1 && p.ends_with('/') { p = &p[..p.len() - 1]; } match p.rsplit_once('/') { None => (url_base.to_string(), p.to_string()), Some((head, tail)) => { let head = if head.is_empty() { "/" } else { head }; (url_base.to_string() + head, tail.to_string()) } } } #[pyfunction] fn _find_scheme_and_separator(url: &str) -> (Option, Option) { dromedary::urlutils::find_scheme_and_separator(url) } #[pyfunction] fn strip_trailing_slash(url: &str) -> PyResult { validate_win32_file_url(url)?; #[cfg(target_os = "windows")] { if url.ends_with('/') && is_win32_drive_letter_url(url) { // Drive-letter local URL: preserve `file:///C:/` as-is and // strip exactly one trailing slash from anything longer. 
return Ok(dromedary::urlutils::win32::strip_local_trailing_slash(url)); } } Ok(dromedary::urlutils::strip_trailing_slash(url).to_string()) } #[pyfunction] #[pyo3(signature = (url, exclude_trailing_slash = true))] fn dirname(url: &str, exclude_trailing_slash: Option) -> PyResult { validate_win32_file_url(url)?; Ok(split_impl(url, exclude_trailing_slash.unwrap_or(true)).0) } #[pyfunction] #[pyo3(signature = (url, exclude_trailing_slash = true))] fn basename(url: &str, exclude_trailing_slash: Option) -> PyResult { validate_win32_file_url(url)?; Ok(split_impl(url, exclude_trailing_slash.unwrap_or(true)).1) } fn map_urlutils_error_to_pyerr(e: dromedary::urlutils::Error) -> PyErr { match e { dromedary::urlutils::Error::AboveRoot(base, path) => { InvalidURLJoin::new_err(("Above root", base, path)) } dromedary::urlutils::Error::SubsegmentMissesEquals(segment) => { InvalidURL::new_err(("Subsegment misses equals", segment)) } dromedary::urlutils::Error::UnsafeCharacters(c) => { InvalidURL::new_err(("Unsafe characters", c)) } dromedary::urlutils::Error::IoError(err) => err.into(), dromedary::urlutils::Error::SegmentParameterKeyContainsEquals(url, segment) => { InvalidURLJoin::new_err(("Segment parameter contains equals (=)", url, segment)) } dromedary::urlutils::Error::SegmentParameterContainsComma(url, segments) => { InvalidURLJoin::new_err(("Segment parameter contains comma (,)", url, segments)) } dromedary::urlutils::Error::NotLocalUrl(url) => { InvalidURL::new_err(("Not a local url", url)) } dromedary::urlutils::Error::UrlNotAscii(url) => InvalidURL::new_err(("URL not ascii", url)), dromedary::urlutils::Error::InvalidUNCUrl(url) => { InvalidURL::new_err(("Invalid UNC URL", url)) } dromedary::urlutils::Error::InvalidWin32LocalUrl(url) => { InvalidURL::new_err(("Invalid Win32 local URL", url)) } dromedary::urlutils::Error::InvalidWin32Path(path) => { InvalidURL::new_err(("Invalid Win32 path", path)) } dromedary::urlutils::Error::PathNotChild(path, start) => { 
PathNotChild::new_err((path, start)) } dromedary::urlutils::Error::UrlTooShort(url) => { PyValueError::new_err(("URL too short", url)) } dromedary::urlutils::Error::InvalidUrlPort(url, port_str) => { InvalidURL::new_err((format!("invalid port number {port_str} in url:\n{url}"),)) } } } #[pyfunction(signature = (url, *args))] fn joinpath(url: &str, args: &Bound) -> PyResult { let mut path = Vec::new(); for arg in args.iter() { if let Ok(arg) = arg.extract::() { path.push(arg); } else { return Err(PyTypeError::new_err( "path must be a string or a list of strings", )); } } let path_ref = path.iter().map(|s| s.as_str()).collect::>(); dromedary::urlutils::joinpath(url, path_ref.as_slice()).map_err(map_urlutils_error_to_pyerr) } #[pyfunction(signature = (url, *args))] fn join(url: &str, args: &Bound) -> PyResult { let mut path = Vec::new(); for arg in args.iter() { if let Ok(arg) = arg.extract::() { path.push(arg); } else { return Err(PyTypeError::new_err( "path must be a string or a list of strings", )); } } let path_ref = path.iter().map(|s| s.as_str()).collect::>(); dromedary::urlutils::join(url, path_ref.as_slice()).map_err(map_urlutils_error_to_pyerr) } #[pyfunction] fn split_segment_parameters(url: &str) -> PyResult<(&str, HashMap<&str, &str>)> { dromedary::urlutils::split_segment_parameters(url).map_err(map_urlutils_error_to_pyerr) } #[pyfunction] fn split_segment_parameters_raw(url: &str) -> (&str, Vec<&str>) { dromedary::urlutils::split_segment_parameters_raw(url) } #[pyfunction] fn strip_segment_parameters(url: &str) -> &str { dromedary::urlutils::strip_segment_parameters(url) } #[pyfunction] fn relative_url(base: &str, url: &str) -> String { dromedary::urlutils::relative_url(base, url) } #[pyfunction] fn combine_paths(base_path: &str, relpath: &str) -> String { dromedary::urlutils::combine_paths(base_path, relpath) } #[pyfunction] #[pyo3(signature = (text, safe = None))] fn escape(py: Python, text: Py, safe: Option<&str>) -> PyResult { if let Ok(text) = 
text.extract::<String>(py) {
        Ok(dromedary::urlutils::escape(text.as_bytes(), safe))
    } else if let Ok(text) = text.extract::<Vec<u8>>(py) {
        Ok(dromedary::urlutils::escape(text.as_slice(), safe))
    } else {
        Err(PyTypeError::new_err("text must be a string or bytes"))
    }
}

/// Python wrapper around `dromedary::urlutils::normalize_url`.
///
/// Errors from the Rust layer are translated by
/// `map_urlutils_error_to_pyerr`.
#[pyfunction]
fn normalize_url(url: &str) -> PyResult<String> {
    dromedary::urlutils::normalize_url(url).map_err(map_urlutils_error_to_pyerr)
}

/// Convert a local filesystem path to a URL using the platform default
/// implementation in `dromedary::urlutils`.
#[pyfunction]
fn local_path_to_url(path: PathBuf) -> PyResult<String> {
    dromedary::urlutils::local_path_to_url(path.as_path()).map_err(|e| e.into())
}

/// Win32 flavour of `local_path_to_url`; exported to Python under the
/// plain name `local_path_to_url` (registered on the `win32` submodule).
#[pyfunction(name = "local_path_to_url")]
fn win32_local_path_to_url(path: PathBuf) -> PyResult<String> {
    dromedary::urlutils::win32::local_path_to_url(path).map_err(|e| e.into())
}

/// POSIX flavour of `local_path_to_url`; exported to Python under the
/// plain name `local_path_to_url` (registered on the `posix` submodule).
#[pyfunction(name = "local_path_to_url")]
fn posix_local_path_to_url(path: &str) -> PyResult<String> {
    dromedary::urlutils::posix::local_path_to_url(path).map_err(|e| e.into())
}

/// Join raw segment parameters onto `url`.
///
/// Every positional argument after `url` must be a Python `str`; the
/// first one that is not raises `TypeError`.
#[pyfunction(signature = (url, *args))]
fn join_segment_parameters_raw(url: &str, args: &Bound<PyTuple>) -> PyResult<String> {
    // Collect fallibly so the first non-string argument short-circuits,
    // exactly as the early `return Err(..)` in a manual loop would.
    let segments = args
        .iter()
        .map(|arg| {
            arg.extract::<String>()
                .map_err(|_| PyTypeError::new_err("path must be a string or a list of strings"))
        })
        .collect::<PyResult<Vec<String>>>()?;
    let segment_refs = segments.iter().map(|s| s.as_str()).collect::<Vec<_>>();
    dromedary::urlutils::join_segment_parameters_raw(url, segment_refs.as_slice())
        .map_err(map_urlutils_error_to_pyerr)
}

/// Join a `{name: value}` mapping of segment parameters onto `url`.
#[pyfunction]
fn join_segment_parameters(url: &str, parameters: HashMap<String, String>) -> PyResult<String> {
    // The Rust helper borrows its keys and values, so build a borrowed view
    // of the owned map that PyO3 extracted for us.
    let parameters = parameters
        .iter()
        .map(|(k, v)| (k.as_str(), v.as_str()))
        .collect();
    dromedary::urlutils::join_segment_parameters(url, &parameters)
        .map_err(map_urlutils_error_to_pyerr)
}

/// Turn a path returned by one of the `local_path_from_url` variants into
/// an owned `String`, raising `ValueError` when it is not valid UTF-8.
///
/// Shared by the generic, win32 and posix wrappers so the conversion and
/// its error message live in one place.
fn path_to_utf8_string(path: impl AsRef<std::path::Path>) -> PyResult<String> {
    path.as_ref()
        .to_str()
        .map(str::to_owned)
        .ok_or_else(|| PyValueError::new_err("Path is not valid UTF-8"))
}

/// Convert a `file://` style URL back to a local path string using the
/// platform default implementation.
#[pyfunction]
fn local_path_from_url(url: &str) -> PyResult<String> {
    let path =
        dromedary::urlutils::local_path_from_url(url).map_err(map_urlutils_error_to_pyerr)?;
    path_to_utf8_string(path)
}

/// Win32 flavour of `local_path_from_url`; exported to Python under the
/// plain name `local_path_from_url`.
#[pyfunction(name = "local_path_from_url")]
fn win32_local_path_from_url(url: &str) -> PyResult<String> {
    let path = dromedary::urlutils::win32::local_path_from_url(url)
        .map_err(map_urlutils_error_to_pyerr)?;
    path_to_utf8_string(path)
}

/// On win32 the drive letter needs to be added to the url base.
#[pyfunction(name = "extract_drive_letter")]
fn win32_extract_drive_letter(url_base: &str, path: &str) -> PyResult<(String, String)> {
    dromedary::urlutils::win32::extract_drive_letter(url_base, path)
        .map_err(map_urlutils_error_to_pyerr)
}

/// Win32 wrapper for `strip_local_trailing_slash`; infallible.
#[pyfunction(name = "strip_local_trailing_slash")]
fn win32_strip_local_trailing_slash(url: &str) -> String {
    dromedary::urlutils::win32::strip_local_trailing_slash(url)
}

/// POSIX flavour of `local_path_from_url`; exported to Python under the
/// plain name `local_path_from_url`.
#[pyfunction(name = "local_path_from_url")]
fn posix_local_path_from_url(url: &str) -> PyResult<String> {
    let path = dromedary::urlutils::posix::local_path_from_url(url)
        .map_err(map_urlutils_error_to_pyerr)?;
    path_to_utf8_string(path)
}

/// Python wrapper around `dromedary::urlutils::unescape`.
#[pyfunction]
fn unescape(text: &str) -> PyResult<String> {
    dromedary::urlutils::unescape(text).map_err(map_urlutils_error_to_pyerr)
}

/// Python wrapper around `dromedary::urlutils::derive_to_location`;
/// infallible.
#[pyfunction]
fn derive_to_location(base: &str) -> String {
    dromedary::urlutils::derive_to_location(base)
}

/// Python wrapper around `dromedary::urlutils::file_relpath`.
#[pyfunction]
fn file_relpath(base: &str, path: &str) -> PyResult<String> {
    dromedary::urlutils::file_relpath(base, path).map_err(map_urlutils_error_to_pyerr)
}

/// Permissive percent-decode mirroring Python's urllib.parse.unquote:
/// non-ASCII or undecodable input is returned unchanged.
fn unquote_lossy(s: &str) -> String {
    dromedary::urlutils::unescape(s).unwrap_or_else(|_| s.to_string())
}

/// Rust port of dromedary.urlutils.URL — a parsed URL with both
/// quoted and unquoted forms of each component. Attributes are mutable
/// to match the historical Python behaviour.
#[pyclass(name = "URL", subclass, skip_from_py_object)] #[derive(Clone)] pub(crate) struct UrlObject { #[pyo3(get, set)] scheme: String, #[pyo3(get, set)] quoted_user: Option, #[pyo3(get, set)] user: Option, #[pyo3(get, set)] quoted_password: Option, #[pyo3(get, set)] password: Option, #[pyo3(get, set)] quoted_host: String, #[pyo3(get, set)] host: String, #[pyo3(get, set)] port: Option, #[pyo3(get, set)] quoted_path: String, #[pyo3(get, set)] path: String, } #[pymethods] impl UrlObject { #[new] fn new( scheme: String, quoted_user: Option, quoted_password: Option, quoted_host: String, port: Option, quoted_path: String, ) -> Self { let host = unquote_lossy("ed_host); let user = quoted_user.as_deref().map(unquote_lossy); let password = quoted_password.as_deref().map(unquote_lossy); let normalized_path = dromedary::urlutils::normalize_quoted_path("ed_path); let path = unquote_lossy(&normalized_path); UrlObject { scheme, quoted_user, user, quoted_password, password, quoted_host, host, port, quoted_path: normalized_path, path, } } #[classmethod] fn from_string(_cls: &Bound, url: &str) -> PyResult { let parsed = dromedary::urlutils::parse_url(url).map_err(map_urlutils_error_to_pyerr)?; Ok(UrlObject::new( parsed.scheme, parsed.quoted_user, parsed.quoted_password, parsed.quoted_host, parsed.port, parsed.quoted_path, )) } fn __eq__(&self, other: &Bound) -> bool { // Match Python: compare scheme/host/user/password/path. Port is // intentionally not compared (preserved from the original impl). 
match other.extract::>() { Ok(o) => { self.scheme == o.scheme && self.host == o.host && self.user == o.user && self.password == o.password && self.path == o.path } Err(_) => false, } } fn __repr__(&self) -> String { // fn opt_repr(v: &Option) -> String { match v { Some(s) => format!("'{}'", s), None => "None".to_string(), } } let port_repr = match self.port { Some(p) => p.to_string(), None => "None".to_string(), }; format!( "", self.scheme, opt_repr(&self.quoted_user), opt_repr(&self.quoted_password), self.quoted_host, port_repr, self.quoted_path, ) } fn __str__(&self) -> String { // Bracket the host if it looks like an IPv6 literal. let mut netloc = if self.quoted_host.contains(':') { format!("[{}]", self.quoted_host) } else { self.quoted_host.clone() }; if let Some(user) = &self.quoted_user { // Password is intentionally omitted to avoid accidental exposure. netloc = format!("{}@{}", user, netloc); } if let Some(port) = self.port { netloc = format!("{}:{}", netloc, port); } let sep = if self.quoted_path.starts_with('/') || self.quoted_path.is_empty() { "" } else { "/" }; format!("{}://{}{}{}", self.scheme, netloc, sep, self.quoted_path) } #[pyo3(signature = (offset = None))] fn clone(&self, offset: Option<&str>) -> PyResult { let path = match offset { Some(off) => { // offset must already be url-encoded. Non-ASCII input means // the caller forgot to escape — surface that as InvalidURL // rather than silently passing the raw bytes through. let relative = dromedary::urlutils::unescape(off) .map_err(|_| InvalidURL::new_err((off.to_string(),)))?; let combined = dromedary::urlutils::combine_paths(&self.path, &relative); dromedary::urlutils::escape(combined.as_bytes(), Some("/~")) } None => self.quoted_path.clone(), }; Ok(UrlObject::new( self.scheme.clone(), self.quoted_user.clone(), self.quoted_password.clone(), self.quoted_host.clone(), self.port, path, )) } } /// (scheme, user, password, host, port, path) — all unquoted. 
type ParsedUrlTuple = ( String, Option, Option, String, Option, String, ); #[pyfunction] fn parse_url(url: &str) -> PyResult { let p = dromedary::urlutils::parse_url(url).map_err(map_urlutils_error_to_pyerr)?; Ok(( p.scheme, p.quoted_user.as_deref().map(unquote_lossy), p.quoted_password.as_deref().map(unquote_lossy), unquote_lossy(&p.quoted_host), p.port, unquote_lossy(&p.quoted_path), )) } #[pymodule] pub fn _urlutils_rs(py: Python, m: &Bound) -> PyResult<()> { m.add_function(wrap_pyfunction!(is_url, m)?)?; m.add_function(wrap_pyfunction!(split, m)?)?; m.add_function(wrap_pyfunction!(_find_scheme_and_separator, m)?)?; m.add_function(wrap_pyfunction!(strip_trailing_slash, m)?)?; m.add_function(wrap_pyfunction!(dirname, m)?)?; m.add_function(wrap_pyfunction!(basename, m)?)?; m.add_function(wrap_pyfunction!(joinpath, m)?)?; m.add_function(wrap_pyfunction!(join, m)?)?; m.add_function(wrap_pyfunction!(split_segment_parameters, m)?)?; m.add_function(wrap_pyfunction!(split_segment_parameters_raw, m)?)?; m.add_function(wrap_pyfunction!(strip_segment_parameters, m)?)?; m.add_function(wrap_pyfunction!(join_segment_parameters_raw, m)?)?; m.add_function(wrap_pyfunction!(join_segment_parameters, m)?)?; m.add_function(wrap_pyfunction!(relative_url, m)?)?; m.add_function(wrap_pyfunction!(combine_paths, m)?)?; m.add_function(wrap_pyfunction!(escape, m)?)?; m.add_function(wrap_pyfunction!(normalize_url, m)?)?; m.add_function(wrap_pyfunction!(local_path_to_url, m)?)?; m.add_function(wrap_pyfunction!(local_path_from_url, m)?)?; m.add_function(wrap_pyfunction!(unescape, m)?)?; m.add_function(wrap_pyfunction!(derive_to_location, m)?)?; m.add_function(wrap_pyfunction!(file_relpath, m)?)?; m.add_function(wrap_pyfunction!(parse_url, m)?)?; m.add_class::()?; let win32m = PyModule::new(py, "win32")?; win32m.add_function(wrap_pyfunction!(win32_local_path_to_url, &win32m)?)?; win32m.add_function(wrap_pyfunction!(win32_local_path_from_url, &win32m)?)?; 
win32m.add_function(wrap_pyfunction!(win32_extract_drive_letter, &win32m)?)?; win32m.add_function(wrap_pyfunction!(win32_strip_local_trailing_slash, &win32m)?)?; m.add_submodule(&win32m)?; let posixm = PyModule::new(py, "posix")?; posixm.add_function(wrap_pyfunction!(posix_local_path_to_url, &posixm)?)?; posixm.add_function(wrap_pyfunction!(posix_local_path_from_url, &posixm)?)?; m.add_submodule(&posixm)?; // PyO3 submodule hack for proper import support let sys = py.import("sys")?; let modules = sys.getattr("modules")?; let module_name = m.name()?; // Register submodules in sys.modules for dotted import support modules.set_item(format!("{}.win32", module_name), &win32m)?; modules.set_item(format!("{}.posix", module_name), &posixm)?; Ok(()) } dromedary-0.1.5/_transport_rs/src/webdav/000077500000000000000000000000001520150013200204265ustar00rootroot00000000000000dromedary-0.1.5/_transport_rs/src/webdav/mod.rs000066400000000000000000000010051520150013200215470ustar00rootroot00000000000000//! Python bindings for `dromedary::webdav`. //! //! Feature-gated behind `webdav`. Exposes //! `dromedary._transport_rs.webdav.HttpDavTransport` as a pyclass //! that extends `_transport_rs.http.HttpTransport`, so the Python //! `dromedary.webdav.webdav.HttpDavTransport` becomes a thin //! subclass just like the HTTP transport did in Stage 10. pub mod transport; use pyo3::prelude::*; pub(crate) fn register(m: &Bound) -> PyResult<()> { m.add_class::()?; Ok(()) } dromedary-0.1.5/_transport_rs/src/webdav/transport.rs000066400000000000000000000316151520150013200230360ustar00rootroot00000000000000//! Python bindings for `dromedary::webdav::HttpDavTransport`. //! //! Exposes `dromedary._transport_rs.webdav.HttpDavTransport` as a //! pyclass that extends `_transport_rs.http.HttpTransport`. The //! Python `dromedary.webdav.webdav.HttpDavTransport` becomes a //! thin subclass over this, same pattern as HTTP. //! //! Inheritance rationale: WebDAV is "HTTP plus write verbs". By //! 
extending the HttpTransport pyclass we inherit all its Python //! methods (`request`, `_post`, `_head`, `_range_hint`, etc.) //! unchanged, and only have to implement the DAV-specific verbs. //! The HttpTransport parent holds the underlying HttpTransport //! pointer in its `inner` field; the DAV subclass holds the full //! HttpDavTransport, and the two share the same underlying //! `HttpClient` through `Arc` clones — so auth cache, connection //! pool, and range-hint state are consistent across both layers. use std::path::PathBuf; use std::sync::Arc; use std::time::Duration; use dromedary::http::client::{HttpClientConfig, NegotiateProvider}; use dromedary::http::HttpClient; use dromedary::webdav::HttpDavTransport as RsHttpDavTransport; use pyo3::prelude::*; use pyo3::types::PyBytes; use crate::http::client::{PythonCredentialProvider, PythonNegotiateProvider}; use crate::http::transport::{ http_transport_initializer_with_base, HttpTransport as PyHttpTransport, }; use crate::map_transport_err_to_py_err; /// Python-bound Rust WebDAV transport. /// /// Constructor mirrors the HTTP pyclass: `HttpDavTransport(base, /// ca_certs=None, disable_verification=False, user_agent=None, /// read_timeout_ms=0)`. The `+webdav` / `+urllib` scheme suffix is /// accepted and stripped. 
#[pyclass( extends=PyHttpTransport, subclass, module = "dromedary._transport_rs.webdav", )] pub(crate) struct HttpDavTransport { inner: Arc, } #[pymethods] impl HttpDavTransport { #[new] #[pyo3(signature = ( base, ca_certs=None, disable_verification=false, user_agent=None, read_timeout_ms=0, ))] fn new( base: &str, ca_certs: Option, disable_verification: bool, user_agent: Option, read_timeout_ms: i64, ) -> PyResult> { let timeout = if read_timeout_ms > 0 { Some(Duration::from_millis(read_timeout_ms as u64)) } else { None }; let cfg = HttpClientConfig { ca_certs_path: ca_certs, disable_verification, user_agent, read_timeout: timeout, }; let mut client = HttpClient::with_providers( cfg, Box::new(PythonCredentialProvider), Box::new(PythonNegotiateProvider) as Box, ) .map_err(|e| { map_transport_err_to_py_err( dromedary::Error::Io(std::io::Error::other(format!("{}", e))), None, Some(base), ) })?; client.set_auth_trace(Some(std::sync::Arc::new(|header: &str| { crate::http::invoke_auth_header_trace(header); }))); let rust = RsHttpDavTransport::new(base, Arc::new(client)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(base)))?; Ok(dav_transport_initializer(Arc::new(rust))) } /// Clone this transport at an optional offset, sharing the /// underlying HttpClient. Returns an instance of the Rust-level /// HttpDavTransport pyclass; Python subclasses override /// `clone()` to rebrand via `_clone_from` (the /// HttpTransport-inherited helper). #[pyo3(signature = (offset=None))] fn clone<'a>( slf: PyRef<'a, Self>, py: Python<'a>, offset: Option<&str>, ) -> PyResult> { let cloned = slf .inner .clone_concrete(offset) .map_err(|e| map_transport_err_to_py_err(e, None, offset))?; Bound::new(py, dav_transport_initializer(Arc::new(cloned))) } /// Replace this transport's inner state with a clone of /// `source`. 
Mirrors `HttpTransport._clone_from` /// but cascades down to refresh the DAV layer too, so the /// Python subclass's `clone()` preserves subclass identity /// while still sharing the HttpClient across siblings. fn _clone_from( mut slf: PyRefMut, py: Python, source: PyRef, offset: Option<&str>, ) -> PyResult<()> { // When no offset is supplied, share the source's inner Arc // directly — clone_concrete's raw_base/segment-parameter // stripping (matching ConnectedTransport.clone semantics) // is wrong for the ``__init__``-time TLS-config rebuild // path that calls this helper with offset=None. let new_inner = match offset { None => source.inner.clone(), Some(_) => { let cloned = source .inner .clone_concrete(offset) .map_err(|e| map_transport_err_to_py_err(e, None, offset))?; Arc::new(cloned) } }; // Refresh every layer's dyn-Transport pointer so calls // through each inheritance level see the cloned state. let dav_box: Box = Box::new(Clone::clone(&*new_inner)); let http_layer = slf.as_super(); // Update the HttpTransport parent's own HTTP-transport // pointer so inherited methods (`request`, `_post`, ...) see // the grafted state. http_layer.inner = Arc::new(new_inner.http().clone()); let connected_layer = http_layer.as_super(); connected_layer.as_super().0 = dav_box; slf.inner = new_inner; let _ = py; Ok(()) } /// Eagerly-drained GET — returns the full response body as a /// bytes object. Used by the Python subclass's `get()` override /// (the DAV transport uses the inherited `request()` machinery /// and wants the body materialised in one shot). 
fn _get_bytes<'py>(&self, py: Python<'py>, relpath: &str) -> PyResult> { let buf = py .detach(|| -> Result, dromedary::Error> { use dromedary::Transport as _; let mut rf = self.inner.get(relpath)?; let mut buf = Vec::new(); std::io::Read::read_to_end(&mut rf, &mut buf).map_err(dromedary::Error::Io)?; Ok(buf) }) .map_err(|e| map_transport_err_to_py_err(e, None, Some(relpath)))?; Ok(PyBytes::new(py, &buf)) } fn has(&self, py: Python, relpath: &str) -> PyResult { use dromedary::Transport as _; py.detach(|| self.inner.has(relpath)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(relpath))) } fn mkdir(&self, py: Python, relpath: &str) -> PyResult<()> { use dromedary::Transport as _; py.detach(|| self.inner.mkdir(relpath, None)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(relpath))) } fn rmdir(&self, py: Python, relpath: &str) -> PyResult<()> { use dromedary::Transport as _; py.detach(|| self.inner.rmdir(relpath)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(relpath))) } fn rename(&self, py: Python, rel_from: &str, rel_to: &str) -> PyResult<()> { use dromedary::Transport as _; py.detach(|| self.inner.rename(rel_from, rel_to)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(rel_from))) } fn delete(&self, py: Python, relpath: &str) -> PyResult<()> { use dromedary::Transport as _; py.detach(|| self.inner.delete(relpath)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(relpath))) } fn copy(&self, py: Python, rel_from: &str, rel_to: &str) -> PyResult<()> { use dromedary::Transport as _; py.detach(|| self.inner.copy(rel_from, rel_to)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(rel_from))) } /// PUT with the atomic temp+MOVE dance; returns `len(bytes)`. 
fn put_bytes(&self, py: Python, relpath: &str, bytes: &[u8]) -> PyResult { use dromedary::Transport as _; py.detach(|| self.inner.put_bytes(relpath, bytes, None)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(relpath)))?; Ok(bytes.len() as u64) } /// Non-atomic PUT (bare, no temp-file dance). `create_parent_dir` /// causes a 404/403/409 to retry after creating the missing /// parent directory via MKCOL. #[pyo3(signature = (relpath, bytes, create_parent_dir=false))] fn put_bytes_non_atomic( &self, py: Python, relpath: &str, bytes: &[u8], create_parent_dir: bool, ) -> PyResult<()> { use dromedary::Transport as _; py.detach(|| { self.inner .put_bytes_non_atomic(relpath, bytes, None, Some(create_parent_dir), None) }) .map_err(|e| map_transport_err_to_py_err(e, None, Some(relpath))) } /// Append `bytes` to the file at `relpath`, returning the file /// size before the append. Picks between HEAD+ranged-PUT and /// GET+modify+PUT based on the inherited range-hint state. The /// ``mode`` argument is accepted for Transport-API parity — the /// DAV backend has no way to set file modes server-side, so it's /// ignored. #[pyo3(signature = (relpath, bytes, mode=None))] fn append_bytes( &self, py: Python, relpath: &str, bytes: &[u8], mode: Option>, ) -> PyResult { use dromedary::Transport as _; let _ = mode; py.detach(|| self.inner.append_bytes(relpath, bytes, None)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(relpath))) } /// `stat(relpath)` — returns a Python object with `st_size` and /// `st_mode` attributes, matching the shape the Python /// `_DAVStat` had. Directories get `st_size=0` and a dir mode /// (040644); regular files get their PROPFIND-reported size /// and 100644 or 100755 depending on the `executable` flag. 
fn stat<'py>(&self, py: Python<'py>, relpath: &str) -> PyResult> { use dromedary::Transport as _; let stat = py .detach(|| self.inner.stat(relpath)) .map_err(|e| map_transport_err_to_py_err(e, None, Some(relpath)))?; let dict = pyo3::types::PyDict::new(py); dict.set_item("st_size", stat.size)?; #[cfg(unix)] dict.set_item("st_mode", stat.mode)?; // Return a SimpleNamespace so callers use dotted access // (bzr reads `st.st_mode`, not `st['st_mode']`). let types = py.import("types")?; let ns = types.getattr("SimpleNamespace")?.call((), Some(&dict))?; Ok(ns) } /// List immediate children of `relpath`. Names are relative to /// `relpath` (with any trailing slash stripped). Errors from /// PROPFIND surface as Python exceptions on the caller's first /// iteration; we materialise the list eagerly so `list_dir()` /// behaves like a Python list rather than a lazy iterator. fn list_dir(&self, py: Python, relpath: &str) -> PyResult> { use dromedary::Transport as _; py.detach(|| -> Result, dromedary::Error> { let iter = self.inner.list_dir(relpath); iter.collect::, _>>() }) .map_err(|e| map_transport_err_to_py_err(e, None, Some(relpath))) } fn iter_files_recursive(&self, py: Python) -> PyResult> { use dromedary::Transport as _; py.detach(|| -> Result, dromedary::Error> { let iter = self.inner.iter_files_recursive(); iter.collect::, _>>() }) .map_err(|e| map_transport_err_to_py_err(e, None, None)) } } /// Build the four-layer `Transport → ConnectedTransport → /// HttpTransport → HttpDavTransport` initializer. /// /// The `dyn Transport` installed at the base points at the *DAV* /// transport, not the HTTP one it wraps. That matters because the /// `Transport` pyclass's inherited Python helpers (notably `move`, /// `copy_tree`, `copy_to`) dispatch to `self.0.r#move` etc. — /// `self.0` being the base `Box`. If that dyn /// pointed at the HTTP layer, those helpers would call the HTTP /// `stat` (which returns `TransportNotPossible`) and fail. 
Pointing /// at the DAV layer routes them through PROPFIND-backed stat and /// the native WebDAV MOVE verb. fn dav_transport_initializer( inner: Arc, ) -> PyClassInitializer { let http_inner = Arc::new(inner.http().clone()); let dav_box: Box = Box::new(Clone::clone(&*inner)); http_transport_initializer_with_base(http_inner, dav_box) .add_subclass(HttpDavTransport { inner }) } dromedary-0.1.5/dromedary/000077500000000000000000000000001520150013200154565ustar00rootroot00000000000000dromedary-0.1.5/dromedary/__init__.py000066400000000000000000002073621520150013200176010ustar00rootroot00000000000000# Copyright (C) 2005-2012, 2016 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Transport is an abstraction layer to handle file access. The abstraction is to allow access from the local filesystem, as well as remote (such as http or sftp). Transports are constructed from a string, being a URL or (as a degenerate case) a local filesystem path. This is typically the top directory of a bzrdir, repository, or similar object we are interested in working with. The Transport returned has methods to read, write and manipulate files within it. 
""" import contextlib import errno import logging import os import sys from collections.abc import Callable, Iterable, Iterator from io import BytesIO from stat import S_ISDIR from typing import IO, Any, Generic, Literal, Protocol, TypeVar, Union from catalogus import registry from . import _hooks, _ui, errors, osutils, urlutils # Set up logging logger = logging.getLogger("dromedary") T = TypeVar("T") ConnT = TypeVar("ConnT") CredT = TypeVar("CredT") class ProgressBar(Protocol): """Minimal interface for a progress bar.""" def update(self, msg: str, current: int, total: int) -> None: """Update progress display.""" ... class Lock(Protocol): """Minimal interface for a transport lock.""" def unlock(self) -> None: """Release the lock.""" ... from . import _transport_rs # TODO: unify into a single base class once _transport_rs.Transport replaces # the pure-Python Transport. AnyTransport = Union["Transport", "_transport_rs.Transport"] # a dictionary of open file streams. Keys are absolute paths, values are # transport defined. _file_streams: dict[str, Any] = {} def _get_protocol_handlers() -> "TransportListRegistry": """Return a dictionary of {urlprefix: [factory]}.""" return transport_list_registry def _set_protocol_handlers(new_handlers: "TransportListRegistry") -> None: """Replace the current protocol handlers dictionary. WARNING this will remove all build in protocols. Use with care. """ global transport_list_registry transport_list_registry = new_handlers def _clear_protocol_handlers() -> None: global transport_list_registry transport_list_registry = TransportListRegistry() def _get_transport_modules() -> list[str]: """Return a list of the modules providing transports.""" modules = set() for _prefix, factory_list in transport_list_registry.items(): for factory in factory_list: modules.add(factory.get_module()) # Add chroot and pathfilter directly, because there is no handler # registered for it. 
modules.add("dromedary.chroot") modules.add("dromedary.pathfilter") result = sorted(modules) return result class TransportListRegistry(registry.Registry): """A registry which simplifies tracking available Transports. A registration of a new protocol requires two steps: 1) register the prefix with the function register_transport( ) 2) register the protocol provider with the function register_transport_provider( ) ( and the "lazy" variant ) This is needed because: a) a single provider can support multiple protocols (like the ftp provider which supports both the ftp:// and the aftp:// protocols) b) a single protocol can have multiple providers (like the http:// protocol which was supported by both the urllib and pycurl providers) """ def register_transport_provider( self, key: str, obj: type | Callable[[str], "Transport"] ) -> None: """Register a transport provider object for a protocol. Args: key: Protocol prefix (e.g., 'http://'). obj: Transport class or factory object. """ self.get(key).insert(0, registry._ObjectGetter(obj)) def register_lazy_transport_provider( self, key: str, module_name: str, member_name: str ) -> None: """Register a transport provider with lazy loading. Args: key: Protocol prefix (e.g., 'http://'). module_name: Name of the module containing the transport class. member_name: Name of the transport class within the module. """ self.get(key).insert(0, registry._LazyObjectGetter(module_name, member_name)) def register_transport(self, key: str, help: str | None = None) -> None: """Register a transport protocol. Args: key: Protocol prefix (e.g., 'http://'). help: Optional help text describing the transport. """ self.register(key, [], help) transport_list_registry = TransportListRegistry() def register_transport_proto( prefix: str, help: str | None = None, info: str | None = None, register_netloc: bool = False, ) -> None: """Register a transport protocol prefix. Args: prefix: Protocol prefix (e.g., 'http://'). help: Optional help text. 
info: Additional protocol information (unused). register_netloc: Whether to register for URL parsing with netloc. """ transport_list_registry.register_transport(prefix, help) if register_netloc: if not prefix.endswith("://"): raise ValueError(prefix) register_urlparse_netloc_protocol(prefix[:-3]) def register_lazy_transport(prefix: str, module: str, classname: str) -> None: """Register a transport with lazy class loading. Args: prefix: Protocol prefix (e.g., 'http://'). module: Module name containing the transport class. classname: Name of the transport class. """ if prefix not in transport_list_registry: register_transport_proto(prefix) transport_list_registry.register_lazy_transport_provider(prefix, module, classname) def register_transport(prefix: str, klass: type | Callable[[str], "Transport"]) -> None: """Register a transport class for a protocol prefix. Args: prefix: Protocol prefix (e.g., 'http://'). klass: Transport class to register. """ if prefix not in transport_list_registry: register_transport_proto(prefix) transport_list_registry.register_transport_provider(prefix, klass) def register_urlparse_netloc_protocol(protocol: str) -> None: """Ensure that protocol is setup to be used with urlparse netloc parsing.""" if protocol not in urlutils.urlparse.uses_netloc: urlutils.urlparse.uses_netloc.append(protocol) def _unregister_urlparse_netloc_protocol(protocol: str) -> None: """Remove protocol from urlparse netloc parsing. Except for tests, you should never use that function. Using it with 'http', for example, will break all http transports. 
""" if protocol in urlutils.urlparse.uses_netloc: urlutils.urlparse.uses_netloc.remove(protocol) def unregister_transport( scheme: str, factory: type | Callable[[str], "Transport"] ) -> None: """Unregister a transport.""" l = transport_list_registry.get(scheme) for i in l: o = i.get_obj() if o == factory: transport_list_registry.get(scheme).remove(i) break if len(l) == 0: transport_list_registry.remove(scheme) class _CoalescedOffset: """A data container for keeping track of coalesced offsets.""" __slots__ = ["length", "ranges", "start"] def __init__(self, start: int, length: int, ranges: list[tuple[int, int]]) -> None: self.start = start self.length = length self.ranges = ranges def __lt__(self, other: "_CoalescedOffset") -> bool: return (self.start, self.length, self.ranges) < ( other.start, other.length, other.ranges, ) def __eq__(self, other: object) -> bool: if not isinstance(other, _CoalescedOffset): return NotImplemented # type: ignore[return-value] return (self.start, self.length, self.ranges) == ( other.start, other.length, other.ranges, ) def __repr__(self) -> str: return "{}({!r}, {!r}, {!r})".format( self.__class__.__name__, self.start, self.length, self.ranges ) class LateReadError: """A helper for transports which pretends to be a readable file. When read() is called, errors.ReadError is raised. """ def __init__(self, path: str) -> None: """Initialize LateReadError. Args: path: Path that will trigger the read error. 
""" self._path = path def close(self) -> None: """A no-op - do nothing.""" def __enter__(self) -> "LateReadError": """Context manager entry.""" return self def __exit__( self, exc_type: object, exc_val: object, exc_tb: object ) -> Literal[False]: """Context manager exit.""" # If there was an error raised, prefer the original one try: self.close() except BaseException: if exc_type is None: raise return False def _fail(self) -> None: """Raise ReadError.""" raise errors.ReadError(self._path) def __iter__(self) -> Iterator[bytes]: """Iterator protocol - raises ReadError.""" self._fail() return iter([]) def read(self, count: int = -1) -> bytes: """Read method - raises ReadError.""" self._fail() return b"" def readlines(self) -> list[bytes]: """Read lines method - raises ReadError.""" self._fail() return [] class FileStream: """Base class for FileStreams.""" def __init__(self, transport: "AnyTransport", relpath: str) -> None: """Create a FileStream for relpath on transport.""" self.transport = transport self.relpath = relpath def _close(self) -> None: """A hook point for subclasses that need to take action on close.""" def __enter__(self) -> "FileStream": """Context manager entry.""" return self def __exit__( self, exc_type: object, exc_value: object, exc_tb: object ) -> Literal[False]: """Context manager exit.""" self.close() return False def close(self, want_fdatasync: bool = False) -> None: """Close the file stream. Args: want_fdatasync: Whether to force data synchronization. """ if want_fdatasync: with contextlib.suppress(errors.TransportNotPossible): self.fdatasync() self._close() del _file_streams[self.transport.abspath(self.relpath)] def fdatasync(self) -> None: """Force data out to physical disk if possible. :raises errors.TransportNotPossible: If this transport has no way to flush to disk. """ raise errors.TransportNotPossible(f"{self.transport} cannot fdatasync") class FileFileStream(FileStream): """A file stream object returned by open_write_stream. 
This version uses a file like object to perform writes. """ def __init__( self, transport: "Transport", relpath: str, file_handle: IO[bytes] ) -> None: """Initialize FileFileStream. Args: transport: Transport instance. relpath: Relative path to the file. file_handle: File-like object for operations. """ FileStream.__init__(self, transport, relpath) self.file_handle = file_handle def _close(self) -> None: self.file_handle.close() def fdatasync(self) -> None: """Force data out to physical disk if possible.""" self.file_handle.flush() try: fileno = self.file_handle.fileno() except AttributeError as err: raise errors.TransportNotPossible() from err osutils.fdatasync(fileno) def write(self, data: bytes) -> int: """Write bytes to the file.""" osutils.pump_string_file(data, self.file_handle) return len(data) def flush(self) -> None: """Flush any buffered data.""" self.file_handle.flush() class AppendBasedFileStream(FileStream): """A file stream object returned by open_write_stream. This version uses append on a transport to perform writes. """ def write(self, data: bytes) -> int: """Write bytes by appending to the file.""" self.transport.append_bytes(self.relpath, data) return len(data) def flush(self) -> None: """Flush any buffered data (no-op for append-based streams).""" pass class TransportHooks(_hooks.Hooks): """Mapping of hook names to registered callbacks for transport hooks.""" def __init__(self) -> None: """Initialize TransportHooks.""" super().__init__() self.add_hook( "post_connect", "Called after a new connection is established or a reconnect " "occurs. The sole argument passed is either the connected " "transport or smart medium instance.", (2, 5), ) class Transport: """This class encapsulates methods for retrieving or putting a file from/to a storage location. :ivar base: Base URL for the transport; should always end in a slash. 
""" # implementations can override this if it is more efficient # for them to combine larger read chunks together _max_readv_combine = 50 # It is better to read this much more data in order, rather # than doing another seek. Even for the local filesystem, # there is a benefit in just reading. # TODO: jam 20060714 Do some real benchmarking to figure out # where the biggest benefit between combining reads and # and seeking is. Consider a runtime auto-tune. _bytes_to_read_before_seek = 0 hooks = TransportHooks() base: str def __init__(self, base: str) -> None: """Initialize a Transport. Args: base: Base URL for the transport; should always end in a slash. """ super().__init__() self.base = base (self._raw_base, self._segment_parameters) = urlutils.split_segment_parameters( base ) def _translate_error( self, e: OSError, path: str, raise_generic: bool = True ) -> None: """Translate an IOError or OSError into an appropriate bzr error. This handles things like ENOENT, ENOTDIR, EEXIST, and EACCESS """ if getattr(e, "errno", None) is not None: if e.errno in (errno.ENOENT, errno.ENOTDIR): raise errors.NoSuchFile(path, extra=e) elif e.errno == errno.EINVAL: logger.debug("EINVAL returned on path %s: %r", path, e) raise errors.NoSuchFile(path, extra=e) # I would rather use errno.EFOO, but there doesn't seem to be # any matching for 267 # This is the error when doing a listdir on a file: # WindowsError: [Errno 267] The directory name is invalid if sys.platform == "win32" and e.errno in (errno.ESRCH, 267): raise errors.NoSuchFile(path, extra=e) if e.errno == errno.EEXIST: raise errors.FileExists(path, extra=e) if e.errno == errno.EACCES: raise errors.PermissionDenied(path, extra=e) if e.errno == errno.ENOTEMPTY: raise errors.DirectoryNotEmpty(path, extra=e) if e.errno == errno.EBUSY: raise errors.ResourceBusy(path, extra=e) if isinstance(e, (NotADirectoryError, FileNotFoundError)): raise errors.NoSuchFile(path, extra=e) if raise_generic: raise errors.TransportError(msg="Transport 
operation failed", orig_error=e) def clone(self, offset: str | None = None) -> "Transport": """Return a new Transport object, cloned from the current location, using a subdirectory or parent directory. This allows connections to be pooled, rather than a new one needed for each subdir. """ raise NotImplementedError(self.clone) def create_prefix(self, mode: int | None = None) -> None: """Create all the directories leading down to self.base.""" cur_transport = self needed = [cur_transport] # Recurse upwards until we can create a directory successfully while True: new_transport = cur_transport.clone("..") if new_transport.base == cur_transport.base: raise errors.TransportError( f"Failed to create path prefix for {cur_transport.base}." ) try: new_transport.mkdir(".", mode=mode) except errors.NoSuchFile: needed.append(new_transport) cur_transport = new_transport except errors.FileExists: break else: break # Now we only need to create child directories while needed: cur_transport = needed.pop() cur_transport.ensure_base(mode=mode) def ensure_base(self, mode: int | None = None) -> bool: """Ensure that the directory this transport references exists. This will create a directory if it doesn't exist. :return: True if the directory was created, False otherwise. """ # The default implementation just uses "Easier to ask for forgiveness # than permission". We attempt to create the directory, and just # suppress FileExists and PermissionDenied (for Windows) exceptions. try: self.mkdir(".", mode=mode) except (errors.FileExists, errors.PermissionDenied): return False except errors.TransportNotPossible: if self.has("."): return False raise else: return True def external_url(self) -> str: """Return a URL for self that can be given to an external process. There is no guarantee that the URL can be accessed from a different machine - e.g. file:/// urls are only usable on the local machine, sftp:/// urls when the server is only bound to localhost are only usable from localhost etc. 
NOTE: This method may remove security wrappers (e.g. on chroot transports) and thus should *only* be used when the result will not be used to obtain a new transport within breezy. Ideally chroot transports would know enough to cause the external url to be the exact one used that caused the chrooting in the first place, but that is not currently the case. :return: A URL that can be given to another process. :raises InProcessTransport: If the transport is one that cannot be accessed out of the current process (e.g. a MemoryTransport) then InProcessTransport is raised. """ raise NotImplementedError(self.external_url) def get_segment_parameters(self) -> dict[str, str]: """Return the segment parameters for the top segment of the URL.""" return self._segment_parameters def set_segment_parameter(self, name: str, value: str | None) -> None: """Set a segment parameter. Args: name: Segment parameter name (urlencoded string) value: Segment parameter value (urlencoded string) """ if value is None: with contextlib.suppress(KeyError): del self._segment_parameters[name] else: self._segment_parameters[name] = value self.base = urlutils.join_segment_parameters( self._raw_base, self._segment_parameters ) def _pump(self, from_file: IO[bytes], to_file: IO[bytes]) -> int: """Most children will need to copy from one file-like object or string to another one. This just gives them something easy to call. """ return osutils.pumpfile(from_file, to_file) def _get_total(self, multi: object) -> int | None: """Try to figure out how many entries are in multi, but if not possible, return None. """ try: return len(multi) # type: ignore[arg-type] except TypeError: # We can't tell how many, because relpaths is a generator return None def _report_activity(self, bytes: int, direction: Literal["read", "write"]) -> None: """Notify that this transport has activity. Implementations should call this from all methods that actually do IO. 
Be careful that it's not called twice, if one method is implemented on top of another. Args: bytes: Number of bytes read or written. direction: 'read' or 'write' or None. """ _ui.report_transport_activity(self, bytes, direction) def _update_pb( self, pb: ProgressBar | None, msg: str, count: int, total: int | None ) -> None: """Update the progress bar based on the current count and total available, total may be None if it was not possible to determine. """ if pb is None: return if total is None: pb.update(msg, count, count + 1) else: pb.update(msg, count, total) def _iterate_over( self, multi: Iterable, func: Callable[..., Any], pb: ProgressBar | None, msg: str, expand: bool = True, ) -> tuple[Any, ...]: """Iterate over all entries in multi, passing them to func, and update the progress bar as you go along. :param expand: If True, the entries will be passed to the function by expanding the tuple. If False, it will be passed as a single parameter. """ total = self._get_total(multi) result = [] for count, entry in enumerate(multi): self._update_pb(pb, msg, count, total) if expand: result.append(func(*entry)) else: result.append(func(entry)) return tuple(result) def abspath(self, relpath: str) -> str: """Return the full url to the given relative path. :param relpath: a string of a relative path """ # XXX: Robert Collins 20051016 - is this really needed in the public # interface ? raise NotImplementedError(self.abspath) def recommended_page_size(self) -> int: """Return the recommended page size for this transport. This is potentially different for every path in a given namespace. For example, local transports might use an operating system call to get the block size for a given path, which can vary due to mount points. :return: The page size in bytes. """ return 4 * 1024 def relpath(self, abspath: str) -> str: """Return the local path portion from a given absolute path. 
def has_any(self, relpaths: Iterable[str]) -> bool:
    """Return True if any of the paths exist.

    Paths are probed with ``has()`` in order and probing stops at the first
    one that exists.
    """
    for candidate in relpaths:
        if self.has(candidate):
            return True
    return False
def get_bytes(self, relpath: str) -> bytes:
    """Get a raw string of the bytes for a file at the given location.

    The file object obtained from ``get()`` is closed before returning,
    whether or not reading it succeeded.

    :param relpath: The relative path to the file
    :return: Everything ``read()`` returns for that file.
    """
    with contextlib.closing(self.get(relpath)) as stream:
        return stream.read()
If that is the case consider the # following structure: # - a test that transport.readv uses self._offset_expander or some # similar attribute, to do the expansion # - a test for each transport that it has some known-good offset # expander # - unit tests for each offset expander # - a set of tests for the offset expander interface, giving # baseline behaviour (which the current transport # adjust_for_latency tests could be repurposed to). offsets = self._sort_expand_and_combine(offsets, upper_limit) return self._readv(relpath, offsets) def _readv( self, relpath: str, offsets: list[tuple[int, int]] ) -> Iterator[tuple[int, bytes]]: """Get parts of the file at the given relative path. :param relpath: The path to read. :param offsets: A list of (offset, size) tuples. :return: A list or generator of (offset, data) tuples """ if not offsets: return yield from self._seek_and_read(self.get(relpath), offsets, relpath) def _seek_and_read( self, fp: IO[bytes], offsets: list[tuple[int, int]], relpath: str = "" ) -> Iterator[tuple[int, bytes]]: """An implementation of readv that uses fp.seek and fp.read. This uses _coalesce_offsets to issue larger reads and fewer seeks. :param fp: A file-like object that supports seek() and read(size). Note that implementations are allowed to call .close() on this file handle, so don't trust that you can use it for other work. :param offsets: A list of offsets to be read from the given file. :return: yield (pos, data) tuples for each request """ try: yield from _transport_rs.seek_and_read( fp, offsets, max_readv_combine=self._max_readv_combine, bytes_to_read_before_seek=self._bytes_to_read_before_seek, path=relpath, ) finally: fp.close() def _sort_expand_and_combine( self, offsets: list[tuple[int, int]], upper_limit: int | None ) -> list[tuple[int, int]]: """Helper for readv. :param offsets: A readv vector - (offset, length) tuples. :param upper_limit: The highest byte offset that may be requested. 
:return: A readv vector that will read all the regions requested by offsets, in start-to-end order, with no duplicated regions, expanded by the transports recommended page size. """ return _transport_rs.sort_expand_and_combine( offsets, upper_limit, self.recommended_page_size() ) @staticmethod def _coalesce_offsets( offsets: list[tuple[int, int]], limit: int | None = None, fudge_factor: int | None = None, max_size: int | None = None, ) -> list["_CoalescedOffset"]: """Yield coalesced offsets. With a long list of neighboring requests, combine them into a single large request, while retaining the original offsets. Turns [(15, 10), (25, 10)] => [(15, 20, [(0, 10), (10, 10)])] Note that overlapping requests are not permitted. (So [(15, 10), (20, 10)] will raise a ValueError.) This is because the data we access never overlaps, and it allows callers to trust that we only need any byte of data for 1 request (so nothing needs to be buffered to fulfill a second request.) :param offsets: A list of (start, length) pairs :param limit: Only combine a maximum of this many pairs Some transports penalize multiple reads more than others, and sometimes it is better to return early. 0 means no limit :param fudge_factor: All transports have some level of 'it is better to read some more data and throw it away rather than seek', so collapse if we are 'close enough' :param max_size: Create coalesced offsets no bigger than this size. When a single offset is bigger than 'max_size', it will keep its size and be alone in the coalesced offset. 0 means no maximum size. 
:return: return a list of _CoalescedOffset objects, which have members for where to start, how much to read, and how to split those chunks back up """ return [ _CoalescedOffset(start, length, ranges) for start, length, ranges in _transport_rs.coalesce_offsets( offsets, limit, fudge_factor, max_size ) ] def put_bytes(self, relpath: str, raw_bytes: bytes, mode: int | None = None) -> int: """Atomically put the supplied bytes into the given location. :param relpath: The location to put the contents, relative to the transport base. :param raw_bytes: A bytestring of data. :param mode: Create the file with the given mode. :return: None """ if not isinstance(raw_bytes, bytes): raise TypeError(f"raw_bytes must be a plain string, not {type(raw_bytes)}") return self.put_file(relpath, BytesIO(raw_bytes), mode=mode) def put_bytes_non_atomic( self, relpath: str, raw_bytes: bytes, mode: int | None = None, create_parent_dir: bool = False, dir_mode: int | None = None, ) -> None: """Copy the string into the target location. This function is not strictly safe to use. See Transport.put_bytes_non_atomic for more information. :param relpath: The remote location to put the contents. :param raw_bytes: A string object containing the raw bytes to write into the target file. :param mode: Possible access permissions for new file. None means do not set remote permissions. :param create_parent_dir: If we cannot create the target file because the parent directory does not exist, go ahead and create it, and then try again. :param dir_mode: Possible access permissions for new directories. """ if not isinstance(raw_bytes, bytes): raise TypeError(f"raw_bytes must be a plain string, not {type(raw_bytes)}") self.put_file_non_atomic( relpath, BytesIO(raw_bytes), mode=mode, create_parent_dir=create_parent_dir, dir_mode=dir_mode, ) def put_file(self, relpath: str, f: IO[bytes], mode: int | None = None) -> int: """Copy the file-like object into the location. 
:param relpath: Location to put the contents, relative to base. :param f: File-like object. :param mode: The mode for the newly created file, None means just use the default. :return: The length of the file that was written. """ raise NotImplementedError(self.put_file) def put_file_non_atomic( self, relpath: str, f: IO[bytes], mode: int | None = None, create_parent_dir: bool = False, dir_mode: int | None = None, ) -> int | None: """Copy the file-like object into the target location. This function is not strictly safe to use. It is only meant to be used when you already know that the target does not exist. It is not safe, because it will open and truncate the remote file. So there may be a time when the file has invalid contents. :param relpath: The remote location to put the contents. :param f: File-like object. :param mode: Possible access permissions for new file. None means do not set remote permissions. :param create_parent_dir: If we cannot create the target file because the parent directory does not exist, go ahead and create it, and then try again. :param dir_mode: Possible access permissions for new directories. """ # Default implementation just does an atomic put. try: return self.put_file(relpath, f, mode=mode) except errors.NoSuchFile: if not create_parent_dir: raise parent_dir = os.path.dirname(relpath) if parent_dir: self.mkdir(parent_dir, mode=dir_mode) return self.put_file(relpath, f, mode=mode) return None def mkdir(self, relpath: str, mode: int | None = None) -> None: """Create a directory at the given path.""" raise NotImplementedError(self.mkdir) def open_write_stream(self, relpath: str, mode: int | None = None) -> FileStream: """Open a writable file stream at relpath. A file stream is a file like object with a write() method that accepts bytes to write.. Buffering may occur internally until the stream is closed with stream.close(). Calls to readv or the get_* methods will be synchronised with any internal buffering that may be present. 
def copy_to(
    self,
    relpaths: Iterable[str],
    other: "Transport",
    mode: int | None = None,
    pb: ProgressBar | None = None,
) -> int:
    """Copy a set of entries from self into another Transport.

    :param relpaths: A list/generator of entries to be copied.
    :param other: The transport receiving the files, under the same
        relative paths.
    :param mode: This is the target mode for the newly created files
    :param pb: Optional progress bar, updated once per entry.
    :return: The number of entries copied.

    TODO: This interface needs to be updated so that the target location
          can be different from the source location.
    """

    # The dummy implementation just does a simple get + put
    def copy_entry(path: str) -> None:
        source = self.get(path)
        try:
            other.put_file(path, source, mode=mode)
        finally:
            # Release the source handle even when the put fails, so copying
            # many entries does not accumulate open files.
            source.close()

    return len(self._iterate_over(relpaths, copy_entry, pb, "copy_to", expand=False))
def delete_tree(self, relpath: str) -> None:
    """Delete an entire tree. This may require a listable transport.

    The tree is walked through a clone rooted at relpath: every file is
    deleted first, then the sub-directories are removed children-first, and
    finally relpath itself is removed through this transport.
    """
    root = self.clone(relpath)
    to_visit = ["."]
    file_paths = []
    dir_paths = []
    while to_visit:
        current = to_visit.pop()
        if current != ".":
            # The root is removed separately, at the very end.
            dir_paths.append(current)
        for name in root.list_dir(current):
            child = current + "/" + name
            if S_ISDIR(root.stat(child).st_mode):
                to_visit.append(child)
            else:
                file_paths.append(child)

    for child in file_paths:
        root.delete(child)

    # Directories were recorded parents-first; remove them in reverse.
    for child in reversed(dir_paths):
        root.rmdir(child)
    self.rmdir(relpath)
def rmdir(self, relpath: str) -> None:
    """Remove a directory at the given path.

    The base class provides no implementation.

    :param relpath: The relative path of the directory to remove.
    :raises NotImplementedError: Always; the bound method is attached as the
        exception argument, as the other unimplemented operations of this
        class do (e.g. ``mkdir``, ``delete``, ``stat``).
    """
    raise NotImplementedError(self.rmdir)
:return: A lock object, which should contain an unlock() function. """ raise errors.TransportNotPossible(f"transport locks not supported on {self}") def is_readonly(self) -> bool: """Return true if this connection cannot be written to.""" return False def _can_roundtrip_unix_modebits(self) -> bool: """Return true if this transport can store and retrieve unix modebits. (For example, 0700 to make a directory owner-private.) Note: most callers will not want to switch on this, but should rather just try and set permissions and let them be either stored or not. This is intended mainly for the use of the test suite. Warning: this is not guaranteed to be accurate as sometimes we can't be sure: for example with vfat mounted on unix, or a windows sftp server. """ # TODO: Perhaps return a e.g. TransportCharacteristics that can answer # several questions about the transport. return False def _reuse_for(self, other_base: str) -> "Transport | None": # This is really needed for ConnectedTransport only, but it's easier to # have Transport refuses to be reused than testing that the reuse # should be asked to ConnectedTransport only. return None def disconnect(self) -> None: """Disconnect the transport. This is primarily for ConnectedTransport subclasses, but is implemented as a no-op in the base Transport class for convenience. """ # This is really needed for ConnectedTransport only, but it's easier to # have Transport do nothing than testing that the disconnect should be # asked to ConnectedTransport only. pass def _redirected_to(self, source: str, target: str) -> "Transport": """Returns a transport suitable to re-issue a redirected request. :param source: The source url as returned by the server. :param target: The target url as returned by the server. The redirection can be handled only if the relpath involved is not renamed by the redirection. 
:returns: A transport :raise errors.UnusableRedirect: when redirection can not be provided """ # This returns None by default, meaning the transport can't handle the # redirection. raise errors.UnusableRedirect( source, target, "transport does not support redirection" ) class _SharedConnection(Generic[ConnT, CredT]): """A connection shared between several transports.""" def __init__( self, connection: ConnT | None = None, credentials: CredT | None = None, base: str | None = None, ) -> None: """Constructor. :param connection: An opaque object specific to each transport. :param credentials: An opaque object containing the credentials used to create the connection. """ self.connection = connection self.credentials = credentials self.base = base class ConnectedTransport(Transport, Generic[ConnT, CredT]): """A transport connected to a remote server. This class provide the basis to implement transports that need to connect to a remote server. Host and credentials are available as private attributes, cloning preserves them and share the underlying, protocol specific, connection. """ def __init__( self, base: str, _from_transport: "ConnectedTransport[ConnT, CredT] | None" = None, ) -> None: """Constructor. The caller should ensure that _from_transport points at the same host as the new base. :param base: transport root URL :param _from_transport: optional transport to build from. The built transport will share the connection with this transport. """ if not base.endswith("/"): base += "/" self._parsed_url = self._split_url(base) if _from_transport is not None: # Copy the password as it does not appear in base and will be lost # otherwise. It can appear in the _split_url above if the user # provided it on the command line. Otherwise, daughter classes will # prompt the user for one when appropriate. 
self._parsed_url.password = _from_transport._parsed_url.password self._parsed_url.quoted_password = ( _from_transport._parsed_url.quoted_password ) base = str(self._parsed_url) super().__init__(base) self._shared_connection: _SharedConnection[ConnT, CredT] if _from_transport is None: self._shared_connection = _SharedConnection() else: self._shared_connection = _from_transport._shared_connection @property def _user(self) -> str | None: return self._parsed_url.user @property def _password(self) -> str | None: return self._parsed_url.password @property def _host(self) -> str: return self._parsed_url.host @property def _port(self) -> int | None: return self._parsed_url.port @property def _path(self) -> str: return self._parsed_url.path @property def _scheme(self) -> str: return self._parsed_url.scheme def clone(self, offset: str | None = None) -> "ConnectedTransport[ConnT, CredT]": """Return a new transport with root at self.base + offset. We leave the daughter classes take advantage of the hint that it's a cloning not a raw creation. """ if offset is None: return self.__class__(self.base, _from_transport=self) else: return self.__class__(self.abspath(offset), _from_transport=self) @staticmethod def _split_url(url: str) -> "urlutils.URL": return urlutils.URL.from_string(url) @staticmethod def _unsplit_url( scheme: str, user: str | None, password: str | None, host: str, port: int | None, path: str, ) -> str: """Build the full URL for the given already URL encoded path. user, password, host and path will be quoted if they contain reserved chars. Args: scheme: protocol user: login password: associated password host: the server address port: the associated port path: the absolute path on the server :return: The corresponding URL. """ netloc = urlutils.quote(host) if user is not None: # Note that we don't put the password back even if we # have one so that it doesn't get accidentally # exposed. 
netloc = f"{urlutils.quote(user)}@{netloc}" if port is not None: netloc = "%s:%d" % (netloc, port) path = urlutils.escape(path) return urlutils.urlparse.urlunparse((scheme, netloc, path, None, None, None)) def relpath(self, abspath: str) -> str: """Return the local path portion from a given absolute path.""" parsed_url = self._split_url(abspath) error = [] if parsed_url.scheme != self._parsed_url.scheme: error.append("scheme mismatch") if parsed_url.user != self._parsed_url.user: error.append("user name mismatch") if parsed_url.host != self._parsed_url.host: error.append("host mismatch") if parsed_url.port != self._parsed_url.port: error.append("port mismatch") if not ( parsed_url.path == self._parsed_url.path[:-1] or parsed_url.path.startswith(self._parsed_url.path) ): error.append("path mismatch") if error: extra = ", ".join(error) raise errors.PathNotChild(abspath, self.base, extra=extra) pl = len(self._parsed_url.path) return parsed_url.path[pl:].strip("/") def abspath(self, relpath: str) -> str: """Return the full url to the given relative path. Args: relpath: the relative path urlencoded :returns: the Unicode version of the absolute path for relpath. """ return str(self._parsed_url.clone(relpath)) def _remote_path(self, relpath: str) -> str: """Return the absolute path part of the url to the given relative path. This is the path that the remote server expect to receive in the requests, daughter classes should redefine this method if needed and use the result to build their requests. Args: relpath: the path relative to the transport base urlencoded. :return: the absolute Unicode path on the server, """ return self._parsed_url.clone(relpath).path def _get_shared_connection(self) -> "_SharedConnection[ConnT, CredT]": """Get the object shared amongst cloned transports. This should be used only by classes that needs to extend the sharing with objects other than transports. Use _get_connection to get the connection itself. 
""" return self._shared_connection def _set_connection( self, connection: ConnT, credentials: CredT | None = None ) -> None: """Record a newly created connection with its associated credentials. Note: To ensure that connection is still shared after a temporary failure and a new one needs to be created, daughter classes should always call this method to set the connection and do so each time a new connection is created. Args: connection: An opaque object representing the connection used by the daughter class. credentials: An opaque object representing the credentials needed to create the connection. """ self._shared_connection.connection = connection self._shared_connection.credentials = credentials for hook in self.hooks["post_connect"]: hook(self) def _get_connection(self) -> ConnT | None: """Returns the transport specific connection object.""" return self._shared_connection.connection def _get_credentials(self) -> CredT | None: """Returns the credentials used to establish the connection.""" return self._shared_connection.credentials def _update_credentials(self, credentials: CredT) -> None: """Update the credentials of the current connection. Some protocols can renegociate the credentials within a connection, this method allows daughter classes to share updated credentials. :param credentials: the updated credentials. """ # We don't want to call _set_connection here as we are only updating # the credentials not creating a new connection. self._shared_connection.credentials = credentials def _reuse_for(self, other_base: str) -> "ConnectedTransport[ConnT, CredT] | None": """Returns a transport sharing the same connection if possible. Note: we share the connection if the expected credentials are the same: (host, port, user). Some protocols may disagree and redefine the criteria in daughter classes. Note: we don't compare the passwords here because other_base may have been obtained from an existing transport.base which do not mention the password. 
:param other_base: the URL we want to share the connection with. :return: A new transport or None if the connection cannot be shared. """ try: parsed_url = self._split_url(other_base) except urlutils.InvalidURL: # No hope in trying to reuse an existing transport for an invalid # URL return None transport = None # Don't compare passwords, they may be absent from other_base or from # self and they don't carry more information than user anyway. if ( parsed_url.scheme == self._parsed_url.scheme and parsed_url.user == self._parsed_url.user and parsed_url.host == self._parsed_url.host and parsed_url.port == self._parsed_url.port ): path = parsed_url.path if not path.endswith("/"): # This normally occurs at __init__ time, but it's easier to do # it now to avoid creating two transports for the same base. path += "/" if self._parsed_url.path == path: # shortcut, it's really the same transport return self # We don't call clone here because the intent is different: we # build a new transport on a different base (which may be totally # unrelated) but we share the connection. transport = self.__class__(other_base, _from_transport=self) return transport def disconnect(self) -> None: """Disconnect the transport. If and when required the transport willl reconnect automatically. """ raise NotImplementedError(self.disconnect) def get_transport_from_path( path: str, possible_transports: list[Transport] | None = None ) -> Transport: """Open a transport for a local path. :param path: Local path as byte or unicode string :return: Transport object for path """ return get_transport_from_url(urlutils.local_path_to_url(path), possible_transports) def get_transport_from_url( url: str, possible_transports: list[Transport] | None = None ) -> Transport: """Open a transport to access a URL. Args: base: a URL transports: optional reusable transports list. If not None, created transports will be added to the list. 
Returns: A new transport optionally sharing its connection with one of possible_transports. """ transport = None if possible_transports is not None: for t in possible_transports: try: t_same_connection = t._reuse_for(url) except AttributeError: continue if t_same_connection is not None: # Add only new transports if t_same_connection not in possible_transports: possible_transports.append(t_same_connection) return t_same_connection last_err = None for proto, factory_list in transport_list_registry.items(): if proto is not None and url.startswith(proto): transport, last_err = _try_transport_factories(url, factory_list) if transport: if possible_transports is not None: if transport in possible_transports: raise AssertionError() possible_transports.append(transport) return transport if not urlutils.is_url(url): raise urlutils.InvalidURL(path=url) raise errors.UnsupportedProtocol(url, last_err) def _try_transport_factories( base: str, factory_list: list ) -> tuple[Transport | None, Exception | None]: last_err = None for factory in factory_list: try: return factory.get_obj()(base), None except errors.DependencyNotPresent as e: logger.debug( "failed to instantiate transport %r for %r: %r", factory, base, e ) last_err = e continue return None, last_err def do_catching_redirections( action: Callable[[Transport], T], transport: Transport, redirected: Callable[[Transport, errors.RedirectRequested, str], Transport], ) -> T: """Execute an action with given transport catching redirections. This is a facility provided for callers needing to follow redirections silently. The silence is relative: it is the caller responsability to inform the user about each redirection or only inform the user of a user via the exception parameter. Args: action: A callable, what the caller want to do while catching redirections. transport: The initial transport used. redirected: A callable receiving the redirected transport and the errors.RedirectRequested exception. 
:return: Whatever 'action' returns """ MAX_REDIRECTIONS = 8 # If a loop occurs, there is little we can do. So we don't try to detect # them, just getting out if too much redirections occurs. The solution # is outside: where the loop is defined. for _redirections in range(MAX_REDIRECTIONS): try: return action(transport) except errors.RedirectRequested as e: redirection_notice = "{} is{} redirected to {}".format( e.source, e.permanently, e.target ) transport = redirected(transport, e, redirection_notice) else: # Loop exited without resolving redirect ? Either the # user has kept a very very very old reference or a loop # occurred in the redirections. Nothing we can cure here: # tell the user. Note that as the user has been informed # about each redirection (it is the caller responsibility # to do that in redirected via the provided # redirection_notice). The caller may provide more # information if needed (like what file or directory we # were trying to act upon when the redirection loop # occurred). raise errors.TooManyRedirections() class Server: """A Transport Server. The Server interface provides a server for a given transport type. """ def start_server(self) -> None: """Setup the server to service requests.""" def stop_server(self) -> None: """Remove the server and cleanup any resources it owns.""" def open_file(url: str) -> IO[bytes]: """Open a file from a URL. :param url: URL to open :return: A file-like object. 
""" base, filename = urlutils.split(url) transport = get_transport_from_url(base) return open_file_via_transport(filename, transport) def open_file_via_transport(filename: str, transport: Transport) -> IO[bytes]: """Open a file using the transport, follow redirects as necessary.""" def open_file(transport: Transport) -> IO[bytes]: return transport.get(filename) def follow_redirection( transport: Transport, e: errors.RedirectRequested, redirection_notice: str ) -> Transport: logger.debug("%s", redirection_notice) base, _filename = urlutils.split(e.target) redirected_transport = get_transport_from_url(base) return redirected_transport return do_catching_redirections(open_file, transport, follow_redirection) # None is the default transport, for things with no url scheme register_transport_proto( "file://", help="Access using the standard filesystem (default)" ) register_lazy_transport("file://", "dromedary.local", "LocalTransport") register_transport_proto( "sftp://", help="Access using SFTP (most SSH servers provide SFTP).", register_netloc=True, ) register_lazy_transport("sftp://", "dromedary.sftp", "SFTPTransport") # Decorated http transport register_transport_proto( "http+urllib://", # help="Read-only access of branches exported on the web." register_netloc=True, ) register_lazy_transport("http+urllib://", "dromedary.http.urllib", "HttpTransport") register_transport_proto( "https+urllib://", # help="Read-only access of branches exported on the web using SSL." register_netloc=True, ) register_lazy_transport("https+urllib://", "dromedary.http.urllib", "HttpTransport") # Default http transports (last declared wins (if it can be imported)) register_transport_proto( "http://", help="Read-only access of branches exported on the web." ) register_transport_proto( "https://", help="Read-only access of branches exported on the web using SSL." 
) # The default http implementation is urllib register_lazy_transport("http://", "dromedary.http.urllib", "HttpTransport") register_lazy_transport("https://", "dromedary.http.urllib", "HttpTransport") register_transport_proto("http+webdav://", register_netloc=True) register_transport_proto("https+webdav://", register_netloc=True) register_lazy_transport("http+webdav://", "dromedary.webdav.webdav", "HttpDavTransport") register_lazy_transport( "https+webdav://", "dromedary.webdav.webdav", "HttpDavTransport" ) register_transport_proto("gio+", help="Access using any GIO supported protocols.") register_lazy_transport("gio+", "dromedary.gio_transport", "GioTransport") register_transport_proto("memory://") register_lazy_transport("memory://", "dromedary.memory", "MemoryTransport") register_transport_proto( "readonly+", # help="This modifier converts any transport to be readonly." ) register_lazy_transport("readonly+", "dromedary.readonly", "ReadonlyTransportDecorator") register_transport_proto("fakenfs+") register_lazy_transport("fakenfs+", "dromedary.fakenfs", "FakeNFSTransportDecorator") register_transport_proto("log+") register_lazy_transport("log+", "dromedary.log", "TransportLogDecorator") register_transport_proto("trace+") register_lazy_transport("trace+", "dromedary.trace", "TransportTraceDecorator") register_transport_proto("unlistable+") register_lazy_transport( "unlistable+", "dromedary.unlistable", "UnlistableTransportDecorator" ) register_transport_proto("brokenrename+") register_lazy_transport( "brokenrename+", "dromedary.brokenrename", "BrokenRenameTransportDecorator" ) register_transport_proto("vfat+") register_lazy_transport("vfat+", "dromedary.fakevfat", "FakeVFATTransportDecorator") dromedary-0.1.5/dromedary/_bedding.py000066400000000000000000000023421520150013200175640ustar00rootroot00000000000000# Copyright (C) 2005-2012, 2016 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General 
Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Filesystem path integration points for dromedary. Embedders should replace these functions to control file locations. The defaults use XDG-style paths. """ import os def config_dir() -> str: """Return the configuration directory path.""" xdg = os.environ.get("XDG_CONFIG_HOME", os.path.expanduser("~/.config")) return os.path.join(xdg, "breezy") def ensure_config_dir_exists() -> None: """Ensure the config directory exists.""" os.makedirs(config_dir(), exist_ok=True) dromedary-0.1.5/dromedary/_config.py000066400000000000000000000033031520150013200174330ustar00rootroot00000000000000# Copyright (C) 2005-2012, 2016 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Configuration integration points for dromedary. 
Embedders should replace these functions to provide config/auth. The defaults provide basic functionality using the standard library. """ def get_ssh_vendor_name() -> str | None: """Return the configured SSH vendor name, or None for auto-detect.""" return None def get_auth_user( scheme: str, host: str, port: int | None = None, default: str | None = None, ask: bool = False, prompt: str | None = None, ) -> str: """Get username for authentication. Default: returns default, or falls back to the system username. """ if default is not None: return default import getpass return getpass.getuser() def get_auth_password( scheme: str, host: str, user: str, port: int | None = None ) -> str: """Get password for authentication. Default: prompts via getpass. """ import getpass return getpass.getpass(f"Password for {user}@{host}: ") dromedary-0.1.5/dromedary/_hooks.py000066400000000000000000000104231520150013200173120ustar00rootroot00000000000000# Copyright (C) 2007-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Minimal standalone hooks implementation for dromedary.""" from collections.abc import Callable, Iterator from typing import Any class HookPoint: """A named hook point that maintains a list of callbacks.""" def __init__( self, name: str, doc: str, introduced: tuple[int, ...] 
| None = None, deprecated: tuple[int, ...] | None = None, ) -> None: self.name = name self.__doc__ = doc self.introduced = introduced self.deprecated = deprecated self._callbacks: list[Callable[..., Any]] = [] def __iter__(self) -> Iterator[Callable[..., Any]]: return iter(self._callbacks) def __len__(self) -> int: return len(self._callbacks) def __repr__(self) -> str: return f"" def docs(self) -> str: """Generate plain-text documentation for this hook point.""" import textwrap strings = [self.name, "~" * len(self.name), ""] introduced_string = ( ".".join(str(p) for p in self.introduced) if self.introduced else "unknown" ) strings.append(f"Introduced in: {introduced_string}") if self.deprecated: deprecated_string = ".".join(str(p) for p in self.deprecated) strings.append(f"Deprecated in: {deprecated_string}") strings.append("") if self.__doc__: strings.extend(textwrap.wrap(self.__doc__, break_long_words=False)) strings.append("") return "\n".join(strings) class Hooks(dict[str, HookPoint]): """A dict mapping hook names to HookPoint instances.""" def __init__(self) -> None: dict.__init__(self) self._callable_names: dict[Callable[..., Any], str] = {} def add_hook( self, name: str, doc: str, introduced: tuple[int, ...], deprecated: tuple[int, ...] 
| None = None, ) -> None: """Register a new hook point.""" self[name] = HookPoint(name, doc, introduced=introduced, deprecated=deprecated) def docs(self) -> str: """Generate plain-text documentation for all registered hooks.""" hook_docs = [] cls_name = self.__class__.__name__ hook_docs.append(cls_name) hook_docs.append("-" * len(cls_name)) hook_docs.append("") for hook_name in sorted(self.keys()): hook_docs.append(self[hook_name].docs()) return "\n".join(hook_docs) def install_named_hook( self, hook_name: str, a_callable: Callable[..., Any], name: str | None ) -> None: """Install a callable on the named hook point.""" try: hook = self[hook_name] except KeyError: raise KeyError(f"Unknown hook: {hook_name!r}") from None hook._callbacks.append(a_callable) if name is not None: self._callable_names[a_callable] = name def uninstall_named_hook(self, hook_name: str, label: str) -> None: """Remove a callable from the named hook point by label.""" hook = self[hook_name] for i, cb in enumerate(hook._callbacks): if self._callable_names.get(cb) == label: del hook._callbacks[i] del self._callable_names[cb] return raise KeyError(f"No hook named {label!r} on {hook_name!r}") def get_hook_name(self, a_callable: Callable[..., Any]) -> str: """Return the name associated with a callable, or a repr.""" return self._callable_names.get(a_callable, repr(a_callable)) dromedary-0.1.5/dromedary/_transport_rs.pyi000066400000000000000000000444241520150013200211100ustar00rootroot00000000000000"""Type stubs for the _transport_rs compiled Rust extension.""" import os import sys from collections.abc import Callable, Iterable, Iterator from typing import IO, Literal if sys.version_info >= (3, 11): from typing import Self else: from typing_extensions import Self def coalesce_offsets( offsets: list[tuple[int, int]], limit: int | None = None, fudge_factor: int | None = None, max_size: int | None = None, ) -> list[tuple[int, int, list[tuple[int, int]]]]: ... 
def seek_and_read( file: IO[bytes], offsets: list[tuple[int, int]], max_readv_combine: int | None = None, bytes_to_read_before_seek: int | None = None, path: str | None = None, ) -> Iterator[tuple[int, bytes]]: ... def sort_expand_and_combine( offsets: list[tuple[int, int]], upper_limit: int | None = None, recommended_page_size: int | None = None, ) -> list[tuple[int, int]]: ... def get_test_permutations() -> list[tuple[type, type]]: ... class _TransportBase: base: str def abspath(self, path: str) -> str: ... def append_bytes(self, path: str, bytes: bytes, mode: int | None = None) -> int: ... def append_file( self, path: str, file: IO[bytes], mode: int | None = None ) -> int: ... def clone(self, offset: str | None = None) -> Self: ... def copy(self, from_path: str, to_path: str) -> None: ... def copy_to( self, relpaths: Iterable[str], to_transport: _TransportBase, mode: int | None = None, ) -> int: ... def copy_tree(self, from_path: str, to_path: str) -> None: ... def copy_tree_to_transport(self, to_transport: _TransportBase) -> None: ... def create_prefix(self, mode: int | None = None) -> None: ... def delete(self, path: str) -> None: ... def delete_tree(self, path: str) -> None: ... def ensure_base(self, mode: int | None = None) -> bool: ... def external_url(self) -> str: ... def get(self, path: str) -> IO[bytes]: ... def get_bytes(self, path: str) -> bytes: ... def get_segment_parameters(self) -> dict[str, str]: ... def hardlink(self, from_path: str, to_path: str) -> None: ... def has(self, path: str) -> bool: ... def has_any(self, paths: Iterable[str]) -> bool: ... def is_readonly(self) -> bool: ... def iter_files_recursive(self) -> Iterator[str]: ... def list_dir(self, path: str) -> list[str]: ... def listable(self) -> bool: ... def local_abspath(self, path: str) -> str: ... def lock_read(self, path: str) -> object: ... def lock_write(self, path: str) -> object: ... def mkdir(self, path: str, mode: int | None = None) -> None: ... 
def move(self, from_path: str, to_path: str) -> None: ... def open_write_stream(self, path: str, mode: int | None = None) -> object: ... def put_bytes(self, path: str, data: bytes, mode: int | None = None) -> int: ... def put_bytes_non_atomic( self, path: str, data: bytes, mode: int | None = None, create_parent_dir: bool | None = None, dir_mode: int | None = None, ) -> None: ... def put_file(self, path: str, file: IO[bytes], mode: int | None = None) -> int: ... def put_file_non_atomic( self, path: str, file: IO[bytes], mode: int | None = None, create_parent_dir: bool | None = None, dir_mode: int | None = None, ) -> int | None: ... def readlink(self, path: str) -> str: ... def readv( self, path: str, offsets: list[tuple[int, int]], adjust_for_latency: bool | None = None, upper_limit: int | None = None, ) -> Iterator[tuple[int, bytes]]: ... def recommended_page_size(self) -> int: ... def relpath(self, path: str | None = None) -> str: ... def rename(self, from_path: str, to_path: str) -> None: ... def rmdir(self, path: str) -> None: ... def set_segment_parameter(self, name: str, value: str | None = None) -> None: ... def stat(self, path: str) -> os.stat_result: ... def symlink(self, from_path: str, to_path: str) -> None: ... class _ConnectedTransportBase(Transport): def disconnect(self) -> None: ... def request( self, method: str, url: str, headers: dict[str, str] | None = None, body: bytes | None = None, follow_redirects: bool = False, report_activity: Callable[[int, Literal["read", "write"]], None] | None = None, fields: dict[str, str] | None = None, retries: int = 0, **kwargs: object, ) -> http.HttpResponse: ... class Transport(_TransportBase): ... class ConnectedTransport(_ConnectedTransportBase): ... class TransportDecorator(_TransportBase): ... class ReadLock: def unlock(self) -> None: ... class WriteLock: def unlock(self) -> None: ... class TemporaryWriteLock: def unlock(self) -> None: ... class ReadvIter: def __iter__(self) -> ReadvIter: ... 
def __next__(self) -> tuple[int, bytes]: ... class http: class HttpClient: def request( self, method: str, url: str, headers: dict[str, str] | None = None, body: bytes | None = None, follow_redirects: bool | None = None, report_activity: Callable[[int, Literal["read", "write"]], None] | None = None, **kwargs: object, ) -> http.HttpResponse: ... def set_default_follow_redirects(self, follow: bool) -> None: ... class HttpResponse: status: int redirected_to: str | None def close(self) -> None: ... def getheader(self, name: str, default: str | None = None) -> str | None: ... def getheaders(self) -> list[tuple[str, str]]: ... def read(self, size: int | None = None) -> bytes: ... def readline(self, size: int = ...) -> bytes: ... def readlines(self) -> list[bytes]: ... class HttpTransport(_ConnectedTransportBase): _unqualified_scheme: str def __new__( cls, base: str, ca_certs: str | None = None, disable_verification: bool = False, user_agent: str | None = None, **kwargs: object, ) -> Self: ... def _head(self, relpath: str) -> http.HttpResponse: ... def _post(self, relpath: str, body: bytes) -> tuple[int, bytes]: ... def _remote_path(self, relpath: str) -> str: ... def _clone_from( self, other: http.HttpTransport, offset: str | None ) -> None: ... def _set_activity_callback( self, callback: Callable[[int, Literal["read", "write"]], None] ) -> None: ... def _get_bytes_inner(self, relpath: str) -> bytes: ... class RangeFile: def __enter__(self) -> http.RangeFile: ... def __exit__( self, exc_type: object = None, exc_val: object = None, exc_tb: object = None ) -> None: ... def close(self) -> None: ... def read(self, size: int = ...) -> bytes: ... def read_boundary(self) -> None: ... def read_range_definition(self) -> None: ... def seek(self, offset: int, whence: int = 0) -> None: ... def set_boundary(self, boundary: bytes) -> None: ... def set_range(self, start: int, size: int) -> None: ... def set_range_from_header(self, content_range: str) -> None: ... 
def tell(self) -> int: ... class ResponseFile: def __enter__(self) -> http.ResponseFile: ... def __exit__( self, exc_type: object = None, exc_val: object = None, exc_tb: object = None ) -> None: ... def __iter__(self) -> Iterator[bytes]: ... def close(self) -> None: ... def read(self, size: int | None = None) -> bytes: ... def readline(self) -> bytes: ... def readlines(self, size: int | None = None) -> list[bytes]: ... def seek(self, offset: int, whence: int = 0) -> None: ... def tell(self) -> int: ... SSL_CA_CERTS_KNOWN_LOCATIONS: list[str] @staticmethod def clear_ca_path_cache() -> None: ... @staticmethod def default_ca_certs() -> str: ... @staticmethod def default_cert_reqs() -> int: ... @staticmethod def default_user_agent() -> str: ... @staticmethod def digest_algorithm_supported(algorithm: str) -> bool: ... @staticmethod def digest_h(algorithm: str, data: str) -> str: ... @staticmethod def digest_kd(algorithm: str, secret: str, data: str) -> str: ... @staticmethod def evaluate_proxy_bypass(host: str, no_proxy: str) -> bool: ... @staticmethod def get_ca_path(use_cache: bool = True) -> str: ... @staticmethod def get_credential_lookup() -> ( Callable[..., tuple[str | None, str | None]] | None ): ... @staticmethod def get_credentials( protocol: str, host: str, port: int | None = None, path: str | None = None, realm: str | None = None, ) -> tuple[str | None, str | None]: ... @staticmethod def get_negotiate_provider() -> Callable[[str], str | None] | None: ... @staticmethod def get_new_cnonce(nonce: str, nonce_count: int) -> str: ... @staticmethod def get_token_provider() -> Callable[..., tuple[str | None, str | None]] | None: ... @staticmethod def handle_response( url: str, code: int, getheader: Callable[[str], str | None], data: IO[bytes] ) -> IO[bytes]: ... @staticmethod def parse_auth_header(header: str) -> tuple[str, dict[str, str]]: ... @staticmethod def parse_http_list(s: str) -> list[str]: ... 
@staticmethod def parse_keqv_list(items: list[str]) -> dict[str, str]: ... @staticmethod def set_auth_header_trace(func: Callable[[str], None] | None) -> None: ... @staticmethod def set_credential_lookup( func: Callable[..., tuple[str | None, str | None]] | None, ) -> None: ... @staticmethod def set_negotiate_provider(func: Callable[[str], str | None] | None) -> None: ... @staticmethod def set_token_provider( func: Callable[..., tuple[str | None, str | None]] | None, ) -> None: ... @staticmethod def set_user_agent(prefix: str) -> None: ... @staticmethod def splitport(host: str) -> tuple[str, int | None]: ... class memory: class MemoryStoreHandle: ... class MemoryTransport(_TransportBase): def __init__( self, url: str, _shared_store: memory.MemoryStoreHandle | None = None ) -> None: ... class local: class LocalTransport(_TransportBase): @classmethod def from_abspath(cls, abspath: str) -> local.LocalTransport: ... class urlutils: class URL: scheme: str user: str | None password: str | None quoted_password: str | None host: str port: int | None path: str @classmethod def from_string(cls, url: str) -> urlutils.URL: ... def clone(self, offset: str | None = None) -> urlutils.URL: ... def __str__(self) -> str: ... @staticmethod def basename(url: str, exclude_trailing_slash: bool = True) -> str: ... @staticmethod def combine_paths(base_path: str, relpath: str) -> str: ... @staticmethod def derive_to_location(base: str) -> str: ... @staticmethod def dirname(url: str, exclude_trailing_slash: bool = True) -> str: ... @staticmethod def escape(text: str, safe: str | None = None) -> str: ... @staticmethod def file_relpath(base: str, path: str) -> str: ... @staticmethod def is_url(url: str) -> bool: ... @staticmethod def join(url: str, *args: str) -> str: ... @staticmethod def join_segment_parameters(url: str, parameters: dict[str, str]) -> str: ... @staticmethod def join_segment_parameters_raw(url: str, *args: str) -> str: ... 
@staticmethod def joinpath(url: str, *args: str) -> str: ... @staticmethod def local_path_from_url(url: str) -> str: ... @staticmethod def local_path_to_url(path: str) -> str: ... @staticmethod def normalize_url(url: str) -> str: ... @staticmethod def parse_url( url: str, ) -> tuple[str, str | None, str | None, str, int | None, str]: ... @staticmethod def relative_url(base: str, url: str) -> str: ... @staticmethod def split(url: str, exclude_trailing_slash: bool = True) -> tuple[str, str]: ... @staticmethod def split_segment_parameters(url: str) -> tuple[str, dict[str, str]]: ... @staticmethod def split_segment_parameters_raw(url: str) -> tuple[str, list[str]]: ... @staticmethod def strip_segment_parameters(url: str) -> str: ... @staticmethod def strip_trailing_slash(url: str) -> str: ... @staticmethod def unescape(text: str) -> str: ... posix: object win32: object class brokenrename: class BrokenRenameTransportDecorator(_TransportBase): ... class fakenfs: class FakeNFSTransportDecorator(_TransportBase): ... class fakevfat: class FakeVFATTransportDecorator(_TransportBase): ... class log: class TransportLogDecorator(_TransportBase): ... class readonly: class ReadonlyTransportDecorator(_TransportBase): ... class unlistable: class UnlistableTransportDecorator(_TransportBase): ... class pathfilter: class ChrootTransport(_TransportBase): ... class PathFilteringTransport(_TransportBase): ... class ssh: class LSHSubprocessVendor: executable_path: str | None def connect_ssh( self, username: str, host: str, command: list[str], port: int | None = None ) -> ssh.SSHSubprocessConnection: ... def spawn_sftp( self, username: str, host: str, port: int | None = None ) -> int: ... class LoopbackVendor: def spawn_sftp(self, host: str, port: int) -> int: ... class OpenSSHSubprocessVendor: executable_path: str | None def connect_ssh( self, username: str, host: str, command: list[str], port: int | None = None ) -> ssh.SSHSubprocessConnection: ... 
class sftp:
    # Typing stub for the ``sftp`` namespace: signatures only, all bodies are
    # ``...``.
    # NOTE(review): nesting was reconstructed from a whitespace-collapsed
    # source -- confirm against the original stub.
    class SFTPAttributes:
        # Stat-like attributes; every field is typed as optional.
        st_size: int | None
        st_uid: int | None
        st_gid: int | None
        st_mode: int | None
        st_atime: float | None
        st_mtime: float | None

    class SFTPClient:
        # NOTE(review): ``fd`` is presumably a file descriptor for an already
        # established SFTP channel -- confirm with the implementation.
        def __init__(self, fd: int) -> None: ...
        def chmod(self, path: str, mode: int) -> None: ...
        def close(self) -> None: ...
        def extended(self, extension: str, data: bytes) -> bytes: ...
        # Returns a binary file object; ``mode`` and ``create_mode`` are
        # optional.
        def file(
            self, path: str, mode: str | None = None, create_mode: int | None = None
        ) -> IO[bytes]: ...
        def hardlink(self, oldpath: str, newpath: str) -> None: ...
        def listdir(self, path: str) -> list[sftp.SFTPAttributes]: ...
        def lstat(self, path: str, flags: int | None = None) -> sftp.SFTPAttributes: ...
        def mkdir(self, path: str, mode: int | None = None) -> None: ...
        # Open flags are keyword-only booleans, all defaulting to False.
        def open(
            self,
            path: str,
            attr: sftp.SFTPAttributes,
            *,
            read: bool = False,
            write: bool = False,
            append: bool = False,
            create: bool = False,
            truncate: bool = False,
            excl: bool = False,
        ) -> IO[bytes]: ...
        # The handle type is not described further by the stub.
        def opendir(self, path: str) -> object: ...
        def readlink(self, path: str) -> str: ...
        def realpath(
            self,
            path: str,
            control_byte: int | None = None,
            compose_path: str | None = None,
        ) -> str: ...
        def remove(self, path: str) -> None: ...
        def rename(
            self, oldpath: str, newpath: str, flags: int | None = None
        ) -> None: ...
        def rmdir(self, path: str) -> None: ...
        def setstat(self, path: str, attr: sftp.SFTPAttributes) -> None: ...
        def stat(self, path: str, flags: int | None = None) -> sftp.SFTPAttributes: ...
        def symlink(self, oldpath: str, newpath: str) -> None: ...

    # Exception type exposed by this namespace.
    class SFTPError(Exception): ...
def report_transport_activity(
    transport: object, byte_count: int, direction: Literal["read", "write"]
) -> None:
    """Hook invoked while a transport performs I/O.

    The default implementation deliberately does nothing; embedders replace
    it to surface activity in their own UI.
    """
    return None


def get_password(prompt: str = "", **kwargs: object) -> str:
    """Ask the user for a password; the default reads it with ``getpass``.

    When keyword arguments are supplied they are interpolated into
    ``prompt`` with the ``%`` operator before prompting.
    """
    from getpass import getpass

    text = prompt % kwargs if kwargs else prompt
    return getpass(text)


def get_username(prompt: str, **kwargs: object) -> str:
    """Ask the user for a username; the default reads it with ``input()``.

    When keyword arguments are supplied they are interpolated into
    ``prompt`` with the ``%`` operator before prompting.
    """
    text = prompt % kwargs if kwargs else prompt
    return input(text)


def show_message(msg: str) -> None:
    """Display ``msg`` to the user; the default prints it to stderr."""
    from sys import stderr

    print(msg, file=stderr)
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Transport decorator that simulates a transport with broken rename detection.""" from dromedary._transport_rs.brokenrename import BrokenRenameTransportDecorator __all__ = ["BrokenRenameTransportDecorator", "get_test_permutations"] def get_test_permutations() -> list[tuple[type, type]]: """Return the permutations to be used in testing.""" from dromedary.tests import test_server return [(BrokenRenameTransportDecorator, test_server.BrokenRenameServer)] dromedary-0.1.5/dromedary/cethread.py000066400000000000000000000167651520150013200176260ustar00rootroot00000000000000# Copyright (C) 2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Thread implementation that captures and re-raises exceptions. This module provides a thread class that catches exceptions occurring during thread execution and re-raises them when the thread is joined, allowing for better error handling in multi-threaded applications. """ import sys import threading import types from collections.abc import Callable, Iterable, Mapping class CatchingExceptionThread(threading.Thread): """A thread that keeps track of exceptions. 
If an exception occurs during the thread execution, it's caught and re-raised when the thread is joined(). """ ignored_exceptions: Callable[[BaseException], bool] | None exception: ( tuple[type[BaseException], BaseException, types.TracebackType] | tuple[None, None, None] | None ) def __init__( self, group: None = None, target: Callable[..., object] | None = None, name: str | None = None, args: Iterable[object] = (), kwargs: Mapping[str, object] | None = None, *, daemon: bool | None = None, sync_event: threading.Event | None = None, ) -> None: """Initialize a CatchingExceptionThread instance. Args: sync_event: An optional threading.Event used for synchronization. If not provided, a new Event will be created. This event is used to coordinate exception handling between threads. """ # There are cases where the calling thread must wait, yet, if an # exception occurs, the event should be set so the caller is not # blocked. The main example is a calling thread that want to wait for # the called thread to be in a given state before continuing. if sync_event is None: sync_event = threading.Event() super().__init__( group=group, target=target, name=name, args=args, kwargs=kwargs, daemon=daemon, ) self.set_sync_event(sync_event) self.exception = None self.ignored_exceptions = None # see set_ignored_exceptions self.lock = threading.Lock() def set_sync_event(self, event: threading.Event) -> None: """Set the ``sync_event`` event used to synchronize exception catching. When the thread uses an event to synchronize itself with another thread (setting it when the other thread can wake up from a ``wait`` call), the event must be set after catching an exception or the other thread will hang. Some threads require multiple events and should set the relevant one when appropriate. Note that the event should be initially cleared so the caller can wait() on him and be released when the thread set the event. 
Also note that the thread can use multiple events, setting them as it progress, while the caller can chose to wait on any of them. What matters is that there is always one event set so that the caller is always released when an exception is caught. Re-using the same event is therefore risky as the thread itself has no idea about which event the caller is waiting on. If the caller has already been released then a cleared event won't guarantee that the caller is still waiting on it. """ self.sync_event = event def switch_and_set(self, new: threading.Event) -> None: """Switch to a new ``sync_event`` and set the current one. Using this method protects against race conditions while setting a new ``sync_event``. Note that this allows a caller to wait either on the old or the new event depending on whether it wants a fine control on what is happening inside a thread. :param new: The event that will become ``sync_event`` """ cur = self.sync_event self.lock.acquire() try: # Always release the lock try: self.set_sync_event(new) # From now on, any exception will be synced with the new event except BaseException: # Unlucky, we couldn't set the new sync event, try restoring a # safe state self.set_sync_event(cur) raise # Setting the current ``sync_event`` will release callers waiting # on it, note that it will also be set in run() if an exception is # raised cur.set() finally: self.lock.release() def set_ignored_exceptions( self, ignored: Callable[[BaseException], bool] | None | list[type[Exception]] | type[Exception], ) -> None: """Declare which exceptions will be ignored. 
:param ignored: Can be either: - None: all exceptions will be raised, - an exception class: the instances of this class will be ignored, - a tuple of exception classes: the instances of any class of the list will be ignored, - a callable: that will be passed the exception object and should return True if the exception should be ignored """ if ignored is None: self.ignored_exceptions = None elif isinstance(ignored, (Exception, tuple)): self.ignored_exceptions = lambda e: isinstance(e, ignored) elif isinstance(ignored, list): self.ignored_exceptions = lambda e: isinstance(e, tuple(ignored)) # type: ignore else: self.ignored_exceptions = ignored # type: ignore def run(self) -> None: """Overrides Thread.run to capture any exception.""" self.sync_event.clear() try: try: super().run() except BaseException: self.exception = sys.exc_info() finally: # Make sure the calling thread is released self.sync_event.set() def join(self, timeout: float | None = None) -> None: """Overrides Thread.join to raise any exception caught. Calling join(timeout=0) will raise the caught exception or return None if the thread is still alive. """ super().join(timeout) if self.exception is not None: _exc_class, exc_value, _exc_tb = self.exception self.exception = None # The exception should be raised only once if exc_value is not None and ( self.ignored_exceptions is None or not self.ignored_exceptions(exc_value) ): # Raise non ignored exceptions raise exc_value def pending_exception(self) -> None: """Raise the caught exception. This does nothing if no exception occurred. """ self.join(timeout=0) dromedary-0.1.5/dromedary/chroot.py000066400000000000000000000040541520150013200173310ustar00rootroot00000000000000# Copyright (C) 2006-2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
class ChrootServer(pathfilter.PathFilteringServer):
    """Server exposing a backing transport through a user-space 'chroot'.

    All of the path sanitising needed to enforce the chroot is done by
    PathFilteringServer; this subclass simply never supplies a filter_func.
    """

    def __init__(self, backing_transport: "Transport") -> None:
        """Create the server on top of ``backing_transport`` with no filter function."""
        no_filter = None
        pathfilter.PathFilteringServer.__init__(self, backing_transport, no_filter)

    def _factory(self, url: str) -> ChrootTransport:
        # Transport factory registered under this server's scheme.
        transport = ChrootTransport(self, url)
        return transport

    def start_server(self) -> None:
        """Choose a per-instance scheme and register the transport factory for it."""
        self.scheme = f"chroot-{id(self):d}:///"
        register_transport(self.scheme, self._factory)
class TransportDecorator(Transport):
    """A no-change decorator for Transports.

    Subclasses of this are new transports that are based on an
    underlying transport and can override or intercept some
    behavior.  For example ReadonlyTransportDecorator prevents
    all write attempts, and FakeNFSTransportDecorator simulates
    some NFS quirks.

    This decorator class is not directly usable as a decorator:
    you must use a subclass which has overridden the _get_url_prefix() class
    method to return the url prefix for the subclass.
    """

    def __init__(
        self,
        url: str,
        _decorated: Transport | None = None,
        _from_transport: "TransportDecorator | None" = None,
    ) -> None:
        """Set the 'base' path of the transport.

        :param url: Decorated URL; it must start with the prefix returned by
            ``_get_url_prefix()``.
        :param _decorated: A private parameter for cloning.
        :param _from_transport: Is available for subclasses that
            need to share state across clones.
        :raises ValueError: if ``url`` does not start with the decorator prefix.
        """
        prefix = self._get_url_prefix()
        if not url.startswith(prefix):
            raise ValueError(
                f"url {url!r} doesn't start with decorator prefix {prefix!r}"
            )
        not_decorated_url = url[len(prefix) :]
        if _decorated is None:
            # No transport handed in: open one for the undecorated location,
            # which may be either a URL or a plain local path.
            if urlutils.is_url(not_decorated_url):
                self._decorated = get_transport_from_url(not_decorated_url)
            else:
                self._decorated = get_transport_from_path(not_decorated_url)
        else:
            self._decorated = _decorated
        super().__init__(prefix + self._decorated.base)

    def abspath(self, relpath: str) -> str:
        """See Transport.abspath()."""
        return self._get_url_prefix() + self._decorated.abspath(relpath)

    def append_file(self, relpath: str, f: IO[bytes], mode: int | None = None) -> int:
        """See Transport.append_file()."""
        return self._decorated.append_file(relpath, f, mode=mode)

    def append_bytes(self, relpath: str, bytes: bytes, mode: int | None = None) -> int:
        """See Transport.append_bytes()."""
        return self._decorated.append_bytes(relpath, bytes, mode=mode)

    def _can_roundtrip_unix_modebits(self) -> bool:
        """See Transport._can_roundtrip_unix_modebits()."""
        return self._decorated._can_roundtrip_unix_modebits()

    def clone(self, offset: str | None = None) -> "TransportDecorator":
        """See Transport.clone()."""
        decorated_clone = self._decorated.clone(offset)
        return self.__class__(
            self._get_url_prefix() + decorated_clone.base, decorated_clone, self
        )

    def delete(self, relpath: str) -> None:
        """See Transport.delete()."""
        return self._decorated.delete(relpath)

    def delete_tree(self, relpath: str) -> None:
        """See Transport.delete_tree()."""
        return self._decorated.delete_tree(relpath)

    def external_url(self) -> str:
        """See dromedary.Transport.external_url."""
        # while decorators are in-process only, they
        # can be handed back into breezy safely, so
        # it's just the base.
        return self.base

    @classmethod
    def _get_url_prefix(cls) -> str:
        """Return the URL prefix of this decorator.

        Must be overridden by concrete subclasses; this base implementation
        raises NotImplementedError.
        """
        raise NotImplementedError(cls._get_url_prefix)

    def get(self, relpath: str) -> IO[bytes]:
        """See Transport.get()."""
        return self._decorated.get(relpath)

    def has(self, relpath: str) -> bool:
        """See Transport.has()."""
        return self._decorated.has(relpath)

    def is_readonly(self) -> bool:
        """See Transport.is_readonly."""
        return self._decorated.is_readonly()

    def mkdir(self, relpath: str, mode: int | None = None) -> None:
        """See Transport.mkdir()."""
        return self._decorated.mkdir(relpath, mode)

    def open_write_stream(self, relpath: str, mode: int | None = None) -> "FileStream":
        """See Transport.open_write_stream."""
        return self._decorated.open_write_stream(relpath, mode=mode)

    def put_file(self, relpath: str, f: IO[bytes], mode: int | None = None) -> int:
        """See Transport.put_file()."""
        return self._decorated.put_file(relpath, f, mode)

    def put_bytes(self, relpath: str, bytes: bytes, mode: int | None = None) -> int:
        """See Transport.put_bytes()."""
        return self._decorated.put_bytes(relpath, bytes, mode)

    def listable(self) -> bool:
        """See Transport.listable."""
        return self._decorated.listable()

    def iter_files_recursive(self) -> Iterator[str]:
        """See Transport.iter_files_recursive()."""
        return self._decorated.iter_files_recursive()

    def list_dir(self, relpath: str) -> list[str]:
        """See Transport.list_dir()."""
        return self._decorated.list_dir(relpath)

    def _readv(
        self, relpath: str, offsets: list[tuple[int, int]]
    ) -> Iterator[tuple[int, bytes]]:
        """See Transport._readv."""
        return self._decorated._readv(relpath, offsets)

    def recommended_page_size(self) -> int:
        """See Transport.recommended_page_size()."""
        return self._decorated.recommended_page_size()

    def rename(self, rel_from: str, rel_to: str) -> None:
        """See Transport.rename."""
        return self._decorated.rename(rel_from, rel_to)

    def rmdir(self, relpath: str) -> None:
        """See Transport.rmdir."""
        return self._decorated.rmdir(relpath)

    # The segment parameters live on the decorated transport; the property
    # below reads and writes them there rather than on the decorator.
    def _get_segment_parameters(self) -> dict[str, str]:
        return self._decorated._segment_parameters

    def _set_segment_parameters(self, value: dict[str, str]) -> None:
        self._decorated._segment_parameters = value

    segment_parameters = property(
        _get_segment_parameters,
        _set_segment_parameters,
        doc="See Transport.segment_parameters",
    )

    def stat(self, relpath: str) -> "os.stat_result":
        """See Transport.stat()."""
        return self._decorated.stat(relpath)

    def lock_read(self, relpath: str) -> "Lock":
        """See Transport.lock_read."""
        return self._decorated.lock_read(relpath)

    def lock_write(self, relpath: str) -> "Lock":
        """See Transport.lock_write."""
        return self._decorated.lock_write(relpath)

    def _redirected_to(self, source: str, target: str) -> Transport | None:
        """Ask the decorated transport to follow a redirect and re-wrap the result.

        :return: A new decorator of the same class around the redirected
            transport, or None when the decorated transport returns None.
        """
        redirected = self._decorated._redirected_to(source, target)
        if redirected is None:
            return None
        return self.__class__(self._get_url_prefix() + redirected.base, redirected)
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Exception classes for dromedary transport layer.""" from typing import TYPE_CHECKING if TYPE_CHECKING: from dromedary import Transport class TransportError(Exception): """Base class for transport-related errors.""" internal_error = False _fmt = "Transport error: %(msg)s %(orig_error)s" def __init__(self, msg: str | None = None, orig_error: object = None) -> None: """Initialize with an optional message and originating error.""" if msg is None and orig_error is not None: msg = str(orig_error) if orig_error is None: orig_error = "" if msg is None: msg = "" self.msg = msg self.orig_error = orig_error Exception.__init__(self) def _get_format_string(self) -> str | None: return self._fmt def __str__(self) -> str: """Return the formatted error message.""" fmt = self._get_format_string() if fmt is not None: d = dict(self.__dict__) try: return fmt % d except (KeyError, TypeError): pass if self.args: return str(self.args[0]) return self.msg or "" def __eq__(self, other: object) -> bool: """Return True if both errors are of the same class and have equal state.""" if self.__class__ is not other.__class__: return NotImplemented # type: ignore[return-value] return self.__dict__ == other.__dict__ def __hash__(self) -> int: """Return a hash based on object identity.""" return id(self) def __repr__(self) -> str: """Return a debug representation including the instance dict.""" return f"<{self.__class__.__name__}({self.__dict__!r})>" class PathError(TransportError): """Generic path-related error.""" _fmt = "Generic path error: %(path)r%(extra)s)" def __init__(self, path: str, extra: str | BaseException | None = None) -> None: """Initialize with the offending path and optional extra detail.""" TransportError.__init__(self) self.path = path if extra: self.extra = ": " + 
str(extra) else: self.extra = "" class NotADirectory(PathError): """Raised when a path is expected to be a directory but is not.""" _fmt = '"%(path)s" is not a directory %(extra)s' class DirectoryNotEmpty(PathError): """Raised when an operation requires an empty directory.""" _fmt = 'Directory not empty: "%(path)s"%(extra)s' class ResourceBusy(PathError): """Raised when the target resource is currently busy.""" _fmt = 'Device or resource busy: "%(path)s"%(extra)s' class PermissionDenied(PathError): """Raised when access to a path is denied.""" _fmt = 'Permission denied: "%(path)s"%(extra)s' class NoSuchFile(PathError): """Raised when a referenced file or directory does not exist.""" _fmt = 'No such file or directory: "%(path)s"%(extra)s' class FileExists(PathError): """Raised when a file unexpectedly already exists.""" _fmt = 'File exists: "%(path)s"%(extra)s' class UnsupportedProtocol(PathError): """Raised when no transport supports the URL's protocol.""" _fmt = 'Unsupported protocol for url "%(path)s"%(extra)s' class ReadError(PathError): """Raised when reading from a path fails.""" _fmt = "Error reading from %(path)r%(extra)s." 
class ShortReadvError(PathError): """Raised when a readv call returned fewer bytes than requested.""" _fmt = ( "readv() read %(actual)s bytes rather than %(length)s bytes" ' at %(offset)s for "%(path)s"%(extra)s' ) internal_error = True def __init__( self, path: str, offset: int, length: int, actual: int, extra: str | BaseException | None = None, ) -> None: """Initialize with the path, requested offset/length and actual bytes read.""" PathError.__init__(self, path, extra=extra) self.offset = offset self.length = length self.actual = actual class PathNotChild(PathError, ValueError): """Raised when a path is not a descendant of an expected base path.""" _fmt = 'Path "%(path)s" is not a child of path "%(base)s"%(extra)s' internal_error = False def __init__(self, path: str, base: str, extra: str | None = None) -> None: """Initialize with the path, expected base path and optional extra detail.""" TransportError.__init__(self) self.path = path self.base = base if extra: self.extra = ": " + str(extra) else: self.extra = "" class TransportNotPossible(TransportError): """Raised when an operation is not supported by the transport.""" _fmt = "Transport operation not possible: %(msg)s %(orig_error)s" class NotLocalUrl(TransportError): """Raised when a URL was expected to refer to a local path but does not.""" _fmt = "%(url)s is not a local path." 
def __init__(self, url: str) -> None: """Initialize with the offending URL.""" self.url = url TransportError.__init__(self) class DependencyNotPresent(TransportError): """A required dependency for a transport is not present.""" _fmt = 'Unable to import library "%(library)s": %(error)s' def __init__(self, library: str, error: str | BaseException) -> None: """Initialize with the missing library name and import error.""" self.library = library self.error = error TransportError.__init__(self) class RedirectRequested(TransportError): """Raised when the server requested a redirect to another URL.""" _fmt = "%(source)s is%(permanently)s redirected to %(target)s" def __init__(self, source: str, target: str, is_permanent: bool = False) -> None: """Initialize with the source URL, target URL and whether permanent.""" self.source = source self.target = target if is_permanent: self.permanently = " permanently" else: self.permanently = "" TransportError.__init__(self) class TooManyRedirections(TransportError): """Raised when the maximum redirect chain length was exceeded.""" _fmt = "Too many redirections" class InProcessTransport(TransportError): """Raised when a transport can only be reached from within this process.""" _fmt = "The transport '%(transport)s' is only accessible within this process." def __init__(self, transport: "Transport") -> None: """Initialize with the in-process-only transport.""" self.transport = transport TransportError.__init__(self) class ConnectionError(TransportError): """Raised when a transport connection fails.""" _fmt = "Connection error: %(msg)s" class UnusableRedirect(TransportError): """Raised when a redirect cannot be followed.""" _fmt = "Unable to follow redirect from %(source)s to %(target)s: %(reason)s." 
def __init__(self, source: str, target: str, reason: str) -> None: """Initialize with the source URL, target URL and reason.""" TransportError.__init__(self) self.source = source self.target = target self.reason = reason # HTTP-specific errors class InvalidHttpResponse(TransportError): """Raised when an HTTP response could not be parsed or was unexpected.""" _fmt = "Invalid http response for %(path)s: %(msg)s%(orig_error)s" def __init__( self, path: str, msg: str, orig_error: str | BaseException | None = None, headers: dict[str, str] | None = None, ) -> None: """Initialize with the path, message, original error and headers.""" self.path = path if orig_error is None: orig_error = "" else: orig_error = f": {orig_error!r}" self.headers = headers TransportError.__init__(self, msg, orig_error=orig_error) class UnexpectedHttpStatus(InvalidHttpResponse): """Raised when an HTTP response had an unexpected status code.""" _fmt = "Unexpected HTTP status %(code)d for %(path)s: %(extra)s" def __init__( self, path: str, code: int, extra: str | None = None, headers: dict[str, str] | None = None, ) -> None: """Initialize with the path, HTTP status code, optional extra and headers.""" self.path = path self.code = code self.extra = extra or "" full_msg = "status code %d unexpected" % code if extra is not None: full_msg += ": " + extra InvalidHttpResponse.__init__(self, path, full_msg, headers=headers) class InvalidHttpRange(InvalidHttpResponse): """Raised when an HTTP range request returned an invalid range.""" _fmt = "Invalid http range %(range)r for %(path)s: %(msg)s" def __init__(self, path: str, range: str | tuple[int, int], msg: str) -> None: """Initialize with the path, requested range and message.""" self.range = range InvalidHttpResponse.__init__(self, path, msg) class HttpBoundaryMissing(InvalidHttpResponse): """Raised when a multipart HTTP response is missing its MIME boundary.""" _fmt = "HTTP MIME Boundary missing for %(path)s: %(msg)s" def __init__(self, path: str, msg: 
str) -> None: """Initialize with the path and message.""" InvalidHttpResponse.__init__(self, path, msg) class BadHttpRequest(UnexpectedHttpStatus): """Raised when the server reported a bad HTTP request.""" _fmt = "Bad http request for %(path)s: %(reason)s" def __init__(self, path: str, reason: str) -> None: """Initialize with the path and reason.""" self.path = path self.reason = reason TransportError.__init__(self, reason) class InvalidRange(TransportError): """Raised when a range read targets an invalid offset.""" _fmt = "Invalid range access in %(path)s at %(offset)s: %(msg)s" def __init__(self, path: str, offset: int, msg: str | None = None) -> None: """Initialize with the path, offset and optional message.""" TransportError.__init__(self, msg) self.path = path self.offset = offset # Smart protocol errors class SmartProtocolError(TransportError): """Generic error in the bzr smart protocol.""" _fmt = "Generic bzr smart protocol error: %(details)s" def __init__(self, details: str) -> None: """Initialize with the protocol error details.""" self.details = details TransportError.__init__(self) class ErrorFromSmartServer(TransportError): """An error tuple was received from a smart server.""" _fmt = "Error received from smart server: %(error_tuple)r" internal_error = True def __init__(self, error_tuple: tuple[bytes, ...]) -> None: """Initialize with the raw error tuple from the smart server.""" self.error_tuple = error_tuple try: self.error_verb: bytes | None = error_tuple[0] except IndexError: self.error_verb = None self.error_args = error_tuple[1:] TransportError.__init__(self) class UnexpectedSmartServerResponse(TransportError): """The smart server returned a response that could not be understood.""" _fmt = "Could not understand response from smart server: %(response_tuple)r" def __init__(self, response_tuple: tuple[bytes, ...]) -> None: """Initialize with the unexpected response tuple.""" self.response_tuple = response_tuple TransportError.__init__(self) class 
UnknownSmartMethod(TransportError): """The smart server did not recognise the requested verb.""" _fmt = "The server does not recognise the '%(verb)s' request." internal_error = True def __init__(self, verb: str) -> None: """Initialize with the unrecognised verb.""" self.verb = verb TransportError.__init__(self) # File-level locking errors raised by transport implementations. # # These class names are imported by the Rust extensions (see # dromedary/_transport_rs/src/lib.rs), so they must stay at module level. # Higher-level lock concepts (repository/branch/working-tree locks) belong # in the consuming application (e.g. breezy.errors), which translates these # at the boundary if it wants to surface them as its own lock errors. class LockContention(TransportError): """Raised when a lock is held by another process.""" _fmt = 'Could not acquire lock "%(lock)s": %(msg)s' internal_error = False def __init__(self, lock: str, msg: str = "") -> None: """Initialize with the contended lock and optional message.""" self.lock = lock self.msg = msg TransportError.__init__(self) class LockFailed(TransportError): """Raised when acquiring a lock fails for reasons other than contention.""" internal_error = False _fmt = "Cannot lock %(lock)s: %(why)s" def __init__(self, lock: str, why: str) -> None: """Initialize with the lock and the reason it could not be acquired.""" self.lock = lock self.why = why TransportError.__init__(self) class SocketConnectionError(ConnectionError): """Socket connection error.""" _fmt = "%(formatted_msg)s" def __init__( self, host: str, port: int | None = None, msg: str | None = None, orig_error: str | BaseException | None = None, ) -> None: """Initialize with the host, optional port, message and originating error.""" if msg is None: msg = "Failed to connect to" orig_error_str = "" if orig_error is None else "; " + str(orig_error) self.host = host port_str = "" if port is None else f":{port}" self.port = port_str self.formatted_msg = f"{msg} 
{host}{port_str}{orig_error_str}" ConnectionError.__init__(self, self.formatted_msg) class StrangeHostname(TransportError): """Refusing to connect to strange SSH hostname.""" _fmt = "Refusing to connect to strange SSH hostname %(hostname)s" def __init__(self, hostname: str) -> None: """Initialize with the rejected hostname.""" self.hostname = hostname TransportError.__init__(self) dromedary-0.1.5/dromedary/fakenfs.py000066400000000000000000000022171520150013200174470ustar00rootroot00000000000000# Copyright (C) 2005, 2006, 2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Transport decorator simulating NFS quirks.""" from dromedary._transport_rs.fakenfs import FakeNFSTransportDecorator __all__ = ["FakeNFSTransportDecorator", "get_test_permutations"] def get_test_permutations() -> list[tuple[type, type]]: """Return the permutations to be used in testing.""" from dromedary.tests import test_server return [(FakeNFSTransportDecorator, test_server.FakeNFSServer)] dromedary-0.1.5/dromedary/fakevfat.py000066400000000000000000000022321520150013200176160ustar00rootroot00000000000000# Copyright (C) 2006 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Transport decorator simulating VFAT filesystem restrictions.""" from dromedary._transport_rs.fakevfat import FakeVFATTransportDecorator __all__ = ["FakeVFATTransportDecorator", "get_test_permutations"] def get_test_permutations() -> list[tuple[type, type]]: """Return the permutations to be used in testing.""" from dromedary.tests import test_server return [(FakeVFATTransportDecorator, test_server.FakeVFATServer)] dromedary-0.1.5/dromedary/gio_transport.py000066400000000000000000000043141520150013200207240ustar00rootroot00000000000000# Copyright (C) 2010 Canonical Ltd. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Author: Mattias Eriksson """Implementation of Transport over gio. It provides the gio+XXX:// protocols where XXX is any of the protocols supported by gio (file, sftp, smb, dav, ftp, ssh, obex). The transport is implemented in Rust against the gtk-rs `gio` crate, gated behind a non-default `gio` Cargo feature. When dromedary is built without that feature, importing this module raises DependencyNotPresent to match the historical behaviour when the legacy Python `gio` module was missing. 
""" from dromedary import urlutils from dromedary.errors import DependencyNotPresent from dromedary.tests.test_server import TestServer try: from dromedary._transport_rs.gio import GioTransport except ImportError as e: raise DependencyNotPresent("gio", e) from e __all__ = ["GioLocalURLServer", "GioTransport", "get_test_permutations"] class GioLocalURLServer(TestServer): """A pretend server for local transports, using gio+file:// urls. Of course no actual server is required to access the local filesystem, so this just exists to tell the test code how to get to it. """ def start_server(self) -> None: """Start the server (no-op for local filesystem access).""" pass def get_url(self) -> str: """See Transport.Server.get_url.""" return "gio+" + urlutils.local_path_to_url("") def get_test_permutations() -> list[tuple[type, type]]: """Return the permutations to be used in testing.""" return [(GioTransport, GioLocalURLServer)] dromedary-0.1.5/dromedary/http/000077500000000000000000000000001520150013200164355ustar00rootroot00000000000000dromedary-0.1.5/dromedary/http/__init__.py000066400000000000000000000156261520150013200205600ustar00rootroot00000000000000# Copyright (C) 2005-2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Base implementation of Transport over http. 
This module is a thin facade over ``_transport_rs.http``. The User- Agent prefix, the credential-lookup callback, CA bundle resolution (including native-store materialisation on Windows/macOS), and the default certificate-verification requirement all live in Rust; the helpers here just delegate. Breezy overrides ``ssl_ca_certs`` and ``ssl_cert_reqs`` by reassigning the module attributes, so those stay as plain callables at the module level rather than functions that always consult the Rust state. """ DEBUG = 0 from collections.abc import Callable from dromedary.version import version_string as dromedary_version from .._transport_rs import http as _http_rs # Seed the Rust-held User-Agent prefix with our own default; breezy's # transport layer calls set_user_agent() later to replace it. _http_rs.set_user_agent(f"Dromedary/{dromedary_version}") def set_user_agent(prefix: str) -> None: """Set the User-Agent prefix for HTTP requests. Args: prefix: The User-Agent string to use, e.g. "Breezy/3.4.0". """ _http_rs.set_user_agent(prefix) def default_user_agent() -> str: """Get the default User-Agent string for HTTP requests.""" return _http_rs.default_user_agent() def set_credential_lookup( func: Callable[..., tuple[str | None, str | None]] | None, ) -> None: """Set the function used to look up HTTP credentials. Args: func: A callable(protocol, host, port=None, path=None, realm=None) returning (user, password) or (None, None). Pass ``None`` to clear any previously-registered callback. """ _http_rs.set_credential_lookup(func) def get_credential_lookup() -> Callable[..., tuple[str | None, str | None]] | None: """Return the currently-registered credential-lookup callable, or None.""" return _http_rs.get_credential_lookup() def set_negotiate_provider(func: Callable[[str], str | None] | None) -> None: """Register a Negotiate / Kerberos initial-token provider. 
The callable is invoked as ``func(host)`` and should return a base64-encoded GSSAPI token string to send after ``Negotiate `` in the Authorization header, or ``None`` if no token is available (no Kerberos ticket, library missing, wrong realm). Pass ``None`` to clear any previously-registered callback. """ _http_rs.set_negotiate_provider(func) def get_negotiate_provider() -> Callable[[str], str | None] | None: """Return the currently-registered Negotiate provider, or None.""" return _http_rs.get_negotiate_provider() def set_token_provider( func: Callable[..., tuple[str | None, str | None]] | None, ) -> None: """Register a preemptive bearer-token provider. The callable is invoked as ``func(protocol, host, port=None, path=None)`` and should return ``(token, scheme)`` (or ``(None, None)`` when no token applies). The HTTP client attaches ``Authorization: `` before the request goes on the wire — no server challenge required. Caller-supplied ``Authorization`` headers always win. Pass ``None`` to clear any previously-registered callback. """ _http_rs.set_token_provider(func) def get_token_provider() -> Callable[..., tuple[str | None, str | None]] | None: """Return the currently-registered token provider, or None.""" return _http_rs.get_token_provider() def set_auth_header_trace(func: Callable[[str], None] | None) -> None: """Register a callback invoked when auth credentials are sent. The Rust HTTP client calls ``func(header_name)`` just before sending a request carrying an ``Authorization`` or ``Proxy-Authorization`` header. Breezy wires this up to emit a debug-flag-controlled ``trace.mutter(">
: ")`` line so users can confirm auth happened without leaking the credential value itself. Pass ``None`` to clear any previously-registered callback. """ _http_rs.set_auth_header_trace(func) def _default_kerberos_provider(host: str) -> str | None: """Default Negotiate provider using the Python `kerberos` module. Matches the behaviour of breezy's old NegotiateAuthHandler: if the `kerberos` module isn't installed, or the GSSAPI context setup fails for any reason, we return None so the auth layer falls back to Digest/Basic. """ try: import kerberos except ModuleNotFoundError: return None ret, vc = kerberos.authGSSClientInit(f"HTTP@{host}") if ret < 1: return None ret = kerberos.authGSSClientStep(vc, "") if ret < 0: return None return kerberos.authGSSClientResponse(vc) # Install the default provider at import time. Callers that want to # disable Kerberos can set_negotiate_provider(None); callers that # want to swap in an alternative (e.g. NTLM) replace it outright. _http_rs.set_negotiate_provider(_default_kerberos_provider) def get_credentials( protocol: str, host: str, port: int | None = None, path: str | None = None, realm: str | None = None, ) -> tuple[str | None, str | None]: """Look up stored credentials for an HTTP connection.""" return _http_rs.get_credentials(protocol, host, port=port, path=path, realm=realm) # Known CA bundle locations. Exported for compatibility; the # authoritative list lives in the Rust ``dromedary::http`` module. _ssl_ca_certs_known_locations = list(_http_rs.SSL_CA_CERTS_KNOWN_LOCATIONS) def default_ca_certs() -> str: """Get the default path to CA certificates for SSL verification. On Windows and macOS this returns the path to a PEM tempfile containing the platform's native root store (written once per process). On Linux it returns the first pre-installed bundle found in ``_ssl_ca_certs_known_locations``. """ return _http_rs.default_ca_certs() def default_cert_reqs() -> int: """Get the default certificate verification requirement. 
Returns an integer matching ``ssl.CERT_NONE`` (0) or ``ssl.CERT_REQUIRED`` (2). On Windows and macOS, returns ``CERT_NONE`` historically — see ``_transport_rs.http.default_cert_reqs`` for the rationale. """ return _http_rs.default_cert_reqs() ssl_ca_certs = default_ca_certs ssl_cert_reqs = default_cert_reqs dromedary-0.1.5/dromedary/http/ca_bundle.py000066400000000000000000000026441520150013200207310ustar00rootroot00000000000000# Copyright (C) 2007 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Auto-detect of CA bundle for SSL connections. The lookup logic lives in the Rust `_transport_rs.http` module; this file re-exports a thin wrapper so that existing callers keep working. """ from .._transport_rs import http as _http_rs def get_ca_path(use_cache: bool = True) -> str: """Return location of CA bundle. Honours the ``CURL_CA_BUNDLE`` environment variable and, on Windows, searches the application directory and ``PATH`` for ``curl-ca-bundle.crt``. Returns an empty string when no bundle can be located. 
""" return _http_rs.get_ca_path(use_cache) def _clear_cache() -> None: """Clear the cached CA path (for tests).""" _http_rs.clear_ca_path_cache() dromedary-0.1.5/dromedary/http/response.py000066400000000000000000000023571520150013200206540ustar00rootroot00000000000000# Copyright (C) 2006-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Handlers for HTTP Responses. Thin re-export shim over the Rust implementation in ``_transport_rs.http`` which owns the real ``ResponseFile`` / ``RangeFile`` / ``handle_response`` logic. Keeping this module around lets ``from dromedary.http.response import ...`` continue to work for anyone still importing by the old path (urllib.py, breezy tests). """ from dromedary._transport_rs.http import ( RangeFile, ResponseFile, handle_response, ) __all__ = ["RangeFile", "ResponseFile", "handle_response"] dromedary-0.1.5/dromedary/http/urllib.py000066400000000000000000000502671520150013200203120ustar00rootroot00000000000000# Copyright (C) 2005-2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """HTTP transport: thin Python subclass over the Rust HttpTransport. The Rust class (``dromedary._transport_rs.http.HttpTransport``) implements the whole Transport protocol including ``readv``, ``_post``, ``_head``, ``_options``, the range-hint degradation loop, and redirect handling. This module subclasses it and layers: * breezy's ``_medium`` slot (populated by ``get_smart_medium``) * resolution of ``ssl_ca_certs`` / ``ssl_cert_reqs`` from the dromedary module-level hooks before calling the Rust constructor * ``_redirected_to`` URL surgery for cross-transport redirects * a ``urlencode``-style ``request`` wrapper that accepts the ``fields`` / ``retries`` kwargs the pre-Rust API exposed * the ``get_test_permutations`` entry point used by ``dromedary/tests/per_transport.py`` """ import sys from io import BytesIO from typing import IO, TYPE_CHECKING, Literal from urllib.parse import urlencode if sys.version_info >= (3, 11): from typing import Self else: from typing_extensions import Self import dromedary as _mod_dromedary from dromedary._transport_rs import http as _http_rs from dromedary.errors import RedirectRequested, UnusableRedirect if TYPE_CHECKING: from collections.abc import Callable # Re-export for backwards compatibility with code that imports the # HttpClient pyclass from dromedary.http.urllib. HttpClient = _http_rs.HttpClient class HttpTransport(_http_rs.HttpTransport): """HTTP(S) transport. 
The ``+impl`` suffix in URLs like ``http+urllib://host/`` is accepted and dropped — there is only one implementation now. """ def __new__( cls, base: str, _from_transport: "HttpTransport | None" = None, ca_certs: str | None = None, ) -> Self: """Build the Rust transport. We defer TLS-related knobs (``ca_certs``) to ``__init__`` because breezy test subclasses add the ``ca_certs`` argument in their own ``__init__`` and call ``super().__init__(..., ca_certs=...)`` — which only sees the argument after ``__new__`` has already run. Keep ``__new__`` doing the bare-minimum base-URL setup; ``__init__`` rebuilds the underlying client when ``ca_certs`` finally arrives. When ``_from_transport`` is supplied we construct a fresh Rust instance at ``base`` and then graft the source's HttpClient / auth cache / range hint onto it via ``_clone_from`` — so clones share all the per-client state without losing the Python subclass identity. """ self = super().__new__( cls, base, ca_certs=None, disable_verification=False, user_agent=_default_user_agent(), ) if _from_transport is not None: # Compute the offset so the grafted state targets the # right base URL, then swap in the shared state. offset = _offset_from_base(_from_transport.base, base) self._clone_from(_from_transport, offset) return self def __init__( self, base: str, _from_transport: "HttpTransport | None" = None, ca_certs: str | None = None, ) -> None: """Initialise Python-side state and TLS-configured inner client. Rust ``__new__`` populates the base-URL state with a minimal default client. TLS knobs take effect here so subclasses that override ``__init__`` and call ``super().__init__(..., ca_certs=...)`` pick up correctly — otherwise the certs would arrive after the underlying client was built with the wrong ones. We rebuild the whole inner state (fresh HttpClient with the right TLS config) and graft it in. 
""" self._medium = None if _from_transport is None: import ssl as _ssl import dromedary.http as _mod_http if ca_certs is None: configured = _mod_http.ssl_ca_certs() if configured: ca_certs = configured disable_verification = _mod_http.ssl_cert_reqs() == _ssl.CERT_NONE if disable_verification: ca_certs = None # Build a fresh Rust transport with the right TLS # config, then swap our inner state with it. Creating # a new HttpTransport via the pyclass constructor is # the simplest way to get the config plumbed all the # way down to the reqwest Client — there's no single # setter that re-derives everything. from dromedary._transport_rs.http import ( HttpTransport as _RsHttpTransport, ) fresh = _RsHttpTransport( base, ca_certs=ca_certs, disable_verification=disable_verification, user_agent=_default_user_agent(), ) # ``offset=None`` lets ``_clone_from`` # share ``fresh``'s inner directly, preserving raw_base # (empty-password URL shape) and segment parameters that # ``clone_concrete(None)`` would otherwise strip. self._clone_from(fresh, None) # Wire an activity callback into the Rust transport so # internal get/has/post/readv calls feed breezy's progress # UI too, not just the explicit ``.request()`` path. We use # a lambda that re-looks-up ``_report_activity`` on each # invocation rather than a bound method so tests that # override ``_report_activity`` at class level (notably # ``TestActivity``) still see the replacement. import weakref wself = weakref.ref(self) def _forward(byte_count: int, direction: Literal["read", "write"]) -> None: t = wself() if t is None: return t._report_activity(byte_count, direction) self._set_activity_callback(_forward) def clone(self, offset: str | None = None) -> "HttpTransport": """Return a new transport sharing this transport's HttpClient. 
Uses ``urlutils.URL.clone`` path-combine semantics rather than ``abspath`` URL-join semantics — an absolute-URL ``offset`` is treated as a path fragment appended to the current base, not as a wholesale base replacement. Breezy's ``do_catching_redirections`` test relies on this quirk: the redirect callback clones at ``exception.target`` to trigger a controlled loop, which only loops if the host stays put. """ if offset is None: new_base = self.base else: from dromedary._transport_rs.urlutils import URL new_base = str(URL.from_string(self.base).clone(offset)) return type(self)(new_base, _from_transport=self) def _report_activity( self, byte_count: int, direction: Literal["read", "write"] ) -> None: """Report byte-count progress to the dromedary UI hook. Called back from the Rust client during ``request()``; feeds into breezy's transport-activity progress bar. """ from dromedary import _ui _ui.report_transport_activity(self, byte_count, direction) # ------------------------------------------------------------------ # Request wrapper — accepts the legacy ``fields`` / ``retries`` args. # The Rust ``request`` uses ``follow_redirects`` (bool) and no # fields encoding. def request( self, method: str, url: str, headers: dict[str, str] | None = None, body: bytes | None = None, follow_redirects: bool = False, report_activity: "Callable[[int, Literal['read', 'write']], None] | None" = None, fields: dict[str, str] | None = None, retries: int = 0, **urlopen_kw: object, ) -> _http_rs.HttpResponse: """Issue a single HTTP request. ``body`` and ``fields`` are mutually exclusive; ``retries > 0`` enables redirect following for this call (matching the pre-Rust API shape). Any remaining keyword arguments raise ``NotImplementedError`` to catch typos early. 
""" if fields is not None: if body is not None: raise ValueError("body and fields are mutually exclusive") body = urlencode(fields).encode() if headers is None: headers = {} if retries > 0: follow_redirects = True if urlopen_kw: raise NotImplementedError(f"unknown arguments: {list(urlopen_kw.keys())!r}") response = super().request( method, url, headers=headers, body=body, follow_redirects=follow_redirects, report_activity=report_activity or self._report_activity, ) code = response.status if not follow_redirects and code in (301, 302, 303, 307, 308): redirected_to = response.redirected_to or url raise RedirectRequested( url, redirected_to, is_permanent=(code in (301, 308)), ) return response # ------------------------------------------------------------------ # has() / _post() — breezy-shape convenience wrappers. # `has` uses HEAD so the Rust Transport::has (which does GET) is # overridden to avoid pulling a response body just to check # existence. def has(self, relpath: str) -> bool: """Does the target location exist?""" response = self._head(relpath) return response.status == 200 def _post(self, relpath: str, body: bytes) -> tuple[int, bytes]: """POST `body` to `relpath` on this transport.""" return super()._post(relpath, body) # ------------------------------------------------------------------ # Historical breezy-facing helpers. These were public-ish API on # the pre-Rust urllib transport; breezy's own tests and a handful # of production code paths reach into them, so we keep them as # thin shims over the Rust readv / range-header logic. def _get( self, relpath: str, offsets: list | None, tail_amount: int = 0 ) -> tuple[int, IO[bytes]]: """Range-GET ``relpath`` returning ``(code, seekable_bytes)``. `offsets` is either `None` (fetch the whole file) or a list of `_CoalescedOffset` objects from `_coalesce_offsets`. 
The second element of the returned tuple is a `BytesIO` big enough that `.seek(abs_offset, SEEK_SET)` followed by `.read(length)` produces the bytes that were originally requested — i.e. a sparse file-in-memory. The Python urllib transport used to return a live HTTP body that handled the sparseness via content-range parsing. With the Rust readv machinery doing that work for us, we reconstitute the same sparse-file shape by dropping each range's data at its absolute offset in a BytesIO. """ if not offsets and not tail_amount: # Whole-file fetch. data = self._get_bytes_inner(relpath) return 200, BytesIO(data) # Expand the coalesced-offset structs into the (start, length) # pairs readv wants. _CoalescedOffset carries a `ranges` list # of (sub_offset, sub_length) pairs relative to `start`. pairs = [] if offsets: for coal in offsets: for sub_off, sub_len in coal.ranges: pairs.append((coal.start + sub_off, sub_len)) from dromedary.errors import ( InvalidHttpRange as _InvalidHttpRange, ) from dromedary.errors import ( ShortReadvError as _ShortReadvError, ) # Reject zero-length ranges as syntactically invalid — the # server-side form `bytes=start-end` with start > end is # rejected by any conforming HTTP server, and breezy's # test_syntactically_invalid_range_header counts on us # raising InvalidHttpRange locally rather than either # silently succeeding or letting the server surface the # failure with its own error shape. if any(length <= 0 for _, length in pairs): abspath = self._remote_path(relpath) raise _InvalidHttpRange( abspath, ",".join(f"{s}-{s + l - 1}" for s, l in pairs), "zero-length byte range", ) if tail_amount: # Need the total file length to compute absolute tail # offset; use the Rust stat / HEAD helper. resp = self._head(relpath) length = int(resp.getheader("content-length") or 0) pairs.append((max(length - tail_amount, 0), tail_amount)) # Compute an upper bound so the BytesIO is large enough to # seek into for each returned range. 
We size it to the # highest (offset + length) any caller will seek+read. highest = max(start + length for start, length in pairs) out = BytesIO(b"\0" * highest) try: for offset, chunk in self.readv(relpath, pairs): out.seek(offset) out.write(chunk) except _ShortReadvError as e: # Readv ran past the end of the file. At the `_get` API # layer this means the caller asked for an out-of-range # byte range — Python urllib raised InvalidHttpRange # here, matching what breezy's TestRanges expect. # Preserve the original ShortReadv context so callers # debugging a live failure still see where it came from. raise _InvalidHttpRange( self._remote_path(relpath), "bytes=%d-%d" % pairs[0] if pairs else "", str(e), ) from e out.seek(0) return 206, out def _get_bytes_inner(self, relpath: str) -> bytes: """Fetch the entire body of `relpath` as bytes. Separate helper so ``_get`` can call it without going through the Python ``get()`` wrapper that returns a file-like object. """ f = self.get(relpath) try: return f.read() finally: if hasattr(f, "close"): f.close() @staticmethod def _range_header(ranges: list, tail_amount: int) -> str: """Build an HTTP Range header value from coalesced offsets. Historical public-ish API — breezy's TestRangeHeader unit tests call this directly to verify the byte-range encoding. The Rust side does the same formatting internally inside `HttpTransport::attempted_range_header`; this Python staticmethod reimplements the simple case the tests need without going through a full HTTP round-trip. """ strings = [ "%d-%d" % (offset.start, offset.start + offset.length - 1) for offset in ranges ] if tail_amount: strings.append("-%d" % tail_amount) return ",".join(strings) # ------------------------------------------------------------------ # Breezy-facing redirect fix-up. The Rust side surfaces a 3xx as # RedirectRequested(source, target); breezy then calls this to # build a transport to retry the request against. 
def _redirected_to(self, source: str, target: str) -> "_mod_dromedary.AnyTransport":
    """Build the transport on which a redirected request should be retried.

    ``source`` is the URL that was requested and ``target`` the URL the
    server redirected to.  The part of ``source`` that lies below this
    transport's base is treated as the relpath of the request; the redirect
    is only followed when ``target`` ends with that same relpath, i.e. the
    final part of the URL was not renamed.

    Returns:
        ``self.clone(...)`` when the target stays on the same scheme, host
        and port (and names no different user); otherwise a transport
        obtained from ``_mod_dromedary.get_transport_from_url``.

    Raises:
        UnusableRedirect: If the target path does not end with the relpath
            that was requested; the caller decides what to do then.
    """
    from dromedary import urlutils

    parsed_source = urlutils.URL.from_string(source)
    parsed_target = urlutils.URL.from_string(target)
    self_url = urlutils.URL.from_string(self.base)

    # Whatever the source path carries beyond our own base path is the
    # relpath that was being fetched when the redirect happened.
    requested_tail = parsed_source.path[len(self_url.path) :].strip("/")
    if not parsed_target.path.endswith(requested_tail):
        raise UnusableRedirect(source, target, "final part of the url was renamed")

    # Drop that relpath again so the new transport is rooted at the
    # redirected *base*, not at the redirected file.
    new_base_path = parsed_target.path
    if requested_tail:
        new_base_path = new_base_path[: -len(requested_tail)]

    own_scheme = self._unqualified_scheme
    target_scheme = parsed_target.scheme

    if target_scheme not in ("http", "https"):
        # Redirected to some other protocol: take every component,
        # credentials included, from the target URL.
        new_url = _unsplit_url(
            target_scheme,
            parsed_target.user,
            parsed_target.password,
            parsed_target.host,
            parsed_target.port,
            new_base_path,
        )
        return _mod_dromedary.get_transport_from_url(new_url)

    stays_on_this_server = (
        target_scheme == own_scheme
        and parsed_target.host == self_url.host
        and parsed_target.port == self_url.port
        and (parsed_target.user is None or parsed_target.user == self_url.user)
    )
    if stays_on_this_server:
        return self.clone(new_base_path)

    # Still HTTP(S) but another endpoint: reuse our own credentials with
    # the target's scheme, host and port.
    new_url = _unsplit_url(
        target_scheme,
        self_url.user,
        self_url.password,
        parsed_target.host,
        parsed_target.port,
        new_base_path,
    )
    return _mod_dromedary.get_transport_from_url(new_url)
""" if parent_base == child_base: return None if child_base.startswith(parent_base): return child_base[len(parent_base) :] return child_base def _unsplit_url( scheme: str, user: str | None, password: str | None, host: str, port: int | None, path: str, ) -> str: """Build a URL from its components. Used by ``_redirected_to``.""" from urllib.parse import quote auth = "" if user: auth = quote(user, safe="") if password: auth += ":" + quote(password, safe="") auth += "@" netloc = auth + (host or "") if port is not None: netloc += f":{port}" return f"{scheme}://{netloc}{path}" def get_test_permutations() -> list[tuple[type, type]]: """Return the permutations used by the per-transport test scenarios.""" from dromedary.tests import http_server permutations: list[tuple[type, type]] = [(HttpTransport, http_server.HttpServer)] import importlib.util if importlib.util.find_spec("ssl") is not None: from dromedary.tests import https_server, ssl_certs _ca_path = ssl_certs.build_path("ca.crt") class HTTPS_transport(HttpTransport): def __new__( cls, base: str, _from_transport: "HttpTransport | None" = None ) -> Self: return super().__new__( cls, base, _from_transport=_from_transport, ca_certs=_ca_path, ) def __init__( self, base: str, _from_transport: "HttpTransport | None" = None ) -> None: super().__init__( base, _from_transport=_from_transport, ca_certs=_ca_path, ) permutations.append((HTTPS_transport, https_server.HTTPSServer)) return permutations dromedary-0.1.5/dromedary/local.py000066400000000000000000000076471520150013200171400ustar00rootroot00000000000000# Copyright (C) 2005-2012, 2016 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
def file_stat(
    f: str, _lstat: Callable[[str], os.stat_result] = os.lstat
) -> os.stat_result:
    """Stat ``f`` and translate "not there" errors into ``NoSuchFile``.

    Args:
        f: Path to the file.
        _lstat: Callable performing the stat; ``os.lstat`` unless overridden.

    Returns:
        Whatever ``_lstat(f)`` returns.

    Raises:
        NoSuchFile: If ``_lstat`` raises ``FileNotFoundError`` or
            ``NotADirectoryError``; the original error is chained as the
            cause.
    """
    try:
        stat_result = _lstat(f)
    except (FileNotFoundError, NotADirectoryError) as err:
        raise errors.NoSuchFile(f) from err
    return stat_result


def file_kind(f: str, _lstat: Callable[[str], os.stat_result] = os.lstat) -> str:
    """Name the kind of filesystem object found at ``f``.

    Args:
        f: Path to the file.
        _lstat: Callable performing the stat; ``os.lstat`` unless overridden.

    Returns:
        The string ``file_kind_from_stat_mode`` derives from the ``st_mode``
        of the stat result.

    Raises:
        NoSuchFile: Propagated from ``file_stat`` when the path is missing.
    """
    mode = file_stat(f, _lstat).st_mode
    return file_kind_from_stat_mode(mode)
The base URL is already wired up by the # `__new__` call in `LocalTransport(base)`. self._local_base = urlutils._win32_local_path_from_url(base) def abspath(self, relpath: str) -> str: """Return the absolute URL for a relative path. Args: relpath: Relative path from the transport base. Returns: Absolute URL using Win32 path conventions. """ path = _win32_normpath(pathjoin(self._local_base, urlutils.unescape(relpath))) return urlutils._win32_local_path_to_url(path) def clone(self, offset: str | None = None) -> "EmulatedWin32LocalTransport": """Return a new LocalTransport with root at self.base + offset Because the local filesystem does not require a connection, we can just return a new object. """ if offset is None: return EmulatedWin32LocalTransport(self.base) else: abspath = self.abspath(offset) if abspath == "file://": # fix upwalk for UNC path # when clone from //HOST/path updir recursively # we should stop at least at //HOST part abspath = self.base return EmulatedWin32LocalTransport(abspath) def get_test_permutations() -> list[tuple[type, type]]: """Return the permutations to be used in testing.""" from dromedary.tests import test_server return [ (LocalTransport, test_server.LocalURLServer), ] dromedary-0.1.5/dromedary/log.py000066400000000000000000000022151520150013200166110ustar00rootroot00000000000000# Copyright (C) 2008, 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Transport decorator that logs transport operations.""" from dromedary._transport_rs.log import TransportLogDecorator __all__ = ["TransportLogDecorator", "get_test_permutations"] def get_test_permutations() -> list[tuple[type, type]]: """Return the permutations to be used in testing.""" from dromedary.tests import test_server return [(TransportLogDecorator, test_server.LogDecoratorServer)] dromedary-0.1.5/dromedary/memory.py000066400000000000000000000047751520150013200173550ustar00rootroot00000000000000# Copyright (C) 2005-2011, 2016 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Implementation of Transport that uses memory for its storage. The contents of the transport will be lost when the object is discarded, so this is primarily useful for testing. 
""" from dromedary import Server, register_transport, unregister_transport from dromedary._transport_rs.memory import MemoryStoreHandle from dromedary._transport_rs.memory import MemoryTransport as _RustMemoryTransport __all__ = ["MemoryServer", "MemoryTransport", "get_test_permutations"] class MemoryTransport(_RustMemoryTransport): """This is an in memory file system for transient data storage.""" class MemoryServer(Server): """Server for the MemoryTransport for testing with.""" def start_server(self) -> None: """Start the memory server by initializing storage and registering transport.""" self._store = MemoryStoreHandle() self._scheme = f"memory+{id(self)}:///" def memory_factory(url: str) -> MemoryTransport: return MemoryTransport(url, _shared_store=self._store) self._memory_factory = memory_factory register_transport(self._scheme, self._memory_factory) def stop_server(self) -> None: """Stop the server and unregister the transport.""" unregister_transport(self._scheme, self._memory_factory) def get_url(self) -> str: """See dromedary.Server.get_url.""" return self._scheme def get_bogus_url(self) -> str: """Get a URL for a non-existent location. Raises: NotImplementedError: This method is not implemented for memory transport. """ raise NotImplementedError def get_test_permutations() -> list[tuple[type, type]]: """Return the permutations to be used in testing.""" return [ (MemoryTransport, MemoryServer), ] dromedary-0.1.5/dromedary/osutils.py000066400000000000000000000256321520150013200175420ustar00rootroot00000000000000#!/usr/bin/env python3 # Copyright (C) 2005-2024 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
def pumpfile(
    from_file: IO[bytes],
    to_file: IO[bytes],
    read_length: int = -1,
    buff_size: int = 32768,
) -> int:
    """Stream bytes from ``from_file`` into ``to_file``.

    Args:
        from_file: File-like object to read from.
        to_file: File-like object to write to.
        read_length: Upper bound on the number of bytes to copy.  A negative
            value (the default, -1) or ``None`` copies until EOF.
        buff_size: Largest amount requested from ``from_file`` per read.

    Returns:
        The number of bytes actually copied, which is smaller than
        ``read_length`` when the source runs dry first.
    """
    copied = 0

    if read_length is None or read_length < 0:
        # Unbounded copy: keep going until a read comes back empty.
        while piece := from_file.read(buff_size):
            to_file.write(piece)
            copied += len(piece)
        return copied

    # Bounded copy: never ask for more than is still owed.
    remaining = read_length
    while remaining > 0:
        piece = from_file.read(min(buff_size, remaining))
        if not piece:
            break
        to_file.write(piece)
        size = len(piece)
        remaining -= size
        copied += size
    return copied
def fancy_rename(
    old: str,
    new: str,
    rename_func: Callable[[str, str], None],
    unlink_func: Callable[[str], None],
) -> None:
    """Rename ``old`` to ``new`` when the backend cannot overwrite atomically.

    Any existing ``new`` is first moved aside to a temporary sibling name.
    If the real rename then succeeds the parked file is removed; if it
    fails the parked file is moved back so the target is left untouched.

    :param old: The old path, to rename from
    :param new: The new path, to rename to
    :param rename_func: The potentially non-atomic rename function
    :param unlink_func: A way to delete the target file if the full rename
        succeeds
    """
    import time

    from dromedary.errors import NoSuchFile

    # sftp rename doesn't allow overwriting, so the current target (if any)
    # is parked under a unique temporary name next to it.
    parked_name = pathjoin(
        os.path.dirname(new),
        "tmp.%s.%.9f.%d.%s"
        % (os.path.basename(new), time.time(), os.getpid(), rand_chars(10)),
    )

    # Move the target out of the way and remember whether there was one.
    try:
        rename_func(new, parked_name)
    except (NoSuchFile, FileNotFoundError, NotADirectoryError):
        target_existed = False
    except OSError as exc:
        # paramiko SFTP rename raises IOError with errno=None on failure.
        if exc.errno not in (errno.ENOENT, errno.ENOTDIR, None):
            raise
        target_existed = False
    else:
        target_existed = True

    renamed = False
    try:
        rename_func(old, new)
        renamed = True
    except FileNotFoundError:
        # On a case-insensitive filesystem source and target may be aliases
        # of each other; parking the target then also moved the source.
        aliases = target_existed and old.lower() == new.lower()
        if not aliases:
            raise
    finally:
        if target_existed:
            if renamed:
                unlink_func(parked_name)
            else:
                # Put the parked target back where it was.
                rename_func(parked_name, new)
Args: fileno: File descriptor or file object with fileno() method """ if hasattr(fileno, "fileno"): fileno = fileno.fileno() if hasattr(os, "fdatasync"): os.fdatasync(fileno) elif hasattr(os, "fsync"): os.fsync(fileno) # If neither is available, do nothing (some platforms don't support this) def file_kind_from_stat_mode(stat_mode: int) -> str: """Determine file type from stat mode bits. Args: stat_mode: Mode from os.stat() Returns: String describing file type: 'file', 'directory', 'symlink' """ if stat.S_ISREG(stat_mode): return "file" elif stat.S_ISDIR(stat_mode): return "directory" elif stat.S_ISLNK(stat_mode): return "symlink" elif stat.S_ISCHR(stat_mode): return "chardev" elif stat.S_ISBLK(stat_mode): return "block" elif stat.S_ISFIFO(stat_mode): return "fifo" elif stat.S_ISSOCK(stat_mode): return "socket" else: return "unknown" def set_fd_cloexec(fd: int | IO[bytes]) -> None: """Set the close-on-exec flag for a file descriptor. Args: fd: File descriptor or file object with fileno() method """ if hasattr(fd, "fileno"): fd = fd.fileno() if fcntl is not None and hasattr(fcntl, "FD_CLOEXEC"): flags = fcntl.fcntl(fd, fcntl.F_GETFD) fcntl.fcntl(fd, fcntl.F_SETFD, flags | fcntl.FD_CLOEXEC) def rand_chars(n: int) -> str: """Generate random characters. Args: n: Number of characters to generate Returns: String of random alphanumeric characters """ chars = string.ascii_letters + string.digits return "".join(random.choice(chars) for _ in range(n)) # noqa: S311 def splitpath(path: str) -> list[str]: """Split a path into components. Args: path: Path to split Returns: List of path components """ if not path or path == "/": return [] # Remove leading slash for consistent behavior if path.startswith("/"): path = path[1:] # Remove trailing slash if path.endswith("/"): path = path[:-1] if not path: return [] return path.split("/") def pathjoin(*args: str) -> str: """Join path components, handling various edge cases. 
def get_terminal_encoding() -> str:
    """Work out which character encoding the terminal uses.

    The encoding advertised by ``sys.stdout`` wins when it is set;
    otherwise the locale's preferred encoding is used.

    Returns:
        String name of encoding, defaults to 'utf-8'
    """
    import locale

    stdout_encoding = getattr(sys.stdout, "encoding", None)
    if stdout_encoding:
        return stdout_encoding

    try:
        preferred = locale.getpreferredencoding()
    except Exception:
        # Best effort only: fall through to the safe default below.
        preferred = None
    return preferred or "utf-8"  # Safe default


def getcwd() -> str:
    """Return the current working directory as a unicode string."""
    cwd = os.getcwd()
    return cwd
""" result = os.path.abspath(path) if sys.platform == "win32": result = result.replace("\\", "/") return result def get_umask() -> int: """Return the current umask.""" umask = os.umask(0) os.umask(umask) return umask def supports_symlinks(path: str | None = None) -> bool: """Return True if the filesystem supports symlinks.""" return getattr(os, "symlink", None) is not None def get_user_encoding() -> str: """Return the encoding used for user-facing text.""" return get_terminal_encoding() def _posix_normpath(path: str) -> str: return os.path.normpath(path) normpath = os.path.normpath split = os.path.split MIN_ABS_PATHLENGTH = 3 if sys.platform == "win32" else 1 def _win32_abspath(path: str) -> str: return os.path.abspath(path).replace("\\", "/") def _win32_normpath(path: str) -> str: """Normalize a Windows path. This is used on Windows to normalize path separators and handle drive letters properly. Args: path: Path to normalize Returns: Normalized path string """ if sys.platform == "win32": # For UNC paths we do our own `..` collapse before delegating to # `ntpath.normpath`. Python treats the share as the UNC root and # refuses to walk above it (so `\\HOST\share\..` stays # `\\HOST\share\`), but `EmulatedWin32LocalTransport.clone` needs # `..` from the share to land at `\\HOST` so callers can keep # walking up to the host root. if path.startswith("//") or path.startswith("\\\\"): unified = path.replace("\\", "/") # Collapse runs of `/` (except the leading `//` that marks UNC) # so callers like `osutils.pathjoin("//HOST/", "..")` which emit # `///HOST//..` still parse the host correctly. 
squeezed = "//" for ch in unified[2:]: if not (ch == "/" and squeezed.endswith("/")): squeezed += ch unified = squeezed host_end = unified.find("/", 2) host_part = unified if host_end == -1 else unified[:host_end] tail = "" if host_end == -1 else unified[host_end + 1 :] stack = [] for segment in tail.split("/"): if segment == "..": if stack: stack.pop() # else: silently absorbed; can't go above the host elif segment and segment != ".": stack.append(segment) if not stack: return host_part return host_part + "/" + "/".join(stack) import os.path return os.path.normpath(path).replace("\\", "/") else: # On non-Windows, just return the path as-is return path dromedary-0.1.5/dromedary/pathfilter.py000066400000000000000000000052371520150013200202010ustar00rootroot00000000000000# Copyright (C) 2009, 2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """A transport decorator that filters all paths that are passed to it.""" from collections.abc import Callable from typing import TYPE_CHECKING from dromedary import Server, register_transport, unregister_transport from dromedary._transport_rs.pathfilter import PathFilteringTransport if TYPE_CHECKING: from dromedary import Transport __all__ = [ "PathFilteringServer", "PathFilteringTransport", "get_test_permutations", ] class PathFilteringServer(Server): """Transport server for PathFilteringTransport. It holds the backing_transport and filter_func for PathFilteringTransports. All paths will be passed through filter_func before calling into the backing_transport. """ def __init__( self, backing_transport: "Transport", filter_func: Callable[[str], str] | None, ) -> None: """Constructor. :param backing_transport: a transport :param filter_func: a callable that takes paths, and translates them into paths for use with the backing transport. 
""" self.backing_transport = backing_transport self.filter_func = filter_func def _factory(self, url: str) -> PathFilteringTransport: return PathFilteringTransport(self, url) def get_url(self) -> str: """Return the URL scheme for this server.""" return self.scheme def start_server(self) -> None: """Start the path filtering transport server.""" self.scheme = "filtered-%d:///" % id(self) register_transport(self.scheme, self._factory) def stop_server(self) -> None: """Stop the path filtering transport server.""" unregister_transport(self.scheme, self._factory) def get_test_permutations() -> list[tuple[type, type]]: """Return the permutations to be used in testing.""" from dromedary.tests import test_server return [(PathFilteringTransport, test_server.TestingPathFilteringServer)] dromedary-0.1.5/dromedary/py.typed000066400000000000000000000000001520150013200171430ustar00rootroot00000000000000dromedary-0.1.5/dromedary/readonly.py000066400000000000000000000023051520150013200176450ustar00rootroot00000000000000# Copyright (C) 2006, 2007, 2009, 2010, 2011, 2016 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Implementation of Transport that adapts another transport to be readonly.""" from dromedary._transport_rs.readonly import ReadonlyTransportDecorator __all__ = ["ReadonlyTransportDecorator", "get_test_permutations"] def get_test_permutations() -> list[tuple[type, type]]: """Return the permutations to be used in testing.""" from dromedary.tests import test_server return [(ReadonlyTransportDecorator, test_server.ReadonlyServer)] dromedary-0.1.5/dromedary/setup.py000066400000000000000000000035571520150013200172020ustar00rootroot00000000000000#!/usr/bin/env python3 """Installation script for dromedary. Dromedary is the transport layer abstraction extracted from Breezy. """ import os # Import version from version module import sys from setuptools import find_packages, setup sys.path.insert(0, os.path.dirname(__file__)) from version import version_string try: from setuptools_rust import Binding, RustExtension except ModuleNotFoundError: RustExtension = None # type: ignore[assignment,misc] rust_extensions = [] else: rust_extensions = [ RustExtension( "dromedary._transport_rs", "_transport_rs/Cargo.toml", binding=Binding.PyO3 ), ] with open("README.md", encoding="utf-8") as fh: long_description = fh.read() setup( name="dromedary", version=version_string, author="Breezy Team", author_email="team@breezy-vcs.org", description="Transport layer abstraction for version control systems", long_description=long_description, long_description_content_type="text/markdown", url="https://github.com/breezy-team/dromedary", packages=find_packages(), rust_extensions=rust_extensions, classifiers=[ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "License :: OSI Approved :: GNU General Public License v2 or later (GPLv2+)", "Operating System :: OS Independent", 
"Programming Language :: Python :: 3", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Topic :: Software Development :: Version Control", ], python_requires=">=3.8", install_requires=[], extras_require={ "sftp": ["paramiko"], "gio": ["pygobject"], }, zip_safe=False, ) dromedary-0.1.5/dromedary/sftp.py000066400000000000000000001172151520150013200170130ustar00rootroot00000000000000# Copyright (C) 2005-2011, 2016, 2017 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Implementation of Transport over SFTP, using paramiko.""" # TODO: Remove the transport-based lock_read and lock_write methods. They'll # then raise TransportNotPossible, which will break remote access to any # formats which rely on OS-level locks. That should be fine as those formats # are pretty old, but these combinations may have to be removed from the test # suite. Those formats all date back to 0.7; so we should be able to remove # these methods when we officially drop support for those formats. 
import bisect import errno import itertools import logging import os import random import stat import sys import time from collections.abc import Callable, Generator, Iterator from typing import IO, Literal, NoReturn, Protocol, cast from dromedary import ( ConnectedTransport, FileFileStream, _config, _file_streams, errors, ssh, urlutils, ) from dromedary.errors import ( DependencyNotPresent, FileExists, LockContention, NoSuchFile, PathError, ReadError, TransportNotPossible, ) from dromedary.osutils import fancy_rename, pumpfile from dromedary.ssh import SFTPClientProtocol logger = logging.getLogger("dromedary.sftp") debug_logger = logging.getLogger("dromedary.sftp") from ._transport_rs import sftp as _sftp_rs SFTPError = _sftp_rs.SFTPError class _BogusLock: """No-op read lock for SFTP (shared reads don't need locking).""" def __init__(self, path: str) -> None: self.path = path def unlock(self) -> None: pass def __exit__( self, exc_type: object, exc_val: object, exc_tb: object ) -> Literal[False]: return False def __enter__(self) -> None: pass class ParamikoNotPresent(DependencyNotPresent): """Paramiko library is not available. Raised when paramiko is required for SFTP support but is not installed or cannot be imported. """ _fmt = "Unable to import paramiko (required for sftp support): %(error)s" def __init__(self, error: "Exception | str") -> None: """Initialize with the import error that prevented loading paramiko.""" DependencyNotPresent.__init__(self, "paramiko", error) class _SFTPFile(Protocol): """Paramiko SFTPFile-like object: IO[bytes] extended with readv, stat, prefetch.""" def read(self, size: int = -1) -> bytes: ... def write(self, data: bytes) -> int: ... def close(self) -> None: ... def tell(self) -> int: ... def seek(self, offset: int, whence: int = 0) -> int: ... def __enter__(self) -> "IO[bytes]": ... def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None: ... def stat(self) -> "_sftp_rs.SFTPAttributes": ... 
def prefetch(self, file_size: int) -> None: ... def readv(self, chunks: list[tuple[int, int]]) -> Iterator[bytes]: ... class SFTPLock: """This fakes a lock in a remote location. A present lock is indicated just by the existence of a file. This doesn't work well on all transports and they are only used in deprecated storage formats. """ __slots__ = ["lock_file", "lock_path", "path", "transport"] def __init__(self, path: str, transport: "SFTPTransport") -> None: """Initialize SFTP lock. Args: path: Path to lock. transport: SFTP transport to use. Raises: LockContention: If the file is already locked. """ self.lock_file = None self.path = path self.lock_path = path + ".write-lock" self.transport = transport try: # RBC 20060103 FIXME should we be using private methods here ? abspath = transport._remote_path(self.lock_path) self.lock_file = transport._sftp_open_exclusive(abspath) except FileExists as err: raise LockContention(self.path) from err def unlock(self) -> None: """Release the lock by closing and deleting the lock file.""" if not self.lock_file: return self.lock_file.close() self.lock_file = None try: self.transport.delete(self.lock_path) except NoSuchFile: # What specific errors should we catch here? pass class _SFTPReadvHelper: """A class to help with managing the state of a readv request.""" def __init__( self, original_offsets: list[tuple[int, int]], relpath: str, _report_activity: Callable[[int, Literal["read", "write"]], None], ) -> None: """Create a new readv helper. :param original_offsets: The original requests given by the caller of readv() :param relpath: The name of the file (if known) :param _report_activity: A Transport._report_activity bound method, to be called as data arrives. """ self.original_offsets = list(original_offsets) self.relpath = relpath self._report_activity = _report_activity def _get_requests(self) -> list[tuple[int, int]]: """Break up the offsets into individual requests over sftp. 
The SFTP spec only requires implementers to support 32kB requests. We could try something larger (openssh supports 64kB), but then we have to handle requests that fail. So instead, we just break up our maximum chunks into 32kB chunks, and asyncronously requests them. Newer versions of paramiko would do the chunking for us, but we want to start processing results right away, so we do it ourselves. """ # TODO: Because we issue async requests, we don't 'fudge' any extra # data. I'm not 100% sure that is the best choice. # The first thing we do, is to collapse the individual requests as much # as possible, so we don't issues requests <32kB sorted_offsets = sorted(self.original_offsets) coalesced = list( ConnectedTransport._coalesce_offsets( sorted_offsets, limit=0, fudge_factor=0 ) ) requests = [(c_offset.start, c_offset.length) for c_offset in coalesced] debug_logger.debug( "SFTP.readv(%s) %s offsets => %s coalesced => %s requests", self.relpath, len(sorted_offsets), len(coalesced), len(requests), ) return requests def request_and_yield_offsets( self, fp: _SFTPFile ) -> Generator[tuple[int, bytes], None, None]: """Request the data from the remote machine, yielding the results. :param fp: A Paramiko SFTPFile object that supports readv. :return: Yield the data requested by the original readv caller, one by one. """ requests = self._get_requests() offset_iter = iter(self.original_offsets) cur_offset, cur_size = next(offset_iter) # paramiko .readv() yields strings that are in the order of the requests # So we track the current request to know where the next data is # being returned from. input_start: int | None = None last_end: int | None = None buffered_data: list[bytes] = [] buffered_len = 0 # This is used to buffer chunks which we couldn't process yet # It is (start, end, data) tuples. data_chunks: list[tuple[int, bytes]] = [] # Create an 'unlimited' data stream, so we stop based on requests, # rather than just because the data stream ended. 
This lets us detect # short readv. data_stream = itertools.chain(fp.readv(requests), itertools.repeat(None)) for (start, length), data in zip(requests, data_stream, strict=False): if data is None and cur_coalesced is not None: # type: ignore[name-defined] raise errors.ShortReadvError(self.relpath, start, length, 0) assert data is not None if len(data) != length: raise errors.ShortReadvError(self.relpath, start, length, len(data)) self._report_activity(length, "read") if last_end is None: # This is the first request, just buffer it buffered_data = [data] buffered_len = length input_start = start elif start == last_end: # The data we are reading fits neatly on the previous # buffer, so this is all part of a larger coalesced range. buffered_data.append(data) buffered_len += length else: # We have an 'interrupt' in the data stream. So we know we are # at a request boundary. if buffered_len > 0: # We haven't consumed the buffer so far, so put it into # data_chunks, and continue. buffered = b"".join(buffered_data) assert input_start is not None data_chunks.append((input_start, buffered)) input_start = start buffered_data = [data] buffered_len = length last_end = start + length if input_start == cur_offset and cur_size <= buffered_len: # Simplify the next steps a bit by transforming buffered_data # into a single string. We also have the nice property that # when there is only one string ''.join([x]) == x, so there is # no data copying. buffered = b"".join(buffered_data) # type: ignore[arg-type] # Clean out buffered data so that we keep memory # consumption low del buffered_data[:] buffered_offset = 0 # TODO: We *could* also consider the case where cur_offset is in # in the buffered range, even though it doesn't *start* # the buffered range. But for packs we pretty much always # read in order, so you won't get any extra data in the # middle. 
while ( input_start == cur_offset and (buffered_offset + cur_size) <= buffered_len ): # We've buffered enough data to process this request, spit it # out cur_data = buffered[buffered_offset : buffered_offset + cur_size] # move the direct pointer into our buffered data buffered_offset += cur_size # Move the start-of-buffer pointer input_start += cur_size # Yield the requested data yield cur_offset, cur_data try: cur_offset, cur_size = next(offset_iter) except StopIteration: return # at this point, we've consumed as much of buffered as we can, # so break off the portion that we consumed if buffered_offset == len(buffered_data): # No tail to leave behind buffered_data = [] buffered_len = 0 else: buffered = buffered[buffered_offset:] buffered_data = [buffered] buffered_len = len(buffered) # now that the data stream is done, close the handle fp.close() if buffered_len: buffered = b"".join(buffered_data) del buffered_data[:] assert input_start is not None data_chunks.append((input_start, buffered)) if data_chunks: debug_logger.debug( "SFTP readv left with %d out-of-order bytes", sum(len(x[1]) for x in data_chunks), ) # We've processed all the readv data, at this point, anything we # couldn't process is in data_chunks. This doesn't happen often, so # this code path isn't optimized # We use an interesting process for data_chunks # Specifically if we have "bisect_left([(start, len, entries)], # (qstart,)]) # If start == qstart, then we get the specific node. 
Otherwise we # get the previous node while True: idx = bisect.bisect_left(data_chunks, (cur_offset,)) if idx < len(data_chunks) and data_chunks[idx][0] == cur_offset: # The data starts here data = data_chunks[idx][1][:cur_size] elif idx > 0: # The data is in a portion of a previous page idx -= 1 sub_offset = cur_offset - data_chunks[idx][0] data = data_chunks[idx][1] data = data[sub_offset : sub_offset + cur_size] else: # We are missing the page where the data should be found, # something is wrong data = b"" if len(data) != cur_size: raise AssertionError( "We must have miscalulated." " We expected %d bytes, but only found %d" % (cur_size, len(data)) ) yield cur_offset, data try: cur_offset, cur_size = next(offset_iter) except StopIteration: return class SFTPTransport( ConnectedTransport[SFTPClientProtocol, tuple[str | None, str | None]] ): """Transport implementation for SFTP access.""" # TODO: jam 20060717 Conceivably these could be configurable, either # by auto-tuning at run-time, or by a configuration (per host??) # but the performance curve is pretty flat, so just going with # reasonable defaults. _max_readv_combine = 200 # Having to round trip to the server means waiting for a response, # so it is better to download extra bytes. # 8KiB had good performance for both local and remote network operations _bytes_to_read_before_seek = 8192 def _pump(self, infile: IO[bytes], outfile: IO[bytes]) -> int: return pumpfile(infile, outfile) def _remote_path(self, relpath: str) -> str: """Return the path to be passed along the sftp protocol for relpath. :param relpath: is a urlencoded string. """ remote_path = self._parsed_url.clone(relpath).path # the initial slash should be removed from the path, and treated as a # homedir relative path (the path begins with a double slash if it is # absolute). see draft-ietf-secsh-scp-sftp-ssh-uri-03.txt # RBC 20060118 we are not using this as its too user hostile. 
instead # we are following lftp and using /~/foo to mean '~/foo' # vila--20070602 and leave absolute paths begin with a single slash. if remote_path.startswith("/~/"): remote_path = remote_path[3:] elif remote_path == "/~": remote_path = "" return remote_path def _create_connection( self, credentials: str | None = None ) -> tuple[SFTPClientProtocol, tuple[str | None, str | None]]: """Create a new connection with the provided credentials. :param credentials: The credentials needed to establish the connection. :return: The created connection and its associated credentials. The credentials are only the password as it may have been entered interactively by the user and may be different from the one provided in base url at transport creation time. """ password = self._parsed_url.password if credentials is None else credentials vendor = ssh._get_ssh_vendor() user = self._parsed_url.user if user is None: user = _config.get_auth_user( "ssh", self._parsed_url.host, self._parsed_url.port ) connection = vendor.connect_sftp( self._parsed_url.user, password, self._parsed_url.host, self._parsed_url.port, ) return connection, (user, password) def disconnect(self) -> None: """Disconnect the current SFTP connection.""" connection = self._get_connection() if connection is not None: connection.close() def _get_sftp(self) -> SFTPClientProtocol: """Ensures that a connection is established.""" connection = self._get_connection() if connection is None: # First connection ever connection, credentials = self._create_connection() self._set_connection(connection, credentials) return connection def has(self, relpath: str) -> bool: """Does the target location exist?""" try: self._get_sftp().stat(self._remote_path(relpath)) # stat result is about 20 bytes, let's say self._report_activity(20, "read") return True except NoSuchFile: return False def get(self, relpath: str) -> IO[bytes]: """Get the file at the given relative path. 
:param relpath: The relative path to the file """ try: path = self._remote_path(relpath) f = cast("_SFTPFile", self._get_sftp().file(path, mode="rb")) size = f.stat().st_size if getattr(f, "prefetch", None) is not None and size is not None: f.prefetch(size) return cast("IO[bytes]", f) except (OSError, SFTPError) as e: self._translate_io_exception( e, path, ": error retrieving", failure_exc=ReadError ) def get_bytes(self, relpath: str) -> bytes: """Get the contents of a file as a byte string. Args: relpath: Path to the file relative to transport root. Returns: bytes: The file contents. """ # reimplement this here so that we can report how many bytes came back with self.get(relpath) as f: bytes = f.read() self._report_activity(len(bytes), "read") return bytes def _readv( self, relpath: str, offsets: list[tuple[int, int]] ) -> Iterator[tuple[int, bytes]]: """See Transport.readv().""" # We overload the default readv() because we want to use a file # that does not have prefetch enabled. # Also, if we have a new paramiko, it implements an async readv() if not offsets: return iter([]) try: path = self._remote_path(relpath) fp = cast("_SFTPFile", self._get_sftp().file(path, mode="rb")) readv = getattr(fp, "readv", None) if readv: return self._sftp_readv(fp, offsets, relpath) debug_logger.debug("seek and read %s offsets", len(offsets)) return self._seek_and_read(cast("IO[bytes]", fp), offsets, relpath) except (OSError, SFTPError) as e: self._translate_io_exception(e, path, ": error retrieving") def recommended_page_size(self) -> int: """See Transport.recommended_page_size(). For SFTP we suggest a large page size to reduce the overhead introduced by latency. """ return 64 * 1024 def _sftp_readv( self, fp: _SFTPFile, offsets: list[tuple[int, int]], relpath: str ) -> Generator[tuple[int, bytes], None, None]: """Use the readv() member of fp to do async readv. Then read them using paramiko.readv(). 
paramiko.readv() does not support ranges > 64K, so it caps the request size, and just reads until it gets all the stuff it wants. """ helper = _SFTPReadvHelper(offsets, relpath, self._report_activity) return helper.request_and_yield_offsets(fp) def put_file(self, relpath: str, f: IO[bytes], mode: int | None = None) -> int: """Copy the file-like object into the location. :param relpath: Location to put the contents, relative to base. :param f: File-like object. :param mode: The final mode for the file """ final_path = self._remote_path(relpath) return self._put(final_path, f, mode=mode) def _put(self, abspath: str, f: IO[bytes], mode: int | None = None) -> int: """Helper function so both put() and copy_abspaths can reuse the code.""" tmp_abspath = "%s.tmp.%.9f.%d.%d" % ( abspath, time.time(), os.getpid(), random.randint(0, 0x7FFFFFFF), # noqa: S311 ) fout = self._sftp_open_exclusive(tmp_abspath, mode=mode) closed = False try: try: length = self._pump(f, fout) except (OSError, SFTPError) as e: self._translate_io_exception(e, tmp_abspath) # XXX: This doesn't truly help like we would like it to. # The problem is that openssh strips sticky bits. So while we # can properly set group write permission, we lose the group # sticky bit. So it is probably best to stop chmodding, and # just tell users that they need to set the umask correctly. # The attr.st_mode = mode, in _sftp_open_exclusive # will handle when the user wants the final mode to be more # restrictive. And then we avoid a round trip. Unless # paramiko decides to expose an async chmod() # This is designed to chmod() right before we close. 
# Because we set_pipelined() earlier, theoretically we might # avoid the round trip for fout.close() if mode is not None: self._get_sftp().chmod(tmp_abspath, mode) fout.close() closed = True self._rename_and_overwrite(tmp_abspath, abspath) return length except Exception as e: # If we fail, try to clean up the temporary file # before we throw the exception # but don't let another exception mess things up # Write out the traceback, because otherwise # the catch and throw destroys it import traceback logger.debug("%s", traceback.format_exc()) try: if not closed: fout.close() self._get_sftp().remove(tmp_abspath) except BaseException: # raise the saved except raise e from None # raise the original with its traceback if we can. raise def _put_non_atomic_helper( self, relpath: str, writer: Callable[[IO[bytes]], None], mode: int | None = None, create_parent_dir: bool = False, dir_mode: int | None = None, ) -> None: abspath = self._remote_path(relpath) # TODO: jam 20060816 paramiko doesn't publicly expose a way to # set the file mode at create time. If it does, use it. # But for now, we just chmod later anyway. def _open_and_write_file() -> None: """Try to open the target file, raise error on failure.""" fout = None try: try: fout = self._get_sftp().file(abspath, mode="wb") writer(fout) except (SFTPError, OSError) as e: self._translate_io_exception(e, abspath, ": unable to open") # This is designed to chmod() right before we close. 
# Because we set_pipelined() earlier, theoretically we might # avoid the round trip for fout.close() if mode is not None: self._get_sftp().chmod(abspath, mode) finally: if fout is not None: fout.close() if not create_parent_dir: _open_and_write_file() return # Try error handling to create the parent directory if we need to try: _open_and_write_file() except NoSuchFile: # Try to create the parent directory, and then go back to # writing the file parent_dir = os.path.dirname(abspath) self._mkdir(parent_dir, dir_mode) _open_and_write_file() def put_file_non_atomic( self, relpath: str, f: IO[bytes], mode: int | None = None, create_parent_dir: bool = False, dir_mode: int | None = None, ) -> None: """Copy the file-like object into the target location. This function is not strictly safe to use. It is only meant to be used when you already know that the target does not exist. It is not safe, because it will open and truncate the remote file. So there may be a time when the file has invalid contents. :param relpath: The remote location to put the contents. :param f: File-like object. :param mode: Possible access permissions for new file. None means do not set remote permissions. :param create_parent_dir: If we cannot create the target file because the parent directory does not exist, go ahead and create it, and then try again. """ def writer(fout: IO[bytes]) -> None: self._pump(f, fout) self._put_non_atomic_helper( relpath, writer, mode=mode, create_parent_dir=create_parent_dir, dir_mode=dir_mode, ) def put_bytes_non_atomic( self, relpath: str, raw_bytes: bytes, mode: int | None = None, create_parent_dir: bool = False, dir_mode: int | None = None, ) -> None: """Write bytes to a file non-atomically. This is not safe if the target already exists as it will truncate it. Args: relpath: Path relative to transport root. raw_bytes: Bytes to write. mode: File permissions. create_parent_dir: Whether to create parent directory if needed. 
dir_mode: Permissions for created parent directories. Raises: TypeError: If raw_bytes is not bytes. """ if not isinstance(raw_bytes, bytes): raise TypeError(f"raw_bytes must be a plain string, not {type(raw_bytes)}") def writer(fout: IO[bytes]) -> None: fout.write(raw_bytes) self._put_non_atomic_helper( relpath, writer, mode=mode, create_parent_dir=create_parent_dir, dir_mode=dir_mode, ) def iter_files_recursive(self) -> Iterator[str]: """Walk the relative paths of all files in this transport.""" # progress is handled by list_dir queue = list(self.list_dir(".")) while queue: relpath = queue.pop(0) st = self.stat(relpath) if stat.S_ISDIR(st.st_mode): for i, basename in enumerate(self.list_dir(relpath)): queue.insert(i, relpath + "/" + basename) else: yield relpath def _mkdir(self, abspath: str, mode: int | None = None) -> None: local_mode = 511 if mode is None else mode try: self._report_activity(len(abspath), "write") self._get_sftp().mkdir(abspath, local_mode) self._report_activity(1, "read") if mode is not None: # chmod a dir through sftp will erase any sgid bit set # on the server side. So, if the bit mode are already # set, avoid the chmod. If the mode is not fine but # the sgid bit is set, report a warning to the user # with the umask fix. stat = self._get_sftp().lstat(abspath) assert stat.st_mode is not None mode = mode & 0o777 # can't set special bits anyway if mode != stat.st_mode & 0o777: if stat.st_mode & 0o6000: logger.warning( f"About to chmod {abspath} over sftp, which will result" " in its suid or sgid bits being cleared. If" " you want to preserve those bits, change your " f" environment on the server to use umask 0{0o777 - mode:03o}." 
) self._get_sftp().chmod(abspath, mode=mode) except (SFTPError, OSError) as e: self._translate_io_exception( e, abspath, ": unable to mkdir", failure_exc=FileExists ) def mkdir(self, relpath: str, mode: int | None = None) -> None: """Create a directory at the given path.""" self._mkdir(self._remote_path(relpath), mode=mode) def open_write_stream( self, relpath: str, mode: int | None = None ) -> FileFileStream: """See Transport.open_write_stream.""" # initialise the file to zero-length # this is three round trips, but we don't use this # api more than once per write_group at the moment so # it is a tolerable overhead. Better would be to truncate # the file after opening. RBC 20070805 self.put_bytes_non_atomic(relpath, b"", mode) abspath = self._remote_path(relpath) # TODO: jam 20060816 paramiko doesn't publicly expose a way to # set the file mode at create time. If it does, use it. # But for now, we just chmod later anyway. handle: IO[bytes] | None = None try: handle = self._get_sftp().file(abspath, mode="wb") except (SFTPError, OSError) as e: self._translate_io_exception(e, abspath, ": unable to open") assert handle is not None _file_streams[self.abspath(relpath)] = handle return FileFileStream(self, relpath, handle) def _translate_io_exception( self, e: "OSError | SFTPError", path: str, more_info: str = "", failure_exc: type[PathError] = PathError, ) -> NoReturn: """Translate a paramiko or IOError into a friendlier exception. :param e: The original exception :param path: The path in question when the error is raised :param more_info: Extra information that can be included, such as what was going on :param failure_exc: Paramiko has the super fun ability to raise completely opaque errors that just set "e.args = ('Failure',)" with no more information. If this parameter is set, it defines the exception to raise in these cases. """ # paramiko seems to generate detailless errors. 
self._translate_error(e, path, raise_generic=False) # type: ignore[arg-type] if getattr(e, "args", None) is not None: if e.args == ("No such file or directory",) or e.args == ("No such file",): raise NoSuchFile(path, str(e) + more_info) if e.args == ("mkdir failed",) or e.args[0].startswith( "syserr: File exists" ): raise FileExists(path, str(e) + more_info) # strange but true, for the paramiko server. if e.args == ("Failure",): raise failure_exc(path, str(e) + more_info) # Can be something like args = ('Directory not empty: # '/srv/example.com/blah...: ' # [Errno 39] Directory not empty',) if ( e.args[0].startswith("Directory not empty: ") or getattr(e, "errno", None) == errno.ENOTEMPTY ): raise errors.DirectoryNotEmpty(path, str(e)) if e.args == ("Operation unsupported",): raise TransportNotPossible() logger.debug("Raising exception with args %s", e.args) errno_val = getattr(e, "errno", None) if errno_val is not None: logger.debug("Raising exception with errno %s", errno_val) raise e def append_file(self, relpath: str, f: IO[bytes], mode: int | None = None) -> int: """Append the text in the file-like object into the final location. """ try: path = self._remote_path(relpath) fout = self._get_sftp().file(path, "ab") if mode is not None: self._get_sftp().chmod(path, mode) result = fout.tell() self._pump(f, fout) return result except (OSError, SFTPError) as e: self._translate_io_exception(e, relpath, ": unable to append") def rename(self, rel_from: str, rel_to: str) -> None: """Rename without special overwriting.""" try: self._get_sftp().rename( self._remote_path(rel_from), self._remote_path(rel_to) ) except (OSError, SFTPError) as e: self._translate_io_exception( e, rel_from, f": unable to rename to {rel_to!r}" ) def _rename_and_overwrite(self, abs_from: str, abs_to: str) -> None: """Do a fancy rename on the remote server. Using the implementation provided by osutils. 
""" try: sftp = self._get_sftp() fancy_rename( abs_from, abs_to, rename_func=sftp.rename, unlink_func=sftp.remove ) except (OSError, SFTPError) as e: self._translate_io_exception( e, abs_from, f": unable to rename to {abs_to!r}" ) def move(self, rel_from: str, rel_to: str) -> None: """Move the item at rel_from to the location at rel_to.""" path_from = self._remote_path(rel_from) path_to = self._remote_path(rel_to) self._rename_and_overwrite(path_from, path_to) def delete(self, relpath: str) -> None: """Delete the item at relpath.""" path = self._remote_path(relpath) try: self._get_sftp().remove(path) except (OSError, SFTPError) as e: self._translate_io_exception(e, path, ": unable to delete") def external_url(self) -> str: """See dromedary.Transport.external_url.""" # the external path for SFTP is the base return self.base def listable(self) -> bool: """Return True if this store supports listing.""" return True def list_dir(self, relpath: str) -> list[str]: """Return a list of all files at the given location.""" # does anything actually use this? # -- Unknown # This is at least used by copy_tree for remote upgrades. 
# -- David Allouche 2006-08-11 path = self._remote_path(relpath) try: entries = self._get_sftp().listdir(path) self._report_activity(sum(map(len, entries)), "read") except (OSError, SFTPError) as e: self._translate_io_exception(e, path, ": failed to list_dir") return [urlutils.escape(entry) for entry in entries] def rmdir(self, relpath: str) -> None: """See Transport.rmdir.""" path = self._remote_path(relpath) try: return self._get_sftp().rmdir(path) except (OSError, SFTPError) as e: self._translate_io_exception(e, path, ": failed to rmdir") def stat(self, relpath: str) -> os.stat_result: """Return the stat information for a file.""" path = self._remote_path(relpath) try: return cast("os.stat_result", self._get_sftp().lstat(path)) except (OSError, SFTPError) as e: self._translate_io_exception(e, path, ": unable to stat") def readlink(self, relpath: str) -> str: """See Transport.readlink.""" path = self._remote_path(relpath) try: return self._get_sftp().readlink(self._remote_path(path)) except (OSError, SFTPError) as e: self._translate_io_exception(e, path, ": unable to readlink") def symlink(self, source: str, link_name: str) -> None: """See Transport.symlink.""" try: conn = self._get_sftp() conn.symlink(source, self._remote_path(link_name)) except (OSError, SFTPError) as e: self._translate_io_exception( e, link_name, f": unable to create symlink to {source!r}" ) def lock_read(self, relpath: str) -> _BogusLock: """Lock the given file for shared (read) access. :return: A lock object, which has an unlock() member function. """ # FIXME: there should be something clever i can do here... return _BogusLock(relpath) def lock_write(self, relpath: str) -> SFTPLock: """Lock the given file for exclusive (write) access. WARNING: many transports do not support this, so trying avoid using it. 
:return: A lock object, which has an unlock() member function """ # This is a little bit bogus, but basically, we create a file # which should not already exist, and if it does, we assume # that there is a lock, and if it doesn't, the we assume # that we have taken the lock. return SFTPLock(relpath, self) def _sftp_open_exclusive(self, abspath: str, mode: int | None = None) -> IO[bytes]: """Open a remote path exclusively. SFTP supports O_EXCL (SFTP_FLAG_EXCL), which fails if the file already exists. However it does not expose this at the higher level of SFTPClient.open(), so we have to sneak away with it. WARNING: This breaks the SFTPClient abstraction, so it could easily break against an updated version of paramiko. :param abspath: The remote absolute path where the file should be opened :param mode: The mode permissions bits for the new file """ attr = _sftp_rs.SFTPAttributes() if mode is not None: attr.st_mode = mode | stat.S_IFREG else: # Apply the local umask to the default 0o666 so file permissions # follow the same convention as ordinary file creation. 
from dromedary.osutils import get_umask attr.st_mode = stat.S_IFREG | (0o666 & ~get_umask()) try: return self._get_sftp().open( abspath, attr, write=True, create=True, excl=True, truncate=True ) except (SFTPError, OSError) as e: self._translate_io_exception( e, abspath, ": unable to open", failure_exc=FileExists ) def _can_roundtrip_unix_modebits(self) -> bool: return sys.platform != "win32" def get_test_permutations() -> list[tuple[type, type]]: """Return the permutations to be used in testing.""" import importlib.util if importlib.util.find_spec("paramiko") is None: raise ParamikoNotPresent("paramiko not installed") from dromedary.tests import stub_sftp return [ (SFTPTransport, stub_sftp.SFTPAbsoluteServer), (SFTPTransport, stub_sftp.SFTPHomeDirServer), (SFTPTransport, stub_sftp.SFTPSiblingAbsoluteServer), ] dromedary-0.1.5/dromedary/ssh/000077500000000000000000000000001520150013200162535ustar00rootroot00000000000000dromedary-0.1.5/dromedary/ssh/__init__.py000066400000000000000000000465741520150013200204040ustar00rootroot00000000000000# Copyright (C) 2006-2011 Robey Pointer # Copyright (C) 2005, 2006, 2007 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Foundation SSH support for SFTP and smart server.""" import errno import logging import os import socket from binascii import hexlify from typing import IO, TYPE_CHECKING, Any, NoReturn, Protocol if TYPE_CHECKING: import paramiko as paramiko_t from catalogus import registry from dromedary import _bedding as bedding from dromedary import _config, _ui, errors from dromedary.errors import SocketConnectionError from dromedary.osutils import pathjoin from .._transport_rs import sftp as _sftp_rs from .._transport_rs import ssh as _ssh_rs logger = logging.getLogger("dromedary.ssh") SFTPClient = _sftp_rs.SFTPClient class SFTPClientProtocol(Protocol): """Shared interface between dromedary (Rust) and paramiko SFTP clients.""" def close(self) -> None: ... def stat(self, path: str) -> "_sftp_rs.SFTPAttributes": ... def lstat(self, path: str) -> "_sftp_rs.SFTPAttributes": ... def listdir(self, path: str) -> list: ... def file(self, path: str, mode: str = "r") -> IO[bytes]: ... def open( self, path: str, attr: "_sftp_rs.SFTPAttributes", *, read: bool = False, write: bool = False, append: bool = False, create: bool = False, truncate: bool = False, excl: bool = False, ) -> IO[bytes]: ... def chmod(self, path: str, mode: int) -> None: ... def mkdir(self, path: str, mode: int = 0o777) -> None: ... def rmdir(self, path: str) -> None: ... def remove(self, path: str) -> None: ... def rename(self, oldpath: str, newpath: str) -> None: ... def readlink(self, path: str) -> str: ... def symlink(self, source: str, dest: str) -> None: ... 
try: import paramiko except ModuleNotFoundError: # If we have an ssh subprocess, we don't strictly need paramiko for all ssh # access paramiko = None # type: ignore class SSHVendorNotFound(errors.TransportError): """No SSH implementation available.""" _fmt = ( "Don't know how to handle SSH connections." " Please set BRZ_SSH environment variable." ) class UnknownSSH(errors.TransportError): """Unknown SSH implementation specified.""" _fmt = "Unrecognised value for BRZ_SSH environment variable: %(vendor)s" def __init__(self, vendor: str) -> None: """Initialize with the unrecognised vendor name.""" self.vendor = vendor errors.TransportError.__init__(self) class SSHVendorManager(registry.Registry[str, "SSHVendor", None]): """Manager for manage SSH vendors.""" def __init__(self) -> None: """Initialize the SSH vendor manager. Sets up the registry and initializes the vendor cache. """ super().__init__() self._cached_ssh_vendor: SSHVendor | None = None def clear_cache(self) -> None: """Clear previously cached lookup result.""" self._cached_ssh_vendor = None def _get_vendor_by_config(self) -> "SSHVendor | None": """Get SSH vendor based on configuration. Looks up the SSH vendor from the global configuration. If a vendor name is specified but not registered, attempts to use it as an executable path. Returns: SSHVendor: The configured SSH vendor, or None if not configured. Raises: UnknownSSH: If the configured vendor name is not found and cannot be used as an executable path. """ vendor_name = _config.get_ssh_vendor_name() if vendor_name is not None: try: vendor = self.get(vendor_name) except KeyError as err: vendor = self._get_vendor_from_path(vendor_name) if vendor is None: raise UnknownSSH(vendor_name) from err vendor.executable_path = vendor_name return vendor return None def _get_vendor_by_inspection(self) -> "SSHVendor | None": """Return the vendor or None by checking for known SSH implementations. Runs 'ssh -V' to determine the SSH implementation in use. 
Detection runs in Rust; this just maps the returned registry key back to a vendor instance. Returns: SSHVendor: The detected vendor, or None if not recognized. """ key = _ssh_rs.detect_ssh_vendor("ssh") if key is None: return None logger.debug("ssh implementation detected as %s", key) return self.get(key) def _get_vendor_from_path(self, path: str) -> "SSHVendor | None": """Return the vendor or None using the program at the given path. Runs the specified executable with '-V' to determine its type. Args: path: Path to the SSH executable. Returns: SSHVendor: The detected vendor, or None if not recognized. """ key = _ssh_rs.detect_ssh_vendor(path) if key is None: return None logger.debug("ssh implementation at %s detected as %s", path, key) return self.get(key) def get_vendor(self) -> "SSHVendor": """Find out what version of SSH is on the system. :raises SSHVendorNotFound: if no any SSH vendor is found :raises UnknownSSH: if the BRZ_SSH environment variable contains unknown vendor name """ if self._cached_ssh_vendor is None: vendor = self._get_vendor_by_config() if vendor is None: vendor = self._get_vendor_by_inspection() if vendor is None: logger.debug("falling back to default implementation") if self.default_key is None: raise SSHVendorNotFound() vendor = self.get() self._cached_ssh_vendor = vendor return self._cached_ssh_vendor _ssh_vendor_manager = SSHVendorManager() _get_ssh_vendor = _ssh_vendor_manager.get_vendor register_ssh_vendor = _ssh_vendor_manager.register register_lazy_ssh_vendor = _ssh_vendor_manager.register_lazy class SocketAsChannelAdapter: """Simple wrapper for a socket that pretends to be a paramiko Channel.""" def __init__(self, sock: socket.socket) -> None: """Initialize the adapter with a socket.""" self.__socket = sock def get_name(self) -> str: """Get the name of this channel adapter. Returns: str: A descriptive name for this adapter. """ return "bzr SocketAsChannelAdapter" def send(self, data: bytes) -> int: """Send data through the socket. 
Args: data: Bytes to send. Returns: int: Number of bytes sent. """ return self.__socket.send(data) def recv(self, n: int) -> bytes: """Receive data from the socket. Args: n: Maximum number of bytes to receive. Returns: bytes: Data received from the socket, or empty string if the connection is closed. Note: Returns empty string instead of raising an exception when the connection is closed, to match paramiko's expected behavior. """ try: return self.__socket.recv(n) except OSError as e: if e.args[0] in ( errno.EPIPE, errno.ECONNRESET, errno.ECONNABORTED, errno.EBADF, ): # Connection has closed. Paramiko expects an empty bytes in # this case, not an exception. return b"" raise def recv_ready(self) -> bool: """Check if data is available for reading. Returns: bool: Always returns True. Should ideally use poll() or select() to check for actual data availability. Note: This is a simplified implementation that always returns True. A proper implementation would check if data is actually available. """ # TODO: jam 20051215 this function is necessary to support the # pipelined() function. In reality, it probably should use # poll() or select() to actually return if there is data # available, otherwise we probably don't get any benefit return True def close(self) -> None: """Close the underlying socket.""" self.__socket.close() class SSHVendor: """Abstract base class for SSH vendor implementations.""" executable_path: str | None = None def connect_sftp( self, username: str, password: str | None, host: str, port: int | None ) -> SFTPClientProtocol: """Make an SSH connection, and return an SFTPClient. :param username: an ascii string :param password: an ascii string :param host: a host name as an ascii string :param port: a port number :type port: int :raises: ConnectionError if it cannot connect. 
:rtype: paramiko.sftp_client.SFTPClient """ raise NotImplementedError(self.connect_sftp) def connect_ssh( self, username: str, password: str | None, host: str, port: int | None, command: list[str], ) -> "SSHConnection": """Make an SSH connection. :returns: an SSHConnection. """ raise NotImplementedError(self.connect_ssh) def _raise_connection_error( self, host: str, port: int | None = None, orig_error: Exception | None = None, msg: str = "Unable to connect to SSH host", ) -> "NoReturn": """Raise a SocketConnectionError with properly formatted host. This just unifies all the locations that try to raise ConnectionError, so that they format things properly. Args: host: The hostname that failed to connect. port: The port number (optional). orig_error: The original exception that caused the connection failure. msg: Custom error message. Raises: SocketConnectionError: Always raises this error with the provided details. """ raise SocketConnectionError( host=host, port=port, msg=msg, orig_error=orig_error ) class LoopbackVendor(SSHVendor): """SSH "vendor" that connects over a plain TCP socket, not SSH.""" def connect_sftp( self, username: str, password: str | None, host: str, port: int | None ) -> SFTPClientProtocol: """Connect to an SFTP server using a plain TCP socket. This is a loopback implementation that bypasses SSH and connects directly via TCP. Useful for testing or local connections. Args: username: SSH username (ignored in loopback). password: SSH password (ignored in loopback). host: Hostname to connect to. port: Port number to connect to. Returns: SFTPClient: An SFTP client connected via TCP socket. Raises: SocketConnectionError: If connection fails. """ sock = socket.socket() try: sock.connect((host, port)) except OSError as e: self._raise_connection_error(host, port=port, orig_error=e) return SFTPClient(sock.detach()) register_ssh_vendor("loopback", LoopbackVendor()) # Rust-backed vendors. 
Registered lazily so the extension module is only # imported when one of these vendors is actually selected. register_lazy_ssh_vendor("russh", "dromedary.ssh.russh", "russh_vendor") register_lazy_ssh_vendor("openssh", "dromedary.ssh.subprocess_rs", "openssh_vendor") register_lazy_ssh_vendor("lsh", "dromedary.ssh.subprocess_rs", "lsh_vendor") register_lazy_ssh_vendor("plink", "dromedary.ssh.subprocess_rs", "plink_vendor") _ssh_vendor_manager.default_key = "russh" if paramiko is not None: register_lazy_ssh_vendor("paramiko", "dromedary.ssh.paramiko", "paramiko_vendor") register_lazy_ssh_vendor("none", "dromedary.ssh.paramiko", "paramiko_vendor") def _paramiko_auth( username: str | None, password: str | None, host: str, port: int | None, paramiko_transport: "paramiko_t.Transport", ) -> None: # paramiko requires a username, but it might be none if nothing was # supplied. If so, use the local username. if username is None: username = _config.get_auth_user("ssh", host, port=port) agent = paramiko.Agent() for key in agent.get_keys(): logger.debug("Trying SSH agent key %s", hexlify(key.get_fingerprint()).upper()) try: paramiko_transport.auth_publickey(username, key) return except paramiko.SSHException: pass # okay, try finding id_rsa or id_dss? (posix only) if _try_pkey_auth(paramiko_transport, paramiko.RSAKey, username, "id_rsa"): return # DSSKey was removed in paramiko 4.0.0 as DSA keys are deprecated if hasattr(paramiko, "DSSKey"): if _try_pkey_auth(paramiko_transport, paramiko.DSSKey, username, "id_dsa"): return # If we have gotten this far, we are about to try for passwords, do an # auth_none check to see if it is even supported. supported_auth_types = [] try: # Note that with paramiko <1.7.5 this logs an INFO message: # Authentication type (none) not permitted. 
# So we explicitly disable the logging level for this action old_level = paramiko_transport.logger.level paramiko_transport.logger.setLevel(logging.WARNING) try: paramiko_transport.auth_none(username) finally: paramiko_transport.logger.setLevel(old_level) except paramiko.BadAuthenticationType as e: # Supported methods are in the exception supported_auth_types = e.allowed_types except paramiko.SSHException: # Don't know what happened, but just ignore it pass # We treat 'keyboard-interactive' and 'password' auth methods identically, # because Paramiko's auth_password method will automatically try # 'keyboard-interactive' auth (using the password as the response) if # 'password' auth is not available. Apparently some Debian and Gentoo # OpenSSH servers require this. # XXX: It's possible for a server to require keyboard-interactive auth that # requires something other than a single password, but we currently don't # support that. if ( "password" not in supported_auth_types and "keyboard-interactive" not in supported_auth_types ): raise errors.ConnectionError( "Unable to authenticate to SSH host as" "\n {}@{}\nsupported auth types: {}".format( username, host, supported_auth_types ) ) if password: try: paramiko_transport.auth_password(username, password) return except paramiko.SSHException: pass # give up and ask for a password password = _config.get_auth_password("ssh", host, username, port=port) # get_password can still return None, which means we should not prompt if password is not None: try: paramiko_transport.auth_password(username, password) except paramiko.SSHException as e: raise errors.ConnectionError( "Unable to authenticate to SSH host as\n {}@{}\n".format( username, host ), e, ) from e else: raise errors.ConnectionError( "Unable to authenticate to SSH host as {}@{}".format(username, host) ) def _try_pkey_auth( paramiko_transport: "paramiko_t.Transport", pkey_class: "type[Any]", username: str, filename: str, ) -> bool: filename = os.path.expanduser("~/.ssh/" 
+ filename) try: key = pkey_class.from_private_key_file(filename) paramiko_transport.auth_publickey(username, key) return True except paramiko.PasswordRequiredException: password = _ui.get_password( "SSH %(filename)s password", filename=os.fsdecode(filename) ) try: key = pkey_class.from_private_key_file(filename, password) paramiko_transport.auth_publickey(username, key) return True except paramiko.SSHException: logger.debug( "SSH authentication via %s key failed.", os.path.basename(filename), ) except paramiko.SSHException: logger.debug( "SSH authentication via %s key failed.", os.path.basename(filename) ) except OSError: pass return False SYSTEM_HOSTKEYS: "paramiko_t.HostKeys | dict" = {} BRZ_HOSTKEYS: "paramiko_t.HostKeys | dict" = {} def _ssh_host_keys_config_dir() -> str: return pathjoin(bedding.config_dir(), "ssh_host_keys") def load_host_keys() -> None: """Load system host keys (probably doesn't work on windows) and any "discovered" keys from previous sessions. """ global SYSTEM_HOSTKEYS, BRZ_HOSTKEYS try: SYSTEM_HOSTKEYS = paramiko.util.load_host_keys( os.path.expanduser("~/.ssh/known_hosts") ) except OSError as e: logger.debug("failed to load system host keys: %s", e) brz_hostkey_path = _ssh_host_keys_config_dir() try: BRZ_HOSTKEYS = paramiko.util.load_host_keys(brz_hostkey_path) except OSError as e: logger.debug("failed to load brz host keys: %s", e) save_host_keys() def save_host_keys() -> None: """Save "discovered" host keys in $(config)/ssh_host_keys/.""" global SYSTEM_HOSTKEYS, BRZ_HOSTKEYS bzr_hostkey_path = _ssh_host_keys_config_dir() bedding.ensure_config_dir_exists() try: with open(bzr_hostkey_path, "w") as f: f.write("# SSH host keys collected by bzr\n") for hostname, keys in BRZ_HOSTKEYS.items(): for keytype, key in keys.items(): f.write("{} {} {}\n".format(hostname, keytype, key.get_base64())) except OSError as e: logger.debug("failed to save bzr host keys: %s", e) class SSHConnection: """Abstract base class for SSH connections.""" def 
get_sock_or_pipes(self) -> tuple[str, object]: """Returns a (kind, io_object) pair. If kind == 'socket', then io_object is a socket. If kind == 'pipes', then io_object is a pair of file-like objects (read_from, write_to). Returns: tuple: A (kind, io_object) pair where: - kind is either 'socket' or 'pipes' - io_object is either a socket or (read_file, write_file) tuple """ raise NotImplementedError(self.get_sock_or_pipes) def close(self) -> None: """Close the SSH connection.""" raise NotImplementedError(self.close) dromedary-0.1.5/dromedary/ssh/paramiko.py000066400000000000000000000341301520150013200204310ustar00rootroot00000000000000# Copyright (C) 2006-2011 Robey Pointer # Copyright (C) 2005, 2006, 2007 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """SSH transport implementation using Paramiko library. This module provides SSH connection functionality using the Paramiko library for secure transport operations. It handles SSH authentication including agent keys, private key files, and password authentication, as well as host key verification and management. 
""" import getpass import logging import os from binascii import hexlify from typing import cast import paramiko from dromedary import _bedding, _config, _ui from dromedary.errors import TransportError from dromedary.osutils import pathjoin from dromedary.ssh import SFTPClientProtocol, SSHConnection, SSHVendor logger = logging.getLogger("dromedary.ssh.paramiko") SYSTEM_HOSTKEYS: paramiko.HostKeys = paramiko.HostKeys() BRZ_HOSTKEYS: paramiko.HostKeys = paramiko.HostKeys() def _paramiko_auth( username: str | None, password: str | None, host: str, port: int | None, paramiko_transport: paramiko.Transport, ) -> None: """Authenticate to an SSH server using paramiko. Attempts authentication in the following order: 1. SSH agent keys 2. Private key files (id_rsa, id_dsa) 3. Password authentication Args: username: SSH username, or None to use local username password: SSH password, or None for no password host: SSH hostname port: SSH port number paramiko_transport: Paramiko Transport object for the connection Raises: ConnectionError: If authentication fails or is not supported """ # paramiko requires a username, but it might be none if nothing was # supplied. If so, use the local username. if username is None: username = _config.get_auth_user( "ssh", host, port=port, default=getpass.getuser() ) agent = paramiko.Agent() for key in agent.get_keys(): logger.debug("Trying SSH agent key %s", hexlify(key.get_fingerprint()).upper()) try: paramiko_transport.auth_publickey(username, key) return except paramiko.SSHException: pass # okay, try finding id_rsa or id_dss? (posix only) if _try_pkey_auth(paramiko_transport, paramiko.RSAKey, username, "id_rsa"): return # DSSKey was removed in paramiko 4.0.0 as DSA keys are deprecated if hasattr(paramiko, "DSSKey"): if _try_pkey_auth(paramiko_transport, paramiko.DSSKey, username, "id_dsa"): return # If we have gotten this far, we are about to try for passwords, do an # auth_none check to see if it is even supported. 
supported_auth_types = [] try: # Note that with paramiko <1.7.5 this logs an INFO message: # Authentication type (none) not permitted. # So we explicitly disable the logging level for this action old_level = paramiko_transport.logger.level paramiko_transport.logger.setLevel(logging.WARNING) try: paramiko_transport.auth_none(username) finally: paramiko_transport.logger.setLevel(old_level) except paramiko.BadAuthenticationType as e: # Supported methods are in the exception supported_auth_types = e.allowed_types except paramiko.SSHException: # Don't know what happened, but just ignore it pass # We treat 'keyboard-interactive' and 'password' auth methods identically, # because Paramiko's auth_password method will automatically try # 'keyboard-interactive' auth (using the password as the response) if # 'password' auth is not available. Apparently some Debian and Gentoo # OpenSSH servers require this. # XXX: It's possible for a server to require keyboard-interactive auth that # requires something other than a single password, but we currently don't # support that. 
if ( "password" not in supported_auth_types and "keyboard-interactive" not in supported_auth_types ): raise ConnectionError( "Unable to authenticate to SSH host as" f"\n {username}@{host}\nsupported auth types: {supported_auth_types}" ) if password: try: paramiko_transport.auth_password(username, password) return except paramiko.SSHException: pass # give up and ask for a password password = _config.get_auth_password("ssh", host, username, port=port) # get_password can still return None, which means we should not prompt if password is not None: try: paramiko_transport.auth_password(username, password) except paramiko.SSHException as e: raise ConnectionError( f"Unable to authenticate to SSH host as\n {username}@{host}\n", e ) from e else: raise ConnectionError( f"Unable to authenticate to SSH host as {username}@{host}" ) def _try_pkey_auth( paramiko_transport: paramiko.Transport, pkey_class: "type[paramiko.PKey]", username: str, filename: str, ) -> bool: """Attempt public key authentication with a private key file. Args: paramiko_transport: Paramiko Transport object for the connection pkey_class: Paramiko private key class (e.g., RSAKey, DSSKey) username: SSH username for authentication filename: Name of the private key file (relative to ~/.ssh/) Returns: bool: True if authentication succeeded, False otherwise """ filename = os.path.expanduser("~/.ssh/" + filename) try: key = pkey_class.from_private_key_file(filename) paramiko_transport.auth_publickey(username, key) return True except paramiko.PasswordRequiredException: password = _ui.get_password( prompt="SSH %(filename)s password", filename=os.fsdecode(filename) ) try: key = pkey_class.from_private_key_file(filename, password) paramiko_transport.auth_publickey(username, key) return True except paramiko.SSHException: logger.debug( f"SSH authentication via {os.path.basename(filename)} key failed." 
) except paramiko.SSHException: logger.debug( "SSH authentication via %s key failed.", os.path.basename(filename) ) except OSError: pass return False def _ssh_host_keys_config_dir() -> str: return pathjoin(_bedding.config_dir(), "ssh_host_keys") def load_host_keys() -> None: """Load system host keys (probably doesn't work on windows) and any "discovered" keys from previous sessions. """ global SYSTEM_HOSTKEYS, BRZ_HOSTKEYS try: SYSTEM_HOSTKEYS = paramiko.util.load_host_keys( os.path.expanduser("~/.ssh/known_hosts") ) except OSError as e: logger.debug("failed to load system host keys: %s", e) brz_hostkey_path = _ssh_host_keys_config_dir() try: BRZ_HOSTKEYS = paramiko.util.load_host_keys(brz_hostkey_path) except OSError as e: logger.debug("failed to load brz host keys: %s", e) save_host_keys() def save_host_keys() -> None: """Save "discovered" host keys in $(config)/ssh_host_keys/.""" global SYSTEM_HOSTKEYS, BRZ_HOSTKEYS bzr_hostkey_path = _ssh_host_keys_config_dir() _bedding.ensure_config_dir_exists() try: with open(bzr_hostkey_path, "w") as f: f.write("# SSH host keys collected by bzr\n") for hostname, keys in BRZ_HOSTKEYS.items(): for keytype, key in keys.items(): f.write(f"{hostname} {keytype} {key.get_base64()}\n") except OSError as e: logger.debug("failed to save bzr host keys: %s", e) class ParamikoVendor(SSHVendor): """Vendor that uses paramiko.""" def _hexify(self, s: bytes) -> str: """Convert a byte string to uppercase hexadecimal representation. Args: s: Byte string to convert Returns: str: Uppercase hexadecimal representation of the input """ return hexlify(s).upper().decode("ascii") def _connect( self, username: str | None, password: str | None, host: str, port: int | None ) -> paramiko.Transport: """Establish a low-level SSH connection using paramiko. Handles host key verification by checking against system known_hosts and breezy's stored host keys. New host keys are automatically stored. 
Args: username: SSH username password: SSH password or None host: SSH hostname port: SSH port number or None for default Returns: paramiko.Transport: Authenticated paramiko Transport object Raises: TransportError: If host key verification fails ConnectionError: If connection or authentication fails """ global SYSTEM_HOSTKEYS, BRZ_HOSTKEYS from dromedary.ssh.paramiko import ( _paramiko_auth, _ssh_host_keys_config_dir, load_host_keys, save_host_keys, ) load_host_keys() try: t = paramiko.Transport((host, port or 22)) t.set_log_channel("bzr.paramiko") t.start_client() except (paramiko.SSHException, OSError) as e: self._raise_connection_error(host, port=port, orig_error=e) server_key = t.get_remote_server_key() server_key_hex = self._hexify(server_key.get_fingerprint()) keytype = server_key.get_name() if host in SYSTEM_HOSTKEYS and keytype in SYSTEM_HOSTKEYS[host]: our_server_key = SYSTEM_HOSTKEYS[host][keytype] our_server_key_hex = self._hexify(our_server_key.get_fingerprint()) elif host in BRZ_HOSTKEYS and keytype in BRZ_HOSTKEYS[host]: our_server_key = BRZ_HOSTKEYS[host][keytype] our_server_key_hex = self._hexify(our_server_key.get_fingerprint()) else: logger.warning( "Adding %s host key for %s: %s", keytype, host, server_key_hex ) add = getattr(BRZ_HOSTKEYS, "add", None) if add is not None: # paramiko >= 1.X.X BRZ_HOSTKEYS.add(host, keytype, server_key) else: BRZ_HOSTKEYS.setdefault(host, {})[keytype] = server_key # type: ignore[arg-type] our_server_key = server_key our_server_key_hex = self._hexify(our_server_key.get_fingerprint()) save_host_keys() if server_key != our_server_key: filename1 = os.path.expanduser("~/.ssh/known_hosts") filename2 = _ssh_host_keys_config_dir() raise TransportError( f"Host keys for {host} do not match! 
{our_server_key_hex} != {server_key_hex}", [f"Try editing {filename1} or {filename2}"], ) _paramiko_auth(username, password, host, port, t) return t def connect_sftp( self, username: str, password: str | None, host: str, port: int | None ) -> SFTPClientProtocol: """Connect to an SFTP server using paramiko. Args: username: SSH username password: SSH password or None host: SSH hostname port: SSH port number or None for default Returns: paramiko.SFTPClient: Connected SFTP client object Raises: ConnectionError: If connection or SFTP client creation fails """ t = self._connect(username, password, host, port) try: sftp = t.open_sftp_client() assert sftp is not None return cast("SFTPClientProtocol", sftp) except paramiko.SSHException as e: self._raise_connection_error( host, port=port, orig_error=e, msg="Unable to start sftp client" ) def connect_ssh( self, username: str, password: str | None, host: str, port: int | None, command: list[str], ) -> "_ParamikoSSHConnection": """Connect to SSH server and execute a command. Args: username: SSH username password: SSH password or None host: SSH hostname port: SSH port number or None for default command: List of command arguments to execute Returns: _ParamikoSSHConnection: SSH connection object for command execution Raises: ConnectionError: If connection or command execution fails """ t = self._connect(username, password, host, port) try: channel = t.open_session() cmdline = " ".join(command) channel.exec_command(cmdline) return _ParamikoSSHConnection(channel) except paramiko.SSHException as e: self._raise_connection_error( host, port=port, orig_error=e, msg="Unable to invoke remote bzr" ) class _ParamikoSSHConnection(SSHConnection): """An SSH connection via paramiko.""" def __init__(self, channel: paramiko.Channel) -> None: self.channel = channel def get_sock_or_pipes(self) -> tuple[str, paramiko.Channel]: """Get socket or pipe information for the SSH connection. 
Returns: tuple: A tuple containing ("socket", channel) where channel is the paramiko Channel object """ return ("socket", self.channel) def close(self) -> None: self.channel.close() paramiko_vendor = ParamikoVendor() dromedary-0.1.5/dromedary/ssh/russh.py000066400000000000000000000047021520150013200177740ustar00rootroot00000000000000# Copyright (C) 2026 Jelmer Vernooij # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Pure-Rust SSH vendor backed by russh. Thin Python adapter over `dromedary._transport_rs.ssh.RusshVendor`, so it plugs into the existing `SSHVendor` registry alongside the paramiko and subprocess vendors. Once the migration is complete this module is what `default_key` points at. 
""" from dromedary.ssh import SFTPClientProtocol, SSHConnection, SSHVendor from .._transport_rs import ssh as _ssh_rs class _RusshSSHConnection(SSHConnection): """SSHConnection wrapping a `RusshSSHConnection` from the Rust layer.""" def __init__(self, inner: _ssh_rs.RusshSSHConnection) -> None: self._inner = inner def send(self, data: bytes) -> int: return self._inner.send(data) def recv(self, count: int) -> bytes: return self._inner.recv(count) def close(self) -> None: return self._inner.close() class RusshVendor(SSHVendor): """SSH vendor using the pure-Rust russh library.""" def __init__(self) -> None: """Construct a russh-backed SSH vendor.""" self._vendor = _ssh_rs.RusshVendor() def connect_sftp( self, username: str, password: str | None, host: str, port: int | None ) -> SFTPClientProtocol: """Open an SFTP session; returns an `_transport_rs.sftp.SFTPClient`.""" return self._vendor.connect_sftp(username, password, host, port) def connect_ssh( self, username: str, password: str | None, host: str, port: int | None, command: list[str], ) -> _RusshSSHConnection: """Execute `command` on the remote host over SSH.""" inner = self._vendor.connect_ssh(username, password, host, command, port) return _RusshSSHConnection(inner) russh_vendor = RusshVendor() dromedary-0.1.5/dromedary/ssh/subprocess_rs.py000066400000000000000000000056071520150013200215310ustar00rootroot00000000000000# Copyright (C) 2026 Jelmer Vernooij # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Rust-backed subprocess SSH vendors (OpenSSH, LSH, PLink). Thin Python adapters over `dromedary._transport_rs.ssh.{OpenSSH,LSH, PLink}SubprocessVendor`. These are the implementations registered for the `openssh`, `lsh`, and `plink` vendor names (the former pure-Python equivalents have been removed). """ from dromedary.ssh import SFTPClient, SFTPClientProtocol, SSHConnection, SSHVendor from .._transport_rs import ssh as _ssh_rs class _RustSubprocessSSHConnection(SSHConnection): """SSHConnection wrapping a Rust `SSHSubprocessConnection`.""" def __init__(self, inner: _ssh_rs.SSHSubprocessConnection) -> None: self._inner = inner def close(self) -> None: return self._inner.close() class _RustSubprocessVendor(SSHVendor): """Shared adapter logic for the three Rust subprocess vendors.""" def __init__( self, rust_vendor: "_ssh_rs.OpenSSHSubprocessVendor | _ssh_rs.LSHSubprocessVendor | _ssh_rs.PLinkSubprocessVendor", ) -> None: self._vendor = rust_vendor @property def executable_path(self) -> str | None: return self._vendor.executable_path @executable_path.setter def executable_path(self, value: str | None) -> None: # `SSHVendorManager._get_vendor_from_path` assigns this when # `BRZ_SSH=/path/to/ssh` is set, so the override has to reach the # Rust vendor's argv builder. 
self._vendor.executable_path = value def connect_sftp( self, username: str, password: str | None, host: str, port: int | None ) -> SFTPClientProtocol: fd = self._vendor.spawn_sftp(username, host, port) return SFTPClient(fd) def connect_ssh( self, username: str, password: str | None, host: str, port: int | None, command: list[str], ) -> _RustSubprocessSSHConnection: inner = self._vendor.connect_ssh(username, host, command, port) return _RustSubprocessSSHConnection(inner) openssh_vendor = _RustSubprocessVendor(_ssh_rs.OpenSSHSubprocessVendor()) lsh_vendor = _RustSubprocessVendor(_ssh_rs.LSHSubprocessVendor()) plink_vendor = _RustSubprocessVendor(_ssh_rs.PLinkSubprocessVendor()) dromedary-0.1.5/dromedary/tests/000077500000000000000000000000001520150013200166205ustar00rootroot00000000000000dromedary-0.1.5/dromedary/tests/__init__.py000066400000000000000000000264571520150013200207470ustar00rootroot00000000000000# Copyright (C) 2005-2012, 2016 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Tests for dromedary transport functionality.""" import io import logging import os import re import tempfile import unittest class TestNotApplicable( unittest.TestCase.skipException # type: ignore[misc] if hasattr(unittest.TestCase, "skipException") else unittest.SkipTest ): """Test is not applicable to the current situation.""" TestSkipped = unittest.SkipTest def _iter_test_cases(suite_or_case): """Yield individual TestCase leaves from a TestSuite or TestCase.""" if isinstance(suite_or_case, unittest.TestCase): yield suite_or_case return for child in suite_or_case: yield from _iter_test_cases(child) def multiply_tests(tests, scenarios, result): """Multiply tests by scenarios, adding them to result. Uses ``testscenarios.apply_scenarios`` to produce the per-scenario clones with properly-suffixed test ids. 
""" from testscenarios.scenarios import apply_scenarios for test in _iter_test_cases(tests): for scenario_test in apply_scenarios(scenarios, test): result.addTest(scenario_test) return result class Feature: """A feature that may or may not be available.""" def available(self): try: return self._probe() except Exception: return False def _probe(self): raise NotImplementedError class _Win32Feature(Feature): def _probe(self): import sys return sys.platform == "win32" class _ParamikoFeature(Feature): def _probe(self): import importlib.util return importlib.util.find_spec("paramiko") is not None win32_feature = _Win32Feature() paramiko = _ParamikoFeature() class _AssertHelpersMixin: """Extra assertion methods for dromedary tests.""" def assertStartsWith(self, s, prefix, msg=None): if not s.startswith(prefix): if msg is None: msg = f"{s!r} does not start with {prefix!r}" raise AssertionError(msg) def assertEndsWith(self, s, suffix, msg=None): if not s.endswith(suffix): if msg is None: msg = f"{s!r} does not end with {suffix!r}" raise AssertionError(msg) def assertLength(self, expected, container): if len(container) != expected: raise AssertionError( f"Expected length {expected}, got {len(container)}: {container!r}" ) def assertListRaises(self, exc_type, callable, *args, **kwargs): """Assert that fully consuming an iterator raises the given exception.""" try: list(callable(*args, **kwargs)) except exc_type: return raise AssertionError(f"{exc_type.__name__} not raised") def assertTransportMode(self, transport, path, mode): """Assert a file's mode bits via transport.stat().""" actual_mode = transport.stat(path).st_mode & 0o777 if actual_mode != mode: raise AssertionError( f"mode mismatch for {path!r}: expected {mode:o}, got {actual_mode:o}" ) def assertEqualDiff(self, expected, actual, msg=None): """Assert two values are equal; on failure print a diff.""" if expected == actual: return if isinstance(expected, bytes) and isinstance(actual, bytes): try: expected_text = 
expected.decode("utf-8") actual_text = actual.decode("utf-8") except UnicodeDecodeError: raise AssertionError( f"{msg + ': ' if msg else ''}{expected!r} != {actual!r}" ) from None else: expected_text = str(expected) actual_text = str(actual) import difflib diff = "\n".join( difflib.unified_diff( expected_text.splitlines(), actual_text.splitlines(), lineterm="", fromfile="expected", tofile="actual", ) ) raise AssertionError(f"{msg + chr(10) if msg else ''}values not equal:\n{diff}") def overrideAttr(self, obj, attr_name, new_value=None): """Temporarily replace an attribute, restoring it after the test.""" old_value = getattr(obj, attr_name) if new_value is not None: setattr(obj, attr_name, new_value) self.addCleanup(setattr, obj, attr_name, old_value) def recordCalls(self, obj, attr_name): """Replace a callable with a wrapper that records calls. Returns the list of call records. Restores the original after the test. """ calls = [] orig = getattr(obj, attr_name) def recorder(*args, **kwargs): calls.append((args, kwargs)) return orig(*args, **kwargs) setattr(obj, attr_name, recorder) self.addCleanup(setattr, obj, attr_name, orig) return calls class TestCase(_AssertHelpersMixin, unittest.TestCase): """Base test case for dromedary tests with extra assertion helpers.""" def setUp(self): super().setUp() self._log_stream = io.StringIO() self._log_handler = logging.StreamHandler(self._log_stream) self._log_handler.setFormatter(logging.Formatter("%(message)s")) dromedary_logger = logging.getLogger("dromedary") dromedary_logger.addHandler(self._log_handler) dromedary_logger.setLevel(logging.DEBUG) self.addCleanup(dromedary_logger.removeHandler, self._log_handler) def get_log(self): """Return captured log output.""" return self._log_stream.getvalue() def log(self, *args): """Append a message to the captured test log.""" if len(args) == 1: msg = args[0] else: msg = args[0] % args[1:] self._log_stream.write(str(msg) + "\n") def start_server(self, server): """Start a test server, 
registering cleanup to stop it.""" server.start_server() self.addCleanup(server.stop_server) def requireFeature(self, feature): """Skip test if feature is not available.""" if not feature.available(): raise unittest.SkipTest(f"Feature not available: {feature!r}") def assertContainsRe(self, haystack, needle, flags=0): """Assert that a string matches a regular expression.""" if not re.search(needle, haystack, flags): raise AssertionError(f"pattern {needle!r} not found in {haystack!r}") def overrideEnv(self, name, new_value): """Temporarily override an environment variable.""" old_value = os.environ.get(name) if new_value is None: os.environ.pop(name, None) else: os.environ[name] = new_value def restore(): if old_value is None: os.environ.pop(name, None) else: os.environ[name] = old_value self.addCleanup(restore) @staticmethod def _adjust_url(base, relpath): """Get a URL for the transport, adjusted by relpath.""" if relpath is not None and relpath != ".": if not base.endswith("/"): base = base + "/" if base.startswith("./") or base.startswith("/"): base += relpath else: from dromedary import urlutils base += urlutils.escape(relpath) return base class TestCaseInTempDir(TestCase): """A test case that runs in a temporary directory. Creates a fresh temporary directory before each test and changes into it. The original directory is restored and the temporary directory is cleaned up after each test. """ def setUp(self): super().setUp() self._original_dir = os.getcwd() # Resolve symlinks so `_tempdir` matches `os.getcwd()` after `chdir`. # On macOS `tempfile.mkdtemp` returns `/var/folders/...` but `getcwd` # then reports the realpath `/private/var/folders/...`, breaking # tests that compare paths through both code paths. 
self._tempdir = os.path.realpath(tempfile.mkdtemp(prefix="dromedary-test-")) os.chdir(self._tempdir) self.addCleanup(self._cleanup_tempdir) def _cleanup_tempdir(self): os.chdir(self._original_dir) import shutil shutil.rmtree(self._tempdir, ignore_errors=True) @property def test_dir(self): """The path to the temporary directory for this test.""" return self._tempdir def build_tree(self, shape, line_endings="binary"): """Build a test tree of local files and directories under cwd. shape is a sequence of file specifications. If the final character is '/', a directory is created. """ for name in shape: if name.endswith("/"): os.mkdir(name.rstrip("/")) else: if line_endings == "binary": end = b"\n" elif line_endings == "native": end = os.linesep.encode("ascii") else: raise ValueError(f"Invalid line ending request {line_endings!r}") content = b"contents of %s%s" % (name.encode("utf-8"), end) with open(name, "wb") as f: f.write(content) def build_tree_contents(self, entries): """Build a tree with explicit file contents under cwd.""" for name, content in entries: with open(name, "wb") as f: f.write(content) class TestCaseWithMemoryTransport(TestCase): """A test case that provides a memory transport. Provides get_transport() to obtain a memory transport for testing. 
""" def setUp(self): super().setUp() from dromedary.memory import MemoryServer self._memory_server = MemoryServer() self._memory_server.start_server() self.addCleanup(self._memory_server.stop_server) def get_transport(self, relpath=""): """Return a memory transport for testing.""" import dromedary base_url = self._memory_server.get_url() t = dromedary.get_transport_from_url(base_url) if relpath: t = t.clone(relpath) return t class TestCaseWithTransport(TestCaseInTempDir): """A test case that provides transport access to a temporary directory.""" def get_transport(self, relpath=""): """Return a local transport for the test's temporary directory.""" from dromedary import get_transport_from_path path = os.path.join(self._tempdir, relpath) if relpath else self._tempdir os.makedirs(path, exist_ok=True) return get_transport_from_path(path) dromedary-0.1.5/dromedary/tests/http_server.py000066400000000000000000000430531520150013200215440ustar00rootroot00000000000000# Copyright (C) 2006-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA import errno import http.client as http_client import http.server as http_server import os import posixpath import random import re from urllib.parse import urlparse from dromedary import osutils, urlutils from dromedary.tests import test_server class BadWebserverPath(ValueError): def __str__(self): return "path {} is not in {}".format(*self.args) class TestingHTTPRequestHandler(http_server.SimpleHTTPRequestHandler): """Handles one request. A TestingHTTPRequestHandler is instantiated for every request received by the associated server. Note that 'request' here is inherited from the base TCPServer class, for the HTTP server it is really a connection which itself will handle one or several HTTP requests. """ # Default protocol version protocol_version = "HTTP/1.1" # The Message-like class used to parse the request headers MessageClass = http_client.HTTPMessage def setup(self): http_server.SimpleHTTPRequestHandler.setup(self) self._cwd = self.server._home_dir tcs = self.server.test_case_server if tcs.protocol_version is not None: # If the test server forced a protocol version, use it self.protocol_version = tcs.protocol_version def log_message(self, format, *args): tcs = self.server.test_case_server tcs.log( 'webserver - %s - - [%s] %s "%s" "%s"', self.address_string(), self.log_date_time_string(), format % args, self.headers.get("referer", "-"), self.headers.get("user-agent", "-"), ) def handle_one_request(self): """Handle a single HTTP request. We catch all socket errors occurring when the client close the connection early to avoid polluting the test results. 
""" try: self._handle_one_request() except OSError as e: # Any socket error should close the connection, but some errors are # due to the client closing early and we don't want to pollute test # results, so we raise only the others. self.close_connection = 1 if len(e.args) == 0 or e.args[0] not in ( errno.EPIPE, errno.ECONNRESET, errno.ECONNABORTED, errno.EBADF, ): raise error_content_type = "text/plain" error_message_format = """\ Error code: %(code)s. Message: %(message)s. """ def send_error(self, code, message=None): """Send and log an error reply. We redefine the python-provided version to be able to set a ``Content-Length`` header as some http/1.1 clients complain otherwise (see bug #568421). :param code: The HTTP error code. :param message: The explanation of the error code, Defaults to a short entry. """ if message is None: try: message = self.responses[code][0] except KeyError: message = "???" self.log_error("code %d, message %s", code, message) content = self.error_message_format % {"code": code, "message": message} self.send_response(code, message) self.send_header("Content-Type", self.error_content_type) self.send_header("Content-Length", f"{len(content)}") self.send_header("Connection", "close") self.end_headers() if self.command != "HEAD" and code >= 200 and code not in (204, 304): self.wfile.write(content.encode("utf-8")) def _handle_one_request(self): http_server.SimpleHTTPRequestHandler.handle_one_request(self) _range_regexp = re.compile(r"^(?P\d+)-(?P\d+)?$") _tail_regexp = re.compile(r"^-(?P\d+)$") def _parse_ranges(self, ranges_header, file_size): """Parse the range header value and returns ranges. RFC2616 14.35 says that syntactically invalid range specifiers MUST be ignored. In that case, we return None instead of a range list. :param ranges_header: The 'Range' header value. :param file_size: The size of the requested file. :return: A list of (start, end) tuples or None if some invalid range specifier is encountered. 
""" if not ranges_header.startswith("bytes="): # Syntactically invalid header return None tail = None ranges = [] ranges_header = ranges_header[len("bytes=") :] for range_str in ranges_header.split(","): range_match = self._range_regexp.match(range_str) if range_match is not None: start = int(range_match.group("start")) end_match = range_match.group("end") if end_match is None: # RFC2616 says end is optional and default to file_size end = file_size else: end = int(end_match) if start > end: # Syntactically invalid range return None ranges.append((start, end)) else: tail_match = self._tail_regexp.match(range_str) if tail_match is not None: tail = int(tail_match.group("tail")) else: # Syntactically invalid range return None if tail is not None: # Normalize tail into ranges ranges.append((max(0, file_size - tail), file_size)) checked_ranges = [] for start, end in ranges: if start >= file_size: # RFC2616 14.35, ranges are invalid if start >= file_size return None # RFC2616 14.35, end values should be truncated # to file_size -1 if they exceed it end = min(end, file_size - 1) checked_ranges.append((start, end)) return checked_ranges def _header_line_length(self, keyword, value): header_line = f"{keyword}: {value}\r\n" return len(header_line) def send_range_content(self, file, start, length): file.seek(start) self.wfile.write(file.read(length)) def get_single_range(self, file, file_size, start, end): self.send_response(206) length = end - start + 1 self.send_header("Accept-Ranges", "bytes") self.send_header("Content-Length", "%d" % length) self.send_header("Content-Type", "application/octet-stream") self.send_header("Content-Range", "bytes %d-%d/%d" % (start, end, file_size)) self.end_headers() self.send_range_content(file, start, length) def get_multiple_ranges(self, file, file_size, ranges): self.send_response(206) self.send_header("Accept-Ranges", "bytes") boundary = "%d" % random.randint(0, 0x7FFFFFFF) # noqa: S311 self.send_header("Content-Type", 
f"multipart/byteranges; boundary={boundary}") boundary_line = b"--%s\r\n" % boundary.encode("ascii") # Calculate the Content-Length content_length = 0 for start, end in ranges: content_length += len(boundary_line) content_length += self._header_line_length( "Content-type", "application/octet-stream" ) content_length += self._header_line_length( "Content-Range", "bytes %d-%d/%d" % (start, end, file_size) ) content_length += len("\r\n") # end headers content_length += end - start + 1 content_length += len(boundary_line) self.send_header("Content-length", content_length) self.end_headers() # Send the multipart body for start, end in ranges: self.wfile.write(boundary_line) self.send_header("Content-type", "application/octet-stream") self.send_header( "Content-Range", "bytes %d-%d/%d" % (start, end, file_size) ) self.end_headers() self.send_range_content(file, start, end - start + 1) # Final boundary self.wfile.write(boundary_line) def do_GET(self): """Serve a GET request. Handles the Range header. """ # Update statistics self.server.test_case_server.GET_request_nb += 1 path = self.translate_path(self.path) ranges_header_value = self.headers.get("Range") if ranges_header_value is None or os.path.isdir(path): # Let the mother class handle most cases return http_server.SimpleHTTPRequestHandler.do_GET(self) try: # Always read in binary mode. Opening files in text # mode may cause newline translations, making the # actual size of the content transmitted *less* than # the content-length! f = open(path, "rb") except OSError: self.send_error(404, "File not found") return file_size = os.fstat(f.fileno())[6] ranges = self._parse_ranges(ranges_header_value, file_size) if not ranges: # RFC2616 14.16 and 14.35 says that when a server # encounters unsatisfiable range specifiers, it # SHOULD return a 416. f.close() # FIXME: We SHOULD send a Content-Range header too, # but the implementation of send_error does not # allows that. So far. 
self.send_error(416, "Requested range not satisfiable") return if len(ranges) == 1: (start, end) = ranges[0] self.get_single_range(f, file_size, start, end) else: self.get_multiple_ranges(f, file_size, ranges) f.close() def translate_path(self, path): """Translate a /-separated PATH to the local filename syntax. If the server requires it, proxy the path before the usual translation """ if self.server.test_case_server.proxy_requests: # We need to act as a proxy and accept absolute urls, # which SimpleHTTPRequestHandler (parent) is not # ready for. So we just drop the protocol://host:port # part in front of the request-url (because we know # we would not forward the request to *another* # proxy). # So we do what SimpleHTTPRequestHandler.translate_path # do beginning with python 2.4.3: abandon query # parameters, scheme, host port, etc (which ensure we # provide the right behaviour on all python versions). path = urlparse(path)[2] # And now, we can apply *our* trick to proxy files path += "-proxied" return self._translate_path(path) def _translate_path(self, path): """Translate a /-separated PATH to the local filename syntax. Note that we're translating http URLs here, not file URLs. The URL root location is the server's startup directory. Components that mean special things to the local file system (e.g. drive or directory names) are ignored. (XXX They should probably be diagnosed.) 
Override from python standard library to stop it calling os.getcwd() """ # abandon query parameters path = urlparse(path)[2] path = posixpath.normpath(urlutils.unquote(path)) words = path.split("/") path = self._cwd for num, word in enumerate(w for w in words if w): if num == 0: _drive, word = os.path.splitdrive(word) _head, word = os.path.split(word) if word in (os.curdir, os.pardir): continue path = os.path.join(path, word) return path class TestingHTTPServerMixin: def __init__(self, test_case_server): # test_case_server can be used to communicate between the # tests and the server (or the request handler and the # server), allowing dynamic behaviors to be defined from # the tests cases. self.test_case_server = test_case_server self._home_dir = test_case_server._home_dir class TestingHTTPServer(test_server.TestingTCPServer, TestingHTTPServerMixin): def __init__(self, server_address, request_handler_class, test_case_server): test_server.TestingTCPServer.__init__( self, server_address, request_handler_class ) TestingHTTPServerMixin.__init__(self, test_case_server) class TestingThreadingHTTPServer( test_server.TestingThreadingTCPServer, TestingHTTPServerMixin ): """A threading HTTP test server for HTTP 1.1. Since tests can initiate several concurrent connections to the same http server, we need an independent connection for each of them. We achieve that by spawning a new thread for each connection. """ def __init__(self, server_address, request_handler_class, test_case_server): test_server.TestingThreadingTCPServer.__init__( self, server_address, request_handler_class ) TestingHTTPServerMixin.__init__(self, test_case_server) class HttpServer(test_server.TestingTCPServerInAThread): """A test server for http transports. Subclasses can provide a specific request handler. 
""" # The real servers depending on the protocol http_server_class = { "HTTP/1.0": TestingHTTPServer, "HTTP/1.1": TestingThreadingHTTPServer, } # Whether or not we proxy the requests (see # TestingHTTPRequestHandler.translate_path). proxy_requests = False # used to form the url that connects to this server _url_protocol = "http" def __init__( self, request_handler=TestingHTTPRequestHandler, protocol_version=None ): """Constructor. :param request_handler: a class that will be instantiated to handle an http connection (one or several requests). :param protocol_version: if specified, will override the protocol version of the request handler. """ # Depending on the protocol version, we will create the approriate # server if protocol_version is None: # Use the request handler one proto_vers = request_handler.protocol_version else: # Use our own, it will be used to override the request handler # one too. proto_vers = protocol_version # Get the appropriate server class for the required protocol serv_cls = self.http_server_class.get(proto_vers, None) if serv_cls is None: raise http_client.UnknownProtocol(proto_vers) self.host = "localhost" self.port = 0 super().__init__((self.host, self.port), serv_cls, request_handler) self.protocol_version = proto_vers # Allows tests to verify number of GET requests issued self.GET_request_nb = 0 self._http_base_url = None self.logs = [] def create_server(self): return self.server_class( (self.host, self.port), self.request_handler_class, self ) def _get_remote_url(self, path): path_parts = path.split(os.path.sep) if os.path.isabs(path): if path_parts[: len(self._local_path_parts)] != self._local_path_parts: raise BadWebserverPath(path, self.test_dir) remote_path = "/".join(path_parts[len(self._local_path_parts) :]) else: remote_path = "/".join(path_parts) return self._http_base_url + remote_path def log(self, format, *args): """Capture Server log output.""" self.logs.append(format % args) def start_server(self, 
backing_transport_server=None): """See breezy.transport.Server.start_server. :param backing_transport_server: The transport that requests over this protocol should be forwarded to. Note that this is currently not supported for HTTP. """ # XXX: TODO: make the server back onto vfs_server rather than local # disk. if not ( backing_transport_server is None or isinstance(backing_transport_server, test_server.LocalURLServer) ): raise AssertionError( "HTTPServer currently assumes local transport, got {}".format( backing_transport_server ) ) self._home_dir = osutils.getcwd() self._local_path_parts = self._home_dir.split(os.path.sep) self.logs = [] super().start_server() self._http_base_url = f"{self._url_protocol}://{self.host}:{self.port}/" def get_url(self): """See breezy.transport.Server.get_url.""" return self._get_remote_url(self._home_dir) def get_bogus_url(self): """See breezy.transport.Server.get_bogus_url.""" # this is chosen to try to prevent trouble with proxies, weird dns, # etc return self._url_protocol + "://127.0.0.1:1/" dromedary-0.1.5/dromedary/tests/https_server.py000066400000000000000000000115751520150013200217330ustar00rootroot00000000000000# Copyright (C) 2007-2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """HTTPS test server, available when ssl python module is available.""" import ssl from . import http_server, ssl_certs, test_server class TestingHTTPSServerMixin: def __init__(self, key_file, cert_file): self.key_file = key_file self.cert_file = cert_file def _get_ssl_request(self, sock, addr): """Wrap the socket with SSL.""" ssl_context = ssl.SSLContext(ssl.PROTOCOL_TLS_SERVER) if self.cert_file: ssl_context.load_cert_chain(self.cert_file, self.key_file) ssl_sock = ssl_context.wrap_socket( sock=sock, server_side=True, do_handshake_on_connect=False ) return ssl_sock, addr def verify_request(self, request, client_address): """Verify the request. Return True if we should proceed with this request, False if we should not even touch a single byte in the socket ! """ serving = test_server.TestingTCPServerMixin.verify_request( self, request, client_address ) if serving: try: request.do_handshake() except ssl.SSLError: # FIXME: We proabaly want more tests to capture which ssl # errors are worth reporting but mostly our tests want an https # server that works -- vila 2012-01-19 return False return serving def ignored_exceptions_during_shutdown(self, e): base = test_server.TestingTCPServerMixin return base.ignored_exceptions_during_shutdown(self, e) class TestingHTTPSServer(TestingHTTPSServerMixin, http_server.TestingHTTPServer): def __init__( self, server_address, request_handler_class, test_case_server, key_file, cert_file, ): TestingHTTPSServerMixin.__init__(self, key_file, cert_file) http_server.TestingHTTPServer.__init__( self, server_address, request_handler_class, test_case_server ) def get_request(self): sock, addr = http_server.TestingHTTPServer.get_request(self) return self._get_ssl_request(sock, addr) class TestingThreadingHTTPSServer( 
TestingHTTPSServerMixin, http_server.TestingThreadingHTTPServer ): def __init__( self, server_address, request_handler_class, test_case_server, key_file, cert_file, ): TestingHTTPSServerMixin.__init__(self, key_file, cert_file) http_server.TestingThreadingHTTPServer.__init__( self, server_address, request_handler_class, test_case_server ) def get_request(self): sock, addr = http_server.TestingThreadingHTTPServer.get_request(self) return self._get_ssl_request(sock, addr) class HTTPSServer(http_server.HttpServer): _url_protocol = "https" # The real servers depending on the protocol http_server_class = { "HTTP/1.0": TestingHTTPSServer, # type: ignore "HTTP/1.1": TestingThreadingHTTPSServer, # type: ignore } # Provides usable defaults since an https server requires both a # private key and a certificate to work. def __init__( self, request_handler=http_server.TestingHTTPRequestHandler, protocol_version=None, key_file=ssl_certs.build_path("server_without_pass.key"), # noqa: B008 cert_file=ssl_certs.build_path("server.crt"), # noqa: B008 ): http_server.HttpServer.__init__( self, request_handler=request_handler, protocol_version=protocol_version ) self.key_file = key_file self.cert_file = cert_file self.temp_files = [] def create_server(self): return self.server_class( (self.host, self.port), self.request_handler_class, self, self.key_file, self.cert_file, ) class HTTPSServer_urllib(HTTPSServer): """Subclass of HTTPSServer that gives https+urllib urls. This is for use in testing: connections to this server will always go through urllib where possible. 
""" # urls returned by this server should require the urllib client impl _url_protocol = "https+urllib" dromedary-0.1.5/dromedary/tests/per_transport.py000066400000000000000000002053771520150013200221120ustar00rootroot00000000000000# Copyright (C) 2005-2011, 2015, 2016 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Tests for Transport implementations. Transport implementations tested here are supplied by TransportTestProviderAdapter. 
""" import contextlib import os import random import stat import sys from io import BytesIO from catalogus import pyutils import dromedary as _mod_transport from dromedary import ( ConnectedTransport, Transport, _get_transport_modules, errors, osutils, urlutils, ) from dromedary.errors import FileExists, NoSuchFile, PathError, TransportNotPossible from dromedary.memory import MemoryTransport from dromedary.osutils import getcwd from dromedary.tests import TestNotApplicable, TestSkipped, multiply_tests from .test_transport import TestTransportImplementation def get_transport_test_permutations(module): """Get the permutations module wants to have tested.""" if getattr(module, "get_test_permutations", None) is None: raise AssertionError( "transport module {} doesn't provide get_test_permutations()".format( module.__name__ ) ) return [] return module.get_test_permutations() def transport_test_permutations(): """Return a list of the klass, server_factory pairs to test.""" result = [] for module in _get_transport_modules(): try: permutations = get_transport_test_permutations( pyutils.get_named_object(module) ) for klass, server_factory in permutations: scenario = ( f"{klass.__name__},{server_factory.__name__}", {"transport_class": klass, "transport_server": server_factory}, ) result.append(scenario) except errors.DependencyNotPresent: # Continue even if a dependency prevents us # from adding this test pass return result def load_tests(loader, standard_tests, pattern): """Multiply tests for tranport implementations.""" result = loader.suiteClass() scenarios = transport_test_permutations() return multiply_tests(standard_tests, scenarios, result) class TransportTests(TestTransportImplementation): def setUp(self): super().setUp() self.overrideEnv("BRZ_NO_SMART_VFS", None) def check_transport_contents(self, content, transport, relpath): """Check that transport.get_bytes(relpath) == content.""" self.assertEqualDiff(content, transport.get_bytes(relpath)) def 
test_ensure_base_missing(self): """.ensure_base() should create the directory if it doesn't exist.""" t = self.get_transport() t_a = t.clone("a") self.assertFalse(t.ensure_base()) if t_a.is_readonly(): self.assertRaises(TransportNotPossible, t_a.ensure_base) return self.assertTrue(t_a.ensure_base()) self.assertTrue(t.has("a")) def test_ensure_base_exists(self): """.ensure_base() should just be happy if it already exists.""" t = self.get_transport() if t.is_readonly(): return t.mkdir("a") t_a = t.clone("a") # ensure_base returns False if it didn't create the base self.assertFalse(t_a.ensure_base()) def test_ensure_base_missing_parent(self): """.ensure_base() will fail if the parent dir doesn't exist.""" t = self.get_transport() if t.is_readonly(): return t_a = t.clone("a") t_b = t_a.clone("b") self.assertRaises(NoSuchFile, t_b.ensure_base) def test_external_url(self): """.external_url either works or raises InProcessTransport.""" t = self.get_transport() with contextlib.suppress(errors.InProcessTransport): t.external_url() def test_has(self): t = self.get_transport() files = ["a", "b", "e", "g", "%"] self.build_tree(files, transport=t) self.assertEqual(True, t.has("a")) self.assertEqual(False, t.has("c")) self.assertEqual(True, t.has(urlutils.escape("%"))) self.assertEqual(True, t.has_any(["a", "b", "c"])) self.assertEqual(False, t.has_any(["c", "d", "f", urlutils.escape("%%")])) self.assertEqual(False, t.has_any(["c", "c", "c"])) self.assertEqual(True, t.has_any(["b", "b", "b"])) def test_get(self): t = self.get_transport() content = b"contents of a\n" self.build_tree(["a"], transport=t, line_endings="binary") self.check_transport_contents(b"contents of a\n", t, "a") f = t.get("a") self.assertEqual(content, f.read()) def test_get_unknown_file(self): t = self.get_transport() files = ["a", "b"] self.build_tree(files, transport=t, line_endings="binary") self.assertRaises(NoSuchFile, t.get, "c") def iterate_and_close(func, *args): for f in func(*args): # We call 
f.read() here because things like paramiko actually # spawn a thread to prefetch the content, which we want to # consume before we close the handle. f.read() f.close() def test_get_directory_read_gives_ReadError(self): """Consistent errors for read() on a file returned by get().""" t = self.get_transport() if t.is_readonly(): self.build_tree(["a directory/"]) else: t.mkdir("a%20directory") # getting the file must either work or fail with a PathError try: a_file = t.get("a%20directory") except (errors.PathError, errors.RedirectRequested): # early failure return immediately. return # having got a file, read() must either work (i.e. http reading a dir # listing) or fail with ReadError with contextlib.suppress(errors.ReadError): a_file.read() def test_get_bytes(self): t = self.get_transport() files = ["a", "b", "e", "g"] contents = [ b"contents of a\n", b"contents of b\n", b"contents of e\n", b"contents of g\n", ] self.build_tree(files, transport=t, line_endings="binary") self.check_transport_contents(b"contents of a\n", t, "a") for content, fname in zip(contents, files, strict=False): self.assertEqual(content, t.get_bytes(fname)) def test_get_bytes_unknown_file(self): t = self.get_transport() self.assertRaises(NoSuchFile, t.get_bytes, "c") def test_get_bytes_with_open_write_stream_sees_all_content(self): t = self.get_transport() if t.is_readonly(): return with t.open_write_stream("foo") as handle: handle.write(b"b") self.assertEqual(b"b", t.get_bytes("foo")) with t.get("foo") as f: self.assertEqual(b"b", f.read()) def test_put_bytes(self): t = self.get_transport() if t.is_readonly(): self.assertRaises( TransportNotPossible, t.put_bytes, "a", b"some text for a\n" ) return t.put_bytes("a", b"some text for a\n") self.assertTrue(t.has("a")) self.check_transport_contents(b"some text for a\n", t, "a") # The contents should be overwritten t.put_bytes("a", b"new text for a\n") self.check_transport_contents(b"new text for a\n", t, "a") self.assertRaises(NoSuchFile, 
t.put_bytes, "path/doesnt/exist/c", b"contents") def test_put_bytes_non_atomic(self): t = self.get_transport() if t.is_readonly(): self.assertRaises( TransportNotPossible, t.put_bytes_non_atomic, "a", b"some text for a\n" ) return self.assertFalse(t.has("a")) t.put_bytes_non_atomic("a", b"some text for a\n") self.assertTrue(t.has("a")) self.check_transport_contents(b"some text for a\n", t, "a") # Put also replaces contents t.put_bytes_non_atomic("a", b"new\ncontents for\na\n") self.check_transport_contents(b"new\ncontents for\na\n", t, "a") # Make sure we can create another file t.put_bytes_non_atomic("d", b"contents for\nd\n") # And overwrite 'a' with empty contents t.put_bytes_non_atomic("a", b"") self.check_transport_contents(b"contents for\nd\n", t, "d") self.check_transport_contents(b"", t, "a") self.assertRaises( NoSuchFile, t.put_bytes_non_atomic, "no/such/path", b"contents\n" ) # Now test the create_parent flag self.assertRaises(NoSuchFile, t.put_bytes_non_atomic, "dir/a", b"contents\n") self.assertFalse(t.has("dir/a")) t.put_bytes_non_atomic("dir/a", b"contents for dir/a\n", create_parent_dir=True) self.check_transport_contents(b"contents for dir/a\n", t, "dir/a") # But we still get NoSuchFile if we can't make the parent dir self.assertRaises( NoSuchFile, t.put_bytes_non_atomic, "not/there/a", b"contents\n", create_parent_dir=True, ) def test_put_bytes_permissions(self): t = self.get_transport() if t.is_readonly(): return if not t._can_roundtrip_unix_modebits(): # Can't roundtrip, so no need to run this test return t.put_bytes("mode644", b"test text\n", mode=0o644) self.assertTransportMode(t, "mode644", 0o644) t.put_bytes("mode666", b"test text\n", mode=0o666) self.assertTransportMode(t, "mode666", 0o666) t.put_bytes("mode600", b"test text\n", mode=0o600) self.assertTransportMode(t, "mode600", 0o600) # Yes, you can put_bytes a file such that it becomes readonly t.put_bytes("mode400", b"test text\n", mode=0o400) self.assertTransportMode(t, "mode400", 0o400) 
# The default permissions should be based on the current umask umask = osutils.get_umask() t.put_bytes("nomode", b"test text\n", mode=None) self.assertTransportMode(t, "nomode", 0o666 & ~umask) def test_put_bytes_non_atomic_permissions(self): t = self.get_transport() if t.is_readonly(): return if not t._can_roundtrip_unix_modebits(): # Can't roundtrip, so no need to run this test return t.put_bytes_non_atomic("mode644", b"test text\n", mode=0o644) self.assertTransportMode(t, "mode644", 0o644) t.put_bytes_non_atomic("mode666", b"test text\n", mode=0o666) self.assertTransportMode(t, "mode666", 0o666) t.put_bytes_non_atomic("mode600", b"test text\n", mode=0o600) self.assertTransportMode(t, "mode600", 0o600) t.put_bytes_non_atomic("mode400", b"test text\n", mode=0o400) self.assertTransportMode(t, "mode400", 0o400) # The default permissions should be based on the current umask umask = osutils.get_umask() t.put_bytes_non_atomic("nomode", b"test text\n", mode=None) self.assertTransportMode(t, "nomode", 0o666 & ~umask) # We should also be able to set the mode for a parent directory # when it is created t.put_bytes_non_atomic( "dir700/mode664", b"test text\n", mode=0o664, dir_mode=0o700, create_parent_dir=True, ) self.assertTransportMode(t, "dir700", 0o700) t.put_bytes_non_atomic( "dir770/mode664", b"test text\n", mode=0o664, dir_mode=0o770, create_parent_dir=True, ) self.assertTransportMode(t, "dir770", 0o770) t.put_bytes_non_atomic( "dir777/mode664", b"test text\n", mode=0o664, dir_mode=0o777, create_parent_dir=True, ) self.assertTransportMode(t, "dir777", 0o777) def test_put_file(self): t = self.get_transport() if t.is_readonly(): self.assertRaises( TransportNotPossible, t.put_file, "a", BytesIO(b"some text for a\n") ) return result = t.put_file("a", BytesIO(b"some text for a\n")) # put_file returns the length of the data written self.assertEqual(16, result) self.assertTrue(t.has("a")) self.check_transport_contents(b"some text for a\n", t, "a") # Put also replaces 
contents result = t.put_file("a", BytesIO(b"new\ncontents for\na\n")) self.assertEqual(19, result) self.check_transport_contents(b"new\ncontents for\na\n", t, "a") self.assertRaises( NoSuchFile, t.put_file, "path/doesnt/exist/c", BytesIO(b"contents") ) def test_put_file_non_atomic(self): t = self.get_transport() if t.is_readonly(): self.assertRaises( TransportNotPossible, t.put_file_non_atomic, "a", BytesIO(b"some text for a\n"), ) return self.assertFalse(t.has("a")) t.put_file_non_atomic("a", BytesIO(b"some text for a\n")) self.assertTrue(t.has("a")) self.check_transport_contents(b"some text for a\n", t, "a") # Put also replaces contents t.put_file_non_atomic("a", BytesIO(b"new\ncontents for\na\n")) self.check_transport_contents(b"new\ncontents for\na\n", t, "a") # Make sure we can create another file t.put_file_non_atomic("d", BytesIO(b"contents for\nd\n")) # And overwrite 'a' with empty contents t.put_file_non_atomic("a", BytesIO(b"")) self.check_transport_contents(b"contents for\nd\n", t, "d") self.check_transport_contents(b"", t, "a") self.assertRaises( NoSuchFile, t.put_file_non_atomic, "no/such/path", BytesIO(b"contents\n") ) # Now test the create_parent flag self.assertRaises( NoSuchFile, t.put_file_non_atomic, "dir/a", BytesIO(b"contents\n") ) self.assertFalse(t.has("dir/a")) t.put_file_non_atomic( "dir/a", BytesIO(b"contents for dir/a\n"), create_parent_dir=True ) self.check_transport_contents(b"contents for dir/a\n", t, "dir/a") # But we still get NoSuchFile if we can't make the parent dir self.assertRaises( NoSuchFile, t.put_file_non_atomic, "not/there/a", BytesIO(b"contents\n"), create_parent_dir=True, ) def test_put_file_permissions(self): t = self.get_transport() if t.is_readonly(): return if not t._can_roundtrip_unix_modebits(): # Can't roundtrip, so no need to run this test return t.put_file("mode644", BytesIO(b"test text\n"), mode=0o644) self.assertTransportMode(t, "mode644", 0o644) t.put_file("mode666", BytesIO(b"test text\n"), mode=0o666) 
self.assertTransportMode(t, "mode666", 0o666) t.put_file("mode600", BytesIO(b"test text\n"), mode=0o600) self.assertTransportMode(t, "mode600", 0o600) # Yes, you can put a file such that it becomes readonly t.put_file("mode400", BytesIO(b"test text\n"), mode=0o400) self.assertTransportMode(t, "mode400", 0o400) # The default permissions should be based on the current umask umask = osutils.get_umask() t.put_file("nomode", BytesIO(b"test text\n"), mode=None) self.assertTransportMode(t, "nomode", 0o666 & ~umask) def test_put_file_non_atomic_permissions(self): t = self.get_transport() if t.is_readonly(): return if not t._can_roundtrip_unix_modebits(): # Can't roundtrip, so no need to run this test return t.put_file_non_atomic("mode644", BytesIO(b"test text\n"), mode=0o644) self.assertTransportMode(t, "mode644", 0o644) t.put_file_non_atomic("mode666", BytesIO(b"test text\n"), mode=0o666) self.assertTransportMode(t, "mode666", 0o666) t.put_file_non_atomic("mode600", BytesIO(b"test text\n"), mode=0o600) self.assertTransportMode(t, "mode600", 0o600) # Yes, you can put_file_non_atomic a file such that it becomes readonly t.put_file_non_atomic("mode400", BytesIO(b"test text\n"), mode=0o400) self.assertTransportMode(t, "mode400", 0o400) # The default permissions should be based on the current umask umask = osutils.get_umask() t.put_file_non_atomic("nomode", BytesIO(b"test text\n"), mode=None) self.assertTransportMode(t, "nomode", 0o666 & ~umask) # We should also be able to set the mode for a parent directory # when it is created sio = BytesIO() t.put_file_non_atomic( "dir700/mode664", sio, mode=0o664, dir_mode=0o700, create_parent_dir=True ) self.assertTransportMode(t, "dir700", 0o700) t.put_file_non_atomic( "dir770/mode664", sio, mode=0o664, dir_mode=0o770, create_parent_dir=True ) self.assertTransportMode(t, "dir770", 0o770) t.put_file_non_atomic( "dir777/mode664", sio, mode=0o664, dir_mode=0o777, create_parent_dir=True ) self.assertTransportMode(t, "dir777", 0o777) def 
test_put_bytes_unicode(self): t = self.get_transport() if t.is_readonly(): return unicode_string = "\u1234" self.assertRaises(TypeError, t.put_bytes, "foo", unicode_string) def test_mkdir(self): t = self.get_transport() if t.is_readonly(): # cannot mkdir on readonly transports. We're not testing for # cache coherency because cache behaviour is not currently # defined for the transport interface. self.assertRaises(TransportNotPossible, t.mkdir, ".") self.assertRaises(TransportNotPossible, t.mkdir, "new_dir") self.assertRaises(TransportNotPossible, t.mkdir, "path/doesnt/exist") return # Test mkdir t.mkdir("dir_a") self.assertEqual(t.has("dir_a"), True) self.assertEqual(t.has("dir_b"), False) t.mkdir("dir_b") self.assertEqual(t.has("dir_b"), True) self.assertEqual( [t.has(n) for n in ["dir_a", "dir_b", "dir_q", "dir_b"]], [True, True, False, True], ) # we were testing that a local mkdir followed by a transport # mkdir failed thusly, but given that we * in one process * do not # concurrently fiddle with disk dirs and then use transport to do # things, the win here seems marginal compared to the constraint on # the interface. 
RBC 20051227 t.mkdir("dir_g") self.assertRaises(FileExists, t.mkdir, "dir_g") # Test get/put in sub-directories t.put_bytes("dir_a/a", b"contents of dir_a/a") t.put_file("dir_b/b", BytesIO(b"contents of dir_b/b")) self.check_transport_contents(b"contents of dir_a/a", t, "dir_a/a") self.check_transport_contents(b"contents of dir_b/b", t, "dir_b/b") # mkdir of a dir with an absent parent self.assertRaises(NoSuchFile, t.mkdir, "missing/dir") def test_mkdir_permissions(self): t = self.get_transport() if t.is_readonly(): return if not t._can_roundtrip_unix_modebits(): # no sense testing on this transport return # Test mkdir with a mode t.mkdir("dmode755", mode=0o755) self.assertTransportMode(t, "dmode755", 0o755) t.mkdir("dmode555", mode=0o555) self.assertTransportMode(t, "dmode555", 0o555) t.mkdir("dmode777", mode=0o777) self.assertTransportMode(t, "dmode777", 0o777) t.mkdir("dmode700", mode=0o700) self.assertTransportMode(t, "dmode700", 0o700) t.mkdir("mdmode755", mode=0o755) self.assertTransportMode(t, "mdmode755", 0o755) # Default mode should be based on umask umask = osutils.get_umask() t.mkdir("dnomode", mode=None) self.assertTransportMode(t, "dnomode", 0o777 & ~umask) def test_opening_a_file_stream_creates_file(self): t = self.get_transport() if t.is_readonly(): return handle = t.open_write_stream("foo") try: self.assertEqual(b"", t.get_bytes("foo")) finally: handle.close() def test_opening_a_file_stream_can_set_mode(self): t = self.get_transport() if t.is_readonly(): self.assertRaises( (TransportNotPossible, NotImplementedError), t.open_write_stream, "foo" ) return if not t._can_roundtrip_unix_modebits(): # Can't roundtrip, so no need to run this test return def check_mode(name, mode, expected): handle = t.open_write_stream(name, mode=mode) handle.close() self.assertTransportMode(t, name, expected) check_mode("mode644", 0o644, 0o644) check_mode("mode666", 0o666, 0o666) check_mode("mode600", 0o600, 0o600) # The default permissions should be based on the current 
umask check_mode("nomode", None, 0o666 & ~osutils.get_umask()) def test_copy_to(self): # FIXME: test: same server to same server (partly done) # same protocol two servers # and different protocols (done for now except for MemoryTransport. # - RBC 20060122 def simple_copy_files(transport_from, transport_to): files = ["a", "b", "c", "d"] self.build_tree(files, transport=transport_from) self.assertEqual(4, transport_from.copy_to(files, transport_to)) for f in files: self.check_transport_contents( transport_to.get_bytes(f), transport_from, f ) t = self.get_transport() if t.__class__.__name__ == "SFTPTransport": self.skipTest("SFTP copy_to currently too flakey to use") temp_transport = MemoryTransport("memory:///") simple_copy_files(t, temp_transport) if not t.is_readonly(): t.mkdir("copy_to_simple") t2 = t.clone("copy_to_simple") simple_copy_files(t, t2) # Test that copying into a missing directory raises # NoSuchFile if t.is_readonly(): self.build_tree(["e/", "e/f"]) else: t.mkdir("e") t.put_bytes("e/f", b"contents of e") self.assertRaises(NoSuchFile, t.copy_to, ["e/f"], temp_transport) temp_transport.mkdir("e") t.copy_to(["e/f"], temp_transport) del temp_transport temp_transport = MemoryTransport("memory:///") files = ["a", "b", "c", "d"] t.copy_to(iter(files), temp_transport) for f in files: self.check_transport_contents(temp_transport.get_bytes(f), t, f) del temp_transport for mode in (0o666, 0o644, 0o600, 0o400): temp_transport = MemoryTransport("memory:///") t.copy_to(files, temp_transport, mode=mode) for f in files: self.assertTransportMode(temp_transport, f, mode) def test_create_prefix(self): t = self.get_transport() sub = t.clone("foo").clone("bar") try: sub.create_prefix() except TransportNotPossible: self.assertTrue(t.is_readonly()) else: self.assertTrue(t.has("foo/bar")) def test_append_file(self): t = self.get_transport() if t.is_readonly(): self.assertRaises( TransportNotPossible, t.append_file, "a", "add\nsome\nmore\ncontents\n" ) return 
t.put_bytes("a", b"diff\ncontents for\na\n") t.put_bytes("b", b"contents\nfor b\n") self.assertEqual( 20, t.append_file("a", BytesIO(b"add\nsome\nmore\ncontents\n")) ) self.check_transport_contents( b"diff\ncontents for\na\nadd\nsome\nmore\ncontents\n", t, "a" ) # a file with no parent should fail.. self.assertRaises( NoSuchFile, t.append_file, "missing/path", BytesIO(b"content") ) # And we can create new files, too self.assertEqual( 0, t.append_file("c", BytesIO(b"some text\nfor a missing file\n")) ) self.check_transport_contents(b"some text\nfor a missing file\n", t, "c") def test_append_bytes(self): t = self.get_transport() if t.is_readonly(): self.assertRaises( TransportNotPossible, t.append_bytes, "a", b"add\nsome\nmore\ncontents\n", ) return self.assertEqual(0, t.append_bytes("a", b"diff\ncontents for\na\n")) self.assertEqual(0, t.append_bytes("b", b"contents\nfor b\n")) self.assertEqual(20, t.append_bytes("a", b"add\nsome\nmore\ncontents\n")) self.check_transport_contents( b"diff\ncontents for\na\nadd\nsome\nmore\ncontents\n", t, "a" ) # a file with no parent should fail.. 
self.assertRaises(NoSuchFile, t.append_bytes, "missing/path", b"content") def test_append_file_mode(self): """Check that append accepts a mode parameter.""" # check append accepts a mode t = self.get_transport() if t.is_readonly(): self.assertRaises( TransportNotPossible, t.append_file, "f", BytesIO(b"f"), mode=None ) return t.append_file("f", BytesIO(b"f"), mode=None) def test_append_bytes_mode(self): # check append_bytes accepts a mode t = self.get_transport() if t.is_readonly(): self.assertRaises( TransportNotPossible, t.append_bytes, "f", b"f", mode=None ) return t.append_bytes("f", b"f", mode=None) def test_delete(self): # TODO: Test Transport.delete t = self.get_transport() # Not much to do with a readonly transport if t.is_readonly(): self.assertRaises(TransportNotPossible, t.delete, "missing") return t.put_bytes("a", b"a little bit of text\n") self.assertTrue(t.has("a")) t.delete("a") self.assertFalse(t.has("a")) self.assertRaises(NoSuchFile, t.delete, "a") t.put_bytes("a", b"a text\n") t.put_bytes("b", b"b text\n") t.put_bytes("c", b"c text\n") self.assertEqual([True, True, True], [t.has(n) for n in ["a", "b", "c"]]) t.delete("a") t.delete("c") self.assertEqual([False, True, False], [t.has(n) for n in ["a", "b", "c"]]) self.assertFalse(t.has("a")) self.assertTrue(t.has("b")) self.assertFalse(t.has("c")) for name in ["a", "c", "d"]: self.assertRaises(NoSuchFile, t.delete, name) # We should have deleted everything # SftpServer creates control files in the # working directory, so we can just do a # plain "listdir". 
# self.assertEqual([], os.listdir('.')) def test_recommended_page_size(self): """Transports recommend a page size for partial access to files.""" t = self.get_transport() self.assertIsInstance(t.recommended_page_size(), int) def test_rmdir(self): t = self.get_transport() # Not much to do with a readonly transport if t.is_readonly(): self.assertRaises(TransportNotPossible, t.rmdir, "missing") return t.mkdir("adir") t.mkdir("adir/bdir") t.rmdir("adir/bdir") # ftp may not be able to raise NoSuchFile for lack of # details when failing self.assertRaises((NoSuchFile, PathError), t.rmdir, "adir/bdir") t.rmdir("adir") self.assertRaises((NoSuchFile, PathError), t.rmdir, "adir") def test_rmdir_not_empty(self): """Deleting a non-empty directory raises an exception. sftp (and possibly others) don't give us a specific "directory not empty" exception -- we can just see that the operation failed. """ t = self.get_transport() if t.is_readonly(): return t.mkdir("adir") t.mkdir("adir/bdir") self.assertRaises(PathError, t.rmdir, "adir") def test_rmdir_empty_but_similar_prefix(self): """Rmdir does not get confused by sibling paths. A naive implementation of MemoryTransport would refuse to rmdir ".bzr/branch" if there is a ".bzr/branch-format" directory, because it uses "path.startswith(dir)" on all file paths to determine if directory is empty. 
""" t = self.get_transport() if t.is_readonly(): return t.mkdir("foo") t.put_bytes("foo-bar", b"") t.mkdir("foo-baz") t.rmdir("foo") self.assertRaises((NoSuchFile, PathError), t.rmdir, "foo") self.assertTrue(t.has("foo-bar")) def test_rename_dir_succeeds(self): t = self.get_transport() if t.is_readonly(): self.assertRaises( (TransportNotPossible, NotImplementedError), t.rename, "foo", "bar" ) return t.mkdir("adir") t.mkdir("adir/asubdir") t.rename("adir", "bdir") self.assertTrue(t.has("bdir/asubdir")) self.assertFalse(t.has("adir")) def test_rename_dir_nonempty(self): """Attempting to replace a nonemtpy directory should fail.""" t = self.get_transport() if t.is_readonly(): self.assertRaises( (TransportNotPossible, NotImplementedError), t.rename, "foo", "bar" ) return t.mkdir("adir") t.mkdir("adir/asubdir") t.mkdir("bdir") t.mkdir("bdir/bsubdir") # any kind of PathError would be OK, though we normally expect # DirectoryNotEmpty self.assertRaises(PathError, t.rename, "bdir", "adir") # nothing was changed so it should still be as before self.assertTrue(t.has("bdir/bsubdir")) self.assertFalse(t.has("adir/bdir")) self.assertFalse(t.has("adir/bsubdir")) def test_rename_across_subdirs(self): t = self.get_transport() if t.is_readonly(): raise TestNotApplicable("transport is readonly") t.mkdir("a") t.mkdir("b") ta = t.clone("a") tb = t.clone("b") ta.put_bytes("f", b"aoeu") ta.rename("f", "../b/f") self.assertTrue(tb.has("f")) self.assertFalse(ta.has("f")) self.assertTrue(t.has("b/f")) def test_delete_tree(self): t = self.get_transport() # Not much to do with a readonly transport if t.is_readonly(): self.assertRaises(TransportNotPossible, t.delete_tree, "missing") return # and does it like listing ? t.mkdir("adir") try: t.delete_tree("adir") except TransportNotPossible: # ok, this transport does not support delete_tree return # did it delete that trivial case? 
self.assertRaises(NoSuchFile, t.stat, "adir") self.build_tree( [ "adir/", "adir/file", "adir/subdir/", "adir/subdir/file", "adir/subdir2/", "adir/subdir2/file", ], transport=t, ) t.delete_tree("adir") # adir should be gone now. self.assertRaises(NoSuchFile, t.stat, "adir") def test_move(self): t = self.get_transport() if t.is_readonly(): return # TODO: I would like to use os.listdir() to # make sure there are no extra files, but SftpServer # creates control files in the working directory # perhaps all of this could be done in a subdirectory t.put_bytes("a", b"a first file\n") self.assertEqual([True, False], [t.has(n) for n in ["a", "b"]]) t.move("a", "b") self.assertTrue(t.has("b")) self.assertFalse(t.has("a")) self.check_transport_contents(b"a first file\n", t, "b") self.assertEqual([False, True], [t.has(n) for n in ["a", "b"]]) # Overwrite a file t.put_bytes("c", b"c this file\n") t.move("c", "b") self.assertFalse(t.has("c")) self.check_transport_contents(b"c this file\n", t, "b") # TODO: Try to write a test for atomicity # TODO: Test moving into a non-existent subdirectory def test_copy(self): t = self.get_transport() if t.is_readonly(): return t.put_bytes("a", b"a file\n") t.copy("a", "b") self.check_transport_contents(b"a file\n", t, "b") self.assertRaises(NoSuchFile, t.copy, "c", "d") os.mkdir("c") # What should the assert be if you try to copy a # file over a directory? # self.assertRaises(Something, t.copy, 'a', 'c') t.put_bytes("d", b"text in d\n") t.copy("d", "b") self.check_transport_contents(b"text in d\n", t, "b") def test_connection_error(self): """ConnectionError is raised when connection is impossible. The error should be raised from the first operation on the transport. 
""" try: url = self._server.get_bogus_url() except NotImplementedError as err: raise TestSkipped( "Transport {} has no bogus URL support.".format(self._server.__class__) ) from err t = _mod_transport.get_transport_from_url(url) self.assertRaises((errors.ConnectionError, NoSuchFile), t.get, ".bzr/branch") def test_stat(self): # TODO: Test stat, just try once, and if it throws, stop testing from stat import S_ISDIR, S_ISREG t = self.get_transport() try: st = t.stat(".") except TransportNotPossible: # This transport cannot stat return paths = ["a", "b/", "b/c", "b/d/", "b/d/e"] sizes = [14, 0, 16, 0, 18] self.build_tree(paths, transport=t, line_endings="binary") for path, size in zip(paths, sizes, strict=False): st = t.stat(path) if path.endswith("/"): self.assertTrue(S_ISDIR(st.st_mode)) # directory sizes are meaningless else: self.assertTrue(S_ISREG(st.st_mode)) self.assertEqual(size, st.st_size) self.assertRaises(NoSuchFile, t.stat, "q") self.assertRaises(NoSuchFile, t.stat, "b/a") self.build_tree(["subdir/", "subdir/file"], transport=t) subdir = t.clone("subdir") st = subdir.stat("./file") st = subdir.stat(".") def test_hardlink(self): from stat import ST_NLINK t = self.get_transport() source_name = "original_target" link_name = "target_link" self.build_tree([source_name], transport=t) try: t.hardlink(source_name, link_name) self.assertTrue(t.has(source_name)) self.assertTrue(t.has(link_name)) try: local_path = t.local_abspath(link_name) st = os.stat(local_path) self.assertEqual(st[ST_NLINK], 2) except errors.NotLocalUrl: pass except TransportNotPossible as err: raise TestSkipped( "Transport {} does not support hardlinks.".format( self._server.__class__ ) ) from err def test_symlink(self): from stat import S_ISLNK t = self.get_transport() source_name = "original_target" link_name = "target_link" self.build_tree([source_name], transport=t) try: t.symlink(source_name, link_name) self.assertTrue(t.has(source_name)) self.assertTrue(t.has(link_name)) st = 
t.stat(link_name) self.assertTrue( S_ISLNK(st.st_mode), f"expected symlink, got mode {st.st_mode:o}" ) except TransportNotPossible as err: raise TestSkipped( "Transport {} does not support symlinks.".format(self._server.__class__) ) from err self.assertEqual(source_name, t.readlink(link_name)) def test_readlink_nonexistent(self): t = self.get_transport() try: self.assertRaises(NoSuchFile, t.readlink, "nonexistent") except TransportNotPossible as err: raise TestSkipped( "Transport {} does not support symlinks.".format(self._server.__class__) ) from err def test_list_dir(self): # TODO: Test list_dir, just try once, and if it throws, stop testing t = self.get_transport() if not t.listable(): self.assertRaises(TransportNotPossible, t.list_dir, ".") return def sorted_list(d, transport): l = sorted(transport.list_dir(d)) return l self.assertEqual([], sorted_list(".", t)) # c2 is precisely one letter longer than c here to test that # suffixing is not confused. # a%25b checks that quoting is done consistently across transports tree_names = ["a", "a%25b", "b", "c/", "c/d", "c/e", "c2/"] if not t.is_readonly(): self.build_tree(tree_names, transport=t) else: self.build_tree(tree_names) self.assertEqual(["a", "a%2525b", "b", "c", "c2"], sorted_list("", t)) self.assertEqual(["a", "a%2525b", "b", "c", "c2"], sorted_list(".", t)) self.assertEqual(["d", "e"], sorted_list("c", t)) # Cloning the transport produces an equivalent listing self.assertEqual(["d", "e"], sorted_list("", t.clone("c"))) if not t.is_readonly(): t.delete("c/d") t.delete("b") else: os.unlink("c/d") os.unlink("b") self.assertEqual(["a", "a%2525b", "c", "c2"], sorted_list(".", t)) self.assertEqual(["e"], sorted_list("c", t)) self.assertListRaises(PathError, t.list_dir, "q") self.assertListRaises(PathError, t.list_dir, "c/f") # 'a' is a file, list_dir should raise an error self.assertListRaises(PathError, t.list_dir, "a") def test_list_dir_result_is_url_escaped(self): t = self.get_transport() if not t.listable(): 
raise TestSkipped("transport not listable") if not t.is_readonly(): self.build_tree(["a/", "a/%"], transport=t) else: self.build_tree(["a/", "a/%"]) names = list(t.list_dir("a")) self.assertEqual(["%25"], names) self.assertIsInstance(names[0], str) def test_clone_preserve_info(self): t1 = self.get_transport() if not isinstance(t1, ConnectedTransport): raise TestSkipped("not a connected transport") t2 = t1.clone("subdir") self.assertEqual(t1._parsed_url.scheme, t2._parsed_url.scheme) self.assertEqual(t1._parsed_url.user, t2._parsed_url.user) self.assertEqual(t1._parsed_url.password, t2._parsed_url.password) self.assertEqual(t1._parsed_url.host, t2._parsed_url.host) self.assertEqual(t1._parsed_url.port, t2._parsed_url.port) def test__reuse_for(self): t = self.get_transport() if not isinstance(t, ConnectedTransport): raise TestSkipped("not a connected transport") def new_url( scheme=None, user=None, password=None, host=None, port=None, path=None ): """Build a new url from t.base changing only parts of it. Only the parameters different from None will be changed. 
""" if scheme is None: scheme = t._parsed_url.scheme if user is None: user = t._parsed_url.user if password is None: password = t._parsed_url.password if user is None: user = t._parsed_url.user if host is None: host = t._parsed_url.host if port is None: port = t._parsed_url.port if path is None: path = t._parsed_url.path return str(urlutils.URL(scheme, user, password, host, port, path)) scheme = "sftp" if t._parsed_url.scheme == "ftp" else "ftp" self.assertIsNot(t, t._reuse_for(new_url(scheme=scheme))) user = "you" if t._parsed_url.user == "me" else "me" self.assertIsNot(t, t._reuse_for(new_url(user=user))) # passwords are not taken into account because: # - it makes no sense to have two different valid passwords for the # same user # - _password in ConnectedTransport is intended to collect what the # user specified from the command-line and there are cases where the # new url can contain no password (if the url was built from an # existing transport.base for example) # - password are considered part of the credentials provided at # connection creation time and as such may not be present in the url # (they may be typed by the user when prompted for example) self.assertIs(t, t._reuse_for(new_url(password="from space"))) # We will not connect, we can use a invalid host self.assertIsNot(t, t._reuse_for(new_url(host=t._parsed_url.host + "bar"))) port = 4321 if t._parsed_url.port == 1234 else 1234 self.assertIsNot(t, t._reuse_for(new_url(port=port))) # No point in trying to reuse a transport for a local URL self.assertIs(None, t._reuse_for("/valid_but_not_existing")) def test_connection_sharing(self): t = self.get_transport() if not isinstance(t, ConnectedTransport): raise TestSkipped("not a connected transport") c = t.clone("subdir") # Some transports will create the connection only when needed t.has("surely_not") # Force connection self.assertIs(t._get_connection(), c._get_connection()) # Temporary failure, we need to create a new dummy connection new_connection = 
None t._set_connection(new_connection) # Check that both transports use the same connection self.assertIs(new_connection, t._get_connection()) self.assertIs(new_connection, c._get_connection()) def test_reuse_connection_for_various_paths(self): t = self.get_transport() if not isinstance(t, ConnectedTransport): raise TestSkipped("not a connected transport") t.has("surely_not") # Force connection self.assertIsNot(None, t._get_connection()) subdir = t._reuse_for(t.base + "whatever/but/deep/down/the/path") self.assertIsNot(t, subdir) self.assertIs(t._get_connection(), subdir._get_connection()) home = subdir._reuse_for(t.base + "home") self.assertIs(t._get_connection(), home._get_connection()) self.assertIs(subdir._get_connection(), home._get_connection()) def test_clone(self): # TODO: Test that clone moves up and down the filesystem t1 = self.get_transport() self.build_tree(["a", "b/", "b/c"], transport=t1) self.assertTrue(t1.has("a")) self.assertTrue(t1.has("b/c")) self.assertFalse(t1.has("c")) t2 = t1.clone("b") self.assertEqual(t1.base + "b/", t2.base) self.assertTrue(t2.has("c")) self.assertFalse(t2.has("a")) t3 = t2.clone("..") self.assertTrue(t3.has("a")) self.assertFalse(t3.has("c")) self.assertFalse(t1.has("b/d")) self.assertFalse(t2.has("d")) self.assertFalse(t3.has("b/d")) if t1.is_readonly(): self.build_tree_contents([("b/d", b"newfile\n")]) else: t2.put_bytes("d", b"newfile\n") self.assertTrue(t1.has("b/d")) self.assertTrue(t2.has("d")) self.assertTrue(t3.has("b/d")) def test_clone_to_root(self): orig_transport = self.get_transport() # Repeatedly go up to a parent directory until we're at the root # directory of this transport root_transport = orig_transport new_transport = root_transport.clone("..") # as we are walking up directories, the path must be # growing less, except at the top self.assertTrue( len(new_transport.base) < len(root_transport.base) or new_transport.base == root_transport.base ) while new_transport.base != root_transport.base: 
root_transport = new_transport new_transport = root_transport.clone("..") # as we are walking up directories, the path must be # growing less, except at the top self.assertTrue( len(new_transport.base) < len(root_transport.base) or new_transport.base == root_transport.base ) # Cloning to "/" should take us to exactly the same location. self.assertEqual(root_transport.base, orig_transport.clone("/").base) # the abspath of "/" from the original transport should be the same # as the base at the root: self.assertEqual(orig_transport.abspath("/"), root_transport.base) # At the root, the URL must still end with / as its a directory self.assertEqual(root_transport.base[-1], "/") def test_clone_from_root(self): """At the root, cloning to a simple dir should just do string append.""" orig_transport = self.get_transport() root_transport = orig_transport.clone("/") self.assertEqual( root_transport.base + ".bzr/", root_transport.clone(".bzr").base ) def test_base_url(self): t = self.get_transport() self.assertEqual("/", t.base[-1]) def test_relpath(self): t = self.get_transport() self.assertEqual("", t.relpath(t.base)) # base ends with / self.assertEqual("", t.relpath(t.base[:-1])) # subdirs which don't exist should still give relpaths. self.assertEqual("foo", t.relpath(t.base + "foo")) # trailing slash should be the same. self.assertEqual("foo", t.relpath(t.base + "foo/")) def test_relpath_at_root(self): t = self.get_transport() # clone all the way to the top new_transport = t.clone("..") while new_transport.base != t.base: t = new_transport new_transport = t.clone("..") # we must be able to get a relpath below the root self.assertEqual("", t.relpath(t.base)) # and a deeper one should work too self.assertEqual("foo/bar", t.relpath(t.base + "foo/bar")) def test_abspath(self): # smoke test for abspath. Corner cases for backends like unix fs's # that have aliasing problems like symlinks should go in backend # specific test cases. 
transport = self.get_transport() self.assertEqual(transport.base + "relpath", transport.abspath("relpath")) # This should work without raising an error. transport.abspath("/") # the abspath of "/" and "/foo/.." should result in the same location self.assertEqual(transport.abspath("/"), transport.abspath("/foo/..")) self.assertEqual(transport.clone("/").abspath("foo"), transport.abspath("/foo")) # GZ 2011-01-26: Test in per_transport but not using self.get_transport? def test_win32_abspath(self): # Note: we tried to set sys.platform='win32' so we could test on # other platforms too, but then osutils does platform specific # things at import time which defeated us... if sys.platform != "win32": raise TestSkipped( "Testing drive letters in abspath implemented only for win32" ) # smoke test for abspath on win32. # a transport based on 'file:///' never fully qualifies the drive. transport = _mod_transport.get_transport_from_url("file:///") self.assertEqual(transport.abspath("/"), "file:///") # but a transport that starts with a drive spec must keep it. transport = _mod_transport.get_transport_from_url("file:///C:/") self.assertEqual(transport.abspath("/"), "file:///C:/") def test_local_abspath(self): transport = self.get_transport() try: p = transport.local_abspath(".") except (errors.NotLocalUrl, TransportNotPossible) as e: # should be formattable str(e) else: self.assertEqual(getcwd(), p) def test_abspath_at_root(self): t = self.get_transport() # clone all the way to the top new_transport = t.clone("..") while new_transport.base != t.base: t = new_transport new_transport = t.clone("..") # we must be able to get a abspath of the root when we ask for # t.abspath('..') - this due to our choice that clone('..') # should return the root from the root, combined with the desire that # the url from clone('..') and from abspath('..') should be the same. 
self.assertEqual(t.base, t.abspath("..")) # '' should give us the root self.assertEqual(t.base, t.abspath("")) # and a path should append to the url self.assertEqual(t.base + "foo", t.abspath("foo")) def test_iter_files_recursive(self): transport = self.get_transport() if not transport.listable(): self.assertRaises(TransportNotPossible, transport.iter_files_recursive) return self.build_tree( [ "isolated/", "isolated/dir/", "isolated/dir/foo", "isolated/dir/bar", "isolated/dir/b%25z", # make sure quoting is correct "isolated/bar", ], transport=transport, ) paths = set(transport.iter_files_recursive()) # nb the directories are not converted self.assertEqual( paths, { "isolated/dir/foo", "isolated/dir/bar", "isolated/dir/b%2525z", "isolated/bar", }, ) sub_transport = transport.clone("isolated") paths = set(sub_transport.iter_files_recursive()) self.assertEqual(paths, {"dir/foo", "dir/bar", "dir/b%2525z", "bar"}) def test_copy_tree(self): # TODO: test file contents and permissions are preserved. This test was # added just to ensure that quoting was handled correctly. 
# -- David Allouche 2006-08-11 transport = self.get_transport() if not transport.listable(): self.assertRaises(TransportNotPossible, transport.iter_files_recursive) return if transport.is_readonly(): return self.build_tree( [ "from/", "from/dir/", "from/dir/foo", "from/dir/bar", "from/dir/b%25z", # make sure quoting is correct "from/bar", ], transport=transport, ) transport.copy_tree("from", "to") paths = set(transport.iter_files_recursive()) self.assertEqual( paths, { "from/dir/foo", "from/dir/bar", "from/dir/b%2525z", "from/bar", "to/dir/foo", "to/dir/bar", "to/dir/b%2525z", "to/bar", }, ) def test_copy_tree_to_transport(self): transport = self.get_transport() if not transport.listable(): self.assertRaises(TransportNotPossible, transport.iter_files_recursive) return if transport.is_readonly(): return self.build_tree( [ "from/", "from/dir/", "from/dir/foo", "from/dir/bar", "from/dir/b%25z", # make sure quoting is correct "from/bar", ], transport=transport, ) from_transport = transport.clone("from") to_transport = transport.clone("to") to_transport.ensure_base() from_transport.copy_tree_to_transport(to_transport) paths = set(transport.iter_files_recursive()) self.assertEqual( paths, { "from/dir/foo", "from/dir/bar", "from/dir/b%2525z", "from/bar", "to/dir/foo", "to/dir/bar", "to/dir/b%2525z", "to/bar", }, ) def test_unicode_paths(self): """Test that we can read/write files with Unicode names.""" t = self.get_transport() # With FAT32 and certain encodings on win32 # '\xe5' and '\xe4' actually map to the same file # adding a suffix kicks in the 'preserving but insensitive' # route, and maintains the right files files = [ "\xe5.1", # a w/ circle iso-8859-1 "\xe4.2", # a w/ dots iso-8859-1 "\u017d", # Z with umlat iso-8859-2 "\u062c", # Arabic j "\u0410", # Russian A "\u65e5", # Kanji person ] no_unicode_support = getattr(self._server, "no_unicode_support", False) if no_unicode_support: self.knownFailure("test server cannot handle unicode paths") try: 
self.build_tree(files, transport=t, line_endings="binary") except UnicodeError as err: raise TestSkipped( "cannot handle unicode paths in current encoding" ) from err # A plain unicode string is not a valid url for fname in files: self.assertRaises(urlutils.InvalidURL, t.get, fname) for fname in files: fname_utf8 = fname.encode("utf-8") contents = b"contents of %s\n" % (fname_utf8,) self.check_transport_contents(contents, t, urlutils.escape(fname)) def test_connect_twice_is_same_content(self): # check that our server (whatever it is) is accessible reliably # via get_transport and multiple connections share content. transport = self.get_transport() if transport.is_readonly(): return transport.put_bytes("foo", b"bar") transport3 = self.get_transport() self.check_transport_contents(b"bar", transport3, "foo") # now opening at a relative url should give use a sane result: transport.mkdir("newdir") transport5 = self.get_transport("newdir") transport6 = transport5.clone("..") self.check_transport_contents(b"bar", transport6, "foo") def test_lock_write(self): """Test transport-level write locks. These are deprecated and transports may decline to support them. """ transport = self.get_transport() if transport.is_readonly(): self.assertRaises(TransportNotPossible, transport.lock_write, "foo") return transport.put_bytes("lock", b"") try: lock = transport.lock_write("lock") except TransportNotPossible: return # TODO make this consistent on all platforms: # self.assertRaises(LockError, transport.lock_write, 'lock') lock.unlock() def test_lock_read(self): """Test transport-level read locks. These are deprecated and transports may decline to support them. 
""" transport = self.get_transport() if transport.is_readonly(): open("lock", "w").close() else: transport.put_bytes("lock", b"") try: lock = transport.lock_read("lock") except TransportNotPossible: return # TODO make this consistent on all platforms: # self.assertRaises(LockError, transport.lock_read, 'lock') lock.unlock() def test_readv(self): transport = self.get_transport() if transport.is_readonly(): with open("a", "w") as f: f.write("0123456789") else: transport.put_bytes("a", b"0123456789") d = list(transport.readv("a", ((0, 1),))) self.assertEqual(d[0], (0, b"0")) d = list(transport.readv("a", ((0, 1), (1, 1), (3, 2), (9, 1)))) self.assertEqual(d[0], (0, b"0")) self.assertEqual(d[1], (1, b"1")) self.assertEqual(d[2], (3, b"34")) self.assertEqual(d[3], (9, b"9")) def test_readv_out_of_order(self): transport = self.get_transport() if transport.is_readonly(): with open("a", "w") as f: f.write("0123456789") else: transport.put_bytes("a", b"01234567890") d = list(transport.readv("a", ((1, 1), (9, 1), (0, 1), (3, 2)))) self.assertEqual(d[0], (1, b"1")) self.assertEqual(d[1], (9, b"9")) self.assertEqual(d[2], (0, b"0")) self.assertEqual(d[3], (3, b"34")) def test_readv_with_adjust_for_latency(self): transport = self.get_transport() # the adjust for latency flag expands the data region returned # according to a per-transport heuristic, so testing is a little # tricky as we need more data than the largest combining that our # transports do. To accomodate this we generate random data and cross # reference the returned data with the random data. To avoid doing # multiple large random byte look ups we do several tests on the same # backing data. 
content = random.randbytes(200 * 1024) # noqa: S311 content_size = len(content) if transport.is_readonly(): self.build_tree_contents([("a", content)]) else: transport.put_bytes("a", content) def check_result_data(result_vector): for item in result_vector: data_len = len(item[1]) self.assertEqual(content[item[0] : item[0] + data_len], item[1]) # start corner case result = list( transport.readv( "a", ((0, 30),), adjust_for_latency=True, upper_limit=content_size ) ) # we expect 1 result, from 0, to something > 30 self.assertEqual(1, len(result)) self.assertEqual(0, result[0][0]) self.assertTrue(len(result[0][1]) >= 30) check_result_data(result) # end of file corner case result = list( transport.readv( "a", ((204700, 100),), adjust_for_latency=True, upper_limit=content_size ) ) # we expect 1 result, from 204800- its length, to the end self.assertEqual(1, len(result)) data_len = len(result[0][1]) self.assertEqual(204800 - data_len, result[0][0]) self.assertTrue(data_len >= 100) check_result_data(result) # out of order ranges are made in order result = list( transport.readv( "a", ((204700, 100), (0, 50)), adjust_for_latency=True, upper_limit=content_size, ) ) # we expect 2 results, in order, start and end. 
self.assertEqual(2, len(result)) # start data_len = len(result[0][1]) self.assertEqual(0, result[0][0]) self.assertTrue(data_len >= 30) # end data_len = len(result[1][1]) self.assertEqual(204800 - data_len, result[1][0]) self.assertTrue(data_len >= 100) check_result_data(result) # close ranges get combined (even if out of order) for request_vector in [((400, 50), (800, 234)), ((800, 234), (400, 50))]: result = list( transport.readv( "a", request_vector, adjust_for_latency=True, upper_limit=content_size, ) ) self.assertEqual(1, len(result)) data_len = len(result[0][1]) # minimum length is from 400 to 1034 - 634 self.assertTrue(data_len >= 634) # must contain the region 400 to 1034 self.assertTrue(result[0][0] <= 400) self.assertTrue(result[0][0] + data_len >= 1034) check_result_data(result) def test_readv_with_adjust_for_latency_with_big_file(self): transport = self.get_transport() # test from observed failure case. if transport.is_readonly(): with open("a", "w") as f: f.write("a" * 1024 * 1024) else: transport.put_bytes("a", b"a" * 1024 * 1024) broken_vector = [ (465219, 800), (225221, 800), (445548, 800), (225037, 800), (221357, 800), (437077, 800), (947670, 800), (465373, 800), (947422, 800), ] results = list(transport.readv("a", broken_vector, True, 1024 * 1024)) found_items = [False] * 9 for pos, (start, length) in enumerate(broken_vector): # check the range is covered by the result for offset, data in results: if offset <= start and start + length <= offset + len(data): found_items[pos] = True self.assertEqual([True] * 9, found_items) def test_get_with_open_write_stream_sees_all_content(self): t = self.get_transport() if t.is_readonly(): return with t.open_write_stream("foo") as handle: handle.write(b"bcd") handle.flush() self.assertEqual( [(0, b"b"), (2, b"d")], list(t.readv("foo", ((0, 1), (2, 1)))) ) def test_readv_short_read(self): transport = self.get_transport() if transport.is_readonly(): with open("a", "w") as f: f.write("0123456789") else: 
transport.put_bytes("a", b"01234567890") # This is intentionally reading off the end of the file # since we are sure that it cannot get there self.assertListRaises( ( errors.ShortReadvError, errors.InvalidRange, # Can be raised by paramiko AssertionError, ), transport.readv, "a", [(1, 1), (8, 10)], ) # This is trying to seek past the end of the file, it should # also raise a special error self.assertListRaises( (errors.ShortReadvError, errors.InvalidRange), transport.readv, "a", [(12, 2)], ) def test_no_segment_parameters(self): """Segment parameters should be stripped and stored in transport.segment_parameters. """ transport = self.get_transport("foo") self.assertEqual({}, transport.get_segment_parameters()) def test_segment_parameters(self): """Segment parameters should be stripped and stored in transport.get_segment_parameters(). """ base_url = self._server.get_url() parameters = {"key1": "val1", "key2": "val2"} url = urlutils.join_segment_parameters(base_url, parameters) transport = _mod_transport.get_transport_from_url(url) self.assertEqual(parameters, transport.get_segment_parameters()) def test_set_segment_parameters(self): """Segment parameters can be set and show up in base.""" transport = self.get_transport("foo") orig_base = transport.base transport.set_segment_parameter("arm", "board") self.assertEqual(f"{orig_base},arm=board", transport.base) self.assertEqual({"arm": "board"}, transport.get_segment_parameters()) transport.set_segment_parameter("arm", None) transport.set_segment_parameter("nonexistant", None) self.assertEqual({}, transport.get_segment_parameters()) self.assertEqual(orig_base, transport.base) def test_stat_symlink(self): # if a transport points directly to a symlink (and supports symlinks # at all) you can tell this. helps with bug 32669. 
t = self.get_transport() try: t.symlink("target", "link") except TransportNotPossible as err: raise TestSkipped("symlinks not supported") from err t2 = t.clone("link") st = t2.stat("") self.assertTrue(stat.S_ISLNK(st.st_mode)) def test_abspath_url_unquote_unreserved(self): """URLs from abspath should have unreserved characters unquoted. Need consistent quoting notably for tildes, see lp:842223 for more. """ t = self.get_transport() needlessly_escaped_dir = "%2D%2E%30%39%41%5A%5F%61%7A%7E/" self.assertEqual(t.base + "-.09AZ_az~", t.abspath(needlessly_escaped_dir)) def test_clone_url_unquote_unreserved(self): """Base URL of a cloned branch needs unreserved characters unquoted. Cloned transports should be prefix comparable for things like the isolation checking of tests, see lp:842223 for more. """ t1 = self.get_transport() needlessly_escaped_dir = "%2D%2E%30%39%41%5A%5F%61%7A%7E/" self.build_tree([needlessly_escaped_dir], transport=t1) t2 = t1.clone(needlessly_escaped_dir) self.assertEqual(t1.base + "-.09AZ_az~/", t2.base) def test_hook_post_connection_one(self): """Fire post_connect hook after a ConnectedTransport is first used.""" log = [] Transport.hooks.install_named_hook("post_connect", log.append, None) t = self.get_transport() self.assertEqual([], log) t.has("non-existant") if isinstance(t, ConnectedTransport): self.assertEqual([t], log) else: self.assertEqual([], log) def test_hook_post_connection_multi(self): """Fire post_connect hook once per unshared underlying connection.""" log = [] Transport.hooks.install_named_hook("post_connect", log.append, None) t1 = self.get_transport() t2 = t1.clone(".") t3 = self.get_transport() self.assertEqual([], log) t1.has("x") t2.has("x") t3.has("x") if isinstance(t1, ConnectedTransport): self.assertEqual([t1, t3], log) else: self.assertEqual([], log) 
dromedary-0.1.5/dromedary/tests/ssl_certs/000077500000000000000000000000001520150013200206215ustar00rootroot00000000000000dromedary-0.1.5/dromedary/tests/ssl_certs/__init__.py000066400000000000000000000020741520150013200227350ustar00rootroot00000000000000# Copyright (C) 2007-2008 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """ssl_certs -- provides access to ssl keys and certificates needed by tests.""" import os # Directory containing all ssl files, keys or certificates base_dir = os.path.dirname(os.path.realpath(__file__)) def build_path(name): """Build and return a path in ssl_certs directory for name.""" return os.path.join(base_dir, name) dromedary-0.1.5/dromedary/tests/ssl_certs/ca.crt000066400000000000000000000041771520150013200217270ustar00rootroot00000000000000-----BEGIN CERTIFICATE----- MIIGGDCCBACgAwIBAgIUAq4oJ9pSQG0lEE+X97sssVXPy7wwDQYJKoZIhvcNAQEL BQAwgZMxCzAJBgNVBAYTAkJaMREwDwYDVQQIDAhJbnRlcm5ldDEPMA0GA1UEBwwG QmF6YWFyMRQwEgYDVQQKDAtEaXN0cmlidXRlZDEMMAoGA1UECwwDVkNTMR8wHQYD VQQDDBZNYXN0ZXIgb2YgY2VydGlmaWNhdGVzMRswGQYJKoZIhvcNAQkBFgxjZXJ0 QG5vLnNwYW0wIBcNMjUwMTA3MTgzODM0WhgPMzAyNTAxMDcxODM4MzRaMIGTMQsw CQYDVQQGEwJCWjERMA8GA1UECAwISW50ZXJuZXQxDzANBgNVBAcMBkJhemFhcjEU MBIGA1UECgwLRGlzdHJpYnV0ZWQxDDAKBgNVBAsMA1ZDUzEfMB0GA1UEAwwWTWFz 
dGVyIG9mIGNlcnRpZmljYXRlczEbMBkGCSqGSIb3DQEJARYMY2VydEBuby5zcGFt MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA6M1aordgvlnDk/37VhsR JYROTKSYkfQ06qsieorN205M2dcCmugdMdjtXmGAVwE3DZOuRRx+2/4T3Y/wFMFc LCgxfFxKcy7/r6wPBuJO8h2iLijgZGFOKLfcj57nXBoSxQvHTHOuCWwMzcPepAsM kMLknZFWns+yti4Xn31cYYlWjhytp2fxP1NiUKZg6qi5CyPr112ysC61i/1brZbo IHrelbH0v/7IBsqGcSBpRDHLKQbHNBVr2AigDgdt1ayCjOuanmdCGrE7OeHd2ASS VLFpmKRpmfUjwCPPAkXJEifCmVMzO/fxXNV237qQa/osHNSz9k7cmK5U5iR2bVBR oCqae2Y+NQYgiYGJtW5XgfzBhHqH877ZqpMiPtnB9oGmFdXWnhtH8dIZ6YozylIl 9kET3h78SEeNtHMovu+c5rpmdMzbjMM6QEi2aDiJzFSJHikgPfJg4XtIq+OjnDw3 946xUiWsO/Nvq/18MvTDcsa3Y8nyiB7L4Hv4eqGdRUZD0OnfZR6dQIXJV2g0zBbP FCygXpfH2hBQXkG2WqC15RQm/TL5IkAVEogO96b2v1vRs9rabMqzUshWWdFep/1x NLNHPrgNTQvsmEE3LTgXZlRvr/8TGq2D2D6+8ia0UbpaIKMjUMVXQMee/Goo2hhq J0Pa8fpPujB0JC5jWz+yx8MCAwEAAaNgMF4wHQYDVR0OBBYEFMSNRx8FugB7FSR/ u+o0w0PvFqZAMB8GA1UdIwQYMBaAFMSNRx8FugB7FSR/u+o0w0PvFqZAMA8GA1Ud EwEB/wQFMAMBAf8wCwYDVR0PBAQDAgIEMA0GCSqGSIb3DQEBCwUAA4ICAQBZRAVE Y/J4j4fXttPzJ6g5NfemdUemBwnAW3P05AwpD4vxxb6ND5YgjfZun3hMC/Mx64BH 6jox7xCm8KfZYhQkP1xUH2Hz+wjEvdTCVEiO+psiVeyD3PF3z1OwUc/+FJ1rRulK eOIk0xUpBJI1hVcjqGqQiD3nalUgz7WDAdi6hzemxLKdVg+VN6+Td1Ue6+H8IOEI J8c5721PTnXDy8EMbr24bd+8lLw/UN4T8E8CxyJ7VxuMThuKgeNmUQlcPdCIOCy+ ge4GV16oD4xbx7ErQTYTIVJ/SmhbmRsJrrdWWTHVJWEAUkgRqy8t4y7ukLpM6fvZ wOR+pqRFGGQ7uxCMBJJ8vtYMxnloUCB2GFZ1em8QsMVdLn3ofzas3gNk47peHeZf +cFtzcwdcOJZYtf7xqdkbHkYX8j5szEkTkNaDynnPGffWYoKP/2AkbwtZ2OmhxEY PyFfk8D9/du1DTh92LCwJLzKbWRYJcSEI4FdF6z5OkVfg9QlhF/147cfVEtDd/6y ZDh3GjTX/VzyM/N4tR6VHyx75ZZRRanS6bmQAavxvUvcCMjSXWUpST5BFqctl+dV rjrhf1eaEk0GLcOsSbBHQNuY2bpBcQNN0hxbKkMMjKUksUuiNfyKWTTQQiu4Q1Ma 5guXUID4iq8fXeBX7bCuAYlEJuLaq5Xy7ps9qQ== -----END CERTIFICATE----- dromedary-0.1.5/dromedary/tests/ssl_certs/ca.key000066400000000000000000000065661520150013200217330ustar00rootroot00000000000000-----BEGIN ENCRYPTED PRIVATE KEY----- MIIJtTBfBgkqhkiG9w0BBQ0wUjAxBgkqhkiG9w0BBQwwJAQQzg7AaifFy9RB75bv QUXp0wICCAAwDAYIKoZIhvcNAgkFADAdBglghkgBZQMEASoEEBbnNep+doK+koY3 
PbImmuUEgglQNBgkn4ysAiQdlp7tx1e2owVxeKmNdjeUIejonvRHXdy0kFEi1hev huKf4UxrOn1+8XxmS6xshK03/9CJQp8deHTJfSwWKc/oASBODFD6Thy9Y5URNvlF vtzUwupsHlQfBN2U/PnHnL0cizW/UtYCK43t1etCL9WYQAlW3bi+Kb+xuPX2alwN OeWvn6b9AsrFIH5kq5lwcojYfCGbHDm4B17ko3wHfdZSattmBwnmiCwYTw6b1WiV AHvcpZ15IItyBdD7/2Rvo83cTvKt1Gu4tPz55PZs11SR138G/QsvCvpQtg89FrcT PtZed+OSITiTxpDcOd4xdCMi5+sAQUJR1fOCKO78Nuc5xqbWOs0M7NFIshHFWJgQ BytqGnEuQ70fw9ZY4COLoemznkxhXwTjksMVywjnk9QwfXTTz/04W0ib1Yc3TlLW LC4BVbJk/BkJLer6QbaDX8OU7D/TkDdp3Gj8oDxWatWEdXazvZvaJuCS08nh6SDM Vi7enBm9ji/apI78qisV8KRjV0QzppVfgtSTpwzOGjYq0aDxBSc4qD+gAxDvP7dY APrX6ZU7Evf7TaTNI7Xpq6QoP83Pwvu0i0TxriyyNRM/fajZo8DpSCCArMWozWQj lg2IFWx71jcxzvypKUD78ZndtQunTVWN4osjrtf6DgSeILQq3n3ixz/DQTozSXPp 2vUgG5iddmN/BgXLr3HWPn/SmuMLLs66YD8DJS094BVOv1WXtqkO2V9lds8j1pTD 548E7Qv5j3MxptTCdm4b2BEbPRGjE9Wdkd20RuTMddUFfNkqbKpMBaFXb6I2fcUl f1NlNnNQdCOFC1qLa2QcKE6cszowoKN8amLXWZ5dQCkR3dOXTqNIrWH7je4BMUxu it19mRhiOrA7vHHcwVSelWu8pjTW/5VWJdSCGMLbQTPeHGce7TBKLVFxGb5tNDDO 4hckLi2XaUkT4dBQubub5Zb55y3KeCzbu0OZW1EMgAreoUt2Msnp+RcJJ0dhyzBr pMyULSvoi/RekSnrRbH4s1q1+agzv3V2tGYqB2Phh2E0tdWcYCzR8V/HpsTJcENW fHSKfkmGQdPlGyjfKpPmPRJ4nSFh0h8lhgh3e18Ydz/4BE1JeZ4wGvQmeM1X+Zwe fuiypTTf0RjFdyBgDvAFFKpXi/2eiunJJP8SIC2hDWwsYbiB50uy7k/wXu0Rbp+7 ormZtfb/E7JvAZkbm/W5KMrPIigbwJoi5l6AD4ZsX+OCNWWhGuFy8z9alWiTUKhA Y5RY1HlENei2z56sxNWoyx4wMtpzmMxly5S0s2aGa6cXvmgW0QJCKQpguxL1AVnS LLtpkl83lc812brlSoGuWpMSIL8j+IJgNK62trlqeqEbVIylbt/TqD+MHugI5RgU wp3cUHeYRSKfonfdIyK5QazbBewLTqYRn+owQEGEQn2aJAkcdBpemZadK+YFR1oM uRiXQA0eunQHk6fzJazDLK48nViBPGZDhFFxBsP/zaVSldlMYDydDqRhKimwPP/N x2Po4k7xR+xiWzxu8pXTIRI3gccbCc9V+xZ2HAVyuAupK95lBckICS/qqDJfS/y9 wVTmevWKRKJIJRogvlj/0LCPl0sZmXoQc9UX8PpqXou0xETS6+gMojnqvvHPZecD wKPy+/64+nPySEpBAPy0LOmoxKhMXuSkyiGr+6gXNbpiKuudTNWoif2iVeyeCdWP dJ3BFJIobgLiXM47bvx/k7oGmqlpIX1VqXqJH5XjOQyw4FFmeL+VzkAVlaXzWN29 qIdypDXJ3HkQtlUp8gn//qycStHzwubavjG5TPRODM66QOoWtw95WzxpTcnwhhnS 4DPs+9B82euGBzalrvh+4kbJDog5gF9Gan7+7B9HZdRbnToU9MJVW10LOxTtIsHQ 
ebxHNtWEDr2aT/oADcnPQL2f8vpj721bT/G5husfDv0XdAPkDoioPNt7BvT3AZW0 dnuAmeTCKXONyxacHEeVTxWpjuuwa9nGNT/KJM5FB3vZKcvkZzFXoMPBBk8Rrfrd H73lTZYUsCgoX6RlNos/4qqBfcMi1JHQLswP5cD3Fwu4VxZYgrcdEVwE5ry3Jom5 9Yd1cTE+kHZeRLAxKK1e0vTZidPShjX6fI2PSxlyln4Xq8hzQrm9R1RpCsLgRe6H /yv42G2DCKuhTpnD63XZ4rwq0LvRuI8P5AYgcZR8WFXJ4GKJ6SpR5k/D+kABsvh4 3pn4NzTqtGw/euIWM1zp6AdyB5wFNu0L452Ppc+lCX8tB+OFUlSP8Yu+91oC+RET scZqchqxpL48vpQkN7XKqJEt0WYegNDElvoXvOvcPvvsJJl2dagg2wiwFFw+iRc0 ES3HypT0xBIRaTPfCsx7hEoXQrXh5R/gY0JKt8WEMRjrtAfz/bsZt8kc6pgGbwvO ZCCXQqThjDTdHOWh/GUI60pj7q7xmW3v4nDsANheXJXOAfRn8NC6hiYNICgeQWj9 MZYIHGR/VsQRi0msamCoAopeSuApi7yViYX0nVneJQLpwJIUMse5wPeC4dVcLVQF oXJGghXN3JNWUzDoOBMMpAPdFfAHUa8idwsnTxxY4KIORBJGksWB8Tyj0pqwBk8S udI//Vo32HoKckDQM9UG/LHYgrgxyzJTK1fYjU0Jl1MylhWNtHGK83iG5blitObQ 3L8CJY0lHSpE+wGT7CYgFIw8I3HVS9MFr13J+YTP7ld4fQpZk6W+7JsPf7UEfTvM mhRSzVmsC/e6txuTYMVYYA3grZzCNrEomijWbexlu5zGHmXndH69sNNxVAF1BrJZ rKyqMhCYqsexkGMo8zrJspeWhrFRQNGDtAOaS9iRHLM5nJDEsjje9+TIrHatd4sT Os+NL9ac9XbSXFsVUiOKIrEKOrmW+wIjDI9QKoWGBkzNes2j/OHNIGfsp+s1YL3y cpnRYnv0ZfgdAHQu1pfQ1TkJ74M7UHUIrfsCIRie9YQmWbcbZ6s5EVPSRmuHsw4U PaJMZFKQJmiNdMbUf07QbDDHiZ9/HG9tzGjuBRwTgv86uQTce/4F9TiS+Oe8ehn9 SYJubN0GvYg8k5xK7WNsntJDfQwc4Qul2N/HefT6aG7J4e9WK8dsPMtlZPkAyuKU 2Z6LLnaliXWGV1GINjfRbGrprTbMDe5orqGPcpMCATb+Bmus1k0EkDc= -----END ENCRYPTED PRIVATE KEY----- dromedary-0.1.5/dromedary/tests/ssl_certs/create_ssls.py000077500000000000000000000234601520150013200235120ustar00rootroot00000000000000#! /usr/bin/env python3 # Copyright (C) 2007, 2008, 2009, 2017 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """create_ssls.py -- create ssl keys and certificates for tests. The https server requires at least a key and a certificate to start. SSL keys and certificates are created with openssl which may not be available everywhere we want to run the test suite. To simplify test writing, the necessary keys and certificates are generated by this script and used by the tests. Since creating these test keys and certificates requires a good knowledge of openssl and a lot of typing, we record all the needed parameters here. Since this will be used rarely, no effort has been made to handle exotic errors, the basic policy is that openssl should be available in the path and the parameters should be correct, any error will abort the script. Feel free to enhance that. This script provides options for building any individual files or two options to build the certificate authority files (--ca) or the server files (--server). """ import optparse import os import sys from subprocess import PIPE, CalledProcessError, Popen # We want to use the right breezy: the one we are part of # FIXME: The following is correct but looks a bit ugly _dir = os.path.dirname our_bzr = _dir(_dir(_dir(_dir(os.path.realpath(__file__))))) sys.path.insert(0, our_bzr) import contextlib from dromedary.tests import ssl_certs def error(s): print(s) exit(1) def needs(request, *paths): """Errors out if the specified path does not exists.""" missing = [p for p in paths if not os.path.exists(p)] if missing: error(f"{request} needs: {','.join(missing)}") def rm_f(path): """Rm -f path.""" with contextlib.suppress(BaseException): os.unlink(path) def _openssl(args, input=None): """Execute a command in a subproces feeding stdin with the provided input. 
:return: (returncode, stdout, stderr) """ cmd = ["openssl"] + args proc = Popen(cmd, stdin=PIPE) (stdout, stderr) = proc.communicate(input.encode("utf-8")) if proc.returncode: # Basic error handling, all commands should succeed raise CalledProcessError(proc.returncode, cmd) return proc.returncode, stdout, stderr ssl_params = { # Passwords "server_pass": "I will protect the communications", "server_challenge_pass": "Challenge for the CA", "ca_pass": "I am the authority for the whole... localhost", # CA identity "ca_country_code": "BZ", "ca_state": "Internet", "ca_locality": "Bazaar", "ca_organization": "Distributed", "ca_section": "VCS", "ca_name": "Master of certificates", "ca_email": "cert@no.spam", # Server identity "server_country_code": "LH", "server_state": "Internet", "server_locality": "LocalHost", "server_organization": "Testing Ltd", "server_section": "https server", "server_name": "127.0.0.1", # Always accessed under that name "server_email": "https_server@localhost", "server_optional_company_name": "", } def build_ca_key(): """Generate an ssl certificate authority private key.""" key_path = ssl_certs.build_path("ca.key") rm_f(key_path) _openssl( ["genrsa", "-passout", "stdin", "-des3", "-out", key_path, "4096"], input=f"{ssl_params['ca_pass']}\n{ssl_params['ca_pass']}\n", ) def build_ca_certificate(): """Generate an ssl certificate authority private key.""" key_path = ssl_certs.build_path("ca.key") needs("Building ca.crt", key_path) cert_path = ssl_certs.build_path("ca.crt") rm_f(cert_path) _openssl( [ "req", "-passin", "stdin", "-new", "-x509", # Will need to be generated again in 1000 years -- 20210106 "-days", "365242", "-key", key_path, "-out", cert_path, ], input="{ca_pass}\n" "{ca_country_code}\n" "{ca_state}\n" "{ca_locality}\n" "{ca_organization}\n" "{ca_section}\n" "{ca_name}\n" "{ca_email}\n".format(**ssl_params), ) def build_server_key(): """Generate an ssl server private key. 
We generates a key with a password and then copy it without password so that a server can use it without prompting. """ key_path = ssl_certs.build_path("server_with_pass.key") rm_f(key_path) _openssl( ["genrsa", "-passout", "stdin", "-des3", "-out", key_path, "4096"], input=f"{ssl_params['server_pass']}\n{ssl_params['server_pass']}\n", ) key_nopass_path = ssl_certs.build_path("server_without_pass.key") rm_f(key_nopass_path) _openssl( ["rsa", "-passin", "stdin", "-in", key_path, "-out", key_nopass_path], input=f"{ssl_params['server_pass']}\n", ) def build_server_signing_request(): """Create a CSR (certificate signing request) to get signed by the CA.""" key_path = ssl_certs.build_path("server_with_pass.key") needs("Building server.csr", key_path) server_csr_path = ssl_certs.build_path("server.csr") rm_f(server_csr_path) _openssl( ["req", "-passin", "stdin", "-new", "-key", key_path, "-out", server_csr_path], input="{server_pass}\n" "{server_country_code}\n" "{server_state}\n" "{server_locality}\n" "{server_organization}\n" "{server_section}\n" "{server_name}\n" "{server_email}\n" "{server_challenge_pass}\n" "{server_optional_company_name}\n".format(**ssl_params), ) def sign_server_certificate(): """CA signs server csr.""" server_csr_path = ssl_certs.build_path("server.csr") ca_cert_path = ssl_certs.build_path("ca.crt") ca_key_path = ssl_certs.build_path("ca.key") needs("Signing server.crt", server_csr_path, ca_cert_path, ca_key_path) server_cert_path = ssl_certs.build_path("server.crt") server_ext_conf = ssl_certs.build_path("server.extensions.cnf") rm_f(server_cert_path) _openssl( [ "x509", "-req", "-passin", "stdin", # Will need to be generated again in 1000 years -- 20210106 "-days", "365242", "-in", server_csr_path, "-CA", ca_cert_path, "-CAkey", ca_key_path, "-set_serial", "01", "-extfile", server_ext_conf, "-out", server_cert_path, ], input=f"{ssl_params['ca_pass']}\n", ) def build_ssls(name, options, builders): if options is not None: for item in options: 
builder = builders.get(item, None) if builder is None: error(f"{item} is not a known {name}") builder() opt_parser = optparse.OptionParser(usage="usage: %prog [options]") opt_parser.set_defaults(ca=False) opt_parser.set_defaults(server=False) opt_parser.add_option( "--ca", dest="ca", action="store_true", help="Generate CA key and certificate" ) opt_parser.add_option( "--server", dest="server", action="store_true", help="Generate server key, certificate signing request and certificate", ) opt_parser.add_option( "-k", "--key", dest="keys", action="append", metavar="KEY", help="generate a new KEY (several -k options can be specified)", ) opt_parser.add_option( "-c", "--certificate", dest="certificates", action="append", metavar="CERTIFICATE", help="generate a new CERTIFICATE (several -c options can be specified)", ) opt_parser.add_option( "-r", "--sign-request", dest="signing_requests", action="append", metavar="REQUEST", help="generate a new signing REQUEST (can be repeated)", ) opt_parser.add_option( "-s", "--sign", dest="signings", action="append", metavar="SIGNING", help="generate a new SIGNING (several -s options can be specified)", ) key_builders = {"ca": build_ca_key, "server": build_server_key} certificate_builders = {"ca": build_ca_certificate} signing_request_builders = {"server": build_server_signing_request} signing_builders = {"server": sign_server_certificate} if __name__ == "__main__": (Options, args) = opt_parser.parse_args() if Options.ca or Options.server: if ( Options.keys or Options.certificates or Options.signing_requests or Options.signings ): error("--ca and --server can't be used with other options") # Handles --ca before --server so that both can be used in the same run # to generate all the files needed by the https test server if Options.ca: build_ca_key() build_ca_certificate() if Options.server: build_server_key() build_server_signing_request() sign_server_certificate() else: build_ssls("key", Options.keys, key_builders) 
build_ssls("certificate", Options.certificates, certificate_builders) build_ssls( "signing request", Options.signing_requests, signing_request_builders ) build_ssls("signing", Options.signings, signing_builders) dromedary-0.1.5/dromedary/tests/ssl_certs/server.crt000066400000000000000000000042501520150013200226420ustar00rootroot00000000000000-----BEGIN CERTIFICATE----- MIIGNjCCBB6gAwIBAgIBATANBgkqhkiG9w0BAQsFADCBkzELMAkGA1UEBhMCQlox ETAPBgNVBAgMCEludGVybmV0MQ8wDQYDVQQHDAZCYXphYXIxFDASBgNVBAoMC0Rp c3RyaWJ1dGVkMQwwCgYDVQQLDANWQ1MxHzAdBgNVBAMMFk1hc3RlciBvZiBjZXJ0 aWZpY2F0ZXMxGzAZBgkqhkiG9w0BCQEWDGNlcnRAbm8uc3BhbTAgFw0yNTAxMDcx ODM4MzVaGA8zMDI1MDEwNzE4MzgzNVowgZwxCzAJBgNVBAYTAkxIMREwDwYDVQQI DAhJbnRlcm5ldDESMBAGA1UEBwwJTG9jYWxIb3N0MRQwEgYDVQQKDAtUZXN0aW5n IEx0ZDEVMBMGA1UECwwMaHR0cHMgc2VydmVyMRIwEAYDVQQDDAkxMjcuMC4wLjEx JTAjBgkqhkiG9w0BCQEWFmh0dHBzX3NlcnZlckBsb2NhbGhvc3QwggIiMA0GCSqG SIb3DQEBAQUAA4ICDwAwggIKAoICAQC+4nchFU83BOFSEq1v6d4DKj4I73NzbkeW V1KA7KeM1OfCz75NDR24GzwqdxP2cKaCijZMrSeYv/AqhzTYa3haL9dpKVqUWqMS 0c7iR8AlpZjE2xyNHXjDo/ng9nFLmXt9prvPHTTNG1Pv2yhekmpbDnxMJfj693DF did4Aw7oLSeWNOv0PURmyNDtL9jDTRYUiddzapVp7DhB36St21ph5znt+Bh+mBBB bPdTUktKhuoFSV/wBGv8Id3LhCOzzBgthncWZQcwioIQThcTy7WdLDhM1yF35bVd GA/TvFcxHIXn39ToivPA2GcKG2EAUUq60J0sTzGhIQLYsK22CQDF4gOl6nv9dLr5 tQ6LudpQMKyJelukd7ufqgKHrwJym6riat3HXF5NRomMWlxtSoEGWS02lS3PovFl bnVFxEP7Oirx48u2KdJEYzjgDJiwdB5T6hXQ+rAc+PMGAlvSzygg1iaiRcHxWExz rQrW3DxbZtMvlWgROsoKS3QZe39XoVOKKv3mdtGgc/ETuSntui7TxfoVA5M5ACei me/xpKFX/asaVBMiXo7+3nNfBL6P08uDhb45c2TxlZDHMyJFaumlpBxeiNWDm1dl Ne5bjaMi1oP4DL2ymHuaY1tdbXXbCeaBr/AVItvRTHDY8F5todcpkvk4CI2bxMFI l3ImEQQuMwIDAQABo4GHMIGEMAkGA1UdEwQCMAAwNwYDVR0RBDAwLocEfwAAAYcQ AAAAAAAAAAAAAAAAAAAAAYIJMTI3LjAuMC4xgglsb2NhbGhvc3QwHQYDVR0OBBYE FLwyB3dn5UcprsJChNTdi87AQBb8MB8GA1UdIwQYMBaAFMSNRx8FugB7FSR/u+o0 w0PvFqZAMA0GCSqGSIb3DQEBCwUAA4ICAQAhqP8imBosxExR7hGMoolEfvaUpoa4 1dqwKn/hk+yrIv64X5WTBLBdHhIX4FMdBSk7j03q9H/DUxmQyPLS4qdg7BJvez0I NirUZEktESe/hg4lWXX0axtqnICTzc4EJKjHgl8PT9i5wmFbmIf0db2gBPllLAv7 
gH/pVDRLZVrNWbYh9Gm3W4D2SlN20p8Y5bubDtLRMiwOacilYr6gyaRpTp4dGqqu fFSDsKrFlTYt48C/4WX0CmDruOLBCWq7rH0DL3D6J2HZyR7DYLhJ1waDnsQcS04D 5NL2pT/K2ZlMq/J/KkaQBawXotZdPE1z/1bHkNI6KexXmIsCz7xn3jiuGWuDoCYU aTHyBs6xQ2i9LLdrwRxIrdin8xp5sEGGHM4KdqRBb1eLDrxfDqbtosDwdg+XRrCL l1q7oHHNq/OJCRZgWQ0Hms+tUoUXrHc+Pao3I7zhkGxBeb0ytGuBoCHzifB8KW8y 1yuoVhoBz/sU+wqSAXUpJdakV6MxDCyK3OgrrIMfZKqouGnlsuabJw4YFsMBkhuD qn/PSO8OOui7ZF9nL8IxGbsyB9rnv1qWtZ+o/ViUpgJkuhjl+vhkZQUO8y6AJFU6 AS2RSkQaEqd5808WtYK6VAsBnjD04ZCT9aU4d0u0l7qKckOEtNJgUZvWfBVAYo7c +bFRLyQw0xiVYQ== -----END CERTIFICATE----- dromedary-0.1.5/dromedary/tests/ssl_certs/server.csr000066400000000000000000000034351520150013200226450ustar00rootroot00000000000000-----BEGIN CERTIFICATE REQUEST----- MIIFBzCCAu8CAQAwgZwxCzAJBgNVBAYTAkxIMREwDwYDVQQIDAhJbnRlcm5ldDES MBAGA1UEBwwJTG9jYWxIb3N0MRQwEgYDVQQKDAtUZXN0aW5nIEx0ZDEVMBMGA1UE CwwMaHR0cHMgc2VydmVyMRIwEAYDVQQDDAkxMjcuMC4wLjExJTAjBgkqhkiG9w0B CQEWFmh0dHBzX3NlcnZlckBsb2NhbGhvc3QwggIiMA0GCSqGSIb3DQEBAQUAA4IC DwAwggIKAoICAQC+4nchFU83BOFSEq1v6d4DKj4I73NzbkeWV1KA7KeM1OfCz75N DR24GzwqdxP2cKaCijZMrSeYv/AqhzTYa3haL9dpKVqUWqMS0c7iR8AlpZjE2xyN HXjDo/ng9nFLmXt9prvPHTTNG1Pv2yhekmpbDnxMJfj693DFdid4Aw7oLSeWNOv0 PURmyNDtL9jDTRYUiddzapVp7DhB36St21ph5znt+Bh+mBBBbPdTUktKhuoFSV/w BGv8Id3LhCOzzBgthncWZQcwioIQThcTy7WdLDhM1yF35bVdGA/TvFcxHIXn39To ivPA2GcKG2EAUUq60J0sTzGhIQLYsK22CQDF4gOl6nv9dLr5tQ6LudpQMKyJeluk d7ufqgKHrwJym6riat3HXF5NRomMWlxtSoEGWS02lS3PovFlbnVFxEP7Oirx48u2 KdJEYzjgDJiwdB5T6hXQ+rAc+PMGAlvSzygg1iaiRcHxWExzrQrW3DxbZtMvlWgR OsoKS3QZe39XoVOKKv3mdtGgc/ETuSntui7TxfoVA5M5ACeime/xpKFX/asaVBMi Xo7+3nNfBL6P08uDhb45c2TxlZDHMyJFaumlpBxeiNWDm1dlNe5bjaMi1oP4DL2y mHuaY1tdbXXbCeaBr/AVItvRTHDY8F5todcpkvk4CI2bxMFIl3ImEQQuMwIDAQAB oCUwIwYJKoZIhvcNAQkHMRYMFENoYWxsZW5nZSBmb3IgdGhlIENBMA0GCSqGSIb3 DQEBCwUAA4ICAQCELaxal2iWyw4zDbgaAv2zhf4ddb+NLn19XSkI+XmXA1tyW3cS zIDAfO5LKB3hM95CffCJHUsXMr1kE6ik3EzQbrtVtaCJiCBNbxznCQFInymJ2Etc 3DofIe6EfrYh930JnPDSfpf+3D0882Vj0W4NeOcx1ApzPQ/GQ76m7MsMQnFncWdv 
JUa/rx1u1fcD9yvMRfaAErJ/ddOGlPXTgtqNP4rk03ifztsBjyMEpbufu6DKUdsm hIK7X/JKJtX87tYcaQd38s0b7RUbPQ2w3JKd8ypU5b/YfmkSTapraOajknoKYLuO D+CiEO+VDb8CSuUNmA5k3EJy98RohpqslwADrY7ylE74JAt/3psaFgsyKVinj5Gg 0Ne0JGYWFILNndCVeAsUkidppEaeGYtVq7pcvpVKhEkz3ruAi/XoN79+xS0OZyHx vKqmnYfErpgoZqTfiofHj86H/paMRe14lEUkmDCqJ9vBae09NhoyqOv/scECK52c /nTYkdtdPm03Oo1LoMVRnCLCjuQDF4p6QiGEDVHJBeSAATnCz77Gp4qhhtVI29Ns SOvrhs9jVHz0Ytfofv03tEuFTck94lypzFOj2/jIOa2xVntHWdgMr43C19buWnfK E6bV/GjQPGG5n2N2MQpIPiruwhW53Z3G9FvfI2RMZ6y6xWpxMf3MZU6yLQ== -----END CERTIFICATE REQUEST----- dromedary-0.1.5/dromedary/tests/ssl_certs/server.extensions.cnf000066400000000000000000000003221520150013200250120ustar00rootroot00000000000000basicConstraints=CA:FALSE subjectAltName=@my_subject_alt_names subjectKeyIdentifier = hash [ my_subject_alt_names ] IP.1 = 127.0.0.1 IP.2 = ::1 DNS.1 = 127.0.0.1 # to keep python 2.7 happy DNS.2 = localhost dromedary-0.1.5/dromedary/tests/ssl_certs/server_with_pass.key000066400000000000000000000065361520150013200247340ustar00rootroot00000000000000-----BEGIN ENCRYPTED PRIVATE KEY----- MIIJpDBWBgkqhkiG9w0BBQ0wSTAxBgkqhkiG9w0BBQwwJAQQn5K/WXzTg7+Xo73l BnSBuAICCAAwDAYIKoZIhvcNAgkFADAUBggqhkiG9w0DBwQIctVzZm7/GYAEgglI DiYYoOEwvKjXS/tQeSCg72NOsgMfuJPXZlRcFJdwYSQ95iBnN9iEfWyUqJO/9TPs +jBo3XGssXE7J2RHvOXJU6qmqHfnRf+H030SSCcWWR+qGWeJtknt2IMjzPtoJlqS K4Vl07trEaEdiAp2egr6R7YDab/CcN2QFcL0NFVHYxibe4VMcOGgu7XmVD2vzY5x 54FGcLSMB0U4oN/qAMC59jxJYk2jiaiGfS6/hW/Wlabq/i2AjC6LqNJyyeCBwU76 aZgkjTMAvbDCR/A+sOSv2PciS2jgff96acDGMfkm38xj41XMr8CZkRa+KpRd4aCX WWFti9N9GVFL6dKEO7uvossgPsEq2U7rDs8MdqTTfHwzfGbPsStx5mAF0xR72PqZ k/5hx9gTk2GsZ8p3wlz2Iu4qpKedKWNMtQvp9weXX2QaylFASCY2HyC8XOlxBRbT D08+6ypJ1aXELMiUU4+rkIjILMLZedr5r13md2puBP1v/rNt2XjC//NqmftYhIpX 3ppGXHYkukvZ4Y0tVphiywGUKVRmF9CdHOZ+xEUQDqU3Ea7hLxx2c/VFv5r7VpbU ZsGvOwQOi8UrPPTkwXJApENYeKw5FdMsXV9FOPbgwOAVP5D8Wq99DGOKA6t6jfwZ qj5AXkEP2gu6Gex1DHI2w4emCisRSF0Q9H+ZCEiKnIVFhawCZcPUJqPROdXbDE9C ZpsC34GK4Nq8jl3sfrvgOHN8/eoDu1bRIjhu1MjoWqMCOa0OmL2X4kFU956O+BtR 
Tgx8FwgQRzM4MKRfBensHsK34WK8HI2JY3Oaj0v1ygCb+9EaNaJQlJ1gU+icBh0R NlM8qkhPktXjbRVsnefBn0wKlvuMLShB78Hn3BHZaSUtwnyBRJWdvJ+CTanEThl4 tWHytvxwfFdVs/K3Lt4OQum32QOGxZ/yTXP6xZ6UE2P2JS0/gMb1VmSxBM/T8hTI zWjK5WNLKgSB5PSxnvAxO+YC+oGEqIullJ2ad6u6/k+JdOU96x2YGwmsTLaluNI9 fn1HljHjTiqNWgIS6hAtQHwnjzuNIPl5MMOGNWPcNyXm8lwl6KzC154CsE8qL2/m yfcd5XmJ8N1USKjHs3KWCLvGy5gJo8oV2rWIHqWmW52MeAWar0DDdFje5gGc/jHL Is6aUSspb3tGDJ0SYs73Og3CWTUKu4Rwouhu4XJWKVJ8pr7w/R7ofZVClBcYpGbu wC0dE55Ot9hdwQTBMObE89g9uNHfCPc4nPsZ+XQBYvfXEMgBvlJZ80Am3IAWu5NR Rdr7Q2nsb6epAn1OoT3m1My5oDIJ5WEbKaiwDTAcZPYY4ivb9i+cPMWPK4/VrDWr aqsZ9HHIwUxazj6LLfAyO6N6dJ9DTuwDyMJ9qK7ez69kbQSn3ex+oi0pPhFxgLFp lwMyPU1SS1xVi6jJDutaYDGKfTbuDMdBubSwcwz9JyT6FYR+DTowSZ/ytRrG3E1C DRCobe59B/e9VLlk3VOSmw/E7zgw1s+jMj8SYnyhNFqqT5nvgSCPV3nPDkpY2cK9 R5gDyM8Nl07wWByhsvRKYmjal+n++k5bEZmaEHnfQf4OWpwgnNKFKC2REYboUKoI bMBmTfcwzbaooJN9G3FZ6RtjWwMDQwIVFB9XaA2+s4q4kYObmANr4E5Eef3uhSz5 6Fm5i6H6nM5NrcTQEERvi52xMO/ZIWr10Hqtf2jhjCx0lxwJAn3v9IUzSyeBjMBr 2T0X4YEtvUO+PCKBUcKww6RntLC1UYjEARUyJBF8VCmFCC8rJjVLIkziyOeeYaSn FzDhh0NGD7s7m7rvhW00RZ3si63LphyMsRai0BD39lBWO59eEic16mfA8JnMXsrJ z8zSsGnxuVjIwLrAmWnhJpUwGGS1CkfmK18hW1m8io/NlDMANfx+Gxh5uMEjYehM hAgSjNLUCIbbidS6GubXe5EOIiwjMZgQ/Q2OmrPOkPoTW0b9ixBMgnkLH4X2XZQZ dq54NFvJAwbDkjSZvXtM7aaPWtskwETZgWB+BLJXI1YiGVVBJhGgYv/SL2WhkzIy VCzIHIZqdVWsEpoix5VTMRN2ItZ2b3wmgo0HhsX1Hfxoz0aCGYpW5zIfZVbvcEj8 PWH1OgaEkT84qVDa0N63r7c9HYkgym901DsdWGNkWerei9yyqdb/4pLQW2q8aXvV 2k0o5ZKN/61lXcRks8usrmDo6rcU/pRWALjjAFK2h0esYfSiQZ3pq/XhcmTNnB72 sRs/5r5eHNjfBjKGe89lhH7VrxOx36UMNxwTIsJtVjunfhHaaAx6bf14XL9VKZla RRDrHCe1ZW4Uz0xEjNI0gqcV8gxIyhfulzQsOuF5QkZ+Ewu0fnSxkcehvldXNMAu oiFSR4rNNgbJRy5AXD7uDdCLokxAMD+XSOdwT7aI+HY3CX0Z8HM6fZolcZQkqHnp OoVTAjgzOl2fSHGLpiUts3PZngzjrKp/kvdQZd4A2lXIqcmqErk7homzLqsLVU9G pOYEstyut7IMzmNm0kEOVkvJpaCy3dAO+TRvDX3AYyKSsZ6/QrPqH2w8kgAOX4mB 1BWc15bttlu91g0D61jYz7/5wMkosQ4hrEvrbNM+CB7lQf9i+1egDh58mPbGGzw2 pKrHh4vR99dh+kUehulK4GBTIYu3Ckm7gZbaahE3XhfqpYsgzp4xZUCVAm3V1IMy 
9wKoMf+h5gTFB4l1oIzArwYM8CQmcNvtF7JI8wrHaOQoIs3Z6coNsnQzL0lx4/x/ 8BMC3l0x6v7thzZeeVK8UjTPoMB6eVBige7kuT6+3p5g5QEa30FMnT623FMM/GH2 Wdxpp9HQZRe5D/PemWHxWxlQmSpmIhzHtyeHAfyuXwfDovbY2L5Y0YbcXxeKD+0d xGijAcMCBz5ahGpThYoa0nCSPlnZFwP+ipybn7YD2yM0z7j59Okd7ggSQDkU2Y3L rt8vcCdXSp4EgRWJZ9NTYFFG+JVbXBxdnQIZ7YkbKygSMRj5v4OrcawNpbtW/alX qp5/U4E/lbtbh68YhpYNd0QGRpDDrcSHlHFv4gv2beEdbrt1VYkCfCfo9krO2kJT TScDXHWASw68sYjTgc6fQqAIFxN4QsndoSMUdcaUlGSu3Pn2eSzSmwdLK1saWqJv RqBFvE3FkaUmAZTvoVgGt3uyEh4SSSR5 -----END ENCRYPTED PRIVATE KEY----- dromedary-0.1.5/dromedary/tests/ssl_certs/server_without_pass.key000066400000000000000000000063101520150013200254520ustar00rootroot00000000000000-----BEGIN PRIVATE KEY----- MIIJQgIBADANBgkqhkiG9w0BAQEFAASCCSwwggkoAgEAAoICAQC+4nchFU83BOFS Eq1v6d4DKj4I73NzbkeWV1KA7KeM1OfCz75NDR24GzwqdxP2cKaCijZMrSeYv/Aq hzTYa3haL9dpKVqUWqMS0c7iR8AlpZjE2xyNHXjDo/ng9nFLmXt9prvPHTTNG1Pv 2yhekmpbDnxMJfj693DFdid4Aw7oLSeWNOv0PURmyNDtL9jDTRYUiddzapVp7DhB 36St21ph5znt+Bh+mBBBbPdTUktKhuoFSV/wBGv8Id3LhCOzzBgthncWZQcwioIQ ThcTy7WdLDhM1yF35bVdGA/TvFcxHIXn39ToivPA2GcKG2EAUUq60J0sTzGhIQLY sK22CQDF4gOl6nv9dLr5tQ6LudpQMKyJelukd7ufqgKHrwJym6riat3HXF5NRomM WlxtSoEGWS02lS3PovFlbnVFxEP7Oirx48u2KdJEYzjgDJiwdB5T6hXQ+rAc+PMG AlvSzygg1iaiRcHxWExzrQrW3DxbZtMvlWgROsoKS3QZe39XoVOKKv3mdtGgc/ET uSntui7TxfoVA5M5ACeime/xpKFX/asaVBMiXo7+3nNfBL6P08uDhb45c2TxlZDH MyJFaumlpBxeiNWDm1dlNe5bjaMi1oP4DL2ymHuaY1tdbXXbCeaBr/AVItvRTHDY 8F5todcpkvk4CI2bxMFIl3ImEQQuMwIDAQABAoICAAkI8xaPyaYTBw85bxgi+60u rK0DmHVYPO8yxubvTKbv1OB1sM441rVGJLzl0f4SKu9210cd0wf53cZFjAzKWXH7 XbjOikkHWTykzaQMPV4KzoZS0LElOfgYpNUvFQG9DAlQgQc2nK8wofJybyC60Wnp 75wzF+vZFm9iPlAB5Qy8Rmlnq9ttovUygCEZ6Kql1Wu3cok1/Eh9M9R6X3MTNN35 cdZ/rbkgPXS7UaGR/ZpTdHQ3muDjdLEEcVkWshHqkSJmgPCAa6yygaF/8LuxIrGD zE5myGDCcKktYGJnQcFDVls5TvyKxyrTk1z5GshHEMBy612TigfUZiwXgMFi/7RJ JE4Ul6vdiMxqIqrIDFZuYlRNu5HpY3JKw4e3uMPVEEJ0zX7ecKlNGHlO6d2TVYDn u/K365F7ZqkDiLi4WaWnUcl4ZXBLCVbWUPPxUxT/I1q8hcd+NDSpA+e2Ud3sv968 rsBksqSjHW/mV+INsFbLvbomI/CAdMjg/O2mcy/Qf52Pmju3/usxG1RJurMFgMkF 
fGn1L86edz7H2iNpWihB9Ny/lJeu5C0BIE28vwI/MrCgSd3DLUqTRPoi61ldUlnP 5Tyagsroy0CXMQREX13f6ljohlMFuAb3bdBDZxtCLDYMLlflOmbO2yi32tzpNtKd 73t9kyjqWUVcZQftzCsxAoIBAQDkatb9fWHRCURDqba5hvLKst60eqv6X3flPQ21 t3u9HV+3i1mcE7Eqc59XGFWYOa6r+UB7zwWZ4DnrwDHpHyRnlhbE2qTBPOO6yTN7 oQr+04xSO91+567P0nKUcSn19vpziR/nWHMwXFjlId2kbMAR2Q0+YLVjMFvyo3rQ gQwBA/9SwPgGIPag8FlUPZRxHsKAT4gVnth4UsfRFFg/qvHewLjsdqF2Fl/uIyeI F1mCgrcWBWGfBHOlNwotN8voS6eWTAP5y+NF/FQQvI48qVNOYYtcUsmXpBGlo5r9 6GBgl26JkN1WtF7k2ryQyMtPZXHnxa4gtsa42WcwrXe03Wg/AoIBAQDV71x3/b6y VEq9j1crpIhAxnF+a6VjVZRbeyl9IbzIwgmgrL2kmduCxK9/dJQMeX/ZCYFATK64 rHPJ3ie9PRoVzhCVYEIPe/g06knXMjbacRUt1/gbQDBUUzAMPSqyF0dZvpiyVuWA Ei/tELgOg85+CFaCGcq58wfTTS76JowGCBGYDHYYqcxRjo9kgvtaPpHT0zMsMx0q a/EkOOp6dIhxWbv7uEskkBE22U5XyM0pGUb9yyjzUfzR2jM1/UR45wSixZWBDsp0 3ivY+LzcejPxwFBkjkTvbK/naYyHCt3A9JtJqVI92jLBNr6J34PyEv1soynBqGlO aFErz7aLWV0NAoIBAQDj3d2ZxlIlueva4FzEGmbtZaGcRGB6hnDSRKT/qgqML9iD /0Um2dI8+ll0BnelQ64IK8BzgqQgzLqbgAGKgaHMoMMYINKJX9gDR6LPa2nPq3Tp uIUPi8st0dCyW24zzO4vAhXMscU/8nBQLQeydzbo1zJKDyoEyIKBvSrRBFvYS4eT o6QKYSoIhZ1n14LVko0QecbVYsCq0FI8NuKOqVdfE72nT/VlG48ZvwI51qlZ9FL3 aejoPQWtRQom7+nAVHDcE/tHYGnbMI0goSn4RCcyI0dmk8Q+PdPI/Tyqnf4/ffIs 1FKqo4ejIL9KZLXF//qw79j1E8GNOHyj5/lqehH7AoIBAH9XhTEfZz17EyoWgorF 2xzDgpb/uGiSbkat3xpO1LKjXVu4twGdW55ROS2i1OVABSvJjpgZjP78F8gXZows LLTB/fkMXQYegrXsp5tilmgcW8D4BwUhhiMLiVQfrKRpWt0+qGGve9hp+wEfrI9n Qaifie7TL2rUENpj3QylmT+V2fMpp7oyiB4bv5rSpI8pI2B1HMa4fincKqOnBVty tizSfyTspD3VS4nce9eg/Q3zr+At5+g960F2onkGkpVs3cON3Nn/Vd+Ox4bVOIX8 b6L9GF1imgHyLhqPJ0jS3QCYGT8VfJC4qvF7hptG4qFbUGI0FQzjFBvJ2Fc9wqjb vpkCggEAFU2w1j8HAvDDJ1GSiwwc353dkCcvwBUAlN+Vryf08+T9p0rLeViZ7u6n sI1otjQhNNitGEmOu30wQTH2eq8nb8UwcZjlxQxMeozKmun74lb2w0l61ZtTVsKg PyTpwRU29s0o7gcCNQQL2ZT1OnnpoFXT+h9A46Qt8pqIKtMzEQjjHiBaD2xqv7O1 UzmSCLp2Nj7+INV/zIOX8HicG7evQHj6pqSCRfPgxJKbPuBfKZJeP4d0xJ6+oapY /ePK5+qu8bpLd8JzEICVgldVYSCeKTexG5enUN9nbktj5ozGv73cYSS+x3GlxhmC qaFsUyg+Hs0Zme451SYuGP/r613TEQ== -----END PRIVATE KEY----- 
dromedary-0.1.5/dromedary/tests/stub_sftp.py000066400000000000000000000513431520150013200212110ustar00rootroot00000000000000# Copyright (C) 2005, 2006, 2008-2011 Robey Pointer , Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """A stub SFTP server for loopback SFTP testing. Adapted from the one in paramiko's unit tests. """ import os import socket import socketserver import sys import time import paramiko from dromedary import ssh from dromedary.ssh.paramiko import ParamikoVendor from .. import osutils, trace, urlutils from . 
import test_server class StubServer(paramiko.ServerInterface): def __init__(self, test_case_server): paramiko.ServerInterface.__init__(self) self.log = test_case_server.log def check_auth_password(self, username, password): # all are allowed self.log(f"sftpserver - authorizing: {username}") return paramiko.AUTH_SUCCESSFUL def check_channel_request(self, kind, chanid): self.log(f"sftpserver - channel request: {kind}, {chanid}") return paramiko.OPEN_SUCCEEDED class StubSFTPHandle(paramiko.SFTPHandle): def stat(self): try: return paramiko.SFTPAttributes.from_stat(os.fstat(self.readfile.fileno())) except OSError as e: return paramiko.SFTPServer.convert_errno(e.errno) def chattr(self, attr): # python doesn't have equivalents to fchown or fchmod, so we have to # use the stored filename trace.mutter("Changing permissions on %s to %s", self.filename, attr) try: paramiko.SFTPServer.set_file_attr(self.filename, attr) except OSError as e: return paramiko.SFTPServer.convert_errno(e.errno) class StubSFTPServer(paramiko.SFTPServerInterface): def __init__(self, server, root, home=None): paramiko.SFTPServerInterface.__init__(self, server) # All paths are actually relative to 'root'. # this is like implementing chroot(). self.root = root if home is None: self.home = "" else: if not home.startswith(self.root): raise AssertionError( f"home must be a subdirectory of root ({home} vs {root})" ) self.home = home[len(self.root) :] if self.home.startswith("/"): self.home = self.home[1:] server.log("sftpserver - new connection") def _realpath(self, path): # paths returned from self.canonicalize() always start with # a path separator. So if 'root' is just '/', this would cause # a double slash at the beginning '//home/dir'. 
if self.root == "/": return self.canonicalize(path) return self.root + self.canonicalize(path) if sys.platform == "win32": def canonicalize(self, path): # Win32 sftp paths end up looking like # sftp://host@foo/h:/foo/bar # which means absolute paths look like: # /h:/foo/bar # and relative paths stay the same: # foo/bar # win32 needs to use the Unicode APIs. so we require the # paths to be utf8 (Linux just uses bytestreams) thispath = path.decode("utf8") if path.startswith("/"): # Abspath H:/foo/bar return os.path.normpath(thispath[1:]) else: return os.path.normpath(os.path.join(self.home, thispath)) else: def canonicalize(self, path): if os.path.isabs(path): return osutils.normpath(path) else: return osutils.normpath("/" + os.path.join(self.home, path)) def chattr(self, path, attr): try: paramiko.SFTPServer.set_file_attr(path, attr) except OSError as e: return paramiko.SFTPServer.convert_errno(e.errno) return paramiko.SFTP_OK def list_folder(self, path): path = self._realpath(path) try: out = [] # TODO: win32 incorrectly lists paths with non-ascii if path is not # unicode. 
However on unix the server should only deal with # bytestreams and posix.listdir does the right thing if sys.platform == "win32": flist = [f.encode("utf8") for f in os.listdir(path)] else: flist = os.listdir(path) for fname in flist: attr = paramiko.SFTPAttributes.from_stat( os.stat(osutils.pathjoin(path, fname)) ) attr.filename = fname out.append(attr) return out except OSError as e: return paramiko.SFTPServer.convert_errno(e.errno) def stat(self, path): path = self._realpath(path) try: return paramiko.SFTPAttributes.from_stat(os.stat(path)) except OSError as e: return paramiko.SFTPServer.convert_errno(e.errno) def lstat(self, path): path = self._realpath(path) try: return paramiko.SFTPAttributes.from_stat(os.lstat(path)) except OSError as e: return paramiko.SFTPServer.convert_errno(e.errno) def open(self, path, flags, attr): path = self._realpath(path) try: flags |= getattr(os, "O_BINARY", 0) if getattr(attr, "st_mode", None): fd = os.open(path, flags, attr.st_mode) else: # os.open() defaults to 0777 which is # an odd default mode for files fd = os.open(path, flags, 0o666) except OSError as e: return paramiko.SFTPServer.convert_errno(e.errno) if (flags & os.O_CREAT) and (attr is not None): attr._flags &= ~attr.FLAG_PERMISSIONS paramiko.SFTPServer.set_file_attr(path, attr) if flags & os.O_WRONLY: fstr = "wb" elif flags & os.O_RDWR: fstr = "rb+" else: # O_RDONLY (== 0) fstr = "rb" try: f = os.fdopen(fd, fstr) except OSError as e: return paramiko.SFTPServer.convert_errno(e.errno) fobj = StubSFTPHandle() fobj.filename = path fobj.readfile = f fobj.writefile = f return fobj def remove(self, path): path = self._realpath(path) try: os.remove(path) except OSError as e: return paramiko.SFTPServer.convert_errno(e.errno) return paramiko.SFTP_OK def rename(self, oldpath, newpath): oldpath = self._realpath(oldpath) newpath = self._realpath(newpath) try: os.rename(oldpath, newpath) except OSError as e: return paramiko.SFTPServer.convert_errno(e.errno) return paramiko.SFTP_OK 
def symlink(self, target_path, path): path = self._realpath(path) try: os.symlink(target_path, path) except OSError as e: return paramiko.SFTPServer.convert_errno(e.errno) return paramiko.SFTP_OK def readlink(self, path): path = self._realpath(path) try: target_path = os.readlink(path) except OSError as e: return paramiko.SFTPServer.convert_errno(e.errno) return target_path def mkdir(self, path, attr): path = self._realpath(path) try: # Using getattr() in case st_mode is None or 0 # both evaluate to False if getattr(attr, "st_mode", None): os.mkdir(path, attr.st_mode) else: os.mkdir(path) if attr is not None: attr._flags &= ~attr.FLAG_PERMISSIONS paramiko.SFTPServer.set_file_attr(path, attr) except OSError as e: return paramiko.SFTPServer.convert_errno(e.errno) return paramiko.SFTP_OK def rmdir(self, path): path = self._realpath(path) try: os.rmdir(path) except OSError as e: return paramiko.SFTPServer.convert_errno(e.errno) return paramiko.SFTP_OK # removed: chattr # (nothing in bzr's sftp transport uses those) # ------------- server test implementation -------------- STUB_SERVER_KEY = """\ -----BEGIN RSA PRIVATE KEY----- MIICWgIBAAKBgQDTj1bqB4WmayWNPB+8jVSYpZYk80Ujvj680pOTh2bORBjbIAyz oWGW+GUjzKxTiiPvVmxFgx5wdsFvF03v34lEVVhMpouqPAYQ15N37K/ir5XY+9m/ d8ufMCkjeXsQkKqFbAlQcnWMCRnOoPHS3I4vi6hmnDDeeYTSRvfLbW0fhwIBIwKB gBIiOqZYaoqbeD9OS9z2K9KR2atlTxGxOJPXiP4ESqP3NVScWNwyZ3NXHpyrJLa0 EbVtzsQhLn6rF+TzXnOlcipFvjsem3iYzCpuChfGQ6SovTcOjHV9z+hnpXvQ/fon soVRZY65wKnF7IAoUwTmJS9opqgrN6kRgCd3DASAMd1bAkEA96SBVWFt/fJBNJ9H tYnBKZGw0VeHOYmVYbvMSstssn8un+pQpUm9vlG/bp7Oxd/m+b9KWEh2xPfv6zqU avNwHwJBANqzGZa/EpzF4J8pGti7oIAPUIDGMtfIcmqNXVMckrmzQ2vTfqtkEZsA 4rE1IERRyiJQx6EJsz21wJmGV9WJQ5kCQQDwkS0uXqVdFzgHO6S++tjmjYcxwr3g H0CoFYSgbddOT6miqRskOQF3DZVkJT3kyuBgU2zKygz52ukQZMqxCb1fAkASvuTv qfpH87Qq5kQhNKdbbwbmd2NxlNabazPijWuphGTdW0VfJdWfklyS2Kr+iqrs/5wV HhathJt636Eg7oIjAkA8ht3MQ+XSl9yIJIS8gVpbPxSw5OMfw0PjVE7tBdQruiSc nvuQES5C9BMHjF39LZiGH1iLQy7FgdHyoP+eodI7 -----END RSA PRIVATE KEY----- """ 
class SocketDelay: """A socket decorator to make TCP appear slower. This changes recv, send, and sendall to add a fixed latency to each python call if a new roundtrip is detected. That is, when a recv is called and the flag new_roundtrip is set, latency is charged. Every send and send_all sets this flag. In addition every send, sendall and recv sleeps a bit per character send to simulate bandwidth. Not all methods are implemented, this is deliberate as this class is not a replacement for the builtin sockets layer. fileno is not implemented to prevent the proxy being bypassed. """ simulated_time = 0 _proxied_arguments = dict.fromkeys( [ "close", "getpeername", "getsockname", "getsockopt", "gettimeout", "setblocking", "setsockopt", "settimeout", "shutdown", ] ) def __init__(self, sock, latency, bandwidth=1.0, really_sleep=True): """:param bandwith: simulated bandwith (MegaBit) :param really_sleep: If set to false, the SocketDelay will just increase a counter, instead of calling time.sleep. This is useful for unittesting the SocketDelay. 
""" self.sock = sock self.latency = latency self.really_sleep = really_sleep self.time_per_byte = 1 / (bandwidth / 8.0 * 1024 * 1024) self.new_roundtrip = False def sleep(self, s): if self.really_sleep: time.sleep(s) else: SocketDelay.simulated_time += s def __getattr__(self, attr): if attr in SocketDelay._proxied_arguments: return getattr(self.sock, attr) raise AttributeError(f"'SocketDelay' object has no attribute {attr!r}") def dup(self): return SocketDelay( self.sock.dup(), self.latency, self.time_per_byte, self._sleep ) def recv(self, *args): data = self.sock.recv(*args) if data and self.new_roundtrip: self.new_roundtrip = False self.sleep(self.latency) self.sleep(len(data) * self.time_per_byte) return data def sendall(self, data, flags=0): if not self.new_roundtrip: self.new_roundtrip = True self.sleep(self.latency) self.sleep(len(data) * self.time_per_byte) return self.sock.sendall(data, flags) def send(self, data, flags=0): if not self.new_roundtrip: self.new_roundtrip = True self.sleep(self.latency) bytes_sent = self.sock.send(data, flags) self.sleep(bytes_sent * self.time_per_byte) return bytes_sent class TestingSFTPConnectionHandler(socketserver.BaseRequestHandler): def setup(self): self.wrap_for_latency() tcs = self.server.test_case_server ptrans = paramiko.Transport(self.request) self.paramiko_transport = ptrans # Set it to a channel under 'bzr' so that we get debug info ptrans.set_log_channel("brz.paramiko.transport") ptrans.add_server_key(tcs.get_host_key()) ptrans.set_subsystem_handler( "sftp", paramiko.SFTPServer, StubSFTPServer, root=tcs._root, home=tcs._server_homedir, ) server = tcs._server_interface(tcs) # This blocks until the key exchange has been done ptrans.start_server(None, server) def finish(self): # Wait for the conversation to finish, when the paramiko.Transport # thread finishes # TODO: Consider timing out after XX seconds rather than hanging. 
# Also we could check paramiko_transport.active and possibly # paramiko_transport.getException(). self.paramiko_transport.join() def wrap_for_latency(self): tcs = self.server.test_case_server if tcs.add_latency: # Give the socket (which the request really is) a latency adding # decorator. self.request = SocketDelay(self.request, tcs.add_latency) class TestingSFTPWithoutSSHConnectionHandler(TestingSFTPConnectionHandler): def setup(self): self.wrap_for_latency() # Re-import these as locals, so that they're still accessible during # interpreter shutdown (when all module globals get set to None, leading # to confusing errors like "'NoneType' object has no attribute 'error'". class FakeChannel: def __init__(self, sock): self._socket = sock def get_transport(self): return self def get_log_channel(self): return "brz.paramiko" def get_name(self): return "1" def get_hexdump(self): return False def close(self): # Close the underlying socket to ensure that any blocking recv() # calls will be interrupted and return, preventing hangs during # server shutdown. 
try: self._socket.close() except OSError: pass tcs = self.server.test_case_server fake_channel = FakeChannel(self.request) sftp_server = paramiko.SFTPServer( fake_channel, "sftp", StubServer(tcs), StubSFTPServer, root=tcs._root, home=tcs._server_homedir, ) self.sftp_server = sftp_server try: sftp_server.start_subsystem( "sftp", None, ssh.SocketAsChannelAdapter(self.request) ) except OSError as e: if (len(e.args) > 0) and (e.args[0] == errno.EPIPE): # it's okay for the client to disconnect abruptly # (bug in paramiko 1.6: it should absorb this exception) pass else: raise def finish(self): self.sftp_server.finish_subsystem() class TestingSFTPServer(test_server.TestingThreadingTCPServer): def __init__(self, server_address, request_handler_class, test_case_server): test_server.TestingThreadingTCPServer.__init__( self, server_address, request_handler_class ) self.test_case_server = test_case_server class SFTPServer(test_server.TestingTCPServerInAThread): """Common code for SFTP server facilities.""" def __init__(self, server_interface=StubServer): self.host = "127.0.0.1" self.port = 0 super().__init__( (self.host, self.port), TestingSFTPServer, TestingSFTPConnectionHandler ) self._original_vendor = None self._vendor = ParamikoVendor() self._server_interface = server_interface self._host_key = None self.logs = [] self.add_latency = 0 self._homedir = None self._server_homedir = None self._root = None def _get_sftp_url(self, path): """Calculate an sftp url to this server for path.""" return f"sftp://foo:bar@{self.host}:{self.port}/{path}" def log(self, message): """StubServer uses this to log when a new server is created.""" self.logs.append(message) def create_server(self): server = self.server_class( (self.host, self.port), self.request_handler_class, self ) return server def get_host_key(self): if self._host_key is None: key_file = osutils.pathjoin(self._homedir, "test_rsa.key") f = open(key_file, "w") try: f.write(STUB_SERVER_KEY) finally: f.close() self._host_key = 
paramiko.RSAKey.from_private_key_file(key_file) return self._host_key def start_server(self, backing_server=None): # XXX: TODO: make sftpserver back onto backing_server rather than local # disk. if not ( backing_server is None or isinstance(backing_server, test_server.LocalURLServer) ): raise AssertionError( "backing_server should not be {!r}, because this can only serve " "the local current working directory.".format(backing_server) ) self._original_vendor = ssh._ssh_vendor_manager._cached_ssh_vendor ssh._ssh_vendor_manager._cached_ssh_vendor = self._vendor self._homedir = os.getcwd() if sys.platform == "win32": # Normalize the path or it will be wrongly escaped self._homedir = osutils.normpath(self._homedir) else: self._homedir = self._homedir if self._server_homedir is None: self._server_homedir = self._homedir self._root = "/" if sys.platform == "win32": self._root = "" super().start_server() def stop_server(self): try: super().stop_server() finally: ssh._ssh_vendor_manager._cached_ssh_vendor = self._original_vendor def get_bogus_url(self): """See dromedary.Server.get_bogus_url.""" # this is chosen to try to prevent trouble with proxies, weird dns, etc # we bind a random socket, so that we get a guaranteed unused port # we just never listen on that port s = socket.socket() s.bind(("localhost", 0)) return "sftp://{}:{}/".format(*s.getsockname()) class SFTPFullAbsoluteServer(SFTPServer): """A test server for sftp transports, using absolute urls and ssh.""" def get_url(self): """See dromedary.Server.get_url.""" homedir = self._homedir if sys.platform != "win32": # Remove the initial '/' on all platforms but win32 homedir = homedir[1:] return self._get_sftp_url(urlutils.escape(homedir)) class SFTPServerWithoutSSH(SFTPServer): """An SFTP server that uses a simple TCP socket pair rather than SSH.""" def __init__(self): super().__init__() self._vendor = ssh.LoopbackVendor() self.request_handler_class = TestingSFTPWithoutSSHConnectionHandler def get_host_key(self): 
return None class SFTPAbsoluteServer(SFTPServerWithoutSSH): """A test server for sftp transports, using absolute urls.""" def get_url(self): """See dromedary.Server.get_url.""" homedir = self._homedir if sys.platform != "win32": # Remove the initial '/' on all platforms but win32 homedir = homedir[1:] return self._get_sftp_url(urlutils.escape(homedir)) class SFTPHomeDirServer(SFTPServerWithoutSSH): """A test server for sftp transports, using homedir relative urls.""" def get_url(self): """See dromedary.Server.get_url.""" return self._get_sftp_url("%7E/") class SFTPSiblingAbsoluteServer(SFTPAbsoluteServer): """A test server for sftp transports where only absolute paths will work. It does this by serving from a deeply-nested directory that doesn't exist. """ def create_server(self): # FIXME: Can't we do that in a cleaner way ? -- vila 20100623 server = super().create_server() server._server_homedir = "/dev/noone/runs/tests/here" return server dromedary-0.1.5/dromedary/tests/test_cethread.py000066400000000000000000000122631520150013200220140ustar00rootroot00000000000000# Copyright (C) 2011, 2016 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA import threading import unittest from dromedary import cethread class TestCatchingExceptionThread(unittest.TestCase): def test_start_and_join_smoke_test(self): def do_nothing(): pass tt = cethread.CatchingExceptionThread(target=do_nothing) tt.start() tt.join() def test_exception_is_re_raised(self): class MyException(Exception): pass def raise_my_exception(): raise MyException() tt = cethread.CatchingExceptionThread(target=raise_my_exception) tt.start() self.assertRaises(MyException, tt.join) def test_join_around_exception(self): resume = threading.Event() class MyException(Exception): pass def raise_my_exception(): # Wait for the test to tell us to resume resume.wait() # Now we can raise raise MyException() tt = cethread.CatchingExceptionThread(target=raise_my_exception) tt.start() tt.join(timeout=0) self.assertIs(None, tt.exception) resume.set() self.assertRaises(MyException, tt.join) def test_sync_event(self): control = threading.Event() in_thread = threading.Event() class MyException(Exception): pass def raise_my_exception(): # Wait for the test to tell us to resume control.wait() # Now we can raise raise MyException() tt = cethread.CatchingExceptionThread( target=raise_my_exception, sync_event=in_thread ) tt.start() tt.join(timeout=0) self.assertIs(None, tt.exception) self.assertIs(in_thread, tt.sync_event) control.set() self.assertRaises(MyException, tt.join) self.assertEqual(True, tt.sync_event.is_set()) def test_switch_and_set(self): """Caller can precisely control a thread.""" control1 = threading.Event() control2 = threading.Event() control3 = threading.Event() class TestThread(cethread.CatchingExceptionThread): def __init__(self): super().__init__(target=self.step_by_step) self.current_step = "starting" self.step1 = threading.Event() 
self.set_sync_event(self.step1) self.step2 = threading.Event() self.final = threading.Event() def step_by_step(self): control1.wait() self.current_step = "step1" self.switch_and_set(self.step2) control2.wait() self.current_step = "step2" self.switch_and_set(self.final) control3.wait() self.current_step = "done" tt = TestThread() tt.start() self.assertEqual("starting", tt.current_step) control1.set() tt.step1.wait() self.assertEqual("step1", tt.current_step) control2.set() tt.step2.wait() self.assertEqual("step2", tt.current_step) control3.set() # We don't wait on tt.final tt.join() self.assertEqual("done", tt.current_step) def test_exception_while_switch_and_set(self): control1 = threading.Event() class MyException(Exception): pass class TestThread(cethread.CatchingExceptionThread): def __init__(self, *args, **kwargs): self.step1 = threading.Event() self.step2 = threading.Event() super().__init__(target=self.step_by_step, sync_event=self.step1) self.current_step = "starting" self.set_sync_event(self.step1) def step_by_step(self): control1.wait() self.current_step = "step1" self.switch_and_set(self.step2) def set_sync_event(self, event): # We force an exception while trying to set step2 if event is self.step2: raise MyException() super().set_sync_event(event) tt = TestThread() tt.start() self.assertEqual("starting", tt.current_step) control1.set() # We now wait on step1 which will be set when catching the exception tt.step1.wait() self.assertRaises(MyException, tt.pending_exception) self.assertIs(tt.step1, tt.sync_event) self.assertTrue(tt.step1.is_set()) dromedary-0.1.5/dromedary/tests/test_errors.py000066400000000000000000000046541520150013200215560ustar00rootroot00000000000000# Copyright (C) 2006-2012, 2016 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later 
version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Tests for dromedary error classes.""" import unittest from dromedary.errors import SocketConnectionError class TestSocketConnectionError(unittest.TestCase): def assertSocketConnectionError(self, expected, *args, **kwargs): e = SocketConnectionError(*args, **kwargs) self.assertEqual(expected, str(e)) def test_default(self): self.assertSocketConnectionError("Failed to connect to ahost", "ahost") def test_port_none(self): self.assertSocketConnectionError( "Failed to connect to ahost", "ahost", port=None ) def test_port_supplied(self): self.assertSocketConnectionError( "Failed to connect to ahost:22", "ahost", port=22 ) def test_with_orig_error_and_port(self): self.assertSocketConnectionError( "Failed to connect to ahost:22; bogus error", "ahost", port=22, orig_error="bogus error", ) def test_with_orig_error_no_port(self): self.assertSocketConnectionError( "Failed to connect to ahost; bogus error", "ahost", orig_error="bogus error", ) def test_orig_error_exception_object(self): orig_error = ValueError("bad value") self.assertSocketConnectionError( f"Failed to connect to ahost; {orig_error!s}", host="ahost", orig_error=orig_error, ) def test_custom_msg(self): self.assertSocketConnectionError( "Unable to connect to ssh host ahost:444; my_error", host="ahost", port=444, msg="Unable to connect to ssh host", orig_error="my_error", ) dromedary-0.1.5/dromedary/tests/test_gio_transport.py000066400000000000000000000056221520150013200231300ustar00rootroot00000000000000# Copyright (C) 2025 Breezy Developers # # This 
program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. """Tests for the GIO transport. These exercise the `gio+file://` backend, which is the only gio backend that works without a real gvfs mount. The whole module is skipped when dromedary was built without the `gio` Cargo feature. """ import tempfile import unittest from dromedary import urlutils from dromedary.errors import DependencyNotPresent try: from dromedary.gio_transport import GioTransport except DependencyNotPresent: GioTransport = None @unittest.skipIf(GioTransport is None, "dromedary built without the gio feature") class GioTransportTests(unittest.TestCase): def setUp(self): if GioTransport is None: self.skipTest("dromedary built without the gio feature") self._dir = tempfile.TemporaryDirectory() self.addCleanup(self._dir.cleanup) self.base = "gio+" + urlutils.local_path_to_url(self._dir.name) + "/" self.t = GioTransport(self.base) def test_external_url_round_trips(self): self.assertEqual(self.base, self.t.external_url()) def test_put_get_has(self): self.assertFalse(self.t.has("hello")) self.t.put_bytes("hello", b"world") self.assertTrue(self.t.has("hello")) self.assertEqual(b"world", self.t.get_bytes("hello")) def test_mkdir_stat_list(self): self.t.mkdir("d") self.t.put_bytes("d/a", b"1") self.t.put_bytes("d/b", b"22") self.assertEqual(["a", "b"], sorted(self.t.list_dir("d"))) st = self.t.stat("d/a") self.assertEqual(1, st.st_size) def test_rename_and_delete(self): self.t.put_bytes("a", b"hi") self.t.rename("a", "b") self.assertFalse(self.t.has("a")) self.assertEqual(b"hi", 
self.t.get_bytes("b")) self.t.delete("b") self.assertFalse(self.t.has("b")) def test_append_extends_file(self): self.t.put_bytes("f", b"abc") from io import BytesIO offset = self.t.append_file("f", BytesIO(b"DEF")) self.assertEqual(3, offset) self.assertEqual(b"abcDEF", self.t.get_bytes("f")) def test_clone_descends(self): self.t.mkdir("sub") self.t.put_bytes("sub/inside", b"x") sub = self.t.clone("sub") self.assertEqual(b"x", sub.get_bytes("inside")) def test_missing_file_raises(self): from dromedary.errors import NoSuchFile self.assertRaises(NoSuchFile, self.t.get_bytes, "nope") dromedary-0.1.5/dromedary/tests/test_http.py000066400000000000000000000224511520150013200212140ustar00rootroot00000000000000# Copyright (C) 2026 Jelmer Vernooij # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Tests for dromedary.http and dromedary.http.ca_bundle.""" import os import ssl import sys import tempfile import threading import unittest from http.server import BaseHTTPRequestHandler, HTTPServer from dromedary import http from dromedary._transport_rs.http import HttpClient from dromedary.http import ca_bundle class TestGetCaPath(unittest.TestCase): def setUp(self): self._orig_env = os.environ.get("CURL_CA_BUNDLE") ca_bundle._clear_cache() self.addCleanup(self._restore_env) self.addCleanup(ca_bundle._clear_cache) tmp = tempfile.TemporaryDirectory() self.addCleanup(tmp.cleanup) self.tmpdir = tmp.name self.bundle_a = os.path.join(self.tmpdir, "a.pem") self.bundle_b = os.path.join(self.tmpdir, "b.pem") def _restore_env(self): if self._orig_env is None: os.environ.pop("CURL_CA_BUNDLE", None) else: os.environ["CURL_CA_BUNDLE"] = self._orig_env def test_env_var_returned(self): os.environ["CURL_CA_BUNDLE"] = self.bundle_a self.assertEqual(self.bundle_a, ca_bundle.get_ca_path(use_cache=False)) def test_empty_when_unset(self): os.environ.pop("CURL_CA_BUNDLE", None) # On non-Windows hosts the Windows-specific fallback doesn't fire, so # the result must be the empty string. 
if sys.platform != "win32": self.assertEqual("", ca_bundle.get_ca_path(use_cache=False)) def test_cache_freezes_result(self): os.environ["CURL_CA_BUNDLE"] = self.bundle_a first = ca_bundle.get_ca_path(use_cache=True) os.environ["CURL_CA_BUNDLE"] = self.bundle_b second = ca_bundle.get_ca_path(use_cache=True) self.assertEqual(self.bundle_a, first) self.assertEqual(first, second) def test_cache_bypass_sees_new_value(self): os.environ["CURL_CA_BUNDLE"] = self.bundle_a ca_bundle.get_ca_path(use_cache=True) os.environ["CURL_CA_BUNDLE"] = self.bundle_b self.assertEqual(self.bundle_b, ca_bundle.get_ca_path(use_cache=False)) def test_clear_cache_resets(self): os.environ["CURL_CA_BUNDLE"] = self.bundle_a ca_bundle.get_ca_path(use_cache=True) os.environ["CURL_CA_BUNDLE"] = self.bundle_b ca_bundle._clear_cache() self.assertEqual(self.bundle_b, ca_bundle.get_ca_path(use_cache=True)) class TestDefaultCaCerts(unittest.TestCase): def test_returns_string(self): result = http.default_ca_certs() self.assertIsInstance(result, str) self.assertNotEqual("", result) @unittest.skipIf( sys.platform in ("win32", "darwin"), "Linux/BSD-specific behaviour: macOS/Windows materialise the native " "root store to a tempfile rather than using a known on-disk location.", ) def test_result_is_from_known_locations(self): # On Linux/BSD `default_ca_certs()` returns the first existing path # from `_ssl_ca_certs_known_locations` (or the first listed entry as # a breadcrumb when none exist on disk). 
self.assertIn(http.default_ca_certs(), http._ssl_ca_certs_known_locations) def test_known_locations_non_empty(self): self.assertGreater(len(http._ssl_ca_certs_known_locations), 0) self.assertTrue( all(isinstance(p, str) for p in http._ssl_ca_certs_known_locations) ) class TestDefaultCertReqs(unittest.TestCase): def test_platform_dependent(self): result = http.default_cert_reqs() if sys.platform in ("win32", "darwin"): self.assertEqual(ssl.CERT_NONE, result) else: self.assertEqual(ssl.CERT_REQUIRED, result) class TestUserAgent(unittest.TestCase): def test_default_starts_with_dromedary(self): self.assertTrue(http.default_user_agent().startswith("Dromedary/")) def test_set_user_agent_roundtrips(self): original = http.default_user_agent() self.addCleanup(http.set_user_agent, original) http.set_user_agent("TestAgent/1.2.3") self.assertEqual("TestAgent/1.2.3", http.default_user_agent()) class TestCredentialLookup(unittest.TestCase): def setUp(self): self._original = http.get_credential_lookup() self.addCleanup(http.set_credential_lookup, self._original) def test_default_returns_no_credentials(self): self.assertEqual((None, None), http.get_credentials("https", "example.com")) def test_set_credential_lookup_is_used(self): seen = {} def lookup(protocol, host, port=None, path=None, realm=None): seen["args"] = (protocol, host, port, path, realm) return ("alice", "secret") http.set_credential_lookup(lookup) self.assertEqual( ("alice", "secret"), http.get_credentials("https", "example.com", port=443, path="/", realm="r"), ) self.assertEqual(("https", "example.com", 443, "/", "r"), seen["args"]) class TestTokenProvider(unittest.TestCase): def setUp(self): self._original = http.get_token_provider() self.addCleanup(http.set_token_provider, self._original) def test_default_is_unset(self): http.set_token_provider(None) self.assertIsNone(http.get_token_provider()) def test_set_and_get_roundtrip(self): def cb(protocol, host, port=None, path=None): return (None, None) 
http.set_token_provider(cb) self.assertIs(cb, http.get_token_provider()) def test_clear_with_none(self): http.set_token_provider(lambda *a, **kw: (None, None)) http.set_token_provider(None) self.assertIsNone(http.get_token_provider()) class _CaptureAuthHandler(BaseHTTPRequestHandler): """HTTP handler that records the request's Authorization header and replies 200 OK regardless. Used to verify that the dromedary client attaches preemptive bearer tokens. """ def do_GET(self): self.server.last_authorization = self.headers.get("Authorization") self.send_response(200) self.send_header("Content-Length", "2") self.end_headers() self.wfile.write(b"OK") def log_message(self, *args, **kwargs): # silence stderr noise pass class TestTokenProviderEndToEnd(unittest.TestCase): """Drive the Rust HTTP client against a stdlib HTTPServer to verify that a registered token provider results in an Authorization header on the wire. """ def setUp(self): self._original_token = http.get_token_provider() self.addCleanup(http.set_token_provider, self._original_token) self.server = HTTPServer(("127.0.0.1", 0), _CaptureAuthHandler) self.server.last_authorization = None self.thread = threading.Thread(target=self.server.serve_forever) self.thread.daemon = True self.thread.start() self.addCleanup(self._stop_server) def _stop_server(self): self.server.shutdown() self.server.server_close() self.thread.join(timeout=5) def _url(self, path="/"): host, port = self.server.server_address return f"http://{host}:{port}{path}" def test_token_attached_when_no_explicit_authorization(self): seen = [] def lookup(protocol, host, port=None, path=None): seen.append((protocol, host, port, path)) return ("abc123", "Bearer") http.set_token_provider(lookup) client = HttpClient() resp = client.request("GET", self._url("/foo"), [], b"") self.assertEqual(200, resp.status) self.assertEqual("Bearer abc123", self.server.last_authorization) # The provider was asked with the URL's components. 
self.assertEqual(1, len(seen)) protocol, host, port, path = seen[0] self.assertEqual("http", protocol) self.assertEqual("127.0.0.1", host) self.assertEqual(self.server.server_address[1], port) self.assertEqual("/foo", path) def test_explicit_authorization_wins(self): http.set_token_provider(lambda *a, **kw: ("abc123", "Bearer")) client = HttpClient() resp = client.request( "GET", self._url(), [("Authorization", "Custom keep-me")], b"" ) self.assertEqual(200, resp.status) self.assertEqual("Custom keep-me", self.server.last_authorization) def test_provider_returning_none_skips_header(self): http.set_token_provider(lambda *a, **kw: (None, None)) client = HttpClient() resp = client.request("GET", self._url(), [], b"") self.assertEqual(200, resp.status) self.assertIsNone(self.server.last_authorization) if __name__ == "__main__": unittest.main() dromedary-0.1.5/dromedary/tests/test_http_response.py000066400000000000000000000642241520150013200231360ustar00rootroot00000000000000# Copyright (C) 2006-2010, 2012, 2013, 2016 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Tests from HTTP response parsing. The handle_response method read the response body of a GET request an returns the corresponding RangeFile. 
There are four different kinds of RangeFile: - a whole file whose size is unknown, seen as a simple byte stream, - a whole file whose size is known, we can't read past its end, - a single range file, a part of a file with a start and a size, - a multiple range file, several consecutive parts with known start offset and size. Some properties are common to all kinds: - seek can only be forward (its really a socket underneath), - read can't cross ranges, - successive ranges are taken into account transparently, - the expected pattern of use is either seek(offset)+read(size) or a single read with no size specified. For multiple range files, multiple read() will return the corresponding ranges, trying to read further will raise InvalidHttpResponse. """ import http.client as http_client from io import BytesIO parse_headers = http_client.parse_headers from dromedary import errors as transport_errors from dromedary import tests from dromedary.http import response class TestResponseFileIter(tests.TestCase): def test_iter_empty(self): f = response.ResponseFile("empty", BytesIO()) self.assertEqual([], list(f)) def test_iter_many(self): f = response.ResponseFile("many", BytesIO(b"0\n1\nboo!\n")) self.assertEqual([b"0\n", b"1\n", b"boo!\n"], list(f)) def test_readlines(self): f = response.ResponseFile("many", BytesIO(b"0\n1\nboo!\n")) self.assertEqual([b"0\n", b"1\n", b"boo!\n"], f.readlines()) class TestRangeFileMixin: """Tests for accessing the first range in a RangeFile.""" # A simple string used to represent a file part (also called a range), in # which offsets are easy to calculate for test writers. It's used as a # building block with slight variations but basically 'a' is the first char # of the range and 'z' is the last. 
alpha = b"abcdefghijklmnopqrstuvwxyz" def test_can_read_at_first_access(self): """Test that the just created file can be read.""" self.assertEqual(self.alpha, self._file.read()) def test_seek_read(self): """Test seek/read inside the range.""" f = self._file start = self.first_range_start # Before any use, tell() should be at the range start self.assertEqual(start, f.tell()) cur = start # For an overall offset assertion f.seek(start + 3) cur += 3 self.assertEqual(b"def", f.read(3)) cur += len("def") f.seek(4, 1) cur += 4 self.assertEqual(b"klmn", f.read(4)) cur += len("klmn") # read(0) in the middle of a range self.assertEqual(b"", f.read(0)) # seek in place here = f.tell() f.seek(0, 1) self.assertEqual(here, f.tell()) self.assertEqual(cur, f.tell()) def test_read_zero(self): f = self._file self.assertEqual(b"", f.read(0)) f.seek(10, 1) self.assertEqual(b"", f.read(0)) def test_seek_at_range_end(self): f = self._file f.seek(26, 1) def test_read_at_range_end(self): """Test read behaviour at range end.""" f = self._file self.assertEqual(self.alpha, f.read()) self.assertEqual(b"", f.read(0)) self.assertRaises(transport_errors.InvalidRange, f.read, 1) def test_unbounded_read_after_seek(self): f = self._file f.seek(24, 1) # Should not cross ranges self.assertEqual(b"yz", f.read()) def test_seek_backwards(self): f = self._file start = self.first_range_start f.seek(start) f.read(12) self.assertRaises(transport_errors.InvalidRange, f.seek, start + 5) def test_seek_outside_single_range(self): f = self._file if f._size == -1 or f._boundary is not None: raise tests.TestNotApplicable("Needs a fully defined range") # Will seek past the range and then errors out self.assertRaises( transport_errors.InvalidRange, f.seek, self.first_range_start + 27 ) def test_read_past_end_of_range(self): f = self._file if f._size == -1: raise tests.TestNotApplicable("Can't check an unknown size") start = self.first_range_start f.seek(start + 20) self.assertRaises(transport_errors.InvalidRange, 
f.read, 10) def test_seek_from_end(self): """Test seeking from the end of the file. The semantic is unclear in case of multiple ranges. Seeking from end exists only for the http transports, cannot be used if the file size is unknown and is not used in breezy itself. This test must be (and is) overridden by daughter classes. Reading from end makes sense only when a range has been requested from the end of the file (see HttpTransportBase._get() when using the 'tail_amount' parameter). The HTTP response can only be a whole file or a single range. """ f = self._file f.seek(-2, 2) self.assertEqual(b"yz", f.read()) class TestRangeFileSizeUnknown(tests.TestCase, TestRangeFileMixin): """Test a RangeFile for a whole file whose size is not known.""" def setUp(self): super().setUp() self._file = response.RangeFile("Whole_file_size_known", BytesIO(self.alpha)) # We define no range, relying on RangeFile to provide default values self.first_range_start = 0 # It's the whole file def test_seek_from_end(self): """See TestRangeFileMixin.test_seek_from_end. The end of the file can't be determined since the size is unknown. 
""" self.assertRaises(transport_errors.InvalidRange, self._file.seek, -1, 2) def test_read_at_range_end(self): """Test read behaviour at range end.""" f = self._file self.assertEqual(self.alpha, f.read()) self.assertEqual(b"", f.read(0)) self.assertEqual(b"", f.read(1)) class TestRangeFileSizeKnown(tests.TestCase, TestRangeFileMixin): """Test a RangeFile for a whole file whose size is known.""" def setUp(self): super().setUp() self._file = response.RangeFile("Whole_file_size_known", BytesIO(self.alpha)) self._file.set_range(0, len(self.alpha)) self.first_range_start = 0 # It's the whole file class TestRangeFileSingleRange(tests.TestCase, TestRangeFileMixin): """Test a RangeFile for a single range.""" def setUp(self): super().setUp() self._file = response.RangeFile("Single_range_file", BytesIO(self.alpha)) self.first_range_start = 15 self._file.set_range(self.first_range_start, len(self.alpha)) def test_read_before_range(self): # This can't occur under normal circumstances, we have to force it f = self._file f._pos = 0 # Force an invalid pos self.assertRaises(transport_errors.InvalidRange, f.read, 2) class TestRangeFileMultipleRanges(tests.TestCase, TestRangeFileMixin): """Test a RangeFile for multiple ranges. The RangeFile used for the tests contains three ranges: - at offset 25: alpha - at offset 100: alpha - at offset 126: alpha.upper() The two last ranges are contiguous. This only rarely occurs (should not in fact) in real uses but may lead to hard to track bugs. """ # The following is used to represent the boundary paramter defined # in HTTP response headers and the boundary lines that separate # multipart content. 
boundary = b"separation" def setUp(self): super().setUp() boundary = self.boundary content = b"" self.first_range_start = 25 file_size = 200 # big enough to encompass all ranges for start, part in [ (self.first_range_start, self.alpha), # Two contiguous ranges (100, self.alpha), (126, self.alpha.upper()), ]: content += self._multipart_byterange(part, start, boundary, file_size) # Final boundary content += self._boundary_line() self._file = response.RangeFile("Multiple_ranges_file", BytesIO(content)) self.set_file_boundary() def _boundary_line(self): """Helper to build the formatted boundary line.""" return b"--" + self.boundary + b"\r\n" def set_file_boundary(self): # Ranges are set by decoding the range headers, the RangeFile user is # supposed to call the following before using seek or read since it # requires knowing the *response* headers (in that case the boundary # which is part of the Content-Type header). self._file.set_boundary(self.boundary) def _multipart_byterange(self, data, offset, boundary, file_size=b"*"): """Encode a part of a file as a multipart/byterange MIME type. When a range request is issued, the HTTP response body can be decomposed in parts, each one representing a range (start, size) in a file. :param data: The payload. :param offset: where data starts in the file :param boundary: used to separate the parts :param file_size: the size of the file containing the range (default to '*' meaning unknown) :return: a string containing the data encoded as it will appear in the HTTP response body. 
""" bline = self._boundary_line() # Each range begins with a boundary line range = bline # A range is described by a set of headers, but only 'Content-Range' is # required for our implementation (TestHandleResponse below will # exercise ranges with multiple or missing headers') if isinstance(file_size, int): file_size = b"%d" % file_size range += b"Content-Range: bytes %d-%d/%s\r\n" % ( offset, offset + len(data) - 1, file_size, ) range += b"\r\n" # Finally the raw bytes range += data return range def test_read_all_ranges(self): f = self._file self.assertEqual(self.alpha, f.read()) # Read first range f.seek(100) # Trigger the second range recognition self.assertEqual(self.alpha, f.read()) # Read second range self.assertEqual(126, f.tell()) f.seek(126) # Start of third range which is also the current pos ! self.assertEqual(b"A", f.read(1)) f.seek(10, 1) self.assertEqual(b"LMN", f.read(3)) def test_seek_from_end(self): """See TestRangeFileMixin.test_seek_from_end.""" # The actual implementation will seek from end for the first range only # and then fail. Since seeking from end is intended to be used for a # single range only anyway, this test just document the actual # behaviour. f = self._file f.seek(-2, 2) self.assertEqual(b"yz", f.read()) self.assertRaises(transport_errors.InvalidRange, f.seek, -2, 2) def test_seek_into_void(self): f = self._file start = self.first_range_start f.seek(start) # Seeking to a point between two ranges is possible (only once) but # reading there is forbidden f.seek(start + 40) # We crossed a range boundary, so now the file is positioned at the # start of the new range (i.e. 
trying to seek below 100 will error out) f.seek(100) f.seek(125) def test_seek_across_ranges(self): f = self._file f.seek(126) # skip the two first ranges self.assertEqual(b"AB", f.read(2)) def test_checked_read_dont_overflow_buffers(self): f = self._file # We force a very low value to exercise all code paths in _checked_read f._discarded_buf_size = 8 f.seek(126) # skip the two first ranges self.assertEqual(b"AB", f.read(2)) def test_seek_twice_between_ranges(self): f = self._file start = self.first_range_start f.seek(start + 40) # Past the first range but before the second # Now the file is positioned at the second range start (100) self.assertRaises(transport_errors.InvalidRange, f.seek, start + 41) def test_seek_at_range_end(self): """Test seek behavior at range end.""" f = self._file f.seek(25 + 25) f.seek(100 + 25) f.seek(126 + 25) def test_read_at_range_end(self): f = self._file self.assertEqual(self.alpha, f.read()) self.assertEqual(self.alpha, f.read()) self.assertEqual(self.alpha.upper(), f.read()) self.assertRaises(transport_errors.InvalidHttpResponse, f.read, 1) class TestRangeFileMultipleRangesQuotedBoundaries(TestRangeFileMultipleRanges): """Perform the same tests as TestRangeFileMultipleRanges, but uses an angle-bracket quoted boundary string like IIS 6.0 and 7.0 (but not IIS 5, which breaks the RFC in a different way by using square brackets, not angle brackets). This reveals a bug caused by - The bad implementation of RFC 822 unquoting in Python (angles are not quotes), coupled with - The bad implementation of RFC 2046 in IIS (angles are not permitted chars in boundary lines). 
""" # The boundary as it appears in boundary lines # IIS 6 and 7 use this value _boundary_trimmed = b"q1w2e3r4t5y6u7i8o9p0zaxscdvfbgnhmjklkl" boundary = b"<" + _boundary_trimmed + b">" def set_file_boundary(self): # Emulate broken rfc822.unquote() here by removing angles self._file.set_boundary(self._boundary_trimmed) class TestRangeFileVarious(tests.TestCase): """Tests RangeFile aspects not covered elsewhere.""" def test_seek_whence(self): """Test the seek whence parameter values.""" f = response.RangeFile("foo", BytesIO(b"abc")) f.set_range(0, 3) f.seek(0) f.seek(1, 1) f.seek(-1, 2) self.assertRaises(ValueError, f.seek, 0, 14) def test_range_syntax(self): """Test the Content-Range scanning.""" f = response.RangeFile("foo", BytesIO()) def ok(expected, header_value): f.set_range_from_header(header_value) # Slightly peek under the covers to get the size self.assertEqual(expected, (f.tell(), f._size)) ok((1, 10), "bytes 1-10/11") ok((1, 10), "bytes 1-10/*") ok((12, 2), "\tbytes 12-13/*") ok((28, 1), " bytes 28-28/*") ok((2123, 2120), "bytes 2123-4242/12310") ok((1, 10), "bytes 1-10/ttt") # We don't check total (ttt) def nok(header_value): self.assertRaises( transport_errors.InvalidHttpRange, f.set_range_from_header, header_value ) nok("bytes 10-2/3") nok("chars 1-2/3") nok("bytes xx-yyy/zzz") nok("bytes xx-12/zzz") nok("bytes 11-yy/zzz") nok("bytes10-2/3") # Taken from real request responses _full_text_response = ( 200, b"""HTTP/1.1 200 OK\r Date: Tue, 11 Jul 2006 04:32:56 GMT\r Server: Apache/2.0.54 (Fedora)\r Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r ETag: "56691-23-38e9ae00"\r Accept-Ranges: bytes\r Content-Length: 35\r Connection: close\r Content-Type: text/plain; charset=UTF-8\r \r """, b"""Bazaar-NG meta directory, format 1 """, ) _single_range_response = ( 206, b"""HTTP/1.1 206 Partial Content\r Date: Tue, 11 Jul 2006 04:45:22 GMT\r Server: Apache/2.0.54 (Fedora)\r Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r ETag: "238a3c-16ec2-805c5540"\r 
Accept-Ranges: bytes\r Content-Length: 100\r Content-Range: bytes 100-199/93890\r Connection: close\r Content-Type: text/plain; charset=UTF-8\r \r """, b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06 mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""", ) _single_range_no_content_type = ( 206, b"""HTTP/1.1 206 Partial Content\r Date: Tue, 11 Jul 2006 04:45:22 GMT\r Server: Apache/2.0.54 (Fedora)\r Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r ETag: "238a3c-16ec2-805c5540"\r Accept-Ranges: bytes\r Content-Length: 100\r Content-Range: bytes 100-199/93890\r Connection: close\r \r """, b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06 mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""", ) _multipart_range_response = ( 206, b"""HTTP/1.1 206 Partial Content\r Date: Tue, 11 Jul 2006 04:49:48 GMT\r Server: Apache/2.0.54 (Fedora)\r Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r ETag: "238a3c-16ec2-805c5540"\r Accept-Ranges: bytes\r Content-Length: 1534\r Connection: close\r Content-Type: multipart/byteranges; boundary=418470f848b63279b\r \r \r""", b"""--418470f848b63279b\r Content-type: text/plain; charset=UTF-8\r Content-range: bytes 0-254/93890\r \r mbp@sourcefrog.net-20050309040815-13242001617e4a06 mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e7627 mbp@sourcefrog.net-20050309040957-6cad07f466bb0bb8 mbp@sourcefrog.net-20050309041501-c840e09071de3b67 mbp@sourcefrog.net-20050309044615-c24a3250be83220a \r --418470f848b63279b\r Content-type: text/plain; charset=UTF-8\r Content-range: bytes 1000-2049/93890\r \r 40-fd4ec249b6b139ab mbp@sourcefrog.net-20050311063625-07858525021f270b mbp@sourcefrog.net-20050311231934-aa3776aff5200bb9 mbp@sourcefrog.net-20050311231953-73aeb3a131c3699a mbp@sourcefrog.net-20050311232353-f5e33da490872c6a mbp@sourcefrog.net-20050312071639-0a8f59a34a024ff0 mbp@sourcefrog.net-20050312073432-b2c16a55e0d6e9fb mbp@sourcefrog.net-20050312073831-a47c3335ece1920f mbp@sourcefrog.net-20050312085412-13373aa129ccbad3 
mbp@sourcefrog.net-20050313052251-2bf004cb96b39933 mbp@sourcefrog.net-20050313052856-3edd84094687cb11 mbp@sourcefrog.net-20050313053233-e30a4f28aef48f9d mbp@sourcefrog.net-20050313053853-7c64085594ff3072 mbp@sourcefrog.net-20050313054757-a86c3f5871069e22 mbp@sourcefrog.net-20050313061422-418f1f73b94879b9 mbp@sourcefrog.net-20050313120651-497bd231b19df600 mbp@sourcefrog.net-20050314024931-eae0170ef25a5d1a mbp@sourcefrog.net-20050314025438-d52099f915fe65fc mbp@sourcefrog.net-20050314025539-637a636692c055cf mbp@sourcefrog.net-20050314025737-55eb441f430ab4ba mbp@sourcefrog.net-20050314025901-d74aa93bb7ee8f62 mbp@source\r --418470f848b63279b--\r """, ) _multipart_squid_range_response = ( 206, b"""HTTP/1.0 206 Partial Content\r Date: Thu, 31 Aug 2006 21:16:22 GMT\r Server: Apache/2.2.2 (Unix) DAV/2\r Last-Modified: Thu, 31 Aug 2006 17:57:06 GMT\r Accept-Ranges: bytes\r Content-Type: multipart/byteranges; boundary="squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196"\r Content-Length: 598\r X-Cache: MISS from localhost.localdomain\r X-Cache-Lookup: HIT from localhost.localdomain:3128\r Proxy-Connection: keep-alive\r \r """, b"""\r --squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r Content-Type: text/plain\r Content-Range: bytes 0-99/18672\r \r # bzr knit index 8 scott@netsplit.com-20050708230047-47c7868f276b939f fulltext 0 863 : scott@netsp\r --squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196\r Content-Type: text/plain\r Content-Range: bytes 300-499/18672\r \r com-20050708231537-2b124b835395399a : scott@netsplit.com-20050820234126-551311dbb7435b51 line-delta 1803 479 .scott@netsplit.com-20050820232911-dc4322a084eadf7e : scott@netsplit.com-20050821213706-c86\r --squid/2.5.STABLE12:C99323425AD4FE26F726261FA6C24196--\r """, ) # This is made up _full_text_response_no_content_type = ( 200, b"""HTTP/1.1 200 OK\r Date: Tue, 11 Jul 2006 04:32:56 GMT\r Server: Apache/2.0.54 (Fedora)\r Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r ETag: "56691-23-38e9ae00"\r Accept-Ranges: 
bytes\r Content-Length: 35\r Connection: close\r \r """, b"""Bazaar-NG meta directory, format 1 """, ) _full_text_response_no_content_length = ( 200, b"""HTTP/1.1 200 OK\r Date: Tue, 11 Jul 2006 04:32:56 GMT\r Server: Apache/2.0.54 (Fedora)\r Last-Modified: Sun, 23 Apr 2006 19:35:20 GMT\r ETag: "56691-23-38e9ae00"\r Accept-Ranges: bytes\r Connection: close\r Content-Type: text/plain; charset=UTF-8\r \r """, b"""Bazaar-NG meta directory, format 1 """, ) _single_range_no_content_range = ( 206, b"""HTTP/1.1 206 Partial Content\r Date: Tue, 11 Jul 2006 04:45:22 GMT\r Server: Apache/2.0.54 (Fedora)\r Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r ETag: "238a3c-16ec2-805c5540"\r Accept-Ranges: bytes\r Content-Length: 100\r Connection: close\r \r """, b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06 mbp@sourcefrog.net-20050309040929-eee0eb3e6d1e762""", ) _single_range_response_truncated = ( 206, b"""HTTP/1.1 206 Partial Content\r Date: Tue, 11 Jul 2006 04:45:22 GMT\r Server: Apache/2.0.54 (Fedora)\r Last-Modified: Thu, 06 Jul 2006 20:22:05 GMT\r ETag: "238a3c-16ec2-805c5540"\r Accept-Ranges: bytes\r Content-Length: 100\r Content-Range: bytes 100-199/93890\r Connection: close\r Content-Type: text/plain; charset=UTF-8\r \r """, b"""mbp@sourcefrog.net-20050309040815-13242001617e4a06""", ) _invalid_response = ( 444, b"""HTTP/1.1 444 Bad Response\r Date: Tue, 11 Jul 2006 04:32:56 GMT\r Connection: close\r Content-Type: text/html; charset=iso-8859-1\r \r """, b""" 404 Not Found

Not Found

I don't know what I'm doing


""", ) _multipart_no_content_range = ( 206, b"""HTTP/1.0 206 Partial Content\r Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r Content-Length: 598\r \r """, b"""\r --THIS_SEPARATES\r Content-Type: text/plain\r \r # bzr knit index 8 --THIS_SEPARATES\r """, ) _multipart_no_boundary = ( 206, b"""HTTP/1.0 206 Partial Content\r Content-Type: multipart/byteranges; boundary=THIS_SEPARATES\r Content-Length: 598\r \r """, b"""\r --THIS_SEPARATES\r Content-Type: text/plain\r Content-Range: bytes 0-18/18672\r \r # bzr knit index 8 The range ended at the line above, this text is garbage instead of a boundary line """, ) class TestHandleResponse(tests.TestCase): def _build_HTTPMessage(self, raw_headers): status_and_headers = BytesIO(raw_headers) # Get rid of the status line status_and_headers.readline() msg = parse_headers(status_and_headers) return msg.get def get_response(self, a_response): """Process a supplied response, and return the result.""" code, raw_headers, _body = a_response getheader = self._build_HTTPMessage(raw_headers) return response.handle_response( "http://foo", code, getheader, BytesIO(a_response[2]) ) def test_full_text(self): out = self.get_response(_full_text_response) # It is a BytesIO from the original data self.assertEqual(_full_text_response[2], out.read()) def test_single_range(self): out = self.get_response(_single_range_response) out.seek(100) self.assertEqual(_single_range_response[2], out.read(100)) def test_single_range_no_content(self): out = self.get_response(_single_range_no_content_type) out.seek(100) self.assertEqual(_single_range_no_content_type[2], out.read(100)) def test_single_range_truncated(self): out = self.get_response(_single_range_response_truncated) # Content-Range declares 100 but only 51 present self.assertRaises(transport_errors.ShortReadvError, out.seek, out.tell() + 51) def test_multi_range(self): out = self.get_response(_multipart_range_response) # Just make sure we can read the right contents out.seek(0) 
out.read(255) out.seek(1000) out.read(1050) def test_multi_squid_range(self): out = self.get_response(_multipart_squid_range_response) # Just make sure we can read the right contents out.seek(0) out.read(100) out.seek(300) out.read(200) def test_invalid_response(self): self.assertRaises( transport_errors.InvalidHttpResponse, self.get_response, _invalid_response ) def test_full_text_no_content_type(self): # We should not require Content-Type for a full response code, raw_headers, body = _full_text_response_no_content_type getheader = self._build_HTTPMessage(raw_headers) out = response.handle_response("http://foo", code, getheader, BytesIO(body)) self.assertEqual(body, out.read()) def test_full_text_no_content_length(self): code, raw_headers, body = _full_text_response_no_content_length getheader = self._build_HTTPMessage(raw_headers) out = response.handle_response("http://foo", code, getheader, BytesIO(body)) self.assertEqual(body, out.read()) def test_missing_content_range(self): code, raw_headers, body = _single_range_no_content_range getheader = self._build_HTTPMessage(raw_headers) self.assertRaises( transport_errors.InvalidHttpResponse, response.handle_response, "http://bogus", code, getheader, BytesIO(body), ) def test_multipart_no_content_range(self): code, raw_headers, body = _multipart_no_content_range getheader = self._build_HTTPMessage(raw_headers) self.assertRaises( transport_errors.InvalidHttpResponse, response.handle_response, "http://bogus", code, getheader, BytesIO(body), ) def test_multipart_no_boundary(self): out = self.get_response(_multipart_no_boundary) out.read() # Read the whole range # Fail to find the boundary line self.assertRaises(transport_errors.InvalidHttpResponse, out.seek, 1, 1) dromedary-0.1.5/dromedary/tests/test_server.py000066400000000000000000000416231520150013200215450ustar00rootroot00000000000000# Copyright (C) 2010, 2011 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms 
of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Test server implementations for transport decorators and TCP testing.""" import errno import socket import socketserver import sys import threading import dromedary from dromedary import chroot, pathfilter, urlutils from dromedary.cethread import CatchingExceptionThread def connect_socket(address): """Connect to the given address, trying all results from getaddrinfo.""" err = OSError("getaddrinfo returns an empty list") host, port = address for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM): af, socktype, proto, _canonname, sa = res sock = None try: sock = socket.socket(af, socktype, proto) sock.connect(sa) return sock except OSError as e: err = e if sock is not None: sock.close() raise err # Set by test frameworks to enable debug output for thread operations. debug_threads_hook = None def debug_threads(): """Return True if thread debugging is enabled.""" if debug_threads_hook is not None: return debug_threads_hook() return False class TestServer(dromedary.Server): """A Transport Server dedicated to tests. The TestServer interface provides a server for a given transport. We use these servers as loopback testing tools. For any given transport the Servers it provides must either allow writing, or serve the contents of osutils.getcwd() at the time start_server is called. 
Note that these are real servers - they must implement all the things that we want bzr transports to take advantage of. """ def get_url(self): """Return a url for this server.""" raise NotImplementedError def get_bogus_url(self): """Return a url for this protocol, that will fail to connect.""" raise NotImplementedError class LocalURLServer(TestServer): """A pretend server for local transports, using file:// urls.""" def start_server(self): pass def get_url(self): return urlutils.local_path_to_url("") class DecoratorServer(TestServer): """Server for the TransportDecorator for testing with.""" def start_server(self, server=None): if server is not None: self._made_server = False self._server = server else: self._made_server = True self._server = LocalURLServer() self._server.start_server() def stop_server(self): if self._made_server: self._server.stop_server() def get_decorator_class(self): raise NotImplementedError(self.get_decorator_class) def get_url_prefix(self): return self.get_decorator_class()._get_url_prefix() def get_bogus_url(self): return self.get_url_prefix() + self._server.get_bogus_url() def get_url(self): return self.get_url_prefix() + self._server.get_url() class BrokenRenameServer(DecoratorServer): """Server for the BrokenRenameTransportDecorator for testing with.""" def get_decorator_class(self): from dromedary import brokenrename return brokenrename.BrokenRenameTransportDecorator class FakeNFSServer(DecoratorServer): """Server for the FakeNFSTransportDecorator for testing with.""" def get_decorator_class(self): from dromedary import fakenfs return fakenfs.FakeNFSTransportDecorator class FakeVFATServer(DecoratorServer): """A server that suggests connections through FakeVFATTransportDecorator.""" def get_decorator_class(self): from dromedary import fakevfat return fakevfat.FakeVFATTransportDecorator class LogDecoratorServer(DecoratorServer): """Server for testing.""" def get_decorator_class(self): from dromedary import log return 
log.TransportLogDecorator class ReadonlyServer(DecoratorServer): """Server for the ReadonlyTransportDecorator for testing with.""" def get_decorator_class(self): from dromedary import readonly return readonly.ReadonlyTransportDecorator class TraceServer(DecoratorServer): """Server for the TransportTraceDecorator for testing with.""" def get_decorator_class(self): from dromedary import trace return trace.TransportTraceDecorator class UnlistableServer(DecoratorServer): """Server for the UnlistableTransportDecorator for testing with.""" def get_decorator_class(self): from dromedary import unlistable return unlistable.UnlistableTransportDecorator class TestingPathFilteringServer(pathfilter.PathFilteringServer): def __init__(self): """TestingPathFilteringServer is not usable until start_server is called. """ def start_server(self, backing_server=None): """Setup the Chroot on backing_server.""" if backing_server is not None: self.backing_transport = dromedary.get_transport_from_url( backing_server.get_url() ) else: self.backing_transport = dromedary.get_transport_from_path(".") self.backing_transport.clone("added-by-filter").ensure_base() self.filter_func = lambda x: "added-by-filter/" + x super().start_server() def get_bogus_url(self): raise NotImplementedError class TestingChrootServer(chroot.ChrootServer): def __init__(self): """TestingChrootServer is not usable until start_server is called.""" super().__init__(None) def start_server(self, backing_server=None): """Setup the Chroot on backing_server.""" if backing_server is not None: self.backing_transport = dromedary.get_transport_from_url( backing_server.get_url() ) else: self.backing_transport = dromedary.get_transport_from_path(".") super().start_server() def get_bogus_url(self): raise NotImplementedError class TestThread(CatchingExceptionThread): def join(self, timeout=5): """Overrides to use a default timeout. 
The default timeout is set to 5 and should expire only when a thread serving a client connection is hung. """ super().join(timeout) if timeout and self.is_alive(): # The timeout expired without joining the thread, the thread is # therefore stucked and that's a failure as far as the test is # concerned. We used to hang here. # FIXME: we need to kill the thread, but as far as the test is # concerned, raising an assertion is too strong. On most of the # platforms, this doesn't occur, so just mentioning the problem is # enough for now -- vila 2010824 sys.stderr.write(f"thread {self.name} hung\n") class TestingTCPServerMixin: """Mixin to support running socketserver.TCPServer in a thread. Tests are connecting from the main thread, the server has to be run in a separate thread. """ def __init__(self): self.started = threading.Event() self.serving = None self.stopped = threading.Event() self.clients = [] self.ignored_exceptions = None def server_bind(self): self.socket.bind(self.server_address) self.server_address = self.socket.getsockname() def serve(self): self.serving = True self.started.set() try: while self.serving: self.handle_request() self.server_close() finally: self.stopped.set() def handle_request(self): """Handle one request. The python version swallows some socket exceptions and we don't use timeout, so we override it to better control the server behavior. """ request, client_address = self.get_request() if self.verify_request(request, client_address): try: self.process_request(request, client_address) except BaseException: self.handle_error(request, client_address) else: self.close_request(request) def get_request(self): return self.socket.accept() def verify_request(self, request, client_address): """Verify the request. Return True if we should proceed with this request, False if we should not even touch a single byte in the socket ! This is useful when we stop the server with a dummy last connection. 
""" return self.serving def handle_error(self, request, client_address): # Stop serving and re-raise the last exception seen self.serving = False # We call close_request manually, because we are going to raise an # exception. The socketserver implementation calls: # handle_error(...) # close_request(...) # But because we raise the exception, close_request will never be # triggered. This helps client not block waiting for a response when # the server gets an exception. self.close_request(request) raise def ignored_exceptions_during_shutdown(self, e): if sys.platform == "win32": accepted_errnos = [ errno.EBADF, errno.EPIPE, errno.WSAEBADF, errno.WSAENOTSOCK, errno.WSAECONNRESET, errno.WSAENOTCONN, errno.WSAESHUTDOWN, ] else: accepted_errnos = [ errno.EBADF, errno.ECONNRESET, errno.ENOTCONN, errno.EPIPE, ] return bool(isinstance(e, socket.error) and e.errno in accepted_errnos) def stop_client_connections(self): while self.clients: c = self.clients.pop() self.shutdown_client(c) def shutdown_socket(self, sock): """Properly shutdown a socket. This should be called only when no other thread is trying to use the socket. """ try: sock.shutdown(socket.SHUT_RDWR) sock.close() except Exception as e: if self.ignored_exceptions(e): pass else: raise def set_ignored_exceptions(self, thread, ignored_exceptions): self.ignored_exceptions = ignored_exceptions thread.set_ignored_exceptions(self.ignored_exceptions) def _pending_exception(self, thread): """Raise server uncaught exception. Daughter classes can override this if they use daughter threads. 
""" thread.pending_exception() class TestingTCPServer(TestingTCPServerMixin, socketserver.TCPServer): def __init__(self, server_address, request_handler_class): TestingTCPServerMixin.__init__(self) socketserver.TCPServer.__init__(self, server_address, request_handler_class) def get_request(self): """Get the request and client address from the socket.""" sock, addr = TestingTCPServerMixin.get_request(self) self.clients.append((sock, addr)) return sock, addr def shutdown_client(self, client): sock, _addr = client self.shutdown_socket(sock) class TestingThreadingTCPServer(TestingTCPServerMixin, socketserver.ThreadingTCPServer): def __init__(self, server_address, request_handler_class): TestingTCPServerMixin.__init__(self) socketserver.ThreadingTCPServer.__init__( self, server_address, request_handler_class ) def get_request(self): """Get the request and client address from the socket.""" sock, addr = TestingTCPServerMixin.get_request(self) self.clients.append((sock, addr, None)) return sock, addr def process_request_thread( self, started, detached, stopped, request, client_address ): started.set() detached.wait() socketserver.ThreadingTCPServer.process_request_thread( self, request, client_address ) self.close_request(request) stopped.set() def process_request(self, request, client_address): """Start a new thread to process the request.""" started = threading.Event() detached = threading.Event() stopped = threading.Event() t = TestThread( sync_event=stopped, name=f"{client_address} -> {self.server_address}", target=self.process_request_thread, args=(started, detached, stopped, request, client_address), ) self.clients.pop() self.clients.append((request, client_address, t)) t.set_ignored_exceptions(self.ignored_exceptions) t.start() started.wait() t.pending_exception() if debug_threads(): sys.stderr.write(f"Client thread {t.name} started\n") detached.set() def shutdown_client(self, client): sock, _addr, connection_thread = client self.shutdown_socket(sock) if 
connection_thread is not None: if debug_threads(): sys.stderr.write( f"Client thread {connection_thread.name} will be joined\n" ) connection_thread.join() def set_ignored_exceptions(self, thread, ignored_exceptions): TestingTCPServerMixin.set_ignored_exceptions(self, thread, ignored_exceptions) for _sock, _addr, connection_thread in self.clients: if connection_thread is not None: connection_thread.set_ignored_exceptions(self.ignored_exceptions) def _pending_exception(self, thread): for _sock, _addr, connection_thread in self.clients: if connection_thread is not None: connection_thread.pending_exception() TestingTCPServerMixin._pending_exception(self, thread) class TestingTCPServerInAThread(dromedary.Server): """A server in a thread that re-raise thread exceptions.""" def __init__(self, server_address, server_class, request_handler_class): self.server_class = server_class self.request_handler_class = request_handler_class self.host, self.port = server_address self.server = None self._server_thread = None def __repr__(self): return f"{self.__class__.__name__}({self.host}:{self.port})" def create_server(self): return self.server_class((self.host, self.port), self.request_handler_class) def start_server(self): self.server = self.create_server() self._server_thread = TestThread( sync_event=self.server.started, target=self.run_server ) self._server_thread.start() self.server.started.wait() self.host, self.port = self.server.server_address self._server_thread.name = self.server.server_address if debug_threads(): sys.stderr.write(f"Server thread {self._server_thread.name} started\n") self._server_thread.pending_exception() self._server_thread.set_sync_event(self.server.stopped) def run_server(self): self.server.serve() def stop_server(self): if self.server is None: return try: self.set_ignored_exceptions(self.server.ignored_exceptions_during_shutdown) self.server.serving = False if debug_threads(): sys.stderr.write( f"Server thread {self._server_thread.name} will be 
joined\n" ) last_conn = None try: last_conn = connect_socket((self.host, self.port)) except OSError: pass self.server.stop_client_connections() self.server.stopped.wait() if last_conn is not None: last_conn.close() try: self._server_thread.join() except Exception as e: if self.server.ignored_exceptions(e): pass else: raise finally: self.server = None def set_ignored_exceptions(self, ignored_exceptions): """Install an exception handler for the server.""" self.server.set_ignored_exceptions(self._server_thread, ignored_exceptions) def pending_exception(self): """Raise uncaught exception in the server.""" self.server._pending_exception(self._server_thread) dromedary-0.1.5/dromedary/tests/test_test_server.py000066400000000000000000000203501520150013200225760ustar00rootroot00000000000000# Copyright (C) 2010, 2011, 2016 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA import errno import socket import socketserver import threading import unittest from dromedary.tests import test_server def portable_socket_pair(): """Return a pair of TCP sockets connected to each other. Unlike socket.socketpair, this should work on Windows. 
""" listen_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) listen_sock.bind(("127.0.0.1", 0)) listen_sock.listen(1) client_sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) client_sock.connect(listen_sock.getsockname()) server_sock, _addr = listen_sock.accept() listen_sock.close() return server_sock, client_sock class TCPClient: def __init__(self): self.sock = None def connect(self, addr): if self.sock is not None: raise AssertionError(f"Already connected to {self.sock.getsockname()!r}") self.sock = test_server.connect_socket(addr) def disconnect(self): if self.sock is not None: try: self.sock.shutdown(socket.SHUT_RDWR) self.sock.close() except OSError as e: if e.errno in (errno.EBADF, errno.ENOTCONN, errno.ECONNRESET): pass else: raise self.sock = None def write(self, s): return self.sock.sendall(s) def read(self, bufsize=4096): try: return self.sock.recv(bufsize) except OSError as e: if e.errno == errno.ECONNRESET: return b"" raise class TCPConnectionHandler(socketserver.BaseRequestHandler): def handle(self): self.done = False self.handle_connection() while not self.done: self.handle_connection() def readline(self): req = self.request.recv(4096) if not req or (req.endswith(b"\n") and req.count(b"\n") == 1): return req raise ValueError(f"[{req!r}] not a simple line") def handle_connection(self): req = self.readline() if not req: self.done = True elif req == b"ping\n": self.request.sendall(b"pong\n") else: raise ValueError(f"[{req}] not understood") class TestTCPServerInAThreadBase: """Mixin with test methods for TCP server implementations.""" server_class: "type[test_server.TestingTCPServerMixin] | None" = None def get_server(self, server_class=None, connection_handler_class=None): if server_class is not None: self.server_class = server_class if connection_handler_class is None: connection_handler_class = TCPConnectionHandler server = test_server.TestingTCPServerInAThread( ("localhost", 0), self.server_class, connection_handler_class ) 
server.start_server() self.addCleanup(server.stop_server) return server def get_client(self): client = TCPClient() self.addCleanup(client.disconnect) return client def get_server_connection(self, server, conn_rank): return server.server.clients[conn_rank] def assertClientAddr(self, client, server, conn_rank): conn = self.get_server_connection(server, conn_rank) self.assertEqual(client.sock.getsockname(), conn[1]) def test_start_stop(self): server = self.get_server() client = self.get_client() server.stop_server() # since the server doesn't accept connections anymore attempting to # connect should fail client = self.get_client() self.assertRaises(socket.error, client.connect, (server.host, server.port)) def test_client_talks_server_respond(self): server = self.get_server() client = self.get_client() client.connect((server.host, server.port)) self.assertIs(None, client.write(b"ping\n")) resp = client.read() self.assertClientAddr(client, server, 0) self.assertEqual(b"pong\n", resp) def test_server_fails_to_start(self): class CantStart(Exception): pass class CantStartServer(test_server.TestingTCPServer): def server_bind(self): raise CantStart() # The exception is raised in the main thread self.assertRaises(CantStart, self.get_server, server_class=CantStartServer) def test_server_fails_while_serving_or_stopping(self): class CantConnect(Exception): pass class FailingConnectionHandler(TCPConnectionHandler): def handle(self): raise CantConnect() server = self.get_server(connection_handler_class=FailingConnectionHandler) client = self.get_client() client.connect((server.host, server.port)) client.write(b"ping\n") try: self.assertEqual(b"", client.read()) except OSError as e: WSAECONNRESET = 10054 if e.errno in (WSAECONNRESET,): pass self.assertRaises(CantConnect, server.stop_server) def test_server_crash_while_responding(self): caught = threading.Event() caught.clear() self.connection_thread = None class FailToRespond(Exception): pass class 
FailingDuringResponseHandler(TCPConnectionHandler): def handle_connection(request): # noqa: N805 request.readline() self.connection_thread = threading.current_thread() self.connection_thread.set_sync_event(caught) raise FailToRespond() server = self.get_server(connection_handler_class=FailingDuringResponseHandler) client = self.get_client() client.connect((server.host, server.port)) client.write(b"ping\n") caught.wait() self.assertEqual(b"", client.read()) self.assertRaises(FailToRespond, self.connection_thread.pending_exception) def test_exception_swallowed_while_serving(self): caught = threading.Event() caught.clear() self.connection_thread = None class CantServe(Exception): pass class FailingWhileServingConnectionHandler(TCPConnectionHandler): def handle(request): # noqa: N805 self.connection_thread = threading.current_thread() self.connection_thread.set_sync_event(caught) raise CantServe() server = self.get_server( connection_handler_class=FailingWhileServingConnectionHandler ) self.assertEqual(True, server.server.serving) server.set_ignored_exceptions(CantServe) client = self.get_client() client.connect((server.host, server.port)) caught.wait() self.assertEqual(b"", client.read()) self.assertIs(None, self.connection_thread.pending_exception()) self.assertIs(None, server.pending_exception()) def test_handle_request_closes_if_it_doesnt_process(self): server = self.get_server() client = self.get_client() server.server.serving = False try: client.connect((server.host, server.port)) self.assertEqual(b"", client.read()) except OSError as e: if e.errno != errno.ECONNRESET: raise class TestTCPServerInAThread_TestingTCPServer( TestTCPServerInAThreadBase, unittest.TestCase ): server_class = test_server.TestingTCPServer class TestTCPServerInAThread_TestingThreadingTCPServer( TestTCPServerInAThreadBase, unittest.TestCase ): server_class = test_server.TestingThreadingTCPServer 
dromedary-0.1.5/dromedary/tests/test_transport.py000066400000000000000000001327771520150013200223060ustar00rootroot00000000000000# Copyright (C) 2005-2011, 2015, 2016 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA import errno import os import subprocess import sys import threading from io import BytesIO import dromedary as transport from dromedary import ( chroot, errors, fakenfs, local, memory, osutils, pathfilter, readonly, tests, urlutils, ) from dromedary import tests as features from dromedary.errors import FileExists, NoSuchFile, UnsupportedProtocol from dromedary.local import file_kind from dromedary.tests import test_server # TODO: Should possibly split transport-specific tests into their own files. 
class TestTransport(tests.TestCase): """Test the non transport-concrete class functionality.""" def test__get_set_protocol_handlers(self): handlers = transport._get_protocol_handlers() self.assertNotEqual([], handlers.keys()) transport._clear_protocol_handlers() self.addCleanup(transport._set_protocol_handlers, handlers) self.assertEqual([], transport._get_protocol_handlers().keys()) def test_get_transport_modules(self): handlers = transport._get_protocol_handlers() self.addCleanup(transport._set_protocol_handlers, handlers) # don't pollute the current handlers transport._clear_protocol_handlers() class SampleHandler: """I exist, isnt that enough?""" transport._clear_protocol_handlers() transport.register_transport_proto("foo") transport.register_lazy_transport( "foo", "dromedary.tests.test_transport", "TestTransport.SampleHandler" ) transport.register_transport_proto("bar") transport.register_lazy_transport( "bar", "dromedary.tests.test_transport", "TestTransport.SampleHandler" ) self.assertCountEqual( [ SampleHandler.__module__, "dromedary.chroot", "dromedary.pathfilter", ], transport._get_transport_modules(), ) def test_transport_dependency(self): """Transport with missing dependency causes no error.""" saved_handlers = transport._get_protocol_handlers() self.addCleanup(transport._set_protocol_handlers, saved_handlers) # don't pollute the current handlers transport._clear_protocol_handlers() transport.register_transport_proto("foo") transport.register_lazy_transport( "foo", "dromedary.tests.test_transport", "BadTransportHandler" ) try: transport.get_transport_from_url("foo://fooserver/foo") except UnsupportedProtocol as e: self.assertEqual( "Unsupported protocol" ' for url "foo://fooserver/foo":' ' Unable to import library "some_lib":' " testing missing dependency", str(e), ) else: self.fail("Did not raise UnsupportedProtocol") def test_transport_fallback(self): """Transport with missing dependency causes no error.""" saved_handlers = 
transport._get_protocol_handlers() self.addCleanup(transport._set_protocol_handlers, saved_handlers) transport._clear_protocol_handlers() transport.register_transport_proto("foo") transport.register_lazy_transport( "foo", "dromedary.tests.test_transport", "BackupTransportHandler" ) transport.register_lazy_transport( "foo", "dromedary.tests.test_transport", "BadTransportHandler" ) t = transport.get_transport_from_url("foo://fooserver/foo") self.assertIsInstance(t, BackupTransportHandler) def test_ssh_hints(self): """Transport ssh:// should raise UnsupportedProtocol.""" self.assertRaises( UnsupportedProtocol, transport.get_transport_from_url, "ssh://fooserver/foo", ) def test_LateReadError(self): """The LateReadError helper should raise on read().""" a_file = transport.LateReadError("a path") try: a_file.read() except errors.ReadError as error: self.assertEqual("a path", error.path) self.assertRaises(errors.ReadError, a_file.read, 40) a_file.close() def test_local_abspath_non_local_transport(self): # the base implementation should throw t = memory.MemoryTransport() with self.assertRaises(errors.NotLocalUrl) as cm: t.local_abspath("t") self.assertEqual("memory:///t is not a local path.", str(cm.exception)) class TestCoalesceOffsets(tests.TestCase): def check(self, expected, offsets, limit=0, max_size=0, fudge=0): coalesce = transport.Transport._coalesce_offsets exp = [transport._CoalescedOffset(*x) for x in expected] out = list( coalesce(offsets, limit=limit, fudge_factor=fudge, max_size=max_size) ) self.assertEqual(exp, out) def test_coalesce_empty(self): self.check([], []) def test_coalesce_simple(self): self.check([(0, 10, [(0, 10)])], [(0, 10)]) def test_coalesce_unrelated(self): self.check( [ (0, 10, [(0, 10)]), (20, 10, [(0, 10)]), ], [(0, 10), (20, 10)], ) def test_coalesce_unsorted(self): self.check([(0, 10, [(0, 10)]), (20, 10, [(0, 10)])], [(20, 10), (0, 10)]) def test_coalesce_nearby(self): self.check([(0, 20, [(0, 10), (10, 10)])], [(0, 10), (10, 10)]) def 
test_coalesce_overlapped(self): self.assertRaises( ValueError, self.check, [(0, 15, [(0, 10), (5, 10)])], [(0, 10), (5, 10)] ) def test_coalesce_limit(self): self.check( [ (10, 50, [(0, 10), (10, 10), (20, 10), (30, 10), (40, 10)]), (60, 50, [(0, 10), (10, 10), (20, 10), (30, 10), (40, 10)]), ], [ (10, 10), (20, 10), (30, 10), (40, 10), (50, 10), (60, 10), (70, 10), (80, 10), (90, 10), (100, 10), ], limit=5, ) def test_coalesce_no_limit(self): self.check( [ ( 10, 100, [ (0, 10), (10, 10), (20, 10), (30, 10), (40, 10), (50, 10), (60, 10), (70, 10), (80, 10), (90, 10), ], ), ], [ (10, 10), (20, 10), (30, 10), (40, 10), (50, 10), (60, 10), (70, 10), (80, 10), (90, 10), (100, 10), ], ) def test_coalesce_fudge(self): self.check( [ (10, 30, [(0, 10), (20, 10)]), (100, 10, [(0, 10)]), ], [(10, 10), (30, 10), (100, 10)], fudge=10, ) def test_coalesce_max_size(self): self.check( [ (10, 20, [(0, 10), (10, 10)]), (30, 50, [(0, 50)]), # If one range is above max_size, it gets its own coalesced # offset (100, 80, [(0, 80)]), ], [(10, 10), (20, 10), (30, 50), (100, 80)], max_size=50, ) def test_coalesce_no_max_size(self): self.check( [(10, 170, [(0, 10), (10, 10), (20, 50), (70, 100)])], [(10, 10), (20, 10), (30, 50), (80, 100)], ) def test_coalesce_default_limit(self): # By default we use a 100MB max size. 
ten_mb = 10 * 1024 * 1024 self.check( [ (0, 10 * ten_mb, [(i * ten_mb, ten_mb) for i in range(10)]), (10 * ten_mb, ten_mb, [(0, ten_mb)]), ], [(i * ten_mb, ten_mb) for i in range(11)], ) self.check( [(0, 11 * ten_mb, [(i * ten_mb, ten_mb) for i in range(11)])], [(i * ten_mb, ten_mb) for i in range(11)], max_size=1 * 1024 * 1024 * 1024, ) class TestMemoryServer(tests.TestCase): def test_create_server(self): server = memory.MemoryServer() server.start_server() url = server.get_url() self.assertIn(url, transport.transport_list_registry) t = transport.get_transport_from_url(url) del t server.stop_server() self.assertNotIn(url, transport.transport_list_registry) self.assertRaises(UnsupportedProtocol, transport.get_transport_from_url, url) class TestMemoryTransport(tests.TestCase): def test_get_transport(self): memory.MemoryTransport() def test_clone(self): t = memory.MemoryTransport() self.assertIsInstance(t, memory.MemoryTransport) self.assertEqual("memory:///", t.clone("/").base) def test_abspath(self): t = memory.MemoryTransport() self.assertEqual("memory:///relpath", t.abspath("relpath")) def test_abspath_of_root(self): t = memory.MemoryTransport() self.assertEqual("memory:///", t.base) self.assertEqual("memory:///", t.abspath("/")) def test_abspath_of_relpath_starting_at_root(self): t = memory.MemoryTransport() self.assertEqual("memory:///foo", t.abspath("/foo")) def test_append_and_get(self): t = memory.MemoryTransport() t.append_bytes("path", b"content") self.assertEqual(t.get("path").read(), b"content") t.append_file("path", BytesIO(b"content")) with t.get("path") as f: self.assertEqual(f.read(), b"contentcontent") def test_put_and_get(self): t = memory.MemoryTransport() t.put_file("path", BytesIO(b"content")) self.assertEqual(t.get("path").read(), b"content") t.put_bytes("path", b"content") self.assertEqual(t.get("path").read(), b"content") def test_append_without_dir_fails(self): t = memory.MemoryTransport() self.assertRaises(NoSuchFile, t.append_bytes, 
"dir/path", b"content") def test_put_without_dir_fails(self): t = memory.MemoryTransport() self.assertRaises(NoSuchFile, t.put_file, "dir/path", BytesIO(b"content")) def test_get_missing(self): transport = memory.MemoryTransport() self.assertRaises(NoSuchFile, transport.get, "foo") def test_has_missing(self): t = memory.MemoryTransport() self.assertEqual(False, t.has("foo")) def test_has_present(self): t = memory.MemoryTransport() t.append_bytes("foo", b"content") self.assertEqual(True, t.has("foo")) def test_list_dir(self): t = memory.MemoryTransport() t.put_bytes("foo", b"content") t.mkdir("dir") t.put_bytes("dir/subfoo", b"content") t.put_bytes("dirlike", b"content") self.assertEqual(["dir", "dirlike", "foo"], sorted(t.list_dir("."))) self.assertEqual(["subfoo"], sorted(t.list_dir("dir"))) def test_mkdir(self): t = memory.MemoryTransport() t.mkdir("dir") t.append_bytes("dir/path", b"content") with t.get("dir/path") as f: self.assertEqual(f.read(), b"content") def test_mkdir_missing_parent(self): t = memory.MemoryTransport() self.assertRaises(NoSuchFile, t.mkdir, "dir/dir") def test_mkdir_twice(self): t = memory.MemoryTransport() t.mkdir("dir") self.assertRaises(FileExists, t.mkdir, "dir") def test_parameters(self): t = memory.MemoryTransport() self.assertEqual(True, t.listable()) self.assertEqual(False, t.is_readonly()) def test_iter_files_recursive(self): t = memory.MemoryTransport() t.mkdir("dir") t.put_bytes("dir/foo", b"content") t.put_bytes("dir/bar", b"content") t.put_bytes("bar", b"content") paths = set(t.iter_files_recursive()) self.assertEqual({"dir/foo", "dir/bar", "bar"}, paths) def test_stat(self): t = memory.MemoryTransport() t.put_bytes("foo", b"content") t.put_bytes("bar", b"phowar") self.assertEqual(7, t.stat("foo").st_size) self.assertEqual(6, t.stat("bar").st_size) class ChrootDecoratorTransportTest(tests.TestCase): """Chroot decoration specific tests.""" def test_abspath(self): # The abspath is always relative to the chroot_url. 
server = chroot.ChrootServer( transport.get_transport_from_url("memory:///foo/bar/") ) self.start_server(server) t = transport.get_transport_from_url(server.get_url()) self.assertEqual(server.get_url(), t.abspath("/")) subdir_t = t.clone("subdir") self.assertEqual(server.get_url(), subdir_t.abspath("/")) def test_clone(self): server = chroot.ChrootServer( transport.get_transport_from_url("memory:///foo/bar/") ) self.start_server(server) t = transport.get_transport_from_url(server.get_url()) # relpath from root and root path are the same relpath_cloned = t.clone("foo") abspath_cloned = t.clone("/foo") self.assertEqual(server, relpath_cloned.server) self.assertEqual(server, abspath_cloned.server) def test_chroot_url_preserves_chroot(self): """Calling get_transport on a chroot transport's base should produce a transport with exactly the same behaviour as the original chroot transport. This is so that it is not possible to escape a chroot by doing:: url = chroot_transport.base parent_url = urlutils.join(url, '..') new_t = transport.get_transport_from_url(parent_url) """ server = chroot.ChrootServer( transport.get_transport_from_url("memory:///path/subpath") ) self.start_server(server) t = transport.get_transport_from_url(server.get_url()) new_t = transport.get_transport_from_url(t.base) self.assertEqual(t.server, new_t.server) self.assertEqual(t.base, new_t.base) def test_urljoin_preserves_chroot(self): """Using urlutils.join(url, '..') on a chroot URL should not produce a URL that escapes the intended chroot. 
This is so that it is not possible to escape a chroot by doing:: url = chroot_transport.base parent_url = urlutils.join(url, '..') new_t = transport.get_transport_from_url(parent_url) """ server = chroot.ChrootServer( transport.get_transport_from_url("memory:///path/") ) self.start_server(server) t = transport.get_transport_from_url(server.get_url()) self.assertRaises(urlutils.InvalidURLJoin, urlutils.join, t.base, "..") class TestChrootServer(tests.TestCase): def test_construct(self): backing_transport = memory.MemoryTransport() server = chroot.ChrootServer(backing_transport) self.assertEqual(backing_transport, server.backing_transport) def test_setUp(self): backing_transport = memory.MemoryTransport() server = chroot.ChrootServer(backing_transport) server.start_server() self.addCleanup(server.stop_server) self.assertIn(server.scheme, transport._get_protocol_handlers().keys()) def test_stop_server(self): backing_transport = memory.MemoryTransport() server = chroot.ChrootServer(backing_transport) server.start_server() server.stop_server() self.assertNotIn(server.scheme, transport._get_protocol_handlers().keys()) def test_get_url(self): backing_transport = memory.MemoryTransport() server = chroot.ChrootServer(backing_transport) server.start_server() self.addCleanup(server.stop_server) self.assertEqual("chroot-%d:///" % id(server), server.get_url()) class TestHooks(tests.TestCase): """Basic tests for transport hooks.""" def _get_connected_transport(self): return transport.ConnectedTransport("bogus:nowhere") def test_transporthooks_initialisation(self): """Check all expected transport hook points are set up.""" hookpoint = transport.TransportHooks() self.assertIn("post_connect", hookpoint, f"post_connect not in {hookpoint}") def test_post_connect(self): """Ensure the post_connect hook is called when _set_transport is.""" calls = [] transport.Transport.hooks.install_named_hook("post_connect", calls.append, None) t = self._get_connected_transport() self.assertLength(0, 
calls) t._set_connection("connection", "auth") self.assertEqual(calls, [t]) class PathFilteringDecoratorTransportTest(tests.TestCase): """Pathfilter decoration specific tests.""" def test_abspath(self): # The abspath is always relative to the base of the backing transport. server = pathfilter.PathFilteringServer( transport.get_transport_from_url("memory:///foo/bar/"), lambda x: x ) server.start_server() t = transport.get_transport_from_url(server.get_url()) self.assertEqual(server.get_url(), t.abspath("/")) subdir_t = t.clone("subdir") self.assertEqual(server.get_url(), subdir_t.abspath("/")) server.stop_server() def make_pf_transport(self, filter_func=None): """Make a PathFilteringTransport backed by a MemoryTransport. :param filter_func: by default this will be a no-op function. Use this parameter to override it. """ if filter_func is None: def filter_func(x): return x server = pathfilter.PathFilteringServer( transport.get_transport_from_url("memory:///foo/bar/"), filter_func ) server.start_server() self.addCleanup(server.stop_server) return transport.get_transport_from_url(server.get_url()) def test__filter(self): # _filter (with an identity func as filter_func) always returns # paths relative to the base of the backing transport. t = self.make_pf_transport() self.assertEqual("foo", t._filter("foo")) self.assertEqual("foo/bar", t._filter("foo/bar")) self.assertEqual("", t._filter("..")) self.assertEqual("", t._filter("/")) # The base of the pathfiltering transport is taken into account too. 
t = t.clone("subdir1/subdir2") self.assertEqual("subdir1/subdir2/foo", t._filter("foo")) self.assertEqual("subdir1/subdir2/foo/bar", t._filter("foo/bar")) self.assertEqual("subdir1", t._filter("..")) self.assertEqual("", t._filter("/")) def test_filter_invocation(self): filter_log = [] def filter(path): filter_log.append(path) return path t = self.make_pf_transport(filter) t.has("abc") self.assertEqual(["abc"], filter_log) del filter_log[:] t.clone("abc").has("xyz") self.assertEqual(["abc/xyz"], filter_log) del filter_log[:] t.has("/abc") self.assertEqual(["abc"], filter_log) def test_clone(self): t = self.make_pf_transport() # relpath from root and root path are the same relpath_cloned = t.clone("foo") abspath_cloned = t.clone("/foo") self.assertEqual(t.server, relpath_cloned.server) self.assertEqual(t.server, abspath_cloned.server) def test_url_preserves_pathfiltering(self): """Calling get_transport on a pathfiltered transport's base should produce a transport with exactly the same behaviour as the original pathfiltered transport. This is so that it is not possible to escape (accidentally or otherwise) the filtering by doing:: url = filtered_transport.base parent_url = urlutils.join(url, '..') new_t = transport.get_transport_from_url(parent_url) """ t = self.make_pf_transport() new_t = transport.get_transport_from_url(t.base) self.assertEqual(t.server, new_t.server) self.assertEqual(t.base, new_t.base) class ReadonlyDecoratorTransportTest(tests.TestCase): """Readonly decoration specific tests.""" def test_local_parameters(self): # connect to . in readonly mode t = readonly.ReadonlyTransportDecorator("readonly+.") self.assertEqual(True, t.listable()) self.assertEqual(True, t.is_readonly()) def test_http_parameters(self): from .http_server import HttpServer # connect to '.' 
via http which is not listable server = HttpServer() self.start_server(server) t = transport.get_transport_from_url("readonly+" + server.get_url()) self.assertIsInstance(t, readonly.ReadonlyTransportDecorator) self.assertEqual(False, t.listable()) self.assertEqual(True, t.is_readonly()) class FakeNFSDecoratorTests(tests.TestCaseWithTransport): """NFS decorator specific tests.""" def get_nfs_transport(self, url): # connect to url with nfs decoration return fakenfs.FakeNFSTransportDecorator("fakenfs+" + url) def test_local_parameters(self): # the listable and is_readonly parameters # are not changed by the fakenfs decorator t = self.get_nfs_transport(".") self.assertEqual(True, t.listable()) self.assertEqual(False, t.is_readonly()) def test_http_parameters(self): # the listable and is_readonly parameters # are not changed by the fakenfs decorator from .http_server import HttpServer # connect to '.' via http which is not listable server = HttpServer() self.start_server(server) t = self.get_nfs_transport(server.get_url()) self.assertIsInstance(t, fakenfs.FakeNFSTransportDecorator) self.assertEqual(False, t.listable()) self.assertEqual(True, t.is_readonly()) def test_fakenfs_server_default(self): # a FakeNFSServer() should bring up a local relpath server for itself server = test_server.FakeNFSServer() self.start_server(server) # the url should be decorated appropriately self.assertStartsWith(server.get_url(), "fakenfs+") # and we should be able to get a transport for it t = transport.get_transport_from_url(server.get_url()) # which must be a FakeNFSTransportDecorator instance. self.assertIsInstance(t, fakenfs.FakeNFSTransportDecorator) def test_fakenfs_rename_semantics(self): # a FakeNFS transport must mangle the way rename errors occur to # look like NFS problems. 
t = self.get_nfs_transport(".") t.mkdir("from") t.put_bytes("from/foo", b"") t.mkdir("to") t.put_bytes("to/bar", b"") self.assertRaises(errors.ResourceBusy, t.rename, "from", "to") class FakeVFATDecoratorTests(tests.TestCaseWithTransport): """Tests for simulation of VFAT restrictions.""" def get_vfat_transport(self, url): """Return vfat-backed transport for test directory.""" from ..fakevfat import FakeVFATTransportDecorator return FakeVFATTransportDecorator("vfat+" + url) def test_transport_creation(self): from ..fakevfat import FakeVFATTransportDecorator t = self.get_vfat_transport(".") self.assertIsInstance(t, FakeVFATTransportDecorator) def test_transport_mkdir(self): t = self.get_vfat_transport(".") t.mkdir("HELLO") self.assertTrue(t.has("hello")) self.assertTrue(t.has("Hello")) def test_forbidden_chars(self): t = self.get_vfat_transport(".") self.assertRaises(ValueError, t.has, "") class BadTransportHandler(transport.Transport): def __init__(self, base_url): raise errors.DependencyNotPresent("some_lib", "testing missing dependency") class BackupTransportHandler(transport.Transport): """Test transport that works as a backup for the BadTransportHandler.""" pass class TestTransportImplementation(tests.TestCaseInTempDir): """Implementation verification for transports. To verify a transport we need a server factory, which is a callable that accepts no parameters and returns an implementation of breezy.transport.Server. That Server is then used to construct transport instances and test the transport via loopback activity. Currently this assumes that the Transport object is connected to the current working directory. So that whatever is done through the transport, should show up in the working directory, and vice-versa. This is a bug, because its possible to have URL schemes which provide access to something that may not be result in storage on the local disk, i.e. due to file system limits, or due to it being a database or some other non-filesystem tool. 
This also tests to make sure that the functions work with both generators and lists (assuming iter(list) is effectively a generator) """ def setUp(self): super().setUp() self._server = self.transport_server() self.start_server(self._server) def get_transport(self, relpath=None): """Return a connected transport to the local directory. :param relpath: a path relative to the base url. """ base_url = self._server.get_url() url = self._adjust_url(base_url, relpath) # try getting the transport via the regular interface: t = transport.get_transport_from_url(url) # vila--20070607 if the following are commented out the test suite # still pass. Is this really still needed or was it a forgotten # temporary fix ? if not isinstance(t, self.transport_class): # we did not get the correct transport class type. Override the # regular connection behaviour by direct construction. t = self.transport_class(url) return t def build_tree(self, shape, transport=None, line_endings="binary"): """Build a tree of files via the test transport. Transport implementation tests need to operate on files at the test server (which may not be a local filesystem) rather than the process cwd, so this overrides the cwd-based TestCaseInTempDir version. If `transport` is None or read-only, falls back to a transport on the current working directory. Names are URL-escaped before being passed to the transport. 
""" if transport is None or transport.is_readonly(): from dromedary import get_transport_from_path transport = get_transport_from_path(".") for name in shape: escaped = urlutils.escape(name.rstrip("/")) if name.endswith("/"): transport.mkdir(escaped) else: if line_endings == "binary": end = b"\n" elif line_endings == "native": end = os.linesep.encode("ascii") else: raise ValueError(f"Invalid line ending request {line_endings!r}") content = b"contents of %s%s" % (name.encode("utf-8"), end) transport.put_bytes(escaped, content) class TestTransportFromPath(tests.TestCaseInTempDir): def test_with_path(self): t = transport.get_transport_from_path(self.test_dir) self.assertIsInstance(t, local.LocalTransport) self.assertEqual(t.base.rstrip("/"), urlutils.local_path_to_url(self.test_dir)) def test_with_url(self): t = transport.get_transport_from_path("file:") self.assertIsInstance(t, local.LocalTransport) self.assertEqual( t.base.rstrip("/"), urlutils.local_path_to_url(os.path.join(self.test_dir, "file:")), ) class TestTransportFromUrl(tests.TestCaseInTempDir): def test_with_path(self): self.assertRaises( urlutils.InvalidURL, transport.get_transport_from_url, self.test_dir ) def test_with_url(self): url = urlutils.local_path_to_url(self.test_dir) t = transport.get_transport_from_url(url) self.assertIsInstance(t, local.LocalTransport) self.assertEqual(t.base.rstrip("/"), url) def test_with_url_and_segment_parameters(self): url = urlutils.local_path_to_url(self.test_dir) + ",branch=foo" t = transport.get_transport_from_url(url) self.assertIsInstance(t, local.LocalTransport) self.assertEqual(t.base.rstrip("/"), url) with open(os.path.join(self.test_dir, "afile"), "w") as f: f.write("data") self.assertTrue(t.has("afile")) class TestLocalTransports(tests.TestCase): def test_get_transport_from_abspath(self): here = osutils.abspath(".") t = transport.get_transport_from_path(here) self.assertIsInstance(t, local.LocalTransport) self.assertEqual(t.base, 
urlutils.local_path_to_url(here) + "/") def test_get_transport_from_relpath(self): t = transport.get_transport_from_path(".") self.assertIsInstance(t, local.LocalTransport) self.assertEqual(t.base, urlutils.local_path_to_url(".") + "/") def test_get_transport_from_local_url(self): here = osutils.abspath(".") here_url = urlutils.local_path_to_url(here) + "/" t = transport.get_transport_from_url(here_url) self.assertIsInstance(t, local.LocalTransport) self.assertEqual(t.base, here_url) def test_local_abspath(self): here = osutils.abspath(".") t = transport.get_transport_from_path(here) self.assertEqual(t.local_abspath(""), here) def test_from_abspath(self): here = osutils.abspath(".") t = local.LocalTransport.from_abspath(here) self.assertIsInstance(t, local.LocalTransport) self.assertEqual(t.base, urlutils.local_path_to_url(here) + "/") class TestLocalTransportMutation(tests.TestCaseInTempDir): def test_local_transport_mkdir(self): here = osutils.abspath(".") t = transport.get_transport_from_path(here) t.mkdir("test") self.assertTrue(os.path.exists("test")) def test_local_transport_mkdir_permission_denied(self): # See https://bugs.launchpad.net/bzr/+bug/606537 here = osutils.abspath(".") t = transport.get_transport_from_path(here) def fake_chmod(path, mode): e = OSError("permission denied") e.errno = errno.EPERM raise e self.overrideAttr(os, "chmod", fake_chmod) t.mkdir("test") t.mkdir("test2", mode=0o707) self.assertTrue(os.path.exists("test")) self.assertTrue(os.path.exists("test2")) class TestLocalTransportWriteStream(tests.TestCaseWithTransport): def test_local_fdatasync_calls_fdatasync(self): """Check fdatasync on a stream tries to flush the data to the OS. We can't easily observe the external effect but we can at least see it's called. 
""" sentinel = object() fdatasync = getattr(os, "fdatasync", sentinel) if fdatasync is sentinel: raise tests.TestNotApplicable("fdatasync not supported") t = self.get_transport(".") self.recordCalls(os, "fdatasync") w = t.open_write_stream("out") w.write(b"foo") w.fdatasync() with open("out", "rb") as f: # Should have been flushed. self.assertEqual(f.read(), b"foo") def test_missing_directory(self): t = self.get_transport(".") self.assertRaises(NoSuchFile, t.open_write_stream, "dir/foo") class TestWin32LocalTransport(tests.TestCase): def test_unc_clone_to_root(self): self.requireFeature(features.win32_feature) # Win32 UNC path like \\HOST\path # clone to root should stop at least at \\HOST part # not on \\ # The Rust `url::Url` parser normalises hosts to lowercase per # RFC 3986, so compare case-insensitively (URL hosts are # case-insensitive). t = local.EmulatedWin32LocalTransport("file://HOST/path/to/some/dir/") for _i in range(4): t = t.clone("..") self.assertEqual(t.base.lower(), "file://host/") # make sure we reach the root t = t.clone("..") self.assertEqual(t.base.lower(), "file://host/") class TestConnectedTransport(tests.TestCase): """Tests for connected to remote server transports.""" def test_parse_url(self): t = transport.ConnectedTransport("http://simple.example.com/home/source") self.assertEqual(t._parsed_url.host, "simple.example.com") self.assertEqual(t._parsed_url.port, None) self.assertEqual(t._parsed_url.path, "/home/source/") self.assertIsNone(t._parsed_url.user) self.assertIsNone(t._parsed_url.password) self.assertEqual(t.base, "http://simple.example.com/home/source/") def test_parse_url_with_at_in_user(self): # Bug 228058 t = transport.ConnectedTransport("ftp://user@host.com@www.host.com/") self.assertEqual(t._parsed_url.user, "user@host.com") def test_parse_quoted_url(self): t = transport.ConnectedTransport("http://ro%62ey:h%40t@ex%41mple.com:2222/path") self.assertEqual(t._parsed_url.host, "exAmple.com") self.assertEqual(t._parsed_url.port, 
2222) self.assertEqual(t._parsed_url.user, "robey") self.assertEqual(t._parsed_url.password, "h@t") self.assertEqual(t._parsed_url.path, "/path/") # Base should not keep track of the password self.assertEqual(t.base, "http://ro%62ey@ex%41mple.com:2222/path/") def test_parse_invalid_url(self): self.assertRaises( urlutils.InvalidURL, transport.ConnectedTransport, "sftp://lily.org:~janneke/public/bzr/gub", ) def test_relpath(self): t = transport.ConnectedTransport("sftp://user@host.com/abs/path") self.assertEqual(t.relpath("sftp://user@host.com/abs/path/sub"), "sub") self.assertRaises( errors.PathNotChild, t.relpath, "http://user@host.com/abs/path/sub" ) self.assertRaises( errors.PathNotChild, t.relpath, "sftp://user2@host.com/abs/path/sub" ) self.assertRaises( errors.PathNotChild, t.relpath, "sftp://user@otherhost.com/abs/path/sub" ) self.assertRaises( errors.PathNotChild, t.relpath, "sftp://user@host.com:33/abs/path/sub" ) # Make sure it works when we don't supply a username t = transport.ConnectedTransport("sftp://host.com/abs/path") self.assertEqual(t.relpath("sftp://host.com/abs/path/sub"), "sub") # Make sure it works when parts of the path will be url encoded t = transport.ConnectedTransport("sftp://host.com/dev/%path") self.assertEqual(t.relpath("sftp://host.com/dev/%path/sub"), "sub") def test_connection_sharing_propagate_credentials(self): t = transport.ConnectedTransport("ftp://user@host.com/abs/path") self.assertEqual("user", t._parsed_url.user) self.assertEqual("host.com", t._parsed_url.host) self.assertIs(None, t._get_connection()) self.assertIs(None, t._parsed_url.password) c = t.clone("subdir") self.assertIs(None, c._get_connection()) self.assertIs(None, t._parsed_url.password) # Simulate the user entering a password password = "secret" connection = object() t._set_connection(connection, password) self.assertIs(connection, t._get_connection()) self.assertIs(password, t._get_credentials()) self.assertIs(connection, c._get_connection()) 
self.assertIs(password, c._get_credentials()) # credentials can be updated new_password = "even more secret" c._update_credentials(new_password) self.assertIs(connection, t._get_connection()) self.assertIs(new_password, t._get_credentials()) self.assertIs(connection, c._get_connection()) self.assertIs(new_password, c._get_credentials()) class TestReusedTransports(tests.TestCase): """Tests for transport reuse.""" def test_reuse_same_transport(self): possible_transports = [] t1 = transport.get_transport_from_url( "http://foo/", possible_transports=possible_transports ) self.assertEqual([t1], possible_transports) t2 = transport.get_transport_from_url("http://foo/", possible_transports=[t1]) self.assertIs(t1, t2) # Also check that final '/' are handled correctly t3 = transport.get_transport_from_url("http://foo/path/") t4 = transport.get_transport_from_url( "http://foo/path", possible_transports=[t3] ) self.assertIs(t3, t4) t5 = transport.get_transport_from_url("http://foo/path") t6 = transport.get_transport_from_url( "http://foo/path/", possible_transports=[t5] ) self.assertIs(t5, t6) def test_don_t_reuse_different_transport(self): t1 = transport.get_transport_from_url("http://foo/path") t2 = transport.get_transport_from_url( "http://bar/path", possible_transports=[t1] ) self.assertIsNot(t1, t2) class TestTransportTrace(tests.TestCase): def test_decorator(self): t = transport.get_transport_from_url("trace+memory://") from dromedary.trace import TransportTraceDecorator self.assertIsInstance(t, TransportTraceDecorator) def test_clone_preserves_activity(self): t = transport.get_transport_from_url("trace+memory://") t2 = t.clone(".") self.assertIsNot(t, t2) self.assertIs(t._activity, t2._activity) # the following specific tests are for the operations that have made use of # logging in tests; we could test every single operation but doing that # still won't cause a test failure when the top level Transport API # changes; so there is little return doing that. 
def test_get(self): t = transport.get_transport_from_url("trace+memory:///") t.put_bytes("foo", b"barish") t.get("foo") expected_result = [] # put_bytes records the bytes, not the content to avoid memory # pressure. expected_result.append(("put_bytes", "foo", 6, None)) # get records the file name only. expected_result.append(("get", "foo")) self.assertEqual(expected_result, t._activity) def test_readv(self): t = transport.get_transport_from_url("trace+memory:///") t.put_bytes("foo", b"barish") list(t.readv("foo", [(0, 1), (3, 2)], adjust_for_latency=True, upper_limit=6)) expected_result = [] # put_bytes records the bytes, not the content to avoid memory # pressure. expected_result.append(("put_bytes", "foo", 6, None)) # readv records the supplied offset request expected_result.append(("readv", "foo", [(0, 1), (3, 2)], True, 6)) self.assertEqual(expected_result, t._activity) class TestSSHConnections(tests.TestCaseWithTransport): def test_bzr_connect_to_bzr_ssh(self): """get_transport of a bzr+ssh:// behaves correctly. bzr+ssh:// should cause bzr to run a remote bzr smart server over SSH. Note: this test requires breezy's bzr+ssh transport to be registered. """ raise tests.TestNotApplicable( "bzr+ssh:// is registered by breezy, not dromedary" ) # This test actually causes a bzr instance to be invoked, which is very # expensive: it should be the only such test in the test suite. # A reasonable evolution for this would be to simply check inside # check_channel_exec_request that the command is appropriate, and then # satisfy requests in-process. self.requireFeature(features.paramiko) # SFTPFullAbsoluteServer has a get_url method, and doesn't # override the interface (doesn't change self._vendor). # Note that this does encryption, so can be slow. 
from dromedary.tests import stub_sftp # Start an SSH server self.command_executed = [] # XXX: This is horrible -- we define a really dumb SSH server that # executes commands, and manage the hooking up of stdin/out/err to the # SSH channel ourselves. Surely this has already been implemented # elsewhere? started = [] class StubSSHServer(stub_sftp.StubServer): test = self def check_channel_exec_request(self, channel, command): self.test.command_executed.append(command) proc = subprocess.Popen( command, shell=True, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE, bufsize=0, ) # XXX: horribly inefficient, not to mention ugly. # Start a thread for each of stdin/out/err, and relay bytes # from the subprocess to channel and vice versa. def ferry_bytes(read, write, close): while True: bytes = read(1) if bytes == b"": close() break write(bytes) file_functions = [ (channel.recv, proc.stdin.write, proc.stdin.close), (proc.stdout.read, channel.sendall, channel.close), (proc.stderr.read, channel.sendall_stderr, channel.close), ] started.append(proc) for read, write, close in file_functions: t = threading.Thread(target=ferry_bytes, args=(read, write, close)) t.start() started.append(t) return True ssh_server = stub_sftp.SFTPFullAbsoluteServer(StubSSHServer) # We *don't* want to override the default SSH vendor: the detected one # is the one to use. # FIXME: I don't understand the above comment, SFTPFullAbsoluteServer # inherits from SFTPServer which forces the SSH vendor to # ssh.ParamikoVendor(). So it's forced, not detected. --vila 20100623 self.start_server(ssh_server) port = ssh_server.port bzr_remote_command = self.get_brz_command() self.overrideEnv("BZR_REMOTE_PATH", " ".join(bzr_remote_command)) self.overrideEnv("PYTHONPATH", ":".join(sys.path)) # Access the branch via a bzr+ssh URL. The BZR_REMOTE_PATH environment # variable is used to tell bzr what command to run on the remote end. 
path_to_branch = osutils.abspath(".") if sys.platform == "win32": # On Windows, we export all drives as '/C:/, etc. So we need to # prefix a '/' to get the right path. path_to_branch = "/" + path_to_branch url = "bzr+ssh://fred:secret@localhost:%d%s" % (port, path_to_branch) t = transport.get_transport_from_url(url) self.permit_url(t.base) t.mkdir("foo") self.assertEqual( [ b"%s serve --inet --directory=/ --allow-writes" % " ".join(bzr_remote_command).encode() ], self.command_executed, ) # Make sure to disconnect, so that the remote process can stop, and we # can cleanup. Then pause the test until everything is shutdown t._client._medium.disconnect() if not started: return # First wait for the subprocess started[0].wait() # And the rest are threads for t in started[1:]: t.join() class TestKind(tests.TestCaseWithTransport): def test_file_kind(self): import socket self.build_tree(["file", "dir/"]) self.assertEqual("file", file_kind("file")) self.assertEqual("directory", file_kind("dir/")) if osutils.supports_symlinks(self.test_dir): os.symlink("symlink", "symlink") self.assertEqual("symlink", file_kind("symlink")) # TODO: jam 20060529 Test a block device try: os.lstat("/dev/null") except FileNotFoundError: pass else: self.assertEqual("chardev", file_kind(os.path.realpath("/dev/null"))) mkfifo = getattr(os, "mkfifo", None) if mkfifo: mkfifo("fifo") try: self.assertEqual("fifo", file_kind("fifo")) finally: os.remove("fifo") AF_UNIX = getattr(socket, "AF_UNIX", None) if AF_UNIX: s = socket.socket(AF_UNIX) s.bind("socket") try: self.assertEqual("socket", file_kind("socket")) finally: os.remove("socket") dromedary-0.1.5/dromedary/tests/test_transport_log.py000066400000000000000000000117301520150013200231300ustar00rootroot00000000000000# Copyright (C) 2008-2011, 2016 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of 
the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Tests for log+ transport decorator.""" import logging from dromedary.tests import TestCaseWithMemoryTransport logger = logging.getLogger("dromedary.tests.test_transport_log") import dromedary as transport from dromedary.log import TransportLogDecorator class TestTransportLog(TestCaseWithMemoryTransport): def test_log_transport(self): base_transport = self.get_transport("") logging_transport = transport.get_transport_from_url( "log+" + base_transport.base ) # operations such as mkdir are logged logger.debug("where are you?") logging_transport.mkdir("subdir") log = self.get_log() # GZ 2017-05-24: Used to expect abspath logged, logger needs fixing. 
        self.assertContainsRe(log, r"mkdir subdir")
        self.assertContainsRe(log, " --> None")
        # they have the expected effect
        self.assertTrue(logging_transport.has("subdir"))
        # and they operate on the underlying transport
        self.assertTrue(base_transport.has("subdir"))

    def test_log_readv(self):
        # readv() through the logging decorator must produce the same
        # (offset, data) pairs as the decorated transport's own readv().
        # see
        # NOTE(review): whatever reference followed "see" is missing in this
        # copy of the file -- restore it from upstream if available.
        # transports are not required to return a generator, but we
        # specifically want to check that those that do cause it to be passed
        # through, for the sake of minimum interference
        base_transport = DummyReadvTransport()
        # construct it directly to avoid needing the dummy transport to be
        # registered etc
        logging_transport = TransportLogDecorator(
            "log+dummy:///", _decorated=base_transport
        )
        result = base_transport.readv("foo", [(0, 10)])
        # sadly there's no types.IteratorType, and GeneratorType is too
        # specific
        next(result)
        result = logging_transport.readv("foo", [(0, 10)])
        self.assertEqual(list(result), [(0, b"abcdefghij")])

    def test_url_prefix(self):
        # The decorator reports "log+" as its URL prefix.
        self.assertEqual("log+", TransportLogDecorator._get_url_prefix())

    def test_decorated_attribute(self):
        # Opening a "log+" URL gives a transport whose ``_decorated``
        # attribute has the same base as the undecorated transport.
        base_transport = self.get_transport("")
        logging_transport = transport.get_transport_from_url(
            "log+" + base_transport.base
        )
        self.assertEqual(base_transport.base, logging_transport._decorated.base)

    def test_log_has_true_and_false(self):
        # Both has() calls and both of their results (True and False) must
        # appear in the captured log.
        base_transport = self.get_transport("")
        logging_transport = transport.get_transport_from_url(
            "log+" + base_transport.base
        )
        base_transport.mkdir("exists")
        self.assertTrue(logging_transport.has("exists"))
        self.assertFalse(logging_transport.has("missing"))
        log = self.get_log()
        self.assertContainsRe(log, r"has exists")
        self.assertContainsRe(log, r"has missing")
        self.assertContainsRe(log, r"--> True")
        self.assertContainsRe(log, r"--> False")

    def test_log_error_path(self):
        # A call that raises is still logged: the call line itself plus a
        # "--> " result line.
        base_transport = self.get_transport("")
        logging_transport = transport.get_transport_from_url(
            "log+" + base_transport.base
        )
        from dromedary.errors import NoSuchFile

        self.assertRaises(NoSuchFile, logging_transport.get_bytes, "missing")
        log = self.get_log()
        # The error summary should be logged after the call line.
        self.assertContainsRe(log, r"get missing")
        self.assertContainsRe(log, r"--> ")

    def test_clone_returns_logged_transport(self):
        base_transport = self.get_transport("")
        base_transport.mkdir("sub")
        logging_transport = transport.get_transport_from_url(
            "log+" + base_transport.base
        )
        cloned = logging_transport.clone("sub")
        # The cloned handle must still carry the log+ prefix so that it
        # behaves as a logging decorator rather than the bare inner transport.
        self.assertTrue(cloned.base.startswith("log+"))


class DummyReadvTransport:
    """Minimal stand-in transport that only provides ``readv`` and ``abspath``.

    Used by ``test_log_readv`` so the log decorator can be constructed
    directly around it.
    """

    base = "dummy:///"

    # The Rust-backed decorator forwards the full readv signature, so this
    # stub accepts the latency/upper_limit arguments as well as the core
    # (relpath, offsets) pair.
    def readv(
        self, filename, offset_length_pairs, adjust_for_latency=False, upper_limit=None
    ):
        # Generator: the arguments are ignored and a single fixed
        # (offset, data) pair is yielded.
        yield (0, b"abcdefghij")

    def abspath(self, path):
        # Plain string concatenation with ``base``.
        return self.base + path
dromedary-0.1.5/dromedary/tests/test_urlutils.py000066400000000000000001426431520150013200221260ustar00rootroot00000000000000# Copyright (C) 2006-2012, 2015, 2016 Canonical Ltd
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA

"""Tests for the urlutils wrapper."""

import ntpath
import os
import posixpath
import sys
import unittest

from dromedary import osutils, urlutils
from dromedary import tests as features
from dromedary.errors import PathNotChild
from dromedary.tests import TestCase, TestCaseInTempDir, TestSkipped


class TestUrlToPath(TestCase):
    """Tests for the URL and path helper functions in ``urlutils``."""

    def test_basename(self):
        # breezy.urlutils.basename
        # Test breezy.urlutils.split()
        basename = urlutils.basename
        # file:// URLs have platform-specific expectations: on win32 a
        # URL without a drive letter is rejected with InvalidURL.
        if sys.platform == "win32":
            self.assertRaises(urlutils.InvalidURL, basename, "file:///path/to/foo")
            self.assertEqual("foo", basename("file:///C|/foo"))
            self.assertEqual("foo", basename("file:///C:/foo"))
            self.assertEqual("", basename("file:///C:/"))
        else:
            self.assertEqual("foo", basename("file:///foo"))
            self.assertEqual("", basename("file:///"))
        self.assertEqual("foo", basename("http://host/path/to/foo"))
        self.assertEqual("foo", basename("http://host/path/to/foo/"))
        self.assertEqual(
            "", basename("http://host/path/to/foo/", exclude_trailing_slash=False)
        )
        self.assertEqual("path", basename("http://host/path"))
        self.assertEqual("", basename("http://host/"))
        self.assertEqual("", basename("http://host"))
        self.assertEqual("path", basename("http:///nohost/path"))
        self.assertEqual("path", basename("random+scheme://user:pass@ahost:port/path"))
        self.assertEqual("path", basename("random+scheme://user:pass@ahost:port/path/"))
        self.assertEqual("", basename("random+scheme://user:pass@ahost:port/"))
        # relative paths
        self.assertEqual("foo", basename("path/to/foo"))
        self.assertEqual("foo", basename("path/to/foo/"))
        self.assertEqual("", basename("path/to/foo/", exclude_trailing_slash=False))
        self.assertEqual("foo", basename("path/../foo"))
        self.assertEqual("foo", basename("../path/foo"))

    def test_normalize_url_files(self):
        # Test that local paths are properly normalized
        normalize_url = urlutils.normalize_url

        def norm_file(expected, path):
            # Normalise ``path``, check it became a file:/// URL, strip the
            # platform-specific prefix and compare only the tail, since the
            # leading part of the result is not pinned by this test.
            url = normalize_url(path)
            self.assertStartsWith(url, "file:///")
            if sys.platform == "win32":
                url = url[len("file:///C:") :]
            else:
                url = url[len("file://") :]
            self.assertEndsWith(url, expected)

        norm_file("path/to/foo", "path/to/foo")
        norm_file("/path/to/foo", "/path/to/foo")
        norm_file("path/to/foo", "../path/to/foo")
        # Local paths are assumed to *not* be escaped at all
        try:
            "uni/\xb5".encode(osutils.get_user_encoding())
        except UnicodeError:
            # locale cannot handle unicode
            pass
        else:
            norm_file("uni/%C2%B5", "uni/\xb5")
        norm_file("uni/%25C2%25B5", "uni/%C2%B5")
        norm_file("uni/%20b", "uni/ b")
        # All the crazy characters get escaped in local paths => file:/// urls
        # The ' ' character must not be at the end, because on win32
        # it gets stripped off by ntpath.abspath
        norm_file("%27%20%3B/%3F%3A%40%26%3D%2B%24%2C%23", "' ;/?:@&=+$,#")

    def test_normalize_url_hybrid(self):
        # Anything with a scheme:// should be treated as a hybrid url
        # which changes what characters get escaped.
        normalize_url = urlutils.normalize_url
        eq = self.assertEqual
        eq("file:///foo/", normalize_url("file:///foo/"))
        eq("file:///foo/%20", normalize_url("file:///foo/ "))
        eq("file:///foo/%20", normalize_url("file:///foo/%20"))
        # Don't escape reserved characters
        eq(
            "file:///ab_c.d-e/%f:?g&h=i+j;k,L#M$",
            normalize_url("file:///ab_c.d-e/%f:?g&h=i+j;k,L#M$"),
        )
        eq(
            "http://ab_c.d-e/%f:?g&h=i+j;k,L#M$",
            normalize_url("http://ab_c.d-e/%f:?g&h=i+j;k,L#M$"),
        )
        # Escape unicode characters, but not already escaped chars
        eq("http://host/ab/%C2%B5/%C2%B5", normalize_url("http://host/ab/%C2%B5/\xb5"))
        # Unescape characters that don't need to be escaped
        eq(
            "http://host/~bob%2525-._",
            normalize_url("http://host/%7Ebob%2525%2D%2E%5F"),
        )
        # NOTE(review): the assertion below is identical to the one above --
        # presumably a leftover of a former str/bytes pair; confirm before
        # removing.
        eq(
            "http://host/~bob%2525-._",
            normalize_url("http://host/%7Ebob%2525%2D%2E%5F"),
        )

    def test_url_scheme_re(self):
        # Test paths that may be URLs
        def test_one(url, scheme_and_path):
            """Assert that _find_scheme_and_separator splits ``url`` as expected.

            :param url: The string to examine.
            :param scheme_and_path: The (scheme, path) pair expected from
                slicing ``url`` at the two returned positions, or None to
                indicate that both returned positions should be None.
            """
            (scheme_pos, separator_pos) = urlutils._find_scheme_and_separator(url)
            if scheme_and_path is None:
                self.assertIs(None, scheme_pos)
                self.assertIs(None, separator_pos)
            else:
                self.assertEqual(scheme_and_path[0], url[:scheme_pos])
                self.assertEqual(scheme_and_path[1], url[separator_pos:])

        # Local paths
        test_one("/path", None)
        test_one("C:/path", None)
        test_one("../path/to/foo", None)
        test_one("../path/to/fo\xe5", None)
        # Real URLS
        test_one("http://host/path/", ("http", "/path/"))
        test_one("sftp://host/path/to/foo", ("sftp", "/path/to/foo"))
        test_one("file:///usr/bin", ("file", "/usr/bin"))
        test_one("file:///C:/Windows", ("file", "/C:/Windows"))
        test_one("file:///C|/Windows", ("file", "/C|/Windows"))
        test_one("readonly+sftp://host/path/\xe5", ("readonly+sftp", "/path/\xe5"))
        # Weird stuff
        # Can't have slashes or colons in the scheme
        test_one("/path/to/://foo", None)
        test_one("scheme:stuff://foo", ("scheme", "//foo"))
        # Must have more than one character for scheme
        test_one("C://foo", None)
        # NOTE(review): the expected "path" here is the whole URL, which is
        # what url[None:] yields -- presumably no path separator is reported
        # when nothing follows the host; confirm against the implementation.
        test_one("ab://foo", ("ab", "ab://foo"))

    def test_dirname(self):
        # Test breezy.urlutils.dirname()
        dirname = urlutils.dirname
        if sys.platform == "win32":
            self.assertRaises(urlutils.InvalidURL, dirname, "file:///path/to/foo")
            self.assertEqual("file:///C|/", dirname("file:///C|/foo"))
            self.assertEqual("file:///C|/", dirname("file:///C|/"))
        else:
            self.assertEqual("file:///", dirname("file:///foo"))
            self.assertEqual("file:///", dirname("file:///"))
        self.assertEqual("http://host/path/to", dirname("http://host/path/to/foo"))
        self.assertEqual("http://host/path/to", dirname("http://host/path/to/foo/"))
        self.assertEqual(
            "http://host/path/to/foo",
            dirname("http://host/path/to/foo/", exclude_trailing_slash=False),
        )
        self.assertEqual("http://host/", dirname("http://host/path"))
        self.assertEqual("http://host/", dirname("http://host/"))
        self.assertEqual("http://host", dirname("http://host"))
        self.assertEqual("http:///nohost", dirname("http:///nohost/path"))
        self.assertEqual(
            "random+scheme://user:pass@ahost:port/",
            dirname("random+scheme://user:pass@ahost:port/path"),
        )
        self.assertEqual(
            "random+scheme://user:pass@ahost:port/",
            dirname("random+scheme://user:pass@ahost:port/path/"),
        )
        self.assertEqual(
            "random+scheme://user:pass@ahost:port/",
            dirname("random+scheme://user:pass@ahost:port/"),
        )
        # relative paths
        self.assertEqual("path/to", dirname("path/to/foo"))
        self.assertEqual("path/to", dirname("path/to/foo/"))
        self.assertEqual(
            "path/to/foo", dirname("path/to/foo/", exclude_trailing_slash=False)
        )
        self.assertEqual("path/..", dirname("path/../foo"))
        self.assertEqual("../path", dirname("../path/foo"))

    def test_is_url(self):
        # Strings with a scheme ("http://", "lp:", ...) are URLs; the empty
        # string and plain relative or absolute paths are not.
        self.assertTrue(urlutils.is_url("http://foo/bar"))
        self.assertTrue(urlutils.is_url("bzr+ssh://foo/bar"))
        self.assertTrue(urlutils.is_url("lp:foo/bar"))
        self.assertTrue(urlutils.is_url("file:///foo/bar"))
        self.assertFalse(urlutils.is_url(""))
        self.assertFalse(urlutils.is_url("foo"))
        self.assertFalse(urlutils.is_url("foo/bar"))
        self.assertFalse(urlutils.is_url("/foo"))
        self.assertFalse(urlutils.is_url("/foo/bar"))
        # Windows drive-letter paths must not be mistaken for a URL scheme.
        self.assertFalse(urlutils.is_url("C:/"))
        self.assertFalse(urlutils.is_url("C:/foo"))
        self.assertFalse(urlutils.is_url("C:/foo/bar"))

    def test_join(self):
        def test(expected, *args):
            # Join all of ``args`` and compare with the expected URL.
            joined = urlutils.join(*args)
            self.assertEqual(expected, joined)

        # Test relative path joining
        test("foo", "foo")  # relative fragment with nothing is preserved.
        test("foo/bar", "foo", "bar")
        test("http://foo/bar", "http://foo", "bar")
        test("http://foo/bar", "http://foo", ".", "bar")
        test("http://foo/baz", "http://foo", "bar", "../baz")
        test("http://foo/bar/baz", "http://foo", "bar/baz")
        test("http://foo/baz", "http://foo", "bar/../baz")
        test("http://foo/baz", "http://foo/bar/", "../baz")
        test("lp:foo/bar", "lp:foo", "bar")
        test("lp:foo/bar/baz", "lp:foo", "bar/baz")
        # Absolute paths
        test("http://foo", "http://foo")  # abs url with nothing is preserved.
        # An absolute URL later in the argument list replaces everything
        # that came before it.
        test("http://bar", "http://foo", "http://bar")
        test("sftp://bzr/foo", "http://foo", "bar", "sftp://bzr/foo")
        test("file:///bar", "foo", "file:///bar")
        test("http://bar/", "http://foo", "http://bar/")
        test("http://bar/a", "http://foo", "http://bar/a")
        test("http://bar/a/", "http://foo", "http://bar/a/")
        test("lp:bar", "http://foo", "lp:bar")
        test("lp:bar", "lp:foo", "lp:bar")
        test("file:///stuff", "lp:foo", "file:///stuff")
        # From a base path
        test("file:///foo", "file:///", "foo")
        test("file:///bar/foo", "file:///bar/", "foo")
        test("http://host/foo", "http://host/", "foo")
        test("http://host/", "http://host", "")
        # Invalid joinings
        # Cannot go above root
        # Implicitly at root:
        self.assertRaises(
            urlutils.InvalidURLJoin, urlutils.join, "http://foo", "../baz"
        )
        self.assertRaises(urlutils.InvalidURLJoin, urlutils.join, "http://foo", "/..")
        # Joining from a path explicitly under the root.
        self.assertRaises(
            urlutils.InvalidURLJoin, urlutils.join, "http://foo/a", "../../b"
        )

    def test_joinpath(self):
        def test(expected, *args):
            # Join all of ``args`` as plain paths and compare the result.
            joined = urlutils.joinpath(*args)
            self.assertEqual(expected, joined)

        # Test a single element
        test("foo", "foo")
        # Test relative path joining
        test("foo/bar", "foo", "bar")
        test("foo/bar", "foo", ".", "bar")
        test("foo/baz", "foo", "bar", "../baz")
        test("foo/bar/baz", "foo", "bar/baz")
        test("foo/baz", "foo", "bar/../baz")
        # Test joining to an absolute path
        test("/foo", "/foo")
        test("/foo", "/foo", ".")
        test("/foo/bar", "/foo", "bar")
        test("/", "/foo", "..")
        # Test joining with an absolute path
        test("/bar", "foo", "/bar")
        # Test joining to a path with a trailing slash
        test("foo/bar", "foo/", "bar")
        # Invalid joinings
        # Cannot go above root
        self.assertRaises(urlutils.InvalidURLJoin, urlutils.joinpath, "/", "../baz")
        self.assertRaises(urlutils.InvalidURLJoin, urlutils.joinpath, "/", "..")
        self.assertRaises(urlutils.InvalidURLJoin, urlutils.joinpath, "/", "/..")

    def test_join_segment_parameters_raw(self):
        # Raw parameters are appended comma-separated to the last path
        # segment; a parameter containing a comma raises InvalidURLJoin and
        # a non-string parameter raises TypeError.
        join_segment_parameters_raw = urlutils.join_segment_parameters_raw
        self.assertEqual("/somedir/path", join_segment_parameters_raw("/somedir/path"))
        self.assertEqual(
            "/somedir/path,rawdata",
            join_segment_parameters_raw("/somedir/path", "rawdata"),
        )
        self.assertRaises(
            urlutils.InvalidURLJoin,
            join_segment_parameters_raw,
            "/somedir/path",
            "rawdata1,rawdata2,rawdata3",
        )
        self.assertEqual(
            "/somedir/path,bla,bar",
            join_segment_parameters_raw("/somedir/path", "bla", "bar"),
        )
        self.assertEqual(
            "/somedir,exist=some/path,bla,bar",
            join_segment_parameters_raw("/somedir,exist=some/path", "bla", "bar"),
        )
        self.assertRaises(TypeError, join_segment_parameters_raw, "/somepath", 42)

    def test_join_segment_parameters(self):
        # The dict form appends key=value pairs, overrides a key already
        # present on the last segment, and rejects a "," in a value or an
        # "=" in a key with InvalidURLJoin.
        join_segment_parameters = urlutils.join_segment_parameters
        self.assertEqual("/somedir/path", join_segment_parameters("/somedir/path", {}))
        self.assertEqual(
            "/somedir/path,key1=val1",
            join_segment_parameters("/somedir/path", {"key1": "val1"}),
        )
        self.assertRaises(
            urlutils.InvalidURLJoin,
            join_segment_parameters,
            "/somedir/path",
            {"branch": "brr,brr,brr"},
        )
        self.assertRaises(
            urlutils.InvalidURLJoin,
            join_segment_parameters,
            "/somedir/path",
            {"key1=val1": "val2"},
        )
        self.assertEqual(
            "/somedir/path,key1=val1,key2=val2",
            join_segment_parameters("/somedir/path", {"key1": "val1", "key2": "val2"}),
        )
        self.assertEqual(
            "/somedir/path,key1=val1,key2=val2",
            join_segment_parameters("/somedir/path,key1=val1", {"key2": "val2"}),
        )
        self.assertEqual(
            "/somedir/path,key1=val2",
            join_segment_parameters("/somedir/path,key1=val1", {"key1": "val2"}),
        )
        self.assertEqual(
            "/somedir,exist=some/path,key1=val1",
            join_segment_parameters("/somedir,exist=some/path", {"key1": "val1"}),
        )
        self.assertEqual(
            "/,key1=val1,key2=val2",
            join_segment_parameters("/,key1=val1", {"key2": "val2"}),
        )
        self.assertRaises(
            TypeError, join_segment_parameters, "/,key1=val1", {"foo": 42}
        )

    def test_posix_local_path_to_url(self):
        # POSIX paths become file:/// URLs with "," and non-ASCII characters
        # percent-escaped; the result is a str.
        to_url = urlutils._posix_local_path_to_url
        self.assertEqual("file:///path/to/foo", to_url("/path/to/foo"))
        self.assertEqual("file:///path/to/foo%2Cbar", to_url("/path/to/foo,bar"))
        try:
            result = to_url("/path/to/r\xe4ksm\xf6rg\xe5s")
        except UnicodeError as err:
            raise TestSkipped("local encoding cannot handle unicode") from err
        self.assertEqual("file:///path/to/r%C3%A4ksm%C3%B6rg%C3%A5s", result)
        self.assertIsInstance(result, str)

    def test_posix_local_path_from_url(self):
        # file:// URLs map back to POSIX paths: segment parameters are
        # dropped, escapes are decoded regardless of hex case, and
        # "localhost" is accepted as the host.
        from_url = urlutils._posix_local_path_from_url
        self.assertEqual("/path/to/foo", from_url("file:///path/to/foo"))
        self.assertEqual("/path/to/foo", from_url("file:///path/to/foo,branch=foo"))
        self.assertEqual(
            "/path/to/r\xe4ksm\xf6rg\xe5s",
            from_url("file:///path/to/r%C3%A4ksm%C3%B6rg%C3%A5s"),
        )
        self.assertEqual(
            "/path/to/r\xe4ksm\xf6rg\xe5s",
            from_url("file:///path/to/r%c3%a4ksm%c3%b6rg%c3%a5s"),
        )
        self.assertEqual(
            "/path/to/r\xe4ksm\xf6rg\xe5s",
            from_url("file://localhost/path/to/r%c3%a4ksm%c3%b6rg%c3%a5s"),
        )
        # A bare path (no scheme) and a non-local host are both rejected.
        self.assertRaises(urlutils.InvalidURL, from_url, "/path/to/foo")
        self.assertRaises(
            urlutils.InvalidURL,
            from_url,
            "file://remotehost/path/to/r%c3%a4ksm%c3%b6rg%c3%a5s",
        )

    def test_win32_local_path_to_url(self):
        # Drive-letter paths become file:///X:/ URLs; the non-ASCII case
        # below expects the drive letter upper-cased ("d:" -> "D:").
        to_url = urlutils._win32_local_path_to_url
        self.assertEqual("file:///C:/path/to/foo", to_url("C:/path/to/foo"))
        # BOGUS: on win32, ntpath.abspath will strip trailing
        # whitespace, so this will always fail
        # Though under linux, it fakes abspath support
        # and thus will succeed
        # self.assertEqual('file:///C:/path/to/foo%20',
        # to_url('C:/path/to/foo '))
        self.assertEqual("file:///C:/path/to/f%20oo", to_url("C:/path/to/f oo"))
        self.assertEqual("file:///", to_url("/"))
        self.assertEqual("file:///C:/path/to/foo%2Cbar", to_url("C:/path/to/foo,bar"))
        try:
            result = to_url("d:/path/to/r\xe4ksm\xf6rg\xe5s")
        except UnicodeError as err:
            raise TestSkipped("local encoding cannot handle unicode") from err
        self.assertEqual("file:///D:/path/to/r%C3%A4ksm%C3%B6rg%C3%A5s", result)
        self.assertIsInstance(result, str)

    def test_win32_unc_path_to_url(self):
        # UNC paths, written with either slash style, map to file://HOST/...
        # This test requires the win32 feature.
        self.requireFeature(features.win32_feature)
        to_url = urlutils._win32_local_path_to_url
        self.assertEqual("file://HOST/path", to_url(r"\\HOST\path"))
        self.assertEqual("file://HOST/path", to_url("//HOST/path"))
        try:
            result = to_url("//HOST/path/to/r\xe4ksm\xf6rg\xe5s")
        except UnicodeError as err:
            raise TestSkipped("local encoding cannot handle unicode") from err
        self.assertEqual("file://HOST/path/to/r%C3%A4ksm%C3%B6rg%C3%A5s", result)
        # The Python 2 original asserted bytes vs unicode; in Python 3 the
        # result is always `str`, matching `_posix_local_path_to_url` above.
        self.assertIsInstance(result, str)

    def test_win32_local_path_from_url(self):
        # Both "C|" and "C:" drive spellings are accepted; the expected
        # results show the drive letter upper-cased and segment parameters
        # dropped.
        from_url = urlutils._win32_local_path_from_url
        self.assertEqual("C:/path/to/foo", from_url("file:///C|/path/to/foo"))
        self.assertEqual(
            "D:/path/to/r\xe4ksm\xf6rg\xe5s",
            from_url("file:///d|/path/to/r%C3%A4ksm%C3%B6rg%C3%A5s"),
        )
        self.assertEqual(
            "D:/path/to/r\xe4ksm\xf6rg\xe5s",
            from_url("file:///d:/path/to/r%c3%a4ksm%c3%b6rg%c3%a5s"),
        )
        self.assertEqual("/", from_url("file:///"))
        self.assertEqual(
            "C:/path/to/foo", from_url("file:///C|/path/to/foo,branch=foo")
        )
        self.assertRaises(urlutils.InvalidURL, from_url, "file:///C:")
        self.assertRaises(urlutils.InvalidURL, from_url, "file:///c")
        self.assertRaises(urlutils.InvalidURL, from_url, "/path/to/foo")
        # Not a valid _win32 url, no drive letter
        self.assertRaises(urlutils.InvalidURL, from_url, "file:///path/to/foo")

    def test_win32_unc_path_from_url(self):
        # file://HOST/... maps to a //HOST/... UNC path.
        from_url = urlutils._win32_local_path_from_url
        self.assertEqual("//HOST/path", from_url("file://HOST/path"))
        self.assertEqual("//HOST/path", from_url("file://HOST/path,branch=foo"))
        # despite IE allows 2, 4, 5 and 6 slashes in URL to another machine
        # we want to use only 2 slashes
        # Firefox understand only 5 slashes in URL, but it's ugly
        self.assertRaises(urlutils.InvalidURL, from_url, "file:////HOST/path")
        self.assertRaises(urlutils.InvalidURL, from_url, "file://///HOST/path")
        self.assertRaises(urlutils.InvalidURL, from_url, "file://////HOST/path")
        # check for file://C:/ instead of file:///C:/
        self.assertRaises(urlutils.InvalidURL, from_url, "file://C:/path")

    def test_win32_extract_drive_letter(self):
        # The helper takes (url_base, path) and returns a pair with the
        # drive moved from the path onto the base.
        extract = urlutils._win32_extract_drive_letter
        self.assertEqual(("file:///C:", "/foo"), extract("file://", "/C:/foo"))
        self.assertEqual(("file:///d|", "/path"), extract("file://", "/d|/path"))
        self.assertRaises(urlutils.InvalidURL, extract, "file://", "/path")
        # Root drives without slash treated as invalid, see bug #841322
        self.assertEqual(("file:///C:", "/"), extract("file://", "/C:/"))
        self.assertRaises(urlutils.InvalidURL, extract, "file://", "/C:")
        # Invalid without drive separator or following forward slash
        self.assertRaises(urlutils.InvalidURL, extract, "file://", "/C")
        self.assertRaises(urlutils.InvalidURL, extract, "file://", "/C:ool")

    def test_split(self):
        # Test breezy.urlutils.split()
        split = urlutils.split
        if sys.platform == "win32":
            self.assertRaises(urlutils.InvalidURL, split, "file:///path/to/foo")
            self.assertEqual(("file:///C|/", "foo"), split("file:///C|/foo"))
            self.assertEqual(("file:///C:/", ""), split("file:///C:/"))
        else:
            self.assertEqual(("file:///", "foo"), split("file:///foo"))
            self.assertEqual(("file:///", ""), split("file:///"))
        self.assertEqual(
            ("http://host/path/to", "foo"), split("http://host/path/to/foo")
        )
        self.assertEqual(
            ("http://host/path/to", "foo"), split("http://host/path/to/foo/")
        )
        self.assertEqual(
            ("http://host/path/to/foo", ""),
            split("http://host/path/to/foo/", exclude_trailing_slash=False),
        )
        self.assertEqual(("http://host/", "path"), split("http://host/path"))
        self.assertEqual(("http://host/", ""), split("http://host/"))
        self.assertEqual(("http://host", ""), split("http://host"))
        self.assertEqual(("http:///nohost", "path"), split("http:///nohost/path"))
        self.assertEqual(
            ("random+scheme://user:pass@ahost:port/", "path"),
            split("random+scheme://user:pass@ahost:port/path"),
        )
        self.assertEqual(
            ("random+scheme://user:pass@ahost:port/", "path"),
            split("random+scheme://user:pass@ahost:port/path/"),
        )
        self.assertEqual(
            ("random+scheme://user:pass@ahost:port/", ""),
            split("random+scheme://user:pass@ahost:port/"),
        )
        # relative paths
        self.assertEqual(("path/to", "foo"), split("path/to/foo"))
        self.assertEqual(("path/to", "foo"), split("path/to/foo/"))
        self.assertEqual(
            ("path/to/foo", ""), split("path/to/foo/", exclude_trailing_slash=False)
        )
        self.assertEqual(("path/..", "foo"), split("path/../foo"))
        self.assertEqual(("../path", "foo"), split("../path/foo"))

    def test_strip_segment_parameters(self):
        # Only parameters on the final path segment are stripped; commas in
        # earlier segments are left untouched.
        strip_segment_parameters = urlutils.strip_segment_parameters
        # Check relative references with absolute paths
        self.assertEqual("/some/path", strip_segment_parameters("/some/path"))
        self.assertEqual("/some/path", strip_segment_parameters("/some/path,tip"))
        self.assertEqual(
            "/some,dir/path", strip_segment_parameters("/some,dir/path,tip")
        )
        self.assertEqual(
            "/somedir/path", strip_segment_parameters("/somedir/path,heads%2Ftip")
        )
        self.assertEqual(
            "/somedir/path", strip_segment_parameters("/somedir/path,heads%2Ftip,bar")
        )
        # Check relative references with relative paths
        self.assertEqual("", strip_segment_parameters(",key1=val1"))
        self.assertEqual("foo/", strip_segment_parameters("foo/,key1=val1"))
        self.assertEqual("foo", strip_segment_parameters("foo,key1=val1"))
        self.assertEqual(
            "foo/base,la=bla/other/elements",
            strip_segment_parameters("foo/base,la=bla/other/elements"),
        )
        self.assertEqual(
            "foo/base,la=bla/other/elements",
            strip_segment_parameters("foo/base,la=bla/other/elements,a=b"),
        )
        # TODO: Check full URLs as well as relative references

    def test_split_segment_parameters_raw(self):
        # The raw form returns (url_without_parameters, list_of_strings);
        # the parameter strings are returned still percent-escaped.
        split_segment_parameters_raw = urlutils.split_segment_parameters_raw
        # Check relative references with absolute paths
        self.assertEqual(("/some/path", []), split_segment_parameters_raw("/some/path"))
        self.assertEqual(
            ("/some/path", ["tip"]), split_segment_parameters_raw("/some/path,tip")
        )
        self.assertEqual(
            ("/some,dir/path", ["tip"]),
            split_segment_parameters_raw("/some,dir/path,tip"),
        )
        self.assertEqual(
            ("/somedir/path", ["heads%2Ftip"]),
            split_segment_parameters_raw("/somedir/path,heads%2Ftip"),
        )
        self.assertEqual(
            ("/somedir/path", ["heads%2Ftip", "bar"]),
            split_segment_parameters_raw("/somedir/path,heads%2Ftip,bar"),
        )
        # Check relative references with relative paths
        self.assertEqual(
            ("", ["key1=val1"]), split_segment_parameters_raw(",key1=val1")
        )
        self.assertEqual(
            ("foo/", ["key1=val1"]), split_segment_parameters_raw("foo/,key1=val1")
        )
        self.assertEqual(
            ("foo", ["key1=val1"]), split_segment_parameters_raw("foo,key1=val1")
        )
        self.assertEqual(
            ("foo/base,la=bla/other/elements", []),
            split_segment_parameters_raw("foo/base,la=bla/other/elements"),
        )
        self.assertEqual(
            ("foo/base,la=bla/other/elements", ["a=b"]),
            split_segment_parameters_raw("foo/base,la=bla/other/elements,a=b"),
        )
        # TODO: Check full URLs as well as relative references

    def test_split_segment_parameters(self):
        # The dict form returns (url_without_parameters, {key: value}); a
        # parameter without "=" raises InvalidURL.
        split_segment_parameters = urlutils.split_segment_parameters
        # Check relative references with absolute paths
        self.assertEqual(("/some/path", {}), split_segment_parameters("/some/path"))
        self.assertEqual(
            ("/some/path", {"branch": "tip"}),
            split_segment_parameters("/some/path,branch=tip"),
        )
        self.assertEqual(
            ("/some,dir/path", {"branch": "tip"}),
            split_segment_parameters("/some,dir/path,branch=tip"),
        )
        self.assertEqual(
            ("/somedir/path", {"ref": "heads%2Ftip"}),
            split_segment_parameters("/somedir/path,ref=heads%2Ftip"),
        )
        self.assertEqual(
            ("/somedir/path", {"ref": "heads%2Ftip", "key1": "val1"}),
            split_segment_parameters("/somedir/path,ref=heads%2Ftip,key1=val1"),
        )
        # Only the first "=" separates key from value.
        self.assertEqual(
            ("/somedir/path", {"ref": "heads%2F=tip"}),
            split_segment_parameters("/somedir/path,ref=heads%2F=tip"),
        )
        # Check relative references with relative paths
        self.assertEqual(("", {"key1": "val1"}), split_segment_parameters(",key1=val1"))
        self.assertEqual(
            ("foo/", {"key1": "val1"}), split_segment_parameters("foo/,key1=val1")
        )
        self.assertEqual(
            ("foo/base,key1=val1/other/elements", {}),
            split_segment_parameters("foo/base,key1=val1/other/elements"),
        )
        self.assertEqual(
            ("foo/base,key1=val1/other/elements", {"key2": "val2"}),
            split_segment_parameters("foo/base,key1=val1/other/elements,key2=val2"),
        )
        self.assertRaises(
            urlutils.InvalidURL, split_segment_parameters, "foo/base,key1"
        )
        # TODO: Check full URLs as well as relative references

    def test_win32_strip_local_trailing_slash(self):
        # The win32 variant leaves bare roots and drive roots alone and only
        # strips a slash that follows a real path component.
        strip = urlutils._win32_strip_local_trailing_slash
        self.assertEqual("file://", strip("file://"))
        self.assertEqual("file:///", strip("file:///"))
        self.assertEqual("file:///C", strip("file:///C"))
        self.assertEqual("file:///C:", strip("file:///C:"))
        self.assertEqual("file:///d|", strip("file:///d|"))
        self.assertEqual("file:///C:/", strip("file:///C:/"))
        self.assertEqual("file:///C:/a", strip("file:///C:/a/"))

    def test_strip_trailing_slash(self):
        # A trailing slash is removed unless it is the root of the URL.
        sts = urlutils.strip_trailing_slash
        if sys.platform == "win32":
            self.assertEqual("file:///C|/", sts("file:///C|/"))
            self.assertEqual("file:///C:/foo", sts("file:///C:/foo"))
            self.assertEqual("file:///C|/foo", sts("file:///C|/foo/"))
        else:
            self.assertEqual("file:///", sts("file:///"))
            self.assertEqual("file:///foo", sts("file:///foo"))
            self.assertEqual("file:///foo", sts("file:///foo/"))
        self.assertEqual("http://host/", sts("http://host/"))
        self.assertEqual("http://host/foo", sts("http://host/foo"))
        self.assertEqual("http://host/foo", sts("http://host/foo/"))
        # No need to fail just because the slash is missing
        self.assertEqual("http://host", sts("http://host"))
        # TODO: jam 20060502 Should this raise InvalidURL?
        self.assertEqual("file://", sts("file://"))
        self.assertEqual(
            "random+scheme://user:pass@ahost:port/path",
            sts("random+scheme://user:pass@ahost:port/path"),
        )
        self.assertEqual(
            "random+scheme://user:pass@ahost:port/path",
            sts("random+scheme://user:pass@ahost:port/path/"),
        )
        self.assertEqual(
            "random+scheme://user:pass@ahost:port/",
            sts("random+scheme://user:pass@ahost:port/"),
        )
        # Make sure relative paths work too
        self.assertEqual("path/to/foo", sts("path/to/foo"))
        self.assertEqual("path/to/foo", sts("path/to/foo/"))
        self.assertEqual("../to/foo", sts("../to/foo/"))
        self.assertEqual("path/../foo", sts("path/../foo/"))

    def test_unescape_for_display_utf8(self):
        # Test that URLs are converted to nice unicode strings for display
        def test(expected, url, encoding="utf-8"):
            # The display form must always be a str, whatever ``encoding``
            # is requested.
            disp_url = urlutils.unescape_for_display(url, encoding=encoding)
            self.assertIsInstance(disp_url, str)
            self.assertEqual(expected, disp_url)

        test("http://foo", "http://foo")
        # file:// URLs are displayed as local paths.
        if sys.platform == "win32":
            test("C:/foo/path", "file:///C|/foo/path")
            test("C:/foo/path", "file:///C:/foo/path")
        else:
            test("/foo/path", "file:///foo/path")
        test("http://foo/%2Fbaz", "http://foo/%2Fbaz")
        test("http://host/r\xe4ksm\xf6rg\xe5s", "http://host/r%C3%A4ksm%C3%B6rg%C3%A5s")
        # Make sure special escaped characters stay escaped
        test(
            "http://host/%3B%2F%3F%3A%40%26%3D%2B%24%2C%23",
            "http://host/%3B%2F%3F%3A%40%26%3D%2B%24%2C%23",
        )
        # Can we handle sections that don't have utf-8 encoding?
        test(
            "http://host/%EE%EE%EE/r\xe4ksm\xf6rg\xe5s",
            "http://host/%EE%EE%EE/r%C3%A4ksm%C3%B6rg%C3%A5s",
        )
        # Test encoding into output that can handle some characters
        test(
            "http://host/%EE%EE%EE/r\xe4ksm\xf6rg\xe5s",
            "http://host/%EE%EE%EE/r%C3%A4ksm%C3%B6rg%C3%A5s",
            encoding="iso-8859-1",
        )
        # This one can be encoded into utf8
        test(
            "http://host/\u062c\u0648\u062c\u0648",
            "http://host/%d8%ac%d9%88%d8%ac%d9%88",
            encoding="utf-8",
        )
        # This can't be put into 8859-1 and so stays as escapes
        test(
            "http://host/%d8%ac%d9%88%d8%ac%d9%88",
            "http://host/%d8%ac%d9%88%d8%ac%d9%88",
            encoding="iso-8859-1",
        )

    def test_escape(self):
        # "%" and " " are escaped, "/" and "~" are left alone, str input is
        # escaped via its UTF-8 bytes, bytes input is escaped byte-for-byte,
        # and the result is a str.
        self.assertEqual("%25", urlutils.escape("%"))
        self.assertEqual("/~", urlutils.escape("/~"))
        self.assertEqual("/~", urlutils.escape("/~", safe="/"))
        self.assertEqual("%20", urlutils.escape(" "))
        self.assertEqual("%C3%A5", urlutils.escape("\xe5"))
        self.assertEqual("%E5", urlutils.escape(b"\xe5"))
        self.assertIsInstance(urlutils.escape("\xe5"), str)

    def test_escape_tildes(self):
        # "~" must pass through escape() unchanged.
        self.assertEqual("~foo", urlutils.escape("~foo"))

    def test_unescape(self):
        # Percent-escapes are decoded, with multi-byte sequences read as UTF-8.
        self.assertEqual("%", urlutils.unescape("%25"))
        self.assertEqual("\xe5", urlutils.unescape("%C3%A5"))
        # NOTE(review): identical to the assertion above -- presumably a
        # leftover of a former str/bytes pair; confirm before removing.
        self.assertEqual("\xe5", urlutils.unescape("%C3%A5"))

    def test_escape_unescape(self):
        # unescape() must invert escape().
        self.assertEqual("\xe5", urlutils.unescape(urlutils.escape("\xe5")))
        self.assertEqual("%", urlutils.unescape(urlutils.escape("%")))

    def test_relative_url(self):
        def test(expected, base, other):
            # ``expected`` is how ``other`` should be written relative to
            # ``base``.
            result = urlutils.relative_url(base, other)
            self.assertEqual(expected, result)

        test("a", "http://host/", "http://host/a")
        # A different scheme or host cannot be expressed relatively, so the
        # target URL is returned unchanged.
        test(
            "http://entirely/different",
            "sftp://host/branch",
            "http://entirely/different",
        )
        test(
            "../person/feature",
            "http://host/branch/mainline",
            "http://host/branch/person/feature",
        )
        test("..", "http://host/branch", "http://host/")
        test("http://host2/branch", "http://host1/branch", "http://host2/branch")
        test(".", "http://host1/branch", "http://host1/branch")
        test(
            "../../../branch/2b",
            "file:///home/jelmer/foo/bar/2b",
            "file:///home/jelmer/branch/2b",
        )
        test(
            "../../branch/2b",
            "sftp://host/home/jelmer/bar/2b",
            "sftp://host/home/jelmer/branch/2b",
        )
        test(
            "../../branch/feature/%2b",
            "http://host/home/jelmer/bar/%2b",
            "http://host/home/jelmer/branch/feature/%2b",
        )
        test(
            "../../branch/feature/2b",
            "http://host/home/jelmer/bar/2b/",
            "http://host/home/jelmer/branch/feature/2b",
        )
        # relative_url should preserve a trailing slash
        test(
            "../../branch/feature/2b/",
            "http://host/home/jelmer/bar/2b/",
            "http://host/home/jelmer/branch/feature/2b/",
        )
        test(
            "../../branch/feature/2b/",
            "http://host/home/jelmer/bar/2b",
            "http://host/home/jelmer/branch/feature/2b/",
        )
        # TODO: treat http://host as http://host/
        # relative_url is typically called from a branch.base or
        # transport.base which always ends with a /
        # test('a', 'http://host', 'http://host/a')
        test("http://host/a", "http://host", "http://host/a")
        # test('.', 'http://host', 'http://host/')
        test("http://host/", "http://host", "http://host/")
        # test('.', 'http://host/', 'http://host')
        test("http://host", "http://host/", "http://host")
        # On Windows file:///C:/path/to and file:///D:/other/path
        # should not use relative url over the non-existent '/' directory.
        if sys.platform == "win32":
            # on the same drive
            test("../../other/path", "file:///C:/path/to", "file:///C:/other/path")
            # ~next two tests is failed, i.e. urlutils.relative_url expects
            # ~to see normalized file URLs?
            # ~test('../../other/path',
            # ~ 'file:///C:/path/to', 'file:///c:/other/path')
            # ~test('../../other/path',
            # ~ 'file:///C:/path/to', 'file:///C|/other/path')
            # check UNC paths too
            test(
                "../../other/path",
                "file://HOST/base/path/to",
                "file://HOST/base/other/path",
            )
            # on different drives
            test("file:///D:/other/path", "file:///C:/path/to", "file:///D:/other/path")
            # TODO: strictly saying in UNC path //HOST/base is full analog
            # of drive letter for hard disk, and this situation is also
            # should be exception from rules.
            # [bialix 20071221]


class TestCwdToURL(TestCaseInTempDir):
    """Test that local_path_to_url works based on the cwd."""

    def test_dot(self):
        # This test will fail if getcwd is not ascii
        os.mkdir("mytest")
        os.chdir("mytest")
        url = urlutils.local_path_to_url(".")
        self.assertEndsWith(url, "/mytest")

    def test_non_ascii(self):
        # Same as test_dot, but with a non-ASCII directory name; skipped
        # when the directory cannot be created.
        try:
            os.mkdir("dod\xe9")
        except UnicodeError as err:
            raise TestSkipped("cannot create unicode directory") from err
        os.chdir("dod\xe9")
        # On Mac OSX this directory is actually:
        # u'/dode\u0301' => '/dode\xcc\x81
        # but we should normalize it back to
        # u'/dod\xe9' => '/dod\xc3\xa9'
        url = urlutils.local_path_to_url(".")
        self.assertEndsWith(url, "/dod%C3%A9")


class TestDeriveToLocation(TestCase):
    """Test that the mapping of FROM_LOCATION to TO_LOCATION works."""

    def test_to_locations_derived_from_paths(self):
        # For local paths the derived location is the last path component,
        # including Windows drive-relative forms such as "c:bar".
        derive = urlutils.derive_to_location
        self.assertEqual("bar", derive("bar"))
        self.assertEqual("bar", derive("../bar"))
        self.assertEqual("bar", derive("/foo/bar"))
        self.assertEqual("bar", derive("c:/foo/bar"))
        self.assertEqual("bar", derive("c:bar"))

    def test_to_locations_derived_from_urls(self):
        # For URLs the derived location is the last component after the
        # scheme and host.
        derive = urlutils.derive_to_location
        self.assertEqual("bar", derive("http://foo/bar"))
        self.assertEqual("bar", derive("bzr+ssh://foo/bar"))
        self.assertEqual("foo-bar", derive("lp:foo-bar"))


class TestRebaseURL(TestCase):
    """Test the behavior of rebase_url."""

    def test_non_relative(self):
        # A URL that is not relative (absolute URL or absolute path) is
        # returned unchanged.
        result = urlutils.rebase_url("file://foo", "file://foo", "file://foo/bar")
        self.assertEqual("file://foo", result)
        result = urlutils.rebase_url("/foo", "file://foo", "file://foo/bar")
        self.assertEqual("/foo", result)

    def test_different_ports(self):
        # Bases that differ in port cannot be rebased between; the error
        # message names both URLs.
        with self.assertRaises(urlutils.InvalidRebaseURLs) as cm:
            urlutils.rebase_url("foo", "http://bar:80", "http://bar:81")
        self.assertEqual(
            str(cm.exception),
            "URLs differ by more than path: 'http://bar:80' and 'http://bar:81'",
        )

    def test_different_hosts(self):
        # Same check as above, with differing hosts.
        with self.assertRaises(urlutils.InvalidRebaseURLs) as cm:
            urlutils.rebase_url("foo", "http://bar", "http://baz")
        self.assertEqual(
            str(cm.exception),
            "URLs differ by more than path: 'http://bar' and 'http://baz'",
        )

    def test_different_protocol(self):
        # Same check as above, with differing schemes.
        with self.assertRaises(urlutils.InvalidRebaseURLs) as cm:
            urlutils.rebase_url("foo", "http://bar", "ftp://bar")
        self.assertEqual(
            str(cm.exception),
            "URLs differ by more than path: 'http://bar' and 'ftp://bar'",
        )

    def test_rebase_success(self):
        # Arguments are (url, old_base, new_base); the result is ``url``
        # rewritten so that it is relative to ``new_base``.
        self.assertEqual(
            "../bar", urlutils.rebase_url("bar", "http://baz/", "http://baz/qux")
        )
        self.assertEqual(
            "qux/bar", urlutils.rebase_url("bar", "http://baz/qux", "http://baz/")
        )
        self.assertEqual(
            ".", urlutils.rebase_url("foo", "http://bar/", "http://bar/foo/")
        )
        self.assertEqual(
            "qux/bar",
            urlutils.rebase_url("../bar", "http://baz/qux/foo", "http://baz/"),
        )

    def test_determine_relative_path(self):
        # determine_relative_path(from, to) gives the path that leads from
        # the first argument to the second.
        self.assertEqual(
            "../../baz/bar", urlutils.determine_relative_path("/qux/quxx", "/baz/bar")
        )
        self.assertEqual("..", urlutils.determine_relative_path("/bar/baz", "/bar"))
        self.assertEqual("baz", urlutils.determine_relative_path("/bar", "/bar/baz"))
        self.assertEqual(".", urlutils.determine_relative_path("/bar", "/bar"))


class TestParseURL(TestCase):
    """Tests for ``urlutils.parse_url``.

    The parsed value is compared against a 6-tuple of
    (scheme, user, password, host, port, path).
    """

    def test_parse_simple(self):
        parsed = urlutils.parse_url("http://example.com:80/one")
        self.assertEqual(("http", None, None, "example.com", 80, "/one"), parsed)

    def test_ipv6(self):
        # The brackets around an IPv6 literal are not part of the host.
        parsed = urlutils.parse_url("http://[1:2:3::40]/one")
        self.assertEqual(("http", None, None, "1:2:3::40", None, "/one"), parsed)

    def test_ipv6_port(self):
        parsed = urlutils.parse_url("http://[1:2:3::40]:80/one")
        self.assertEqual(("http", None, None, "1:2:3::40", 80, "/one"), parsed)


class TestURL(TestCase):
    """Tests for the ``urlutils.URL`` object."""

    def test_parse_simple(self):
        parsed = urlutils.URL.from_string("http://example.com:80/one")
        self.assertEqual("http", parsed.scheme)
        self.assertIs(None, parsed.user)
        self.assertIs(None, parsed.password)
        self.assertEqual("example.com", parsed.host)
        self.assertEqual(80, parsed.port)
        self.assertEqual("/one", parsed.path)

    def test_ipv6(self):
        parsed = urlutils.URL.from_string("http://[1:2:3::40]/one")
        self.assertEqual("http", parsed.scheme)
        self.assertIs(None, parsed.port)
        self.assertIs(None, parsed.user)
        self.assertIs(None, parsed.password)
        self.assertEqual("1:2:3::40", parsed.host)
        self.assertEqual("/one", parsed.path)

    def test_ipv6_port(self):
        parsed = urlutils.URL.from_string("http://[1:2:3::40]:80/one")
        self.assertEqual("http", parsed.scheme)
        self.assertEqual("1:2:3::40", parsed.host)
        self.assertIs(None, parsed.user)
        self.assertIs(None, parsed.password)
        self.assertEqual(80, parsed.port)
        self.assertEqual("/one", parsed.path)

    def test_quoted(self):
        # The quoted_* attributes keep the percent-escapes as written; the
        # plain attributes hold the decoded values.
        parsed = urlutils.URL.from_string(
            "http://ro%62ey:h%40t@ex%41mple.com:2222/path"
        )
        self.assertEqual(parsed.quoted_host, "ex%41mple.com")
        self.assertEqual(parsed.host, "exAmple.com")
        self.assertEqual(parsed.port, 2222)
        self.assertEqual(parsed.quoted_user, "ro%62ey")
        self.assertEqual(parsed.user, "robey")
        self.assertEqual(parsed.quoted_password, "h%40t")
        self.assertEqual(parsed.password, "h@t")
        self.assertEqual(parsed.path, "/path")

    def test_eq(self):
        # Equality is by value, and changes once ``path`` is reassigned.
        parsed1 = urlutils.URL.from_string("http://[1:2:3::40]:80/one")
        parsed2 = urlutils.URL.from_string("http://[1:2:3::40]:80/one")
        self.assertEqual(parsed1, parsed2)
        self.assertEqual(parsed1, parsed1)
        parsed2.path = "/two"
        self.assertNotEqual(parsed1, parsed2)

    def test_repr(self):
        parsed = urlutils.URL.from_string("http://[1:2:3::40]:80/one")
        # NOTE(review): the expected value is an empty string here, which
        # looks like an angle-bracketed repr literal ("<...>") that was
        # stripped when this copy of the file was produced -- restore the
        # original expected string from upstream.
        self.assertEqual(
            "", repr(parsed)
        )

    def test_str(self):
        # str() gives back the URL text, brackets and port included.
        parsed = urlutils.URL.from_string("http://[1:2:3::40]:80/one")
        self.assertEqual("http://[1:2:3::40]:80/one", str(parsed))

    def test_combine_paths(self):
        # Relative paths are resolved against the base, ".." beyond the
        # root stays at the root, and an absolute path replaces the base.
        combine = urlutils.combine_paths
        self.assertEqual(
            "/home/sarah/project/foo", combine("/home/sarah", "project/foo")
        )
        self.assertEqual("/etc", combine("/home/sarah", "../../etc"))
        self.assertEqual("/etc", combine("/home/sarah", "../../../etc"))
        self.assertEqual("/etc", combine("/home/sarah", "/etc"))

    def test_clone(self):
        url =
urlutils.URL.from_string("http://[1:2:3::40]:80/one") url1 = url.clone("two") self.assertEqual("/one/two", url1.path) url2 = url.clone("/two") self.assertEqual("/two", url2.path) url3 = url.clone() self.assertIsNot(url, url3) self.assertEqual(url, url3) def test_parse_empty_port(self): parsed = urlutils.URL.from_string("http://example.com:/one") self.assertEqual("http", parsed.scheme) self.assertIs(None, parsed.user) self.assertIs(None, parsed.password) self.assertEqual("example.com", parsed.host) self.assertIs(None, parsed.port) self.assertEqual("/one", parsed.path) class TestFileRelpath(TestCase): # GZ 2011-11-18: A way to override all path handling functions to one # platform or another for testing would be nice. def _with_posix_paths(self): self.overrideAttr( urlutils, "local_path_from_url", urlutils._posix_local_path_from_url ) self.overrideAttr(urlutils, "MIN_ABS_FILEURL_LENGTH", len("file:///")) self.overrideAttr(osutils, "normpath", osutils._posix_normpath) self.overrideAttr(osutils, "abspath", posixpath.abspath) self.overrideAttr(osutils, "normpath", osutils._posix_normpath) self.overrideAttr(osutils, "pathjoin", posixpath.join) self.overrideAttr(osutils, "split", posixpath.split) self.overrideAttr(osutils, "MIN_ABS_PATHLENGTH", 1) def _with_win32_paths(self): self.overrideAttr( urlutils, "local_path_from_url", urlutils._win32_local_path_from_url ) self.overrideAttr( urlutils, "MIN_ABS_FILEURL_LENGTH", urlutils.WIN32_MIN_ABS_FILEURL_LENGTH ) self.overrideAttr(osutils, "abspath", osutils._win32_abspath) self.overrideAttr(osutils, "normpath", osutils._win32_normpath) self.overrideAttr(osutils, "split", ntpath.split) self.overrideAttr(osutils, "MIN_ABS_PATHLENGTH", 3) @unittest.skipIf( sys.platform == "win32", "file_relpath is now implemented in Rust and reads platform-specific " "MIN_ABS_FILEURL_LENGTH at compile time; the Python overrideAttr " "shims used here no longer affect it.", ) def test_same_url_posix(self): self._with_posix_paths() 
self.assertEqual("", urlutils.file_relpath("file:///a", "file:///a")) self.assertEqual("", urlutils.file_relpath("file:///a", "file:///a/")) self.assertEqual("", urlutils.file_relpath("file:///a/", "file:///a")) def test_same_url_win32(self): if sys.platform != "win32": raise TestSkipped( "broken on non-windows; _with_win32_paths no longer works for rust" ) self._with_win32_paths() self.assertEqual("", urlutils.file_relpath("file:///A:/", "file:///A:/")) self.assertEqual("", urlutils.file_relpath("file:///A|/", "file:///A:/")) self.assertEqual("", urlutils.file_relpath("file:///A:/b/", "file:///A:/b/")) self.assertEqual("", urlutils.file_relpath("file:///A:/b", "file:///A:/b/")) self.assertEqual("", urlutils.file_relpath("file:///A:/b/", "file:///A:/b")) @unittest.skipIf( sys.platform == "win32", "Rust-backed file_relpath uses platform-native MIN_ABS_FILEURL_LENGTH", ) def test_child_posix(self): self._with_posix_paths() self.assertEqual("b", urlutils.file_relpath("file:///a", "file:///a/b")) self.assertEqual("b", urlutils.file_relpath("file:///a/", "file:///a/b")) self.assertEqual("b/c", urlutils.file_relpath("file:///a", "file:///a/b/c")) def test_child_win32(self): if sys.platform != "win32": raise TestSkipped( "broken on non-windows; _with_win32_paths no longer works for rust" ) self._with_win32_paths() self.assertEqual("b", urlutils.file_relpath("file:///A:/", "file:///A:/b")) self.assertEqual("b", urlutils.file_relpath("file:///A|/", "file:///A:/b")) self.assertEqual("c", urlutils.file_relpath("file:///A:/b", "file:///A:/b/c")) self.assertEqual("c", urlutils.file_relpath("file:///A:/b/", "file:///A:/b/c")) self.assertEqual( "c/d", urlutils.file_relpath("file:///A:/b", "file:///A:/b/c/d") ) @unittest.skipIf( sys.platform == "win32", "Rust-backed file_relpath uses platform-native MIN_ABS_FILEURL_LENGTH", ) def test_sibling_posix(self): self._with_posix_paths() self.assertRaises( PathNotChild, urlutils.file_relpath, "file:///a/b", "file:///a/c" ) 
self.assertRaises( PathNotChild, urlutils.file_relpath, "file:///a/b/", "file:///a/c" ) self.assertRaises( PathNotChild, urlutils.file_relpath, "file:///a/b/", "file:///a/c/" ) def test_sibling_win32(self): self._with_win32_paths() self.assertRaises( PathNotChild, urlutils.file_relpath, "file:///A:/b", "file:///A:/c" ) self.assertRaises( PathNotChild, urlutils.file_relpath, "file:///A:/b/", "file:///A:/c" ) self.assertRaises( PathNotChild, urlutils.file_relpath, "file:///A:/b/", "file:///A:/c/" ) @unittest.skipIf( sys.platform == "win32", "Rust-backed file_relpath uses platform-native MIN_ABS_FILEURL_LENGTH", ) def test_parent_posix(self): self._with_posix_paths() self.assertRaises( PathNotChild, urlutils.file_relpath, "file:///a/b", "file:///a" ) self.assertRaises( PathNotChild, urlutils.file_relpath, "file:///a/b", "file:///a/" ) def test_parent_win32(self): self._with_win32_paths() self.assertRaises( PathNotChild, urlutils.file_relpath, "file:///A:/b", "file:///A:/" ) self.assertRaises( PathNotChild, urlutils.file_relpath, "file:///A:/b/c", "file:///A:/b" ) class QuoteTests(TestCase): def test_quote(self): self.assertEqual("abc%20def", urlutils.quote("abc def")) self.assertEqual("abc%2Fdef", urlutils.quote("abc/def", safe="")) self.assertEqual("abc/def", urlutils.quote("abc/def", safe="/")) def test_quote_tildes(self): self.assertEqual("~foo", urlutils.quote("~foo")) self.assertEqual("~foo", urlutils.quote("~foo", safe="/~")) def test_unquote(self): self.assertEqual("%", urlutils.unquote("%25")) self.assertEqual("\xe5", urlutils.unquote("%C3%A5")) self.assertEqual("\xe5", urlutils.unquote("\xe5")) def test_unquote_to_bytes(self): self.assertEqual(b"%", urlutils.unquote_to_bytes("%25")) self.assertEqual(b"\xc3\xa5", urlutils.unquote_to_bytes("%C3%A5")) dromedary-0.1.5/dromedary/tests/test_webdav.py000066400000000000000000000024241520150013200215030ustar00rootroot00000000000000# Copyright (C) 2026 Jelmer Vernooij # # This program is free software; you can 
redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Re-entry point for the WebDAV tests under the global test-discovery path. The Makefile runs ``unittest discover`` against ``dromedary/tests`` only, so this module uses the ``load_tests`` protocol to pull in the webdav test suite that lives in ``dromedary/webdav/tests/``. """ import unittest def load_tests(loader, basic_tests, pattern): """Delegate to the webdav test package's ``load_tests``.""" from dromedary.webdav import tests as webdav_tests suite = unittest.TestSuite() return webdav_tests.load_tests(loader, suite, pattern) dromedary-0.1.5/dromedary/tests/transport_util.py000066400000000000000000000031701520150013200222640ustar00rootroot00000000000000# Copyright (C) 2007-2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Utilities for testing transport connections and hooks.""" from dromedary import Transport # SFTPTransport is the only bundled transport that properly counts connections # at the moment. from . import test_sftp_transport # type: ignore[attr-defined] class TestCaseWithConnectionHookedTransport(test_sftp_transport.TestCaseWithSFTPServer): """Test case that tracks transport connections using hooks.""" def setUp(self): """Set up the test case with connection tracking.""" super().setUp() self.reset_connections() def start_logging_connections(self): """Start logging transport connections using transport hooks.""" Transport.hooks.install_named_hook( "post_connect", self.connections.append, None ) def reset_connections(self): """Reset the connections list to start fresh tracking.""" self.connections = [] dromedary-0.1.5/dromedary/trace.py000066400000000000000000000163731520150013200171400ustar00rootroot00000000000000# Copyright (C) 2007 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Implementation of Transport that traces transport operations. 
This does not change the transport behaviour at all, merely records every call and then delegates it. """ import os from collections.abc import Iterator from typing import IO, TYPE_CHECKING from dromedary import Transport, decorator if TYPE_CHECKING: from dromedary import FileStream, Lock class TransportTraceDecorator(decorator.TransportDecorator): """A tracing decorator for Transports. Calls that potentially perform IO are logged to self._activity. The _activity attribute is shared as the transport is cloned, but not if a new transport is created without cloning. Not all operations are logged at this point, if you need an unlogged operation please add a test to the tests of this transport, for the logging of the operation you want logged. See also TransportLogDecorator, that records a machine-readable log in memory for eg testing. """ def __init__( self, url: str, _decorated: Transport | None = None, _from_transport: "TransportTraceDecorator | None" = None, ) -> None: """Set the 'base' path where files will be stored. _decorated is a private parameter for cloning. 
""" super().__init__(url, _decorated) if _from_transport is None: # newly created self._activity: list[tuple] = [] else: # cloned self._activity = _from_transport._activity def append_file(self, relpath: str, f: IO[bytes], mode: int | None = None) -> int: """See Transport.append_file().""" return self._decorated.append_file(relpath, f, mode=mode) def append_bytes(self, relpath: str, bytes: bytes, mode: int | None = None) -> int: """See Transport.append_bytes().""" return self._decorated.append_bytes(relpath, bytes, mode=mode) def delete(self, relpath: str) -> None: """See Transport.delete().""" self._activity.append(("delete", relpath)) return self._decorated.delete(relpath) def delete_tree(self, relpath: str) -> None: """See Transport.delete_tree().""" return self._decorated.delete_tree(relpath) @classmethod def _get_url_prefix(cls) -> str: """Tracing transports are identified by 'trace+'.""" return "trace+" def get(self, relpath: str) -> IO[bytes]: """See Transport.get().""" self._trace(("get", relpath)) return self._decorated.get(relpath) def has(self, relpath: str) -> bool: """See Transport.has().""" return self._decorated.has(relpath) def is_readonly(self) -> bool: """See Transport.is_readonly.""" return self._decorated.is_readonly() def mkdir(self, relpath: str, mode: int | None = None) -> None: """See Transport.mkdir().""" self._trace(("mkdir", relpath, mode)) return self._decorated.mkdir(relpath, mode) def open_write_stream(self, relpath: str, mode: int | None = None) -> "FileStream": """See Transport.open_write_stream.""" return self._decorated.open_write_stream(relpath, mode=mode) def put_file(self, relpath: str, f: IO[bytes], mode: int | None = None) -> int: """See Transport.put_file().""" return self._decorated.put_file(relpath, f, mode) def put_bytes(self, relpath: str, raw_bytes: bytes, mode: int | None = None) -> int: """See Transport.put_bytes().""" self._trace(("put_bytes", relpath, len(raw_bytes), mode)) return self._decorated.put_bytes(relpath, 
raw_bytes, mode) def put_bytes_non_atomic( self, relpath: str, raw_bytes: bytes, mode: int | None = None, create_parent_dir: bool = False, dir_mode: int | None = None, ) -> None: """See Transport.put_bytes_non_atomic.""" self._trace( ( "put_bytes_non_atomic", relpath, len(raw_bytes), mode, create_parent_dir, dir_mode, ) ) return self._decorated.put_bytes_non_atomic( relpath, raw_bytes, mode=mode, create_parent_dir=create_parent_dir, dir_mode=dir_mode, ) def listable(self) -> bool: """See Transport.listable.""" return self._decorated.listable() def iter_files_recursive(self) -> Iterator[str]: """See Transport.iter_files_recursive().""" return self._decorated.iter_files_recursive() def list_dir(self, relpath: str) -> list[str]: """See Transport.list_dir().""" return self._decorated.list_dir(relpath) def readv( self, relpath: str, offsets: list[tuple[int, int]], adjust_for_latency: bool = False, upper_limit: int | None = None, ) -> Iterator[tuple[int, bytes]]: """Read multiple ranges from a file.""" # we override at the readv() level rather than _readv() so that any # latency adjustments will be done by the underlying transport self._trace(("readv", relpath, offsets, adjust_for_latency, upper_limit)) return self._decorated.readv(relpath, offsets, adjust_for_latency, upper_limit) def recommended_page_size(self) -> int: """See Transport.recommended_page_size().""" return self._decorated.recommended_page_size() def rename(self, rel_from: str, rel_to: str) -> None: """See Transport.rename.""" self._activity.append(("rename", rel_from, rel_to)) return self._decorated.rename(rel_from, rel_to) def rmdir(self, relpath: str) -> None: """See Transport.rmdir.""" self._trace(("rmdir", relpath)) return self._decorated.rmdir(relpath) def stat(self, relpath: str) -> os.stat_result: """See Transport.stat().""" return self._decorated.stat(relpath) def lock_read(self, relpath: str) -> "Lock": """See Transport.lock_read.""" return self._decorated.lock_read(relpath) def lock_write(self, 
relpath: str) -> "Lock": """See Transport.lock_write.""" return self._decorated.lock_write(relpath) def _trace(self, operation_tuple: tuple) -> None: """Record that a transport operation occurred.""" self._activity.append(operation_tuple) def get_test_permutations() -> list[tuple[type, type]]: """Return the permutations to be used in testing.""" from dromedary.tests import test_server return [(TransportTraceDecorator, test_server.TraceServer)] dromedary-0.1.5/dromedary/unlistable.py000066400000000000000000000023051520150013200201720ustar00rootroot00000000000000# Copyright (C) 2005, 2006 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Transport implementation that disables listing to simulate HTTP cheaply.""" from dromedary._transport_rs.unlistable import UnlistableTransportDecorator __all__ = ["UnlistableTransportDecorator", "get_test_permutations"] def get_test_permutations() -> list[tuple[type, type]]: """Return the permutations to be used in testing.""" from dromedary.tests import test_server return [ (UnlistableTransportDecorator, test_server.UnlistableServer), ] dromedary-0.1.5/dromedary/urlutils.py000066400000000000000000000200531520150013200177130ustar00rootroot00000000000000# Copyright (C) 2006-2010 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """A collection of function for handling URL operations.""" import sys from urllib import parse as urlparse from dromedary.osutils import pathjoin, splitpath from . import errors class InvalidURL(errors.PathError): """Exception raised when an invalid URL is encountered. This is raised when a URL string cannot be parsed or is malformed. 
""" _fmt = "Invalid url supplied to transport: %(path)r%(extra)s" class InvalidURLJoin(errors.PathError): """Exception raised when a URL join operation is invalid.""" _fmt = "Invalid URL join request: %(reason)s: %(base)r + %(join_args)r" def __init__(self, reason: str, base: str, join_args: tuple[str, ...]) -> None: """Initialize with the join failure reason, base URL, and join args.""" self.reason = reason self.base = base self.join_args = join_args errors.PathError.__init__(self, base, reason) class InvalidRebaseURLs(errors.TransportError): """Exception raised when URLs cannot be rebased. This occurs when trying to rebase URLs that differ by more than just their paths. """ _fmt = "URLs differ by more than path: %(old_base)r and %(new_base)r" def __init__(self, old_base: str, new_base: str) -> None: """Initialize with the original and new base URLs.""" self.old_base = old_base self.new_base = new_base errors.TransportError.__init__(self) quote_from_bytes = urlparse.quote_from_bytes quote = urlparse.quote unquote_to_bytes = urlparse.unquote_to_bytes unquote = urlparse.unquote from ._transport_rs.urlutils import ( # noqa: F401 URL, _find_scheme_and_separator, basename, combine_paths, derive_to_location, dirname, escape, file_relpath, is_url, join, join_segment_parameters, join_segment_parameters_raw, joinpath, local_path_from_url, local_path_to_url, normalize_url, parse_url, relative_url, split, split_segment_parameters, split_segment_parameters_raw, strip_segment_parameters, strip_trailing_slash, unescape, ) from ._transport_rs.urlutils import posix as posix_rs from ._transport_rs.urlutils import win32 as win32_rs _posix_local_path_to_url = posix_rs.local_path_to_url _win32_local_path_to_url = win32_rs.local_path_to_url _win32_local_path_from_url = win32_rs.local_path_from_url _posix_local_path_from_url = posix_rs.local_path_from_url MIN_ABS_FILEURL_LENGTH = len("file:///") WIN32_MIN_ABS_FILEURL_LENGTH = len("file:///C:/") if sys.platform == "win32": 
MIN_ABS_FILEURL_LENGTH = WIN32_MIN_ABS_FILEURL_LENGTH _win32_extract_drive_letter = win32_rs.extract_drive_letter _win32_strip_local_trailing_slash = win32_rs.strip_local_trailing_slash # These are characters that if escaped, should stay that way _no_decode_chars = ";/?:@&=+$,#" _no_decode_ords = [ord(c) for c in _no_decode_chars] _no_decode_hex = [f"{o:02x}" for o in _no_decode_ords] + [ f"{o:02X}" for o in _no_decode_ords ] _hex_display_map = dict( [(f"{o:02x}", bytes([o])) for o in range(256)] + [(f"{o:02X}", bytes([o])) for o in range(256)] ) # These entries get mapped to themselves _hex_display_map.update((hex, b"%" + hex.encode("ascii")) for hex in _no_decode_hex) # These characters shouldn't be percent-encoded, and it's always safe to # unencode them if they are. _url_dont_escape_characters = set( "abcdefghijklmnopqrstuvwxyz" # Lowercase alpha "ABCDEFGHIJKLMNOPQRSTUVWXYZ" # Uppercase alpha "0123456789" # Numbers "-._~" # Unreserved characters ) def _unescape_segment_for_display(segment: str, encoding: str) -> str: """Unescape a segment for display. Helper for unescape_for_display Args: url: A 7-bit ASCII URL encoding: The final output encoding Returns: A unicode string which can be safely encoded into the specified encoding. 
""" str_chunks = segment.split("%") byte_chunks: list[bytes] = [str_chunks[0].encode("utf-8")] for j in range(1, len(str_chunks)): item = str_chunks[j] try: chunk = _hex_display_map[item[:2]] except KeyError: # Put back the percent symbol chunk = b"%" + item[:2].encode("utf-8") except UnicodeDecodeError: chunk = chr(int(item[:2], 16)).encode("utf-8") byte_chunks.append(chunk + item[2:].encode("utf-8")) unescaped = b"".join(byte_chunks) try: decoded = unescaped.decode("utf-8") except UnicodeDecodeError: # If this path segment cannot be properly utf-8 decoded # after doing unescaping we will just leave it alone return segment else: try: decoded.encode(encoding) except UnicodeEncodeError: # If this chunk cannot be encoded in the local # encoding, then we should leave it alone return segment else: # Otherwise take the url decoded one return decoded def unescape_for_display(url: str, encoding: str) -> str: """Decode what you can for a URL, so that we get a nice looking path. This will turn file:// urls into local paths, and try to decode any portions of a http:// style url that it can. Any sections of the URL which can't be represented in the encoding or need to stay as escapes are left alone. Args: url: A 7-bit ASCII URL encoding: The final output encoding Returns: A unicode string which can be safely encoded into the specified encoding. """ if encoding is None: raise ValueError("you cannot specify None for the display encoding") if url.startswith("file://"): try: path = local_path_from_url(url) str(path).encode(encoding) return path except UnicodeError: return url # Split into sections to try to decode utf-8 res = url.split("/") for i in range(1, len(res)): res[i] = _unescape_segment_for_display(res[i], encoding) return "/".join(res) def _is_absolute(url: str) -> bool: return url.startswith("/") def rebase_url(url: str, old_base: str, new_base: str) -> str: """Convert a relative path from an old base URL to a new base URL. The result will be a relative path. 
Absolute paths and full URLs are returned unaltered. """ scheme, _separator = _find_scheme_and_separator(url) if scheme is not None: return url if _is_absolute(url): return url old_parsed = urlparse.urlparse(old_base) new_parsed = urlparse.urlparse(new_base) if (old_parsed[:2]) != (new_parsed[:2]): raise InvalidRebaseURLs(old_base, new_base) return determine_relative_path(new_parsed[2], join(old_parsed[2], url)) def determine_relative_path(from_path: str, to_path: str) -> str: """Determine a relative path from from_path to to_path.""" from_segments = splitpath(from_path) to_segments = splitpath(to_path) count = -1 for count, (from_element, to_element) in enumerate( # noqa: B007 zip( from_segments, to_segments, strict=False, ) ): if from_element != to_element: break else: count += 1 unique_from = from_segments[count:] unique_to = to_segments[count:] segments = [".."] * len(unique_from) + unique_to if len(segments) == 0: return "." return pathjoin(*segments) dromedary-0.1.5/dromedary/version.py000066400000000000000000000040471520150013200175220ustar00rootroot00000000000000#!/usr/bin/env python3 # Copyright (C) 2024 Jelmer Vernooij # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Version information for dromedary.""" __version__ = "0.1.5" # Version information for dromedary version_info = (0, 1, 5, "final", 0) def _format_version_tuple(version_info: tuple) -> str: """Format version tuple into a version string. Args: version_info: Tuple of (major, minor, micro, release_type, sub) Returns: Formatted version string """ if len(version_info) == 2: main_version = "%d.%d" % version_info[:2] else: main_version = "%d.%d.%d" % version_info[:3] if len(version_info) <= 3: return main_version release_type = version_info[3] sub = version_info[4] if release_type == "final" and sub == 0: sub_string = "" elif release_type == "final": sub_string = "." + str(sub) elif release_type == "dev" and sub == 0: sub_string = ".dev" elif release_type == "dev": sub_string = ".dev" + str(sub) elif release_type in ("alpha", "beta"): if version_info[2] == 0: main_version = "%d.%d" % version_info[:2] sub_string = "." + release_type[0] + str(sub) elif release_type == "candidate": sub_string = ".rc" + str(sub) else: return ".".join(map(str, version_info)) return main_version + sub_string version_string = __version__ dromedary-0.1.5/dromedary/webdav/000077500000000000000000000000001520150013200167265ustar00rootroot00000000000000dromedary-0.1.5/dromedary/webdav/__init__.py000066400000000000000000000023471520150013200210450ustar00rootroot00000000000000# Copyright (C) 2006-2009, 2011, 2012 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. 
# # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """An http transport implementing WebDAV on top of dromedary's HTTP transport. This package exposes :class:`HttpDavTransport`, a subclass of :class:`dromedary.http.urllib.HttpTransport` that implements the subset of WebDAV (RFC 4918) needed to support writes over HTTP. The ``http+webdav://`` and ``https+webdav://`` schemes are registered in :mod:`dromedary` so the transport is available out of the box. """ from dromedary.webdav.webdav import HttpDavTransport __all__ = ["HttpDavTransport"] dromedary-0.1.5/dromedary/webdav/tests/000077500000000000000000000000001520150013200200705ustar00rootroot00000000000000dromedary-0.1.5/dromedary/webdav/tests/__init__.py000066400000000000000000000020711520150013200222010ustar00rootroot00000000000000# Copyright (C) 2008 by Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA """Tests for dromedary's WebDAV transport.""" def load_tests(loader, basic_tests, pattern): """Collect the webdav test modules into the given suite.""" testmod_names = [ "test_webdav", ] for tmn in testmod_names: basic_tests.addTest(loader.loadTestsFromName(f"{__name__}.{tmn}")) return basic_tests dromedary-0.1.5/dromedary/webdav/tests/dav_server.py000066400000000000000000000404171520150013200226100ustar00rootroot00000000000000# Copyright (C) 2008, 2009, 2011, 2013 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA """DAV test server. This defines the TestingDAVRequestHandler and the DAVServer classes which implements the DAV specification parts used by the webdav plugin. """ import logging import os import re import shutil import stat import time import urllib.parse from dromedary import urlutils from dromedary.tests import http_server logger = logging.getLogger("dromedary.webdav.tests.dav_server") class TestingDAVRequestHandler(http_server.TestingHTTPRequestHandler): """Subclass of TestingHTTPRequestHandler handling DAV requests. 
This is not a full implementation of a DAV server, only the parts really used by the plugin are. """ _RANGE_HEADER_RE = re.compile(r"bytes (?P\d+)-(?P\d+)/(?P\d+|\*)") delete_success_code = 204 move_default_overwrite = True def date_time_string(self, timestamp=None): """Return the current date and time formatted for a message header.""" if timestamp is None: timestamp = time.time() year, month, day, hh, mm, ss, wd, _y, _z = time.gmtime(timestamp) s = "%s, %02d %3s %4d %02d:%02d:%02d GMT" % ( self.weekdayname[wd], day, self.monthname[month], year, hh, mm, ss, ) return s def _read(self, length): """Read the client socket.""" return self.rfile.read(length) def _readline(self): """Read a full line on the client socket.""" return self.rfile.readline() def read_body(self): """Read the body either by chunk or as a whole.""" content_length = self.headers.get("Content-Length") encoding = self.headers.get("Transfer-Encoding") if encoding is not None: if encoding != "chunked": raise AssertionError( "Unsupported transfer encoding: {}".format(encoding) ) body = [] # We receive the content by chunk while True: length, data = self.read_chunk() if length == 0: break body.append(data) body = "".join(body) elif content_length is not None: body = self._read(int(content_length)) else: # Neither chunked nor Content-Length: treat as a # zero-byte body. RFC 7230 §3.3.3 allows omitting # Content-Length on an empty request body, and reqwest # does exactly that for PUT/POST with no bytes. body = b"" return body def read_chunk(self): """Read a chunk of data. A chunk consists of: - a line containing the length of the data in hexa, - the data. - a empty line. An empty chunk specifies a length of zero """ length = int(self._readline(), 16) data = None if length != 0: data = self._read(length) # Eats the newline following the chunk self._readline() return length, data def send_head(self): """Specialized version of SimpleHttpServer. 
We *don't* want the apache behavior of permanently redirecting directories without trailing slashes to directories with trailing slashes. That's a waste and a severe penalty for clients with high latency. The installation documentation of the plugin should mention the DirectorySlash apache directive and insists on turning it *Off*. """ path = self.translate_path(self.path) f = None if os.path.isdir(path): for index in "index.html", "index.htm": index = os.path.join(path, index) if os.path.exists(index): path = index break else: return self.list_directory(path) ctype = self.guess_type(path) # Always open in binary. The inherited SimpleHTTPRequestHandler # picked text mode for `text/*` but then writes the file object # to a binary socket via `shutil.copyfileobj` — the str-to-bytes # mismatch raises TypeError on the first write. try: f = open(path, "rb") except OSError: self.send_error(404, "File not found") return None self.send_response(200) self.send_header("Content-type", ctype) fs = os.fstat(f.fileno()) self.send_header("Content-Length", str(fs[6])) self.send_header("Last-Modified", self.date_time_string(fs.st_mtime)) self.end_headers() return f def do_PUT(self): """Serve a PUT request.""" # FIXME: test_put_file_unicode makes us emit a traceback because a # UnicodeEncodeError occurs after the request headers have been sent # but before the body can be send. It's harmless and does not make the # test fails. Adressing that will mean protecting all reads from the # socket, which is too heavy for now -- vila 20070917 path = self.translate_path(self.path) logger.debug(f"do_PUT rel: [{self.path}], abs: [{path}]") do_append = False # Check the Content-Range header range_header = self.headers.get("Content-Range") if range_header is not None: match = self._RANGE_HEADER_RE.match(range_header) if match is None: # FIXME: RFC2616 says to return a 501 if we don't # understand the Content-Range header, but Apache # just ignores them (bad Apache). 
self.send_error(501, "Not Implemented") return begin = int(match.group("begin")) do_append = True if self.headers.get("Expect") == "100-continue": # Tell the client to go ahead, we're ready to get the content self.send_response(100, "Continue") self.end_headers() try: logger.debug(f"do_PUT will try to open: [{path}]") # Always write in binary mode. if do_append: f = open(path, "ab") f.seek(begin) else: f = open(path, "wb") except OSError as e: logger.debug(f"do_PUT got: [{e!r}] while opening/seeking on [{self.path}]") self.send_error(409, "Conflict") return try: data = self.read_body() f.write(data) except OSError: # FIXME: We leave a partially written file here self.send_error(409, "Conflict") f.close() return f.close() logger.debug(f"do_PUT done: [{self.path}]") self.send_response(201) self.end_headers() def do_MKCOL(self): """Serve a MKCOL request. MKCOL is an mkdir in DAV terminology for our part. """ path = self.translate_path(self.path) logger.debug(f"do_MKCOL rel: [{self.path}], abs: [{path}]") try: os.mkdir(path) except FileNotFoundError: self.send_error(409, "Conflict") except (FileExistsError, NotADirectoryError): self.send_error(405, "Not allowed") else: self.send_response(201) self.end_headers() def do_COPY(self): """Serve a COPY request.""" url_to = self.headers.get("Destination") if url_to is None: self.send_error(400, "Destination header missing") return (_scheme, _netloc, rel_to, _params, _query, _fragment) = urllib.parse.urlparse( url_to ) logger.debug(f"urlparse: ({url_to}) [{rel_to}]") logger.debug(f"do_COPY rel_from: [{self.path}], rel_to: [{rel_to}]") abs_from = self.translate_path(self.path) abs_to = self.translate_path(rel_to) try: # TODO: Check that rel_from exists and rel_to does # not. 
In the mean time, just go along and trap # exceptions shutil.copyfile(abs_from, abs_to) except FileNotFoundError: self.send_error(404, "File not found") except OSError: self.send_error(409, "Conflict") else: # TODO: We may be able to return 204 "No content" if # rel_to was existing (even if the "No content" part # seems misleading, RFC2518 says so, stop arguing :) self.send_response(201) self.end_headers() def do_DELETE(self): """Serve a DELETE request. We don't implement a true DELETE as DAV defines it because we *should* fail to delete a non empty dir. """ path = self.translate_path(self.path) logger.debug(f"do_DELETE rel: [{self.path}], abs: [{path}]") try: # DAV makes no distinction between files and dirs # when required to nuke them, but we have to. And we # also watch out for symlinks. Per RFC 4918 §9.6, DELETE # on a collection removes the collection *with* all its # members — so use shutil.rmtree for directories rather # than os.rmdir, which would fail on any non-empty # subtree. 
real_path = os.path.realpath(path) if os.path.isdir(real_path): import shutil shutil.rmtree(path) else: os.remove(path) except FileNotFoundError: self.send_error(404, "File not found") else: self.send_response(self.delete_success_code) self.end_headers() def do_MOVE(self): """Serve a MOVE request.""" url_to = self.headers.get("Destination") if url_to is None: self.send_error(400, "Destination header missing") return overwrite_header = self.headers.get("Overwrite") should_overwrite = self.move_default_overwrite if overwrite_header == "F": should_overwrite = False elif overwrite_header == "T": should_overwrite = True (_scheme, _netloc, rel_to, _params, _query, _fragment) = urllib.parse.urlparse( url_to ) logger.debug(f"urlparse: ({url_to}) [{rel_to}]") logger.debug(f"do_MOVE rel_from: [{self.path}], rel_to: [{rel_to}]") abs_from = self.translate_path(self.path) abs_to = self.translate_path(rel_to) if not should_overwrite and os.access(abs_to, os.F_OK): self.send_error(412, "Precondition Failed") return try: os.rename(abs_from, abs_to) except FileNotFoundError: self.send_error(404, "File not found") except OSError: self.send_error(409, "Conflict") else: # TODO: We may be able to return 204 "No content" if # rel_to was existing (even if the "No content" part # seems misleading, RFC2518 says so, stop arguing :) self.send_response(201) self.end_headers() def _generate_response(self, path): local_path = self.translate_path(path) st = os.stat(local_path) prop = {} def _prop(ns, name, value=None): if value is None: return f"<{ns}:{name}/>" else: return f"<{ns}:{name}>{value}" # For namespaces (and test purposes), where apache2 use: # - lp1, we use liveprop, # - lp2, we use bzr if stat.S_ISDIR(st.st_mode): dpath = path if not dpath.endswith("/"): dpath += "/" prop["href"] = _prop("D", "href", dpath) prop["type"] = _prop("liveprop", "resourcetype", "") prop["length"] = "" prop["exec"] = "" else: # FIXME: assert S_ISREG ? Handle symlinks ? 
prop["href"] = _prop("D", "href", path) prop["type"] = _prop("liveprop", "resourcetype") prop["length"] = _prop("liveprop", "getcontentlength", st.st_size) is_exec = "T" if st.st_mode & stat.S_IXUSR else "F" prop["exec"] = _prop("bzr", "executable", is_exec) prop["status"] = _prop("D", "status", "HTTP/1.1 200 OK") response = f""" {prop["href"]} {prop["type"]} {prop["length"]} {prop["exec"]} {prop["status"]} """ return response, st def _generate_dir_responses(self, path, depth): local_path = self.translate_path(path) entries = os.listdir(local_path) for entry in entries: entry_path = urlutils.escape(entry) if path.endswith("/"): entry_path = path + entry_path else: entry_path = path + "/" + entry_path response, st = self._generate_response(entry_path) yield response if depth == "Infinity" and stat.S_ISDIR(st.st_mode): yield from self._generate_dir_responses(entry_path, depth) def do_PROPFIND(self): """Serve a PROPFIND request.""" depth = self.headers.get("Depth") if depth is None: depth = "Infinity" if depth not in ("0", "1", "Infinity"): self.send_error(400, "Bad Depth") return # Don't bother parsing the body, we handle only allprop anyway. # FIXME: Handle the body :) self.read_body() try: response, st = self._generate_response(self.path) except FileNotFoundError: self.send_error(404) return if depth in ("1", "Infinity") and stat.S_ISDIR(st.st_mode): dir_responses = self._generate_dir_responses(self.path, depth) else: dir_responses = [] # Generate the response, we don't care about performance, so we just # expand everything into a big string. response = f""" {response}{"".join(dir_responses)} """.encode() self.send_response(207) self.send_header("Content-length", len(response)) self.end_headers() self.wfile.write(response) class DAVServer(http_server.HttpServer): """Subclass of HttpServer that gives http+webdav urls. This is for use in testing: connections to this server will always go through _urllib where possible. 
""" def __init__(self): """Initialize the DAV server.""" # We have special requests to handle that # HttpServer_urllib doesn't know about super().__init__(TestingDAVRequestHandler) # urls returned by this server should require the webdav client impl _url_protocol = "http+webdav" class QuirkyTestingDAVRequestHandler(TestingDAVRequestHandler): """Various quirky/slightly off-spec behaviors. Used to test how gracefully we handle them. """ delete_success_code = 200 move_default_overwrite = False class QuirkyDAVServer(http_server.HttpServer): """DAVServer implementing various quirky/slightly off-spec behaviors. Used to test how gracefully we handle them. """ def __init__(self): """Initialize the quirky DAV server.""" # We have special requests to handle that # HttpServer_urllib doesn't know about super().__init__(QuirkyTestingDAVRequestHandler) # urls returned by this server should require the webdav client impl _url_protocol = "http+webdav" dromedary-0.1.5/dromedary/webdav/tests/test_webdav.py000066400000000000000000000067251520150013200227630ustar00rootroot00000000000000# Copyright (C) 2006, 2007, 2008, 2013 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA """Tests for the webdav transport. 
PROPFIND response parsing is covered in Rust by the unit tests in ``src/webdav/xml.rs``; this file only holds the few scenarios that exercise the transport end-to-end against a canned HTTP server. """ from http.client import parse_headers from dromedary import errors as transport_errors from dromedary import tests from dromedary.tests import http_server from dromedary.webdav import webdav class CannedRequestHandler(http_server.TestingHTTPRequestHandler): """An HTTP handler that replies with a canned response for each request. We assume that the incoming request is fully readable (we don't check what request it is, we just read until an empty line). """ def _handle_one_request(self): # The communication between the client and the server is achieved # through the server defined in the client test case. tcs = self.server.test_case_server requestline = self.rfile.readline() # Read headers parse_headers(self.rfile) if requestline.startswith(b"POST"): # The body should be a single line (or we don't know where it ends # and we don't want to issue a blocking read) self.rfile.readline() self.wfile.write(tcs.canned_response) class HatterHttpServer(http_server.HttpServer): """A server giving all sort of crazy responses (like Alice's Hatter). This is used to test various error cases in the webdav client. 
""" def __init__(self): super().__init__(CannedRequestHandler, protocol_version="HTTP/1.1") self.canned_response = None class TestDAVErrors(tests.TestCase): def setUp(self): super().setUp() self._transport = webdav.HttpDavTransport self.server = HatterHttpServer() self.server.start_server() self.addCleanup(self.server.stop_server) def get_transport(self): t = self._transport(self.server.get_url()) return t def test_delete_replies_202(self): """A bogus return code for delete raises an error.""" # Note: this response must be well-formed (blank line after # headers, Content-Length, Connection: close) — the Rust HTTP # client is strict about framing, unlike the old urllib.py # which silently tolerated truncated responses. The test # still exercises the code path we care about: a 202 reply # to DELETE (which WebDAV treats as unexpected). self.server.canned_response = b"""HTTP/1.1 202 OK\r Date: Tue, 10 Aug 2013 14:38:56 GMT\r Server: Apache/42 (Wonderland)\r Content-Length: 0\r Connection: close\r \r """ t = self.get_transport() self.assertRaises(transport_errors.InvalidHttpResponse, t.delete, "whatever") dromedary-0.1.5/dromedary/webdav/webdav.py000066400000000000000000000223421520150013200205530ustar00rootroot00000000000000# Copyright (C) 2006-2009, 2011, 2012, 2013 Canonical Ltd # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. 
# # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA """WebDAV transport: thin Python subclass over the Rust HttpDavTransport. The Rust class (``dromedary._transport_rs.webdav.HttpDavTransport``) owns the HTTP verbs, the PROPFIND XML parser, the atomic-put dance, and both append strategies. This module layers the small Python-only bits: * the ``__init__`` handshake that resolves ``ssl_ca_certs`` / ``ssl_cert_reqs`` from ``dromedary.http`` module hooks and grafts the source transport's HttpClient onto clones (same pattern as ``dromedary.http.urllib.HttpTransport``) * Python-flavoured wrappers that adapt the Rust pymethod signatures to the Transport-protocol ones dromedary expects (``put_file`` reading a file-like, ``get`` returning a BytesIO, etc.) * the ``get_test_permutations`` entry point used by ``dromedary/tests/per_transport.py`` """ import sys from io import BytesIO from typing import IO, TYPE_CHECKING, Literal from dromedary.http import urllib if sys.version_info >= (3, 11): from typing import Self else: from typing_extensions import Self if TYPE_CHECKING: from dromedary import AppendBasedFileStream from .._transport_rs import webdav as _webdav_rs class HttpDavTransport(_webdav_rs.HttpDavTransport): """HTTP(S) transport with WebDAV write verbs.""" def __new__( cls, base: str, _from_transport: "HttpDavTransport | None" = None, ca_certs: str | None = None, ) -> Self: """Build the Rust transport. Mirrors :meth:`dromedary.http.urllib.HttpTransport.__new__`: TLS-related knobs are deferred to ``__init__`` so breezy subclasses that add ``ca_certs`` to their own ``__init__`` and call ``super().__init__(..., ca_certs=...)`` see it applied. ``__new__`` only does the bare-minimum base-URL setup; ``__init__`` rebuilds the underlying client with the right TLS config when it finally arrives. 
When ``_from_transport`` is supplied we construct a fresh Rust instance at ``base`` and then graft the source's HttpClient / auth cache / range hint onto it via ``_clone_from``. """ self = super().__new__( cls, base, ca_certs=None, disable_verification=False, user_agent=urllib._default_user_agent(), ) if _from_transport is not None: # Compute the offset so the grafted state targets the # right base URL, then swap in the shared state. offset = urllib._offset_from_base(_from_transport.base, base) self._clone_from(_from_transport, offset) return self def __init__( self, base: str, _from_transport: "HttpDavTransport | None" = None, ca_certs: str | None = None, ) -> None: """Initialise Python-side state and TLS-configured inner client. Rust ``__new__`` populated the base-URL state with a minimal default client. TLS knobs take effect here so subclasses that override ``__init__`` and call ``super().__init__(..., ca_certs=...)`` pick up correctly — see :meth:`dromedary.http.urllib.HttpTransport.__init__`. """ self._medium = None if _from_transport is None: import ssl as _ssl import dromedary.http as _mod_http if ca_certs is None: configured = _mod_http.ssl_ca_certs() if configured: ca_certs = configured disable_verification = _mod_http.ssl_cert_reqs() == _ssl.CERT_NONE if disable_verification: ca_certs = None fresh = _webdav_rs.HttpDavTransport( base, ca_certs=ca_certs, disable_verification=disable_verification, user_agent=urllib._default_user_agent(), ) # ``offset=None`` lets ``_clone_from`` share # ``fresh``'s inner directly, preserving raw_base and # segment parameters that ``clone_concrete(None)`` would # otherwise strip. self._clone_from(fresh, None) # Wire an activity callback into the Rust transport so # internal get/has/post/readv calls feed breezy's progress # UI too, not just the explicit ``.request()`` path. Same # pattern as ``HttpTransport.__init__``. 
import weakref wself = weakref.ref(self) def _forward(byte_count: int, direction: Literal["read", "write"]) -> None: t = wself() if t is None: return t._report_activity(byte_count, direction) self._set_activity_callback(_forward) def clone(self, offset: str | None = None) -> "HttpDavTransport": """Return a new transport sharing this transport's HttpClient. Uses ``urlutils.URL.clone`` path-combine semantics rather than ``abspath`` URL-join semantics — see :meth:`dromedary.http.urllib.HttpTransport.clone`. """ if offset is None: new_base = self.base else: from dromedary._transport_rs.urlutils import URL new_base = str(URL.from_string(self.base).clone(offset)) return type(self)(new_base, _from_transport=self) def _report_activity( self, byte_count: int, direction: Literal["read", "write"] ) -> None: """Feed byte-count progress into dromedary's UI hook.""" from dromedary import _ui _ui.report_transport_activity(self, byte_count, direction) def is_readonly(self) -> bool: """WebDAV supports writes.""" return False def listable(self) -> bool: """WebDAV exposes directory listings via PROPFIND.""" return True # ------------------------------------------------------------------ # Transport-protocol adapters. The Rust pyclass exposes bytes-in / # bytes-out APIs; the Python Transport contract wants file-likes # for get / put_file and int offsets for append_file. def get(self, relpath: str) -> IO[bytes]: """Return a file-like of ``relpath``'s contents.""" return BytesIO(self._get_bytes(relpath)) def get_bytes(self, relpath: str) -> bytes: """Return ``relpath``'s contents as bytes.""" return self._get_bytes(relpath) def put_file(self, relpath: str, f: IO[bytes], mode: int | None = None) -> int: """Store the contents of `f` at `relpath`. 
Returns the length.""" data = f.read() self.put_bytes(relpath, data) return len(data) def put_file_non_atomic( self, relpath: str, f: IO[bytes], mode: int | None = None, create_parent_dir: bool | None = False, dir_mode: int | None = None, ) -> None: """Non-atomic version of put_file (skips the temp-file dance).""" self.put_bytes_non_atomic( relpath, f.read(), create_parent_dir=create_parent_dir ) def append_file(self, relpath: str, f: IO[bytes], mode: int | None = None) -> int: """Append `f.read()` to `relpath`. Returns the old length.""" return self.append_bytes(relpath, f.read()) def open_write_stream( self, relpath: str, mode: int | None = None ) -> "AppendBasedFileStream": """Open a writable stream at ``relpath``. WebDAV has no native append/stream verbs, so we back the stream with append-based writes: the Transport protocol's ``AppendBasedFileStream`` sends one PUT per ``write()``, each concatenating the new bytes onto the server-side file. Inefficient for large files but correct; bzr only uses open_write_stream for small status/lock files. We start by PUT'ing an empty body so the file exists when ``open_write_stream`` returns (the Transport contract requires it — breezy's ``test_opening_a_file_stream_creates_ file`` exercises exactly that shape). ``FileStream.close`` looks the stream up in the module-level ``_file_streams`` registry, so we insert it there too. 
""" from dromedary import AppendBasedFileStream, _file_streams self.put_bytes(relpath, b"") handle = AppendBasedFileStream(self, relpath) _file_streams[self.abspath(relpath)] = handle return handle def get_test_permutations() -> list[tuple[type, type]]: """Return the permutations to be used in testing.""" from .tests import dav_server return [ (HttpDavTransport, dav_server.DAVServer), (HttpDavTransport, dav_server.QuirkyDAVServer), ] dromedary-0.1.5/pyproject.toml000066400000000000000000000057521520150013200164150ustar00rootroot00000000000000[build-system] requires = [ "setuptools>=60", "setuptools-rust", ] build-backend = "setuptools.build_meta" [project] name = "dromedary" maintainers = [{name = "Breezy Developers", email = "team@breezy-vcs.org"}] description = "Transport abstraction layer split out from Breezy" readme = "README.md" license = "GPL-2.0-or-later" classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: Developers", "Operating System :: OS Independent", "Operating System :: POSIX", "Programming Language :: Python", "Programming Language :: Rust", "Topic :: Software Development :: Version Control", "Topic :: System :: Filesystems", ] requires-python = ">=3.10,<3.15" dependencies = [ "catalogus", "typing_extensions; python_version < '3.11'", ] dynamic = ["version"] [project.urls] Homepage = "https://www.breezy-vcs.org/" Repository = "https://github.com/breezy-team/dromedary" [project.optional-dependencies] paramiko = ["paramiko>=1.6.2"] dev = [ "ruff==0.15.12", "types-paramiko", ] testing = [ "testtools", "testscenarios", "python-subunit", ] [tool.setuptools] zip-safe = false include-package-data = false [tool.setuptools.packages.find] include = ["dromedary", "dromedary.*"] namespaces = false [tool.setuptools.package-data] dromedary = [ "tests/ssl_certs/ca.crt", "tests/ssl_certs/server_without_pass.key", "tests/ssl_certs/server_with_pass.key", "tests/ssl_certs/server.crt", "py.typed", ] [tool.setuptools.dynamic] version = {attr = 
"dromedary.version.__version__"} [tool.mypy] ignore_missing_imports = true [tool.ruff] extend-exclude = ["lib", "bin"] [tool.ruff.lint] select = [ "ANN", # annotations "D", # pydocstyle "E", # pycodestyle "F", # pyflakes "N", # naming "B", # bugbear "I", # isort "S", # bandit "TCH", # typecheck "INT", # gettext "SIM", # simplify "C4", # comprehensions "UP", # pyupgrade "RUF", # ruf-specific ] ignore = [ "ANN001", "ANN002", "ANN003", "ANN201", "ANN202", "ANN204", "ANN205", "ANN206", "D205", "D417", "F821", "E501", "D402", "E402", "E741", "F405", "N801", "N802", "N804", "N806", "N818", "N999", "S602", "S603", "S105", "S106", "S110", "S317", "D102", "D107", "D104", "S101", "RUF012", "RUF005", "RUF015", "SIM102", "SIM105", "SIM108", "SIM114", "SIM115", "SIM118", "UP031", "UP032", ] unfixable = ["ANN204"] [tool.ruff.lint.extend-per-file-ignores] "dromedary/tests/**/*.py" = ["D100", "D101", "D102", "D103", "D104", "D105", "D106", "D107"] "dromedary/**/test_*.py" = ["D100", "D101", "D102", "D103", "D104", "D105", "D106", "D107"] [tool.ruff.lint.pydocstyle] convention = "google" [tool.cibuildwheel.linux] skip = "*-musllinux_*" archs = ["auto", "aarch64"] [tool.cibuildwheel.macos] archs = ["x86_64", "arm64"] [tool.cibuildwheel.windows] skip = "*-win32" dromedary-0.1.5/setup.py000077500000000000000000000014101520150013200152010ustar00rootroot00000000000000#! 
/usr/bin/env python3 """Installation script for dromedary.""" import sys try: import setuptools # noqa: F401 except ModuleNotFoundError as e: sys.stderr.write(f"[ERROR] Please install setuptools ({e})\n") sys.exit(1) try: from setuptools_rust import Binding, RustExtension except ModuleNotFoundError as e: sys.stderr.write(f"[ERROR] Please install setuptools_rust ({e})\n") sys.exit(1) import os from setuptools import setup rust_features = [] if os.environ.get("DROMEDARY_GIO"): rust_features.append("gio") rust_extensions = [ RustExtension( "dromedary._transport_rs", "_transport_rs/Cargo.toml", binding=Binding.PyO3, features=rust_features, ), ] setup( rust_extensions=rust_extensions, ) dromedary-0.1.5/src/000077500000000000000000000000001520150013200142575ustar00rootroot00000000000000dromedary-0.1.5/src/brokenrename.rs000066400000000000000000000070661520150013200173060ustar00rootroot00000000000000//! BrokenRename Transport decorator, ported from dromedary/brokenrename.py. //! //! A transport that fails to detect clashing renames: if the destination //! exists, the rename is silently absorbed rather than raising an error. 
use crate::{Result, Transport, UrlFragment}; use url::Url; pub struct BrokenRenameTransport { inner: Box, } impl BrokenRenameTransport { pub const PREFIX: &'static str = "brokenrename+"; pub fn new(inner: Box) -> Self { Self { inner } } } impl std::fmt::Debug for BrokenRenameTransport { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "BrokenRenameTransport({})", self.base()) } } impl Transport for BrokenRenameTransport { crate::fwd_external_url!(inner); crate::fwd_can_roundtrip_unix_modebits!(inner); crate::fwd_is_readonly!(inner); crate::fwd_listable!(inner); crate::fwd_get!(inner); crate::fwd_has!(inner); crate::fwd_stat!(inner); crate::fwd_decorator_url!(inner, BrokenRenameTransport); crate::fwd_put_file!(inner); crate::fwd_put_bytes!(inner); crate::fwd_put_file_non_atomic!(inner); crate::fwd_put_bytes_non_atomic!(inner); crate::fwd_mkdir!(inner); crate::fwd_delete!(inner); crate::fwd_rmdir!(inner); crate::fwd_set_segment_parameter!(inner); crate::fwd_get_segment_parameters!(inner); crate::fwd_append_file!(inner); crate::fwd_readlink!(inner); crate::fwd_hardlink!(inner); crate::fwd_symlink!(inner); crate::fwd_iter_files_recursive!(inner); crate::fwd_open_write_stream!(inner); crate::fwd_delete_tree!(inner); crate::fwd_move!(inner); crate::fwd_list_dir!(inner); crate::fwd_lock_read!(inner); crate::fwd_lock_write!(inner); crate::fwd_local_abspath!(inner); crate::fwd_copy!(inner); fn base(&self) -> Url { crate::decorator::prefixed_base(Self::PREFIX, self.inner.as_ref()) } fn rename(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { match self.inner.rename(rel_from, rel_to) { Ok(()) => Ok(()), // Absorb file-exists clashes silently — that's the whole point. // Directory-not-empty clashes still propagate: the decorator // simulates a transport that misses file rename conflicts, not // one that also silently succeeds when renaming over populated // directories. 
Err(crate::Error::FileExists(_)) => Ok(()), Err(e) => Err(e), } } } #[cfg(test)] mod tests { use super::*; use crate::memory::MemoryTransport; fn wrap() -> BrokenRenameTransport { let mem = MemoryTransport::new("memory:///").unwrap(); mem.put_bytes("a", b"A", None).unwrap(); mem.put_bytes("b", b"B", None).unwrap(); BrokenRenameTransport::new(Box::new(mem)) } #[test] fn base_prefix() { assert!(wrap().base().as_str().starts_with("brokenrename+")); } #[test] fn ok_rename_still_works() { let t = wrap(); t.rename("a", "c").unwrap(); assert_eq!(t.has("a").unwrap(), false); assert_eq!(t.get_bytes("c").unwrap(), b"A"); } #[test] fn clashing_rename_is_absorbed() { let t = wrap(); // Renaming a over existing b would normally raise FileExists. t.rename("a", "b").unwrap(); // Both files should still exist because the rename was absorbed. assert_eq!(t.get_bytes("a").unwrap(), b"A"); assert_eq!(t.get_bytes("b").unwrap(), b"B"); } } dromedary-0.1.5/src/chroot.rs000066400000000000000000000051311520150013200161230ustar00rootroot00000000000000//! Chroot Transport, ported from dromedary/chroot.py. //! //! A chroot is a [`PathFilteringTransport`](crate::pathfilter::PathFilteringTransport) //! with no user filter function: the server-root rebase that `pathfilter` //! performs is enough to prevent `..` sequences from escaping the backing //! transport's root. use crate::pathfilter::PathFilteringTransport; use crate::{Result, Transport}; /// Construct a chroot transport wrapping `backing`, exposed under `scheme` /// (e.g. "chroot-42:///") with `base_path` (must start with `/`) as the /// chroot root within the backing transport. 
pub fn new_chroot( backing: Box, scheme: impl Into, base_path: impl Into, ) -> Result { PathFilteringTransport::new(backing, scheme, base_path, None) } #[cfg(test)] mod tests { use super::*; use crate::memory::MemoryTransport; use crate::Error; fn chroot_at() -> PathFilteringTransport { // Mirror the Python setup: backing is already rooted inside the jail, // so escapes via `..` resolve to paths that don't exist on the // backing transport. let mem = MemoryTransport::new("memory:///").unwrap(); mem.mkdir("jail", None).unwrap(); mem.put_bytes("jail/inside", b"ok", None).unwrap(); mem.put_bytes("outside", b"secret", None).unwrap(); let jail = mem.clone(Some("jail")).unwrap(); new_chroot(jail, "chroot-1:///", "/").unwrap() } #[test] fn reads_inside_jail() { let t = chroot_at(); assert_eq!(t.get_bytes("inside").unwrap(), b"ok"); } #[test] fn dotdot_cannot_escape_chroot() { let t = chroot_at(); match t.get_bytes("../outside") { Err(Error::NoSuchFile(_)) => {} other => panic!("expected NoSuchFile, got {:?}", other), } } #[test] fn deeper_dotdot_cannot_escape_chroot() { let t = chroot_at(); match t.get_bytes("../../outside") { Err(Error::NoSuchFile(_)) => {} other => panic!("expected NoSuchFile, got {:?}", other), } } #[test] fn absolute_path_cannot_escape_chroot() { let t = chroot_at(); match t.get_bytes("/outside") { Err(Error::NoSuchFile(_)) => {} other => panic!("expected NoSuchFile, got {:?}", other), } } #[test] fn mkdir_and_delete_round_trip() { let t = chroot_at(); t.mkdir("new", None).unwrap(); t.put_bytes("new/f", b"x", None).unwrap(); assert_eq!(t.get_bytes("new/f").unwrap(), b"x"); } } dromedary-0.1.5/src/decorator.rs000066400000000000000000000354531520150013200166210ustar00rootroot00000000000000//! Shared forwarding helpers for Transport decorators, matching //! dromedary/decorator.py. //! //! `forward_transport_all!($field)` emits a default forwarding impl for //! every Transport method. Each individual forwarder is also exposed as //! 
//! `fwd_<method>!` so decorators that want to override a few methods can
//! invoke only the forwarders they need instead of relying on skip
//! lists.

/// Build a decorator base URL by prefixing the inner transport's base.
/// `prefix` should include the trailing `+`, matching Python's
/// `_get_url_prefix()` convention (e.g. "fakenfs+").
///
/// If the prefixed string does not parse as a URL, the inner transport's
/// own base is returned unchanged instead of an error.
pub fn prefixed_base(prefix: &str, inner: &dyn crate::Transport) -> ::url::Url {
    let inner_base = inner.base();
    let url = format!("{}{}", prefix, inner_base);
    ::url::Url::parse(&url).unwrap_or(inner_base)
}

/// Compute the decorator-level abspath: inner's abspath with `prefix`
/// prepended. Mirrors Python's `TransportDecorator.abspath`.
///
/// Errors from the inner transport's `abspath` are propagated; a prefixed
/// string that fails to parse is converted with `crate::Error::from`.
pub fn prefixed_abspath(
    prefix: &str,
    inner: &dyn crate::Transport,
    relpath: &crate::UrlFragment,
) -> crate::Result<::url::Url> {
    let inner_abs = inner.abspath(relpath)?;
    let prefixed = format!("{}{}", prefix, inner_abs);
    ::url::Url::parse(&prefixed).map_err(crate::Error::from)
}

/// Compute the decorator-level relpath: strip `prefix` from `abspath` if
/// present, then delegate to the inner transport. The Python base class's
/// default `relpath` implementation works directly against `self.base`
/// (which already has the prefix), so this matches its observable behaviour
/// while keeping the inner transport ignorant of the decoration.
///
/// A URL that does not start with `prefix` is handed to the inner transport
/// as-is.
pub fn stripped_relpath(
    prefix: &str,
    inner: &dyn crate::Transport,
    abspath: &::url::Url,
) -> crate::Result {
    let as_str = abspath.as_str();
    // `strip_prefix` yields `None` when the prefix is absent; fall back to
    // the untouched string in that case.
    let stripped = as_str.strip_prefix(prefix).unwrap_or(as_str);
    let stripped_url = ::url::Url::parse(stripped).map_err(crate::Error::from)?;
    inner.relpath(&stripped_url)
}

/// Forward `external_url` to `self.$field`.
#[macro_export]
macro_rules! fwd_external_url {
    ($field:ident) => {
        fn external_url(&self) -> $crate::Result<::url::Url> {
            self.$field.external_url()
        }
    };
}

/// Forward `can_roundtrip_unix_modebits` to `self.$field`.
#[macro_export]
macro_rules! fwd_can_roundtrip_unix_modebits {
    ($field:ident) => {
        fn can_roundtrip_unix_modebits(&self) -> bool {
            self.$field.can_roundtrip_unix_modebits()
        }
    };
}

#[macro_export]
macro_rules!
fwd_is_readonly { ($field:ident) => { fn is_readonly(&self) -> bool { self.$field.is_readonly() } }; } #[macro_export] macro_rules! fwd_listable { ($field:ident) => { fn listable(&self) -> bool { self.$field.listable() } }; } #[macro_export] macro_rules! fwd_get { ($field:ident) => { fn get( &self, relpath: &$crate::UrlFragment, ) -> $crate::Result> { self.$field.get(relpath) } }; } #[macro_export] macro_rules! fwd_has { ($field:ident) => { fn has(&self, relpath: &$crate::UrlFragment) -> $crate::Result { self.$field.has(relpath) } }; } #[macro_export] macro_rules! fwd_stat { ($field:ident) => { fn stat(&self, relpath: &$crate::UrlFragment) -> $crate::Result<$crate::Stat> { self.$field.stat(relpath) } }; } #[macro_export] macro_rules! fwd_clone { ($field:ident) => { fn clone( &self, offset: Option<&$crate::UrlFragment>, ) -> $crate::Result> { self.$field.clone(offset) } }; } #[macro_export] macro_rules! fwd_abspath { ($field:ident) => { fn abspath(&self, relpath: &$crate::UrlFragment) -> $crate::Result<::url::Url> { self.$field.abspath(relpath) } }; } #[macro_export] macro_rules! fwd_relpath { ($field:ident) => { fn relpath(&self, abspath: &::url::Url) -> $crate::Result { self.$field.relpath(abspath) } }; } #[macro_export] macro_rules! fwd_put_file { ($field:ident) => { fn put_file( &self, relpath: &$crate::UrlFragment, f: &mut dyn ::std::io::Read, permissions: Option<::std::fs::Permissions>, ) -> $crate::Result { self.$field.put_file(relpath, f, permissions) } }; } #[macro_export] macro_rules! fwd_put_bytes { ($field:ident) => { fn put_bytes( &self, relpath: &$crate::UrlFragment, data: &[u8], permissions: Option<::std::fs::Permissions>, ) -> $crate::Result<()> { self.$field.put_bytes(relpath, data, permissions) } }; } #[macro_export] macro_rules! 
fwd_put_file_non_atomic { ($field:ident) => { fn put_file_non_atomic( &self, relpath: &$crate::UrlFragment, f: &mut dyn ::std::io::Read, permissions: Option<::std::fs::Permissions>, create_parent_dir: Option, dir_permissions: Option<::std::fs::Permissions>, ) -> $crate::Result<()> { self.$field.put_file_non_atomic( relpath, f, permissions, create_parent_dir, dir_permissions, ) } }; } #[macro_export] macro_rules! fwd_put_bytes_non_atomic { ($field:ident) => { fn put_bytes_non_atomic( &self, relpath: &$crate::UrlFragment, data: &[u8], permissions: Option<::std::fs::Permissions>, create_parent_dir: Option, dir_permissions: Option<::std::fs::Permissions>, ) -> $crate::Result<()> { self.$field.put_bytes_non_atomic( relpath, data, permissions, create_parent_dir, dir_permissions, ) } }; } #[macro_export] macro_rules! fwd_mkdir { ($field:ident) => { fn mkdir( &self, relpath: &$crate::UrlFragment, permissions: Option<::std::fs::Permissions>, ) -> $crate::Result<()> { self.$field.mkdir(relpath, permissions) } }; } #[macro_export] macro_rules! fwd_delete { ($field:ident) => { fn delete(&self, relpath: &$crate::UrlFragment) -> $crate::Result<()> { self.$field.delete(relpath) } }; } #[macro_export] macro_rules! fwd_rmdir { ($field:ident) => { fn rmdir(&self, relpath: &$crate::UrlFragment) -> $crate::Result<()> { self.$field.rmdir(relpath) } }; } #[macro_export] macro_rules! fwd_rename { ($field:ident) => { fn rename( &self, rel_from: &$crate::UrlFragment, rel_to: &$crate::UrlFragment, ) -> $crate::Result<()> { self.$field.rename(rel_from, rel_to) } }; } #[macro_export] macro_rules! fwd_set_segment_parameter { ($field:ident) => { fn set_segment_parameter(&mut self, key: &str, value: Option<&str>) -> $crate::Result<()> { self.$field.set_segment_parameter(key, value) } }; } #[macro_export] macro_rules! 
fwd_get_segment_parameters { ($field:ident) => { fn get_segment_parameters( &self, ) -> $crate::Result<::std::collections::HashMap> { self.$field.get_segment_parameters() } }; } #[macro_export] macro_rules! fwd_append_file { ($field:ident) => { fn append_file( &self, relpath: &$crate::UrlFragment, f: &mut dyn ::std::io::Read, permissions: Option<::std::fs::Permissions>, ) -> $crate::Result { self.$field.append_file(relpath, f, permissions) } }; } #[macro_export] macro_rules! fwd_readlink { ($field:ident) => { fn readlink(&self, relpath: &$crate::UrlFragment) -> $crate::Result { self.$field.readlink(relpath) } }; } #[macro_export] macro_rules! fwd_hardlink { ($field:ident) => { fn hardlink( &self, rel_from: &$crate::UrlFragment, rel_to: &$crate::UrlFragment, ) -> $crate::Result<()> { self.$field.hardlink(rel_from, rel_to) } }; } #[macro_export] macro_rules! fwd_symlink { ($field:ident) => { fn symlink( &self, rel_from: &$crate::UrlFragment, rel_to: &$crate::UrlFragment, ) -> $crate::Result<()> { self.$field.symlink(rel_from, rel_to) } }; } #[macro_export] macro_rules! fwd_iter_files_recursive { ($field:ident) => { fn iter_files_recursive(&self) -> Box>> { self.$field.iter_files_recursive() } }; } #[macro_export] macro_rules! fwd_open_write_stream { ($field:ident) => { fn open_write_stream( &self, relpath: &$crate::UrlFragment, permissions: Option<::std::fs::Permissions>, ) -> $crate::Result> { self.$field.open_write_stream(relpath, permissions) } }; } #[macro_export] macro_rules! fwd_delete_tree { ($field:ident) => { fn delete_tree(&self, relpath: &$crate::UrlFragment) -> $crate::Result<()> { self.$field.delete_tree(relpath) } }; } #[macro_export] macro_rules! fwd_move { ($field:ident) => { fn r#move( &self, rel_from: &$crate::UrlFragment, rel_to: &$crate::UrlFragment, ) -> $crate::Result<()> { self.$field.r#move(rel_from, rel_to) } }; } #[macro_export] macro_rules! 
fwd_list_dir { ($field:ident) => { fn list_dir( &self, relpath: &$crate::UrlFragment, ) -> Box>> { self.$field.list_dir(relpath) } }; } #[macro_export] macro_rules! fwd_lock_read { ($field:ident) => { fn lock_read( &self, relpath: &$crate::UrlFragment, ) -> $crate::Result> { self.$field.lock_read(relpath) } }; } #[macro_export] macro_rules! fwd_lock_write { ($field:ident) => { fn lock_write( &self, relpath: &$crate::UrlFragment, ) -> $crate::Result> { self.$field.lock_write(relpath) } }; } #[macro_export] macro_rules! fwd_local_abspath { ($field:ident) => { fn local_abspath( &self, relpath: &$crate::UrlFragment, ) -> $crate::Result<::std::path::PathBuf> { self.$field.local_abspath(relpath) } }; } #[macro_export] macro_rules! fwd_copy { ($field:ident) => { fn copy( &self, rel_from: &$crate::UrlFragment, rel_to: &$crate::UrlFragment, ) -> $crate::Result<()> { self.$field.copy(rel_from, rel_to) } }; } /// Emit the three URL-aware forwarders (`abspath`, `relpath`, `clone`) that /// a plain prefix decorator wants. Requires the outer type to expose an /// associated `PREFIX: &'static str` and a `fn new(inner: Box) -> Self` constructor. Pass the field that /// holds the inner transport plus the decorator's own type name: /// /// ```ignore /// crate::fwd_decorator_url!(inner, MyDecorator); /// ``` #[macro_export] macro_rules! fwd_decorator_url { ($field:ident, $ty:ident) => { fn abspath(&self, relpath: &$crate::UrlFragment) -> $crate::Result<::url::Url> { $crate::decorator::prefixed_abspath(Self::PREFIX, self.$field.as_ref(), relpath) } fn relpath(&self, abspath: &::url::Url) -> $crate::Result { $crate::decorator::stripped_relpath(Self::PREFIX, self.$field.as_ref(), abspath) } fn clone( &self, offset: Option<&$crate::UrlFragment>, ) -> $crate::Result> { let inner_clone = self.$field.clone(offset)?; Ok(Box::new($ty::new(inner_clone))) } }; } /// Forward every Transport method to `self.$field`. The caller must still /// define `fn base(&self) -> Url`. 
Use this when no methods need to be /// overridden; decorators that override a few methods should invoke the /// individual `fwd_*!` macros instead. #[macro_export] macro_rules! fwd_all { ($field:ident) => { $crate::fwd_external_url!($field); $crate::fwd_can_roundtrip_unix_modebits!($field); $crate::fwd_is_readonly!($field); $crate::fwd_listable!($field); $crate::fwd_get!($field); $crate::fwd_has!($field); $crate::fwd_stat!($field); $crate::fwd_clone!($field); $crate::fwd_abspath!($field); $crate::fwd_relpath!($field); $crate::fwd_put_file!($field); $crate::fwd_mkdir!($field); $crate::fwd_delete!($field); $crate::fwd_rmdir!($field); $crate::fwd_rename!($field); $crate::fwd_set_segment_parameter!($field); $crate::fwd_get_segment_parameters!($field); $crate::fwd_append_file!($field); $crate::fwd_readlink!($field); $crate::fwd_hardlink!($field); $crate::fwd_symlink!($field); $crate::fwd_iter_files_recursive!($field); $crate::fwd_open_write_stream!($field); $crate::fwd_delete_tree!($field); $crate::fwd_move!($field); $crate::fwd_list_dir!($field); $crate::fwd_lock_read!($field); $crate::fwd_lock_write!($field); $crate::fwd_local_abspath!($field); $crate::fwd_copy!($field); }; } #[cfg(test)] mod tests { use super::*; use crate::memory::MemoryTransport; #[test] fn prefixed_abspath_prepends_prefix() { let mem = MemoryTransport::new("memory:///").unwrap(); let abs = prefixed_abspath("readonly+", &mem, "foo").unwrap(); assert_eq!(abs.as_str(), "readonly+memory:///foo"); } #[test] fn stripped_relpath_removes_prefix_before_delegating() { let mem = MemoryTransport::new("memory:///").unwrap(); let decorated_abs = ::url::Url::parse("readonly+memory:///sub/file").unwrap(); let rel = stripped_relpath("readonly+", &mem, &decorated_abs).unwrap(); assert_eq!(rel, "sub/file"); } #[test] fn stripped_relpath_passes_through_when_prefix_absent() { // If the caller hands us a url without the prefix (e.g. because // they've already stripped it) we should still delegate safely. 
let mem = MemoryTransport::new("memory:///").unwrap(); let bare = ::url::Url::parse("memory:///x").unwrap(); let rel = stripped_relpath("readonly+", &mem, &bare).unwrap(); assert_eq!(rel, "x"); } } dromedary-0.1.5/src/fakenfs.rs000066400000000000000000000111441520150013200162430ustar00rootroot00000000000000//! FakeNFS Transport decorator, ported from dromedary/fakenfs.py. //! //! Adapts any Transport to behave like NFS for testing: rename against a //! non-empty target directory raises ResourceBusy, and deleting a file //! whose basename starts with ".nfs" raises ResourceBusy. use crate::{Error, Result, Stat, Transport, UrlFragment}; use url::Url; pub struct FakeNfsTransport { inner: Box, } impl FakeNfsTransport { pub const PREFIX: &'static str = "fakenfs+"; pub fn new(inner: Box) -> Self { Self { inner } } } impl std::fmt::Debug for FakeNfsTransport { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "FakeNfsTransport({})", self.base()) } } fn basename(path: &str) -> &str { match path.rsplit_once('/') { Some((_, tail)) => tail, None => path, } } impl Transport for FakeNfsTransport { crate::fwd_external_url!(inner); crate::fwd_can_roundtrip_unix_modebits!(inner); crate::fwd_is_readonly!(inner); crate::fwd_listable!(inner); crate::fwd_get!(inner); crate::fwd_has!(inner); crate::fwd_stat!(inner); crate::fwd_decorator_url!(inner, FakeNfsTransport); crate::fwd_put_file!(inner); crate::fwd_put_bytes!(inner); crate::fwd_put_file_non_atomic!(inner); crate::fwd_put_bytes_non_atomic!(inner); crate::fwd_mkdir!(inner); crate::fwd_rmdir!(inner); crate::fwd_set_segment_parameter!(inner); crate::fwd_get_segment_parameters!(inner); crate::fwd_append_file!(inner); crate::fwd_readlink!(inner); crate::fwd_hardlink!(inner); crate::fwd_symlink!(inner); crate::fwd_iter_files_recursive!(inner); crate::fwd_open_write_stream!(inner); crate::fwd_delete_tree!(inner); crate::fwd_move!(inner); crate::fwd_list_dir!(inner); crate::fwd_lock_read!(inner); 
crate::fwd_lock_write!(inner); crate::fwd_local_abspath!(inner); crate::fwd_copy!(inner); fn base(&self) -> Url { crate::decorator::prefixed_base(Self::PREFIX, self.inner.as_ref()) } fn rename(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { match self.inner.rename(rel_from, rel_to) { Ok(()) => Ok(()), Err(e @ Error::DirectoryNotEmptyError(_)) | Err(e @ Error::FileExists(_)) => { match self.inner.stat(rel_to) { Ok(Stat { kind, .. }) if kind == crate::FileKind::Dir => { Err(Error::ResourceBusy(Some(rel_to.to_string()))) } _ => Err(e), } } Err(e) => Err(e), } } fn delete(&self, relpath: &UrlFragment) -> Result<()> { if basename(relpath).starts_with(".nfs") { return Err(Error::ResourceBusy(Some(relpath.to_string()))); } self.inner.delete(relpath) } } #[cfg(test)] mod tests { use super::*; use crate::memory::MemoryTransport; fn wrap() -> FakeNfsTransport { let mem = MemoryTransport::new("memory:///").unwrap(); mem.put_bytes("regular", b"x", None).unwrap(); mem.put_bytes(".nfs1234", b"busy", None).unwrap(); mem.mkdir("dir1", None).unwrap(); mem.mkdir("dir2", None).unwrap(); mem.put_bytes("f1", b"a", None).unwrap(); mem.put_bytes("f2", b"b", None).unwrap(); FakeNfsTransport::new(Box::new(mem)) } #[test] fn base_has_fakenfs_prefix() { assert!(wrap().base().as_str().starts_with("fakenfs+")); } #[test] fn regular_delete_passes_through() { wrap().delete("regular").unwrap(); } #[test] fn dotnfs_delete_is_busy() { match wrap().delete(".nfs1234") { Err(Error::ResourceBusy(_)) => {} other => panic!("expected ResourceBusy, got {:?}", other), } } #[test] fn rename_dir_over_dir_becomes_busy() { let t = wrap(); match t.rename("dir1", "dir2") { Err(Error::ResourceBusy(_)) => {} other => panic!("expected ResourceBusy, got {:?}", other), } } #[test] fn rename_file_over_file_propagates_original_error() { // Destination is a file, so the translator's stat check falls through // and the original FileExists is re-raised. 
let t = wrap(); match t.rename("f1", "f2") { Err(Error::FileExists(_)) => {} other => panic!("expected FileExists, got {:?}", other), } } #[test] fn reads_pass_through() { let t = wrap(); assert_eq!(t.get_bytes("regular").unwrap(), b"x"); } } dromedary-0.1.5/src/fakevfat.rs000066400000000000000000000127541520150013200164250ustar00rootroot00000000000000//! FakeVFAT Transport decorator, ported from dromedary/fakevfat.py. //! //! Simulates VFAT restrictions: filenames are squashed to lowercase, and //! names containing any of `?*:;<>` are rejected. Only a subset of //! Transport methods route through the squash; others forward unchanged. use crate::{Error, Result, Transport, UrlFragment}; use std::fs::Permissions; use url::Url; pub struct FakeVfatTransport { inner: Box, } impl FakeVfatTransport { pub const PREFIX: &'static str = "vfat+"; pub fn new(inner: Box) -> Self { Self { inner } } fn squash_name(name: &str) -> Result { if name.contains(|c: char| matches!(c, '?' | '*' | ':' | ';' | '<' | '>')) { return Err(Error::PathNotChild); } Ok(name.to_lowercase()) } } impl std::fmt::Debug for FakeVfatTransport { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "FakeVfatTransport({})", self.base()) } } impl Transport for FakeVfatTransport { crate::fwd_external_url!(inner); crate::fwd_is_readonly!(inner); crate::fwd_listable!(inner); crate::fwd_stat!(inner); crate::fwd_decorator_url!(inner, FakeVfatTransport); crate::fwd_delete!(inner); crate::fwd_rmdir!(inner); crate::fwd_rename!(inner); crate::fwd_set_segment_parameter!(inner); crate::fwd_get_segment_parameters!(inner); crate::fwd_append_file!(inner); crate::fwd_readlink!(inner); crate::fwd_hardlink!(inner); crate::fwd_symlink!(inner); crate::fwd_iter_files_recursive!(inner); crate::fwd_open_write_stream!(inner); crate::fwd_delete_tree!(inner); crate::fwd_move!(inner); crate::fwd_list_dir!(inner); crate::fwd_lock_read!(inner); crate::fwd_lock_write!(inner); crate::fwd_local_abspath!(inner); 
crate::fwd_copy!(inner); fn base(&self) -> Url { crate::decorator::prefixed_base(Self::PREFIX, self.inner.as_ref()) } fn can_roundtrip_unix_modebits(&self) -> bool { false } fn get(&self, relpath: &UrlFragment) -> Result> { self.inner.get(&Self::squash_name(relpath)?) } fn has(&self, relpath: &UrlFragment) -> Result { self.inner.has(&Self::squash_name(relpath)?) } fn put_file( &self, relpath: &UrlFragment, f: &mut dyn std::io::Read, permissions: Option, ) -> Result { self.inner .put_file(&Self::squash_name(relpath)?, f, permissions) } fn put_bytes( &self, relpath: &UrlFragment, data: &[u8], permissions: Option, ) -> Result<()> { self.inner .put_bytes(&Self::squash_name(relpath)?, data, permissions) } fn put_file_non_atomic( &self, relpath: &UrlFragment, f: &mut dyn std::io::Read, permissions: Option, create_parent_dir: Option, dir_permissions: Option, ) -> Result<()> { self.inner.put_file_non_atomic( &Self::squash_name(relpath)?, f, permissions, create_parent_dir, dir_permissions, ) } fn put_bytes_non_atomic( &self, relpath: &UrlFragment, data: &[u8], permissions: Option, create_parent_dir: Option, dir_permissions: Option, ) -> Result<()> { self.inner.put_bytes_non_atomic( &Self::squash_name(relpath)?, data, permissions, create_parent_dir, dir_permissions, ) } fn mkdir(&self, relpath: &UrlFragment, _permissions: Option) -> Result<()> { // Python hard-codes 0o755 for VFAT mkdir. #[cfg(unix)] let perms = { use std::os::unix::fs::PermissionsExt; Some(Permissions::from_mode(0o755)) }; #[cfg(not(unix))] let perms: Option = None; self.inner.mkdir(&Self::squash_name(relpath)?, perms) } } #[cfg(test)] mod tests { use super::*; use crate::memory::MemoryTransport; fn wrap() -> FakeVfatTransport { let mem = MemoryTransport::new("memory:///").unwrap(); // Pre-seed lowercase names so squashed reads find them. 
mem.put_bytes("readme", b"hi", None).unwrap(); FakeVfatTransport::new(Box::new(mem)) } #[test] fn base_prefix() { assert!(wrap().base().as_str().starts_with("vfat+")); } #[test] fn uppercase_get_squashes_to_lowercase() { assert_eq!(wrap().get_bytes("README").unwrap(), b"hi"); } #[test] fn illegal_character_rejected() { match wrap().put_bytes("bad:name", b"x", None) { Err(Error::PathNotChild) => {} other => panic!("expected PathNotChild, got {:?}", other), } } #[test] fn put_and_get_round_trip_lowercase() { let t = wrap(); t.put_bytes("HELLO", b"world", None).unwrap(); assert_eq!(t.get_bytes("hello").unwrap(), b"world"); assert_eq!(t.get_bytes("Hello").unwrap(), b"world"); } #[test] fn roundtrip_unix_modebits_false() { assert_eq!(wrap().can_roundtrip_unix_modebits(), false); } #[test] fn mkdir_squashes_name() { let t = wrap(); t.mkdir("NewDir", None).unwrap(); assert_eq!(t.has("newdir").unwrap(), true); } } dromedary-0.1.5/src/fcntl-locks.rs000066400000000000000000000237141520150013200170530ustar00rootroot00000000000000use crate::lock::{FileLock, Lock, LockError}; use lazy_static::lazy_static; use log::debug; use nix::fcntl::{fcntl, FcntlArg}; use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; use std::fs::{File, OpenOptions}; use std::path::{Path, PathBuf}; fn open(filename: &Path, options: &OpenOptions) -> std::result::Result<(PathBuf, File), LockError> { let filename = crate::osutils::path::realpath(filename)?; match options.open(&filename) { Ok(f) => Ok((filename, f)), Err(e) => match e.kind() { std::io::ErrorKind::PermissionDenied => Err(LockError::Failed(filename, e.to_string())), std::io::ErrorKind::NotFound => { // Maybe this is an old branch (before 2005)? debug!( "trying to create missing lock {}", filename.to_string_lossy() ); let f = OpenOptions::new() .create(true) .write(true) .read(true) .open(&filename)?; Ok((filename, f)) } _ => Err(e.into()), }, } } lazy_static! 
{
    // Canonical paths on which this process currently holds a write lock.
    static ref OPEN_WRITE_LOCKS: std::sync::Mutex<HashSet<PathBuf>> =
        std::sync::Mutex::new(HashSet::new());
    // Number of read locks this process holds, keyed by canonical path.
    static ref OPEN_READ_LOCKS: std::sync::Mutex<HashMap<PathBuf, usize>> =
        std::sync::Mutex::new(HashMap::new());
}

/// An exclusive `fcntl` record lock on a file, tracked in
/// `OPEN_WRITE_LOCKS` for as long as it is held.
pub struct WriteLock {
    filename: PathBuf,
    f: File,
}

impl WriteLock {
    /// Take a non-blocking write lock on `filename`.
    ///
    /// `filename` is canonicalised first. When `strict_locks` is true, an
    /// open read lock on the same path inside this process is treated as
    /// contention; otherwise it is only logged.
    ///
    /// # Errors
    ///
    /// Returns `LockError::Contention` when this process already holds a
    /// write lock on the path, when `strict_locks` forbids the request, or
    /// when the `fcntl` call fails. Failures to resolve or open the file are
    /// propagated from `open`.
    pub fn new(filename: &Path, strict_locks: bool) -> Result<Self, LockError> {
        let filename = crate::osutils::path::realpath(filename)?;
        if OPEN_WRITE_LOCKS.lock().unwrap().contains(&filename) {
            return Err(LockError::Contention(filename));
        }
        if OPEN_READ_LOCKS.lock().unwrap().contains_key(&filename) {
            if strict_locks {
                return Err(LockError::Contention(filename));
            } else {
                debug!(
                    "Write lock taken w/ an open read lock on: {}",
                    filename.to_string_lossy()
                );
            }
        }

        let (filename, f) = open(
            filename.as_path(),
            OpenOptions::new().read(true).write(true),
        )?;
        // Reserve the in-process slot before asking the kernel for the lock.
        OPEN_WRITE_LOCKS.lock().unwrap().insert(filename.clone());

        // l_start = 0 and l_len = 0 from SEEK_SET: lock the whole file.
        let flock = nix::libc::flock {
            l_type: nix::libc::F_WRLCK as i16,
            l_whence: nix::libc::SEEK_SET as i16,
            l_start: 0,
            l_len: 0,
            l_pid: 0,
        };
        match fcntl(&f, FcntlArg::F_SETLK(&flock)) {
            Ok(_) => Ok(WriteLock { filename, f }),
            Err(e) => {
                // No WriteLock is returned on this path, so nothing could
                // ever call unlock() for it: give the reservation back here,
                // otherwise the path would stay "contended" for the rest of
                // the process.
                OPEN_WRITE_LOCKS.lock().unwrap().remove(&filename);
                if e == nix::errno::Errno::EAGAIN || e == nix::errno::Errno::EACCES {
                    let flock = nix::libc::flock {
                        l_type: nix::libc::F_UNLCK as i16,
                        l_whence: nix::libc::SEEK_SET as i16,
                        l_start: 0,
                        l_len: 0,
                        l_pid: 0,
                    };
                    let _ = fcntl(&f, FcntlArg::F_SETLK(&flock));
                }
                // we should be more precise about whats a locking
                // error and whats a random-other error
                Err(LockError::Contention(filename))
            }
        }
    }
}

impl Lock for WriteLock {
    /// Drop the in-process reservation and release the `fcntl` lock.
    ///
    /// A failure of the `fcntl` unlock call is ignored; this always returns
    /// `Ok(())`.
    fn unlock(&mut self) -> Result<(), LockError> {
        OPEN_WRITE_LOCKS.lock().unwrap().remove(&self.filename);
        let flock = nix::libc::flock {
            l_type: nix::libc::F_UNLCK as i16,
            l_whence: nix::libc::SEEK_SET as i16,
            l_start: 0,
            l_len: 0,
            l_pid: 0,
        };
        let _ = fcntl(&self.f, FcntlArg::F_SETLK(&flock));
        Ok(())
    }
}

impl FileLock for WriteLock {
    /// Return a duplicated handle to the locked file.
    fn file(&self) -> std::io::Result<Box<File>> {
        Ok(Box::new(self.f.try_clone()?))
    }

    /// Canonical path of the locked file.
    fn path(&self) -> &Path {
        &self.filename
    }
}

pub struct ReadLock {
    filename: PathBuf,
    f:
File,
}

impl ReadLock {
    /// Take a non-blocking shared (read) lock on `filename`.
    ///
    /// `filename` is canonicalised first. When `strict_locks` is true, an
    /// open write lock on the same path inside this process is treated as
    /// contention; otherwise it is only logged.
    ///
    /// # Errors
    ///
    /// Returns `LockError::Contention` when `strict_locks` forbids the
    /// request or when the `fcntl` call fails. Failures to resolve or open
    /// the file are propagated from `open`.
    pub fn new(filename: &Path, strict_locks: bool) -> std::result::Result<Self, LockError> {
        let filename = crate::osutils::path::realpath(filename)?;
        if OPEN_WRITE_LOCKS.lock().unwrap().contains(&filename) {
            if strict_locks {
                return Err(LockError::Contention(filename));
            } else {
                debug!(
                    "Read lock taken w/ an open write lock on: {}",
                    filename.to_string_lossy()
                );
            }
        }

        let (filename, f) = open(&filename, OpenOptions::new().read(true))?;

        // l_start = 0 and l_len = 0 from SEEK_SET: lock the whole file.
        let flock = nix::libc::flock {
            l_type: nix::libc::F_RDLCK as i16,
            l_whence: nix::libc::SEEK_SET as i16,
            l_start: 0,
            l_len: 0,
            l_pid: 0,
        };
        if fcntl(&f, FcntlArg::F_SETLK(&flock)).is_err() {
            // we should be more precise about whats a locking
            // error and whats a random-other error
            return Err(LockError::Contention(filename));
        }

        // Count this holder only once the lock is really held. Registering
        // before open()/fcntl() would leave a stale count behind whenever
        // either of them fails, because no ReadLock exists to unlock() it.
        *OPEN_READ_LOCKS
            .lock()
            .unwrap()
            .entry(filename.clone())
            .or_insert(0) += 1;

        Ok(ReadLock { filename, f })
    }

    /// Try to grab a write lock on the file.
    ///
    /// On platforms that support it, this will upgrade to a write lock
    /// without unlocking the file.
    /// Otherwise, this will release the read lock, and try to acquire a
    /// write lock.
    ///
    /// Returns: A token which can be used to switch back to a read lock.
pub fn temporary_write_lock(
        self,
    ) -> std::result::Result {
        // Panics (rather than returning an error) if this process already
        // records a write lock on the same path.
        if OPEN_WRITE_LOCKS.lock().unwrap().contains(&self.filename) {
            panic!("file already locked: {}", self.filename.to_string_lossy());
        }
        TemporaryWriteLock::new(self)
    }
}

impl Lock for ReadLock {
    /// Decrement this path's read-lock count (dropping the entry when it
    /// reaches zero) and release the `fcntl` lock.
    ///
    /// Panics if no read lock is recorded for the path. A failure of the
    /// `fcntl` unlock call is ignored; the method then returns `Ok(())`.
    fn unlock(&mut self) -> std::result::Result<(), LockError> {
        match OPEN_READ_LOCKS.lock().unwrap().entry(self.filename.clone()) {
            Entry::Occupied(mut entry) => {
                let count = entry.get_mut();
                // Last holder: remove the entry instead of leaving a zero.
                if *count == 1 {
                    entry.remove();
                } else {
                    *count -= 1;
                }
            }
            Entry::Vacant(_) => panic!("no read lock on {}", self.filename.to_string_lossy()),
        }
        let flock = nix::libc::flock {
            l_type: nix::libc::F_UNLCK as i16,
            l_whence: nix::libc::SEEK_SET as i16,
            l_start: 0,
            l_len: 0,
            l_pid: 0,
        };
        let _ = fcntl(&self.f, FcntlArg::F_SETLK(&flock));
        Ok(())
    }
}

impl FileLock for ReadLock {
    /// Return a duplicated handle to the locked file.
    fn file(&self) -> std::io::Result> {
        Ok(Box::new(self.f.try_clone()?))
    }

    /// Path stored on this lock.
    fn path(&self) -> &Path {
        &self.filename
    }
}

/// A token used when grabbing a temporary_write_lock.
///
/// Call restore_read_lock() when you are done with the write lock.
pub struct TemporaryWriteLock {
    // The read lock being upgraded; handed back by restore_read_lock().
    read_lock: ReadLock,
    filename: PathBuf,
    // Separate handle opened for writing, on which the new lock is taken.
    f: File,
}

impl TemporaryWriteLock {
    /// Upgrade `read_lock` to a temporary write lock.
    ///
    /// On failure the original `ReadLock` is returned together with the
    /// error so the caller keeps its read lock. Panics if this process
    /// already records a write lock on the path.
    pub fn new(read_lock: ReadLock) -> std::result::Result {
        let filename = read_lock.filename.clone();
        if let Some(count) = OPEN_READ_LOCKS.lock().unwrap().get(&filename) {
            if *count > 1 {
                // Something else also has a read-lock, so we cannot grab a
                // write lock.
                return Err((read_lock, LockError::Contention(filename)));
            }
        }
        if OPEN_WRITE_LOCKS.lock().unwrap().contains(&filename) {
            panic!("file already locked: {}", filename.to_string_lossy());
        }
        // See if we can open the file for writing. Another process might
        // have a read lock. We don't use self._open() because we don't want
        // to create the file if it exists.
That would have already been // done by ReadLock let f = match OpenOptions::new() .write(true) .read(true) .create(true) .open(&filename) { Ok(f) => Ok(f), Err(e) => return Err((read_lock, e.into())), }?; // LOCK_NB will cause IOError to be raised if we can't grab a // lock right away. let flock = nix::libc::flock { l_type: nix::libc::F_RDLCK as i16, l_whence: nix::libc::SEEK_SET as i16, l_start: 0, l_len: 0, l_pid: 0, }; match fcntl(&f, FcntlArg::F_SETLK(&flock)) { Ok(_) => Ok(()), Err(_) => { return Err((read_lock, LockError::Contention(filename))); } }?; OPEN_WRITE_LOCKS.lock().unwrap().insert(filename.clone()); Ok(Self { read_lock, filename, f, }) } /// Restore the original ReadLock. pub fn restore_read_lock(self) -> ReadLock { // For fcntl, since we never released the read lock, just release // the write lock, and return the original lock. let flock = nix::libc::flock { l_type: nix::libc::F_UNLCK as i16, l_whence: nix::libc::SEEK_SET as i16, l_start: 0, l_len: 0, l_pid: 0, }; match fcntl(&self.f, FcntlArg::F_SETLK(&flock)) { Ok(_) => {} Err(e) => { debug!( "error unlocking file {}: {}", &self.filename.to_string_lossy(), e ); } } OPEN_WRITE_LOCKS.lock().unwrap().remove(&self.filename); self.read_lock } } impl FileLock for TemporaryWriteLock { fn file(&self) -> std::io::Result> { Ok(Box::new(self.f.try_clone()?)) } fn path(&self) -> &Path { &self.filename } } dromedary-0.1.5/src/gio.rs000066400000000000000000000730451520150013200154140ustar00rootroot00000000000000//! GIO Transport, ported from dromedary/gio_transport.py. //! //! Wraps `gio::File` to expose the dromedary [`Transport`] trait over //! anything gvfs can mount: `gio+file://`, `gio+sftp://`, `gio+smb://`, //! `gio+dav://`, `gio+ftp://`, `gio+ssh://`, `gio+obex://`. //! //! `gio::File` is `!Send`/`!Sync`, so we never store one on the struct; //! every method reconstructs files via `gio::File::for_uri` from the //! `String` base URL and a relpath. This sidesteps the threading //! 
constraints `Transport` imposes (`Send + Sync`). //! //! Mounting volumes that need credentials currently isn't implemented — //! v1 only handles URLs that gvfs can already enumerate. See the TODO //! near `ensure_mounted` for the path forward. use crate::lock::{Lock, LockError}; use crate::urlutils::escape; use crate::{Error, FileKind, ReadStream, Result, Stat, Transport, UrlFragment, WriteStream}; use ::gio::prelude::*; use ::gio::{FileCopyFlags, FileQueryInfoFlags, IOErrorEnum}; use std::collections::HashMap; use std::fs::Permissions; use std::io::{Cursor, Read}; use std::sync::mpsc; use std::thread; use url::Url; const GIO_BACKENDS: &[&str] = &["dav", "file", "ftp", "obex", "sftp", "ssh", "smb"]; /// A transport that proxies through a gvfs mount. pub struct GioTransport { /// Public dromedary base, including the `gio+` scheme prefix and a /// trailing slash. base: Url, /// URL stripped of the `gio+` prefix and any embedded credentials, /// suitable to pass to `gio::File::for_uri`. backend_url: String, } impl std::fmt::Debug for GioTransport { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "GioTransport({})", self.base) } } impl GioTransport { pub fn new(base: &str) -> Result { let mut base = base.to_string(); if !base.ends_with('/') { base.push('/'); } let stripped = base .strip_prefix("gio+") .ok_or_else(|| Error::NotLocalUrl(base.clone()))?; let parsed = Url::parse(stripped).map_err(Error::from)?; if !GIO_BACKENDS.contains(&parsed.scheme()) { return Err(Error::UrlError(url::ParseError::IdnaError)); } // Reconstruct the backend URL with any embedded user/password // stripped — gvfs handles credentials via MountOperation, not // via the URL. 
let mut backend = parsed.clone(); let _ = backend.set_username(""); let _ = backend.set_password(None); let backend_url = backend.to_string(); let public_base = Url::parse(&base).map_err(Error::from)?; Ok(Self { base: public_base, backend_url, }) } fn child_url(&self, relpath: &UrlFragment) -> Result { // The backend URL ends with `/` so url::Url::join treats it as a // directory and resolves relpaths relative to it. An empty or // `.` relpath returns the directory itself. let base = Url::parse(&self.backend_url).map_err(Error::from)?; let trimmed = if relpath == "." || relpath.is_empty() { "" } else { relpath }; let joined = base.join(trimmed).map_err(Error::from)?; Ok(joined.to_string()) } fn file_for(&self, relpath: &UrlFragment) -> Result<::gio::File> { let url = self.child_url(relpath)?; Ok(::gio::File::for_uri(&url)) } /// Translate a gvfs error into the dromedary error vocabulary. fn translate(err: glib::Error, relpath: Option<&UrlFragment>) -> Error { let path = relpath.map(|p| p.to_string()); match err.kind::() { Some(IOErrorEnum::NotFound) => Error::NoSuchFile(path), Some(IOErrorEnum::Exists) => Error::FileExists(path), Some(IOErrorEnum::NotDirectory) => Error::NotADirectoryError(path), Some(IOErrorEnum::IsDirectory) => Error::IsADirectoryError(path), Some(IOErrorEnum::NotEmpty) => Error::DirectoryNotEmptyError(path), Some(IOErrorEnum::PermissionDenied) => Error::PermissionDenied(path), Some(IOErrorEnum::Busy) => Error::ResourceBusy(path), Some(IOErrorEnum::NotMounted) => { Error::TransportNotPossible(Some("volume not mounted".into())) } Some(IOErrorEnum::ReadOnly) => Error::PermissionDenied(path), // Everything else folds into a generic IO error so the caller // gets *something* useful instead of a panic. 
_ => Error::Io(std::io::Error::other(err.to_string())), } } } struct GioReadStream(Cursor>); impl Read for GioReadStream { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { self.0.read(buf) } } impl std::io::Seek for GioReadStream { fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result { self.0.seek(pos) } } impl ReadStream for GioReadStream {} /// Commands sent from the public `GioWriteStream` handle to the worker /// thread that owns the underlying `gio::FileOutputStream`. The worker /// is necessary because `FileOutputStream` is `!Send`, but `WriteStream` /// requires `Send + Sync`. enum WriterCmd { Write(Vec), Flush, Close, } /// Reply payload returned over a one-shot reply channel for each command. /// `glib::Error` is `!Send`, so any error is converted to a string here. type WriterReply = std::result::Result; /// Send+Sync handle to a writer thread that owns a gvfs output stream. struct GioWriteStream { tx: Option)>>, join: Option>, } impl GioWriteStream { fn spawn(url: String) -> Result { // The worker creates the output stream itself so the !Send // `gio::File` / `FileOutputStream` never crosses thread boundaries. // Open synchronously via a one-shot channel so we can surface // errors before returning the handle. let (open_tx, open_rx) = mpsc::channel::>(); let (cmd_tx, cmd_rx) = mpsc::channel::<(WriterCmd, mpsc::Sender)>(); let join = thread::spawn(move || { let file = ::gio::File::for_uri(&url); let stream = match file.replace( None, false, ::gio::FileCreateFlags::REPLACE_DESTINATION, ::gio::Cancellable::NONE, ) { Ok(s) => { if open_tx.send(Ok(())).is_err() { // Caller went away; clean up and exit. 
let _ = s.close(::gio::Cancellable::NONE); return; } s } Err(e) => { let _ = open_tx.send(Err(e.to_string())); return; } }; while let Ok((cmd, reply)) = cmd_rx.recv() { match cmd { WriterCmd::Write(buf) => { let res = match stream.write_all(&buf, ::gio::Cancellable::NONE) { Ok((written, None)) => Ok(written), Ok((_, Some(e))) => Err(e.to_string()), Err(e) => Err(e.to_string()), }; let _ = reply.send(res); } WriterCmd::Flush => { let res = stream .flush(::gio::Cancellable::NONE) .map(|_| 0usize) .map_err(|e| e.to_string()); let _ = reply.send(res); } WriterCmd::Close => { let res = stream .close(::gio::Cancellable::NONE) .map(|_| 0usize) .map_err(|e| e.to_string()); let _ = reply.send(res); return; } } } // Sender dropped without an explicit Close — best-effort close. let _ = stream.close(::gio::Cancellable::NONE); }); match open_rx.recv() { Ok(Ok(())) => Ok(GioWriteStream { tx: Some(cmd_tx), join: Some(join), }), Ok(Err(msg)) => { let _ = join.join(); Err(Error::Io(std::io::Error::other(msg))) } Err(_) => { let _ = join.join(); Err(Error::Io(std::io::Error::other( "gio writer thread exited before opening stream", ))) } } } fn dispatch(&self, cmd: WriterCmd) -> std::io::Result { let tx = self .tx .as_ref() .ok_or_else(|| std::io::Error::other("gio write stream already closed"))?; let (reply_tx, reply_rx) = mpsc::channel(); tx.send((cmd, reply_tx)) .map_err(|_| std::io::Error::other("gio writer thread exited"))?; match reply_rx.recv() { Ok(Ok(n)) => Ok(n), Ok(Err(msg)) => Err(std::io::Error::other(msg)), Err(_) => Err(std::io::Error::other("gio writer thread exited")), } } } impl std::io::Write for GioWriteStream { fn write(&mut self, buf: &[u8]) -> std::io::Result { // write_all on the worker drains the whole buffer or returns an error. 
self.dispatch(WriterCmd::Write(buf.to_vec()))?; Ok(buf.len()) } fn flush(&mut self) -> std::io::Result<()> { self.dispatch(WriterCmd::Flush).map(|_| ()) } } impl WriteStream for GioWriteStream { fn sync_data(&self) -> std::io::Result<()> { // gvfs has no fsync; OutputStream::flush is the strongest durability // primitive available, matching what the Python port did. self.dispatch(WriterCmd::Flush).map(|_| ()) } } impl Drop for GioWriteStream { fn drop(&mut self) { // Best-effort close. Errors here have nowhere to go. if let Some(tx) = self.tx.take() { let (reply_tx, reply_rx) = mpsc::channel(); if tx.send((WriterCmd::Close, reply_tx)).is_ok() { let _ = reply_rx.recv(); } } if let Some(join) = self.join.take() { let _ = join.join(); } } } /// gvfs offers no real lock primitive, matching the Python implementation /// which returned a no-op lock. We do the same. struct BogusLock; impl Lock for BogusLock { fn unlock(&mut self) -> std::result::Result<(), LockError> { Ok(()) } } impl Transport for GioTransport { fn external_url(&self) -> Result { Ok(self.base.clone()) } fn can_roundtrip_unix_modebits(&self) -> bool { false } fn base(&self) -> Url { self.base.clone() } fn get(&self, relpath: &UrlFragment) -> Result> { let f = self.file_for(relpath)?; let input = f .read(::gio::Cancellable::NONE) .map_err(|e| Self::translate(e, Some(relpath)))?; let mut buf = Vec::new(); loop { let chunk = input .read_bytes(64 * 1024, ::gio::Cancellable::NONE) .map_err(|e| Self::translate(e, Some(relpath)))?; if chunk.is_empty() { break; } buf.extend_from_slice(&chunk); } let _ = input.close(::gio::Cancellable::NONE); Ok(Box::new(GioReadStream(Cursor::new(buf)))) } fn has(&self, relpath: &UrlFragment) -> Result { let f = self.file_for(relpath)?; match f.query_info( "standard::type", FileQueryInfoFlags::NONE, ::gio::Cancellable::NONE, ) { Ok(info) => Ok(matches!( info.file_type(), ::gio::FileType::Regular | ::gio::FileType::Directory )), Err(e) if e.kind::() == Some(IOErrorEnum::NotFound) 
=> Ok(false), Err(e) => Err(Self::translate(e, Some(relpath))), } } fn mkdir(&self, relpath: &UrlFragment, _permissions: Option) -> Result<()> { let f = self.file_for(relpath)?; f.make_directory(::gio::Cancellable::NONE) .map_err(|e| Self::translate(e, Some(relpath))) } fn stat(&self, relpath: &UrlFragment) -> Result { let f = self.file_for(relpath)?; let info = f .query_info( "standard::size,standard::type", FileQueryInfoFlags::NONE, ::gio::Cancellable::NONE, ) .map_err(|e| Self::translate(e, Some(relpath)))?; let kind = match info.file_type() { ::gio::FileType::Regular => FileKind::File, ::gio::FileType::Directory => FileKind::Dir, ::gio::FileType::SymbolicLink => FileKind::Symlink, _ => FileKind::Other, }; Ok(Stat { size: info.size().max(0) as usize, #[cfg(unix)] mode: match kind { FileKind::Dir => 0o040755, FileKind::Symlink => 0o120777, _ => 0o100644, }, kind, mtime: None, }) } fn clone(&self, offset: Option<&UrlFragment>) -> Result> { let new_backend = match offset { Some(o) if !o.is_empty() => { let base = Url::parse(&self.backend_url).map_err(Error::from)?; base.join(o).map_err(Error::from)?.to_string() } _ => self.backend_url.clone(), }; let new_base = format!("gio+{}", new_backend); Ok(Box::new(GioTransport::new(&new_base)?)) } fn abspath(&self, relpath: &UrlFragment) -> Result { let trimmed = if relpath == "." || relpath.is_empty() { "" } else { relpath }; self.base.join(trimmed).map_err(Error::from) } fn relpath(&self, abspath: &Url) -> Result { let base = self.base.as_str(); abspath .as_str() .strip_prefix(base) .map(|s| s.to_string()) .ok_or(Error::PathNotChild) } fn put_file( &self, relpath: &UrlFragment, f: &mut dyn Read, _permissions: Option, ) -> Result { // Mirror Python: write to a temp sibling, then move-with-overwrite. 
let tmp_rel = format!("{}.tmp.{}", relpath, std::process::id()); let tmp_file = self.file_for(&tmp_rel)?; let dest_file = self.file_for(relpath)?; let mut buf = Vec::new(); f.read_to_end(&mut buf) .map_err(|e| Error::Io(std::io::Error::other(e.to_string())))?; let out = tmp_file .create(::gio::FileCreateFlags::NONE, ::gio::Cancellable::NONE) .map_err(|e| Self::translate(e, Some(relpath)))?; // OutputStreamExtManual::write_all loops until the buffer drains. // Signature: Result<(written, Option), full_err>. match out.write_all(&buf, ::gio::Cancellable::NONE) { Ok((_, None)) => {} Ok((_, Some(e))) => return Err(Self::translate(e, Some(relpath))), Err(e) => return Err(Self::translate(e, Some(relpath))), } out.close(::gio::Cancellable::NONE) .map_err(|e| Self::translate(e, Some(relpath)))?; let move_result = tmp_file.move_( &dest_file, FileCopyFlags::OVERWRITE, ::gio::Cancellable::NONE, None, ); if let Err(e) = move_result { // Best-effort cleanup; ignore secondary errors. let _ = tmp_file.delete(::gio::Cancellable::NONE); return Err(Self::translate(e, Some(relpath))); } Ok(buf.len() as u64) } fn delete(&self, relpath: &UrlFragment) -> Result<()> { let f = self.file_for(relpath)?; f.delete(::gio::Cancellable::NONE) .map_err(|e| Self::translate(e, Some(relpath))) } fn rmdir(&self, relpath: &UrlFragment) -> Result<()> { let st = self.stat(relpath)?; if st.kind != FileKind::Dir { return Err(Error::NotADirectoryError(Some(relpath.to_string()))); } let f = self.file_for(relpath)?; f.delete(::gio::Cancellable::NONE) .map_err(|e| Self::translate(e, Some(relpath))) } fn rename(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { let from = self.file_for(rel_from)?; let to = self.file_for(rel_to)?; from.move_(&to, FileCopyFlags::NONE, ::gio::Cancellable::NONE, None) .map_err(|e| Self::translate(e, Some(rel_from))) } fn r#move(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { let from = self.file_for(rel_from)?; let to = self.file_for(rel_to)?; 
from.move_( &to, FileCopyFlags::OVERWRITE, ::gio::Cancellable::NONE, None, ) .map_err(|e| Self::translate(e, Some(rel_from))) } fn copy(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { let data = self.get_bytes(rel_from)?; let mut cur = Cursor::new(data); self.put_file(rel_to, &mut cur, None).map(|_| ()) } fn append_file( &self, relpath: &UrlFragment, f: &mut dyn Read, _permissions: Option, ) -> Result { // Python notes that GIO's append_to truncates instead of appending, // so it implements a manual read+rewrite-via-tempfile. Mirror that. let mut existing = match self.get_bytes(relpath) { Ok(b) => b, Err(Error::NoSuchFile(_)) => Vec::new(), Err(e) => return Err(e), }; let original_len = existing.len() as u64; let mut to_append = Vec::new(); f.read_to_end(&mut to_append) .map_err(|e| Error::Io(std::io::Error::other(e.to_string())))?; existing.extend_from_slice(&to_append); let mut cur = Cursor::new(existing); self.put_file(relpath, &mut cur, None)?; Ok(original_len) } fn list_dir(&self, relpath: &UrlFragment) -> Box>> { let f = match self.file_for(relpath) { Ok(f) => f, Err(e) => return Box::new(std::iter::once(Err(e))), }; let enumerator = match f.enumerate_children( "standard::name", FileQueryInfoFlags::NONE, ::gio::Cancellable::NONE, ) { Ok(e) => e, Err(e) => return Box::new(std::iter::once(Err(Self::translate(e, Some(relpath))))), }; let mut entries: Vec> = Vec::new(); loop { match enumerator.next_file(::gio::Cancellable::NONE) { Ok(Some(info)) => { let name = info.name(); let name_str = name.to_string_lossy(); entries.push(Ok(escape(name_str.as_bytes(), None))); } Ok(None) => break, Err(e) => { entries.push(Err(Self::translate(e, Some(relpath)))); break; } } } let _ = enumerator.close(::gio::Cancellable::NONE); Box::new(entries.into_iter()) } fn iter_files_recursive(&self) -> Box>> { let mut queue: Vec = Vec::new(); for entry in self.list_dir(".") { match entry { Ok(name) => queue.push(name), Err(e) => return 
Box::new(std::iter::once(Err(e))), } } let mut results: Vec> = Vec::new(); while let Some(rel) = queue.pop() { match self.stat(&rel) { Ok(st) if st.kind == FileKind::Dir => { for child in self.list_dir(&rel) { match child { Ok(name) => queue.push(format!("{}/{}", rel, name)), Err(e) => { results.push(Err(e)); break; } } } } Ok(_) => results.push(Ok(rel)), Err(e) => results.push(Err(e)), } } Box::new(results.into_iter()) } fn lock_read(&self, _relpath: &UrlFragment) -> Result> { Ok(Box::new(BogusLock)) } fn lock_write(&self, _relpath: &UrlFragment) -> Result> { Ok(Box::new(BogusLock)) } fn local_abspath(&self, relpath: &UrlFragment) -> Result { Err(Error::NotLocalUrl(format!("{}{}", self.base, relpath))) } fn listable(&self) -> bool { true } fn set_segment_parameter(&mut self, _key: &str, _value: Option<&str>) -> Result<()> { Err(Error::TransportNotPossible(Some( "gio transport does not support segment parameters".into(), ))) } fn get_segment_parameters(&self) -> Result> { Ok(HashMap::new()) } fn readlink(&self, _relpath: &UrlFragment) -> Result { // gvfs symlinks are exposed by query_info but resolving them // requires standard::symlink-target. The Python port did not // implement readlink either; keep parity. Err(Error::TransportNotPossible(Some( "gio transport does not support readlink".into(), ))) } fn hardlink(&self, _rel_from: &UrlFragment, _rel_to: &UrlFragment) -> Result<()> { Err(Error::TransportNotPossible(Some( "gio transport does not support hardlink".into(), ))) } fn symlink(&self, _rel_from: &UrlFragment, _rel_to: &UrlFragment) -> Result<()> { Err(Error::TransportNotPossible(Some( "gio transport does not support symlink".into(), ))) } fn delete_tree(&self, relpath: &UrlFragment) -> Result<()> { let st = self.stat(relpath)?; if st.kind != FileKind::Dir { return Err(Error::NotADirectoryError(Some(relpath.to_string()))); } // Depth-first removal: enumerate entries, recurse into directories, // delete files, then delete the now-empty directory. 
let f = self.file_for(relpath)?; let enumerator = f .enumerate_children( "standard::name,standard::type", FileQueryInfoFlags::NOFOLLOW_SYMLINKS, ::gio::Cancellable::NONE, ) .map_err(|e| Self::translate(e, Some(relpath)))?; loop { match enumerator.next_file(::gio::Cancellable::NONE) { Ok(Some(info)) => { let name = info.name(); let name_str = name.to_string_lossy(); let child_rel = format!("{}/{}", relpath.trim_end_matches('/'), name_str); match info.file_type() { ::gio::FileType::Directory => self.delete_tree(&child_rel)?, _ => { let child = self.file_for(&child_rel)?; child .delete(::gio::Cancellable::NONE) .map_err(|e| Self::translate(e, Some(&child_rel)))?; } } } Ok(None) => break, Err(e) => return Err(Self::translate(e, Some(relpath))), } } let _ = enumerator.close(::gio::Cancellable::NONE); f.delete(::gio::Cancellable::NONE) .map_err(|e| Self::translate(e, Some(relpath))) } fn open_write_stream( &self, relpath: &UrlFragment, _permissions: Option, ) -> Result> { // gio::FileOutputStream is !Send. We dedicate one worker thread per // open stream — the worker owns the underlying handle and we drive // it via a synchronous command channel. let url = self.child_url(relpath)?; let stream = GioWriteStream::spawn(url)?; Ok(Box::new(stream)) } } // `gio::File` and friends are `!Send + !Sync`. Our struct only stores // owned `String`/`Url`, which are Send+Sync, and `GioWriteStream` keeps // its !Send `gio::FileOutputStream` pinned to a worker thread. #[cfg(test)] mod tests { use super::*; use tempfile::TempDir; fn temp_transport() -> (TempDir, GioTransport) { let dir = TempDir::new().unwrap(); // gio::File::for_uri wants file:///abs/path — build it via Url so // path escaping is handled for us. 
let file_url = url::Url::from_directory_path(dir.path()).unwrap(); let base = format!("gio+{}", file_url.as_str()); let t = GioTransport::new(&base).unwrap(); (dir, t) } #[test] fn rejects_unknown_scheme() { match GioTransport::new("gio+nope:///x/") { Err(Error::UrlError(_)) => {} other => panic!("expected UrlError, got {:?}", other), } } #[test] fn requires_gio_prefix() { match GioTransport::new("file:///does/not/matter/") { Err(Error::NotLocalUrl(_)) => {} other => panic!("expected NotLocalUrl, got {:?}", other), } } #[test] fn put_get_has_round_trip() { let (_dir, t) = temp_transport(); assert!(!t.has("hello").unwrap()); t.put_bytes("hello", b"world", None).unwrap(); assert!(t.has("hello").unwrap()); assert_eq!(t.get_bytes("hello").unwrap(), b"world"); } #[test] fn mkdir_stat_list_round_trip() { let (_dir, t) = temp_transport(); t.mkdir("d", None).unwrap(); t.put_bytes("d/a", b"1", None).unwrap(); t.put_bytes("d/b", b"22", None).unwrap(); let mut entries: Vec = t.list_dir("d").filter_map(|r| r.ok()).collect(); entries.sort(); assert_eq!(entries, vec!["a".to_string(), "b".to_string()]); assert_eq!(t.stat("d").unwrap().kind, FileKind::Dir); } #[test] fn rename_and_delete() { let (_dir, t) = temp_transport(); t.put_bytes("a", b"hi", None).unwrap(); t.rename("a", "b").unwrap(); assert!(!t.has("a").unwrap()); assert_eq!(t.get_bytes("b").unwrap(), b"hi"); t.delete("b").unwrap(); assert!(!t.has("b").unwrap()); } #[test] fn append_extends_file() { let (_dir, t) = temp_transport(); t.put_bytes("f", b"abc", None).unwrap(); let mut more = Cursor::new(b"DEF".to_vec()); let offset = t.append_file("f", &mut more, None).unwrap(); assert_eq!(offset, 3); assert_eq!(t.get_bytes("f").unwrap(), b"abcDEF"); } #[test] fn missing_file_get_returns_no_such_file() { let (_dir, t) = temp_transport(); match t.get_bytes("nope") { Err(Error::NoSuchFile(_)) => {} other => panic!("expected NoSuchFile, got {:?}", other), } } #[test] fn open_write_stream_round_trip() { use std::io::Write; let 
(_dir, t) = temp_transport(); let mut stream = t.open_write_stream("w", None).unwrap(); stream.write_all(b"hello ").unwrap(); stream.write_all(b"world").unwrap(); stream.flush().unwrap(); drop(stream); assert_eq!(t.get_bytes("w").unwrap(), b"hello world"); } #[test] fn open_write_stream_visible_after_flush() { // After explicit flush, a concurrent read on the same path must see // the buffered writes — this is what the per_transport // test_get_with_open_write_stream_sees_all_content scenario asserts. use std::io::Write; let (_dir, t) = temp_transport(); let mut stream = t.open_write_stream("w", None).unwrap(); stream.write_all(b"bcd").unwrap(); stream.flush().unwrap(); assert_eq!(t.get_bytes("w").unwrap(), b"bcd"); drop(stream); } #[test] fn open_write_stream_overwrites_existing() { use std::io::Write; let (_dir, t) = temp_transport(); t.put_bytes("w", b"old contents", None).unwrap(); let mut stream = t.open_write_stream("w", None).unwrap(); stream.write_all(b"new").unwrap(); drop(stream); assert_eq!(t.get_bytes("w").unwrap(), b"new"); } #[test] fn delete_tree_removes_nested() { let (_dir, t) = temp_transport(); t.mkdir("d", None).unwrap(); t.put_bytes("d/a", b"1", None).unwrap(); t.mkdir("d/sub", None).unwrap(); t.put_bytes("d/sub/b", b"2", None).unwrap(); t.delete_tree("d").unwrap(); assert!(!t.has("d").unwrap()); } #[test] fn delete_tree_rejects_non_directory() { let (_dir, t) = temp_transport(); t.put_bytes("f", b"x", None).unwrap(); match t.delete_tree("f") { Err(Error::NotADirectoryError(_)) => {} other => panic!("expected NotADirectoryError, got {:?}", other), } } #[test] fn iter_files_recursive_walks() { let (_dir, t) = temp_transport(); t.mkdir("d", None).unwrap(); t.put_bytes("d/a", b"1", None).unwrap(); t.mkdir("d/sub", None).unwrap(); t.put_bytes("d/sub/b", b"2", None).unwrap(); let mut files: Vec = t.iter_files_recursive().filter_map(|r| r.ok()).collect(); files.sort(); assert_eq!(files, vec!["d/a".to_string(), "d/sub/b".to_string()]); } } 
dromedary-0.1.5/src/http/000077500000000000000000000000001520150013200152365ustar00rootroot00000000000000dromedary-0.1.5/src/http/auth.rs000066400000000000000000000327551520150013200165610ustar00rootroot00000000000000//! HTTP authentication header builders. //! //! Thin composition layer over the primitives ported in Stage 2 //! (`DigestAlgorithm`, `new_cnonce`, `parse_auth_header`, //! `parse_http_list`, `parse_keqv_list`). This module holds the //! recipes that turn a parsed challenge + credentials into the //! `Authorization:` header value. //! //! Stage 6 scope is pure-Rust composition. The Python side still //! owns the handler chain (BasicAuthHandler / DigestAuthHandler / //! NegotiateAuthHandler in `dromedary/http/urllib.py`); Stage 7 is //! where the Rust `HttpClient` starts driving auth itself. //! Negotiate (kerberos) intentionally lives outside this module — //! it's a pluggable callback on the client, not a header formula. use base64::Engine; use super::{new_cnonce, DigestAlgorithm}; /// Build the value of an `Authorization: Basic ...` header. /// /// Mirrors `BasicAuthHandler.build_auth_header`: base64-encode /// `"user:password"` as UTF-8 and prepend the scheme keyword. pub fn build_basic_auth_header(user: &str, password: &str) -> String { let raw = format!("{}:{}", user, password); let encoded = base64::engine::general_purpose::STANDARD.encode(raw.as_bytes()); format!("Basic {}", encoded) } /// Per-connection digest-auth state. /// /// The `nonce_count` counter must be monotonic across retries /// against the same server nonce; a fresh nonce resets it to zero /// (matching `DigestAuthHandler.auth_match`'s behaviour when it /// sees `auth["nonce"] != nonce`). #[derive(Debug, Clone)] pub struct DigestAuthState { pub user: String, pub password: String, pub realm: String, pub nonce: String, /// The last `nonce_count` used. 
`build_digest_auth_header` /// increments this before formatting the `nc=...` field, so the /// first request bumps from 0 → 1 (matching Python's behaviour /// where `nonce_count` starts at 0 and the header shows `nc=00000001`). pub nonce_count: u64, pub algorithm: DigestAlgorithm, pub algorithm_name: Option, pub opaque: Option, pub qop: String, } /// Parsed `WWW-Authenticate: Digest ...` challenge that we can /// actually handle. `parse_digest_challenge` returns `None` when /// anything the Python version would have rejected is missing: /// unsupported `qop`, unsupported `algorithm`, or missing `nonce` / /// `realm`. #[derive(Debug, Clone, PartialEq, Eq)] pub struct DigestChallenge { pub realm: String, pub nonce: String, pub algorithm: DigestAlgorithm, /// Original `algorithm=` parameter value as the server sent it. /// Kept verbatim so the echoed `algorithm=` field in the /// response can be byte-for-byte the same. pub algorithm_name: Option, pub opaque: Option, /// Currently we only accept `qop=auth` (no `auth-int`), mirroring /// the Python `DigestAuthHandler.auth_match` check at urllib.py:1894. pub qop: String, } /// Parse the remainder of a `WWW-Authenticate: Digest ...` header /// (the part after the `Digest ` scheme keyword, typically what /// [`super::parse_auth_header`] hands back as the remainder). /// /// Returns `None` if the challenge is missing a required field or /// specifies an algorithm/qop we can't handle. Matches the Python /// `DigestAuthHandler.auth_match` accept/reject criteria. pub fn parse_digest_challenge(raw_auth: &str) -> Option { let params = super::parse_keqv_list(&super::parse_http_list(raw_auth)); // qop=auth only — Python `auth_match` rejects everything else // including the `auth-int` variant. let qop = params.get("qop")?.clone(); if qop != "auth" { return None; } // Default algorithm is MD5 when the server doesn't specify one. 
let algorithm_name = params.get("algorithm").cloned(); let algorithm_str = algorithm_name.as_deref().unwrap_or("MD5"); let algorithm = DigestAlgorithm::parse(algorithm_str)?; let realm = params.get("realm")?.clone(); let nonce = params.get("nonce")?.clone(); let opaque = params.get("opaque").cloned(); Some(DigestChallenge { realm, nonce, algorithm, algorithm_name, opaque, qop, }) } /// Build the `Authorization: Digest ...` header value for the given /// request, bumping `state.nonce_count` by one in the process. /// /// Follows RFC 2617 §3.2.2 — the same recipe the Python /// `DigestAuthHandler.build_auth_header` uses: /// /// ```text /// A1 = user:realm:password /// A2 = method:uri /// response = KD(H(A1), nonce:nc:cnonce:qop:H(A2)) /// ``` /// /// The `uri` argument should be the path component the client /// sends (what the Python version extracts with /// `urlparse(request.selector).path`). pub fn build_digest_auth_header(state: &mut DigestAuthState, method: &str, uri: &str) -> String { state.nonce_count += 1; let ncvalue = format!("{:08x}", state.nonce_count); let cnonce = new_cnonce(&state.nonce, state.nonce_count); let algo = state.algorithm; let a1 = format!("{}:{}:{}", state.user, state.realm, state.password); let a2 = format!("{}:{}", method, uri); let nonce_data = format!( "{}:{}:{}:{}:{}", state.nonce, ncvalue, cnonce, state.qop, algo.h(a2.as_bytes()) ); let response_digest = algo.kd(&algo.h(a1.as_bytes()), &nonce_data); let mut header = format!( "Digest username=\"{user}\", realm=\"{realm}\", nonce=\"{nonce}\", uri=\"{uri}\", cnonce=\"{cnonce}\", nc={nc}, qop=\"{qop}\", response=\"{resp}\"", user = state.user, realm = state.realm, nonce = state.nonce, uri = uri, cnonce = cnonce, nc = ncvalue, qop = state.qop, resp = response_digest, ); if let Some(opaque) = &state.opaque { // Python only appends `opaque` when the value is truthy; // mirror that by skipping empty strings too. 
if !opaque.is_empty() { header.push_str(&format!(", opaque=\"{}\"", opaque)); } } if let Some(alg) = &state.algorithm_name { if !alg.is_empty() { header.push_str(&format!(", algorithm=\"{}\"", alg)); } } header } #[cfg(test)] mod tests { use super::*; #[test] fn basic_header_matches_known_vector() { // RFC 7617 §2 example: user `Aladdin`, password `open sesame` // → `QWxhZGRpbjpvcGVuIHNlc2FtZQ==`. assert_eq!( build_basic_auth_header("Aladdin", "open sesame"), "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ==" ); } #[test] fn basic_header_handles_empty_password() { // Some services (webdav guests) do `user:` — the colon is // still present. assert_eq!(build_basic_auth_header("user", ""), "Basic dXNlcjo="); } #[test] fn basic_header_does_not_embed_newlines() { // Regression test for https://bugs.launchpad.net/bzr/+bug/1606203: // Python's base64 module wrapped at 76 chars and embedded // '\n' into long-credential Authorization headers, which // the server rejected as a malformed line. Long creds here // exercise the wrap-trigger path. let user = "user".repeat(10); // 40 chars let password = "password".repeat(5); // 40 chars let hdr = build_basic_auth_header(&user, &password); assert!( !hdr.contains('\n'), "header must not embed newlines: {:?}", hdr ); } #[test] fn parse_digest_challenge_happy_path() { let raw = r#"realm="Example", nonce="abc123", qop="auth", algorithm="MD5", opaque="o"# .to_string() + "\""; let c = parse_digest_challenge(&raw).expect("valid challenge"); assert_eq!(c.realm, "Example"); assert_eq!(c.nonce, "abc123"); assert_eq!(c.qop, "auth"); assert_eq!(c.algorithm, DigestAlgorithm::Md5); assert_eq!(c.algorithm_name.as_deref(), Some("MD5")); assert_eq!(c.opaque.as_deref(), Some("o")); } #[test] fn parse_digest_challenge_rejects_auth_int() { // Python `auth_match` explicitly checks `qop != "auth"` and // returns False for anything else, including `auth-int`. 
let raw = r#"realm="Example", nonce="abc123", qop="auth-int""#; assert!(parse_digest_challenge(raw).is_none()); } #[test] fn parse_digest_challenge_rejects_unknown_algorithm() { // SHA-256 isn't in our table yet — Python returns False. let raw = r#"realm="Example", nonce="abc123", qop="auth", algorithm="SHA-256""#; assert!(parse_digest_challenge(raw).is_none()); } #[test] fn parse_digest_challenge_defaults_md5() { // Algorithm field is optional; servers that omit it mean MD5. let raw = r#"realm="Example", nonce="abc123", qop="auth""#; let c = parse_digest_challenge(raw).expect("valid challenge"); assert_eq!(c.algorithm, DigestAlgorithm::Md5); assert_eq!(c.algorithm_name, None); } #[test] fn parse_digest_challenge_missing_nonce() { let raw = r#"realm="Example", qop="auth""#; assert!(parse_digest_challenge(raw).is_none()); } #[test] fn digest_header_has_rfc_shape() { // Drive with fixed inputs and check the header contains the // pieces we expect. The `response=` and `cnonce=` values // include a time-dependent cnonce, so we don't assert on the // exact digest — only that the field layout matches what // Python's version emits. let mut state = DigestAuthState { user: "alice".into(), password: "secret".into(), realm: "Example".into(), nonce: "abc123".into(), nonce_count: 0, algorithm: DigestAlgorithm::Md5, algorithm_name: Some("MD5".into()), opaque: Some("opaqueval".into()), qop: "auth".into(), }; let header = build_digest_auth_header(&mut state, "GET", "/path"); assert!(header.starts_with("Digest ")); assert!(header.contains("username=\"alice\"")); assert!(header.contains("realm=\"Example\"")); assert!(header.contains("nonce=\"abc123\"")); assert!(header.contains("uri=\"/path\"")); // Python's ncvalue is zero-padded to 8 hex digits. 
#[test]
fn digest_response_matches_rfc_2617_vector() {
    // Documentation reference for the RFC 2617 §3.5 worked example:
    //   user     = Mufasa
    //   password = Circle Of Life
    //   realm    = testrealm@host.com
    //   nonce    = dcd98b7102dd2f0e8b11d0f600bfb0c093
    //   method   = GET
    //   uri      = /dir/index.html
    //   qop      = auth
    //   nc       = 00000001
    //   cnonce   = 0a4f113b
    // for which the RFC gives the response digest
    //   6629fae49393a05397450978507c4ef1
    //
    // `build_digest_auth_header` obtains its cnonce from `new_cnonce`,
    // which a test cannot pin to the RFC's value, so the end-to-end
    // `response=` field is deliberately not checked here. Only the two
    // intermediate hashes H(A1) and H(A2), which depend solely on the
    // fixed inputs above, are verified. The shape of the header is
    // covered by the other tests in this module.
    let a1 = "Mufasa:testrealm@host.com:Circle Of Life";
    let a2 = "GET:/dir/index.html";
    let h_a1 = DigestAlgorithm::Md5.h(a1.as_bytes());
    assert_eq!(h_a1, "939e7578ed9e3c518a452acee763bce9");
    let h_a2 = DigestAlgorithm::Md5.h(a2.as_bytes());
    assert_eq!(h_a2, "39aff3a2bab6126f332b942af96d3366");
}
# Known limitations //! //! ## Proxy client caching //! //! reqwest bakes the proxy into the `Client` at construction //! (unlike ureq which let us override per-request). We work around //! that by caching a small set of pre-built clients keyed by the //! effective proxy URL; the common cases (no proxy, one proxy) hit //! at most two distinct clients. Tests that flip env vars mid-run //! rebuild a client on demand — there's no connection-pool warmup //! worth protecting at that scale. use std::collections::HashMap; use std::path::Path; use std::sync::Mutex; use std::time::Duration; use http::{Method, Uri}; use reqwest::blocking::{Client, ClientBuilder, Request as ReqwestRequest, Response}; use reqwest::{Certificate, Proxy}; use url::Url; use crate::http::auth::{ build_basic_auth_header, build_digest_auth_header, parse_digest_challenge, DigestAuthState, }; use crate::http::{ evaluate_proxy_bypass, get_proxy_env_var, getproxies_environment, parse_auth_header, ProxyBypass, }; /// Errors surfaced by the Rust HTTP client. /// /// These are translated to Python exceptions at the PyO3 boundary; /// the Python side catches them and re-maps to the existing /// `dromedary.errors` classes so existing callers don't notice. #[derive(Debug)] pub enum ClientError { /// The underlying reqwest call failed (TLS, transport, timeout, …). Transport(reqwest::Error), /// A URL or HTTP method was supplied that we couldn't parse. InvalidRequest(String), /// Error reading or writing the response body. 
Io(std::io::Error), } impl std::fmt::Display for ClientError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Self::Transport(e) => write!(f, "HTTP transport error: {}", e), Self::InvalidRequest(s) => write!(f, "invalid request: {}", s), Self::Io(e) => write!(f, "I/O error: {}", e), } } } impl std::error::Error for ClientError {} impl From for ClientError { fn from(e: reqwest::Error) -> Self { Self::Transport(e) } } impl From for ClientError { fn from(e: std::io::Error) -> Self { Self::Io(e) } } pub type Result = std::result::Result; /// Direction of a byte transfer reported via [`ActivityCallback`]. /// /// The two values are the only ones breezy's /// `Transport._report_activity` ever sees. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum ActivityDirection { /// Bytes received from the server. Read, /// Bytes sent to the server. Write, } impl ActivityDirection { pub fn as_str(self) -> &'static str { match self { Self::Read => "read", Self::Write => "write", } } } /// Reports byte transfers to the surrounding progress UI. /// /// Stored as an `Arc` so it can be shared between the upload-side /// report (fired once before sending the request body) and the /// download-side reader wrapper that tallies bytes as the /// response streams. Each call receives a chunk size and a /// direction; callbacks should be cheap because they may fire /// thousands of times per large download. pub type ActivityCallback = std::sync::Arc; /// Per-request knobs that callers sometimes need to override. The /// defaults match breezy's urllib-layer behaviour: no redirect /// following, so 3xx responses surface as-is for the caller to /// translate into a `RedirectRequested` if they want. /// /// Deliberately `Clone` + `Copy`-free: the activity callback is /// passed separately to [`HttpClient::request_with`] because it's a /// closure, not a plain config knob. 
#[derive(Debug, Clone)] pub struct RequestOptions { /// Follow 301/302/303/307/308 redirects automatically. pub follow_redirects: bool, /// Maximum number of redirects to follow before giving up. /// Mirrors the Python `HTTPRedirectHandler.max_redirections` /// default (10). pub max_redirects: u32, /// Maximum number of visits to the same URL in a redirect chain. /// Mirrors the Python `HTTPRedirectHandler.max_repeats` default /// (4). pub max_repeats: u32, } impl Default for RequestOptions { fn default() -> Self { Self { follow_redirects: false, max_redirects: 10, max_repeats: 4, } } } /// HTTP status codes we follow as redirects when /// `follow_redirects=true`. 300 / 304 / 305 / 306 are intentionally /// excluded, matching Python `HTTPRedirectHandler.redirect_request` /// which raises on anything outside (301, 302, 303, 307, 308). fn is_redirect(code: u16) -> bool { matches!(code, 301 | 302 | 303 | 307 | 308) } /// Drive a redirect loop around any single-round-trip function. /// /// Extracted from [`HttpClient::request_with`] so tests can exercise /// the loop without a real network round-trip. The closure is /// invoked once per hop with the target URL; it should return the /// raw response without following any redirects of its own. fn drive_redirects( options: &RequestOptions, url: &str, mut send: impl FnMut(&str) -> Result, ) -> Result { let mut visited: std::collections::HashMap = std::collections::HashMap::new(); let mut current_url = url.to_string(); let mut redirects = 0u32; loop { let resp = send(¤t_url)?; if !options.follow_redirects || !is_redirect(resp.status) { // Non-redirect, or caller opted out: if this *is* a 3xx // and we have a Location, expose it as `redirected_to` // so the transport layer can raise RedirectRequested // without re-parsing the headers. 
if is_redirect(resp.status) { if let Some(target) = redirect_target(&resp, ¤t_url) { return Ok(HttpResponse { redirected_to: Some(target), ..resp }); } } return Ok(resp); } // Pick the redirect target from Location / URI; if neither // is present the Python impl silently returns the response, // so we do too. let Some(newurl) = redirect_target(&resp, ¤t_url) else { return Ok(resp); }; redirects += 1; let visits = visited.entry(newurl.clone()).or_insert(0); *visits += 1; if *visits > options.max_repeats || redirects > options.max_redirects { return Err(ClientError::InvalidRequest(format!( "too many redirects (at {} after {} hops)", newurl, redirects ))); } current_url = newurl; // Carry headers and body forward unchanged; the Python // HTTPRedirectHandler.redirect_request does the same. } } /// Key under which [`AuthCache`] stores successful auth state. /// /// Scheme matters (http vs https shouldn't share auth even on the /// same host); port is normalised to the scheme default so a bare /// `http://host/` and `http://host:80/` hit the same cache entry. fn auth_cache_key(uri: &Uri) -> AuthCacheKey { let scheme = uri.scheme_str().unwrap_or("http").to_ascii_lowercase(); let host = uri.host().unwrap_or_default().to_ascii_lowercase(); let port = uri .port_u16() .unwrap_or_else(|| if scheme == "https" { 443 } else { 80 }); (scheme, host, port) } /// Remove the `user[:password]@` chunk between scheme and host, /// leaving everything else untouched. Used to hand reqwest a /// proxy URL without auth so reqwest doesn't auto-attach /// Proxy-Authorization — we want our own 407 retry path to /// negotiate that, matching urllib's behaviour. fn strip_userinfo(url: &str) -> String { let Some(scheme_end) = url.find("://") else { return url.to_string(); }; let body_start = scheme_end + 3; let rest = &url[body_start..]; // Authority ends at the first path/query/fragment marker. 
let auth_end = rest.find(['/', '?', '#']).unwrap_or(rest.len()); let authority = &rest[..auth_end]; let Some((_, host)) = authority.split_once('@') else { return url.to_string(); }; format!("{}{}{}", &url[..body_start], host, &rest[auth_end..]) } /// Extract the userinfo segment from a URL string without parsing /// the URL — that way we preserve the distinction between /// `http://joe:@host/` (empty password) and `http://joe@host/` /// (password absent) that both `http::Uri` and `url::Url` /// normalise away. Returns the substring between `://` and `@` if /// there is one, else `None`. fn extract_userinfo_raw(url: &str) -> Option { let scheme_end = url.find("://")?; let rest = &url[scheme_end + 3..]; // Authority ends at the first path/query/fragment char. let auth_end = rest.find(['/', '?', '#']).unwrap_or(rest.len()); let authority = &rest[..auth_end]; let (userinfo, _) = authority.split_once('@')?; Some(userinfo.to_string()) } /// Break a proxy URL down into the parts the credential-lookup path /// needs. Returns `(scheme, host, port, embedded_creds, user_hint)`. /// /// When the proxy URL carries a complete userinfo section with /// password (`http://joe:pw@proxy/` or the empty-password form /// `http://joe:@proxy/`) we lift those out so the 407 retry can /// send them as Proxy-Authorization without consulting the /// CredentialProvider — matching urllib's behaviour where an /// embedded user/password on the proxy URL was authoritative. /// /// When the URL carries a user but no `:` separator /// (`http://joe@proxy/`), that's a hint: we still need the /// CredentialProvider to supply the password, but we want it to /// skip its own user lookup/prompt and use `joe` as the default. 
fn proxy_connection_parts( proxy_url: &str, ) -> ( String, String, Option, Option<(String, String)>, Option, ) { let parsed = match Url::parse(proxy_url) { Ok(u) => u, Err(_) => return (String::new(), String::new(), None, None, None), }; let scheme = parsed.scheme().to_string(); let host = parsed.host_str().unwrap_or("").to_string(); let port = parsed.port(); // url::Url strips the empty-password form (`joe:@proxy/` parses // as `password=None`, indistinguishable from `joe@proxy/`). Parse // the userinfo ourselves to keep the two shapes apart, matching // what the origin-side `extract_userinfo_raw` + `split_userinfo` // flow does — `joe:@` is authoritative empty-password creds, // `joe@` is a hint requiring a password lookup. let (embedded, user_hint) = extract_userinfo_raw(proxy_url) .map(|raw| split_userinfo(&raw)) .unwrap_or((None, None)); (scheme, host, port, embedded, user_hint) } /// Split a raw userinfo segment into either authoritative credentials /// (`user:pass`) or a user hint (`user`-only). /// /// Returns `(Some((user, password)), None)` when both halves are /// present (`joe:@host` is authoritative empty-password creds); /// `(None, Some(user))` when only the user is present (a hint to the /// CredentialProvider); `(None, None)` if the user half can't be /// decoded. The empty-vs-missing-password distinction is preserved /// because `Url::parse` normalises `joe:@host` and `joe@host` to the /// same shape — callers who care extract the raw userinfo first via /// `extract_userinfo_raw`. 
fn split_userinfo(userinfo: &str) -> (Option<(String, String)>, Option) { match userinfo.split_once(':') { Some((u, p)) => { let user = percent_encoding::percent_decode_str(u) .decode_utf8() .ok() .map(|s| s.into_owned()); let password = percent_encoding::percent_decode_str(p) .decode_utf8() .ok() .map(|s| s.into_owned()) .unwrap_or_default(); match user { Some(u) => (Some((u, password)), None), None => (None, None), } } None => { let user = percent_encoding::percent_decode_str(userinfo) .decode_utf8() .ok() .map(|s| s.into_owned()); (None, user) } } } /// Key under which proxy auth is cached. Proxy credentials bind to /// the proxy URL, not the origin, so we key on the proxy's own URL. /// reqwest's `Proxy` type doesn't expose the URL back for inspection, /// so we track the raw string we built the proxy from alongside it. fn proxy_cache_key(proxy_url: &str) -> AuthCacheKey { let parsed = Url::parse(proxy_url).ok(); let host = parsed .as_ref() .and_then(|u| u.host_str().map(str::to_ascii_lowercase)) .unwrap_or_default(); let port = parsed .as_ref() .and_then(|u| u.port_or_known_default()) .unwrap_or(80); ("proxy".to_string(), host, port) } /// Estimate the number of bytes the request will take on the /// wire, matching what breezy's socket-level byte accounting in /// `PredefinedRequestHandler` adds up: request line + header bytes /// (including `\r\n` after each) + blank line + body. /// /// We replay the headers reqwest/hyper will inject — our caller- /// supplied ones plus the fixed-by-config `accept: */*`, /// `user-agent: `, `accept-encoding: `, `host: `, /// and (for bodied requests) `content-length`. That keeps /// byte-exact parity with the socket without re-plumbing reqwest /// through a custom connector. fn estimate_request_wire_size( method: &Method, url: &str, headers: &[(String, String)], body: &[u8], user_agent: Option<&str>, ) -> usize { // Request-line: "METHOD /path?query HTTP/1.1\r\n". 
Use // url::Url so we handle default/missing paths the same way // reqwest does. let parsed = match Url::parse(url) { Ok(u) => u, Err(_) => return 0, }; let path = { let mut p = parsed.path().to_string(); if p.is_empty() { p.push('/'); } if let Some(q) = parsed.query() { p.push('?'); p.push_str(q); } p }; let mut size = method.as_str().len() + 1 + path.len() + 1 + "HTTP/1.1\r\n".len(); // reqwest's default-headers set: accept, user-agent. hyper-util // adds host; reqwest's decoder feature list drives // accept-encoding. Replay what we're compiled with so the // count matches the wire. let host = parsed.host_str().unwrap_or(""); let host_header = match parsed.port() { Some(p) => format!("{}:{}", host, p), None => host.to_string(), }; let ua = user_agent.unwrap_or(DEFAULT_USER_AGENT_FALLBACK); // The default accept-encoding is just "gzip" — the `gzip` // feature is on by default; `brotli`/`zstd`/`deflate` are off // in our Cargo.toml, so only gzip is advertised. let defaults: [(&str, &str); 4] = [ ("accept", "*/*"), ("user-agent", ua), ("accept-encoding", "gzip"), ("host", host_header.as_str()), ]; for (k, v) in defaults.iter() { size += k.len() + 2 + v.len() + 2; // "name: value\r\n" } // Caller-supplied headers. reqwest lowercases header names on // the wire, so count lowercase lengths — identical to the // caller's length anyway since header names are ASCII. for (k, v) in headers { size += k.len() + 2 + v.len() + 2; } // Content-Length is added by hyper for any bodied request, and // we force a (possibly-empty) body for PUT/POST/PATCH so those // methods always carry the header. let expects_body = matches!(method, &Method::PUT | &Method::POST | &Method::PATCH); if !body.is_empty() || expects_body { // "content-length: N\r\n" let digits = body.len().checked_ilog10().unwrap_or(0) as usize + 1; size += "content-length: ".len() + digits + 2; } size += 2; // blank line separating headers from body. 
size += body.len(); size } /// Fallback user-agent used by `estimate_request_wire_size` when /// the config didn't supply one. Mirrors what `build_client` /// installs via `default_user_agent`. const DEFAULT_USER_AGENT_FALLBACK: &str = "Dromedary/0.1.0"; /// Walk a `reqwest::Error`'s source chain looking for a /// `hyper::Error` whose `is_incomplete_message()` is true — /// hyper's typed signal for "connection closed before the response /// was fully parsed." Used to distinguish "request sent, response /// corrupt" from "failed to even send the request." fn is_incomplete_message(err: &reqwest::Error) -> bool { let mut source: Option<&(dyn std::error::Error + 'static)> = std::error::Error::source(err); while let Some(cause) = source { if let Some(hyper_err) = cause.downcast_ref::() { return hyper_err.is_incomplete_message(); } source = cause.source(); } false } /// Estimate the status-line + header block length of the response /// as the server wrote it on the wire: "HTTP/1.1 NNN reason\r\n" /// + each header line + blank line. Mirrors the server-side count /// `PredefinedRequestHandler` uses for `bytes_written` (which is /// just `len(canned_response)`). fn estimate_response_header_size( _status: u16, reason: &str, headers: &[(String, String)], ) -> usize { // "HTTP/1.1 NNN Reason\r\n" let mut size = "HTTP/1.1 ".len() + 3 + 1 + reason.len() + 2; for (k, v) in headers { size += k.len() + 2 + v.len() + 2; } size += 2; // blank line size } /// Build an `Authorization:` header value from the cached state. /// Mutates digest state in place so `nonce_count` bumps correctly. fn cached_auth_header(cached: &CachedAuth, method: &Method, uri: &Uri) -> Option { match cached { CachedAuth::Basic { user, password } => Some(build_basic_auth_header(user, password)), CachedAuth::Digest(state) => { // `build_digest_auth_header` mutates nonce_count. This // function only reads the cached state; the caller owns // the mutation and re-stores after. 
We work on a local // clone — `&CachedAuth` doesn't give us a write path. let mut s = state.clone(); Some(build_digest_auth_header( &mut s, method.as_str(), uri.path(), )) } CachedAuth::Negotiate { token } => Some(format!("Negotiate {}", token)), } } /// Pull the `realm` value out of a Basic-auth challenge remainder. /// The challenge looks like `realm="Secure Area"`; we match the /// outermost quoted string after `realm=`. Returns `None` if /// `realm` is missing — callers then pass `None` into the /// credential lookup. fn extract_basic_realm(raw: &str) -> Option<&str> { let after = raw.split("realm=").nth(1)?; let trimmed = after.trim_start(); if let Some(inner) = trimmed.strip_prefix('"') { // Stop at the next unescaped `"`. The Basic-auth grammar // doesn't allow backslash-quote inside a quoted string // (RFC 7617 uses token68 for the credentials, and the // challenge parameters follow RFC 7235's auth-param rules). inner.find('"').map(|end| &inner[..end]) } else { // Unquoted token — read up to whitespace or comma. let end = trimmed .find(|c: char| c.is_ascii_whitespace() || c == ',') .unwrap_or(trimmed.len()); Some(&trimmed[..end]) } } /// Resolve the redirect target for a 3xx response: prefer /// `Location:`, fall back to `URI:`. Matches the Python handler /// which also accepts the antiquated `URI` header. Returns `None` /// if neither header is present or the value fails to parse as a /// URL even after joining with the request URL. fn redirect_target(resp: &HttpResponse, current_url: &str) -> Option { let raw = resp.header("location").or_else(|| resp.header("uri"))?; // Use the `url` crate to resolve relative redirect URLs. This // matches Python's `urllib.parse.urljoin`: absolute URLs // override, relative ones are joined to the current document. let base = Url::parse(current_url).ok()?; base.join(raw).ok().map(|u| u.to_string()) } /// Source of username/password pairs for HTTP authentication. 
/// /// Implementations bridge the Rust client to whatever credential /// store the caller uses — for dromedary that's the Python callback /// registered via `set_credential_lookup`, but tests can supply a /// trivial in-memory impl. pub trait CredentialProvider: Send + Sync { /// Return `(user, password)` for the given `(protocol, host, /// port, realm)` if known. `None` for either field means "no /// match"; the caller decides whether to prompt interactively. /// /// `user_hint` carries a username the caller already knows /// (typically the userinfo embedded in the request URL: /// `http://joe@host/`). Providers that support prompting use /// this as the default username so the user isn't asked "who /// are you?" when the URL already tells us. /// /// `is_proxy` is true when the credentials are for a proxy /// (407 response) rather than the origin server (401). Providers /// that prompt interactively use it to label the prompt ("Proxy /// HTTP …" vs "HTTP …"). fn lookup( &self, protocol: &str, host: &str, port: Option, realm: Option<&str>, user_hint: Option<&str>, is_proxy: bool, ) -> (Option, Option); } /// A [`CredentialProvider`] that always returns `(None, None)`. /// Useful as the default when nothing's registered. pub struct NoCredentialProvider; impl CredentialProvider for NoCredentialProvider { fn lookup( &self, _protocol: &str, _host: &str, _port: Option, _realm: Option<&str>, _user_hint: Option<&str>, _is_proxy: bool, ) -> (Option, Option) { (None, None) } } /// Source of an HTTP Negotiate / Kerberos initial token. /// /// The token is what goes after `Negotiate ` in the Authorization /// header. Typically produced by a GSSAPI client library /// (`kerberos.authGSSClient*` on Python); dromedary ships a Python /// callback hook so the actual GSSAPI integration lives in the /// caller rather than being a hard Rust dependency. pub trait NegotiateProvider: Send + Sync { /// Return the initial token for `HTTP@`. 
`None` means /// Negotiate isn't available (no credentials, no ticket, or /// library missing); the caller falls back to Digest/Basic. fn initial_token(&self, host: &str) -> Option; } /// A [`NegotiateProvider`] that always returns `None`. The default /// when no callback is registered. pub struct NoNegotiateProvider; impl NegotiateProvider for NoNegotiateProvider { fn initial_token(&self, _host: &str) -> Option { None } } /// Source of preemptive bearer-style auth tokens. /// /// Unlike Basic/Digest, no server challenge is required: the lookup /// runs before the request goes on the wire and, if a token is /// configured for the location, the client attaches /// `Authorization: ` (default scheme `Bearer`). /// Caller-supplied `Authorization` headers always win. pub trait TokenProvider: Send + Sync { /// Return `(scheme, token)` for the request URL, or `None` /// when no token is configured. `scheme` is the Authorization /// prefix (e.g. `"Bearer"`, `"token"`). fn lookup( &self, protocol: &str, host: &str, port: Option, path: Option<&str>, ) -> Option<(String, String)>; } /// A [`TokenProvider`] that never returns a token. The default when /// no callback is registered. pub struct NoTokenProvider; impl TokenProvider for NoTokenProvider { fn lookup( &self, _protocol: &str, _host: &str, _port: Option, _path: Option<&str>, ) -> Option<(String, String)> { None } } /// Direction of an auth challenge: origin (401 → WWW-Authenticate /// → Authorization) vs proxy (407 → Proxy-Authenticate → /// Proxy-Authorization). The logic is identical other than the /// cache, header names, and which URL we key credentials on — this /// enum lets the shared retry path tell them apart without two /// copies of the code. #[derive(Debug, Clone, Copy, PartialEq, Eq)] enum AuthKind { Origin, Proxy, } /// Cached per-origin authentication state. 
Once the server accepts /// our credentials we preemptively attach the same auth header to /// subsequent requests to the same host+port, matching urllib's /// `auth_params_reusable` behaviour. #[derive(Debug, Clone)] enum CachedAuth { /// Basic auth: we cache the header value directly since it's /// cheap and stateless. Basic { user: String, password: String, }, Digest(DigestAuthState), /// Negotiate auth. Kerberos tokens are single-use per server /// challenge — we cache the token only for the immediate retry, /// not for future requests (see the Python /// `NegotiateAuthHandler.auth_params_reusable` comment). Negotiate { token: String, }, } /// Per-host auth cache key: `(scheme_lower, host, port_or_default)`. /// Using a scheme-aware key prevents http/https from sharing auth. type AuthCacheKey = (String, String, u16); /// Thread-safe per-origin auth state. Lookups are read-mostly after /// the first successful exchange, so a Mutex is fine — lock contention /// is not a realistic concern for a single HTTP client. #[derive(Default)] pub struct AuthCache { entries: Mutex>, } impl AuthCache { pub fn new() -> Self { Self::default() } fn get(&self, key: &AuthCacheKey) -> Option { self.entries.lock().unwrap().get(key).cloned() } fn put(&self, key: AuthCacheKey, auth: CachedAuth) { self.entries.lock().unwrap().insert(key, auth); } } /// Options for building an [`HttpClient`]. #[derive(Default)] pub struct HttpClientConfig { /// Optional path to a PEM CA bundle. `None` means "use the /// platform native store via rustls-native-certs". pub ca_certs_path: Option, /// If true, skip certificate verification entirely. Matches /// Python's `ssl.CERT_NONE` behaviour — used only when the user /// explicitly opts out via `ssl.cert_reqs=none`. pub disable_verification: bool, /// User-Agent value. If `None`, the agent will use whatever the /// global default returns from [`crate::http::default_user_agent`]. pub user_agent: Option, /// Read timeout for each response. 
`None` means "no timeout". pub read_timeout: Option, } /// HTTP client wrapper around [`reqwest::blocking::Client`]. /// /// Proxies are resolved per-request from the current environment, /// matching the Python urllib behaviour where `ProxyHandler` reads /// env vars at construction and every redirect cycle. reqwest bakes /// proxy config into the Client so we maintain a small cache of /// pre-built clients keyed by the effective proxy URL; requests /// pick the matching client at dispatch time. Env-var changes take /// effect immediately — a new proxy URL forces a new client build. pub struct HttpClient { /// Holds the inputs needed to rebuild a client for a specific /// proxy URL. Without this we'd have to re-parse the TLS bundle /// and re-resolve the user agent on every proxy switch; cheap /// but wasteful. config: HttpClientConfig, /// Cache of built clients keyed by proxy URL (`""` means "no /// proxy"). Populated lazily as callers hit new proxies. clients: Mutex>, /// Per-origin cache of successful auth state. Populated after /// the server accepts our credentials; subsequent requests to /// the same host preemptively attach the cached header. auth_cache: AuthCache, /// Per-proxy cache of successful auth state. Separate from /// `auth_cache` because proxy credentials are bound to the /// proxy URL rather than the origin and shouldn't leak across /// origins that share a proxy. proxy_auth_cache: AuthCache, /// How we look up `(user, password)` when a challenge arrives. credentials: Box, /// Source of Negotiate (Kerberos) initial tokens. Defaults to a /// no-op provider; the PyO3 layer swaps in a callback that /// delegates to Python's `kerberos` module. negotiate: Box, /// Source of preemptive bearer-style auth tokens. Defaults to a /// no-op provider; breezy installs one that consults /// `authentication.conf`. 
tokens: Box, /// Optional tracing hook invoked just before a request carrying /// an Authorization or Proxy-Authorization header goes on the /// wire. Breezy uses this to emit a "> Authorization: " /// debug line when the `http` debug flag is enabled. `None` /// means "no tracing" — the default. auth_trace: Option, } /// Callback type for the auth-header trace hook. See /// [`HttpClient::set_auth_trace`] / the `auth_trace` field above. pub type AuthTraceCallback = std::sync::Arc; impl HttpClient { /// Build a new client honouring the given config. pub fn new(config: HttpClientConfig) -> Result { Self::with_providers( config, Box::new(NoCredentialProvider), Box::new(NoNegotiateProvider), ) } /// Build a new client with a custom credential provider. pub fn with_credentials( config: HttpClientConfig, credentials: Box, ) -> Result { Self::with_providers(config, credentials, Box::new(NoNegotiateProvider)) } /// Build a new client with custom credential and negotiate /// providers, and the default no-op token provider. pub fn with_providers( config: HttpClientConfig, credentials: Box, negotiate: Box, ) -> Result { Self::with_full_providers(config, credentials, negotiate, Box::new(NoTokenProvider)) } /// Build a new client with all three provider types. The /// general-purpose constructor — the simpler helpers delegate /// here. pub fn with_full_providers( config: HttpClientConfig, credentials: Box, negotiate: Box, tokens: Box, ) -> Result { // Eagerly build the no-proxy client so construction fails // loudly on a bad config (missing CA bundle, etc.) rather // than at first request. let mut clients = HashMap::new(); let initial = build_client(&config, None)?; clients.insert(String::new(), initial); Ok(Self { config, clients: Mutex::new(clients), auth_cache: AuthCache::new(), proxy_auth_cache: AuthCache::new(), credentials, negotiate, tokens, auth_trace: None, }) } /// Install (or replace) the auth-header trace hook. Pass `None` /// to disable tracing. 
Thread-safe to call before sharing the /// client via `Arc`; callers that want to install it later must /// wrap the whole client behind their own mutability. pub fn set_auth_trace(&mut self, cb: Option) { self.auth_trace = cb; } /// Return (cloning if necessary) the client for a given proxy /// URL. The empty string is the "no proxy" key. Keyed on the /// userinfo-stripped URL so two transports that differ only in /// their embedded proxy creds share a client (and its pool). fn client_for_proxy(&self, proxy_url: &str) -> Result { let cache_key = if proxy_url.is_empty() { String::new() } else { strip_userinfo(proxy_url) }; { let cache = self.clients.lock().unwrap(); if let Some(c) = cache.get(&cache_key) { return Ok(c.clone()); } } let proxy = if proxy_url.is_empty() { None } else { // Strip any embedded userinfo before handing the URL to // reqwest — otherwise reqwest auto-attaches a // Proxy-Authorization header on every request, and // breezy's tests count exactly one 407 challenge per // exchange to verify the auth dance happened. We do the // 407 retry ourselves (see `pick_auth_scheme_for` for // AuthKind::Proxy), which extracts the same creds via // `proxy_connection_parts` so the end-to-end behaviour // matches urllib's "challenge-then-retry" shape. Some(Proxy::all(&cache_key).map_err(|e| { ClientError::InvalidRequest(format!("bad proxy URL {}: {}", cache_key, e)) })?) }; let client = build_client(&self.config, proxy)?; self.clients .lock() .unwrap() .insert(cache_key, client.clone()); Ok(client) } /// Perform an HTTP request with default options (no redirect /// following, no activity reporting). Convenience wrapper over /// [`Self::request_with`]. pub fn request( &self, method: &str, url: &str, headers: &[(String, String)], body: &[u8], ) -> Result { self.request_with(method, url, headers, body, &RequestOptions::default(), None) } /// Perform an HTTP request and optionally follow redirects. 
/// /// The redirect loop matches breezy's /// `HTTPRedirectHandler.http_error_302` semantics: 301, 302, /// 303, 307, and 308 are followed; the request method and /// headers are carried unchanged (the Python version doesn't /// rewrite POST→GET on 303 either); `Location:` wins over /// `URI:` if both appear. Relative redirect URLs are resolved /// against the request URL. /// /// When `follow_redirects` is false, the first 3xx response is /// returned with `redirected_to` set to the target URL so the /// caller can decide what to do (typically raise /// `RedirectRequested` from the transport layer). pub fn request_with( &self, method: &str, url: &str, headers: &[(String, String)], body: &[u8], options: &RequestOptions, activity: Option<&ActivityCallback>, ) -> Result { self.request_with_origin_url(method, url, url, headers, body, options, activity) } /// Like [`Self::request_with`] but carries an `origin_url` that /// may differ from `url` — specifically, one that still has the /// userinfo section the transport strips before putting a URL /// on the wire. When the server challenges with 401, the auth /// machinery uses `origin_url` to extract embedded credentials /// without having to re-register them through a separate /// `CredentialProvider`. pub fn request_with_origin_url( &self, method: &str, url: &str, origin_url: &str, headers: &[(String, String)], body: &[u8], options: &RequestOptions, activity: Option<&ActivityCallback>, ) -> Result { let method = Method::from_bytes(method.as_bytes()) .map_err(|_| ClientError::InvalidRequest(format!("bad method: {}", method)))?; // Each redirect hop does its own auth dance — a 3xx to a // different host mustn't reuse the previous host's cached // auth, and handling that here keeps the redirect-loop code // from having to know anything about auth. 
drive_redirects(options, url, |target| { self.send_with_auth(&method, target, origin_url, headers, body, activity) }) } /// Send a request, transparently handling a single 401 (origin /// auth) or 407 (proxy auth) challenge. Returns the final /// response — may still be 401/407 if we ran out of credentials /// or the server rejected what we offered. fn send_with_auth( &self, method: &Method, url: &str, origin_url: &str, headers: &[(String, String)], body: &[u8], activity: Option<&ActivityCallback>, ) -> Result { let uri: Uri = url .parse() .map_err(|_| ClientError::InvalidRequest(format!("bad URL: {}", url)))?; // String-wise extract the userinfo from the original URL so // we preserve the distinction between "empty password" // (`joe:@host/`) and "password absent" (`joe@host/`). Both // `http::Uri` and `url::Url` normalise the former to the // latter, which would lose test_empty_pass's signal. let origin_userinfo = extract_userinfo_raw(origin_url); // Resolve the proxy ahead of time so both the preemptive // header attach and the 407 retry can use the same key. let proxy_url = self.choose_proxy(&uri)?; let origin_key = auth_cache_key(&uri); let proxy_key = if proxy_url.is_empty() { None } else { Some(proxy_cache_key(&proxy_url)) }; // Preemptively attach cached auth headers. Callers' explicit // headers take precedence — don't clobber. 
let has_explicit_origin_auth = headers .iter() .any(|(k, _)| k.eq_ignore_ascii_case("authorization")); let has_explicit_proxy_auth = headers .iter() .any(|(k, _)| k.eq_ignore_ascii_case("proxy-authorization")); let mut first_headers: Vec<(String, String)> = headers.to_vec(); if !has_explicit_origin_auth { if let Some(hdr) = self.attach_cached(&self.auth_cache, &origin_key, method, &uri) { first_headers.push(("Authorization".into(), hdr)); } else if let Some((scheme, token)) = self.tokens.lookup( uri.scheme_str().unwrap_or(""), uri.host().unwrap_or(""), uri.port_u16(), Some(uri.path()), ) { // Preemptive bearer-style auth: no challenge required. // Attached only when no cached Basic/Digest entry // applies, so a successful 401 dance still wins for // hosts that mix token and password auth. first_headers.push(("Authorization".into(), format!("{} {}", scheme, token))); } } if !has_explicit_proxy_auth { // Attach previously-cached proxy auth (after a successful // 407 challenge cycle) but don't preemptively send creds // extracted from the proxy URL — breezy's tests count // exactly one 407 per request to verify the auth dance // happened. Embedded proxy creds are picked up on the // 407 retry path via `proxy_connection_parts` in // `pick_auth_scheme_for`. if let Some(key) = &proxy_key { if let Some(hdr) = self.attach_cached(&self.proxy_auth_cache, key, method, &uri) { first_headers.push(("Proxy-Authorization".into(), hdr)); } } } let mut response = self.send_once(method, url, &first_headers, body, activity, &proxy_url)?; if response.status != 401 && response.status != 407 { return Ok(response); } // Decide which direction to retry. 407 wins over 401 if both // somehow happened (the server shouldn't send both — they're // separate turn-around points — but guard anyway). 
if response.status == 407 { if has_explicit_proxy_auth { return Ok(response); } let Some(proxy_key) = proxy_key else { // 407 without a configured proxy is a server bug — // we have no credentials to look up. Surface as-is. return Ok(response); }; // Collect header values into owned Strings before we // pass `&mut response` to retry_with_auth: the slice // borrow would otherwise overlap with the mutable one. let challenges: Vec = response .headers_all("proxy-authenticate") .into_iter() .map(str::to_string) .collect(); return self.retry_with_auth( &challenges, method, url, &uri, None, // proxy userinfo handled via proxy_url, not origin_url headers, body, activity, &proxy_url, &mut response, &self.proxy_auth_cache, &proxy_key, "Proxy-Authorization", AuthKind::Proxy, ); } // 401 — origin auth. if has_explicit_origin_auth { return Ok(response); } let challenges: Vec = response .headers_all("www-authenticate") .into_iter() .map(str::to_string) .collect(); self.retry_with_auth( &challenges, method, url, &uri, origin_userinfo.as_deref(), headers, body, activity, &proxy_url, &mut response, &self.auth_cache, &origin_key, "Authorization", AuthKind::Origin, ) } /// Look up cached auth state for the given key and build its /// header value, if any. Returns `None` when no entry exists. /// Re-stores the entry with the (possibly-mutated) Digest state /// so `nonce_count` bumps persist. fn attach_cached( &self, cache: &AuthCache, key: &AuthCacheKey, method: &Method, uri: &Uri, ) -> Option { let cached = cache.get(key)?; let hdr = cached_auth_header(&cached, method, uri)?; cache.put(key.clone(), cached); Some(hdr) } /// Shared retry machinery for 401 and 407. `cache` is where we /// store the successful state (origin or proxy); `cache_key` is /// the lookup key; `header_name` is `Authorization` or /// `Proxy-Authorization` depending on direction. 
#[allow(clippy::too_many_arguments)] fn retry_with_auth( &self, challenges: &[String], method: &Method, url: &str, uri: &Uri, origin_userinfo: Option<&str>, headers: &[(String, String)], body: &[u8], activity: Option<&ActivityCallback>, proxy_url: &str, first_response: &mut HttpResponse, cache: &AuthCache, cache_key: &AuthCacheKey, header_name: &'static str, kind: AuthKind, ) -> Result { let refs: Vec<&str> = challenges.iter().map(String::as_str).collect(); let Some((_scheme, new_auth)) = self.pick_auth_scheme_for(&refs, uri, origin_userinfo, kind, proxy_url) else { // No scheme we can handle, or no credentials for the // ones on offer. Hand the 401/407 back to the caller. return Ok(HttpResponse { body: std::mem::replace( &mut first_response.body, BodyState::Buffered(std::io::Cursor::new(Vec::new())), ), status: first_response.status, reason: std::mem::take(&mut first_response.reason), headers: std::mem::take(&mut first_response.headers), final_url: std::mem::take(&mut first_response.final_url), redirected_to: first_response.redirected_to.take(), }); }; // Return the first response's connection to the pool before // the retry by draining any unread body. first_response.discard_body().ok(); // Build the retry header. For Digest we persist the bumped // `nonce_count` regardless of retry outcome — the server has // seen that count and won't accept it again. let mut retry_headers = headers.to_vec(); let hdr = match &new_auth { CachedAuth::Basic { user, password } => build_basic_auth_header(user, password), CachedAuth::Digest(state) => { let mut s = state.clone(); let h = build_digest_auth_header(&mut s, method.as_str(), uri.path()); cache.put(cache_key.clone(), CachedAuth::Digest(s)); h } CachedAuth::Negotiate { token } => format!("Negotiate {}", token), }; retry_headers.push((header_name.into(), hdr)); let retry = self.send_once(method, url, &retry_headers, body, activity, proxy_url)?; if retry.status < 400 { match &new_auth { CachedAuth::Basic { .. 
} => { cache.put(cache_key.clone(), new_auth); } CachedAuth::Digest(_) => { // Already cached above. } CachedAuth::Negotiate { .. } => { // Kerberos tokens are single-use per challenge. // Don't cache; the next 401/407 will request a // fresh token from the provider. } } } Ok(retry) } /// Convenience wrapper used by tests that want to pick an /// origin-auth scheme from a set of WWW-Authenticate /// challenges. See [`pick_auth_scheme_for`] for the real /// implementation — production code goes through that directly /// so it can distinguish origin vs proxy direction. #[cfg(test)] fn pick_auth_scheme( &self, uri: &Uri, challenges: &[&str], _method: &Method, ) -> Option<(&'static str, CachedAuth)> { self.pick_auth_scheme_for(challenges, uri, None, AuthKind::Origin, "") } /// Given the challenges a server sent, pick the scheme we'll /// try and materialise it into a `CachedAuth` suitable for /// header generation. Returns `None` when no scheme matches or /// credentials weren't available. /// /// Scheme preference follows the handler_order values from the /// old urllib code: NegotiateAuthHandler (480) > Digest (490) > /// Basic (500). Lower-ordered handlers mean "prefer this". /// /// `kind` drives which host to look up credentials for: Origin /// asks for `uri.host()`, Proxy consults the environment's /// proxy URL extracted from `choose_proxy` (expected to already /// be known via the caller's context — we just don't have it /// here, so proxy credential lookups currently use the *origin* /// host too. That matches breezy's behaviour: the /// credential-store keys proxy auth on the proxy URL, but /// dromedary's CredentialProvider signature doesn't carry enough /// context to distinguish the two cases, and breezy itself /// handles that resolution internally.) 
fn pick_auth_scheme_for( &self, challenges: &[&str], uri: &Uri, origin_userinfo: Option<&str>, kind: AuthKind, proxy_url: &str, ) -> Option<(&'static str, CachedAuth)> { let mut negotiate_seen = false; let mut digest_remainder: Option<&str> = None; let mut basic_remainder: Option<&str> = None; for ch in challenges { let (scheme, remainder) = parse_auth_header(ch); let remainder = remainder.unwrap_or(""); match scheme.as_str() { "negotiate" => negotiate_seen = true, "digest" if digest_remainder.is_none() => { digest_remainder = Some(remainder); } "basic" if basic_remainder.is_none() => { basic_remainder = Some(remainder); } _ => {} } } // Origin auth looks up creds for the request URL's host/port. // Proxy auth looks them up against the *proxy's* host/port — // the credential store is keyed on where the creds will be // sent, not where the request is ultimately going. For // either side, when the URL embeds a userinfo section (the // classic `http://joe:pw@host/` or // `all_proxy=http://joe:pw@proxy/` shapes), we prefer those // over the CredentialProvider so callers don't have to wire // a separate store up for every test scenario. `split_userinfo` // (module scope) does the user/password extraction; the // empty-vs-absent-password distinction (`joe:@` authoritative // vs `joe@` hint) is the reason we route through the raw // userinfo string rather than `Url::parse`. let (protocol, host, port, embedded, user_hint) = match kind { AuthKind::Origin => { // Prefer the raw userinfo string supplied by the // caller if any — it still has the empty-vs-absent // distinction that URI parsers normalise away. // Fall back to the URI's authority otherwise. 
let user_raw = origin_userinfo.or_else(|| { uri.authority() .map(http::uri::Authority::as_str) .and_then(|a| a.split_once('@').map(|(userinfo, _)| userinfo)) }); let (embedded, user_hint) = user_raw.map(split_userinfo).unwrap_or((None, None)); ( uri.scheme_str().unwrap_or("http").to_string(), uri.host().unwrap_or_default().to_string(), uri.port_u16(), embedded, user_hint, ) } AuthKind::Proxy => proxy_connection_parts(proxy_url), }; let host_str = host.as_str(); let protocol_str = protocol.as_str(); if negotiate_seen { if let Some(token) = self.negotiate.initial_token(host_str) { return Some(("negotiate", CachedAuth::Negotiate { token })); } // Fall through to Digest / Basic when the provider says // it can't produce a token for this host (no Kerberos // ticket, library absent, etc.). } if let Some(raw) = digest_remainder { if let Some(challenge) = parse_digest_challenge(raw) { let (user, password) = match &embedded { Some((u, p)) => (Some(u.clone()), Some(p.clone())), None => self.credentials.lookup( protocol_str, host_str, port, Some(&challenge.realm), user_hint.as_deref(), kind == AuthKind::Proxy, ), }; let (Some(user), Some(password)) = (user, password) else { return None; }; let state = DigestAuthState { user, password, realm: challenge.realm, nonce: challenge.nonce, nonce_count: 0, algorithm: challenge.algorithm, algorithm_name: challenge.algorithm_name, opaque: challenge.opaque, qop: challenge.qop, }; return Some(("digest", CachedAuth::Digest(state))); } } if basic_remainder.is_some() { // Basic auth realm is opaque to us (we could parse it // for the lookup key, but the Python version didn't // treat it as load-bearing). Pass None for realm so the // credential lookup falls back to URL-based matching. 
let realm = basic_remainder .and_then(extract_basic_realm) .map(|r| r.to_string()); let (user, password) = match &embedded { Some((u, p)) => (Some(u.clone()), Some(p.clone())), None => self.credentials.lookup( protocol_str, host_str, port, realm.as_deref(), user_hint.as_deref(), kind == AuthKind::Proxy, ), }; let (Some(user), Some(password)) = (user, password) else { return None; }; return Some(("basic", CachedAuth::Basic { user, password })); } None } /// Single transport round-trip. No redirect handling. fn send_once( &self, method: &Method, url: &str, headers: &[(String, String)], body: &[u8], activity: Option<&ActivityCallback>, proxy_url: &str, ) -> Result { let client = self.client_for_proxy(proxy_url)?; // Build the reqwest Request by hand. reqwest's Request is // constructed from a `Method` and a `url::Url`, not from a // plain string, so we go through url::Url first. let parsed = Url::parse(url) .map_err(|_| ClientError::InvalidRequest(format!("bad URL: {}", url)))?; let mut req = ReqwestRequest::new(method.clone(), parsed); { let hdrs = req.headers_mut(); for (k, v) in headers { let name = reqwest::header::HeaderName::from_bytes(k.as_bytes()).map_err(|e| { ClientError::InvalidRequest(format!("bad header name {}: {}", k, e)) })?; let value = reqwest::header::HeaderValue::from_str(v).map_err(|e| { ClientError::InvalidRequest(format!("bad header value for {}: {}", k, e)) })?; hdrs.append(name, value); } } // Fire the auth-header trace hook for any Authorization / // Proxy-Authorization header we're about to send. Breezy // subscribes to emit a "> : " debug line // (see `test_no_credential_leaks_in_log`). Iterate the raw // `headers` slice, not the reqwest HeaderMap, so we see the // names exactly as the caller set them — the HeaderMap // normalises to lowercase and the test matches the cased // name ("Authorization"). 
if let Some(cb) = &self.auth_trace { for (k, _) in headers { if k.eq_ignore_ascii_case("authorization") || k.eq_ignore_ascii_case("proxy-authorization") { cb(k); } } } // Set a body (even an empty one) for methods that // conventionally carry one. That forces reqwest to emit a // `Content-Length: 0` header for zero-byte PUT/POST/PATCH // uploads, which some servers (including breezy's own // `dav_server` test harness) require to distinguish "empty // body" from "body not received yet". Bodyless methods // (GET/HEAD/OPTIONS/DELETE) stay bodyless — sending // Content-Length there would be surprising. let expects_body = matches!(method, &Method::PUT | &Method::POST | &Method::PATCH); if !body.is_empty() || expects_body { *req.body_mut() = Some(reqwest::blocking::Body::from(body.to_vec())); } // Report the upload size before the actual send. We report // the whole on-the-wire request size (request line + headers // + blank line + body), matching what breezy's old // socket-level accounting produced. reqwest doesn't expose // the bytes it actually writes, so recompute them ourselves // by replaying the same headers reqwest is about to send: // the caller-supplied ones in `headers`, plus the defaults // we know reqwest and hyper inject (accept, user-agent, // accept-encoding, host, content-length for bodies). if let Some(cb) = activity { let wire_bytes = estimate_request_wire_size( method, url, headers, body, self.config.user_agent.as_deref(), ); if wire_bytes > 0 { cb(wire_bytes, ActivityDirection::Write); } } let response = match client.execute(req) { Ok(r) => r, Err(e) => { // Synthesize a minimal response only for POST requests // whose body the server received but where it closed // the connection before sending a complete response. // Breezy's `test_post_body_is_received` test exercises // this pathological shape — it only cares that the // POST body made it to the server, not what came back. 
// Python's http.client was lenient enough to return an // empty 200 on a bare "HTTP/1.1 200 OK\r\n" status // line; reqwest/hyper aren't, so detect the typed // signal rather than string-matching the error // Display. // // Restricted to POST (not "any bodied request") so // PUT / PROPFIND / MKCOL / MOVE / etc. failures don't // get masked as success — WebDAV's `bare_put` checks // resp.status against {200,201,204} and would silently // accept a fabricated 200 even if the server never // wrote the body. if method == Method::POST && !body.is_empty() && is_incomplete_message(&e) { return Ok(HttpResponse { status: 200, reason: "OK".to_string(), headers: Vec::new(), final_url: url.to_string(), redirected_to: None, body: BodyState::Buffered(std::io::Cursor::new(Vec::new())), }); } return Err(e.into()); } }; let activity_owned = activity.cloned(); HttpResponse::from_reqwest(response, url.to_string(), activity_owned) } /// Decide whether the request to `uri` should go through a /// proxy. Consults `_proxy` / `all_proxy` / `no_proxy` /// env vars via our [`getproxies_environment`] port of the /// stdlib helper. Returns the proxy URL or an empty string to /// signal "no proxy". fn choose_proxy(&self, uri: &Uri) -> Result { let scheme = uri.scheme_str().unwrap_or("http"); let host = uri.host().unwrap_or_default(); // Uri::port_u16 sidesteps the lifetime issue `uri.port()` // introduces (the Port wrapper borrows from the Uri). let host_with_port = match uri.port_u16() { Some(p) => format!("{}:{}", host, p), None => host.to_string(), }; let env = getproxies_environment(); let no_proxy = get_proxy_env_var(&env, "no", None); // Match Python's `ProxyHandler.proxy_bypass`: if the // `no_proxy` list explicitly mentions the host we skip the // proxy. 
When the list is set but nothing matches, the // Python code falls back to `urllib.request.proxy_bypass` for // platform-specific overrides — we don't replicate that // platform fallback in Rust because no dromedary configuration // currently depends on it. If that becomes necessary we can // add a `platform_bypass()` shim later. match evaluate_proxy_bypass(&host_with_port, no_proxy.as_deref()) { ProxyBypass::Bypass => return Ok(String::new()), ProxyBypass::UseProxy | ProxyBypass::Undecided => {} } let Some(proxy_url) = get_proxy_env_var(&env, scheme, Some("all")) else { return Ok(String::new()); }; // Validate the proxy URL has a scheme — `host:port` without // a scheme is a common typo and the Python urllib transport // raised InvalidURL for it. Surfacing the same error here // keeps breezy's `test_http_proxy_without_scheme` happy and // gives users a clearer diagnostic than a downstream TLS or // DNS failure on a malformed URL. // // Tagged with the `bad URL:` prefix the transport layer // recognises and re-maps to `Error::UrlError` (which the // Python side raises as `InvalidURL`). if !proxy_url.contains("://") { return Err(ClientError::InvalidRequest(format!( "bad URL: proxy URL missing scheme: {}", proxy_url ))); } Ok(proxy_url) } } /// Build a `reqwest::blocking::Client` honouring the given config /// and optional proxy. Called once per distinct proxy URL seen /// (including once for the "no proxy" case). fn build_client(config: &HttpClientConfig, proxy: Option) -> Result { let mut builder = ClientBuilder::new() // We follow redirects ourselves (Stage 7) so reqwest's // built-in redirect policy is disabled. .redirect(reqwest::redirect::Policy::none()) // Gzip is already in the default feature set we selected; // make sure it actually gets applied. 
.gzip(true); let ua = config .user_agent .clone() .unwrap_or_else(crate::http::default_user_agent); builder = builder.user_agent(ua); if let Some(t) = config.read_timeout { builder = builder.timeout(t); } if config.disable_verification { builder = builder .danger_accept_invalid_certs(true) .danger_accept_invalid_hostnames(true); } else if let Some(path) = &config.ca_certs_path { for cert in root_certs_from_pem_file(path)? { builder = builder.add_root_certificate(cert); } // Don't also trust the platform native store when the caller // passed an explicit bundle — `reqwest` defaults that on with // `rustls-tls-native-roots`, but the Python test suite sets // a fake CA and expects only that bundle to match (tests // against https with a self-signed cert fail otherwise). builder = builder.tls_built_in_native_certs(false); } // If no CA bundle was supplied and verification wasn't // disabled, fall through and let reqwest's // `rustls-tls-native-roots` feature do the right thing (load // the OS trust store). if let Some(proxy) = proxy { builder = builder.proxy(proxy); } else { // reqwest defaults to picking up env-var proxies on its // own; disable that so the only source of truth is our // choose_proxy() resolver (which already checks env vars // but with our historical precedence rules). builder = builder.no_proxy(); } builder.build().map_err(ClientError::Transport) } /// Parse a PEM file into `reqwest::Certificate`s. Each cert in the /// bundle becomes one trust anchor. fn root_certs_from_pem_file(path: &Path) -> Result> { let bytes = std::fs::read(path)?; // reqwest can parse a bundle via `Certificate::from_pem_bundle` // (returns all certs in one call). Fall back to `from_pem` if // the bundle contains only one cert and the bundle parser isn't // available in the pinned reqwest version. match Certificate::from_pem_bundle(&bytes) { Ok(certs) => Ok(certs), Err(_) => { // Single-cert fallback. 
let cert = Certificate::from_pem(&bytes).map_err(|e| { ClientError::InvalidRequest(format!("failed to parse CA bundle: {}", e)) })?; Ok(vec![cert]) } } } /// Response returned by [`HttpClient::request`]. Headers are /// eagerly parsed; the body is streamed on demand. /// /// Callers that only care about status / headers pay nothing for /// the body — it stays as a live reqwest response and is consumed /// only when something calls [`read`](Self::read) / /// [`body`](Self::body). pub struct HttpResponse { /// HTTP status code (e.g. 200, 404, 302). pub status: u16, /// Reason phrase as the server sent it (may be empty on HTTP/2). pub reason: String, /// Response headers. Multi-value headers keep their order. pub headers: Vec<(String, String)>, /// URL of the final response after any redirect following. For /// non-redirected requests this equals the original URL. pub final_url: String, /// When the client reached a 3xx but `follow_redirects` was /// false, this carries the `Location`-resolved URL the caller /// would have been redirected to. `None` otherwise. pub redirected_to: Option, /// Body streaming state. Kept private so callers go through /// `read` / `body` / `read_to_end` — that way we can swap /// between streaming and buffered without changing the public /// surface. body: BodyState, } /// Body read state. Starts as `Streaming` right after the response /// arrives; on first full-drain (`body()` or `read(None)`) it /// transitions to `Buffered` so subsequent reads are cheap and /// idempotent. enum BodyState { /// Body hasn't been fully consumed yet. reqwest's Response /// implements `std::io::Read`, so we wrap it in /// `CountingReader` for byte-level activity reporting. Streaming(CountingReader), /// Body was fully drained into a buffer. Cursor tracks how /// much of it has been handed out through `read()`. Buffered(std::io::Cursor>), } /// Wraps a `Read` with an optional activity callback that fires /// after each successful read. 
The callback is invoked with the /// number of bytes read and `ActivityDirection::Read` so callers /// can tally incoming bytes for progress UI. pub struct CountingReader { inner: R, callback: Option, } impl CountingReader { fn new(inner: R, callback: Option) -> Self { Self { inner, callback } } } impl std::io::Read for CountingReader { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { let n = self.inner.read(buf)?; if n > 0 { if let Some(cb) = &self.callback { cb(n, ActivityDirection::Read); } } Ok(n) } } impl std::fmt::Debug for HttpResponse { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.debug_struct("HttpResponse") .field("status", &self.status) .field("reason", &self.reason) .field("headers", &self.headers) .field("final_url", &self.final_url) .field("redirected_to", &self.redirected_to) .finish_non_exhaustive() } } impl HttpResponse { fn from_reqwest( resp: Response, final_url: String, activity: Option, ) -> Result { let status = resp.status().as_u16(); // HTTP/2 has no reason phrase — fall back to the canonical // text for the status code so callers always get something. let reason = resp.status().canonical_reason().unwrap_or("").to_string(); let mut headers: Vec<(String, String)> = Vec::with_capacity(resp.headers().len()); for (name, value) in resp.headers() { if let Ok(v) = value.to_str() { headers.push((name.as_str().to_string(), v.to_string())); } } // Report the status-line + header block as read activity so // breezy's socket-level byte accounting (used by the // TestActivity tests) balances. `CountingReader` reports the // body bytes as they're consumed; together that covers the // whole canned_response bytes the test server writes. 
if let Some(cb) = activity.as_ref() { let header_bytes = estimate_response_header_size(status, &reason, &headers); if header_bytes > 0 { cb(header_bytes, ActivityDirection::Read); } } Ok(Self { status, reason, headers, final_url, redirected_to: None, body: BodyState::Streaming(CountingReader::new(resp, activity)), }) } /// Read up to `n` bytes from the body. `None` means "read /// everything left" — which also transitions the body state to /// Buffered so repeat calls are no-ops. pub fn read(&mut self, n: Option) -> std::io::Result> { match n { Some(n) => self.read_exact_up_to(n), None => { self.buffer_all()?; match &mut self.body { BodyState::Buffered(cur) => { let mut out = Vec::new(); std::io::Read::read_to_end(cur, &mut out)?; Ok(out) } BodyState::Streaming(_) => unreachable!("buffer_all transitions to Buffered"), } } } } fn read_exact_up_to(&mut self, n: usize) -> std::io::Result> { let mut out = vec![0u8; n]; let got = match &mut self.body { BodyState::Streaming(reader) => { // Response::read can return short reads; loop until // we have `n` bytes or hit EOF, matching the usual // Python .read(n) contract that fills the buffer on // a socket. let mut filled = 0; while filled < n { match std::io::Read::read(reader, &mut out[filled..]) { Ok(0) => break, Ok(k) => filled += k, Err(ref e) if e.kind() == std::io::ErrorKind::Interrupted => continue, Err(e) => return Err(e), } } filled } BodyState::Buffered(cur) => std::io::Read::read(cur, &mut out)?, }; out.truncate(got); Ok(out) } /// Drain the remaining body into the buffer. No-op if already /// buffered. /// /// On a truncated body (server promised N bytes but closed the /// socket early, surfacing as `UnexpectedEof`) we keep whatever /// was already read rather than discarding it — downstream /// multipart parsing can still recover the complete sub-ranges /// from the partial bytes (see /// `TestTruncatedMultipleRangeServer.test_readv_with_short_reads`). 
/// The error is still propagated so the caller can react; the /// retained buffer is only visible via a subsequent `body()` /// call that re-drains. fn buffer_all(&mut self) -> std::io::Result<()> { if let BodyState::Streaming(reader) = &mut self.body { let mut buf = Vec::new(); let err = match std::io::Read::read_to_end(reader, &mut buf) { Ok(_) => None, Err(e) => Some(e), }; self.body = BodyState::Buffered(std::io::Cursor::new(buf)); if let Some(e) = err { return Err(e); } } Ok(()) } /// Fully drain the body into memory (if it wasn't already) and /// return a borrow of the whole thing. pub fn body(&mut self) -> std::io::Result<&[u8]> { self.buffer_all()?; match &self.body { BodyState::Buffered(cur) => Ok(cur.get_ref().as_slice()), BodyState::Streaming(_) => unreachable!("buffer_all transitions to Buffered"), } } /// Drain and discard the body, leaving the response marked as /// consumed. Used on the 401 path when we're about to retry — /// we need the underlying socket returned to the pool but don't /// care about the body content. Subsequent `read` / `body` /// calls return empty. pub fn discard_body(&mut self) -> std::io::Result<()> { if let BodyState::Streaming(reader) = &mut self.body { std::io::copy(reader, &mut std::io::sink())?; } // Whether we were streaming or already buffered, flip to a // fresh empty buffer so the response is effectively closed. self.body = BodyState::Buffered(std::io::Cursor::new(Vec::new())); Ok(()) } /// Case-insensitive header lookup, first match wins. pub fn header(&self, name: &str) -> Option<&str> { self.headers .iter() .find(|(k, _)| k.eq_ignore_ascii_case(name)) .map(|(_, v)| v.as_str()) } /// All values for the given header (order preserved). pub fn headers_all(&self, name: &str) -> Vec<&str> { self.headers .iter() .filter(|(k, _)| k.eq_ignore_ascii_case(name)) .map(|(_, v)| v.as_str()) .collect() } } #[cfg(test)] mod tests { use super::*; // We avoid hitting the live network in unit tests. 
ureq's // `test_transport` feature would let us mock the connector, but // it's not enabled in our build. The tests here cover the // pieces that don't need a real connection: URL parsing, proxy // selection, and the config builder. #[test] fn client_builds_with_defaults() { let _ = HttpClient::new(HttpClientConfig::default()).expect("default config should build"); } #[test] fn client_builds_with_verification_disabled() { let _ = HttpClient::new(HttpClientConfig { disable_verification: true, ..HttpClientConfig::default() }) .expect("no-verify config should build"); } #[test] fn client_rejects_invalid_urls() { let client = HttpClient::new(HttpClientConfig::default()).unwrap(); // A blank URL has no scheme/authority — `Uri::parse` rejects it. let err = client.request("GET", "", &[], &[]).unwrap_err(); assert!(matches!(err, ClientError::InvalidRequest(_))); } #[test] fn client_rejects_invalid_methods() { let client = HttpClient::new(HttpClientConfig::default()).unwrap(); // Methods must be a valid HTTP token — spaces aren't allowed. let err = client .request("GET FOO", "http://example.com/", &[], &[]) .unwrap_err(); assert!(matches!(err, ClientError::InvalidRequest(_))); } #[test] fn choose_proxy_respects_no_proxy() { // We construct the client with defaults; `choose_proxy` reads // the environment at call time so we can scope the test via // the ENV_LOCK guard from `super::super::tests`. use super::super::tests::with_env_vars; let client = HttpClient::new(HttpClientConfig::default()).unwrap(); with_env_vars( &["http_proxy", "HTTP_PROXY", "no_proxy", "NO_PROXY"], &[ ("http_proxy", "http://proxy.example:8080/"), ("no_proxy", "internal.example"), ], || { // Host listed in no_proxy → no proxy applied. let uri: Uri = "http://internal.example/".parse().unwrap(); let p = client.choose_proxy(&uri).unwrap(); assert!(p.is_empty(), "no_proxy match should skip the proxy"); // Host not listed → proxy applies. 
let uri: Uri = "http://public.example/".parse().unwrap(); let p = client.choose_proxy(&uri).unwrap(); assert!(!p.is_empty(), "non-matching host should honour the proxy"); }, ); } #[test] fn choose_proxy_uses_scheme_specific_env_var() { use super::super::tests::with_env_vars; let client = HttpClient::new(HttpClientConfig::default()).unwrap(); with_env_vars( &[ "http_proxy", "HTTP_PROXY", "https_proxy", "HTTPS_PROXY", "all_proxy", "ALL_PROXY", "no_proxy", "NO_PROXY", ], &[("https_proxy", "http://sproxy.example:8443/")], || { let uri: Uri = "https://public.example/".parse().unwrap(); let p = client.choose_proxy(&uri).unwrap(); assert!(!p.is_empty(), "HTTPS request should pick up https_proxy"); let uri: Uri = "http://public.example/".parse().unwrap(); let p = client.choose_proxy(&uri).unwrap(); assert!( p.is_empty(), "HTTP request shouldn't pick up https_proxy when http_proxy is unset" ); }, ); } #[test] fn response_header_lookup_is_case_insensitive() { let resp = HttpResponse { status: 200, reason: "OK".into(), headers: vec![ ("Content-Type".to_string(), "text/plain".to_string()), ("X-Custom".to_string(), "a".to_string()), ("X-Custom".to_string(), "b".to_string()), ], body: BodyState::Buffered(std::io::Cursor::new(Vec::new())), final_url: "http://example.com/".into(), redirected_to: None, }; assert_eq!(resp.header("content-type"), Some("text/plain")); assert_eq!(resp.headers_all("x-custom"), vec!["a", "b"]); assert_eq!(resp.header("missing"), None); } #[test] fn redirect_target_prefers_location() { let resp = HttpResponse { status: 302, reason: "Found".into(), headers: vec![ ("Location".into(), "/new".into()), ("URI".into(), "/ignored".into()), ], body: BodyState::Buffered(std::io::Cursor::new(Vec::new())), final_url: "http://example.com/".into(), redirected_to: None, }; assert_eq!( redirect_target(&resp, "http://example.com/old"), Some("http://example.com/new".into()) ); } #[test] fn redirect_target_falls_back_to_uri_header() { let resp = HttpResponse { status: 301, 
reason: "Moved".into(), headers: vec![("URI".into(), "http://other.example/".into())], body: BodyState::Buffered(std::io::Cursor::new(Vec::new())), final_url: "http://example.com/".into(), redirected_to: None, }; assert_eq!( redirect_target(&resp, "http://example.com/"), Some("http://other.example/".into()) ); } #[test] fn redirect_target_returns_none_if_missing() { let resp = HttpResponse { status: 302, reason: "Found".into(), headers: vec![], body: BodyState::Buffered(std::io::Cursor::new(Vec::new())), final_url: "http://example.com/".into(), redirected_to: None, }; assert_eq!(redirect_target(&resp, "http://example.com/"), None); } #[test] fn redirect_target_joins_relative_path() { let resp = HttpResponse { status: 303, reason: "See Other".into(), headers: vec![("Location".into(), "../b".into())], body: BodyState::Buffered(std::io::Cursor::new(Vec::new())), final_url: "http://example.com/a/c".into(), redirected_to: None, }; assert_eq!( redirect_target(&resp, "http://example.com/a/c"), Some("http://example.com/b".into()) ); } /// Build a `HttpResponse` with the given code and optional /// `Location`. Keeps the test bodies short. 
fn mk_resp(code: u16, location: Option<&str>, url: &str) -> HttpResponse { let mut headers = Vec::new(); if let Some(l) = location { headers.push(("Location".into(), l.into())); } HttpResponse { status: code, reason: "".into(), headers, body: BodyState::Buffered(std::io::Cursor::new(Vec::new())), final_url: url.into(), redirected_to: None, } } #[test] fn drive_redirects_returns_non_3xx_as_is() { let opts = RequestOptions::default(); let resp = drive_redirects(&opts, "http://a/", |u| Ok(mk_resp(200, None, u))).unwrap(); assert_eq!(resp.status, 200); assert!(resp.redirected_to.is_none()); } #[test] fn drive_redirects_without_follow_sets_redirected_to() { let opts = RequestOptions::default(); // follow_redirects=false let resp = drive_redirects(&opts, "http://a/", |u| { Ok(mk_resp(302, Some("http://b/"), u)) }) .unwrap(); assert_eq!(resp.status, 302); assert_eq!(resp.redirected_to.as_deref(), Some("http://b/")); } #[test] fn drive_redirects_follows_when_enabled() { let opts = RequestOptions { follow_redirects: true, ..RequestOptions::default() }; let mut hops = 0; let resp = drive_redirects(&opts, "http://a/", |u| { hops += 1; // First hop returns 302 → /b, second returns 200. if u == "http://a/" { Ok(mk_resp(302, Some("http://a/b"), u)) } else { Ok(mk_resp(200, None, u)) } }) .unwrap(); assert_eq!(hops, 2); assert_eq!(resp.status, 200); assert_eq!(resp.final_url, "http://a/b"); } #[test] fn drive_redirects_follows_mixed_code_chain() { // Mirrors breezy's TestHTTPSilentRedirections.test_five_redirections: // a chain mixing 301 / 302 / 303 / 307 codes must be // followed all the way when follow_redirects=true. Each // hop uses a different redirect code to ensure none of // them are treated as a terminal response. 
let opts = RequestOptions { follow_redirects: true, max_redirects: 10, max_repeats: 10, }; let resp = drive_redirects(&opts, "http://a/1/a", |u| { let (code, target) = match u { "http://a/1/a" => (301, Some("http://a/2/a")), "http://a/2/a" => (302, Some("http://a/3/a")), "http://a/3/a" => (303, Some("http://a/4/a")), "http://a/4/a" => (307, Some("http://a/5/a")), _ => (200, None), }; Ok(mk_resp(code, target, u)) }) .unwrap(); assert_eq!(resp.status, 200); assert_eq!(resp.final_url, "http://a/5/a"); } #[test] fn drive_redirects_rejects_too_many_hops() { let opts = RequestOptions { follow_redirects: true, max_redirects: 2, max_repeats: 10, }; // Chain that bounces between /a and /b forever, but each // distinct URL stays under max_repeats so the cap we hit is // max_redirects. let mut toggle = false; let err = drive_redirects(&opts, "http://a/", |_u| { toggle = !toggle; let next = if toggle { "http://a/b" } else { "http://a/c" }; Ok(mk_resp(302, Some(next), "http://a/")) }) .unwrap_err(); match err { ClientError::InvalidRequest(msg) => assert!(msg.contains("too many redirects")), other => panic!("unexpected error: {:?}", other), } } #[test] fn drive_redirects_detects_loops() { let opts = RequestOptions { follow_redirects: true, max_redirects: 100, max_repeats: 2, }; // Every request redirects back to /a — we cap at // max_repeats visits to the same URL. let err = drive_redirects(&opts, "http://a/", |u| { Ok(mk_resp(302, Some("http://a/b"), u)) }) .unwrap_err(); match err { ClientError::InvalidRequest(msg) => assert!(msg.contains("too many redirects")), other => panic!("unexpected error: {:?}", other), } } #[test] fn drive_redirects_stops_when_location_absent() { // A 3xx with no Location isn't a redirect per Python's // handler — it just gets returned as-is. 
let opts = RequestOptions { follow_redirects: true, ..RequestOptions::default() }; let resp = drive_redirects(&opts, "http://a/", |u| Ok(mk_resp(302, None, u))).unwrap(); assert_eq!(resp.status, 302); assert!(resp.redirected_to.is_none()); } #[test] fn is_redirect_table() { for &code in &[301, 302, 303, 307, 308] { assert!(is_redirect(code), "{} should be a redirect", code); } // 300/304/305/306 are deliberately excluded — see the // comment on HTTPRedirectHandler.redirect_request. for &code in &[200, 300, 304, 305, 306, 400, 401, 404, 500] { assert!(!is_redirect(code), "{} should not be a redirect", code); } } // ------------------------------------------------------------------ // Auth tests. We exercise the helpers directly — the full // send_with_auth loop needs a real HTTP server, which we cover // elsewhere (breezy's test suite, plus the Python integration // tests). struct FixedCreds { user: &'static str, password: &'static str, } impl CredentialProvider for FixedCreds { fn lookup( &self, _protocol: &str, _host: &str, _port: Option, _realm: Option<&str>, _user_hint: Option<&str>, _is_proxy: bool, ) -> (Option, Option) { (Some(self.user.into()), Some(self.password.into())) } } struct NoCreds; impl CredentialProvider for NoCreds { fn lookup( &self, _: &str, _: &str, _: Option, _: Option<&str>, _: Option<&str>, _: bool, ) -> (Option, Option) { (None, None) } } struct FixedToken(&'static str); impl NegotiateProvider for FixedToken { fn initial_token(&self, _host: &str) -> Option { Some(self.0.into()) } } fn fresh_client(creds: Box) -> HttpClient { HttpClient::with_credentials(HttpClientConfig::default(), creds) .expect("config should build") } fn client_with_negotiate( creds: Box, neg: Box, ) -> HttpClient { HttpClient::with_providers(HttpClientConfig::default(), creds, neg) .expect("config should build") } #[test] fn extract_basic_realm_quoted() { assert_eq!( extract_basic_realm(r#"realm="Secure Area""#), Some("Secure Area") ); } #[test] fn 
extract_basic_realm_unquoted() { assert_eq!( extract_basic_realm("realm=unquoted,charset=UTF-8"), Some("unquoted") ); } #[test] fn extract_basic_realm_missing() { assert_eq!(extract_basic_realm("charset=UTF-8"), None); } #[test] fn auth_cache_key_normalises_scheme_and_port() { let a: Uri = "http://example.com/".parse().unwrap(); let b: Uri = "http://example.com:80/".parse().unwrap(); assert_eq!(auth_cache_key(&a), auth_cache_key(&b)); let c: Uri = "https://example.com/".parse().unwrap(); assert_ne!(auth_cache_key(&a), auth_cache_key(&c)); // Different port ⇒ different cache bucket. let d: Uri = "http://example.com:8080/".parse().unwrap(); assert_ne!(auth_cache_key(&a), auth_cache_key(&d)); } #[test] fn pick_auth_scheme_prefers_digest_over_basic() { let client = fresh_client(Box::new(FixedCreds { user: "alice", password: "sekret", })); let uri: Uri = "http://example.com/".parse().unwrap(); let challenges = [ r#"Basic realm="fallback""#, r#"Digest realm="secure", nonce="n", qop="auth""#, ]; let got = client .pick_auth_scheme(&uri, &challenges, &Method::GET) .unwrap(); assert_eq!(got.0, "digest"); assert!(matches!(got.1, CachedAuth::Digest(_))); } #[test] fn pick_auth_scheme_passes_none_port_when_uri_has_no_port() { // Regression test for https://bugs.launchpad.net/bzr/+bug/654684: // the credential lookup should still succeed when the URI // omits a port (common for `http://host/path`). The Python // side historically surfaced `None` as the port and the // auth-config store matched credentials on host only; the // Rust client propagates the same None to the provider. 
struct SeesPort(std::sync::Mutex>>); impl CredentialProvider for SeesPort { fn lookup( &self, _protocol: &str, _host: &str, port: Option, _realm: Option<&str>, _user_hint: Option<&str>, _is_proxy: bool, ) -> (Option, Option) { *self.0.lock().unwrap() = Some(port); (Some("joe".into()), Some("foo".into())) } } let seen = std::sync::Arc::new(SeesPort(std::sync::Mutex::new(None))); struct Shared(std::sync::Arc); impl CredentialProvider for Shared { fn lookup( &self, protocol: &str, host: &str, port: Option, realm: Option<&str>, user_hint: Option<&str>, is_proxy: bool, ) -> (Option, Option) { self.0 .lookup(protocol, host, port, realm, user_hint, is_proxy) } } let client = fresh_client(Box::new(Shared(seen.clone()))); let uri: Uri = "http://localhost/path".parse().unwrap(); let challenges = [r#"Basic realm="R""#]; client .pick_auth_scheme(&uri, &challenges, &Method::GET) .unwrap(); assert_eq!(*seen.0.lock().unwrap(), Some(None)); } #[test] fn pick_auth_scheme_uses_basic_when_digest_absent() { let client = fresh_client(Box::new(FixedCreds { user: "u", password: "p", })); let uri: Uri = "http://example.com/".parse().unwrap(); let challenges = [r#"Basic realm="r""#]; let got = client .pick_auth_scheme(&uri, &challenges, &Method::GET) .unwrap(); assert_eq!(got.0, "basic"); match got.1 { CachedAuth::Basic { user, password } => { assert_eq!(user, "u"); assert_eq!(password, "p"); } _ => panic!("expected Basic"), } } #[test] fn pick_auth_scheme_returns_none_when_credentials_missing() { let client = fresh_client(Box::new(NoCreds)); let uri: Uri = "http://example.com/".parse().unwrap(); let challenges = [r#"Basic realm="r""#]; assert!(client .pick_auth_scheme(&uri, &challenges, &Method::GET) .is_none()); } #[test] fn pick_auth_scheme_returns_none_for_unknown_scheme() { let client = fresh_client(Box::new(FixedCreds { user: "u", password: "p", })); let uri: Uri = "http://example.com/".parse().unwrap(); let challenges = ["Bearer realm=whatever"]; assert!(client 
.pick_auth_scheme(&uri, &challenges, &Method::GET) .is_none()); } #[test] fn pick_auth_scheme_rejects_unsupported_digest_algorithm() { let client = fresh_client(Box::new(FixedCreds { user: "u", password: "p", })); let uri: Uri = "http://example.com/".parse().unwrap(); let challenges = [ // SHA-256 isn't in our DigestAlgorithm table; the // challenge parser returns None so we fall back to Basic // (which isn't offered either) and ultimately give up. r#"Digest realm="r", nonce="n", qop="auth", algorithm="SHA-256""#, ]; assert!(client .pick_auth_scheme(&uri, &challenges, &Method::GET) .is_none()); } #[test] fn cached_auth_header_basic_formats_correctly() { let cached = CachedAuth::Basic { user: "Aladdin".into(), password: "open sesame".into(), }; let uri: Uri = "http://example.com/resource".parse().unwrap(); let hdr = cached_auth_header(&cached, &Method::GET, &uri).unwrap(); assert_eq!(hdr, "Basic QWxhZGRpbjpvcGVuIHNlc2FtZQ=="); } #[test] fn proxy_cache_key_differs_from_origin_key() { // Same host reached directly vs via a proxy should use // different cache buckets so credentials don't leak. let origin: Uri = "http://example.com/".parse().unwrap(); let ok = auth_cache_key(&origin); let pk = proxy_cache_key("http://proxy.example:8080"); assert_ne!(ok, pk); } #[test] fn proxy_cache_key_is_case_insensitive_on_host() { // Proxy host lookup shouldn't be affected by case variation. let a = proxy_cache_key("http://PROXY.Example:3128"); let b = proxy_cache_key("http://proxy.example:3128"); assert_eq!(a, b); } #[test] fn pick_auth_scheme_for_proxy_uses_credentials() { // Proxy auth without an embedded userinfo falls through to // the credential provider — that branch still needs to work // for callers that register a Python hook instead of // sticking the password in the proxy URL. 
let client = fresh_client(Box::new(FixedCreds { user: "px-u", password: "px-p", })); let uri: Uri = "http://example.com/".parse().unwrap(); let challenges = [r#"Basic realm="proxy""#]; let got = client .pick_auth_scheme_for( &challenges, &uri, None, AuthKind::Proxy, "http://proxy:3128", ) .unwrap(); assert_eq!(got.0, "basic"); match got.1 { CachedAuth::Basic { user, password } => { assert_eq!(user, "px-u"); assert_eq!(password, "px-p"); } _ => panic!("expected Basic"), } } #[test] fn pick_auth_scheme_prefers_negotiate_over_digest_and_basic() { let client = client_with_negotiate( Box::new(FixedCreds { user: "u", password: "p", }), Box::new(FixedToken("YIIDSS...base64...")), ); let uri: Uri = "http://example.com/".parse().unwrap(); let challenges = [ "Negotiate", r#"Digest realm="r", nonce="n", qop="auth""#, r#"Basic realm="r""#, ]; let got = client .pick_auth_scheme(&uri, &challenges, &Method::GET) .unwrap(); assert_eq!(got.0, "negotiate"); match got.1 { CachedAuth::Negotiate { token } => { assert_eq!(token, "YIIDSS...base64..."); } _ => panic!("expected Negotiate"), } } #[test] fn pick_auth_scheme_falls_back_when_negotiate_provider_returns_none() { // Provider says "no ticket available" → we should fall // through to Digest/Basic rather than fail. 
struct NoToken; impl NegotiateProvider for NoToken { fn initial_token(&self, _: &str) -> Option { None } } let client = client_with_negotiate( Box::new(FixedCreds { user: "u", password: "p", }), Box::new(NoToken), ); let uri: Uri = "http://example.com/".parse().unwrap(); let challenges = ["Negotiate", r#"Basic realm="r""#]; let got = client .pick_auth_scheme(&uri, &challenges, &Method::GET) .unwrap(); assert_eq!(got.0, "basic"); } #[test] fn cached_auth_header_negotiate_formats_with_scheme_prefix() { let cached = CachedAuth::Negotiate { token: "TOKEN-BYTES".into(), }; let uri: Uri = "http://example.com/".parse().unwrap(); let hdr = cached_auth_header(&cached, &Method::GET, &uri).unwrap(); assert_eq!(hdr, "Negotiate TOKEN-BYTES"); } /// Build an HttpResponse whose body is pre-buffered with the /// given bytes. Convenient for testing the read path without a /// real network connection. fn mk_buffered(status: u16, body: &[u8]) -> HttpResponse { HttpResponse { status, reason: "".into(), headers: vec![], final_url: "http://example/".into(), redirected_to: None, body: BodyState::Buffered(std::io::Cursor::new(body.to_vec())), } } #[test] fn response_read_returns_all_when_size_none() { let mut r = mk_buffered(200, b"hello"); let got = r.read(None).unwrap(); assert_eq!(got, b"hello"); } #[test] fn response_read_returns_up_to_n() { let mut r = mk_buffered(200, b"abcdef"); assert_eq!(r.read(Some(3)).unwrap(), b"abc"); assert_eq!(r.read(Some(10)).unwrap(), b"def"); // Further reads return empty. assert_eq!(r.read(Some(5)).unwrap(), Vec::::new()); } #[test] fn response_body_drains_once() { let mut r = mk_buffered(200, b"hello"); assert_eq!(r.body().unwrap(), b"hello"); // Subsequent body() calls return the same bytes. assert_eq!(r.body().unwrap(), b"hello"); } #[test] fn response_body_after_partial_read_contains_everything() { // `body()` forces a full drain regardless of where read() // left off — it returns the full buffer. 
let mut r = mk_buffered(200, b"abcdef"); assert_eq!(r.read(Some(3)).unwrap(), b"abc"); assert_eq!(r.body().unwrap(), b"abcdef"); } #[test] fn response_discard_body_marks_as_consumed() { let mut r = mk_buffered(200, b"hello"); r.discard_body().unwrap(); // After discard, reads return empty. assert_eq!(r.read(None).unwrap(), Vec::::new()); } #[test] fn cached_auth_header_digest_bumps_nonce_count_via_clone() { // cached_auth_header works on a local clone, so the cached // state's nonce_count is NOT bumped here; send_with_auth // persists the bump separately. let state = DigestAuthState { user: "u".into(), password: "p".into(), realm: "r".into(), nonce: "n".into(), nonce_count: 5, algorithm: crate::http::DigestAlgorithm::Md5, algorithm_name: None, opaque: None, qop: "auth".into(), }; let cached = CachedAuth::Digest(state.clone()); let uri: Uri = "http://example.com/x".parse().unwrap(); let hdr = cached_auth_header(&cached, &Method::GET, &uri).unwrap(); assert!(hdr.contains("nc=00000006")); // The original state wasn't mutated. if let CachedAuth::Digest(orig) = cached { assert_eq!(orig.nonce_count, 5); } } #[test] fn no_token_provider_returns_none() { let p = NoTokenProvider; assert!(p.lookup("https", "example.com", None, Some("/")).is_none()); } #[test] fn token_provider_records_lookup_arguments() { // Custom provider that captures the lookup args so we can // assert send_with_auth (and direct callers) hand through // protocol/host/port/path verbatim. 
struct Recorder { seen: Mutex, Option)>>, answer: Option<(String, String)>, } impl TokenProvider for Recorder { fn lookup( &self, protocol: &str, host: &str, port: Option, path: Option<&str>, ) -> Option<(String, String)> { *self.seen.lock().unwrap() = Some((protocol.into(), host.into(), port, path.map(str::to_string))); self.answer.clone() } } let rec = Recorder { seen: Mutex::new(None), answer: Some(("Bearer".into(), "abc123".into())), }; let result = rec.lookup("https", "example.com", Some(8443), Some("/foo")); assert_eq!(result, Some(("Bearer".into(), "abc123".into()))); assert_eq!( *rec.seen.lock().unwrap(), Some(( "https".into(), "example.com".into(), Some(8443), Some("/foo".into()), )) ); } } dromedary-0.1.5/src/http/mod.rs000066400000000000000000001235231520150013200163710ustar00rootroot00000000000000//! HTTP support helpers. //! //! These are environment/filesystem lookups that don't need any HTTP client //! to be wired in: locating the CA certificate bundle and the User-Agent //! default. The Python wrapper in `dromedary.http` delegates to these. pub mod auth; pub mod client; pub mod response; pub mod transport; pub use auth::{ build_basic_auth_header, build_digest_auth_header, parse_digest_challenge, DigestAuthState, DigestChallenge, }; pub use client::{ClientError, HttpClient, HttpClientConfig, HttpResponse}; pub use response::{handle_response, InFile, RangeFile, ResponseError, ResponseFile, ResponseKind}; pub use transport::{HttpTransport, ReadvTuning}; use std::path::{Path, PathBuf}; use std::sync::Mutex; use lazy_static::lazy_static; /// Known locations for CA certificate bundles on common Unix platforms. /// /// The first existing path wins. Note for packagers: if there is no package /// providing certs for your platform, the curl project produces /// weekly. 
pub const SSL_CA_CERTS_KNOWN_LOCATIONS: &[&str] = &[ "/etc/ssl/certs/ca-certificates.crt", // Ubuntu/Debian/Gentoo "/etc/pki/tls/certs/ca-bundle.crt", // Fedora/CentOS/RH "/etc/ssl/ca-bundle.pem", // OpenSUSE "/etc/ssl/cert.pem", // OpenSUSE "/usr/local/share/certs/ca-root-nss.crt", // FreeBSD "/etc/openssl/certs/ca-certificates.crt", // Solaris (unchecked) ]; lazy_static! { static ref CA_PATH_CACHE: Mutex> = Mutex::new(None); /// Current User-Agent prefix used by the HTTP client. Starts as /// `"Dromedary/"`; breezy overrides this via /// [`set_user_agent`] at module load. static ref USER_AGENT_PREFIX: Mutex = Mutex::new(format!("Dromedary/{}", env!("CARGO_PKG_VERSION"))); /// Path to the PEM bundle we materialised from the platform's /// native certificate store. Cached for the process lifetime so /// repeated calls don't re-read the keychain / registry. static ref NATIVE_CA_BUNDLE_PATH: Mutex> = Mutex::new(None); } /// Replace the current User-Agent prefix. pub fn set_user_agent(prefix: String) { *USER_AGENT_PREFIX.lock().unwrap() = prefix; } /// Return the current User-Agent prefix. pub fn default_user_agent() -> String { USER_AGENT_PREFIX.lock().unwrap().clone() } /// Certificate verification requirement. The integer representation /// matches the Python `ssl.CERT_*` constants so the Rust and Python /// sides can interchange values without a translation table. #[derive(Debug, Clone, Copy, PartialEq, Eq)] #[repr(u8)] pub enum CertReqs { /// `ssl.CERT_NONE` — no verification. None = 0, /// `ssl.CERT_REQUIRED` — verify the peer certificate. Required = 2, } impl CertReqs { pub fn to_int(self) -> u8 { self as u8 } } /// Path to a PEM bundle materialised from the platform's native /// certificate store (macOS keychain, Windows cert store, or the /// Linux `ca-certificates` bundle by way of `SSL_CERT_FILE` / /// `SSL_CERT_DIR` env vars). 
///
/// Returns `None` if nothing could be loaded — that includes the case
/// where the platform has no native store at all, or where loading
/// failed for any reason (we treat failure as "no certs" rather than
/// poisoning the Python side with an exception).
///
/// The file is written once per process and kept on disk so Python's
/// `ssl.load_verify_locations(cafile=...)` has a stable path to
/// reference. Subsequent calls return the cached path.
///
/// Tests may invalidate the cache via [`clear_native_ca_bundle_cache`].
pub fn native_ca_bundle_path() -> Option<String> {
    use base64::Engine;
    use std::io::Write;

    if let Some(cached) = NATIVE_CA_BUNDLE_PATH.lock().unwrap().as_ref() {
        return Some(cached.clone());
    }

    // Any loader error, or an empty store, counts as "no certs".
    let loaded = rustls_native_certs::load_native_certs();
    if !loaded.errors.is_empty() || loaded.certs.is_empty() {
        return None;
    }
    let certs = loaded.certs;

    // Serialise to PEM. Writing "-----BEGIN CERTIFICATE-----" wrappers
    // around base64-encoded DER by hand keeps us off the `pem` crate.
    let mut pem = String::with_capacity(certs.len() * 2000);
    for der in &certs {
        let encoded = base64::engine::general_purpose::STANDARD.encode(der);
        pem.push_str("-----BEGIN CERTIFICATE-----\n");
        // PEM wraps at 64 chars.
        for chunk in encoded.as_bytes().chunks(64) {
            pem.push_str(std::str::from_utf8(chunk).expect("base64 output is ASCII"));
            pem.push('\n');
        }
        pem.push_str("-----END CERTIFICATE-----\n");
    }

    // Every failure from here on degrades to `None`. Until `keep()`
    // succeeds the temp file is removed again when `tmp` is dropped.
    let mut tmp = tempfile::Builder::new()
        .prefix("dromedary-native-ca-")
        .suffix(".pem")
        .tempfile()
        .ok()?;
    tmp.write_all(pem.as_bytes()).ok()?;
    let path = tmp.into_temp_path().keep().ok()?;

    let path_str = path.to_string_lossy().into_owned();
    *NATIVE_CA_BUNDLE_PATH.lock().unwrap() = Some(path_str.clone());
    Some(path_str)
}

/// Invalidate the cached native CA bundle path (for tests).
pub fn clear_native_ca_bundle_cache() {
    NATIVE_CA_BUNDLE_PATH.lock().unwrap().take();
}

/// Platform-default certificate verification requirement.
///
/// Windows and macOS historically had no native access to root
/// certificates from Python's `ssl`, so Breezy chose `CERT_NONE`
/// there to avoid false negatives. Everywhere else `CERT_REQUIRED`
/// is the safe default. With the native-certs branch in
/// [`default_ca_certs`] we could tighten this later, but for now we
/// preserve the historical behaviour.
pub fn default_cert_reqs() -> CertReqs {
    let historically_unverified = cfg!(any(target_os = "windows", target_os = "macos"));
    if historically_unverified {
        CertReqs::None
    } else {
        CertReqs::Required
    }
}

/// Clear the cached CA bundle path.
///
/// Primarily useful in tests that mutate `CURL_CA_BUNDLE`.
pub fn clear_ca_path_cache() {
    CA_PATH_CACHE.lock().unwrap().take();
}

/// Locate the CA bundle to use for SSL connections.
///
/// Mirrors the behaviour of curl's `CURL_CA_BUNDLE` lookup:
///
/// 1. If `CURL_CA_BUNDLE` is set, use it.
/// 2. On Windows, search the application directory and `PATH` entries for
///    `curl-ca-bundle.crt` (the current working directory is deliberately
///    excluded).
/// 3. Otherwise return an empty string.
///
/// When `use_cache` is true the result is memoised in a process-global
/// cache; subsequent calls return the cached value regardless of environment
/// changes. Call [`clear_ca_path_cache`] to invalidate it.
pub fn get_ca_path(use_cache: bool) -> String {
    if use_cache {
        if let Some(hit) = CA_PATH_CACHE.lock().unwrap().as_ref() {
            return hit.clone();
        }
    }

    // An unset (or unreadable) variable behaves like an empty one.
    let from_env = std::env::var("CURL_CA_BUNDLE").unwrap_or_default();
    let path = if from_env.is_empty() && cfg!(target_os = "windows") {
        find_windows_ca_bundle().unwrap_or_default()
    } else {
        from_env
    };

    if !path.is_empty() {
        log::debug!("using CA bundle: {:?}", path);
    }

    if use_cache {
        let mut slot = CA_PATH_CACHE.lock().unwrap();
        *slot = Some(path.clone());
    }
    path
}

/// Search the application directory and `PATH` for `curl-ca-bundle.crt`.
/// /// Kept separate from [`get_ca_path`] so it can be unit-tested without an /// actual Windows host. The cwd is intentionally not searched — see the /// comments in the original Python implementation. fn find_windows_ca_bundle() -> Option { let mut dirs: Vec = Vec::new(); if let Some(argv0) = std::env::args_os().next() { if let Ok(canon) = Path::new(&argv0).canonicalize() { if let Some(parent) = canon.parent() { dirs.push(parent.to_path_buf()); } } } if let Some(paths) = std::env::var_os("PATH") { for entry in std::env::split_paths(&paths) { let s = entry.as_os_str(); if s.is_empty() || s == std::ffi::OsStr::new(".") { continue; } dirs.push(entry); } } for d in dirs { let candidate = d.join("curl-ca-bundle.crt"); if candidate.is_file() { return Some(candidate.to_string_lossy().into_owned()); } } None } /// Return the default CA certificates path for the running platform. /// /// Precedence: /// /// 1. On Linux, scan [`SSL_CA_CERTS_KNOWN_LOCATIONS`] first — the /// system bundle there is what most TLS libraries read anyway, and /// keeping it means we pass the *real* path (not a materialised /// copy) to Python's `ssl.load_verify_locations`. /// 2. Otherwise materialise the native certificate store to a PEM /// tempfile (via [`native_ca_bundle_path`]) and return that path. /// This is the main win on Windows and macOS where the Python /// `ssl` module otherwise can't see the native root CAs. /// 3. On Linux with nothing installed, return the first known /// location as a breadcrumb so error messages point at a plausible /// path. On Windows, fall back to looking for `cacert.pem` next to /// the executable (the historical default Breezy used). pub fn default_ca_certs() -> String { // Linux first: prefer the real system bundle over a // materialisation of it. 
/// Format a User-Agent prefix from a product name and version.
///
/// Returns `"<product>/<version>"`; neither component is validated or
/// escaped.
pub fn format_user_agent(product: &str, version: &str) -> String {
    format!("{product}/{version}")
}

/// Decision returned by [`evaluate_proxy_bypass`].
///
/// The trichotomy mirrors the Python `ProxyHandler.evaluate_proxy_bypass`
/// return values of `True` / `False` / `None`. Python's `None` lets the
/// caller fall through to the stdlib `urllib.request.proxy_bypass`,
/// which consults platform-specific sources (Windows registry,
/// system-wide proxy config, etc.). We surface that as its own
/// variant so the caller can make the same choice.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProxyBypass {
    /// The host matched the `no_proxy` list; skip the proxy.
    Bypass,
    /// No `no_proxy` list was configured, so every host is proxied.
    /// The Python original returned `False` here, and the caller
    /// never consulted the platform fallback in this case.
    UseProxy,
    /// A `no_proxy` list was configured but nothing matched the
    /// host. Python returned `None`, and the caller fell through to
    /// the platform-specific proxy-bypass check.
    Undecided,
}

/// Snapshot the proxy-related environment variables into a map of
/// `scheme.lower() -> proxy_url`.
///
/// Produces the same answers as Python's
/// `urllib.request.getproxies_environment`, including its quirks:
///
/// - any spelling of `<scheme>_proxy` is accepted (`HTTP_PROXY`,
///   `Http_Proxy`, ...), but an empty value is ignored;
/// - when `REQUEST_METHOD` is set (CGI), the `http` entry collected so
///   far is dropped (CVE-2016-1000110);
/// - names whose suffix is spelled exactly `_proxy` are applied last:
///   they override other spellings, and an empty value *deletes* the
///   entry.
///
/// Intentionally reads the live environment on every call; callers
/// that want caching should cache the returned map.
///
/// Variables whose name or value is not valid Unicode are skipped:
/// they cannot be stored in the returned `String` map, and
/// `std::env::vars()` would panic on them.
pub fn getproxies_environment() -> std::collections::HashMap<String, String> {
    const SUFFIX: &str = "_proxy";

    let mut proxies = std::collections::HashMap::new();
    // `(scheme, value)` for every variable whose suffix is spelled exactly
    // `_proxy`. They are applied after the CGI guard so they always win.
    let mut lowercase_entries: Vec<(String, String)> = Vec::new();

    for (name, value) in std::env::vars_os() {
        let (Ok(name), Ok(value)) = (name.into_string(), value.into_string()) else {
            continue;
        };
        let Some(split) = name.len().checked_sub(SUFFIX.len()) else {
            continue;
        };
        if !name.is_char_boundary(split) || !name[split..].eq_ignore_ascii_case(SUFFIX) {
            continue;
        }
        let scheme = name[..split].to_ascii_lowercase();
        if name.ends_with(SUFFIX) {
            lowercase_entries.push((scheme, value));
        } else if !value.is_empty() {
            proxies.insert(scheme, value);
        }
    }

    // CVE-2016-1000110: when running as a CGI script, drop `HTTP_PROXY`
    // to avoid honouring a client-supplied `Proxy:` header.
    if std::env::var_os("REQUEST_METHOD").is_some() {
        proxies.remove("http");
    }

    // Lowercase names override (including "set empty to delete").
    for (scheme, value) in lowercase_entries {
        if value.is_empty() {
            proxies.remove(&scheme);
        } else {
            proxies.insert(scheme, value);
        }
    }
    proxies
}

/// Look up a proxy URL in the map returned by [`getproxies_environment`],
/// with a `default_to` fallback (typically `"all"` to honour
/// `ALL_PROXY` / `all_proxy`).
///
/// Mirrors breezy's `ProxyHandler.get_proxy_env_var`. `name` is
/// lower-cased before lookup (`default_to` is used as given);
/// `default_to = None` disables the fallback.
pub fn get_proxy_env_var(
    proxies: &std::collections::HashMap<String, String>,
    name: &str,
    default_to: Option<&str>,
) -> Option<String> {
    proxies
        .get(&name.to_ascii_lowercase())
        .or_else(|| default_to.and_then(|fallback| proxies.get(fallback)))
        .cloned()
}

/// Check a host against a comma-separated `no_proxy` list and
/// return whether the proxy should be bypassed.
///
/// Mirrors breezy's `ProxyHandler.evaluate_proxy_bypass`, including
/// its quirks:
///
/// - entries are `host[:port]`, port-matched against the client's
///   port — an entry without a port matches any port;
/// - `*` and `?` inside the entry's host act as shell-style globs,
///   `.` is treated literally;
/// - matching is case-insensitive and **anchored at the start
///   only** (the Python implementation uses `re.match`), so an
///   entry of `example.com` matches both `example.com` and
///   `example.com.evil.com`. That's surprising, but it's what the
///   existing tests depend on — don't "fix" it here.
///
/// Returns [`ProxyBypass::UseProxy`] when `no_proxy` is `None`,
/// [`ProxyBypass::Bypass`] on the first matching entry, and
/// [`ProxyBypass::Undecided`] when a list was given but nothing matched.
pub fn evaluate_proxy_bypass(host: &str, no_proxy: Option<&str>) -> ProxyBypass {
    let Some(no_proxy) = no_proxy else {
        // Python returns `False` here: all hosts are proxied when no
        // `no_proxy` list is configured.
        return ProxyBypass::UseProxy;
    };
    let (hhost, hport) = splitport(host);
    let matched = no_proxy
        .split(',')
        .map(str::trim)
        .filter(|domain| !domain.is_empty())
        .any(|domain| {
            let (dhost, dport) = splitport(domain);
            (dport.is_none() || dport == hport)
                && glob_prefix_match_ignore_ascii_case(dhost, hhost)
        });
    if matched {
        ProxyBypass::Bypass
    } else {
        // Python returned `None` here, which its caller treated as
        // "fall back to the platform proxy-bypass check".
        ProxyBypass::Undecided
    }
}

/// Match `host` against `pattern` using the same dialect the Python
/// helper built from `re.sub`: `.` is literal, `*` is `.*`, `?` is
/// `.`, ASCII-case-insensitive, anchored at the start only.
///
/// Matching is done per character (not per byte), so `?` consumes one
/// whole code point even when the host contains non-ASCII text.
///
/// Implemented by hand rather than compiling a regex because the
/// patterns are tiny and this runs once per `no_proxy` entry per request.
fn glob_prefix_match_ignore_ascii_case(pattern: &str, host: &str) -> bool {
    // Recursion happens only at `*`, so depth is bounded by the number of
    // `*` runs in the pattern.
    fn go(pattern: &str, mut text: &str) -> bool {
        let mut pat = pattern.chars();
        while let Some(pc) = pat.next() {
            if pc == '*' {
                // A run of `*` behaves like a single `*`.
                let rest = pat.as_str().trim_start_matches('*');
                if rest.is_empty() {
                    // Trailing `*`: prefix-only matching means we are done.
                    return true;
                }
                // Try the remainder of the pattern at every character
                // boundary of the remaining text, including the very end.
                loop {
                    if go(rest, text) {
                        return true;
                    }
                    let mut chars = text.chars();
                    if chars.next().is_none() {
                        return false;
                    }
                    text = chars.as_str();
                }
            }
            let mut chars = text.chars();
            let Some(tc) = chars.next() else {
                return false;
            };
            // `?` matches any single character; everything else is literal.
            if pc != '?' && !pc.eq_ignore_ascii_case(&tc) {
                return false;
            }
            text = chars.as_str();
        }
        // Prefix-only: consuming the whole pattern is a match even
        // if there's unmatched input remaining.
        true
    }
    go(pattern, host)
}

/// Split a `host[:port]` string into its two parts.
///
/// Mirrors the Python helper in `dromedary/http/urllib.py`: the port is
/// the digits after the *last* colon (so IPv6 literals like `"::1"` parse
/// as host `":"` with port `"1"`, matching the regex). An empty port
/// (`"host:"`) is returned as `None` instead of an empty string. If no
/// `:` is present, or the text after the last `:` is not all ASCII
/// digits, port is `None` and the whole input is returned as the host.
pub fn splitport(host: &str) -> (&str, Option<&str>) {
    match host.rsplit_once(':') {
        Some((name, "")) => (name, None),
        Some((name, port)) if port.bytes().all(|b| b.is_ascii_digit()) => (name, Some(port)),
        _ => (host, None),
    }
}

/// Split a WWW-Authenticate / Proxy-Authenticate header into `(scheme,
/// remainder)`.
///
/// The scheme is always lowercased. The remainder is whatever follows
/// the first whitespace run, trimmed of leading whitespace (preserving
/// internal spaces and quoting). If the header has no whitespace, the
/// whole header is the scheme and the remainder is `None`.
pub fn parse_auth_header(server_header: &str) -> (String, Option<&str>) {
    match server_header.split_once(|c: char| c.is_ascii_whitespace()) {
        Some((scheme, rest)) => (scheme.to_ascii_lowercase(), Some(rest.trim_start())),
        None => (server_header.to_ascii_lowercase(), None),
    }
}
pub fn parse_http_list(s: &str) -> Vec { let mut parts: Vec = Vec::new(); let mut part = String::new(); let mut quote = false; let mut escape = false; for cur in s.chars() { if escape { part.push(cur); escape = false; continue; } if quote { if cur == '\\' { escape = true; continue; } else if cur == '"' { quote = false; } part.push(cur); continue; } if cur == ',' { parts.push(std::mem::take(&mut part)); continue; } if cur == '"' { quote = true; } part.push(cur); } if !part.is_empty() { parts.push(part); } parts.into_iter().map(|p| p.trim().to_string()).collect() } /// HTTP Digest authentication hash algorithm as named in the /// `algorithm=` parameter of a `WWW-Authenticate: Digest` header. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum DigestAlgorithm { /// RFC 2617 `MD5` — the default and still the most commonly seen. Md5, /// RFC 2617 `SHA` — SHA-1 based digest. RFC 7616's SHA-256 family is /// *not* yet supported (the Python side never accepted it either). Sha1, } impl DigestAlgorithm { /// Parse the `algorithm=` value. Returns `None` for unsupported /// algorithms, matching the Python behaviour of failing the /// `auth_match` check. pub fn parse(name: &str) -> Option { match name { "MD5" => Some(Self::Md5), "SHA" => Some(Self::Sha1), _ => None, } } /// The digest function `H(x)` from RFC 2617 §3.2.1: hex-encoded /// digest of the raw input bytes. pub fn h(self, data: &[u8]) -> String { match self { Self::Md5 => { use md5::{Digest, Md5}; hex::encode(Md5::digest(data)) } Self::Sha1 => { use sha1::{Digest, Sha1}; hex::encode(Sha1::digest(data)) } } } /// The keyed-digest function `KD(secret, data) = H(secret ":" data)`. pub fn kd(self, secret: &str, data: &str) -> String { let mut buf = String::with_capacity(secret.len() + 1 + data.len()); buf.push_str(secret); buf.push(':'); buf.push_str(data); self.h(buf.as_bytes()) } } /// Generate a client nonce for HTTP Digest authentication. 
/// /// Builds `":::"` and returns /// the first 16 hex characters of its SHA-1 digest, matching the /// Python `get_new_cnonce`. Uniqueness is what the cnonce needs; the /// exact bit-mixing is not security-critical beyond collision /// resistance. pub fn new_cnonce(nonce: &str, nonce_count: u64) -> String { use rand::Rng; use sha1::{Digest, Sha1}; use std::time::{SystemTime, UNIX_EPOCH}; let ts = SystemTime::now() .duration_since(UNIX_EPOCH) .map(|d| d.as_nanos()) .unwrap_or(0); const CHARS: &[u8] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZ\ abcdefghijklmnopqrstuvwxyz\ 0123456789"; let mut rng = rand::thread_rng(); let rand_suffix: String = (0..8) .map(|_| CHARS[rng.gen_range(0..CHARS.len())] as char) .collect(); let raw = format!("{}:{}:{}:{}", nonce, nonce_count, ts, rand_suffix); let digest = Sha1::digest(raw.as_bytes()); hex::encode(digest)[..16].to_string() } /// Parse a list of `key=value` pairs into a map. Matches /// `urllib.request.parse_keqv_list`. /// /// Values wrapped in a single pair of double quotes are unquoted /// verbatim (no escape processing — [`parse_http_list`] already /// consumed any `\"`). Duplicated keys follow Python semantics: the /// last one wins. Entries without `=` are silently dropped, matching /// stdlib behaviour when the caller feeds it through /// [`parse_http_list`] first. pub fn parse_keqv_list(items: &[String]) -> std::collections::HashMap { let mut parsed = std::collections::HashMap::new(); for elt in items { if let Some((k, v)) = elt.split_once('=') { let v = if v.len() >= 2 && v.starts_with('"') && v.ends_with('"') { &v[1..v.len() - 1] } else { v }; parsed.insert(k.to_string(), v.to_string()); } } parsed } #[cfg(test)] mod tests { use super::*; // Tests in this module mutate the shared `CURL_CA_BUNDLE` environment // variable and the module-level cache, so they must not run in parallel // with each other. 
#[cfg(test)]
mod tests {
    use super::*;

    // Tests in this module mutate the shared `CURL_CA_BUNDLE` environment
    // variable and the module-level cache, so they must not run in parallel
    // with each other.
    static ENV_LOCK: Mutex<()> = Mutex::new(());

    /// Take `ENV_LOCK`, recovering it if an earlier test panicked while
    /// holding it. The lock only serialises access, so a poisoned guard
    /// is still perfectly usable — and recovering keeps one failing test
    /// from cascading into every other environment test.
    fn lock_env() -> std::sync::MutexGuard<'static, ()> {
        ENV_LOCK
            .lock()
            .unwrap_or_else(|poisoned| poisoned.into_inner())
    }

    #[test]
    fn ca_path_honours_env_var() {
        let _guard = lock_env();
        clear_ca_path_cache();
        let sentinel = "dromedary-test-ca-bundle.pem";
        // SAFETY: serialised against the other tests in this module via
        // `ENV_LOCK`; std::env::set_var is unsafe only under concurrent access.
        unsafe { std::env::set_var("CURL_CA_BUNDLE", sentinel) };
        let got = get_ca_path(false);
        unsafe { std::env::remove_var("CURL_CA_BUNDLE") };
        assert_eq!(got, sentinel);
    }

    #[test]
    fn ca_path_caches_when_requested() {
        let _guard = lock_env();
        clear_ca_path_cache();
        // SAFETY: serialised against the other tests in this module via
        // `ENV_LOCK`.
        unsafe { std::env::set_var("CURL_CA_BUNDLE", "first-sentinel") };
        let first = get_ca_path(true);
        unsafe { std::env::set_var("CURL_CA_BUNDLE", "second-sentinel") };
        let second = get_ca_path(true);
        unsafe { std::env::remove_var("CURL_CA_BUNDLE") };
        assert_eq!(first, "first-sentinel");
        assert_eq!(second, "first-sentinel");
        clear_ca_path_cache();
    }

    #[test]
    fn default_ca_certs_returns_known_fallback() {
        // We don't know which path exists on the test host, but it must be
        // one of the known locations (or the first one as a last resort).
        let result = default_ca_certs();
        let allowed = || SSL_CA_CERTS_KNOWN_LOCATIONS.contains(&result.as_str());
        if cfg!(any(target_os = "windows", target_os = "macos")) {
            // Both materialise the native root store to a tempfile via
            // `native_ca_bundle_path`. Only when that fails does Windows
            // fall back to the `cacert.pem`-next-to-the-exe convention,
            // which won't happen in a normal cargo-test run.
            assert!(
                result.ends_with(".pem") && result.contains("dromedary-native-ca-"),
                "unexpected CA path: {}",
                result,
            );
        } else {
            assert!(allowed(), "unexpected CA path: {}", result);
        }
    }

    #[test]
    fn user_agent_format() {
        assert_eq!(format_user_agent("Dromedary", "0.1.0"), "Dromedary/0.1.0");
    }

    #[test]
    fn default_cert_reqs_matches_ssl_constants() {
        // `ssl.CERT_NONE == 0` and `ssl.CERT_REQUIRED == 2` are load-
        // bearing: the Python side compares this integer against those
        // constants. If the enum ever grows a new variant, it must
        // reuse the corresponding stdlib integer.
        let v = default_cert_reqs().to_int();
        assert!(matches!(v, 0 | 2));
    }

    /// Restores a snapshot of environment variables when dropped, so the
    /// environment is put back even if the test closure panics.
    struct RestoreEnv(Vec<(String, Option<String>)>);

    impl Drop for RestoreEnv {
        fn drop(&mut self) {
            for (k, v) in self.0.drain(..) {
                // SAFETY: only constructed while `ENV_LOCK` is held, and
                // dropped before that guard is released.
                unsafe {
                    match v {
                        Some(v) => std::env::set_var(&k, v),
                        None => std::env::remove_var(&k),
                    }
                }
            }
        }
    }

    // Proxy env-var tests must not race on the shared process
    // environment. We reuse `ENV_LOCK` defined above for the
    // `CURL_CA_BUNDLE` tests. Exposed to sibling test modules
    // (e.g. `client::tests`) that also mutate env vars.
    pub(crate) fn with_env_vars<R>(
        clear: &[&str],
        set: &[(&str, &str)],
        f: impl FnOnce() -> R,
    ) -> R {
        let _guard = lock_env();
        // Snapshot the bits we're about to touch so we can restore them.
        // Declared after `_guard`, so it is dropped (and the environment
        // restored) before the lock is released.
        let _restore = RestoreEnv(
            clear
                .iter()
                .chain(set.iter().map(|(k, _)| k))
                .map(|k| (k.to_string(), std::env::var(k).ok()))
                .collect(),
        );
        // SAFETY: serialised against other tests in this module via ENV_LOCK.
        unsafe {
            for k in clear {
                std::env::remove_var(k);
            }
            for (k, v) in set {
                std::env::set_var(k, v);
            }
        }
        f()
    }

    #[test]
    fn getproxies_environment_reads_any_case() {
        with_env_vars(
            &["http_proxy", "HTTP_PROXY", "https_proxy", "HTTPS_PROXY"],
            &[("HTTP_PROXY", "http://upper.example/")],
            || {
                let p = getproxies_environment();
                assert_eq!(
                    p.get("http").map(String::as_str),
                    Some("http://upper.example/")
                );
            },
        );
    }

    #[test]
    fn getproxies_environment_lowercase_wins() {
        with_env_vars(
            &["http_proxy", "HTTP_PROXY"],
            &[
                ("HTTP_PROXY", "http://upper.example/"),
                ("http_proxy", "http://lower.example/"),
            ],
            || {
                let p = getproxies_environment();
                assert_eq!(
                    p.get("http").map(String::as_str),
                    Some("http://lower.example/")
                );
            },
        );
    }

    #[test]
    fn getproxies_environment_empty_lowercase_deletes() {
        with_env_vars(
            &["http_proxy", "HTTP_PROXY"],
            &[("HTTP_PROXY", "http://upper.example/"), ("http_proxy", "")],
            || {
                let p = getproxies_environment();
                // Python explicitly removes the entry when the
                // lowercase variant is set to empty. Preserve that.
                assert_eq!(p.get("http"), None);
            },
        );
    }

    #[test]
    fn getproxies_environment_cgi_guard() {
        with_env_vars(
            &["http_proxy", "HTTP_PROXY", "REQUEST_METHOD"],
            &[
                ("HTTP_PROXY", "http://attacker.example/"),
                ("REQUEST_METHOD", "GET"),
            ],
            || {
                let p = getproxies_environment();
                // CVE-2016-1000110: CGI scripts must ignore HTTP_PROXY.
                assert_eq!(p.get("http"), None);
            },
        );
    }

    #[test]
    fn get_proxy_env_var_falls_back_to_all() {
        let mut proxies = std::collections::HashMap::new();
        proxies.insert("all".to_string(), "http://all.example/".to_string());
        assert_eq!(
            get_proxy_env_var(&proxies, "http", Some("all")).as_deref(),
            Some("http://all.example/"),
        );
        // No fallback configured: returns None even when `all` is set.
        assert_eq!(get_proxy_env_var(&proxies, "http", None), None);
    }

    #[test]
    fn evaluate_proxy_bypass_use_proxy_when_unset() {
        // Python returns `False` when no_proxy is None — meaning
        // every host is proxied, skip the platform fallback.
        assert_eq!(
            evaluate_proxy_bypass("example.com", None),
            ProxyBypass::UseProxy
        );
    }

    #[test]
    fn evaluate_proxy_bypass_exact_match() {
        assert_eq!(
            evaluate_proxy_bypass("example.com", Some("example.com")),
            ProxyBypass::Bypass
        );
    }

    #[test]
    fn evaluate_proxy_bypass_is_prefix_only() {
        // Python's re.match anchors at the start only, so this
        // surprising case matches. We preserve that behaviour.
        assert_eq!(
            evaluate_proxy_bypass("example.com.evil.com", Some("example.com")),
            ProxyBypass::Bypass
        );
    }

    #[test]
    fn evaluate_proxy_bypass_no_match() {
        assert_eq!(
            evaluate_proxy_bypass("foo.com", Some("bar.com,baz.com")),
            ProxyBypass::Undecided
        );
    }

    #[test]
    fn evaluate_proxy_bypass_dot_is_literal() {
        // `.` shouldn't act as a regex wildcard; `exampleXcom` is
        // not equivalent to `example.com`.
        assert_eq!(
            evaluate_proxy_bypass("exampleXcom", Some("example.com")),
            ProxyBypass::Undecided
        );
    }

    #[test]
    fn evaluate_proxy_bypass_star_glob() {
        assert_eq!(
            evaluate_proxy_bypass("host1.internal", Some("*.internal")),
            ProxyBypass::Bypass
        );
    }

    #[test]
    fn evaluate_proxy_bypass_leading_star_glob() {
        // `*example.com` with prefix-only matching still works
        // because `*` eats the leading label(s). Matches breezy's
        // TestHttpProxyWhiteBox.test_evaluate_proxy_bypass_true.
        assert_eq!(
            evaluate_proxy_bypass("bzr.example.com", Some("*example.com")),
            ProxyBypass::Bypass
        );
    }

    #[test]
    fn evaluate_proxy_bypass_question_glob() {
        // `?` matches exactly one character.
        assert_eq!(
            evaluate_proxy_bypass("host1.com", Some("host?.com")),
            ProxyBypass::Bypass
        );
        // `host10.com` doesn't match `host?.com` because after `host?`
        // eats `host1`, the pattern still expects a literal `.com` —
        // but the input has `0.com` at that point, and `0` isn't a
        // `.`. Matches Python's `re.match('host.\.com', 'host10.com')`.
        assert_eq!(
            evaluate_proxy_bypass("host10.com", Some("host?.com")),
            ProxyBypass::Undecided
        );
    }

    #[test]
    fn evaluate_proxy_bypass_case_insensitive() {
        assert_eq!(
            evaluate_proxy_bypass("EXAMPLE.COM", Some("example.com")),
            ProxyBypass::Bypass
        );
    }

    #[test]
    fn evaluate_proxy_bypass_port_wildcard_entry() {
        // Entry without a port matches any port.
        assert_eq!(
            evaluate_proxy_bypass("example.com:8080", Some("example.com")),
            ProxyBypass::Bypass
        );
    }

    #[test]
    fn evaluate_proxy_bypass_port_must_match_when_specified() {
        assert_eq!(
            evaluate_proxy_bypass("example.com:8080", Some("example.com:80")),
            ProxyBypass::Undecided
        );
        assert_eq!(
            evaluate_proxy_bypass("example.com:80", Some("example.com:80")),
            ProxyBypass::Bypass
        );
    }

    #[test]
    fn evaluate_proxy_bypass_commas_and_whitespace() {
        // Leading/trailing whitespace around each entry is stripped;
        // empty entries from e.g. a trailing comma are skipped.
        assert_eq!(
            evaluate_proxy_bypass("foo.com", Some(" bar.com , foo.com ,")),
            ProxyBypass::Bypass
        );
    }

    #[test]
    fn evaluate_proxy_bypass_empty_list_entries() {
        // A `no_proxy` value that's entirely empty or commas-only
        // or contains empty inner entries should be equivalent to
        // "no bypass list entries matched": callers fall through to
        // the default proxy behaviour. Mirrors breezy's
        // TestHttpProxyWhiteBox.test_evaluate_proxy_bypass_empty_entries.
        assert_eq!(
            evaluate_proxy_bypass("example.com", Some("")),
            ProxyBypass::Undecided
        );
        assert_eq!(
            evaluate_proxy_bypass("example.com", Some(",")),
            ProxyBypass::Undecided
        );
        assert_eq!(
            evaluate_proxy_bypass("example.com", Some("foo,,bar")),
            ProxyBypass::Undecided
        );
    }

    #[test]
    fn user_agent_setter_roundtrips() {
        // The User-Agent prefix is process-global state, so other
        // tests may have mutated it. Save and restore around this
        // test to keep the suite self-contained.
        let prev = default_user_agent();
        set_user_agent("Test-Agent/1.0".into());
        assert_eq!(default_user_agent(), "Test-Agent/1.0");
        set_user_agent(prev);
    }

    #[test]
    fn splitport_splits_trailing_port() {
        assert_eq!(splitport("example.com:443"), ("example.com", Some("443")));
    }

    #[test]
    fn splitport_no_port() {
        assert_eq!(splitport("example.com"), ("example.com", None));
    }

    #[test]
    fn splitport_empty_port_is_none() {
        // "host:" matches the Python regex with an empty port group; the
        // Python helper normalises that to `None` via `port or None`.
        assert_eq!(splitport("example.com:"), ("example.com", None));
    }

    #[test]
    fn splitport_non_numeric_port_keeps_whole_host() {
        // Python's regex only matches digits, so everything after the last
        // ':' has to be all-digits for the split to happen. Anything else
        // falls through to `return host, None`.
        assert_eq!(splitport("example.com:http"), ("example.com:http", None));
    }

    #[test]
    fn splitport_ipv6_like_rightmost_split() {
        // The Python regex is greedy from the left, so `"::1"` splits into
        // host `":"` and port `"1"`. Our version reproduces that.
        assert_eq!(splitport("::1"), (":", Some("1")));
    }

    #[test]
    fn parse_auth_header_basic() {
        let (scheme, rest) = parse_auth_header("Basic realm=\"secure area\"");
        assert_eq!(scheme, "basic");
        assert_eq!(rest, Some("realm=\"secure area\""));
    }

    #[test]
    fn parse_auth_header_no_remainder() {
        let (scheme, rest) = parse_auth_header("Negotiate");
        assert_eq!(scheme, "negotiate");
        assert_eq!(rest, None);
    }

    #[test]
    fn parse_auth_header_empty() {
        // Empty header: scheme is "" (lowercased of ""), no remainder.
        // Matches the Python `AbstractAuthHandler._parse_auth_header`
        // behaviour exercised by breezy's TestAuthHeader.test_empty_header.
        let (scheme, rest) = parse_auth_header("");
        assert_eq!(scheme, "");
        assert_eq!(rest, None);
    }

    #[test]
    fn parse_http_list_simple() {
        assert_eq!(parse_http_list("a, b, c"), vec!["a", "b", "c"]);
    }

    #[test]
    fn parse_http_list_quoted_commas() {
        assert_eq!(
            parse_http_list(r#"a="hello, world", b=42"#),
            vec![r#"a="hello, world""#, "b=42"]
        );
    }

    #[test]
    fn parse_http_list_escaped_quote() {
        // Matches stdlib `urllib.request.parse_http_list`: the backslash
        // is consumed, the following character is appended verbatim. So
        // `\"` inside a quoted string contributes a bare `"` to the
        // output and does *not* terminate the quoted region.
        assert_eq!(
            parse_http_list(r#"a="he said \"hi\"", b=1"#),
            vec![r#"a="he said "hi"""#, "b=1"]
        );
    }

    #[test]
    fn parse_http_list_empty() {
        assert_eq!(parse_http_list(""), Vec::<String>::new());
    }

    #[test]
    fn parse_keqv_list_unquotes_values() {
        let items = vec![
            r#"realm="secure""#.to_string(),
            "nonce=abc".to_string(),
            "qop=auth".to_string(),
        ];
        let m = parse_keqv_list(&items);
        assert_eq!(m.get("realm").map(String::as_str), Some("secure"));
        assert_eq!(m.get("nonce").map(String::as_str), Some("abc"));
        assert_eq!(m.get("qop").map(String::as_str), Some("auth"));
    }

    #[test]
    fn parse_keqv_list_preserves_inner_quotes() {
        // Only a matched outer pair is stripped; inner quotes (rare but
        // possible when `\"` was in the original header) stay put.
        let items = vec![r#"k="a""b""#.to_string()];
        assert_eq!(
            parse_keqv_list(&items).get("k").map(String::as_str),
            Some(r#"a""b"#)
        );
    }

    #[test]
    fn parse_keqv_list_drops_items_without_eq() {
        // Mirrors the Python impl when called via `parse_http_list`: the
        // stdlib would raise on a missing `=`; we choose to drop silently
        // so the Rust side never panics on a malformed header.
        let items = vec!["bare".to_string(), "k=v".to_string()];
        let m = parse_keqv_list(&items);
        assert_eq!(m.len(), 1);
        assert_eq!(m.get("k").map(String::as_str), Some("v"));
    }

    #[test]
    fn digest_md5_vector() {
        // "abc" -> well-known MD5 digest.
        assert_eq!(
            DigestAlgorithm::Md5.h(b"abc"),
            "900150983cd24fb0d6963f7d28e17f72"
        );
    }

    #[test]
    fn digest_sha1_vector() {
        // "abc" -> well-known SHA-1 digest.
        assert_eq!(
            DigestAlgorithm::Sha1.h(b"abc"),
            "a9993e364706816aba3e25717850c26c9cd0d89d"
        );
    }

    #[test]
    fn digest_kd_prepends_colon_separator() {
        // KD("secret", "data") == H("secret:data").
        let kd = DigestAlgorithm::Md5.kd("secret", "data");
        let h = DigestAlgorithm::Md5.h(b"secret:data");
        assert_eq!(kd, h);
    }

    #[test]
    fn digest_algorithm_parse() {
        assert_eq!(DigestAlgorithm::parse("MD5"), Some(DigestAlgorithm::Md5));
        assert_eq!(DigestAlgorithm::parse("SHA"), Some(DigestAlgorithm::Sha1));
        assert_eq!(DigestAlgorithm::parse("SHA-256"), None);
    }

    #[test]
    fn new_cnonce_is_16_hex_chars() {
        let c = new_cnonce("servernonce", 1);
        assert_eq!(c.len(), 16);
        assert!(c.chars().all(|c| c.is_ascii_hexdigit()));
    }

    #[test]
    fn new_cnonce_varies() {
        // Two calls back-to-back should differ (timestamp nanos + random).
        let a = new_cnonce("nonce", 1);
        let b = new_cnonce("nonce", 1);
        assert_ne!(a, b);
    }
}
part by reading the boundary and `Content-Range:` header. The //! grammar we accept is //! //! ```text //! file: single_range | multiple_range //! single_range: content_range_header data //! multiple_range: boundary_header boundary //! (content_range_header data boundary)+ //! ``` //! //! Sockets can't be rewound, so "seek backwards" is always a hard //! error — that's enforced in both `ResponseFile::seek` and //! `RangeFile::seek`. use std::collections::HashMap; use std::io; /// Source of bytes backing a `ResponseFile` / `RangeFile`. /// /// Mirrors the subset of the Python file-like protocol the Python /// implementation actually used: byte-oriented `read(n)` and /// `readline()` (newline-terminated, empty at EOF). No `seek` — the /// original works on sockets, which are inherently forward-only, and /// the range-file simulates backwards-disallowed seeks by discarding. pub trait InFile { /// Read *up to* `n` bytes. Returning fewer than `n` is not /// necessarily EOF (matches the Python semantics of `socket.recv` /// / `BytesIO.read`): callers that need exactly `n` bytes must /// loop. fn read(&mut self, n: usize) -> io::Result>; /// Read a line, newline character included, like /// `io.BufferedReader.readline()` or `socket.makefile().readline()`. /// Returns empty on EOF. fn readline(&mut self) -> io::Result>; } /// Errors raised by the response parser. Each variant maps 1:1 to a /// Python exception class defined in `dromedary.errors`, with the /// exact field set the Python side constructs. #[derive(Debug)] pub enum ResponseError { /// `dromedary.errors.InvalidHttpResponse(path, msg)`. InvalidResponse { path: String, msg: String }, /// `dromedary.errors.InvalidHttpRange(path, range, msg)`. InvalidHttpRange { path: String, range: String, msg: String, }, /// `dromedary.errors.HttpBoundaryMissing(path, msg)`. The Python /// side passes the raw boundary bytes as the `msg`; we mirror that /// by keeping the field name. 
BoundaryMissing { path: String, boundary: Vec }, /// `dromedary.errors.ShortReadvError(path, offset, length, actual)`. ShortReadv { path: String, offset: u64, length: u64, actual: u64, }, /// `dromedary.errors.InvalidRange(path, offset, msg)`. InvalidRange { path: String, offset: u64, msg: String, }, /// `dromedary.errors.UnexpectedHttpStatus(path, code)`. UnexpectedStatus { path: String, code: u16 }, /// An IO error surfaced from the underlying file-like. The Python /// side lets these bubble up as-is. Io(io::Error), /// A seek was requested with an unknown `whence` value. The /// Python `RangeFile.seek` raised `ValueError` for this case, /// distinct from `InvalidRange` (a legitimate-but-out-of-bounds /// seek). InvalidWhence(u32), /// Forward-only seek rejected: the caller asked to seek to an /// absolute position that was earlier than the current one. The /// Python `ResponseFile.seek` flagged this as `AssertionError`; /// we surface it as its own variant so the PyO3 layer can raise /// a distinct exception type rather than something generic. BackwardSeek { path: String, pos: u64, offset: i64 }, } impl From for ResponseError { fn from(e: io::Error) -> Self { Self::Io(e) } } pub type Result = std::result::Result; // --------------------------------------------------------------------------- // ResponseFile: forward-only wrapper that tracks position across the // underlying stream. Reads pass through; seek forward is simulated by // reading and discarding. // --------------------------------------------------------------------------- /// A wrapper around the http socket containing the result of a GET /// request. Only `read()` and forward-only `seek()` are supported. pub struct ResponseFile { path: String, file: F, pos: u64, } impl ResponseFile { /// Construct around the given input; `path` is used only in error /// messages. pub fn new(path: impl Into, file: F) -> Self { Self { path: path.into(), file, pos: 0, } } /// Current position reported to callers. 
pub fn tell(&self) -> u64 { self.pos } /// Read up to `size` bytes; `None` means read all. Mirrors the /// Python signature. pub fn read(&mut self, size: Option) -> Result> { let data = match size { Some(n) => self.file.read(n)?, None => read_all(&mut self.file)?, }; self.pos += data.len() as u64; Ok(data) } /// Read a single line. pub fn readline(&mut self) -> Result> { let data = self.file.readline()?; self.pos += data.len() as u64; Ok(data) } /// Read all remaining lines and return them as a vector. Matches /// the Python `readlines()` in that it only honours the split /// performed by the underlying file-like. pub fn readlines(&mut self) -> Result>> { let mut out: Vec> = Vec::new(); loop { let line = self.file.readline()?; if line.is_empty() { break; } self.pos += line.len() as u64; out.push(line); } Ok(out) } /// Forward-only seek. Whence matches `os.SEEK_*` values: /// `0 == SET`, `1 == CUR`. Python's original also rejects `SEEK_END` /// (anything other than SET/CUR) with an assertion. pub fn seek(&mut self, offset: i64, whence: u32) -> Result<()> { let to_discard: u64 = match whence { 0 => { // absolute if offset < 0 || (offset as u64) < self.pos { return Err(ResponseError::BackwardSeek { path: self.path.clone(), pos: self.pos, offset, }); } (offset as u64) - self.pos } 1 => { // relative; Python's version accepts negative offset // but the later `read()` call on the socket would just // block. We mirror its "read offset bytes forward" // behaviour for offset >= 0 and reject negatives. if offset < 0 { return Err(ResponseError::BackwardSeek { path: self.path.clone(), pos: self.pos, offset, }); } offset as u64 } other => return Err(ResponseError::InvalidWhence(other)), }; if to_discard > 0 { self.read(Some(to_discard as usize))?; } Ok(()) } /// Borrow the wrapped path (useful for subclasses built on top). 
pub fn path(&self) -> &str { &self.path } } // --------------------------------------------------------------------------- // RangeFile: overlays range/multipart semantics on top of the same // forward-only stream. Track the current `(start, size)` window; when // we exhaust it in a multipart response we read a boundary + Content- // Range header and move the window forward. // --------------------------------------------------------------------------- /// In `_checked_read()` we may have to discard several MB in the worst /// case. To avoid buffering that much, we read-and-discard by chunks. /// The underlying file is either a socket or a `BytesIO`, so 8 KiB /// chunks are fine. const DISCARDED_BUF_SIZE: usize = 8192; /// File-like exposing ranges of a larger resource. All accesses must /// be sequential: ranges are discovered as the stream is consumed. pub struct RangeFile { inner: ResponseFile, start: u64, /// `None` means "size unknown" (i.e. the whole file, or a range /// whose length the server didn't declare). size: Option, boundary: Option>, /// Parsed headers for the *current* part of a multipart response. /// `None` before any part has been read. headers: Option>, /// Chunk size used by `checked_read` — tests set this very low to /// exercise the buffer loop. discarded_buf_size: usize, } impl RangeFile { pub fn new(path: impl Into, file: F) -> Self { let mut rf = Self { inner: ResponseFile::new(path, file), start: 0, size: None, boundary: None, headers: None, discarded_buf_size: DISCARDED_BUF_SIZE, }; // Default to "the whole file of unspecified size", matching // `RangeFile.__init__` in Python. rf.set_range(0, None); rf } /// Change the range window. `size=None` means "unknown" (Python's /// `-1`). Resets `pos` to `start` like the Python original does. 
pub fn set_range(&mut self, start: u64, size: Option) { self.start = start; self.size = size; self.inner.pos = start; } /// Multipart mode: once the boundary is known, the wrapper reads /// the first boundary + Content-Range headers to position itself /// at the start of the first part's body. Subsequent boundary /// crossings happen automatically as `read`/`seek` walks forward. pub fn set_boundary(&mut self, boundary: Vec) -> Result<()> { self.boundary = Some(boundary); self.read_boundary()?; self.read_range_definition()?; Ok(()) } /// Read the boundary line. RFC 2616 §19.2 allows additional /// `CRLF` preceding the boundary so we skip any we find. IIS 6/7 /// wraps the boundary in `<>`; we unquote those too. pub fn read_boundary(&mut self) -> Result<()> { let boundary = self .boundary .as_ref() .expect("set_boundary() must be called before read_boundary()") .clone(); let mut line = b"\r\n".to_vec(); while line == b"\r\n" { line = self.inner.file.readline()?; } if line.is_empty() { // A timeout in the proxy caused the response to end early // (launchpad bug 198646). return Err(ResponseError::BoundaryMissing { path: self.inner.path.clone(), boundary, }); } let mut expected = Vec::with_capacity(boundary.len() + 4); expected.extend_from_slice(b"--"); expected.extend_from_slice(&boundary); expected.extend_from_slice(b"\r\n"); if line != expected { // email.utils.unquote() mis-handles `<...>`-wrapped // boundaries (IIS 6/7), so let it take a second pass. if unquote_boundary(&line) != expected { let shown = String::from_utf8_lossy(&line); let b_shown = String::from_utf8_lossy(&boundary); return Err(ResponseError::InvalidResponse { path: self.inner.path.clone(), msg: format!("Expected a boundary ({}) line, got '{}'", b_shown, shown), }); } } Ok(()) } /// Parse the headers introducing the new range and apply the /// Content-Range value. 
pub fn read_range_definition(&mut self) -> Result<()> { let headers = parse_headers(&mut self.inner.file)?; let cr = headers.get("content-range").cloned().ok_or_else(|| { ResponseError::InvalidResponse { path: self.inner.path.clone(), msg: "Content-Range header missing in a multi-part response".into(), } })?; self.headers = Some(headers); self.set_range_from_header(&cr) } /// Apply a `Content-Range: bytes START-END/TOTAL` header. Values /// other than `bytes` or malformed numbers raise /// [`ResponseError::InvalidHttpRange`]. pub fn set_range_from_header(&mut self, content_range: &str) -> Result<()> { // Python's version uses `str.split()` with no args, which // splits on any whitespace run and drops empty tokens. So // leading/trailing/internal spaces and tabs are all OK, but // `"bytes10-2/3"` (no whitespace between type and values) is // a malformed header. let mut it = content_range.split_ascii_whitespace(); let rtype = it.next().ok_or_else(|| ResponseError::InvalidHttpRange { path: self.inner.path.clone(), range: content_range.to_string(), msg: "Malformed header".into(), })?; let values = it.next().ok_or_else(|| ResponseError::InvalidHttpRange { path: self.inner.path.clone(), range: content_range.to_string(), msg: "Malformed header".into(), })?; // Python's unpack via `rtype, values = content_range.split()` // raises `ValueError` if there are more than two tokens too. if it.next().is_some() { return Err(ResponseError::InvalidHttpRange { path: self.inner.path.clone(), range: content_range.to_string(), msg: "Malformed header".into(), }); } if rtype != "bytes" { return Err(ResponseError::InvalidHttpRange { path: self.inner.path.clone(), range: content_range.to_string(), msg: format!("Unsupported range type '{}'", rtype), }); } // The grammar is START-END/TOTAL (total may be `*`). 
let (start_end, _total) = values .split_once('/') .ok_or_else(|| ResponseError::InvalidHttpRange { path: self.inner.path.clone(), range: content_range.to_string(), msg: "Invalid range values".into(), })?; let (start_s, end_s) = start_end .split_once('-') .ok_or_else(|| ResponseError::InvalidHttpRange { path: self.inner.path.clone(), range: content_range.to_string(), msg: "Invalid range values".into(), })?; let start: i64 = start_s .parse() .map_err(|_| ResponseError::InvalidHttpRange { path: self.inner.path.clone(), range: content_range.to_string(), msg: "Invalid range values".into(), })?; let end: i64 = end_s.parse().map_err(|_| ResponseError::InvalidHttpRange { path: self.inner.path.clone(), range: content_range.to_string(), msg: "Invalid range values".into(), })?; let size = end - start + 1; if size <= 0 { return Err(ResponseError::InvalidHttpRange { path: self.inner.path.clone(), range: content_range.to_string(), msg: "Invalid range, size <= 0".into(), }); } self.set_range(start as u64, Some(size as u64)); Ok(()) } pub fn tell(&self) -> u64 { self.inner.pos } pub fn path(&self) -> &str { &self.inner.path } // Accessors below are named with an `rs_` prefix so the PyO3 // bindings can mirror the original pure-Python attributes // (`_start`, `_size`, `_pos`, `_boundary`) without colliding // with `RangeFile`'s own methods. The underscore-prefixed // attribute names are part of the observable API — a handful of // tests and callers read or assign to them directly. 
pub fn rs_start(&self) -> u64 {
    self.start
}

/// Overwrite the range start without touching size or position.
pub fn rs_set_start(&mut self, start: u64) {
    self.start = start;
}

/// Size of the current range; `None` when unknown.
pub fn rs_size(&self) -> Option<u64> {
    self.size
}

/// Overwrite the range size without touching start or position.
pub fn rs_set_size(&mut self, size: Option<u64>) {
    self.size = size;
}

/// Overwrite the logical position; no bytes are read or skipped.
pub fn rs_set_pos(&mut self, pos: u64) {
    self.inner.pos = pos;
}

/// Multipart boundary, if one has been set.
pub fn rs_boundary(&self) -> Option<&[u8]> {
    self.boundary.as_deref()
}

/// Chunk size used when reading-and-discarding bytes.
pub fn rs_discarded_buf_size(&self) -> usize {
    self.discarded_buf_size
}

/// Change the chunk size used when reading-and-discarding bytes.
pub fn rs_set_discarded_buf_size(&mut self, value: usize) {
    self.discarded_buf_size = value;
}

/// Read and discard exactly `size` bytes; used internally by
/// seek/boundary-walking. Raises `ShortReadv` if the stream ends
/// early — this is what signals the server misbehaved.
///
/// The position is advanced by `size` only on success.
fn checked_read(&mut self, size: u64) -> Result<()> {
    let origin = self.inner.pos;
    // An empty read is how EOF is detected, so never *ask* for zero
    // bytes: a configured chunk size of 0 would otherwise be reported
    // as a short read.
    let chunk_cap = self.discarded_buf_size.max(1) as u64;
    let mut remaining = size;
    while remaining > 0 {
        // `take <= chunk_cap`, which originated from a usize.
        let take = remaining.min(chunk_cap) as usize;
        let data = self.inner.file.read(take)?;
        if data.is_empty() {
            return Err(ResponseError::ShortReadv {
                path: self.inner.path.clone(),
                offset: origin,
                length: size,
                actual: size - remaining,
            });
        }
        // Saturate in case the source hands back more than requested.
        remaining = remaining.saturating_sub(data.len() as u64);
    }
    self.inner.pos += size;
    Ok(())
}

/// Walk forward to the next part of a multipart response. Raises
/// `InvalidRange` if this wasn't multipart to begin with (there's
/// no next range to advance to).
fn seek_to_next_range(&mut self) -> Result<()> {
    if self.boundary.is_none() {
        return Err(ResponseError::InvalidRange {
            path: self.inner.path.clone(),
            offset: self.inner.pos,
            msg: format!(
                "Range ({}, {}) exhausted",
                self.start,
                format_size(self.size)
            ),
        });
    }
    self.read_boundary()?;
    self.read_range_definition()
}

/// Read up to `size` bytes from the current range. `size < 0`
/// means "read to end of range". Reading across ranges is not
/// supported (the socket would already be past the boundary).
pub fn read(&mut self, size: i64) -> Result> { // If we're sitting exactly at the end of a known-size range, // decide whether to walk to the next range or stop. if let Some(sz) = self.size { if self.inner.pos == self.start + sz { if size == 0 { return Ok(Vec::new()); } else { self.seek_to_next_range()?; } } } if self.inner.pos < self.start { return Err(ResponseError::InvalidRange { path: self.inner.path.clone(), offset: self.inner.pos, msg: format!( "Can't read {} bytes before range ({}, {})", size, self.start, format_size(self.size) ), }); } if let Some(sz) = self.size { if size > 0 && self.inner.pos + (size as u64) > self.start + sz { return Err(ResponseError::InvalidRange { path: self.inner.path.clone(), offset: self.inner.pos, msg: format!( "Can't read {} bytes across range ({}, {})", size, self.start, format_size(self.size) ), }); } } // Cap the read so we never overflow past the range end. let limited: Option = match (self.size, size) { (Some(sz), _) => { let remaining = self.start + sz - self.inner.pos; let cap = match size { n if n >= 0 => remaining.min(n as u64), _ => remaining, }; Some(cap as usize) } (None, n) if n < 0 => None, (None, n) => Some(n as usize), }; let data = match limited { Some(n) => pump_exactly(&mut self.inner.file, n)?, None => read_all(&mut self.inner.file)?, }; self.inner.pos += data.len() as u64; Ok(data) } /// Forward-only seek, with whence meaning `os.SEEK_*`. Seeking /// past the current range in a multipart response walks to the /// next part by reading the boundary + Content-Range; size `None` /// (unknown) rejects SEEK_END. 
pub fn seek(&mut self, offset: i64, whence: u32) -> Result<()> {
    let start_pos = self.inner.pos;

    // Work in i128 so that `pos + offset` cannot overflow whatever
    // offset the caller supplies.
    let offset = i128::from(offset);
    let target: i128 = match whence {
        // SEEK_SET
        0 => offset,
        // SEEK_CUR
        1 => i128::from(start_pos) + offset,
        // SEEK_END: relative to the end of the current range.
        2 => match self.size {
            Some(sz) => i128::from(self.start) + i128::from(sz) + offset,
            None => {
                return Err(ResponseError::InvalidRange {
                    path: self.inner.path.clone(),
                    offset: start_pos,
                    msg: "RangeFile: can't seek from end while size is unknown".into(),
                });
            }
        },
        other => return Err(ResponseError::InvalidWhence(other)),
    };

    if target < i128::from(start_pos) {
        return Err(ResponseError::InvalidRange {
            path: self.inner.path.clone(),
            offset: start_pos,
            msg: format!("RangeFile: trying to seek backwards to {}", target),
        });
    }
    // `target >= start_pos >= 0`; clamp the (unreachable in practice)
    // case of a target beyond u64::MAX.
    let final_pos = target.min(i128::from(u64::MAX)) as u64;

    if let Some(sz) = self.size {
        let mut cur_limit = self.start.saturating_add(sz);
        while final_pos > cur_limit {
            // We will cross a range boundary: drain what is left of the
            // current range, then consume the boundary + part headers.
            let remain = cur_limit.saturating_sub(self.inner.pos);
            if remain > 0 {
                self.checked_read(remain)?;
            }
            self.seek_to_next_range()?;
            cur_limit = self
                .start
                .saturating_add(self.size.expect("after seek_to_next_range size is set"));
        }
    }

    // Skip forward inside the (possibly new) current range.
    let size = final_pos.saturating_sub(self.inner.pos);
    if size > 0 {
        self.checked_read(size)?;
    }
    Ok(())
}
}

// ---------------------------------------------------------------------------
// handle_response: the factory that picks between ResponseFile and
// RangeFile based on the status code + headers. 200 returns the plain
// wrapper; 206 returns a RangeFile (with boundary set for multipart
// responses, or a single Content-Range applied otherwise). Anything
// else raises UnexpectedHttpStatus.
// ---------------------------------------------------------------------------

/// The two possible wrappers returned by [`handle_response`].
pub enum ResponseKind<F: InFile> {
    /// A complete (status 200) body.
    Plain(ResponseFile<F>),
    /// A partial (status 206) body, single- or multi-range.
    Range(RangeFile<F>),
}

/// Inspect the status code + headers and wrap `data` in the right
/// response type. See `dromedary/http/response.py::handle_response`.
///
/// `get_header` returns the (lower-cased) header value or `None`; the
/// caller is responsible for case-insensitive lookup. On 206 we need
/// `content-type` and possibly `content-range` from the real response
/// headers.
///
/// # Errors
/// * `InvalidResponse` for a multipart 206 without a `boundary`
///   parameter, or a non-multipart 206 without `Content-Range`.
/// * `UnexpectedStatus` for any status other than 200 or 206.
/// * Whatever `set_boundary` / `set_range_from_header` report.
pub fn handle_response<F: InFile>(
    url: impl Into<String>,
    code: u16,
    get_header: &dyn Fn(&str) -> Option<String>,
    data: F,
) -> Result<ResponseKind<F>> {
    let url = url.into();
    match code {
        200 => Ok(ResponseKind::Plain(ResponseFile::new(url, data))),
        206 => {
            let mut rf = RangeFile::new(url.clone(), data);
            // RFC 2616 §7.2.1: missing Content-Type defaults to
            // application/octet-stream, so this is never multipart.
            let content_type = get_header("content-type")
                .unwrap_or_else(|| "application/octet-stream".to_string());
            let (mimetype, params) = parse_content_type(&content_type);
            if mimetype == "multipart/byteranges" {
                // Each part carries its own Content-Range header.
                let boundary =
                    params
                        .get("boundary")
                        .ok_or_else(|| ResponseError::InvalidResponse {
                            path: url.clone(),
                            msg: "multipart/byteranges missing boundary parameter".into(),
                        })?;
                rf.set_boundary(boundary.as_bytes().to_vec())?;
            } else {
                // Single range: it is described by the response headers.
                let cr = get_header("content-range").ok_or_else(|| {
                    ResponseError::InvalidResponse {
                        path: url.clone(),
                        msg: "Missing the Content-Range header in a 206 range response".into(),
                    }
                })?;
                rf.set_range_from_header(&cr)?;
            }
            Ok(ResponseKind::Range(rf))
        }
        code => Err(ResponseError::UnexpectedStatus { path: url, code }),
    }
}

// ---------------------------------------------------------------------------
// Helpers
// ---------------------------------------------------------------------------

/// Drain `file` until it reports EOF (an empty read), requesting
/// `DISCARDED_BUF_SIZE` bytes at a time.
fn read_all<F: InFile>(file: &mut F) -> io::Result<Vec<u8>> {
    let mut out = Vec::new();
    loop {
        let chunk = file.read(DISCARDED_BUF_SIZE)?;
        if chunk.is_empty() {
            break;
        }
        out.extend_from_slice(&chunk);
    }
    Ok(out)
}

/// Read exactly `n` bytes from `file`, looping over short reads.
/// Returns less than `n` only on EOF — matching how the Python
/// `pumpfile` helper behaves when the source is a socket.
fn pump_exactly(file: &mut F, n: usize) -> io::Result> { let mut out = Vec::with_capacity(n); while out.len() < n { let chunk = file.read(n - out.len())?; if chunk.is_empty() { break; } out.extend_from_slice(&chunk); } Ok(out) } /// Parse `Content-Type: mimetype; key=value; key2=value2`. Returns the /// lower-cased mime type and a map of parameter keys (lower-cased) to /// values with any surrounding double-quotes stripped. fn parse_content_type(value: &str) -> (String, HashMap) { let mut parts = value.split(';'); let mime = parts .next() .map(|s| s.trim().to_ascii_lowercase()) .unwrap_or_default(); let mut params = HashMap::new(); for p in parts { let p = p.trim(); if let Some((k, v)) = p.split_once('=') { let v = v.trim(); let v = if v.starts_with('"') && v.ends_with('"') && v.len() >= 2 { &v[1..v.len() - 1] } else { v }; params.insert(k.trim().to_ascii_lowercase(), v.to_string()); } } (mime, params) } /// Unquote a boundary line that IIS 6/7 wraps in angle brackets. /// Preserves the `"--"` prefix and the trailing `\r\n`; only the /// middle 20-odd bytes are run through the unquoter. Mirrors the /// Python helper verbatim. fn unquote_boundary(line: &[u8]) -> Vec { if line.len() < 4 { return line.to_vec(); } let prefix = &line[..2]; let suffix = &line[line.len() - 2..]; let body = &line[2..line.len() - 2]; // email.utils.unquote strips `"..."` or `<...>` wrapping (and // handles a couple of backslash escapes inside `"..."`). We do // the same for the inner bytes, interpreted as ASCII. let inner = match std::str::from_utf8(body) { Ok(s) => email_unquote(s).into_bytes(), Err(_) => body.to_vec(), }; let mut out = Vec::with_capacity(prefix.len() + inner.len() + suffix.len()); out.extend_from_slice(prefix); out.extend_from_slice(&inner); out.extend_from_slice(suffix); out } /// Stripped-down port of `email.utils.unquote`: removes a matching /// pair of `"..."` (unescaping `\\` and `\"`) or `<...>` wrappers. 
fn email_unquote(s: &str) -> String { if s.len() <= 1 { return s.to_string(); } if s.starts_with('"') && s.ends_with('"') { return s[1..s.len() - 1] .replace("\\\\", "\\") .replace("\\\"", "\""); } if s.starts_with('<') && s.ends_with('>') { return s[1..s.len() - 1].to_string(); } s.to_string() } /// Read RFC 822-style headers up to the blank line. Header names are /// lower-cased; values are trimmed of leading/trailing whitespace. /// Multi-line continuation (leading whitespace on the next line) is /// folded with a single space, matching `http.client.parse_headers`. fn parse_headers(file: &mut F) -> Result> { let mut out: HashMap = HashMap::new(); let mut last_key: Option = None; loop { let raw = file.readline()?; if raw.is_empty() { break; } let line = std::str::from_utf8(&raw).map_err(|_| ResponseError::InvalidResponse { path: String::new(), msg: "non-UTF-8 header".into(), })?; // Strip the CRLF / LF terminator. let line = line.trim_end_matches(['\r', '\n']); if line.is_empty() { break; } if line.starts_with(' ') || line.starts_with('\t') { if let Some(k) = &last_key { let prev = out.get(k).cloned().unwrap_or_default(); let folded = format!("{} {}", prev, line.trim()); out.insert(k.clone(), folded); } continue; } if let Some((k, v)) = line.split_once(':') { let k = k.trim().to_ascii_lowercase(); let v = v.trim().to_string(); last_key = Some(k.clone()); out.insert(k, v); } } Ok(out) } fn format_size(size: Option) -> String { match size { Some(n) => n.to_string(), None => "-1".to_string(), } } // --------------------------------------------------------------------------- // Tests: drive the logic against a Cursor-backed InFile. // --------------------------------------------------------------------------- #[cfg(test)] mod tests { use super::*; use std::io::{BufRead, Cursor}; /// Test adapter: a `Cursor>`-backed `InFile`. Matches /// Python's `BytesIO` semantics (unbuffered `read`, `readline` /// terminates on `\n` or EOF). 
struct TestFile { cur: Cursor>, } impl TestFile { fn new(bytes: impl Into>) -> Self { Self { cur: Cursor::new(bytes.into()), } } } impl InFile for TestFile { fn read(&mut self, n: usize) -> io::Result> { use std::io::Read; let mut out = vec![0u8; n]; let got = self.cur.read(&mut out)?; out.truncate(got); Ok(out) } fn readline(&mut self) -> io::Result> { let mut out = Vec::new(); self.cur.read_until(b'\n', &mut out)?; Ok(out) } } #[test] fn response_file_read_tracks_position() { let mut rf = ResponseFile::new("p", TestFile::new(b"hello, world".to_vec())); assert_eq!(rf.read(Some(5)).unwrap(), b"hello"); assert_eq!(rf.tell(), 5); assert_eq!(rf.read(None).unwrap(), b", world"); assert_eq!(rf.tell(), 12); } #[test] fn response_file_readline() { let mut rf = ResponseFile::new("p", TestFile::new(b"a\nbb\nccc".to_vec())); assert_eq!(rf.readline().unwrap(), b"a\n"); assert_eq!(rf.readline().unwrap(), b"bb\n"); assert_eq!(rf.readline().unwrap(), b"ccc"); assert_eq!(rf.readline().unwrap(), b""); } #[test] fn response_file_forward_seek_discards() { let mut rf = ResponseFile::new("p", TestFile::new(b"0123456789".to_vec())); rf.seek(3, 0).unwrap(); assert_eq!(rf.tell(), 3); assert_eq!(rf.read(Some(3)).unwrap(), b"345"); } #[test] fn response_file_seek_backwards_errors() { let mut rf = ResponseFile::new("p", TestFile::new(b"abcd".to_vec())); rf.read(Some(3)).unwrap(); let err = rf.seek(1, 0).unwrap_err(); assert!(matches!(err, ResponseError::BackwardSeek { .. 
})); } #[test] fn response_file_seek_invalid_whence() { let mut rf = ResponseFile::new("p", TestFile::new(b"abcd".to_vec())); let err = rf.seek(0, 14).unwrap_err(); assert!(matches!(err, ResponseError::InvalidWhence(14))); } #[test] fn range_file_default_reads_whole_stream() { let mut rf = RangeFile::new("p", TestFile::new(b"the quick brown fox".to_vec())); let got = rf.read(-1).unwrap(); assert_eq!(got, b"the quick brown fox"); } #[test] fn range_file_set_range_caps_read() { let mut rf = RangeFile::new("p", TestFile::new(b"abcdefg".to_vec())); rf.set_range(0, Some(3)); let got = rf.read(-1).unwrap(); assert_eq!(got, b"abc"); } #[test] fn range_file_read_past_range_errors() { let mut rf = RangeFile::new("p", TestFile::new(b"abcdefg".to_vec())); rf.set_range(0, Some(3)); let err = rf.read(5).unwrap_err(); assert!(matches!(err, ResponseError::InvalidRange { .. })); } #[test] fn parse_headers_basic() { let mut f = TestFile::new(b"Content-Type: text/plain\r\nContent-Length: 12\r\n\r\n".to_vec()); let h = parse_headers(&mut f).unwrap(); assert_eq!( h.get("content-type").map(String::as_str), Some("text/plain") ); assert_eq!(h.get("content-length").map(String::as_str), Some("12")); } #[test] fn parse_headers_folds_continuations() { let mut f = TestFile::new(b"X-Foo: alpha\r\n beta\r\n\r\n".to_vec()); let h = parse_headers(&mut f).unwrap(); assert_eq!(h.get("x-foo").map(String::as_str), Some("alpha beta")); } #[test] fn parse_content_type_with_boundary() { let (mime, params) = parse_content_type(r#"multipart/byteranges; boundary="abc123""#); assert_eq!(mime, "multipart/byteranges"); assert_eq!(params.get("boundary").map(String::as_str), Some("abc123")); } #[test] fn unquote_boundary_handles_angle_brackets() { // IIS 6/7 wraps the boundary in <...> let line = b"--\r\n"; let expected = b"--abc\r\n"; assert_eq!(unquote_boundary(line), expected); } #[test] fn set_range_from_header_parses_bytes_range() { let mut rf = RangeFile::new("p", TestFile::new(Vec::new())); 
rf.set_range_from_header("bytes 200-999/1234").unwrap(); assert_eq!(rf.start, 200); assert_eq!(rf.size, Some(800)); } #[test] fn set_range_from_header_rejects_non_bytes() { let mut rf = RangeFile::new("p", TestFile::new(Vec::new())); let err = rf.set_range_from_header("lines 0-10/20").unwrap_err(); assert!(matches!(err, ResponseError::InvalidHttpRange { .. })); } #[test] fn set_range_from_header_rejects_inverted() { let mut rf = RangeFile::new("p", TestFile::new(Vec::new())); let err = rf.set_range_from_header("bytes 10-5/20").unwrap_err(); assert!(matches!(err, ResponseError::InvalidHttpRange { .. })); } #[test] fn handle_response_200_plain() { let data = TestFile::new(b"body".to_vec()); let get = |_: &str| -> Option { None }; let k = handle_response("u", 200, &get, data).unwrap(); match k { ResponseKind::Plain(mut rf) => { assert_eq!(rf.read(None).unwrap(), b"body"); } _ => panic!("expected Plain"), } } #[test] fn handle_response_206_single_range() { let data = TestFile::new(b"abcde".to_vec()); let get = |name: &str| match name { "content-type" => Some("application/octet-stream".to_string()), "content-range" => Some("bytes 0-4/5".to_string()), _ => None, }; let k = handle_response("u", 206, &get, data).unwrap(); match k { ResponseKind::Range(mut rf) => { assert_eq!(rf.read(-1).unwrap(), b"abcde"); } _ => panic!("expected Range"), } } #[test] fn handle_response_other_is_unexpected_status() { let data = TestFile::new(Vec::new()); let get = |_: &str| -> Option { None }; // `unwrap_err` needs `T: Debug` and `ResponseKind` isn't Debug // (its inner `F` wouldn't be, in general), so match explicitly. match handle_response("u", 404, &get, data) { Err(ResponseError::UnexpectedStatus { code: 404, .. }) => {} Err(e) => panic!("unexpected error: {:?}", e), Ok(_) => panic!("expected an error"), } } /// Multipart walk: two parts separated by boundaries; seek to the /// second range and read it. 
#[test] fn range_file_multipart_walk() { let boundary = b"XYZ"; let mut body: Vec = Vec::new(); body.extend_from_slice(b"--XYZ\r\n"); body.extend_from_slice(b"Content-Range: bytes 0-2/10\r\n\r\n"); body.extend_from_slice(b"abc"); body.extend_from_slice(b"\r\n--XYZ\r\n"); body.extend_from_slice(b"Content-Range: bytes 5-7/10\r\n\r\n"); body.extend_from_slice(b"fgh"); let mut rf = RangeFile::new("u", TestFile::new(body)); rf.set_boundary(boundary.to_vec()).unwrap(); // After set_boundary we're positioned at the start of part #1. assert_eq!(rf.tell(), 0); assert_eq!(rf.read(3).unwrap(), b"abc"); // Seek forward into the second range; the wrapper walks the // boundary automatically. rf.seek(5, 0).unwrap(); assert_eq!(rf.read(3).unwrap(), b"fgh"); } } dromedary-0.1.5/src/http/transport.rs000066400000000000000000002363331520150013200176520ustar00rootroot00000000000000//! `HttpTransport` — a `dromedary::Transport` over HTTP(S). //! //! Bridges the low-level [`HttpClient`] (TLS, proxy, redirects, auth) //! to the dromedary transport trait so Rust callers can drive a //! `dyn Transport` against an `http://` or `https://` URL without //! going through PyO3. //! //! The read-side machinery — `get`, `readv` with Range coalescing, //! `_post`/`_head` — is ported from the Python HttpTransport in //! `dromedary/http/urllib.py`. Write operations are rejected with //! `Error::TransportNotPossible` because HTTP is a read-only //! transport; WebDAV-style writes live in a separate transport. use std::sync::{Arc, Mutex}; use url::Url; use crate::http::client::{HttpClient, HttpResponse, RequestOptions}; use crate::http::response::{handle_response, InFile, RangeFile, ResponseError, ResponseKind}; use crate::lock::BogusLock; use crate::{Error, Permissions, ReadStream, Result, Stat, Transport, UrlFragment}; /// Range-request support hint. 
/// The client starts at `Multi`
/// (multi-range request per coalesced readv) and degrades when the
/// server misbehaves: first to `Single` (one range per request),
/// then to `None` (download whole file). Once degraded it never
/// climbs back — the cost of a failed upgrade is worse than the
/// benefit of recovery for the typical bzr use case.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum RangeHint {
    Multi,
    Single,
    None,
}

// NOTE(review): the generic arguments in this section (`Arc`, `Mutex`,
// `Option`, `BTreeMap`, `Result`) were lost when the file was
// extracted; the code tokens are left exactly as found. The element
// type of the activity slot cannot be determined from this section —
// restore it from the upstream source before compiling.

/// Per-transport readv tunables. Defaults match the historical
/// urllib HttpTransport values; tests poke individual fields to
/// force specific batching behaviour. Stored behind an
/// `Arc<Mutex<_>>` so clones share the same tuning state.
#[derive(Debug, Clone)]
pub struct ReadvTuning {
    /// Max number of separate offsets to coalesce into one
    /// `_CoalescedOffset` group. `0` means "no limit". Mirrors the
    /// Python `_max_readv_combine` attribute.
    pub max_readv_combine: usize,
    /// "Fudge factor" — bytes to read between offsets before
    /// preferring a seek. Default 128. Mirrors
    /// `_bytes_to_read_before_seek`.
    pub bytes_to_read_before_seek: usize,
    /// Max byte size of a single coalesced range. `0` = no limit.
    /// Mirrors `_get_max_size`.
    pub get_max_size: usize,
    /// Max number of byte ranges packed into one HTTP Range header.
    /// Mirrors `_max_get_ranges`.
    pub max_get_ranges: usize,
}

impl Default for ReadvTuning {
    /// No combine limit, 128-byte fudge factor, no size limit, and at
    /// most 200 ranges per request.
    fn default() -> Self {
        Self {
            max_readv_combine: 0,
            bytes_to_read_before_seek: 128,
            get_max_size: 0,
            // Apache's default range cap is ~400; pick well under that.
            max_get_ranges: 200,
        }
    }
}

/// HTTP(S) transport.
///
/// Holds a shared (`Arc`) HTTP client so clones share the same
/// connection pool, auth cache, and credentials. `base` is the
/// transport's root URL without segment parameters (always ends with
/// `/`); `segment_parameters` stores the breezy-style `,key=value`
/// suffix parameters separately so they never leak into requests on
/// the wire but are still visible via `get_segment_parameters()` and
/// the joined form reported by `Transport::base()`.
/// `unqualified_scheme` is the HTTP scheme without any `+impl`
/// qualifier so we can hand back clean URLs from `external_url`
/// and `_remote_path`.
#[derive(Clone)]
pub struct HttpTransport {
    base: Url,
    /// The base string as supplied to `new`, used as the authoritative
    /// source of any URL-embedded userinfo that `url::Url` would
    /// otherwise normalise away (`http://joe:@host/` → `http://joe@host/`
    /// loses the empty password `test_empty_pass` distinguishes from
    /// "password absent"). Contains the full original URL including
    /// any `+impl` scheme suffix, password, and path.
    raw_base: String,
    unqualified_scheme: String,
    segment_parameters: std::collections::BTreeMap,
    client: Arc,
    range_hint: Arc>,
    readv_tuning: Arc>,
    /// Optional per-transport activity callback. When set, it's
    /// passed as the `activity` arg to each internal request
    /// (get/has/post/_get/readv) so breezy's `_report_activity`
    /// progress-bar hook sees byte counts for these implicit
    /// requests as well. Wrapped in `Arc<Mutex<Option<_>>>` so
    /// clones share the slot — setting it on one transport
    /// propagates to clones and back.
    activity: Arc>>,
}

impl HttpTransport {
    /// Build a new transport over `base`. The URL must use an
    /// `http` or `https` scheme (optionally with a `+impl` suffix
    /// like `http+urllib://`, which we ignore beyond logging).
    ///
    /// The new transport starts with `RangeHint::Multi`, default
    /// readv tuning and no activity callback. Fails with whatever
    /// `normalise_http_url` reports for `base`.
    pub fn new(base: &str, client: Arc) -> Result {
        let (unqualified_scheme, normalised_base, segment_parameters) = normalise_http_url(base)?;
        Ok(Self {
            base: normalised_base,
            // Keep the caller's literal text; see the `raw_base` field.
            raw_base: base.to_string(),
            unqualified_scheme,
            segment_parameters,
            client,
            range_hint: Arc::new(Mutex::new(RangeHint::Multi)),
            readv_tuning: Arc::new(Mutex::new(ReadvTuning::default())),
            activity: Arc::new(Mutex::new(None)),
        })
    }

    /// Install (or replace) the per-transport activity callback.
    /// Called from the PyO3 layer after construction so breezy's
    /// `_report_activity` hook reaches internal get/has/post/readv
    /// calls, not just the explicit `.request()` API that already
    /// threads its own callback. Pass `None` to clear.
    ///
    /// Panics if the activity mutex is poisoned.
    pub fn set_activity(&self, cb: Option) {
        *self.activity.lock().unwrap() = cb;
    }

    /// Snapshot of the currently-installed activity callback, if
    /// any. Helper for the request helpers — the stored value is
    /// cloned out so the lock stays short-lived even when the
    /// request itself takes a while.
    fn activity_snapshot(&self) -> Option {
        self.activity.lock().unwrap().clone()
    }

    /// Clone this transport at a new base URL. Shares the underlying
    /// `HttpClient` — so the auth cache, connection pool, and
    /// credentials follow us — as well as the range hint, readv
    /// tuning and activity slot. Segment parameters are cleared on
    /// the clone (matching the Python `ConnectedTransport.clone`
    /// shape, which did the same via `_raw_base`).
    fn clone_at(&self, new_base: Url) -> Self {
        Self {
            // The clone's raw_base is the new canonical URL. We lose
            // the original's literal text (including any empty
            // password markers), but clones are typically produced
            // by offsetting from the parent, and credentials in the
            // URL only matter on the initial construction.
            raw_base: new_base.to_string(),
            base: new_base,
            unqualified_scheme: self.unqualified_scheme.clone(),
            segment_parameters: std::collections::BTreeMap::new(),
            client: self.client.clone(),
            range_hint: self.range_hint.clone(),
            readv_tuning: self.readv_tuning.clone(),
            activity: self.activity.clone(),
        }
    }

    /// Concrete version of [`Transport::clone`]. Same semantics —
    /// optionally apply an offset — but returns `Self` so callers
    /// that need the concrete type (the PyO3 wrapper, mostly) don't
    /// have to downcast a `Box`.
    ///
    /// `clone` differs from `abspath` in that the result is always
    /// directory-shaped (ends in `/`): a transport's base URL is
    /// conventionally a directory, and downstream `join`/`abspath`
    /// calls only work right when the base ends with a slash.
pub fn clone_concrete(&self, offset: Option<&UrlFragment>) -> Result<Self> {
    let new_base = match offset {
        Some(o) => {
            let mut url = self.abspath(o)?;
            // A transport base must be directory-shaped.
            if !url.path().ends_with('/') {
                let path = format!("{}/", url.path());
                url.set_path(&path);
            }
            url
        }
        None => self.base.clone(),
    };
    Ok(self.clone_at(new_base))
}

/// The URL a server sees for `relpath`. Credentials are stripped
/// from the userinfo (they belong in headers, not in the path we
/// hand the server), and the scheme is the unqualified form so
/// `http+urllib://host/` never leaks upstream.
pub fn remote_url(&self, relpath: &UrlFragment) -> Result<Url> {
    let mut url = self.abspath(relpath)?;
    // The setters' results are deliberately ignored (best effort).
    let _ = url.set_username("");
    let _ = url.set_password(None);
    let _ = url.set_scheme(&self.unqualified_scheme);
    Ok(url)
}

/// Issue a raw HTTP request. Mirrors the Python
/// `HttpTransport.request` method: returns the Rust
/// `HttpResponse` with redirect / auth / activity machinery
/// already applied by the client.
///
/// # Errors
/// * Client failures, mapped through `client_err_to_transport_err`.
/// * `Error::RedirectRequested` when `follow_redirects` is false and
///   the status is 301, 302, 303, 307 or 308 (301 and 308 are
///   reported as permanent).
pub fn request(
    &self,
    method: &str,
    url: &str,
    headers: &[(String, String)],
    body: &[u8],
    follow_redirects: bool,
) -> Result<HttpResponse> {
    let opts = RequestOptions {
        follow_redirects,
        ..RequestOptions::default()
    };
    // Take the callback out of the lock before the (slow) request.
    let activity = self.activity_snapshot();
    let resp = self
        .client
        .request_with_origin_url(
            method,
            url,
            &self.raw_base,
            headers,
            body,
            &opts,
            activity.as_ref(),
        )
        .map_err(client_err_to_transport_err)?;

    let code = resp.status;
    if !follow_redirects && matches!(code, 301 | 302 | 303 | 307 | 308) {
        // Fall back to the request URL when the client did not record
        // a redirect target.
        let target = resp
            .redirected_to
            .clone()
            .unwrap_or_else(|| url.to_string());
        return Err(Error::RedirectRequested {
            source: url.to_string(),
            target,
            is_permanent: matches!(code, 301 | 308),
        });
    }
    Ok(resp)
}

/// HEAD with breezy-compatible status-code handling.
fn head_request(&self, relpath: &UrlFragment) -> Result<HttpResponse> {
    let abspath = self.remote_url(relpath)?.to_string();
    let resp = self.request("HEAD", &abspath, &[], &[], false)?;
    // Only "exists" (200) and "missing" (404) are passed through.
    if !matches!(resp.status, 200 | 404) {
        return Err(Error::UnexpectedHttpStatus {
            path: abspath,
            code: resp.status,
            extra: None,
        });
    }
    Ok(resp)
}

/// POST a body to `relpath`. Mirrors Python `HttpTransport._post`.
/// Returns `(status, range_file)` where `range_file` is a
/// `RangeFile` wrapping the response body (suitable for feeding
/// into the bzr smart-protocol medium reader).
///
/// The body is sent as `application/octet-stream` and redirects are
/// not followed.
pub fn post(&self, relpath: &UrlFragment, body: &[u8]) -> Result<(u16, HttpRangeFile)> {
    let abspath = self.remote_url(relpath)?.to_string();
    let headers = [(
        "Content-Type".to_string(),
        "application/octet-stream".to_string(),
    )];
    let resp = self.request("POST", &abspath, &headers, body, false)?;
    wrap_response_body(abspath, resp)
}

/// Internal `_get` with range support. Returns `(status,
/// range_file)` for a GET that may be range-limited by
/// `attempted_range_header`. 404 → `NoSuchFile`, 416 →
/// `InvalidHttpRange`, 400 → `BadHttpRequest` or
/// `InvalidHttpRange` depending on whether we sent a Range
/// header, other non-2xx → `UnexpectedHttpStatus`.
fn _get(
    &self,
    relpath: &UrlFragment,
    attempted_range_header: Option<&str>,
) -> Result<(u16, HttpRangeFile)> {
    let abspath = self.remote_url(relpath)?.to_string();
    // The caller passes the range *spec*; the `bytes=` unit is added here.
    let headers: Vec<(String, String)> = attempted_range_header
        .map(|r| vec![("Range".to_string(), format!("bytes={}", r))])
        .unwrap_or_default();
    let resp = self.request("GET", &abspath, &headers, &[], false)?;

    match (resp.status, attempted_range_header) {
        (200, _) | (206, _) => wrap_response_body(abspath, resp),
        (404, _) => Err(Error::NoSuchFile(Some(abspath))),
        (416, range) => Err(Error::InvalidHttpRange {
            path: abspath,
            range: range.unwrap_or("").to_string(),
            msg: format!("Server return code {}", resp.status),
        }),
        // A 400 in answer to a ranged request is blamed on the range.
        (400, Some(range)) => Err(Error::InvalidHttpRange {
            path: abspath,
            range: range.to_string(),
            msg: format!("Server return code {}", resp.status),
        }),
        (400, None) => Err(Error::BadHttpRequest {
            path: abspath,
            reason: resp.reason.clone(),
        }),
        (code, _) => Err(Error::UnexpectedHttpStatus {
            path: abspath,
            code,
            extra: None,
        }),
    }
}

/// Format the current ranges + tail amount into a Range-header
/// value if any can be built. Mirrors the Python
/// `_attempted_range_header` with the same downgrade logic.
///
/// `offsets` holds `(start, length)` pairs. Returns `None` when the
/// whole file must be fetched instead: the hint is `None`, or the
/// hint is `Single` and both offsets and a tail are requested.
fn attempted_range_header(
    &self,
    offsets: &[(usize, usize)],
    tail_amount: usize,
) -> Option<String> {
    let hint = *self.range_hint.lock().unwrap();
    match hint {
        RangeHint::Multi => Some(format_range_header(offsets, tail_amount)),
        RangeHint::Single => match (offsets.first(), offsets.last()) {
            (Some(first), Some(last)) => {
                if tail_amount != 0 {
                    // Can't merge ranges with a tail_amount into
                    // one; caller falls back to the whole file.
                    return None;
                }
                // One range covering first.start .. last.end. Computed
                // without the intermediate `- 1`, which underflowed for
                // a zero-length final range at offset 0.
                let start = first.0;
                let length = (last.0 + last.1).saturating_sub(start);
                Some(format_range_header(&[(start, length)], 0))
            }
            // No offsets: only a tail (if any) is being requested.
            _ => Some(format_range_header(offsets, tail_amount)),
        },
        RangeHint::None => None,
    }
}

/// Step the range hint down one rung after a server misbehaves.
/// Returns false if we've already hit the floor (no ranges) — /// caller must surface the error to the user. pub fn degrade_range_hint(&self) -> bool { let mut hint = self.range_hint.lock().unwrap(); match *hint { RangeHint::Multi => { *hint = RangeHint::Single; true } RangeHint::Single => { *hint = RangeHint::None; true } RangeHint::None => false, } } /// Shortcut the degradation ladder: jump straight to /// `RangeHint::None` (full-file downloads). Used when a server /// explicitly rejects a Range header rather than misbehaves on /// the response — probing one range at a time gains nothing /// there, and the full-file path finishes in a single GET. /// Returns false if we're already at None. pub(crate) fn jump_range_hint_to_none(&self) -> bool { let mut hint = self.range_hint.lock().unwrap(); if *hint == RangeHint::None { return false; } *hint = RangeHint::None; true } /// Current range hint as a short string: `"multi"`, `"single"`, /// or `None`. Matches the values the Python HttpTransport used /// so tests that reach into `_range_hint` continue to work. pub fn range_hint_str(&self) -> Option<&'static str> { match *self.range_hint.lock().unwrap() { RangeHint::Multi => Some("multi"), RangeHint::Single => Some("single"), RangeHint::None => None, } } /// Apply a [`ReadvTuning`] update — shared across all clones /// because the tuning sits behind an `Arc`. Used by the /// PyO3 wrapper's `_max_readv_combine` / `_max_get_ranges` / /// `_get_max_size` / `_bytes_to_read_before_seek` setters so /// breezy's test-harness tunables reach the readv coalescer. pub fn set_readv_tuning(&self, tuning: ReadvTuning) { *self.readv_tuning.lock().unwrap() = tuning; } /// Read the current [`ReadvTuning`] snapshot. Cloned to avoid /// holding the lock across the readv call. pub fn readv_tuning(&self) -> ReadvTuning { self.readv_tuning.lock().unwrap().clone() } /// Accessor for the shared `HttpClient`. 
Exposed so the PyO3 /// wrapper can pass through `request(...)` with a Python-side /// activity callback without re-resolving the client through /// a second URL parse. pub fn client(&self) -> &Arc { &self.client } /// OPTIONS request: returns the response headers or raises for /// 404 / 403 / 405. Mirrors the Python `_options`. pub fn options(&self, relpath: &UrlFragment) -> Result> { let abspath = self.remote_url(relpath)?.to_string(); let resp = self.request("OPTIONS", &abspath, &[], &[], false)?; match resp.status { 404 => Err(Error::NoSuchFile(Some(abspath))), 403 | 405 => Err(Error::InvalidHttpResponse { path: abspath, msg: "OPTIONS not supported or forbidden for remote URL".into(), }), _ => Ok(resp.headers.clone()), } } /// HEAD request: returns the raw response so callers can inspect /// headers. Rejects non-200/404 statuses. pub fn head(&self, relpath: &UrlFragment) -> Result { self.head_request(relpath) } } impl std::fmt::Debug for HttpTransport { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "HttpTransport({})", self.base) } } /// `RangeFile` wrapping the response body with its source URL for /// error reporting. Implements `ReadStream` so callers that treat /// `get()` as returning a read+seek stream work unchanged. pub struct HttpRangeFile { inner: RangeFile, } impl HttpRangeFile { /// Read bytes at `offset` with the given `size`. Wraps /// `RangeFile::seek` + `read`. The caller is responsible for /// ensuring `offset` falls within the current range window. 
pub fn read_at(&mut self, offset: u64, size: usize) -> Result<Vec<u8>> {
        self.inner
            .seek(offset as i64, 0)
            .map_err(response_err_to_transport_err)?;
        let got = self
            .inner
            .read(size as i64)
            .map_err(response_err_to_transport_err)?;
        if got.len() != size {
            return Err(Error::ShortReadvError(
                self.inner.path().to_string(),
                offset,
                size as u64,
                got.len() as u64,
            ));
        }
        Ok(got)
    }
}

impl std::io::Read for HttpRangeFile {
    /// Fill `buf` with up to `buf.len()` bytes from the underlying
    /// `RangeFile`; parse-layer errors are wrapped in an `io::Error`
    /// carrying their `Debug` rendering.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let chunk = self
            .inner
            .read(buf.len() as i64)
            .map_err(|e| std::io::Error::other(format!("{:?}", e)))?;
        // Never copy more than the caller's buffer can hold.
        let n = chunk.len().min(buf.len());
        buf[..n].copy_from_slice(&chunk[..n]);
        Ok(n)
    }
}

impl std::io::Seek for HttpRangeFile {
    /// Translate `SeekFrom` into the `(offset, whence)` pair that
    /// `RangeFile::seek` takes (0 = start, 1 = current, 2 = end) and
    /// report the resulting position via `tell()`.
    fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result<u64> {
        let (offset, whence) = match pos {
            std::io::SeekFrom::Start(o) => (o as i64, 0u32),
            std::io::SeekFrom::Current(o) => (o, 1u32),
            std::io::SeekFrom::End(o) => (o, 2u32),
        };
        self.inner
            .seek(offset, whence)
            .map_err(|e| std::io::Error::other(format!("{:?}", e)))?;
        Ok(self.inner.tell())
    }
}

impl ReadStream for HttpRangeFile {}

/// Adapter: wraps a fully-buffered body (`Vec<u8>` + cursor) as the
/// `InFile` type that `RangeFile` consumes. The body is already in
/// memory at this point — `HttpResponse` eagerly drains it on
/// access so seeking works without replaying the network stream.
struct BufferedBody {
    body: Vec<u8>,
    pos: usize,
}

impl BufferedBody {
    /// Wrap `body` with the cursor positioned at its first byte.
    fn new(body: Vec<u8>) -> Self {
        Self { body, pos: 0 }
    }
}

impl InFile for BufferedBody {
    /// Return up to `n` bytes from the cursor and advance past them.
    fn read(&mut self, n: usize) -> std::io::Result<Vec<u8>> {
        // Saturate so an oversized `n` clamps to the end of the body
        // instead of overflowing the addition.
        let end = self.pos.saturating_add(n).min(self.body.len());
        let chunk = self.body[self.pos..end].to_vec();
        self.pos = end;
        Ok(chunk)
    }

    /// Return the bytes up to and including the next `\n`, or the
    /// rest of the body when no newline remains.
    fn readline(&mut self) -> std::io::Result<Vec<u8>> {
        let rest = &self.body[self.pos..];
        let len = rest
            .iter()
            .position(|&b| b == b'\n')
            .map_or(rest.len(), |i| i + 1);
        let line = rest[..len].to_vec();
        self.pos += len;
        Ok(line)
    }
}

/// Normalise an HTTP URL: enforce a trailing slash and split out
/// the unqualified scheme. Accepts `+impl` suffixes (e.g.
/// `http+urllib://host/`) but rewrites them to the plain scheme
/// form before parsing so the resulting `Url` reports `http` /
/// `https` as its scheme. The `url` crate is strict about scheme
/// changes between "special" (http/https/etc.) and "non-special"
/// schemes, so we can't fix this up after-the-fact via
/// `Url::set_scheme`.
///
/// Returns `(unqualified_scheme, parsed_url, segment_parameters)`.
/// Fails with `Error::UrlError` when the input has no `://`, uses a
/// scheme other than `http` / `https`, or does not parse.
fn normalise_http_url(
    base: &str,
) -> Result<(String, Url, std::collections::BTreeMap<String, String>)> {
    let trimmed = base.trim();
    let scheme_end = trimmed
        .find("://")
        .ok_or_else(|| Error::UrlError(url::ParseError::RelativeUrlWithoutBase))?;
    let raw_scheme = &trimmed[..scheme_end];
    // Strip any `+impl` suffix: `http+urllib` → `http`.
    let unqualified = raw_scheme
        .split_once('+')
        .map(|(s, _)| s)
        .unwrap_or(raw_scheme)
        .to_string();
    if !matches!(unqualified.as_str(), "http" | "https") {
        return Err(Error::UrlError(url::ParseError::RelativeUrlWithoutBase));
    }
    let rest = &trimmed[scheme_end..];
    let canonical = format!("{}{}", unqualified, rest);
    // Segment parameters (`,key=value`) appended to the path are a
    // breezy/dromedary Transport-layer convention (e.g. `,branch=foo`)
    // that we store separately from the URL proper: they never go on
    // the wire. Split them off here so the rest of the transport
    // sees a clean URL.
    let (base_part, params) = crate::urlutils::split_segment_parameters(&canonical)
        .map_err(|_| Error::UrlError(url::ParseError::RelativeUrlWithoutBase))?;
    let base_with_slash = if base_part.ends_with('/') {
        base_part.to_string()
    } else {
        format!("{}/", base_part)
    };
    // RFC 3986 §6.2.2.2: percent-encoded unreserved characters must be
    // decoded to their literal form for a URL to be in canonical form.
    // Apply that here so cloning and abspath round-trip consistently with
    // paths that came in needlessly-escaped (`%7E` → `~` etc.).
    let base_with_slash = crate::urlutils::unquote_unreserved(&base_with_slash);
    let parsed = Url::parse(&base_with_slash)?;
    let segment_parameters: std::collections::BTreeMap<String, String> = params
        .iter()
        .map(|(k, v)| (k.to_string(), v.to_string()))
        .collect();
    Ok((unqualified, parsed, segment_parameters))
}

/// Collapse `//` and `/./` path segments per RFC 3986 §5.2.4.
///
/// `url::Url::join` applies this to the base URL but leaves the
/// output path alone when relative joins introduce stray `./` or
/// double slashes. breezy's tests assert that abspath emits the
/// canonical form — `abspath(".bzr/1//2/./3")` on
/// `http://host/bzr/bzr.dev/` should yield `/bzr/bzr.dev/.bzr/1/2/3`.
///
/// A leading slash is preserved, and a trailing slash is preserved
/// for paths longer than one character.
fn collapse_path_segments(path: &str) -> String {
    let has_leading_slash = path.starts_with('/');
    let has_trailing_slash = path.len() > 1 && path.ends_with('/');
    let mut parts: Vec<&str> = Vec::new();
    for part in path.split('/') {
        match part {
            // Empty segments come from `//` or a leading `/`; `.` is
            // the current-directory marker. Both are dropped.
            "" | "." => {}
            ".." => match parts.last() {
                // Pop the previous real segment if we have one.
                Some(&prev) if prev != ".." => {
                    parts.pop();
                }
                // Nothing left to pop (or only preserved `..`
                // markers, which must not cancel each other): leave
                // the `..` in place (url::Url already handled
                // overflow against the base).
                _ => parts.push(".."),
            },
            s => parts.push(s),
        }
    }
    let mut out = String::with_capacity(path.len());
    if has_leading_slash {
        out.push('/');
    }
    out.push_str(&parts.join("/"));
    if has_trailing_slash && !out.ends_with('/') {
        out.push('/');
    }
    out
}

/// Format a list of (start, length) offsets + optional tail amount
/// as an HTTP Range header value, e.g. `0-99,200-249,-50`.
///
/// Each offset becomes the inclusive byte range
/// `start-(start + length - 1)`; a non-zero `tail_amount` adds a
/// final `-N` suffix range. Every `length` is expected to be at
/// least 1 (a zero length would underflow the end computation).
fn format_range_header(offsets: &[(usize, usize)], tail_amount: usize) -> String {
    let mut parts: Vec<String> = offsets
        .iter()
        .map(|(start, length)| format!("{}-{}", start, start + length - 1))
        .collect();
    if tail_amount != 0 {
        parts.push(format!("-{}", tail_amount));
    }
    parts.join(",")
}

/// Wrap a response body in a `RangeFile`. For 200 responses we
/// build the RangeFile directly from the buffered body (no range
/// metadata to parse). For 206 we run through `handle_response`
/// which inspects Content-Type / Content-Range to set up the
/// boundary or single-range window.
fn wrap_response_body(url: String, mut resp: HttpResponse) -> Result<(u16, HttpRangeFile)> {
    let status = resp.status;
    // Truncated response bodies (server promised Content-Length N
    // but closed the socket before sending N bytes) surface as an
    // io::Error wrapping a hyper::Error whose `is_incomplete_
    // message()` is true. That's the same signal breezy's "short
    // readv" retry path keys off. Preserve the partial bytes we
    // *did* read so the downstream multipart parser can extract
    // the ranges that arrived in full before the cut — the readv
    // eager/retry loop then only has to fetch the tail, not redo
    // everything. Non-truncation errors still bubble up.
    let body = match resp.read(None) {
        Ok(b) => b,
        Err(e) => {
            if !is_io_hyper_parse_error(&e) {
                return Err(Error::Io(e));
            }
            // `buffer_all` keeps the partial read in the
            // response's buffer even on UnexpectedEof, so a
            // second call reads it out without re-hitting the
            // socket.
            resp.read(None).unwrap_or_default()
        }
    };
    if status == 200 {
        // Plain whole-file response: skip handle_response and build
        // a RangeFile directly so we don't need to thread the body
        // through the ResponseFile intermediate.
        let rf = RangeFile::new(url, BufferedBody::new(body));
        return Ok((status, HttpRangeFile { inner: rf }));
    }
    // 206 (or anything else handle_response accepts): inspect the
    // headers to set up the range window.
    let headers = resp.headers.clone();
    // Case-insensitive header lookup handed to the parse layer.
    let get_header = |name: &str| -> Option<String> {
        headers
            .iter()
            .find(|(k, _)| k.eq_ignore_ascii_case(name))
            .map(|(_, v)| v.to_string())
    };
    let kind = handle_response(url.clone(), status, &get_header, BufferedBody::new(body))
        .map_err(response_err_to_transport_err)?;
    let rf = match kind {
        ResponseKind::Range(rf) => rf,
        ResponseKind::Plain(_) => {
            // handle_response only returns Plain for 200, which we
            // intercepted above. Anything else here is a parse-layer
            // bug we shouldn't paper over.
            return Err(Error::InvalidHttpResponse {
                path: url,
                msg: format!("unexpected handle_response shape for status {}", status),
            });
        }
    };
    Ok((status, HttpRangeFile { inner: rf }))
}

/// Map a `ResponseError` from the parse layer to our transport
/// `Error`. The `ResponseError` variants are already 1-to-1 with
/// dromedary errors (see `src/http/response.rs`), so this is a
/// straightforward translation.
fn response_err_to_transport_err(err: ResponseError) -> Error {
    match err {
        ResponseError::InvalidResponse { path, msg } => Error::InvalidHttpResponse { path, msg },
        ResponseError::InvalidHttpRange { path, range, msg } => {
            Error::InvalidHttpRange { path, range, msg }
        }
        ResponseError::BoundaryMissing { path, boundary } => Error::InvalidHttpResponse {
            path,
            msg: format!(
                "HTTP MIME Boundary missing ({})",
                String::from_utf8_lossy(&boundary)
            ),
        },
        ResponseError::ShortReadv {
            path,
            offset,
            length,
            actual,
        } => Error::ShortReadvError(path, offset, length, actual),
        ResponseError::InvalidRange { path, offset, msg } => Error::InvalidHttpResponse {
            path,
            msg: format!("invalid range at offset {}: {}", offset, msg),
        },
        ResponseError::UnexpectedStatus { path, code } => Error::UnexpectedHttpStatus {
            path,
            code,
            extra: None,
        },
        ResponseError::Io(e) => Error::Io(e),
        // `InvalidWhence` carries no path, so the path is left empty.
        ResponseError::InvalidWhence(w) => Error::InvalidHttpResponse {
            path: String::new(),
            msg: format!("invalid whence: {}", w),
        },
        ResponseError::BackwardSeek { path, pos, offset } => Error::InvalidHttpResponse {
            path,
            msg: format!("backward seek: pos={}, offset={}", pos, offset),
        },
    }
}

/// Map a `ClientError` from the HTTP client to our transport
/// `Error`. `ClientError::Io` passes through as `Error::Io`;
/// `ClientError::Transport` becomes `Error::InvalidHttpResponse`
/// for protocol-level parse failures and `Error::ConnectionError`
/// otherwise; `ClientError::InvalidRequest` becomes
/// `Error::UrlError` when tagged `bad URL`, else
/// `Error::InvalidHttpResponse`.
fn client_err_to_transport_err(err: crate::http::client::ClientError) -> Error { use crate::http::client::ClientError; match err { ClientError::InvalidRequest(msg) => { // The client tags messages with `bad URL:` when the // failure is the URL itself (request URL or proxy URL // shape) — those map to InvalidURL on the Python side // rather than the more generic InvalidHttpResponse. if msg.starts_with("bad URL") { Error::UrlError(url::ParseError::RelativeUrlWithoutBase) } else { Error::InvalidHttpResponse { path: String::new(), msg, } } } ClientError::Io(e) => Error::Io(e), ClientError::Transport(e) => { // Classify: protocol-level parse errors (bad HTTP // version, bad status line, truncated framing) map to // `InvalidHttpResponse` — a semantic "server misbehaved" // that breezy's tests explicitly distinguish from // network-level failures. Everything else (DNS, TCP, // TLS) maps to `ConnectionError` for the retry loop. // // reqwest walks the error source chain for the typed // tests below; we mirror that approach rather than // string-matching the Display output, which would // inevitably drift. let msg = e.to_string(); if is_http_parse_error(&e) { Error::InvalidHttpResponse { path: String::new(), msg, } } else { Error::ConnectionError(msg) } } } } /// Walk an error source chain looking for a `hyper::Error` and /// ask it whether this was a protocol-level parse failure. No /// string-matching: `hyper::Error::is_parse` / `is_parse_status` / /// `is_incomplete_message` are the authoritative classifiers. /// /// Used for both `reqwest::Error` (handshake / request-send /// failures) and `std::io::Error` (body-read failures where /// reqwest wraps the hyper error in an io::Error) — both surface /// the hyper error via their source chain. 
fn is_hyper_parse_chain(start: &(dyn std::error::Error + 'static)) -> bool { let mut cur: Option<&(dyn std::error::Error + 'static)> = Some(start); while let Some(cause) = cur { if let Some(hyper_err) = cause.downcast_ref::() { // `is_parse()` is the general "we couldn't decode this // as HTTP" check; `is_parse_status()` covers specifically // bad status lines (which `is_parse` also catches — // included for clarity). `is_incomplete_message()` is // the truncated-framing case breezy's BadProtocol tests // also exercise, and the same signal truncated response // bodies raise. return hyper_err.is_parse() || hyper_err.is_parse_status() || hyper_err.is_incomplete_message(); } cur = cause.source(); } false } /// Classifier for `reqwest::Error` — walks the error's own source /// chain. Start after the error (skipping `err` itself) because /// `reqwest::Error::source()` is the thing carrying the real cause. fn is_http_parse_error(err: &reqwest::Error) -> bool { match std::error::Error::source(err) { Some(s) => is_hyper_parse_chain(s), None => false, } } /// Classifier for `std::io::Error` raised while reading a response /// body. When the server advertises `Content-Length: N` but closes /// the socket early, reqwest surfaces an io::Error whose cause /// chain ends in an inner `io::Error` of kind `UnexpectedEof`. /// That's the truncated-body signal — bubble it up as a parse /// error so the readv retry loop can degrade and retry instead of /// treating it as a hard failure. fn is_io_hyper_parse_error(err: &std::io::Error) -> bool { // Top-level kind first (best if reqwest ever surfaces kind // directly). if err.kind() == std::io::ErrorKind::UnexpectedEof { return true; } let Some(inner) = err.get_ref() else { return false; }; // Dig for the innermost io::Error and check its kind, too — // that's where the IncompleteBody marker ends up. 
let mut cur: Option<&(dyn std::error::Error + 'static)> = Some(inner); while let Some(cause) = cur { if let Some(io_err) = cause.downcast_ref::() { if io_err.kind() == std::io::ErrorKind::UnexpectedEof { return true; } } cur = cause.source(); } // Fall back to the hyper-typed classifier in case the truncation // shape is one of the parse-layer variants. is_hyper_parse_chain(inner) } impl Transport for HttpTransport { fn external_url(&self) -> Result { // `base` already has the unqualified scheme after // `normalise_http_url`, so we just hand it back. let mut url = self.base.clone(); let _ = url.set_scheme(&self.unqualified_scheme); Ok(url) } fn base(&self) -> Url { if self.segment_parameters.is_empty() { return self.base.clone(); } let raw = self.base.as_str(); let params: std::collections::HashMap<&str, &str> = self .segment_parameters .iter() .map(|(k, v)| (k.as_str(), v.as_str())) .collect(); // join_segment_parameters can only fail for malformed inputs // (`=` in key, `,` in value); our params came from set_segment // which pre-validates, so this is infallible in practice. let joined = crate::urlutils::join_segment_parameters(raw, ¶ms) .unwrap_or_else(|_| raw.to_string()); Url::parse(&joined).unwrap_or_else(|_| self.base.clone()) } fn can_roundtrip_unix_modebits(&self) -> bool { false } fn get(&self, relpath: &UrlFragment) -> Result> { let (_code, rf) = self._get(relpath, None)?; Ok(Box::new(rf)) } fn has(&self, relpath: &UrlFragment) -> Result { let resp = self.head_request(relpath)?; Ok(resp.status == 200) } fn stat(&self, _relpath: &UrlFragment) -> Result { Err(Error::TransportNotPossible(Some( "http does not support stat()".into(), ))) } fn clone(&self, offset: Option<&UrlFragment>) -> Result> { Ok(Box::new(self.clone_concrete(offset)?)) } fn abspath(&self, relpath: &UrlFragment) -> Result { // URLs must be ASCII on the wire. 
Callers hand us pre-escaped // paths (via urlutils.escape); silently percent-encoding here // would hide bugs where that escape was skipped. if !relpath.is_ascii() { return Err(Error::UrlError(url::ParseError::InvalidDomainCharacter)); } let mut joined = if relpath.is_empty() || relpath == "." { self.base.clone() } else { // Unescape unreserved characters (RFC 3986: a-z A-Z 0-9 // - . _ ~) that callers sometimes percent-encode // needlessly. Keeps abspath output canonical so // clone()s of sibling branches produce identical- // looking base URLs. (lp:842223) let normalised = crate::urlutils::unquote_unreserved(relpath); self.base .join(&normalised) .map_err(|_| Error::UrlError(url::ParseError::InvalidDomainCharacter))? }; // Collapse `//` and `./` path segments that `url::Url::join` // leaves intact — the test suite expects RFC 3986 §5.2.4 // "remove_dot_segments" style normalisation at the abspath // boundary. (url::Url does that for the base URL but not // for the relative join output.) let collapsed = collapse_path_segments(joined.path()); if collapsed != joined.path() { joined.set_path(&collapsed); } // Match the Python `URL.clone(relpath)` semantics of // dropping a trailing slash from the joined path. Callers // that explicitly want the directory form re-append `/` // themselves (e.g. clone() in the Python subclass). 
if joined.path().len() > 1 && joined.path().ends_with('/') { let new_path = joined.path().trim_end_matches('/').to_string(); joined.set_path(&new_path); } Ok(joined) } fn relpath(&self, abspath: &Url) -> Result { crate::relpath_against_base(&self.base, abspath) } fn mkdir(&self, _relpath: &UrlFragment, _permissions: Option) -> Result<()> { Err(Error::TransportNotPossible(Some( "http does not support mkdir()".into(), ))) } fn put_file( &self, _relpath: &UrlFragment, _f: &mut dyn std::io::Read, _permissions: Option, ) -> Result { Err(Error::TransportNotPossible(Some( "http does not support put_file()".into(), ))) } fn delete(&self, _relpath: &UrlFragment) -> Result<()> { Err(Error::TransportNotPossible(Some( "http does not support delete()".into(), ))) } fn rmdir(&self, _relpath: &UrlFragment) -> Result<()> { Err(Error::TransportNotPossible(Some( "http does not support rmdir()".into(), ))) } fn rename(&self, _rel_from: &UrlFragment, _rel_to: &UrlFragment) -> Result<()> { Err(Error::TransportNotPossible(Some( "http does not support rename()".into(), ))) } fn set_segment_parameter(&mut self, key: &str, value: Option<&str>) -> Result<()> { if key.contains('=') { return Err(Error::UrlError(url::ParseError::InvalidDomainCharacter)); } match value { Some(v) => { if v.contains(',') { return Err(Error::UrlError(url::ParseError::InvalidDomainCharacter)); } self.segment_parameters .insert(key.to_string(), v.to_string()); } None => { self.segment_parameters.remove(key); } } Ok(()) } fn get_segment_parameters(&self) -> Result> { Ok(self .segment_parameters .iter() .map(|(k, v)| (k.clone(), v.clone())) .collect()) } fn readlink(&self, _relpath: &UrlFragment) -> Result { Err(Error::TransportNotPossible(Some( "http does not support readlink()".into(), ))) } fn hardlink(&self, _from: &UrlFragment, _to: &UrlFragment) -> Result<()> { Err(Error::TransportNotPossible(Some( "http does not support hardlink()".into(), ))) } fn symlink(&self, _from: &UrlFragment, _to: &UrlFragment) -> 
Result<()> { Err(Error::TransportNotPossible(Some( "http does not support symlink()".into(), ))) } fn iter_files_recursive(&self) -> Box>> { Box::new(std::iter::once(Err(Error::TransportNotPossible(Some( "http does not support iter_files_recursive()".into(), ))))) } fn open_write_stream( &self, _relpath: &UrlFragment, _permissions: Option, ) -> Result> { Err(Error::TransportNotPossible(Some( "http does not support open_write_stream()".into(), ))) } fn delete_tree(&self, _relpath: &UrlFragment) -> Result<()> { Err(Error::TransportNotPossible(Some( "http does not support delete_tree()".into(), ))) } fn is_readonly(&self) -> bool { true } fn listable(&self) -> bool { false } fn recommended_page_size(&self) -> usize { 64 * 1024 } fn lock_read(&self, _relpath: &UrlFragment) -> Result> { // HTTP doesn't have shared-read locks; return a bogus lock // that no-ops on unlock, matching the Python version. Ok(Box::new(BogusLock)) } fn lock_write(&self, _relpath: &UrlFragment) -> Result> { Err(Error::TransportNotPossible(Some( "http does not support lock_write()".into(), ))) } fn local_abspath(&self, _relpath: &UrlFragment) -> Result { Err(Error::NotLocalUrl(self.base.to_string())) } fn list_dir(&self, _relpath: &UrlFragment) -> Box>> { Box::new(std::iter::once(Err(Error::TransportNotPossible(Some( "http does not support list_dir()".into(), ))))) } fn append_file( &self, _relpath: &UrlFragment, _f: &mut dyn std::io::Read, _permissions: Option, ) -> Result { Err(Error::TransportNotPossible(Some( "http does not support append_file()".into(), ))) } fn copy(&self, _rel_from: &UrlFragment, _rel_to: &UrlFragment) -> Result<()> { Err(Error::TransportNotPossible(Some( "http does not support copy()".into(), ))) } fn readv<'a>( &self, relpath: &'a UrlFragment, offsets: Vec<(u64, usize)>, adjust_for_latency: bool, upper_limit: Option, ) -> Box)>> + Send + 'a> { let offsets = if adjust_for_latency { crate::readv::sort_expand_and_combine( offsets, upper_limit, self.recommended_page_size(), 
) } else { offsets }; Box::new(LazyReadv::new( Clone::clone(self), relpath.to_string(), offsets, )) } } impl crate::ConnectedTransport for HttpTransport {} impl HttpTransport { /// Eager `readv` implementation: issue coalesced GET requests, /// degrade the range hint on failure, and return the results as /// a Vec so the `Transport::readv` iterator can yield them. /// /// Matches the Python `_readv` algorithm: sort + coalesce /// offsets into the smallest number of Range-header entries, /// issue one or more GET requests respecting the range hint, /// parse multipart / single-range responses via `RangeFile`, /// and fall back to `single` and then `none` (full-file /// download) on failure. fn readv_eager( &self, relpath: &UrlFragment, offsets: Vec<(u64, usize)>, ) -> Vec)>> { let offsets_usize: Vec<(usize, usize)> = offsets.iter().map(|(o, s)| (*o as usize, *s)).collect(); let mut remaining = offsets_usize.clone(); let mut out: Vec)>> = Vec::with_capacity(remaining.len()); // The last per-pass error we saw. We prefer to surface the // specific server/IO error once we run out of degradation // rungs — "server misbehaved on range requests" is less // useful than e.g. `InvalidHttpRange` or `ShortReadvError`. let mut last_retry_err: Option = None; loop { let sorted: Vec<(usize, usize)> = { let mut v = remaining.clone(); v.sort(); v }; let tuning = self.readv_tuning(); // `coalesce_offsets` treats `Some(0)` as the unlimited // sentinel; pass `None` for defaults and `Some(n)` for // explicit limits so the callee's own defaults don't // override our config. 
let opt = |v: usize| if v == 0 { None } else { Some(v) }; let coalesced = match crate::readv::coalesce_offsets( &sorted, opt(tuning.max_readv_combine), Some(tuning.bytes_to_read_before_seek), opt(tuning.get_max_size), ) { Ok(c) => c, Err(e) => { out.push(Err(Error::InvalidHttpResponse { path: relpath.to_string(), msg: format!("overlapping ranges: {}", e), })); return out; } }; match self.readv_one_pass(relpath, &coalesced, &remaining) { Ok(pass_out) => { out.extend(pass_out); return out; } Err(ReadvPassError::Retry { remaining: new_remaining, cause, }) => { // Server misbehaved; degrade the range hint. // Distinguish "server explicitly rejected the // Range request" (400/416 → InvalidHttpRange) // from "server's response was corrupt" (truncated // multipart, bad boundaries, etc. → // InvalidHttpResponse / ShortReadv). An explicit // rejection means *any* number of ranges is // risky — drop straight to full-file rather than // probe Single one-GET-per-range. The truncation // case steps Multi→Single→None one rung at a time // so servers that cope with fewer ranges don't // force a whole-file download. let jumped = matches!(cause, Some(Error::InvalidHttpRange { .. })) && self.jump_range_hint_to_none(); if !jumped && !self.degrade_range_hint() { out.push(Err(cause.or(last_retry_err).unwrap_or_else(|| { Error::InvalidHttpResponse { path: relpath.to_string(), msg: "server repeatedly misbehaved on range requests".into(), } }))); return out; } last_retry_err = cause; remaining = new_remaining; } Err(ReadvPassError::Hard(err)) => { out.push(Err(err)); return out; } } } } /// One pass of the `readv` coalescing loop. Groups coalesced /// chunks into batches of up to `MAX_GET_RANGES` per request /// when running under `RangeHint::Multi`; under `Single` each /// chunk gets its own request; under `None` we issue one full- /// file GET that covers everything. 
fn readv_one_pass( &self, relpath: &UrlFragment, coalesced: &[(usize, usize, Vec<(usize, usize)>)], offsets_order: &[(usize, usize)], ) -> std::result::Result)>>, ReadvPassError> { let tuning = self.readv_tuning(); let max_get_ranges = tuning.max_get_ranges.max(1); let get_max_size = tuning.get_max_size; // 0 = unlimited let hint = *self.range_hint.lock().unwrap(); // Slice the coalesced list into batches honouring both the // per-request range count and per-request total-bytes caps. // `RangeHint::None` collapses everything into one full-file // GET; `Single` is one chunk per request; `Multi` packs as // much as the caps allow. let batches: Vec<&[(usize, usize, Vec<(usize, usize)>)]> = match hint { RangeHint::None => vec![coalesced], RangeHint::Single => coalesced.chunks(1).collect::>(), RangeHint::Multi => { let mut batches: Vec<&[(usize, usize, Vec<(usize, usize)>)]> = Vec::new(); let mut start = 0; let mut acc_bytes = 0usize; let mut acc_ranges = 0usize; for (i, coal) in coalesced.iter().enumerate() { let length = coal.1; let would_exceed_size = get_max_size > 0 && acc_bytes + length > get_max_size && acc_ranges > 0; let would_exceed_ranges = acc_ranges >= max_get_ranges; if would_exceed_size || would_exceed_ranges { batches.push(&coalesced[start..i]); start = i; acc_bytes = 0; acc_ranges = 0; } acc_bytes += length; acc_ranges += 1; } if start < coalesced.len() { batches.push(&coalesced[start..]); } batches } }; let mut results: Vec)>> = Vec::with_capacity(offsets_order.len()); let mut data_map: std::collections::HashMap<(usize, usize), Vec> = std::collections::HashMap::new(); let mut iter = offsets_order.iter(); let Some(mut current) = iter.next().copied() else { return Ok(results); }; for batch in batches { // Build the Range header from this batch's coalesced // chunks. Under `RangeHint::None` we pass None to skip // the header entirely (full-file download). 
let flat: Vec<(usize, usize)> = batch .iter() .map(|(start, length, _ranges)| (*start, *length)) .collect(); let range_header = self.attempted_range_header(&flat, 0); let (_code, mut rf) = match self._get(relpath, range_header.as_deref()) { Ok(pair) => pair, Err( e @ (Error::InvalidHttpRange { .. } | Error::InvalidHttpResponse { .. } | Error::ShortReadvError(_, _, _, _)), ) => { return Err(ReadvPassError::Retry { remaining: offsets_order.to_vec(), cause: Some(e), }); } Err(other) => return Err(ReadvPassError::Hard(other)), }; for (coal_start, _coal_length, ranges) in batch { for (sub_offset, sub_size) in ranges { let abs_start = coal_start + sub_offset; let data = match rf.read_at(abs_start as u64, *sub_size) { Ok(d) => d, Err( e @ (Error::ShortReadvError(_, _, _, _) | Error::InvalidHttpRange { .. } | Error::InvalidHttpResponse { .. }), ) => { return Err(ReadvPassError::Retry { remaining: offsets_order.to_vec(), cause: Some(e), }); } Err(other) => return Err(ReadvPassError::Hard(other)), }; if (abs_start, *sub_size) == current { results.push(Ok((abs_start as u64, data))); match iter.next() { Some(next) => current = *next, None => return Ok(results), } } else { data_map.insert((abs_start, *sub_size), data); } while let Some(d) = data_map.remove(¤t) { results.push(Ok((current.0 as u64, d))); match iter.next() { Some(next) => current = *next, None => return Ok(results), } } } } } Ok(results) } } #[cfg(test)] mod tests { use super::*; use crate::http::client::HttpClientConfig; fn fresh_client() -> Arc { Arc::new(HttpClient::new(HttpClientConfig::default()).expect("client builds")) } #[test] fn normalise_http_url_keeps_trailing_slash() { let (scheme, url, _params) = normalise_http_url("http://example.com").unwrap(); assert_eq!(scheme, "http"); assert!(url.as_str().ends_with('/')); } #[test] fn normalise_http_url_strips_impl_suffix() { let (scheme, _url, _params) = normalise_http_url("http+urllib://example.com/").unwrap(); // The unqualified scheme drops the +urllib 
qualifier so // external_url and remote_url emit the canonical form. assert_eq!(scheme, "http"); } #[test] fn normalise_http_url_rejects_non_http() { assert!(normalise_http_url("ftp://example.com/").is_err()); } #[test] fn transport_is_readonly() { let t = HttpTransport::new("http://example.com/", fresh_client()).unwrap(); assert!(t.is_readonly()); } #[test] fn transport_listable_false() { let t = HttpTransport::new("http://example.com/", fresh_client()).unwrap(); assert!(!t.listable()); } #[test] fn transport_external_url_is_canonical() { let t = HttpTransport::new("http+urllib://example.com/", fresh_client()).unwrap(); let url = t.external_url().unwrap(); // The +urllib qualifier shouldn't leak. assert_eq!(url.scheme(), "http"); } #[test] fn transport_remote_url_strips_credentials() { let t = HttpTransport::new("http://user:pass@example.com/", fresh_client()).unwrap(); let url = t.remote_url("path").unwrap(); // user/password belong in headers, not in the URL we send // upstream. Servers shouldn't see credentials in path-form. 
assert_eq!(url.username(), ""); assert_eq!(url.password(), None); } #[test] fn transport_clone_with_offset_resolves_against_base() { let t = HttpTransport::new("http://example.com/a/", fresh_client()).unwrap(); let cloned = Transport::clone(&t, Some("b/")).unwrap(); assert_eq!(cloned.base().as_str(), "http://example.com/a/b/"); } #[test] fn transport_write_methods_reject_with_transport_not_possible() { let t = HttpTransport::new("http://example.com/", fresh_client()).unwrap(); let mut empty = std::io::Cursor::new(Vec::::new()); assert!(matches!( t.put_file("x", &mut empty, None), Err(Error::TransportNotPossible(_)) )); assert!(matches!( t.mkdir("d", None), Err(Error::TransportNotPossible(_)) )); assert!(matches!(t.delete("x"), Err(Error::TransportNotPossible(_)))); } #[test] fn format_range_header_basic() { assert_eq!( format_range_header(&[(0, 100), (200, 50)], 0), "0-99,200-249" ); } #[test] fn format_range_header_with_tail() { assert_eq!(format_range_header(&[(0, 100)], 50), "0-99,-50"); } #[test] fn format_range_header_tail_only() { assert_eq!(format_range_header(&[], 100), "-100"); } // ----- abspath / clone_concrete ----- #[test] fn abspath_empty_strips_trailing_slash() { // `abspath` returns the file-form URL (no trailing slash) // even when handed the empty string or "." — matching // Python `URL.clone()`, which breezy's tests assert // against. The directory form lives on `base()` (which // keeps the trailing slash) and `clone()` (which re-adds // one). let t = HttpTransport::new("http://example.com/a/", fresh_client()).unwrap(); assert_eq!(t.abspath("").unwrap().as_str(), "http://example.com/a"); assert_eq!(t.abspath(".").unwrap().as_str(), "http://example.com/a"); } #[test] fn abspath_collapses_redundant_slashes_and_dots() { // `abspath("foo/1//2/./3")` should canonicalise to // `/foo/1/2/3` per RFC 3986 §5.2.4 "remove_dot_segments". 
let t = HttpTransport::new("http://example.com/root/", fresh_client()).unwrap(); assert_eq!( t.abspath(".bzr/1//2/./3").unwrap().as_str(), "http://example.com/root/.bzr/1/2/3" ); } #[test] fn abspath_rejects_non_ascii() { let t = HttpTransport::new("http://example.com/", fresh_client()).unwrap(); assert!(matches!(t.abspath("héllo"), Err(Error::UrlError(_)))); } #[test] fn abspath_unescapes_unreserved_characters() { // RFC 3986 unreserved chars (A-Z a-z 0-9 - . _ ~) must come // out un-escaped so clone()s of sibling branches are prefix- // comparable (lp:842223). let t = HttpTransport::new("http://example.com/", fresh_client()).unwrap(); let url = t.abspath("%2D%2E%30%39%41%5A%5F%61%7A%7E").unwrap(); assert_eq!(url.as_str(), "http://example.com/-.09AZ_az~"); } #[test] fn abspath_drops_trailing_slash_from_relpath() { // Matches Python `URL.clone(relpath)` semantics: callers that // want directory-shape use clone() (which re-adds the slash), // abspath is path-only. let t = HttpTransport::new("http://example.com/", fresh_client()).unwrap(); assert_eq!( t.abspath("foo/").unwrap().as_str(), "http://example.com/foo" ); } #[test] fn clone_concrete_adds_trailing_slash_when_missing() { let t = HttpTransport::new("http://example.com/a/", fresh_client()).unwrap(); // Offset without a trailing slash — the clone should still // be directory-shaped so downstream joins work correctly. 
let cloned = t.clone_concrete(Some("b")).unwrap(); assert_eq!(cloned.base().as_str(), "http://example.com/a/b/"); } #[test] fn clone_concrete_preserves_existing_trailing_slash() { let t = HttpTransport::new("http://example.com/a/", fresh_client()).unwrap(); let cloned = t.clone_concrete(Some("b/")).unwrap(); assert_eq!(cloned.base().as_str(), "http://example.com/a/b/"); } #[test] fn clone_concrete_none_returns_identical_base() { let t = HttpTransport::new("http://example.com/a/", fresh_client()).unwrap(); let cloned = t.clone_concrete(None).unwrap(); assert_eq!(cloned.base().as_str(), "http://example.com/a/"); } // ----- segment parameters ----- #[test] fn segment_parameters_parsed_from_base_url() { let t = HttpTransport::new( "http://example.com/path/,key1=val1,key2=val2", fresh_client(), ) .unwrap(); let params = t.get_segment_parameters().unwrap(); assert_eq!(params.get("key1"), Some(&"val1".to_string())); assert_eq!(params.get("key2"), Some(&"val2".to_string())); // base() reports the joined form so the segment params // survive any serialisation round-trip. assert!(t.base().as_str().contains("key1=val1")); assert!(t.base().as_str().contains("key2=val2")); } #[test] fn segment_parameters_dont_appear_on_the_wire() { // `remote_url` is the URL we hand the server — segment params // are a dromedary-internal convention and must stay local. let t = HttpTransport::new("http://example.com/path/,key=val", fresh_client()).unwrap(); let remote = t.remote_url("file").unwrap(); assert!(!remote.as_str().contains(',')); assert!(!remote.as_str().contains("key=val")); } #[test] fn set_segment_parameter_round_trip() { let mut t = HttpTransport::new("http://example.com/", fresh_client()).unwrap(); t.set_segment_parameter("arm", Some("board")).unwrap(); assert_eq!( t.get_segment_parameters().unwrap().get("arm"), Some(&"board".to_string()) ); // Setting value=None removes the parameter. 
t.set_segment_parameter("arm", None).unwrap(); assert!(t.get_segment_parameters().unwrap().get("arm").is_none()); // Removing a nonexistent parameter is a no-op, not an error. t.set_segment_parameter("nonexistent", None).unwrap(); } #[test] fn set_segment_parameter_rejects_equals_in_key() { let mut t = HttpTransport::new("http://example.com/", fresh_client()).unwrap(); assert!(t.set_segment_parameter("k=bad", Some("v")).is_err()); } #[test] fn set_segment_parameter_rejects_comma_in_value() { let mut t = HttpTransport::new("http://example.com/", fresh_client()).unwrap(); // Commas separate parameters, so a value containing one would // be indistinguishable from two parameters on the next parse. assert!(t.set_segment_parameter("k", Some("bad,value")).is_err()); } #[test] fn clone_clears_segment_parameters() { // Matches Python ConnectedTransport semantics: clone drops // segment parameters because they belong to the specific // base URL, not to the connection. let t = HttpTransport::new("http://example.com/a/,key=val", fresh_client()).unwrap(); let cloned = t.clone_concrete(None).unwrap(); assert!(cloned.get_segment_parameters().unwrap().is_empty()); } // ----- ConnectedTransport trait getters ----- #[test] fn connected_scheme_is_unqualified() { let t = HttpTransport::new("http+urllib://example.com/", fresh_client()).unwrap(); // `+urllib` is stripped at construction, so scheme() reports // the unqualified form even though the caller gave a suffix. assert_eq!( ::scheme(&t), "http" ); } #[test] fn connected_host_and_port() { let t = HttpTransport::new("http://example.com:8080/", fresh_client()).unwrap(); assert_eq!( ::host(&t), Some("example.com".to_string()) ); assert_eq!( ::port(&t), Some(8080) ); } #[test] fn connected_port_absent_for_default_port() { // Url::port() returns None when the scheme's default port is // used (80 for http, 443 for https) — callers that want the // default port fall back themselves. 
let t = HttpTransport::new("http://example.com/", fresh_client()).unwrap(); assert_eq!(::port(&t), None); } #[test] fn connected_user_and_password_are_percent_decoded() { let t = HttpTransport::new("http://jo%40home:p%40ss@example.com/", fresh_client()).unwrap(); assert_eq!( ::user(&t), Some("jo@home".to_string()) ); assert_eq!( ::password(&t), Some("p@ss".to_string()) ); } #[test] fn connected_user_and_password_absent_when_not_in_url() { let t = HttpTransport::new("http://example.com/", fresh_client()).unwrap(); assert_eq!(::user(&t), None); assert_eq!( ::password(&t), None ); } // ----- classify_reuse_for ----- #[test] fn classify_reuse_same_origin_same_path() { let base = url::Url::parse("http://host/path/").unwrap(); assert_eq!( crate::classify_reuse_for(&base, "http://host/path/"), crate::ReuseMatch::Same ); } #[test] fn classify_reuse_same_origin_normalises_trailing_slash() { // `/path` and `/path/` are the same transport. let base = url::Url::parse("http://host/path/").unwrap(); assert_eq!( crate::classify_reuse_for(&base, "http://host/path"), crate::ReuseMatch::Same ); } #[test] fn classify_reuse_same_origin_different_path() { let base = url::Url::parse("http://host/path/").unwrap(); assert_eq!( crate::classify_reuse_for(&base, "http://host/other/"), crate::ReuseMatch::Sibling ); } #[test] fn classify_reuse_different_scheme_rejected() { let base = url::Url::parse("http://host/path/").unwrap(); assert_eq!( crate::classify_reuse_for(&base, "https://host/path/"), crate::ReuseMatch::None ); } #[test] fn classify_reuse_impl_qualifier_stripped() { // `http://` and `http+urllib://` address the same origin — // the qualifier is implementation choice, not part of identity. 
let base = url::Url::parse("http://host/path/").unwrap(); assert_eq!( crate::classify_reuse_for(&base, "http+urllib://host/path/"), crate::ReuseMatch::Same ); } #[test] fn classify_reuse_different_host_or_port_rejected() { let base = url::Url::parse("http://host/path/").unwrap(); assert_eq!( crate::classify_reuse_for(&base, "http://other/path/"), crate::ReuseMatch::None ); assert_eq!( crate::classify_reuse_for(&base, "http://host:9090/path/"), crate::ReuseMatch::None ); } #[test] fn classify_reuse_different_user_rejected() { let base = url::Url::parse("http://alice@host/path/").unwrap(); assert_eq!( crate::classify_reuse_for(&base, "http://bob@host/path/"), crate::ReuseMatch::None ); } #[test] fn classify_reuse_unparseable_url_returns_none() { let base = url::Url::parse("http://host/").unwrap(); assert_eq!( crate::classify_reuse_for(&base, "not a url"), crate::ReuseMatch::None ); } } /// Internal control flow between `readv_eager` and `readv_one_pass`. enum ReadvPassError { /// Server misbehaved; step the range hint down and try again /// with the given remaining offsets. `cause` is the underlying /// error (if any) that triggered the retry — surfaced to the /// caller once the degradation ladder runs out, so the final /// error is specific rather than a generic "server misbehaved". Retry { remaining: Vec<(usize, usize)>, cause: Option, }, /// Hard error — surface to the caller. Hard(Error), } /// Lazy `readv` iterator that issues one HTTP GET per batch on /// demand. Breezy's `test_*_leave_pipe_clean` tests pull the /// first yield, check how many GETs the server saw, then stop — /// so an eager implementation that drains all batches up-front /// fails those assertions even though the data it returns is /// correct. /// /// On a retry-worthy error we fall back to the eager path, which /// runs the whole degrade-and-retry loop. The eager fallback /// includes any batches we haven't touched yet plus the one that /// failed. 
struct LazyReadv { transport: HttpTransport, relpath: String, /// The caller's offsets in yield order. Populated with /// `(offset, size)` pairs; we pop the head each time we yield. pending: std::collections::VecDeque<(usize, usize)>, /// Pre-fetched `(offset, size) -> data` map from the most /// recent batch, read in-order out of the HTTP response. yielded: std::collections::VecDeque)>>, /// Coalesced batches we haven't fetched yet, computed once at /// iterator construction from the sorted+coalesced plan. batches: std::collections::VecDeque)>>, /// Set once we've fallen back to the eager path — all /// subsequent yields come from `yielded` and we stop issuing /// new GETs. exhausted: bool, /// A retry-worthy failure from an earlier batch that we /// haven't acted on yet — deferred so callers can consume /// the ranges we *did* manage to parse from the partial /// response before we start issuing fresh GETs. The error is /// discharged the next time `fetch_next_batch` runs and /// `yielded` has drained. deferred_fallback: Option, } impl LazyReadv { fn new(transport: HttpTransport, relpath: String, offsets: Vec<(u64, usize)>) -> Self { let offsets_usize: Vec<(usize, usize)> = offsets.iter().map(|(o, s)| (*o as usize, *s)).collect(); let sorted: Vec<(usize, usize)> = { let mut v = offsets_usize.clone(); v.sort(); v }; let tuning = transport.readv_tuning(); let opt = |v: usize| if v == 0 { None } else { Some(v) }; let coalesced = match crate::readv::coalesce_offsets( &sorted, opt(tuning.max_readv_combine), Some(tuning.bytes_to_read_before_seek), opt(tuning.get_max_size), ) { Ok(c) => c, Err(e) => { // Can't coalesce — degenerate to the eager path's // error shape for uniformity with existing tests. 
let mut yielded = std::collections::VecDeque::new(); yielded.push_back(Err(Error::InvalidHttpResponse { path: relpath.clone(), msg: format!("overlapping ranges: {}", e), })); return Self { transport, relpath, pending: offsets_usize.into(), yielded, batches: std::collections::VecDeque::new(), exhausted: true, deferred_fallback: None, }; } }; let batches = compute_batches(&coalesced, &tuning, &transport.range_hint.lock().unwrap()); Self { transport, relpath, pending: offsets_usize.into(), yielded: std::collections::VecDeque::new(), batches, exhausted: false, deferred_fallback: None, } } /// Issue the next batch's GET and push its sub-ranges into /// `yielded` in the order the caller originally asked for /// them (reordering within the batch using pending's head). fn fetch_next_batch(&mut self) -> bool { // Discharge any pending fallback before issuing a fresh GET: // an earlier batch left partial results and a deferred // error, and now the caller's asking for more offsets than // that partial could satisfy. Running the fallback here // (rather than inline with the failed read) means we yield // the partial ranges first, then only replay the eager // path for the offsets that actually went unsatisfied. if let Some(err) = self.deferred_fallback.take() { self.run_eager_fallback(Some(err)); return true; } let Some(batch) = self.batches.pop_front() else { return false; }; let flat: Vec<(usize, usize)> = batch .iter() .map(|(start, length, _)| (*start, *length)) .collect(); let range_header = self.transport.attempted_range_header(&flat, 0); let mut rf = match self.transport._get(&self.relpath, range_header.as_deref()) { Ok((_code, rf)) => rf, Err(e) => { // Fall back to the eager/degrade loop — hand it // the remaining offsets (the current batch's plus // anything we hadn't started yet, reconstructed // from `pending`). 
self.run_eager_fallback(Some(e)); return true; } }; // Pull each sub-range's bytes from the response and route // them to either `yielded` (for the next caller request) // or a per-batch data_map for out-of-order reassembly. We // drain `pending` incrementally — if reading later in the // batch fails (e.g. a truncated multipart body), earlier // ranges we already decoded are still yielded before the // fallback kicks in, matching // `test_readv_with_short_reads`'s expectation that partial // progress survives a cut-short response. let mut data_map: std::collections::HashMap<(usize, usize), Vec> = std::collections::HashMap::new(); let flush_available = |data_map: &mut std::collections::HashMap<(usize, usize), Vec>, pending: &mut std::collections::VecDeque<(usize, usize)>, yielded: &mut std::collections::VecDeque)>>| { while let Some(&(off, size)) = pending.front() { if let Some(data) = data_map.remove(&(off, size)) { pending.pop_front(); yielded.push_back(Ok((off as u64, data))); } else { break; } } }; for (coal_start, _coal_length, ranges) in &batch { for (sub_offset, sub_size) in ranges { let abs_start = coal_start + sub_offset; match rf.read_at(abs_start as u64, *sub_size) { Ok(d) => { data_map.insert((abs_start, *sub_size), d); flush_available(&mut data_map, &mut self.pending, &mut self.yielded); } Err(e) => { flush_available(&mut data_map, &mut self.pending, &mut self.yielded); // If we yielded anything this batch, defer // the fallback — the caller asked for one // range at a time via next(), so don't // burn a fresh GET until they actually // reach a range we couldn't satisfy. If we // have nothing to show, fall back now so // the caller sees results rather than an // infinite empty-batch loop. 
if self.yielded.is_empty() { self.run_eager_fallback(Some(e)); } else { self.deferred_fallback = Some(e); } return true; } } } } flush_available(&mut data_map, &mut self.pending, &mut self.yielded); true } /// Fall back to the eager readv path for any remaining offsets /// plus the current failure. Matches the previous behaviour for /// the retry-worthy error categories — those need the full /// degrade-and-retry loop which is too invasive to replicate /// here. Invoked both synchronously when we have no partial /// results, and as the `deferred_fallback` discharge in /// `fetch_next_batch`. Degrades the range hint based on the /// failure shape before handing off so the eager path doesn't /// replay the same doomed request. fn run_eager_fallback(&mut self, cause: Option) { let remaining: Vec<(u64, usize)> = self.pending.iter().map(|(o, s)| (*o as u64, *s)).collect(); self.pending.clear(); self.batches.clear(); self.exhausted = true; match &cause { Some(Error::InvalidHttpRange { .. }) => { self.transport.jump_range_hint_to_none(); } Some(Error::InvalidHttpResponse { .. }) | Some(Error::ShortReadvError(_, _, _, _)) => { self.transport.degrade_range_hint(); } _ => {} } let results = self.transport.readv_eager(&self.relpath, remaining); for r in results { self.yielded.push_back(r); } } } impl Iterator for LazyReadv { type Item = Result<(u64, Vec)>; fn next(&mut self) -> Option { loop { if let Some(next) = self.yielded.pop_front() { return Some(next); } if self.exhausted { return None; } if !self.fetch_next_batch() { // No more batches left; whatever is in `yielded` // got yielded on prior iterations. return None; } } } } /// Carve a coalesced offset list into batches honouring the per- /// request caps (max_get_ranges, get_max_size) and the current /// range hint. Shared between `readv_one_pass` and `LazyReadv`. 
fn compute_batches( coalesced: &[(usize, usize, Vec<(usize, usize)>)], tuning: &ReadvTuning, hint: &RangeHint, ) -> std::collections::VecDeque)>> { let max_get_ranges = tuning.max_get_ranges.max(1); let get_max_size = tuning.get_max_size; let mut batches: std::collections::VecDeque)>> = std::collections::VecDeque::new(); match hint { RangeHint::None => { batches.push_back(coalesced.to_vec()); } RangeHint::Single => { for coal in coalesced { batches.push_back(vec![coal.clone()]); } } RangeHint::Multi => { let mut current: Vec<(usize, usize, Vec<(usize, usize)>)> = Vec::new(); let mut acc_bytes = 0usize; for coal in coalesced { let length = coal.1; let would_exceed_size = get_max_size > 0 && acc_bytes + length > get_max_size && !current.is_empty(); let would_exceed_ranges = current.len() >= max_get_ranges; if would_exceed_size || would_exceed_ranges { batches.push_back(std::mem::take(&mut current)); acc_bytes = 0; } current.push(coal.clone()); acc_bytes += length; } if !current.is_empty() { batches.push_back(current); } } } batches } dromedary-0.1.5/src/lib.rs000066400000000000000000000646521520150013200154100ustar00rootroot00000000000000use crate::lock::{Lock, LockError}; use std::collections::HashMap; use std::fs::{Metadata, Permissions}; use std::io::{Read, Seek}; #[cfg(unix)] use std::os::unix::fs::PermissionsExt; use std::time::UNIX_EPOCH; use url::Url; #[derive(Debug)] pub enum Error { InProcessTransport, NotLocalUrl(String), NoSuchFile(Option), FileExists(Option), TransportNotPossible(Option), UrlError(url::ParseError), UrlutilsError(crate::urlutils::Error), PermissionDenied(Option), Io(std::io::Error), PathNotChild, UnexpectedEof, ShortReadvError(String, u64, u64, u64), LockContention(std::path::PathBuf), LockFailed(std::path::PathBuf, String), IsADirectoryError(Option), NotADirectoryError(Option), DirectoryNotEmptyError(Option), ResourceBusy(Option), /// HTTP server returned a status code we couldn't interpret, or /// the response body was malformed beyond 
what RangeFile could /// parse. Maps to `dromedary.errors.InvalidHttpResponse`. InvalidHttpResponse { path: String, msg: String, }, /// HTTP server returned a status code we *can* interpret but /// didn't expect at this point. Maps to /// `dromedary.errors.UnexpectedHttpStatus`. UnexpectedHttpStatus { path: String, code: u16, extra: Option, }, /// HTTP server's response to a Range request was malformed or /// rejected our range. Maps to `dromedary.errors.InvalidHttpRange`. InvalidHttpRange { path: String, range: String, msg: String, }, /// HTTP server returned 400 (Bad Request) without us asking for /// a Range — usually a client-side bug or malformed URL. Maps to /// `dromedary.errors.BadHttpRequest`. BadHttpRequest { path: String, reason: String, }, /// HTTP server redirected us but we weren't asked to follow. /// Carries the original and target URLs so callers can retry on /// a fresh transport. Maps to `dromedary.errors.RedirectRequested`. RedirectRequested { source: String, target: String, is_permanent: bool, }, /// HTTP server tried to redirect us somewhere that doesn't fit /// the transport's URL shape (e.g. a different scheme). Maps to /// `dromedary.errors.UnusableRedirect`. UnusableRedirect { source: String, target: String, reason: String, }, /// Network-level failure talking to the server — DNS, TCP, /// TLS handshake — distinct from an `Io` error during a /// successful exchange. Maps to `dromedary.errors.ConnectionError`. 
ConnectionError(String), } pub type Result = std::result::Result; pub type UrlFragment = str; pub fn map_io_err_to_transport_err(err: std::io::Error, path: Option<&str>) -> Error { match err.kind() { std::io::ErrorKind::NotFound => Error::NoSuchFile(path.map(|p| p.to_string())), std::io::ErrorKind::AlreadyExists => Error::FileExists(path.map(|p| p.to_string())), std::io::ErrorKind::PermissionDenied => { Error::PermissionDenied(path.map(|p| p.to_string())) } // use of unstable library feature 'io_error_more' // https://github.com/rust-lang/rust/issues/86442 // // std::io::ErrorKind::NotADirectoryError => Error::NotADirectoryError(None), // std::io::ErrorKind::IsADirectoryError => Error::IsADirectoryError(None), _ => { #[cfg(unix)] { match err.raw_os_error() { Some(e) if e == libc::ENOTDIR => { Error::NotADirectoryError(path.map(|p| p.to_string())) } Some(e) if e == libc::EISDIR => { Error::IsADirectoryError(path.map(|p| p.to_string())) } Some(e) if e == libc::ENOTEMPTY => { Error::DirectoryNotEmptyError(path.map(|p| p.to_string())) } _ => Error::Io(err), } } #[cfg(windows)] { // Windows error codes from winerror.h. Mirror the unix // mapping above for the equivalents that show up via // `std::fs` operations. const ERROR_DIRECTORY: i32 = 267; // The directory name is invalid. const ERROR_DIR_NOT_EMPTY: i32 = 145; match err.raw_os_error() { Some(e) if e == ERROR_DIRECTORY => { Error::NotADirectoryError(path.map(|p| p.to_string())) } Some(e) if e == ERROR_DIR_NOT_EMPTY => { Error::DirectoryNotEmptyError(path.map(|p| p.to_string())) } _ => Error::Io(err), } } } } } impl From for Error { fn from(err: url::ParseError) -> Self { Error::UrlError(err) } } impl From for Error { fn from(err: crate::urlutils::Error) -> Self { Error::UrlutilsError(err) } } /// Compute a relative path for `abspath` against `base`. 
/// /// Mirrors the Python `Transport.relpath` base-class implementation: /// accepts `base` with or without its trailing slash, and strips any /// trailing slash from the returned relpath. Transports whose URL /// scheme doesn't need special handling can call this directly from /// their `relpath` impl. pub fn relpath_against_base(base: &Url, abspath: &Url) -> Result { let base_str = base.as_str(); let target = abspath.as_str(); // Accept the exact base, or the base with its trailing slash stripped. let base_no_slash = base_str.strip_suffix('/').unwrap_or(base_str); if target == base_no_slash { return Ok(String::new()); } match target.strip_prefix(base_str) { Some(rest) => Ok(rest.trim_end_matches('/').to_string()), None => Err(Error::PathNotChild), } } /// Coarse file kind. Mirrors `std::fs::FileType` but is cross-platform and /// sidesteps the Unix-only mode-bit parsing the old implementation relied on. #[derive(Debug, Clone, Copy, PartialEq, Eq)] pub enum FileKind { File, Dir, Symlink, Other, } pub struct Stat { pub size: usize, /// Unix permission bits. Not present on Windows — see /// `memory/project_windows_port.md` for the design rationale. 
#[cfg(unix)] pub mode: u32, pub kind: FileKind, pub mtime: Option, } impl From for Stat { fn from(metadata: Metadata) -> Self { let ft = metadata.file_type(); let kind = if ft.is_dir() { FileKind::Dir } else if ft.is_file() { FileKind::File } else if ft.is_symlink() { FileKind::Symlink } else { FileKind::Other }; Stat { size: metadata.len() as usize, #[cfg(unix)] mode: metadata.permissions().mode(), kind, mtime: metadata.modified().map_or(None, |t| { Some(t.duration_since(UNIX_EPOCH).unwrap().as_secs_f64()) }), } } } impl Stat { pub fn is_dir(&self) -> bool { self.kind == FileKind::Dir } pub fn is_file(&self) -> bool { self.kind == FileKind::File } } pub trait WriteStream: std::io::Write { fn sync_data(&self) -> std::io::Result<()>; } pub trait ReadStream: Read + Seek {} pub trait Transport: std::fmt::Debug + 'static + Send + Sync { /// Return a URL for self that can be given to an external process. /// /// There is no guarantee that the URL can be accessed from a different /// machine - e.g. file:/// urls are only usable on the local machine, /// sftp:/// urls when the server is only bound to localhost are only /// usable from localhost etc. /// /// NOTE: This method may remove security wrappers (e.g. on chroot /// transports) and thus should *only* be used when the result will not /// be used to obtain a new transport within breezy. Ideally chroot /// transports would know enough to cause the external url to be the exact /// one used that caused the chrooting in the first place, but that is not /// currently the case. /// /// Returns: A URL that can be given to another process. /// Raises:InProcessTransport: If the transport is one that cannot be /// accessed out of the current process (e.g. a MemoryTransport) /// then InProcessTransport is raised. 
fn external_url(&self) -> Result; fn can_roundtrip_unix_modebits(&self) -> bool; fn get_bytes(&self, relpath: &UrlFragment) -> Result> { let mut file = self.get(relpath)?; let mut result = Vec::new(); file.read_to_end(&mut result) .map_err(|err| map_io_err_to_transport_err(err, Some(relpath)))?; Ok(result) } fn get(&self, relpath: &UrlFragment) -> Result>; fn base(&self) -> Url; /// Ensure that the directory this transport references exists. /// /// This will create a directory if it doesn't exist. /// Returns: True if the directory was created, False otherwise. fn ensure_base(&self, permissions: Option) -> Result { if let Err(err) = self.mkdir(".", permissions) { match err { Error::FileExists(_) => Ok(false), Error::PermissionDenied(_) => Ok(false), Error::TransportNotPossible(_) => { if self.has(".")? { Ok(false) } else { Err(err) } } _ => Err(err), } } else { Ok(true) } } fn create_prefix(&self, permissions: Option) -> Result<()> { let mut cur_transport = self.clone(None)?; let mut needed = vec![]; loop { match cur_transport.mkdir(".", permissions.clone()) { Err(Error::NoSuchFile(_)) => { let new_transport = Transport::clone(cur_transport.as_ref(), Some(".."))?; assert_ne!( new_transport.base(), cur_transport.base(), "Failed to create path prefix for {}", cur_transport.base() ); needed.push(cur_transport); cur_transport = new_transport; } Err(Error::FileExists(_)) | Ok(()) => { break; } Err(err) => { return Err(err); } } } while let Some(transport) = needed.pop() { transport.ensure_base(permissions.clone())?; } Ok(()) } fn has(&self, relpath: &UrlFragment) -> Result; fn has_any(&self, relpaths: &[&UrlFragment]) -> Result { for relpath in relpaths { if self.has(relpath)? 
{ return Ok(true); } } Ok(false) } fn mkdir(&self, relpath: &UrlFragment, permissions: Option) -> Result<()>; fn stat(&self, relpath: &UrlFragment) -> Result; fn clone(&self, offset: Option<&UrlFragment>) -> Result>; fn abspath(&self, relpath: &UrlFragment) -> Result; fn relpath(&self, abspath: &Url) -> Result; fn put_file( &self, relpath: &UrlFragment, f: &mut dyn Read, permissions: Option, ) -> Result; fn put_bytes( &self, relpath: &UrlFragment, data: &[u8], permissions: Option, ) -> Result<()> { let mut f = std::io::Cursor::new(data); self.put_file(relpath, &mut f, permissions)?; Ok(()) } fn put_file_non_atomic( &self, relpath: &UrlFragment, f: &mut dyn Read, permissions: Option, create_parent_dir: Option, dir_permissions: Option, ) -> Result<()> { match self.put_file(relpath, f, permissions.clone()) { Ok(_) => Ok(()), Err(Error::NoSuchFile(filename)) => { if create_parent_dir.unwrap_or(false) { if let Some(parent) = relpath.rsplit_once('/').map(|x| x.0) { self.mkdir(parent, dir_permissions)?; self.put_file(relpath, f, permissions)?; Ok(()) } else { Err(Error::NoSuchFile(filename)) } } else { Err(Error::NoSuchFile(filename)) } } Err(err) => Err(err), } } fn put_bytes_non_atomic( &self, relpath: &UrlFragment, data: &[u8], permissions: Option, create_parent_dir: Option, dir_permissions: Option, ) -> Result<()> { let mut f = std::io::Cursor::new(data); self.put_file_non_atomic( relpath, &mut f, permissions, create_parent_dir, dir_permissions, ) } fn delete(&self, relpath: &UrlFragment) -> Result<()>; fn rmdir(&self, relpath: &UrlFragment) -> Result<()>; fn rename(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()>; fn set_segment_parameter(&mut self, key: &str, value: Option<&str>) -> Result<()>; fn get_segment_parameters(&self) -> Result>; /// Return the recommended page size for this transport. /// /// This is potentially different for every path in a given namespace. 
/// For example, local transports might use an operating system call to /// get the block size for a given path, which can vary due to mount /// points. /// /// Returns: The page size in bytes. fn recommended_page_size(&self) -> usize { 4 * 1024 } fn is_readonly(&self) -> bool { false } fn readv<'a>( &self, relpath: &'a UrlFragment, offsets: Vec<(u64, usize)>, adjust_for_latency: bool, upper_limit: Option, ) -> Box)>> + Send + 'a> { let offsets = if adjust_for_latency { crate::readv::sort_expand_and_combine( offsets, upper_limit, self.recommended_page_size(), ) } else { offsets }; let buf = match self.get_bytes(relpath) { Err(err) => return Box::new(std::iter::once(Err(err))), Ok(file) => file, }; let mut file = std::io::Cursor::new(buf); Box::new( offsets .into_iter() .map(move |(offset, length)| -> Result<(u64, Vec)> { let mut buf = vec![0; length]; match file.seek(std::io::SeekFrom::Start(offset)) { Ok(_) => {} Err(err) => match err.kind() { std::io::ErrorKind::UnexpectedEof => { return Err(Error::ShortReadvError( relpath.to_owned(), offset, length as u64, file.position().saturating_sub(offset), )) } _ => return Err(map_io_err_to_transport_err(err, Some(relpath))), }, } match file.read_exact(&mut buf) { Ok(_) => Ok((offset, buf)), Err(err) => match err.kind() { std::io::ErrorKind::UnexpectedEof => Err(Error::ShortReadvError( relpath.to_owned(), offset, length as u64, file.position().saturating_sub(offset), )), _ => Err(map_io_err_to_transport_err(err, Some(relpath))), }, } }), ) } fn append_bytes( &self, relpath: &UrlFragment, data: &[u8], permissions: Option, ) -> Result { let mut f = std::io::Cursor::new(data); self.append_file(relpath, &mut f, permissions) } fn append_file( &self, relpath: &UrlFragment, f: &mut dyn std::io::Read, permissions: Option, ) -> Result; fn readlink(&self, relpath: &UrlFragment) -> Result; fn hardlink(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()>; fn symlink(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> 
Result<()>; fn iter_files_recursive(&self) -> Box>>; fn open_write_stream( &self, relpath: &UrlFragment, permissions: Option, ) -> Result>; fn delete_tree(&self, relpath: &UrlFragment) -> Result<()>; /// Move an entry, overwriting the destination if it exists. /// /// Mirrors Python's Transport.move default: delegates to copy/copy_tree /// then delete/delete_tree, which handles overwrite via copy's /// replace-on-write semantics. Transports with a native atomic move /// should override. fn r#move(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { if self.stat(rel_from)?.is_dir() { self.copy_tree(rel_from, rel_to)?; self.delete_tree(rel_from)?; } else { self.copy(rel_from, rel_to)?; self.delete(rel_from)?; } Ok(()) } fn copy_tree(&self, from_relpath: &UrlFragment, to_relpath: &UrlFragment) -> Result<()> { let source = self.clone(Some(from_relpath))?; let target = self.clone(Some(to_relpath))?; // create target directory with the same rwx bits as source // use umask to ensure bits other than rwx are ignored let stat = self.stat(from_relpath)?; #[cfg(unix)] let perms = Some(Permissions::from_mode(stat.mode)); #[cfg(not(unix))] let perms: Option = { let _ = stat; None }; target.mkdir(".", perms)?; source.copy_tree_to_transport(target.as_ref())?; Ok(()) } fn copy_tree_to_transport(&self, to_transport: &dyn Transport) -> Result<()> { let mut files = Vec::new(); let mut directories = vec![".".to_string()]; while let Some(dir) = directories.pop() { if dir != "." 
{ to_transport.mkdir(dir.as_str(), None)?; } for entry in self.list_dir(dir.as_str()) { let entry = entry?; let full_path = format!("{}/{}", dir, entry); let stat = self.stat(&full_path)?; if stat.is_dir() { directories.push(full_path); } else { files.push(full_path); } } } self.copy_to( files .iter() .map(|x| x.as_str()) .collect::>() .as_slice(), to_transport, None, )?; Ok(()) } fn copy_to( &self, relpaths: &[&UrlFragment], to_transport: &dyn Transport, permissions: Option, ) -> Result { copy_to(self, to_transport, relpaths, permissions) } fn list_dir(&self, relpath: &UrlFragment) -> Box>>; fn listable(&self) -> bool { true } fn lock_read(&self, relpath: &UrlFragment) -> Result>; fn lock_write(&self, relpath: &UrlFragment) -> Result>; fn local_abspath(&self, relpath: &UrlFragment) -> Result; fn copy(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()>; } /// Transport that connects to a remote server. /// /// Provides a common shape for transports that need to expose their /// connection endpoint (host, port, credentials) so higher-level code /// can reason about connection sharing — notably /// `get_transport_from_url(possible_transports=…)`, which walks existing /// transports looking for one that already talks to the same origin. /// /// The default implementations all parse `Transport::base()` via the /// `connected_url_*` helpers below, so concrete transports rarely need /// to override anything; declaring `impl ConnectedTransport for MyT {}` /// is usually enough. pub trait ConnectedTransport: Transport { fn scheme(&self) -> String { connected_url_scheme(&self.base()) } fn host(&self) -> Option { connected_url_host(&self.base()) } fn port(&self) -> Option { connected_url_port(&self.base()) } fn user(&self) -> Option { connected_url_user(&self.base()) } fn password(&self) -> Option { connected_url_password(&self.base()) } fn path(&self) -> String { connected_url_path(&self.base()) } /// Drop any cached connection state. 
Default no-op — transports /// with an explicit connection handle (SSH sessions, SFTP /// channels) override this to tear it down. fn disconnect(&self) -> Result<()> { Ok(()) } } /// URL scheme (`"http"`, `"https"`, `"sftp"`, …). pub fn connected_url_scheme(url: &Url) -> String { url.scheme().to_string() } /// Host portion of the URL, or `None` for URLs without a host /// component (e.g. `file:///`). pub fn connected_url_host(url: &Url) -> Option { url.host_str().map(|s| s.to_string()) } /// TCP port if explicitly present in the URL. Callers that want the /// default port for the scheme should fall back themselves. pub fn connected_url_port(url: &Url) -> Option { url.port() } /// URL-decoded username, or `None` if the URL has no userinfo. pub fn connected_url_user(url: &Url) -> Option { let raw = url.username(); if raw.is_empty() { return None; } percent_encoding::percent_decode_str(raw) .decode_utf8() .ok() .map(|s| s.into_owned()) } /// URL-decoded password, or `None` if the URL has no password. pub fn connected_url_password(url: &Url) -> Option { let raw = url.password()?; percent_encoding::percent_decode_str(raw) .decode_utf8() .ok() .map(|s| s.into_owned()) } /// Path portion of the URL (always starts with `/`). pub fn connected_url_path(url: &Url) -> String { url.path().to_string() } /// Result of comparing a connected transport's base URL against /// another URL. Drives the `_reuse_for` / connection-pooling logic. #[derive(Debug, PartialEq, Eq)] pub enum ReuseMatch { /// `other_base` addresses the same origin and the same path as /// the base URL — callers return `self` unchanged. Same, /// `other_base` addresses the same origin but a different path. /// Callers construct a sibling transport at `other_base` sharing /// this transport's connection state. Sibling, /// Different origin (or unparseable URL). No reuse possible. None, } /// Decide whether a transport at `base` can be reused for /// `other_base`. 
Pure function over URLs so the PyO3 layer and any /// pure-Rust caller share the same comparison rules. An unparseable /// `other_base` is treated as `None` rather than an error — reuse is /// advisory and the caller will construct a fresh transport. pub fn classify_reuse_for(base: &Url, other_base: &str) -> ReuseMatch { let other = match Url::parse(other_base) { Ok(u) => u, Err(_) => return ReuseMatch::None, }; // Compare against the unqualified form of `other`'s scheme so // `http+urllib://` and `http://` are treated as equivalent for // reuse purposes — they'd produce the same underlying transport. let other_scheme = connected_url_scheme(&other); let other_scheme_unqualified = other_scheme .split_once('+') .map(|(s, _)| s.to_string()) .unwrap_or(other_scheme); if connected_url_scheme(base) != other_scheme_unqualified || connected_url_host(base) != connected_url_host(&other) || connected_url_port(base) != connected_url_port(&other) || connected_url_user(base) != connected_url_user(&other) { return ReuseMatch::None; } // Normalise trailing slash so `/foo` and `/foo/` compare equal — // `get_transport_from_url` can see either form from caller input. 
let ensure_slash = |mut s: String| { if !s.ends_with('/') { s.push('/'); } s }; let self_path = ensure_slash(base.path().to_string()); let other_path = ensure_slash(other.path().to_string()); if self_path == other_path { ReuseMatch::Same } else { ReuseMatch::Sibling } } pub fn copy_to( from_transport: &T, to_transport: &dyn Transport, relpaths: &[&UrlFragment], permissions: Option, ) -> Result { let mut count = 0; relpaths.iter().try_for_each(|relpath| -> Result<()> { let mut src = from_transport.get(relpath)?; let mut target = to_transport.open_write_stream(relpath, permissions.clone())?; std::io::copy(&mut src, &mut target) .map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; count += 1; Ok(()) })?; Ok(count) } pub mod local; pub mod brokenrename; pub mod chroot; pub mod decorator; pub mod fakenfs; pub mod fakevfat; #[cfg(feature = "gio")] pub mod gio; pub mod http; #[cfg(feature = "webdav")] pub mod webdav; pub mod log; pub mod memory; pub mod pathfilter; pub mod readonly; pub mod ssh; #[cfg(feature = "sftp")] pub mod sftp; pub mod registry; pub mod unlistable; pub mod osutils; #[cfg(feature = "pyo3")] pub mod pyo3; pub mod readv; #[cfg(unix)] #[path = "fcntl-locks.rs"] pub mod filelock; #[cfg(target_os = "windows")] #[path = "win32-locks.rs"] pub mod filelock; pub mod lock; pub mod urlutils; dromedary-0.1.5/src/local.rs000066400000000000000000000424321520150013200157240ustar00rootroot00000000000000use crate::lock::LockError; use crate::urlutils::{escape, unescape}; use crate::{ map_io_err_to_transport_err, Error, Lock, ReadStream, Result, Stat, Transport, UrlFragment, WriteStream, }; use std::collections::HashMap; use std::convert::TryFrom; use std::fs::File; use std::fs::Permissions; use std::io::{Read, Seek}; use std::path::{Path, PathBuf}; use url::Url; use walkdir; pub struct LocalTransport { base: Url, path: PathBuf, } impl TryFrom<&Path> for LocalTransport { type Error = Error; fn try_from(path: &Path) -> Result { let url = 
crate::urlutils::local_path_to_url(path).map_err(|e| { map_io_err_to_transport_err(e, Some(path.to_path_buf().to_str().unwrap())) })?; LocalTransport::new(&url) } } impl TryFrom for LocalTransport { type Error = Error; fn try_from(url: Url) -> Result { LocalTransport::new(url.as_str()) } } impl Clone for LocalTransport { fn clone(&self) -> Self { LocalTransport { path: self.path.clone(), base: self.base.clone(), } } } impl WriteStream for File { fn sync_data(&self) -> std::io::Result<()> { self.sync_data() } } impl ReadStream for File {} impl LocalTransport { pub fn new(base: &str) -> Result { let base = if base.ends_with('/') { base.to_string() } else { format!("{}/", base) }; let mut path = crate::urlutils::local_path_from_url(&base)?; if !path.to_string_lossy().ends_with('/') { path.push("") } let base = Url::parse(&base)?; Ok(LocalTransport { base, path }) } pub fn from_abspath(path: &Path) -> Result { let url = crate::urlutils::local_path_to_url(path).map_err(|e| { map_io_err_to_transport_err(e, Some(path.to_path_buf().to_str().unwrap())) })?; LocalTransport::new(&url) } fn _abspath(&self, relative_reference: &str) -> Result { if relative_reference == "." 
|| relative_reference.is_empty() { Ok(self.path.clone()) } else { let mut ret = self.path.clone(); let extra = crate::urlutils::unescape(relative_reference)?; let extra = extra.trim_start_matches('/'); ret.push(extra); Ok(ret) } } } impl std::fmt::Debug for LocalTransport { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "LocalTransport({})", self.base) } } fn lock_err_to_transport_err(e: LockError) -> Error { match e { LockError::Contention(p) => Error::LockContention(p), LockError::IoError(e) => Error::Io(e), LockError::Failed(p, w) => Error::LockFailed(p, w), } } impl Transport for LocalTransport { fn external_url(&self) -> Result { Ok(self.base.clone()) } fn base(&self) -> Url { self.base.clone() } fn local_abspath(&self, relpath: &UrlFragment) -> Result { let absurl = self.abspath(relpath)?; crate::urlutils::local_path_from_url(absurl.as_str()).map_err(Error::from) } fn can_roundtrip_unix_modebits(&self) -> bool { #[cfg(unix)] return true; #[cfg(not(unix))] return false; } fn get(&self, relpath: &UrlFragment) -> Result> { let path = self._abspath(relpath)?; let f = std::fs::File::open(path).map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; Ok(Box::new(f)) } fn mkdir(&self, relpath: &UrlFragment, permissions: Option) -> Result<()> { let path = self._abspath(relpath)?; std::fs::create_dir(&path).map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; if let Some(permissions) = permissions { std::fs::set_permissions(&path, permissions) .map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; } Ok(()) } fn has(&self, path: &UrlFragment) -> Result { let path = self._abspath(path)?; Ok(path.exists()) } fn stat(&self, relpath: &UrlFragment) -> Result { use std::ffi::OsStr; let path = self._abspath(relpath)?; // Strip trailing slashes, so we can properly stat broken symlinks. // We work on the encoded bytes of the OsStr so this stays // WTF-8-safe on Windows and UTF-8-safe on Unix. 
let path = { let b = path.as_path().as_os_str().as_encoded_bytes(); if let Some(stripped) = b.strip_suffix(b"/") { // SAFETY: we only stripped an ASCII byte; the rest of the // encoding is intact. PathBuf::from(unsafe { OsStr::from_encoded_bytes_unchecked(stripped) }) } else { path } }; Ok(Stat::from(std::fs::symlink_metadata(path).map_err( |e| map_io_err_to_transport_err(e, Some(relpath)), )?)) } fn clone(&self, offset: Option<&UrlFragment>) -> Result> { let new_base = match offset { Some(offset) => self.abspath(offset)?, None => self.base.clone(), }; Ok(Box::new(LocalTransport::new(new_base.as_str())?)) } fn abspath(&self, relpath: &UrlFragment) -> Result { let path = self.path.join(unescape(relpath)?); let path = crate::osutils::path::normpath(path); let url_str = crate::urlutils::local_path_to_url(path.as_path()) .map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; Ok(Url::parse(&url_str).unwrap()) } fn relpath(&self, abspath: &Url) -> Result { let relpath = crate::urlutils::file_relpath(self.base.as_str(), abspath.as_str()) .map_err(Error::from)?; Ok(relpath) } fn put_file( &self, relpath: &UrlFragment, f: &mut dyn Read, permissions: Option, ) -> Result { let path = self._abspath(relpath)?; let mut tmpfile = tempfile::Builder::new() .tempfile_in(path.parent().unwrap()) .map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; let n = std::io::copy(f, &mut tmpfile) .map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; let f = tmpfile .persist(&path) .map_err(|e| map_io_err_to_transport_err(e.error, Some(relpath)))?; if let Some(permissions) = permissions { f.set_permissions(permissions) .map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; } Ok(n) } fn delete(&self, relpath: &UrlFragment) -> Result<()> { let path = self._abspath(relpath)?; std::fs::remove_file(path).map_err(|e| map_io_err_to_transport_err(e, Some(relpath))) } fn rmdir(&self, relpath: &UrlFragment) -> Result<()> { let path = self._abspath(relpath)?; 
std::fs::remove_dir(path).map_err(|e| map_io_err_to_transport_err(e, Some(relpath))) } fn rename(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { let abs_from = self._abspath(rel_from)?; let abs_to = self._abspath(rel_to)?; std::fs::rename(abs_from, abs_to) .map_err(|e| map_io_err_to_transport_err(e, Some(rel_from))) } fn set_segment_parameter(&mut self, key: &str, value: Option<&str>) -> Result<()> { let (raw, mut params) = crate::urlutils::split_segment_parameters(self.base.as_str())?; if let Some(value) = value { params.insert(key, value); } else { params.remove(key); } self.base = Url::parse(&crate::urlutils::join_segment_parameters(raw, ¶ms)?)?; Ok(()) } fn get_segment_parameters(&self) -> Result> { let (_, params) = crate::urlutils::split_segment_parameters(self.base.as_str())?; Ok(params .iter() .map(|(k, v)| (k.to_string(), v.to_string())) .collect()) } #[cfg(unix)] fn readv<'a>( &self, path: &'a UrlFragment, offsets: Vec<(u64, usize)>, adjust_for_latency: bool, upper_limit: Option, ) -> Box)>> + Send + 'a> { let offsets = if adjust_for_latency { crate::readv::sort_expand_and_combine( offsets, upper_limit, self.recommended_page_size(), ) } else { offsets }; use nix::libc::off_t; use nix::sys::uio::pread; let abspath = match self._abspath(path) { Ok(p) => p, Err(err) => return Box::new(std::iter::once(Err(err))), }; let file = match std::fs::File::open(abspath) { Ok(f) => f, Err(err) => { return Box::new(std::iter::once(Err(map_io_err_to_transport_err( err, Some(path), )))) } }; Box::new( offsets .into_iter() .map(move |(offset, len)| -> Result<(u64, Vec)> { let mut buf = vec![0; len]; match pread(&file, &mut buf[..], offset as off_t) { Ok(n) if n == len => Ok((offset, buf)), Ok(n) => Err(Error::ShortReadvError( path.to_owned(), offset, len as u64, n as u64, )), Err(e) => Err(map_io_err_to_transport_err( std::io::Error::from_raw_os_error(e as i32), Some(path), )), } }), ) } fn append_file( &self, relpath: &UrlFragment, f: &mut dyn 
std::io::Read, permissions: Option, ) -> Result { let path = self._abspath(relpath)?; let mut file = std::fs::OpenOptions::new() .append(true) .create(true) .open(path) .map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; if let Some(permissions) = permissions { file.set_permissions(permissions) .map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; } let pos = file .seek(std::io::SeekFrom::End(0)) .map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; std::io::copy(f, &mut file).map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; Ok(pos) } fn readlink(&self, relpath: &UrlFragment) -> Result { let path = self._abspath(relpath)?; let target = std::fs::read_link(path).map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; Ok(escape(target.as_os_str().as_encoded_bytes(), None)) } fn hardlink(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { let from = self._abspath(rel_from)?; let to = self._abspath(rel_to)?; std::fs::hard_link(from, to).map_err(|e| map_io_err_to_transport_err(e, Some(rel_from))) } fn symlink(&self, source: &UrlFragment, link_name: &UrlFragment) -> Result<()> { let abs_to = self.abspath(link_name)?; let abs_link_dirpath = crate::urlutils::dirname(abs_to.as_str(), true); let source_rel = crate::urlutils::file_relpath( abs_link_dirpath.as_str(), self.abspath(source)?.as_str(), )?; #[cfg(unix)] { std::os::unix::fs::symlink(source_rel, self._abspath(link_name)?) .map_err(|e| map_io_err_to_transport_err(e, Some(link_name))) } #[cfg(windows)] { // On Windows `symlink_file` / `symlink_dir` require distinguishing // file vs directory targets and typically Administrator privilege // or Developer Mode. Emulate the common case (file symlink). // TODO(windows): pick `symlink_dir` when the source is a directory. 
let _ = source_rel; let _ = link_name; Err(Error::Io(std::io::Error::new( std::io::ErrorKind::Unsupported, "symlink is not implemented on Windows yet", ))) } } fn iter_files_recursive(&self) -> Box>> { let wd = walkdir::WalkDir::new(&self.path); fn walkdir_err(e: walkdir::Error) -> Error { let ioerr: std::io::Error = e.into(); map_io_err_to_transport_err(ioerr, None) } let base = self.path.clone(); Box::new(wd.into_iter().filter_map(move |e| match e { Ok(e) => { if !e.file_type().is_dir() { Some(Ok(escape( e.path() .strip_prefix(base.as_path()) .unwrap() .as_os_str() .as_encoded_bytes(), None, ))) } else { None } } Err(e) => Some(Err(walkdir_err(e))), })) } fn open_write_stream( &self, relpath: &UrlFragment, permissions: Option, ) -> Result> { let path = self._abspath(relpath)?; let file = File::create(path).map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; file.set_len(0) .map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; if let Some(permissions) = permissions { file.set_permissions(permissions) .map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; } Ok(Box::new(file)) } fn delete_tree(&self, relpath: &UrlFragment) -> Result<()> { let path = self._abspath(relpath)?; std::fs::remove_dir_all(path).map_err(|e| map_io_err_to_transport_err(e, Some(relpath))) } fn r#move(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { let from = self._abspath(rel_from)?; let to = self._abspath(rel_to)?; // TODO(jelmer): Should remove destination if necessary std::fs::rename(from, to).map_err(|e| map_io_err_to_transport_err(e, Some(rel_from))) } fn list_dir(&self, relpath: &UrlFragment) -> Box>> { let path = match self._abspath(relpath) { Ok(p) => p, Err(err) => return Box::new(std::iter::once(Err(err))), }; let entries = match std::fs::read_dir(path) .map_err(|e| map_io_err_to_transport_err(e, Some(relpath))) { Ok(e) => e, Err(err) => return Box::new(std::iter::once(Err(err))), }; Box::new( entries .map(|entry| entry.map_err(|e| 
map_io_err_to_transport_err(e, None))) .map(|entry| { entry .map(|entry| escape(entry.file_name().as_os_str().as_encoded_bytes(), None)) }), ) } fn listable(&self) -> bool { true } fn lock_read(&self, relpath: &UrlFragment) -> Result> { let path = self._abspath(relpath)?; let lock = crate::filelock::ReadLock::new(path.as_path(), false) .map_err(lock_err_to_transport_err)?; Ok(Box::new(lock)) } fn lock_write(&self, relpath: &UrlFragment) -> Result> { let path = self._abspath(relpath)?; let lock = crate::filelock::WriteLock::new(path.as_path(), false) .map_err(lock_err_to_transport_err)?; Ok(Box::new(lock)) } fn copy_to( &self, relpaths: &[&UrlFragment], target: &dyn Transport, permissions: Option, ) -> Result { if relpaths.is_empty() { return Ok(0); } match target.local_abspath(relpaths[0]) { // Fall back to default Err(Error::NotLocalUrl(_)) => { return super::copy_to(self, target, relpaths, permissions) } Err(e) => return Err(e), _ => {} } let mut count = 0; relpaths.iter().try_for_each(|relpath| { let path = self._abspath(relpath)?; let target_path = target.local_abspath(relpath)?; std::fs::copy(path, &target_path) .map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; if let Some(permissions) = permissions.clone() { std::fs::set_permissions(target_path, permissions) .map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; } count += 1; Ok::<(), Error>(()) })?; Ok(count) } fn copy(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { std::fs::copy( self._abspath(rel_from)?.as_path(), self._abspath(rel_to)?.as_path(), ) .map_err(|e| map_io_err_to_transport_err(e, Some(rel_from)))?; Ok(()) } } dromedary-0.1.5/src/lock.rs000066400000000000000000000012321520150013200155530ustar00rootroot00000000000000pub trait Lock { fn unlock(&mut self) -> std::result::Result<(), LockError>; } pub enum LockError { Contention(std::path::PathBuf), Failed(std::path::PathBuf, String), IoError(std::io::Error), } pub type LockResult = std::result::Result; impl 
From for LockError { fn from(err: std::io::Error) -> LockError { LockError::IoError(err) } } pub struct BogusLock; impl Lock for BogusLock { fn unlock(&mut self) -> std::result::Result<(), LockError> { Ok(()) } } pub trait FileLock { fn file(&self) -> std::io::Result>; fn path(&self) -> &std::path::Path; } dromedary-0.1.5/src/log.rs000066400000000000000000000453331520150013200154160ustar00rootroot00000000000000//! Transport decorator that logs each operation, ported from //! `dromedary/log.py`. //! //! [`LogTransport`] wraps any Transport and emits a debug line per call //! via a user-supplied sink. The sink is a closure rather than a direct //! dependency on Python's `logging` module so this module stays usable //! from pure-Rust callers; the PyO3 wrapper in //! `_transport_rs::log` supplies a sink that forwards to //! `dromedary.log.logger.debug`. use crate::lock::Lock; use crate::{Error, ReadStream, Result, Stat, Transport, UrlFragment, WriteStream}; use std::fs::Permissions; use std::io::Read; use std::sync::Arc; use url::Url; /// Message sink for a LogTransport. Wrapped in `Arc` so that clones of a /// LogTransport (produced via `Transport::clone`) can continue logging /// through the same sink as the original. 
pub type LogSink = Arc; pub struct LogTransport { inner: Box, sink: LogSink, } impl LogTransport { pub const PREFIX: &'static str = "log+"; pub fn new(inner: Box, sink: LogSink) -> Self { Self { inner, sink } } fn log_call(&self, method: &str, relpath: &str, extra: &str) { let msg = if extra.is_empty() { format!("{} {} ", method, relpath) } else { format!("{} {} {}", method, relpath, extra) }; (self.sink)(&msg); } fn log_result(&self, summary: &str) { (self.sink)(&format!(" --> {}", shorten(summary))); } fn log_error(&self, err: &Error) { (self.sink)(&format!(" --> {:?}", err)); } } impl std::fmt::Debug for LogTransport { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "LogTransport({})", self.base()) } } /// Trim long single-line repr output to match the Python `_shorten` helper. pub fn shorten(s: &str) -> String { if s.chars().count() > 70 { // Match Python's character-based slice (s[:67] + "..."). let end: usize = s.char_indices().nth(67).map(|(i, _)| i).unwrap_or(s.len()); let mut out = String::with_capacity(end + 3); out.push_str(&s[..end]); out.push_str("..."); out } else { s.to_string() } } /// Strip the enclosing parentheses of a Python tuple repr. Mirrors /// `_strip_tuple_parens` in the Python implementation. pub fn strip_tuple_parens(s: &str) -> &str { let bytes = s.as_bytes(); if bytes.len() >= 2 && bytes[0] == b'(' && bytes[bytes.len() - 1] == b')' { &s[1..s.len() - 1] } else { s } } /// Method names that the Python decorator wraps with the per-call logger. /// Exposed so the PyO3 layer (which owns the Python wrapper) can share the /// same list instead of duplicating it. 
pub const LOGGED_METHODS: &[&str] = &[ "append_bytes", "append_file", "copy_to", "delete", "get", "has", "open_write_stream", "mkdir", "move", "put_bytes", "put_bytes_non_atomic", "put_file", "put_file_non_atomic", "list_dir", "lock_read", "lock_write", "readv", "rename", "rmdir", "stat", "ulock", ]; impl Transport for LogTransport { crate::fwd_external_url!(inner); crate::fwd_can_roundtrip_unix_modebits!(inner); crate::fwd_is_readonly!(inner); crate::fwd_listable!(inner); crate::fwd_set_segment_parameter!(inner); crate::fwd_get_segment_parameters!(inner); crate::fwd_readlink!(inner); crate::fwd_hardlink!(inner); crate::fwd_symlink!(inner); crate::fwd_local_abspath!(inner); fn base(&self) -> Url { crate::decorator::prefixed_base(Self::PREFIX, self.inner.as_ref()) } fn abspath(&self, relpath: &UrlFragment) -> Result { crate::decorator::prefixed_abspath(Self::PREFIX, self.inner.as_ref(), relpath) } fn relpath(&self, abspath: &Url) -> Result { crate::decorator::stripped_relpath(Self::PREFIX, self.inner.as_ref(), abspath) } fn clone(&self, offset: Option<&UrlFragment>) -> Result> { let inner_clone = self.inner.clone(offset)?; Ok(Box::new(LogTransport::new( inner_clone, Arc::clone(&self.sink), ))) } fn get(&self, relpath: &UrlFragment) -> Result> { self.log_call("get", relpath, ""); match self.inner.get(relpath) { Ok(s) => { self.log_result(""); Ok(s) } Err(e) => { self.log_error(&e); Err(e) } } } fn has(&self, relpath: &UrlFragment) -> Result { self.log_call("has", relpath, ""); match self.inner.has(relpath) { Ok(v) => { self.log_result(if v { "True" } else { "False" }); Ok(v) } Err(e) => { self.log_error(&e); Err(e) } } } fn stat(&self, relpath: &UrlFragment) -> Result { self.log_call("stat", relpath, ""); match self.inner.stat(relpath) { Ok(s) => { self.log_result(""); Ok(s) } Err(e) => { self.log_error(&e); Err(e) } } } fn put_file( &self, relpath: &UrlFragment, f: &mut dyn Read, permissions: Option, ) -> Result { self.log_call("put_file", relpath, ""); match 
self.inner.put_file(relpath, f, permissions) { Ok(n) => { self.log_result(&n.to_string()); Ok(n) } Err(e) => { self.log_error(&e); Err(e) } } } fn mkdir(&self, relpath: &UrlFragment, permissions: Option) -> Result<()> { self.log_call("mkdir", relpath, ""); match self.inner.mkdir(relpath, permissions) { Ok(()) => { self.log_result("None"); Ok(()) } Err(e) => { self.log_error(&e); Err(e) } } } fn delete(&self, relpath: &UrlFragment) -> Result<()> { self.log_call("delete", relpath, ""); match self.inner.delete(relpath) { Ok(()) => { self.log_result("None"); Ok(()) } Err(e) => { self.log_error(&e); Err(e) } } } fn rmdir(&self, relpath: &UrlFragment) -> Result<()> { self.log_call("rmdir", relpath, ""); match self.inner.rmdir(relpath) { Ok(()) => { self.log_result("None"); Ok(()) } Err(e) => { self.log_error(&e); Err(e) } } } fn rename(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { self.log_call("rename", rel_from, rel_to); match self.inner.rename(rel_from, rel_to) { Ok(()) => { self.log_result("None"); Ok(()) } Err(e) => { self.log_error(&e); Err(e) } } } fn append_file( &self, relpath: &UrlFragment, f: &mut dyn Read, permissions: Option, ) -> Result { self.log_call("append_file", relpath, ""); match self.inner.append_file(relpath, f, permissions) { Ok(n) => { self.log_result(&n.to_string()); Ok(n) } Err(e) => { self.log_error(&e); Err(e) } } } fn iter_files_recursive(&self) -> Box>> { // Mirrors the Python override that logs without a relpath. 
(self.sink)(&format!("iter_files_recursive {}", self.base())); let results: Vec> = self.inner.iter_files_recursive().collect(); let summary = format!("{} entries", results.len()); self.log_result(&summary); Box::new(results.into_iter()) } fn open_write_stream( &self, relpath: &UrlFragment, permissions: Option, ) -> Result> { self.log_call("open_write_stream", relpath, ""); match self.inner.open_write_stream(relpath, permissions) { Ok(s) => { self.log_result(""); Ok(s) } Err(e) => { self.log_error(&e); Err(e) } } } fn delete_tree(&self, relpath: &UrlFragment) -> Result<()> { self.log_call("delete_tree", relpath, ""); match self.inner.delete_tree(relpath) { Ok(()) => { self.log_result("None"); Ok(()) } Err(e) => { self.log_error(&e); Err(e) } } } fn r#move(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { self.log_call("move", rel_from, rel_to); match self.inner.r#move(rel_from, rel_to) { Ok(()) => { self.log_result("None"); Ok(()) } Err(e) => { self.log_error(&e); Err(e) } } } fn list_dir(&self, relpath: &UrlFragment) -> Box>> { self.log_call("list_dir", relpath, ""); let results: Vec> = self.inner.list_dir(relpath).collect(); let summary = format!("{} entries", results.len()); self.log_result(&summary); Box::new(results.into_iter()) } fn lock_read(&self, relpath: &UrlFragment) -> Result> { self.log_call("lock_read", relpath, ""); match self.inner.lock_read(relpath) { Ok(l) => { self.log_result(""); Ok(l) } Err(e) => { self.log_error(&e); Err(e) } } } fn lock_write(&self, relpath: &UrlFragment) -> Result> { self.log_call("lock_write", relpath, ""); match self.inner.lock_write(relpath) { Ok(l) => { self.log_result(""); Ok(l) } Err(e) => { self.log_error(&e); Err(e) } } } fn copy(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { self.log_call("copy", rel_from, rel_to); match self.inner.copy(rel_from, rel_to) { Ok(()) => { self.log_result("None"); Ok(()) } Err(e) => { self.log_error(&e); Err(e) } } } fn readv<'a>( &self, relpath: 
&'a UrlFragment, offsets: Vec<(u64, usize)>, adjust_for_latency: bool, upper_limit: Option, ) -> Box)>> + Send + 'a> { self.log_call("readv", relpath, ""); // Collect so we can log the summary before yielding. Matches the // Python decorator which consumes the generator eagerly and returns // a fresh iterator. let results: Vec)>> = self .inner .readv(relpath, offsets, adjust_for_latency, upper_limit) .collect(); let (hunks, bytes) = results .iter() .filter_map(|r| r.as_ref().ok()) .fold((0usize, 0usize), |(h, b), (_, d)| (h + 1, b + d.len())); let summary = format!("readv response, {} hunks, {} total bytes", hunks, bytes); self.log_result(&summary); Box::new(results.into_iter()) } fn put_bytes( &self, relpath: &UrlFragment, data: &[u8], permissions: Option, ) -> Result<()> { self.log_call("put_bytes", relpath, ""); match self.inner.put_bytes(relpath, data, permissions) { Ok(()) => { self.log_result("None"); Ok(()) } Err(e) => { self.log_error(&e); Err(e) } } } fn append_bytes( &self, relpath: &UrlFragment, data: &[u8], permissions: Option, ) -> Result { self.log_call("append_bytes", relpath, ""); match self.inner.append_bytes(relpath, data, permissions) { Ok(n) => { self.log_result(&n.to_string()); Ok(n) } Err(e) => { self.log_error(&e); Err(e) } } } // Forward the *_non_atomic variants to the inner transport rather than // relying on the trait default. The default retries self.put_file after // a NoSuchFile, which would log twice and — when the inner is a // PyTransport — also consume the caller's stream on the first attempt, // leaving the retry with an empty reader. Delegating keeps the // non-atomic operation atomic from the inner's perspective. 
fn put_file_non_atomic( &self, relpath: &UrlFragment, f: &mut dyn Read, permissions: Option, create_parent_dir: Option, dir_permissions: Option, ) -> Result<()> { self.log_call("put_file_non_atomic", relpath, ""); match self.inner.put_file_non_atomic( relpath, f, permissions, create_parent_dir, dir_permissions, ) { Ok(()) => { self.log_result("None"); Ok(()) } Err(e) => { self.log_error(&e); Err(e) } } } fn put_bytes_non_atomic( &self, relpath: &UrlFragment, data: &[u8], permissions: Option, create_parent_dir: Option, dir_permissions: Option, ) -> Result<()> { self.log_call("put_bytes_non_atomic", relpath, ""); match self.inner.put_bytes_non_atomic( relpath, data, permissions, create_parent_dir, dir_permissions, ) { Ok(()) => { self.log_result("None"); Ok(()) } Err(e) => { self.log_error(&e); Err(e) } } } } #[cfg(test)] mod tests { use super::*; use crate::memory::MemoryTransport; use std::sync::{Arc, Mutex}; fn capturing_sink() -> (Arc>>, LogSink) { let buf: Arc>> = Arc::new(Mutex::new(Vec::new())); let buf_cl = Arc::clone(&buf); let sink: LogSink = Arc::new(move |msg: &str| buf_cl.lock().unwrap().push(msg.to_string())); (buf, sink) } fn wrap() -> (Arc>>, LogTransport) { let mem = MemoryTransport::new("memory:///").unwrap(); mem.put_bytes("hello", b"world", None).unwrap(); let (buf, sink) = capturing_sink(); (buf, LogTransport::new(Box::new(mem), sink)) } #[test] fn base_has_log_prefix() { let (_buf, t) = wrap(); assert!(t.base().as_str().starts_with("log+")); } #[test] fn mkdir_is_logged_with_result() { let (buf, t) = wrap(); t.mkdir("subdir", None).unwrap(); let log = buf.lock().unwrap().clone(); assert!(log.iter().any(|l| l.starts_with("mkdir subdir"))); assert!(log.iter().any(|l| l == " --> None")); } #[test] fn has_logs_true_and_false() { let (buf, t) = wrap(); assert!(t.has("hello").unwrap()); assert!(!t.has("missing").unwrap()); let log = buf.lock().unwrap().clone(); assert!(log.iter().any(|l| l == " --> True")); assert!(log.iter().any(|l| l == " --> 
False")); } #[test] fn readv_summary_matches_python_format() { let (buf, t) = wrap(); let results: Vec<_> = t.readv("hello", vec![(0, 5)], false, None).collect(); assert_eq!(results.len(), 1); let log = buf.lock().unwrap().clone(); assert!(log .iter() .any(|l| l == " --> readv response, 1 hunks, 5 total bytes")); } #[test] fn error_is_logged() { let (buf, t) = wrap(); let _ = t.get_bytes("nope"); let log = buf.lock().unwrap().clone(); assert!(log.iter().any(|l| l.starts_with(" --> "))); assert!(log.iter().any(|l| l.starts_with("get nope"))); } #[test] fn shorten_truncates_long_strings() { let long: String = "a".repeat(100); let out = shorten(&long); assert_eq!(out.len(), 70); assert!(out.ends_with("...")); } #[test] fn shorten_leaves_short_strings_alone() { assert_eq!(shorten("short"), "short"); } #[test] fn strip_tuple_parens_strips_and_skips() { assert_eq!(strip_tuple_parens("(1, 2, 3)"), "1, 2, 3"); assert_eq!(strip_tuple_parens("not a tuple"), "not a tuple"); assert_eq!(strip_tuple_parens("()"), ""); } #[test] fn list_dir_logs_entry_count() { let mem = MemoryTransport::new("memory:///").unwrap(); mem.mkdir("d", None).unwrap(); mem.put_bytes("d/a", b"1", None).unwrap(); mem.put_bytes("d/b", b"2", None).unwrap(); let (buf, sink) = capturing_sink(); let t = LogTransport::new(Box::new(mem), sink); let entries: Vec<_> = t.list_dir("d").filter_map(|r| r.ok()).collect(); assert_eq!(entries.len(), 2); let log = buf.lock().unwrap().clone(); assert!(log.iter().any(|l| l.starts_with("list_dir d"))); assert!(log.iter().any(|l| l == " --> 2 entries")); } #[test] fn abspath_carries_prefix() { let (_buf, t) = wrap(); assert_eq!( t.abspath("relpath").unwrap().as_str(), "log+memory:///relpath" ); } #[test] fn clone_keeps_log_wrapping_and_shares_sink() { // The clone should continue emitting through the original sink so // that a single logger captures both the parent and cloned // transport's activity. 
let (buf, t) = wrap(); let cloned = t.clone(Some("sub")).unwrap(); assert!(cloned.base().as_str().starts_with("log+")); let _ = cloned.has("anything"); let log = buf.lock().unwrap().clone(); assert!(log.iter().any(|l| l.starts_with("has anything"))); } } dromedary-0.1.5/src/memory.rs000066400000000000000000000724141520150013200161450ustar00rootroot00000000000000//! In-memory Transport implementation, ported from dromedary/memory.py. //! //! Storage is shared across clones via `Arc>`, matching //! the Python semantics where `clone()` passes the same dict references. use crate::lock::{Lock, LockError}; use crate::urlutils::{escape, unescape}; use crate::{ map_io_err_to_transport_err, Error, FileKind, ReadStream, Result, Stat, Transport, UrlFragment, WriteStream, }; use std::collections::HashMap; use std::fs::Permissions; use std::io::{Cursor, Read, Write}; #[cfg(unix)] use std::os::unix::fs::PermissionsExt; use std::sync::{Arc, Mutex}; use url::Url; /// Raw mode bits stored alongside each entry. On Unix we round-trip the full /// `Permissions` value; on Windows the concept is largely meaningless so we /// simply track the u32 that the caller supplied (if any), same as the /// Python implementation. 
/// Raw Unix permission bits stored alongside each entry.
///
/// `None` means the caller supplied no permissions.
type Mode = Option<u32>;

/// Convert optional filesystem `Permissions` into the stored [`Mode`].
///
/// On Unix the raw mode bits are extracted unchanged. On every other
/// platform the argument is discarded and `None` is returned.
fn perms_to_mode(p: Option<Permissions>) -> Mode {
    #[cfg(unix)]
    {
        p.as_ref().map(PermissionsExt::mode)
    }
    #[cfg(not(unix))]
    {
        // `Permissions` carries no mode bits here; consume the argument so
        // the parameter is not reported as unused.
        let _ = p;
        None
    }
}
{ if r.is_empty() { return Err(Error::PathNotChild); } r.pop(); } else if part == "." || part.is_empty() { // skip } else { r.push(part.to_string()); // Match Python memory.py _abspath: look up by joined key // without leading slash. Stored symlink keys include a leading // slash, so this effectively never matches; symlink following // happens in resolve_symlinks instead. Kept for byte-for-byte // parity with the Python implementation. let key = r.join("/"); if let Some(target) = store.symlinks.get(&key) { r = target.clone(); } } } Ok(format!("/{}", r.join("/"))) } fn resolve_symlinks(&self, relpath: &UrlFragment) -> Result { let mut path = self.abspath_internal(relpath)?; let store = self.store.lock().unwrap(); while let Some(target) = store.symlinks.get(&path) { path = target.join("/"); if !path.starts_with('/') { path = format!("/{}", path); } } Ok(path) } fn check_parent(store: &MemoryStore, abspath: &str) -> Result<()> { let parent = match abspath.rsplit_once('/') { Some((head, _)) if head.is_empty() => "/".to_string(), Some((head, _)) => head.to_string(), None => "/".to_string(), }; if parent != "/" && !store.dirs.contains_key(&parent) { return Err(Error::NoSuchFile(Some(abspath.to_string()))); } Ok(()) } } struct MemoryReadStream(Cursor>); impl Read for MemoryReadStream { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { self.0.read(buf) } } impl std::io::Seek for MemoryReadStream { fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result { self.0.seek(pos) } } impl ReadStream for MemoryReadStream {} /// Write stream that appends straight into the shared MemoryStore so a /// concurrent get_bytes on the same path sees the in-flight bytes /// without an explicit flush — matching the per_transport /// `get_with_open_write_stream_sees_all_content` contract. 
struct MemoryWriteStream { store: Arc>, abspath: String, } impl Write for MemoryWriteStream { fn write(&mut self, buf: &[u8]) -> std::io::Result { let mut store = self.store.lock().unwrap(); match store.files.get_mut(&self.abspath) { Some((data, _)) => { data.extend_from_slice(buf); Ok(buf.len()) } None => Err(std::io::Error::new( std::io::ErrorKind::NotFound, "memory file removed while write stream was open", )), } } fn flush(&mut self) -> std::io::Result<()> { Ok(()) } } impl WriteStream for MemoryWriteStream { fn sync_data(&self) -> std::io::Result<()> { Ok(()) } } struct MemoryLock { path: String, store: Arc>, } impl Lock for MemoryLock { fn unlock(&mut self) -> std::result::Result<(), LockError> { let mut store = self.store.lock().unwrap(); store.locks.remove(&self.path); Ok(()) } } fn acquire_lock( store: &Arc>, path: &str, ) -> Result> { let mut s = store.lock().unwrap(); if s.locks.contains_key(path) { return Err(Error::LockContention(std::path::PathBuf::from(path))); } s.locks.insert(path.to_string(), ()); Ok(Box::new(MemoryLock { path: path.to_string(), store: store.clone(), })) } impl Transport for MemoryTransport { fn external_url(&self) -> Result { Err(Error::InProcessTransport) } fn can_roundtrip_unix_modebits(&self) -> bool { false } fn base(&self) -> Url { self.base.clone() } fn get(&self, relpath: &UrlFragment) -> Result> { let abspath = self.resolve_symlinks(relpath)?; let store = self.store.lock().unwrap(); if let Some((data, _mode)) = store.files.get(&abspath) { Ok(Box::new(MemoryReadStream(Cursor::new(data.clone())))) } else if store.dirs.contains_key(&abspath) { // Python returns a LateReadError here; we translate that into // an immediate IsADirectoryError for the Rust API. 
Err(Error::IsADirectoryError(Some(relpath.to_string()))) } else { Err(Error::NoSuchFile(Some(relpath.to_string()))) } } fn has(&self, relpath: &UrlFragment) -> Result { let abspath = self.abspath_internal(relpath)?; let store = self.store.lock().unwrap(); Ok(store.files.contains_key(&abspath) || store.dirs.contains_key(&abspath) || store.symlinks.contains_key(&abspath)) } fn mkdir(&self, relpath: &UrlFragment, permissions: Option) -> Result<()> { let abspath = self.resolve_symlinks(relpath)?; let mut store = self.store.lock().unwrap(); Self::check_parent(&store, &abspath)?; if store.dirs.contains_key(&abspath) { return Err(Error::FileExists(Some(relpath.to_string()))); } store.dirs.insert(abspath, perms_to_mode(permissions)); Ok(()) } fn stat(&self, relpath: &UrlFragment) -> Result { let abspath = self.abspath_internal(relpath)?; let store = self.store.lock().unwrap(); if let Some((data, mode)) = store.files.get(&abspath) { #[cfg(unix)] let stat_mode = (0o100000u32) | mode.unwrap_or(0o644); Ok(Stat { size: data.len(), #[cfg(unix)] mode: stat_mode, kind: FileKind::File, mtime: None, }) } else if let Some(mode) = store.dirs.get(&abspath) { #[cfg(unix)] let stat_mode = (0o040000u32) | mode.unwrap_or(0o755); #[cfg(not(unix))] let _ = mode; Ok(Stat { size: 0, #[cfg(unix)] mode: stat_mode, kind: FileKind::Dir, mtime: None, }) } else if store.symlinks.contains_key(&abspath) { #[cfg(unix)] let stat_mode = 0o120000u32; Ok(Stat { size: 0, #[cfg(unix)] mode: stat_mode, kind: FileKind::Symlink, mtime: None, }) } else { Err(Error::NoSuchFile(Some(abspath))) } } fn clone(&self, offset: Option<&UrlFragment>) -> Result> { let path = crate::urlutils::combine_paths(&self.cwd, offset.unwrap_or("")); let path = if path.is_empty() || !path.ends_with('/') { format!("{}/", path) } else { path }; let url = format!("{}{}", self.scheme, path); let cloned = Self::with_shared_store(&url, self.store.clone())?; Ok(Box::new(cloned)) } fn abspath(&self, relpath: &UrlFragment) -> Result { // 
Mirror Python: clone(relpath).base, stripping trailing slash unless root. let cloned = self.clone(Some(relpath))?; let s = cloned.base().to_string(); let url_str = if s.matches('/').count() == 3 { s } else { s.trim_end_matches('/').to_string() }; Url::parse(&url_str).map_err(Error::from) } fn relpath(&self, abspath: &Url) -> Result { crate::relpath_against_base(&self.base, abspath) } fn put_file( &self, relpath: &UrlFragment, f: &mut dyn Read, permissions: Option, ) -> Result { let abspath = self.resolve_symlinks(relpath)?; // Validate the parent directory exists *before* reading the stream // so that a failed put_file leaves the reader untouched. This lets // the default put_file_non_atomic retry with the same stream after // creating the missing parent. { let store = self.store.lock().unwrap(); Self::check_parent(&store, &abspath)?; } let mut buf = Vec::new(); f.read_to_end(&mut buf) .map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; let mut store = self.store.lock().unwrap(); Self::check_parent(&store, &abspath)?; let len = buf.len() as u64; store .files .insert(abspath, (buf, perms_to_mode(permissions))); Ok(len) } fn delete(&self, relpath: &UrlFragment) -> Result<()> { let abspath = self.abspath_internal(relpath)?; let mut store = self.store.lock().unwrap(); if store.files.remove(&abspath).is_some() { Ok(()) } else if store.symlinks.remove(&abspath).is_some() { Ok(()) } else { Err(Error::NoSuchFile(Some(relpath.to_string()))) } } fn rmdir(&self, relpath: &UrlFragment) -> Result<()> { let abspath = self.resolve_symlinks(relpath)?; let mut store = self.store.lock().unwrap(); if store.files.contains_key(&abspath) { return Err(Error::NotADirectoryError(Some(relpath.to_string()))); } let prefix = format!("{}/", abspath); for path in store.files.keys().chain(store.symlinks.keys()) { if path.starts_with(&prefix) { return Err(Error::DirectoryNotEmptyError(Some(relpath.to_string()))); } } for path in store.dirs.keys() { if path.starts_with(&prefix) && path 
!= &abspath { return Err(Error::DirectoryNotEmptyError(Some(relpath.to_string()))); } } if store.dirs.remove(&abspath).is_none() { return Err(Error::NoSuchFile(Some(relpath.to_string()))); } Ok(()) } fn rename(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { let abs_from = self.resolve_symlinks(rel_from)?; let abs_to = self.resolve_symlinks(rel_to)?; let from_prefix = format!("{}/", abs_from); let replace = |x: &str| -> String { if x == abs_from { abs_to.clone() } else if let Some(rest) = x.strip_prefix(&from_prefix) { format!("{}/{}", abs_to, rest) } else { x.to_string() } }; let mut store = self.store.lock().unwrap(); // Work on copies so rename is atomic on error. let mut files_new = store.files.clone(); let mut symlinks_new = store.symlinks.clone(); let mut dirs_new = store.dirs.clone(); // Collect renames across all three containers, checking for collisions. let mut file_renames: Vec<(String, String)> = Vec::new(); for path in store.files.keys() { let np = replace(path); if np != *path { if files_new.contains_key(&np) { return Err(Error::FileExists(Some(np))); } file_renames.push((path.clone(), np)); } } let mut symlink_renames: Vec<(String, String)> = Vec::new(); for path in store.symlinks.keys() { let np = replace(path); if np != *path { if symlinks_new.contains_key(&np) { return Err(Error::FileExists(Some(np))); } symlink_renames.push((path.clone(), np)); } } let mut dir_renames: Vec<(String, String)> = Vec::new(); for path in store.dirs.keys() { let np = replace(path); if np != *path { if dirs_new.contains_key(&np) { return Err(Error::FileExists(Some(np))); } dir_renames.push((path.clone(), np)); } } for (old, new) in file_renames { let v = files_new.remove(&old).unwrap(); files_new.insert(new, v); } for (old, new) in symlink_renames { let v = symlinks_new.remove(&old).unwrap(); symlinks_new.insert(new, v); } for (old, new) in dir_renames { let v = dirs_new.remove(&old).unwrap(); dirs_new.insert(new, v); } store.files = files_new; 
store.symlinks = symlinks_new; store.dirs = dirs_new; Ok(()) } fn set_segment_parameter(&mut self, key: &str, value: Option<&str>) -> Result<()> { let (raw, mut params) = crate::urlutils::split_segment_parameters(self.base.as_str())?; if let Some(value) = value { params.insert(key, value); } else { params.remove(key); } self.base = Url::parse(&crate::urlutils::join_segment_parameters(raw, ¶ms)?)?; Ok(()) } fn get_segment_parameters(&self) -> Result> { let (_, params) = crate::urlutils::split_segment_parameters(self.base.as_str())?; Ok(params .iter() .map(|(k, v)| (k.to_string(), v.to_string())) .collect()) } fn append_file( &self, relpath: &UrlFragment, f: &mut dyn Read, permissions: Option, ) -> Result { let abspath = self.resolve_symlinks(relpath)?; let mut buf = Vec::new(); f.read_to_end(&mut buf) .map_err(|e| map_io_err_to_transport_err(e, Some(relpath)))?; let mut store = self.store.lock().unwrap(); Self::check_parent(&store, &abspath)?; let (orig, orig_mode) = store .files .get(&abspath) .cloned() .unwrap_or_else(|| (Vec::new(), None)); let orig_len = orig.len() as u64; let mode = match perms_to_mode(permissions) { Some(m) => Some(m), None => orig_mode, }; let mut combined = orig; combined.extend_from_slice(&buf); store.files.insert(abspath, (combined, mode)); Ok(orig_len) } fn readlink(&self, relpath: &UrlFragment) -> Result { let abspath = self.abspath_internal(relpath)?; let store = self.store.lock().unwrap(); match store.symlinks.get(&abspath) { Some(parts) => Ok(parts.join("/")), None => Err(Error::NoSuchFile(Some(relpath.to_string()))), } } fn hardlink(&self, _rel_from: &UrlFragment, _rel_to: &UrlFragment) -> Result<()> { Err(Error::TransportNotPossible(None)) } fn symlink(&self, source: &UrlFragment, link_name: &UrlFragment) -> Result<()> { let abspath = self.abspath_internal(link_name)?; let mut store = self.store.lock().unwrap(); Self::check_parent(&store, &abspath)?; let target: Vec = source.split('/').map(|s| s.to_string()).collect(); 
store.symlinks.insert(abspath, target); Ok(()) } fn iter_files_recursive(&self) -> Box>> { let store = self.store.lock().unwrap(); let cwd = self.cwd.clone(); let mut results: Vec = Vec::new(); for path in store.files.keys().chain(store.symlinks.keys()) { if path.starts_with(&cwd) { let rest = &path[cwd.len()..]; match escape(rest.as_bytes(), None) { s if !s.is_empty() => results.push(s), _ => {} } } } Box::new(results.into_iter().map(Ok)) } fn open_write_stream( &self, relpath: &UrlFragment, permissions: Option, ) -> Result> { let abspath = self.resolve_symlinks(relpath)?; let mode = perms_to_mode(permissions); // Truncate any existing file and validate the parent exists; this // matches LocalTransport semantics (write streams start empty). { let mut store = self.store.lock().unwrap(); Self::check_parent(&store, &abspath)?; store.files.insert(abspath.clone(), (Vec::new(), mode)); } Ok(Box::new(MemoryWriteStream { store: Arc::clone(&self.store), abspath, })) } fn delete_tree(&self, relpath: &UrlFragment) -> Result<()> { let abspath = self.resolve_symlinks(relpath)?; let mut store = self.store.lock().unwrap(); if !store.dirs.contains_key(&abspath) { return Err(Error::NoSuchFile(Some(relpath.to_string()))); } let prefix = format!("{}/", abspath); store.files.retain(|k, _| !k.starts_with(&prefix)); store.symlinks.retain(|k, _| !k.starts_with(&prefix)); store .dirs .retain(|k, _| !k.starts_with(&prefix) && k != &abspath); Ok(()) } fn list_dir(&self, relpath: &UrlFragment) -> Box>> { let abspath = match self.resolve_symlinks(relpath) { Ok(p) => p, Err(e) => return Box::new(std::iter::once(Err(e))), }; let store = self.store.lock().unwrap(); if abspath != "/" && !store.dirs.contains_key(&abspath) { return Box::new(std::iter::once(Err(Error::NoSuchFile(Some( relpath.to_string(), ))))); } let prefix = if abspath.ends_with('/') { abspath.clone() } else { format!("{}/", abspath) }; let mut results: Vec = Vec::new(); for group in [ store.files.keys().collect::>(), 
store.dirs.keys().collect::>(), store.symlinks.keys().collect::>(), ] { for path in group { if let Some(trailing) = path.strip_prefix(&prefix) { if !trailing.is_empty() && !trailing.contains('/') { results.push(escape(trailing.as_bytes(), None)); } } } } Box::new(results.into_iter().map(Ok)) } fn lock_read(&self, relpath: &UrlFragment) -> Result> { let abspath = self.abspath_internal(relpath)?; acquire_lock(&self.store, &abspath) } fn lock_write(&self, relpath: &UrlFragment) -> Result> { let abspath = self.abspath_internal(relpath)?; acquire_lock(&self.store, &abspath) } fn local_abspath(&self, relpath: &UrlFragment) -> Result { Err(Error::NotLocalUrl(format!("{}{}", self.base, relpath))) } fn copy(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { let data = self.get_bytes(rel_from)?; let mut cur = Cursor::new(data); self.put_file(rel_to, &mut cur, None).map(|_| ()) } } #[cfg(test)] mod tests { use super::*; fn t() -> MemoryTransport { MemoryTransport::new("memory:///").unwrap() } #[test] fn new_defaults_and_normalises_url() { let t = MemoryTransport::new("").unwrap(); assert_eq!(t.base().as_str(), "memory:///"); let t = MemoryTransport::new("memory:///foo").unwrap(); assert_eq!(t.base().as_str(), "memory:///foo/"); } #[test] fn put_get_has_and_stat_file() { let t = t(); assert_eq!(t.has("hello").unwrap(), false); t.put_bytes("hello", b"world", None).unwrap(); assert_eq!(t.has("hello").unwrap(), true); assert_eq!(t.get_bytes("hello").unwrap(), b"world"); let st = t.stat("hello").unwrap(); assert_eq!(st.size, 5); assert_eq!(st.kind, FileKind::File); } #[test] fn get_missing_returns_no_such_file() { let t = t(); match t.get_bytes("nope") { Err(Error::NoSuchFile(_)) => {} other => panic!("expected NoSuchFile, got {:?}", other), } } #[test] fn put_file_into_missing_parent_fails() { let t = t(); match t.put_bytes("missing/child", b"x", None) { Err(Error::NoSuchFile(_)) => {} other => panic!("expected NoSuchFile, got {:?}", other), } } #[test] fn 
mkdir_and_list_dir() { let t = t(); t.mkdir("d", None).unwrap(); t.put_bytes("d/a", b"1", None).unwrap(); t.put_bytes("d/b", b"22", None).unwrap(); let mut entries: Vec = t.list_dir("d").filter_map(|r| r.ok()).collect(); entries.sort(); assert_eq!(entries, vec!["a".to_string(), "b".to_string()]); assert_eq!(t.stat("d").unwrap().kind, FileKind::Dir); } #[test] fn mkdir_existing_fails() { let t = t(); t.mkdir("d", None).unwrap(); match t.mkdir("d", None) { Err(Error::FileExists(_)) => {} other => panic!("expected FileExists, got {:?}", other), } } #[test] fn delete_and_rmdir() { let t = t(); t.mkdir("d", None).unwrap(); t.put_bytes("d/f", b"x", None).unwrap(); t.delete("d/f").unwrap(); assert_eq!(t.has("d/f").unwrap(), false); t.rmdir("d").unwrap(); assert_eq!(t.has("d").unwrap(), false); } #[test] fn rmdir_nonempty_fails() { let t = t(); t.mkdir("d", None).unwrap(); t.put_bytes("d/f", b"x", None).unwrap(); match t.rmdir("d") { Err(Error::DirectoryNotEmptyError(_)) => {} other => panic!("expected DirectoryNotEmptyError, got {:?}", other), } } #[test] fn rename_file() { let t = t(); t.put_bytes("a", b"hi", None).unwrap(); t.rename("a", "b").unwrap(); assert_eq!(t.has("a").unwrap(), false); assert_eq!(t.get_bytes("b").unwrap(), b"hi"); } #[test] fn append_file_extends_content_and_returns_offset() { let t = t(); t.put_bytes("f", b"abc", None).unwrap(); let mut more = Cursor::new(b"DEF".to_vec()); let offset = t.append_file("f", &mut more, None).unwrap(); assert_eq!(offset, 3); assert_eq!(t.get_bytes("f").unwrap(), b"abcDEF"); } #[test] fn symlink_and_readlink() { let t = t(); t.put_bytes("target", b"data", None).unwrap(); t.symlink("target", "link").unwrap(); assert_eq!(t.readlink("link").unwrap(), "target"); assert_eq!(t.stat("link").unwrap().kind, FileKind::Symlink); assert_eq!(t.get_bytes("link").unwrap(), b"data"); } #[test] fn lock_read_contention() { let t = t(); t.put_bytes("f", b"", None).unwrap(); let _l = t.lock_read("f").ok().expect("first lock"); match 
t.lock_read("f") { Err(Error::LockContention(_)) => {} Err(other) => panic!("expected LockContention, got {:?}", other), Ok(_) => panic!("expected LockContention, got Ok"), } } #[test] fn lock_release_allows_reacquire() { let t = t(); t.put_bytes("f", b"", None).unwrap(); { let mut l = t.lock_read("f").ok().expect("first lock"); l.unlock().ok().expect("unlock"); } let _l2 = t.lock_read("f").ok().expect("reacquire"); } #[test] fn clone_shares_storage() { let t = t(); t.mkdir("sub", None).unwrap(); let c = t.clone(Some("sub")).unwrap(); t.put_bytes("sub/f", b"shared", None).unwrap(); assert_eq!(c.get_bytes("f").unwrap(), b"shared"); } #[test] fn external_url_errors_in_process() { let t = t(); match t.external_url() { Err(Error::InProcessTransport) => {} other => panic!("expected InProcessTransport, got {:?}", other), } } #[test] fn iter_files_recursive_lists_files_and_symlinks() { let t = t(); t.mkdir("d", None).unwrap(); t.put_bytes("d/a", b"1", None).unwrap(); t.put_bytes("d/b", b"2", None).unwrap(); t.symlink("d/a", "d/link").unwrap(); let sub = t.clone(Some("d")).unwrap(); let mut files: Vec = sub.iter_files_recursive().filter_map(|r| r.ok()).collect(); files.sort(); assert_eq!( files, vec!["a".to_string(), "b".to_string(), "link".to_string()] ); } #[test] fn open_write_stream_round_trip() { let t = t(); let mut stream = t.open_write_stream("w", None).unwrap(); stream.write_all(b"hello ").unwrap(); stream.write_all(b"world").unwrap(); drop(stream); assert_eq!(t.get_bytes("w").unwrap(), b"hello world"); } #[test] fn open_write_stream_visible_without_flush() { // The per_transport contract: a concurrent get_bytes after write // (no explicit flush) sees the in-flight bytes. 
/// Split a forward-slash path into its components.
///
/// Leading and trailing slashes are stripped before splitting, so `""`,
/// `"/"` and any run of slashes all yield an empty vector. Interior
/// components are returned as-is.
pub fn splitpath(path: &str) -> Vec<String> {
    let trimmed = path.trim_matches('/');
    if trimmed.is_empty() {
        return Vec::new();
    }
    trimmed.split('/').map(str::to_owned).collect()
}
/// Join forward-slash path components.
///
/// `"."` components are dropped, leading slashes are stripped from every
/// component, and components that are empty after stripping are dropped
/// too. A bare `"/"` therefore never produces a doubled separator. If the
/// first component starts with `/`, the result is absolute.
pub fn pathjoin(parts: &[&str]) -> String {
    let Some(first) = parts.first() else {
        return String::new();
    };
    let absolute = first.starts_with('/');
    let joined = parts
        .iter()
        .filter(|part| **part != ".")
        .map(|part| part.trim_start_matches('/'))
        // Filter after trimming so slash-only components vanish instead of
        // contributing an empty segment.
        .filter(|part| !part.is_empty())
        .collect::<Vec<_>>()
        .join("/");
    if absolute {
        format!("/{}", joined)
    } else {
        joined
    }
}
/// Upper-case the drive letter of a `X:`-prefixed path.
///
/// Paths that do not start with an ASCII letter followed by `:` are
/// returned unchanged, as are paths that are not valid UTF-8. The
/// function never panics.
pub fn fixdrive(path: &Path) -> PathBuf {
    let Some(text) = path.to_str() else {
        return path.to_path_buf();
    };
    let bytes = text.as_bytes();
    // Inspect bytes rather than chars: a multi-byte first character has a
    // byte length >= 2 but no second char.
    if bytes.len() >= 2 && bytes[0].is_ascii_alphabetic() && bytes[1] == b':' {
        // Both leading bytes are ASCII, so index 2 is a char boundary.
        let (drive, rest) = text.split_at(2);
        PathBuf::from(format!("{}{}", drive.to_ascii_uppercase(), rest))
    } else {
        path.to_path_buf()
    }
}
/// /// The `std::fs::canonicalize` Windows implementation already does the /// right thing (it issues `GetFinalPathNameByHandleW`), so we just delegate /// and normalize the forward-slash convention the rest of this module /// relies on. pub fn realpath(path: &Path) -> Result { let canonical = std::fs::canonicalize(path)?; Ok(fixdrive(&fix_separators(canonical.as_path()))) } } pub mod posix { use std::collections::HashMap; use std::path::{Component, Path, PathBuf}; pub fn abspath(path: &Path) -> Result { use path_clean::PathClean; // Treat leading-`/` paths as absolute even on Windows, where // `Path::is_absolute` returns false for them. The posix helpers must // produce posix-style URLs from posix-style inputs regardless of // host OS — that's the whole point of having a `posix` module. let posix_absolute = path.as_os_str().as_encoded_bytes().first().copied() == Some(b'/'); if path.is_absolute() || posix_absolute { return Ok(path.to_path_buf()); } let cwd = std::env::current_dir()?; let ap = cwd.join(path).clean(); Ok(ap.as_path().to_path_buf()) } pub fn realpath>(filename: P) -> std::io::Result { let filename = filename.as_ref().to_path_buf(); let (path, _) = join_realpath(Path::new(""), &filename, &mut HashMap::new())?; abspath(path.as_path()) } fn join_realpath( path: &Path, rest: &Path, seen: &mut HashMap>, ) -> std::io::Result<(PathBuf, bool)> { let rest = rest.to_path_buf(); let mut path = path.to_path_buf(); let mut components = rest.components(); while let Some(component) = components.next() { match component { Component::RootDir => { path = PathBuf::from("/"); } Component::CurDir | Component::Prefix(_) => {} Component::ParentDir => { if path.components().next().is_none() { path = PathBuf::from(".."); } else if path.file_name().unwrap() == ".." 
/// Lexically normalise `path` without touching the filesystem.
///
/// `.` components are removed and each `..` cancels the preceding normal
/// component. A `..` directly under the root is dropped, since the root
/// has no parent. A `..` with nothing left to cancel in a relative path
/// is kept, so `../a` stays `../a`. Prefix and root components are never
/// removed. A path that normalises to nothing yields an empty `PathBuf`.
pub fn normpath<P: AsRef<Path>>(path: P) -> PathBuf {
    let mut stack: Vec<Component> = Vec::new();
    for component in path.as_ref().components() {
        match component {
            // `components()` only yields these at the very start, so they
            // can be pushed without resetting the stack.
            Component::Prefix(_) | Component::RootDir => stack.push(component),
            Component::CurDir => {}
            Component::ParentDir => {
                // Copy the top entry out so the stack can be mutated below.
                let top = stack.last().copied();
                match top {
                    Some(Component::Normal(_)) => {
                        stack.pop();
                    }
                    // Already at the root: `..` has nowhere to go.
                    Some(Component::RootDir) => {}
                    // Empty stack, another `..`, or a bare drive prefix:
                    // the path is relative, so the `..` must be preserved.
                    _ => stack.push(component),
                }
            }
            Component::Normal(_) => stack.push(component),
        }
    }
    let mut result = PathBuf::new();
    for component in stack {
        result.push(component.as_os_str());
    }
    result
}
PermissionDenied); the error propagates out of the /// transport method that called the filter. pub type FilterFunc = Arc Result + Send + Sync>; pub struct PathFilteringTransport { backing: Box, base_path: String, scheme: String, base: Url, filter_func: Option, } impl PathFilteringTransport { /// Construct a PathFilteringTransport. /// /// `scheme` is the URL scheme this transport exposes (e.g. "filtered-42:///" /// or "chroot-42:///"), `base_path` is the path portion of the transport's /// base URL (must start with `/`), and `filter_func` is an optional /// rewriter applied after the server-root rebase. pub fn new( backing: Box, scheme: impl Into, base_path: impl Into, filter_func: Option, ) -> Result { let scheme = scheme.into(); let mut base_path = base_path.into(); if !base_path.starts_with('/') { return Err(Error::PathNotChild); } if !base_path.ends_with('/') { base_path.push('/'); } // scheme is expected to end with ":///"; base_path starts with "/", // so join by stripping the leading slash of base_path. 
let base_url = format!("{}{}", scheme, &base_path[1..]); let base = Url::parse(&base_url).map_err(Error::from)?; Ok(Self { backing, base_path, scheme, base, filter_func, }) } pub fn relpath_from_server_root(&self, relpath: &str) -> Result { let unfiltered = combine_paths(&self.base_path, relpath); if !unfiltered.starts_with('/') { return Err(Error::PathNotChild); } Ok(unfiltered[1..].to_string()) } pub fn filter(&self, relpath: &str) -> Result { let p = self.relpath_from_server_root(relpath)?; match &self.filter_func { Some(f) => f(&p), None => Ok(p), } } } impl std::fmt::Debug for PathFilteringTransport { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "PathFilteringTransport({})", self.base) } } impl Transport for PathFilteringTransport { fn external_url(&self) -> Result { self.backing.external_url() } fn can_roundtrip_unix_modebits(&self) -> bool { self.backing.can_roundtrip_unix_modebits() } fn base(&self) -> Url { self.base.clone() } fn is_readonly(&self) -> bool { self.backing.is_readonly() } fn listable(&self) -> bool { self.backing.listable() } fn get(&self, relpath: &UrlFragment) -> Result> { self.backing.get(&self.filter(relpath)?) } fn has(&self, relpath: &UrlFragment) -> Result { self.backing.has(&self.filter(relpath)?) } fn stat(&self, relpath: &UrlFragment) -> Result { self.backing.stat(&self.filter(relpath)?) } fn clone(&self, offset: Option<&UrlFragment>) -> Result> { // Mirror the Python constructor: // __class__(self.server, self.abspath(relpath)) // The filter_func and backing stay identical; only base_path // changes so that future relpath-to-backing resolutions rebase // against the new root. Cloning the backing too would // double-apply the filter on the next operation. 
let offset_s = offset.unwrap_or(""); let new_base_path = self.relpath_from_server_root(offset_s)?; let path_for_new = if new_base_path.starts_with('/') { new_base_path } else { format!("/{}", new_base_path) }; // Clone the backing at its current root ("."), so the new // PathFilteringTransport sees the same underlying store but we // still produce a fresh Send+Sync handle rather than aliasing // self.backing (which we don't own). let backing_clone = self.backing.clone(None)?; let wrapped = PathFilteringTransport::new( backing_clone, self.scheme.clone(), path_for_new, self.filter_func.as_ref().map(Arc::clone), )?; Ok(Box::new(wrapped)) } fn abspath(&self, relpath: &UrlFragment) -> Result { // Deliberately unfiltered: filtering happens when the base is // resolved against the backing transport, not at abspath time. let p = self.relpath_from_server_root(relpath)?; let url = format!("{}{}", self.scheme, p); Url::parse(&url).map_err(Error::from) } fn relpath(&self, abspath: &Url) -> Result { crate::relpath_against_base(&self.base, abspath) } fn put_file( &self, relpath: &UrlFragment, f: &mut dyn std::io::Read, permissions: Option, ) -> Result { self.backing .put_file(&self.filter(relpath)?, f, permissions) } fn put_bytes( &self, relpath: &UrlFragment, data: &[u8], permissions: Option, ) -> Result<()> { self.backing .put_bytes(&self.filter(relpath)?, data, permissions) } fn put_file_non_atomic( &self, relpath: &UrlFragment, f: &mut dyn std::io::Read, permissions: Option, create_parent_dir: Option, dir_permissions: Option, ) -> Result<()> { self.backing.put_file_non_atomic( &self.filter(relpath)?, f, permissions, create_parent_dir, dir_permissions, ) } fn put_bytes_non_atomic( &self, relpath: &UrlFragment, data: &[u8], permissions: Option, create_parent_dir: Option, dir_permissions: Option, ) -> Result<()> { self.backing.put_bytes_non_atomic( &self.filter(relpath)?, data, permissions, create_parent_dir, dir_permissions, ) } fn mkdir(&self, relpath: &UrlFragment, 
permissions: Option) -> Result<()> { self.backing.mkdir(&self.filter(relpath)?, permissions) } fn delete(&self, relpath: &UrlFragment) -> Result<()> { self.backing.delete(&self.filter(relpath)?) } fn rmdir(&self, relpath: &UrlFragment) -> Result<()> { self.backing.rmdir(&self.filter(relpath)?) } fn rename(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { self.backing .rename(&self.filter(rel_from)?, &self.filter(rel_to)?) } fn set_segment_parameter(&mut self, key: &str, value: Option<&str>) -> Result<()> { // Segment parameters live on the filter's own URL rather than the // backing transport: the backing has its own scheme (file://, etc.) // and mutating its base would drop the filter's scheme on the next // base() read. let (raw, mut params) = crate::urlutils::split_segment_parameters(self.base.as_str())?; if let Some(value) = value { params.insert(key, value); } else { params.remove(key); } self.base = Url::parse(&crate::urlutils::join_segment_parameters(raw, ¶ms)?)?; Ok(()) } fn get_segment_parameters(&self) -> Result> { let (_, params) = crate::urlutils::split_segment_parameters(self.base.as_str())?; Ok(params .iter() .map(|(k, v)| (k.to_string(), v.to_string())) .collect()) } fn append_file( &self, relpath: &UrlFragment, f: &mut dyn std::io::Read, permissions: Option, ) -> Result { self.backing .append_file(&self.filter(relpath)?, f, permissions) } fn readlink(&self, relpath: &UrlFragment) -> Result { self.backing.readlink(&self.filter(relpath)?) } fn hardlink(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { self.backing .hardlink(&self.filter(rel_from)?, &self.filter(rel_to)?) } fn symlink(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { self.backing .symlink(&self.filter(rel_from)?, &self.filter(rel_to)?) } fn iter_files_recursive(&self) -> Box>> { // Clone the backing transport to the filtered "." path and let it // walk from there. 
let filtered = match self.filter(".") { Ok(p) => p, Err(e) => return Box::new(std::iter::once(Err(e))), }; match self.backing.clone(Some(&filtered)) { Ok(cloned) => cloned.iter_files_recursive(), Err(e) => Box::new(std::iter::once(Err(e))), } } fn open_write_stream( &self, relpath: &UrlFragment, permissions: Option, ) -> Result> { self.backing .open_write_stream(&self.filter(relpath)?, permissions) } fn delete_tree(&self, relpath: &UrlFragment) -> Result<()> { self.backing.delete_tree(&self.filter(relpath)?) } fn r#move(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { self.backing .r#move(&self.filter(rel_from)?, &self.filter(rel_to)?) } fn list_dir(&self, relpath: &UrlFragment) -> Box>> { match self.filter(relpath) { Ok(p) => self.backing.list_dir(&p), Err(e) => Box::new(std::iter::once(Err(e))), } } fn lock_read(&self, relpath: &UrlFragment) -> Result> { self.backing.lock_read(&self.filter(relpath)?) } fn lock_write(&self, relpath: &UrlFragment) -> Result> { self.backing.lock_write(&self.filter(relpath)?) } fn local_abspath(&self, relpath: &UrlFragment) -> Result { // Matches Python's base Transport.local_abspath: filtered transports // don't expose a local path because the filter can hide or rewrite // the on-disk location. Callers that want the backing's view should // use the backing transport directly. let _ = relpath; Err(Error::NotLocalUrl(self.base().to_string())) } fn copy(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { self.backing .copy(&self.filter(rel_from)?, &self.filter(rel_to)?) } } #[cfg(test)] mod tests { use super::*; use crate::memory::MemoryTransport; fn backing_with(files: &[(&str, &[u8])]) -> Box { let mem = MemoryTransport::new("memory:///").unwrap(); for (p, data) in files { // Ensure parents exist. 
if let Some(parent) = std::path::Path::new(p).parent() { let parent = parent.to_string_lossy().to_string(); if !parent.is_empty() { let _ = mem.mkdir(&parent, None); } } mem.put_bytes(p, data, None).unwrap(); } Box::new(mem) } fn make(base_path: &str, filter: Option) -> PathFilteringTransport { let backing = backing_with(&[("a", b"A"), ("sub/b", b"B")]); PathFilteringTransport::new(backing, "filtered-1:///", base_path, filter).unwrap() } #[test] fn pass_through_filter_none() { let t = make("/", None); assert_eq!(t.get_bytes("a").unwrap(), b"A"); assert_eq!(t.has("sub/b").unwrap(), true); } #[test] fn rebases_under_subdirectory() { let t = make("/sub/", None); assert_eq!(t.get_bytes("b").unwrap(), b"B"); match t.get_bytes("a") { Err(Error::NoSuchFile(_)) => {} other => panic!("expected NoSuchFile, got {:?}", other), } } #[test] fn filter_func_rewrites_path() { // Filter prepends "sub/" to every relpath; starting from root that // means every get effectively reads from /sub/... let filter: FilterFunc = Arc::new(|p: &str| Ok(format!("sub/{}", p))); let t = make("/", Some(filter)); assert_eq!(t.get_bytes("b").unwrap(), b"B"); } #[test] fn mkdir_and_delete_round_trip() { let t = make("/", None); t.mkdir("new", None).unwrap(); t.put_bytes("new/f", b"x", None).unwrap(); assert_eq!(t.get_bytes("new/f").unwrap(), b"x"); t.delete("new/f").unwrap(); assert_eq!(t.has("new/f").unwrap(), false); t.rmdir("new").unwrap(); } #[test] fn list_dir_passes_through() { let t = make("/", None); let mut entries: Vec = t.list_dir("sub").filter_map(|r| r.ok()).collect(); entries.sort(); assert_eq!(entries, vec!["b".to_string()]); } #[test] fn iter_files_recursive_uses_filtered_root() { let t = make("/sub/", None); let mut files: Vec = t.iter_files_recursive().filter_map(|r| r.ok()).collect(); files.sort(); assert_eq!(files, vec!["b".to_string()]); } #[test] fn abspath_is_not_filtered() { let filter: FilterFunc = Arc::new(|p: &str| Ok(format!("sub/{}", p))); let t = make("/", Some(filter)); 
let u = t.abspath("x").unwrap(); assert!(u.as_str().ends_with("/x"), "got {}", u); } #[test] fn is_readonly_forwards() { let t = make("/", None); assert_eq!(t.is_readonly(), false); } #[test] fn base_path_must_start_with_slash() { let backing = backing_with(&[]); match PathFilteringTransport::new(backing, "filtered-1:///", "sub", None) { Err(Error::PathNotChild) => {} other => panic!("expected PathNotChild, got {:?}", other), } } } dromedary-0.1.5/src/pyo3.rs000066400000000000000000000615551520150013200155330ustar00rootroot00000000000000use crate::{ Error, Lock, LockError, ReadStream, Result, Stat, Transport, Url, UrlFragment, WriteStream, }; use pyo3::import_exception; use pyo3::prelude::*; use pyo3::types::PyBytes; use pyo3_filelike::PyBinaryFile; use std::collections::HashMap; use std::fs::Permissions; use std::io::{Read, Write}; #[cfg(unix)] use std::os::unix::fs::PermissionsExt; use std::path::PathBuf; /// Convert `Option` to the `Optional[int]` mode expected by the /// Python transport API. On Windows Python itself ignores the mode argument /// (chmod there only touches the read-only bit), so we pass `None`. 
#[inline] fn perms_to_py_mode(perms: Option<&Permissions>) -> Option { #[cfg(unix)] { perms.map(|p| p.mode()) } #[cfg(not(unix))] { let _ = perms; None } } import_exception!(dromedary.errors, TransportError); import_exception!(dromedary.errors, InProcessTransport); import_exception!(dromedary.errors, NotLocalUrl); import_exception!(dromedary.errors, NoSuchFile); import_exception!(dromedary.errors, FileExists); import_exception!(dromedary.errors, TransportNotPossible); import_exception!(dromedary.errors, UrlError); import_exception!(dromedary.errors, PermissionDenied); import_exception!(dromedary.errors, PathNotChild); import_exception!(dromedary.errors, ShortReadvError); import_exception!(dromedary.errors, LockContention); import_exception!(dromedary.errors, LockFailed); import_exception!(dromedary.errors, DirectoryNotEmpty); import_exception!(dromedary.errors, NotADirectory); import_exception!(dromedary.errors, ResourceBusy); import_exception!(dromedary.errors, ReadError); import_exception!(dromedary.urlutils, InvalidURL); pub struct PyTransport(Py); impl From> for PyTransport { fn from(obj: Py) -> Self { PyTransport(obj) } } fn map_py_err_to_lock_err(e: PyErr) -> LockError { Python::attach(|py| { if e.is_instance_of::(py) { LockError::Contention(e.value(py).getattr("lock").unwrap().extract().unwrap()) } else if e.is_instance_of::(py) { let v = e.value(py); LockError::Failed( v.getattr("lock").unwrap().extract().unwrap(), v.getattr("why").unwrap().extract().unwrap(), ) } else { LockError::IoError(e.into()) } }) } struct PyLock(Py); impl Lock for PyLock { fn unlock(&mut self) -> std::result::Result<(), LockError> { Python::attach(|py| { self.0 .call_method0(py, "unlock") .map_err(map_py_err_to_lock_err)?; Ok(()) }) } } impl<'py> IntoPyObject<'py> for PyTransport { type Target = PyAny; type Output = Bound<'py, Self::Target>; type Error = PyErr; fn into_pyobject(self, py: Python<'py>) -> std::result::Result { Ok(self.0.bind(py).clone()) } } impl From for Error { fn 
from(e: PyErr) -> Self { Python::attach(|py| { let arg = |_i| -> Option { let args = e.value(py).getattr("args").ok()?; let item = args.get_item(0).ok()?; if item.is_none() { None } else { item.extract::().ok() } }; if e.is_instance_of::(py) { Error::InProcessTransport } else if e.is_instance_of::(py) { let url = e .value(py) .getattr("url") .ok() .and_then(|u| u.extract::().ok()) .unwrap_or_default(); Error::NotLocalUrl(url) } else if e.is_instance_of::(py) { Error::NoSuchFile(arg(0)) } else if e.is_instance_of::(py) { Error::FileExists(arg(0)) } else if e.is_instance_of::(py) { let msg = e .value(py) .getattr("msg") .ok() .and_then(|m| m.extract::().ok()) .filter(|s| !s.is_empty()); Error::TransportNotPossible(msg) } else if e.is_instance_of::(py) { Error::PermissionDenied(arg(0)) } else if e.is_instance_of::(py) { Error::PathNotChild } else if e.is_instance_of::(py) { Error::DirectoryNotEmptyError(arg(0)) } else if e.is_instance_of::(py) { Error::NotADirectoryError(arg(0)) } else if e.is_instance_of::(py) { Error::ResourceBusy(arg(0)) } else if e.is_instance_of::(py) { Error::IsADirectoryError(arg(0)) } else if e.is_instance_of::(py) { Error::UrlutilsError(crate::urlutils::Error::UrlNotAscii( arg(0).unwrap_or_default(), )) } else if e.is_instance_of::(py) { let value = e.value(py); Error::ShortReadvError( value.getattr("path").unwrap().extract::().unwrap(), value.getattr("offset").unwrap().extract::().unwrap(), value.getattr("length").unwrap().extract::().unwrap(), value.getattr("actual").unwrap().extract::().unwrap(), ) } else { // Don't panic on unrecognised exception types — funnel them // through Error::Io so the caller sees something useful and // the worker stays alive. New variants should still be added // explicitly above when they have a real semantic mapping. 
Error::Io(std::io::Error::other(e.to_string())) } }) } } impl ReadStream for PyBinaryFile {} // Bit of a hack - this reads the entire buffer, and then streams it fn py_read(r: &mut dyn Read) -> PyResult> { Python::attach(|py| { let mut buffer = Vec::new(); r.read_to_end(&mut buffer)?; let io = py.import("io")?; let bytesio = io.getattr("BytesIO")?; Ok(bytesio.call1((buffer,))?.unbind()) }) } struct PyWriteStream(Py); impl Write for PyWriteStream { fn write(&mut self, buf: &[u8]) -> std::io::Result { Python::attach(|py| { let obj = self.0.call_method1(py, "write", (buf,))?; Ok(obj.extract::(py)?) }) } fn flush(&mut self) -> std::io::Result<()> { Python::attach(|py| { self.0.call_method0(py, "flush")?; Ok(()) }) } } impl WriteStream for PyWriteStream { fn sync_data(&self) -> std::io::Result<()> { Python::attach(|py| { self.0.call_method0(py, "fdatasync")?; Ok(()) }) } } impl std::fmt::Debug for PyTransport { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { write!(f, "PyTransport({:?})", self.0) } } impl Transport for PyTransport { fn external_url(&self) -> Result { Python::attach(|py| { let obj = self.0.call_method0(py, "external_url")?; let s = obj.extract::(py)?; Url::parse(&s).map_err(Error::from) }) } fn get_bytes(&self, path: &str) -> Result> { Python::attach(|py| { let obj = self.0.call_method1(py, "get_bytes", (path,))?; let bytes = obj.cast_bound::(py).map_err(PyErr::from)?; Ok(bytes.as_bytes().to_vec()) }) } fn get(&self, path: &str) -> Result> { Python::attach(|py| { let obj = self.0.call_method1(py, "get", (path,))?; Ok(Box::new(PyBinaryFile::from(obj)) as Box) }) } fn base(&self) -> Url { Python::attach(|py| { // `.base` is a required attribute on every Transport. If the // wrapped Python object can't produce one we have no choice but // to fall back to a placeholder URL — none of the trait callers // expect this to fail. 
let url_str = self .0 .getattr(py, "base") .ok() .and_then(|obj| obj.extract::(py).ok()) .unwrap_or_default(); Url::parse(&url_str).unwrap_or_else(|_| Url::parse("file:///").unwrap()) }) } fn has(&self, path: &UrlFragment) -> Result { Python::attach(|py| { let obj = self.0.call_method1(py, "has", (path,))?; Ok(obj.extract::(py)?) }) } fn has_any(&self, paths: &[&UrlFragment]) -> Result { Python::attach(|py| { let obj = self.0.call_method1(py, "has_any", (paths.to_vec(),))?; Ok(obj.extract::(py)?) }) } fn mkdir(&self, relpath: &UrlFragment, perms: Option) -> Result<()> { Python::attach(|py| { self.0 .call_method1(py, "mkdir", (relpath, perms_to_py_mode(perms.as_ref())))?; Ok(()) }) } fn ensure_base(&self, perms: Option) -> Result { Python::attach(|py| { let obj = self.0 .call_method1(py, "ensure_base", (perms_to_py_mode(perms.as_ref()),))?; Ok(obj.extract::(py)?) }) } fn stat(&self, path: &UrlFragment) -> Result { Python::attach(|py| { let stat_result = self.0.call_method1(py, "stat", (path,))?; let mtime = if let Ok(mtime) = stat_result.getattr(py, "mtime") { Some(mtime.extract::(py)?) } else { None }; let st_mode = stat_result.getattr(py, "st_mode")?.extract::(py)?; // Derive kind from the POSIX mode bits Python reported. // On Windows Python still reports something meaningful for dir vs file. 
let kind = { const S_IFMT: u32 = 0o170000; const S_IFDIR: u32 = 0o040000; const S_IFREG: u32 = 0o100000; const S_IFLNK: u32 = 0o120000; match st_mode & S_IFMT { S_IFDIR => crate::FileKind::Dir, S_IFREG => crate::FileKind::File, S_IFLNK => crate::FileKind::Symlink, _ => crate::FileKind::Other, } }; Ok(Stat { #[cfg(unix)] mode: st_mode, kind, size: stat_result.getattr(py, "st_size")?.extract::(py)?, mtime, }) }) } fn clone(&self, path: Option<&UrlFragment>) -> Result> { Python::attach(|py| { let obj = self.0.call_method1(py, "clone", (path,))?; let transport: Box = Box::new(PyTransport(obj)); Ok(transport) }) } fn relpath(&self, path: &Url) -> Result { Python::attach(|py| { let obj = self.0.call_method1(py, "relpath", (path.to_string(),))?; Ok(obj.extract::(py)?) }) } fn abspath(&self, relpath: &UrlFragment) -> Result { let s = Python::attach(|py| { let obj = self.0.call_method1(py, "abspath", (relpath,))?; obj.extract::(py) })?; Url::parse(&s).map_err(Error::from) } fn put_file( &self, relpath: &UrlFragment, f: &mut dyn Read, mode: Option, ) -> Result { let f = py_read(f)?; Python::attach(|py| { let ret = self.0.call_method1( py, "put_file", (relpath, f, perms_to_py_mode(mode.as_ref())), )?; Ok(ret.extract::(py)?) 
}) } fn put_bytes( &self, relpath: &UrlFragment, bytes: &[u8], mode: Option, ) -> Result<()> { Python::attach(|py| { self.0.call_method1( py, "put_bytes", (relpath, bytes, perms_to_py_mode(mode.as_ref())), )?; Ok(()) }) } fn put_file_non_atomic( &self, relpath: &UrlFragment, f: &mut dyn Read, mode: Option, create_parent: Option, parent_mode: Option, ) -> Result<()> { let f = py_read(f)?; Python::attach(|py| { self.0.call_method1( py, "put_file_non_atomic", ( relpath, f, perms_to_py_mode(mode.as_ref()), create_parent, perms_to_py_mode(parent_mode.as_ref()), ), )?; Ok(()) }) } fn put_bytes_non_atomic( &self, relpath: &UrlFragment, bytes: &[u8], mode: Option, create_parent: Option, parent_mode: Option, ) -> Result<()> { Python::attach(|py| { self.0.call_method1( py, "put_bytes_non_atomic", ( relpath, bytes, perms_to_py_mode(mode.as_ref()), create_parent, perms_to_py_mode(parent_mode.as_ref()), ), )?; Ok(()) }) } fn delete(&self, relpath: &UrlFragment) -> Result<()> { Python::attach(|py| { self.0.call_method1(py, "delete", (relpath,))?; Ok(()) }) } fn rmdir(&self, relpath: &UrlFragment) -> Result<()> { Python::attach(|py| { self.0.call_method1(py, "rmdir", (relpath,))?; Ok(()) }) } fn rename(&self, relpath: &UrlFragment, new_relpath: &UrlFragment) -> Result<()> { Python::attach(|py| { self.0.call_method1(py, "rename", (relpath, new_relpath))?; Ok(()) }) } fn set_segment_parameter(&mut self, key: &str, value: Option<&str>) -> Result<()> { Python::attach(|py| { self.0 .call_method1(py, "set_segment_parameter", (key, value))?; Ok(()) }) } fn get_segment_parameters(&self) -> Result> { Python::attach(|py| { Ok(self .0 .call_method0(py, "get_segment_parameters")? .extract::>(py)?) 
}) } fn create_prefix(&self, permissions: Option) -> Result<()> { Python::attach(|py| { self.0.call_method1( py, "create_prefix", (perms_to_py_mode(permissions.as_ref()),), )?; Ok(()) }) } fn recommended_page_size(&self) -> usize { Python::attach(|py| { self.0 .call_method0(py, "recommended_page_size") .ok() .and_then(|obj| obj.extract::(py).ok()) .unwrap_or(4 * 1024) }) } fn is_readonly(&self) -> bool { Python::attach(|py| { self.0 .call_method0(py, "is_readonly") .ok() .and_then(|obj| obj.extract::(py).ok()) .unwrap_or(false) }) } fn readv( &self, relpath: &UrlFragment, offsets: Vec<(u64, usize)>, adjust_for_latency: bool, upper_limit: Option, ) -> Box)>> + Send> { let iter = Python::attach(|py| -> Result> { let raw = self.0.call_method1( py, "readv", (relpath, offsets, adjust_for_latency, upper_limit), )?; let it = raw.bind(py).try_iter().map_err(Error::from)?; Ok(it.unbind().into_any()) }); let iter = match iter { Ok(i) => i, Err(e) => return Box::new(std::iter::once(Err(e))), }; Box::new(std::iter::from_fn(move || { Python::attach(|py| -> Option)>> { match iter.call_method0(py, "__next__") { Ok(obj) => { if obj.is_none(py) { None } else { match obj.extract::<(u64, Vec)>(py) { Ok(pair) => Some(Ok(pair)), Err(e) => Some(Err(Error::from(e))), } } } Err(e) if e.is_instance_of::(py) => None, Err(e) => Some(Err(Error::from(e))), } }) })) } fn append_bytes( &self, relpath: &UrlFragment, bytes: &[u8], permissions: Option, ) -> Result { Python::attach(|py| { let pos = self.0.call_method1( py, "append_bytes", (relpath, bytes, perms_to_py_mode(permissions.as_ref())), )?; Ok(pos.extract::(py)?) }) } fn append_file( &self, relpath: &UrlFragment, f: &mut dyn Read, permissions: Option, ) -> Result { let f = py_read(f)?; Python::attach(|py| { let pos = self.0.call_method1( py, "append_file", (relpath, f, perms_to_py_mode(permissions.as_ref())), )?; Ok(pos.extract::(py)?) 
}) } fn readlink(&self, relpath: &UrlFragment) -> Result { Python::attach(|py| { Ok(self .0 .call_method1(py, "readlink", (relpath,))? .extract::(py)?) }) } fn hardlink(&self, relpath: &UrlFragment, new_relpath: &UrlFragment) -> Result<()> { Python::attach(|py| { self.0 .call_method1(py, "hardlink", (relpath, new_relpath))?; Ok(()) }) } fn symlink(&self, relpath: &UrlFragment, new_relpath: &UrlFragment) -> Result<()> { Python::attach(|py| { self.0.call_method1(py, "symlink", (relpath, new_relpath))?; Ok(()) }) } fn iter_files_recursive(&self) -> Box>> { let iter = Python::attach(|py| -> Result> { let raw = self.0.call_method0(py, "iter_files_recursive")?; let it = raw.bind(py).try_iter().map_err(Error::from)?; Ok(it.unbind().into_any()) }); let iter = match iter { Ok(i) => i, Err(e) => return Box::new(std::iter::once(Err(e))), }; Box::new(std::iter::from_fn(move || { Python::attach(|py| -> Option> { match iter.call_method0(py, "__next__") { Ok(obj) => { if obj.is_none(py) { None } else { Some(obj.extract::(py).map_err(Error::from)) } } Err(e) if e.is_instance_of::(py) => None, Err(e) => Some(Err(Error::from(e))), } }) })) } fn open_write_stream( &self, relpath: &UrlFragment, permissions: Option, ) -> Result> { Python::attach(|py| { let obj = self.0.call_method1( py, "open_write_stream", (relpath, perms_to_py_mode(permissions.as_ref())), )?; let file = PyWriteStream(obj); Ok(Box::new(file) as Box) }) } fn delete_tree(&self, relpath: &UrlFragment) -> Result<()> { Python::attach(|py| { self.0.call_method1(py, "delete_tree", (relpath,))?; Ok(()) }) } fn r#move(&self, src: &UrlFragment, dst: &UrlFragment) -> Result<()> { Python::attach(|py| { self.0.call_method1(py, "move", (src, dst))?; Ok(()) }) } fn copy_tree(&self, src: &UrlFragment, dst: &UrlFragment) -> Result<()> { Python::attach(|py| { self.0.call_method1(py, "copy_tree", (src, dst))?; Ok(()) }) } fn copy_tree_to_transport(&self, _to_transport: &dyn Transport) -> Result<()> { // TODO(jelmer): bridge 
copy_tree_to_transport across the Py↔Rust // boundary. The Python side expects a Transport object, but we'd // need to obtain its underlying Py wrapper. For now signal the // caller that this transport can't perform the operation. Err(Error::TransportNotPossible(None)) } fn copy_to( &self, _relpaths: &[&UrlFragment], _to_transport: &dyn Transport, _permissions: Option, ) -> Result { // TODO(jelmer): same blocker as copy_tree_to_transport — the // destination transport isn't readily expressible as a Py object // from inside the Rust adapter. Err(Error::TransportNotPossible(None)) } fn can_roundtrip_unix_modebits(&self) -> bool { // Python convention names this `_can_roundtrip_unix_modebits` (with // a leading underscore) on every transport class; the unprefixed // form does not exist on the Python side. Default to false if the // wrapped object doesn't expose either spelling. Python::attach(|py| { self.0 .call_method0(py, "_can_roundtrip_unix_modebits") .ok() .and_then(|obj| obj.extract::(py).ok()) .unwrap_or(false) }) } fn local_abspath(&self, relpath: &UrlFragment) -> Result { Python::attach(|py| { let obj = self.0.call_method1(py, "local_abspath", (relpath,))?; Ok(obj.extract::(py)?) }) } fn list_dir(&self, relpath: &UrlFragment) -> Box>> { // Python list_dir may return a list, tuple, or iterator. Coerce via // try_iter so __next__ always works. 
let iter = Python::attach(|py| -> Result> { let raw = self.0.call_method1(py, "list_dir", (relpath,))?; let it = raw.bind(py).try_iter().map_err(Error::from)?; Ok(it.unbind().into_any()) }); let iter = match iter { Ok(i) => i, Err(e) => return Box::new(std::iter::once(Err(e))), }; Box::new(std::iter::from_fn(move || { Python::attach(|py| -> Option> { match iter.call_method0(py, "__next__") { Ok(obj) => { if obj.is_none(py) { None } else { Some(obj.extract::(py).map_err(|e| e.into())) } } Err(e) if e.is_instance_of::(py) => None, Err(e) => Some(Err(e.into())), } }) })) } fn listable(&self) -> bool { Python::attach(|py| { self.0 .call_method0(py, "listable") .ok() .and_then(|obj| obj.extract::(py).ok()) .unwrap_or(false) }) } fn lock_write(&self, relpath: &UrlFragment) -> Result> { Python::attach(|py| { let obj = self.0.call_method1(py, "lock_write", (relpath,))?; let file: Box = Box::new(PyLock(obj)); Ok(file) }) } fn lock_read(&self, relpath: &UrlFragment) -> Result> { Python::attach(|py| { let obj = self.0.call_method1(py, "lock_read", (relpath,))?; let file: Box = Box::new(PyLock(obj)); Ok(file) }) } fn copy(&self, src: &UrlFragment, dst: &UrlFragment) -> Result<()> { Python::attach(|py| { self.0.call_method1(py, "copy", (src, dst))?; Ok(()) }) } } dromedary-0.1.5/src/readonly.rs000066400000000000000000000274001520150013200164450ustar00rootroot00000000000000//! Readonly Transport decorator, ported from dromedary/readonly.py. //! //! Wraps any Transport and rejects every mutation with TransportNotPossible, //! forwarding read-only operations unchanged. 
use crate::lock::Lock; use crate::{Error, ReadStream, Result, Stat, Transport, UrlFragment, WriteStream}; use std::collections::HashMap; use std::fs::Permissions; use url::Url; pub struct ReadonlyTransport { decorated: Box, } impl ReadonlyTransport { const PREFIX: &'static str = "readonly+"; pub fn new(decorated: Box) -> Self { Self { decorated } } } impl std::fmt::Debug for ReadonlyTransport { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "ReadonlyTransport({})", self.base()) } } fn not_possible() -> Error { Error::TransportNotPossible(Some("readonly transport".to_string())) } impl Transport for ReadonlyTransport { fn external_url(&self) -> Result { self.decorated.external_url() } fn can_roundtrip_unix_modebits(&self) -> bool { self.decorated.can_roundtrip_unix_modebits() } fn base(&self) -> Url { // Recompute each call from the inner's current base so that updates // like set_segment_parameter (which mutate the inner) are reflected // in the decorator's base. 
crate::decorator::prefixed_base(Self::PREFIX, self.decorated.as_ref()) } fn is_readonly(&self) -> bool { true } crate::fwd_listable!(decorated); fn get(&self, relpath: &UrlFragment) -> Result> { self.decorated.get(relpath) } fn has(&self, relpath: &UrlFragment) -> Result { self.decorated.has(relpath) } fn stat(&self, relpath: &UrlFragment) -> Result { self.decorated.stat(relpath) } fn clone(&self, offset: Option<&UrlFragment>) -> Result> { let inner_clone = self.decorated.clone(offset)?; Ok(Box::new(ReadonlyTransport::new(inner_clone))) } fn abspath(&self, relpath: &UrlFragment) -> Result { crate::decorator::prefixed_abspath(Self::PREFIX, self.decorated.as_ref(), relpath) } fn relpath(&self, abspath: &Url) -> Result { crate::decorator::stripped_relpath(Self::PREFIX, self.decorated.as_ref(), abspath) } fn put_file( &self, _relpath: &UrlFragment, _f: &mut dyn std::io::Read, _permissions: Option, ) -> Result { Err(not_possible()) } fn mkdir(&self, _relpath: &UrlFragment, _permissions: Option) -> Result<()> { Err(not_possible()) } fn delete(&self, _relpath: &UrlFragment) -> Result<()> { Err(not_possible()) } fn rmdir(&self, _relpath: &UrlFragment) -> Result<()> { Err(not_possible()) } fn rename(&self, _rel_from: &UrlFragment, _rel_to: &UrlFragment) -> Result<()> { Err(not_possible()) } fn set_segment_parameter(&mut self, key: &str, value: Option<&str>) -> Result<()> { // Forwarding a &mut call to the inner transport is awkward with a // Box; defer to the inner via the mutable reference // we hold. 
self.decorated.set_segment_parameter(key, value) } fn get_segment_parameters(&self) -> Result> { self.decorated.get_segment_parameters() } fn append_file( &self, _relpath: &UrlFragment, _f: &mut dyn std::io::Read, _permissions: Option, ) -> Result { Err(not_possible()) } fn readlink(&self, relpath: &UrlFragment) -> Result { self.decorated.readlink(relpath) } fn hardlink(&self, _rel_from: &UrlFragment, _rel_to: &UrlFragment) -> Result<()> { Err(not_possible()) } fn symlink(&self, _rel_from: &UrlFragment, _rel_to: &UrlFragment) -> Result<()> { Err(not_possible()) } fn iter_files_recursive(&self) -> Box>> { self.decorated.iter_files_recursive() } fn open_write_stream( &self, _relpath: &UrlFragment, _permissions: Option, ) -> Result> { Err(not_possible()) } fn delete_tree(&self, _relpath: &UrlFragment) -> Result<()> { Err(not_possible()) } fn r#move(&self, _rel_from: &UrlFragment, _rel_to: &UrlFragment) -> Result<()> { Err(not_possible()) } fn list_dir(&self, relpath: &UrlFragment) -> Box>> { self.decorated.list_dir(relpath) } fn lock_read(&self, relpath: &UrlFragment) -> Result> { self.decorated.lock_read(relpath) } fn lock_write(&self, _relpath: &UrlFragment) -> Result> { Err(not_possible()) } fn local_abspath(&self, relpath: &UrlFragment) -> Result { self.decorated.local_abspath(relpath) } fn copy(&self, _rel_from: &UrlFragment, _rel_to: &UrlFragment) -> Result<()> { Err(not_possible()) } } #[cfg(test)] mod tests { use super::*; use crate::memory::MemoryTransport; fn ro() -> ReadonlyTransport { let mem = MemoryTransport::new("memory:///").unwrap(); mem.put_bytes("hello", b"world", None).unwrap(); ReadonlyTransport::new(Box::new(mem)) } #[test] fn reads_pass_through() { let t = ro(); assert_eq!(t.get_bytes("hello").unwrap(), b"world"); assert_eq!(t.has("hello").unwrap(), true); assert_eq!(t.has("missing").unwrap(), false); } #[test] fn is_readonly_returns_true() { assert!(ro().is_readonly()); } #[test] fn put_bytes_rejected() { match ro().put_bytes("x", b"y", None) { 
Err(Error::TransportNotPossible(_)) => {} other => panic!("expected TransportNotPossible, got {:?}", other), } } #[test] fn mkdir_rejected() { match ro().mkdir("d", None) { Err(Error::TransportNotPossible(_)) => {} other => panic!("expected TransportNotPossible, got {:?}", other), } } #[test] fn delete_rejected() { match ro().delete("hello") { Err(Error::TransportNotPossible(_)) => {} other => panic!("expected TransportNotPossible, got {:?}", other), } } #[test] fn rename_rejected() { match ro().rename("hello", "world") { Err(Error::TransportNotPossible(_)) => {} other => panic!("expected TransportNotPossible, got {:?}", other), } } #[test] fn lock_read_passes_but_lock_write_rejected() { let t = ro(); let _l = t.lock_read("hello").ok().expect("read lock"); match t.lock_write("hello") { Err(Error::TransportNotPossible(_)) => {} Err(other) => panic!("expected TransportNotPossible, got {:?}", other), Ok(_) => panic!("expected TransportNotPossible, got Ok"), } } #[test] fn base_has_readonly_prefix() { let t = ro(); assert!(t.base().as_str().starts_with("readonly+")); } #[test] fn abspath_carries_prefix() { // The Python decorator contract is: decorator.base + relpath == // decorator.abspath(relpath). Regression guard for a bug where // abspath used to forward straight to the inner transport and // dropped the `readonly+` prefix. let t = ro(); let abs = t.abspath("relpath").unwrap(); assert_eq!(abs.as_str(), "readonly+memory:///relpath"); } #[test] fn relpath_round_trips_through_abspath() { let t = ro(); let abs = t.abspath("sub/file").unwrap(); assert_eq!(t.relpath(&abs).unwrap(), "sub/file"); } #[test] fn clone_keeps_readonly_wrapping() { // Cloning must return another readonly transport rather than // silently dropping down to the inner, otherwise callers can bypass // readonly enforcement just by cloning. 
let t = ro(); let cloned = t.clone(Some("subdir")).unwrap(); assert!(cloned.base().as_str().starts_with("readonly+")); assert!(cloned.is_readonly()); } fn expect_not_possible(r: Result, label: &str) { match r { Err(Error::TransportNotPossible(_)) => {} Err(other) => panic!("{}: expected TransportNotPossible, got {:?}", label, other), Ok(ok) => panic!("{}: expected TransportNotPossible, got Ok({:?})", label, ok), } } #[test] fn rmdir_rejected() { // Seed the inner store with a directory we could otherwise remove. let mem = MemoryTransport::new("memory:///").unwrap(); mem.mkdir("d", None).unwrap(); let t = ReadonlyTransport::new(Box::new(mem)); expect_not_possible(t.rmdir("d"), "rmdir"); } #[test] fn delete_tree_rejected() { expect_not_possible(ro().delete_tree("hello"), "delete_tree"); } #[test] fn symlink_rejected() { expect_not_possible(ro().symlink("hello", "link"), "symlink"); } #[test] fn hardlink_rejected() { expect_not_possible(ro().hardlink("hello", "link"), "hardlink"); } #[test] fn append_file_rejected() { let mut cur = std::io::Cursor::new(b"more".to_vec()); expect_not_possible(ro().append_file("hello", &mut cur, None), "append_file"); } #[test] fn append_bytes_rejected() { expect_not_possible(ro().append_bytes("hello", b"more", None), "append_bytes"); } #[test] fn open_write_stream_rejected() { match ro().open_write_stream("hello", None) { Err(Error::TransportNotPossible(_)) => {} Err(other) => panic!("expected TransportNotPossible, got {:?}", other), Ok(_) => panic!("expected TransportNotPossible, got Ok"), } } #[test] fn move_rejected() { expect_not_possible(ro().r#move("hello", "world"), "move"); } #[test] fn copy_rejected() { expect_not_possible(ro().copy("hello", "world"), "copy"); } #[test] fn stat_passes_through() { let st = ro().stat("hello").unwrap(); assert_eq!(st.size, 5); } #[test] fn list_dir_passes_through() { let mem = MemoryTransport::new("memory:///").unwrap(); mem.mkdir("d", None).unwrap(); mem.put_bytes("d/a", b"1", None).unwrap(); 
mem.put_bytes("d/b", b"2", None).unwrap(); let t = ReadonlyTransport::new(Box::new(mem)); let mut entries: Vec = t.list_dir("d").filter_map(|r| r.ok()).collect(); entries.sort(); assert_eq!(entries, vec!["a".to_string(), "b".to_string()]); } #[test] fn iter_files_recursive_passes_through() { let mem = MemoryTransport::new("memory:///").unwrap(); mem.put_bytes("a", b"1", None).unwrap(); mem.put_bytes("b", b"2", None).unwrap(); let t = ReadonlyTransport::new(Box::new(mem)); let mut files: Vec = t.iter_files_recursive().filter_map(|r| r.ok()).collect(); files.sort(); assert_eq!(files, vec!["a".to_string(), "b".to_string()]); } #[test] fn external_url_forwards_error() { // MemoryTransport returns InProcessTransport; readonly should forward it. match ro().external_url() { Err(Error::InProcessTransport) => {} other => panic!("expected InProcessTransport, got {:?}", other), } } #[test] fn get_missing_forwards_no_such_file() { match ro().get_bytes("nope") { Err(Error::NoSuchFile(_)) => {} other => panic!("expected NoSuchFile, got {:?}", other), } } } dromedary-0.1.5/src/readv.rs000066400000000000000000000216361520150013200157360ustar00rootroot00000000000000use std::collections::HashMap; use std::collections::VecDeque; use std::io::{Read, Seek, SeekFrom}; pub struct OverlappingRange { last_end: usize, start: usize, } impl std::fmt::Display for OverlappingRange { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!( f, "Overlapping range not allowed: last range ended at {}, new one starts at {}", self.last_end, self.start ) } } /// Yield coalesced offsets. /// /// With a long list of neighboring requests, combine them /// into a single large request, while retaining the original /// offsets. /// Turns [(15, 10), (25, 10)] => [(15, 20, [(0, 10), (10, 10)])] /// Note that overlapping requests are not permitted. (So [(15, 10), (20, /// 10)] will raise a ValueError.) 
This is because the data we access never /// overlaps, and it allows callers to trust that we only need any byte of /// data for 1 request (so nothing needs to be buffered to fulfill a second /// request.) /// /// :param offsets: A list of (start, length) pairs /// :param limit: Only combine a maximum of this many pairs Some transports /// penalize multiple reads more than others, and sometimes it is /// better to return early. /// 0 means no limit /// :param fudge_factor: All transports have some level of 'it is /// better to read some more data and throw it away rather /// than seek', so collapse if we are 'close enough' /// :param max_size: Create coalesced offsets no bigger than this size. /// When a single offset is bigger than 'max_size', it will keep /// its size and be alone in the coalesced offset. /// 0 means no maximum size. /// :return: return a list of _CoalescedOffset objects, which have members /// for where to start, how much to read, and how to split those chunks /// back up pub fn coalesce_offsets( offsets: &[(usize, usize)], limit: Option, fudge_factor: Option, max_size: Option, ) -> std::result::Result)>, OverlappingRange> { let mut offsets = offsets.to_vec(); offsets.sort(); struct CoalescedOffset { start: usize, length: usize, ranges: Vec<(usize, usize)>, } if offsets.is_empty() { return Ok(vec![]); } let mut cur = CoalescedOffset { start: offsets[0].0, length: offsets[0].1, ranges: vec![(0, offsets[0].1)], }; let mut last_end = cur.start + cur.length; let mut coalesced_offsets = Vec::new(); let fudge_factor = fudge_factor.unwrap_or(0); // unlimited, but we actually take this to mean 100MB buffer limit let max_size = max_size.unwrap_or(100 * 1024 * 1024); for (start, size) in &offsets[1..] 
{ let end = start + size; if *start <= last_end + fudge_factor && *start >= cur.start && (limit.is_none() || cur.ranges.len() < limit.unwrap()) && (end - cur.start <= max_size) { if *start < last_end { return Err(OverlappingRange { last_end, start: *start, }); } cur.length = end - cur.start; cur.ranges.push((start - cur.start, *size)); } else { coalesced_offsets.push((cur.start, cur.length, cur.ranges)); cur = CoalescedOffset { start: *start, length: *size, ranges: vec![(0, *size)], }; } last_end = end; } coalesced_offsets.push((cur.start, cur.length, cur.ranges)); Ok(coalesced_offsets) } struct ReadvIter { fp: T, offsets: VecDeque<(usize, usize)>, coalesced: VecDeque<(usize, usize, Vec<(usize, usize)>)>, data_map: HashMap<(usize, usize), Vec>, } impl ReadvIter { fn new( fp: T, offsets: Vec<(usize, usize)>, max_readv_combine: usize, bytes_to_read_before_seek: usize, ) -> std::io::Result { // Turn list of offsets into a stack let coalesced = coalesce_offsets( &offsets, Some(max_readv_combine), Some(bytes_to_read_before_seek), None, ) .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e.to_string()))?; Ok(Self { fp, offsets: VecDeque::from(offsets), coalesced: coalesced.into_iter().collect(), data_map: std::collections::HashMap::new(), }) } fn read_more(&mut self) -> Result { // Cache the results, but only until they have been fulfilled if let Some((start, length, ranges)) = self.coalesced.pop_front() { self.fp .seek(SeekFrom::Start(start as u64)) .map_err(|e| (e, start, length, 0))?; let mut data = vec![0; length]; self.fp .read_exact(&mut data) .map_err(|e| (e, start, length, 0))?; for (suboffset, subsize) in ranges { self.data_map.insert( (start + suboffset, subsize), data[suboffset..suboffset + subsize].to_vec(), ); } Ok(true) } else { Ok(false) } } } impl Iterator for ReadvIter { type Item = Result<(usize, Vec), (std::io::Error, usize, usize, usize)>; fn next(&mut self) -> Option { if let Some(key) = self.offsets.pop_front() { loop { if let Some(data) = 
self.data_map.remove(&key) { break Some(Ok((key.0, data))); } else { match self.read_more() { Ok(true) => continue, Ok(false) => break None, Err(e) => break Some(Err(e)), } } } } else { None } } } /// An implementation of readv that uses fp.seek and fp.read. /// /// This uses _coalesce_offsets to issue larger reads and fewer seeks. /// /// :param fp: A file-like object that supports seek() and read(size). /// Note that implementations are allowed to call .close() on this file /// handle, so don't trust that you can use it for other work. /// :param offsets: A list of offsets to be read from the given file. /// :return: yield (pos, data) tuples for each request pub fn seek_and_read( fp: T, offsets: Vec<(usize, usize)>, max_readv_combine: usize, bytes_to_read_before_seek: usize, ) -> std::io::Result< impl Iterator), (std::io::Error, usize, usize, usize)>>, > { ReadvIter::new(fp, offsets, max_readv_combine, bytes_to_read_before_seek) } pub fn sort_expand_and_combine( offsets: Vec<(u64, usize)>, upper_limit: Option, recommended_page_size: usize, ) -> Vec<(u64, usize)> { // Sort the offsets by start address. let mut sorted_offsets = offsets.to_vec(); sorted_offsets.sort_unstable_by_key(|&(offset, _)| offset); // Short circuit empty requests. if sorted_offsets.is_empty() { return Vec::new(); } // Expand the offsets by page size at either end. 
let maximum_expansion = recommended_page_size; let mut new_offsets = Vec::with_capacity(sorted_offsets.len()); for (offset, length) in sorted_offsets { let expansion = maximum_expansion.saturating_sub(length); let reduction = expansion / 2; let new_offset = offset.saturating_sub(reduction as u64); let new_length = length + expansion; let new_length = if let Some(upper_limit) = upper_limit { let new_end = new_offset.saturating_add(new_length as u64); let new_length = std::cmp::min(upper_limit, new_end) - new_offset; std::cmp::max(0, new_length as isize) as usize } else { new_length }; if new_length > 0 { new_offsets.push((new_offset, new_length)); } } // Combine the expanded offsets. let mut result = Vec::with_capacity(new_offsets.len()); if let Some((mut current_offset, mut current_length)) = new_offsets.first().copied() { let mut current_finish = current_offset + current_length as u64; for (offset, length) in new_offsets.iter().skip(1) { let finish = offset + *length as u64; if *offset > current_finish { result.push((current_offset, current_length)); current_offset = *offset; current_length = *length; current_finish = finish; } else if finish > current_finish { current_finish = finish; current_length = (current_finish - current_offset) as usize; } } result.push((current_offset, current_length)); } result } dromedary-0.1.5/src/registry.rs000066400000000000000000000455051520150013200165060ustar00rootroot00000000000000//! URL → [`Transport`](crate::Transport) registry for the pure-Rust crate. //! //! This is intentionally separate from the Python-side //! `dromedary.transport` registry: that one bridges into PyO3-wrapped //! transports, this one stays in pure Rust. The two never share state. //! //! ## URL prefixes //! //! Built-in base schemes — `file://`, `memory://`, `http://`, `https://` //! (and `webdav://`/`webdavs://` with the `webdav` feature) — are //! pre-registered against default-config clients. //! //! 
Decorator prefixes — `readonly+`, `log+`, `unlistable+`, //! `brokenrename+`, `fakenfs+`, `vfat+` — are also pre-registered. These //! wrap an inner transport built by recursively dispatching the rest of //! the URL: `get_transport("readonly+memory:///")` produces a //! [`ReadonlyTransport`](crate::readonly::ReadonlyTransport) wrapping a //! [`MemoryTransport`](crate::memory::MemoryTransport). Decorators chain: //! `log+readonly+memory:///` works. //! //! `sftp://` is opt-in: it needs a caller-supplied SSH channel opener, //! which can't be inferred from the URL alone. Call [`register_sftp`] //! once at startup if you want `sftp://` URLs to resolve. //! //! ## Custom registrations //! //! [`register`] / [`unregister`] take prefix strings exactly as they //! should match a URL — `"file://"` for a base scheme, `"readonly+"` for //! a decorator. Lookup is **longest-prefix-match**, so registering both //! `"http://"` and `"http+urllib://"` does the right thing. //! //! ## What's intentionally not done //! //! `chroot+` and `pathfilter+` are dynamic in Python — the prefix is //! registered per-instance — and that pattern doesn't fit a static //! registry. Use [`crate::chroot::ChrootTransport::new`] / //! [`crate::pathfilter::PathFilterTransport`] directly. use std::collections::HashMap; use std::sync::{OnceLock, RwLock}; use crate::{Error, Result, Transport}; /// `HttpClient::new` has its own error type; map into the transport /// `Error` so the registry can present a uniform failure surface. fn http_client_err_to_transport_err(e: crate::http::client::ClientError) -> Error { use crate::http::client::ClientError; match e { ClientError::Transport(re) => Error::ConnectionError(re.to_string()), ClientError::InvalidRequest(s) => Error::TransportNotPossible(Some(s)), ClientError::Io(io) => Error::Io(io), } } /// Trait for things that can build a [`Transport`] from a URL string. /// /// The argument is the **full** URL the registry was asked to dispatch. 
/// Decorator factories typically peel their own prefix and recurse via /// [`get_transport`]; base-scheme factories use the URL as-is. pub trait TransportFactory: Send + Sync { fn build(&self, url: &str) -> Result>; } /// Convenience blanket impl: a closure is automatically a factory. impl TransportFactory for F where F: Fn(&str) -> Result> + Send + Sync, { fn build(&self, url: &str) -> Result> { self(url) } } type Registry = RwLock>>; /// Lazily-initialised global registry. The first access seeds it with /// the built-in prefixes so `get_transport("file:///tmp")` works without /// any setup. fn registry() -> &'static Registry { static R: OnceLock = OnceLock::new(); R.get_or_init(|| { let mut m: HashMap> = HashMap::new(); register_builtins(&mut m); RwLock::new(m) }) } fn register_builtins(m: &mut HashMap>) { // ---- Base schemes --------------------------------------------------- m.insert( "file://".into(), Box::new(|url: &str| -> Result> { Ok(Box::new(crate::local::LocalTransport::new(url)?)) }), ); m.insert( "memory://".into(), Box::new(|url: &str| -> Result> { Ok(Box::new(crate::memory::MemoryTransport::new(url)?)) }), ); // HTTP/HTTPS share a default-config client. Callers needing a custom // client (proxies, mTLS, custom CA bundle, ...) should construct // `HttpTransport` directly rather than going through the registry. 
fn build_http(url: &str) -> Result> { let client = std::sync::Arc::new( crate::http::client::HttpClient::new(crate::http::client::HttpClientConfig::default()) .map_err(http_client_err_to_transport_err)?, ); Ok(Box::new(crate::http::transport::HttpTransport::new( url, client, )?)) } m.insert("http://".into(), Box::new(|u: &str| build_http(u))); m.insert("https://".into(), Box::new(|u: &str| build_http(u))); #[cfg(feature = "webdav")] { fn build_webdav(url: &str) -> Result> { let client = std::sync::Arc::new( crate::http::client::HttpClient::new( crate::http::client::HttpClientConfig::default(), ) .map_err(http_client_err_to_transport_err)?, ); Ok(Box::new(crate::webdav::transport::HttpDavTransport::new( url, client, )?)) } m.insert("webdav://".into(), Box::new(|u: &str| build_webdav(u))); m.insert("webdavs://".into(), Box::new(|u: &str| build_webdav(u))); } // ---- Decorators ----------------------------------------------------- // // Each decorator wraps the result of recursively dispatching the rest // of the URL. They're cheap to compose: `log+readonly+memory:///` → // LogTransport(ReadonlyTransport(MemoryTransport)). 
m.insert( "readonly+".into(), Box::new(|url: &str| -> Result> { let inner = decorate_inner(url, "readonly+")?; Ok(Box::new(crate::readonly::ReadonlyTransport::new(inner))) }), ); m.insert( "unlistable+".into(), Box::new(|url: &str| -> Result> { let inner = decorate_inner(url, "unlistable+")?; Ok(Box::new(crate::unlistable::UnlistableTransport::new(inner))) }), ); m.insert( "brokenrename+".into(), Box::new(|url: &str| -> Result> { let inner = decorate_inner(url, "brokenrename+")?; Ok(Box::new(crate::brokenrename::BrokenRenameTransport::new( inner, ))) }), ); m.insert( "fakenfs+".into(), Box::new(|url: &str| -> Result> { let inner = decorate_inner(url, "fakenfs+")?; Ok(Box::new(crate::fakenfs::FakeNfsTransport::new(inner))) }), ); m.insert( "vfat+".into(), Box::new(|url: &str| -> Result> { let inner = decorate_inner(url, "vfat+")?; Ok(Box::new(crate::fakevfat::FakeVfatTransport::new(inner))) }), ); m.insert( "log+".into(), Box::new(|url: &str| -> Result> { let inner = decorate_inner(url, "log+")?; Ok(Box::new(crate::log::LogTransport::new( inner, default_log_sink(), ))) }), ); } /// Strip `prefix` from `url` and recursively build the inner transport. /// Used by decorator factories so they don't all duplicate the same /// peel-and-recurse pattern. fn decorate_inner(url: &str, prefix: &str) -> Result> { let inner_url = url.strip_prefix(prefix).ok_or_else(|| { // Should be unreachable when called via the registry, but guard // anyway so a misuse from a custom factory doesn't panic. Error::TransportNotPossible(Some(format!( "decorator `{}` invoked on URL that doesn't start with it: {}", prefix, url ))) })?; get_transport(inner_url) } /// Default log sink for `log+` URLs: forward to the `log` crate at /// debug level. Callers wanting a custom sink should construct /// [`crate::log::LogTransport`] directly. fn default_log_sink() -> crate::log::LogSink { std::sync::Arc::new(|msg: &str| log::debug!("{}", msg)) } /// Register a factory for a URL `prefix` (e.g. 
`"file://"`, /// `"readonly+"`), replacing any previous registration. Returns the /// displaced factory if there was one. /// /// Useful for tests, or for swapping the default HTTP client for one /// with custom credentials/proxy/CA configuration. pub fn register( prefix: &str, factory: Box, ) -> Option> { registry() .write() .unwrap() .insert(prefix.to_string(), factory) } /// Drop the registration for `prefix`. Returns the displaced factory if /// there was one. pub fn unregister(prefix: &str) -> Option> { registry().write().unwrap().remove(prefix) } /// True if a factory is registered for `prefix`. pub fn is_registered(prefix: &str) -> bool { registry().read().unwrap().contains_key(prefix) } /// List the currently registered prefixes. Order is unspecified. pub fn registered_prefixes() -> Vec { registry().read().unwrap().keys().cloned().collect() } /// Build a transport for `url` by longest-prefix-matching against the /// registry. /// /// Returns [`Error::TransportNotPossible`] if no factory matches, /// [`Error::UrlError`] if the URL is malformed in a way the registry /// can detect. pub fn get_transport(url: &str) -> Result> { // Find the longest registered prefix that's a literal prefix of the // URL. We do this under a single read lock; the registry is small // enough that scanning is cheaper than maintaining a sorted index. let r = registry().read().unwrap(); let mut best: Option<(&str, &Box)> = None; for (prefix, factory) in r.iter() { if !url.starts_with(prefix.as_str()) { continue; } match best { Some((cur, _)) if cur.len() >= prefix.len() => {} _ => best = Some((prefix.as_str(), factory)), } } if let Some((_, f)) = best { return f.build(url); } drop(r); // Fallback: a `+vendor` qualifier on a base scheme (e.g. // `http+urllib://`) — strip the qualifier and retry. Mirrors what // `classify_reuse_for` does for connection reuse and matches the // Python `register_lazy_transport("http+urllib://", ...)` registration. 
if let Some((scheme_with_vendor, rest)) = url.split_once("://") { if let Some((scheme, _vendor)) = scheme_with_vendor.split_once('+') { let stripped = format!("{}://{}", scheme, rest); if stripped != url { return get_transport(&stripped); } } } Err(Error::TransportNotPossible(Some(format!( "no transport registered for URL `{}`", url )))) } /// Register an SFTP factory backed by a caller-supplied SSH channel /// opener. The opener takes the parsed URL's host/port/user/password and /// returns a `Read + Write + Send` byte-stream the SFTP client will run /// over (russh, ssh2, libssh, a spawned `ssh -s sftp` subprocess, ...). /// /// Re-registering replaces any previous SFTP factory. Available only /// when the `sftp` feature is enabled. #[cfg(feature = "sftp")] pub fn register_sftp(opener: F) where F: Fn(&url::Url) -> Result + Send + Sync + 'static, { let opener = std::sync::Arc::new(opener); register( "sftp://", Box::new( move |url: &str| -> Result> { let parsed = url::Url::parse(url)?; let channel = opener(&parsed)?; Ok(Box::new(crate::sftp::SftpTransport::from_channel( url, channel, )?)) }, ), ); } #[cfg(test)] mod tests { use super::*; #[test] fn local_scheme_resolves_to_local_transport() { let dir = tempfile::tempdir().unwrap(); let url = url::Url::from_directory_path(dir.path()).unwrap(); let t = get_transport(url.as_str()).unwrap(); // Round-trip a write through the boxed trait object to confirm // we got a real working transport, not an empty stub. 
t.put_bytes("hello", b"world", None).unwrap(); assert_eq!(t.get_bytes("hello").unwrap(), b"world"); } #[test] fn memory_scheme_resolves_to_memory_transport() { let t = get_transport("memory:///").unwrap(); t.put_bytes("k", b"v", None).unwrap(); assert_eq!(t.get_bytes("k").unwrap(), b"v"); } #[test] fn unknown_scheme_returns_transport_not_possible() { let err = get_transport("xyzzy://example/").unwrap_err(); assert!( matches!(err, Error::TransportNotPossible(Some(ref m)) if m.contains("xyzzy")), "unexpected error: {:?}", err ); } #[test] fn invalid_url_returns_url_error() { let err = get_transport("not a url").unwrap_err(); // No registered prefix matches "not a url", so we fall through to // TransportNotPossible — UrlError is reserved for the few paths // that actually parse the URL. assert!( matches!(err, Error::TransportNotPossible(_) | Error::UrlError(_)), "unexpected error: {:?}", err ); } #[test] fn http_plus_vendor_qualifier_resolves_to_http_factory() { // We don't actually open a connection — just confirm dispatch // accepts the qualified form and reaches the http factory. // HttpTransport::new validates the URL synchronously. let t = get_transport("http+urllib://example.com/"); assert!(t.is_ok(), "http+urllib should resolve: {:?}", t.err()); } #[test] fn register_and_unregister_round_trips() { // Use a unique scheme so we don't collide with other tests // running in parallel. let prefix = "test-roundtrip-scheme://"; assert!(!is_registered(prefix)); register( prefix, Box::new(|_url: &str| -> Result> { Ok(Box::new(crate::memory::MemoryTransport::new("memory:///")?)) }), ); assert!(is_registered(prefix)); let t = get_transport(&format!("{}anywhere/", prefix)).unwrap(); // The factory builds a memory transport regardless of the URL, // so it must be functional. 
t.put_bytes("k", b"v", None).unwrap(); assert!(unregister(prefix).is_some()); assert!(!is_registered(prefix)); } #[test] fn registered_prefixes_includes_builtins() { let s = registered_prefixes(); assert!(s.iter().any(|x| x == "file://")); assert!(s.iter().any(|x| x == "memory://")); assert!(s.iter().any(|x| x == "http://")); assert!(s.iter().any(|x| x == "https://")); assert!(s.iter().any(|x| x == "readonly+")); assert!(s.iter().any(|x| x == "log+")); } // ---- Decorator dispatch --------------------------------------------- #[test] fn readonly_decorator_wraps_inner_memory_transport() { let t = get_transport("readonly+memory:///").unwrap(); // Readonly transports report is_readonly() == true and reject // mutating ops with TransportNotPossible. assert!(t.is_readonly()); let err = t.put_bytes("k", b"v", None).unwrap_err(); assert!(matches!(err, Error::TransportNotPossible(_))); } #[test] fn unlistable_decorator_blocks_list_dir() { let t = get_transport("unlistable+memory:///").unwrap(); assert!(!t.listable()); // list_dir must yield TransportNotPossible (via the iterator). let mut it = t.list_dir("."); let first = it .next() .expect("at least one item from unlistable list_dir"); assert!(matches!(first, Err(Error::TransportNotPossible(_)))); } #[test] fn log_decorator_forwards_writes_to_inner() { let t = get_transport("log+memory:///").unwrap(); // We don't validate the log output here — that's covered in // log.rs. The point is that the decorated transport works. 
t.put_bytes("k", b"v", None).unwrap(); assert_eq!(t.get_bytes("k").unwrap(), b"v"); } #[test] fn fakenfs_decorator_round_trips_writes() { let t = get_transport("fakenfs+memory:///").unwrap(); t.put_bytes("a", b"x", None).unwrap(); assert_eq!(t.get_bytes("a").unwrap(), b"x"); } #[test] fn vfat_decorator_round_trips_writes() { let t = get_transport("vfat+memory:///").unwrap(); t.put_bytes("a", b"x", None).unwrap(); assert_eq!(t.get_bytes("a").unwrap(), b"x"); } #[test] fn brokenrename_decorator_round_trips_writes() { let t = get_transport("brokenrename+memory:///").unwrap(); t.put_bytes("a", b"x", None).unwrap(); assert_eq!(t.get_bytes("a").unwrap(), b"x"); } #[test] fn decorators_chain_left_to_right() { // Outermost decorator listed first: readonly wraps log wraps // memory. The readonly behavior should be visible at the top. let t = get_transport("readonly+log+memory:///").unwrap(); assert!(t.is_readonly()); let err = t.put_bytes("k", b"v", None).unwrap_err(); assert!(matches!(err, Error::TransportNotPossible(_))); } #[test] fn decorator_on_unknown_inner_scheme_propagates_error() { let err = get_transport("readonly+xyzzy://nope/").unwrap_err(); assert!( matches!(err, Error::TransportNotPossible(Some(ref m)) if m.contains("xyzzy")), "unexpected error: {:?}", err ); } #[cfg(feature = "webdav")] #[test] fn webdav_scheme_is_registered_when_feature_on() { assert!(is_registered("webdav://")); assert!(is_registered("webdavs://")); } #[cfg(feature = "sftp")] #[test] fn sftp_factory_uses_caller_supplied_opener() { use std::os::unix::net::UnixStream; // Spin up the same loopback fake server the sftp tests use so // we can verify the registry-built transport is functional. The // opener returns one end of a UnixStream pair; the other end // runs the fake server. 
register_sftp(|_url: &url::Url| -> Result { let (a, b) = UnixStream::pair().map_err(Error::Io)?; crate::sftp::tests::loopback::spawn_for_registry(b); Ok(Box::new(a)) }); let t = get_transport("sftp://test/tmp/").unwrap(); t.put_bytes("k", b"v", None).unwrap(); assert_eq!(t.get_bytes("k").unwrap(), b"v"); unregister("sftp://"); } } dromedary-0.1.5/src/sftp.rs000066400000000000000000002165421520150013200156130ustar00rootroot00000000000000//! Pure-Rust SFTP transport built on the `sftp` crate. //! //! Construction is decoupled from how the underlying SSH channel is //! produced: [`SftpTransport::from_channel`] takes any `Read + Write + //! Send` byte-stream, so callers are free to bring their own backend //! (russh, ssh2, libssh, a spawned `ssh -s sftp` subprocess on Unix, //! …). This keeps the pure-Rust crate free of an SSH-library dep and //! mirrors how the PyO3 wrapper composes the SFTP client with whichever //! SSH vendor is in play. use std::collections::HashMap; use std::fs::Permissions; #[cfg(unix)] use std::os::unix::fs::PermissionsExt; use std::path::PathBuf; use std::sync::Arc; use url::Url; use crate::lock::{BogusLock, Lock}; use crate::{ relpath_against_base, ConnectedTransport, Error, ReadStream, Result, Stat, Transport, UrlFragment, WriteStream, }; /// Synchronous bidirectional byte stream the SFTP client runs over. /// Boxed so [`SftpTransport`] has a single concrete type regardless of /// which SSH backend produced the channel. pub trait SshChannel: std::io::Read + std::io::Write + Send {} impl SshChannel for T {} pub type BoxedChannel = Box; /// POSIX `S_IFDIR` — set on `permissions` to mark a directory at the /// SFTP wire level. The `sftp` crate exposes file kind only via these /// mode bits in `Attributes`. 
const S_IFDIR: u32 = 0o040000; const S_IFLNK: u32 = 0o120000; const S_IFMT: u32 = 0o170000; fn map_sftp_err(e: sftp::Error, path: Option<&str>) -> Error { match e { sftp::Error::Io(e) => Error::Io(e), sftp::Error::NoSuchFile(_, _) | sftp::Error::NoSuchPath(_, _) => { Error::NoSuchFile(path.map(|s| s.to_string())) } sftp::Error::PermissionDenied(_, _) | sftp::Error::WriteProtect(_, _) => { Error::PermissionDenied(path.map(|s| s.to_string())) } sftp::Error::FileAlreadyExists(_, _) => Error::FileExists(path.map(|s| s.to_string())), sftp::Error::DirNotEmpty(_, _) => { Error::DirectoryNotEmptyError(path.map(|s| s.to_string())) } sftp::Error::NotADirectory(_, _) => Error::NotADirectoryError(path.map(|s| s.to_string())), sftp::Error::FileIsADirectory(_, _) => { Error::IsADirectoryError(path.map(|s| s.to_string())) } sftp::Error::OpUnsupported(_, _) => { Error::TransportNotPossible(path.map(|s| s.to_string())) } sftp::Error::ConnectionLost(_, m) | sftp::Error::NoConnection(_, m) => { Error::ConnectionError(m) } // Anything else lands as an Io error so callers see a single // bucket for "the SFTP layer was unhappy" — the message keeps // the original variant name for triage. other => Error::Io(std::io::Error::other(format!("{:?}", other))), } } /// Convert a Unix permission set into an SFTP attribute bag. #[cfg(unix)] fn perms_to_attr(p: Option) -> sftp::Attributes { let mut attr = sftp::Attributes::new(); if let Some(p) = p { attr.permissions = Some(p.mode()); } attr } #[cfg(not(unix))] fn perms_to_attr(_p: Option) -> sftp::Attributes { sftp::Attributes::new() } /// Translate Python `_remote_path`'s URL → server-path rules into a /// pure function so it can be unit-tested without a live channel. /// /// The Python transport encodes Breezy convention: /// * Paths starting with `/~/` are home-relative — strip the prefix. /// * Bare `/~` means the home directory itself — empty string. /// * Otherwise leave the path as-is (a leading `/` denotes absolute). 
fn remote_path_for(base: &Url, relpath: &UrlFragment) -> Result { let joined = base.join(relpath)?; let path = joined.path(); let path = if let Some(rest) = path.strip_prefix("/~/") { rest.to_string() } else if path == "/~" { String::new() } else { path.to_string() }; // Decode percent-escapes so the SFTP layer sees raw bytes. Ok(percent_encoding::percent_decode_str(&path) .decode_utf8() .map_err(|_| Error::TransportNotPossible(Some(relpath.to_string())))? .into_owned()) } /// SFTP transport. /// /// Cheap to clone — clones share the underlying SFTP session and only /// vary in the URL prefix they apply to relpaths. #[derive(Clone)] pub struct SftpTransport { base: Url, sftp: Arc>, } impl std::fmt::Debug for SftpTransport { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "SftpTransport({})", self.base) } } impl SftpTransport { /// Build a transport from an already-open SSH byte-stream channel. /// Performs the SFTP version handshake before returning. pub fn from_channel(base: &str, channel: BoxedChannel) -> Result { let session = sftp::SftpClient::new(channel).map_err(Error::Io)?; Self::from_session(base, session) } /// Build a transport from an already-handshaken SFTP session. /// Use this when you've constructed the `SftpClient` yourself /// (e.g. to inspect its server extension list before wrapping). pub fn from_session(base: &str, session: sftp::SftpClient) -> Result { let base = if base.ends_with('/') { base.to_string() } else { format!("{}/", base) }; let base = Url::parse(&base)?; if base.scheme() != "sftp" { return Err(Error::TransportNotPossible(Some(format!( "expected sftp:// URL, got {}", base.scheme() )))); } Ok(SftpTransport { base, sftp: Arc::new(session), }) } fn remote_path(&self, relpath: &UrlFragment) -> Result { remote_path_for(&self.base, relpath) } } /// Streaming reader over a remote SFTP file. 
Holds an open `sftp::File` /// handle and issues `pread` lazily as the caller `read`s, so opening a /// large file no longer pulls the whole thing into memory. `Seek` adjusts /// the offset without a server round-trip — random access and `read_to_end` /// from arbitrary positions both work. /// /// Server `pread` may return fewer bytes than requested but is allowed to /// return any non-zero amount up to the requested length. We surface those /// short reads to the caller rather than looping internally — that's the /// `Read` contract and lets callers detect EOF via the standard /// "returned 0" signal. The end of file is communicated either as a /// zero-length reply, or as `sftp::Error::Eof`; both are mapped to /// `Ok(0)`. struct SftpReadStream { sftp: Arc>, file: sftp::File, /// Current logical position in the remote file. Advances on `read`, /// updated freely by `seek`; the size cache below feeds `SeekFrom::End`. offset: u64, /// Cached file size from the initial `fstat`, so `SeekFrom::End` /// doesn't need a round-trip. Refreshed lazily if a write extends the /// file under us — but a streaming reader is the wrong place to /// observe that, so we leave it stale. size: u64, closed: bool, } impl std::io::Read for SftpReadStream { fn read(&mut self, buf: &mut [u8]) -> std::io::Result { if buf.is_empty() { return Ok(0); } // pread length is u32 on the wire; cap to that and to a sensible // chunk so we don't ask the server for an arbitrarily-large reply. let want = u32::try_from(buf.len()).unwrap_or(u32::MAX).min(64 * 1024); match self.sftp.pread(&self.file, self.offset, want) { Ok(chunk) if chunk.is_empty() => Ok(0), Ok(chunk) => { let n = chunk.len(); buf[..n].copy_from_slice(&chunk); self.offset += n as u64; Ok(n) } Err(sftp::Error::Eof(_, _)) => Ok(0), Err(e) => Err(std::io::Error::from(e)), } } } impl std::io::Seek for SftpReadStream { fn seek(&mut self, pos: std::io::SeekFrom) -> std::io::Result { // Pure offset arithmetic — no wire I/O. 
Rejects negative results // the same way std's Cursor does, so callers see a familiar error. let new = match pos { std::io::SeekFrom::Start(o) => o as i128, std::io::SeekFrom::Current(d) => self.offset as i128 + d as i128, std::io::SeekFrom::End(d) => self.size as i128 + d as i128, }; if new < 0 { return Err(std::io::Error::new( std::io::ErrorKind::InvalidInput, "seek before start of file", )); } self.offset = new as u64; Ok(self.offset) } } impl ReadStream for SftpReadStream {} impl Drop for SftpReadStream { fn drop(&mut self) { if !self.closed { // Best-effort close; nobody to report errors to here. let _ = self.sftp.fclose(&self.file); self.closed = true; } } } /// Append-only write stream. Buffers locally and flushes via `pwrite` /// at every `write` call so that a `sync_data` sees committed bytes. struct SftpWriteStream { sftp: Arc>, file: sftp::File, offset: u64, closed: bool, } impl std::io::Write for SftpWriteStream { fn write(&mut self, buf: &[u8]) -> std::io::Result { self.sftp .pwrite(&self.file, self.offset, buf) .map_err(std::io::Error::from)?; self.offset += buf.len() as u64; Ok(buf.len()) } fn flush(&mut self) -> std::io::Result<()> { Ok(()) } } impl WriteStream for SftpWriteStream { fn sync_data(&self) -> std::io::Result<()> { Ok(()) } } impl Drop for SftpWriteStream { fn drop(&mut self) { if !self.closed { // Best-effort close — the user has dropped the stream so // there's nobody to report close errors to. 
let _ = self.sftp.fclose(&self.file);
            self.closed = true;
        }
    }
}

impl Transport for SftpTransport {
    /// Return a copy of the transport's base URL.
    fn external_url(&self) -> Result {
        Ok(self.base.clone())
    }

    fn can_roundtrip_unix_modebits(&self) -> bool {
        true
    }

    /// Return a copy of the transport's base URL.
    fn base(&self) -> Url {
        self.base.clone()
    }

    /// Open `relpath` for reading and return a lazily-reading, seekable
    /// stream positioned at offset 0.
    fn get(&self, relpath: &UrlFragment) -> Result> {
        let path = self.remote_path(relpath)?;
        let opts = sftp::OpenOptions::new().read(true);
        let attr = sftp::Attributes::new();
        let file = self
            .sftp
            .open(&path, opts, &attr)
            .map_err(|e| map_sftp_err(e, Some(&path)))?;
        // One fstat to seed the size cache for SeekFrom::End. Bytes are
        // read lazily on Read calls; the file handle stays open until the
        // returned stream is dropped.
        let st = match self.sftp.fstat(&file, None) {
            Ok(st) => st,
            Err(e) => {
                // No stream takes ownership of the handle on this path,
                // so close it here (best-effort) instead of leaving it
                // open on the server.
                let _ = self.sftp.fclose(&file);
                return Err(map_sftp_err(e, Some(&path)));
            }
        };
        Ok(Box::new(SftpReadStream {
            sftp: Arc::clone(&self.sftp),
            file,
            offset: 0,
            size: st.size.unwrap_or(0),
            closed: false,
        }))
    }

    /// Report whether `relpath` exists. "No such file" and "no such path"
    /// replies map to `Ok(false)`; every other failure is returned as an
    /// error.
    fn has(&self, relpath: &UrlFragment) -> Result {
        let path = self.remote_path(relpath)?;
        match self.sftp.stat(&path, None) {
            Ok(_) => Ok(true),
            Err(sftp::Error::NoSuchFile(_, _)) | Err(sftp::Error::NoSuchPath(_, _)) => Ok(false),
            Err(e) => Err(map_sftp_err(e, Some(&path))),
        }
    }

    /// Create the directory `relpath`, using mode 0o777 when no
    /// permissions are supplied.
    fn mkdir(&self, relpath: &UrlFragment, permissions: Option) -> Result<()> {
        let path = self.remote_path(relpath)?;
        let mut attr = perms_to_attr(permissions);
        // Server expects the directory bit set so the inode is created
        // with the right type — mirrors what the PyO3 wrapper does.
        attr.permissions = Some(attr.permissions.unwrap_or(0o777) | S_IFDIR);
        self.sftp
            .mkdir(&path, &attr)
            .map_err(|e| map_sftp_err(e, Some(&path)))
    }

    /// Stat `relpath` on the server and convert the reply into a `Stat`.
    fn stat(&self, relpath: &UrlFragment) -> Result {
        let path = self.remote_path(relpath)?;
        let attr = self
            .sftp
            .stat(&path, None)
            .map_err(|e| map_sftp_err(e, Some(&path)))?;
        Ok(attrs_to_stat(&attr))
    }

    /// Return a new transport sharing this connection, rooted at `offset`
    /// joined onto the base (or at the base itself when `offset` is
    /// `None`). The new base always ends with `/`.
    fn clone(&self, offset: Option<&UrlFragment>) -> Result> {
        let new_base = match offset {
            Some(o) => self.base.join(o)?,
            None => self.base.clone(),
        };
        let mut new_base_str = new_base.to_string();
        if !new_base_str.ends_with('/') {
            new_base_str.push('/');
        }
        Ok(Box::new(SftpTransport {
            base: Url::parse(&new_base_str)?,
            sftp: Arc::clone(&self.sftp),
        }))
    }

    fn abspath(&self, relpath: &UrlFragment) -> Result {
        Ok(self.base.join(relpath)?)
    }

    fn relpath(&self, abspath: &Url) -> Result {
        relpath_against_base(&self.base, abspath)
    }

    /// Create or truncate `relpath` and fill it with everything `f`
    /// yields, in 32 KiB chunks. Returns the number of bytes written.
    ///
    /// The remote handle is closed on every path. When the copy itself
    /// fails, that error is returned and the close is best-effort.
    fn put_file(
        &self,
        relpath: &UrlFragment,
        f: &mut dyn std::io::Read,
        permissions: Option,
    ) -> Result {
        let path = self.remote_path(relpath)?;
        let opts = sftp::OpenOptions::new()
            .write(true)
            .create(true)
            .truncate(true);
        let attr = perms_to_attr(permissions);
        let file = self
            .sftp
            .open(&path, opts, &attr)
            .map_err(|e| map_sftp_err(e, Some(&path)))?;
        let mut buf = [0u8; 32 * 1024];
        let mut offset = 0u64;
        // The loop yields a Result instead of using `?` so that the
        // handle can be closed before any error is propagated.
        let copied = loop {
            let n = match f.read(&mut buf) {
                Ok(0) => break Ok(()),
                Ok(n) => n,
                // Interrupted reads are retryable by `Read` convention.
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(e) => break Err(crate::map_io_err_to_transport_err(e, Some(relpath))),
            };
            if let Err(e) = self.sftp.pwrite(&file, offset, &buf[..n]) {
                break Err(map_sftp_err(e, Some(&path)));
            }
            offset += n as u64;
        };
        let closed = self
            .sftp
            .fclose(&file)
            .map_err(|e| map_sftp_err(e, Some(&path)));
        // The copy error, if any, is the more useful one to report.
        copied?;
        closed?;
        Ok(offset)
    }

    fn delete(&self, relpath: &UrlFragment) -> Result<()> {
        let path = self.remote_path(relpath)?;
        self.sftp
            .remove(&path)
            .map_err(|e| map_sftp_err(e, Some(&path)))
    }

    fn rmdir(&self, relpath: &UrlFragment) -> Result<()> {
        let path = self.remote_path(relpath)?;
        self.sftp
            .rmdir(&path)
            .map_err(|e| map_sftp_err(e, Some(&path)))
    }

    /// Rename `rel_from` to `rel_to`. Errors are reported against the
    /// destination path.
    fn rename(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> {
        let from = self.remote_path(rel_from)?;
        let to = self.remote_path(rel_to)?;
        self.sftp
            .rename(&from, &to, None)
            .map_err(|e| map_sftp_err(e, Some(&to)))
    }

    fn set_segment_parameter(&mut self, _key: &str, _value: Option<&str>) -> Result<()> {
        // Segment params are URL-shape metadata; the SFTP transport
        // doesn't consume any. Mirror Memory/Local: silently ignore.
        Ok(())
    }

    fn get_segment_parameters(&self) -> Result> {
        Ok(HashMap::new())
    }

    /// Append everything `f` yields to `relpath`, creating the file if
    /// needed. Returns the size the file had before the append.
    ///
    /// The remote handle is closed on every path. When the `fstat` or the
    /// copy fails, that error is returned and the close is best-effort.
    fn append_file(
        &self,
        relpath: &UrlFragment,
        f: &mut dyn std::io::Read,
        permissions: Option,
    ) -> Result {
        let path = self.remote_path(relpath)?;
        // Open with append flag; capture the pre-existing size as the
        // returned offset (Transport contract).
        let opts = sftp::OpenOptions::new()
            .write(true)
            .create(true)
            .append(true);
        let attr = perms_to_attr(permissions);
        let file = self
            .sftp
            .open(&path, opts, &attr)
            .map_err(|e| map_sftp_err(e, Some(&path)))?;
        let st = match self.sftp.fstat(&file, None) {
            Ok(st) => st,
            Err(e) => {
                // Don't leave the freshly opened handle behind.
                let _ = self.sftp.fclose(&file);
                return Err(map_sftp_err(e, Some(&path)));
            }
        };
        let result = st.size.unwrap_or(0);
        let mut offset = result;
        let mut buf = [0u8; 32 * 1024];
        // The loop yields a Result instead of using `?` so that the
        // handle can be closed before any error is propagated.
        let copied = loop {
            let n = match f.read(&mut buf) {
                Ok(0) => break Ok(()),
                Ok(n) => n,
                // Interrupted reads are retryable by `Read` convention.
                Err(e) if e.kind() == std::io::ErrorKind::Interrupted => continue,
                Err(e) => break Err(crate::map_io_err_to_transport_err(e, Some(relpath))),
            };
            if let Err(e) = self.sftp.pwrite(&file, offset, &buf[..n]) {
                break Err(map_sftp_err(e, Some(&path)));
            }
            offset += n as u64;
        };
        let closed = self
            .sftp
            .fclose(&file)
            .map_err(|e| map_sftp_err(e, Some(&path)));
        // The copy error, if any, is the more useful one to report.
        copied?;
        closed?;
        Ok(result)
    }

    fn readlink(&self, relpath: &UrlFragment) -> Result {
        let path = self.remote_path(relpath)?;
        self.sftp
            .readlink(&path)
            .map_err(|e| map_sftp_err(e, Some(&path)))
    }

    /// Create `rel_to` as a hard link to `rel_from`. Errors are reported
    /// against the new link's path.
    fn hardlink(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> {
        let from = self.remote_path(rel_from)?;
        let to = self.remote_path(rel_to)?;
        self.sftp
            .hardlink(&from, &to)
            .map_err(|e| map_sftp_err(e, Some(&to)))
    }

    fn symlink(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> {
        // `rel_from` is the symlink target (may be absolute on the
        // remote, may be a path that doesn't exist yet); only
`rel_to`
        // is resolved against our base URL. Matches Python's
        // `SFTPTransport.symlink`.
        let to = self.remote_path(rel_to)?;
        self.sftp
            .symlink(rel_from, &to)
            .map_err(|e| map_sftp_err(e, Some(&to)))
    }

    /// Yield the relative path of every non-directory entry below the
    /// transport root. A failure during the walk is yielded as a single
    /// `Err` item.
    fn iter_files_recursive(&self) -> Box>> {
        // Walk eagerly. SFTP doesn't have a native walk and the
        // boxed-iterator return type doesn't carry a borrow, so
        // produce results up-front.
        let results = match self.collect_files_recursive() {
            Ok(v) => v,
            Err(e) => return Box::new(std::iter::once(Err(e))),
        };
        Box::new(results.into_iter().map(Ok))
    }

    /// Create or truncate `relpath` and return a stream that writes to it
    /// starting at offset 0.
    fn open_write_stream(
        &self,
        relpath: &UrlFragment,
        permissions: Option,
    ) -> Result> {
        let path = self.remote_path(relpath)?;
        let opts = sftp::OpenOptions::new()
            .write(true)
            .create(true)
            .truncate(true);
        let attr = perms_to_attr(permissions);
        let file = self
            .sftp
            .open(&path, opts, &attr)
            .map_err(|e| map_sftp_err(e, Some(&path)))?;
        Ok(Box::new(SftpWriteStream {
            sftp: Arc::clone(&self.sftp),
            file,
            offset: 0,
            closed: false,
        }))
    }

    fn delete_tree(&self, relpath: &UrlFragment) -> Result<()> {
        // Recurse depth-first, deleting files then dirs.
        let abspath = self.remote_path(relpath)?;
        self.delete_tree_abs(&abspath)
    }

    /// Yield the names in directory `relpath`, without `.` and `..`. A
    /// failure is yielded as a single `Err` item.
    fn list_dir(&self, relpath: &UrlFragment) -> Box>> {
        let path = match self.remote_path(relpath) {
            Ok(p) => p,
            Err(e) => return Box::new(std::iter::once(Err(e))),
        };
        // `list_dir_attrs` performs the opendir/readdir/closedir sequence
        // and the error mapping; only the names are needed here.
        let entries = match self.list_dir_attrs(&path) {
            Ok(v) => v,
            Err(e) => return Box::new(std::iter::once(Err(e))),
        };
        Box::new(entries.into_iter().map(|(name, _attr)| Ok(name)))
    }

    fn lock_read(&self, _relpath: &UrlFragment) -> Result> {
        // SFTP doesn't expose a portable advisory lock primitive; the
        // Python side returns a no-op lock for compatibility, so do the
        // same here.
        Ok(Box::new(BogusLock))
    }

    fn lock_write(&self, _relpath: &UrlFragment) -> Result> {
        Ok(Box::new(BogusLock))
    }

    /// Always fails with `Error::NotLocalUrl` carrying the base URL.
    fn local_abspath(&self, _relpath: &UrlFragment) -> Result {
        Err(Error::NotLocalUrl(self.base.to_string()))
    }

    fn copy(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> {
        // No native SFTP copy — read source, write destination. The
        // default Transport::copy_to fallback would do roughly the same,
        // but having an explicit copy keeps the Transport trait happy.
        let mut src = self.get(rel_from)?;
        let mut dst = self.open_write_stream(rel_to, None)?;
        std::io::copy(&mut src, &mut dst)
            .map_err(|e| crate::map_io_err_to_transport_err(e, Some(rel_to)))?;
        Ok(())
    }
}

impl ConnectedTransport for SftpTransport {}

impl SftpTransport {
    /// DFS recursive walk used by `iter_files_recursive`. Split out so
    /// errors can short-circuit without smuggling `Result` through the
    /// boxed iterator.
    ///
    /// Returns the relative paths (no leading slash) of all entries that
    /// `stat` does not report as directories.
    fn collect_files_recursive(&self) -> Result> {
        let mut out = Vec::new();
        let root = self.remote_path(".")?;
        // Stack of (display_relpath, server_abspath) — the display
        // form has no leading slash and is what callers expect; the
        // server form is what we feed back into SFTP.
        let mut pending: Vec<(String, String)> = self
            .list_dir_attrs(&root)?
            .into_iter()
            .map(|(name, _attr)| {
                let abs = join_remote(&root, &name);
                (name, abs)
            })
            .collect();
        while let Some((rel, abs)) = pending.pop() {
            // The entry is stat'ed again rather than classified from the
            // readdir attributes.
            // NOTE(review): if `stat` follows symlinks on the server, a
            // link pointing at an ancestor directory would make this walk
            // loop — confirm whether that needs guarding.
            let attr = self
                .sftp
                .stat(&abs, None)
                .map_err(|e| map_sftp_err(e, Some(&abs)))?;
            if is_dir(&attr) {
                for (name, _attr) in self.list_dir_attrs(&abs)? {
                    let child_rel = format!("{}/{}", rel, name);
                    let child_abs = join_remote(&abs, &name);
                    pending.push((child_rel, child_abs));
                }
            } else {
                out.push(rel);
            }
        }
        Ok(out)
    }

    /// Read the whole directory at `abspath` and return `(name, attrs)`
    /// for every entry except `.` and `..`.
    ///
    /// The directory handle is closed (best-effort) on both the success
    /// and the readdir-error path.
    fn list_dir_attrs(&self, abspath: &str) -> Result> {
        let dir = self
            .sftp
            .opendir(abspath)
            .map_err(|e| map_sftp_err(e, Some(abspath)))?;
        let mut out = Vec::new();
        loop {
            match self.sftp.readdir(&dir) {
                Ok(entries) => {
                    for (name, _, attr) in entries {
                        if name == "." || name == ".." {
                            continue;
                        }
                        out.push((name, attr));
                    }
                }
                // End of listing is signalled as an Eof error.
                Err(sftp::Error::Eof(_, _)) => break,
                Err(e) => {
                    let _ = self.sftp.closedir(&dir);
                    return Err(map_sftp_err(e, Some(abspath)));
                }
            }
        }
        let _ = self.sftp.closedir(&dir);
        Ok(out)
    }

    /// Remove everything below `abspath` depth-first, then `abspath`
    /// itself. Entries are classified by their readdir attributes:
    /// directories recurse, everything else is removed as a file.
    fn delete_tree_abs(&self, abspath: &str) -> Result<()> {
        for (name, attr) in self.list_dir_attrs(abspath)? {
            let child = join_remote(abspath, &name);
            if is_dir(&attr) {
                self.delete_tree_abs(&child)?;
            } else {
                self.sftp
                    .remove(&child)
                    .map_err(|e| map_sftp_err(e, Some(&child)))?;
            }
        }
        self.sftp
            .rmdir(abspath)
            .map_err(|e| map_sftp_err(e, Some(abspath)))
    }
}

/// Join `child` onto `parent` with exactly one `/` between them when
/// `parent` does not already end with one.
fn join_remote(parent: &str, child: &str) -> String {
    if parent.ends_with('/') {
        format!("{}{}", parent, child)
    } else {
        format!("{}/{}", parent, child)
    }
}

/// True when the permission bits are present and mark a directory.
fn is_dir(attr: &sftp::Attributes) -> bool {
    matches!(attr.permissions, Some(p) if p & S_IFMT == S_IFDIR)
}

/// True when the permission bits are present and mark a symlink.
fn is_symlink(attr: &sftp::Attributes) -> bool {
    matches!(attr.permissions, Some(p) if p & S_IFMT == S_IFLNK)
}

/// Convert SFTP attributes into a transport `Stat`. Missing size and
/// mode default to 0; a missing modification time stays `None`.
fn attrs_to_stat(attr: &sftp::Attributes) -> Stat {
    let kind = if is_dir(attr) {
        crate::FileKind::Dir
    } else if is_symlink(attr) {
        crate::FileKind::Symlink
    } else {
        // Default to File when the server doesn't tell us — matches
        // what callers expect for plain SFTP responses where mode bits
        // may be missing entirely.
        crate::FileKind::File
    };
    Stat {
        // Saturate rather than wrap on targets where usize is narrower
        // than the 64-bit wire size.
        size: usize::try_from(attr.size.unwrap_or(0)).unwrap_or(usize::MAX),
        #[cfg(unix)]
        mode: attr.permissions.unwrap_or(0),
        kind,
        mtime: attr.modify_time.map(|(s, _)| s as f64),
    }
}

#[cfg(test)]
pub(crate) mod tests {
    use super::*;

    fn base() -> Url {
        Url::parse("sftp://user@example.com/home/user/").unwrap()
    }

    #[test]
    fn remote_path_plain_relative() {
        assert_eq!(
            remote_path_for(&base(), "foo/bar").unwrap(),
            "/home/user/foo/bar"
        );
    }

    #[test]
    fn remote_path_dot() {
        assert_eq!(remote_path_for(&base(), ".").unwrap(), "/home/user/");
    }

    #[test]
    fn remote_path_homedir_relative() {
        // sftp://example.com/~/proj resolves to "proj" — the server
        // interprets the bare path as $HOME-relative.
        let b = Url::parse("sftp://example.com/~/").unwrap();
        assert_eq!(remote_path_for(&b, "proj/file").unwrap(), "proj/file");
    }

    #[test]
    fn remote_path_bare_homedir() {
        let b = Url::parse("sftp://example.com/~").unwrap();
        // base ends with /~, joining "" preserves the path. Confirm
        // the special-case mapping kicks in.
        assert_eq!(remote_path_for(&b, "").unwrap(), "");
    }

    #[test]
    fn remote_path_percent_decodes() {
        // %20 in a relpath should be delivered to the server as a
        // literal space so SFTP can find the file.
        assert_eq!(remote_path_for(&base(), "a%20b").unwrap(), "/home/user/a b");
    }

    #[test]
    fn from_session_rejects_non_sftp_url() {
        // We don't have a real channel here but URL validation runs
        // before the session is used; assemble a dummy session via a
        // pipe pair would require live IO, so cover this via the URL
        // shape only — the parse step itself is the guard.
let url = "http://example.com/"; let parsed = Url::parse(url).unwrap(); assert_ne!(parsed.scheme(), "sftp"); } #[test] fn map_sftp_err_classifies_known_errors() { let e = map_sftp_err(sftp::Error::NoSuchFile("nope".into(), "".into()), Some("p")); assert!(matches!(e, Error::NoSuchFile(Some(ref p)) if p == "p")); let e = map_sftp_err( sftp::Error::PermissionDenied("denied".into(), "".into()), Some("p"), ); assert!(matches!(e, Error::PermissionDenied(Some(ref p)) if p == "p")); let e = map_sftp_err( sftp::Error::FileAlreadyExists("dup".into(), "".into()), Some("p"), ); assert!(matches!(e, Error::FileExists(Some(ref p)) if p == "p")); let e = map_sftp_err( sftp::Error::DirNotEmpty("nope".into(), "".into()), Some("p"), ); assert!(matches!(e, Error::DirectoryNotEmptyError(Some(ref p)) if p == "p")); let e = map_sftp_err( sftp::Error::OpUnsupported("nope".into(), "".into()), Some("p"), ); assert!(matches!(e, Error::TransportNotPossible(Some(ref p)) if p == "p")); let e = map_sftp_err( sftp::Error::ConnectionLost("bye".into(), "msg".into()), None, ); assert!(matches!(e, Error::ConnectionError(ref m) if m == "msg")); } #[test] fn is_dir_detects_directory_bit() { let mut attr = sftp::Attributes::new(); attr.permissions = Some(0o040755); assert!(is_dir(&attr)); attr.permissions = Some(0o100644); assert!(!is_dir(&attr)); attr.permissions = None; assert!(!is_dir(&attr)); } #[test] fn attrs_to_stat_carries_size_and_mtime() { let mut attr = sftp::Attributes::new(); attr.size = Some(42); attr.modify_time = Some((1700000000, None)); attr.permissions = Some(0o100644); let st = attrs_to_stat(&attr); assert_eq!(st.size, 42); assert!(st.is_file()); assert_eq!(st.mtime, Some(1_700_000_000.0)); } // ---- Loopback integration tests ------------------------------------- // // The harness spins up an in-process fake SFTP server on one end of a // `UnixStream::pair()` and drives `SftpTransport` against it. 
The // server understands enough of the wire protocol to round-trip the // operations we actually exercise — it is not a full implementation. // // Unix-only because `UnixStream::pair()` is Unix-only. #[cfg(unix)] pub(crate) mod loopback { use super::super::*; use std::collections::{HashMap, HashSet}; use std::io::{Read, Write}; use std::os::unix::net::UnixStream; use std::sync::atomic::{AtomicU32, Ordering}; use std::thread; // Opcodes & status codes — kept locally so we don't depend on // `sftp` crate's private constants. const SSH_FXP_INIT: u8 = 1; const SSH_FXP_VERSION: u8 = 2; const SSH_FXP_OPEN: u8 = 3; const SSH_FXP_CLOSE: u8 = 4; const SSH_FXP_READ: u8 = 5; const SSH_FXP_WRITE: u8 = 6; const SSH_FXP_LSTAT: u8 = 7; const SSH_FXP_FSTAT: u8 = 8; const SSH_FXP_OPENDIR: u8 = 11; const SSH_FXP_READDIR: u8 = 12; const SSH_FXP_REMOVE: u8 = 13; const SSH_FXP_MKDIR: u8 = 14; const SSH_FXP_RMDIR: u8 = 15; const SSH_FXP_STAT: u8 = 17; const SSH_FXP_RENAME: u8 = 18; const SSH_FXP_READLINK: u8 = 19; const SSH_FXP_SYMLINK: u8 = 20; const SSH_FXP_LINK: u8 = 21; const SSH_FXP_STATUS: u8 = 101; const SSH_FXP_HANDLE: u8 = 102; const SSH_FXP_DATA: u8 = 103; const SSH_FXP_NAME: u8 = 104; const SSH_FXP_ATTRS: u8 = 105; const SSH_FX_OK: u32 = 0; const SSH_FX_EOF: u32 = 1; const SSH_FX_NO_SUCH_FILE: u32 = 2; const SSH_FX_FAILURE: u32 = 4; const SSH_FX_FILE_ALREADY_EXISTS: u32 = 11; const SSH_FX_DIR_NOT_EMPTY: u32 = 18; const SFTP_FLAG_APPEND: u32 = 0x04; const SFTP_FLAG_CREAT: u32 = 0x08; const SFTP_FLAG_TRUNC: u32 = 0x10; const SFTP_FLAG_EXCL: u32 = 0x20; const ATTR_SIZE: u32 = 0x01; const ATTR_PERMISSIONS: u32 = 0x04; /// Read a length-prefixed packet: `u32 length, u8 kind, body`. 
fn read_packet(r: &mut R) -> std::io::Result<(u8, Vec)> { let mut len_buf = [0u8; 4]; r.read_exact(&mut len_buf)?; let len = u32::from_be_bytes(len_buf) as usize; let mut buf = vec![0u8; len]; r.read_exact(&mut buf)?; assert!(!buf.is_empty(), "zero-length SFTP packet"); Ok((buf[0], buf[1..].to_vec())) } /// Frame and write a packet. fn write_packet(w: &mut W, kind: u8, body: &[u8]) -> std::io::Result<()> { let len = (body.len() as u32 + 1).to_be_bytes(); w.write_all(&len)?; w.write_all(&[kind])?; w.write_all(body)?; w.flush() } /// Pull a length-prefixed UTF-8 string off a cursor. fn read_string(c: &mut std::io::Cursor<&[u8]>) -> String { let mut len = [0u8; 4]; c.read_exact(&mut len).unwrap(); let len = u32::from_be_bytes(len) as usize; let pos = c.position() as usize; let s = std::str::from_utf8(&c.get_ref()[pos..pos + len]) .unwrap() .to_string(); c.set_position((pos + len) as u64); s } fn read_u32(c: &mut std::io::Cursor<&[u8]>) -> u32 { let mut b = [0u8; 4]; c.read_exact(&mut b).unwrap(); u32::from_be_bytes(b) } fn read_u64(c: &mut std::io::Cursor<&[u8]>) -> u64 { let mut b = [0u8; 8]; c.read_exact(&mut b).unwrap(); u64::from_be_bytes(b) } /// Write a minimal ATTRS body covering size + permissions. fn encode_attrs(size: Option, perms: Option) -> Vec { let mut flags = 0u32; let mut body = Vec::new(); if size.is_some() { flags |= ATTR_SIZE; } if perms.is_some() { flags |= ATTR_PERMISSIONS; } body.extend_from_slice(&flags.to_be_bytes()); if let Some(s) = size { body.extend_from_slice(&s.to_be_bytes()); } if let Some(p) = perms { body.extend_from_slice(&p.to_be_bytes()); } body } /// Parse a minimal ATTRS body — only the fields the client /// actually sends in our tests (permissions, optionally size). /// Returns the cursor position so the caller can keep reading. 
fn skip_attrs(c: &mut std::io::Cursor<&[u8]>) -> (Option, Option) { let flags = read_u32(c); let mut size = None; let mut perms = None; if flags & ATTR_SIZE != 0 { size = Some(read_u64(c)); } if flags & 0x02 != 0 { // UID/GID — uid then gid let _ = read_u32(c); let _ = read_u32(c); } if flags & 0x400 != 0 { // ALLOCATION_SIZE let _ = read_u64(c); } if flags & 0x80 != 0 { // OWNERGROUP — owner then group strings let _ = read_string(c); let _ = read_string(c); } if flags & ATTR_PERMISSIONS != 0 { perms = Some(read_u32(c)); } // We don't drive any further attributes from the client side // in these tests, so stop here. (size, perms) } /// In-memory filesystem the fake server serves. #[derive(Default)] struct Fs { files: HashMap>, dirs: HashSet, symlinks: HashMap, } impl Fs { fn new_with_root() -> Self { let mut fs = Fs::default(); // Pre-create / and /tmp so transports rooted there work // without an explicit mkdir. fs.dirs.insert("/".into()); fs.dirs.insert("/tmp".into()); fs } fn parent_of(path: &str) -> Option<&str> { let trimmed = path.trim_end_matches('/'); trimmed .rsplit_once('/') .map(|(p, _)| if p.is_empty() { "/" } else { p }) } fn entries_under(&self, dir: &str) -> Vec<(String, bool)> { // Returns (name, is_dir). 
let prefix = if dir.ends_with('/') { dir.to_string() } else { format!("{}/", dir) }; let mut seen = HashSet::new(); let mut out = Vec::new(); for path in self.files.keys() { if let Some(rest) = path.strip_prefix(&prefix) { if !rest.contains('/') && seen.insert(rest.to_string()) { out.push((rest.to_string(), false)); } } } for path in &self.dirs { if path == dir { continue; } if let Some(rest) = path.strip_prefix(&prefix) { if !rest.contains('/') && seen.insert(rest.to_string()) { out.push((rest.to_string(), true)); } } } for path in self.symlinks.keys() { if let Some(rest) = path.strip_prefix(&prefix) { if !rest.contains('/') && seen.insert(rest.to_string()) { out.push((rest.to_string(), false)); } } } out } } enum Handle { File { path: String, append: bool }, Dir { path: String, drained: bool }, } /// Spawn a fake SFTP server on `stream`. Returns a join handle /// so tests can wait for it to exit (it exits when the client /// disconnects). fn spawn(mut stream: UnixStream) -> thread::JoinHandle<()> { thread::spawn(move || { let mut fs = Fs::new_with_root(); let mut handles: HashMap, Handle> = HashMap::new(); static NEXT_HANDLE: AtomicU32 = AtomicU32::new(1); loop { let (kind, body) = match read_packet(&mut stream) { Ok(v) => v, Err(_) => return, // peer closed }; if kind == SSH_FXP_INIT { // body is the version u32; reply with VERSION 3, // no extensions. INIT has no request-id. let _client_ver = u32::from_be_bytes(body[..4].try_into().unwrap()); let mut reply = Vec::new(); reply.extend_from_slice(&3u32.to_be_bytes()); write_packet(&mut stream, SSH_FXP_VERSION, &reply).unwrap(); continue; } let req_id = u32::from_be_bytes(body[..4].try_into().unwrap()); let mut c = std::io::Cursor::new(&body[4..]); // Strip a trailing slash (except for the root) so // callers can pass `/tmp/` or `/tmp` interchangeably. 
fn norm(p: String) -> String { if p.len() > 1 && p.ends_with('/') { p.trim_end_matches('/').to_string() } else { p } } let send_status = |stream: &mut UnixStream, code: u32, msg: &str| { let mut r = Vec::new(); r.extend_from_slice(&req_id.to_be_bytes()); r.extend_from_slice(&code.to_be_bytes()); r.extend_from_slice(&(msg.len() as u32).to_be_bytes()); r.extend_from_slice(msg.as_bytes()); r.extend_from_slice(&0u32.to_be_bytes()); // empty lang_tag write_packet(stream, SSH_FXP_STATUS, &r).unwrap(); }; match kind { SSH_FXP_MKDIR => { let path = norm(read_string(&mut c)); let _ = skip_attrs(&mut c); if fs.dirs.contains(&path) || fs.files.contains_key(&path) { send_status(&mut stream, SSH_FX_FILE_ALREADY_EXISTS, "exists"); } else if matches!(Fs::parent_of(&path), Some(p) if !fs.dirs.contains(p)) { send_status(&mut stream, SSH_FX_NO_SUCH_FILE, "no parent"); } else { fs.dirs.insert(path); send_status(&mut stream, SSH_FX_OK, ""); } } SSH_FXP_RMDIR => { let path = norm(read_string(&mut c)); if !fs.dirs.contains(&path) { send_status(&mut stream, SSH_FX_NO_SUCH_FILE, ""); continue; } if !fs.entries_under(&path).is_empty() { send_status(&mut stream, SSH_FX_DIR_NOT_EMPTY, ""); continue; } fs.dirs.remove(&path); send_status(&mut stream, SSH_FX_OK, ""); } SSH_FXP_REMOVE => { let path = norm(read_string(&mut c)); if fs.files.remove(&path).is_some() || fs.symlinks.remove(&path).is_some() { send_status(&mut stream, SSH_FX_OK, ""); } else { send_status(&mut stream, SSH_FX_NO_SUCH_FILE, ""); } } SSH_FXP_RENAME => { let from = norm(read_string(&mut c)); let to = norm(read_string(&mut c)); let _flags = read_u32(&mut c); if let Some(data) = fs.files.remove(&from) { fs.files.insert(to, data); send_status(&mut stream, SSH_FX_OK, ""); } else if fs.dirs.remove(&from) { fs.dirs.insert(to); send_status(&mut stream, SSH_FX_OK, ""); } else { send_status(&mut stream, SSH_FX_NO_SUCH_FILE, ""); } } SSH_FXP_STAT | SSH_FXP_LSTAT => { let path = norm(read_string(&mut c)); let _flags = read_u32(&mut c); 
if let Some(data) = fs.files.get(&path) { let attrs = encode_attrs(Some(data.len() as u64), Some(0o100644)); let mut r = req_id.to_be_bytes().to_vec(); r.extend_from_slice(&attrs); write_packet(&mut stream, SSH_FXP_ATTRS, &r).unwrap(); } else if fs.dirs.contains(&path) { let attrs = encode_attrs(Some(0), Some(0o040755)); let mut r = req_id.to_be_bytes().to_vec(); r.extend_from_slice(&attrs); write_packet(&mut stream, SSH_FXP_ATTRS, &r).unwrap(); } else if let Some(target) = fs.symlinks.get(&path) { if kind == SSH_FXP_LSTAT { let attrs = encode_attrs(Some(target.len() as u64), Some(0o120777)); let mut r = req_id.to_be_bytes().to_vec(); r.extend_from_slice(&attrs); write_packet(&mut stream, SSH_FXP_ATTRS, &r).unwrap(); } else { // STAT follows: resolve once. let resolved = target.clone(); if let Some(data) = fs.files.get(&resolved) { let attrs = encode_attrs(Some(data.len() as u64), Some(0o100644)); let mut r = req_id.to_be_bytes().to_vec(); r.extend_from_slice(&attrs); write_packet(&mut stream, SSH_FXP_ATTRS, &r).unwrap(); } else { send_status(&mut stream, SSH_FX_NO_SUCH_FILE, ""); } } } else { send_status(&mut stream, SSH_FX_NO_SUCH_FILE, ""); } } SSH_FXP_OPEN => { let path = norm(read_string(&mut c)); let flags = read_u32(&mut c); let _ = skip_attrs(&mut c); let exists = fs.files.contains_key(&path); if flags & SFTP_FLAG_EXCL != 0 && exists { send_status(&mut stream, SSH_FX_FILE_ALREADY_EXISTS, "exists"); continue; } if !exists && flags & SFTP_FLAG_CREAT == 0 { send_status(&mut stream, SSH_FX_NO_SUCH_FILE, ""); continue; } // Verify parent exists when creating. 
if !exists { if let Some(parent) = Fs::parent_of(&path) { if !fs.dirs.contains(parent) { send_status(&mut stream, SSH_FX_NO_SUCH_FILE, "no parent"); continue; } } fs.files.insert(path.clone(), Vec::new()); } else if flags & SFTP_FLAG_TRUNC != 0 { fs.files.insert(path.clone(), Vec::new()); } let h_id = NEXT_HANDLE.fetch_add(1, Ordering::SeqCst); let h = format!("f{}", h_id).into_bytes(); handles.insert( h.clone(), Handle::File { path, append: flags & SFTP_FLAG_APPEND != 0, }, ); let mut r = req_id.to_be_bytes().to_vec(); r.extend_from_slice(&(h.len() as u32).to_be_bytes()); r.extend_from_slice(&h); write_packet(&mut stream, SSH_FXP_HANDLE, &r).unwrap(); } SSH_FXP_OPENDIR => { let path = norm(read_string(&mut c)); if !fs.dirs.contains(&path) { send_status(&mut stream, SSH_FX_NO_SUCH_FILE, ""); continue; } let h_id = NEXT_HANDLE.fetch_add(1, Ordering::SeqCst); let h = format!("d{}", h_id).into_bytes(); handles.insert( h.clone(), Handle::Dir { path, drained: false, }, ); let mut r = req_id.to_be_bytes().to_vec(); r.extend_from_slice(&(h.len() as u32).to_be_bytes()); r.extend_from_slice(&h); write_packet(&mut stream, SSH_FXP_HANDLE, &r).unwrap(); } SSH_FXP_READDIR => { let h_len = read_u32(&mut c) as usize; let pos = c.position() as usize; let h = c.get_ref()[pos..pos + h_len].to_vec(); let entries = match handles.get_mut(&h) { Some(Handle::Dir { path, drained }) => { if *drained { None } else { *drained = true; Some(fs.entries_under(path)) } } _ => { send_status(&mut stream, SSH_FX_FAILURE, "bad handle"); continue; } }; match entries { None => { send_status(&mut stream, SSH_FX_EOF, ""); } Some(es) => { let mut r = req_id.to_be_bytes().to_vec(); r.extend_from_slice(&(es.len() as u32).to_be_bytes()); for (name, is_dir) in es { r.extend_from_slice(&(name.len() as u32).to_be_bytes()); r.extend_from_slice(name.as_bytes()); // longname = same as name for tests r.extend_from_slice(&(name.len() as u32).to_be_bytes()); r.extend_from_slice(name.as_bytes()); let attrs = if is_dir 
{ encode_attrs(Some(0), Some(0o040755)) } else { encode_attrs( Some( fs.files .get(&format!( "{}/{}", match handles.get(&h).unwrap() { Handle::Dir { path, .. } => path, _ => unreachable!(), }, name )) .map(|v| v.len() as u64) .unwrap_or(0), ), Some(0o100644), ) }; r.extend_from_slice(&attrs); } write_packet(&mut stream, SSH_FXP_NAME, &r).unwrap(); } } } SSH_FXP_CLOSE => { let h_len = read_u32(&mut c) as usize; let pos = c.position() as usize; let h = c.get_ref()[pos..pos + h_len].to_vec(); handles.remove(&h); send_status(&mut stream, SSH_FX_OK, ""); } SSH_FXP_WRITE => { let h_len = read_u32(&mut c) as usize; let pos = c.position() as usize; let h = c.get_ref()[pos..pos + h_len].to_vec(); c.set_position((pos + h_len) as u64); let offset = read_u64(&mut c) as usize; let data_len = read_u32(&mut c) as usize; let pos = c.position() as usize; let data = c.get_ref()[pos..pos + data_len].to_vec(); let path = match handles.get(&h) { Some(Handle::File { path, append }) => { let p = path.clone(); let _ = append; p } _ => { send_status(&mut stream, SSH_FX_FAILURE, "bad handle"); continue; } }; let f = fs.files.entry(path).or_default(); // For append handles the client passes the // current size as the offset, so the same // logic works for both append and pwrite. if f.len() < offset + data_len { f.resize(offset + data_len, 0); } f[offset..offset + data_len].copy_from_slice(&data); send_status(&mut stream, SSH_FX_OK, ""); } SSH_FXP_READ => { let h_len = read_u32(&mut c) as usize; let pos = c.position() as usize; let h = c.get_ref()[pos..pos + h_len].to_vec(); c.set_position((pos + h_len) as u64); let offset = read_u64(&mut c) as usize; let length = read_u32(&mut c) as usize; let path = match handles.get(&h) { Some(Handle::File { path, .. 
}) => path.clone(), _ => { send_status(&mut stream, SSH_FX_FAILURE, "bad handle"); continue; } }; let data = fs.files.get(&path).cloned().unwrap_or_default(); if offset >= data.len() { send_status(&mut stream, SSH_FX_EOF, ""); } else { let end = (offset + length).min(data.len()); let chunk = &data[offset..end]; let mut r = req_id.to_be_bytes().to_vec(); r.extend_from_slice(&(chunk.len() as u32).to_be_bytes()); r.extend_from_slice(chunk); write_packet(&mut stream, SSH_FXP_DATA, &r).unwrap(); } } SSH_FXP_FSTAT => { let h_len = read_u32(&mut c) as usize; let pos = c.position() as usize; let h = c.get_ref()[pos..pos + h_len].to_vec(); let size = match handles.get(&h) { Some(Handle::File { path, .. }) => { fs.files.get(path).map(|v| v.len() as u64).unwrap_or(0) } _ => { send_status(&mut stream, SSH_FX_FAILURE, "bad handle"); continue; } }; let attrs = encode_attrs(Some(size), Some(0o100644)); let mut r = req_id.to_be_bytes().to_vec(); r.extend_from_slice(&attrs); write_packet(&mut stream, SSH_FXP_ATTRS, &r).unwrap(); } SSH_FXP_READLINK => { let path = norm(read_string(&mut c)); match fs.symlinks.get(&path) { Some(target) => { let mut r = req_id.to_be_bytes().to_vec(); r.extend_from_slice(&1u32.to_be_bytes()); r.extend_from_slice(&(target.len() as u32).to_be_bytes()); r.extend_from_slice(target.as_bytes()); // Empty attrs body — the client only // looks at name[0]. r.extend_from_slice(&0u32.to_be_bytes()); write_packet(&mut stream, SSH_FXP_NAME, &r).unwrap(); } None => send_status(&mut stream, SSH_FX_NO_SUCH_FILE, ""), } } SSH_FXP_SYMLINK => { let path = norm(read_string(&mut c)); let target = read_string(&mut c); // OpenSSH's argument order is swapped from // the spec: the wire body sends (linkpath, // targetpath) but Python paramiko / our // client passes target then link. The sftp // crate does `path` first, `target` second // — match what it sends literally. 
fs.symlinks.insert(path, target); send_status(&mut stream, SSH_FX_OK, ""); } SSH_FXP_LINK => { // The `sftp` crate sends path=existing, // target=new-link (matching how OpenSSH // serializes SYMLINK/LINK on the wire, // which is opposite to most APIs). let existing = norm(read_string(&mut c)); let new_link = norm(read_string(&mut c)); let mut sl = [0u8; 1]; c.read_exact(&mut sl).unwrap(); let is_symlink = sl[0] != 0; if is_symlink { fs.symlinks.insert(new_link, existing); send_status(&mut stream, SSH_FX_OK, ""); } else if let Some(data) = fs.files.get(&existing).cloned() { // Cheap clone-as-hardlink; tests only // observe content equality. fs.files.insert(new_link, data); send_status(&mut stream, SSH_FX_OK, ""); } else { send_status(&mut stream, SSH_FX_NO_SUCH_FILE, ""); } } _ => { send_status(&mut stream, SSH_FX_FAILURE, "unsupported"); } } } }) } /// Spin up a server, return (transport, server-thread). The /// server exits cleanly when `transport` is dropped. fn server_with_transport(base: &str) -> (SftpTransport, thread::JoinHandle<()>) { let (a, b) = UnixStream::pair().unwrap(); let server = spawn(b); let channel: BoxedChannel = Box::new(a); let transport = SftpTransport::from_channel(base, channel).expect("handshake"); (transport, server) } /// Public-in-crate spawn for cross-module tests. The registry /// test wants to verify a registered SFTP factory talks to a /// real (fake) server end-to-end without re-implementing the /// wire harness. 
pub(crate) fn spawn_for_registry(stream: UnixStream) -> thread::JoinHandle<()> {
    // Thin re-export of the module-private `spawn` so other modules' tests
    // can start the same fake server on their own stream.
    spawn(stream)
}

/// Building the transport performs the handshake; `base()` must echo the
/// URL the transport was constructed with.
#[test]
fn handshake_succeeds() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    assert_eq!(t.base().as_str(), "sftp://test/tmp/");
    // Dropping the transport lets the server thread finish, so the join
    // below does not hang.
    drop(t);
    server.join().unwrap();
}

/// Bytes written with `put_bytes` come back unchanged from `get_bytes`.
#[test]
fn put_then_get_round_trips() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    t.put_bytes("hello.txt", b"hello world", None).unwrap();
    let bytes = t.get_bytes("hello.txt").unwrap();
    assert_eq!(bytes, b"hello world");
    drop(t);
    server.join().unwrap();
}

/// `list_dir` on a freshly created directory yields exactly the names put
/// into it (compared after sorting, so listing order is not asserted).
#[test]
fn mkdir_and_list_dir() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    t.mkdir("sub", None).unwrap();
    t.put_bytes("sub/a", b"A", None).unwrap();
    t.put_bytes("sub/b", b"BB", None).unwrap();
    // NOTE(review): `Vec` carries no element type in this copy of the
    // file; the assertion below compares against `String`s — confirm.
    let entries: Vec = t.list_dir("sub").filter_map(|r| r.ok()).collect();
    let mut sorted = entries.clone();
    sorted.sort();
    assert_eq!(sorted, vec!["a".to_string(), "b".to_string()]);
    drop(t);
    server.join().unwrap();
}

/// `has` distinguishes an existing path from a missing one without erroring.
#[test]
fn has_returns_true_for_existing_and_false_for_missing() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    t.put_bytes("present", b"x", None).unwrap();
    assert!(t.has("present").unwrap());
    assert!(!t.has("missing").unwrap());
    drop(t);
    server.join().unwrap();
}

/// `stat` on a file reports its byte length and file kind.
#[test]
fn stat_reports_file_size() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    t.put_bytes("f", b"123456", None).unwrap();
    let st = t.stat("f").unwrap();
    assert_eq!(st.size, 6);
    assert!(st.is_file());
    drop(t);
    server.join().unwrap();
}

/// `stat` on a directory reports the directory kind.
#[test]
fn stat_reports_directory_kind() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    t.mkdir("d", None).unwrap();
    let st = t.stat("d").unwrap();
    assert!(st.is_dir());
    drop(t);
    server.join().unwrap();
}

/// After `rename`, the old name is gone and the new name holds the content.
#[test]
fn rename_moves_file() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    t.put_bytes("a", b"x", None).unwrap();
    t.rename("a", "b").unwrap();
    assert!(!t.has("a").unwrap());
    assert_eq!(t.get_bytes("b").unwrap(), b"x");
    drop(t);
    server.join().unwrap();
}

/// `delete` removes a file so that `has` no longer sees it.
#[test]
fn delete_removes_file() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    t.put_bytes("doomed", b"x", None).unwrap();
    t.delete("doomed").unwrap();
    assert!(!t.has("doomed").unwrap());
    drop(t);
    server.join().unwrap();
}

/// `rmdir` on a directory that still contains a file surfaces
/// `Error::DirectoryNotEmptyError`.
#[test]
fn rmdir_rejects_non_empty() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    t.mkdir("d", None).unwrap();
    t.put_bytes("d/x", b"x", None).unwrap();
    let err = t.rmdir("d").unwrap_err();
    assert!(matches!(err, Error::DirectoryNotEmptyError(_)));
    drop(t);
    server.join().unwrap();
}

/// `append_file` returns the length the file had before the append (6 for
/// `"first\n"`) and the new data lands after the existing content.
#[test]
fn append_file_extends_and_returns_prior_offset() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    t.put_bytes("log", b"first\n", None).unwrap();
    let mut more = std::io::Cursor::new(b"second\n");
    let offset = t.append_file("log", &mut more, None).unwrap();
    assert_eq!(offset, 6);
    assert_eq!(t.get_bytes("log").unwrap(), b"first\nsecond\n");
    drop(t);
    server.join().unwrap();
}

/// Data written through a write stream is readable once the stream has
/// gone out of scope, without an explicit flush or close call.
#[test]
fn open_write_stream_flushes_on_drop() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    {
        // Inner scope so `w` is dropped before the read-back below.
        let mut w = t.open_write_stream("ws", None).unwrap();
        w.write_all(b"streamed").unwrap();
    }
    assert_eq!(t.get_bytes("ws").unwrap(), b"streamed");
    drop(t);
    server.join().unwrap();
}

/// Reading a path that was never written yields `Error::NoSuchFile`.
#[test]
fn missing_file_maps_to_no_such_file() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    let err = t.get_bytes("nope").unwrap_err();
    assert!(matches!(err, Error::NoSuchFile(_)));
    drop(t);
    server.join().unwrap();
}

/// Creating the same directory twice yields `Error::FileExists`.
#[test]
fn mkdir_existing_maps_to_file_exists() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    t.mkdir("d", None).unwrap();
    let err = t.mkdir("d", None).unwrap_err();
    assert!(matches!(err, Error::FileExists(_)));
    drop(t);
    server.join().unwrap();
}

/// `iter_files_recursive` reports files at every depth as paths relative
/// to the transport base; directories themselves are not in the expected list.
#[test]
fn iter_files_recursive_walks_subdirs() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    t.mkdir("a", None).unwrap();
    t.mkdir("a/b", None).unwrap();
    t.put_bytes("top", b"x", None).unwrap();
    t.put_bytes("a/inside", b"y", None).unwrap();
    t.put_bytes("a/b/deep", b"z", None).unwrap();
    // NOTE(review): `Vec` carries no element type in this copy — confirm.
    let mut files: Vec = t.iter_files_recursive().filter_map(|r| r.ok()).collect();
    // Sorted before comparing so traversal order is not part of the contract.
    files.sort();
    assert_eq!(
        files,
        vec![
            "a/b/deep".to_string(),
            "a/inside".to_string(),
            "top".to_string(),
        ]
    );
    drop(t);
    server.join().unwrap();
}

/// After `hardlink`, both names exist and the new name reads the same bytes.
#[test]
fn hardlink_creates_independent_path_with_same_content() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    t.put_bytes("orig", b"shared", None).unwrap();
    t.hardlink("orig", "alias").unwrap();
    assert_eq!(t.get_bytes("alias").unwrap(), b"shared");
    assert!(t.has("orig").unwrap());
    assert!(t.has("alias").unwrap());
    drop(t);
    server.join().unwrap();
}

/// Hard-linking from a source that does not exist yields `Error::NoSuchFile`.
#[test]
fn hardlink_to_missing_target_errors() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    let err = t.hardlink("nope", "alias").unwrap_err();
    assert!(matches!(err, Error::NoSuchFile(_)));
    drop(t);
    server.join().unwrap();
}

/// `delete_tree` removes the directory, its subdirectory and the files in both.
#[test]
fn delete_tree_removes_nested_subdirs_and_files() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    t.mkdir("tree", None).unwrap();
    t.mkdir("tree/sub", None).unwrap();
    t.put_bytes("tree/a", b"1", None).unwrap();
    t.put_bytes("tree/sub/b", b"2", None).unwrap();
    t.put_bytes("tree/sub/c", b"3", None).unwrap();
    t.delete_tree("tree").unwrap();
    assert!(!t.has("tree").unwrap());
    assert!(!t.has("tree/a").unwrap());
    assert!(!t.has("tree/sub").unwrap());
    assert!(!t.has("tree/sub/b").unwrap());
    drop(t);
    server.join().unwrap();
}

/// `copy` leaves the source intact and gives the destination the same bytes.
#[test]
fn copy_duplicates_file_content() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    t.put_bytes("src", b"copy-me", None).unwrap();
    t.copy("src", "dst").unwrap();
    assert_eq!(t.get_bytes("src").unwrap(), b"copy-me");
    assert_eq!(t.get_bytes("dst").unwrap(), b"copy-me");
    drop(t);
    server.join().unwrap();
}

/// A clone made with an offset has the offset appended to its base URL and
/// resolves relative paths beneath it, while still talking to the same server.
#[test]
fn clone_with_offset_rebases_relpaths_to_subdir() {
    let (t, server) = server_with_transport("sftp://test/tmp/");
    t.mkdir("nested", None).unwrap();
    t.put_bytes("nested/inside", b"data", None).unwrap();
    // Clone descends into the subdir; relpaths against the
    // clone resolve to /tmp/nested/ on the wire.
    let sub = Transport::clone(&t, Some("nested")).unwrap();
    assert_eq!(sub.base().as_str(), "sftp://test/tmp/nested/");
    assert_eq!(sub.get_bytes("inside").unwrap(), b"data");
    // Confirm it's a real shared session: writes through the
    // clone are visible to the parent.
    sub.put_bytes("via_clone", b"x", None).unwrap();
    assert_eq!(t.get_bytes("nested/via_clone").unwrap(), b"x");
    // Both handles are dropped before joining the server thread.
    drop(sub);
    drop(t);
    server.join().unwrap();
}

/// A 256 KiB payload survives a put/get round trip byte-for-byte.
#[test]
fn put_and_get_round_trip_a_large_file() {
    // 256 KiB exercises the >64 KiB chunking in `read_full`
    // and the put_file write loop. Pattern is an incrementing
    // byte so a wrong-offset bug shows up as a content
    // mismatch rather than a length mismatch.
    let (t, server) = server_with_transport("sftp://test/tmp/");
    // NOTE(review): `Vec` carries no element type in this copy; the
    // closure produces `u8` values — confirm.
    let big: Vec = (0..256 * 1024).map(|i| (i % 251) as u8).collect();
    t.put_bytes("big", &big, None).unwrap();
    let got = t.get_bytes("big").unwrap();
    assert_eq!(got.len(), big.len());
    // `assert!` with `==` rather than `assert_eq!` keeps a failure from
    // printing both 256 KiB buffers.
    assert!(got == big, "256 KiB round-trip content differs");
    drop(t);
    server.join().unwrap();
}

/// The stream returned by `get` supports `Read` and `Seek`: sequential
/// reads, absolute seeks and end-relative seeks all see the right bytes.
#[test]
fn get_streams_lazily_via_seek_and_read() {
    use std::io::{Read, Seek, SeekFrom};
    // Lay down 8 KiB so SeekFrom::End is meaningful.
    let (t, server) = server_with_transport("sftp://test/tmp/");
    // NOTE(review): `Vec` carries no element type in this copy — confirm.
    let payload: Vec = (0..8192).map(|i| (i % 251) as u8).collect();
    t.put_bytes("f", &payload, None).unwrap();

    let mut s = t.get("f").unwrap();
    // Read first 100 bytes from a fresh stream — confirms the
    // initial offset is 0 and pread returns the head.
    let mut head = [0u8; 100];
    s.read_exact(&mut head).unwrap();
    assert_eq!(&head[..], &payload[..100]);

    // Seek mid-file and read 50 bytes — confirms Seek/Current
    // and that Read advances after the seek.
    let pos = s.seek(SeekFrom::Start(2000)).unwrap();
    assert_eq!(pos, 2000);
    let mut mid = [0u8; 50];
    s.read_exact(&mut mid).unwrap();
    assert_eq!(&mid[..], &payload[2000..2050]);

    // Seek from end and read to EOF — confirms the size cache
    // and that read returns 0 at EOF.
    let pos = s.seek(SeekFrom::End(-30)).unwrap();
    assert_eq!(pos, 8192 - 30);
    let mut tail = Vec::new();
    s.read_to_end(&mut tail).unwrap();
    assert_eq!(tail, payload[8162..]);

    drop(s);
    drop(t);
    server.join().unwrap();
}

/// Seeking to a negative absolute position fails with `InvalidInput`.
#[test]
fn get_seek_before_start_errors() {
    use std::io::{Seek, SeekFrom};
    let (t, server) = server_with_transport("sftp://test/tmp/");
    t.put_bytes("f", b"abc", None).unwrap();
    let mut s = t.get("f").unwrap();
    // The stream is at offset 0, so Current(-5) would land before the start.
    let err = s.seek(SeekFrom::Current(-5)).unwrap_err();
    assert_eq!(err.kind(), std::io::ErrorKind::InvalidInput);
    drop(s);
    drop(t);
    server.join().unwrap();
}

/// Setting a segment parameter succeeds but is not stored.
#[test]
fn set_segment_parameter_is_a_noop() {
    let (mut t, server) = server_with_transport("sftp://test/tmp/");
    // SFTP doesn't consume segment params; setting one must
    // succeed and leave the parameter map empty.
    t.set_segment_parameter("foo", Some("bar")).unwrap();
    assert!(t.get_segment_parameters().unwrap().is_empty());
    drop(t);
    server.join().unwrap();
}
}
}
dromedary-0.1.5/src/ssh.rs000066400000000000000000000273131520150013200154300ustar00rootroot00000000000000
//! Pure-logic pieces of the SSH module — argv construction for subprocess
//! vendors and vendor auto-detection, shared by the PyO3 layer in
//! `_transport_rs` and unit-testable without Python link symbols.
//!
//! Spawning, connection wrappers, and library-backed vendors live in
//! `_transport_rs/src/ssh/` because they're PyO3-facing.

use std::error::Error;
use std::ffi::OsStr;
use std::fmt;
use std::path::Path;
use std::process::Command;

/// Which subprocess vendor we're building argv for. Collapses the per-vendor
/// Python classes into an enum because they differ only in flag syntax.
#[derive(Clone, Copy, Debug, PartialEq, Eq)] pub enum Flavor { OpenSSH, Lsh, PLink, } impl Flavor { pub fn executable(self) -> &'static str { match self { Flavor::OpenSSH => "ssh", Flavor::Lsh => "lsh", Flavor::PLink => "plink", } } } /// Reasons `build_argv` may reject its input. Callers map these to the /// appropriate Python exceptions (`StrangeHostname`, etc.). #[derive(Debug)] pub enum ArgvError { /// Hostname starts with `-`; would be interpreted as a flag by the ssh /// binary. Matches Python `StrangeHostname`. StrangeHostname(String), /// Neither (or both of) `subsystem` and `command` were provided. InvalidArguments, } impl fmt::Display for ArgvError { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { ArgvError::StrangeHostname(h) => { write!(f, "Refusing to connect to strange SSH hostname {h}") } ArgvError::InvalidArguments => { write!(f, "exactly one of subsystem or command must be provided") } } } } impl Error for ArgvError {} /// Build the full argv for a subprocess vendor. /// /// Exactly one of `subsystem` and `command` must be `Some`. `executable` /// overrides the default binary name (used when `BRZ_SSH=/path/to/ssh` /// selects a vendor via auto-detection). The produced argv matches the /// Python vendor classes in `dromedary/ssh/__init__.py` byte-for-byte so /// behavioral tests that compare argv stay stable across the port. pub fn build_argv( flavor: Flavor, executable: Option<&str>, username: Option<&str>, host: &str, port: Option, subsystem: Option<&str>, command: Option<&[String]>, ) -> Result, ArgvError> { // Match Python exactly: OpenSSH doesn't call _check_hostname, the others // do. The `--` separator before host in the OpenSSH argv makes this safe // for OpenSSH specifically. // TODO: Python OpenSSH vendor likely should also reject leading-dash // hostnames defensively — raise upstream before changing behavior. 
if !matches!(flavor, Flavor::OpenSSH) && host.starts_with('-') { return Err(ArgvError::StrangeHostname(host.to_string())); } let mut args: Vec = Vec::new(); args.push(executable.unwrap_or(flavor.executable()).to_string()); match flavor { Flavor::OpenSSH => { args.extend( [ "-oForwardX11=no", "-oForwardAgent=no", "-oClearAllForwardings=yes", "-oNoHostAuthenticationForLocalhost=yes", ] .iter() .map(|s| s.to_string()), ); } Flavor::Lsh => {} Flavor::PLink => { args.extend( ["-x", "-a", "-ssh", "-2", "-batch"] .iter() .map(|s| s.to_string()), ); } } let port_flag = match flavor { Flavor::PLink => "-P", _ => "-p", }; if let Some(p) = port { args.push(port_flag.to_string()); args.push(p.to_string()); } if let Some(u) = username { args.push("-l".to_string()); args.push(u.to_string()); } match (subsystem, command) { (Some(sub), None) => match flavor { Flavor::OpenSSH => { args.extend(["-s", "--", host, sub].iter().map(|s| s.to_string())); } Flavor::Lsh => { args.extend(["--subsystem", sub, host].iter().map(|s| s.to_string())); } Flavor::PLink => { args.extend(["-s", host, sub].iter().map(|s| s.to_string())); } }, (None, Some(cmd)) => { if matches!(flavor, Flavor::OpenSSH) { args.push("--".to_string()); } args.push(host.to_string()); args.extend(cmd.iter().cloned()); } _ => return Err(ArgvError::InvalidArguments), } Ok(args) } /// Classify an `ssh -V` version string into the registry key of the matching /// vendor. `progname` is the basename (no extension) of the binary that was /// run — it's only consulted for plink because Windows `ssh -V` sometimes /// reports "plink" in its version output (launchpad bug 107155); we only /// accept it when plink was actually the binary. /// /// Returns `None` if the version doesn't match any known implementation. 
pub fn classify_ssh_version(version: &str, progname: &str) -> Option<&'static str> { if version.contains("OpenSSH") { Some("openssh") } else if version.contains("lsh") { Some("lsh") } else if version.contains("plink") && progname == "plink" { // plink prompts aren't wired up, so we don't auto-detect it from // inspection — require the user to name `plink` explicitly via // BRZ_SSH=plink. See https://bugs.launchpad.net/bugs/414743. Some("plink") } else { None } } /// Run `executable -V` and classify the output. Returns the vendor registry /// key, or `None` if the binary can't be run or produces an unrecognized /// version. `progname` is derived from the executable's file stem. /// /// Combines stdout+stderr to match paramiko/OpenSSH behavior where the /// version lands on stderr. Decodes as UTF-8 lossy — we only look for ASCII /// substrings, so encoding mismatches on non-UTF-8 locales don't matter. pub fn detect_ssh_vendor(executable: &OsStr) -> Option<&'static str> { let progname = Path::new(executable) .file_stem() .and_then(|s| s.to_str()) .unwrap_or(""); let output = Command::new(executable).arg("-V").output().ok()?; let mut combined = output.stdout; combined.extend_from_slice(&output.stderr); let version = String::from_utf8_lossy(&combined); classify_ssh_version(&version, progname) } #[cfg(test)] mod tests { use super::*; fn s(v: &[&str]) -> Vec { v.iter().map(|x| x.to_string()).collect() } #[test] fn openssh_sftp_argv() { let argv = build_argv( Flavor::OpenSSH, None, Some("alice"), "example.com", Some(2222), Some("sftp"), None, ) .unwrap(); assert_eq!( argv, s(&[ "ssh", "-oForwardX11=no", "-oForwardAgent=no", "-oClearAllForwardings=yes", "-oNoHostAuthenticationForLocalhost=yes", "-p", "2222", "-l", "alice", "-s", "--", "example.com", "sftp", ]) ); } #[test] fn openssh_command_argv() { let cmd = s(&["bzr", "serve", "--inet"]); let argv = build_argv( Flavor::OpenSSH, None, None, "example.com", None, None, Some(&cmd), ) .unwrap(); assert_eq!( argv, s(&[ "ssh", 
"-oForwardX11=no", "-oForwardAgent=no", "-oClearAllForwardings=yes", "-oNoHostAuthenticationForLocalhost=yes", "--", "example.com", "bzr", "serve", "--inet", ]) ); } #[test] fn lsh_sftp_argv() { let argv = build_argv( Flavor::Lsh, None, Some("bob"), "example.com", Some(22), Some("sftp"), None, ) .unwrap(); assert_eq!( argv, s(&[ "lsh", "-p", "22", "-l", "bob", "--subsystem", "sftp", "example.com", ]) ); } #[test] fn plink_sftp_argv() { let argv = build_argv( Flavor::PLink, None, Some("carol"), "example.com", Some(22), Some("sftp"), None, ) .unwrap(); assert_eq!( argv, s(&[ "plink", "-x", "-a", "-ssh", "-2", "-batch", "-P", "22", "-l", "carol", "-s", "example.com", "sftp", ]) ); } #[test] fn strange_hostname_rejected_for_non_openssh() { let err = build_argv(Flavor::Lsh, None, None, "-evil", None, Some("sftp"), None); assert!(matches!(err, Err(ArgvError::StrangeHostname(_)))); } #[test] fn openssh_does_not_check_hostname() { // Matches Python: OpenSSH vendor never called _check_hostname. The // -- separator before host makes this safe for OpenSSH. let argv = build_argv( Flavor::OpenSSH, None, None, "-evil", None, Some("sftp"), None, ) .unwrap(); assert!(argv.contains(&"-evil".to_string())); assert!(argv.contains(&"--".to_string())); } #[test] fn missing_both_subsystem_and_command_errors() { let err = build_argv(Flavor::OpenSSH, None, None, "h", None, None, None); assert!(matches!(err, Err(ArgvError::InvalidArguments))); } #[test] fn classify_openssh_version() { assert_eq!( classify_ssh_version("OpenSSH_9.6p1 Ubuntu-3ubuntu13.5", "ssh"), Some("openssh") ); } #[test] fn classify_lsh_version() { assert_eq!(classify_ssh_version("lsh-2.1", "lsh"), Some("lsh")); } #[test] fn classify_plink_requires_plink_progname() { // Windows sometimes reports "plink" in `ssh -V` output even when ssh // is actually OpenSSH (launchpad bug 107155), so the progname must // also be plink before we claim it. 
assert_eq!(classify_ssh_version("plink 0.80", "plink"), Some("plink")); assert_eq!(classify_ssh_version("plink 0.80", "ssh"), None); } #[test] fn classify_unknown_returns_none() { assert_eq!(classify_ssh_version("Dropbear v2022.83", "ssh"), None); assert_eq!(classify_ssh_version("", "ssh"), None); } #[test] fn classify_ssh_corp_no_longer_matches() { // SSH Corporation's "SSH Secure Shell" used to map to its own vendor; // now that SSHCorp is gone, the string falls through to None. assert_eq!(classify_ssh_version("SSH Secure Shell 3.2", "ssh"), None); } #[test] fn executable_override_replaces_default() { let argv = build_argv( Flavor::OpenSSH, Some("/usr/local/bin/my-ssh"), None, "example.com", None, Some("sftp"), None, ) .unwrap(); assert_eq!(argv[0], "/usr/local/bin/my-ssh"); } } dromedary-0.1.5/src/unlistable.rs000066400000000000000000000102131520150013200167640ustar00rootroot00000000000000//! Unlistable Transport decorator, ported from dromedary/unlistable.py. //! //! A transport that disables directory listing, to simulate HTTP cheaply //! in tests. `listable()` returns false; `list_dir` and //! `iter_files_recursive` both yield a single TransportNotPossible error. 
use crate::{Error, Result, Transport, UrlFragment};
use url::Url;

/// Decorator that forwards every operation to `inner` except directory
/// listing, which it reports as not possible.
pub struct UnlistableTransport {
    // NOTE(review): the type parameters of `Box` are missing in this copy
    // of the file; `inner` is used below as the wrapped transport — confirm
    // the exact trait-object bounds against the repository.
    inner: Box,
}

impl UnlistableTransport {
    /// URL prefix that marks a transport as wrapped by this decorator.
    pub const PREFIX: &'static str = "unlistable+";

    /// Wrap `inner`; nothing else is configured.
    pub fn new(inner: Box) -> Self {
        Self { inner }
    }
}

impl std::fmt::Debug for UnlistableTransport {
    /// Renders as `UnlistableTransport(<base url>)`.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(f, "UnlistableTransport({})", self.base())
    }
}

impl Transport for UnlistableTransport {
    // Everything below is delegated to `inner` through the crate's `fwd_*`
    // macros; only `base`, `listable`, `list_dir` and
    // `iter_files_recursive` are written out by hand further down.
    crate::fwd_external_url!(inner);
    crate::fwd_can_roundtrip_unix_modebits!(inner);
    crate::fwd_is_readonly!(inner);
    crate::fwd_get!(inner);
    crate::fwd_has!(inner);
    crate::fwd_stat!(inner);
    crate::fwd_decorator_url!(inner, UnlistableTransport);
    crate::fwd_put_file!(inner);
    crate::fwd_put_bytes!(inner);
    crate::fwd_put_file_non_atomic!(inner);
    crate::fwd_put_bytes_non_atomic!(inner);
    crate::fwd_mkdir!(inner);
    crate::fwd_delete!(inner);
    crate::fwd_rmdir!(inner);
    crate::fwd_rename!(inner);
    crate::fwd_set_segment_parameter!(inner);
    crate::fwd_get_segment_parameters!(inner);
    crate::fwd_append_file!(inner);
    crate::fwd_readlink!(inner);
    crate::fwd_hardlink!(inner);
    crate::fwd_symlink!(inner);
    crate::fwd_open_write_stream!(inner);
    crate::fwd_delete_tree!(inner);
    crate::fwd_move!(inner);
    crate::fwd_lock_read!(inner);
    crate::fwd_lock_write!(inner);
    crate::fwd_local_abspath!(inner);
    crate::fwd_copy!(inner);

    /// Base URL of the wrapped transport with [`Self::PREFIX`] applied by
    /// `crate::decorator::prefixed_base`.
    fn base(&self) -> Url {
        crate::decorator::prefixed_base(Self::PREFIX, self.inner.as_ref())
    }

    /// Always `false`: this decorator exists to deny listing.
    fn listable(&self) -> bool {
        false
    }

    /// Yields exactly one item, `Err(Error::TransportNotPossible(None))`,
    /// regardless of `_relpath`.
    // NOTE(review): the return type's parameters are missing in this copy.
    fn list_dir(&self, _relpath: &UrlFragment) -> Box>> {
        Box::new(std::iter::once(Err(Error::TransportNotPossible(None))))
    }

    /// Yields exactly one item, `Err(Error::TransportNotPossible(None))`.
    // NOTE(review): the return type's parameters are missing in this copy.
    fn iter_files_recursive(&self) -> Box>> {
        Box::new(std::iter::once(Err(Error::TransportNotPossible(None))))
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::memory::MemoryTransport;

    /// Fixture: an in-memory transport holding files `a` and `b`, wrapped
    /// in the decorator under test.
    fn wrap() -> UnlistableTransport {
        let mem = MemoryTransport::new("memory:///").unwrap();
        mem.put_bytes("a", b"1", None).unwrap();
        mem.put_bytes("b", b"2", None).unwrap();
        UnlistableTransport::new(Box::new(mem))
    }

    #[test]
    fn base_prefix() {
        assert!(wrap().base().as_str().starts_with("unlistable+"));
    }

    #[test]
    fn listable_returns_false() {
        assert_eq!(wrap().listable(), false);
    }

    /// Listing yields a single `TransportNotPossible` error and nothing else.
    #[test]
    fn list_dir_yields_not_possible() {
        let t = wrap();
        // NOTE(review): `Vec>` has lost its type parameters in this copy.
        let results: Vec> = t.list_dir(".").collect();
        assert_eq!(results.len(), 1);
        match &results[0] {
            Err(Error::TransportNotPossible(None)) => {}
            other => panic!("expected TransportNotPossible, got {:?}", other),
        }
    }

    /// Recursive listing behaves the same way as `list_dir`.
    #[test]
    fn iter_files_recursive_yields_not_possible() {
        let t = wrap();
        // NOTE(review): `Vec>` has lost its type parameters in this copy.
        let results: Vec> = t.iter_files_recursive().collect();
        assert_eq!(results.len(), 1);
        match &results[0] {
            Err(Error::TransportNotPossible(None)) => {}
            other => panic!("expected TransportNotPossible, got {:?}", other),
        }
    }

    /// Non-listing operations still reach the wrapped transport.
    #[test]
    fn reads_pass_through() {
        let t = wrap();
        assert_eq!(t.get_bytes("a").unwrap(), b"1");
    }

    #[test]
    fn abspath_carries_prefix() {
        let t = wrap();
        assert_eq!(
            t.abspath("relpath").unwrap().as_str(),
            "unlistable+memory:///relpath"
        );
    }

    /// Cloning into a subpath must not shed the decorator.
    #[test]
    fn clone_keeps_unlistable_wrapping() {
        let t = wrap();
        let cloned = t.clone(Some("sub")).unwrap();
        assert!(cloned.base().as_str().starts_with("unlistable+"));
        assert_eq!(cloned.listable(), false);
    }
}
dromedary-0.1.5/src/urlutils.rs000066400000000000000000001035161520150013200165160ustar00rootroot00000000000000
use lazy_static::lazy_static;
use regex::Regex;
use std::collections::HashMap;
use std::path::{Path, PathBuf};

lazy_static!
{
    // `scheme:rest` or `scheme://rest`. The scheme must be at least two
    // characters and contain neither ':' nor '/'. The two groups are looked
    // up by name (`scheme`, `path`) in `find_scheme_and_separator`, so the
    // names are part of the contract.
    static ref URL_SCHEME_RE: Regex =
        Regex::new(r"^(?P<scheme>[^:/]{2,}):(//)?(?P<path>.*)$").unwrap();
    // One percent-escape: '%' followed by exactly two hex digits.
    static ref URL_HEX_ESCAPES_RE: Regex = Regex::new(r"(%[0-9a-fA-F]{2})").unwrap();
}

/// Errors produced by the URL helpers in this module.
#[derive(Debug)]
pub enum Error {
    /// `joinpath` was asked to go above the root: `(base, joined args)`.
    AboveRoot(String, String),
    /// A segment parameter had no `=`: carries the offending subsegment.
    SubsegmentMissesEquals(String),
    UnsafeCharacters(char),
    /// Wrapped I/O failure.
    IoError(std::io::Error),
    /// A segment parameter key contained `=`: `(url, key)`.
    SegmentParameterKeyContainsEquals(String, String),
    /// A raw subsegment contained `,`: `(base, all subsegments)`.
    SegmentParameterContainsComma(String, Vec<String>),
    NotLocalUrl(String),
    InvalidUNCUrl(String),
    /// `unescape` was given a URL with non-ASCII characters.
    UrlNotAscii(String),
    InvalidWin32LocalUrl(String),
    InvalidWin32Path(String),
    UrlTooShort(String),
    PathNotChild(String, String),
    InvalidUrlPort(String, String),
}

/// Module-local result alias with [`Error`] as the error type.
type Result<T> = std::result::Result<T, Error>;

/// Split a URL into its parent directory and a child directory.
///
/// Args:
///   url: A relative or absolute URL
///   exclude_trailing_slash: Strip off a final '/' if it is part
///     of the path (but not if it is part of the protocol specification)
///
/// Returns: (parent_url, child_dir). child_dir may be the empty string if
///   we're at the root.
pub fn split(url: &str, exclude_trailing_slash: bool) -> (String, String) { let (scheme_loc, first_path_slash) = find_scheme_and_separator(url); if first_path_slash.is_none() { // We have either a relative path, or no separating slash if scheme_loc.is_none() { // Relative path let mut url = url; if exclude_trailing_slash && url.ends_with('/') { url = &url[..url.len() - 1]; } let split = url.rsplit_once('/').map(|(head, tail)| { if head.is_empty() { ("/", tail) } else { (head, tail) } }); match split { None => return (String::new(), url.to_string()), Some((head, tail)) => return (head.to_string(), tail.to_string()), } } else { // Scheme with no path return (url.to_string(), String::new()); } } // We have a fully defined path let url_base = &url[..first_path_slash.unwrap()]; // http://host, file:// let mut path = &url[first_path_slash.unwrap()..]; // /file/foo // TODO(windows): the original breezy code rebinds `url_base`/`path` here // via `_extract_drive_letter` so that `file:///C:/foo` splits as // `file:///C:` + `/foo` rather than `file://` + `/C:/foo`. The direct port // below shadowed but never used the rebinding, so on Windows `split` for // drive-letter URLs is currently wrong. See `win32::extract_drive_letter`. #[cfg(target_os = "windows")] { // Reference the symbol so the cfg-gated branch compiles; no-op for now. let _ = win32::extract_drive_letter; } if exclude_trailing_slash && path.len() > 1 && path.ends_with('/') { path = &path[..path.len() - 1]; } let split = path.rsplit_once('/').map(|(head, tail)| { if head.is_empty() { ("/", tail) } else { (head, tail) } }); match split { None => (url_base.to_string(), path.to_string()), Some((head, tail)) => (url_base.to_string() + head, tail.to_string()), } } /// Find the scheme separator (://) and the first path separator /// /// This is just a helper functions for other path utilities. 
/// It could probably be replaced by urlparse pub fn find_scheme_and_separator(url: &str) -> (Option, Option) { if let Some(m) = URL_SCHEME_RE.captures(url) { let scheme = m.name("scheme").unwrap().as_str(); let path = m.name("path").unwrap().as_str(); // Find the path separating slash // (first slash after the ://) if let Some(first_path_slash) = path.find('/') { ( Some(scheme.len()), Some(first_path_slash + m.name("path").unwrap().start()), ) } else { (Some(scheme.len()), None) } } else { (None, None) } } pub fn is_url(url: &str) -> bool { // Tests whether a URL is in actual fact a URL. URL_SCHEME_RE.is_match(url) } /// Strip trailing slash, except for root paths. /// /// The definition of 'root path' is platform-dependent. /// This assumes that all URLs are valid netloc urls, such that they /// form: /// scheme://host/path /// It searches for ://, and then refuses to remove the next '/'. /// It can also handle relative paths /// Examples: /// path/to/foo => path/to/foo /// path/to/foo/ => path/to/foo /// http://host/path/ => http://host/path /// http://host/path => http://host/path /// http://host/ => http://host/ /// file:/// => file:/// /// file:///foo/ => file:///foo /// # This is unique on win32 platforms, and is the only URL /// # format which does it differently. /// file:///c|/ => file:///c:/ pub fn strip_trailing_slash(url: &str) -> &str { if !url.ends_with('/') { // Nothing to do return url; } // TODO(windows): `win32::strip_local_trailing_slash` returns `String`, // so we can't return it from this `&str`-returning function directly. // The Python original returned a new string here; porting that requires // changing the signature to `Cow<'_, str>`. For now Windows callers get // the same generic handling as Unix, which is incorrect for drive-letter // file:/// URLs but compiles. 
let (scheme_loc, first_path_slash) = find_scheme_and_separator(url); if scheme_loc.is_none() { // This is a relative path, as it has no scheme // so just chop off the last character &url[..url.len() - 1] } else if first_path_slash.is_none() || first_path_slash.unwrap() == url.len() - 1 { // Don't chop off anything if the only slash is the path // separating slash url } else { &url[..url.len() - 1] } } /// Join URL path segments to a URL path segment. /// /// This is somewhat like osutils.joinpath, but intended for URLs. /// /// XXX: this duplicates some normalisation logic, and also duplicates a lot of /// path handling logic that already exists in some Transport implementations. /// We really should try to have exactly one place in the code base responsible /// for combining paths of URLs. pub fn joinpath(base: &str, args: &[&str]) -> Result { let mut path = base.split('/').collect::>(); if path.len() > 1 && path[path.len() - 1].is_empty() { // If the path ends in a trailing /, remove it. path.pop(); } for arg in args { if arg.starts_with('/') { path = vec![]; } for chunk in arg.split('/') { if chunk == "." { continue; } else if chunk == ".." { if path == [""] { return Err(Error::AboveRoot(base.to_string(), args.join("/"))); } path.pop(); } else { path.push(chunk); } } } Ok(if path == [""] { "/".to_string() } else { path.join("/") }) } /// Return the last component of a URL. /// /// Args: /// url The URL in question /// exclude_trailing_slash: If the url looks like "path/to/foo/", /// ignore the final slash and return 'foo' rather than '' /// Returns: /// Just the final component of the URL. This can return '' /// if you don't exclude_trailing_slash, or if you are at the /// root of the URL. pub fn basename(url: &str, exclude_trailing_slash: bool) -> String { split(url, exclude_trailing_slash).1 } /// Return the parent directory of the given path. 
///
/// Args:
///   url: Relative or absolute URL
///   exclude_trailing_slash: Remove a final slash (treat http://host/foo/ as http://host/foo, but
///     http://host/ stays http://host/)
///
/// Returns: Everything in the URL except the last path chunk
// jam 20060502: This was named dirname to be consistent
//               with the os functions, but maybe "parent" would be better
pub fn dirname(url: &str, exclude_trailing_slash: bool) -> String {
    split(url, exclude_trailing_slash).0
}

/// Create a URL by joining sections.
///
/// This will normalize '..', assuming that paths are absolute
/// (it assumes no symlinks in either path)
///
/// If any of *args is an absolute URL, it will be treated correctly.
/// Example:
///   join('http://foo', 'http://bar') => 'http://bar'
///   join('http://foo', 'bar') => 'http://foo/bar'
///   join('http://foo', 'bar', '../baz') => 'http://foo/baz'
///
/// # Errors
///
/// Propagates [`Error::AboveRoot`] from `joinpath`.
pub fn join<'a>(mut base: &'a str, args: &[&'a str]) -> Result<String> {
    // Returns (has a scheme?, byte offset where the path part starts).
    // Without a scheme the whole string is path; with a scheme but no path
    // slash the path is empty and starts at the end of the string.
    fn path_offset(url: &str) -> (bool, usize) {
        match find_scheme_and_separator(url) {
            (None, None) => (false, 0),
            (scheme, None) => (scheme.is_some(), url.len()),
            (scheme, Some(slash)) => (scheme.is_some(), slash),
        }
    }

    if args.is_empty() {
        return Ok(base.to_string());
    }

    let (_, mut path_start) = path_offset(base);
    let mut path = base[path_start..].to_string();
    for arg in args {
        let (arg_has_scheme, arg_path_start) = path_offset(arg);
        if arg_has_scheme {
            // An absolute URL replaces everything accumulated so far.
            base = *arg;
            path = arg[arg_path_start..].to_string();
            path_start = arg_path_start;
        } else {
            path = joinpath(&path, &[*arg])?;
        }
    }
    Ok(base[..path_start].to_string() + &path)
}

/// Split the subsegment of the last segment of a URL.
///
/// Args:
///   url: A relative or absolute URL
/// Returns: (url, subsegments)
///
/// When the last path segment contains no `,`, `url` is returned untouched
/// (including any trailing slash) with an empty list. Otherwise the
/// returned URL ends just before the first `,` and each subsegment is
/// trimmed of surrounding whitespace.
pub fn split_segment_parameters_raw(url: &str) -> (&str, Vec<&str>) {
    // GZ 2011-11-18: Dodgy removing the terminal slash like this, function
    //                operates on urls not url+segments, and Transport classes
    //                should not be blindly adding slashes in the first place.
    let lurl = strip_trailing_slash(url);
    let segment_start = lurl.rfind('/').map_or(0, |i| i + 1);

    // `split` always yields at least one piece; any further pieces exist
    // only if the segment contains a comma.
    let mut pieces = lurl[segment_start..].split(',');
    let first = pieces.next().unwrap_or("");
    let subsegments: Vec<&str> = pieces.map(str::trim).collect();
    if subsegments.is_empty() {
        return (url, subsegments);
    }
    (&lurl[..segment_start + first.len()], subsegments)
}

/// Split the segment parameters of the last segment of a URL.
///
/// Args:
///   url: A relative or absolute URL
/// Returns: (url, segment_parameters)
///
/// Each subsegment is split at its first `=`; keys and values are trimmed.
///
/// # Errors
///
/// [`Error::SubsegmentMissesEquals`] for a subsegment without `=`.
pub fn split_segment_parameters(
    url: &str,
) -> Result<(&str, std::collections::HashMap<&str, &str>)> {
    let (base_url, subsegments) = split_segment_parameters_raw(url);
    let parameters = subsegments
        .into_iter()
        .map(|subsegment| {
            subsegment
                .split_once('=')
                .map(|(key, value)| (key.trim(), value.trim()))
                .ok_or_else(|| Error::SubsegmentMissesEquals(subsegment.to_string()))
        })
        .collect::<Result<HashMap<_, _>>>()?;
    Ok((base_url, parameters))
}

/// Strip the segment parameters from a URL.
///
/// Args:
///   url: A relative or absolute URL
/// Returns: url
pub fn strip_segment_parameters(url: &str) -> &str {
    split_segment_parameters_raw(url).0
}

/// Create a new URL by adding subsegments to an existing one.
///
/// This adds the specified subsegments to the last path in the specified
/// base URL. The subsegments should be bytestrings.
///
/// Note: You probably want to use join_segment_parameters instead.
///
/// With no subsegments `base` is returned unchanged; otherwise the result
/// is `base` followed by `,` and the subsegments joined with `,`.
///
/// # Errors
///
/// [`Error::SegmentParameterContainsComma`] if any subsegment contains `,`.
pub fn join_segment_parameters_raw(base: &str, subsegments: &[&str]) -> Result<String> {
    if subsegments.is_empty() {
        return Ok(base.to_string());
    }
    if subsegments.iter().any(|subsegment| subsegment.contains(',')) {
        return Err(Error::SegmentParameterContainsComma(
            base.to_string(),
            subsegments.iter().map(|s| s.to_string()).collect(),
        ));
    }
    Ok(format!("{},{}", base, subsegments.join(",")))
}

/// Create a new URL by adding segment parameters to an existing one.
///
/// The parameters of the last segment in the URL will be updated; if a
/// parameter with the same key already exists it will be overwritten.
///
/// Args:
///   url: A URL, as string
///   parameters: Dictionary of parameters, keys and values as bytestrings
///
/// The merged parameters are emitted as `key=value` sorted by key, so the
/// result does not depend on hash-map iteration order.
///
/// # Errors
///
/// * [`Error::SegmentParameterKeyContainsEquals`] if a new key contains `=`.
/// * [`Error::SubsegmentMissesEquals`] if `url` already carries a malformed
///   parameter.
/// * [`Error::SegmentParameterContainsComma`] if a rendered `key=value`
///   contains `,`.
pub fn join_segment_parameters(url: &str, parameters: &HashMap<&str, &str>) -> Result<String> {
    // Start from the parameters already on the URL; new ones overwrite.
    let (base, mut merged) = split_segment_parameters(url)?;
    for (key, value) in parameters {
        if key.contains('=') {
            return Err(Error::SegmentParameterKeyContainsEquals(
                url.to_string(),
                key.to_string(),
            ));
        }
        merged.insert(*key, *value);
    }

    // Keys are unique, so an unstable sort by key is fully deterministic.
    let mut items: Vec<(&str, &str)> = merged.into_iter().collect();
    items.sort_unstable_by(|a, b| a.0.cmp(b.0));
    let rendered: Vec<String> = items
        .iter()
        .map(|(key, value)| format!("{}={}", key, value))
        .collect();
    let subsegments: Vec<&str> = rendered.iter().map(String::as_str).collect();
    join_segment_parameters_raw(base, &subsegments)
}

/// Return a path to other from base.
///
/// If other is unrelated to base, return other. Else return a relative path.
/// This assumes no symlinks as part of the url.
pub fn relative_url(base: &str, other: &str) -> String { let (_, base_first_slash) = find_scheme_and_separator(base); if base_first_slash.is_none() { return other.to_string(); } let (_, other_first_slash) = find_scheme_and_separator(other); if other_first_slash.is_none() { return other.to_string(); } // this takes care of differing schemes or hosts let base_scheme = &base[..base_first_slash.unwrap()]; let other_scheme = &other[..other_first_slash.unwrap()]; if base_scheme != other_scheme { return other.to_string(); } #[cfg(target_os = "windows")] if base_scheme == "file://" { let base_drive = &base[base_first_slash.unwrap() + 1..base_first_slash.unwrap() + 3]; let other_drive = &other[other_first_slash.unwrap() + 1..other_first_slash.unwrap() + 3]; if base_drive != other_drive { return other.to_string(); } } let mut base_path = &base[base_first_slash.unwrap() + 1..]; let other_path = &other[other_first_slash.unwrap() + 1..]; if base_path.ends_with('/') { base_path = &base_path[..base_path.len() - 1]; } let mut base_sections: Vec<_> = base_path.split('/').collect(); let mut other_sections: Vec<_> = other_path.split('/').collect(); if base_sections == [""] { base_sections = Vec::new(); } if other_sections == [""] { other_sections = Vec::new(); } let mut output_sections = Vec::new(); for (b, o) in base_sections.iter().zip(other_sections.iter()) { if b != o { break; } output_sections.push(b); } let match_len = output_sections.len(); let mut output_sections: Vec<_> = base_sections[match_len..].iter().map(|_x| "..").collect(); output_sections.extend_from_slice(&other_sections[match_len..]); let ret = output_sections.join("/"); if ret.is_empty() { ".".to_string() } else { ret } } fn char_is_safe(c: char) -> bool { c.is_ascii_alphanumeric() || c == '-' || c == '_' || c == '.' || c == '~' } /// Unquote percent-escapes of RFC 3986 unreserved characters /// (`A-Z a-z 0-9 - . _ ~`). Other percent-escapes are preserved. 
/// Callers use this to canonicalise URL paths that have been
/// needlessly over-escaped (`%7E` → `~`).
pub fn unquote_unreserved(path: &str) -> String {
    URL_HEX_ESCAPES_RE
        .replace_all(path, unescape_safe_chars)
        .into_owned()
}

/// Replacement callback used with `URL_HEX_ESCAPES_RE`.
///
/// `captures[0]` is taken to be a whole percent-escape: its first
/// character is skipped and the rest parsed as hexadecimal. When the
/// decoded byte is a "safe" character (see `char_is_safe`) the literal
/// character is returned; otherwise the escape is kept, upper-cased.
fn unescape_safe_chars(captures: &regex::Captures) -> String {
    let escape = &captures[0];
    match u8::from_str_radix(&escape[1..], 16) {
        Ok(code) if char_is_safe(char::from(code)) => char::from(code).to_string(),
        // Unsafe character, or (defensively) text that is not valid hex:
        // keep the escape, normalised to upper case.
        _ => escape.to_uppercase(),
    }
}

/// Transform a Transport-relative path to a remote absolute path.
///
/// This does not handle substitution of ~ but does handle '..' and '.'
/// components.
///
/// # Examples
///
/// ```
/// use dromedary::urlutils::combine_paths;
/// assert_eq!("/home/sarah/project/foo", combine_paths("/home/sarah", "project/foo"));
/// assert_eq!("/etc", combine_paths("/home/sarah", "../../etc"));
/// assert_eq!("/etc", combine_paths("/home/sarah", "/etc"));
/// ```
///
/// # Arguments
///
/// * `base_path` - base path
/// * `relpath` - relative path
///
/// # Returns
///
/// urlencoded string for final path.
pub fn combine_paths(base_path: &str, relpath: &str) -> String {
    // Canonicalise over-escaped unreserved characters first, so an
    // escaped "." or ".." component is recognised below.
    let relpath = unquote_unreserved(relpath);

    // An absolute relpath replaces the base entirely.
    let mut parts: Vec<&str> = if relpath.starts_with('/') {
        Vec::new()
    } else {
        base_path.split('/').collect()
    };
    // Drop the empty component produced by a trailing '/' on the base.
    if parts.last() == Some(&"") {
        parts.pop();
    }

    for segment in relpath.split('/') {
        match segment {
            ".." => {
                // Never pop the leading "" that stands for the root, so
                // ".." cannot climb above '/'.
                if parts.last().is_some_and(|last| !last.is_empty()) {
                    parts.pop();
                }
            }
            "." | "" => {}
            _ => parts.push(segment),
        }
    }

    let mut path = parts.join("/");
    if !path.starts_with('/') {
        path.insert(0, '/');
    }
    path
}

/// Make sure that a path string is in fully normalized URL form.
///
/// This handles URLs which have unicode characters, spaces,
/// special characters, etc.
/// /// It has two basic modes of operation, depending on whether the /// supplied string starts with a url specifier (scheme://) or not. /// If it does not have a specifier it is considered a local path, /// and will be converted into a file:/// url. Non-ascii characters /// will be encoded using utf-8. /// If it does have a url specifier, it will be treated as a "hybrid" /// URL. Basically, a URL that should have URL special characters already /// escaped (like +?&# etc), but may have unicode characters, etc /// which would not be valid in a real URL. /// /// Args: /// url: Either a hybrid URL or a local path /// Returns: A normalized URL which only includes 7-bit ASCII characters. pub fn normalize_url(url: &str) -> Result { let (scheme_end, path_start) = find_scheme_and_separator(url); if scheme_end.is_none() { local_path_to_url(url).map_err(Error::IoError) } else { let prefix = &url[..path_start.unwrap()]; let path = &url[path_start.unwrap()..]; // These characters should not be escaped const URL_SAFE_CHARACTERS: &[u8] = b"_.-!~*'()/;?:@&=+$,%#"; let path = path .as_bytes() .iter() .map(|c| { if !c.is_ascii_alphanumeric() && !URL_SAFE_CHARACTERS.contains(c) { format!("%{:02X}", c) } else { (*c as char).to_string() } }) .collect::(); let path = URL_HEX_ESCAPES_RE.replace_all(path.as_str(), unescape_safe_chars); Ok(prefix.to_string() + path.as_ref()) } } pub fn escape(relpath: &[u8], safe: Option<&str>) -> String { let mut result = String::new(); let safe = safe.unwrap_or("/~").as_bytes(); for b in relpath { if char_is_safe(char::from(*b)) || safe.contains(b) { result.push(char::from(*b)); } else { result.push_str(&format!("%{:02X}", *b)); } } result } pub fn unescape(url: &str) -> Result { use percent_encoding::percent_decode_str; if !url.is_ascii() { return Err(Error::UrlNotAscii(url.to_string())); } Ok(percent_decode_str(url) .decode_utf8() .map(|s| s.to_string()) .unwrap_or_else(|_| url.to_string())) } pub mod win32 { use std::path::{Path, PathBuf}; /// 
Convert a local path like ./foo into a URL like file:///C:/path/to/foo /// /// This also handles transforming escaping unicode characters, etc. pub fn local_path_to_url>(path: P) -> std::io::Result { if path.as_ref().as_os_str() == "/" { return Ok("file:///".to_string()); } // UNC paths (`\\HOST\path` or `//HOST/path`) are already absolute; // running them through `win32::abspath` on Windows would join with // cwd and mangle them into a drive-letter path because Rust's // `Path::is_absolute` doesn't recognise the forward-slash form. let raw = path.as_ref().to_str().ok_or_else(|| { std::io::Error::new(std::io::ErrorKind::InvalidInput, "non-utf8 path") })?; let unc_input = raw.starts_with("//") || raw.starts_with(r"\\"); let win32_path_owned; let win32_path: &str = if unc_input { // Normalise separators but skip cwd-join. win32_path_owned = raw.replace('\\', "/"); &win32_path_owned } else { win32_path_owned = crate::osutils::path::win32::abspath(path.as_ref())? .as_path() .to_str() .unwrap() .to_owned(); &win32_path_owned }; // `path_clean` can leave a trailing slash on UNC paths (e.g. // `//HOST/path/`); the caller passed a path-like input, so strip // the trailing slash that wasn't in the input. Keep the URL root // case (just `//`) intact. 
let trimmed = if win32_path.len() > 2 && win32_path.ends_with('/') { &win32_path[..win32_path.len() - 1] } else { win32_path }; if trimmed.starts_with("//") { Ok(format!( "file:{}", super::escape(trimmed.as_bytes(), Some("/~")) )) } else { let drive = trimmed.chars().next().unwrap().to_ascii_uppercase(); Ok(format!( "file:///{}:{}", drive, super::escape(trimmed[2..].as_bytes(), Some("/~")) )) } } /// Convert a url like file:///C:/path/to/foo into C:/path/to/foo pub fn local_path_from_url(url: &str) -> super::Result { if !url.starts_with("file://") { return Err(super::Error::NotLocalUrl(url.to_string())); } let url = super::strip_segment_parameters(url); let win32_url = &url[5..]; if !win32_url.starts_with("///") { if win32_url.len() < 3 || win32_url.chars().nth(2).unwrap() == '/' || "|:".contains(win32_url.chars().nth(3).unwrap()) { return Err(super::Error::InvalidUNCUrl(url.to_string())); } return Ok(super::unescape(win32_url)?.into()); } // Allow empty paths so we can serve all roots if win32_url == "///" { return Ok(PathBuf::from("/")); } // Usual local path with drive letter if win32_url.len() < 6 || !("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ".contains(&win32_url[3..=3])) || !("|:".contains(win32_url.chars().nth(4).unwrap())) || win32_url.chars().nth(5) != Some('/') { return Err(super::Error::InvalidWin32LocalUrl(url.to_string())); } Ok(PathBuf::from(format!( "{}:{}", win32_url[3..=3].to_uppercase(), super::unescape(&win32_url[5..])? 
))) } const WIN32_MIN_ABS_FILEURL_LENGTH: usize = "file:///C:/".len(); pub fn extract_drive_letter(url_base: &str, path: &str) -> super::Result<(String, String)> { if path.len() < 4 || !":|".contains(path.chars().nth(2).unwrap()) || path.chars().nth(3).unwrap() != '/' { return Err(super::Error::InvalidWin32Path(path.to_owned())); } let url_base = url_base.to_owned() + &path[0..3]; let path = &path[3..]; Ok((url_base, path.to_owned())) } pub fn strip_local_trailing_slash(url: &str) -> String { if url.len() > WIN32_MIN_ABS_FILEURL_LENGTH { url[..url.len() - 1].to_owned() } else { url.to_owned() } } } pub mod posix { use std::path::{Path, PathBuf}; /// Convert a local path like ./foo into a URL like file:///path/to/foo /// /// This also handles transforming escaping unicode characters, etc. pub fn local_path_to_url>(path: P) -> std::io::Result { let abs_path = crate::osutils::path::posix::abspath(path.as_ref())?; let escaped_path = super::escape( abs_path.as_path().as_os_str().as_encoded_bytes(), Some("/~"), ); Ok(format!("file://{}", escaped_path)) } const FILE_LOCALHOST_PREFIX: &str = "file://localhost"; const PLAIN_FILE_PREFIX: &str = "file:///"; pub fn local_path_from_url(url: &str) -> std::result::Result { let url = super::strip_segment_parameters(url); let path = if let Some(suffix) = url.strip_prefix(FILE_LOCALHOST_PREFIX) { suffix } else if url.starts_with(PLAIN_FILE_PREFIX) { &url[PLAIN_FILE_PREFIX.len() - 1..] } else { return Err(super::Error::NotLocalUrl(url.to_string())); }; Ok(PathBuf::from(super::unescape(path)?)) } } pub fn local_path_to_url>(path: P) -> std::io::Result { #[cfg(target_os = "windows")] return Ok(win32::local_path_to_url(path)?); #[cfg(unix)] return posix::local_path_to_url(path); } pub fn local_path_from_url(url: &str) -> Result { #[cfg(target_os = "windows")] return Ok(win32::local_path_from_url(url)?); #[cfg(unix)] return posix::local_path_from_url(url); } /// Derive a TO_LOCATION given a FROM_LOCATION. 
/// /// The normal case is a FROM_LOCATION of http://foo/bar => bar. /// The Right Thing for some logical destinations may differ though /// because no / may be present at all. In that case, the result is /// the full name without the scheme indicator, e.g. lp:foo-bar => foo-bar. /// This latter case also applies when a Windows drive /// is used without a path, e.g. c:foo-bar => foo-bar. /// If no /, path separator or : is found, the from_location is returned. pub fn derive_to_location(from_location: &str) -> String { let from_location = strip_segment_parameters(from_location); if let Some(separator_index) = from_location.rfind('/') { let basename = &from_location[separator_index + 1..]; basename.trim_end_matches("/\\").to_string() } else if let Some(separator_index) = from_location.find(':') { return from_location[separator_index + 1..].to_string(); } else { return from_location.to_string(); } } #[cfg(target_os = "windows")] pub const MIN_ABS_FILEURL_LENGTH: usize = "file:///C:".len(); #[cfg(not(target_os = "windows"))] pub const MIN_ABS_FILEURL_LENGTH: usize = "file:///".len(); /// Compute just the relative sub-portion of a url /// /// This assumes that both paths are already fully specified file:// URLs. pub fn file_relpath(base: &str, path: &str) -> Result { if base.len() < MIN_ABS_FILEURL_LENGTH { return Err(Error::UrlTooShort(base.to_string())); } let base: PathBuf = crate::osutils::path::normpath(local_path_from_url(base)?); let path: PathBuf = crate::osutils::path::normpath(local_path_from_url(path)?); let relpath = crate::osutils::path::relpath(base.as_path(), path.as_path()); if relpath.is_none() { return Err(Error::PathNotChild( path.display().to_string(), base.display().to_string(), )); } let relpath = relpath.unwrap(); // On Windows `relpath` returns components joined with `\`; URLs always // use `/` as the path separator, so normalise before escaping. 
#[cfg(target_os = "windows")] let bytes = relpath .to_str() .ok_or_else(|| Error::UrlTooShort(String::new()))? .replace('\\', "/") .into_bytes(); #[cfg(target_os = "windows")] let bytes_ref: &[u8] = &bytes; #[cfg(not(target_os = "windows"))] let bytes_ref: &[u8] = relpath.as_os_str().as_encoded_bytes(); Ok(escape(bytes_ref, None)) } /// Run the URL_HEX_ESCAPES_RE pass over a path, decoding "safe" hex /// escapes (alphanumerics + `-._~`) and uppercasing the rest. Mirrors /// the `_url_hex_escapes_re.sub(_unescape_safe_chars, ...)` used by the /// Python URL.__init__. pub fn normalize_quoted_path(path: &str) -> String { URL_HEX_ESCAPES_RE .replace_all(path, unescape_safe_chars) .to_string() } /// Decoded URL components, mirroring dromedary.urlutils.URL. #[derive(Clone, Debug, PartialEq, Eq)] pub struct ParsedUrl { pub scheme: String, pub quoted_user: Option, pub quoted_password: Option, pub quoted_host: String, pub port: Option, pub quoted_path: String, } /// Split a URL into (scheme, netloc, path), matching urlparse.urlparse /// with `allow_fragments=False`. The path includes the leading `/`. fn split_scheme_netloc_path(url: &str) -> (String, String, String) { // Find scheme: characters up to the first ':' followed by '//'. let scheme_end = url.find(':'); let (scheme, rest) = match scheme_end { Some(i) if url[i + 1..].starts_with("//") => (url[..i].to_string(), &url[i + 3..]), _ => return (String::new(), String::new(), url.to_string()), }; // After ://, netloc runs up to the next '/' (or end). match rest.find('/') { Some(j) => (scheme, rest[..j].to_string(), rest[j..].to_string()), None => (scheme, rest.to_string(), String::new()), } } /// Parse a URL into its quoted components. Mirrors /// dromedary.urlutils.URL.from_string and parse_url. Quoted forms are /// preserved verbatim — no percent decoding is done here. pub fn parse_url(url: &str) -> Result { let (scheme, netloc, path) = split_scheme_netloc_path(url); // Pull out user[:password]@ if present. 
let (user, password, host_part) = if let Some(at) = netloc.rfind('@') { let user_part = &netloc[..at]; let host_part = &netloc[at + 1..]; let (u, p) = match user_part.find(':') { Some(c) => (&user_part[..c], Some(&user_part[c + 1..])), None => (user_part, None), }; (Some(u.to_string()), p.map(|s| s.to_string()), host_part) } else { (None, None, netloc.as_str()) }; // Extract port if there's a `:` in the host portion AND the host // isn't a bracketed IPv6 literal (which itself contains colons). let mut host = host_part.to_string(); let mut port: Option = None; let bracketed = host.starts_with('[') && host.ends_with(']'); if host.contains(':') && !bracketed { if let Some(c) = host.rfind(':') { let port_str = host[c + 1..].to_string(); host.truncate(c); if !port_str.is_empty() { port = Some( port_str .parse::() .map_err(|_| Error::InvalidUrlPort(url.to_string(), port_str.clone()))?, ); } } } // Strip the brackets off an IPv6 literal once port handling is done. if host.starts_with('[') && host.ends_with(']') && host.len() >= 2 { host = host[1..host.len() - 1].to_string(); } Ok(ParsedUrl { scheme, quoted_user: user, quoted_password: password, quoted_host: host, port, quoted_path: path, }) } #[cfg(test)] mod parse_url_tests { use super::*; #[test] fn simple() { let p = parse_url("http://example.com:80/one").unwrap(); assert_eq!(p.scheme, "http"); assert_eq!(p.quoted_user, None); assert_eq!(p.quoted_password, None); assert_eq!(p.quoted_host, "example.com"); assert_eq!(p.port, Some(80)); assert_eq!(p.quoted_path, "/one"); } #[test] fn ipv6() { let p = parse_url("http://[1:2:3::40]/one").unwrap(); assert_eq!(p.quoted_host, "1:2:3::40"); assert_eq!(p.port, None); assert_eq!(p.quoted_path, "/one"); } #[test] fn ipv6_with_port() { let p = parse_url("http://[1:2:3::40]:80/one").unwrap(); assert_eq!(p.quoted_host, "1:2:3::40"); assert_eq!(p.port, Some(80)); } #[test] fn user_password() { let p = parse_url("http://ro%62ey:h%40t@ex%41mple.com:2222/path").unwrap(); 
assert_eq!(p.quoted_user.as_deref(), Some("ro%62ey")); assert_eq!(p.quoted_password.as_deref(), Some("h%40t")); assert_eq!(p.quoted_host, "ex%41mple.com"); assert_eq!(p.port, Some(2222)); assert_eq!(p.quoted_path, "/path"); } #[test] fn empty_port() { let p = parse_url("http://example.com:/one").unwrap(); assert_eq!(p.quoted_host, "example.com"); assert_eq!(p.port, None); } #[test] fn invalid_port() { match parse_url("http://example.com:abc/one") { Err(Error::InvalidUrlPort(_, port_str)) => assert_eq!(port_str, "abc"), other => panic!("expected InvalidUrlPort, got {:?}", other), } } #[test] fn normalize_quoted_path_unescapes_safe_chars() { // %7E is ~, which is unreserved → unescape to literal. assert_eq!(normalize_quoted_path("/foo%7Ebar"), "/foo~bar"); // %40 is @, which is reserved → keep as %40 (uppercased). assert_eq!(normalize_quoted_path("/foo%40bar"), "/foo%40bar"); // Lowercase hex → uppercase. assert_eq!(normalize_quoted_path("/foo%2fbar"), "/foo%2Fbar"); } } dromedary-0.1.5/src/webdav/000077500000000000000000000000001520150013200155275ustar00rootroot00000000000000dromedary-0.1.5/src/webdav/mod.rs000066400000000000000000000012201520150013200166470ustar00rootroot00000000000000//! WebDAV transport on top of HTTP. //! //! Implements the subset of RFC 4918 bzr / dromedary needs: PUT/GET, //! MKCOL, MOVE, DELETE, COPY, and PROPFIND (depth 0 / 1 / Infinity). //! Locking and property setting are out of scope — bzr fakes locks //! with a bogus lock and stores everything it needs in file content. //! //! The module is feature-gated behind `webdav`: it pulls in //! `quick-xml` for parsing multi-status PROPFIND responses, which //! callers that only talk plain HTTP shouldn't pay for. pub mod transport; pub mod xml; pub use transport::HttpDavTransport; pub use xml::{parse_propfind_dir, parse_propfind_stat, DavEntry, DavStat}; dromedary-0.1.5/src/webdav/transport.rs000066400000000000000000000677221520150013200201470ustar00rootroot00000000000000//! 
`HttpDavTransport` — a `dromedary::Transport` over WebDAV. //! //! Composition on top of [`HttpTransport`]: all reads / HEAD / GET //! delegate to the underlying HTTP transport, and the WebDAV- //! specific verbs (PUT, MKCOL, MOVE, DELETE, COPY, PROPFIND) are //! implemented here by issuing raw HTTP requests through the //! shared client and interpreting the status codes. //! //! Ported from `dromedary/webdav/webdav.py`. Like the Python //! version, this implements the subset bzr needs — no LOCK/UNLOCK, //! no PROPPATCH beyond allprop PROPFIND, no chunked upload. Bzr's //! locking is faked with a bogus lock held on the read side. use std::io::Read; use std::sync::Arc; use url::Url; use crate::http::client::HttpClient; use crate::http::HttpTransport; use crate::{ ConnectedTransport, Error, FileKind, Permissions, ReadStream, Result, Stat, Transport, UrlFragment, }; use super::xml::{parse_propfind_dir, parse_propfind_stat}; /// WebDAV transport over HTTP(S). /// /// `inner` is the plain-HTTP transport that owns the `HttpClient` /// (connection pool, auth cache, range-hint state). WebDAV write /// verbs are issued through `inner.request(...)` so they inherit /// the same auth / redirect / proxy machinery. #[derive(Clone)] pub struct HttpDavTransport { inner: HttpTransport, } impl HttpDavTransport { /// Build a new transport over `base`. Accepts URLs with schemes /// `http`, `https`, or `http[s]+urllib` / `http[s]+webdav`; the /// implementation suffix is stripped and ignored. pub fn new(base: &str, client: Arc) -> Result { // Drop any `+webdav` qualifier before handing the URL to // HttpTransport, which would otherwise reject the scheme. let trimmed = strip_dav_scheme_suffix(base); Ok(Self { inner: HttpTransport::new(&trimmed, client)?, }) } /// Concrete version of [`Transport::clone`]. Mirrors /// `HttpTransport::clone_concrete` — always directory-shaped, /// shares the underlying `HttpClient`. 
pub fn clone_concrete(&self, offset: Option<&UrlFragment>) -> Result { Ok(Self { inner: self.inner.clone_concrete(offset)?, }) } /// Access the underlying HTTP transport. Useful for the PyO3 /// wrapper which exposes HttpTransport-inherited methods. pub fn http(&self) -> &HttpTransport { &self.inner } /// Issue a PROPFIND with the given depth and return the raw /// response body as bytes. Common to `stat`, `list_dir`, and /// `iter_files_recursive`. fn propfind(&self, relpath: &UrlFragment, depth: &str) -> Result> { let abspath = self.inner.remote_url(relpath)?.to_string(); let body = br#" "#; let headers = [ ("Depth".to_string(), depth.to_string()), ( "Content-Type".to_string(), "application/xml; charset=\"utf-8\"".to_string(), ), ]; let mut resp = self .inner .request("PROPFIND", &abspath, &headers, body, false)?; match resp.status { 207 => resp.body().map(|b| b.to_vec()).map_err(Error::Io), 404 | 409 => Err(Error::NoSuchFile(Some(abspath))), other => Err(Error::InvalidHttpResponse { path: abspath, msg: format!( "unable to list directory (status {}: {})", other, resp.reason ), }), } } /// PUT `bytes` at `abspath` without any atomicity guard. Used /// by `put_bytes_non_atomic` and as the final step of the /// atomic put algorithm. fn bare_put(&self, abspath: &str, bytes: &[u8], range_header: Option) -> Result<()> { let mut headers: Vec<(String, String)> = vec![ ("Accept".to_string(), "*/*".to_string()), ( "Content-Type".to_string(), "application/octet-stream".to_string(), ), ]; if let Some(range) = range_header { headers.push(("Content-Range".to_string(), range)); } let resp = self.inner.request("PUT", abspath, &headers, bytes, false)?; match resp.status { 200 | 201 | 204 => Ok(()), // Intermediate directories missing. 403 | 404 | 409 => Err(Error::NoSuchFile(Some(abspath.to_string()))), other => Err(Error::InvalidHttpResponse { path: abspath.to_string(), msg: format!("put file failed (status {}: {})", other, resp.reason), }), } } /// MOVE `from` to `to`. 
`overwrite=false` causes the server to /// refuse if `to` exists (412); `overwrite=true` replaces. fn webdav_move(&self, abs_from: &str, abs_to: &str, overwrite: bool) -> Result<()> { let headers = [ ("Destination".to_string(), abs_to.to_string()), ( "Overwrite".to_string(), if overwrite { "T" } else { "F" }.to_string(), ), ]; let resp = self.inner.request("MOVE", abs_from, &headers, &[], false)?; match resp.status { 201 => Ok(()), // 204 means `to` already existed — allowed only when // we asked for overwrite. With overwrite=false a 204 // is a server bug (it should have been 412). 204 if overwrite => Ok(()), 404 => Err(Error::NoSuchFile(Some(abs_from.to_string()))), 412 => Err(Error::FileExists(Some(abs_to.to_string()))), 409 if overwrite => Err(Error::DirectoryNotEmptyError(Some(abs_to.to_string()))), 409 => Err(Error::NoSuchFile(Some(abs_to.to_string()))), other => Err(Error::InvalidHttpResponse { path: abs_from.to_string(), msg: format!( "unable to move to {} (status {}: {})", abs_to, other, resp.reason ), }), } } /// Generate a random temp-file suffix. Used to stamp a /// not-yet-committed PUT so we can MOVE it into place /// atomically (and delete it on failure). Matches the Python /// `".tmp.%.9f.%d.%d" % (time.time(), os.getpid(), rand)` shape /// closely enough that clients debugging a hung upload can /// still recognise the leftover. fn temp_suffix() -> String { use std::time::{SystemTime, UNIX_EPOCH}; let now = SystemTime::now() .duration_since(UNIX_EPOCH) .map(|d| d.as_secs_f64()) .unwrap_or(0.0); let pid = std::process::id(); let r: u32 = rand::random(); format!(".tmp.{:.9}.{}.{}", now, pid, r) } /// Append via HEAD-then-ranged-PUT. Efficient when the server /// supports Content-Range (Apache does). fn append_by_head_put(&self, relpath: &UrlFragment, bytes: &[u8]) -> Result { let resp = match self.inner.head(relpath) { Ok(r) => r, // 404 means the file doesn't exist yet; fall back to a // plain put_bytes that creates it. 
Err(Error::NoSuchFile(_)) => { self.put_bytes(relpath, bytes, None)?; return Ok(0); } Err(e) => return Err(e), }; // `head()` currently returns Ok for both 200 and 404 (the // transport-level wrapper only rejects *other* statuses). // A 404 here means the file doesn't exist yet — treat it // like the NoSuchFile branch above rather than parsing the // 404 body's Content-Length, which would mis-report the // error page size as the file's pre-append length. if resp.status == 404 { self.put_bytes(relpath, bytes, None)?; return Ok(0); } let current_size = resp .header("content-length") .and_then(|v| v.parse::().ok()) .unwrap_or(0); if current_size == 0 { // Apache omits Content-Length on empty files (module // source calls it a "hack"). Treat as new-file create. self.put_bytes(relpath, bytes, None)?; return Ok(0); } let abspath = self.inner.remote_url(relpath)?.to_string(); let range = format!( "bytes {}-{}/*", current_size, current_size + bytes.len() as u64 - 1 ); self.bare_put(&abspath, bytes, Some(range))?; Ok(current_size) } /// Append via GET+modify+PUT. Universal fallback for servers /// that don't honour Content-Range on PUT. fn append_by_get_put(&self, relpath: &UrlFragment, bytes: &[u8]) -> Result { let mut existing = Vec::new(); match self.inner.get(relpath) { Ok(mut rf) => { std::io::Read::read_to_end(&mut rf, &mut existing).map_err(Error::Io)?; } Err(Error::NoSuchFile(_)) => { // File doesn't exist; `existing` stays empty and we // put exactly the new bytes. } Err(e) => return Err(e), } let before = existing.len() as u64; existing.extend_from_slice(bytes); self.put_bytes(relpath, &existing, None)?; Ok(before) } } /// Construct a `TransportNotPossible` error for the verbs WebDAV /// doesn't implement (symlinks, hardlinks, write streams, mode bits). 
fn unsupported(what: &str) -> Error { Error::TransportNotPossible(Some(format!("webdav does not support {}", what))) } /// Strip `+webdav` / `+urllib` / `+impl` suffixes from the scheme /// before handing the URL to `HttpTransport::new`, which only /// accepts `http` / `https`. fn strip_dav_scheme_suffix(url: &str) -> String { let Some(scheme_end) = url.find("://") else { return url.to_string(); }; let scheme = &url[..scheme_end]; let unqualified = scheme.split_once('+').map(|(s, _)| s).unwrap_or(scheme); format!("{}{}", unqualified, &url[scheme_end..]) } impl std::fmt::Debug for HttpDavTransport { fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { write!(f, "HttpDavTransport({})", self.inner.base()) } } /// Write stream returned by `HttpDavTransport::open_write_stream`. /// /// Buffers all `write` calls in memory and PUTs the accumulated /// body when the stream is flushed or dropped. WebDAV has no /// native streaming-write verb, so one buffered PUT is the best /// approximation: bzr only uses open_write_stream for small files /// (lock markers, knit indices), where the buffer cost is /// negligible. struct DavWriteStream { transport: HttpDavTransport, relpath: String, buffer: Vec, permissions: Option, flushed: bool, } impl DavWriteStream { /// Send the accumulated buffer as a PUT. Safe to call multiple /// times — subsequent calls are no-ops unless `buffer` has /// grown since the last flush. 
fn flush_buffer(&mut self) -> std::io::Result<()> { if self.flushed && self.buffer.is_empty() { return Ok(()); } self.transport .put_bytes(&self.relpath, &self.buffer, self.permissions.clone()) .map_err(|e| std::io::Error::other(format!("{:?}", e)))?; self.flushed = true; Ok(()) } } impl std::io::Write for DavWriteStream { fn write(&mut self, buf: &[u8]) -> std::io::Result { self.buffer.extend_from_slice(buf); self.flushed = false; Ok(buf.len()) } fn flush(&mut self) -> std::io::Result<()> { self.flush_buffer() } } impl crate::WriteStream for DavWriteStream { fn sync_data(&self) -> std::io::Result<()> { // HTTP has no fsync. The server's durability model is // whatever the DAV implementation provides. Treat as a // no-op rather than raising, matching the Python // `AppendBasedFileStream` fdatasync behaviour (which just // raises `TransportNotPossible` but that's silently // swallowed by `FileStream.close(want_fdatasync=True)`). Ok(()) } } impl Drop for DavWriteStream { fn drop(&mut self) { // Flush on drop so users who forget to call close() still // get their bytes to the server. Errors are swallowed — // Drop can't surface them, and the stream contract says // close() is the authoritative end-of-write signal. let _ = self.flush_buffer(); } } impl Transport for HttpDavTransport { fn external_url(&self) -> Result { self.inner.external_url() } fn base(&self) -> Url { self.inner.base() } fn can_roundtrip_unix_modebits(&self) -> bool { false } fn is_readonly(&self) -> bool { false } fn listable(&self) -> bool { true } fn recommended_page_size(&self) -> usize { 64 * 1024 } fn get(&self, relpath: &UrlFragment) -> Result> { self.inner.get(relpath) } fn has(&self, relpath: &UrlFragment) -> Result { self.inner.has(relpath) } fn stat(&self, relpath: &UrlFragment) -> Result { let abspath = self.inner.remote_url(relpath)?.to_string(); let body = self.propfind(relpath, "0")?; let dav = parse_propfind_stat(&body, &abspath)?; // bzr expects a conventional unix mode. 
Directories go to // 040644, regular files to 100644, with the exec bit // flipped on if DAV reported the `executable` property. let kind = if dav.is_dir { FileKind::Dir } else { FileKind::File }; #[cfg(unix)] let mode = if dav.is_dir { 0o040644 } else if dav.is_exec { 0o100644 | 0o755 } else { 0o100644 }; let size = if dav.is_dir { 0 } else { dav.size.max(0) as usize }; Ok(Stat { size, #[cfg(unix)] mode, kind, // WebDAV PROPFIND does surface `getlastmodified`, but // bzr doesn't consult mtimes over remote transports — // so skip the parse rather than pay for a chrono dep. mtime: None, }) } fn clone(&self, offset: Option<&UrlFragment>) -> Result> { Ok(Box::new(self.clone_concrete(offset)?)) } fn abspath(&self, relpath: &UrlFragment) -> Result { self.inner.abspath(relpath) } fn relpath(&self, abspath: &Url) -> Result { self.inner.relpath(abspath) } fn set_segment_parameter(&mut self, key: &str, value: Option<&str>) -> Result<()> { self.inner.set_segment_parameter(key, value) } fn get_segment_parameters(&self) -> Result> { self.inner.get_segment_parameters() } fn mkdir(&self, relpath: &UrlFragment, _permissions: Option) -> Result<()> { let abspath = self.inner.remote_url(relpath)?.to_string(); let resp = self.inner.request("MKCOL", &abspath, &[], &[], false)?; match resp.status { 201 => Ok(()), // 405 Method Not Allowed is returned when the resource // already exists; map to FileExists for bzr. 405 => Err(Error::FileExists(Some(abspath))), // Missing intermediate directories. 404 | 409 => Err(Error::NoSuchFile(Some(abspath))), other => Err(Error::InvalidHttpResponse { path: abspath, msg: format!("mkdir failed (status {}: {})", other, resp.reason), }), } } fn put_file( &self, relpath: &UrlFragment, f: &mut dyn Read, _permissions: Option, ) -> Result { // Match Python put_file: eager-read the source so we can // honour the atomic-put dance. Streaming uploads would need // Transfer-Encoding: chunked which the Python version also // skips. 
let mut buf = Vec::new(); f.read_to_end(&mut buf).map_err(Error::Io)?; let n = buf.len() as u64; self.put_bytes(relpath, &buf, None)?; Ok(n) } fn put_bytes( &self, relpath: &UrlFragment, bytes: &[u8], _permissions: Option, ) -> Result<()> { // RFC 2068 said PUT was atomic; practice disagreed. Apache // in particular leaves a half-written file behind if the // client disconnects mid-PUT. We therefore put to a temp // relpath first, then MOVE it into place. let stamp = Self::temp_suffix(); let tmp_relpath = format!("{}{}", relpath, stamp); self.put_bytes_non_atomic(&tmp_relpath, bytes, None, None, None)?; // Move the temp file into place. On failure, try to clean // up the temp file before surfacing the original error. let abs_tmp = self.inner.remote_url(&tmp_relpath)?.to_string(); let abs_dst = self.inner.remote_url(relpath)?.to_string(); if let Err(primary) = self.webdav_move(&abs_tmp, &abs_dst, true) { // Best-effort cleanup; ignore secondary errors so the // caller sees the real failure. let _ = self.delete(&tmp_relpath); return Err(primary); } Ok(()) } fn put_bytes_non_atomic( &self, relpath: &UrlFragment, bytes: &[u8], _permissions: Option, create_parent_dir: Option, dir_permissions: Option, ) -> Result<()> { let abspath = self.inner.remote_url(relpath)?.to_string(); match self.bare_put(&abspath, bytes, None) { Ok(()) => Ok(()), Err(Error::NoSuchFile(_)) if create_parent_dir.unwrap_or(false) => { if let Some(parent) = relpath.rsplit_once('/').map(|x| x.0) { self.mkdir(parent, dir_permissions)?; self.bare_put(&abspath, bytes, None) } else { Err(Error::NoSuchFile(Some(abspath))) } } Err(e) => Err(e), } } fn append_bytes( &self, relpath: &UrlFragment, bytes: &[u8], _permissions: Option, ) -> Result { // Python picks between HEAD+PUT-with-Content-Range (cheap, // but needs a server that honours ranges) and GET+PUT // (expensive but universal). The selection key is the // current range_hint — if it's degraded all the way, we // fall back to GET+PUT. 
if self.inner.range_hint_str().is_some() { self.append_by_head_put(relpath, bytes) } else { self.append_by_get_put(relpath, bytes) } } fn append_file( &self, relpath: &UrlFragment, f: &mut dyn Read, permissions: Option, ) -> Result { let mut buf = Vec::new(); f.read_to_end(&mut buf).map_err(Error::Io)?; self.append_bytes(relpath, &buf, permissions) } fn rename(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { let abs_from = self.inner.remote_url(rel_from)?.to_string(); let abs_to = self.inner.remote_url(rel_to)?.to_string(); // Rename in bzr-speak means "don't overwrite" — the // destination must not exist. self.webdav_move(&abs_from, &abs_to, false) } fn delete(&self, relpath: &UrlFragment) -> Result<()> { let abspath = self.inner.remote_url(relpath)?.to_string(); let resp = self.inner.request("DELETE", &abspath, &[], &[], false)?; match resp.status { 200 | 204 => Ok(()), 404 => Err(Error::NoSuchFile(Some(abspath))), other => Err(Error::InvalidHttpResponse { path: abspath, msg: format!("unable to delete (status {}: {})", other, resp.reason), }), } } fn rmdir(&self, relpath: &UrlFragment) -> Result<()> { // Transport::rmdir contract: fail if the directory isn't // empty. RFC 4918 DELETE on a collection removes it *with* // contents, so we list first and raise DirectoryNotEmpty // ourselves when needed. let mut iter = self.list_dir(relpath); if let Some(first) = iter.next() { // Surface a listing error rather than misreporting it // as "directory not empty". first?; let abspath = self.inner.remote_url(relpath)?.to_string(); return Err(Error::DirectoryNotEmptyError(Some(abspath))); } self.delete(relpath) } fn delete_tree(&self, relpath: &UrlFragment) -> Result<()> { // DELETE on a collection in WebDAV removes it plus all // children. The Python transport doesn't expose delete_tree // but a recursive delete maps naturally onto the protocol. 
self.delete(relpath) } fn copy(&self, rel_from: &UrlFragment, rel_to: &UrlFragment) -> Result<()> { let abs_from = self.inner.remote_url(rel_from)?.to_string(); let abs_to = self.inner.remote_url(rel_to)?.to_string(); let headers = [("Destination".to_string(), abs_to.clone())]; let resp = self .inner .request("COPY", &abs_from, &headers, &[], false)?; match resp.status { // Apache returns 204 on overwrite; the dromedary test // server returns 201. Both are acceptable per RFC 4918. 201 | 204 => Ok(()), 404 | 409 => Err(Error::NoSuchFile(Some(abs_from))), other => Err(Error::InvalidHttpResponse { path: abs_from.clone(), msg: format!( "unable to copy from {} to {} (status {}: {})", abs_from, abs_to, other, resp.reason ), }), } } fn list_dir(&self, relpath: &UrlFragment) -> Box>> { let abspath = match self.inner.remote_url(relpath) { Ok(u) => u.to_string(), Err(e) => return Box::new(std::iter::once(Err(e))), }; match self.propfind(relpath, "1") { Ok(body) => match parse_propfind_dir(&body, &abspath) { Ok(entries) => Box::new(entries.into_iter().map(|e| Ok(e.href))), Err(e) => Box::new(std::iter::once(Err(e))), }, Err(e) => Box::new(std::iter::once(Err(e))), } } fn iter_files_recursive(&self) -> Box>> { // PROPFIND with depth=Infinity. Some real servers disable // this; bzr/dromedary has always relied on it so we match. 
let abspath = match self.inner.remote_url(".") { Ok(u) => u.to_string(), Err(e) => return Box::new(std::iter::once(Err(e))), }; match self.propfind(".", "Infinity") { Ok(body) => match parse_propfind_dir(&body, &abspath) { Ok(entries) => Box::new( entries .into_iter() .filter(|e| !e.is_dir) .map(|e| Ok(e.href)), ), Err(e) => Box::new(std::iter::once(Err(e))), }, Err(e) => Box::new(std::iter::once(Err(e))), } } fn readv<'a>( &self, relpath: &'a UrlFragment, offsets: Vec<(u64, usize)>, adjust_for_latency: bool, upper_limit: Option, ) -> Box)>> + Send + 'a> { self.inner .readv(relpath, offsets, adjust_for_latency, upper_limit) } fn lock_read(&self, relpath: &UrlFragment) -> Result> { self.inner.lock_read(relpath) } fn lock_write(&self, relpath: &UrlFragment) -> Result> { // Python follows FTP: return a bogus read lock rather than // implement WebDAV LOCK/UNLOCK. Comment from the Python // version: "WebDAV supports some sort of locking [but] we // don't explicitly support locking a specific file." self.inner.lock_read(relpath) } fn readlink(&self, _relpath: &UrlFragment) -> Result { Err(unsupported("readlink()")) } fn symlink(&self, _source: &UrlFragment, _link_name: &UrlFragment) -> Result<()> { Err(unsupported("symlink()")) } fn hardlink(&self, _source: &UrlFragment, _link_name: &UrlFragment) -> Result<()> { Err(unsupported("hardlink()")) } fn open_write_stream( &self, relpath: &UrlFragment, permissions: Option, ) -> Result> { // WebDAV has no native streaming-write verb. Create the // file up front with an empty body so `open_write_stream` // honours the Transport contract that the file exists // before the handle is returned. Then return a memory- // buffered stream that PUTs the whole accumulated body // when it's flushed or dropped. 
self.put_bytes(relpath, &[], permissions.clone())?; Ok(Box::new(DavWriteStream { transport: Clone::clone(self), relpath: relpath.to_string(), buffer: Vec::new(), permissions, flushed: false, })) } fn local_abspath(&self, _relpath: &UrlFragment) -> Result { Err(Error::NotLocalUrl(self.inner.base().to_string())) } } impl ConnectedTransport for HttpDavTransport {} #[cfg(test)] mod tests { use super::*; fn fresh_client() -> Arc { Arc::new( HttpClient::new(crate::http::client::HttpClientConfig::default()) .expect("client builds"), ) } #[test] fn strip_dav_scheme_suffix_drops_impl_qualifier() { assert_eq!( strip_dav_scheme_suffix("http+webdav://example.com/"), "http://example.com/" ); assert_eq!( strip_dav_scheme_suffix("https+urllib://example.com/path/"), "https://example.com/path/" ); } #[test] fn strip_dav_scheme_suffix_passes_through_plain_scheme() { assert_eq!( strip_dav_scheme_suffix("http://example.com/"), "http://example.com/" ); } #[test] fn strip_dav_scheme_suffix_passes_through_non_url() { // No `://` — we don't know how to classify it, but we // shouldn't panic either. HttpTransport::new will reject it. assert_eq!(strip_dav_scheme_suffix("not a url"), "not a url"); } #[test] fn new_accepts_webdav_scheme_suffix() { // `+webdav` is a breezy-era suffix used to pin the WebDAV // implementation; HttpTransport rejects it outright so we // must strip before delegating. let t = HttpDavTransport::new("http+webdav://example.com/", fresh_client()).unwrap(); assert_eq!(t.base().scheme(), "http"); } #[test] fn new_normalises_trailing_slash() { let t = HttpDavTransport::new("http://example.com/a", fresh_client()).unwrap(); assert!(t.base().as_str().ends_with('/')); } #[test] fn transport_is_not_readonly() { // Unlike plain HTTP, WebDAV supports writes — so is_readonly // must return false so bzr tries write operations. 
let t = HttpDavTransport::new("http://example.com/", fresh_client()).unwrap(); assert!(!t.is_readonly()); } #[test] fn transport_is_listable() { let t = HttpDavTransport::new("http://example.com/", fresh_client()).unwrap(); assert!(t.listable()); } #[test] fn temp_suffix_starts_with_dot_tmp_and_varies() { let a = HttpDavTransport::temp_suffix(); let b = HttpDavTransport::temp_suffix(); assert!(a.starts_with(".tmp.")); // Random component is 32 bits so collisions are unlikely // enough that this flaky-flake is cheap insurance against a // regression where the random bit gets dropped. assert_ne!(a, b); } } dromedary-0.1.5/src/webdav/xml.rs000066400000000000000000000413631520150013200167040ustar00rootroot00000000000000//! PROPFIND multi-status response parser. //! //! Ports `DavStatHandler` / `DavListDirHandler` from //! `dromedary/webdav/webdav.py`. The Python version is a SAX //! ContentHandler maintaining an element stack; we use `quick-xml`'s //! pull-parser and keep an equivalent stack of stripped element //! names (namespace prefixes dropped — we don't care about them in //! practice because the WebDAV vocabulary is flat enough that //! `response/href`, `propstat/prop/resourcetype/collection` etc. //! are unambiguous). use quick_xml::events::Event; use quick_xml::Reader; use crate::{Error, Result}; /// Stat-like data extracted from a depth-0 PROPFIND response. #[derive(Debug, Clone, PartialEq, Eq)] pub struct DavStat { /// Size in bytes; `-1` for directories where the length is /// meaningless. Matches Python `_DAVStat.st_size`. pub size: i64, pub is_dir: bool, pub is_exec: bool, } /// One entry from a depth-1 or -Infinity PROPFIND response. #[derive(Debug, Clone, PartialEq, Eq)] pub struct DavEntry { pub href: String, pub is_dir: bool, pub size: i64, pub is_exec: bool, } /// State accumulated while walking a single `response` element — /// drained into a `DavEntry` or promoted into a `DavStat` depending /// on which parser called us. 
#[derive(Default)] struct ResponseAccumulator { href: Option, length: Option, executable: Option, is_dir: bool, } impl ResponseAccumulator { fn into_entry(self) -> Option { let href = self.href?; let size = if self.is_dir { -1 } else { self.length.unwrap_or(-1) }; let is_exec = if self.is_dir { // Directories are reported as executable to match // Python `_extract_stat_info` which does the same — // bzr expects to be able to descend into them. true } else { matches!(self.executable.as_deref(), Some("T")) }; Some(DavEntry { href, is_dir: self.is_dir, size, is_exec, }) } } /// Parse a depth-0 PROPFIND response and return the single entry's /// stat metadata. `url` is only used in error messages. pub fn parse_propfind_stat(body: &[u8], url: &str) -> Result { let entries = parse_responses(body, url)?; let first = entries .into_iter() .next() .ok_or_else(|| Error::InvalidHttpResponse { path: url.to_string(), msg: "PROPFIND returned no response elements".into(), })?; Ok(DavStat { size: first.size, is_dir: first.is_dir, is_exec: first.is_exec, }) } /// Parse a depth-1 or -Infinity PROPFIND response. The first entry /// is the directory itself (its href is used to compute relative /// names); subsequent entries are the children. Returns the child /// entries with their `href` trimmed to the name relative to the /// directory, matching `_extract_dir_content`. 
pub fn parse_propfind_dir(body: &[u8], url: &str) -> Result> { let entries = parse_responses(body, url)?; let mut iter = entries.into_iter(); let first = iter.next().ok_or_else(|| Error::InvalidHttpResponse { path: url.to_string(), msg: "PROPFIND returned no response elements".into(), })?; if !first.is_dir { return Err(Error::NotADirectoryError(Some(url.to_string()))); } let dir_href = first.href; let dir_len = dir_href.len(); let mut out = Vec::new(); for mut entry in iter { if !entry.href.starts_with(&dir_href) { continue; } let mut name = entry.href[dir_len..].to_string(); if name.ends_with('/') { name.pop(); } entry.href = name; out.push(entry); } Ok(out) } /// Walk every `` element in the body and return a /// `DavEntry` per response. Shared backend for the two entry points. fn parse_responses(body: &[u8], url: &str) -> Result> { let mut reader = Reader::from_reader(body); reader.config_mut().trim_text(true); let mut buf = Vec::new(); let mut stack: Vec = Vec::new(); let mut acc: Option = None; let mut chars = String::new(); let mut entries: Vec = Vec::new(); let parse_err = |e: quick_xml::Error| Error::InvalidHttpResponse { path: url.to_string(), msg: format!("Malformed xml response: {}", e), }; loop { match reader.read_event_into(&mut buf).map_err(parse_err)? { Event::Start(e) => { let name = strip_ns(e.name().as_ref()); stack.push(name.clone()); chars.clear(); if name == "response" { acc = Some(ResponseAccumulator::default()); } } Event::End(_) => { let Some(name) = stack.pop() else { continue }; if let Some(ref mut a) = acc { commit_element(a, &stack, &name, &chars); } if name == "response" { if let Some(a) = acc.take() { if let Some(entry) = a.into_entry() { entries.push(entry); } } } chars.clear(); } Event::Empty(e) => { // quick-xml yields self-closing tags (``) // as Empty events rather than Start+End. Synthesise // the stack push/pop so commit_element sees the right // path. 
let name = strip_ns(e.name().as_ref()); stack.push(name.clone()); if let Some(ref mut a) = acc { commit_element(a, &stack[..stack.len() - 1], &name, ""); } stack.pop(); } Event::Text(t) => { if acc.is_some() { let s = t.unescape().map_err(parse_err)?; chars.push_str(&s); } } Event::CData(t) => { if acc.is_some() { chars.push_str(&String::from_utf8_lossy(&t)); } } Event::Eof => break, _ => {} } buf.clear(); } if entries.is_empty() { return Err(Error::InvalidHttpResponse { path: url.to_string(), msg: "Unknown xml response".into(), }); } Ok(entries) } /// Apply a completed element's content to the accumulator. /// /// `stack` is the element stack *not* including the element we're /// committing — mirrors how the Python version checked the stack /// before the pop. fn commit_element(acc: &mut ResponseAccumulator, stack: &[String], name: &str, chars: &str) { // Expected stacks, matching the Python _href_end / _getcontentlength_end // / _executable_end / _collection_end helpers: // /multistatus/response/href // /multistatus/response/propstat/prop/getcontentlength // /multistatus/response/propstat/prop/executable // /multistatus/response/propstat/prop/resourcetype/collection let depth = stack.len(); match name { "href" => { if depth == 2 && stack[0] == "multistatus" && stack[1] == "response" { acc.href = Some(chars.to_string()); } } "getcontentlength" => { if depth == 4 && stack[0] == "multistatus" && stack[1] == "response" && stack[2] == "propstat" && stack[3] == "prop" { if let Ok(n) = chars.trim().parse::() { acc.length = Some(n); } } } "executable" => { if depth == 4 && stack[0] == "multistatus" && stack[1] == "response" && stack[2] == "propstat" && stack[3] == "prop" { acc.executable = Some(chars.trim().to_string()); } } "collection" => { if depth == 5 && stack[0] == "multistatus" && stack[1] == "response" && stack[2] == "propstat" && stack[3] == "prop" && stack[4] == "resourcetype" { acc.is_dir = true; } } _ => {} } } /// Strip a `ns:name` qualifier from an element 
name. WebDAV uses /// namespace prefixes (`D:response`, `liveprop:getcontentlength`) /// but the vocabulary is flat enough that we don't need to track /// which prefix binds which URI — treating `href` and `D:href` as /// equivalent is what the Python version does. fn strip_ns(raw: &[u8]) -> String { let s = std::str::from_utf8(raw).unwrap_or(""); match s.split_once(':') { Some((_, rest)) => rest.to_string(), None => s.to_string(), } } #[cfg(test)] mod tests { use super::*; const STAT_FILE: &[u8] = br#" /some/file.txt 1234 F HTTP/1.1 200 OK "#; const STAT_DIR: &[u8] = br#" /some/dir/ HTTP/1.1 200 OK "#; const STAT_EXEC: &[u8] = br#" /some/script.sh 99 T HTTP/1.1 200 OK "#; const DIR_LIST: &[u8] = br#" /dir/ HTTP/1.1 200 OK /dir/a.txt 10 F HTTP/1.1 200 OK /dir/sub/ HTTP/1.1 200 OK "#; #[test] fn stat_file_extracts_size_and_not_dir() { let stat = parse_propfind_stat(STAT_FILE, "/some/file.txt").unwrap(); assert_eq!( stat, DavStat { size: 1234, is_dir: false, is_exec: false, } ); } #[test] fn stat_directory_reports_is_dir_and_exec_sentinel() { let stat = parse_propfind_stat(STAT_DIR, "/some/dir/").unwrap(); // Matches Python: directories carry size=-1 and is_exec=True // so bzr can descend into them. assert_eq!( stat, DavStat { size: -1, is_dir: true, is_exec: true, } ); } #[test] fn stat_executable_flag_recognised() { let stat = parse_propfind_stat(STAT_EXEC, "/some/script.sh").unwrap(); assert!(stat.is_exec); } #[test] fn listdir_strips_parent_prefix_and_trailing_slash() { let entries = parse_propfind_dir(DIR_LIST, "/dir/").unwrap(); assert_eq!(entries.len(), 2); assert_eq!(entries[0].href, "a.txt"); assert_eq!(entries[0].is_dir, false); assert_eq!(entries[0].size, 10); assert_eq!(entries[1].href, "sub"); assert_eq!(entries[1].is_dir, true); } #[test] fn listdir_on_file_yields_not_a_directory() { // A depth-1 PROPFIND issued against a file returns a single // response whose resourcetype is not `collection`. 
We surface // that as NotADirectory rather than silently returning empty. let result = parse_propfind_dir(STAT_FILE, "/some/file.txt"); assert!(matches!(result, Err(Error::NotADirectoryError(_)))); } #[test] fn malformed_xml_raises_invalid_http_response() { let result = parse_propfind_stat(b">", "/url"); assert!(matches!(result, Err(Error::InvalidHttpResponse { .. }))); } #[test] fn empty_multistatus_reports_unknown_xml_response() { let body = br#""#; let result = parse_propfind_stat(body, "/url"); assert!(matches!(result, Err(Error::InvalidHttpResponse { .. }))); } #[test] fn namespace_prefix_variations_treated_equivalently() { // Apache mod_dav uses `lp1:` and `lp2:` for the bzr/liveprop // namespaces. The stripper drops the prefix, so both parse. let body = br#" /f 42 F "#; let stat = parse_propfind_stat(body, "/f").unwrap(); assert_eq!(stat.size, 42); } #[test] fn unknown_format_xml_rejected() { // Valid XML but not a multistatus — Python raises // InvalidHttpResponse with msg="Unknown xml response". let result = parse_propfind_stat(b"", "/url"); assert!(matches!(result, Err(Error::InvalidHttpResponse { .. }))); // Same for listdir. let result = parse_propfind_dir(b"", "/url"); assert!(matches!(result, Err(Error::InvalidHttpResponse { .. }))); } #[test] fn listdir_first_entry_without_resourcetype_rejected() { // lighttpd returns no resourcetype elements at all. Without a // collection marker on the first entry, it's indistinguishable // from a file, so listdir must fail with NotADirectory. let body = br#" /dir/ /dir/a /dir/b "#; let result = parse_propfind_dir(body, "/dir/"); assert!(matches!(result, Err(Error::NotADirectoryError(_)))); } #[test] fn apache_lp1_lp2_prefixes_parsed() { // Apache mod_dav's allprop response uses `lp1:` and `lp2:` // prefixes for the live and dead properties respectively. 
let body = br#" /executable 12 T HTTP/1.1 200 OK "#; let stat = parse_propfind_stat(body, "/executable").unwrap(); assert_eq!(stat.size, 12); assert!(!stat.is_dir); assert!(stat.is_exec); } #[test] fn href_outside_response_stack_ignored() { // An `href` at a different stack depth (e.g. inside a // propstat) must not be picked up as the response's href. let body = br#" noise /real 0 "#; // The real href comes second and should win over the noise one. let entries = parse_responses(body, "/real").unwrap(); assert_eq!(entries[0].href, "/real"); } } dromedary-0.1.5/src/win32-locks.rs000066400000000000000000000170641520150013200167100ustar00rootroot00000000000000//! Windows file locking. //! //! This uses `std::fs::File::try_lock` / `File::lock` (stable in Rust 1.89) //! to take advisory OS-level locks via `LockFileEx`, combined with the same //! process-local tracking tables that `fcntl-locks.rs` uses so that a single //! process can still distinguish read vs. write contention. //! //! TODO(windows): `File::try_lock` is exclusive-only. Implementing shared //! read locks that interoperate with other processes requires dropping to //! `LockFileEx` directly via `windows-sys`. For pass 1 we rely on the //! process-local tracking alone for read-vs-write arbitration, which is //! sufficient for single-process tests but will not coordinate across //! processes for shared reads. 
use crate::lock::{FileLock, Lock, LockError}; use lazy_static::lazy_static; use log::debug; use std::collections::hash_map::Entry; use std::collections::{HashMap, HashSet}; use std::fs::{File, OpenOptions}; use std::path::{Path, PathBuf}; fn open(filename: &Path, options: &OpenOptions) -> std::result::Result<(PathBuf, File), LockError> { let filename = crate::osutils::path::realpath(filename)?; match options.open(&filename) { Ok(f) => Ok((filename, f)), Err(e) => match e.kind() { std::io::ErrorKind::PermissionDenied => Err(LockError::Failed(filename, e.to_string())), std::io::ErrorKind::NotFound => { debug!( "trying to create missing lock {}", filename.to_string_lossy() ); let f = OpenOptions::new() .create(true) .write(true) .read(true) .open(&filename)?; Ok((filename, f)) } _ => Err(e.into()), }, } } lazy_static! { static ref OPEN_WRITE_LOCKS: std::sync::Mutex> = std::sync::Mutex::new(HashSet::new()); static ref OPEN_READ_LOCKS: std::sync::Mutex> = std::sync::Mutex::new(HashMap::new()); } pub struct WriteLock { filename: PathBuf, f: File, } impl WriteLock { pub fn new(filename: &Path, strict_locks: bool) -> Result { let filename = crate::osutils::path::realpath(filename)?; if OPEN_WRITE_LOCKS.lock().unwrap().contains(&filename) { return Err(LockError::Contention(filename)); } if OPEN_READ_LOCKS.lock().unwrap().contains_key(&filename) { if strict_locks { return Err(LockError::Contention(filename)); } else { debug!( "Write lock taken w/ an open read lock on: {}", filename.to_string_lossy() ); } } let (filename, f) = open( filename.as_path(), OpenOptions::new().read(true).write(true), )?; match f.try_lock() { Ok(()) => {} Err(std::fs::TryLockError::WouldBlock) => { return Err(LockError::Contention(filename)); } Err(std::fs::TryLockError::Error(_)) => { // Fall through — we still have process-local tracking. 
} } OPEN_WRITE_LOCKS.lock().unwrap().insert(filename.clone()); Ok(WriteLock { filename, f }) } } impl Lock for WriteLock { fn unlock(&mut self) -> Result<(), LockError> { OPEN_WRITE_LOCKS.lock().unwrap().remove(&self.filename); let _ = self.f.unlock(); Ok(()) } } impl FileLock for WriteLock { fn file(&self) -> std::io::Result> { Ok(Box::new(self.f.try_clone()?)) } fn path(&self) -> &Path { &self.filename } } pub struct ReadLock { filename: PathBuf, f: File, } impl ReadLock { pub fn new(filename: &Path, strict_locks: bool) -> std::result::Result { let filename = crate::osutils::path::realpath(filename)?; if OPEN_WRITE_LOCKS.lock().unwrap().contains(&filename) { if strict_locks { return Err(LockError::Contention(filename)); } else { debug!( "Read lock taken w/ an open write lock on: {}", filename.to_string_lossy() ); } } OPEN_READ_LOCKS .lock() .unwrap() .entry(filename.clone()) .and_modify(|count| *count += 1) .or_insert(1); let (filename, f) = open(&filename, OpenOptions::new().read(true))?; // `File::try_lock_shared` would be the right call here, but it is // currently unstable. See the module-level TODO. Ok(ReadLock { filename, f }) } /// Try to grab a write lock on the file. pub fn temporary_write_lock( self, ) -> std::result::Result { if OPEN_WRITE_LOCKS.lock().unwrap().contains(&self.filename) { panic!("file already locked: {}", self.filename.to_string_lossy()); } TemporaryWriteLock::new(self) } } impl Lock for ReadLock { fn unlock(&mut self) -> std::result::Result<(), LockError> { match OPEN_READ_LOCKS.lock().unwrap().entry(self.filename.clone()) { Entry::Occupied(mut entry) => { let count = entry.get_mut(); if *count == 1 { entry.remove(); } else { *count -= 1; } } Entry::Vacant(_) => panic!("no read lock on {}", self.filename.to_string_lossy()), } Ok(()) } } impl FileLock for ReadLock { fn file(&self) -> std::io::Result> { Ok(Box::new(self.f.try_clone()?)) } fn path(&self) -> &Path { &self.filename } } /// A token used when grabbing a temporary_write_lock. 
pub struct TemporaryWriteLock { read_lock: ReadLock, filename: PathBuf, f: File, } impl TemporaryWriteLock { pub fn new(read_lock: ReadLock) -> std::result::Result { let filename = read_lock.filename.clone(); if let Some(count) = OPEN_READ_LOCKS.lock().unwrap().get(&filename) { if *count > 1 { return Err((read_lock, LockError::Contention(filename))); } } if OPEN_WRITE_LOCKS.lock().unwrap().contains(&filename) { panic!("file already locked: {}", filename.to_string_lossy()); } let f = match OpenOptions::new() .write(true) .read(true) .create(true) .open(&filename) { Ok(f) => Ok(f), Err(e) => return Err((read_lock, e.into())), }?; match f.try_lock() { Ok(()) => {} Err(std::fs::TryLockError::WouldBlock) => { return Err((read_lock, LockError::Contention(filename))); } Err(std::fs::TryLockError::Error(_)) => { // Fall through — process-local tracking is what we rely on. } } OPEN_WRITE_LOCKS.lock().unwrap().insert(filename.clone()); Ok(Self { read_lock, filename, f, }) } pub fn restore_read_lock(self) -> ReadLock { let _ = self.f.unlock(); OPEN_WRITE_LOCKS.lock().unwrap().remove(&self.filename); self.read_lock } } impl FileLock for TemporaryWriteLock { fn file(&self) -> std::io::Result> { Ok(Box::new(self.f.try_clone()?)) } fn path(&self) -> &Path { &self.filename } }