pax_global_header00006660000000000000000000000064146301342730014515gustar00rootroot0000000000000052 comment=168beb4da1c971e8c99cee2c06c4862e267f8d81 conda-package-streaming-0.10.0/000077500000000000000000000000001463013427300162375ustar00rootroot00000000000000conda-package-streaming-0.10.0/.flake8000066400000000000000000000000371463013427300174120ustar00rootroot00000000000000[flake8] max-line-length = 100 conda-package-streaming-0.10.0/.github/000077500000000000000000000000001463013427300175775ustar00rootroot00000000000000conda-package-streaming-0.10.0/.github/ISSUE_TEMPLATE/000077500000000000000000000000001463013427300217625ustar00rootroot00000000000000conda-package-streaming-0.10.0/.github/ISSUE_TEMPLATE/0_bug.yml000066400000000000000000000053001463013427300234770ustar00rootroot00000000000000name: Bug Report description: Create a bug report. labels: - type::bug body: - type: markdown attributes: value: | Because processing new bug reports is time-consuming, we would like to ask you to fill out the following form to the best of your ability and as completely as possible. > [!NOTE] > Bug reports that are incomplete or missing information may be closed as inactionable. Since there are already a lot of open issues, please also take a moment to search existing ones to see if your bug has already been reported. If you find something related, please upvote that issue and provide additional details as necessary. 💐 Thank you for helping to make Conda better. We would be unable to improve Conda without our community! - type: checkboxes id: checks attributes: label: Checklist description: Please confirm and check all of the following options. options: - label: I added a descriptive title required: true - label: I searched open reports and couldn't find a duplicate required: true - type: textarea id: what attributes: label: What happened? description: What should have happened instead? Please provide as many details as possible. The more information provided, the more likely we are able to replicate your problem and offer a solution. validations: required: true - type: textarea id: info attributes: label: Conda Info description: | Let's collect some basic information about your conda install. Please run the following command from your command line and paste the output below. ```bash conda info ``` render: shell - type: textarea id: config attributes: label: Conda Config description: | Let's collect any customizations you may have for your conda install. Please run the following command from your command line and paste the output below. ```bash conda config --show-sources ``` render: shell - type: textarea id: list attributes: label: Conda list description: | The packages installed into your environment can offer clues as to the problem you are facing. Please activate the environment within which you are encountering this bug, run the following command from your command line, and paste the output below. ```bash conda list --show-channel-urls ``` render: shell - type: textarea id: context attributes: label: Additional Context description: Include any additional information (or screenshots) that you think would be valuable. conda-package-streaming-0.10.0/.github/ISSUE_TEMPLATE/1_feature.yml000066400000000000000000000036131463013427300243630ustar00rootroot00000000000000name: Feature Request description: Create a feature request. 
labels: - type::feature body: - type: markdown attributes: value: | Because processing new feature requests is time-consuming, we would like to ask you to fill out the following form to the best of your ability and as completely as possible. > [!NOTE] > Feature requests that are incomplete or missing information may be closed as inactionable. Since there are already a lot of open issues, please also take a moment to search existing ones to see if your feature request has already been submitted. If you find something related, please upvote that issue and provide additional details as necessary. 💐 Thank you for helping to make Conda better. We would be unable to improve Conda without our community! - type: checkboxes id: checks attributes: label: Checklist description: Please confirm and check all of the following options. options: - label: I added a descriptive title required: true - label: I searched open requests and couldn't find a duplicate required: true - type: textarea id: idea attributes: label: What is the idea? description: Describe what the feature is and the desired state. validations: required: true - type: textarea id: why attributes: label: Why is this needed? description: Who would benefit from this feature? Why would this add value to them? What problem does this solve? - type: textarea id: what attributes: label: What should happen? description: What should be the user experience with the feature? Describe from a user perspective what they would do and see. - type: textarea id: context attributes: label: Additional Context description: Include any additional information that you think would be valuable. conda-package-streaming-0.10.0/.github/ISSUE_TEMPLATE/2_documentation.yml000066400000000000000000000026511463013427300256030ustar00rootroot00000000000000name: Documentation description: Create a documentation related issue. labels: - type::documentation body: - type: markdown attributes: value: | > [!NOTE] > Documentation requests that are incomplete or missing information may be closed as inactionable. Since there are already a lot of open issues, please also take a moment to search existing ones to see if your bug has already been reported. If you find something related, please upvote that issue and provide additional details as necessary. 💐 Thank you for helping to make conda better. We would be unable to improve conda without our community! - type: checkboxes id: checks attributes: label: Checklist description: Please confirm and check all of the following options. options: - label: I added a descriptive title required: true - label: I searched open reports and couldn't find a duplicate required: true - type: textarea id: what attributes: label: What happened? description: Mention here any typos, broken links, or missing, incomplete, or outdated information, etc. that you have noticed in the conda docs or CLI help. validations: required: true - type: textarea id: context attributes: label: Additional Context description: Include any additional information (or screenshots) that you think would be valuable. conda-package-streaming-0.10.0/.github/ISSUE_TEMPLATE/epic.yml000066400000000000000000000060741463013427300234340ustar00rootroot00000000000000name: Epic description: A collection of related tickets. labels: - epic body: - type: markdown attributes: value: | This form is intended for grouping and collecting together related tickets to better gauge the scope of a problem/feature. 
If you are attempting to report a bug, propose a new feature, or some other code change please use one of the other forms available. > [!NOTE] > Epics that are incomplete or missing information may be closed as inactionable. Since there are already a lot of open issues, please also take a moment to search existing ones to see if a similar epic has already been opened. If you find something related, please upvote that issue and provide additional details as necessary. 💐 Thank you for helping to make Conda better. We would be unable to improve Conda without our community! - type: checkboxes id: checks attributes: label: Checklist description: Please confirm and check all of the following options. options: - label: I added a descriptive title required: true - label: I searched open issues and couldn't find a duplicate required: true - type: textarea id: what attributes: label: What? description: >- What feature or problem will be addressed in this epic? placeholder: Please describe here. validations: required: true - type: textarea id: why attributes: label: Why? description: >- Why is the reported issue(s) a problem, or why is the proposed feature needed? (Research and spike issues can be linked here.) value: | - [ ] placeholder: Please describe here and/or link to relevant supporting issues. validations: required: true - type: textarea id: user_impact attributes: label: User impact description: >- In what specific way(s) will users benefit from this change? (e.g. use cases or performance improvements) placeholder: Please describe here. validations: required: true - type: textarea id: goals attributes: label: Goals description: >- What goal(s) should this epic accomplish? value: | - [ ] validations: required: true - type: textarea id: tasks attributes: label: Tasks description: >- What needs to be done to implement this change? value: | - [ ] validations: required: false - type: textarea id: blocked_by attributes: label: 'This epic is blocked by:' description: >- Epics and issues that block this epic. value: | - [ ] validations: required: false - type: textarea id: blocks attributes: label: 'This epic blocks:' description: >- Epics and issues that are blocked by this epic. 
value: | - [ ] validations: required: false conda-package-streaming-0.10.0/.github/workflows/000077500000000000000000000000001463013427300216345ustar00rootroot00000000000000conda-package-streaming-0.10.0/.github/workflows/cla.yml000066400000000000000000000021611463013427300231160ustar00rootroot00000000000000name: CLA on: issue_comment: types: - created pull_request_target: jobs: check: if: >- !github.event.repository.fork && ( github.event.issue.pull_request && github.event.comment.body == '@conda-bot check' || github.event_name == 'pull_request_target' ) runs-on: ubuntu-latest steps: - name: Check CLA uses: conda/actions/check-cla@976289d0cfd85139701b26ddd133abdd025a7b5f # v24.5.0 with: # [required] # A token with ability to comment, label, and modify the commit status # (`pull_request: write` and `statuses: write` for fine-grained PAT; `repo` for classic PAT) # (default: secrets.GITHUB_TOKEN) token: ${{ secrets.CLA_ACTION_TOKEN }} # [required] # Label to apply to contributor's PR once CLA is signed label: cla-signed # [required] # Token for opening signee PR in the provided `cla_repo` # (`pull_request: write` for fine-grained PAT; `repo` and `workflow` for classic PAT) cla_token: ${{ secrets.CLA_FORK_TOKEN }} conda-package-streaming-0.10.0/.github/workflows/issues.yml000066400000000000000000000024651463013427300237010ustar00rootroot00000000000000name: Automate Issues on: # NOTE: github.event is issue_comment payload: # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#issue_comment issue_comment: types: [created] env: FEEDBACK_LBL: pending::feedback SUPPORT_LBL: pending::support jobs: # NOTE: will update label if anyone responds, not just the author/reporter # TODO: create conda-issue-sorting team and modify this to toggle label based on # whether a non-issue-sorting engineer commented pending_support: # if [pending::feedback] and anyone responds if: >- !github.event.repository.fork && !github.event.issue.pull_request && contains(github.event.issue.labels.*.name, 'pending::feedback') runs-on: ubuntu-latest steps: # remove [pending::feedback] - uses: actions-ecosystem/action-remove-labels@2ce5d41b4b6aa8503e285553f75ed56e0a40bae0 # v1.3.0 with: labels: ${{ env.FEEDBACK_LBL }} github_token: ${{ secrets.PROJECT_TOKEN }} # add [pending::support], if still open - uses: actions-ecosystem/action-add-labels@18f1af5e3544586314bbe15c0273249c770b2daf # v1.1.3 if: github.event.issue.state == 'open' with: labels: ${{ env.SUPPORT_LBL }} github_token: ${{ secrets.PROJECT_TOKEN }} conda-package-streaming-0.10.0/.github/workflows/labels.yml000066400000000000000000000027721463013427300236310ustar00rootroot00000000000000name: Sync Labels on: # NOTE: github.event is workflow_dispatch payload: # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#workflow_dispatch workflow_dispatch: inputs: dryrun: description: 'dryrun: Preview changes to labels without editing them (true|false)' required: true type: boolean default: true jobs: sync: if: '!github.event.repository.fork' runs-on: ubuntu-latest env: GLOBAL: https://raw.githubusercontent.com/conda/infra/main/.github/global.yml LOCAL: .github/labels.yml steps: - uses: actions/checkout@a5ac7e51b41094c92402da3b24376905380afc29 # v4.1.6 - id: has_local uses: andstor/file-existence-action@076e0072799f4942c8bc574a82233e1e4d13e9d6 # v3.0.0 with: files: ${{ env.LOCAL }} - name: Global Only uses: EndBug/label-sync@52074158190acb45f3077f9099fea818aa43f97a # v2.3.3 if: 
steps.has_local.outputs.files_exists == 'false' with: config-file: ${{ env.GLOBAL }} delete-other-labels: true dry-run: ${{ github.event.inputs.dryrun }} - name: Global & Local uses: EndBug/label-sync@52074158190acb45f3077f9099fea818aa43f97a # v2.3.3 if: steps.has_local.outputs.files_exists == 'true' with: config-file: | ${{ env.GLOBAL }} ${{ env.LOCAL }} delete-other-labels: true dry-run: ${{ github.event.inputs.dryrun }} conda-package-streaming-0.10.0/.github/workflows/lock.yml000066400000000000000000000037411463013427300233140ustar00rootroot00000000000000name: Lock on: # NOTE: github.event is workflow_dispatch payload: # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#workflow_dispatch workflow_dispatch: schedule: - cron: 0 6 * * * permissions: issues: write pull-requests: write jobs: lock: if: '!github.event.repository.fork' runs-on: ubuntu-latest steps: - uses: dessant/lock-threads@1bf7ec25051fe7c00bdd17e6a7cf3d7bfb7dc771 # v5.0.1 with: # Number of days of inactivity before a closed issue is locked issue-inactive-days: 365 # Do not lock issues created before a given timestamp, value must follow ISO 8601 exclude-issue-created-before: '' # Do not lock issues with these labels, value must be a comma separated list of labels or '' exclude-any-issue-labels: '' # Labels to add before locking an issue, value must be a comma separated list of labels or '' add-issue-labels: locked # Reason for locking an issue, value must be one of resolved, off-topic, too heated, spam or '' issue-lock-reason: resolved # Number of days of inactivity before a closed pull request is locked pr-inactive-days: 365 # Do not lock pull requests created before a given timestamp, value must follow ISO 8601 exclude-pr-created-before: '' # Do not lock pull requests with these labels, value must be a comma separated list of labels or '' exclude-any-pr-labels: '' # Labels to add before locking a pull request, value must be a comma separated list of labels or '' add-pr-labels: locked # Reason for locking a pull request, value must be one of resolved, off-topic, too heated, spam or '' pr-lock-reason: resolved # Limit locking to issues, pull requests or discussions, value must be a comma separated list of issues, prs, discussions or '' process-only: issues, prs conda-package-streaming-0.10.0/.github/workflows/project.yml000066400000000000000000000011031463013427300240200ustar00rootroot00000000000000name: Add to Project on: issues: types: - opened pull_request_target: types: - opened jobs: add_to_project: if: '!github.event.repository.fork' runs-on: ubuntu-latest steps: - uses: actions/add-to-project@9bfe908f2eaa7ba10340b31e314148fcfe6a2458 # v1.0.1 with: # issues are added to the Planning project # PRs are added to the Review project project-url: https://github.com/orgs/conda/projects/${{ github.event_name == 'issues' && 2 || 16 }} github-token: ${{ secrets.PROJECT_TOKEN }} conda-package-streaming-0.10.0/.github/workflows/sphinx.yml000066400000000000000000000020251463013427300236670ustar00rootroot00000000000000name: Sphinx on: push: branches: - main pull_request: branches: - main jobs: sphinx: runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 - uses: actions/setup-python@v4 with: python-version: "3.x" architecture: "x64" cache: "pip" - name: Build Documentation run: | pip install -e .[docs] make html - name: Upload artifact uses: actions/upload-pages-artifact@v1 with: # Upload entire repository path: 'build/html' pages: runs-on: ubuntu-latest if: github.ref == 
'refs/heads/main' needs: [sphinx] # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages permissions: contents: read pages: write id-token: write environment: name: github-pages url: ${{ steps.deployment.outputs.page_url }} steps: - name: Deploy to GitHub Pages id: deployment uses: actions/deploy-pages@v1 conda-package-streaming-0.10.0/.github/workflows/stale.yml000066400000000000000000000072741463013427300235010ustar00rootroot00000000000000name: Stale on: # NOTE: github.event is workflow_dispatch payload: # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#workflow_dispatch workflow_dispatch: inputs: dryrun: description: 'dryrun: Preview stale issues/prs without marking them (true|false)' required: true type: boolean default: true schedule: - cron: 0 4 * * * permissions: issues: write pull-requests: write jobs: stale: if: '!github.event.repository.fork' runs-on: ubuntu-latest strategy: matrix: include: - only-issue-labels: '' days-before-issue-stale: 365 days-before-issue-close: 30 # [type::support] issues have a more aggressive stale/close timeline - only-issue-labels: type::support days-before-issue-stale: 90 days-before-issue-close: 21 steps: - uses: conda/actions/read-yaml@976289d0cfd85139701b26ddd133abdd025a7b5f # v24.5.0 id: read_yaml with: path: https://raw.githubusercontent.com/conda/infra/main/.github/messages.yml - uses: actions/stale@28ca1036281a5e5922ead5184a1bbf96e5fc984e # v9.0.0 id: stale with: # Only issues with these labels are checked whether they are stale only-issue-labels: ${{ matrix.only-issue-labels }} # Idle number of days before marking issues stale days-before-issue-stale: ${{ matrix.days-before-issue-stale }} # Idle number of days before closing stale issues/PRs days-before-issue-close: ${{ matrix.days-before-issue-close }} # Idle number of days before marking PRs stale days-before-pr-stale: 365 # Idle number of days before closing stale PRs days-before-pr-close: 30 # Comment on the staled issues stale-issue-message: ${{ fromJSON(steps.read_yaml.outputs.value)['stale-issue'] }} # Label to apply on staled issues stale-issue-label: stale # Label to apply on closed issues close-issue-label: stale::closed # Reason to use when closing issues close-issue-reason: not_planned # Comment on the staled PRs stale-pr-message: ${{ fromJSON(steps.read_yaml.outputs.value)['stale-pr'] }} # Label to apply on staled PRs stale-pr-label: stale # Label to apply on closed PRs close-pr-label: stale::closed # Remove stale label from issues/PRs on updates/comments remove-stale-when-updated: true # Add specified labels to issues/PRs when they become unstale labels-to-add-when-unstale: stale::recovered # Remove specified labels to issues/PRs when they become unstale labels-to-remove-when-unstale: stale,stale::closed # Max number of operations per run operations-per-run: ${{ secrets.STALE_OPERATIONS_PER_RUN || 100 }} # Dry-run debug-only: ${{ github.event.inputs.dryrun || false }} # Order to get issues/PRs ascending: true # Delete branch after closing a stale PR delete-branch: false # Issues with these labels will never be considered stale exempt-issue-labels: stale::recovered,epic # Issues with these labels will never be considered stale exempt-pr-labels: stale::recovered,epic # Exempt all issues/PRs with milestones from stale exempt-all-milestones: true # Assignees on issues/PRs exempted from stale exempt-assignees: mingwandroid - name: Print outputs run: echo ${{ join(steps.stale.outputs.*, ',') }} 
conda-package-streaming-0.10.0/.github/workflows/tests.yml000066400000000000000000000057731463013427300235350ustar00rootroot00000000000000name: Tests on: # NOTE: github.event context is push payload: # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#push push: branches: - main - feature/** # NOTE: github.event context is pull_request payload: # https://docs.github.com/en/developers/webhooks-and-events/webhooks/webhook-events-and-payloads#pull_request pull_request: concurrency: # Concurrency group that uses the workflow name and PR number if available # or commit SHA as a fallback. If a new build is triggered under that # concurrency group while a previous build is running it will be canceled. # Repeated pushes to a PR will cancel all previous builds, while multiple # merges to main will not cancel. group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.sha }} cancel-in-progress: true jobs: linux: runs-on: ubuntu-latest defaults: run: shell: bash -l {0} strategy: fail-fast: false matrix: python-version: ['3.10', '3.11'] steps: - name: Checkout repository uses: actions/checkout@v2 with: fetch-depth: 0 - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} architecture: "x64" cache: "pip" - name: Setup Miniconda uses: conda-incubator/setup-miniconda@v2 with: python-version: ${{ matrix.python-version }} channels: defaults activate-environment: test_env auto-update-conda: false auto-activate-base: false show-channel-urls: true - name: Source Scripts run: | set -x # conda is our test dependency but can't be pip installed conda install --quiet conda pip pip install -e .[test] conda info --json echo "condarc" cat ~/.condarc echo "conda_pkgs_dir" ls /home/runner/conda_pkgs_dir echo "miniconda/pkgs" ls /usr/share/miniconda/pkgs echo "test_env" ls /usr/share/miniconda/envs/test_env pytest analyze: name: Analyze test results needs: [linux] if: always() runs-on: ubuntu-latest steps: - name: Download test results uses: actions/download-artifact@v3 - name: Upload combined test results # provides one downloadable archive of all .coverage/test-report.xml files # of all matrix runs for further analysis. 
uses: actions/upload-artifact@v3 with: name: test-results-${{ github.sha }}-all path: test-results-${{ github.sha }}-* retention-days: 90 # default: 90 - name: Test Summary uses: test-summary/action@v2 with: paths: ./test-results-${{ github.sha }}-**/test-report*.xml - name: Decide whether the needed jobs succeeded or failed uses: re-actors/alls-green@release/v1 with: jobs: ${{ toJSON(needs) }} conda-package-streaming-0.10.0/.gitignore000066400000000000000000000001151463013427300202240ustar00rootroot00000000000000.coverage* .vscode .nox __pycache__ PKG-INFO build dist deploy/metadata.json conda-package-streaming-0.10.0/.pre-commit-config.yaml000066400000000000000000000020521463013427300225170ustar00rootroot00000000000000# disable autofixing PRs, commenting "pre-commit.ci autofix" on a pull request triggers a autofix ci: autofix_prs: false repos: - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.6.0 hooks: - id: check-added-large-files - id: check-ast - id: fix-byte-order-marker - id: check-case-conflict - id: check-executables-have-shebangs - id: check-merge-conflict - id: check-shebang-scripts-are-executable - id: debug-statements - id: detect-private-key - id: mixed-line-ending - id: end-of-file-fixer - id: trailing-whitespace - id: check-yaml exclude: conda.recipe/meta.yaml - repo: https://github.com/asottile/pyupgrade rev: v3.15.2 hooks: - id: pyupgrade args: ["--py37-plus"] - repo: https://github.com/PyCQA/isort rev: 5.13.2 hooks: - id: isort - repo: https://github.com/psf/black rev: 24.4.2 hooks: - id: black - repo: https://github.com/PyCQA/flake8 rev: 7.0.0 hooks: - id: flake8 conda-package-streaming-0.10.0/CHANGELOG.md000066400000000000000000000012351463013427300200510ustar00rootroot00000000000000[//]: # (current developments) ## 0.10.0 (2024-06) * Use zip64 extensions when converting .tar.bz2 to .conda, if uncompressed size is close to the 2GB ZIP64_LIMIT. (#79) ## 0.9.0 (2023-07) * Respect umask when extracting files. [#65](https://github.com/conda/conda-package-streaming/pulls/65); [conda issue #12829](https://github.com/conda/conda/issues/12829). ## 0.8.0 (2023-05) * Update transmute to use SpooledTemporaryFile instead of streaming directly to zip [(#57)](https://github.com/conda/conda-package-streaming/issues/57). This can reduce zstd memory usage during decompression. * `transmute` returns Path to transmuted package instead of `None`. conda-package-streaming-0.10.0/CODE_OF_CONDUCT.md000066400000000000000000000024321463013427300210370ustar00rootroot00000000000000# Conda Organization Code of Conduct > [!NOTE] > Below is the short version of our CoC, see the long version [here](https://github.com/conda-incubator/governance/blob/main/CODE_OF_CONDUCT.md). # The Short Version Be kind to others. Do not insult or put down others. Behave professionally. Remember that harassment and sexist, racist, or exclusionary jokes are not appropriate for the conda Organization. All communication should be appropriate for a professional audience including people of many different backgrounds. Sexual language and imagery is not appropriate. The conda Organization is dedicated to providing a harassment-free community for everyone, regardless of gender, sexual orientation, gender identity and expression, disability, physical appearance, body size, race, or religion. We do not tolerate harassment of community members in any form. Thank you for helping make this a welcoming, friendly community for all. 
## Report an Incident * Report a code of conduct incident [using a form](https://form.jotform.com/221527028480048). * Report a code of conduct incident via email: [conduct@conda.org](mailto:conduct@conda.org). * Contact [an individual committee member](#committee-membership) or [CoC event representative](#coc-representatives) to report an incident in confidence. conda-package-streaming-0.10.0/HOW_WE_USE_GITHUB.md000066400000000000000000000546161463013427300214030ustar00rootroot00000000000000 [conda-org]: https://github.com/conda [sub-team]: https://github.com/conda-incubator/governance#sub-teams [project-planning]: https://github.com/orgs/conda/projects/2/views/11 [project-sorting]: https://github.com/orgs/conda/projects/2/views/11 [project-support]: https://github.com/orgs/conda/projects/2/views/12 [project-backlog]: https://github.com/orgs/conda/projects/2/views/13 [project-in-progress]: https://github.com/orgs/conda/projects/2/views/14 [docs-toc]: https://github.blog/changelog/2021-04-13-table-of-contents-support-in-markdown-files/ [docs-actions]: https://docs.github.com/en/actions [docs-saved-reply]: https://docs.github.com/en/get-started/writing-on-github/working-with-saved-replies/creating-a-saved-reply [docs-commit-signing]: https://docs.github.com/en/authentication/managing-commit-signature-verification/signing-commits [infrastructure]: https://github.com/conda/infrastructure [workflow-sync]: https://github.com/conda/infrastructure/blob/main/.github/workflows/sync.yml [labels-global]: https://github.com/conda/infrastructure/blob/main/.github/global.yml [workflow-cla]: /.github/workflows/cla.yml [workflow-issues]: /.github/workflows/issues.yml [workflow-labels]: /.github/workflows/labels.yml [workflow-lock]: /.github/workflows/lock.yml [workflow-project]: /.github/workflows/project.yml [workflow-stale]: /.github/workflows/stale.yml [labels-local]: /.github/labels.yml [labels-page]: ../../labels # How We Use GitHub This document seeks to outline how we as a community use GitHub Issues to track bugs and feature requests while still catering to development practices & project management (_e.g._, release cycles, feature planning, priority sorting, etc.). **Topics:** - [What is "Issue Sorting"?](#what-is-issue-sorting) - [Issue Sorting Procedures](#issue-sorting-procedures) - [Commit Signing](#commit-signing) - [Types of Issues](#types-of-issues) - [Standard Issue](#standard-issue) - [Epics](#epics) - [Spikes](#spikes) - [Working on Issues](#working-on-issues) > [!NOTE] > This document is written in the style of an FAQ. For easier navigation, use [GitHub's table of contents feature][docs-toc]. ## What is "Issue Sorting"? > [!NOTE] > "Issue sorting" is similar to that of "triaging", but we've chosen to use different terminology because "triaging" is a word related to very weighty topics (_e.g._, injuries and war) and we would like to be sensitive to those connotations. Additionally, we are taking a more "fuzzy" approach to sorting (_e.g._, severities may not be assigned, etc.). "Issue Sorting" refers to the process of assessing the priority of incoming issues. 
Below is a high-level diagram of the flow of issues: ```mermaid flowchart LR subgraph flow_sorting [Issue Sorting] board_sorting{{Sorting}} board_support{{Support}} board_sorting<-->board_support end subgraph flow_refinement [Refinement] board_backlog{{Backlog}} board_backlog-- refine -->board_backlog end subgraph flow_progress [In Progress] board_progress{{In Progress}} end state_new(New Issues) state_closed(Closed) state_new-->board_sorting board_sorting-- investigated -->board_backlog board_sorting-- duplicates, off-topic -->state_closed board_support-- resolved, unresponsive -->state_closed board_backlog-- pending work -->board_progress board_backlog-- resolved, irrelevant -->state_closed board_progress-- resolved -->state_closed ``` ### Why sort issues? At the most basic "bird's eye view" level, sorted issues will fall into the category of four main priority levels: - Do now - Do sometime - Provide user support - Never do (_i.e._, close) At its core, sorting enables new issues to be placed into these four categories, which helps to ensure that they will be processed at a velocity similar to or exceeding the rate at which new issues are coming in. One of the benefits of actively sorting issues is to avoid engineer burnout and to make necessary work sustainable; this is done by eliminating a never-ending backlog that has not been reviewed by any maintainers. There will always be broad-scope design and architecture implementations that the maintainers will be interested in pursuing; by actively organizing issues, the sorting engineers will be able to more easily track and tackle both specific and big-picture goals. ### Who does the sorting? Sorting engineers are a conda governance [sub-team][sub-team]; they are a group of community members who are responsible for making decisions regarding closing issues and setting feature work priorities, among other sorting-related tasks. ### How do items show up for sorting? New issues that are opened in any of the repositories in the [conda GitHub organization][conda-org] will show up in the "Sorting" tab of the [Planning project][project-planning]. There are two [GitHub Actions][docs-actions] workflows utilized for this purpose; [`.github/workflows/issues.yml`][workflow-issues] and [`.github/workflows/project.yml`][workflow-project]. The GitHub Actions in the [`conda/infrastructure`][infrastructure] repository are viewed as canonical; the [`.github/workflows/sync.yml` workflow][workflow-sync] sends out any modifications to other `conda` repositories from there. ### What is done about the issues in the "Sorting" tab? Issues in the ["Sorting" tab of the project board][project-sorting] are considered ready for the following procedures: - Mitigation via short-term workarounds and fixes - Redirection to the correct project - Determining if support can be provided for errors and questions - Closing out of any duplicate/off-topic issues The sorting engineers on rotation are not seeking to _resolve_ issues that arise. Instead, the goal is to understand the issue and to determine whether it is legitimate, and then to collect as much relevant information as possible so that the maintainers can make an informed decision about the appropriate resolution schedule. Issues will remain in the ["Sorting" tab][project-sorting] as long as the issue is in an investigatory phase (_e.g._, querying the user for more details, asking the user to attempt other workarounds, other debugging efforts, etc.) 
and are likely to remain in this state the longest, but should still be progressing over the course of 1-2 weeks. For more information on the sorting process, see [Issue Sorting Procedures](#issue-sorting-procedures). ### When do items move out of the "Sorting" tab? Items move out of the ["Sorting" tab][project-sorting] once the investigatory phase described in [What is done about the issues in the "Sorting" tab?](#what-is-done-about-the-issues-in-the-sorting-tab) has concluded and the sorting engineer has enough information to make a decision about the appropriate resolution schedule for the issue. The additional tabs in the project board that the issues can be moved to include the following: - **"Support"** - Any issue in the ["Support" tab of the Planning board][project-support] is a request for support and is not a feature request or a bug report. Add the [`type::support`](https://github.com/conda/infrastructure/labels/type%3A%3Asupport) label to move an issue to this tab. - **"Backlog"** - The issue has revealed a bug or feature request. We have collected enough details to understand the problem/request and to reproduce it on our own. These issues have been moved into the [Backlog tab of the Planning board][project-backlog] at the end of the sorting rotation during Refinement. Add the [`backlog`](https://github.com/conda/infrastructure/labels/backlog) label to move an issue to this tab. - **"Closed"** - The issue was closed due to being a duplicate, being redirected to a different project, was a user error, a question that has been resolved, etc. ### Where do work issues go after being sorted? Once issues are deemed ready to be worked on, they will be moved to the ["Backlog" tab of the Planning board][project-backlog]. Once actively in progress, the issues will be moved to the ["In Progress" tab of the Planning board][project-in-progress] and then closed out once the work is complete. ### What is the purpose of having a "Backlog"? Issues are "backlogged" when they have been sorted but not yet earmarked for an upcoming release. ### What automation procedures are currently in place? 
Global automation procedures synced out from the [`conda/infrastructure`][infrastructure] repo include: - [Marking of issues and pull requests as stale][workflow-stale], resulting in: - issues marked as [`type::support`](https://github.com/conda/infrastructure/labels/type%3A%3Asupport) being labeled stale after 21 days of inactivity and being closed after 7 further days of inactivity (that is, closed after 30 inactive days total) - all other inactive issues (not labeled as [`type::support`](https://github.com/conda/infrastructure/labels/type%3A%3Asupport) being labeled stale after 365 days of inactivity and being closed after 30 further days of inactivity (that is, closed after an approximate total of 1 year and 1 month of inactivity) - all inactive pull requests being labeled stale after 365 days of inactivity and being closed after 30 further days of inactivity (that is, closed after an approximate total of 1 year and 1 month of inactivity) - [Locking of closed issues and pull requests with no further activity][workflow-lock] after 365 days - [Adding new issues and pull requests to the respective project boards][workflow-project] - [Indicating an issue is ready for the sorting engineer's attention][workflow-issues] by toggling [`pending::feedback`](https://github.com/conda/infrastructure/labels/pending%3A%3Afeedback) with [`pending::support`](https://github.com/conda/infrastructure/labels/pending%3A%3Asupport) after a contributor leaves a comment - [Verifying that contributors have signed the CLA][workflow-cla] before allowing pull requests to be merged; if the contributor hasn't signed the CLA previously, merging is be blocked until a manual review can be done - [Syncing out templates, labels, workflows, and documentation][workflow-sync] from [`conda/infrastructure`][infrastructure] to the other repositories ## Issue Sorting Procedures ### How are issues sorted? Issues in the ["Sorting" tab of the Planning board][project-sorting] are reviewed by issue sorting engineers, who take rotational sorting shifts. In the process of sorting issues, engineers label the issues and move them to the other tabs of the project board for further action. Issues that require input from multiple members of the sorting team will be brought up during refinement meetings in order to understand how those particular issues fit into the short- and long-term roadmap. These meetings enable the sorting engineers to get together to collectively prioritize issues, earmark feature requests for specific future releases (versus a more open-ended backlog), tag issues as ideal for first-time contributors, as well as whether or not to close/reject specific feature requests. ### How does labeling work? Labeling is a very important means for sorting engineers to keep track of the current state of an issue with regards to the asynchronous nature of communicating with users. Utilizing the proper labels helps to identify the severity of the issue as well as to quickly understand the current state of a discussion. Each label has an associated description that clarifies how the label should be used. Hover on the label to see its description. Label colors are used to distinguish labels by category. Generally speaking, labels with the same category are considered mutually exclusive, but in some cases labels sharing the same category can occur concurrently, as they indicate qualifiers as opposed to types. 
For example, we may have the following types, [`type::bug`](https://github.com/conda/infrastructure/labels/type%3A%3Abug), [`type::feature`](https://github.com/conda/infrastructure/labels/type%3A%3Afeature), and [`type::documentation`](https://github.com/conda/infrastructure/labels/type%3A%3Adocumentation), where for any one issue there would be _at most_ **one** of these to be defined (_i.e._ an issue should not be a bug _and_ a feature request at the same time). Alternatively, with issues involving specific operating systems (_i.e._, [`os::linux`](https://github.com/conda/infrastructure/labels/os%3A%3Alinux), [`os::macos`](https://github.com/conda/infrastructure/labels/os%3A%3Amacos), and [`os::windows`](https://github.com/conda/infrastructure/labels/os%3A%3Awindows)), an issue could be labeled with one or more, depending on the system(s) the issue occurs on. Please note that there are also automation policies in place that are affected by labeling. For example, if an issue is labeled as [`type::support`](https://github.com/conda/infrastructure/labels/type%3A%3Asupport), that issue will be marked [`stale`](https://github.com/conda/infrastructure/labels/stale) after 21 days of inactivity and auto-closed after seven more days without activity (30 inactive days total), which is earlier than issues without this label. See [What automation procedures are currently in place?](#what-automation-procedures-are-currently-in-place) for more details. ### What labels are required for each issue? At minimum, both `type` and `source` labels should be specified on each issue before moving it from the "Sorting" tab to the "Backlog" tab. All issues that are bugs should also be tagged with a `severity` label. The `type` labels are exclusive of each other: each sorted issue should have exactly one `type` label. These labels give high-level information on the issue's classification (_e.g._, bug, feature, tech debt, etc.) The `source` labels are exclusive of each other: each sorted issue should have exactly one `source` label. These labels give information on the sub-group to which the issue's author belongs (_e.g._, a partner, a frequent contributor, the wider community, etc.). Through these labels, maintainers gain insight into how well we're meeting the needs of various groups. The `severity` labels are exclusive of each other and, while required for the [`type::bug`](https://github.com/conda/infrastructure/labels/type%3A%bug) label, they can also be applied to other types to indicate demand or need. These labels help us to prioritize our work. Severity is not the only factor for work prioritization, but it is an important consideration. Please review the descriptions of the `type`, `source`, and `severity` labels on the [labels page][labels-page] prior to use. ### How are new labels defined? Labels are defined using a scoped syntax with an optional high-level category (_e.g._, `source`, `tag`, `type`, etc.) and a specific topic, much like the following: - `[topic]` - `[category::topic]` - `[category::topic-phrase]` This syntax helps with issue sorting enforcement, as it helps to ensure that sorted issues are, at minimum, categorized by type and source. There are a number of labels that have been defined for the different repositories. In order to create a streamlined sorting process, label terminologies are standardized using similar (if not the same) labels. ### How are new labels added? 
New **global** labels (_i.e._, labels that apply equally to all repositories within the conda GitHub organization) are added to [`conda/infrastructure`][infrastructure]'s [`.github/global.yml` file][labels-global]; new **local** labels (_i.e._, labels specific to particular repositories) are added to each repository's [`.github/labels.yml` file][labels-local]. All new labels should follow the labeling syntax described in ["How are new labels defined?"](#how-are-new-labels-defined). Global labels are combined with any local labels and these aggregated labels are used by the [`.github/workflows/labels.yml` workflow][workflow-labels] to synchronize the labels available for the repository. ### Are there any templates to use as responses for commonly-seen issues? Some of the same types of issues appear regularly (_e.g._, issues that are duplicates of others, issues that should be filed in the Anaconda issue tracker, errors that are due to a user's specific setup/environment, etc.). Below are some boilerplate responses for the most commonly-seen issues to be sorted:
<details>
<summary>Duplicate Issue</summary>

This is a duplicate of [link to primary issue]; please feel free to continue the discussion there.

> [!WARNING]
> Apply the https://github.com/conda/infrastructure/labels/duplicate label to the issue being closed and https://github.com/conda/infrastructure/labels/duplicate%3A%3Aprimary to the original issue.

</details>
<details>
<summary>Requesting an Uninstall/Reinstall of conda</summary>

Please uninstall your current version of `conda` and reinstall the latest version.
Feel free to use either the [miniconda](https://docs.anaconda.com/free/miniconda/)
or [anaconda](https://www.anaconda.com/products/individual) installer,
whichever is more appropriate for your needs.

</details>
<details>
<summary>Redirect to Anaconda Issue Tracker</summary>

Thank you for filing this issue! Unfortunately, this is off-topic for this repo.
If you are still encountering this issue, please reopen it in the
[Anaconda issue tracker](https://github.com/ContinuumIO/anaconda-issues/issues),
where `conda` installer/package issues are addressed.

> [!WARNING]
> Apply the https://github.com/conda/infrastructure/labels/off-topic label to these issues before closing them out.

</details>
<details>
<summary>Redirecting to Nucleus Forums</summary>

Unfortunately, this issue is outside the scope of support we offer via GitHub;
if you continue to experience the problems described here,
please post details to the [Nucleus forums](https://community.anaconda.cloud/).

> [!WARNING]
> Apply the https://github.com/conda/infrastructure/labels/off-topic label to these issues before closing them out.

</details>
<details>
<summary>Slow solving of conda environment</summary>

Hi [@username],

Thanks for voicing your concern about the performance of the classic dependency solver. To fix this, our official recommendation is to use the new default "conda-libmamba-solver" instead of the classic solver (more information about "conda-libmamba-solver" can be found here: https://conda.github.io/conda-libmamba-solver/getting-started/).

In most cases, "conda-libmamba-solver" should be significantly faster than the "classic" solver. We hope it provides you with a much better experience going forward.

</details>
To avoid manually typing or copying/pasting the above repeatedly, note that it's possible to save text for the most commonly-used responses via [GitHub's "Add Saved Reply" option][docs-saved-reply].

## Commit Signing

For all conda maintainers, we require commit signing and strongly recommend it for all others wishing to contribute to conda-related projects. More information about how to set this up within GitHub can be found here:

- [GitHub's signing commits docs][docs-commit-signing]

## Types of Issues

### Standard Issue

TODO

### Epics

TODO

### Spikes

#### What is a spike?

"Spike" is a term borrowed from extreme programming and agile development. Spikes are used when the **outcome of an issue is unknown or even optional**. For example, when first coming across a problem that has not been solved before, a project may choose to either research the problem or create a prototype in order to better understand it.

Additionally, spikes represent work that **may or may not actually be completed or implemented**. An example of this is prototypes created to explore possible solutions. Not all prototypes are implemented, and the purpose of creating a prototype is often to explore the problem space more. For research-oriented tasks, the end result of the research may be that a feature request simply is not viable at the moment, which would put a stop to that work.

Finally, spikes are usually **timeboxed**. However, given the open source/volunteer nature of our contributions, we do not enforce this for our contributors. When a timebox is set, it limits how long we want someone to work on the spike. We do this to prevent contributors from falling into a rabbit hole they may never return from. Instead, we set a time limit to perform work on the spike and then have the assignee report back. If the tasks defined in the spike have not yet been completed, a decision is made on whether it makes sense to perform further work on the spike.

#### When do I create a spike?

A spike should be created when we do not have enough information to move forward with solving a problem. That simply means that, whenever we are dealing with unknowns or processes the project team has never encountered before, it may be useful for us to create a spike. In day-to-day work, this kind of situation may appear when new bug reports or feature requests come in that deal with problems or technologies that the project team is unfamiliar with. All issues that the project team has sufficient knowledge of should instead proceed as regular issues.

#### When do I not create a spike?

Below are some common scenarios where creating a spike is not appropriate:

- Writing a technical specification for a feature we know how to implement
- Design work that would go into drafting how an API is going to look and function
- Any work that must be completed or is not optional

## Working on Issues

### How do I assign myself to an issue I am actively reviewing?

If you do **not** have permissions, please indicate that you are working on an issue by leaving a comment. Someone who has permissions will assign you to the issue. If two weeks have passed without a pull request or an additional comment requesting information, you may be removed from the issue and the issue reassigned.
If you are assigned to an issue but will not be able to continue work on it, please comment to indicate that you will no longer be working on it and press `unassign me` next to your username in the `Assignees` section of the issue page (top right). If you **do** have permissions, please assign yourself to the issue by pressing `assign myself` under the `Assignees` section of the issue page (top right). conda-package-streaming-0.10.0/LICENSE000066400000000000000000000051561463013427300172530ustar00rootroot00000000000000BSD 3-Clause License Copyright (c) 2022, Anaconda, Inc. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. lazy_wheel.py: Copyright (c) 2008-present The pip developers (see https://github.com/pypa/pip/blob/main/AUTHORS.txt file) Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. conda-package-streaming-0.10.0/Makefile000066400000000000000000000012461463013427300177020ustar00rootroot00000000000000# Minimal makefile for Sphinx documentation # # You can set these variables from the command line, and also # from the environment for the first two. 
SPHINXOPTS ?= SPHINXBUILD ?= sphinx-build SOURCEDIR = docs BUILDDIR = build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) style: isort --profile=black . black . conda-package-streaming-0.10.0/README.md000066400000000000000000000061351463013427300175230ustar00rootroot00000000000000# conda-package-streaming [![pre-commit.ci status](https://results.pre-commit.ci/badge/github/conda/conda-package-streaming/main.svg)](https://results.pre-commit.ci/latest/github/conda/conda-package-streaming/main) An efficient library to read from new and old format .conda and .tar.bz2 conda packages. Download conda metadata from packages without transferring entire file. Get metadata from local `.tar.bz2` packages without reading entire files. Uses enhanced pip `lazy_wheel` to fetch a file out of `.conda` with no more than 3 range requests, but usually 2. Uses `tar = tarfile.open(fileobj=...)` to stream remote `.tar.bz2`. Closes the HTTP request once desired files have been seen. # Quickstart The basic API yields (tarfile, member) tuples from conda files as tarfile is needed to extract member. Note the `.tar.bz2` format yields all members, not just `info/`, from `stream_conda_info` / `stream_conda_component`, while the `.conda` format yields members from the requested inner archive — allowing the caller to decide when to stop reading. From a url, ```python from conda_package_streaming.url import stream_conda_info # url = (ends with .conda or .tar.bz2) for tar, member in stream_conda_info(url): if member.name == "info/index.json": index_json = json.load(tar.extractfile(member)) break ``` From s3, ```python client = boto3.client("s3") from conda_package_streaming.s3 import stream_conda_info # key = (ends with .conda or .tar.bz2) for tar, member in stream_conda_info(client, bucket, key): if member.name == "info/index.json": index_json = json.load(tar.extractfile(member)) break ``` From a filename, ```python from conda_package_streaming import package_streaming # filename = (ends with .conda or .tar.bz2) for tar, member in package_streaming.stream_conda_info(filename): if member.name == "info/index.json": index_json = json.load(tar.extractfile(member)) break ``` From a file-like object, ```python from contextlib import closing from conda_package_streaming.url import conda_reader_for_url from conda_package_streaming.package_streaming import stream_conda_component filename, conda = conda_reader_for_url(url) # file object must be seekable for `.conda` format, but merely readable for `.tar.bz2` with closing(conda): for tar, member in stream_conda_component(filename, conda, component="info"): if member.name == "info/index.json": index_json = json.load(tar.extractfile(member)) break ``` If you need the entire package, download it first and use the file-based APIs. The URL-based APIs are more efficient if you only need to access package metadata. 
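For whole-package extraction of a local file, a minimal sketch using the `extract` helper from `conda_package_streaming.extract` (its source appears later in this archive); the package filename and destination directory here are illustrative:

```python
from conda_package_streaming.extract import extract

# filename = (ends with .conda or .tar.bz2); dest_dir is required
extract("example-package-1.0-0.conda", dest_dir="./example-package-1.0-0")
```
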
# Package goals * Extract conda packages (both formats) * Easy to install from pypi or conda * Do the least amount of I/O possible (no temporary files, transfer partial packages) * Open files from the network / standard HTTP / s3 * Continue using conda-package-handling to create .conda packages # Generating documentation Uses markdown, furo theme. Requires newer mdit-py-plugins. `pip install conda-package-streaming[docs]` One time: `sphinx-apidoc -o docs .` conda-package-streaming-0.10.0/conda.recipe/000077500000000000000000000000001463013427300205715ustar00rootroot00000000000000conda-package-streaming-0.10.0/conda.recipe/meta.yaml000066400000000000000000000023301463013427300224010ustar00rootroot00000000000000{% set name = "conda-package-streaming" %} {% set version_match = load_file_regex( load_file="conda_package_streaming/__init__.py", regex_pattern='^__version__ = "(.+)"') %} {% set version = version_match[1] %} package: name: {{ name|lower }} version: {{ version }} source: git_url: ../ # url: https://github.com/conda/conda-package-streaming/archive/refs/tags/v{{ version }}.tar.gz # sha256: 60a064dcb6adf775362339ffc8728320d89115c0f3870f2bb43fd368240a3205 build: script: {{ PYTHON }} -m pip install --no-build-isolation . -vv number: 0 noarch: python requirements: host: - flit-core - python >=3.7 - pip run: - zstandard >=0.15 - python >=3.7 # allow optional 'requests' test: imports: - conda_package_streaming.url commands: - pip check requires: - pip - requests about: home: https://github.com/conda/conda-package-streaming summary: An efficient library to read from new and old format .conda and .tar.bz2 conda packages. license: BSD-3-Clause license_family: BSD license_file: LICENSE doc_url: https://conda.github.io/conda-package-streaming/ dev_url: https://github.com/conda/conda-package-streaming extra: recipe-maintainers: - dholth conda-package-streaming-0.10.0/conda_package_streaming/000077500000000000000000000000001463013427300230475ustar00rootroot00000000000000conda-package-streaming-0.10.0/conda_package_streaming/__init__.py000066400000000000000000000000271463013427300251570ustar00rootroot00000000000000__version__ = "0.10.0" conda-package-streaming-0.10.0/conda_package_streaming/exceptions.py000066400000000000000000000012451463013427300256040ustar00rootroot00000000000000import tarfile class SafetyError(tarfile.TarError): def __init__(self, msg, *args, **kw): msg = f"Error with archive. {msg}" super().__init__(msg) class CaseInsensitiveFileSystemError(OSError): def __init__(self): message = """\ Cannot extract package to a case-insensitive file system. Your install destination does not differentiate between upper and lowercase characters, and this breaks things. Try installing to a location that is case-sensitive. Windows drives are usually the culprit here - can you install to a native Unix drive, or turn on case sensitivity for this (Windows) location? """ super().__init__(message) conda-package-streaming-0.10.0/conda_package_streaming/extract.py000066400000000000000000000047521463013427300251030ustar00rootroot00000000000000""" Extract package to directory, with checks against tar members extracting outside the target directory. """ from __future__ import annotations import os import tarfile from errno import ELOOP from pathlib import Path from typing import Generator from . 
import exceptions, package_streaming __all__ = ["extract_stream", "extract"] def extract_stream( stream: Generator[tuple[tarfile.TarFile, tarfile.TarInfo], None, None], dest_dir: Path | str, ): """ Pipe ``stream_conda_component`` output here to extract every member into dest_dir. For ``.conda``, this will need to be called twice (for the info and pkg components); for ``.tar.bz2`` every member is extracted. """ dest_dir = os.path.realpath(dest_dir) def is_within_dest_dir(name): abs_target = os.path.realpath(os.path.join(dest_dir, name)) prefix = os.path.commonpath((dest_dir, abs_target)) return prefix == dest_dir for tar_file, _ in stream: # careful not to seek backwards def checked_members(): # from conda_package_handling for member in tar_file: if not is_within_dest_dir(member.name): raise exceptions.SafetyError(f"contains unsafe path: {member.name}") yield member try: tar_file.extractall(path=dest_dir, members=checked_members()) except OSError as e: if e.errno == ELOOP: raise exceptions.CaseInsensitiveFileSystemError() from e raise # next iteration of for loop raises GeneratorExit in stream stream.close() def extract(filename, dest_dir=None, fileobj=None): """ Extract all components of conda package to dest_dir. fileobj: must be seekable if provided and the package is in ``.conda`` format. """ assert dest_dir, "dest_dir is required" if str(filename).endswith(".conda"): components = [ package_streaming.CondaComponent.pkg, package_streaming.CondaComponent.info, ] else: # .tar.bz2 doesn't filter by component components = [package_streaming.CondaComponent.pkg] closefd = False if not fileobj: fileobj = open(filename, "rb") closefd = True try: for component in components: stream = package_streaming.stream_conda_component( filename, fileobj, component=component ) extract_stream(stream, dest_dir) finally: if closefd: fileobj.close() conda-package-streaming-0.10.0/conda_package_streaming/lazy_wheel.py000066400000000000000000000223511463013427300255670ustar00rootroot00000000000000"""Lazy ZIP over HTTP""" from __future__ import annotations import logging import zipfile from bisect import bisect_left, bisect_right from contextlib import contextmanager from tempfile import NamedTemporaryFile from typing import Any, Iterator from zipfile import BadZipfile, ZipFile from requests import Session from requests.models import CONTENT_CHUNK_SIZE, Response # from pip 22.0.3, with fixes; pip-internal imports removed log = logging.getLogger(__name__) # If-Match (etag) to detect file changed during fetch would also be nice HEADERS = {"Accept-Encoding": "identity"} class HTTPRangeRequestUnsupported(Exception): pass class LazyZipOverHTTP: """File-like object mapped to a ZIP file over HTTP. This uses HTTP range requests to lazily fetch the file's content, which is supposed to be fed to ZipFile. If such requests are not supported by the server, raise HTTPRangeRequestUnsupported during initialization. """ def __init__( self, url: str, session: Session, chunk_size: int = CONTENT_CHUNK_SIZE ) -> None: # if CONTENT_CHUNK_SIZE is bigger than the file: # In [8]: response.headers["Content-Range"] # Out[8]: 'bytes 0-3133374/3133375' self._request_count = 0 self._session, self._url, self._chunk_size = session, url, chunk_size # initial range request for the end of the file tail = self._stream_response(start="", end=CONTENT_CHUNK_SIZE) # e.g. 
{'accept-ranges': 'bytes', 'content-length': '10240', # 'content-range': 'bytes 12824-23063/23064', 'last-modified': 'Sat, 16 # Apr 2022 13:03:02 GMT', 'date': 'Thu, 21 Apr 2022 11:34:04 GMT'} if tail.status_code != 206: raise HTTPRangeRequestUnsupported("range request is not supported") # lowercase content-range to support s3 self._length = int(tail.headers["content-range"].partition("/")[-1]) self._file = NamedTemporaryFile() self.truncate(self._length) # length is also in Content-Length and Content-Range header with self._stay(): content_length = int(tail.headers["content-length"]) if hasattr(tail, "content"): assert content_length == len(tail.content) self.seek(self._length - content_length) for chunk in tail.iter_content(self._chunk_size): self._file.write(chunk) self._left: list[int] = [self._length - content_length] self._right: list[int] = [self._length - 1] @property def mode(self) -> str: """Opening mode, which is always rb.""" return "rb" @property def name(self) -> str: """Path to the underlying file.""" return self._file.name def seekable(self) -> bool: """Return whether random access is supported, which is True.""" return True def close(self) -> None: """Close the file.""" self._file.close() @property def closed(self) -> bool: """Whether the file is closed.""" return self._file.closed def read(self, size: int = -1) -> bytes: """Read up to size bytes from the object and return them. As a convenience, if size is unspecified or -1, all bytes until EOF are returned. Fewer than size bytes may be returned if EOF is reached. """ # BUG does not download correctly if size is unspecified download_size = size start, length = self.tell(), self._length stop = length if size < 0 else min(start + download_size, length) start = max(0, stop - download_size) self._download(start, stop - 1) return self._file.read(size) def readable(self) -> bool: """Return whether the file is readable, which is True.""" return True def seek(self, offset: int, whence: int = 0) -> int: """Change stream position and return the new absolute position. Seek to offset relative position indicated by whence: * 0: Start of stream (the default). pos should be >= 0; * 1: Current position - pos may be negative; * 2: End of stream - pos usually negative. """ return self._file.seek(offset, whence) def tell(self) -> int: """Return the current position.""" return self._file.tell() def truncate(self, size: int | None = None) -> int: """Resize the stream to the given size in bytes. If size is unspecified resize to the current position. The current stream position isn't changed. Return the new file size. """ return self._file.truncate(size) def writable(self) -> bool: """Return False.""" return False def __enter__(self) -> LazyZipOverHTTP: self._file.__enter__() return self def __exit__(self, *exc: Any) -> bool | None: return self._file.__exit__(*exc) @contextmanager def _stay(self) -> Iterator[None]: """Return a context manager keeping the position. At the end of the block, seek back to original position. """ pos = self.tell() try: yield finally: self.seek(pos) def _check_zip(self) -> None: """Check and download until the file is a valid ZIP.""" end = self._length - 1 for start in reversed(range(0, end, self._chunk_size)): self._download(start, end) with self._stay(): try: # For read-only ZIP files, ZipFile only needs # methods read, seek, seekable and tell. 
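# Constructing ZipFile parses the central directory at the end of the file; # a BadZipfile here just means the downloaded tail is still too short, so the # enclosing loop fetches one more chunk further from the end and retries.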
ZipFile(self) # type: ignore except BadZipfile: pass else: break def _stream_response( self, start: int | str, end: int, base_headers: dict[str, str] = HEADERS ) -> Response: """Return HTTP response to a range request from start to end. :param start: if "", request the last ``end`` bytes of the file.""" headers = base_headers.copy() headers["Range"] = f"bytes={start}-{end}" log.debug("%s", headers["Range"]) # TODO: Get range requests to be correctly cached headers["Cache-Control"] = "no-cache" self._request_count += 1 response = self._session.get(self._url, headers=headers, stream=True) response.raise_for_status() return response def _merge( self, start: int, end: int, left: int, right: int ) -> Iterator[tuple[int, int]]: """Return an iterator of intervals to be fetched. Args: start (int): Start of needed interval end (int): End of needed interval left (int): Index of first overlapping downloaded data right (int): Index after last overlapping downloaded data """ lslice, rslice = self._left[left:right], self._right[left:right] i = start = min([start] + lslice[:1]) end = max([end] + rslice[-1:]) for j, k in zip(lslice, rslice): if j > i: yield i, j - 1 i = k + 1 if i <= end: yield i, end self._left[left:right], self._right[left:right] = [start], [end] def _download(self, start: int, end: int) -> None: """Download bytes from start to end inclusively.""" with self._stay(): left = bisect_left(self._right, start) right = bisect_right(self._left, end) for start, end in self._merge(start, end, left, right): response = self._stream_response(start, end) self.seek(start) for chunk in response.iter_content(self._chunk_size): self._file.write(chunk) class LazyConda(LazyZipOverHTTP): def prefetch(self, conda_file_id): """ Conda fork specific. Prefetch the `info-` component's range from the remote archive. Reduces the number of Range requests to 2 or 3 (1 or 2 for the directory, 1 for the file). conda_file_id: name of .conda without path or `.conda` extension """ target_file = f"info-{conda_file_id}.tar.zst" with self._stay(): # not strictly necessary # try to read entire conda info in one request zf = zipfile.ZipFile(self) infolist = zf.infolist() for i, info in enumerate(infolist): if info.filename == target_file: # could be incorrect if zipfile was concatenated to another # file (not likely for .conda) start = info.header_offset try: end = infolist[i + 1].header_offset # or info.header_offset # + len(info.filename) # + len(info.extra) # + info.compress_size # (unless Zip64) except IndexError: end = zf.start_dir self.seek(start) self.read(end - start) log.debug( "prefetch %s-%s", info.header_offset, end, ) break else: log.debug("no zip prefetch") conda-package-streaming-0.10.0/conda_package_streaming/package_streaming.py000066400000000000000000000113231463013427300270650ustar00rootroot00000000000000""" Unpack conda packages without using a temporary file. """ from __future__ import annotations import bz2 import os import os.path import tarfile import zipfile from enum import Enum from typing import Generator # acquire umask taking advantage of import system lock, instead of possibly in # multiple threads at once. UMASK = os.umask(0) os.umask(UMASK) try: import zstandard except ImportError: import warnings warnings.warn("zstandard could not be imported. 
Running without .conda support.") zstandard = None class CondaComponent(Enum): pkg = "pkg" info = "info" def __str__(self): return self.value class TarfileNoSameOwner(tarfile.TarFile): def __init__(self, *args, umask=UMASK, **kwargs): """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to read from an existing archive, 'a' to append data to an existing file or 'w' to create a new file overwriting an existing one. `mode' defaults to 'r'. If `fileobj' is given, it is used for reading or writing data. If it can be determined, `mode' is overridden by `fileobj's mode. `fileobj' is not closed, when TarFile is closed. """ super().__init__(*args, **kwargs) self.umask = umask def chown(self, tarinfo, targetpath, numeric_owner): """ Override chown to be a no-op, since we don't want to preserve ownership here. (tarfile.TarFile only lets us toggle all of (chown, chmod, mtime)) """ return def chmod(self, tarinfo, targetpath): """ Set file permissions of targetpath according to tarinfo, respecting umask. """ try: os.chmod(targetpath, tarinfo.mode & (0o777 - self.umask)) except OSError as e: raise tarfile.ExtractError("could not change mode") from e def tar_generator( fileobj, tarfile_open=TarfileNoSameOwner.open, closefd=False ) -> Generator[tuple[tarfile.TarFile, tarfile.TarInfo], None, None]: """ Yield (tar, member) from fileobj. """ # tarfile will not close fileobj because _extfileobj is True # caller should take care to close files all the way back to the http request... try: with tarfile_open(fileobj=fileobj, mode="r|") as tar: for member in tar: yield tar, member finally: if closefd: fileobj.close() def stream_conda_info( filename, fileobj=None ) -> Generator[tuple[tarfile.TarFile, tarfile.TarInfo], None, None]: """ Yield members from conda's embedded info/ tarball. For .tar.bz2 packages, yield all members. Yields (tar, member) tuples. You must only use the current member to prevent tar seeks and scans. To extract to disk, it's possible to call ``tar.extractall(path)`` on the first result and then ignore the rest of this generator. ``extractall`` takes care of some directory permissions/mtime issues, compared to ``extract`` or writing out the file objects yourself. """ component = "info" return stream_conda_component(filename, fileobj, component) def stream_conda_component( filename, fileobj=None, component: CondaComponent | str = CondaComponent.pkg ) -> Generator[tuple[tarfile.TarFile, tarfile.TarInfo], None, None]: """ Yield members from .conda's embedded {component}- tarball. "info" or "pkg". For .tar.bz2 packages, yield all members. Yields (tar, member) tuples. You must only use the current member to prevent tar seeks and scans. To extract to disk, it's possible to call ``tar.extractall(path)`` on the first result and then ignore the rest of this generator. ``extractall`` takes care of some directory permissions/mtime issues, compared to ``extract`` or writing out the file objects yourself. 
""" if str(filename).endswith(".conda"): if zstandard is None: raise RuntimeError("Cannot unpack `.conda` without zstandard") zf = zipfile.ZipFile(fileobj or filename) file_id, _, _ = os.path.basename(filename).rpartition(".") component_name = f"{component}-{file_id}" component_filename = [ info for info in zf.infolist() if info.filename.startswith(component_name) ] if not component_filename: raise LookupError(f"didn't find {component_name} component in {filename}") assert len(component_filename) == 1 reader = zstandard.ZstdDecompressor().stream_reader( zf.open(component_filename[0]) ) elif str(filename).endswith(".tar.bz2"): reader = bz2.open(fileobj or filename, mode="rb") else: raise ValueError("unsupported file extension") return tar_generator(reader, closefd=fileobj is None) conda-package-streaming-0.10.0/conda_package_streaming/s3.py000066400000000000000000000043561463013427300237560ustar00rootroot00000000000000""" Adapt s3 to package_streaming """ from __future__ import annotations import typing from contextlib import closing from typing import Any from . import package_streaming if typing.TYPE_CHECKING: # pragma: no cover from mypy_boto3_s3 import Client from mypy_boto3_s3.type_defs import GetObjectOutputTypeDef else: Client = GetObjectOutputTypeDef = None from .url import conda_reader_for_url __all__ = ["stream_conda_info", "conda_reader_for_s3"] class ResponseFacade: def __init__(self, response: GetObjectOutputTypeDef): self.response = response self.raw: Any = response["Body"] def raise_for_status(self): # s3 get_object raises automatically? pass @property def status_code(self): return self.response["ResponseMetadata"]["HTTPStatusCode"] @property def headers(self): # a case-sensitive dict; keys may be lowercased always? return self.response["ResponseMetadata"]["HTTPHeaders"] def iter_content(self, n: int): return iter(lambda: self.raw.read(n), b"") class SessionFacade: """ Make s3 client look just enough like a requests.session for LazyZipOverHTTP """ def __init__(self, client: Client, bucket: str, key: str): self.client = client self.bucket = bucket self.key = key def get(self, url, *, headers: dict | None = None, stream=True): if headers and "Range" in headers: response = self.client.get_object( Bucket=self.bucket, Key=self.key, Range=headers["Range"] ) else: response = self.client.get_object(Bucket=self.bucket, Key=self.key) return ResponseFacade(response) def stream_conda_info(client, bucket, key): """ Yield (tar, member) for conda package. Just "info/" for .conda, all members for tar. """ filename, conda = conda_reader_for_s3(client, bucket, key) with closing(conda): yield from package_streaming.stream_conda_info(filename, conda) def conda_reader_for_s3(client: Client, bucket: str, key: str): """ Return (name, file_like) suitable for package_streaming APIs """ session: Any = SessionFacade(client, bucket, key) return conda_reader_for_url(key, session) conda-package-streaming-0.10.0/conda_package_streaming/transmute.py000066400000000000000000000136521463013427300254520ustar00rootroot00000000000000""" Convert .tar.bz2 to .conda Uses `tempfile.SpooledTemporaryFile` to buffer `pkg-*` `.tar` and `info-*` `.tar`, then compress directly into an open `ZipFile` at the end. `SpooledTemporaryFile` buffers the first 10MB of the package and its metadata in memory, but writes out to disk for larger packages. Conda packages created this way have `info-*` as the last element in the `ZipFile`, instead of the first for `.conda` packages created with pre-2.0 `conda-package-handling`. 
""" from __future__ import annotations import json import os import shutil import tarfile import tempfile import zipfile from pathlib import Path from typing import Callable import zstandard # streams everything in .tar.bz2 mode from .package_streaming import CondaComponent, stream_conda_component # increase to reduce speed and increase compression (levels above 19 use much # more memory) ZSTD_COMPRESS_LEVEL = 19 # increase to reduce compression and increase speed ZSTD_COMPRESS_THREADS = 1 CONDA_PACKAGE_FORMAT_VERSION = 2 # Account for growth from "2 GB of /dev/urandom" to not exceed ZIP64_LIMIT after # compression CONDA_ZIP64_LIMIT = zipfile.ZIP64_LIMIT - (1 << 18) - 1 def transmute( package, path, *, compressor: Callable[ [], zstandard.ZstdCompressor ] = lambda: zstandard.ZstdCompressor( level=ZSTD_COMPRESS_LEVEL, threads=ZSTD_COMPRESS_THREADS ), is_info: Callable[[str], bool] = lambda filename: filename.startswith("info/"), ) -> Path: """ Convert .tar.bz2 conda :package to .conda-format under path. :param package: path to .tar.bz2 conda package :param path: destination path for transmuted .conda package :param compressor: A function that creates instances of ``zstandard.ZstdCompressor()`` to override defaults. :param is_info: A function that returns True if a file belongs in the ``info`` component of a `.conda` package. ``conda-package-handling`` (not this package ``conda-package-streaming``) uses a set of regular expressions to keep expected items in the info- component, while other items starting with ``info/`` wind up in the pkg- component. :return: Path to transmuted package. """ assert package.endswith(".tar.bz2"), "can only convert .tar.bz2 to .conda" assert os.path.isdir(path) file_id = os.path.basename(package)[: -len(".tar.bz2")] output_path = Path(path, f"{file_id}.conda") with tempfile.SpooledTemporaryFile() as info_file, tempfile.SpooledTemporaryFile() as pkg_file: with tarfile.TarFile(fileobj=info_file, mode="w") as info_tar, tarfile.TarFile( fileobj=pkg_file, mode="w" ) as pkg_tar: # If we wanted to compress these at a low setting to save temporary # space, we could insert a file object that counts bytes written in # front of a zstd (level between 1..3) compressor. stream = iter(stream_conda_component(package)) for tar, member in stream: tar_get = info_tar if is_info(member.name) else pkg_tar if member.isfile(): tar_get.addfile(member, tar.extractfile(member)) else: tar_get.addfile(member) info_tar.close() pkg_tar.close() info_size = info_file.tell() pkg_size = pkg_file.tell() info_file.seek(0) pkg_file.seek(0) with zipfile.ZipFile( output_path, "x", # x to not append to existing compresslevel=zipfile.ZIP_STORED, ) as conda_file: # Use a maximum of one Zstd compressor, stream_writer at a time to save memory. 
data_compress = compressor() pkg_metadata = {"conda_pkg_format_version": CONDA_PACKAGE_FORMAT_VERSION} conda_file.writestr("metadata.json", json.dumps(pkg_metadata)) with conda_file.open( f"pkg-{file_id}.tar.zst", "w", force_zip64=(pkg_size > CONDA_ZIP64_LIMIT), ) as pkg_file_zip, data_compress.stream_writer( pkg_file_zip, size=pkg_size, closefd=False ) as pkg_stream: shutil.copyfileobj(pkg_file._file, pkg_stream) with conda_file.open( f"info-{file_id}.tar.zst", "w", force_zip64=(info_size > CONDA_ZIP64_LIMIT), ) as info_file_zip, data_compress.stream_writer( info_file_zip, size=info_size, closefd=False, ) as info_stream: shutil.copyfileobj(info_file._file, info_stream) return output_path def transmute_tar_bz2( package: str, path, ) -> Path: """ Convert a ``.conda`` ``package`` to .tar.bz2 format under ``path``. Can recompress .tar.bz2 packages. :param package: path to `.conda` or `.tar.bz2` package. :param path: destination path for transmuted package. :return: Path to transmuted package. """ assert package.endswith((".tar.bz2", ".conda")), "Unknown extension" assert os.path.isdir(path) incoming_format = ".conda" if package.endswith(".conda") else ".tar.bz2" file_id = os.path.basename(package)[: -len(incoming_format)] if incoming_format == ".conda": # .tar.bz2 MUST place info/ first. components = [CondaComponent.info, CondaComponent.pkg] else: # .tar.bz2 doesn't filter by component components = [CondaComponent.pkg] output_path = Path(path, f"{file_id}.tar.bz2") with open(package, "rb") as fileobj, tarfile.open(output_path, "x:bz2") as pkg_tar: for component in components: stream = iter(stream_conda_component(package, fileobj, component=component)) for tar, member in stream: if member.isfile(): pkg_tar.addfile(member, tar.extractfile(member)) else: pkg_tar.addfile(member) return output_path conda-package-streaming-0.10.0/conda_package_streaming/url.py000066400000000000000000000052231463013427300242250ustar00rootroot00000000000000""" Fetch metadata from remote .conda or .tar.bz2 package. Try to fetch less than the whole file if possible. This module should only be used to make *partial* reads against a remote package, typically just the ``info`` portion. If a full ``.conda`` format package is needed, it is more efficient to download locally first and then use the file-based API. """ import logging import sys import urllib.parse from pathlib import Path import requests from . import package_streaming # Excellent HTTP Range request file-like object from .lazy_wheel import LazyConda log = logging.getLogger(__name__) session = requests.Session() session.headers["User-Agent"] = "conda-package-streaming/0.10.0" METADATA_CHECKLIST = frozenset({"info/index.json", "info/recipe/meta.yaml"}) def extract_conda_info(url, destdir, checklist=METADATA_CHECKLIST, session=session): """ Extract info/index.json and info/recipe/meta.yaml from url to destdir; close url as soon as those files are found. """ checklist = set(checklist) stream = stream_conda_info(url, session=session) for tar, member in stream: if member.name in checklist: tar.extract(member, destdir) checklist.remove(member.name) if not checklist: stream.close() # next iteration of for loop raises GeneratorExit in stream def stream_conda_info(url, session=session): """ Yield (tar, member) for conda package at url Just "info/" for .conda, all members for tar. """ filename, conda = conda_reader_for_url(url, session=session) try: yield from package_streaming.stream_conda_info(filename, conda) finally: if hasattr(conda, "release_conn"): # For .tar.bz2. 
Take extra care to drop connections after we are # done reading a partial response. conda.release_conn() conda.close() def conda_reader_for_url(url, session=session): """ Return (name, file_like) suitable for package_streaming APIs """ parsed_url = urllib.parse.urlparse(url) *_, filename = parsed_url.path.rsplit("/", 1) if filename.endswith(".conda"): file_id = filename[: -len(".conda")] conda = LazyConda(url, session) conda.prefetch(file_id) elif filename.endswith(".tar.bz2"): response = session.get(url, stream=True, headers={"Connection": "close"}) conda = response.raw else: raise ValueError(f"Unsupported extension: {url}") return filename, conda if __name__ == "__main__": # pragma nocover import logging logging.basicConfig(level=logging.DEBUG) extract_conda_info(sys.argv[1], Path(sys.argv[2]).absolute()) conda-package-streaming-0.10.0/docs/000077500000000000000000000000001463013427300171675ustar00rootroot00000000000000conda-package-streaming-0.10.0/docs/changelog.md000066400000000000000000000000551463013427300214400ustar00rootroot00000000000000# Changelog ```{include} ../CHANGELOG.md ``` conda-package-streaming-0.10.0/docs/conf.py000066400000000000000000000035271463013427300204750ustar00rootroot00000000000000# Configuration file for the Sphinx documentation builder. # # This file only contains a selection of the most common options. For a full # list see the documentation: # https://www.sphinx-doc.org/en/master/usage/configuration.html # -- Path setup -------------------------------------------------------------- # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # import os import sys sys.path.insert(0, os.path.abspath("..")) # -- Project information ----------------------------------------------------- project = "conda-package-streaming" copyright = "2022, Anaconda, Inc." author = "Anaconda, Inc." # -- General configuration --------------------------------------------------- # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ "sphinx.ext.autodoc", "myst_parser", ] # Add any paths that contain templates here, relative to this directory. templates_path = ["_templates"] # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This pattern also affects html_static_path and html_extra_path. exclude_patterns = [] # -- Options for HTML output ------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # html_theme = "furo" # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ["_static"] conda-package-streaming-0.10.0/docs/extract.rst000066400000000000000000000002601463013427300213710ustar00rootroot00000000000000extract module ============== Extract conda packages to the filesystem. .. 
automodule:: conda_package_streaming.extract :members: :undoc-members: :show-inheritance: conda-package-streaming-0.10.0/docs/index.md000066400000000000000000000025421463013427300206230ustar00rootroot00000000000000% conda-package-streaming documentation master file, created by % sphinx-quickstart on Fri Jun 17 14:43:38 2022. % You can adapt this file completely to your liking, but it should at least % contain the root `toctree` directive. # Welcome to conda-package-streaming's documentation! `conda-package-streaming` strives to be the most efficient way to read from new and old format `.conda` and `.tar.bz2` [conda packages](https://docs.conda.io/projects/conda/en/latest/user-guide/concepts/packages.html). `conda-package-streaming` can read from conda packages without ever writing to disk, unlike [conda-package-handling](https://github.com/conda/conda-package-handling) `< 2.0.0`'s temporary directories. [conda-package-handling](https://github.com/conda/conda-package-handling) `>= 2.0.0` uses `conda-package-streaming`. This library can also read a package from a URL or a stream without transferring the entire archive. `conda-package-streaming` uses the standard library [`zipfile`](https://docs.python.org/3/library/zipfile.html) and [`tarfile`](https://docs.python.org/3/library/tarfile.html), and [`zstandard`](https://github.com/indygreg/python-zstandard) to handle zstd-compressed streams. ```{include} ../README.md ``` ```{toctree} :caption: 'Contents:' :maxdepth: 2 modules changelog ``` # Indices and tables - {ref}`genindex` - {ref}`modindex` - {ref}`search` conda-package-streaming-0.10.0/docs/lazy_wheel.md000066400000000000000000000014361463013427300216600ustar00rootroot00000000000000# lazy_wheel module `lazy_wheel` is derived from pip's wheel download code. It is a seekable file-like object based on HTTP range requests, backed by a sparse temporary file. Each `read()` issues one or more HTTP range requests to the URL depending on how much of the file has already been downloaded, while `read()` calls over already-fetched portions of the file are fulfilled by the backing file. ZIP archives have a directory at the end of the file giving the offset to each compressed member. We fetch the directory, and then the portion of the file containing the member or members of interest, for a maximum of 3 requests to retrieve any individual file in the archive. ```{eval-rst} .. automodule:: conda_package_streaming.lazy_wheel :members: :undoc-members: :show-inheritance: ``` conda-package-streaming-0.10.0/docs/modules.md000066400000000000000000000052441463013427300211660ustar00rootroot00000000000000# conda_package_streaming Fetch metadata from remote .conda or .tar.bz2 package. Try to fetch less than the whole file if possible. 
Zip (.conda) is made for this: ``` $ python -m conda_package_streaming.url https://repo.anaconda.com/pkgs/main/osx-64/sqlalchemy-1.4.32-py310hca72f7f_0.conda /tmp/ DEBUG:conda_package_streaming.lazy_wheel:bytes=-10240 DEBUG:urllib3.connectionpool:Starting new HTTPS connection (1): repo.anaconda.com:443 DEBUG:urllib3.connectionpool:https://repo.anaconda.com:443 "GET /pkgs/main/osx-64/sqlalchemy-1.4.32-py310hca72f7f_0.conda HTTP/1.1" 206 10240 DEBUG:conda_package_streaming.lazy_wheel:bytes=43-38176 DEBUG:urllib3.connectionpool:https://repo.anaconda.com:443 "GET /pkgs/main/osx-64/sqlalchemy-1.4.32-py310hca72f7f_0.conda HTTP/1.1" 206 38134 DEBUG:conda_package_streaming.lazy_wheel:prefetch 43-38177 $ curl -s -I https://repo.anaconda.com/pkgs/main/osx-64/sqlalchemy-1.4.32-py310hca72f7f_0.conda | grep content-length content-length: 1984926 ``` We fetch 10240 + 38134 = 48374 bytes in two requests of this 1984926-byte package. ## Older format bzip2 has a very large block size, and we don't know if the info/ directory is finished before reading the entire archive. However, if we only want certain files from info/ we can stop after we've seen them all. Fetching repodata and calling response.raw.tell() after each tar member: ``` $ python -m metayaml.fetch_metadata \ https://repo.anaconda.com/pkgs/main/linux-64/absl-py-0.1.10-py27_0.tar.bz2 128948 info/hash_input.json 128948 info/index.json 128948 info/files 128948 info/about.json 128948 info/paths.json 128948 info/LICENSE.txt 128948 info/git 128948 lib/python2.7/site-packages/absl_py-0.1.10-py2.7.egg-info/dependency_links.txt 128948 lib/python2.7/site-packages/absl_py-0.1.10-py2.7.egg-info/requires.txt 128948 lib/python2.7/site-packages/absl_py-0.1.10-py2.7.egg-info/top_level.txt 128948 lib/python2.7/site-packages/absl/__init__.pyc 128948 lib/python2.7/site-packages/absl/testing/__init__.pyc 128948 info/test/run_test.py ... ``` A larger package: ``` # Fetch https://repo.anaconda.com/pkgs/main/linux-64/airflow-1.10.10-py36_0.tar.bz2 # Printing bytes transferred after each archive member, 286720 info/hash_input.json 286720 info/has_prefix 286720 info/index.json 286720 info/about.json 286720 info/git 286720 info/files 286720 info/paths.json 286720 lib/python3.6/site-packages/airflow/alembic.ini 286720 lib/python3.6/site-packages/airflow/www/templates/airflow/variables/README.md ... 286720 info/test/test_time_dependencies.json ... 634880 lib/python3.6/site-packages/airflow/www/static/ace.js 634880 bin/airflow ``` ```{toctree} :maxdepth: 4 url s3 lazy_wheel package_streaming extract transmute ``` conda-package-streaming-0.10.0/docs/package_streaming.rst000066400000000000000000000002451463013427300233660ustar00rootroot00000000000000package\_streaming module ========================= .. automodule:: conda_package_streaming.package_streaming :members: :undoc-members: :show-inheritance: conda-package-streaming-0.10.0/docs/s3.md000066400000000000000000000005261463013427300200410ustar00rootroot00000000000000s3 module ====================== conda_package_streaming.s3 adapts an s3 client, bucket name, and key to `LazyConda`, or, for `.tar.bz2`, a normal streaming `GET` request that can be closed before transferring the whole file. ```{eval-rst} .. automodule:: conda_package_streaming.s3 :members: :undoc-members: :show-inheritance: ``` conda-package-streaming-0.10.0/docs/transmute.rst000066400000000000000000000002131463013427300217400ustar00rootroot00000000000000transmute module ================ .. 
automodule:: conda_package_streaming.transmute :members: :undoc-members: :show-inheritance: conda-package-streaming-0.10.0/docs/url.rst000066400000000000000000000002051463013427300205200ustar00rootroot00000000000000url module ====================== .. automodule:: conda_package_streaming.url :members: :undoc-members: :show-inheritance: conda-package-streaming-0.10.0/make.bat000066400000000000000000000014441463013427300176470ustar00rootroot00000000000000@ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=docs set BUILDDIR=build %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.https://www.sphinx-doc.org/ exit /b 1 ) if "%1" == "" goto help %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% :end popd conda-package-streaming-0.10.0/noxfile.py000066400000000000000000000003411463013427300202530ustar00rootroot00000000000000import nox @nox.session(venv_backend="conda") @nox.parametrize( "python", ["3.7", "3.8", "3.9", "3.10"], ) def tests(session): session.install("-e", ".[test]") session.run("pytest") conda-package-streaming-0.10.0/pyproject.toml000066400000000000000000000024421463013427300211550ustar00rootroot00000000000000[tool.black] target-version = ["py38", "py39", "py310"] [tool.isort] profile = "black" [build-system] requires = ["flit_core >=3.2,<4"] build-backend = "flit_core.buildapi" [project] name = "conda_package_streaming" authors = [ { name = "Anaconda, Inc. & Contributors", email = "conda@continuum.io" }, ] description = "An efficient library to read from new and old format .conda and .tar.bz2 conda packages." 
license = { file = "LICENSE" } readme = "README.md" classifiers = [ "License :: OSI Approved :: BSD License", "Programming Language :: Python :: 3", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] dynamic = ["version"] requires-python = ">=3.7" dependencies = ["requests", "zstandard >=0.15"] [project.optional-dependencies] test = [ "pytest >=7", "pytest-cov", "pytest-mock", "boto3", "boto3-stubs[essential]", "bottle", "conda", "conda-package-handling >=2", ] docs = ["furo", "sphinx", "myst-parser", "mdit-py-plugins>=0.3.0"] [project.urls] Home = "https://github.com/conda/conda-package-streaming" Documentation = "https://conda.github.io/conda-package-streaming/" # pyproject.toml [tool.pytest.ini_options] minversion = "7.0" addopts = "--cov=conda_package_streaming" testpaths = ["tests"] conda-package-streaming-0.10.0/requirements.txt000066400000000000000000000002431463013427300215220ustar00rootroot00000000000000requests zstandard >=0.15 # test pytest >=7 pytest-cov pytest-mock boto3 boto3-stubs[essential] bottle conda # docs furo sphinx myst-parser mdit-py-plugins>=0.3.0 conda-package-streaming-0.10.0/tests/000077500000000000000000000000001463013427300174015ustar00rootroot00000000000000conda-package-streaming-0.10.0/tests/conftest.py000066400000000000000000000052101463013427300215760ustar00rootroot00000000000000import json import logging import os.path import shutil import subprocess from pathlib import Path import pytest import server from conda_package_streaming.transmute import transmute_tar_bz2 log = logging.getLogger(__name__) LIMIT_TEST_PACKAGES = 16 def find_packages_dirs() -> Path: """ Ask conda for package directories. """ conda_info = json.loads( subprocess.run( [os.environ["CONDA_EXE"], "info", "--json"], stdout=subprocess.PIPE, check=True, ).stdout ) # XXX can run individual environment's conda (base conda is more likely to # have useful cached packages) pkgs_dirs = conda_info["pkgs_dirs"] + [os.path.expanduser("~/miniconda3/pkgs")] log.debug("search %s", pkgs_dirs) first_pkg_dir = next(path for path in pkgs_dirs if os.path.exists(path)) return Path(first_pkg_dir) @pytest.fixture(scope="session") def pkgs_dir(tmp_path_factory): """ Dedicated test package directory. """ return tmp_path_factory.mktemp("pkgs") @pytest.fixture(scope="session") def package_server(pkgs_dir, conda_paths): thread = server.get_server_thread(pkgs_dir) thread.start() return thread @pytest.fixture(scope="session") def conda_paths(pkgs_dir: Path): found_packages = find_packages_dirs() conda_paths = [] for path in found_packages.iterdir(): if path.name.endswith((".tar.bz2", ".conda")): conda_paths.append(path) return add_tar_bz2s(conda_paths, pkgs_dir) def add_tar_bz2s(paths: list[Path], pkgs_dir: Path): """ If there aren't enough .tar.bz2's available, create some from available .conda's. Return paths. 
""" conda_paths: list[Path] = [] tarbz2_paths: list[Path] = [] output_paths: list[Path] = [] assert isinstance(pkgs_dir, Path) for path in paths: if path.name.endswith(".tar.bz2"): tarbz2_paths.append(path) elif path.name.endswith(".conda"): conda_paths.append(path) tarbz2_path: Path = pkgs_dir medium_conda_paths = [] for path in conda_paths: if 1 << 20 < path.stat().st_size < 1 << 22: medium_conda_paths.append(path) medium_conda_paths = medium_conda_paths[:LIMIT_TEST_PACKAGES] # this ignores existing .tar.bz2 for simplicity (.tar.bz2 is missing in CI) for conda in set(medium_conda_paths + conda_paths[:10]): shutil.copy(conda, tarbz2_path) transmute_tar_bz2(str(conda), tarbz2_path) output_paths.extend(tarbz2_path.glob("*.tar.bz2")) output_paths.extend(tarbz2_path.glob("*.conda")) return sorted(output_paths) # sort interleaves .tar.bz2 and .conda conda-package-streaming-0.10.0/tests/server.py000066400000000000000000000035111463013427300212610ustar00rootroot00000000000000""" Test web server. """ import logging import threading import wsgiref.simple_server from pathlib import Path from typing import Any import bottle import conftest log = logging.getLogger(__name__) def get_app(pkgs_dir): """ Bottle conveniently supports Range requests. Server may block if browser etc. keeps connection open. """ app = bottle.Bottle() app.pkgs_dir = pkgs_dir def serve_file(filename): mimetype = "auto" # from https://repo.anaconda.com/ behavior: if filename.endswith(".tar.bz2"): mimetype = "application/x-tar" elif filename.endswith(".conda"): mimetype = "binary/octet-stream" return bottle.static_file(filename, root=pkgs_dir, mimetype=mimetype) app.route("/pkgs/", "GET", serve_file) return app def selftest(): """ Run server in a thread that will die when the application exits. """ t = get_server_thread(conftest.find_packages_dirs()) t.start() import time time.sleep(300) class ServerThread(threading.Thread): server: wsgiref.simple_server.WSGIServer app: Any def get_server_thread(pkgs_dir: Path): """ Return test server thread with additional .server, .app properties. Call .start() to serve in the background. """ app = get_app(pkgs_dir) server = wsgiref.simple_server.make_server("127.0.0.1", 0, app) log.info(f"serving {app.pkgs_dir} on {server.server_address}/pkgs") t = ServerThread(daemon=True, target=server.serve_forever) t.app = app t.server = server # server.application == app return t if __name__ == "__main__": import logging logging.basicConfig( level=logging.INFO, format="%(asctime)s %(message)s", datefmt="%Y-%m-%dT%H:%M:%S", ) log.setLevel(logging.DEBUG) selftest() conda-package-streaming-0.10.0/tests/test_degraded.py000066400000000000000000000027731463013427300225620ustar00rootroot00000000000000""" Allow conda_package_streaming to work in .tar.bz2-only mode if zstandard is not available (please immediately install zstandard if this is the case). 
""" import importlib import sys import tarfile import zipfile from pathlib import Path import pytest def test_degraded(tmpdir): try: sys.modules["zstandard"] = None # type: ignore import conda_package_streaming.extract import conda_package_streaming.package_streaming importlib.reload(conda_package_streaming.package_streaming) testconda = Path(tmpdir, "testconda.conda") with zipfile.ZipFile(testconda, "w"): pass testtar = Path(tmpdir, "test.tar.bz2") with tarfile.open(testtar, "w:bz2") as tar: pass for ( tar, member, ) in conda_package_streaming.package_streaming.stream_conda_component(testtar): pass with pytest.raises(RuntimeError): for ( tar, member, ) in conda_package_streaming.package_streaming.stream_conda_component( testconda ): pass with pytest.raises(RuntimeError): conda_package_streaming.extract.extract(testconda, tmpdir) finally: sys.modules.pop("zstandard", None) import conda_package_streaming.package_streaming importlib.reload(conda_package_streaming.package_streaming) assert conda_package_streaming.package_streaming.zstandard conda-package-streaming-0.10.0/tests/test_extract.py000066400000000000000000000073621463013427300224740ustar00rootroot00000000000000import io import stat import tarfile from errno import ELOOP import pytest from conda_package_streaming import exceptions, extract, package_streaming MAX_CONDAS = 8 def test_extract_stream(conda_paths, tmp_path): for i, package in enumerate(conda_paths): print(package) with open(package, "rb") as fileobj: stream = package_streaming.stream_conda_component( package, fileobj, component=package_streaming.CondaComponent.pkg ) dest_dir = tmp_path / package.name extract.extract_stream(stream, dest_dir) if i >= MAX_CONDAS: break def test_extract_all(conda_paths, tmp_path): for i, package in enumerate(conda_paths): print(package) dest_dir = tmp_path / package.name extract.extract(package, dest_dir=dest_dir) if i >= MAX_CONDAS: break def empty_tarfile(name, mode=0o644): """ Return BytesIO containing a tarfile with one empty file named :name """ tar = io.BytesIO() t = tarfile.TarFile(mode="w", fileobj=tar) tarinfo = tarfile.TarInfo(name=name) tarinfo.mode = mode t.addfile(tarinfo, io.BytesIO()) t.close() tar.seek(0) return tar def test_oserror(tmp_path): """ Fail if tarfile raises OSError (formerly known as IOError) """ tar = empty_tarfile("empty-test") class TarELOOP(tarfile.TarFile): def extractall(self, path=None, members=None): raise OSError(ELOOP, "case sensitivity") class TarOSError(tarfile.TarFile): def extractall(self, path=None, members=None): raise OSError("not eloop") def stream(cls): yield (cls(fileobj=tar), tarfile.TarInfo()) with pytest.raises(exceptions.CaseInsensitiveFileSystemError): extract.extract_stream(stream(TarELOOP), tmp_path) with pytest.raises(OSError): extract.extract_stream(stream(TarOSError), tmp_path) def stream(fileobj): """ Like the tuples produced by part of conda-package-streaming. """ yield (package_streaming.TarfileNoSameOwner(fileobj=fileobj), tarfile.TarInfo()) def stream_stdlib(fileobj): """ Like the tuples produced by part of conda-package-streaming. 
""" yield (tarfile.TarFile(fileobj=fileobj), tarfile.TarInfo()) def test_slip(tmp_path): """ Fail if tarfile tries to put files outside its dest_dir (tmp_path) """ tar = empty_tarfile(name="../slip") with pytest.raises(exceptions.SafetyError): extract.extract_stream(stream(tar), tmp_path) tar2 = empty_tarfile(name="/absolute") with pytest.raises(exceptions.SafetyError): extract.extract_stream(stream(tar2), tmp_path) def test_chown(conda_paths, tmp_path, mocker): for package in conda_paths[:2]: print(package) with open(package, "rb") as fileobj: stream = package_streaming.stream_conda_component( package, fileobj, component=package_streaming.CondaComponent.pkg ) for tar, member in stream: assert isinstance(tar, package_streaming.TarfileNoSameOwner), tar break def test_umask(tmp_path, mocker): """ Demonstrate that umask-respecting tar implementation works. Mock umask in case it is different on your system. """ mocker.patch("conda_package_streaming.package_streaming.UMASK", new=0o22) tar3 = empty_tarfile(name="naughty_umask", mode=0o777) extract.extract_stream(stream_stdlib(tar3), tmp_path) mode = (tmp_path / "naughty_umask").stat().st_mode assert mode & stat.S_IWGRP, "%o" % mode tar3.seek(0) extract.extract_stream(stream(tar3), tmp_path) mode = (tmp_path / "naughty_umask").stat().st_mode assert not mode & stat.S_IWGRP, "%o" % mode conda-package-streaming-0.10.0/tests/test_s3.py000066400000000000000000000022001463013427300213310ustar00rootroot00000000000000import boto3 import pytest from conda_package_streaming import s3 LIMIT = 16 @pytest.fixture def s3_client(package_server): host, port = package_server.server.server_address client = boto3.client( "s3", aws_access_key_id="test_id", aws_secret_access_key="test_key", endpoint_url=f"http://{host}:{port}", use_ssl=False, verify=False, ) return client def test_head_objects(s3_client, conda_paths): bucket = "pkgs" # independent of filesystem path for path in conda_paths[:LIMIT]: s3_client.head_object(Bucket=bucket, Key=path.name) def test_stream_s3(s3_client, conda_paths): with pytest.raises(ValueError): next(s3.stream_conda_info(s3_client, "pkgs", "notaconda.rar")) for path in conda_paths[:LIMIT]: members = s3.stream_conda_info(s3_client, "pkgs", path.name) print("stream s3", path.name) for tar, member in members: if member.name == "info/index.json": members.close() # faster than waiting for gc? break else: pytest.fail("info/index.json not found") conda-package-streaming-0.10.0/tests/test_streaming.py000066400000000000000000000032771463013427300230140ustar00rootroot00000000000000import io import json import tarfile import pytest from conda_package_streaming import package_streaming def test_package_streaming(conda_paths): for path in conda_paths: if str(path).endswith(".conda"): with pytest.raises(LookupError): package_streaming.stream_conda_component(path, component="notfound") with pytest.raises(ValueError): package_streaming.stream_conda_component("notapackage.rar") def test_early_exit(conda_paths): for package in conda_paths: print(package) stream = iter(package_streaming.stream_conda_info(package)) found = False for tar, member in stream: assert not found, "early exit did not work" if member.name == "info/index.json": reader = tar.extractfile(member) if reader: json.load(reader) found = True stream.close() # PEP 342 close() # stream_conda_info doesn't close a passed-in fileobj, but a # filename should be closed. 
assert found, f"index.json not found in {package}" def test_chmod_error(tmp_path, mocker): """ Coverage for os.chmod() error handling. """ with package_streaming.TarfileNoSameOwner(tmp_path / "test.tar", mode="w") as tar: member = tarfile.TarInfo(name="file") tar.addfile(member, io.BytesIO()) mocker.patch("os.chmod", side_effect=OSError) with pytest.raises(tarfile.ExtractError): # only logs a debug message if errorlevel<=1 with package_streaming.TarfileNoSameOwner( tmp_path / "test.tar", errorlevel=2 ) as tar: tar.extractall(tmp_path) conda-package-streaming-0.10.0/tests/test_transmute.py000066400000000000000000000125501463013427300230370ustar00rootroot00000000000000import contextlib import io import os import tarfile import time from pathlib import Path from zipfile import ZipFile import pytest from conda_package_handling.validate import validate_converted_files_match_streaming from conda_package_streaming.package_streaming import ( CondaComponent, stream_conda_component, ) from conda_package_streaming.transmute import transmute, transmute_tar_bz2 @pytest.fixture def testtar_bytes(): buffer = io.BytesIO() with tarfile.open("test.tar.bz2", "w:bz2", fileobj=buffer) as tar: symlink = tarfile.TarInfo(name="symlink") symlink.type = tarfile.LNKTYPE symlink.linkname = "target" tar.addfile(symlink) expected = tarfile.TarInfo(name="info/expected") tar.addfile(expected, io.BytesIO()) unexpected = tarfile.TarInfo(name="info/unexpected") tar.addfile(unexpected, io.BytesIO()) return buffer.getbuffer() @contextlib.contextmanager def timeme(message: str = ""): begin = time.time() yield end = time.time() print(f"{message}{end-begin:0.2f}s") def test_transmute(conda_paths: list[Path], tmpdir): tarbz_packages = [] for path in conda_paths: path = str(path) if path.endswith(".tar.bz2") and (1 << 20 < os.stat(path).st_size < 1 << 22): tarbz_packages = [path] conda_packages = [] # not supported assert tarbz_packages, "no medium-sized .tar.bz2 packages found" metadata_checks = 0 for packages in (conda_packages, tarbz_packages): for package in packages: with timeme(f"{package} took "): out = transmute(package, tmpdir) _, missing, mismatched = validate_converted_files_match_streaming( out, package, strict=True ) assert missing == mismatched == [] if out.name.endswith(".conda"): with ZipFile(out) as zf: metadata_checks += 1 assert "metadata.json" in zf.namelist() assert metadata_checks > 0 def test_transmute_symlink(tmpdir, testtar_bytes): testtar = Path(tmpdir, "test.tar.bz2") testtar.write_bytes(testtar_bytes) out = transmute(str(testtar), tmpdir) _, missing, mismatched = validate_converted_files_match_streaming( out, testtar, strict=True ) assert missing == mismatched == [] def test_transmute_info_filter(tmpdir, testtar_bytes): testtar = Path(tmpdir, "test.tar.bz2") testtar.write_bytes(testtar_bytes) transmute( str(testtar), tmpdir, is_info=lambda filename: filename == "info/expected" ) with open(Path(tmpdir, "test.conda"), "rb") as fileobj: for component, expected in ( (CondaComponent.info, {"info/expected"}), ( CondaComponent.pkg, { "info/unexpected", "symlink", }, ), ): items = stream_conda_component("test.conda", fileobj, component) assert {member.name for tar, member in items} == expected, items def test_transmute_backwards(tmpdir, conda_paths): tarbz_packages = [] for path in conda_paths: path = str(path) if path.endswith(".conda") and (1 << 20 < os.stat(path).st_size < 1 << 22): tarbz_packages = [path] conda_packages = [] # not supported assert tarbz_packages, "no medium-sized .conda packages found" for 
packages in (conda_packages, tarbz_packages): for package in packages: with timeme(f"{package} took "): out = transmute_tar_bz2(package, tmpdir) _, missing, mismatched = validate_converted_files_match_streaming( out, package, strict=True ) assert missing == mismatched == [] def test_transmute_tarbz2_to_tarbz2(tmpdir, testtar_bytes): testtar = Path(tmpdir, "test.tar.bz2") testtar.write_bytes(testtar_bytes) outdir = Path(tmpdir, "output") outdir.mkdir() out = transmute_tar_bz2(str(testtar), outdir) _, missing, mismatched = validate_converted_files_match_streaming( out, testtar, strict=True ) assert missing == mismatched == [] def test_transmute_conditional_zip64(tmp_path, mocker): """ Test that zip64 is used in transmute after a threshold. """ LIMIT = 16384 for test_size, extra_expected in (LIMIT // 2, False), (LIMIT * 2, True): mocker.patch("conda_package_streaming.transmute.CONDA_ZIP64_LIMIT", new=LIMIT) mocker.patch("zipfile.ZIP64_LIMIT", new=LIMIT) tmp_tar = tmp_path / f"{test_size}.tar.bz2" with tarfile.open(tmp_tar, "w:bz2") as tar: pkg = tarfile.TarInfo(name="packagedata") data = io.BytesIO(os.urandom(test_size)) pkg.size = len(data.getbuffer()) tar.addfile(pkg, data) info = tarfile.TarInfo(name="info/data") data = io.BytesIO(os.urandom(test_size)) info.size = len(data.getbuffer()) tar.addfile(info, data) out = transmute(str(tmp_tar), tmp_path) with ZipFile(out) as e: assert e.filelist[0].extra == b"" # when zip64 extension is used, extra contains zip64 headers assert bool(e.filelist[1].extra) == extra_expected assert bool(e.filelist[2].extra) == extra_expected conda-package-streaming-0.10.0/tests/test_url.py000066400000000000000000000116341463013427300216210ustar00rootroot00000000000000import io import tempfile from contextlib import closing, contextmanager from pathlib import Path from zipfile import ZipFile import pytest from requests import HTTPError, Session from conda_package_streaming import lazy_wheel from conda_package_streaming.lazy_wheel import LazyConda from conda_package_streaming.url import ( conda_reader_for_url, extract_conda_info, stream_conda_info, ) LIMIT = 16 @pytest.fixture def package_url(package_server): """ Base url for all test packages. 
""" host, port = package_server.server.server_address return f"http://{host}:{port}/pkgs" @pytest.fixture def package_urls(package_server, package_url): pkgs_dir = Path(package_server.app.pkgs_dir) conda = [] tar_bz2 = [] for path in pkgs_dir.iterdir(): if len(conda) > LIMIT and len(tar_bz2) > LIMIT: break url = f"{package_url}/{path.name}" if path.name.endswith(".tar.bz2") and len(tar_bz2) < LIMIT: tar_bz2.append(url) elif path.name.endswith(".conda") and len(conda) < LIMIT: conda.append(url) # interleave urls = [] for pair in zip(conda, tar_bz2): urls.extend(pair) return urls def test_stream_url(package_urls): with pytest.raises(ValueError): next(stream_conda_info("https://localhost/notaconda.rar")) for url in package_urls: with closing(stream_conda_info(url)) as members: print("stream_url", url) for tar, member in members: if member.name == "info/index.json": break else: pytest.fail("info/index.json not found") def test_fetch_meta(package_urls): for url in package_urls: with tempfile.TemporaryDirectory() as destdir: extract_conda_info(url, destdir) def test_lazy_wheel(package_urls): lazy_tests = 7 for url in package_urls: if url.endswith(".conda"): # API works with `.tar.bz2` but only returns LazyConda for `.conda` filename, conda = conda_reader_for_url(url) assert filename == url.rsplit("/")[-1] with conda: assert isinstance(conda, LazyConda) assert conda.mode == "rb" assert conda.readable() assert not conda.writable() assert not conda.closed request_count = conda._request_count # did we really prefetch the info? zf = ZipFile(conda) # type: ignore filename = filename[: -len(".conda")] zf.open(f"info-{filename}.tar.zst").read() assert ( conda._request_count == request_count ), "info required extra GET request" assert conda._request_count <= 3 conda.prefetch("not-appearing-in-archive.txt") # zip will figure this out naturally; delete method? conda._check_zip() lazy_tests -= 1 if lazy_tests <= 0: break else: raise LookupError( "not enough .conda packages found %d %s" % (lazy_tests, package_urls) ) with pytest.raises(HTTPError): conda_reader_for_url(package_urls[0] + ".404.conda") class Session200(Session): def get(self, *args, **kwargs): response = super().get(*args, **kwargs) response.status_code = 200 return response with pytest.raises(lazy_wheel.HTTPRangeRequestUnsupported): LazyConda(package_urls[0], Session200()) for url in package_urls: if url.endswith(".tar.bz2"): LazyConda(url, Session())._check_zip() break else: raise LookupError("no .tar.bz2 packages found") def test_no_file_after_info(): """ If info is the last file, LazyConda must fetch (start of info file .. start of zip directory) instead of to the next file in the zip. """ class MockBytesIO(io.BytesIO): prefetch = LazyConda.prefetch @contextmanager def _stay(self): yield zip = MockBytesIO() zf = ZipFile(zip, "w") zf.writestr("info-test.tar.zst", b"00000000") # a short file zf.close() zip.prefetch("test") @pytest.mark.skip() def test_obsolete_lazy_wheel_selftest(): import logging import requests logging.basicConfig(level=logging.DEBUG) session = requests.Session() lzoh = lazy_wheel.LazyZipOverHTTP( "https://repodata.fly.dev/repo.anaconda.com/pkgs/main/win-32/current_repodata.jlap", session, ) lzoh.seek(1024) lzoh.read(768) lzoh.seek(0) # compare against regular fetch with open("outfile.txt", "wb+") as out: buf = b" " while buf: buf = lzoh.read(1024 * 10) print(list(zip(lzoh._left, lzoh._right)), lzoh._length) if not buf: break out.write(buf)