cwltool-3.1.20240508115724/.codecov.yml

codecov:
  notify:
    wait_for_ci: yes

cwltool-3.1.20240508115724/.coveragerc

[run]
branch = True
source_pkgs = cwltool
omit = cwltool/run_job.py
[report]
exclude_lines =
    if self.debug:
    pragma: no cover
    raise NotImplementedError
    if __name__ == .__main__.:
    if TYPE_CHECKING:
ignore_errors = True
omit =
    tests/*
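(For context, the `exclude_lines` entries above are regular expressions matched against source lines. A minimal, hypothetical Python sketch of the kind of code they would exclude from coverage reporting:)

```python
# hypothetical module, only to illustrate the exclusion patterns above
class Job:
    debug = False

    def run(self) -> None:
        if self.debug:  # excluded: matches the "if self.debug:" pattern
            print("verbose diagnostics")
        raise NotImplementedError  # excluded: matches "raise NotImplementedError"


if __name__ == "__main__":  # excluded: the "." in the pattern matches the quotes
    Job().run()  # pragma: no cover (this marker is also excluded)
```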

cwltool-3.1.20240508115724/.dockerignore

.coverage
coverage.xml
.tox/
.eggs/
.vscode/
build/
dist/
.swp
.mypy_cache/
.git/
.pytest_cache/
*.whl
env*/
testenv*/
*.img
*.sif
*.so
.github/
cwltool/*.so
*.Dockerfile
tmp/
build-cwltool-docker.sh
__pycache__/
*/__pycache__/
**/__pycache__/
*.egg-info/
*.orig
.dockerignore
cache*
*.swp

cwltool-3.1.20240508115724/.flake8

[flake8]
ignore = E203,W503
max-line-length = 100
select = B,C,E,F,W,T4
exclude = cwltool/schemas
extend-ignore = E501,B905
# when Python 3.10 is the minimum version, re-enable check B905 for zip + strict
extend-select = B9
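(For reference, B905 is the flake8-bugbear check for `zip()` calls that omit the `strict=` argument, which only exists on Python 3.10+. A minimal sketch of what re-enabling it would catch, with hypothetical variable names:)

```python
names = ["a", "b"]
values = [1, 2, 3]
pairs = list(zip(names, values))  # B905 would flag this: the unpaired 3 is silently dropped
# With strict=True (Python 3.10+), the length mismatch raises ValueError instead:
# list(zip(names, values, strict=True))
```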

cwltool-3.1.20240508115724/.git-blame-ignore-revs

# isort
46e0485a889453dc178a878b5b5ebbfc7e4eb5f1
# upgrade to black 20.8b1
6fd6fdb381fe3f347627fd517a8f2dba7b0a7029
# upgrade to black 23.1, longer lines (100)
7fe8c0739b0515d00daabc7db87bc5fad926d345
# upgrade to black 24
80c9ec01b4602fc51ac1f53744f98df8baba3c31

cwltool-3.1.20240508115724/.gitattributes

tests/wf/whale.txt text eol=lf

cwltool-3.1.20240508115724/.github/ISSUE_TEMPLATE/config.yml

contact_links:
  - name: CWL Community Forum
    url: https://cwl.discourse.group/c/q-and-a/5
    about: Please ask and answer questions here.

cwltool-3.1.20240508115724/.github/ISSUE_TEMPLATE.md

This issue tracker is for reporting bugs in cwltool (the CWL reference implementation) itself. Please use the [Gitter channel](https://gitter.im/common-workflow-language/common-workflow-language) or [CWL Discourse Forum](https://cwl.discourse.group/) for general questions about using cwltool to create/run workflows, or for issues not related to cwltool. Don't forget to use the cwl tag when posting on the Biostars Forum.
If you'd like to report a bug, fill out the template below and provide any extra information that may be useful / related to your problem. Ideally, you would create a [Minimal, Complete, and Verifiable example](http://stackoverflow.com/help/mcve) reproducing the problem before opening an issue, to ensure it's not caused by something in your code.
Before you submit, please delete this help text above the ---
Thanks!
---
## Expected Behavior
Tell us what should happen
## Actual Behavior
Tell us what happens instead
## Workflow Code
```
Paste the template code (ideally a minimal example) that causes the issue
```
## Full Traceback
```pytb
Paste the full traceback in case there is an exception
Run the workflow with ``--debug`` flag for more verbose logging
```
## Your Environment
* cwltool version:
Check using ``cwltool --version``

cwltool-3.1.20240508115724/.github/dependabot.yml

# To get started with Dependabot version updates, you'll need to specify which
# package ecosystems to update and where the package manifests are located.
# Please see the documentation for all configuration options:
# https://help.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
version: 2
updates:
  - package-ecosystem: "pip" # See documentation for possible values
    directory: "/" # Location of package manifests
    schedule:
      interval: "daily"
  # Maintain dependencies for GitHub Actions
  - package-ecosystem: "github-actions"
    directory: "/"
    schedule:
      interval: "daily"

cwltool-3.1.20240508115724/.github/workflows/ci-tests.yml

name: CI Tests
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]
  workflow_dispatch:

concurrency:
  group: build-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

env:
  TOX_SKIP_MISSING_INTERPRETERS: False
  # Rich (pip)
  FORCE_COLOR: 1
  # Tox
  PY_COLORS: 1
  # Mypy (see https://github.com/python/mypy/issues/7771)
  TERM: xterm-color
  MYPY_FORCE_COLOR: 1
  MYPY_FORCE_TERMINAL_WIDTH: 200
  # Pytest
  PYTEST_ADDOPTS: --color=yes

jobs:
  tox:
    name: Tox
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        py-ver-major: [3]
        py-ver-minor: [8, 9, 10, 11, 12]
        step: [lint, unit, bandit, mypy]
    env:
      py-semver: ${{ format('{0}.{1}', matrix.py-ver-major, matrix.py-ver-minor) }}
      TOXENV: ${{ format('py{0}{1}-{2}', matrix.py-ver-major, matrix.py-ver-minor, matrix.step) }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Set up Singularity
        if: ${{ matrix.step == 'unit' || matrix.step == 'mypy' }}
        run: |
          wget --no-verbose https://github.com/sylabs/singularity/releases/download/v3.10.4/singularity-ce_3.10.4-focal_amd64.deb
          sudo apt-get install -y ./singularity-ce_3.10.4-focal_amd64.deb
      - name: Give the test runner user a name to make provenance happy.
        if: ${{ matrix.step == 'unit' || matrix.step == 'mypy' }}
        run: sudo usermod -c 'CI Runner' "$(whoami)"
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.py-semver }}
          allow-prereleases: true
          cache: pip
          cache-dependency-path: |
            requirements.txt
            tox.ini
      - name: Upgrade setuptools and install tox
        run: |
          pip install -U pip setuptools wheel
          pip install "tox<4" "tox-gh-actions<3"
      - name: MyPy cache
        if: ${{ matrix.step == 'mypy' }}
        uses: actions/cache@v4
        with:
          path: .mypy_cache/${{ env.py-semver }}
          key: mypy-${{ env.py-semver }}
      - name: Test with tox
        run: APPTAINER_TMPDIR=${RUNNER_TEMP} tox
      - name: Upload coverage to Codecov
        if: ${{ matrix.step == 'unit' }}
        uses: codecov/codecov-action@v4
        with:
          fail_ci_if_error: true
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

  tox-style:
    name: Linters
    runs-on: ubuntu-20.04
    strategy:
      matrix:
        step: [lintreadme, shellcheck, pydocstyle]
    env:
      py-semver: "3.12"
      TOXENV: ${{ format('py312-{0}', matrix.step) }}
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.py-semver }}
          cache: pip
      - name: Upgrade setuptools and install tox
        run: |
          pip install -U pip setuptools wheel
          pip install "tox<4" "tox-gh-actions<3"
      - if: ${{ matrix.step == 'pydocstyle' && github.event_name == 'pull_request'}}
        name: Create local branch for diff-quality for PRs
        run: git branch ${{github.base_ref}} origin/${{github.base_ref}}
      - name: Test with tox
        run: tox

  clean_working_dir:
    name: No leftovers
    runs-on: ubuntu-22.04
    env:
      py-semver: "3.12"
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Set up Singularity
        run: |
          wget --no-verbose https://github.com/sylabs/singularity/releases/download/v3.10.4/singularity-ce_3.10.4-focal_amd64.deb
          sudo apt-get install -y ./singularity-ce_3.10.4-focal_amd64.deb
      - name: Give the test runner user a name to make provenance happy.
        run: sudo usermod -c 'CI Runner' "$(whoami)"
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.py-semver }}
          cache: pip
      - name: install with test dependencies
        run: |
          pip install -U pip setuptools wheel
          pip install --no-build-isolation -rtest-requirements.txt .[deps]
      - name: make working directory read-only
        run: |
          mkdir .pytest_cache
          chmod a-w .
      - name: run tests
        run: APPTAINER_TMPDIR=${RUNNER_TEMP} make test

  conformance_tests:
    name: CWL conformance
    runs-on: ubuntu-22.04
    strategy:
      matrix:
        cwl-version: [v1.0, v1.1, v1.2]
        container: [docker, singularity, podman]
        extras: [""]
        include:
          - cwl-version: v1.2
            container: docker
            extras: "--fast-parser"
    steps:
      - uses: actions/checkout@v4
      - name: Set up Singularity
        if: ${{ matrix.container == 'singularity' }}
        run: |
          wget --no-verbose https://github.com/sylabs/singularity/releases/download/v3.10.4/singularity-ce_3.10.4-jammy_amd64.deb
          sudo apt-get install -y ./singularity-ce_3.10.4-jammy_amd64.deb
      - name: Singularity cache
        if: ${{ matrix.container == 'singularity' }}
        uses: actions/cache@v4
        with:
          path: sifcache
          key: singularity
      - name: Set up Podman
        if: ${{ matrix.container == 'podman' }}
        run: sudo rm -f /usr/bin/docker ; sudo apt-get install -y podman
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: 3.12
          cache: pip
      - name: "Test CWL ${{ matrix.cwl-version }} conformance"
        env:
          VERSION: ${{ matrix.cwl-version }}
          CONTAINER: ${{ matrix.container }}
          GIT_TARGET: main
          CWLTOOL_OPTIONS: ${{ matrix.cwl-version == 'v1.2' && '--relax-path-checks' || '' }} ${{ matrix.extras }}
        run: ./conformance-test.sh
      - name: Archive test results
        uses: actions/upload-artifact@v4
        with:
          name: cwl-${{ matrix.cwl-version }}-${{ matrix.container }}${{ matrix.extras }}-conformance-results
          path: |
            **/cwltool_conf*.xml
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4
        with:
          fail_ci_if_error: true
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

  release_test:
    name: cwltool release test
    runs-on: ubuntu-22.04
    steps:
      - uses: actions/checkout@v4
      - name: Set up Singularity
        run: |
          wget --no-verbose https://github.com/sylabs/singularity/releases/download/v3.10.4/singularity-ce_3.10.4-jammy_amd64.deb
          sudo apt-get install -y ./singularity-ce_3.10.4-jammy_amd64.deb
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: 3.12
          cache: pip
          cache-dependency-path: |
            requirements.txt
            test-requirements.txt
      - name: Give the test runner user a name to make provenance happy.
        run: sudo usermod -c 'CI Runner' "$(whoami)"
      - name: Install packages
        run: |
          pip install -U pip setuptools wheel
          pip install virtualenv
      - name: Release test
        env:
          RELEASE_SKIP: head
        run: ./release-test.sh

  build_test_container:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: record cwltool version
        run: pip install -U setuptools wheel && pip install setuptools_scm[toml] && python setup.py --version
      - name: build & test cwltool_module container
        run: ./build-cwltool-docker.sh

  macos:
    name: Test on macos-latest
    runs-on: macos-13  # not latest, that is now an Apple Silicon M1, for which seqtk is not yet built on bioconda
    env:
      TOXENV: py312-unit
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: 3.12
          cache: pip
          cache-dependency-path: |
            requirements.txt
            tox.ini
      - name: Upgrade setuptools and install tox
        run: |
          pip install -U pip setuptools wheel
          pip install "tox<4" "tox-gh-actions<3"
      # # docker for mac install is not currently stable
      # - name: 'SETUP MacOS: load Homebrew cache'
      #   uses: actions/cache@v4
      #   if: runner.os == 'macOS'
      #   with:
      #     path: |
      #       ~/Library/Caches/Homebrew/downloads/*--Docker.dmg
      #     key: brew-actions-setup-docker-1.0.11
      #     restore-keys: brew-actions-setup-docker-
      # - name: setup docker on macos (default stable version)
      #   uses: docker-practice/actions-setup-docker@master
      - name: Test with tox
        run: tox
      - name: Upload coverage to Codecov
        uses: codecov/codecov-action@v4
        with:
          fail_ci_if_error: true
        env:
          CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

cwltool-3.1.20240508115724/.github/workflows/codeql-analysis.yml

name: "Code scanning - action"
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  schedule:
    - cron: '0 10 * * 2'

concurrency:
  group: codeql-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

jobs:
  CodeQL-Build:
    runs-on: ubuntu-latest
    permissions:
      # required for all workflows
      security-events: write
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4
      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: python
      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3

cwltool-3.1.20240508115724/.github/workflows/quay-publish.yml

name: publish-quay
on:
  push:
    tags:
      - '*'
  workflow_dispatch: {}

jobs:
  build:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          fetch-depth: 0
      - name: Get image tags
        id: image_tags
        run: |
          echo -n "IMAGE_TAGS=${GITHUB_REF#refs/*/}" >> "$GITHUB_OUTPUT"
      - name: record cwltool version
        run: |
          pip install "setuptools>=61"
          pip install setuptools_scm[toml] wheel
          python setup.py --version
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3
      - name: Cache Docker layers
        uses: actions/cache@v4
        with:
          path: /tmp/.buildx-cache
          key: ${{ runner.os }}-multi-buildx-${{ github.sha }}
          restore-keys: |
            ${{ runner.os }}-multi-buildx
      - name: Login to Quay.io
        uses: docker/login-action@v3
        with:
          registry: ${{ secrets.REGISTRY_SERVER }}
          username: ${{ secrets.REGISTRY_USERNAME }}
          password: ${{ secrets.REGISTRY_PASSWORD }}
      - name: Build and publish cwltool_module image to Quay
        uses: docker/build-push-action@v5
        with:
          context: .
          file: cwltool.Dockerfile
          tags: quay.io/commonwl/cwltool_module:${{ steps.image_tags.outputs.IMAGE_TAGS }},quay.io/commonwl/cwltool_module:latest
          target: module
          platforms: linux/amd64,linux/arm64
          push: true
          cache-from: type=gha
          cache-to: type=gha,mode=max
      - name: Build and publish cwltool image to Quay
        uses: docker/build-push-action@v5
        with:
          context: .
          file: cwltool.Dockerfile
          tags: quay.io/commonwl/cwltool:${{ steps.image_tags.outputs.IMAGE_TAGS }},quay.io/commonwl/cwltool:latest
          platforms: linux/amd64,linux/arm64
          push: true
          cache-from: type=gha
          cache-to: type=gha,mode=max

cwltool-3.1.20240508115724/.gitignore

# Generated during tests
pytestdebug.log
tmp/
# Python temps
__pycache__/
*.py[cod]
*$py.class
build/
dist/
eggs/
.eggs/
*.egg-info/
*.egg
.tox/
.pytest_cache
# Editor Temps
.*.sw?
*~
\#*\#
.desktop
# virtualenv
venv/
venv3/
# pycharm
.idea/
# local stubs
mypy-stubs/ruamel/yaml
#mypy
.mypy_cache/
bin/
lib/
# Files generated by Makefile
.cache/
cache/
.coverage
.coverage.*
coverage.xml
htmlcov
output
output.txt
pydocstyle_report.txt
response.txt
test.txt
time.txt
value
.python-version
cwltool/_version.py
# Folder created when using make
cwltool_deps
docs/_build/
docs/autoapi/

cwltool-3.1.20240508115724/.mergify.yml

queue_rules:
  - name: default
    conditions:
      - -draft  # not a draft
      - base=main

pull_request_rules:
  - name: Automatic merge on approval and when GitHub branch protection passes on main
    conditions:
      - "#approved-reviews-by>=1"
      - -draft
      - base=main
    actions:
      queue:
        method: merge
        name: default
  - name: Automatic merge for leadership team members when there are no reviewers and the label is "ready"
    conditions:
      - "#review-requested=0"
      - "#changes-requested-reviews-by<1"
      - -draft
      - base=main
      - author=@leadership
      - label=ready
    actions:
      queue:
        method: merge
        name: default

cwltool-3.1.20240508115724/.readthedocs.yml

# .readthedocs.yml
# Read the Docs configuration file
# See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
# Required
version: 2
# Build documentation in the docs/ directory with Sphinx
sphinx:
  configuration: docs/conf.py

# Optionally build your docs in additional formats such as PDF and ePub
formats: all

build:
  os: ubuntu-22.04
  tools:
    python: "3.11"
  apt_packages:
    - graphviz

python:
  install:
    - requirements: docs/requirements.txt
    - method: pip
      path: .

cwltool-3.1.20240508115724/.snyk

# Snyk (https://snyk.io) policy file, patches or ignores known vulnerabilities.
version: v1.19.0
# ignores vulnerabilities until expiry date; change duration by modifying expiry date
ignore:
  SNYK-PYTHON-NETWORKX-1062709:
    - '*':
        reason: Prov does not use the affected code path
        expires: 2022-08-08T15:27:21.289Z
        created: 2021-07-09T15:27:21.300Z
patch: {}

cwltool-3.1.20240508115724/CODE_OF_CONDUCT.md

CWL Code of Conduct
===================
The CWL Project is dedicated to providing a harassment-free experience for
everyone. We do not tolerate harassment of participants in any form.
This code of conduct applies to all CWL Project spaces both online and off: the
Google Group, the Gitter chat room, the Google Hangouts chats, and any other
CWL spaces. Anyone who violates this code of conduct may be sanctioned or
expelled from these spaces at the discretion of the CWL Leadership Team.
Some CWL Project spaces may have additional rules in place, which will be
made clearly available to participants. Participants are responsible for
knowing and abiding by these rules.
Harassment includes, but is not limited to:
- Offensive comments related to gender, gender identity and expression, sexual
orientation, disability, mental illness, neuro(a)typicality, physical
appearance, body size, age, race, or religion.
- Unwelcome comments regarding a person’s lifestyle choices and practices,
including those related to food, health, parenting, drugs, and employment.
- Deliberate misgendering or use of [dead](https://www.quora.com/What-is-deadnaming/answer/Nancy-C-Walker)
or rejected names.
- Gratuitous or off-topic sexual images or behaviour in spaces where they’re not
appropriate.
- Physical contact and simulated physical contact (eg, textual descriptions like
“\*hug\*” or “\*backrub\*”) without consent or after a request to stop.
- Threats of violence.
- Incitement of violence towards any individual, including encouraging a person
to commit suicide or to engage in self-harm.
- Deliberate intimidation.
- Stalking or following.
- Harassing photography or recording, including logging online activity for
harassment purposes.
- Sustained disruption of discussion.
- Unwelcome sexual attention.
- Pattern of inappropriate social contact, such as requesting/assuming
inappropriate levels of intimacy with others.
- Continued one-on-one communication after requests to cease.
- Deliberate “outing” of any aspect of a person’s identity without their consent
except as necessary to protect vulnerable people from intentional abuse.
- Publication of non-harassing private communication.
The CWL Project prioritizes marginalized people’s safety over privileged
people’s comfort. The CWL Leadership Team will not act on complaints regarding:
- ‘Reverse’ -isms, including ‘reverse racism,’ ‘reverse sexism,’ and ‘cisphobia’
- Reasonable communication of boundaries, such as “leave me alone,” “go away,” or
“I’m not discussing this with you.”
- Communicating in a [tone](http://geekfeminism.wikia.com/wiki/Tone_argument)
you don’t find congenial
Reporting
---------
If you are being harassed by a member of the CWL Project, notice that someone
else is being harassed, or have any other concerns, please contact the CWL
Leadership Team at leadership@commonwl.org. If the person who is harassing
you is on the team, they will recuse themselves from handling your incident. We
will respond as promptly as we can.
This code of conduct applies to CWL Project spaces, but if you are being
harassed by a member of CWL Project outside our spaces, we still want to
know about it. We will take all good-faith reports of harassment by CWL Project
members, especially the CWL Leadership Team, seriously. This includes harassment
outside our spaces and harassment that took place at any point in time. The
abuse team reserves the right to exclude people from the CWL Project based on
their past behavior, including behavior outside CWL Project spaces and
behavior towards people who are not in the CWL Project.
In order to protect volunteers from abuse and burnout, we reserve the right to
reject any report we believe to have been made in bad faith. Reports intended
to silence legitimate criticism may be deleted without response.
We will respect confidentiality requests for the purpose of protecting victims
of abuse. At our discretion, we may publicly name a person about whom we’ve
received harassment complaints, or privately warn third parties about them, if
we believe that doing so will increase the safety of CWL Project members or
the general public. We will not name harassment victims without their
affirmative consent.
Consequences
------------
Participants asked to stop any harassing behavior are expected to comply
immediately.
If a participant engages in harassing behavior, the CWL Leadership Team may
take any action they deem appropriate, up to and including expulsion from all
CWL Project spaces and identification of the participant as a harasser to other
CWL Project members or the general public.
This anti-harassment policy is based on the [example policy from the Geek
Feminism wiki](http://geekfeminism.wikia.com/wiki/Community_anti-harassment/Policy),
created by the Geek Feminism community.
CWL Leadership Team
-------------------
As a stop gap measure until a more formal governance structure is adopted, the
following individuals make up the leadership of the CWL Project: Peter Amstutz,
John Chilton, Michael R. Crusoe, and Nebojša Tijanić.
To report an issue with anyone on the team you can escalate to Ward Vandewege
(Curoverse) ward@curoverse.com, Anton Nekrutenko (Galaxy)
anton AT bx DOT psu DOT edu, C. Titus Brown (UC Davis) ctbrown@ucdavis.edu, or
Brandi Davis-Dusenbery (Seven Bridges Genomics) brandi@sbgenomics.com.

cwltool-3.1.20240508115724/CONTRIBUTING.md

Style guide:
- PEP-8 (as implemented by the `black` code formatting tool)
- Python 3.8+ compatible code
- PEP-484 type hints
Development is done using `git`; we encourage you to get familiar with it.
Here's a rough guide (improvements are welcome!)
To get the code and start working on the changes you can start a console and:
- Clone the cwltool: `git clone https://github.com/common-workflow-language/cwltool.git`
- Switch to cwltool directory: `cd cwltool`
It is suggested that you run `git config blame.ignoreRevsFile .git-blame-ignore-revs`
to filter out mass-formatting commits from `git blame`.
In order to contribute to the development of `cwltool`, the source code needs to
pass the tests before your changes are accepted. There are a couple of ways to test
the code with your changes: let `tox` manage installation and test running in
virtual environments, or do it manually (preferably in a virtual environment):
- Install `tox` preferably using the OS' package manager, otherwise it can be
installed with `pip install --user -U tox`
- Make your changes to the code and add tests for new cool things you're adding!
- Run the tests with the command `tox`; it's recommended to use some parameters,
as plain `tox` will try to run all the checks in all available Python interpreters.
- The important tests to run are "unit tests" and "type tests".
To run these two in Python 3.8 (for example), we can tell tox to run only those
tests by running: `tox -e py38-mypy`. (the `mypy` tox target also runs the
unit tests; to just run the type checker use `make mypy` as shown below)
- Run `tox -l` to see all available tests and runtimes
For the more traditional workflow:
- Create a virtual environment: `python3 -m venv cwltool`
(if you have multiple version of Python installed, then you can replace `python3`
in this command with the version you want to use, `python3.10` for example)
- To begin using the virtual environment, it needs to be activated: `source cwltool/bin/activate`
- To check if you have the virtual environment set up: `which python`
and it should point to the Python executable in your virtualenv
- Install the latest versions of `pip` and `wheel` with `pip install --upgrade pip wheel`
- Install cwltool in development mode, along with development tools:
`make install-dep dev`
- Check the `cwltool` version which might be different from the version installed in
general on your system: `cwltool --version`
- Make your changes to the code and add tests for new cool things you're adding!
- Run the unit tests: `make test`
- After you're done working on `cwltool`, you can deactivate the virtual
environment: `deactivate`
Before you commit your code (or at least before you push to GitHub) it is
recommended to run `make cleanup`
(which is a shortcut for `make sort_imports format flake8 diff_pydocstyle_report`)
to fix common issues and point out any remaining code formatting issues
that cannot be fixed automatically.
When tests are passing, you can create git commits in a new branch and push to
GitHub to make a pull request on the `cwltool` repo.
Useful `make` commands:
The `make` commands below can help you install the dependencies, and format and test your code:
- `make help` to show the list of commands that can be used with `make`
- `make install-dep` will install the dependencies needed by `cwltool`
- `make format` will clean up your code according to the accepted python standard
- `make test` will perform the tests of `cwltool` (can take a while to run all tests)
- `make sort_imports` will clean up and sort your import statements
- `make remove_unused_imports` will remove any unneeded `import` statements
- `make flake8` will catch format issues that `make format` cannot fix automatically
- `make diff_pydocstyle_report` check Python docstring style for changed files only
- `make diff-cover` to run the tests and point out which lines of code that have been
changed as compared to the `main` branch are missing test coverage. This is also calculated
automatically after all the automatic tests run when you open a Pull Request on GitHub;
running it locally can help you confirm that any changes or additions
to the tests are sufficient before pushing to GitHub.
- `make cleanup` is a shortcut for `make sort_imports format flake8 diff_pydocstyle_report`

cwltool-3.1.20240508115724/LICENSE.txt
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

cwltool-3.1.20240508115724/MANIFEST.in

include README.rst CODE_OF_CONDUCT.md CONTRIBUTING.md
include MANIFEST.in
include LICENSE.txt
include *requirements.txt mypy.ini tox.ini
include Makefile cwltool.py
recursive-include mypy-stubs *.pyi *.py
include tests/*
include tests/cwl-conformance/cwltool-conftest.py
include tests/loop/*
include tests/tmp1/tmp2/tmp3/.gitkeep
include tests/tmp4/alpha/*
include tests/wf/*
include tests/wf/operation/*
include tests/override/*
include tests/reloc/*.cwl
include tests/reloc/dir1/*
include tests/reloc/dir2/*
include tests/checker_wf/*
include tests/subgraph/*
include tests/input_deps/*
include tests/trs/*
include tests/wf/generator/*
include cwltool/py.typed
include cwltool/schemas/v1.0/*.yml
include cwltool/schemas/v1.0/*.md
include cwltool/schemas/v1.0/salad/schema_salad/metaschema/*.yml
include cwltool/schemas/v1.0/salad/schema_salad/metaschema/*.md
include cwltool/schemas/v1.1/*.yml
include cwltool/schemas/v1.1/*.md
include cwltool/schemas/v1.1/salad/schema_salad/metaschema/*.yml
include cwltool/schemas/v1.1/salad/schema_salad/metaschema/*.md
include cwltool/schemas/v1.1.0-dev1/*.yml
include cwltool/schemas/v1.1.0-dev1/*.md
include cwltool/schemas/v1.1.0-dev1/salad/schema_salad/metaschema/*.yml
include cwltool/schemas/v1.1.0-dev1/salad/schema_salad/metaschema/*.md
include cwltool/schemas/v1.2.0-dev2/*.yml
include cwltool/schemas/v1.2.0-dev2/*.md
include cwltool/schemas/v1.2.0-dev2/salad/schema_salad/metaschema/*.yml
include cwltool/schemas/v1.2.0-dev2/salad/schema_salad/metaschema/*.md
include cwltool/schemas/v1.2.0-dev3/*.yml
include cwltool/schemas/v1.2.0-dev3/*.md
include cwltool/schemas/v1.2.0-dev3/salad/schema_salad/metaschema/*.yml
include cwltool/schemas/v1.2.0-dev3/salad/schema_salad/metaschema/*.md
include cwltool/schemas/v1.2.0-dev4/*.yml
include cwltool/schemas/v1.2.0-dev4/*.md
include cwltool/schemas/v1.2.0-dev4/salad/schema_salad/metaschema/*.yml
include cwltool/schemas/v1.2.0-dev4/salad/schema_salad/metaschema/*.md
include cwltool/schemas/v1.2.0-dev5/*.yml
include cwltool/schemas/v1.2.0-dev5/*.md
include cwltool/schemas/v1.2.0-dev5/salad/schema_salad/metaschema/*.yml
include cwltool/schemas/v1.2.0-dev5/salad/schema_salad/metaschema/*.md
include cwltool/schemas/v1.2/*.yml
include cwltool/schemas/v1.2/*.md
include cwltool/schemas/v1.2/salad/schema_salad/metaschema/*.yml
include cwltool/schemas/v1.2/salad/schema_salad/metaschema/*.md
include cwltool/extensions.yml
include cwltool/extensions-v1.1.yml
include cwltool/extensions-v1.2.yml
include cwltool/jshint/jshint_wrapper.js
include cwltool/jshint/jshint.js
include cwltool/hello.simg
include cwltool/rdfqueries/*.sparql
prune cwltool/schemas/v1.0/salad/typeshed
prune cwltool/schemas/v1.0/salad/schema_salad/tests
prune cwltool/schemas/v1.1.0-dev1/salad/typeshed
prune cwltool/schemas/v1.1.0-dev1/salad/schema_salad/tests
prune cwltool/schemas/presentations
prune cwltool/schemas/site
prune cwltool/schemas/v1.0/examples
prune cwltool/schemas/v1.0/v1.0
prune cwltool/schemas/v1.1.0-dev1/examples
prune cwltool/schemas/v1.1.0-dev1/v1.1.0-dev1
recursive-exclude cwltool/schemas *.py
exclude debian.img
global-exclude *~
global-exclude *.pyc

cwltool-3.1.20240508115724/Makefile

# This file is part of cwltool,
# https://github.com/common-workflow-language/cwltool/, and is
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Contact: common-workflow-language@googlegroups.com
# make format to fix most python formatting errors
# make pylint to check Python code for enhanced compliance including naming
# and documentation
# make coverage-report to check coverage of the python scripts by the tests
MODULE=cwltool

# `SHELL=bash` doesn't work for some, so don't use BASH-isms like
# `[[` conditional expressions.
PYSOURCES=$(wildcard ${MODULE}/**.py cwltool/cwlprov/*.py tests/*.py) setup.py
DEVPKGS=diff_cover pylint pep257 pydocstyle 'tox<4' tox-pyenv auto-walrus \
	isort wheel autoflake pyupgrade bandit -rlint-requirements.txt\
	-rtest-requirements.txt -rmypy-requirements.txt -rdocs/requirements.txt
DEBDEVPKGS=pep8 python-autopep8 pylint python-coverage pydocstyle sloccount \
	python-flake8 python-mock shellcheck
VERSION=3.1.$(shell TZ=UTC git log --first-parent --max-count=1 \
	--format=format:%cd --date=format-local:%Y%m%d%H%M%S)
mkfile_dir := $(dir $(abspath $(lastword $(MAKEFILE_LIST))))
UNAME_S=$(shell uname -s)

## all : default task (install cwltool in dev mode)
all: dev

## help : print this help message and exit
help: Makefile
	@sed -n 's/^##//p' $<

## cleanup : shortcut for "make sort_imports format flake8 diff_pydocstyle_report"
cleanup: sort_imports format flake8 diff_pydocstyle_report

## install-dep : install most of the development dependencies via pip
install-dep: install-dependencies

install-dependencies: FORCE
	pip install --upgrade $(DEVPKGS)
	pip install -r requirements.txt

install-doc-dep:
	pip install -r docs/requirements.txt

## install-deb-dep : install many of the dev dependencies via apt-get
install-deb-dep:
	sudo apt-get install $(DEBDEVPKGS)

## install : install the cwltool module with the "deps" dependency (galaxy-tool-util)
install: FORCE
	pip install .[deps]

## dev : install the cwltool module in dev mode
dev: install-dep
	pip install -e .[deps]

## dist : create a module package for distribution
dist: dist/${MODULE}-$(VERSION).tar.gz

dist/${MODULE}-$(VERSION).tar.gz: $(SOURCES)
	python3 -m build

## docs : make the docs
docs: FORCE
	cd docs && $(MAKE) html

## clean : clean up all temporary / machine-generated files
clean: FORCE
	rm -f ${MODULE}/*.pyc tests/*.pyc *.so ${MODULE}/*.so cwltool/cwlprov/*.so
	rm -Rf ${MODULE}/__pycache__/
	rm -Rf build
	rm -Rf .coverage
	rm -f diff-cover.html

# Linting and code style related targets
## sort_imports : sort imports using isort: https://github.com/timothycrosley/isort
sort_imports: $(PYSOURCES) mypy-stubs
	isort $^

remove_unused_imports: $(PYSOURCES)
	autoflake --in-place --remove-all-unused-imports $^

pep257: pydocstyle

## pydocstyle : check Python docstring style
pydocstyle: $(PYSOURCES)
	pydocstyle --add-ignore=D100,D101,D102,D103 $^ || true

pydocstyle_report.txt: $(PYSOURCES)
	pydocstyle setup.py $^ > $@ 2>&1 || true

## diff_pydocstyle_report : check Python docstring style for changed files only
diff_pydocstyle_report: pydocstyle_report.txt
	diff-quality --compare-branch=main --violations=pydocstyle --fail-under=100 $^

## codespell-check : check for common misspellings
codespell-check:
	@codespell $(shell git ls-files | grep -v cwltool/schemas | grep -v cwltool/jshint/ | grep -v mypy-stubs) \
		|| (echo Probable typo found. Run \"make codespell-fix\" to accept suggested fixes, or add the word to the ignore list in setup.cfg ; exit 1)

## codespell-fix : fix common misspellings
codespell-fix:
	@codespell -w $(shell git ls-files | grep -v cwltool/schemas | grep -v cwltool/jshint/ | grep -v mypy-stubs)

## format : check/fix all code indentation and formatting (runs black)
format:
	black --exclude cwltool/schemas --exclude cwltool/_version.py setup.py cwltool.py cwltool tests mypy-stubs

format-check:
	black --diff --check --exclude cwltool/schemas setup.py --exclude cwltool/_version.py cwltool.py cwltool tests mypy-stubs

## pylint : run static code analysis on Python code
pylint: $(PYSOURCES)
	pylint --msg-template="{path}:{line}: [{msg_id}({symbol}), {obj}] {msg}" \
		$^ -j0 || true

pylint_report.txt: $(PYSOURCES)
	pylint --msg-template="{path}:{line}: [{msg_id}({symbol}), {obj}] {msg}" \
		$^ -j0 > $@ || true

diff_pylint_report: pylint_report.txt
	diff-quality --compare-branch=main --violations=pylint pylint_report.txt

.coverage: testcov

coverage: .coverage
	coverage report

coverage.xml: .coverage
	coverage xml

coverage.html: htmlcov/index.html

htmlcov/index.html: .coverage
	coverage html
	@echo Test coverage of the Python code is now in htmlcov/index.html

coverage-report: .coverage
	coverage report

diff-cover: coverage.xml
	diff-cover --compare-branch=main $^

diff-cover.html: coverage.xml
	diff-cover --compare-branch=main $^ --html-report $@

## test : run the cwltool test suite
test: $(PYSOURCES)
	python3 -m pytest -rs ${PYTEST_EXTRA}

## testcov : run the cwltool test suite and collect coverage
testcov: $(PYSOURCES)
	python3 -m pytest -rs --cov --cov-config=.coveragerc --cov-report= ${PYTEST_EXTRA}

sloccount.sc: $(PYSOURCES) Makefile
	sloccount --duplicates --wide --details $^ > $@

## sloccount : count lines of code
sloccount: $(PYSOURCES) Makefile
	sloccount $^

list-author-emails:
	@echo 'name, E-Mail Address'
	@git log --format='%aN,%aE' | sort -u | grep -v 'root'

mypy3: mypy

mypy: $(PYSOURCES)
	MYPYPATH=$$MYPYPATH:mypy-stubs mypy $^

mypyc: $(PYSOURCES)
	MYPYPATH=mypy-stubs CWLTOOL_USE_MYPYC=1 pip install --verbose -e . \
		&& pytest -rs -vv ${PYTEST_EXTRA}

shellcheck: FORCE
	shellcheck build-cwltool-docker.sh cwl-docker.sh release-test.sh conformance-test.sh \
		cwltool-in-docker.sh

pyupgrade: $(PYSOURCES)
	pyupgrade --exit-zero-even-if-changed --py38-plus $^
	auto-walrus $^

release-test: FORCE
	git diff-index --quiet HEAD -- || ( echo You have uncommitted changes, please commit them and try again; false )
	./release-test.sh

release:
	export SETUPTOOLS_SCM_PRETEND_VERSION_FOR_CWLTOOL=${VERSION} && \
	./release-test.sh && \
	. testenv2/bin/activate && \
	pip install build && \
	python3 -m build testenv2/src/${MODULE} && \
	pip install twine && \
	twine upload testenv2/src/${MODULE}/dist/* && \
	git tag ${VERSION} && git push --tags

flake8: $(PYSOURCES)
	flake8 $^

FORCE:

# Use this to print the value of a Makefile variable
# Example `make print-VERSION`
# From https://www.cmcrossroads.com/article/printing-value-makefile-variable
print-% : ; @echo $* = $($*)

cwltool-3.1.20240508115724/PKG-INFO

Metadata-Version: 2.1
Name: cwltool
Version: 3.1.20240508115724
Summary: Common workflow language reference implementation
Home-page: https://github.com/common-workflow-language/cwltool
Download-URL: https://github.com/common-workflow-language/cwltool
Author: Common workflow language working group
Author-email: common-workflow-language@googlegroups.com
Classifier: Development Status :: 5 - Production/Stable
Classifier: Environment :: Console
Classifier: Intended Audience :: Developers
Classifier: Intended Audience :: Science/Research
Classifier: Intended Audience :: Healthcare Industry
Classifier: License :: OSI Approved :: Apache Software License
Classifier: Natural Language :: English
Classifier: Operating System :: MacOS :: MacOS X
Classifier: Operating System :: POSIX
Classifier: Operating System :: POSIX :: Linux
Classifier: Programming Language :: Python :: 3
Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3.9
Classifier: Programming Language :: Python :: 3.10
Classifier: Programming Language :: Python :: 3.11
Classifier: Programming Language :: Python :: 3.12
Classifier: Topic :: Scientific/Engineering
Classifier: Topic :: Scientific/Engineering :: Bio-Informatics
Classifier: Topic :: Scientific/Engineering :: Astronomy
Classifier: Topic :: Scientific/Engineering :: Atmospheric Science
Classifier: Topic :: Scientific/Engineering :: Information Analysis
Classifier: Topic :: Scientific/Engineering :: Medical Science Apps.
Classifier: Topic :: System :: Distributed Computing
Classifier: Topic :: Utilities
Requires-Python: >=3.8, <4
Description-Content-Type: text/x-rst
License-File: LICENSE.txt
Requires-Dist: setuptools
Requires-Dist: requests>=2.6.1
Requires-Dist: ruamel.yaml<0.19,>=0.16
Requires-Dist: rdflib<7.1.0,>=4.2.2
Requires-Dist: shellescape<3.9,>=3.4.1
Requires-Dist: schema-salad<9,>=8.4.20230426093816
Requires-Dist: prov==1.5.1
Requires-Dist: mypy-extensions
Requires-Dist: psutil>=5.6.6
Requires-Dist: importlib_resources>=1.4; python_version < "3.9"
Requires-Dist: coloredlogs
Requires-Dist: pydot>=1.4.1
Requires-Dist: argcomplete
Requires-Dist: pyparsing!=3.0.2
Requires-Dist: cwl-utils>=0.32
Requires-Dist: spython>=0.3.0
Provides-Extra: deps
Requires-Dist: galaxy-tool-util!=23.0.1,!=23.0.2,!=23.0.3,!=23.0.4,!=23.0.5,<24.1,>=22.1.2; extra == "deps"
Requires-Dist: galaxy-util<24.1; extra == "deps"
#############################################################################################
``cwltool``: The reference implementation of the Common Workflow Language standards
#############################################################################################
|Linux Status| |Coverage Status| |Docs Status|
PyPI: |PyPI Version| |PyPI Downloads Month| |Total PyPI Downloads|
Conda: |Conda Version| |Conda Installs|
Debian: |Debian Testing package| |Debian Stable package|
Quay.io (Docker): |Quay.io Container|
.. |Linux Status| image:: https://github.com/common-workflow-language/cwltool/actions/workflows/ci-tests.yml/badge.svg?branch=main
:target: https://github.com/common-workflow-language/cwltool/actions/workflows/ci-tests.yml
.. |Debian Stable package| image:: https://badges.debian.net/badges/debian/stable/cwltool/version.svg
:target: https://packages.debian.org/stable/cwltool
.. |Debian Testing package| image:: https://badges.debian.net/badges/debian/testing/cwltool/version.svg
:target: https://packages.debian.org/testing/cwltool
.. |Coverage Status| image:: https://img.shields.io/codecov/c/github/common-workflow-language/cwltool.svg
:target: https://codecov.io/gh/common-workflow-language/cwltool
.. |PyPI Version| image:: https://badge.fury.io/py/cwltool.svg
:target: https://badge.fury.io/py/cwltool
.. |PyPI Downloads Month| image:: https://pepy.tech/badge/cwltool/month
:target: https://pepy.tech/project/cwltool
.. |Total PyPI Downloads| image:: https://static.pepy.tech/personalized-badge/cwltool?period=total&units=international_system&left_color=black&right_color=orange&left_text=Total%20PyPI%20Downloads
:target: https://pepy.tech/project/cwltool
.. |Conda Version| image:: https://anaconda.org/conda-forge/cwltool/badges/version.svg
:target: https://anaconda.org/conda-forge/cwltool
.. |Conda Installs| image:: https://anaconda.org/conda-forge/cwltool/badges/downloads.svg
:target: https://anaconda.org/conda-forge/cwltool
.. |Quay.io Container| image:: https://quay.io/repository/commonwl/cwltool/status
:target: https://quay.io/repository/commonwl/cwltool
.. |Docs Status| image:: https://readthedocs.org/projects/cwltool/badge/?version=latest
:target: https://cwltool.readthedocs.io/en/latest/?badge=latest
:alt: Documentation Status
This is the reference implementation of the `Common Workflow
Language open standards <https://www.commonwl.org/>`_. It is intended to be feature complete
and provide comprehensive validation of CWL
files as well as provide other tools related to working with CWL.
``cwltool`` is written and tested for
`Python <https://www.python.org/>`_ ``3.x {x = 8, 9, 10, 11, 12}``
The reference implementation consists of two packages. The ``cwltool`` package
is the primary Python module containing the reference implementation in the
``cwltool`` module and console executable by the same name.
The ``cwlref-runner`` package is optional and provides an additional entry point
under the alias ``cwl-runner``, which is the implementation-agnostic name for the
default CWL interpreter installed on a host.
``cwltool`` is provided by the CWL project, `a member project of Software Freedom Conservancy <https://sfconservancy.org/>`_
and our `many contributors <https://github.com/common-workflow-language/cwltool/graphs/contributors>`_.
.. contents:: Table of Contents
*******
Install
*******
``cwltool`` packages
====================
Your operating system may offer cwltool directly. For Debian, Ubuntu,
and similar Linux distributions try

.. code:: bash

   sudo apt-get install cwltool
If you encounter an error, first try to update package information by using
.. code:: bash

   sudo apt-get update
If you are running macOS X or other UNIXes and you want to use packages prepared by the conda-forge project, then
please follow the install instructions for `conda-forge <https://conda-forge.org/>`_ (if you haven't already) and then

.. code:: bash

   conda install -c conda-forge cwltool
All of the above methods of installing ``cwltool`` use packages that might contain bugs already fixed in newer versions or be missing desired features.
If the packaged version of ``cwltool`` available to you is too old, then we recommend installing using ``pip`` and ``venv``
.. code:: bash

   python3 -m venv env      # Create a virtual environment named 'env' in the current directory
   source env/bin/activate  # Activate environment before installing `cwltool`
Then install the latest ``cwlref-runner`` package from PyPI (which will install the latest ``cwltool`` package as
well)
.. code:: bash
pip install cwlref-runner
If installing alongside another CWL implementation (like ``toil-cwl-runner`` or ``arvados-cwl-runner``) then instead run
.. code:: bash
pip install cwltool
MS Windows users
================
1. `Install Windows Subsystem for Linux 2 and Docker Desktop `_.
2. `Install Debian from the Microsoft Store `_.
3. Set Debian as your default WSL 2 distro: ``wsl --set-default debian``.
4. Return to Docker Desktop, choose ``Settings`` → ``Resources`` → ``WSL Integration``, and under "Enable integration with additional distros" select "Debian".
5. Reboot if you have not already done so.
6. Launch Debian and follow the Linux instructions above (``apt-get install cwltool`` or use the ``venv`` method).
Network problems from within WSL2? Try `these instructions `_ followed by ``wsl --shutdown``.
``cwltool`` development version
===============================
Or you can skip the direct ``pip`` commands above and install the latest development version of ``cwltool``:
.. code:: bash
git clone https://github.com/common-workflow-language/cwltool.git # clone (copy) the cwltool git repository
cd cwltool # Change to source directory that git clone just downloaded
pip install .[deps] # Installs ``cwltool`` from source
cwltool --version # Check if the installation works correctly
Remember, if co-installing multiple CWL implementations, then you need to
maintain which implementation ``cwl-runner`` points to via a symbolic file
system link or `another facility `_.
Recommended Software
====================
We strongly suggest having the following installed:
* One of the following software container engines:

  * `Podman <https://podman.io/>`_
  * `Docker <https://www.docker.com/>`_
  * Singularity/Apptainer: See `Using Singularity`_
  * udocker: See `Using uDocker`_

* `node.js <https://nodejs.org/>`_ for evaluating CWL Expressions quickly
  (required for ``udocker`` users, optional but recommended for the other container engines).
Without these, some examples in the CWL tutorials at http://www.commonwl.org/user_guide/ may not work.
***********************
Run on the command line
***********************
Simple command::
cwl-runner my_workflow.cwl my_inputs.yaml
Or if you have multiple CWL implementations installed and you want to override
the default cwl-runner then use::
cwltool my_workflow.cwl my_inputs.yml
You can set cwltool options in the environment with ``CWLTOOL_OPTIONS``;
these options will be inserted at the beginning of the command line::
export CWLTOOL_OPTIONS="--debug"
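Multiple options are separated by spaces, just as on the command line. For
example (``--outdir`` is a standard ``cwltool`` flag; the path shown is only
an illustration)::

  export CWLTOOL_OPTIONS="--debug --outdir /tmp/my-outputs"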
Use with boot2docker on macOS
=============================
boot2docker runs Docker inside a virtual machine, and it only mounts ``Users``
on it. The default behavior of CWL is to create temporary directories under e.g.
``/Var``, which are not accessible to Docker containers.

To run CWL successfully with boot2docker, you need to set ``--tmpdir-prefix``
and ``--tmp-outdir-prefix`` to locations under ``/Users``::
$ cwl-runner --tmp-outdir-prefix=/Users/username/project --tmpdir-prefix=/Users/username/project wc-tool.cwl wc-job.json
Using uDocker
=============
Some shared computing environments don't support Docker software containers for technical or policy reasons.
As a workaround, the CWL reference runner supports using the `udocker <https://indigo-dc.github.io/udocker/>`_
program on Linux via ``--udocker``.
udocker installation: https://indigo-dc.github.io/udocker/installation_manual.html
Run ``cwltool`` just as you usually would, but with ``--udocker`` prior to the workflow path:
.. code:: bash
cwltool --udocker https://github.com/common-workflow-language/common-workflow-language/raw/main/v1.0/v1.0/test-cwl-out2.cwl https://github.com/common-workflow-language/common-workflow-language/raw/main/v1.0/v1.0/empty.json
As was mentioned in the `Recommended Software`_ section, ``node.js`` is required when using udocker.
Using Singularity
=================
``cwltool`` can also use `Singularity <https://sylabs.io/>`_ version 2.6.1
or later as a Docker container runtime.
``cwltool`` with Singularity will run software containers specified in
``DockerRequirement`` and therefore works with Docker images only; native
Singularity images are not supported. To use Singularity as the Docker container
runtime, provide the ``--singularity`` command line option to ``cwltool``.
With Singularity, ``cwltool`` can pass all CWL v1.0 conformance tests except
those involving Docker container ENTRYPOINTs.

Example:
.. code:: bash
cwltool --singularity https://github.com/common-workflow-language/common-workflow-language/raw/main/v1.0/v1.0/cat3-tool-mediumcut.cwl https://github.com/common-workflow-language/common-workflow-language/raw/main/v1.0/v1.0/cat-job.json
Running a tool or workflow from remote or local locations
=========================================================
``cwltool`` can run tool and workflow descriptions on both local and remote
systems via its support for HTTP[S] URLs.
Input job files and Workflow steps (via the `run` directive) can reference CWL
documents using absolute or relative local filesystem paths. If a relative path
is referenced and that document isn't found in the current directory, then the
following locations will be searched:
http://www.commonwl.org/v1.0/CommandLineTool.html#Discovering_CWL_documents_on_a_local_filesystem
You can also use `cwldep <https://github.com/common-workflow-language/cwldep>`_
to manage dependencies on external tools and workflows.
Overriding workflow requirements at load time
=============================================
Sometimes a workflow needs additional requirements to run in a particular
environment or with a particular dataset. To avoid the need to modify the
underlying workflow, cwltool supports requirement "overrides".
The format of the "overrides" object is a mapping of item identifier (workflow,
workflow step, or command line tool) to the process requirements that should be applied.
.. code:: yaml

  cwltool:overrides:
    echo.cwl:
      requirements:
        EnvVarRequirement:
          envDef:
            MESSAGE: override_value
Overrides can be specified either on the command line, or as part of the job
input document. Workflow steps are identified using the name of the workflow
file followed by the step name as a document fragment identifier "#id".
Override identifiers are relative to the top-level workflow document.
.. code:: bash
cwltool --overrides overrides.yml my-tool.cwl my-job.yml
.. code:: yaml

  input_parameter1: value1
  input_parameter2: value2
  cwltool:overrides:
    workflow.cwl#step1:
      requirements:
        EnvVarRequirement:
          envDef:
            MESSAGE: override_value
.. code:: bash
cwltool my-tool.cwl my-job-with-overrides.yml
Combining parts of a workflow into a single document
====================================================
Use ``--pack`` to combine a workflow made up of multiple files into a
single compound document. This operation takes all the CWL files
referenced by a workflow and builds a new CWL document with all
Process objects (CommandLineTool and Workflow) in a list in the
``$graph`` field. Cross references (such as ``run:`` and ``source:``
fields) are updated to internal references within the new packed
document. The top-level workflow is named ``#main``.
.. code:: bash
cwltool --pack my-wf.cwl > my-packed-wf.cwl
Running only part of a workflow
===============================
You can run a partial workflow with the ``--target`` (``-t``) option. This
takes the name of an output parameter, workflow step, or input
parameter in the top-level workflow. You may provide multiple
targets.
.. code:: bash
cwltool --target step3 my-wf.cwl
If a target is an output parameter, it will run only the steps
that contribute to that output. If a target is a workflow step, it
will run the workflow starting from that step. If a target is an
input parameter, it will only run the steps connected to
that input.
Use ``--print-targets`` to get a listing of the targets of a workflow.
To see which steps will run, use ``--print-subgraph`` with
``--target`` to get a printout of the workflow subgraph for the
selected targets.
.. code:: bash
cwltool --print-targets my-wf.cwl
cwltool --target step3 --print-subgraph my-wf.cwl > my-wf-starting-from-step3.cwl
Visualizing a CWL document
==========================
The ``--print-dot`` option will print a file suitable for the Graphviz ``dot`` program. Here is a bash one-liner to generate a Scalable Vector Graphics (SVG) file:
.. code:: bash
cwltool --print-dot my-wf.cwl | dot -Tsvg > my-wf.svg
Modeling a CWL document as RDF
==============================
CWL documents can be expressed as RDF triple graphs.
.. code:: bash
cwltool --print-rdf --rdf-serializer=turtle mywf.cwl
Environment Variables in cwltool
================================
This reference implementation supports several ways of setting
environment variables for tools, in addition to the standard
``EnvVarRequirement``. The sequence of steps applied to create the
environment is:
0. If the ``--preserve-entire-environment`` flag is present, then begin with the current
environment, else begin with an empty environment.
1. Add any variables specified by ``--preserve-environment`` option(s).
2. Set ``TMPDIR`` and ``HOME`` per `the CWL v1.0+ CommandLineTool specification <https://www.commonwl.org/v1.0/CommandLineTool.html#Runtime_environment>`_.
3. Apply any ``EnvVarRequirement`` from the ``CommandLineTool`` description.
4. Apply any manipulations required by any ``cwltool:MPIRequirement`` extensions.
5. Substitute any secrets required by ``Secrets`` extension.
6. Modify the environment in response to ``SoftwareRequirement`` (see below).
Leveraging SoftwareRequirements (Beta)
--------------------------------------
CWL tools may be decorated with ``SoftwareRequirement`` hints that cwltool
may in turn use to resolve to packages in various package managers or
dependency management systems such as `Environment Modules
<https://modules.sourceforge.net/>`__.
Utilizing ``SoftwareRequirement`` hints with cwltool requires an optional
dependency; for this reason, be sure to specify the ``deps`` extra when
installing cwltool. For instance::
$ pip install 'cwltool[deps]'
Installing cwltool in this fashion enables several new command line options.
The most general of these options is ``--beta-dependency-resolvers-configuration``.
This option allows one to specify a dependency resolver configuration file.
This file may be written as either XML or YAML and simply describes the
plugins to enable for resolving ``SoftwareRequirement`` dependencies.
Using these hints will allow cwltool to modify the environment in
which your tool runs, for example by loading one or more Environment
Modules. The environment is constructed as above, and then it may be
modified by the selected tool resolver. This currently means that
you cannot override any environment variables set by the selected tool
resolver. Note that the environment given to the configured dependency
resolver has the variable `_CWLTOOL` set to `1` to allow introspection.
To discuss some of these plugins and how to configure them, first consider the
following ``hint`` definition for an example CWL tool.
.. code:: yaml

  SoftwareRequirement:
    packages:
    - package: seqtk
      version:
      - r93
Now imagine deploying cwltool on a cluster with Software Modules installed
and that a ``seqtk`` module is available at version ``r93``. This means cluster
users likely won't have the binary ``seqtk`` on their ``PATH`` by default, but after
sourcing this module with the command ``modulecmd sh load seqtk/r93``, ``seqtk``
is available on the ``PATH``. A simple dependency resolver configuration file,
called ``dependency-resolvers-conf.yml`` for instance, that would enable cwltool
to source the correct module environment before executing the above tool would be:
.. code:: yaml

  - type: modules
The outer list indicates that one plugin is being enabled; its parameters are
defined as a dictionary for that single list item. There is only one required
parameter for the plugin above, ``type``, which defines the plugin type; this
parameter is required for all plugins. The available plugins and the parameters
available for each are documented (incompletely) `here
`__.
Unfortunately, this documentation is in the context of Galaxy tool
``requirement`` s instead of CWL ``SoftwareRequirement`` s, but the concepts map fairly directly.
cwltool is distributed with an example of such a seqtk tool and a sample
corresponding job. It can be executed from the cwltool root using a dependency
resolver configuration file such as the one above with the command::
cwltool --beta-dependency-resolvers-configuration /path/to/dependency-resolvers-conf.yml \
tests/seqtk_seq.cwl \
tests/seqtk_seq_job.json
This example demonstrates both that cwltool can leverage
existing software installations and that it can handle workflows with
dependencies on different versions of the same software and libraries. However,
the above example does require an existing module setup, so it is impossible to
test this example "out of the box" with cwltool. For a more isolated test that
demonstrates all the same concepts, the resolver plugin type ``galaxy_packages``
can be used.
"Galaxy packages" are a lighter-weight alternative to Environment Modules that are
really just defined by a way to lay out directories into packages and versions
to find little scripts that are sourced to modify the environment. They have
been used for years in Galaxy community to adapt Galaxy tools to cluster
environments but require neither knowledge of Galaxy nor any special tools to
setup. These should work just fine for CWL tools.
The cwltool source code repository's test directory is set up with a very simple
directory tree that defines a set of "Galaxy packages" (really just one package,
named ``random-lines``). The directory layout is simply::

  tests/test_deps_env/
    random-lines/
      1.0/
        env.sh
If the ``galaxy_packages`` plugin is enabled and pointed at the
``tests/test_deps_env`` directory in cwltool's root, and a ``SoftwareRequirement``
such as the following is encountered:
.. code:: yaml

  hints:
    SoftwareRequirement:
      packages:
      - package: 'random-lines'
        version:
        - '1.0'
then cwltool will simply find that ``env.sh`` file and source it before executing
the corresponding tool. That ``env.sh`` script is only responsible for modifying
the job's ``PATH`` to add the required binaries.
This is a full example that works since resolving "Galaxy packages" has no
external requirements. Try it out by executing the following command from cwltool's
root directory::
cwltool --beta-dependency-resolvers-configuration tests/test_deps_env_resolvers_conf.yml \
tests/random_lines.cwl \
tests/random_lines_job.json
The resolvers configuration file in the above example was simply:
.. code:: yaml

  - type: galaxy_packages
    base_path: ./tests/test_deps_env
It is possible that the ``SoftwareRequirement`` s in a given CWL tool will not
match the module names for a given cluster. Such requirements can be re-mapped
to specific deployed packages or versions using another file specified via
the resolver plugin parameter ``mapping_files``. We will
demonstrate this using ``galaxy_packages``, but the concepts apply equally well
to Environment Modules or Conda packages (described below), for instance.
So consider the resolver configuration file
(``tests/test_deps_env_resolvers_conf_rewrite.yml``):
.. code:: yaml

  - type: galaxy_packages
    base_path: ./tests/test_deps_env
    mapping_files: ./tests/test_deps_mapping.yml
And the corresponding mapping configuration file (``tests/test_deps_mapping.yml``):
.. code:: yaml

  - from:
      name: randomLines
      version: 1.0.0-rc1
    to:
      name: random-lines
      version: '1.0'
This says that if cwltool encounters a requirement for ``randomLines`` at version
``1.0.0-rc1`` in a tool, it should be rewritten for our specific plugin as
``random-lines`` at version ``1.0``. cwltool has such a test tool, called ``random_lines_mapping.cwl``,
that contains such a source ``SoftwareRequirement``. To try out this example with
mapping, execute the following command from the cwltool root directory::
cwltool --beta-dependency-resolvers-configuration tests/test_deps_env_resolvers_conf_rewrite.yml \
tests/random_lines_mapping.cwl \
tests/random_lines_job.json
The previous examples demonstrated leveraging existing infrastructure to
provide requirements for CWL tools. If instead a real package manager is used,
cwltool has the opportunity to install requirements as needed. While initial
support for Homebrew/Linuxbrew plugins is available, the most developed such
plugin is for the `Conda <https://conda.io/>`__ package manager. Conda has the
nice properties of allowing multiple versions of a package to be installed
simultaneously, not requiring elevated permissions to install Conda itself or
packages using Conda, and being cross-platform. For these reasons, cwltool may
run as a normal user, install its own Conda environment, and manage multiple
versions of Conda packages on Linux and macOS.
The Conda plugin can be endlessly configured, but a sensible set of defaults
that has proven a powerful stack for dependency management within the Galaxy tool
development ecosystem can be enabled by simply passing cwltool the
``--beta-conda-dependencies`` flag.
With this, we can use the seqtk example above without Docker or any externally
managed services; cwltool should install everything it needs
and create an environment for the tool. Try it out with the following command::
cwltool --beta-conda-dependencies tests/seqtk_seq.cwl tests/seqtk_seq_job.json
The CWL specification allows URIs to be attached to ``SoftwareRequirement`` s
to disambiguate package names. If the mapping files described above
allow deployers to adapt tools to their infrastructure, then this mechanism
allows tools to adapt their requirements to multiple package managers. To
demonstrate this within the context of the seqtk example, we can deliberately
break the package name we use and then specify a specific Conda package as follows:
.. code:: yaml

  hints:
    SoftwareRequirement:
      packages:
      - package: seqtk_seq
        version:
        - '1.2'
        specs:
        - https://anaconda.org/bioconda/seqtk
        - https://packages.debian.org/sid/seqtk
The example can be executed using the command::
cwltool --beta-conda-dependencies tests/seqtk_seq_wrong_name.cwl tests/seqtk_seq_job.json
The plugin framework for managing the resolution of these software requirements
is maintained as part of `galaxy-tool-util `__, a small,
portable subset of the Galaxy project. More information on configuration and
implementation can be found
at the following links:
- `Dependency Resolvers in Galaxy `__
- `Conda for [Galaxy] Tool Dependencies `__
- `Mapping Files - Implementation `__
- `Specifications - Implementation `__
- `Initial cwltool Integration Pull Request `__
Use with GA4GH Tool Registry API
================================
Cwltool can launch tools directly from `GA4GH Tool Registry API`_ endpoints.
By default, cwltool searches https://dockstore.org/ . Use ``--add-tool-registry`` to add other registries to the search path.
For example ::
cwltool quay.io/collaboratory/dockstore-tool-bamstats:develop test.json
and (defaults to latest when a version is not specified) ::
cwltool quay.io/collaboratory/dockstore-tool-bamstats test.json
For this example, grab the test.json (and input file) from https://github.com/CancerCollaboratory/dockstore-tool-bamstats ::
wget https://dockstore.org/api/api/ga4gh/v2/tools/quay.io%2Fbriandoconnor%2Fdockstore-tool-bamstats/versions/develop/PLAIN-CWL/descriptor/test.json
wget https://github.com/CancerCollaboratory/dockstore-tool-bamstats/raw/develop/rna.SRR948778.bam
.. _`GA4GH Tool Registry API`: https://github.com/ga4gh/tool-registry-schemas
Running MPI-based tools that need to be launched
================================================
Cwltool supports an extension to the CWL spec
``http://commonwl.org/cwltool#MPIRequirement``. When the tool
definition has this in its ``requirements``/``hints`` section, and
cwltool has been run with ``--enable-ext``, then the tool's command
line will be extended with the commands needed to launch it with
``mpirun`` or similar. You can specify the number of processes to
start as either a literal integer or an expression (that will result
in an integer). For example::
  #!/usr/bin/env cwl-runner
  cwlVersion: v1.1
  class: CommandLineTool
  $namespaces:
    cwltool: "http://commonwl.org/cwltool#"
  requirements:
    cwltool:MPIRequirement:
      processes: $(inputs.nproc)
  inputs:
    nproc:
      type: int
Interaction with containers: the MPIRequirement currently prepends its
commands to the front of the command line that is constructed. If you
wish to run a containerized application in parallel, this does work with
Singularity for simple use cases, depending upon the platform
setup. However, this combination should be considered "alpha"; please
do report any issues you have! This does not work with Docker at the
moment. (More precisely, you get ``n`` copies of the same single-process
image running at the same time that cannot communicate with each other.)
The host-specific parameters are configured in a simple YAML file
(specified with the ``--mpi-config-file`` flag). The allowed keys are
given in the following table; all are optional.
+----------------+------------------+----------+------------------------------+
| Key | Type | Default | Description |
+================+==================+==========+==============================+
| runner | str | "mpirun" | The primary command to use. |
+----------------+------------------+----------+------------------------------+
| nproc_flag | str | "-n" | Flag to set number of |
| | | | processes to start. |
+----------------+------------------+----------+------------------------------+
| default_nproc | int | 1 | Default number of processes. |
+----------------+------------------+----------+------------------------------+
| extra_flags | List[str] | [] | A list of any other flags to |
| | | | be added to the runner's |
| | | | command line before |
| | | | the ``baseCommand``. |
+----------------+------------------+----------+------------------------------+
| env_pass | List[str] | [] | A list of environment |
| | | | variables that should be |
| | | | passed from the host |
| | | | environment through to the |
| | | | tool (e.g., giving the |
| | | | node list as set by your |
| | | | scheduler). |
+----------------+------------------+----------+------------------------------+
| env_pass_regex | List[str] | [] | A list of python regular |
| | | | expressions that will be |
| | | | matched against the host's |
| | | | environment. Those that match|
| | | | will be passed through. |
+----------------+------------------+----------+------------------------------+
| env_set | Mapping[str,str] | {} | A dictionary whose keys are |
| | | | the environment variables set|
| | | | and the values being the |
| | | | values. |
+----------------+------------------+----------+------------------------------+
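For example, a small configuration for a hypothetical Open MPI cluster might
look like the following sketch (the runner, flags, and environment variable
shown here are illustrative, not recommendations):

.. code:: yaml

  runner: mpiexec
  nproc_flag: -np
  default_nproc: 2
  extra_flags:
  - --oversubscribe
  env_pass:
  - SLURM_JOB_NODELIST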
Enabling Fast Parser (experimental)
===================================
For very large workflows, ``cwltool`` can spend a lot of time in
initialization before the first step runs. There is an experimental
flag, ``--fast-parser``, which can dramatically reduce the
initialization overhead; however, as of this writing it has several limitations:

- Error reporting in general is worse than with the standard parser, so use it only with workflows that you already know to be correct.
- It does not check for dangling links (these will become runtime errors instead of loading errors).
- Several other cases fail, as documented in https://github.com/common-workflow-language/cwltool/pull/1720
***********
Development
***********
Running tests locally
=====================
- Running basic tests ``(/tests)``:
To run the basic tests after installing ``cwltool``, execute the following:

.. code:: bash

  pip install -r test-requirements.txt
  pytest  ## N.B. This requires node.js or docker to be available
To run the test suite in all supported Python environments, we use `tox `_.
First clone the complete code repository (see the ``git clone`` instructions
above) and then run the following in the terminal:
``pip install "tox<4"; tox -p``

A list of all environments can be seen using:
``tox --listenvs``
A specific test environment can be run using:
``tox -e <environment name>``
Additionally, a specific test can be run using this format:
``tox -e py310-unit -- -v tests/test_examples.py::test_scandeps``
- Running the entire suite of CWL conformance tests:
The GitHub repository for the CWL specifications contains a script that tests a CWL
implementation against a wide array of valid CWL files using the
`cwltest <https://github.com/common-workflow-language/cwltest>`_ program.
Instructions for running these tests can be found in the Common Workflow Language Specification repository at https://github.com/common-workflow-language/common-workflow-language/blob/main/CONFORMANCE_TESTS.md .
Import as a module
==================
Add
.. code:: python
import cwltool
to your script.
The easiest way to use cwltool to run a tool or workflow from Python is to use a Factory:
.. code:: python

  import cwltool.factory

  fac = cwltool.factory.Factory()
  echo = fac.make("echo.cwl")
  result = echo(inp="foo")
  # result["out"] == "foo"
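The Factory can also be configured with the same loading and runtime options
as the command line. A minimal sketch, assuming the ``runtime_context``
keyword argument and the ``use_container`` attribute of ``RuntimeContext``
(both present in recent cwltool releases):

.. code:: python

  import cwltool.factory
  from cwltool.context import RuntimeContext

  # Assumption: use_container toggles software container usage.
  runtime_context = RuntimeContext()
  runtime_context.use_container = False  # run tools directly on the host
  fac = cwltool.factory.Factory(runtime_context=runtime_context)

  echo = fac.make("echo.cwl")  # same example tool as above
  result = echo(inp="foo")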
CWL Tool Control Flow
=====================
Technical outline of how cwltool works internally, for maintainers (a minimal
sketch of the ``job()`` run loop follows this outline).

#. Use CWL ``load_tool()`` to load the document.
#. Fetches the document from file or URL
#. Applies preprocessing (syntax/identifier expansion and normalization)
#. Validates the document based on cwlVersion
#. If necessary, updates the document to the latest spec
#. Constructs a Process object using the ``make_tool()`` callback. This yields a
CommandLineTool, Workflow, or ExpressionTool. For workflows, this
recursively constructs each workflow step.
#. To construct custom types for CommandLineTool, Workflow, or
ExpressionTool, provide a custom ``make_tool()``
#. Iterate on the ``job()`` method of the Process object to get back runnable jobs.
#. ``job()`` is a generator method (uses the Python iterator protocol)
#. Each time the ``job()`` method is invoked in an iteration, it returns one
of: a runnable item (an object with a ``run()`` method), ``None`` (indicating
there is currently no work ready to run) or end of iteration (indicating
the process is complete.)
#. Invoke the runnable item by calling ``run()``. This runs the tool and gets output.
#. An output callback reports the output of a process.
#. ``job()`` may be iterated over multiple times. It will yield all the work
that is currently ready to run and then yield None.
#. ``Workflow`` objects create corresponding ``WorkflowJob`` and ``WorkflowJobStep`` objects to hold the workflow state for the duration of the job invocation.
#. The WorkflowJob iterates over each WorkflowJobStep and determines if the
inputs for the step are ready.
#. When a step is ready, it constructs an input object for that step and
iterates on the ``job()`` method of the workflow job step.
#. Each runnable item is yielded back up to the top-level run loop.
#. When a step job completes and receives an output callback, the
job outputs are assigned to the output of the workflow step.
#. When all steps are complete, the intermediate files are moved to a final
workflow output, intermediate directories are deleted, and the workflow's output callback is called.
#. A ``CommandLineTool``'s ``job()`` method yields a single runnable object.
#. The CommandLineTool ``job()`` method calls ``make_job_runner()`` to create a
``CommandLineJob`` object
#. The job method configures the CommandLineJob object by setting public
attributes
#. The job method iterates over file and directory inputs to the
CommandLineTool and creates a "path map".
#. Files are mapped from their "resolved" location to a "target" path where
they will appear at tool invocation (for example, a location inside a
Docker container.) The target paths are used on the command line.
#. Files are staged to target paths using either Docker volume binds (when
using containers) or symlinks (if not). This staging step enables files
to be logically rearranged or renamed independent of their source layout.
#. The ``run()`` method of CommandLineJob executes the command line tool or
Docker container, waits for it to complete, collects output, and makes
the output callback.
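The outline above condenses into a rough sketch of the core run loop. This is
not the actual cwltool scheduler (the real executors also wait on outstanding
jobs between passes and handle logging, provenance, and cleanup); it only
illustrates the ``job()`` iteration protocol:

.. code:: python

  # Illustrative sketch of the job()/run()/callback protocol described above.
  results = {}

  def output_callback(out, process_status):
      # Invoked when a process finishes; records its output object.
      results["output"] = out
      results["status"] = process_status

  def run_loop(process, job_order, runtime_context):
      for runnable in process.job(job_order, output_callback, runtime_context):
          if runnable is not None:
              # Execute the runnable item; this eventually triggers the
              # output callback registered above.
              runnable.run(runtime_context)
          else:
              # Nothing ready yet; a real executor would block here until
              # an outstanding job completes and more work becomes ready.
              pass
      return results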
Extension points
================
The following functions can be passed to main() to override or augment
the listed behaviors.
executor
::
executor(tool, job_order_object, runtimeContext, logger)
(Process, Dict[Text, Any], RuntimeContext, logging.Logger) -> Tuple[Dict[Text, Any], Text]
An implementation of the top-level workflow execution loop should
synchronously run a process object to completion and return the
output object.
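As a concrete illustration, the stock executors in ``cwltool.executors``
implement this interface. A hedged sketch that passes one explicitly to
``main()`` (the default behavior is equivalent; this only shows where the
hook lives):

.. code:: python

  import sys

  import cwltool.main
  from cwltool.executors import SingleJobExecutor  # serial, in-process executor

  if __name__ == "__main__":
      # MultithreadedJobExecutor is the parallel alternative.
      sys.exit(cwltool.main.main(sys.argv[1:], executor=SingleJobExecutor()))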
versionfunc
::
()
() -> Text
Return version string.
logger_handler
::
logger_handler
logging.Handler
Handler object for logging.
The following functions can be set in LoadingContext to override or
augment the listed behaviors.
fetcher_constructor
::
fetcher_constructor(cache, session)
(Dict[unicode, unicode], requests.sessions.Session) -> Fetcher
Construct a Fetcher object with the supplied cache and HTTP session.
resolver
::
resolver(document_loader, document)
(Loader, Union[Text, dict[Text, Any]]) -> Text
Resolve a relative document identifier to an absolute one that can be fetched.
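For example, a hedged sketch of a resolver that maps bare tool names onto a
local directory (the directory path and naming convention are hypothetical):

.. code:: python

  from cwltool.context import LoadingContext
  from cwltool.load_tool import load_tool

  def local_dir_resolver(document_loader, uri):
      # Hypothetical convention: bare names resolve to /opt/cwl-tools/<name>.cwl
      if "/" not in uri and not uri.endswith(".cwl"):
          return "file:///opt/cwl-tools/%s.cwl" % uri
      return uri

  loading_context = LoadingContext()
  loading_context.resolver = local_dir_resolver
  tool = load_tool("echo", loading_context)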
The following functions can be set in RuntimeContext to override or
augment the listed behaviors.
construct_tool_object
::
construct_tool_object(toolpath_object, loadingContext)
(MutableMapping[Text, Any], LoadingContext) -> Process
Hook to construct a Process object (e.g. CommandLineTool) from a document.
select_resources
::
select_resources(request, runtimeContext)
(Dict[str, int], RuntimeContext) -> Dict[Text, int]
Take a resource request and turn it into a concrete resource assignment.
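For instance, a hedged sketch that always grants the minimum of each requested
range (the request and result key names are assumed from the CWL
``ResourceRequirement`` fields, not taken verbatim from the cwltool sources):

.. code:: python

  from cwltool.context import RuntimeContext

  def select_min_resources(request, runtime_context):
      # Assumed request keys mirror ResourceRequirement: coresMin, ramMin, etc.
      return {
          "cores": request["coresMin"],
          "ram": request["ramMin"],
          "tmpdirSize": request["tmpdirMin"],
          "outdirSize": request["outdirMin"],
      }

  runtime_context = RuntimeContext()
  runtime_context.select_resources = select_min_resources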
make_fs_access
::
make_fs_access(basedir)
(Text) -> StdFsAccess
Return a file system access object.
In addition, when providing custom subclasses of Process objects, you can override the following methods:
CommandLineTool.make_job_runner
::
make_job_runner(RuntimeContext)
(RuntimeContext) -> Type[JobBase]
Create and return a job runner object (this implements concrete execution of a command line tool).
Workflow.make_workflow_step
::
make_workflow_step(toolpath_object, pos, loadingContext, parentworkflowProv)
(Dict[Text, Any], int, LoadingContext, Optional[ProvenanceProfile]) -> WorkflowStep
Create and return a workflow step object.
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/README.rst 0000644 0001750 0001750 00000112377 14621376047 015771 0 ustar 00michael michael #############################################################################################
``cwltool``: The reference reference implementation of the Common Workflow Language standards
#############################################################################################
|Linux Status| |Coverage Status| |Docs Status|
PyPI: |PyPI Version| |PyPI Downloads Month| |Total PyPI Downloads|
Conda: |Conda Version| |Conda Installs|
Debian: |Debian Testing package| |Debian Stable package|
Quay.io (Docker): |Quay.io Container|
.. |Linux Status| image:: https://github.com/common-workflow-language/cwltool/actions/workflows/ci-tests.yml/badge.svg?branch=main
:target: https://github.com/common-workflow-language/cwltool/actions/workflows/ci-tests.yml
.. |Debian Stable package| image:: https://badges.debian.net/badges/debian/stable/cwltool/version.svg
:target: https://packages.debian.org/stable/cwltool
.. |Debian Testing package| image:: https://badges.debian.net/badges/debian/testing/cwltool/version.svg
:target: https://packages.debian.org/testing/cwltool
.. |Coverage Status| image:: https://img.shields.io/codecov/c/github/common-workflow-language/cwltool.svg
:target: https://codecov.io/gh/common-workflow-language/cwltool
.. |PyPI Version| image:: https://badge.fury.io/py/cwltool.svg
:target: https://badge.fury.io/py/cwltool
.. |PyPI Downloads Month| image:: https://pepy.tech/badge/cwltool/month
:target: https://pepy.tech/project/cwltool
.. |Total PyPI Downloads| image:: https://static.pepy.tech/personalized-badge/cwltool?period=total&units=international_system&left_color=black&right_color=orange&left_text=Total%20PyPI%20Downloads
:target: https://pepy.tech/project/cwltool
.. |Conda Version| image:: https://anaconda.org/conda-forge/cwltool/badges/version.svg
:target: https://anaconda.org/conda-forge/cwltool
.. |Conda Installs| image:: https://anaconda.org/conda-forge/cwltool/badges/downloads.svg
:target: https://anaconda.org/conda-forge/cwltool
.. |Quay.io Container| image:: https://quay.io/repository/commonwl/cwltool/status
:target: https://quay.io/repository/commonwl/cwltool
.. |Docs Status| image:: https://readthedocs.org/projects/cwltool/badge/?version=latest
:target: https://cwltool.readthedocs.io/en/latest/?badge=latest
:alt: Documentation Status
This is the reference implementation of the `Common Workflow Language open
standards `_. It is intended to be feature complete
and provide comprehensive validation of CWL
files as well as provide other tools related to working with CWL.
``cwltool`` is written and tested for
`Python `_ ``3.x {x = 6, 8, 9, 10, 11}``
The reference implementation consists of two packages. The ``cwltool`` package
is the primary Python module containing the reference implementation in the
``cwltool`` module and console executable by the same name.
The ``cwlref-runner`` package is optional and provides an additional entry point
under the alias ``cwl-runner``, which is the implementation-agnostic name for the
default CWL interpreter installed on a host.
``cwltool`` is provided by the CWL project, `a member project of Software Freedom Conservancy `_
and our `many contributors `_.
.. contents:: Table of Contents
*******
Install
*******
``cwltool`` packages
====================
Your operating system may offer cwltool directly. For `Debian `_, `Ubuntu `_,
and similar Linux distribution try
.. code:: bash
sudo apt-get install cwltool
If you encounter an error, first try to update package information by using
.. code:: bash
sudo apt-get update
If you are running macOS X or other UNIXes and you want to use packages prepared by the conda-forge project, then
please follow the install instructions for `conda-forge `_ (if you haven't already) and then
.. code:: bash
conda install -c conda-forge cwltool
All of the above methods of installing ``cwltool`` use packages that might contain bugs already fixed in newer versions or be missing desired features.
If the packaged version of ``cwltool`` available to you is too old, then we recommend installing using ``pip`` and ``venv``
.. code:: bash
python3 -m venv env # Create a virtual environment named 'env' in the current directory
source env/bin/activate # Activate environment before installing `cwltool`
Then install the latest ``cwlref-runner`` package from PyPi (which will install the latest ``cwltool`` package as
well)
.. code:: bash
pip install cwlref-runner
If installing alongside another CWL implementation (like ``toil-cwl-runner`` or ``arvados-cwl-runner``) then instead run
.. code:: bash
pip install cwltool
MS Windows users
================
1. `Install Windows Subsystem for Linux 2 and Docker Desktop `_.
2. `Install Debian from the Microsoft Store `_.
3. Set Debian as your default WSL 2 distro: ``wsl --set-default debian``.
4. Return to the Docker Desktop, choose ``Settings`` → ``Resources`` → ``WSL Integration`` and under "Enable integration with additional distros" select "Debian",
5. Reboot if you have not yet already.
6. Launch Debian and follow the Linux instructions above (``apt-get install cwltool`` or use the ``venv`` method)
Network problems from within WSL2? Try `these instructions `_ followed by ``wsl --shutdown``.
``cwltool`` development version
===============================
Or you can skip the direct ``pip`` commands above and install the latest development version of ``cwltool``:
.. code:: bash
git clone https://github.com/common-workflow-language/cwltool.git # clone (copy) the cwltool git repository
cd cwltool # Change to source directory that git clone just downloaded
pip install .[deps] # Installs ``cwltool`` from source
cwltool --version # Check if the installation works correctly
Remember, if co-installing multiple CWL implementations, then you need to
maintain which implementation ``cwl-runner`` points to via a symbolic file
system link or `another facility `_.
Recommended Software
====================
We strongly suggested to have the following installed:
* One of the following software container engines
* `Podman `_
* `Docker `_
* Singularity/Apptainer: See `Using Singularity`_
* udocker: See `Using uDocker`_
* `node.js `_ for evaluating CWL Expressions quickly
(required for `udocker` users, optional but recommended for the other container engines).
Without these, some examples in the CWL tutorials at http://www.commonwl.org/user_guide/ may not work.
***********************
Run on the command line
***********************
Simple command::
cwl-runner my_workflow.cwl my_inputs.yaml
Or if you have multiple CWL implementations installed and you want to override
the default cwl-runner then use::
cwltool my_workflow.cwl my_inputs.yml
You can set cwltool options in the environment with ``CWLTOOL_OPTIONS``,
these will be inserted at the beginning of the command line::
export CWLTOOL_OPTIONS="--debug"
Use with boot2docker on macOS
=============================
boot2docker runs Docker inside a virtual machine, and it only mounts ``Users``
on it. The default behavior of CWL is to create temporary directories under e.g.
``/Var`` which is not accessible to Docker containers.
To run CWL successfully with boot2docker you need to set the ``--tmpdir-prefix``
and ``--tmp-outdir-prefix`` to somewhere under ``/Users``::
$ cwl-runner --tmp-outdir-prefix=/Users/username/project --tmpdir-prefix=/Users/username/project wc-tool.cwl wc-job.json
Using uDocker
=============
Some shared computing environments don't support Docker software containers for technical or policy reasons.
As a workaround, the CWL reference runner supports using the `udocker `_
program on Linux using ``--udocker``.
udocker installation: https://indigo-dc.github.io/udocker/installation_manual.html
Run `cwltool` just as you usually would, but with ``--udocker`` prior to the workflow path:
.. code:: bash
cwltool --udocker https://github.com/common-workflow-language/common-workflow-language/raw/main/v1.0/v1.0/test-cwl-out2.cwl https://github.com/common-workflow-language/common-workflow-language/raw/main/v1.0/v1.0/empty.json
As was mentioned in the `Recommended Software`_ section,
Using Singularity
=================
``cwltool`` can also use `Singularity `_ version 2.6.1
or later as a Docker container runtime.
``cwltool`` with Singularity will run software containers specified in
``DockerRequirement`` and therefore works with Docker images only, native
Singularity images are not supported. To use Singularity as the Docker container
runtime, provide ``--singularity`` command line option to ``cwltool``.
With Singularity, ``cwltool`` can pass all CWL v1.0 conformance tests, except
those involving Docker container ENTRYPOINTs.
Example
.. code:: bash
cwltool --singularity https://github.com/common-workflow-language/common-workflow-language/raw/main/v1.0/v1.0/cat3-tool-mediumcut.cwl https://github.com/common-workflow-language/common-workflow-language/raw/main/v1.0/v1.0/cat-job.json
Running a tool or workflow from remote or local locations
=========================================================
``cwltool`` can run tool and workflow descriptions on both local and remote
systems via its support for HTTP[S] URLs.
Input job files and Workflow steps (via the `run` directive) can reference CWL
documents using absolute or relative local filesystem paths. If a relative path
is referenced and that document isn't found in the current directory, then the
following locations will be searched:
http://www.commonwl.org/v1.0/CommandLineTool.html#Discovering_CWL_documents_on_a_local_filesystem
You can also use `cwldep `_
to manage dependencies on external tools and workflows.
Overriding workflow requirements at load time
=============================================
Sometimes a workflow needs additional requirements to run in a particular
environment or with a particular dataset. To avoid the need to modify the
underlying workflow, cwltool supports requirement "overrides".
The format of the "overrides" object is a mapping of item identifier (workflow,
workflow step, or command line tool) to the process requirements that should be applied.
.. code:: yaml
cwltool:overrides:
echo.cwl:
requirements:
EnvVarRequirement:
envDef:
MESSAGE: override_value
Overrides can be specified either on the command line, or as part of the job
input document. Workflow steps are identified using the name of the workflow
file followed by the step name as a document fragment identifier "#id".
Override identifiers are relative to the top-level workflow document.
.. code:: bash
cwltool --overrides overrides.yml my-tool.cwl my-job.yml
.. code:: yaml
input_parameter1: value1
input_parameter2: value2
cwltool:overrides:
workflow.cwl#step1:
requirements:
EnvVarRequirement:
envDef:
MESSAGE: override_value
.. code:: bash
cwltool my-tool.cwl my-job-with-overrides.yml
Combining parts of a workflow into a single document
====================================================
Use ``--pack`` to combine a workflow made up of multiple files into a
single compound document. This operation takes all the CWL files
referenced by a workflow and builds a new CWL document with all
Process objects (CommandLineTool and Workflow) in a list in the
``$graph`` field. Cross references (such as ``run:`` and ``source:``
fields) are updated to internal references within the new packed
document. The top-level workflow is named ``#main``.
.. code:: bash
cwltool --pack my-wf.cwl > my-packed-wf.cwl
Running only part of a workflow
===============================
You can run a partial workflow with the ``--target`` (``-t``) option. This
takes the name of an output parameter, workflow step, or input
parameter in the top-level workflow. You may provide multiple
targets.
.. code:: bash
cwltool --target step3 my-wf.cwl
If a target is an output parameter, it will only run only the steps
that contribute to that output. If a target is a workflow step, it
will run the workflow starting from that step. If a target is an
input parameter, it will only run the steps connected to
that input.
Use ``--print-targets`` to get a listing of the targets of a workflow.
To see which steps will run, use ``--print-subgraph`` with
``--target`` to get a printout of the workflow subgraph for the
selected targets.
.. code:: bash
cwltool --print-targets my-wf.cwl
cwltool --target step3 --print-subgraph my-wf.cwl > my-wf-starting-from-step3.cwl
Visualizing a CWL document
==========================
The ``--print-dot`` option will print a file suitable for Graphviz ``dot`` program. Here is a bash onliner to generate a Scalable Vector Graphic (SVG) file:
.. code:: bash
cwltool --print-dot my-wf.cwl | dot -Tsvg > my-wf.svg
Modeling a CWL document as RDF
==============================
CWL documents can be expressed as RDF triple graphs.
.. code:: bash
cwltool --print-rdf --rdf-serializer=turtle mywf.cwl
Environment Variables in cwltool
================================
This reference implementation supports several ways of setting
environment variables for tools, in addition to the standard
``EnvVarRequirement``. The sequence of steps applied to create the
environment is:
0. If the ``--preserve-entire-environment`` flag is present, then begin with the current
environment, else begin with an empty environment.
1. Add any variables specified by ``--preserve-environment`` option(s).
2. Set ``TMPDIR`` and ``HOME`` per `the CWL v1.0+ CommandLineTool specification `_.
3. Apply any ``EnvVarRequirement`` from the ``CommandLineTool`` description.
4. Apply any manipulations required by any ``cwltool:MPIRequirement`` extensions.
5. Substitute any secrets required by ``Secrets`` extension.
6. Modify the environment in response to ``SoftwareRequirement`` (see below).
Leveraging SoftwareRequirements (Beta)
--------------------------------------
CWL tools may be decorated with ``SoftwareRequirement`` hints that cwltool
may in turn use to resolve to packages in various package managers or
dependency management systems such as `Environment Modules
`__.
Utilizing ``SoftwareRequirement`` hints using cwltool requires an optional
dependency, for this reason be sure to use specify the ``deps`` modifier when
installing cwltool. For instance::
$ pip install 'cwltool[deps]'
Installing cwltool in this fashion enables several new command line options.
The most general of these options is ``--beta-dependency-resolvers-configuration``.
This option allows one to specify a dependency resolver's configuration file.
This file may be specified as either XML or YAML and very simply describes various
plugins to enable to "resolve" ``SoftwareRequirement`` dependencies.
Using these hints will allow cwltool to modify the environment in
which your tool runs, for example by loading one or more Environment
Modules. The environment is constructed as above, then the environment
may modified by the selected tool resolver. This currently means that
you cannot override any environment variables set by the selected tool
resolver. Note that the environment given to the configured dependency
resolver has the variable `_CWLTOOL` set to `1` to allow introspection.
To discuss some of these plugins and how to configure them, first consider the
following ``hint`` definition for an example CWL tool.
.. code:: yaml
SoftwareRequirement:
packages:
- package: seqtk
version:
- r93
Now imagine deploying cwltool on a cluster with Software Modules installed
and that a ``seqtk`` module is available at version ``r93``. This means cluster
users likely won't have the binary ``seqtk`` on their ``PATH`` by default, but after
sourcing this module with the command ``modulecmd sh load seqtk/r93`` ``seqtk`` is
available on the ``PATH``. A simple dependency resolvers configuration file, called
``dependency-resolvers-conf.yml`` for instance, that would enable cwltool to source
the correct module environment before executing the above tool would simply be:
.. code:: yaml
- type: modules
The outer list indicates that one plugin is being enabled, the plugin parameters are
defined as a dictionary for this one list item. There is only one required parameter
for the plugin above, this is ``type`` and defines the plugin type. This parameter
is required for all plugins. The available plugins and the parameters
available for each are documented (incompletely) `here
`__.
Unfortunately, this documentation is in the context of Galaxy tool
``requirement`` s instead of CWL ``SoftwareRequirement`` s, but the concepts map fairly directly.
cwltool is distributed with an example of such seqtk tool and sample corresponding
job. It could executed from the cwltool root using a dependency resolvers
configuration file such as the above one using the command::
cwltool --beta-dependency-resolvers-configuration /path/to/dependency-resolvers-conf.yml \
tests/seqtk_seq.cwl \
tests/seqtk_seq_job.json
This example demonstrates both that cwltool can leverage
existing software installations and also handle workflows with dependencies
on different versions of the same software and libraries. However the above
example does require an existing module setup so it is impossible to test this example
"out of the box" with cwltool. For a more isolated test that demonstrates all
the same concepts - the resolver plugin type ``galaxy_packages`` can be used.
"Galaxy packages" are a lighter-weight alternative to Environment Modules that are
really just defined by a way to lay out directories into packages and versions
to find little scripts that are sourced to modify the environment. They have
been used for years in Galaxy community to adapt Galaxy tools to cluster
environments but require neither knowledge of Galaxy nor any special tools to
setup. These should work just fine for CWL tools.
The cwltool source code repository's test directory is setup with a very simple
directory that defines a set of "Galaxy packages" (but really just defines one
package named ``random-lines``). The directory layout is simply::
tests/test_deps_env/
random-lines/
1.0/
env.sh
If the ``galaxy_packages`` plugin is enabled and pointed at the
``tests/test_deps_env`` directory in cwltool's root and a ``SoftwareRequirement``
such as the following is encountered.
.. code:: yaml
hints:
SoftwareRequirement:
packages:
- package: 'random-lines'
version:
- '1.0'
Then cwltool will simply find that ``env.sh`` file and source it before executing
the corresponding tool. That ``env.sh`` script is only responsible for modifying
the job's ``PATH`` to add the required binaries.
This is a full example that works since resolving "Galaxy packages" has no
external requirements. Try it out by executing the following command from cwltool's
root directory::
cwltool --beta-dependency-resolvers-configuration tests/test_deps_env_resolvers_conf.yml \
tests/random_lines.cwl \
tests/random_lines_job.json
The resolvers configuration file in the above example was simply:
.. code:: yaml
- type: galaxy_packages
base_path: ./tests/test_deps_env
It is possible that the ``SoftwareRequirement`` s in a given CWL tool will not
match the module names for a given cluster. Such requirements can be re-mapped
to specific deployed packages or versions using another file specified using
the resolver plugin parameter `mapping_files`. We will
demonstrate this using `galaxy_packages,` but the concepts apply equally well
to Environment Modules or Conda packages (described below), for instance.
So consider the resolvers configuration file.
(`tests/test_deps_env_resolvers_conf_rewrite.yml`):
.. code:: yaml
- type: galaxy_packages
base_path: ./tests/test_deps_env
mapping_files: ./tests/test_deps_mapping.yml
And the corresponding mapping configuration file (`tests/test_deps_mapping.yml`):
.. code:: yaml
- from:
name: randomLines
version: 1.0.0-rc1
to:
name: random-lines
version: '1.0'
This is saying if cwltool encounters a requirement of ``randomLines`` at version
``1.0.0-rc1`` in a tool, to rewrite to our specific plugin as ``random-lines`` at
version ``1.0``. cwltool has such a test tool called ``random_lines_mapping.cwl``
that contains such a source ``SoftwareRequirement``. To try out this example with
mapping, execute the following command from the cwltool root directory::
cwltool --beta-dependency-resolvers-configuration tests/test_deps_env_resolvers_conf_rewrite.yml \
tests/random_lines_mapping.cwl \
tests/random_lines_job.json
The previous examples demonstrated leveraging existing infrastructure to
provide requirements for CWL tools. If instead a real package manager is used
cwltool has the opportunity to install requirements as needed. While initial
support for Homebrew/Linuxbrew plugins is available, the most developed such
plugin is for the `Conda `__ package manager. Conda has the nice properties
of allowing multiple versions of a package to be installed simultaneously,
not requiring evaluated permissions to install Conda itself or packages using
Conda, and being cross-platform. For these reasons, cwltool may run as a normal
user, install its own Conda environment and manage multiple versions of Conda packages
on Linux and Mac OS X.
The Conda plugin can be endlessly configured, but a sensible set of defaults
that has proven a powerful stack for dependency management within the Galaxy tool
development ecosystem can be enabled by simply passing cwltool the
``--beta-conda-dependencies`` flag.
With this, we can use the seqtk example above without Docker or any externally managed services - cwltool should install everything it needs
and create an environment for the tool. Try it out with the following command::
cwltool --beta-conda-dependencies tests/seqtk_seq.cwl tests/seqtk_seq_job.json
The CWL specification allows URIs to be attached to ``SoftwareRequirement`` s
that allow disambiguation of package names. If the mapping files described above
allow deployers to adapt tools to their infrastructure, this mechanism allows
tools to adapt their requirements to multiple package managers. To demonstrate
this within the context of the seqtk, we can simply break the package name we
use and then specify a specific Conda package as follows:
.. code:: yaml
hints:
SoftwareRequirement:
packages:
- package: seqtk_seq
version:
- '1.2'
specs:
- https://anaconda.org/bioconda/seqtk
- https://packages.debian.org/sid/seqtk
The example can be executed using the command::
cwltool --beta-conda-dependencies tests/seqtk_seq_wrong_name.cwl tests/seqtk_seq_job.json
The plugin framework for managing the resolution of these software requirements
as maintained as part of `galaxy-tool-util `__ - a small,
portable subset of the Galaxy project. More information on configuration and implementation can be found
at the following links:
- `Dependency Resolvers in Galaxy `__
- `Conda for [Galaxy] Tool Dependencies `__
- `Mapping Files - Implementation `__
- `Specifications - Implementation `__
- `Initial cwltool Integration Pull Request `__
Use with GA4GH Tool Registry API
================================
Cwltool can launch tools directly from `GA4GH Tool Registry API`_ endpoints.
By default, cwltool searches https://dockstore.org/ . Use ``--add-tool-registry`` to add other registries to the search path.
For example ::
cwltool quay.io/collaboratory/dockstore-tool-bamstats:develop test.json
and (defaults to latest when a version is not specified) ::
cwltool quay.io/collaboratory/dockstore-tool-bamstats test.json
For this example, grab the test.json (and input file) from https://github.com/CancerCollaboratory/dockstore-tool-bamstats ::
wget https://dockstore.org/api/api/ga4gh/v2/tools/quay.io%2Fbriandoconnor%2Fdockstore-tool-bamstats/versions/develop/PLAIN-CWL/descriptor/test.json
wget https://github.com/CancerCollaboratory/dockstore-tool-bamstats/raw/develop/rna.SRR948778.bam
.. _`GA4GH Tool Registry API`: https://github.com/ga4gh/tool-registry-schemas
Running MPI-based tools that need to be launched
================================================
Cwltool supports an extension to the CWL spec
``http://commonwl.org/cwltool#MPIRequirement``. When the tool
definition has this in its ``requirements``/``hints`` section, and
cwltool has been run with ``--enable-ext``, then the tool's command
line will be extended with the commands needed to launch it with
``mpirun`` or similar. You can specify the number of processes to
start as either a literal integer or an expression (that will result
in an integer). For example::
#!/usr/bin/env cwl-runner
cwlVersion: v1.1
class: CommandLineTool
$namespaces:
cwltool: "http://commonwl.org/cwltool#"
requirements:
cwltool:MPIRequirement:
processes: $(inputs.nproc)
inputs:
nproc:
type: int
Interaction with containers: the MPIRequirement currently prepends its
commands to the front of the command line that is constructed. If you
wish to run a containerized application in parallel, for simple use
cases, this does work with Singularity, depending upon the platform
setup. However, this combination should be considered "alpha" -- please
do report any issues you have! This does not work with Docker at the
moment. (More precisely, you get `n` copies of the same single process
image run at the same time that cannot communicate with each other.)
The host-specific parameters are configured in a simple YAML file
(specified with the ``--mpi-config-file`` flag). The allowed keys are
given in the following table; all are optional.
+----------------+------------------+----------+------------------------------+
| Key | Type | Default | Description |
+================+==================+==========+==============================+
| runner | str | "mpirun" | The primary command to use. |
+----------------+------------------+----------+------------------------------+
| nproc_flag | str | "-n" | Flag to set number of |
| | | | processes to start. |
+----------------+------------------+----------+------------------------------+
| default_nproc | int | 1 | Default number of processes. |
+----------------+------------------+----------+------------------------------+
| extra_flags | List[str] | [] | A list of any other flags to |
| | | | be added to the runner's |
| | | | command line before |
| | | | the ``baseCommand``. |
+----------------+------------------+----------+------------------------------+
| env_pass | List[str] | [] | A list of environment |
| | | | variables that should be |
| | | | passed from the host |
| | | | environment through to the |
| | | | tool (e.g., giving the |
| | | | node list as set by your |
| | | | scheduler). |
+----------------+------------------+----------+------------------------------+
| env_pass_regex | List[str] | [] | A list of python regular |
| | | | expressions that will be |
| | | | matched against the host's |
| | | | environment. Those that match|
| | | | will be passed through. |
+----------------+------------------+----------+------------------------------+
| env_set        | Mapping[str,str] | {}       | A dictionary of environment  |
|                |                  |          | variables to set, mapping    |
|                |                  |          | names to values.             |
+----------------+------------------+----------+------------------------------+
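As a concrete illustration, here is a sketch of such a configuration file for a Slurm-style cluster; every value shown is an assumption for this example, not a default:

.. code:: yaml

   runner: "mpiexec"
   nproc_flag: "-np"
   default_nproc: 2
   extra_flags: ["--oversubscribe"]
   env_pass: ["SLURM_JOB_NODELIST"]
   env_pass_regex: ["SLURM_.*"]
   env_set:
     OMP_NUM_THREADS: "1"

It would then be passed as ``cwltool --enable-ext --mpi-config-file mpi.yml ...``.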
Enabling Fast Parser (experimental)
===================================
For very large workflows, `cwltool` can spend a lot of time in
initialization, before the first step runs. There is an experimental
flag ``--fast-parser`` which can dramatically reduce the
initialization overhead; however, as of this writing it has several limitations:
- Error reporting in general is worse than with the standard parser, so you will want to use it with workflows that you know are already correct.
- It does not check for dangling links (these will become runtime errors instead of loading errors).
- Several other cases fail, as documented in https://github.com/common-workflow-language/cwltool/pull/1720
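To try it (the workflow and job file names are illustrative) ::

   cwltool --fast-parser big-workflow.cwl job.yaml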
***********
Development
***********
Running tests locally
=====================
- Running basic tests (``tests/``):
To run the basic tests after installing `cwltool`, execute the following:
.. code:: bash
pip install -rtest-requirements.txt
pytest ## N.B. This requires node.js or docker to be available
To run various tests in all supported Python environments, we use `tox <https://tox.wiki/en/latest/>`_. To run the test suite in all supported Python environments,
first clone the complete code repository (see the ``git clone`` instructions above) and then run
the following in the terminal:
``pip install "tox<4"; tox -p``
A list of all environments can be seen using:
``tox --listenvs``
and a specific test environment can be run using:
``tox -e <envname>``
and additionally run a specific test using this format:
``tox -e py310-unit -- -v tests/test_examples.py::test_scandeps``
- Running the entire suite of CWL conformance tests:
The GitHub repository for the CWL specifications contains a script that tests a CWL
implementation against a wide array of valid CWL files using the `cwltest <https://github.com/common-workflow-language/cwltest>`_
program.
Instructions for running these tests can be found in the Common Workflow Language Specification repository at https://github.com/common-workflow-language/common-workflow-language/blob/main/CONFORMANCE_TESTS.md .
Import as a module
==================
Add
.. code:: python
import cwltool
to your script.
The easiest way to use cwltool to run a tool or workflow from Python is to use a Factory:
.. code:: python
import cwltool.factory
fac = cwltool.factory.Factory()
echo = fac.make("echo.cwl")
result = echo(inp="foo")
# result["out"] == "foo"
CWL Tool Control Flow
=====================
Technical outline of how cwltool works internally, for maintainers.
#. Use CWL ``load_tool()`` to load the document.
#. Fetches the document from file or URL
#. Applies preprocessing (syntax/identifier expansion and normalization)
#. Validates the document based on cwlVersion
#. If necessary, updates the document to the latest spec
#. Constructs a Process object using the ``make_tool()`` callback. This yields a
CommandLineTool, Workflow, or ExpressionTool. For workflows, this
recursively constructs each workflow step.
#. To construct custom types for CommandLineTool, Workflow, or
ExpressionTool, provide a custom ``make_tool()``
#. Iterate on the ``job()`` method of the Process object to get back runnable jobs (see the sketch after this outline).
#. ``job()`` is a generator method (uses the Python iterator protocol)
#. Each time the ``job()`` method is invoked in an iteration, it returns one
of: a runnable item (an object with a ``run()`` method), ``None`` (indicating
there is currently no work ready to run), or end of iteration (indicating
the process is complete).
#. Invoke the runnable item by calling ``run()``. This runs the tool and gets output.
#. An output callback reports the output of a process.
#. ``job()`` may be iterated over multiple times. It will yield all the work
that is currently ready to run and then yield ``None``.
#. ``Workflow`` objects create corresponding ``WorkflowJob`` and ``WorkflowJobStep`` objects to hold the workflow state for the duration of the job invocation.
#. The WorkflowJob iterates over each WorkflowJobStep and determines if the
inputs to the step are ready.
#. When a step is ready, it constructs an input object for that step and
iterates on the ``job()`` method of the workflow job step.
#. Each runnable item is yielded back up to the top-level run loop.
#. When a step job completes and receives an output callback, the
job outputs are assigned to the output of the workflow step.
#. When all steps are complete, the intermediate files are moved to a final
workflow output, intermediate directories are deleted, and the workflow's output callback is called.
#. ``CommandLineTool`` ``job()`` yields a single runnable object.
#. The CommandLineTool ``job()`` method calls ``make_job_runner()`` to create a
``CommandLineJob`` object.
#. The job method configures the CommandLineJob object by setting public
attributes.
#. The job method iterates over the file and directory inputs to the
CommandLineTool and creates a "path map".
#. Files are mapped from their "resolved" location to a "target" path where
they will appear at tool invocation (for example, a location inside a
Docker container). The target paths are used on the command line.
#. Files are staged to target paths using either Docker volume binds (when
using containers) or symlinks (if not). This staging step enables files
to be logically rearranged or renamed independent of their source layout.
#. The ``run()`` method of CommandLineJob executes the command line tool or
Docker container, waits for it to complete, collects output, and makes
the output callback.
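The outline above corresponds roughly to the following simplified sketch of a synchronous run loop; this is not cwltool's actual executor, and the callback and context names are illustrative:

.. code:: python

   def output_callback(out, process_status):
       """Receive the final output object and status of a process."""
       print(process_status, out)

   def run_loop(process, job_order, runtime_context):
       # Iterate on job() to get runnable items, per the outline above.
       for runnable in process.job(job_order, output_callback, runtime_context):
           if runnable is not None:
               # Executes the tool (or container) and triggers callbacks.
               runnable.run(runtime_context)
           else:
               # No work is ready yet; a real executor would wait for
               # outstanding jobs to complete before iterating again.
               pass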
Extension points
================
The following functions can be passed to main() to override or augment
the listed behaviors.
executor
::
executor(tool, job_order_object, runtimeContext, logger)
(Process, Dict[Text, Any], RuntimeContext, logging.Logger) -> Tuple[Dict[Text, Any], Text]
An implementation of the top-level workflow execution loop should
synchronously run a process object to completion and return the
output object.
versionfunc
::
()
() -> Text
Return version string.
logger_handler
::
logger_handler
logging.Handler
Handler object for logging.
The following functions can be set in LoadingContext to override or
augment the listed behaviors.
fetcher_constructor
::
fetcher_constructor(cache, session)
(Dict[Text, Text], requests.sessions.Session) -> Fetcher
Construct a Fetcher object with the supplied cache and HTTP session.
resolver
::
resolver(document_loader, document)
(Loader, Union[Text, Dict[Text, Any]]) -> Text
Resolve a relative document identifier to an absolute one that can be fetched.
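As an illustration, a minimal resolver sketch; the registry URL is an assumption, and returning ``None`` declines so that other resolvers may be tried, matching the behavior of cwltool's built-in resolvers:

.. code:: python

   def my_resolver(document_loader, uri):
       """Map bare tool names to a hypothetical in-house registry."""
       if isinstance(uri, str) and "://" not in uri and not uri.startswith("/"):
           return "https://tools.example.org/cwl/" + uri
       return None  # not ours; let the next resolver try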
The following functions can be set in RuntimeContext to override or
augment the listed behaviors.
construct_tool_object
::
construct_tool_object(toolpath_object, loadingContext)
(MutableMapping[Text, Any], LoadingContext) -> Process
Hook to construct a Process object (e.g. CommandLineTool) from a document.
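A minimal sketch of such a hook, delegating to cwltool's ``default_make_tool`` for anything it does not handle (the subclass name is illustrative):

.. code:: python

   from cwltool.command_line_tool import CommandLineTool
   from cwltool.workflow import default_make_tool

   class MyCommandLineTool(CommandLineTool):
       """Illustrative CommandLineTool subclass."""

   def my_construct_tool_object(toolpath_object, loadingContext):
       # Use the custom class for CommandLineTools only.
       if toolpath_object["class"] == "CommandLineTool":
           return MyCommandLineTool(toolpath_object, loadingContext)
       return default_make_tool(toolpath_object, loadingContext)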
select_resources
::
select_resources(request, runtimeContext)
(Dict[Text, int], RuntimeContext) -> Dict[Text, int]
Take a resource request and turn it into a concrete resource assignment.
make_fs_access
::
make_fs_access(basedir)
(Text) -> StdFsAccess
Return a file system access object.
In addition, when providing custom subclasses of Process objects, you can override the following methods:
CommandLineTool.make_job_runner
::
make_job_runner(RuntimeContext)
(RuntimeContext) -> Type[JobBase]
Create and return a job runner object (this implements concrete execution of a command line tool).
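For example, a sketch of a subclass that could return a custom job class after inspecting the context; here it simply falls back to the default selection (the class name is illustrative):

.. code:: python

   from typing import Type

   from cwltool.command_line_tool import CommandLineTool
   from cwltool.context import RuntimeContext
   from cwltool.job import JobBase

   class MyTool(CommandLineTool):
       def make_job_runner(self, runtimeContext: RuntimeContext) -> Type[JobBase]:
           # A custom JobBase subclass could be returned here instead.
           return super().make_job_runner(runtimeContext)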
Workflow.make_workflow_step
::
make_workflow_step(toolpath_object, pos, loadingContext, parentworkflowProv)
(Dict[Text, Any], int, LoadingContext, Optional[ProvenanceProfile]) -> WorkflowStep
Create and return a workflow step object.
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/build-cwltool-docker.sh 0000755 0001750 0001750 00000002074 14621376047 020656 0 ustar 00michael michael #!/bin/bash
set -ex
engine=${ENGINE:-docker} # example: `ENGINE=podman ./build-cwltool-docker.sh`
${engine} build --file=cwltool.Dockerfile --tag=quay.io/commonwl/cwltool_module --target module .
${engine} build --file=cwltool.Dockerfile --tag=quay.io/commonwl/cwltool .
${engine} run -t -v /var/run/docker.sock:/var/run/docker.sock \
-v /tmp:/tmp \
-v "$PWD":/tmp/cwltool \
quay.io/commonwl/cwltool_module /bin/sh -c \
"apk add gcc bash git && pip install -r/tmp/cwltool/test-requirements.txt ; pytest -k 'not (test_bioconda or test_double_overwrite or test_env_filtering or test_biocontainers or test_disable_file_overwrite_without_ext or test_disable_file_creation_in_outdir_with_ext or test_write_write_conflict or test_directory_literal_with_real_inputs_inside or test_revsort_workflow or test_stdin_with_id_preset or test_no_compute_chcksum or test_packed_workflow_execution[tests/wf/count-lines1-wf.cwl-tests/wf/wc-job.json-False] or test_sequential_workflow or test_single_process_subwf_subwf_inline_step)' --ignore-glob '*test_udocker.py' -n 0 -v -rs --pyargs cwltool"
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/conformance-test.sh 0000755 0001750 0001750 00000014551 14621376047 020103 0 ustar 00michael michael #!/bin/bash
venv() {
if ! test -d "$1" ; then
if command -v virtualenv > /dev/null; then
virtualenv -p python3 "$1"
else
python3 -m venv "$1"
fi
fi
# shellcheck source=/dev/null
source "$1"/bin/activate
}
# Set these variables when running the script, e.g.:
# VERSION=v1.2 GIT_TARGET=main CONTAINER=podman ./conformance-test.sh
# Version of the standard to test against
# Current options: v1.0, v1.1, v1.2
VERSION=${VERSION:-"v1.2"}
# Which commit of the standard's repo to use
# Defaults to the last commit of the main branch
GIT_TARGET=${GIT_TARGET:-"main"}
# Which container runtime to use
# Valid options: docker, singularity
CONTAINER=${CONTAINER:-docker}
# Comma-separated list of test names that should be excluded from execution
# Defaults to "docker_entrypoint, inplace_update_on_file_content"
# EXCLUDE=${EXCLUDE:-"some_default_test_to_exclude"}
set -e
set -x
# Additional arguments for the pytest command
# Defaults to none
# PYTEST_EXTRA=
# The directory where this script resides
SCRIPT_DIRECTORY="$( cd "$( dirname "${BASH_SOURCE[0]}" )" >/dev/null 2>&1 && pwd )"
TMP_DIR=${TMP_DIR:-${SCRIPT_DIRECTORY}}
# Download archive from GitHub
if [[ "${VERSION}" = "v1.0" ]] ; then
REPO=common-workflow-language
else
# shellcheck disable=SC2001
REPO=cwl-$(echo "$VERSION" | sed 's/\(v[0-9]*\.\)\([0-9]*\).*/\1\2/')
fi
if [ ! -d "${TMP_DIR}/${REPO}-${GIT_TARGET}" ] ; then
if [ ! -f "${TMP_DIR}/${GIT_TARGET}.tar.gz" ] ; then
wget --directory-prefix "${TMP_DIR}" "https://github.com/common-workflow-language/${REPO}/archive/${GIT_TARGET}.tar.gz"
fi
tar --directory "${TMP_DIR}" -x -f "${TMP_DIR}/${GIT_TARGET}.tar.gz"
fi
if [ -v SKIP_PULL ] ; then
echo Skipping node container pull.
elif [ "${CONTAINER}" == "docker" ]; then
docker pull docker.io/node:slim
elif [ "${CONTAINER}" == "podman" ]; then
podman pull docker.io/node:slim
elif [ "${CONTAINER}" == "singularity" ]; then
export CWL_SINGULARITY_CACHE="$SCRIPT_DIRECTORY/sifcache"
mkdir --parents "${CWL_SINGULARITY_CACHE}"
fi
# Setup environment
if [ -v SKIP_INSTALL ] ; then
echo 'Skip installing dependencies; cwltool & cwltest must already be installed'
else
venv "${TMP_DIR}/cwl-conformance-venv"
pip install -U setuptools wheel pip
pip uninstall -y cwltool
pip install -r"${SCRIPT_DIRECTORY}/mypy-requirements.txt"
CWLTOOL_USE_MYPYC=1 MYPYPATH="${SCRIPT_DIRECTORY}/mypy-stubs" pip install "${SCRIPT_DIRECTORY}" -r"${SCRIPT_DIRECTORY}/requirements.txt"
pip install 'cwltest>=2.5' 'pytest-xdist>=3.2.0' pytest-cov psutil
fi
# Set conformance test filename
if [[ "${VERSION}" = "v1.0" ]] ; then
CONFORMANCE_TEST="${TMP_DIR}/${REPO}-${GIT_TARGET}/${VERSION}/conformance_test_v1.0.yaml"
else
CONFORMANCE_TEST="${TMP_DIR}/${REPO}-${GIT_TARGET}/conformance_tests.yaml"
fi
cp "${CONFORMANCE_TEST}" "${CONFORMANCE_TEST%".yaml"}.cwltest.yaml"
CONFORMANCE_TEST="${CONFORMANCE_TEST%".yaml"}.cwltest.yaml"
CWLTOOL_OPTIONS+=" --parallel"
unset exclusions
declare -a exclusions
if [[ "$VERSION" = *dev* ]]
then
CWLTOOL_OPTIONS+=" --enable-dev"
fi
if [[ "$CONTAINER" = "singularity" ]]; then
CWLTOOL_OPTIONS+=" --singularity"
# This test fails because Singularity and Docker have
# different views on how to deal with this.
exclusions+=(docker_entrypoint)
if [[ "${VERSION}" = "v1.1" ]]; then
# This fails because of a difference (in Singularity vs Docker) in
# the way filehandles are passed to processes in the container and
# wc can tell somehow.
# See issue #1440
exclusions+=(stdin_shorcut)
fi
elif [[ "$CONTAINER" = "podman" ]]; then
CWLTOOL_OPTIONS+=" --podman"
fi
if [[ -n "${EXCLUDE}" ]] ; then
EXCLUDE="${EXCLUDE},"
fi
if (( "${#exclusions[*]}" > 0 )); then
EXCLUDE=${EXCLUDE}$(IFS=,; echo "${exclusions[*]}")
fi
# Build command
TEST_COMMAND="python -m pytest ${CONFORMANCE_TEST} -n logical --dist worksteal -rs --junit-xml=${TMP_DIR}/cwltool_conf_${VERSION}_${GIT_TARGET}_${CONTAINER}.xml -o junit_suite_name=cwltool_$(echo "${CWLTOOL_OPTIONS}" | tr "[:blank:]-" _)"
if [[ -n "${EXCLUDE}" ]] ; then
TEST_COMMAND="${TEST_COMMAND} --cwl-exclude ${EXCLUDE}"
fi
if [ -v SKIP_COV ] ; then
echo Skipping gathering of coverage information
else
TEST_COMMAND="${TEST_COMMAND} --cov --cov-config ${SCRIPT_DIRECTORY}/.coveragerc --cov-report= ${PYTEST_EXTRA}"
# Clean up all old coverage data
find "${SCRIPT_DIRECTORY}" \( -type f -name .coverage -or -name '.coverage.*' -or -name coverage.xml \) -delete
fi
if [ "$GIT_BRANCH" = "origin/main" ] && [[ "$VERSION" = "v1.0" ]] && [[ "$CONTAINER" = "docker" ]]
then
rm -Rf conformance
# shellcheck disable=SC2154
git clone http://"${jenkins_cwl_conformance}"@github.com/common-workflow-language/conformance.git
git -C conformance config user.email "cwl-bot@users.noreply.github.com"
git -C conformance config user.name "CWL Jenkins build bot"
tool_ver=$(cwltool --version | awk '{ print $2 }')
badgedir=${PWD}/conformance/cwltool/cwl_${VERSION}/cwltool_${tool_ver}
mkdir -p "${PWD}"/conformance/cwltool/cwl_"${VERSION}"/
rm -fr "${badgedir}"
TEST_COMMAND="${TEST_COMMAND} --cwl-badgedir=${badgedir}"
CONFORMANCE_MSG=$(cat << EOM
Conformance test of cwltool ${tool_ver} for CWL ${VERSION}
Commit: ${GIT_COMMIT}
Python version: 3
Container: ${CONTAINER}
Extra options: ${CWLTOOL_OPTIONS}
EOM
)
fi
export CWLTOOL_OPTIONS
echo CWLTOOL_OPTIONS="${CWLTOOL_OPTIONS}"
# Run test
cp "${SCRIPT_DIRECTORY}/tests/cwl-conformance/cwltool-conftest.py" "$(dirname "${CONFORMANCE_TEST}")/conftest.py"
bash -c "cd ${TMP_DIR} && ${TEST_COMMAND}"
RETURN_CODE=$?
# Coverage report
if [ ! -v SKIP_COV ] && [ "${RETURN_CODE}" -eq "0" ] ; then
coverage report
coverage xml
fi
if [ -d conformance ]
then
rm -rf conformance/cwltool/cwl_"${VERSION}"/cwltool_latest
cp -r conformance/cwltool/cwl_"${VERSION}"/cwltool_"${tool_ver}" conformance/cwltool/cwl_"${VERSION}"/cwltool_latest
git -C conformance add --all
git -C conformance diff-index --quiet HEAD || git -C conformance commit -m "${CONFORMANCE_MSG}"
git -C conformance push http://"${jenkins_cwl_conformance}":x-oauth-basic@github.com/common-workflow-language/conformance.git
fi
# Cleanup
if [ -z "$SKIP_INSTALL" ] ; then
echo Skipping venv cleanup
else
deactivate
fi
#rm -rf "${GIT_TARGET}.tar.gz" "${SCRIPT_DIRECTORY}/${REPO}-${GIT_TARGET}" "${SCRIPT_DIRECTORY}/cwl-conformance-venv"
# Exit
exit ${RETURN_CODE}
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwl-docker.sh 0000755 0001750 0001750 00000000430 14621376047 016655 0 ustar 00michael michael #!/bin/sh
# Changing the line below? Update https://cloud.docker.com/u/commonworkflowlanguage/repository/docker/commonworkflowlanguage/cwltool
exec docker run -v /var/run/docker.sock:/var/run/docker.sock -v /tmp:/tmp -v "$PWD":"$PWD" -w="$PWD" commonworkflowlanguage/cwltool "$@"
././@PaxHeader 0000000 0000000 0000000 00000000034 00000000000 010212 x ustar 00 28 mtime=1715863245.9610486
cwltool-3.1.20240508115724/cwlref-runner/ 0000755 0001750 0001750 00000000000 14621377316 017060 5 ustar 00michael michael ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwlref-runner/README 0000644 0001750 0001750 00000000337 14621376047 017743 0 ustar 00michael michael This is an optional companion package to "cwltool" which provides an
additional entry point under the alias "cwl-runner", which is the
implementation-agnostic name for the default CWL interpreter installed on a
host.
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwlref-runner/setup.py 0000644 0001750 0001750 00000001403 14621376047 020570 0 ustar 00michael michael #!/usr/bin/env python
import os
from setuptools import setup, find_packages
SETUP_DIR = os.path.dirname(__file__)
README = os.path.join(SETUP_DIR, 'README')
setup(name='cwlref-runner',
version='1.0',
description='Common workflow language reference implementation',
long_description=open(README).read(),
author='Common workflow language working group',
author_email='common-workflow-language@googlegroups.com',
url="http://www.commonwl.org",
download_url="https://github.com/common-workflow-language/common-workflow-language",
license='Apache 2.0',
install_requires=[
'cwltool'
],
entry_points={
'console_scripts': [ "cwl-runner=cwltool.main:main" ]
},
zip_safe=True
)
././@PaxHeader 0000000 0000000 0000000 00000000034 00000000000 010212 x ustar 00 28 mtime=1715863245.9650488
cwltool-3.1.20240508115724/cwltool/ 0000755 0001750 0001750 00000000000 14621377316 015752 5 ustar 00michael michael ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/__init__.py 0000644 0001750 0001750 00000000430 14621376047 020060 0 ustar 00michael michael """Reference implementation of the CWL standards."""
__author__ = "pamstutz@veritasgenetics.com"
CWL_CONTENT_TYPES = [
"text/plain",
"application/json",
"text/vnd.yaml",
"text/yaml",
"text/x-yaml",
"application/x-yaml",
"application/octet-stream",
]
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/__main__.py 0000644 0001750 0001750 00000000121 14621376047 020036 0 ustar 00michael michael """Default entrypoint for the cwltool module."""
from . import main
main.run()
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715863245.0
cwltool-3.1.20240508115724/cwltool/_version.py 0000644 0001750 0001750 00000000665 14621377315 020156 0 ustar 00michael michael # file generated by setuptools_scm
# don't change, don't track in version control
TYPE_CHECKING = False
if TYPE_CHECKING:
from typing import Tuple, Union
VERSION_TUPLE = Tuple[Union[int, str], ...]
else:
VERSION_TUPLE = object
version: str
__version__: str
__version_tuple__: VERSION_TUPLE
version_tuple: VERSION_TUPLE
__version__ = version = '3.1.20240508115724'
__version_tuple__ = version_tuple = (3, 1, 20240508115724)
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/argparser.py 0000644 0001750 0001750 00000074124 14621376047 020322 0 ustar 00michael michael """Command line argument parsing for cwltool."""
import argparse
import os
import urllib
from typing import (
Any,
Callable,
Dict,
List,
MutableMapping,
MutableSequence,
Optional,
Sequence,
Type,
Union,
cast,
)
from .loghandler import _logger
from .process import Process, shortname
from .resolver import ga4gh_tool_registries
from .software_requirements import SOFTWARE_REQUIREMENTS_ENABLED
from .utils import DEFAULT_TMP_PREFIX
def arg_parser() -> argparse.ArgumentParser:
parser = argparse.ArgumentParser(
description="Reference executor for Common Workflow Language standards. "
"Not for production use."
)
parser.add_argument("--basedir", type=str)
parser.add_argument(
"--outdir",
type=str,
default=os.path.abspath("."),
help="Output directory. The default is the current directory.",
)
parser.add_argument(
"--log-dir",
type=str,
default="",
help="Log your tools stdout/stderr to this location outside of container "
"This will only log stdout/stderr if you specify stdout/stderr in their "
"respective fields or capture it as an output",
)
parser.add_argument(
"--parallel",
action="store_true",
default=False,
help="Run jobs in parallel. ",
)
envgroup = parser.add_mutually_exclusive_group()
envgroup.add_argument(
"--preserve-environment",
type=str,
action="append",
help="Preserve specific environment variable when running "
"CommandLineTools. May be provided multiple times. By default PATH is "
"preserved when not running in a container.",
metavar="ENVVAR",
default=[],
dest="preserve_environment",
)
envgroup.add_argument(
"--preserve-entire-environment",
action="store_true",
help="Preserve all environment variables when running CommandLineTools "
"without a software container.",
default=False,
dest="preserve_entire_environment",
)
containergroup = parser.add_mutually_exclusive_group()
containergroup.add_argument(
"--rm-container",
action="store_true",
default=True,
help="Delete Docker container used by jobs after they exit (default)",
dest="rm_container",
)
containergroup.add_argument(
"--leave-container",
action="store_false",
default=True,
help="Do not delete Docker container used by jobs after they exit",
dest="rm_container",
)
cidgroup = parser.add_argument_group(
"Options for recording the Docker container identifier into a file."
)
cidgroup.add_argument(
# Disabled as containerid is now saved by default
"--record-container-id",
action="store_true",
default=False,
help=argparse.SUPPRESS,
dest="record_container_id",
)
cidgroup.add_argument(
"--cidfile-dir",
type=str,
help="Store the Docker container ID into a file in the specified directory.",
default=None,
dest="cidfile_dir",
)
cidgroup.add_argument(
"--cidfile-prefix",
type=str,
help="Specify a prefix to the container ID filename. "
"Final file name will be followed by a timestamp. "
"The default is no prefix.",
default=None,
dest="cidfile_prefix",
)
parser.add_argument(
"--tmpdir-prefix",
type=str,
help="Path prefix for temporary directories. If --tmpdir-prefix is not "
"provided, then the prefix for temporary directories is influenced by "
"the value of the TMPDIR, TEMP, or TMP environment variables. Taking "
"those into consideration, the current default is {}.".format(DEFAULT_TMP_PREFIX),
default=DEFAULT_TMP_PREFIX,
)
intgroup = parser.add_mutually_exclusive_group()
intgroup.add_argument(
"--tmp-outdir-prefix",
type=str,
help="Path prefix for intermediate output directories. Defaults to the "
"value of --tmpdir-prefix.",
default="",
)
intgroup.add_argument(
"--cachedir",
type=str,
default="",
help="Directory to cache intermediate workflow outputs to avoid "
"recomputing steps. Can be very helpful in the development and "
"troubleshooting of CWL documents.",
)
tmpgroup = parser.add_mutually_exclusive_group()
tmpgroup.add_argument(
"--rm-tmpdir",
action="store_true",
default=True,
help="Delete intermediate temporary directories (default)",
dest="rm_tmpdir",
)
tmpgroup.add_argument(
"--leave-tmpdir",
action="store_false",
default=True,
help="Do not delete intermediate temporary directories",
dest="rm_tmpdir",
)
outgroup = parser.add_mutually_exclusive_group()
outgroup.add_argument(
"--move-outputs",
action="store_const",
const="move",
default="move",
help="Move output files to the workflow output directory and delete "
"intermediate output directories (default).",
dest="move_outputs",
)
outgroup.add_argument(
"--leave-outputs",
action="store_const",
const="leave",
default="move",
help="Leave output files in intermediate output directories.",
dest="move_outputs",
)
outgroup.add_argument(
"--copy-outputs",
action="store_const",
const="copy",
default="move",
help="Copy output files to the workflow output directory and don't "
"delete intermediate output directories.",
dest="move_outputs",
)
pullgroup = parser.add_mutually_exclusive_group()
pullgroup.add_argument(
"--enable-pull",
default=True,
action="store_true",
help="Try to pull Docker images",
dest="pull_image",
)
pullgroup.add_argument(
"--disable-pull",
default=True,
action="store_false",
help="Do not try to pull Docker images",
dest="pull_image",
)
parser.add_argument(
"--rdf-serializer",
help="Output RDF serialization format used by --print-rdf (one of "
"turtle (default), n3, nt, xml)",
default="turtle",
)
parser.add_argument(
"--eval-timeout",
help="Time to wait for a Javascript expression to evaluate before giving "
"an error, default 60s.",
type=float,
default=60,
)
provgroup = parser.add_argument_group(
"Options for recording provenance information of the execution"
)
provgroup.add_argument(
"--provenance",
help="Save provenance to specified folder as a "
"Research Object that captures and aggregates "
"workflow execution and data products.",
type=str,
)
provgroup.add_argument(
"--enable-user-provenance",
default=False,
action="store_true",
help="Record user account info as part of provenance.",
dest="user_provenance",
)
provgroup.add_argument(
"--disable-user-provenance",
default=False,
action="store_false",
help="Do not record user account info in provenance.",
dest="user_provenance",
)
provgroup.add_argument(
"--enable-host-provenance",
default=False,
action="store_true",
help="Record host info as part of provenance.",
dest="host_provenance",
)
provgroup.add_argument(
"--disable-host-provenance",
default=False,
action="store_false",
help="Do not record host info in provenance.",
dest="host_provenance",
)
provgroup.add_argument(
"--orcid",
help="Record user ORCID identifier as part of "
"provenance, e.g. https://orcid.org/0000-0002-1825-0097 "
"or 0000-0002-1825-0097. Alternatively the environment variable "
"ORCID may be set.",
dest="orcid",
default=os.environ.get("ORCID", ""),
type=str,
)
provgroup.add_argument(
"--full-name",
help="Record full name of user as part of provenance, "
"e.g. Josiah Carberry. You may need to use shell quotes to preserve "
"spaces. Alternatively the environment variable CWL_FULL_NAME may "
"be set.",
dest="cwl_full_name",
default=os.environ.get("CWL_FULL_NAME", ""),
type=str,
)
printgroup = parser.add_mutually_exclusive_group()
printgroup.add_argument(
"--print-rdf",
action="store_true",
help="Print corresponding RDF graph for workflow and exit",
)
printgroup.add_argument(
"--print-dot",
action="store_true",
help="Print workflow visualization in graphviz format and exit",
)
printgroup.add_argument(
"--print-pre",
action="store_true",
help="Print CWL document after preprocessing.",
)
printgroup.add_argument(
"--print-deps", action="store_true", help="Print CWL document dependencies."
)
printgroup.add_argument(
"--print-input-deps",
action="store_true",
help="Print input object document dependencies.",
)
printgroup.add_argument(
"--pack",
action="store_true",
help="Combine components into single document and print.",
)
printgroup.add_argument("--version", action="store_true", help="Print version and exit")
printgroup.add_argument("--validate", action="store_true", help="Validate CWL document only.")
printgroup.add_argument(
"--print-supported-versions",
action="store_true",
help="Print supported CWL specs.",
)
printgroup.add_argument(
"--print-subgraph",
action="store_true",
help="Print workflow subgraph that will execute. Can combined with "
"--target or --single-step",
)
printgroup.add_argument(
"--print-targets", action="store_true", help="Print targets (output parameters)"
)
printgroup.add_argument(
"--make-template", action="store_true", help="Generate a template input object"
)
strictgroup = parser.add_mutually_exclusive_group()
strictgroup.add_argument(
"--strict",
action="store_true",
help="Strict validation (unrecognized or out of place fields are error)",
default=True,
dest="strict",
)
strictgroup.add_argument(
"--non-strict",
action="store_false",
help="Lenient validation (ignore unrecognized fields)",
default=True,
dest="strict",
)
parser.add_argument(
"--skip-schemas",
action="store_true",
help="Skip loading of schemas",
default=False,
dest="skip_schemas",
)
doccachegroup = parser.add_mutually_exclusive_group()
doccachegroup.add_argument(
"--no-doc-cache",
action="store_false",
help="Disable disk cache for documents loaded over HTTP",
default=True,
dest="doc_cache",
)
doccachegroup.add_argument(
"--doc-cache",
action="store_true",
help="Enable disk cache for documents loaded over HTTP",
default=True,
dest="doc_cache",
)
volumegroup = parser.add_mutually_exclusive_group()
volumegroup.add_argument("--verbose", action="store_true", help="Default logging")
volumegroup.add_argument("--no-warnings", action="store_true", help="Only print errors.")
volumegroup.add_argument("--quiet", action="store_true", help="Only print warnings and errors.")
volumegroup.add_argument("--debug", action="store_true", help="Print even more logging")
parser.add_argument(
"--write-summary",
"-w",
type=str,
help="Path to write the final output JSON object to. Default is stdout.",
default="",
dest="write_summary",
)
parser.add_argument(
"--strict-memory-limit",
action="store_true",
help="When running with "
"software containers and the Docker engine, pass either the "
"calculated memory allocation from ResourceRequirements or the "
"default of 1 gigabyte to Docker's --memory option.",
)
parser.add_argument(
"--strict-cpu-limit",
action="store_true",
help="When running with "
"software containers and the Docker engine, pass either the "
"calculated cpu allocation from ResourceRequirements or the "
"default of 1 core to Docker's --cpu option. "
"Requires docker version >= v1.13.",
)
parser.add_argument(
"--timestamps",
action="store_true",
help="Add timestamps to the errors, warnings, and notifications.",
)
parser.add_argument(
"--js-console", action="store_true", help="Enable javascript console output"
)
parser.add_argument(
"--disable-js-validation",
action="store_true",
help="Disable javascript validation.",
)
parser.add_argument(
"--js-hint-options-file",
type=str,
help="File of options to pass to jshint. "
'This includes the added option "includewarnings". ',
)
dockergroup = parser.add_mutually_exclusive_group()
dockergroup.add_argument(
"--user-space-docker-cmd",
metavar="CMD",
help="(Linux/OS X only) Specify the path to udocker. Implies --udocker",
)
dockergroup.add_argument(
"--udocker",
help="(Linux/OS X only) Use the udocker runtime for running containers "
"(equivalent to --user-space-docker-cmd=udocker).",
action="store_const",
const="udocker",
dest="user_space_docker_cmd",
)
dockergroup.add_argument(
"--singularity",
action="store_true",
default=False,
help="Use "
"Singularity or Apptainer runtime for running containers. "
"Requires Singularity v2.6.1+ and Linux with kernel "
"version v3.18+ or with overlayfs support "
"backported.",
)
dockergroup.add_argument(
"--podman",
action="store_true",
default=False,
help="Use Podman runtime for running containers. ",
)
dockergroup.add_argument(
"--no-container",
action="store_false",
default=True,
help="Do not execute jobs in a "
"Docker container, even when `DockerRequirement` "
"is specified under `hints`.",
dest="use_container",
)
dependency_resolvers_configuration_help = argparse.SUPPRESS
dependencies_directory_help = argparse.SUPPRESS
use_biocontainers_help = argparse.SUPPRESS
conda_dependencies = argparse.SUPPRESS
if SOFTWARE_REQUIREMENTS_ENABLED:
dependency_resolvers_configuration_help = (
"Dependency resolver "
"configuration file describing how to adapt 'SoftwareRequirement' "
"packages to current system."
)
dependencies_directory_help = (
"Default root directory used by dependency resolvers configuration."
)
use_biocontainers_help = (
"Use biocontainers for tools without an " "explicitly annotated Docker container."
)
conda_dependencies = "Short cut to use Conda to resolve 'SoftwareRequirement' packages."
parser.add_argument(
"--beta-dependency-resolvers-configuration",
default=None,
help=dependency_resolvers_configuration_help,
)
parser.add_argument(
"--beta-dependencies-directory", default=None, help=dependencies_directory_help
)
parser.add_argument(
"--beta-use-biocontainers",
default=None,
help=use_biocontainers_help,
action="store_true",
)
parser.add_argument(
"--beta-conda-dependencies",
default=None,
help=conda_dependencies,
action="store_true",
)
parser.add_argument("--tool-help", action="store_true", help="Print command line help for tool")
parser.add_argument(
"--relative-deps",
choices=["primary", "cwd"],
default="primary",
help="When using --print-deps, print paths "
"relative to primary file or current working directory.",
)
parser.add_argument(
"--enable-dev",
action="store_true",
help="Enable loading and running unofficial development versions of " "the CWL standards.",
default=False,
)
parser.add_argument(
"--enable-ext",
action="store_true",
help="Enable loading and running 'cwltool:' extensions to the CWL standards.",
default=False,
)
colorgroup = parser.add_mutually_exclusive_group()
colorgroup.add_argument(
"--enable-color",
action="store_true",
help="Enable logging color (default enabled)",
default=True,
)
colorgroup.add_argument(
"--disable-color",
action="store_false",
dest="enable_color",
help="Disable colored logging (default false)",
)
parser.add_argument(
"--default-container",
help="Specify a default software container to use for any "
"CommandLineTool without a DockerRequirement.",
)
parser.add_argument(
"--no-match-user",
action="store_true",
help="Disable passing the current uid to `docker run --user`",
)
parser.add_argument(
"--custom-net",
type=str,
help="Passed to `docker run` as the '--net' parameter when "
"NetworkAccess is true, which is its default setting.",
)
parser.add_argument(
"--disable-validate",
dest="do_validate",
action="store_false",
default=True,
help=argparse.SUPPRESS,
)
parser.add_argument(
"--fast-parser",
dest="fast_parser",
action="store_true",
default=False,
help=argparse.SUPPRESS,
)
reggroup = parser.add_mutually_exclusive_group()
reggroup.add_argument(
"--enable-ga4gh-tool-registry",
action="store_true",
help="Enable tool resolution using GA4GH tool registry API",
dest="enable_ga4gh_tool_registry",
default=True,
)
reggroup.add_argument(
"--disable-ga4gh-tool-registry",
action="store_false",
help="Disable tool resolution using GA4GH tool registry API",
dest="enable_ga4gh_tool_registry",
default=True,
)
parser.add_argument(
"--add-ga4gh-tool-registry",
action="append",
help="Add a GA4GH tool registry endpoint to use for resolution, default %s"
% ga4gh_tool_registries,
dest="ga4gh_tool_registries",
default=[],
)
parser.add_argument(
"--on-error",
help="Desired workflow behavior when a step fails. One of 'stop' (do "
"not submit any more steps) or 'continue' (may submit other steps that "
"are not downstream from the error). Default is 'stop'.",
default="stop",
choices=("stop", "continue"),
)
checkgroup = parser.add_mutually_exclusive_group()
checkgroup.add_argument(
"--compute-checksum",
action="store_true",
default=True,
help="Compute checksum of contents while collecting outputs",
dest="compute_checksum",
)
checkgroup.add_argument(
"--no-compute-checksum",
action="store_false",
help="Do not compute checksum of contents while collecting outputs",
dest="compute_checksum",
)
parser.add_argument(
"--relax-path-checks",
action="store_true",
default=False,
help="Relax requirements on path names to permit " "spaces and hash characters.",
dest="relax_path_checks",
)
parser.add_argument(
"--force-docker-pull",
action="store_true",
default=False,
help="Pull latest software container image even if it is locally present",
dest="force_docker_pull",
)
parser.add_argument(
"--no-read-only",
action="store_true",
default=False,
help="Do not set root directory in the container as read-only",
dest="no_read_only",
)
parser.add_argument(
"--overrides",
type=str,
default=None,
help="Read process requirement overrides from file.",
)
subgroup = parser.add_mutually_exclusive_group()
subgroup.add_argument(
"--target",
"-t",
action="append",
help="Only execute steps that contribute to listed targets (can be "
"provided more than once).",
)
subgroup.add_argument(
"--single-step",
type=str,
default=None,
help="Only executes a single step in a workflow. The input object must "
"match that step's inputs. Can be combined with --print-subgraph.",
)
subgroup.add_argument(
"--single-process",
type=str,
default=None,
help="Only executes the underlying Process (CommandLineTool, "
"ExpressionTool, or sub-Workflow) for the given step in a workflow. "
"This will not include any step-level processing: 'scatter', 'when'; "
"and there will be no processing of step-level 'default', or 'valueFrom' "
"input modifiers. However, requirements/hints from the step or parent "
"workflow(s) will be inherited as usual."
"The input object must match that Process's inputs.",
)
parser.add_argument(
"--mpi-config-file",
type=str,
default=None,
help="Platform specific configuration for MPI (parallel launcher, its "
"flag etc). See README section 'Running MPI-based tools' for details "
"of the format.",
)
parser.add_argument(
"workflow",
type=str,
nargs="?",
default=None,
metavar="cwl_document",
help="path or URL to a CWL Workflow, "
"CommandLineTool, or ExpressionTool. If the `inputs_object` has a "
"`cwl:tool` field indicating the path or URL to the cwl_document, "
" then the `cwl_document` argument is optional.",
)
parser.add_argument(
"job_order",
nargs=argparse.REMAINDER,
metavar="inputs_object",
help="path or URL to a YAML or JSON "
"formatted description of the required input values for the given "
"`cwl_document`.",
)
return parser
def get_default_args() -> Dict[str, Any]:
"""Get default values of cwltool's command line options."""
ap = arg_parser()
args = ap.parse_args([])
return vars(args)
class FSAction(argparse.Action):
"""Base action for our custom actions."""
objclass: Optional[str] = None
def __init__(
self,
option_strings: List[str],
dest: str,
nargs: Any = None,
urljoin: Callable[[str, str], str] = urllib.parse.urljoin,
base_uri: str = "",
**kwargs: Any,
) -> None:
"""Fail if nargs is used."""
if nargs is not None:
raise ValueError("nargs not allowed")
self.urljoin = urljoin
self.base_uri = base_uri
super().__init__(option_strings, dest, **kwargs)
def __call__(
self,
parser: argparse.ArgumentParser,
namespace: argparse.Namespace,
values: Union[str, Sequence[Any], None],
option_string: Optional[str] = None,
) -> None:
setattr(
namespace,
self.dest,
{
"class": self.objclass,
"location": self.urljoin(self.base_uri, cast(str, values)),
},
)
class FSAppendAction(argparse.Action):
"""Appending version of the base action for our custom actions."""
objclass: Optional[str] = None
def __init__(
self,
option_strings: List[str],
dest: str,
nargs: Any = None,
urljoin: Callable[[str, str], str] = urllib.parse.urljoin,
base_uri: str = "",
**kwargs: Any,
) -> None:
"""Initialize."""
if nargs is not None:
raise ValueError("nargs not allowed")
self.urljoin = urljoin
self.base_uri = base_uri
super().__init__(option_strings, dest, **kwargs)
def __call__(
self,
parser: argparse.ArgumentParser,
namespace: argparse.Namespace,
values: Union[str, Sequence[Any], None],
option_string: Optional[str] = None,
) -> None:
g = getattr(namespace, self.dest)
if not g:
g = []
setattr(namespace, self.dest, g)
g.append(
{
"class": self.objclass,
"location": self.urljoin(self.base_uri, cast(str, values)),
}
)
class FileAction(FSAction):
objclass: Optional[str] = "File"
class DirectoryAction(FSAction):
objclass: Optional[str] = "Directory"
class FileAppendAction(FSAppendAction):
objclass: Optional[str] = "File"
class DirectoryAppendAction(FSAppendAction):
objclass: Optional[str] = "Directory"
class AppendAction(argparse.Action):
"""An argparse action that clears the default values if any value is provided."""
_called: bool
"""Initially set to ``False``, changed if any value is appended."""
def __init__(
self,
option_strings: List[str],
dest: str,
nargs: Any = None,
**kwargs: Any,
) -> None:
"""Initialize."""
super().__init__(option_strings, dest, **kwargs)
self._called = False
def __call__(
self,
parser: argparse.ArgumentParser,
namespace: argparse.Namespace,
values: Union[str, Sequence[Any], None],
option_string: Optional[str] = None,
) -> None:
g = getattr(namespace, self.dest, None)
if g is None:
g = []
if self.default is not None and not self._called:
# If any value was specified, we then clear the list of options before appending.
# We cannot always clear the ``default`` attribute since it collects the ``values`` appended.
self.default.clear()
self._called = True
g.append(values)
setattr(namespace, self.dest, g)
def add_argument(
toolparser: argparse.ArgumentParser,
name: str,
inptype: Any,
records: List[str],
description: str = "",
default: Any = None,
input_required: bool = True,
urljoin: Callable[[str, str], str] = urllib.parse.urljoin,
base_uri: str = "",
) -> None:
if len(name) == 1:
flag = "-"
else:
flag = "--"
# if input_required is false, don't make the command line
# parameter required.
required = default is None and input_required
if isinstance(inptype, MutableSequence):
if len(inptype) == 1:
inptype = inptype[0]
elif len(inptype) == 2 and inptype[0] == "null":
required = False
inptype = inptype[1]
elif len(inptype) == 2 and inptype[1] == "null":
required = False
inptype = inptype[0]
else:
_logger.debug("Can't make command line argument from %s", inptype)
return None
ahelp = description.replace("%", "%%")
action: Optional[Union[Type[argparse.Action], str]] = None
atype: Optional[Any] = None
typekw: Dict[str, Any] = {}
if inptype == "File":
action = FileAction
elif inptype == "Directory":
action = DirectoryAction
elif isinstance(inptype, MutableMapping) and inptype["type"] == "array":
if inptype["items"] == "File":
action = FileAppendAction
elif inptype["items"] == "Directory":
action = DirectoryAppendAction
else:
action = AppendAction
elif isinstance(inptype, MutableMapping) and inptype["type"] == "enum":
atype = str
elif isinstance(inptype, MutableMapping) and inptype["type"] == "record":
records.append(name)
for field in inptype["fields"]:
fieldname = name + "." + shortname(field["name"])
fieldtype = field["type"]
fielddescription = field.get("doc", "")
add_argument(
toolparser,
fieldname,
fieldtype,
records,
fielddescription,
default=default.get(shortname(field["name"]), None) if default else None,
input_required=required,
)
return
elif inptype == "string":
atype = str
elif inptype == "int":
atype = int
elif inptype == "long":
atype = int
elif inptype == "double":
atype = float
elif inptype == "float":
atype = float
elif inptype == "boolean":
action = "store_true"
else:
_logger.debug("Can't make command line argument from %s", inptype)
return None
if action in (FileAction, DirectoryAction, FileAppendAction, DirectoryAppendAction):
typekw["urljoin"] = urljoin
typekw["base_uri"] = base_uri
if inptype != "boolean":
typekw["type"] = atype
toolparser.add_argument(
flag + name,
required=required,
help=ahelp,
action=action, # type: ignore
default=default,
**typekw,
)
def generate_parser(
toolparser: argparse.ArgumentParser,
tool: Process,
namemap: Dict[str, str],
records: List[str],
input_required: bool = True,
urljoin: Callable[[str, str], str] = urllib.parse.urljoin,
base_uri: str = "",
) -> argparse.ArgumentParser:
"""Generate an ArgumentParser for the given CWL Process."""
toolparser.description = tool.tool.get("doc", tool.tool.get("label", None))
toolparser.add_argument("job_order", nargs="?", help="Job input json file")
namemap["job_order"] = "job_order"
for inp in tool.tool["inputs"]:
name = shortname(inp["id"])
namemap[name.replace("-", "_")] = name
inptype = inp["type"]
description = inp.get("doc", inp.get("label", ""))
default = inp.get("default", None)
add_argument(
toolparser,
name,
inptype,
records,
description,
default,
input_required,
urljoin,
base_uri,
)
return toolparser
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/builder.py 0000644 0001750 0001750 00000071656 14621376047 017771 0 ustar 00michael michael """Command line builder."""
import copy
import logging
import math
from decimal import Decimal
from typing import (
IO,
TYPE_CHECKING,
Any,
Callable,
Dict,
List,
MutableMapping,
MutableSequence,
Optional,
Type,
Union,
cast,
)
from cwl_utils import expression
from cwl_utils.file_formats import check_format
from mypy_extensions import mypyc_attr
from rdflib import Graph
from ruamel.yaml.comments import CommentedMap
from ruamel.yaml.representer import RoundTripRepresenter
from ruamel.yaml.scalarfloat import ScalarFloat
from schema_salad.avro.schema import Names, Schema, make_avsc_object
from schema_salad.exceptions import ValidationException
from schema_salad.sourceline import SourceLine
from schema_salad.utils import convert_to_dict, json_dumps
from schema_salad.validate import validate
from .errors import WorkflowException
from .loghandler import _logger
from .mutation import MutationManager
from .software_requirements import DependenciesConfiguration
from .stdfsaccess import StdFsAccess
from .utils import (
CONTENT_LIMIT,
CWLObjectType,
CWLOutputType,
HasReqsHints,
LoadListingType,
aslist,
get_listing,
normalizeFilesDirs,
visit_class,
)
if TYPE_CHECKING:
from .cwlprov.provenance_profile import (
ProvenanceProfile, # pylint: disable=unused-import
)
from .pathmapper import PathMapper
INPUT_OBJ_VOCAB: Dict[str, str] = {
"Any": "https://w3id.org/cwl/salad#Any",
"File": "https://w3id.org/cwl/cwl#File",
"Directory": "https://w3id.org/cwl/cwl#Directory",
}
def content_limit_respected_read_bytes(f: IO[bytes]) -> bytes:
"""
Read a file as bytes, respecting the :py:data:`~cwltool.utils.CONTENT_LIMIT`.
:param f: file handle
:returns: the file contents
:raises WorkflowException: if the file is too large
"""
contents = f.read(CONTENT_LIMIT + 1)
if len(contents) > CONTENT_LIMIT:
raise WorkflowException(
"file is too large, loadContents limited to %d bytes" % CONTENT_LIMIT
)
return contents
def content_limit_respected_read(f: IO[bytes]) -> str:
"""
Read a file as a string, respecting the :py:data:`~cwltool.utils.CONTENT_LIMIT`.
:param f: file handle
:returns: the file contents
:raises WorkflowException: if the file is too large
"""
return str(content_limit_respected_read_bytes(f), "utf-8")
def substitute(value: str, replace: str) -> str:
"""Perform CWL SecondaryFilesDSL style substitution."""
if replace.startswith("^"):
try:
return substitute(value[0 : value.rindex(".")], replace[1:])
except ValueError:
# No extension to remove
return value + replace.lstrip("^")
return value + replace
@mypyc_attr(allow_interpreted_subclasses=True)
class Builder(HasReqsHints):
"""Helper class to construct a command line from a CWL CommandLineTool."""
def __init__(
self,
job: CWLObjectType,
files: List[CWLObjectType],
bindings: List[CWLObjectType],
schemaDefs: MutableMapping[str, CWLObjectType],
names: Names,
requirements: List[CWLObjectType],
hints: List[CWLObjectType],
resources: Dict[str, Union[int, float]],
mutation_manager: Optional[MutationManager],
formatgraph: Optional[Graph],
make_fs_access: Type[StdFsAccess],
fs_access: StdFsAccess,
job_script_provider: Optional[DependenciesConfiguration],
timeout: float,
debug: bool,
js_console: bool,
force_docker_pull: bool,
loadListing: LoadListingType,
outdir: str,
tmpdir: str,
stagedir: str,
cwlVersion: str,
container_engine: str,
) -> None:
"""
Initialize this Builder.
:param timeout: Maximum number of seconds to wait while evaluating CWL
expressions.
"""
super().__init__()
self.job = job
self.files = files
self.bindings = bindings
self.schemaDefs = schemaDefs
self.names = names
self.requirements = requirements
self.hints = hints
self.resources = resources
self.mutation_manager = mutation_manager
self.formatgraph = formatgraph
self.make_fs_access = make_fs_access
self.fs_access = fs_access
self.job_script_provider = job_script_provider
self.timeout = timeout
self.debug = debug
self.js_console = js_console
self.force_docker_pull = force_docker_pull
self.loadListing = loadListing
self.outdir = outdir
self.tmpdir = tmpdir
self.stagedir = stagedir
self.cwlVersion = cwlVersion
self.pathmapper: Optional["PathMapper"] = None
self.prov_obj: Optional["ProvenanceProfile"] = None
self.find_default_container: Optional[Callable[[], str]] = None
self.container_engine = container_engine
def build_job_script(self, commands: List[str]) -> Optional[str]:
if self.job_script_provider is not None:
return self.job_script_provider.build_job_script(self, commands)
return None
def bind_input(
self,
schema: CWLObjectType,
datum: Union[CWLObjectType, List[CWLObjectType]],
discover_secondaryFiles: bool,
lead_pos: Optional[Union[int, List[int]]] = None,
tail_pos: Optional[Union[str, List[int]]] = None,
) -> List[MutableMapping[str, Union[str, List[int]]]]:
"""
Bind an input object to the command line.
:raises ValidationException: in the event of an invalid type union
:raises WorkflowException: if a CWL Expression ("position", "required",
"pattern", "format") evaluates to the wrong type or if a required
secondary file is missing
"""
debug = _logger.isEnabledFor(logging.DEBUG)
if tail_pos is None:
tail_pos = []
if lead_pos is None:
lead_pos = []
bindings: List[MutableMapping[str, Union[str, List[int]]]] = []
binding: Union[MutableMapping[str, Union[str, List[int]]], CommentedMap] = {}
value_from_expression = False
if "inputBinding" in schema and isinstance(schema["inputBinding"], MutableMapping):
binding = CommentedMap(schema["inputBinding"].items())
bp = list(aslist(lead_pos))
if "position" in binding:
position = binding["position"]
if isinstance(position, str): # no need to test the CWL Version
# the schema for v1.0 only allow ints
result = self.do_eval(position, context=datum)
if not isinstance(result, int):
raise SourceLine(
schema["inputBinding"], "position", WorkflowException, debug
).makeError(
"'position' expressions must evaluate to an int, "
f"not a {type(result)}. Expression {position} "
f"resulted in {result!r}."
)
binding["position"] = result
bp.append(result)
else:
bp.extend(aslist(binding["position"]))
else:
bp.append(0)
bp.extend(aslist(tail_pos))
binding["position"] = bp
binding["datum"] = datum
if "valueFrom" in binding:
value_from_expression = True
# Handle union types
if isinstance(schema["type"], MutableSequence):
bound_input = False
for t in schema["type"]:
avsc: Optional[Schema] = None
if isinstance(t, str) and self.names.has_name(t, None):
avsc = self.names.get_name(t, None)
elif (
isinstance(t, MutableMapping)
and "name" in t
and self.names.has_name(cast(str, t["name"]), None)
):
avsc = self.names.get_name(cast(str, t["name"]), None)
if not avsc:
avsc = make_avsc_object(convert_to_dict(t), self.names)
if validate(avsc, datum, vocab=INPUT_OBJ_VOCAB):
schema = copy.deepcopy(schema)
schema["type"] = t
if not value_from_expression:
return self.bind_input(
schema,
datum,
lead_pos=lead_pos,
tail_pos=tail_pos,
discover_secondaryFiles=discover_secondaryFiles,
)
else:
self.bind_input(
schema,
datum,
lead_pos=lead_pos,
tail_pos=tail_pos,
discover_secondaryFiles=discover_secondaryFiles,
)
bound_input = True
if not bound_input:
raise ValidationException(
"'{}' is not a valid union {}".format(datum, schema["type"])
)
elif isinstance(schema["type"], MutableMapping):
st = copy.deepcopy(schema["type"])
if (
binding
and "inputBinding" not in st
and "type" in st
and st["type"] == "array"
and "itemSeparator" not in binding
):
st["inputBinding"] = {}
for k in ("secondaryFiles", "format", "streamable"):
if k in schema:
st[k] = schema[k]
if value_from_expression:
self.bind_input(
st,
datum,
lead_pos=lead_pos,
tail_pos=tail_pos,
discover_secondaryFiles=discover_secondaryFiles,
)
else:
bindings.extend(
self.bind_input(
st,
datum,
lead_pos=lead_pos,
tail_pos=tail_pos,
discover_secondaryFiles=discover_secondaryFiles,
)
)
else:
if schema["type"] == "org.w3id.cwl.salad.Any":
if isinstance(datum, dict):
if datum.get("class") == "File":
schema["type"] = "org.w3id.cwl.cwl.File"
elif datum.get("class") == "Directory":
schema["type"] = "org.w3id.cwl.cwl.Directory"
else:
schema["type"] = "record"
schema["fields"] = [
{"name": field_name, "type": "Any"} for field_name in datum.keys()
]
elif isinstance(datum, list):
schema["type"] = "array"
schema["items"] = "Any"
if schema["type"] in self.schemaDefs:
schema = self.schemaDefs[cast(str, schema["type"])]
if schema["type"] == "record":
datum = cast(CWLObjectType, datum)
for f in cast(List[CWLObjectType], schema["fields"]):
name = cast(str, f["name"])
if name in datum and datum[name] is not None:
bindings.extend(
self.bind_input(
f,
cast(CWLObjectType, datum[name]),
lead_pos=lead_pos,
tail_pos=name,
discover_secondaryFiles=discover_secondaryFiles,
)
)
else:
datum[name] = f.get("default")
if schema["type"] == "array":
for n, item in enumerate(cast(MutableSequence[CWLObjectType], datum)):
b2 = None
if binding:
b2 = cast(CWLObjectType, copy.deepcopy(binding))
b2["datum"] = item
itemschema: CWLObjectType = {
"type": schema["items"],
"inputBinding": b2,
}
for k in ("secondaryFiles", "format", "streamable"):
if k in schema:
itemschema[k] = schema[k]
bindings.extend(
self.bind_input(
itemschema,
item,
lead_pos=n,
tail_pos=tail_pos,
discover_secondaryFiles=discover_secondaryFiles,
)
)
binding = {}
def _capture_files(f: CWLObjectType) -> CWLObjectType:
self.files.append(f)
return f
if schema["type"] == "org.w3id.cwl.cwl.File":
datum = cast(CWLObjectType, datum)
self.files.append(datum)
loadContents_sourceline: Union[
None, MutableMapping[str, Union[str, List[int]]], CWLObjectType
] = None
if binding and binding.get("loadContents"):
loadContents_sourceline = binding
elif schema.get("loadContents"):
loadContents_sourceline = schema
if loadContents_sourceline and loadContents_sourceline["loadContents"]:
with SourceLine(
loadContents_sourceline,
"loadContents",
WorkflowException,
debug,
):
try:
with self.fs_access.open(cast(str, datum["location"]), "rb") as f2:
datum["contents"] = content_limit_respected_read(f2)
except Exception as e:
raise Exception("Reading {}\n{}".format(datum["location"], e)) from e
if "secondaryFiles" in schema:
if "secondaryFiles" not in datum:
datum["secondaryFiles"] = []
sf_schema = aslist(schema["secondaryFiles"])
elif not discover_secondaryFiles:
sf_schema = [] # trust the inputs
else:
sf_schema = aslist(schema["secondaryFiles"])
for num, sf_entry in enumerate(sf_schema):
if "required" in sf_entry and sf_entry["required"] is not None:
required_result = self.do_eval(sf_entry["required"], context=datum)
if not (isinstance(required_result, bool) or required_result is None):
if sf_schema == schema["secondaryFiles"]:
sf_item: Any = sf_schema[num]
else:
sf_item = sf_schema
raise SourceLine(
sf_item, "required", WorkflowException, debug
).makeError(
"The result of a expression in the field "
"'required' must "
f"be a bool or None, not a {type(required_result)}. "
f"Expression {sf_entry['required']!r} resulted "
f"in {required_result!r}."
)
sf_required = required_result
else:
sf_required = True
if "$(" in sf_entry["pattern"] or "${" in sf_entry["pattern"]:
sfpath = self.do_eval(sf_entry["pattern"], context=datum)
else:
sfpath = substitute(cast(str, datum["basename"]), sf_entry["pattern"])
for sfname in aslist(sfpath):
if not sfname:
continue
found = False
if isinstance(sfname, str):
d_location = cast(str, datum["location"])
if "/" in d_location:
sf_location = (
d_location[0 : d_location.rindex("/") + 1] + sfname
)
else:
sf_location = d_location + sfname
sfbasename = sfname
elif isinstance(sfname, MutableMapping):
sf_location = sfname["location"]
sfbasename = sfname["basename"]
else:
raise SourceLine(
sf_entry, "pattern", WorkflowException, debug
).makeError(
"Expected secondaryFile expression to "
"return type 'str', a 'File' or 'Directory' "
"dictionary, or a list of the same. Received "
f"{type(sfname)!r} from {sf_entry['pattern']!r}."
)
for d in cast(
MutableSequence[MutableMapping[str, str]],
datum["secondaryFiles"],
):
if not d.get("basename"):
d["basename"] = d["location"][d["location"].rindex("/") + 1 :]
if d["basename"] == sfbasename:
found = True
if not found:
def addsf(
files: MutableSequence[CWLObjectType],
newsf: CWLObjectType,
) -> None:
for f in files:
if f["location"] == newsf["location"]:
f["basename"] = newsf["basename"]
return
files.append(newsf)
if isinstance(sfname, MutableMapping):
addsf(
cast(
MutableSequence[CWLObjectType],
datum["secondaryFiles"],
),
sfname,
)
elif discover_secondaryFiles and self.fs_access.exists(sf_location):
addsf(
cast(
MutableSequence[CWLObjectType],
datum["secondaryFiles"],
),
{
"location": sf_location,
"basename": sfname,
"class": "File",
},
)
elif sf_required:
raise SourceLine(
schema,
"secondaryFiles",
WorkflowException,
debug,
).makeError(
"Missing required secondary file '%s' from file object: %s"
% (sfname, json_dumps(datum, indent=4))
)
normalizeFilesDirs(
cast(MutableSequence[CWLObjectType], datum["secondaryFiles"])
)
if "format" in schema:
eval_format: Any = self.do_eval(schema["format"])
if isinstance(eval_format, str):
evaluated_format: Union[str, List[str]] = eval_format
elif isinstance(eval_format, MutableSequence):
for index, entry in enumerate(eval_format):
message = None
if not isinstance(entry, str):
message = (
"An expression in the 'format' field must "
"evaluate to a string, or list of strings. "
"However a non-string item was received: "
f"{entry!r} of type {type(entry)!r}. "
f"The expression was {schema['format']!r} and "
f"its fully evaluated result is {eval_format!r}."
)
if expression.needs_parsing(entry):
message = (
"For inputs, 'format' field can either "
"contain a single CWL Expression or CWL Parameter "
"Reference, a single format string, or a list of "
"format strings. But the list cannot contain CWL "
"Expressions or CWL Parameter References. List "
f"entry number {index + 1} contains the following "
"unallowed CWL Parameter Reference or Expression: "
f"{entry!r}."
)
if message:
raise SourceLine(
schema["format"], index, WorkflowException, debug
).makeError(message)
evaluated_format = cast(List[str], eval_format)
else:
raise SourceLine(schema, "format", WorkflowException, debug).makeError(
"An expression in the 'format' field must "
"evaluate to a string, or list of strings. "
"However the type of the expression result was "
f"{type(eval_format)}. "
f"The expression was {schema['format']!r} and "
f"its fully evaluated result is {eval_format!r}."
)
try:
check_format(
datum,
evaluated_format,
self.formatgraph,
)
except ValidationException as ve:
raise WorkflowException(
f"Expected value of {schema['name']!r} to have "
f"format {schema['format']!r} but\n {ve}"
) from ve
visit_class(
datum.get("secondaryFiles", []),
("File", "Directory"),
_capture_files,
)
if schema["type"] == "org.w3id.cwl.cwl.Directory":
datum = cast(CWLObjectType, datum)
ll = schema.get("loadListing") or self.loadListing
if ll and ll != "no_listing":
get_listing(
self.fs_access,
datum,
(ll == "deep_listing"),
)
self.files.append(datum)
if schema["type"] == "Any":
visit_class(datum, ("File", "Directory"), _capture_files)
# Position to front of the sort key
if binding:
for bi in bindings:
bi["position"] = cast(List[int], binding["position"]) + cast(
List[int], bi["position"]
)
bindings.append(binding)
return bindings
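# A short sketch (not part of cwltool) of how the secondaryFiles patterns
# handled above resolve against a primary file's basename via substitute():
# each leading '^' strips one extension before the suffix is appended.
# The filenames are hypothetical.
def _demo_secondary_file_patterns() -> None:
    assert substitute("reads.fastq.gz", ".bai") == "reads.fastq.gz.bai"
    assert substitute("reads.fastq.gz", "^.fai") == "reads.fastq.fai"
    assert substitute("reads.fastq.gz", "^^.fa") == "reads.fa"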
def tostr(self, value: Union[MutableMapping[str, str], Any]) -> str:
"""
Represent an input parameter as a string.
:raises WorkflowException: if the item is a File or Directory and the
"path" is missing.
"""
if isinstance(value, MutableMapping) and value.get("class") in (
"File",
"Directory",
):
if "path" not in value:
raise WorkflowException(
'{} object missing "path": {}'.format(value["class"], value)
)
return value["path"]
elif isinstance(value, ScalarFloat):
rep = RoundTripRepresenter()
dec_value = Decimal(rep.represent_scalar_float(value).value)
if "E" in str(dec_value):
return str(dec_value.quantize(1))
return str(dec_value)
else:
return str(value)
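# A minimal sketch (not part of cwltool) of why ScalarFloat is routed through
# Decimal in tostr() above: ruamel.yaml remembers the lexical form of a YAML
# float, so re-serializing it through RoundTripRepresenter keeps "0.1" as
# written instead of widening it to the binary float repr.
def _demo_scalarfloat_tostr() -> str:
    from ruamel.yaml import YAML

    data = YAML().load("x: 0.1")  # data["x"] is a ScalarFloat
    rep = RoundTripRepresenter()
    return str(Decimal(rep.represent_scalar_float(data["x"]).value))  # -> "0.1"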
def generate_arg(self, binding: CWLObjectType) -> List[str]:
value = binding.get("datum")
debug = _logger.isEnabledFor(logging.DEBUG)
if "valueFrom" in binding:
with SourceLine(
binding,
"valueFrom",
WorkflowException,
debug,
):
value = self.do_eval(cast(str, binding["valueFrom"]), context=value)
prefix = cast(Optional[str], binding.get("prefix"))
sep = binding.get("separate", True)
if prefix is None and not sep:
with SourceLine(
binding,
"separate",
WorkflowException,
debug,
):
raise WorkflowException("'separate' option can not be specified without prefix")
argl: MutableSequence[CWLOutputType] = []
if isinstance(value, MutableSequence):
if binding.get("itemSeparator") and value:
itemSeparator = cast(str, binding["itemSeparator"])
argl = [itemSeparator.join([self.tostr(v) for v in value])]
elif binding.get("valueFrom"):
value = [self.tostr(v) for v in value]
return cast(List[str], ([prefix] if prefix else [])) + cast(List[str], value)
elif prefix and value:
return [prefix]
else:
return []
elif isinstance(value, MutableMapping) and value.get("class") in (
"File",
"Directory",
):
argl = cast(MutableSequence[CWLOutputType], [value])
elif isinstance(value, MutableMapping):
return [prefix] if prefix else []
elif value is True and prefix:
return [prefix]
elif value is False or value is None or (value is True and not prefix):
return []
else:
argl = [value]
args = []
for j in argl:
if sep:
args.extend([prefix, self.tostr(j)])
else:
args.append(self.tostr(j) if prefix is None else prefix + self.tostr(j))
return [a for a in args if a is not None]
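# A condensed sketch (not part of cwltool) of how the binding fields handled
# by generate_arg() shape argv; it ignores valueFrom, shell quoting, and
# File/Directory objects, and the bindings shown are hypothetical.
def _demo_binding_to_argv() -> None:
    def simple_arg(prefix, value, separate=True, item_separator=None):
        if isinstance(value, list):
            if item_separator is not None:
                value = [item_separator.join(str(v) for v in value)]
        else:
            value = [value]
        args = []
        for v in value:
            if separate:
                args.extend([prefix, str(v)] if prefix else [str(v)])
            else:
                args.append(str(v) if prefix is None else prefix + str(v))
        return args

    assert simple_arg("-o", "out.txt") == ["-o", "out.txt"]  # separate prefix
    assert simple_arg("-j", 8, separate=False) == ["-j8"]  # joined prefix
    assert simple_arg("-I", ["a", "b"], item_separator=",") == ["-I", "a,b"]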
def do_eval(
self,
ex: Optional[CWLOutputType],
context: Optional[Any] = None,
recursive: bool = False,
strip_whitespace: bool = True,
) -> Optional[CWLOutputType]:
if recursive:
if isinstance(ex, MutableMapping):
return {k: self.do_eval(v, context, recursive) for k, v in ex.items()}
if isinstance(ex, MutableSequence):
return [self.do_eval(v, context, recursive) for v in ex]
resources = self.resources
if self.resources and "cores" in self.resources:
cores = resources["cores"]
resources = copy.copy(resources)
resources["cores"] = int(math.ceil(cores))
return expression.do_eval(
ex,
self.job,
self.requirements,
self.outdir,
self.tmpdir,
resources,
context=context,
timeout=self.timeout,
debug=self.debug,
js_console=self.js_console,
force_docker_pull=self.force_docker_pull,
strip_whitespace=strip_whitespace,
cwlVersion=self.cwlVersion,
container_engine=self.container_engine,
)
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/checker.py 0000644 0001750 0001750 00000051460 14621376047 017736 0 ustar 00michael michael """Static checking of CWL workflow connectivity."""
from collections import namedtuple
from typing import (
Any,
Dict,
Iterator,
List,
Literal,
MutableMapping,
MutableSequence,
Optional,
Sized,
Union,
cast,
)
from schema_salad.exceptions import ValidationException
from schema_salad.sourceline import SourceLine, bullets, strip_dup_lineno
from schema_salad.utils import json_dumps
from .errors import WorkflowException
from .loghandler import _logger
from .process import shortname
from .utils import CWLObjectType, CWLOutputType, SinkType, aslist
def _get_type(tp):
# type: (Any) -> Any
if isinstance(tp, MutableMapping):
if tp.get("type") not in ("array", "record", "enum"):
return tp["type"]
return tp
def check_types(
srctype: SinkType,
sinktype: SinkType,
linkMerge: Optional[str],
valueFrom: Optional[str],
) -> Union[Literal["pass"], Literal["warning"], Literal["exception"]]:
"""
Check if the source and sink types are correct.
:raises WorkflowException: If there is an unrecognized linkMerge type
"""
if valueFrom is not None:
return "pass"
if linkMerge is None:
if can_assign_src_to_sink(srctype, sinktype, strict=True):
return "pass"
if can_assign_src_to_sink(srctype, sinktype, strict=False):
return "warning"
return "exception"
if linkMerge == "merge_nested":
return check_types(
{"items": _get_type(srctype), "type": "array"},
_get_type(sinktype),
None,
None,
)
if linkMerge == "merge_flattened":
return check_types(merge_flatten_type(_get_type(srctype)), _get_type(sinktype), None, None)
raise WorkflowException(f"Unrecognized linkMerge enum {linkMerge!r}")
def merge_flatten_type(src: SinkType) -> CWLOutputType:
"""Return the merge flattened type of the source type."""
if isinstance(src, MutableSequence):
return [merge_flatten_type(cast(SinkType, t)) for t in src]
if isinstance(src, MutableMapping) and src.get("type") == "array":
return src
return {"items": src, "type": "array"}
def can_assign_src_to_sink(src: SinkType, sink: Optional[SinkType], strict: bool = False) -> bool:
"""
Check for identical type specifications, ignoring extra keys like inputBinding.
In non-strict comparison, at least one source type must match one sink type,
except for 'null'.
In strict comparison, all source types must match at least one sink type.
:param src: admissible source types
:param sink: admissible sink types
"""
if src == "Any" or sink == "Any":
return True
if isinstance(src, MutableMapping) and isinstance(sink, MutableMapping):
if sink.get("not_connected") and strict:
return False
if src["type"] == "array" and sink["type"] == "array":
return can_assign_src_to_sink(
cast(MutableSequence[CWLOutputType], src["items"]),
cast(MutableSequence[CWLOutputType], sink["items"]),
strict,
)
if src["type"] == "record" and sink["type"] == "record":
return _compare_records(src, sink, strict)
if src["type"] == "File" and sink["type"] == "File":
for sinksf in cast(List[CWLObjectType], sink.get("secondaryFiles", [])):
if not [
1
for srcsf in cast(List[CWLObjectType], src.get("secondaryFiles", []))
if sinksf == srcsf
]:
if strict:
return False
return True
return can_assign_src_to_sink(
cast(SinkType, src["type"]), cast(Optional[SinkType], sink["type"]), strict
)
if isinstance(src, MutableSequence):
if strict:
for this_src in src:
if not can_assign_src_to_sink(cast(SinkType, this_src), sink):
return False
return True
for this_src in src:
if this_src != "null" and can_assign_src_to_sink(cast(SinkType, this_src), sink):
return True
return False
if isinstance(sink, MutableSequence):
for this_sink in sink:
if can_assign_src_to_sink(src, cast(SinkType, this_sink)):
return True
return False
return bool(src == sink)
def _compare_records(src: CWLObjectType, sink: CWLObjectType, strict: bool = False) -> bool:
"""
Compare two records, ensuring they have compatible fields.
This handles normalizing record names, which will be relative to workflow
step, so that they can be compared.
:return: True if the records have compatible fields, False otherwise.
"""
def _rec_fields(rec: MutableMapping[str, Any]) -> MutableMapping[str, Any]:
out = {}
for field in rec["fields"]:
name = shortname(field["name"])
out[name] = field["type"]
return out
srcfields = _rec_fields(src)
sinkfields = _rec_fields(sink)
for key in sinkfields.keys():
if (
not can_assign_src_to_sink(
srcfields.get(key, "null"), sinkfields.get(key, "null"), strict
)
and sinkfields.get(key) is not None
):
_logger.info(
"Record comparison failure for %s and %s\n"
"Did not match fields for %s: %s and %s",
src["name"],
sink["name"],
key,
srcfields.get(key),
sinkfields.get(key),
)
return False
return True
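# A hypothetical pair of record types illustrating the field-wise comparison
# above: every sink field must be satisfiable from the source, while extra
# source fields are ignored.
def _demo_compare_records() -> None:
    src = {
        "name": "src_rec",
        "fields": [{"name": "a", "type": "File"}, {"name": "b", "type": "int"}],
    }
    sink = {"name": "sink_rec", "fields": [{"name": "a", "type": "File"}]}
    assert _compare_records(src, sink)  # "b" is extra on the source side: fine
    assert not _compare_records(sink, src)  # sink needs "b", source lacks it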
def missing_subset(fullset: List[Any], subset: List[Any]) -> List[Any]:
missing = []
for i in subset:
if i not in fullset:
missing.append(i)
return missing
def static_checker(
workflow_inputs: List[CWLObjectType],
workflow_outputs: MutableSequence[CWLObjectType],
step_inputs: MutableSequence[CWLObjectType],
step_outputs: List[CWLObjectType],
param_to_step: Dict[str, CWLObjectType],
) -> None:
"""
Check if all source and sink types of a workflow are compatible before run time.
:raises ValidationException: If any incompatibilities are detected.
"""
# source parameters: workflow_inputs and step_outputs
# sink parameters: step_inputs and workflow_outputs
# make a dictionary of source parameters, indexed by the "id" field
src_dict: Dict[str, CWLObjectType] = {}
for param in workflow_inputs + step_outputs:
src_dict[cast(str, param["id"])] = param
step_inputs_val = check_all_types(src_dict, step_inputs, "source", param_to_step)
workflow_outputs_val = check_all_types(
src_dict, workflow_outputs, "outputSource", param_to_step
)
warnings = step_inputs_val["warning"] + workflow_outputs_val["warning"]
exceptions = step_inputs_val["exception"] + workflow_outputs_val["exception"]
warning_msgs = []
exception_msgs = []
for warning in warnings:
src = warning.src
sink = warning.sink
linkMerge = warning.linkMerge
sinksf = sorted(
p["pattern"] for p in sink.get("secondaryFiles", []) if p.get("required", True)
)
srcsf = sorted(p["pattern"] for p in src.get("secondaryFiles", []))
# Every secondaryFile required by the sink, should be declared
# by the source
missing = missing_subset(srcsf, sinksf)
if missing:
msg1 = "Parameter '{}' requires secondaryFiles {} but".format(
shortname(sink["id"]),
missing,
)
msg3 = SourceLine(src, "id").makeError(
"source '%s' does not provide those secondaryFiles." % (shortname(src["id"]))
)
msg4 = SourceLine(src.get("_tool_entry", src), "secondaryFiles").makeError(
"To resolve, add missing secondaryFiles patterns to definition of '%s' or"
% (shortname(src["id"]))
)
msg5 = SourceLine(sink.get("_tool_entry", sink), "secondaryFiles").makeError(
"mark missing secondaryFiles in definition of '%s' as optional."
% shortname(sink["id"])
)
msg = SourceLine(sink).makeError(
"{}\n{}".format(msg1, bullets([msg3, msg4, msg5], " "))
)
elif sink.get("not_connected"):
if not sink.get("used_by_step"):
msg = SourceLine(sink, "type").makeError(
"'%s' is not an input parameter of %s, expected %s"
% (
shortname(sink["id"]),
param_to_step[sink["id"]]["run"],
", ".join(
shortname(cast(str, s["id"]))
for s in cast(
List[Dict[str, Union[str, bool]]],
param_to_step[sink["id"]]["inputs"],
)
if not s.get("not_connected")
),
)
)
else:
msg = ""
else:
msg = (
SourceLine(src, "type").makeError(
"Source '%s' of type %s may be incompatible"
% (shortname(src["id"]), json_dumps(src["type"]))
)
+ "\n"
+ SourceLine(sink, "type").makeError(
" with sink '%s' of type %s"
% (shortname(sink["id"]), json_dumps(sink["type"]))
)
)
if linkMerge is not None:
msg += "\n" + SourceLine(sink).makeError(
" source has linkMerge method %s" % linkMerge
)
if warning.message is not None:
msg += "\n" + SourceLine(sink).makeError(" " + warning.message)
if msg:
warning_msgs.append(msg)
for exception in exceptions:
src = exception.src
sink = exception.sink
linkMerge = exception.linkMerge
extra_message = exception.message
msg = (
SourceLine(src, "type").makeError(
"Source '%s' of type %s is incompatible"
% (shortname(src["id"]), json_dumps(src["type"]))
)
+ "\n"
+ SourceLine(sink, "type").makeError(
" with sink '{}' of type {}".format(
shortname(sink["id"]), json_dumps(sink["type"])
)
)
)
if extra_message is not None:
msg += "\n" + SourceLine(sink).makeError(" " + extra_message)
if linkMerge is not None:
msg += "\n" + SourceLine(sink).makeError(" source has linkMerge method %s" % linkMerge)
exception_msgs.append(msg)
for sink in step_inputs:
if (
"null" != sink["type"]
and "null" not in sink["type"]
and "source" not in sink
and "default" not in sink
and "valueFrom" not in sink
):
msg = SourceLine(sink).makeError(
"Required parameter '%s' does not have source, default, or valueFrom expression"
% shortname(sink["id"])
)
exception_msgs.append(msg)
all_warning_msg = strip_dup_lineno("\n".join(warning_msgs))
all_exception_msg = strip_dup_lineno("\n" + "\n".join(exception_msgs))
if all_warning_msg:
_logger.warning("Workflow checker warning:\n%s", all_warning_msg)
if exceptions:
raise ValidationException(all_exception_msg)
SrcSink = namedtuple("SrcSink", ["src", "sink", "linkMerge", "message"])
def check_all_types(
src_dict: Dict[str, CWLObjectType],
sinks: MutableSequence[CWLObjectType],
sourceField: Union[Literal["source"], Literal["outputSource"]],
param_to_step: Dict[str, CWLObjectType],
) -> Dict[str, List[SrcSink]]:
"""
Given a list of sinks, check if their types match with the types of their sources.
:raises WorkflowException: if there is an unrecognized linkMerge value
(from :py:func:`check_types`)
:raises ValidationException: if a sourceField is missing
"""
validation = {"warning": [], "exception": []} # type: Dict[str, List[SrcSink]]
for sink in sinks:
if sourceField in sink:
valueFrom = cast(Optional[str], sink.get("valueFrom"))
pickValue = cast(Optional[str], sink.get("pickValue"))
extra_message = None
if pickValue is not None:
extra_message = "pickValue is: %s" % pickValue
if isinstance(sink[sourceField], MutableSequence):
linkMerge = cast(
Optional[str],
sink.get(
"linkMerge",
("merge_nested" if len(cast(Sized, sink[sourceField])) > 1 else None),
),
) # type: Optional[str]
if pickValue in ["first_non_null", "the_only_non_null"]:
linkMerge = None
srcs_of_sink = [] # type: List[CWLObjectType]
for parm_id in cast(MutableSequence[str], sink[sourceField]):
srcs_of_sink += [src_dict[parm_id]]
if is_conditional_step(param_to_step, parm_id) and pickValue is None:
validation["warning"].append(
SrcSink(
src_dict[parm_id],
sink,
linkMerge,
message="Source is from conditional step, but pickValue is not used",
)
)
if is_all_output_method_loop_step(param_to_step, parm_id):
src_dict[parm_id]["type"] = {
"type": "array",
"items": src_dict[parm_id]["type"],
}
else:
parm_id = cast(str, sink[sourceField])
if parm_id not in src_dict:
raise SourceLine(sink, sourceField, ValidationException).makeError(
f"{sourceField} not found: {parm_id}"
)
srcs_of_sink = [src_dict[parm_id]]
linkMerge = None
if pickValue is not None:
validation["warning"].append(
SrcSink(
src_dict[parm_id],
sink,
linkMerge,
message="pickValue is used but only a single input source is declared",
)
)
if is_conditional_step(param_to_step, parm_id):
src_typ = aslist(srcs_of_sink[0]["type"])
snk_typ = sink["type"]
if "null" not in src_typ:
src_typ = ["null"] + cast(List[Any], src_typ)
if "null" not in cast(
Union[List[str], CWLObjectType], snk_typ
): # Given our type names this works even if not a list
validation["warning"].append(
SrcSink(
src_dict[parm_id],
sink,
linkMerge,
message="Source is from conditional step and may produce `null`",
)
)
srcs_of_sink[0]["type"] = src_typ
if is_all_output_method_loop_step(param_to_step, parm_id):
src_dict[parm_id]["type"] = {
"type": "array",
"items": src_dict[parm_id]["type"],
}
for src in srcs_of_sink:
check_result = check_types(src, sink, linkMerge, valueFrom)
if check_result == "warning":
validation["warning"].append(
SrcSink(src, sink, linkMerge, message=extra_message)
)
elif check_result == "exception":
validation["exception"].append(
SrcSink(src, sink, linkMerge, message=extra_message)
)
return validation
def circular_dependency_checker(step_inputs: List[CWLObjectType]) -> None:
"""
Check if a workflow has circular dependency.
:raises ValidationException: If a circular dependency is detected.
"""
adjacency = get_dependency_tree(step_inputs)
vertices = adjacency.keys()
processed: List[str] = []
cycles: List[List[str]] = []
for vertex in vertices:
if vertex not in processed:
traversal_path = [vertex]
processDFS(adjacency, traversal_path, processed, cycles)
if cycles:
exception_msg = "The following steps have circular dependency:\n"
cyclestrs = [str(cycle) for cycle in cycles]
exception_msg += "\n".join(cyclestrs)
raise ValidationException(exception_msg)
def get_dependency_tree(step_inputs: List[CWLObjectType]) -> Dict[str, List[str]]:
"""Get the dependency tree in the form of adjacency list."""
adjacency = {} # adjacency list of the dependency tree
for step_input in step_inputs:
if "source" in step_input:
if isinstance(step_input["source"], list):
vertices_in = [get_step_id(cast(str, src)) for src in step_input["source"]]
else:
vertices_in = [get_step_id(cast(str, step_input["source"]))]
vertex_out = get_step_id(cast(str, step_input["id"]))
for vertex_in in vertices_in:
if vertex_in not in adjacency:
adjacency[vertex_in] = [vertex_out]
elif vertex_out not in adjacency[vertex_in]:
adjacency[vertex_in].append(vertex_out)
if vertex_out not in adjacency:
adjacency[vertex_out] = []
return adjacency
def processDFS(
adjacency: Dict[str, List[str]],
traversal_path: List[str],
processed: List[str],
cycles: List[List[str]],
) -> None:
"""Perform depth first search."""
tip = traversal_path[-1]
for vertex in adjacency[tip]:
if vertex in traversal_path:
i = traversal_path.index(vertex)
cycles.append(traversal_path[i:])
elif vertex not in processed:
traversal_path.append(vertex)
processDFS(adjacency, traversal_path, processed, cycles)
processed.append(tip)
traversal_path.pop()
def get_step_id(field_id: str) -> str:
"""Extract step id from either input or output fields."""
if "/" in field_id.split("#")[1]:
step_id = "/".join(field_id.split("/")[:-1])
else:
step_id = field_id.split("#")[0]
return step_id
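# A hypothetical two-step cycle showing the checker above in action: step1
# consumes step2's output and vice versa, so a ValidationException is raised.
def _demo_circular_dependency() -> None:
    step_inputs = cast(
        List[CWLObjectType],
        [
            {"id": "wf#step1/in", "source": "wf#step2/out"},
            {"id": "wf#step2/in", "source": "wf#step1/out"},
        ],
    )
    try:
        circular_dependency_checker(step_inputs)
    except ValidationException as err:
        assert "circular dependency" in str(err)
    else:
        raise AssertionError("cycle was not detected")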
def is_conditional_step(param_to_step: Dict[str, CWLObjectType], parm_id: str) -> bool:
if (source_step := param_to_step.get(parm_id)) is not None:
if source_step.get("when") is not None:
return True
return False
def is_all_output_method_loop_step(param_to_step: Dict[str, CWLObjectType], parm_id: str) -> bool:
"""Check if a step contains a http://commonwl.org/cwltool#Loop requirement with `all` outputMethod."""
source_step: Optional[MutableMapping[str, Any]] = param_to_step.get(parm_id)
if source_step is not None:
for requirement in source_step.get("requirements", []):
if (
requirement["class"] == "http://commonwl.org/cwltool#Loop"
and requirement.get("outputMethod") == "all"
):
return True
return False
def loop_checker(steps: Iterator[MutableMapping[str, Any]]) -> None:
"""
Check http://commonwl.org/cwltool#Loop requirement compatibility with other directives.
:raises ValidationException: If there is an incompatible combination between
cwltool:loop and 'scatter' or 'when'.
"""
exceptions = []
for step in steps:
requirements = {
**{h["class"]: h for h in step.get("hints", [])},
**{r["class"]: r for r in step.get("requirements", [])},
}
if "http://commonwl.org/cwltool#Loop" in requirements:
if "when" in step:
exceptions.append(
SourceLine(step, "id").makeError(
"The `cwltool:Loop` clause is not compatible with the `when` directive."
)
)
if "scatter" in step:
exceptions.append(
SourceLine(step, "id").makeError(
"The `cwltool:Loop` clause is not compatible with the `scatter` directive."
)
)
if exceptions:
raise ValidationException("\n".join(exceptions))
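# A hypothetical step mixing cwltool:Loop with 'when', to show what
# loop_checker rejects:
def _demo_loop_checker() -> None:
    steps = iter(
        [
            {
                "id": "wf#loop_step",
                "when": "$(inputs.keep_going)",
                "requirements": [
                    {"class": "http://commonwl.org/cwltool#Loop", "outputMethod": "last"}
                ],
            }
        ]
    )
    try:
        loop_checker(steps)
    except ValidationException as err:
        assert "not compatible with the `when` directive" in str(err)
    else:
        raise AssertionError("incompatible directives were not detected")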
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/command_line_tool.py 0000644 0001750 0001750 00000201055 14621376047 022011 0 ustar 00michael michael """Implementation of CommandLineTool."""
import copy
import hashlib
import json
import locale
import logging
import os
import re
import shutil
import threading
import urllib
import urllib.parse
from enum import Enum
from functools import cmp_to_key, partial
from typing import (
TYPE_CHECKING,
Any,
Dict,
Generator,
List,
Mapping,
MutableMapping,
MutableSequence,
Optional,
Pattern,
Set,
TextIO,
Type,
Union,
cast,
)
import shellescape
from mypy_extensions import mypyc_attr
from ruamel.yaml.comments import CommentedMap, CommentedSeq
from schema_salad.avro.schema import Schema
from schema_salad.exceptions import ValidationException
from schema_salad.ref_resolver import file_uri, uri_file_path
from schema_salad.sourceline import SourceLine
from schema_salad.utils import json_dumps
from schema_salad.validate import validate_ex
from .builder import (
INPUT_OBJ_VOCAB,
Builder,
content_limit_respected_read_bytes,
substitute,
)
from .context import LoadingContext, RuntimeContext, getdefault
from .docker import DockerCommandLineJob, PodmanCommandLineJob
from .errors import UnsupportedRequirement, WorkflowException
from .flatten import flatten
from .job import CommandLineJob, JobBase
from .loghandler import _logger
from .mpi import MPIRequirementName
from .mutation import MutationManager
from .pathmapper import PathMapper
from .process import (
Process,
_logger_validation_warnings,
compute_checksums,
shortname,
uniquename,
)
from .singularity import SingularityCommandLineJob
from .stdfsaccess import StdFsAccess
from .udocker import UDockerCommandLineJob
from .update import ORDERED_VERSIONS, ORIGINAL_CWLVERSION
from .utils import (
CWLObjectType,
CWLOutputType,
DirectoryType,
JobsGeneratorType,
OutputCallbackType,
adjustDirObjs,
adjustFileObjs,
aslist,
get_listing,
normalizeFilesDirs,
random_outdir,
shared_file_lock,
trim_listing,
upgrade_lock,
visit_class,
)
if TYPE_CHECKING:
from .cwlprov.provenance_profile import (
ProvenanceProfile, # pylint: disable=unused-import
)
class PathCheckingMode(Enum):
"""
What characters are allowed in path names.
We have the strict (default) mode and the relaxed mode.
"""
STRICT = re.compile(r"^[\w.+\,\-:@\]^\u2600-\u26FF\U0001f600-\U0001f64f]+$")
r"""
Accepts names that contain one or more of the following:
.. list-table::
* - ``\w``
- unicode word characters
this includes most characters that can be part of a word in any
language, as well as numbers and the underscore
* - ``.``
- a literal period
* - ``+``
- a literal plus sign
* - ``,``
- a literal comma
* - ``-``
- a literal minus sign
* - ``:``
- a literal colon
* - ``@``
- a literal at-symbol
* - ``]``
- a literal end-square-bracket
* - ``^``
- a literal caret symbol
* - ``\u2600-\u26FF``
- matches a single character in the range between ☀ (index 9728) and ⛿ (index 9983)
* - ``\U0001f600-\U0001f64f``
- matches a single character in the range between 😀 (index 128512) and 🙏 (index 128591)
Note: the following characters are intentionally not included:
1. reserved words in POSIX: ``!``, :code:`{`, ``}``
2. POSIX metacharacters listed in the CWL standard as okay to reject: ``|``,
``&``, ``;``, ``<``, ``>``, ``(``, ``)``, ``$``, `````, ``"``, ``'``,
:kbd:`<space>`, :kbd:`<tab>`, :kbd:`<newline>`.
(In accordance with https://www.commonwl.org/v1.0/CommandLineTool.html#File under "path" )
3. POSIX path separator: ``/``
(also listed at https://www.commonwl.org/v1.0/CommandLineTool.html#File under "path")
4. Additional POSIX metacharacters: ``*``, ``?``, ``[``, ``#``, ``~``,
``=``, ``%``.
TODO: switch to https://pypi.org/project/regex/ and use
``\p{Extended_Pictographic}`` instead of the manual emoji ranges
"""
RELAXED = re.compile(r".*")
"""Accept anything."""
class ExpressionJob:
"""Job for :py:class:`ExpressionTool`."""
def __init__(
self,
builder: Builder,
script: str,
output_callback: Optional[OutputCallbackType],
requirements: List[CWLObjectType],
hints: List[CWLObjectType],
outdir: Optional[str] = None,
tmpdir: Optional[str] = None,
) -> None:
"""Initialize this ExpressionJob."""
self.builder = builder
self.requirements = requirements
self.hints = hints
self.output_callback = output_callback
self.outdir = outdir
self.tmpdir = tmpdir
self.script = script
self.prov_obj: Optional["ProvenanceProfile"] = None
def run(
self,
runtimeContext: RuntimeContext,
tmpdir_lock: Optional[threading.Lock] = None,
) -> None:
try:
normalizeFilesDirs(self.builder.job)
ev = self.builder.do_eval(self.script)
normalizeFilesDirs(
cast(
Optional[
Union[
MutableSequence[MutableMapping[str, Any]],
MutableMapping[str, Any],
DirectoryType,
]
],
ev,
)
)
if self.output_callback:
self.output_callback(cast(Optional[CWLObjectType], ev), "success")
except WorkflowException as err:
_logger.warning(
"Failed to evaluate expression:\n%s",
str(err),
exc_info=runtimeContext.debug,
)
if self.output_callback:
self.output_callback({}, "permanentFail")
@mypyc_attr(allow_interpreted_subclasses=True)
class ExpressionTool(Process):
def job(
self,
job_order: CWLObjectType,
output_callbacks: Optional[OutputCallbackType],
runtimeContext: RuntimeContext,
) -> Generator[ExpressionJob, None, None]:
builder = self._init_job(job_order, runtimeContext)
job = ExpressionJob(
builder,
self.tool["expression"],
output_callbacks,
self.requirements,
self.hints,
)
job.prov_obj = runtimeContext.prov_obj
yield job
class AbstractOperation(Process):
def job(
self,
job_order: CWLObjectType,
output_callbacks: Optional[OutputCallbackType],
runtimeContext: RuntimeContext,
) -> JobsGeneratorType:
raise WorkflowException("Abstract operation cannot be executed.")
def remove_path(f): # type: (CWLObjectType) -> None
if "path" in f:
del f["path"]
def revmap_file(builder: Builder, outdir: str, f: CWLObjectType) -> Optional[CWLObjectType]:
"""
Remap a file from internal path to external path.
For Docker, this maps from the path inside the container to the path
outside the container. Recognizes files in the pathmapper or remaps
internal output directories to the external directory.
"""
# builder.outdir is the inner (container/compute node) output directory
# outdir is the outer (host/storage system) output directory
if outdir.startswith("/"):
# local file path, turn it into a file:// URI
outdir = file_uri(outdir)
# note: outer outdir should already be a URI and should not be URI
# quoted any further.
if "location" in f and "path" not in f:
location = cast(str, f["location"])
if location.startswith("file://"):
f["path"] = uri_file_path(location)
else:
f["location"] = builder.fs_access.join(outdir, cast(str, f["location"]))
return f
if "dirname" in f:
del f["dirname"]
if "path" in f:
path = builder.fs_access.join(builder.outdir, cast(str, f["path"]))
uripath = file_uri(path)
del f["path"]
if "basename" not in f:
f["basename"] = os.path.basename(path)
if not builder.pathmapper:
raise ValueError(
"Do not call revmap_file using a builder that doesn't have a pathmapper."
)
revmap_f = builder.pathmapper.reversemap(path)
if revmap_f and not builder.pathmapper.mapper(revmap_f[0]).type.startswith("Writable"):
f["location"] = revmap_f[1]
elif (
uripath == outdir
or uripath.startswith(outdir + os.sep)
or uripath.startswith(outdir + "/")
):
f["location"] = uripath
elif (
path == builder.outdir
or path.startswith(builder.outdir + os.sep)
or path.startswith(builder.outdir + "/")
):
joined_path = builder.fs_access.join(
outdir, urllib.parse.quote(path[len(builder.outdir) + 1 :])
)
f["location"] = joined_path
else:
raise WorkflowException(
"Output file path %s must be within designated output directory (%s) or an input "
"file pass through." % (path, builder.outdir)
)
return f
raise WorkflowException(
"Output File object is missing both 'location' and 'path' fields: %s" % f
)
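# A standalone sketch (not part of cwltool) of the core remapping rule in
# revmap_file(): a file under the inner (container) output directory is
# relocated under the outer (host) output URI, preserving the relative part.
# Both directories and the output path below are hypothetical.
def _demo_revmap_rule() -> str:
    inner_outdir = "/var/spool/cwl"  # like builder.outdir
    outer_outdir = "file:///tmp/out"  # host output directory as a URI
    path = "/var/spool/cwl/results/hits.bam"  # a tool output inside the container
    assert path.startswith(inner_outdir + "/")
    rel = urllib.parse.quote(path[len(inner_outdir) + 1 :])
    return outer_outdir + "/" + rel  # -> "file:///tmp/out/results/hits.bam"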
@mypyc_attr(serializable=True)
class CallbackJob:
"""Callback Job class, used by :py:func:`CommandLineTool.job`."""
def __init__(
self,
job: "CommandLineTool",
output_callback: Optional[OutputCallbackType],
cachebuilder: Builder,
jobcache: str,
) -> None:
"""Initialize this CallbackJob."""
self.job = job
self.output_callback = output_callback
self.cachebuilder = cachebuilder
self.outdir = jobcache
self.prov_obj = None # type: Optional[ProvenanceProfile]
def run(
self,
runtimeContext: RuntimeContext,
tmpdir_lock: Optional[threading.Lock] = None,
) -> None:
if self.output_callback:
self.output_callback(
self.job.collect_output_ports(
self.job.tool["outputs"],
self.cachebuilder,
self.outdir,
getdefault(runtimeContext.compute_checksum, True),
),
"success",
)
def check_adjust(accept_re: Pattern[str], builder: Builder, file_o: CWLObjectType) -> CWLObjectType:
"""
Map files to assigned path inside a container.
We need to also explicitly walk over input, as implicit reassignment
doesn't reach everything in builder.bindings
"""
if not builder.pathmapper:
raise ValueError("Do not call check_adjust using a builder that doesn't have a pathmapper.")
file_o["path"] = path = builder.pathmapper.mapper(cast(str, file_o["location"]))[1]
basename = cast(str, file_o.get("basename"))
dn, bn = os.path.split(path)
if file_o.get("dirname") != dn:
file_o["dirname"] = str(dn)
if basename != bn:
file_o["basename"] = basename = str(bn)
if file_o["class"] == "File":
nr, ne = os.path.splitext(basename)
if file_o.get("nameroot") != nr:
file_o["nameroot"] = str(nr)
if file_o.get("nameext") != ne:
file_o["nameext"] = str(ne)
if not accept_re.match(basename):
raise WorkflowException(
f"Invalid filename: {file_o['basename']!r} contains illegal characters"
)
return file_o
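# A tiny sketch of the derived name fields check_adjust() keeps in sync for
# File objects (the path is hypothetical):
def _demo_name_fields() -> None:
    dn, bn = os.path.split("/work/out/reads.fastq")
    nr, ne = os.path.splitext(bn)
    assert (dn, bn, nr, ne) == ("/work/out", "reads.fastq", "reads", ".fastq")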
def check_valid_locations(fs_access: StdFsAccess, ob: CWLObjectType) -> None:
location = cast(str, ob["location"])
if location.startswith("_:"):
return  # literal File/Directory (contents only), nothing on disk to validate
if ob["class"] == "File" and not fs_access.isfile(location):
raise ValidationException("Does not exist or is not a File: '%s'" % location)
if ob["class"] == "Directory" and not fs_access.isdir(location):
raise ValidationException("Does not exist or is not a Directory: '%s'" % location)
OutputPortsType = Dict[str, Optional[CWLOutputType]]
class ParameterOutputWorkflowException(WorkflowException):
def __init__(self, msg: str, port: CWLObjectType, **kwargs: Any) -> None:
"""Exception for when there was an error collecting output for a parameter."""
super().__init__(
"Error collecting output for parameter '%s': %s"
% (shortname(cast(str, port["id"])), msg),
kwargs,
)
@mypyc_attr(allow_interpreted_subclasses=True)
class CommandLineTool(Process):
def __init__(self, toolpath_object: CommentedMap, loadingContext: LoadingContext) -> None:
"""Initialize this CommandLineTool."""
super().__init__(toolpath_object, loadingContext)
self.prov_obj = loadingContext.prov_obj
self.path_check_mode = (
PathCheckingMode.RELAXED
if loadingContext.relax_path_checks
else PathCheckingMode.STRICT
) # type: PathCheckingMode
def make_job_runner(self, runtimeContext: RuntimeContext) -> Type[JobBase]:
dockerReq, dockerRequired = self.get_requirement("DockerRequirement")
mpiReq, mpiRequired = self.get_requirement(MPIRequirementName)
if not dockerReq and runtimeContext.use_container:
if runtimeContext.find_default_container is not None:
default_container = runtimeContext.find_default_container(self)
if default_container is not None:
dockerReq = {
"class": "DockerRequirement",
"dockerPull": default_container,
}
if mpiRequired:
self.hints.insert(0, dockerReq)
dockerRequired = False
else:
self.requirements.insert(0, dockerReq)
dockerRequired = True
if dockerReq is not None and runtimeContext.use_container:
if mpiReq is not None:
_logger.warning("MPIRequirement with containers is a beta feature")
if runtimeContext.singularity:
return SingularityCommandLineJob
elif runtimeContext.user_space_docker_cmd:
return UDockerCommandLineJob
if mpiReq is not None:
if mpiRequired:
if dockerRequired:
raise UnsupportedRequirement(
"No support for Docker and MPIRequirement both being required"
)
else:
_logger.warning(
"MPI has been required while Docker is hinted, discarding Docker hint(s)"
)
self.hints = [h for h in self.hints if h["class"] != "DockerRequirement"]
return CommandLineJob
else:
if dockerRequired:
_logger.warning(
"Docker has been required while MPI is hinted, discarding MPI hint(s)"
)
self.hints = [h for h in self.hints if h["class"] != MPIRequirementName]
else:
raise UnsupportedRequirement(
"Both Docker and MPI have been hinted - don't know what to do"
)
if runtimeContext.podman:
return PodmanCommandLineJob
return DockerCommandLineJob
if dockerRequired:
raise UnsupportedRequirement(
"--no-container, but this CommandLineTool has "
"DockerRequirement under 'requirements'."
)
return CommandLineJob
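# A condensed sketch (not part of cwltool) of the runner-selection precedence
# implemented by make_job_runner(), ignoring MPI handling, default containers,
# and the required-vs-hinted distinction:
def _demo_runner_selection(
    use_container: bool,
    singularity: bool,
    podman: bool,
    user_space_docker_cmd: Optional[str],
) -> str:
    if not use_container:
        return "CommandLineJob"
    if singularity:
        return "SingularityCommandLineJob"
    if user_space_docker_cmd:
        return "UDockerCommandLineJob"
    if podman:
        return "PodmanCommandLineJob"
    return "DockerCommandLineJob"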
@staticmethod
def make_path_mapper(
reffiles: List[CWLObjectType],
stagedir: str,
runtimeContext: RuntimeContext,
separateDirs: bool,
) -> PathMapper:
return PathMapper(reffiles, runtimeContext.basedir, stagedir, separateDirs)
def updatePathmap(self, outdir: str, pathmap: PathMapper, fn: CWLObjectType) -> None:
"""Update a PathMapper with a CWL File or Directory object."""
if not isinstance(fn, MutableMapping):
raise WorkflowException("Expected File or Directory object, was %s" % type(fn))
basename = cast(str, fn["basename"])
if "location" in fn:
location = cast(str, fn["location"])
if location in pathmap:
pathmap.update(
location,
pathmap.mapper(location).resolved,
os.path.join(outdir, basename),
("Writable" if fn.get("writable") else "") + cast(str, fn["class"]),
False,
)
for sf in cast(List[CWLObjectType], fn.get("secondaryFiles", [])):
self.updatePathmap(outdir, pathmap, sf)
for ls in cast(List[CWLObjectType], fn.get("listing", [])):
self.updatePathmap(os.path.join(outdir, cast(str, fn["basename"])), pathmap, ls)
def _initialworkdir(self, j: JobBase, builder: Builder) -> None:
initialWorkdir, _ = self.get_requirement("InitialWorkDirRequirement")
if initialWorkdir is None:
return
debug = _logger.isEnabledFor(logging.DEBUG)
cwl_version = cast(Optional[str], self.metadata.get(ORIGINAL_CWLVERSION, None))
classic_dirent: bool = cwl_version is not None and (
ORDERED_VERSIONS.index(cwl_version) < ORDERED_VERSIONS.index("v1.2.0-dev2")
)
classic_listing = cwl_version and ORDERED_VERSIONS.index(
cwl_version
) < ORDERED_VERSIONS.index("v1.1.0-dev1")
ls = [] # type: List[CWLObjectType]
if isinstance(initialWorkdir["listing"], str):
# "listing" is just a string (must be an expression) so
# just evaluate it and use the result as if it was in
# listing
ls_evaluated = builder.do_eval(initialWorkdir["listing"])
fail: Any = False
fail_suffix: str = ""
if not isinstance(ls_evaluated, MutableSequence):
fail = ls_evaluated
else:
ls_evaluated2 = cast(MutableSequence[Union[None, CWLOutputType]], ls_evaluated)
for entry in ls_evaluated2:
if entry == None: # noqa
if classic_dirent:
fail = entry
fail_suffix = (
" Dirent.entry cannot return 'null' before CWL "
"v1.2. Please consider using 'cwl-upgrader' to "
"upgrade your document to CWL version v1.2."
)
elif isinstance(entry, MutableSequence):
if classic_listing:
raise SourceLine(
initialWorkdir, "listing", WorkflowException, debug
).makeError(
"InitialWorkDirRequirement.listing expressions "
"cannot return arrays of Files or Directories "
"before CWL v1.1. Please "
"considering using 'cwl-upgrader' to upgrade "
"your document to CWL v1.1' or later."
)
else:
for entry2 in entry:
if not (
isinstance(entry2, MutableMapping)
and (
"class" in entry2
and entry2["class"] == "File"
or "Directory"
)
):
fail = (
"an array with an item ('{entry2}') that is "
"not a File nor a Directory object."
)
elif not (
isinstance(entry, MutableMapping)
and (
"class" in entry
and (entry["class"] == "File" or "Directory")
or "entry" in entry
)
):
fail = entry
if fail is not False:
message = (
"Expression in a 'InitialWorkdirRequirement.listing' field "
"must return a list containing zero or more of: File or "
"Directory objects; Dirent objects"
)
if classic_dirent:
message += ". "
else:
message += "; null; or arrays of File or Directory objects. "
message += f"Got {fail!r} among the results from "
message += f"{initialWorkdir['listing'].strip()!r}." + fail_suffix
raise SourceLine(initialWorkdir, "listing", WorkflowException, debug).makeError(
message
)
ls = cast(List[CWLObjectType], ls_evaluated)
else:
# "listing" is an array of either expressions or Dirent so
# evaluate each item
for t in cast(
MutableSequence[Union[str, CWLObjectType]],
initialWorkdir["listing"],
):
if isinstance(t, Mapping) and "entry" in t:
# Dirent
entry_field = cast(str, t["entry"])
# the schema guarantees that 'entry' is a string, so the cast is safe
entry = builder.do_eval(entry_field, strip_whitespace=False)
if entry is None:
continue
if isinstance(entry, MutableSequence):
if classic_listing:
raise SourceLine(t, "entry", WorkflowException, debug).makeError(
"'entry' expressions are not allowed to evaluate "
"to an array of Files or Directories until CWL "
"v1.2. Consider using 'cwl-upgrader' to upgrade "
"your document to CWL version 1.2."
)
# Nested list. If it is a list of File or
# Directory objects, add it to the
# file list, otherwise JSON serialize it if CWL v1.2.
filelist = True
for e in entry:
if not isinstance(e, MutableMapping) or e.get("class") not in (
"File",
"Directory",
):
filelist = False
break
if filelist:
if "entryname" in t:
raise SourceLine(
t, "entryname", WorkflowException, debug
).makeError(
"'entryname' is invalid when 'entry' returns list of File or Directory"
)
for e in entry:
ec = cast(CWLObjectType, e)
ec["writable"] = t.get("writable", False)
ls.extend(cast(List[CWLObjectType], entry))
continue
et = {} # type: CWLObjectType
if isinstance(entry, Mapping) and entry.get("class") in (
"File",
"Directory",
):
et["entry"] = cast(CWLOutputType, entry)
else:
if isinstance(entry, str):
et["entry"] = entry
else:
if classic_dirent:
raise SourceLine(t, "entry", WorkflowException, debug).makeError(
"'entry' expression resulted in "
"something other than number, object or "
"array besides a single File or Dirent object. "
"In CWL v1.2+ this would be serialized to a JSON object. "
"However this is a {cwl_version} document. "
"If that is the desired result then please "
"consider using 'cwl-upgrader' to upgrade "
"your document to CWL version 1.2. "
f"Result of {entry_field!r} was {entry!r}."
)
et["entry"] = json_dumps(entry, sort_keys=True)
if "entryname" in t:
entryname_field = cast(str, t["entryname"])
if "${" in entryname_field or "$(" in entryname_field:
en = builder.do_eval(cast(str, t["entryname"]))
if not isinstance(en, str):
raise SourceLine(
t, "entryname", WorkflowException, debug
).makeError(
"'entryname' expression must result a string. "
f"Got {en!r} from {entryname_field!r}"
)
et["entryname"] = en
else:
et["entryname"] = entryname_field
else:
et["entryname"] = None
et["writable"] = t.get("writable", False)
ls.append(et)
else:
# Expression, must return a Dirent, File, Directory
# or array of such.
initwd_item = builder.do_eval(t)
if not initwd_item:
continue
if isinstance(initwd_item, MutableSequence):
ls.extend(cast(List[CWLObjectType], initwd_item))
else:
ls.append(cast(CWLObjectType, initwd_item))
for i, t2 in enumerate(ls):
if not isinstance(t2, Mapping):
raise SourceLine(initialWorkdir, "listing", WorkflowException, debug).makeError(
f"Entry at index {i} of listing is not a record, was {type(t2)}"
)
if "entry" not in t2:
continue
# Dirent
if isinstance(t2["entry"], str):
if not t2["entryname"]:
raise SourceLine(initialWorkdir, "listing", WorkflowException, debug).makeError(
"Entry at index %s of listing missing entryname" % (i)
)
ls[i] = {
"class": "File",
"basename": t2["entryname"],
"contents": t2["entry"],
"writable": t2.get("writable"),
}
continue
if not isinstance(t2["entry"], Mapping):
raise SourceLine(initialWorkdir, "listing", WorkflowException, debug).makeError(
"Entry at index {} of listing is not a record, was {}".format(
i, type(t2["entry"])
)
)
if t2["entry"].get("class") not in ("File", "Directory"):
raise SourceLine(initialWorkdir, "listing", WorkflowException, debug).makeError(
"Entry at index %s of listing is not a File or Directory object, was %s"
% (i, t2)
)
if t2.get("entryname") or t2.get("writable"):
t2 = copy.deepcopy(t2)
t2entry = cast(CWLObjectType, t2["entry"])
if t2.get("entryname"):
t2entry["basename"] = t2["entryname"]
t2entry["writable"] = t2.get("writable")
ls[i] = cast(CWLObjectType, t2["entry"])
for i, t3 in enumerate(ls):
if t3.get("class") not in ("File", "Directory"):
# Check that every item is a File or Directory object now
raise SourceLine(initialWorkdir, "listing", WorkflowException, debug).makeError(
f"Entry at index {i} of listing is not a Dirent, File or "
f"Directory object, was {t2}."
)
if "basename" not in t3:
continue
basename = os.path.normpath(cast(str, t3["basename"]))
t3["basename"] = basename
if basename.startswith("../"):
raise SourceLine(initialWorkdir, "listing", WorkflowException, debug).makeError(
f"Name {basename!r} at index {i} of listing is invalid, "
"cannot start with '../'"
)
if basename.startswith("/"):
# only if DockerRequirement in requirements
if cwl_version and ORDERED_VERSIONS.index(cwl_version) < ORDERED_VERSIONS.index(
"v1.2.0-dev4"
):
raise SourceLine(initialWorkdir, "listing", WorkflowException, debug).makeError(
f"Name {basename!r} at index {i} of listing is invalid, "
"paths starting with '/' are only permitted in CWL 1.2 "
"and later. Consider changing the absolute path to a relative "
"path, or upgrade the CWL description to CWL v1.2 using "
"https://pypi.org/project/cwl-upgrader/"
)
req, is_req = self.get_requirement("DockerRequirement")
if is_req is not True:
raise SourceLine(initialWorkdir, "listing", WorkflowException, debug).makeError(
f"Name {basename!r} at index {i} of listing is invalid, "
"name can only start with '/' when DockerRequirement "
"is in 'requirements'."
)
with SourceLine(initialWorkdir, "listing", WorkflowException, debug):
j.generatefiles["listing"] = ls
for entry in ls:
if "basename" in entry:
basename = cast(str, entry["basename"])
entry["dirname"] = os.path.join(builder.outdir, os.path.dirname(basename))
entry["basename"] = os.path.basename(basename)
normalizeFilesDirs(entry)
self.updatePathmap(
cast(Optional[str], entry.get("dirname")) or builder.outdir,
cast(PathMapper, builder.pathmapper),
entry,
)
if "listing" in entry:
def remove_dirname(d: CWLObjectType) -> None:
if "dirname" in d:
del d["dirname"]
visit_class(
entry["listing"],
("File", "Directory"),
remove_dirname,
)
visit_class(
[builder.files, builder.bindings],
("File", "Directory"),
partial(check_adjust, self.path_check_mode.value, builder),
)
def job(
self,
job_order: CWLObjectType,
output_callbacks: Optional[OutputCallbackType],
runtimeContext: RuntimeContext,
) -> Generator[Union[JobBase, CallbackJob], None, None]:
workReuse, _ = self.get_requirement("WorkReuse")
enableReuse = workReuse.get("enableReuse", True) if workReuse else True
jobname = uniquename(runtimeContext.name or shortname(self.tool.get("id", "job")))
if runtimeContext.cachedir and enableReuse:
cachecontext = runtimeContext.copy()
cachecontext.outdir = "/out"
cachecontext.tmpdir = "/tmp" # nosec
cachecontext.stagedir = "/stage"
cachebuilder = self._init_job(job_order, cachecontext)
cachebuilder.pathmapper = PathMapper(
cachebuilder.files,
runtimeContext.basedir,
cachebuilder.stagedir,
separateDirs=False,
)
_check_adjust = partial(check_adjust, self.path_check_mode.value, cachebuilder)
_checksum = partial(
compute_checksums,
runtimeContext.make_fs_access(runtimeContext.basedir),
)
visit_class(
[cachebuilder.files, cachebuilder.bindings],
("File", "Directory"),
_check_adjust,
)
visit_class([cachebuilder.files, cachebuilder.bindings], ("File"), _checksum)
cmdline = flatten(list(map(cachebuilder.generate_arg, cachebuilder.bindings)))
docker_req, _ = self.get_requirement("DockerRequirement")
if docker_req is not None and runtimeContext.use_container:
dockerimg = docker_req.get("dockerImageId") or docker_req.get("dockerPull")
elif runtimeContext.default_container is not None and runtimeContext.use_container:
dockerimg = runtimeContext.default_container
else:
dockerimg = None
if dockerimg is not None:
cmdline = ["docker", "run", dockerimg] + cmdline
# not really run using docker, just for hashing purposes
keydict = {
"cmdline": cmdline
} # type: Dict[str, Union[MutableSequence[Union[str, int]], CWLObjectType]]
for shortcut in ["stdin", "stdout", "stderr"]:
if shortcut in self.tool:
keydict[shortcut] = self.tool[shortcut]
def calc_checksum(location: str) -> Optional[str]:
for e in cachebuilder.files:
if (
"location" in e
and e["location"] == location
and "checksum" in e
and e["checksum"] != "sha1$hash"
):
return cast(str, e["checksum"])
return None
def remove_prefix(s: str, prefix: str) -> str:
# replace with str.removeprefix when Python 3.9+
return s[len(prefix) :] if s.startswith(prefix) else s
for location, fobj in cachebuilder.pathmapper.items():
if fobj.type == "File":
checksum = calc_checksum(location)
fobj_stat = os.stat(fobj.resolved)
path = remove_prefix(fobj.resolved, runtimeContext.basedir + "/")
if checksum is not None:
keydict[path] = [fobj_stat.st_size, checksum]
else:
keydict[path] = [
fobj_stat.st_size,
int(fobj_stat.st_mtime * 1000),
]
interesting = {
"DockerRequirement",
"EnvVarRequirement",
"InitialWorkDirRequirement",
"ShellCommandRequirement",
"NetworkAccess",
}
for rh in (self.original_requirements, self.original_hints):
for r in reversed(rh):
cls = cast(str, r["class"])
if cls in interesting and cls not in keydict:
keydict[cls] = r
keydictstr = json_dumps(keydict, separators=(",", ":"), sort_keys=True)
cachekey = hashlib.md5(keydictstr.encode("utf-8")).hexdigest() # nosec
_logger.debug("[job %s] keydictstr is %s -> %s", jobname, keydictstr, cachekey)
jobcache = os.path.join(runtimeContext.cachedir, cachekey)
# Create a lockfile to manage cache status.
jobcachepending = f"{jobcache}.status"
jobcachelock = None
jobstatus = None
# Opens the file for read/write, or creates an empty file.
jobcachelock = open(jobcachepending, "a+")
# get the shared lock to ensure no other process is trying
# to write to this cache
shared_file_lock(jobcachelock)
jobcachelock.seek(0)
jobstatus = jobcachelock.read()
if os.path.isdir(jobcache) and jobstatus == "success":
if docker_req and runtimeContext.use_container:
cachebuilder.outdir = runtimeContext.docker_outdir or random_outdir()
else:
cachebuilder.outdir = jobcache
_logger.info("[job %s] Using cached output in %s", jobname, jobcache)
yield CallbackJob(self, output_callbacks, cachebuilder, jobcache)
# we're done with the cache so release lock
jobcachelock.close()
return
else:
_logger.info("[job %s] Output of job will be cached in %s", jobname, jobcache)
# turn shared lock into an exclusive lock since we'll
# be writing the cache directory
upgrade_lock(jobcachelock)
shutil.rmtree(jobcache, True)
os.makedirs(jobcache)
runtimeContext = runtimeContext.copy()
runtimeContext.outdir = jobcache
def update_status_output_callback(
output_callbacks: OutputCallbackType,
jobcachelock: TextIO,
outputs: Optional[CWLObjectType],
processStatus: str,
) -> None:
# save status to the lockfile then release the lock
jobcachelock.seek(0)
jobcachelock.truncate()
jobcachelock.write(processStatus)
jobcachelock.close()
output_callbacks(outputs, processStatus)
output_callbacks = partial(
update_status_output_callback, output_callbacks, jobcachelock
)
builder = self._init_job(job_order, runtimeContext)
reffiles = copy.deepcopy(builder.files)
j = self.make_job_runner(runtimeContext)(
builder,
builder.job,
self.make_path_mapper,
self.requirements,
self.hints,
jobname,
)
j.prov_obj = self.prov_obj
j.successCodes = self.tool.get("successCodes", [])
j.temporaryFailCodes = self.tool.get("temporaryFailCodes", [])
j.permanentFailCodes = self.tool.get("permanentFailCodes", [])
debug = _logger.isEnabledFor(logging.DEBUG)
if debug:
_logger.debug(
"[job %s] initializing from %s%s",
j.name,
self.tool.get("id", ""),
" as part of %s" % runtimeContext.part_of if runtimeContext.part_of else "",
)
_logger.debug("[job %s] %s", j.name, json_dumps(builder.job, indent=4))
builder.pathmapper = self.make_path_mapper(reffiles, builder.stagedir, runtimeContext, True)
builder.requirements = j.requirements
_check_adjust = partial(check_adjust, self.path_check_mode.value, builder)
visit_class([builder.files, builder.bindings], ("File", "Directory"), _check_adjust)
self._initialworkdir(j, builder)
if debug:
_logger.debug(
"[job %s] path mappings is %s",
j.name,
json_dumps(
{p: builder.pathmapper.mapper(p) for p in builder.pathmapper.files()},
indent=4,
),
)
if self.tool.get("stdin"):
with SourceLine(self.tool, "stdin", ValidationException, debug):
stdin_eval = builder.do_eval(self.tool["stdin"])
if not (isinstance(stdin_eval, str) or stdin_eval is None):
raise ValidationException(
f"'stdin' expression must return a string or null. Got {stdin_eval!r} "
f"for {self.tool['stdin']!r}."
)
j.stdin = stdin_eval
if j.stdin:
reffiles.append({"class": "File", "path": j.stdin})
if self.tool.get("stderr"):
with SourceLine(self.tool, "stderr", ValidationException, debug):
stderr_eval = builder.do_eval(self.tool["stderr"])
if not isinstance(stderr_eval, str):
raise ValidationException(
f"'stderr' expression must return a string. Got {stderr_eval!r} "
f"for {self.tool['stderr']!r}."
)
j.stderr = stderr_eval
if j.stderr:
if os.path.isabs(j.stderr) or ".." in j.stderr:
raise ValidationException(
"stderr must be a relative path, got '%s'" % j.stderr
)
if self.tool.get("stdout"):
with SourceLine(self.tool, "stdout", ValidationException, debug):
stdout_eval = builder.do_eval(self.tool["stdout"])
if not isinstance(stdout_eval, str):
raise ValidationException(
f"'stdout' expression must return a string. Got {stdout_eval!r} "
f"for {self.tool['stdout']!r}."
)
j.stdout = stdout_eval
if j.stdout:
if os.path.isabs(j.stdout) or ".." in j.stdout or not j.stdout:
raise ValidationException(
"stdout must be a relative path, got '%s'" % j.stdout
)
if debug:
_logger.debug(
"[job %s] command line bindings is %s",
j.name,
json_dumps(builder.bindings, indent=4),
)
dockerReq, _ = self.get_requirement("DockerRequirement")
if dockerReq is not None and runtimeContext.use_container:
j.outdir = runtimeContext.get_outdir()
j.tmpdir = runtimeContext.get_tmpdir()
j.stagedir = runtimeContext.create_tmpdir()
else:
j.outdir = builder.outdir
j.tmpdir = builder.tmpdir
j.stagedir = builder.stagedir
inplaceUpdateReq, _ = self.get_requirement("InplaceUpdateRequirement")
if inplaceUpdateReq is not None:
j.inplace_update = cast(bool, inplaceUpdateReq["inplaceUpdate"])
normalizeFilesDirs(j.generatefiles)
readers = {} # type: Dict[str, CWLObjectType]
muts = set() # type: Set[str]
if builder.mutation_manager is not None:
def register_mut(f: CWLObjectType) -> None:
mm = cast(MutationManager, builder.mutation_manager)
muts.add(cast(str, f["location"]))
mm.register_mutation(j.name, f)
def register_reader(f: CWLObjectType) -> None:
mm = cast(MutationManager, builder.mutation_manager)
if cast(str, f["location"]) not in muts:
mm.register_reader(j.name, f)
readers[cast(str, f["location"])] = copy.deepcopy(f)
for li in j.generatefiles["listing"]:
if li.get("writable") and j.inplace_update:
adjustFileObjs(li, register_mut)
adjustDirObjs(li, register_mut)
else:
adjustFileObjs(li, register_reader)
adjustDirObjs(li, register_reader)
adjustFileObjs(builder.files, register_reader)
adjustFileObjs(builder.bindings, register_reader)
adjustDirObjs(builder.files, register_reader)
adjustDirObjs(builder.bindings, register_reader)
timelimit, _ = self.get_requirement("ToolTimeLimit")
if timelimit is not None:
with SourceLine(timelimit, "timelimit", ValidationException, debug):
limit_field = cast(Dict[str, Union[str, int]], timelimit)["timelimit"]
if isinstance(limit_field, str):
timelimit_eval = builder.do_eval(limit_field)
if timelimit_eval and not isinstance(timelimit_eval, int):
raise WorkflowException(
"'timelimit' expression must evaluate to a long/int. Got "
f"{timelimit_eval!r} for expression {limit_field!r}."
)
else:
timelimit_eval = limit_field
if not isinstance(timelimit_eval, int) or timelimit_eval < 0:
raise WorkflowException(
f"timelimit must be an integer >= 0, got: {timelimit_eval!r}"
)
j.timelimit = timelimit_eval
networkaccess, _ = self.get_requirement("NetworkAccess")
if networkaccess is not None:
with SourceLine(networkaccess, "networkAccess", ValidationException, debug):
networkaccess_field = networkaccess["networkAccess"]
if isinstance(networkaccess_field, str):
networkaccess_eval = builder.do_eval(networkaccess_field)
if not isinstance(networkaccess_eval, bool):
raise WorkflowException(
"'networkAccess' expression must evaluate to a bool. "
f"Got {networkaccess_eval!r} for expression {networkaccess_field!r}."
)
else:
networkaccess_eval = networkaccess_field
if not isinstance(networkaccess_eval, bool):
raise WorkflowException(
"networkAccess must be a boolean, got: {networkaccess_eval!r}."
)
j.networkaccess = networkaccess_eval
# Build a mapping to hold any EnvVarRequirement
required_env = {}
evr, _ = self.get_requirement("EnvVarRequirement")
if evr is not None:
for eindex, t3 in enumerate(cast(List[Dict[str, str]], evr["envDef"])):
env_value_field = t3["envValue"]
if "${" in env_value_field or "$(" in env_value_field:
env_value_eval = builder.do_eval(env_value_field)
if not isinstance(env_value_eval, str):
raise SourceLine(evr["envDef"], eindex, WorkflowException, debug).makeError(
"'envValue expression must evaluate to a str. "
f"Got {env_value_eval!r} for expression {env_value_field!r}."
)
env_value = env_value_eval
else:
env_value = env_value_field
required_env[t3["envName"]] = env_value
# Construct the env
j.prepare_environment(runtimeContext, required_env)
shellcmd, _ = self.get_requirement("ShellCommandRequirement")
if shellcmd is not None:
cmd = [] # type: List[str]
for b in builder.bindings:
arg = builder.generate_arg(b)
if b.get("shellQuote", True):
arg = [shellescape.quote(a) for a in aslist(arg)]
cmd.extend(aslist(arg))
j.command_line = ["/bin/sh", "-c", " ".join(cmd)]
else:
j.command_line = flatten(list(map(builder.generate_arg, builder.bindings)))
j.pathmapper = builder.pathmapper
j.collect_outputs = partial(
self.collect_output_ports,
self.tool["outputs"],
builder,
compute_checksum=getdefault(runtimeContext.compute_checksum, True),
jobname=jobname,
readers=readers,
)
j.output_callback = output_callbacks
mpi, _ = self.get_requirement(MPIRequirementName)
if mpi is not None:
np = cast( # From the schema for MPIRequirement.processes
Union[int, str],
mpi.get("processes", runtimeContext.mpi_config.default_nproc),
)
if isinstance(np, str):
np_eval = builder.do_eval(np)
if not isinstance(np_eval, int):
raise SourceLine(mpi, "processes", WorkflowException, debug).makeError(
f"{MPIRequirementName} needs 'processes' expression to "
f"evaluate to an int, got {np_eval!r} for expression {np!r}."
)
np = np_eval
j.mpi_procs = np
yield j
def collect_output_ports(
self,
ports: Union[CommentedSeq, Set[CWLObjectType]],
builder: Builder,
outdir: str,
rcode: int,
compute_checksum: bool = True,
jobname: str = "",
readers: Optional[MutableMapping[str, CWLObjectType]] = None,
) -> OutputPortsType:
ret = {} # type: OutputPortsType
debug = _logger.isEnabledFor(logging.DEBUG)
cwl_version = self.metadata.get(ORIGINAL_CWLVERSION, None)
if cwl_version != "v1.0":
builder.resources["exitCode"] = rcode
try:
fs_access = builder.make_fs_access(outdir)
custom_output = fs_access.join(outdir, "cwl.output.json")
if fs_access.exists(custom_output):
with fs_access.open(custom_output, "r") as f:
ret = json.load(f)
if debug:
_logger.debug(
"Raw output from %s: %s",
custom_output,
json_dumps(ret, indent=4),
)
else:
for i, port in enumerate(ports):
with SourceLine(
ports,
i,
partial(ParameterOutputWorkflowException, port=port),
debug,
):
fragment = shortname(port["id"])
ret[fragment] = self.collect_output(
port,
builder,
outdir,
fs_access,
compute_checksum=compute_checksum,
)
if ret:
revmap = partial(revmap_file, builder, outdir)
adjustDirObjs(ret, trim_listing)
visit_class(ret, ("File", "Directory"), revmap)
visit_class(ret, ("File", "Directory"), remove_path)
normalizeFilesDirs(ret)
visit_class(
ret,
("File", "Directory"),
partial(check_valid_locations, fs_access),
)
if compute_checksum:
adjustFileObjs(ret, partial(compute_checksums, fs_access))
expected_schema = cast(Schema, self.names.get_name("outputs_record_schema", None))
validate_ex(
expected_schema,
ret,
strict=False,
logger=_logger_validation_warnings,
vocab=INPUT_OBJ_VOCAB,
)
if ret is not None and builder.mutation_manager is not None:
adjustFileObjs(ret, builder.mutation_manager.set_generation)
return ret if ret is not None else {}
except ValidationException as e:
raise WorkflowException(
"Error validating output record. " + str(e) + "\n in " + json_dumps(ret, indent=4)
) from e
finally:
if builder.mutation_manager and readers:
for r in readers.values():
builder.mutation_manager.release_reader(jobname, r)
def collect_output(
self,
schema: CWLObjectType,
builder: Builder,
outdir: str,
fs_access: StdFsAccess,
compute_checksum: bool = True,
) -> Optional[CWLOutputType]:
r = [] # type: List[CWLOutputType]
empty_and_optional = False
debug = _logger.isEnabledFor(logging.DEBUG)
result: Optional[CWLOutputType] = None
if "outputBinding" in schema:
binding = cast(
MutableMapping[str, Union[bool, str, List[str]]],
schema["outputBinding"],
)
globpatterns = [] # type: List[str]
revmap = partial(revmap_file, builder, outdir)
if "glob" in binding:
with SourceLine(binding, "glob", WorkflowException, debug):
for gb in aslist(binding["glob"]):
gb = builder.do_eval(gb)
if gb:
gb_eval_fail = False
if not isinstance(gb, str):
if isinstance(gb, list):
for entry in gb:
if not isinstance(entry, str):
gb_eval_fail = True
else:
gb_eval_fail = True
if gb_eval_fail:
raise WorkflowException(
"Resolved glob patterns must be strings "
f"or list of strings, not "
f"{gb!r} from {binding['glob']!r}"
)
globpatterns.extend(aslist(gb))
for gb in globpatterns:
if gb.startswith(builder.outdir):
gb = gb[len(builder.outdir) + 1 :]
elif gb == ".":
gb = outdir
elif gb.startswith("/"):
raise WorkflowException("glob patterns must not start with '/'")
try:
prefix = fs_access.glob(outdir)
sorted_glob_result = sorted(
fs_access.glob(fs_access.join(outdir, gb)),
key=cmp_to_key(locale.strcoll),
)
r.extend(
[
{
"location": g,
"path": fs_access.join(
builder.outdir,
urllib.parse.unquote(g[len(prefix[0]) + 1 :]),
),
"basename": decoded_basename,
"nameroot": os.path.splitext(decoded_basename)[0],
"nameext": os.path.splitext(decoded_basename)[1],
"class": "File" if fs_access.isfile(g) else "Directory",
}
for g, decoded_basename in zip(
sorted_glob_result,
map(
lambda x: os.path.basename(urllib.parse.unquote(x)),
sorted_glob_result,
),
)
]
)
except OSError as e:
_logger.warning(str(e))
except Exception:
_logger.error("Unexpected error from fs_access", exc_info=True)
raise
for files in cast(List[Dict[str, Optional[CWLOutputType]]], r):
rfile = files.copy()
revmap(rfile)
if files["class"] == "Directory":
ll = binding.get("loadListing") or builder.loadListing
if ll and ll != "no_listing":
get_listing(fs_access, files, (ll == "deep_listing"))
else:
if binding.get("loadContents"):
with fs_access.open(cast(str, rfile["location"]), "rb") as f:
files["contents"] = str(
content_limit_respected_read_bytes(f), "utf-8"
)
if compute_checksum:
with fs_access.open(cast(str, rfile["location"]), "rb") as f:
checksum = hashlib.sha1() # nosec
contents = f.read(1024 * 1024)
while contents != b"":
checksum.update(contents)
contents = f.read(1024 * 1024)
files["checksum"] = "sha1$%s" % checksum.hexdigest()
files["size"] = fs_access.size(cast(str, rfile["location"]))
optional = False
single = False
if isinstance(schema["type"], MutableSequence):
if "null" in schema["type"]:
optional = True
if "File" in schema["type"] or "Directory" in schema["type"]:
single = True
elif schema["type"] == "File" or schema["type"] == "Directory":
single = True
if "outputEval" in binding:
with SourceLine(binding, "outputEval", WorkflowException, debug):
result = builder.do_eval(cast(CWLOutputType, binding["outputEval"]), context=r)
else:
result = cast(CWLOutputType, r)
if single:
with SourceLine(binding, "glob", WorkflowException, debug):
if not result and not optional:
raise WorkflowException(
f"Did not find output file with glob pattern: {globpatterns!r}."
)
elif not result and optional:
pass
elif isinstance(result, MutableSequence):
if len(result) > 1:
raise WorkflowException(
"Multiple matches for output item that is a single file."
)
else:
result = cast(CWLOutputType, result[0])
if "secondaryFiles" in schema:
with SourceLine(schema, "secondaryFiles", WorkflowException, debug):
for primary in aslist(result):
if isinstance(primary, MutableMapping):
primary.setdefault("secondaryFiles", [])
pathprefix = primary["path"][0 : primary["path"].rindex(os.sep) + 1]
for sf in aslist(schema["secondaryFiles"]):
if "required" in sf:
with SourceLine(
schema["secondaryFiles"],
"required",
WorkflowException,
debug,
):
sf_required_eval = builder.do_eval(
sf["required"], context=primary
)
if not (
isinstance(sf_required_eval, bool)
or sf_required_eval is None
):
raise WorkflowException(
"Expressions in the field "
"'required' must evaluate to a "
"Boolean (true or false) or None. "
f"Got {sf_required_eval!r} for "
f"{sf['required']!r}."
)
sf_required: bool = sf_required_eval or False
else:
sf_required = False
if "$(" in sf["pattern"] or "${" in sf["pattern"]:
sfpath = builder.do_eval(sf["pattern"], context=primary)
else:
sfpath = substitute(primary["basename"], sf["pattern"])
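                                # e.g. (illustrative) pattern ".bai" appended to
                                # basename "reads.bam" gives "reads.bam.bai";
                                # a leading caret as in "^.bai" first strips one
                                # extension, giving "reads.bai".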
for sfitem in aslist(sfpath):
if not sfitem:
continue
if isinstance(sfitem, str):
sfitem = {"path": pathprefix + sfitem}
original_sfitem = copy.deepcopy(sfitem)
if (
not fs_access.exists(
cast(
str, cast(CWLObjectType, revmap(sfitem))["location"]
)
)
and sf_required
):
raise WorkflowException(
"Missing required secondary file '%s'"
% (original_sfitem["path"])
)
if "path" in sfitem and "location" not in sfitem:
revmap(sfitem)
if fs_access.isfile(sfitem["location"]):
sfitem["class"] = "File"
primary["secondaryFiles"].append(sfitem)
elif fs_access.isdir(sfitem["location"]):
sfitem["class"] = "Directory"
primary["secondaryFiles"].append(sfitem)
if "format" in schema:
format_field = cast(str, schema["format"])
if "$(" in format_field or "${" in format_field:
for index, primary in enumerate(aslist(result)):
format_eval = builder.do_eval(format_field, context=primary)
if not isinstance(format_eval, str):
message = (
f"'format' expression must evaluate to a string. "
f"Got {format_eval!r} from {format_field!r}."
)
if isinstance(result, list):
message += f" 'self' had the value of the index {index} result: {primary!r}."
raise SourceLine(schema, "format", WorkflowException, debug).makeError(
message
)
primary["format"] = format_eval
else:
for primary in aslist(result):
primary["format"] = format_field
# Ensure files point to local references outside of the run environment
adjustFileObjs(result, revmap)
if not result and optional:
# Don't convert zero or empty string to None
if result in [0, ""]:
return result
# For [] or None, return None
else:
return None
if (
not result
and not empty_and_optional
and isinstance(schema["type"], MutableMapping)
and schema["type"]["type"] == "record"
):
out = {}
for field in cast(List[CWLObjectType], schema["type"]["fields"]):
out[shortname(cast(str, field["name"]))] = self.collect_output(
field, builder, outdir, fs_access, compute_checksum=compute_checksum
)
return out
return result
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/context.py 0000644 0001750 0001750 00000021740 14621376047 020014 0 ustar 00michael michael """Shared context objects that replace use of kwargs."""
import copy
import os
import shutil
import tempfile
import threading
from typing import (
IO,
TYPE_CHECKING,
Any,
Callable,
Dict,
Iterable,
List,
Literal,
Optional,
TextIO,
Tuple,
Union,
)
from ruamel.yaml.comments import CommentedMap
from schema_salad.avro.schema import Names
from schema_salad.ref_resolver import Loader
from schema_salad.utils import FetcherCallableType
from .mpi import MpiConfig
from .pathmapper import PathMapper
from .stdfsaccess import StdFsAccess
from .utils import DEFAULT_TMP_PREFIX, CWLObjectType, HasReqsHints, ResolverType
if TYPE_CHECKING:
from cwl_utils.parser.cwl_v1_2 import LoadingOptions
from .builder import Builder
from .cwlprov.provenance_profile import ProvenanceProfile
from .cwlprov.ro import ResearchObject
from .mutation import MutationManager
from .process import Process
from .secrets import SecretStore
from .software_requirements import DependenciesConfiguration
class ContextBase:
"""Shared kwargs based initializer for :py:class:`RuntimeContext` and :py:class:`LoadingContext`."""
def __init__(self, kwargs: Optional[Dict[str, Any]] = None) -> None:
"""Initialize."""
if kwargs:
for k, v in kwargs.items():
if hasattr(self, k):
setattr(self, k, v)
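    # Illustrative: only keys naming an existing attribute are copied, e.g.
    #   RuntimeContext({"debug": True, "unknown_key": 1})
    # sets .debug to True and silently ignores "unknown_key".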
def make_tool_notimpl(toolpath_object: CommentedMap, loadingContext: "LoadingContext") -> "Process":
"""Fake implementation of the make tool function."""
raise NotImplementedError()
default_make_tool = make_tool_notimpl
def log_handler(
outdir: str,
base_path_logs: str,
stdout_path: Optional[str],
stderr_path: Optional[str],
) -> None:
"""Move logs from log location to final output."""
if outdir != base_path_logs:
if stdout_path:
new_stdout_path = stdout_path.replace(base_path_logs, outdir)
shutil.copy2(stdout_path, new_stdout_path)
if stderr_path:
new_stderr_path = stderr_path.replace(base_path_logs, outdir)
shutil.copy2(stderr_path, new_stderr_path)
def set_log_dir(outdir: str, log_dir: str, subdir_name: str) -> str:
"""Set the log directory."""
if log_dir == "":
return outdir
else:
return log_dir + "/" + subdir_name
class LoadingContext(ContextBase):
def __init__(self, kwargs: Optional[Dict[str, Any]] = None) -> None:
"""Initialize the LoadingContext from the kwargs."""
self.debug: bool = False
self.metadata: CWLObjectType = {}
self.requirements: Optional[List[CWLObjectType]] = None
self.hints: Optional[List[CWLObjectType]] = None
self.overrides_list: List[CWLObjectType] = []
self.loader: Optional[Loader] = None
self.avsc_names: Optional[Names] = None
self.disable_js_validation: bool = False
self.js_hint_options_file: Optional[str] = None
self.do_validate: bool = True
self.enable_dev: bool = False
self.strict: bool = True
self.resolver: Optional[ResolverType] = None
self.fetcher_constructor: Optional[FetcherCallableType] = None
self.construct_tool_object = default_make_tool
self.research_obj: Optional[ResearchObject] = None
self.orcid: str = ""
self.cwl_full_name: str = ""
self.host_provenance: bool = False
self.user_provenance: bool = False
self.prov_obj: Optional["ProvenanceProfile"] = None
self.do_update: Optional[bool] = None
self.jobdefaults: Optional[CommentedMap] = None
self.doc_cache: bool = True
self.relax_path_checks: bool = False
self.singularity: bool = False
self.podman: bool = False
self.eval_timeout: float = 60
self.codegen_idx: Dict[str, Tuple[Any, "LoadingOptions"]] = {}
self.fast_parser = False
self.skip_resolve_all = False
self.skip_schemas = False
super().__init__(kwargs)
def copy(self) -> "LoadingContext":
"""Return a copy of this :py:class:`LoadingContext`."""
return copy.copy(self)
class RuntimeContext(ContextBase):
outdir: Optional[str] = None
tmpdir: str = ""
tmpdir_prefix: str = DEFAULT_TMP_PREFIX
tmp_outdir_prefix: str = ""
stagedir: str = ""
def __init__(self, kwargs: Optional[Dict[str, Any]] = None) -> None:
"""Initialize the RuntimeContext from the kwargs."""
select_resources_callable = Callable[
[Dict[str, Union[int, float]], RuntimeContext],
Dict[str, Union[int, float]],
]
self.user_space_docker_cmd: Optional[str] = None
self.secret_store: Optional["SecretStore"] = None
self.no_read_only: bool = False
self.custom_net: Optional[str] = None
self.no_match_user: bool = False
self.preserve_environment: Optional[Iterable[str]] = None
self.preserve_entire_environment: bool = False
self.use_container: bool = True
self.force_docker_pull: bool = False
self.rm_tmpdir: bool = True
self.pull_image: bool = True
self.rm_container: bool = True
self.move_outputs: Union[Literal["move"], Literal["leave"], Literal["copy"]] = "move"
self.log_dir: str = ""
self.set_log_dir = set_log_dir
self.log_dir_handler = log_handler
self.streaming_allowed: bool = False
self.singularity: bool = False
self.podman: bool = False
self.debug: bool = False
self.compute_checksum: bool = True
self.name: str = ""
self.default_container: Optional[str] = ""
self.find_default_container: Optional[Callable[[HasReqsHints], Optional[str]]] = None
self.cachedir: Optional[str] = None
self.part_of: str = ""
self.basedir: str = ""
self.toplevel: bool = False
self.mutation_manager: Optional["MutationManager"] = None
self.make_fs_access = StdFsAccess
self.path_mapper = PathMapper
self.builder: Optional["Builder"] = None
self.docker_outdir: str = ""
self.docker_tmpdir: str = ""
self.docker_stagedir: str = ""
self.js_console: bool = False
self.job_script_provider: Optional[DependenciesConfiguration] = None
self.select_resources: Optional[select_resources_callable] = None
self.eval_timeout: float = 60
self.postScatterEval: Optional[Callable[[CWLObjectType], Optional[CWLObjectType]]] = None
self.on_error: Union[Literal["stop"], Literal["continue"]] = "stop"
self.strict_memory_limit: bool = False
self.strict_cpu_limit: bool = False
self.cidfile_dir: Optional[str] = None
self.cidfile_prefix: Optional[str] = None
self.workflow_eval_lock: Optional[threading.Condition] = None
self.research_obj: Optional[ResearchObject] = None
self.orcid: str = ""
self.cwl_full_name: str = ""
self.process_run_id: Optional[str] = None
self.prov_obj: Optional[ProvenanceProfile] = None
self.mpi_config: MpiConfig = MpiConfig()
self.default_stdout: Optional[Union[IO[bytes], TextIO]] = None
self.default_stderr: Optional[Union[IO[bytes], TextIO]] = None
self.validate_only: bool = False
self.validate_stdout: Optional[Union[IO[bytes], TextIO, IO[str]]] = None
super().__init__(kwargs)
if self.tmp_outdir_prefix == "":
self.tmp_outdir_prefix = self.tmpdir_prefix
def get_outdir(self) -> str:
"""Return :py:attr:`outdir` or create one with :py:attr:`tmp_outdir_prefix`."""
if self.outdir:
return self.outdir
return self.create_outdir()
def get_tmpdir(self) -> str:
"""Return :py:attr:`tmpdir` or create one with :py:attr:`tmpdir_prefix`."""
if self.tmpdir:
return self.tmpdir
return self.create_tmpdir()
def get_stagedir(self) -> str:
"""Return :py:attr:`stagedir` or create one with :py:attr:`tmpdir_prefix`."""
if self.stagedir:
return self.stagedir
tmp_dir, tmp_prefix = os.path.split(self.tmpdir_prefix)
return tempfile.mkdtemp(prefix=tmp_prefix, dir=tmp_dir)
def create_tmpdir(self) -> str:
"""Create a temporary directory that respects :py:attr:`tmpdir_prefix`."""
tmp_dir, tmp_prefix = os.path.split(self.tmpdir_prefix)
return tempfile.mkdtemp(prefix=tmp_prefix, dir=tmp_dir)
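    # Illustrative: with tmpdir_prefix "/tmp/cwl_", this calls
    # tempfile.mkdtemp(prefix="cwl_", dir="/tmp") and might return
    # something like "/tmp/cwl_h3x9k2".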
def create_outdir(self) -> str:
"""Create a temporary directory that respects :py:attr:`tmp_outdir_prefix`."""
out_dir, out_prefix = os.path.split(self.tmp_outdir_prefix)
return tempfile.mkdtemp(prefix=out_prefix, dir=out_dir)
def copy(self) -> "RuntimeContext":
"""Return a copy of this :py:class:`RuntimeContext`."""
return copy.copy(self)
def getdefault(val: Any, default: Any) -> Any:
"""Return the ``val`` using the ``default`` as backup in case the val is ``None``."""
if val is None:
return default
else:
return val
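# Illustrative behaviour: getdefault(None, 5) returns 5, while getdefault(0, 5)
# returns 0 -- only None triggers the fallback, so falsy values such as 0, ""
# and [] are preserved.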
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/cuda.py 0000644 0001750 0001750 00000004445 14621376047 017247 0 ustar 00michael michael """Support utilities for CUDA."""
import subprocess # nosec
import xml.dom.minidom # nosec
from typing import Tuple
from .loghandler import _logger
from .utils import CWLObjectType
def cuda_version_and_device_count() -> Tuple[str, int]:
"""Determine the CUDA version and number of attached CUDA GPUs."""
try:
out = subprocess.check_output(["nvidia-smi", "-q", "-x"]) # nosec
except Exception as e:
_logger.warning("Error checking CUDA version with nvidia-smi: %s", e)
return ("", 0)
dm = xml.dom.minidom.parseString(out) # nosec
ag = dm.getElementsByTagName("attached_gpus")
if len(ag) < 1 or ag[0].firstChild is None:
_logger.warning(
"Error checking CUDA version with nvidia-smi. Missing 'attached_gpus' or it is empty.: %s",
out,
)
return ("", 0)
ag_element = ag[0].firstChild
cv = dm.getElementsByTagName("cuda_version")
if len(cv) < 1 or cv[0].firstChild is None:
_logger.warning(
"Error checking CUDA version with nvidia-smi. Missing 'cuda_version' or it is empty.: %s",
out,
)
return ("", 0)
cv_element = cv[0].firstChild
if isinstance(cv_element, xml.dom.minidom.Text) and isinstance(
ag_element, xml.dom.minidom.Text
):
return (cv_element.data, int(ag_element.data))
_logger.warning(
"Error checking CUDA version with nvidia-smi. "
"Either 'attached_gpus' or 'cuda_version' was not a text node: %s",
out,
)
return ("", 0)
def cuda_check(cuda_req: CWLObjectType, requestCount: int) -> int:
try:
vmin = float(str(cuda_req["cudaVersionMin"]))
version, devices = cuda_version_and_device_count()
if version == "":
# nvidia-smi not detected, or failed some other way
return 0
versionf = float(version)
if versionf < vmin:
_logger.warning("CUDA version '%s' is less than minimum version '%s'", version, vmin)
return 0
if requestCount > devices:
_logger.warning("Requested %d GPU devices but only %d available", requestCount, devices)
return 0
return requestCount
except Exception as e:
_logger.warning("Error checking CUDA requirements: %s", e)
return 0
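# Illustrative use (values assumed): with cuda_req = {"cudaVersionMin": "11.4"}
# on a host reporting CUDA 12.2 and 2 GPUs, cuda_check(cuda_req, 1) returns 1;
# requesting 4 devices, or running without nvidia-smi, returns 0.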
././@PaxHeader 0000000 0000000 0000000 00000000034 00000000000 010212 x ustar 00 28 mtime=1715863245.9690487
cwltool-3.1.20240508115724/cwltool/cwlprov/ 0000755 0001750 0001750 00000000000 14621377316 017446 5 ustar 00michael michael ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/cwlprov/__init__.py 0000644 0001750 0001750 00000012024 14621376047 021556 0 ustar 00michael michael """Stores Research Object including provenance."""
import hashlib
import os
import pwd
import re
import uuid
from getpass import getuser
from typing import IO, Any, Callable, Dict, List, Optional, Tuple, TypedDict, Union
def _whoami() -> Tuple[str, str]:
"""Return the current operating system account as (username, fullname)."""
username = getuser()
try:
fullname = pwd.getpwuid(os.getuid())[4].split(",")[0]
except (KeyError, IndexError):
fullname = username
return (username, fullname)
def _check_mod_11_2(numeric_string: str) -> bool:
"""
Validate numeric_string for its MOD-11-2 checksum.
Any "-" in the numeric_string are ignored.
The last digit of numeric_string is assumed to be the checksum, 0-9 or X.
See ISO/IEC 7064:2003 and
https://support.orcid.org/knowledgebase/articles/116780-structure-of-the-orcid-identifier
"""
# Strip -
nums = numeric_string.replace("-", "")
total = 0
# skip last (check)digit
for num in nums[:-1]:
digit = int(num)
total = (total + digit) * 2
remainder = total % 11
result = (12 - remainder) % 11
if result == 10:
checkdigit = "X"
else:
checkdigit = str(result)
# Compare against last digit or X
return nums[-1].upper() == checkdigit
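# Worked example: for the documentation ORCID 0000-0002-1825-0097 the loop
# folds the first 15 digits to a total of 1314, 1314 % 11 == 5, and
# (12 - 5) % 11 == 7, matching the final digit, so the check passes.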
def _valid_orcid(orcid: Optional[str]) -> str:
"""
Ensure orcid is a valid ORCID identifier.
The string must be equivalent to one of these forms:
0000-0002-1825-0097
orcid.org/0000-0002-1825-0097
http://orcid.org/0000-0002-1825-0097
https://orcid.org/0000-0002-1825-0097
If the ORCID number or prefix is invalid, a ValueError is raised.
The returned ORCID string is always in the form of:
https://orcid.org/0000-0002-1825-0097
"""
if orcid is None or not orcid:
raise ValueError("ORCID cannot be unspecified")
# Liberal in what we consume, e.g. ORCID.org/0000-0002-1825-009x
orcid = orcid.lower()
match = re.match(
# Note: concatenated r"" r"" below so we can add comments to pattern
# Optional hostname, with or without protocol
r"(http://orcid\.org/|https://orcid\.org/|orcid\.org/)?"
# alternative pattern, but probably messier
# r"^((https?://)?orcid.org/)?"
# ORCID number is always 4x4 numerical digits,
# but last digit (modulus 11 checksum)
# can also be X (but we made it lowercase above).
# e.g. 0000-0002-1825-0097
# or 0000-0002-1694-233x
r"(?P(\d{4}-\d{4}-\d{4}-\d{3}[0-9x]))$",
orcid,
)
help_url = (
"https://support.orcid.org/knowledgebase/articles/"
"116780-structure-of-the-orcid-identifier"
)
if not match:
raise ValueError(f"Invalid ORCID: {orcid}\n{help_url}")
# Conservative in what we produce:
# a) Ensure any checksum digit is uppercase
orcid_num = match.group("orcid").upper()
# b) ..and correct
if not _check_mod_11_2(orcid_num):
raise ValueError(f"Invalid ORCID checksum: {orcid_num}\n{help_url}")
# c) Re-add the official prefix https://orcid.org/
return "https://orcid.org/%s" % orcid_num
Annotation = TypedDict(
"Annotation",
{
"uri": str,
"about": str,
"content": Optional[Union[str, List[str]]],
"oa:motivatedBy": Dict[str, str],
},
)
class Aggregate(TypedDict, total=False):
"""RO Aggregate class."""
uri: Optional[str]
bundledAs: Optional[Dict[str, Any]]
mediatype: Optional[str]
conformsTo: Optional[Union[str, List[str]]]
createdOn: Optional[str]
createdBy: Optional[Dict[str, str]]
# Aggregate.bundledAs is actually type Aggregate, but cyclic definitions are not supported
class AuthoredBy(TypedDict, total=False):
"""RO AuthoredBy class."""
orcid: Optional[str]
name: Optional[str]
uri: Optional[str]
def checksum_copy(
src_file: IO[Any],
dst_file: Optional[IO[Any]] = None,
hasher: Optional[Callable[[], "hashlib._Hash"]] = None,
buffersize: int = 1024 * 1024,
) -> str:
"""Compute checksums while copying a file."""
# TODO: Use hashlib.new(Hasher_str) instead?
if hasher:
checksum = hasher()
else:
from .provenance_constants import Hasher
checksum = Hasher()
contents = src_file.read(buffersize)
if dst_file and hasattr(dst_file, "name") and hasattr(src_file, "name"):
temp_location = os.path.join(os.path.dirname(dst_file.name), str(uuid.uuid4()))
try:
os.rename(dst_file.name, temp_location)
os.link(src_file.name, dst_file.name)
dst_file = None
os.unlink(temp_location)
except OSError:
pass
if os.path.exists(temp_location):
os.rename(temp_location, dst_file.name) # type: ignore
while contents != b"":
if dst_file is not None:
dst_file.write(contents)
checksum.update(contents)
contents = src_file.read(buffersize)
if dst_file is not None:
dst_file.flush()
return checksum.hexdigest().lower()
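# Minimal usage sketch (paths assumed): hash while copying with SHA-256:
#   with open("in.dat", "rb") as src, open("out.dat", "wb") as dst:
#       digest = checksum_copy(src, dst, hasher=hashlib.sha256)
# Passing dst_file=None computes the lowercase hex digest without copying.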
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/cwlprov/provenance_constants.py 0000644 0001750 0001750 00000003537 14621376047 024264 0 ustar 00michael michael import hashlib
import os
import uuid
from prov.identifier import Namespace
__citation__ = "https://doi.org/10.5281/zenodo.1208477"
# NOTE: Semantic versioning of the CWLProv Research Object
# **and** the cwlprov files
#
# Rough guide (major.minor.patch):
# 1. Bump major number if removing/"breaking" resources or PROV statements
# 2. Bump minor number if adding resources or PROV statements
# 3. Bump patch number for non-breaking non-adding changes,
# e.g. fixing broken relative paths
CWLPROV_VERSION = "https://w3id.org/cwl/prov/0.6.0"
# Research Object folders
METADATA = "metadata"
DATA = "data"
WORKFLOW = "workflow"
SNAPSHOT = "snapshot"
# sub-folders
MAIN = os.path.join(WORKFLOW, "main")
PROVENANCE = os.path.join(METADATA, "provenance")
LOGS = os.path.join(METADATA, "logs")
WFDESC = Namespace("wfdesc", "http://purl.org/wf4ever/wfdesc#")
WFPROV = Namespace("wfprov", "http://purl.org/wf4ever/wfprov#")
WF4EVER = Namespace("wf4ever", "http://purl.org/wf4ever/wf4ever#")
RO = Namespace("ro", "http://purl.org/wf4ever/ro#")
ORE = Namespace("ore", "http://www.openarchives.org/ore/terms/")
FOAF = Namespace("foaf", "http://xmlns.com/foaf/0.1/")
SCHEMA = Namespace("schema", "http://schema.org/")
CWLPROV = Namespace("cwlprov", "https://w3id.org/cwl/prov#")
ORCID = Namespace("orcid", "https://orcid.org/")
UUID = Namespace("id", "urn:uuid:")
# BagIt and YAML always use UTF-8
ENCODING = "UTF-8"
TEXT_PLAIN = f"text/plain; charset={ENCODING!r}"
# sha1, compatible with the File type's "checksum" field
# e.g. "checksum" = "sha1$47a013e660d408619d894b20806b1d5086aab03b"
# See ./cwltool/schemas/v1.0/Process.yml
Hasher = hashlib.sha1
SHA1 = "sha1"
SHA256 = "sha256"
SHA512 = "sha512"
# TODO: Better identifiers for user, at least
# these should be preserved in ~/.config/cwl for every execution
# on this host
USER_UUID = uuid.uuid4().urn
ACCOUNT_UUID = uuid.uuid4().urn
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/cwlprov/provenance_profile.py 0000644 0001750 0001750 00000100265 14621376047 023704 0 ustar 00michael michael import copy
import datetime
import logging
import urllib
import uuid
from io import BytesIO
from pathlib import PurePath, PurePosixPath
from socket import getfqdn
from typing import (
TYPE_CHECKING,
Any,
Dict,
List,
MutableMapping,
MutableSequence,
Optional,
Sequence,
Tuple,
Union,
cast,
)
from prov.identifier import Identifier, QualifiedName
from prov.model import PROV, PROV_LABEL, PROV_TYPE, PROV_VALUE, ProvDocument, ProvEntity
from schema_salad.sourceline import SourceLine
from ..errors import WorkflowException
from ..job import CommandLineJob, JobBase
from ..loghandler import _logger
from ..process import Process, shortname
from ..stdfsaccess import StdFsAccess
from ..utils import CWLObjectType, JobsType, get_listing, posix_path, versionstring
from ..workflow_job import WorkflowJob
from .provenance_constants import (
ACCOUNT_UUID,
CWLPROV,
ENCODING,
FOAF,
METADATA,
ORE,
PROVENANCE,
RO,
SCHEMA,
SHA1,
SHA256,
TEXT_PLAIN,
UUID,
WF4EVER,
WFDESC,
WFPROV,
)
from .writablebagfile import create_job, write_bag_file # change this later
if TYPE_CHECKING:
from .ro import ResearchObject
def copy_job_order(job: Union[Process, JobsType], job_order_object: CWLObjectType) -> CWLObjectType:
"""Create copy of job object for provenance."""
if not isinstance(job, WorkflowJob):
# direct command line tool execution
return job_order_object
customised_job: CWLObjectType = {}
# new job object for RO
debug = _logger.isEnabledFor(logging.DEBUG)
for each, i in enumerate(job.tool["inputs"]):
with SourceLine(job.tool["inputs"], each, WorkflowException, debug):
iid = shortname(i["id"])
if iid in job_order_object:
customised_job[iid] = copy.deepcopy(job_order_object[iid])
# add the input element in dictionary for provenance
elif "default" in i:
customised_job[iid] = copy.deepcopy(i["default"])
# add the default elements in the dictionary for provenance
else:
pass
return customised_job
class ProvenanceProfile:
"""
Provenance profile.
Populated as the workflow runs.
"""
def __init__(
self,
research_object: "ResearchObject",
full_name: str,
host_provenance: bool,
user_provenance: bool,
orcid: str,
fsaccess: StdFsAccess,
run_uuid: Optional[uuid.UUID] = None,
) -> None:
"""Initialize the provenance profile."""
self.fsaccess = fsaccess
self.orcid = orcid
self.research_object = research_object
self.folder = self.research_object.folder
self.document = ProvDocument()
self.host_provenance = host_provenance
self.user_provenance = user_provenance
self.engine_uuid = research_object.engine_uuid
self.add_to_manifest = self.research_object.add_to_manifest
if self.orcid:
_logger.debug("[provenance] Creator ORCID: %s", self.orcid)
self.full_name = full_name
if self.full_name:
_logger.debug("[provenance] Creator Full name: %s", self.full_name)
self.workflow_run_uuid = run_uuid or uuid.uuid4()
self.workflow_run_uri = self.workflow_run_uuid.urn
self.generate_prov_doc()
def __str__(self) -> str:
"""Represent this Provenvance profile as a string."""
return f"ProvenanceProfile <{self.workflow_run_uri}> in <{self.research_object}>"
def generate_prov_doc(self) -> Tuple[str, ProvDocument]:
"""Add basic namespaces."""
def host_provenance(document: ProvDocument) -> None:
"""Record host provenance."""
document.add_namespace(CWLPROV)
document.add_namespace(UUID)
document.add_namespace(FOAF)
hostname = getfqdn()
# won't have a foaf:accountServiceHomepage for unix hosts, but
# we can at least provide hostname
document.agent(
ACCOUNT_UUID,
{
PROV_TYPE: FOAF["OnlineAccount"],
"prov:location": hostname,
CWLPROV["hostname"]: hostname,
},
)
self.cwltool_version = f"cwltool {versionstring().split()[-1]}"
self.document.add_namespace("wfprov", "http://purl.org/wf4ever/wfprov#")
# document.add_namespace('prov', 'http://www.w3.org/ns/prov#')
self.document.add_namespace("wfdesc", "http://purl.org/wf4ever/wfdesc#")
# TODO: Make this ontology. For now only has cwlprov:image
self.document.add_namespace("cwlprov", "https://w3id.org/cwl/prov#")
self.document.add_namespace("foaf", "http://xmlns.com/foaf/0.1/")
self.document.add_namespace("schema", "http://schema.org/")
self.document.add_namespace("orcid", "https://orcid.org/")
self.document.add_namespace("id", "urn:uuid:")
# NOTE: Internet draft expired 2004-03-04 (!)
# https://tools.ietf.org/html/draft-thiemann-hash-urn-01
# TODO: Change to nih:sha-256; hashes
# https://tools.ietf.org/html/rfc6920#section-7
self.document.add_namespace("data", "urn:hash::sha1:")
# Also needed for docker images
self.document.add_namespace(SHA256, "nih:sha-256;")
# info only, won't really be used by prov as sub-resources use /
self.document.add_namespace("researchobject", self.research_object.base_uri)
# annotations
self.metadata_ns = self.document.add_namespace(
"metadata", self.research_object.base_uri + METADATA + "/"
)
# Pre-register provenance directory so we can refer to its files
self.provenance_ns = self.document.add_namespace(
"provenance", self.research_object.base_uri + posix_path(PROVENANCE) + "/"
)
ro_identifier_workflow = self.research_object.base_uri + "workflow/packed.cwl#"
self.wf_ns = self.document.add_namespace("wf", ro_identifier_workflow)
ro_identifier_input = self.research_object.base_uri + "workflow/primary-job.json#"
self.document.add_namespace("input", ro_identifier_input)
# More info about the account (e.g. username, fullname)
# may or may not have been previously logged by user_provenance()
# .. but we always know cwltool was launched (directly or indirectly)
# by a user account, as cwltool is a command line tool
account = self.document.agent(ACCOUNT_UUID)
if self.orcid or self.full_name:
person: Dict[Union[str, Identifier], Any] = {
PROV_TYPE: PROV["Person"],
"prov:type": SCHEMA["Person"],
}
if self.full_name:
person["prov:label"] = self.full_name
person["foaf:name"] = self.full_name
person["schema:name"] = self.full_name
else:
# TODO: Look up name from ORCID API?
pass
agent = self.document.agent(self.orcid or uuid.uuid4().urn, person)
self.document.actedOnBehalfOf(account, agent)
else:
if self.host_provenance:
host_provenance(self.document)
if self.user_provenance:
self.research_object.user_provenance(self.document)
# The execution of cwltool
wfengine = self.document.agent(
self.engine_uuid,
{
PROV_TYPE: PROV["SoftwareAgent"],
"prov:type": WFPROV["WorkflowEngine"],
"prov:label": self.cwltool_version,
},
)
        # FIXME: This datetime will be a bit too delayed; we should
        # capture the earlier moment when cwltool.py actually started.
self.document.wasStartedBy(wfengine, None, account, datetime.datetime.now())
# define workflow run level activity
self.document.activity(
self.workflow_run_uri,
datetime.datetime.now(),
None,
{
PROV_TYPE: WFPROV["WorkflowRun"],
"prov:label": "Run of workflow/packed.cwl#main",
},
)
# association between SoftwareAgent and WorkflowRun
main_workflow = "wf:main"
self.document.wasAssociatedWith(self.workflow_run_uri, self.engine_uuid, main_workflow)
self.document.wasStartedBy(
self.workflow_run_uri, None, self.engine_uuid, datetime.datetime.now()
)
return (self.workflow_run_uri, self.document)
def evaluate(
self,
process: Process,
job: JobsType,
job_order_object: CWLObjectType,
research_obj: "ResearchObject",
) -> None:
"""Evaluate the nature of job."""
if not hasattr(process, "steps"):
# record provenance of independent commandline tool executions
self.prospective_prov(job)
customised_job = copy_job_order(job, job_order_object)
self.used_artefacts(customised_job, self.workflow_run_uri)
create_job(research_obj, customised_job)
elif hasattr(job, "workflow"):
# record provenance of workflow executions
self.prospective_prov(job)
customised_job = copy_job_order(job, job_order_object)
self.used_artefacts(customised_job, self.workflow_run_uri)
def record_process_start(
self, process: Process, job: JobsType, process_run_id: Optional[str] = None
) -> Optional[str]:
if not hasattr(process, "steps"):
process_run_id = self.workflow_run_uri
elif not hasattr(job, "workflow"):
# commandline tool execution as part of workflow
name = ""
if isinstance(job, (CommandLineJob, JobBase, WorkflowJob)):
name = job.name
process_name = urllib.parse.quote(name, safe=":/,#")
process_run_id = self.start_process(process_name, datetime.datetime.now())
return process_run_id
def start_process(
self,
process_name: str,
when: datetime.datetime,
process_run_id: Optional[str] = None,
) -> str:
"""Record the start of each Process."""
if process_run_id is None:
process_run_id = uuid.uuid4().urn
prov_label = "Run of workflow/packed.cwl#main/" + process_name
self.document.activity(
process_run_id,
None,
None,
{PROV_TYPE: WFPROV["ProcessRun"], PROV_LABEL: prov_label},
)
self.document.wasAssociatedWith(
process_run_id, self.engine_uuid, str("wf:main/" + process_name)
)
self.document.wasStartedBy(process_run_id, None, self.workflow_run_uri, when, None, None)
return process_run_id
def record_process_end(
self,
process_name: str,
process_run_id: str,
outputs: Union[CWLObjectType, MutableSequence[CWLObjectType], None],
when: datetime.datetime,
) -> None:
self.generate_output_prov(outputs, process_run_id, process_name)
self.document.wasEndedBy(process_run_id, None, self.workflow_run_uri, when)
def declare_file(self, value: CWLObjectType) -> Tuple[ProvEntity, ProvEntity, str]:
if value["class"] != "File":
raise ValueError("Must have class:File: %s" % value)
# Need to determine file hash aka RO filename
entity: Optional[ProvEntity] = None
checksum = None
if "checksum" in value:
csum = cast(str, value["checksum"])
(method, checksum) = csum.split("$", 1)
if method == SHA1 and self.research_object.has_data_file(checksum):
entity = self.document.entity("data:" + checksum)
if not entity and "location" in value:
location = str(value["location"])
# If we made it here, we'll have to add it to the RO
with self.fsaccess.open(location, "rb") as fhandle:
relative_path = self.research_object.add_data_file(fhandle)
# FIXME: This naively relies on add_data_file setting hash as filename
checksum = PurePath(relative_path).name
entity = self.document.entity("data:" + checksum, {PROV_TYPE: WFPROV["Artifact"]})
if "checksum" not in value:
value["checksum"] = f"{SHA1}${checksum}"
if not entity and "contents" in value:
# Anonymous file, add content as string
entity, checksum = self.declare_string(cast(str, value["contents"]))
# By here one of them should have worked!
if not entity or not checksum:
raise ValueError("class:File but missing checksum/location/content: %r" % value)
# Track filename and extension, this is generally useful only for
# secondaryFiles. Note that multiple uses of a file might thus record
# different names for the same entity, so we'll
# make/track a specialized entity by UUID
file_id = cast(str, value.setdefault("@id", uuid.uuid4().urn))
# A specialized entity that has just these names
file_entity = self.document.entity(
file_id,
[(PROV_TYPE, WFPROV["Artifact"]), (PROV_TYPE, WF4EVER["File"])],
)
if "basename" in value:
file_entity.add_attributes({CWLPROV["basename"]: cast(str, value["basename"])})
if "nameroot" in value:
file_entity.add_attributes({CWLPROV["nameroot"]: cast(str, value["nameroot"])})
if "nameext" in value:
file_entity.add_attributes({CWLPROV["nameext"]: cast(str, value["nameext"])})
self.document.specializationOf(file_entity, entity)
# Check for secondaries
for sec in cast(MutableSequence[CWLObjectType], value.get("secondaryFiles", [])):
# TODO: Record these in a specializationOf entity with UUID?
if sec["class"] == "File":
(sec_entity, _, _) = self.declare_file(sec)
elif sec["class"] == "Directory":
sec_entity = self.declare_directory(sec)
else:
raise ValueError(f"Got unexpected secondaryFiles value: {sec}")
# We don't know how/when/where the secondary file was generated,
            # but CWL convention is that it is a kind of summary/index derived
            # from the original file. As it's generally in a different format,
            # prov:Quotation is not appropriate.
self.document.derivation(
sec_entity,
file_entity,
other_attributes={PROV["type"]: CWLPROV["SecondaryFile"]},
)
return file_entity, entity, checksum
def declare_directory(self, value: CWLObjectType) -> ProvEntity:
"""Register any nested files/directories."""
# FIXME: Calculate a hash-like identifier for directory
# so we get same value if it's the same filenames/hashes
# in a different location.
# For now, mint a new UUID to identify this directory, but
# attempt to keep it inside the value dictionary
dir_id = cast(str, value.setdefault("@id", uuid.uuid4().urn))
# New annotation file to keep the ORE Folder listing
ore_doc_fn = dir_id.replace("urn:uuid:", "directory-") + ".ttl"
dir_bundle = self.document.bundle(self.metadata_ns[ore_doc_fn])
coll = self.document.entity(
dir_id,
[
(PROV_TYPE, WFPROV["Artifact"]),
(PROV_TYPE, PROV["Collection"]),
(PROV_TYPE, PROV["Dictionary"]),
(PROV_TYPE, RO["Folder"]),
],
)
if "basename" in value:
coll.add_attributes({CWLPROV["basename"]: cast(str, value["basename"])})
# ORE description of ro:Folder, saved separately
coll_b = dir_bundle.entity(
dir_id,
[(PROV_TYPE, RO["Folder"]), (PROV_TYPE, ORE["Aggregation"])],
)
self.document.mentionOf(dir_id + "#ore", dir_id, dir_bundle.identifier)
# dir_manifest = dir_bundle.entity(
# dir_bundle.identifier, {PROV["type"]: ORE["ResourceMap"],
# ORE["describes"]: coll_b.identifier})
coll_attribs: List[Tuple[Union[str, Identifier], Any]] = [
(ORE["isDescribedBy"], dir_bundle.identifier)
]
coll_b_attribs: List[Tuple[Union[str, Identifier], Any]] = []
# FIXME: .listing might not be populated yet - hopefully
# a later call to this method will sort that
is_empty = True
if "listing" not in value:
get_listing(self.fsaccess, value)
for entry in cast(MutableSequence[CWLObjectType], value.get("listing", [])):
is_empty = False
# Declare child-artifacts
entity = self.declare_artefact(entry)
self.document.membership(coll, entity)
# Membership relation aka our ORE Proxy
m_id = uuid.uuid4().urn
m_entity = self.document.entity(m_id)
m_b = dir_bundle.entity(m_id)
# PROV-O style Dictionary
# https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition
            # ..as prov.py does not currently allow PROV-N extensions
# like hadDictionaryMember(..)
m_entity.add_asserted_type(PROV["KeyEntityPair"])
m_entity.add_attributes(
{
PROV["pairKey"]: cast(str, entry["basename"]),
PROV["pairEntity"]: entity,
}
)
# As well as a being a
# http://wf4ever.github.io/ro/2016-01-28/ro/#FolderEntry
m_b.add_asserted_type(RO["FolderEntry"])
m_b.add_asserted_type(ORE["Proxy"])
m_b.add_attributes(
{
RO["entryName"]: cast(str, entry["basename"]),
ORE["proxyIn"]: coll,
ORE["proxyFor"]: entity,
}
)
coll_attribs.append((PROV["hadDictionaryMember"], m_entity))
coll_b_attribs.append((ORE["aggregates"], m_b))
coll.add_attributes(coll_attribs)
coll_b.add_attributes(coll_b_attribs)
# Also Save ORE Folder as annotation metadata
ore_doc = ProvDocument()
ore_doc.add_namespace(ORE)
ore_doc.add_namespace(RO)
ore_doc.add_namespace(UUID)
ore_doc.add_bundle(dir_bundle)
ore_doc = ore_doc.flattened()
ore_doc_path = str(PurePosixPath(METADATA, ore_doc_fn))
with write_bag_file(self.research_object, ore_doc_path) as provenance_file:
ore_doc.serialize(provenance_file, format="rdf", rdf_format="turtle")
self.research_object.add_annotation(dir_id, [ore_doc_fn], ORE["isDescribedBy"].uri)
if is_empty:
# Empty directory
coll.add_asserted_type(PROV["EmptyCollection"])
coll.add_asserted_type(PROV["EmptyDictionary"])
self.research_object.add_uri(coll.identifier.uri)
return coll
def declare_string(self, value: str) -> Tuple[ProvEntity, str]:
"""Save as string in UTF-8."""
byte_s = BytesIO(str(value).encode(ENCODING))
data_file = self.research_object.add_data_file(byte_s, content_type=TEXT_PLAIN)
checksum = PurePosixPath(data_file).name
# FIXME: Don't naively assume add_data_file uses hash in filename!
data_id = f"data:{PurePosixPath(data_file).stem}"
entity = self.document.entity(
data_id, {PROV_TYPE: WFPROV["Artifact"], PROV_VALUE: str(value)}
)
return entity, checksum
def declare_artefact(self, value: Any) -> ProvEntity:
"""Create data artefact entities for all file objects."""
if value is None:
# FIXME: If this can happen in CWL, we'll
# need a better way to represent this in PROV
return self.document.entity(CWLPROV["None"], {PROV_LABEL: "None"})
if isinstance(value, (bool, int, float)):
# Typically used in job documents for flags
# FIXME: Make consistent hash URIs for these
# that somehow include the type
# (so "1" != 1 != "1.0" != true)
entity = self.document.entity(uuid.uuid4().urn, {PROV_VALUE: value})
self.research_object.add_uri(entity.identifier.uri)
return entity
if isinstance(value, str):
(entity, _) = self.declare_string(value)
return entity
if isinstance(value, bytes):
# If we got here then we must be in Python 3
byte_s = BytesIO(value)
data_file = self.research_object.add_data_file(byte_s)
# FIXME: Don't naively assume add_data_file uses hash in filename!
data_id = f"data:{PurePosixPath(data_file).stem}"
return self.document.entity(
data_id,
{PROV_TYPE: WFPROV["Artifact"], PROV_VALUE: str(value)},
)
if isinstance(value, MutableMapping):
if "@id" in value:
# Already processed this value, but it might not be in this PROV
entities = self.document.get_record(value["@id"])
if entities:
return cast(List[ProvEntity], entities)[0]
# else, unknown in PROV, re-add below as if it's fresh
# Base case - we found a File we need to update
if value.get("class") == "File":
(entity, _, _) = self.declare_file(value)
value["@id"] = entity.identifier.uri
return entity
if value.get("class") == "Directory":
entity = self.declare_directory(value)
value["@id"] = entity.identifier.uri
return entity
coll_id = value.setdefault("@id", uuid.uuid4().urn)
# some other kind of dictionary?
# TODO: also Save as JSON
coll = self.document.entity(
coll_id,
[
(PROV_TYPE, WFPROV["Artifact"]),
(PROV_TYPE, PROV["Collection"]),
(PROV_TYPE, PROV["Dictionary"]),
],
)
if value.get("class"):
_logger.warning("Unknown data class %s.", value["class"])
# FIXME: The class might be "http://example.com/somethingelse"
coll.add_asserted_type(CWLPROV[value["class"]])
# Let's iterate and recurse
coll_attribs: List[Tuple[Union[str, Identifier], Any]] = []
for key, val in value.items():
v_ent = self.declare_artefact(val)
self.document.membership(coll, v_ent)
m_entity = self.document.entity(uuid.uuid4().urn)
# Note: only support PROV-O style dictionary
# https://www.w3.org/TR/prov-dictionary/#dictionary-ontological-definition
                # as prov.py does not easily allow PROV-N extensions
m_entity.add_asserted_type(PROV["KeyEntityPair"])
m_entity.add_attributes({PROV["pairKey"]: str(key), PROV["pairEntity"]: v_ent})
coll_attribs.append((PROV["hadDictionaryMember"], m_entity))
coll.add_attributes(coll_attribs)
self.research_object.add_uri(coll.identifier.uri)
return coll
# some other kind of Collection?
# TODO: also save as JSON
try:
members = []
for each_input_obj in iter(value):
# Recurse and register any nested objects
e = self.declare_artefact(each_input_obj)
members.append(e)
# If we reached this, then we were allowed to iterate
coll = self.document.entity(
uuid.uuid4().urn,
[
(PROV_TYPE, WFPROV["Artifact"]),
(PROV_TYPE, PROV["Collection"]),
],
)
if not members:
coll.add_asserted_type(PROV["EmptyCollection"])
else:
for member in members:
# FIXME: This won't preserve order, for that
# we would need to use PROV.Dictionary
# with numeric keys
self.document.membership(coll, member)
self.research_object.add_uri(coll.identifier.uri)
# FIXME: list value does not support adding "@id"
return coll
except TypeError:
_logger.warning("Unrecognized type %s of %r", type(value), value)
# Let's just fall back to Python repr()
entity = self.document.entity(uuid.uuid4().urn, {PROV_LABEL: repr(value)})
self.research_object.add_uri(entity.identifier.uri)
return entity
def used_artefacts(
self,
job_order: Union[CWLObjectType, List[CWLObjectType]],
process_run_id: str,
name: Optional[str] = None,
) -> None:
"""Add used() for each data artefact."""
if isinstance(job_order, list):
for entry in job_order:
self.used_artefacts(entry, process_run_id, name)
else:
# FIXME: Use workflow name in packed.cwl, "main" is wrong for nested workflows
base = "main"
if name is not None:
base += "/" + name
for key, value in job_order.items():
prov_role = self.wf_ns[f"{base}/{key}"]
try:
entity = self.declare_artefact(value)
self.document.used(
process_run_id,
entity,
datetime.datetime.now(),
None,
{"prov:role": prov_role},
)
except OSError:
pass
def generate_output_prov(
self,
final_output: Union[CWLObjectType, MutableSequence[CWLObjectType], None],
process_run_id: Optional[str],
name: Optional[str],
) -> None:
"""Call wasGeneratedBy() for each output,copy the files into the RO."""
if isinstance(final_output, MutableSequence):
for entry in final_output:
self.generate_output_prov(entry, process_run_id, name)
elif final_output is not None:
# Timestamp should be created at the earliest
timestamp = datetime.datetime.now()
# For each output, find/register the corresponding
# entity (UUID) and document it as generated in
# a role corresponding to the output
for output, value in final_output.items():
entity = self.declare_artefact(value)
if name is not None:
name = urllib.parse.quote(str(name), safe=":/,#")
# FIXME: Probably not "main" in nested workflows
role = self.wf_ns[f"main/{name}/{output}"]
else:
role = self.wf_ns[f"main/{output}"]
if not process_run_id:
process_run_id = self.workflow_run_uri
self.document.wasGeneratedBy(
entity, process_run_id, timestamp, None, {"prov:role": role}
)
def prospective_prov(self, job: JobsType) -> None:
"""Create prospective prov recording as wfdesc prov:Plan."""
if not isinstance(job, WorkflowJob):
# direct command line tool execution
self.document.entity(
"wf:main",
{
PROV_TYPE: WFDESC["Process"],
"prov:type": PROV["Plan"],
"prov:label": "Prospective provenance",
},
)
return
self.document.entity(
"wf:main",
{
PROV_TYPE: WFDESC["Workflow"],
"prov:type": PROV["Plan"],
"prov:label": "Prospective provenance",
},
)
for step in job.steps:
stepnametemp = "wf:main/" + str(step.name)[5:]
stepname = urllib.parse.quote(stepnametemp, safe=":/,#")
provstep = self.document.entity(
stepname,
{PROV_TYPE: WFDESC["Process"], "prov:type": PROV["Plan"]},
)
self.document.entity(
"wf:main",
{
"wfdesc:hasSubProcess": provstep,
"prov:label": "Prospective provenance",
},
)
# TODO: Declare roles/parameters as well
def activity_has_provenance(self, activity: str, prov_ids: Sequence[Identifier]) -> None:
"""Add http://www.w3.org/TR/prov-aq/ relations to nested PROV files."""
# NOTE: The below will only work if the corresponding metadata/provenance arcp URI
# is a pre-registered namespace in the PROV Document
attribs: List[Tuple[Union[str, Identifier], Any]] = [
(PROV["has_provenance"], prov_id) for prov_id in prov_ids
]
self.document.activity(activity, other_attributes=attribs)
# Tip: we can't use https://www.w3.org/TR/prov-links/#term-mention
# as prov:mentionOf() is only for entities, not activities
uris = [i.uri for i in prov_ids]
self.research_object.add_annotation(activity, uris, PROV["has_provenance"].uri)
def finalize_prov_profile(self, name: Optional[str]) -> List[QualifiedName]:
"""Transfer the provenance related files to the RO."""
# NOTE: Relative posix path
if name is None:
# main workflow, fixed filenames
filename = "primary.cwlprov"
else:
# ASCII-friendly filename, avoiding % as we don't want %2520 in manifest.json
wf_name = urllib.parse.quote(str(name), safe="").replace("%", "_")
# Note that the above could cause overlaps for similarly named
# workflows, but that's OK as we'll also include run uuid
            # which also covers the case of this step being run in
# multiple places or iterations
filename = f"{wf_name}.{self.workflow_run_uuid}.cwlprov"
basename = str(PurePosixPath(PROVENANCE) / filename)
# TODO: Also support other profiles than CWLProv, e.g. ProvOne
# list of prov identifiers of provenance files
prov_ids = []
# https://www.w3.org/TR/prov-xml/
with write_bag_file(self.research_object, basename + ".xml") as provenance_file:
self.document.serialize(provenance_file, format="xml", indent=4)
prov_ids.append(self.provenance_ns[filename + ".xml"])
# https://www.w3.org/TR/prov-n/
with write_bag_file(self.research_object, basename + ".provn") as provenance_file:
self.document.serialize(provenance_file, format="provn", indent=2)
prov_ids.append(self.provenance_ns[filename + ".provn"])
# https://www.w3.org/Submission/prov-json/
with write_bag_file(self.research_object, basename + ".json") as provenance_file:
self.document.serialize(provenance_file, format="json", indent=2)
prov_ids.append(self.provenance_ns[filename + ".json"])
# "rdf" aka https://www.w3.org/TR/prov-o/
# which can be serialized to ttl/nt/jsonld (and more!)
# https://www.w3.org/TR/turtle/
with write_bag_file(self.research_object, basename + ".ttl") as provenance_file:
self.document.serialize(provenance_file, format="rdf", rdf_format="turtle")
prov_ids.append(self.provenance_ns[filename + ".ttl"])
# https://www.w3.org/TR/n-triples/
with write_bag_file(self.research_object, basename + ".nt") as provenance_file:
self.document.serialize(provenance_file, format="rdf", rdf_format="ntriples")
prov_ids.append(self.provenance_ns[filename + ".nt"])
# https://www.w3.org/TR/json-ld/
# TODO: Use a nice JSON-LD context
# see also https://eprints.soton.ac.uk/395985/
# 404 Not Found on https://provenance.ecs.soton.ac.uk/prov.jsonld :(
with write_bag_file(self.research_object, basename + ".jsonld") as provenance_file:
self.document.serialize(provenance_file, format="rdf", rdf_format="json-ld")
prov_ids.append(self.provenance_ns[filename + ".jsonld"])
_logger.debug("[provenance] added provenance: %s", prov_ids)
return prov_ids
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/cwlprov/ro.py 0000644 0001750 0001750 00000056446 14621376047 020457 0 ustar 00michael michael """Stores class definition of ResearchObject and WritableBagFile."""
import datetime
import hashlib
import os
import shutil
import tempfile
import urllib
import uuid
from pathlib import Path, PurePosixPath
from typing import (
IO,
Any,
Dict,
List,
MutableMapping,
MutableSequence,
Optional,
Set,
Tuple,
Union,
cast,
)
import prov.model as provM
from prov.model import PROV, ProvDocument
from ..loghandler import _logger
from ..stdfsaccess import StdFsAccess
from ..utils import (
CWLObjectType,
CWLOutputType,
create_tmp_dir,
local_path,
posix_path,
versionstring,
)
from . import Aggregate, Annotation, AuthoredBy, _valid_orcid, _whoami, checksum_copy
from .provenance_constants import (
ACCOUNT_UUID,
CWLPROV_VERSION,
DATA,
ENCODING,
FOAF,
LOGS,
METADATA,
ORCID,
PROVENANCE,
SHA1,
SHA256,
SHA512,
SNAPSHOT,
TEXT_PLAIN,
USER_UUID,
UUID,
WORKFLOW,
Hasher,
)
class ResearchObject:
"""CWLProv Research Object."""
def __init__(
self,
fsaccess: StdFsAccess,
temp_prefix_ro: str = "tmp",
orcid: str = "",
full_name: str = "",
) -> None:
"""Initialize the ResearchObject."""
self.temp_prefix = temp_prefix_ro
self.orcid = "" if not orcid else _valid_orcid(orcid)
self.full_name = full_name
self.folder = create_tmp_dir(temp_prefix_ro)
self.closed = False
# map of filename "data/de/alsdklkas": 12398123 bytes
self.bagged_size: Dict[str, int] = {}
self.tagfiles: Set[str] = set()
self._file_provenance: Dict[str, Aggregate] = {}
self._external_aggregates: List[Aggregate] = []
self.annotations: List[Annotation] = []
self._content_types: Dict[str, str] = {}
self.fsaccess = fsaccess
# These should be replaced by generate_prov_doc when workflow/run IDs are known:
self.engine_uuid = f"urn:uuid:{uuid.uuid4()}"
self.ro_uuid = uuid.uuid4()
self.base_uri = f"arcp://uuid,{self.ro_uuid}/"
self.cwltool_version = f"cwltool {versionstring().split()[-1]}"
self.has_manifest = False
self.relativised_input_object: CWLObjectType = {}
self._initialize()
_logger.debug("[provenance] Temporary research object: %s", self.folder)
def self_check(self) -> None:
"""Raise ValueError if this RO is closed."""
if self.closed:
raise ValueError(
"This ResearchObject has already been closed and is not "
"available for further manipulation."
)
def __str__(self) -> str:
"""Represent this RO as a string."""
return f"ResearchObject <{self.ro_uuid}> in <{self.folder}>"
def _initialize(self) -> None:
for research_obj_folder in (
METADATA,
DATA,
WORKFLOW,
SNAPSHOT,
PROVENANCE,
LOGS,
):
os.makedirs(os.path.join(self.folder, research_obj_folder))
self._initialize_bagit()
def _initialize_bagit(self) -> None:
"""Write fixed bagit header."""
self.self_check()
bagit = os.path.join(self.folder, "bagit.txt")
# encoding: always UTF-8 (although ASCII would suffice here)
with open(bagit, "w", encoding=ENCODING, newline="\n") as bag_it_file:
# TODO: \n or \r\n ?
bag_it_file.write("BagIt-Version: 0.97\n")
bag_it_file.write(f"Tag-File-Character-Encoding: {ENCODING}\n")
def user_provenance(self, document: ProvDocument) -> None:
"""Add the user provenance."""
self.self_check()
(username, fullname) = _whoami()
if not self.full_name:
self.full_name = fullname
document.add_namespace(UUID)
document.add_namespace(ORCID)
document.add_namespace(FOAF)
account = document.agent(
ACCOUNT_UUID,
{
provM.PROV_TYPE: FOAF["OnlineAccount"],
"prov:label": username,
FOAF["accountName"]: username,
},
)
user = document.agent(
self.orcid or USER_UUID,
{
provM.PROV_TYPE: PROV["Person"],
"prov:label": self.full_name,
FOAF["name"]: self.full_name,
FOAF["account"]: account,
},
)
# cwltool may be started on the shell (directly by user),
# by shell script (indirectly by user)
# or from a different program
# (which again is launched by any of the above)
#
# We can't tell in which way, but ultimately we're still
        # acting on behalf of that user (even if we might
# get their name wrong!)
document.actedOnBehalfOf(account, user)
def add_tagfile(self, path: str, timestamp: Optional[datetime.datetime] = None) -> None:
"""Add tag files to our research object."""
self.self_check()
checksums = {}
# Read file to calculate its checksum
if os.path.isdir(path):
return
# FIXME: do the right thing for directories
with open(path, "rb") as tag_file:
# FIXME: Should have more efficient open_tagfile() that
# does all checksums in one go while writing through,
# adding checksums after closing.
# Below probably OK for now as metadata files
            # are not too large...?
checksums[SHA1] = checksum_copy(tag_file, hasher=hashlib.sha1)
tag_file.seek(0)
checksums[SHA256] = checksum_copy(tag_file, hasher=hashlib.sha256)
tag_file.seek(0)
checksums[SHA512] = checksum_copy(tag_file, hasher=hashlib.sha512)
rel_path = posix_path(os.path.relpath(path, self.folder))
self.tagfiles.add(rel_path)
self.add_to_manifest(rel_path, checksums)
if timestamp is not None:
self._file_provenance[rel_path] = {
"createdOn": timestamp.isoformat(),
"uri": None,
"bundledAs": None,
"mediatype": None,
"conformsTo": None,
}
def _ro_aggregates(self) -> List[Aggregate]:
"""Gather dictionary of files to be added to the manifest."""
def guess_mediatype(
rel_path: str,
) -> Tuple[Optional[str], Optional[Union[str, List[str]]]]:
"""Return the mediatypes."""
media_types: Dict[Union[str, None], str] = {
# Adapted from
# https://w3id.org/bundle/2014-11-05/#media-types
"txt": TEXT_PLAIN,
"ttl": 'text/turtle; charset="UTF-8"',
"rdf": "application/rdf+xml",
"json": "application/json",
"jsonld": "application/ld+json",
"xml": "application/xml",
##
"cwl": 'text/x+yaml; charset="UTF-8"',
"provn": 'text/provenance-notation; charset="UTF-8"',
"nt": "application/n-triples",
}
conforms_to: Dict[Union[str, None], str] = {
"provn": "http://www.w3.org/TR/2013/REC-prov-n-20130430/",
"cwl": "https://w3id.org/cwl/",
}
prov_conforms_to: Dict[str, str] = {
"provn": "http://www.w3.org/TR/2013/REC-prov-n-20130430/",
"rdf": "http://www.w3.org/TR/2013/REC-prov-o-20130430/",
"ttl": "http://www.w3.org/TR/2013/REC-prov-o-20130430/",
"nt": "http://www.w3.org/TR/2013/REC-prov-o-20130430/",
"jsonld": "http://www.w3.org/TR/2013/REC-prov-o-20130430/",
"xml": "http://www.w3.org/TR/2013/NOTE-prov-xml-20130430/",
"json": "http://www.w3.org/Submission/2013/SUBM-prov-json-20130424/",
}
extension: Optional[str] = rel_path.rsplit(".", 1)[-1].lower()
if extension == rel_path:
# No ".", no extension
extension = None
mediatype: Optional[str] = media_types.get(extension, None)
conformsTo: Optional[Union[str, List[str]]] = conforms_to.get(extension, None)
# TODO: Open CWL file to read its declared "cwlVersion", e.g.
# cwlVersion = "v1.0"
if rel_path.startswith(posix_path(PROVENANCE)) and extension in prov_conforms_to:
if ".cwlprov" in rel_path:
# Our own!
conformsTo = [
prov_conforms_to[extension],
CWLPROV_VERSION,
]
else:
# Some other PROV
# TODO: Recognize ProvOne etc.
conformsTo = prov_conforms_to[extension]
return (mediatype, conformsTo)
aggregates: List[Aggregate] = []
for path in self.bagged_size.keys():
temp_path = PurePosixPath(path)
folder = temp_path.parent
filename = temp_path.name
# NOTE: Here we end up aggregating the abstract
# data items by their sha1 hash, so that it matches
# the entity() in the prov files.
# TODO: Change to nih:sha-256; hashes
# https://tools.ietf.org/html/rfc6920#section-7
aggregate_dict: Aggregate = {
"uri": "urn:hash::sha1:" + filename,
"bundledAs": {
# The arcp URI is a suitable ORE proxy; local to this Research Object.
# (as long as we don't also aggregate it by relative path!)
"uri": self.base_uri + path,
# relate it to the data/ path
"folder": f"/{folder}/",
"filename": filename,
},
}
if path in self._file_provenance:
# Made by workflow run, merge captured provenance
bundledAs = aggregate_dict["bundledAs"]
if bundledAs:
bundledAs.update(self._file_provenance[path])
else:
aggregate_dict["bundledAs"] = cast(
Optional[Dict[str, Any]], self._file_provenance[path]
)
else:
# Probably made outside wf run, part of job object?
pass
if path in self._content_types:
aggregate_dict["mediatype"] = self._content_types[path]
aggregates.append(aggregate_dict)
for path in self.tagfiles:
if not (
path.startswith(METADATA) or path.startswith(WORKFLOW) or path.startswith(SNAPSHOT)
):
# probably a bagit file
continue
if path == str(PurePosixPath(METADATA) / "manifest.json"):
# Should not really be there yet! But anyway, we won't
# aggregate it.
continue
# These are local paths like metadata/provenance - but
# we need to relativize them to the current directory,
# as we are saved in metadata/manifest.json
mediatype, conformsTo = guess_mediatype(path)
rel_aggregates: Aggregate = {
"uri": str(Path(os.pardir) / path),
"mediatype": mediatype,
"conformsTo": conformsTo,
}
if path in self._file_provenance:
# Propagate file provenance (e.g. timestamp)
rel_aggregates.update(self._file_provenance[path])
elif not path.startswith(SNAPSHOT):
# make new timestamp?
(
rel_aggregates["createdOn"],
rel_aggregates["createdBy"],
) = self._self_made()
aggregates.append(rel_aggregates)
aggregates.extend(self._external_aggregates)
return aggregates
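# Sketch of the aggregate entry produced for a payload file above
# (values are illustrative, not from a real run):
#
#     {
#         "uri": "urn:hash::sha1:f572d396fae9206628714fb2ce00f72e94f2258f",
#         "bundledAs": {
#             "uri": "arcp://uuid,<ro-uuid>/data/f5/f572d396fae920...",
#             "folder": "/data/f5/",
#             "filename": "f572d396fae9206628714fb2ce00f72e94f2258f",
#         },
#     }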
def add_uri(self, uri: str, timestamp: Optional[datetime.datetime] = None) -> Aggregate:
self.self_check()
aggr: Aggregate = {"uri": uri}
aggr["createdOn"], aggr["createdBy"] = self._self_made(timestamp=timestamp)
self._external_aggregates.append(aggr)
return aggr
def add_annotation(
self, about: str, content: List[str], motivated_by: str = "oa:describing"
) -> str:
"""Cheap URI relativize for current directory and /."""
self.self_check()
curr = self.base_uri + METADATA + "/"
content = [c.replace(curr, "").replace(self.base_uri, "../") for c in content]
uri = uuid.uuid4().urn
ann: Annotation = {
"uri": uri,
"about": about,
"content": content,
"oa:motivatedBy": {"@id": motivated_by},
}
self.annotations.append(ann)
return uri
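# Usage sketch (hypothetical arguments): annotate the RO itself with a log
# file. A content URI under <base_uri>metadata/ is stored relative to
# metadata/; any other <base_uri> URI becomes a "../" path.
#
#     ro.add_annotation(
#         ro.ro_uuid.urn,
#         [ro.base_uri + METADATA + "/logs/engine.txt"],
#         motivated_by="oa:describing",
#     )
#     # stored content: ["logs/engine.txt"]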
def _ro_annotations(self) -> List[Annotation]:
annotations: List[Annotation] = []
annotations.append(
{
"uri": uuid.uuid4().urn,
"about": self.ro_uuid.urn,
"content": "/",
# https://www.w3.org/TR/annotation-vocab/#named-individuals
"oa:motivatedBy": {"@id": "oa:describing"},
}
)
# How was it run?
# FIXME: Only primary*
prov_files = [
str(PurePosixPath(p).relative_to(METADATA))
for p in self.tagfiles
if p.startswith(posix_path(PROVENANCE)) and "/primary." in p
]
annotations.append(
{
"uri": uuid.uuid4().urn,
"about": self.ro_uuid.urn,
"content": prov_files,
# Modulation of https://www.w3.org/TR/prov-aq/
"oa:motivatedBy": {"@id": "http://www.w3.org/ns/prov#has_provenance"},
}
)
# Where is the main workflow?
annotations.append(
{
"uri": uuid.uuid4().urn,
"about": str(PurePosixPath("..") / WORKFLOW / "packed.cwl"),
"content": None,
"oa:motivatedBy": {"@id": "oa:highlighting"},
}
)
annotations.append(
{
"uri": uuid.uuid4().urn,
"about": self.ro_uuid.urn,
"content": [
str(PurePosixPath("..") / WORKFLOW / "packed.cwl"),
str(PurePosixPath("..") / WORKFLOW / "primary-job.json"),
],
"oa:motivatedBy": {"@id": "oa:linking"},
}
)
# Add user-added annotations at end
annotations.extend(self.annotations)
return annotations
def _authored_by(self) -> Optional[AuthoredBy]:
authored_by: AuthoredBy = {}
if self.orcid:
authored_by["orcid"] = self.orcid
if self.full_name:
authored_by["name"] = self.full_name
if not self.orcid:
authored_by["uri"] = USER_UUID
if authored_by:
return authored_by
return None
def generate_snapshot(self, prov_dep: CWLObjectType) -> None:
"""Copy all of the CWL files to the snapshot/ directory."""
self.self_check()
for key, value in prov_dep.items():
if key == "location" and cast(str, value).split("/")[-1]:
location = urllib.parse.unquote(cast(str, value))
filename = location.split("/")[-1]
path = os.path.join(self.folder, SNAPSHOT, filename)
filepath = ""
if "file://" in location:
filepath = location[7:]
else:
filepath = location
# FIXME: What if destination path already exists?
if os.path.exists(filepath):
try:
if os.path.isdir(filepath):
shutil.copytree(filepath, path)
else:
shutil.copy(filepath, path)
timestamp = datetime.datetime.fromtimestamp(os.path.getmtime(filepath))
self.add_tagfile(path, timestamp)
except PermissionError:
pass # FIXME: avoids duplicate snapshotting; need better solution
elif key in ("secondaryFiles", "listing"):
for files in cast(MutableSequence[CWLObjectType], value):
if isinstance(files, MutableMapping):
self.generate_snapshot(files)
else:
pass
def has_data_file(self, sha1hash: str) -> bool:
"""Confirm the presence of the given file in the RO."""
folder = os.path.join(self.folder, DATA, sha1hash[0:2])
hash_path = os.path.join(folder, sha1hash)
return os.path.isfile(hash_path)
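# Payload files are content-addressed: data/<first two hex digits>/<full sha1>,
# e.g. (illustrative) data/f5/f572d396fae9206628714fb2ce00f72e94f2258f.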
def add_data_file(
self,
from_fp: IO[Any],
timestamp: Optional[datetime.datetime] = None,
content_type: Optional[str] = None,
) -> str:
"""Copy inputs to data/ folder."""
self.self_check()
tmp_dir, tmp_prefix = os.path.split(self.temp_prefix)
with tempfile.NamedTemporaryFile(prefix=tmp_prefix, dir=tmp_dir, delete=False) as tmp:
checksum = checksum_copy(from_fp, tmp)
# Calculate hash-based file path
folder = os.path.join(self.folder, DATA, checksum[0:2])
path = os.path.join(folder, checksum)
# os.rename assumed safe, as our temp file should
# be in same file system as our temp folder
if not os.path.isdir(folder):
os.makedirs(folder)
os.rename(tmp.name, path)
# Relative posix path
rel_path = posix_path(os.path.relpath(path, self.folder))
# Register in bagit checksum
if Hasher == hashlib.sha1:
self._add_to_bagit(rel_path, sha1=checksum)
else:
_logger.warning("[provenance] Unknown hash method %s for bagit manifest", Hasher)
# Inefficient; the bagit support will need to checksum it again
self._add_to_bagit(rel_path)
_logger.debug("[provenance] Added data file %s", path)
if timestamp is not None:
createdOn, createdBy = self._self_made(timestamp)
self._file_provenance[rel_path] = cast(
Aggregate, {"createdOn": createdOn, "createdBy": createdBy}
)
_logger.debug("[provenance] Relative path for data file %s", rel_path)
if content_type is not None:
self._content_types[rel_path] = content_type
return rel_path
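# Usage sketch (hypothetical `ro` instance and file name): register an input
# and record its media type; the return value is the content-addressed path.
#
#     with open("whale.txt", "rb") as fp:
#         rel_path = ro.add_data_file(fp, content_type="text/plain")
#     # rel_path is e.g. "data/f5/f572d396fae9206628714fb2ce00f72e94f2258f"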
def _self_made(
self, timestamp: Optional[datetime.datetime] = None
) -> Tuple[str, Dict[str, str]]: # createdOn, createdBy
if timestamp is None:
timestamp = datetime.datetime.now()
return (
timestamp.isoformat(),
{"uri": self.engine_uuid, "name": self.cwltool_version},
)
def add_to_manifest(self, rel_path: str, checksums: Dict[str, str]) -> None:
"""Add files to the research object manifest."""
self.self_check()
if PurePosixPath(rel_path).is_absolute():
raise ValueError(f"rel_path must be relative: {rel_path}")
if os.path.commonprefix(["data/", rel_path]) == "data/":
# payload file, go to manifest
manifest = "manifest"
self.has_manifest = True
else:
# metadata file, go to tag manifest
manifest = "tagmanifest"
# Add checksums to corresponding manifest files
for method, hash_value in checksums.items():
# File not in manifest because we bailed out on
# existence in bagged_size above
manifestpath = os.path.join(self.folder, f"{manifest}-{method.lower()}.txt")
# encoding: match Tag-File-Character-Encoding: UTF-8
with open(manifestpath, "a", encoding=ENCODING, newline="\n") as checksum_file:
line = f"{hash_value} {rel_path}\n"
_logger.debug("[provenance] Added to %s: %s", manifestpath, line)
checksum_file.write(line)
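# Each manifest line follows the BagIt "<checksum> <path>" convention, e.g.
# (illustrative) in manifest-sha1.txt:
#
#     f572d396fae9206628714fb2ce00f72e94f2258f data/f5/f572d396fae920...
#
# Payload files go to manifest-*.txt, tag files to tagmanifest-*.txt.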
def _add_to_bagit(self, rel_path: str, **checksums: str) -> None:
if PurePosixPath(rel_path).is_absolute():
raise ValueError(f"rel_path must be relative: {rel_path}")
lpath = os.path.join(self.folder, local_path(rel_path))
if not os.path.exists(lpath):
raise OSError(f"File {rel_path} does not exist within RO: {lpath}")
if rel_path in self.bagged_size:
# Already added, assume checksum OK
return
self.bagged_size[rel_path] = os.path.getsize(lpath)
if SHA1 not in checksums:
# ensure we always have sha1
checksums = dict(checksums)
with open(lpath, "rb") as file_path:
# FIXME: Need sha-256 / sha-512 as well for Research Object BagIt profile?
checksums[SHA1] = checksum_copy(file_path, hasher=hashlib.sha1)
self.add_to_manifest(rel_path, checksums)
def _relativise_files(
self,
structure: Union[CWLObjectType, CWLOutputType, MutableSequence[CWLObjectType]],
) -> None:
"""Save any file objects into the RO and update the local paths."""
# Base case - we found a File we need to update
_logger.debug("[provenance] Relativising: %s", structure)
if isinstance(structure, MutableMapping):
if structure.get("class") == "File":
relative_path: Optional[Union[str, PurePosixPath]] = None
if "checksum" in structure:
raw_checksum = cast(str, structure["checksum"])
alg, checksum = raw_checksum.split("$")
if alg != SHA1:
raise TypeError(
f"Only SHA1 CWL checksums are currently supported: {structure}"
)
if self.has_data_file(checksum):
prefix = checksum[0:2]
relative_path = PurePosixPath("data") / prefix / checksum
if not (relative_path is not None and "location" in structure):
# Register in RO; but why was this not picked
# up by used_artefacts?
_logger.info("[provenance] Adding to RO %s", structure["location"])
with self.fsaccess.open(cast(str, structure["location"]), "rb") as fp:
relative_path = self.add_data_file(fp)
checksum = PurePosixPath(relative_path).name
structure["checksum"] = f"{SHA1}${checksum}"
# RO-relative path as new location
structure["location"] = str(PurePosixPath("..") / relative_path)
if "path" in structure:
del structure["path"]
if structure.get("class") == "Directory":
# TODO: Generate an anonymous Directory with a "listing"
# pointing to the hashed files
del structure["location"]
for val in structure.values():
try:
self._relativise_files(cast(CWLOutputType, val))
except OSError:
pass
return
if isinstance(structure, MutableSequence):
for obj in structure:
# Recurse and rewrite any nested File objects
self._relativise_files(cast(CWLOutputType, obj))
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/cwlprov/writablebagfile.py 0000644 0001750 0001750 00000025223 14621376047 023147 0 ustar 00michael michael """Stores class definition of ResearchObject and WritableBagFile."""
import copy
import datetime
import hashlib
import os
import shutil
import uuid
from array import array
from collections import OrderedDict
from io import FileIO, TextIOWrapper
from mmap import mmap
from pathlib import Path, PurePosixPath
from typing import Any, BinaryIO, Dict, MutableMapping, Optional, Union, cast
from schema_salad.utils import json_dumps
from ..loghandler import _logger
from ..utils import CWLObjectType, local_path, posix_path
from .provenance_constants import (
CWLPROV,
CWLPROV_VERSION,
ENCODING,
LOGS,
METADATA,
SHA1,
SHA256,
SHA512,
WORKFLOW,
)
from .ro import ResearchObject
class WritableBagFile(FileIO):
"""Writes files in research object."""
def __init__(self, research_object: "ResearchObject", rel_path: str) -> None:
"""Initialize an ROBagIt."""
self.research_object = research_object
if Path(rel_path).is_absolute():
raise ValueError("rel_path must be relative: %s" % rel_path)
self.rel_path = rel_path
self.hashes = {
SHA1: hashlib.sha1(), # nosec
SHA256: hashlib.sha256(),
SHA512: hashlib.sha512(),
}
# Open file in Research Object folder
path = os.path.abspath(os.path.join(research_object.folder, local_path(rel_path)))
if not path.startswith(os.path.abspath(research_object.folder)):
raise ValueError("Path is outside Research Object: %s" % path)
_logger.debug("[provenance] Creating WritableBagFile at %s.", path)
super().__init__(path, mode="w")
def write(self, b: Any) -> int:
"""Write some content to the Bag."""
real_b = b if isinstance(b, (bytes, mmap, array)) else b.encode("utf-8")
total = 0
length = len(real_b)
while total < length:
# Resume from any partial write so no bytes are duplicated
ret = super().write(real_b[total:])
if ret:
total += ret
for val in self.hashes.values():
val.update(real_b)
return total
def close(self) -> None:
"""
Flush and close this stream.
Finalize checksums and manifests.
"""
# FIXME: Convert below block to a ResearchObject method?
if self.rel_path.startswith("data/"):
self.research_object.bagged_size[self.rel_path] = self.tell()
else:
self.research_object.tagfiles.add(self.rel_path)
super().close()
# { "sha1": "f572d396fae9206628714fb2ce00f72e94f2258f" }
checksums = {}
for name, val in self.hashes.items():
checksums[name] = val.hexdigest().lower()
self.research_object.add_to_manifest(self.rel_path, checksums)
# To simplify our hash calculation we won't support
# seeking, reading or truncating, as we can't do
# similar seeks in the current hash.
# TODO: Support these? At the expense of invalidating
# the current hash, then having to recalculate at close()
def seekable(self) -> bool:
"""Return False, seeking is not supported."""
return False
def readable(self) -> bool:
"""Return False, reading is not supported."""
return False
def truncate(self, size: Optional[int] = None) -> int:
"""Resize the stream, only if we haven't started writing."""
# FIXME: This breaks contract IOBase,
# as it means we would have to recalculate the hash
if size is not None:
raise OSError("WritableBagFile can't truncate")
return self.tell()
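# Design note: this stream is deliberately write-only and unseekable so the
# SHA-1/SHA-256/SHA-512 digests can be folded in incrementally as bytes are
# written; close() then registers the final checksums without re-reading.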
def write_bag_file(
research_object: "ResearchObject", path: str, encoding: Optional[str] = ENCODING
) -> Union[TextIOWrapper, WritableBagFile]:
"""Write the bag file into our research object."""
research_object.self_check()
# For some reason below throws BlockingIOError
# fp = BufferedWriter(WritableBagFile(self, path))
bag_file = WritableBagFile(research_object, path)
if encoding is not None:
# encoding: match Tag-File-Character-Encoding: UTF-8
return TextIOWrapper(cast(BinaryIO, bag_file), encoding=encoding, newline="\n")
return bag_file
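# Usage sketch (hypothetical path): anything written through the bag is
# checksummed on the fly and registered in the manifests on close.
#
#     with write_bag_file(research_object, "metadata/example.txt") as fh:
#         fh.write("hello\n")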
def open_log_file_for_activity(
research_object: "ResearchObject", uuid_uri: str
) -> Union[TextIOWrapper, WritableBagFile]:
"""Begin the per-activity log."""
research_object.self_check()
# Ensure valid UUID for safe filenames
activity_uuid = uuid.UUID(uuid_uri)
if activity_uuid.urn == research_object.engine_uuid:
# It's the engine aka cwltool!
name = "engine"
else:
name = "activity"
p = os.path.join(LOGS, f"{name}.{activity_uuid}.txt")
_logger.debug(f"[provenance] Opening log file for {name}: {p}")
research_object.add_annotation(activity_uuid.urn, [p], CWLPROV["log"].uri)
return write_bag_file(research_object, p)
def _write_ro_manifest(research_object: "ResearchObject") -> None:
# Does not have to be this order, but it's nice to be consistent
filename = "manifest.json"
createdOn, createdBy = research_object._self_made()
manifest = OrderedDict(
{
"@context": [
{"@base": f"{research_object.base_uri}{posix_path(METADATA)}/"},
"https://w3id.org/bundle/context",
],
"id": "/",
"conformsTo": CWLPROV_VERSION,
"manifest": filename,
"createdOn": createdOn,
"createdBy": createdBy,
"authoredBy": research_object._authored_by(),
"aggregates": research_object._ro_aggregates(),
"annotations": research_object._ro_annotations(),
}
)
json_manifest = json_dumps(manifest, indent=4, ensure_ascii=False)
rel_path = str(PurePosixPath(METADATA) / filename)
json_manifest += "\n"
with write_bag_file(research_object, rel_path) as manifest_file:
manifest_file.write(json_manifest)
def _write_bag_info(research_object: "ResearchObject") -> None:
with write_bag_file(research_object, "bag-info.txt") as info_file:
info_file.write("Bag-Software-Agent: %s\n" % research_object.cwltool_version)
# FIXME: require sha-512 of payload to comply with profile?
# FIXME: Update profile
info_file.write("BagIt-Profile-Identifier: https://w3id.org/ro/bagit/profile\n")
info_file.write("Bagging-Date: %s\n" % datetime.date.today().isoformat())
info_file.write("External-Description: Research Object of CWL workflow run\n")
if research_object.full_name:
info_file.write("Contact-Name: %s\n" % research_object.full_name)
# NOTE: We can't use the urn:uuid:{UUID} of the workflow run (a prov:Activity)
# as identifier for the RO/bagit (a prov:Entity). However the arcp base URI is good.
info_file.write("External-Identifier: %s\n" % research_object.base_uri)
# Calculate size of data/ (assuming no external fetch.txt files)
total_size = sum(research_object.bagged_size.values())
num_files = len(research_object.bagged_size)
info_file.write("Payload-Oxum: %d.%d\n" % (total_size, num_files))
_logger.debug("[provenance] Generated bagit metadata: %s", research_object.folder)
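# A generated bag-info.txt looks roughly like this (illustrative values):
#
#     Bag-Software-Agent: cwltool 3.1.20240508115724
#     BagIt-Profile-Identifier: https://w3id.org/ro/bagit/profile
#     Bagging-Date: 2024-05-08
#     External-Description: Research Object of CWL workflow run
#     External-Identifier: arcp://uuid,<ro-uuid>/
#     Payload-Oxum: 1024.3
#
# Payload-Oxum is "<total payload octets>.<number of payload files>", per the
# BagIt specification.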
def _finalize(research_object: "ResearchObject") -> None:
_write_ro_manifest(research_object)
_write_bag_info(research_object)
if not research_object.has_manifest:
(Path(research_object.folder) / "manifest-sha1.txt").touch()
def close_ro(research_object: "ResearchObject", save_to: Optional[str] = None) -> None:
"""Close the Research Object, optionally saving to specified folder.
Closing will remove any temporary files used by this research object.
After calling this method, this ResearchObject instance can no longer
be used, except for no-op calls to .close().
The 'save_to' folder should not exist - if it does, it will be deleted.
It is safe to call this function multiple times without the
'save_to' argument, e.g. within a try..finally block to
ensure the temporary files of this Research Object are removed.
"""
if save_to is None:
if not research_object.closed:
_logger.debug("[provenance] Deleting temporary %s", research_object.folder)
shutil.rmtree(research_object.folder, ignore_errors=True)
else:
save_to = os.path.abspath(save_to)
_logger.info("[provenance] Finalizing Research Object")
_finalize(research_object) # write manifest etc.
# TODO: Write as archive (.zip or .tar) based on extension?
if os.path.isdir(save_to):
_logger.info("[provenance] Deleting existing %s", save_to)
shutil.rmtree(save_to)
shutil.move(research_object.folder, save_to)
_logger.info("[provenance] Research Object saved to %s", save_to)
research_object.folder = save_to
research_object.closed = True
def packed_workflow(research_object: "ResearchObject", packed: str) -> None:
"""Pack CWL description to generate re-runnable CWL object in RO."""
research_object.self_check()
rel_path = str(PurePosixPath(WORKFLOW) / "packed.cwl")
# Write as binary
with write_bag_file(research_object, rel_path, encoding=None) as write_pack:
write_pack.write(packed)
_logger.debug("[provenance] Added packed workflow: %s", rel_path)
def create_job(
research_object: "ResearchObject", builder_job: CWLObjectType, is_output: bool = False
) -> CWLObjectType:
"""Generate the new job object with RO specific relative paths."""
# TODO: customise the file
copied = copy.deepcopy(builder_job)
relativised_input_objecttemp: CWLObjectType = {}
research_object._relativise_files(copied)
def jdefault(o: Any) -> Dict[Any, Any]:
return dict(o)
if is_output:
rel_path = PurePosixPath(WORKFLOW) / "primary-output.json"
else:
rel_path = PurePosixPath(WORKFLOW) / "primary-job.json"
j = json_dumps(copied, indent=4, ensure_ascii=False, default=jdefault)
with write_bag_file(research_object, str(rel_path)) as file_path:
file_path.write(j + "\n")
_logger.debug("[provenance] Generated customised job file: %s", rel_path)
# Generate dictionary with keys as workflow level input IDs and values
# as
# 1) for files the relativised location containing hash
# 2) for other attributes, the actual value.
for key, value in copied.items():
if isinstance(value, MutableMapping):
if value.get("class") in ("File", "Directory"):
relativised_input_objecttemp[key] = value
else:
relativised_input_objecttemp[key] = value
research_object.relativised_input_object.update(
{k: v for k, v in relativised_input_objecttemp.items() if v}
)
return research_object.relativised_input_object
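# After relativisation, a File entry in primary-job.json points back into the
# bag, e.g. (hypothetical input id and hash):
#
#     "example_in": {
#         "class": "File",
#         "location": "../data/f5/f572d396fae920...",
#         "checksum": "sha1$f572d396fae920..."
#     }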
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/cwlrdf.py 0000644 0001750 0001750 00000014374 14621376047 017616 0 ustar 00michael michael import urllib
from codecs import StreamWriter
from typing import IO, Any, Dict, Iterator, Optional, TextIO, Union, cast
from rdflib import Graph
from rdflib.query import ResultRow
from ruamel.yaml.comments import CommentedMap
from schema_salad.jsonld_context import makerdf
from schema_salad.utils import ContextType
from .cwlviewer import CWLViewer
from .process import Process
def gather(tool: Process, ctx: ContextType) -> Graph:
g = Graph()
def visitor(t: CommentedMap) -> None:
makerdf(t["id"], t, ctx, graph=g)
tool.visit(visitor)
return g
def printrdf(wflow: Process, ctx: ContextType, style: str) -> str:
"""Serialize the CWL document into a string, ready for printing."""
rdf = gather(wflow, ctx).serialize(format=style, encoding="utf-8")
if not rdf:
return ""
return str(rdf, "utf-8")
def lastpart(uri: Any) -> str:
uri2 = str(uri)
if "/" in uri2:
return uri2[uri2.rindex("/") + 1 :]
return uri2
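# e.g. lastpart("file:///tmp/wf.cwl#step1/out") == "out"
#      lastpart("https://w3id.org/cwl/cwl#Workflow") == "cwl#Workflow"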
def dot_with_parameters(g: Graph, stdout: Union[TextIO, StreamWriter]) -> None:
qres = cast(
Iterator[ResultRow],
g.query(
"""SELECT ?step ?run ?runtype
WHERE {
?step cwl:run ?run .
?run rdf:type ?runtype .
}"""
),
) # ResultRow because the query is of type SELECT
for step, run, _ in qres:
stdout.write(
'"{}" [label="{}"]\n'.format(lastpart(step), f"{lastpart(step)} ({lastpart(run)})")
)
qres = cast(
Iterator[ResultRow],
g.query(
"""SELECT ?step ?inp ?source
WHERE {
?wf Workflow:steps ?step .
?step cwl:inputs ?inp .
?inp cwl:source ?source .
}"""
),
) # ResultRow because the query is of type SELECT
for step, inp, source in qres:
stdout.write('"%s" [shape=box]\n' % (lastpart(inp)))
stdout.write('"{}" -> "{}" [label="{}"]\n'.format(lastpart(source), lastpart(inp), ""))
stdout.write('"{}" -> "{}" [label="{}"]\n'.format(lastpart(inp), lastpart(step), ""))
qres = cast(
Iterator[ResultRow],
g.query(
"""SELECT ?step ?out
WHERE {
?wf Workflow:steps ?step .
?step cwl:outputs ?out .
}"""
),
) # ResultRow because the query is of type SELECT
for step, out in qres:
stdout.write('"%s" [shape=box]\n' % (lastpart(out)))
stdout.write('"{}" -> "{}" [label="{}"]\n'.format(lastpart(step), lastpart(out), ""))
qres = cast(
Iterator[ResultRow],
g.query(
"""SELECT ?out ?source
WHERE {
?wf cwl:outputs ?out .
?out cwl:source ?source .
}"""
),
) # ResultRow because the query is of type SELECT
for out, source in qres:
stdout.write('"%s" [shape=octagon]\n' % (lastpart(out)))
stdout.write('"{}" -> "{}" [label="{}"]\n'.format(lastpart(source), lastpart(out), ""))
qres = cast(
Iterator[ResultRow],
g.query(
"""SELECT ?inp
WHERE {
?wf rdf:type cwl:Workflow .
?wf cwl:inputs ?inp .
}"""
),
) # ResultRow because the query is of type SELECT
for (inp,) in qres:
stdout.write('"%s" [shape=octagon]\n' % (lastpart(inp)))
def dot_without_parameters(g: Graph, stdout: Union[TextIO, StreamWriter]) -> None:
dotname: Dict[str, str] = {}
clusternode = {}
stdout.write("compound=true\n")
subworkflows = set()
qres = cast(
Iterator[ResultRow],
g.query(
"""SELECT ?run
WHERE {
?wf rdf:type cwl:Workflow .
?wf Workflow:steps ?step .
?step cwl:run ?run .
?run rdf:type cwl:Workflow .
} ORDER BY ?wf"""
),
) # ResultRow because the query is of type SELECT
for (run,) in qres:
subworkflows.add(run)
qres = cast(
Iterator[ResultRow],
g.query(
"""SELECT ?wf ?step ?run ?runtype
WHERE {
?wf rdf:type cwl:Workflow .
?wf Workflow:steps ?step .
?step cwl:run ?run .
?run rdf:type ?runtype .
} ORDER BY ?wf"""
),
) # ResultRow because the query is of type SELECT
currentwf: Optional[str] = None
for wf, step, _run, runtype in qres:
if step not in dotname:
dotname[step] = lastpart(step)
if wf != currentwf:
if currentwf is not None:
stdout.write("}\n")
if wf in subworkflows:
if wf not in dotname:
dotname[wf] = "cluster_" + lastpart(wf)
stdout.write(f'subgraph "{dotname[wf]}" {{ label="{lastpart(wf)}"\n') # noqa: B907
currentwf = wf
clusternode[wf] = step
else:
currentwf = None
if str(runtype) != "https://w3id.org/cwl/cwl#Workflow":
stdout.write(
f'"{dotname[step]}" [label="{urllib.parse.urldefrag(str(step))[1]}"]\n' # noqa: B907
)
if currentwf is not None:
stdout.write("}\n")
qres = cast(
Iterator[ResultRow],
g.query(
"""SELECT DISTINCT ?src ?sink ?srcrun ?sinkrun
WHERE {
?wf1 Workflow:steps ?src .
?wf2 Workflow:steps ?sink .
?src cwl:out ?out .
?inp cwl:source ?out .
?sink cwl:in ?inp .
?src cwl:run ?srcrun .
?sink cwl:run ?sinkrun .
}"""
),
) # ResultRow because the query is of type SELECT
for src, sink, srcrun, sinkrun in qres:
attr = ""
if srcrun in clusternode:
attr += 'ltail="%s"' % dotname[srcrun]
src = clusternode[srcrun]
if sinkrun in clusternode:
attr += ' lhead="%s"' % dotname[sinkrun]
sink = clusternode[sinkrun]
stdout.write(f'"{dotname[src]}" -> "{dotname[sink]}" [{attr}]\n') # noqa: B907
def printdot(
wf: Process,
ctx: ContextType,
stdout: IO[str],
) -> None:
cwl_viewer: CWLViewer = CWLViewer(printrdf(wf, ctx, "n3"))
stdout.write(cwl_viewer.dot().replace(f"{wf.metadata['id']}#", ""))
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/cwlviewer.py 0000644 0001750 0001750 00000016226 14621376047 020342 0 ustar 00michael michael """Visualize a CWL workflow."""
from pathlib import Path
from typing import Iterator, List, cast
from urllib.parse import urlparse
import pydot
import rdflib
_queries_dir = (Path(__file__).parent / "rdfqueries").resolve()
_get_inner_edges_query_path = _queries_dir / "get_inner_edges.sparql"
_get_input_edges_query_path = _queries_dir / "get_input_edges.sparql"
_get_output_edges_query_path = _queries_dir / "get_output_edges.sparql"
_get_root_query_path = _queries_dir / "get_root.sparql"
class CWLViewer:
"""Produce similar images with the https://github.com/common-workflow-language/cwlviewer."""
def __init__(self, rdf_description: str):
"""Create a viewer object based on the rdf description of the workflow."""
self._dot_graph: pydot.Graph = CWLViewer._init_dot_graph()
self._rdf_graph: rdflib.graph.Graph = self._load_cwl_graph(rdf_description)
self._root_graph_uri: str = self._get_root_graph_uri()
self._set_inner_edges()
self._set_input_edges()
self._set_output_edges()
def _load_cwl_graph(self, rdf_description: str) -> rdflib.graph.Graph:
rdf_graph = rdflib.Graph()
rdf_graph.parse(data=rdf_description, format="n3")
return rdf_graph
def _set_inner_edges(self) -> None:
with open(_get_inner_edges_query_path) as f:
get_inner_edges_query = f.read()
inner_edges = cast(
Iterator[rdflib.query.ResultRow],
self._rdf_graph.query(
get_inner_edges_query, initBindings={"root_graph": self._root_graph_uri}
),
) # ResultRow because the query is of type SELECT
for inner_edge_row in inner_edges:
source_label = (
inner_edge_row["source_label"]
if inner_edge_row["source_label"] is not None
else urlparse(inner_edge_row["source_step"]).fragment
)
# Node color and style depend on class
source_color = (
"#F3CEA1"
if inner_edge_row["source_step_class"].endswith("Workflow")
else "lightgoldenrodyellow"
)
source_style = (
"dashed" if inner_edge_row["source_step_class"].endswith("Operation") else "filled"
)
n = pydot.Node(
"",
fillcolor=source_color,
style=source_style,
label=source_label,
shape="record",
)
n.set_name(str(inner_edge_row["source_step"]))
self._dot_graph.add_node(n)
target_label = (
inner_edge_row["target_label"]
if inner_edge_row["target_label"] is not None
else urlparse(inner_edge_row["target_step"]).fragment
)
target_color = (
"#F3CEA1"
if inner_edge_row["target_step_class"].endswith("Workflow")
else "lightgoldenrodyellow"
)
target_style = (
"dashed" if inner_edge_row["target_step_class"].endswith("Operation") else "filled"
)
n = pydot.Node(
"",
fillcolor=target_color,
style=target_style,
label=target_label,
shape="record",
)
n.set_name(str(inner_edge_row["target_step"]))
self._dot_graph.add_node(n)
self._dot_graph.add_edge(
pydot.Edge(
str(inner_edge_row["source_step"]),
str(inner_edge_row["target_step"]),
)
)
def _set_input_edges(self) -> None:
with open(_get_input_edges_query_path) as f:
get_input_edges_query = f.read()
inputs_subgraph = pydot.Subgraph(graph_name="cluster_inputs")
self._dot_graph.add_subgraph(inputs_subgraph)
inputs_subgraph.set("rank", "same")
inputs_subgraph.create_attribute_methods(["style"])
inputs_subgraph.set("style", "dashed")
inputs_subgraph.set("label", "Workflow Inputs")
input_edges = cast(
Iterator[rdflib.query.ResultRow],
self._rdf_graph.query(
get_input_edges_query, initBindings={"root_graph": self._root_graph_uri}
),
) # ResultRow because the query is of type SELECT
for input_row in input_edges:
n = pydot.Node(
"",
fillcolor="#94DDF4",
style="filled",
label=urlparse(input_row["input"]).fragment,
shape="record",
)
n.set_name(str(input_row["input"]))
inputs_subgraph.add_node(n)
self._dot_graph.add_edge(pydot.Edge(str(input_row["input"]), str(input_row["step"])))
def _set_output_edges(self) -> None:
with open(_get_output_edges_query_path) as f:
get_output_edges = f.read()
outputs_graph = pydot.Subgraph(graph_name="cluster_outputs")
self._dot_graph.add_subgraph(outputs_graph)
outputs_graph.set("rank", "same")
outputs_graph.create_attribute_methods(["style"])
outputs_graph.set("style", "dashed")
outputs_graph.set("label", "Workflow Outputs")
outputs_graph.set("labelloc", "b")
output_edges = cast(
Iterator[rdflib.query.ResultRow],
self._rdf_graph.query(
get_output_edges, initBindings={"root_graph": self._root_graph_uri}
),
) # ResultRow because the query is of type SELECT
for output_edge_row in output_edges:
n = pydot.Node(
"",
fillcolor="#94DDF4",
style="filled",
label=urlparse(output_edge_row["output"]).fragment,
shape="record",
)
n.set_name(str(output_edge_row["output"]))
outputs_graph.add_node(n)
self._dot_graph.add_edge(pydot.Edge(output_edge_row["step"], output_edge_row["output"]))
def _get_root_graph_uri(self) -> rdflib.term.Identifier:
with open(_get_root_query_path) as f:
get_root_query = f.read()
root = cast(
List[rdflib.query.ResultRow],
list(
self._rdf_graph.query(
get_root_query,
)
),
) # ResultRow because the query is of type SELECT
if len(root) != 1:
raise RuntimeError(
"Cannot identify root workflow! Notice that only Workflows can be visualized"
)
workflow = root[0]["workflow"]
return workflow
@classmethod
def _init_dot_graph(cls) -> pydot.Graph:
graph = pydot.Graph(graph_type="digraph", simplify=False)
graph.set("bgcolor", "#eeeeee")
graph.set("clusterrank", "local")
graph.set("labelloc", "bottom")
graph.set("labelloc", "bottom")
graph.set("labeljust", "right")
return graph
def get_dot_graph(self) -> pydot.Graph:
"""Get the dot graph object."""
return self._dot_graph
def dot(self) -> str:
"""Get the graph as graphviz."""
return str(self._dot_graph.to_string())
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/docker.py 0000644 0001750 0001750 00000044306 14621376047 017602 0 ustar 00michael michael """Enables Docker software containers via the {u,}docker or podman runtimes."""
import csv
import datetime
import json
import math
import os
import shutil
import subprocess # nosec
import sys
import threading
from io import StringIO # pylint: disable=redefined-builtin
from typing import Callable, Dict, List, MutableMapping, Optional, Set, Tuple, cast
import requests
from .builder import Builder
from .context import RuntimeContext
from .docker_id import docker_vm_id
from .errors import WorkflowException
from .job import ContainerCommandLineJob
from .loghandler import _logger
from .pathmapper import MapperEnt, PathMapper
from .utils import CWLObjectType, create_tmp_dir, ensure_writable
_IMAGES: Set[str] = set()
_IMAGES_LOCK = threading.Lock()
__docker_machine_mounts: Optional[List[str]] = None
__docker_machine_mounts_lock = threading.Lock()
def _get_docker_machine_mounts() -> List[str]:
global __docker_machine_mounts
if __docker_machine_mounts is None:
with __docker_machine_mounts_lock:
if "DOCKER_MACHINE_NAME" not in os.environ:
__docker_machine_mounts = []
else:
__docker_machine_mounts = [
"/" + line.split(None, 1)[0]
for line in subprocess.check_output( # nosec
[
"docker-machine",
"ssh",
os.environ["DOCKER_MACHINE_NAME"],
"mount",
"-t",
"vboxsf",
],
text=True,
).splitlines()
]
return __docker_machine_mounts
def _check_docker_machine_path(path: Optional[str]) -> None:
if path is None:
return
mounts = _get_docker_machine_mounts()
found = False
for mount in mounts:
if path.startswith(mount):
found = True
break
if not found and mounts:
name = os.environ.get("DOCKER_MACHINE_NAME", "???")
raise WorkflowException(
"Input path {path} is not in the list of host paths mounted "
"into the Docker virtual machine named {name}. Already mounted "
"paths: {mounts}.\n"
"See https://docs.docker.com/toolbox/toolbox_install_windows/"
"#optional-add-shared-directories for instructions on how to "
"add this path to your VM.".format(path=path, name=name, mounts=mounts)
)
class DockerCommandLineJob(ContainerCommandLineJob):
"""Runs a :py:class:`~cwltool.job.CommandLineJob` in a software container using the Docker engine."""
def __init__(
self,
builder: Builder,
joborder: CWLObjectType,
make_path_mapper: Callable[[List[CWLObjectType], str, RuntimeContext, bool], PathMapper],
requirements: List[CWLObjectType],
hints: List[CWLObjectType],
name: str,
) -> None:
"""Initialize a command line builder using the Docker software container engine."""
super().__init__(builder, joborder, make_path_mapper, requirements, hints, name)
self.docker_exec = "docker"
def get_image(
self,
docker_requirement: Dict[str, str],
pull_image: bool,
force_pull: bool,
tmp_outdir_prefix: str,
) -> bool:
"""
Retrieve the relevant Docker container image.
:returns: True upon success
"""
found = False
if "dockerImageId" not in docker_requirement and "dockerPull" in docker_requirement:
docker_requirement["dockerImageId"] = docker_requirement["dockerPull"]
with _IMAGES_LOCK:
if docker_requirement["dockerImageId"] in _IMAGES:
return True
if (docker_image_id := docker_requirement.get("dockerImageId")) is not None:
try:
manifest = json.loads(
str(
subprocess.check_output(
[self.docker_exec, "inspect", docker_image_id]
), # nosec
"utf-8",
)
)
found = manifest is not None
except (OSError, subprocess.CalledProcessError, UnicodeError):
pass
cmd: List[str] = []
if "dockerFile" in docker_requirement:
dockerfile_dir = create_tmp_dir(tmp_outdir_prefix)
with open(os.path.join(dockerfile_dir, "Dockerfile"), "w") as dfile:
dfile.write(docker_requirement["dockerFile"])
cmd = [
self.docker_exec,
"build",
"--tag=%s" % str(docker_requirement["dockerImageId"]),
dockerfile_dir,
]
_logger.info(str(cmd))
subprocess.check_call(cmd, stdout=sys.stderr) # nosec
found = True
if (force_pull or not found) and pull_image:
if "dockerPull" in docker_requirement:
cmd = [self.docker_exec, "pull", str(docker_requirement["dockerPull"])]
_logger.info(str(cmd))
subprocess.check_call(cmd, stdout=sys.stderr) # nosec
found = True
elif "dockerLoad" in docker_requirement:
cmd = [self.docker_exec, "load"]
_logger.info(str(cmd))
if os.path.exists(docker_requirement["dockerLoad"]):
_logger.info(
"Loading docker image from %s",
docker_requirement["dockerLoad"],
)
with open(docker_requirement["dockerLoad"], "rb") as dload:
loadproc = subprocess.Popen(cmd, stdin=dload, stdout=sys.stderr) # nosec
else:
loadproc = subprocess.Popen( # nosec
cmd, stdin=subprocess.PIPE, stdout=sys.stderr
)
assert loadproc.stdin is not None # nosec
_logger.info("Sending GET request to %s", docker_requirement["dockerLoad"])
req = requests.get(docker_requirement["dockerLoad"], stream=True, timeout=60)
size = 0
for chunk in req.iter_content(1024 * 1024):
size += len(chunk)
_logger.info("\r%i bytes", size)
loadproc.stdin.write(chunk)
loadproc.stdin.close()
rcode = loadproc.wait()
if rcode != 0:
raise WorkflowException(
"Docker load returned non-zero exit status %i" % (rcode)
)
found = True
elif "dockerImport" in docker_requirement:
cmd = [
self.docker_exec,
"import",
str(docker_requirement["dockerImport"]),
str(docker_requirement["dockerImageId"]),
]
_logger.info(str(cmd))
subprocess.check_call(cmd, stdout=sys.stderr) # nosec
found = True
if found:
with _IMAGES_LOCK:
_IMAGES.add(docker_requirement["dockerImageId"])
return found
def get_from_requirements(
self,
r: CWLObjectType,
pull_image: bool,
force_pull: bool,
tmp_outdir_prefix: str,
) -> Optional[str]:
if not shutil.which(self.docker_exec):
raise WorkflowException(f"{self.docker_exec} executable is not available")
if self.get_image(cast(Dict[str, str], r), pull_image, force_pull, tmp_outdir_prefix):
return cast(Optional[str], r["dockerImageId"])
raise WorkflowException("Docker image %s not found" % r["dockerImageId"])
@staticmethod
def append_volume(
runtime: List[str],
source: str,
target: str,
writable: bool = False,
skip_mkdirs: bool = False,
) -> None:
"""Add binding arguments to the runtime list."""
options = [
"type=bind",
"source=" + source,
"target=" + target,
]
if not writable:
options.append("readonly")
output = StringIO()
csv.writer(output).writerow(options)
mount_arg = output.getvalue().strip()
runtime.append(f"--mount={mount_arg}")
# Unlike "--volume", "--mount" will fail if the volume doesn't already exist.
if (not skip_mkdirs) and (not os.path.exists(source)):
os.makedirs(source)
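# For example (illustrative paths), append_volume(runtime, "/tmp/out",
# "/var/spool/cwl", writable=True) appends:
#
#     --mount=type=bind,source=/tmp/out,target=/var/spool/cwl
#
# A read-only mount additionally gets ",readonly"; csv.writer is used so a
# source path containing a comma is quoted correctly.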
def add_file_or_directory_volume(
self, runtime: List[str], volume: MapperEnt, host_outdir_tgt: Optional[str]
) -> None:
"""Append volume a file/dir mapping to the runtime option list."""
if not volume.resolved.startswith("_:"):
_check_docker_machine_path(volume.resolved)
self.append_volume(runtime, volume.resolved, volume.target)
def add_writable_file_volume(
self,
runtime: List[str],
volume: MapperEnt,
host_outdir_tgt: Optional[str],
tmpdir_prefix: str,
) -> None:
"""Append a writable file mapping to the runtime option list."""
if self.inplace_update:
self.append_volume(runtime, volume.resolved, volume.target, writable=True)
else:
if host_outdir_tgt:
# shortcut, just copy to the output directory
# which is already going to be mounted
if not os.path.exists(os.path.dirname(host_outdir_tgt)):
os.makedirs(os.path.dirname(host_outdir_tgt))
shutil.copy(volume.resolved, host_outdir_tgt)
else:
tmpdir = create_tmp_dir(tmpdir_prefix)
file_copy = os.path.join(tmpdir, os.path.basename(volume.resolved))
shutil.copy(volume.resolved, file_copy)
self.append_volume(runtime, file_copy, volume.target, writable=True)
ensure_writable(host_outdir_tgt or file_copy)
def add_writable_directory_volume(
self,
runtime: List[str],
volume: MapperEnt,
host_outdir_tgt: Optional[str],
tmpdir_prefix: str,
) -> None:
"""Append a writable directory mapping to the runtime option list."""
if volume.resolved.startswith("_:"):
# Synthetic directory that needs creating first
if not host_outdir_tgt:
new_dir = os.path.join(
create_tmp_dir(tmpdir_prefix),
os.path.basename(volume.target),
)
self.append_volume(runtime, new_dir, volume.target, writable=True)
elif not os.path.exists(host_outdir_tgt):
os.makedirs(host_outdir_tgt)
else:
if self.inplace_update:
self.append_volume(runtime, volume.resolved, volume.target, writable=True)
else:
if not host_outdir_tgt:
tmpdir = create_tmp_dir(tmpdir_prefix)
new_dir = os.path.join(tmpdir, os.path.basename(volume.resolved))
shutil.copytree(volume.resolved, new_dir)
self.append_volume(runtime, new_dir, volume.target, writable=True)
else:
shutil.copytree(volume.resolved, host_outdir_tgt)
ensure_writable(host_outdir_tgt or new_dir)
def _required_env(self) -> Dict[str, str]:
# spec currently says "HOME must be set to the designated output
# directory." but spec might change to designated temp directory.
# runtime.append("--env=HOME=/tmp")
return {
"TMPDIR": self.CONTAINER_TMPDIR,
"HOME": self.builder.outdir,
}
def create_runtime(
self, env: MutableMapping[str, str], runtimeContext: RuntimeContext
) -> Tuple[List[str], Optional[str]]:
any_path_okay = self.builder.get_requirement("DockerRequirement")[1] or False
user_space_docker_cmd = runtimeContext.user_space_docker_cmd
if user_space_docker_cmd:
if "udocker" in user_space_docker_cmd:
if runtimeContext.debug:
runtime = [user_space_docker_cmd, "run", "--nobanner"]
else:
runtime = [user_space_docker_cmd, "--quiet", "run", "--nobanner"]
else:
runtime = [user_space_docker_cmd, "run"]
else:
runtime = [self.docker_exec, "run", "-i"]
if runtimeContext.podman:
runtime.append("--userns=keep-id")
self.append_volume(
runtime, os.path.realpath(self.outdir), self.builder.outdir, writable=True
)
self.append_volume(
runtime, os.path.realpath(self.tmpdir), self.CONTAINER_TMPDIR, writable=True
)
self.add_volumes(
self.pathmapper,
runtime,
any_path_okay=True,
secret_store=runtimeContext.secret_store,
tmpdir_prefix=runtimeContext.tmpdir_prefix,
)
if self.generatemapper is not None:
self.add_volumes(
self.generatemapper,
runtime,
any_path_okay=any_path_okay,
secret_store=runtimeContext.secret_store,
tmpdir_prefix=runtimeContext.tmpdir_prefix,
)
if user_space_docker_cmd:
runtime = [x.replace(":ro", "") for x in runtime]
runtime = [x.replace(":rw", "") for x in runtime]
runtime.append("--workdir=%s" % (self.builder.outdir))
if not user_space_docker_cmd:
if not runtimeContext.no_read_only:
runtime.append("--read-only=true")
if self.networkaccess:
if runtimeContext.custom_net:
runtime.append(f"--net={runtimeContext.custom_net}")
else:
runtime.append("--net=none")
if self.stdout is not None:
runtime.append("--log-driver=none")
euid, egid = docker_vm_id()
euid, egid = euid or os.geteuid(), egid or os.getgid()
if runtimeContext.no_match_user is False and (euid is not None and egid is not None):
runtime.append("--user=%d:%d" % (euid, egid))
if runtimeContext.rm_container:
runtime.append("--rm")
if self.builder.resources.get("cudaDeviceCount"):
runtime.append("--gpus=" + str(self.builder.resources["cudaDeviceCount"]))
cidfile_path: Optional[str] = None
# add parameters to docker to write a container ID file
if runtimeContext.user_space_docker_cmd is None:
if runtimeContext.cidfile_dir:
cidfile_dir = runtimeContext.cidfile_dir
if not os.path.exists(str(cidfile_dir)):
_logger.error(
"--cidfile-dir %s error:\n%s",
cidfile_dir,
"directory doesn't exist, please create it first",
)
exit(2)
if not os.path.isdir(cidfile_dir):
_logger.error(
"--cidfile-dir %s error:\n%s",
cidfile_dir,
cidfile_dir + " is not a directory, please check it first",
)
exit(2)
else:
cidfile_dir = runtimeContext.create_tmpdir()
cidfile_name = datetime.datetime.now().strftime("%Y%m%d%H%M%S-%f") + ".cid"
if runtimeContext.cidfile_prefix is not None:
cidfile_name = str(runtimeContext.cidfile_prefix + "-" + cidfile_name)
cidfile_path = os.path.join(cidfile_dir, cidfile_name)
runtime.append("--cidfile=%s" % cidfile_path)
for key, value in self.environment.items():
runtime.append(f"--env={key}={value}")
res_req, _ = self.builder.get_requirement("ResourceRequirement")
if runtimeContext.strict_memory_limit and not user_space_docker_cmd:
ram = self.builder.resources["ram"]
runtime.append("--memory=%dm" % ram)
elif not user_space_docker_cmd:
if res_req and ("ramMin" in res_req or "ramMax" in res_req):
_logger.warning(
"[job %s] Skipping Docker software container '--memory' limit "
"despite presence of ResourceRequirement with ramMin "
"and/or ramMax setting. Consider running with "
"--strict-memory-limit for increased portability "
"assurance.",
self.name,
)
if runtimeContext.strict_cpu_limit and not user_space_docker_cmd:
cpus = math.ceil(self.builder.resources["cores"])
runtime.append(f"--cpus={cpus}")
elif not user_space_docker_cmd:
if res_req and ("coresMin" in res_req or "coresMax" in res_req):
_logger.warning(
"[job %s] Skipping Docker software container '--cpus' limit "
"despite presence of ResourceRequirement with coresMin "
"and/or coresMax setting. Consider running with "
"--strict-cpu-limit for increased portability "
"assurance.",
self.name,
)
shm_size_od, shm_bool = self.builder.get_requirement("http://commonwl.org/cwltool#ShmSize")
if shm_bool:
shm_size = cast(CWLObjectType, shm_size_od)["shmSize"]
runtime.append(f"--shm-size={shm_size}")
return runtime, cidfile_path
class PodmanCommandLineJob(DockerCommandLineJob):
"""Runs a :py:class:`~cwltool.job.CommandLineJob` in a software container using the podman engine."""
def __init__(
self,
builder: Builder,
joborder: CWLObjectType,
make_path_mapper: Callable[[List[CWLObjectType], str, RuntimeContext, bool], PathMapper],
requirements: List[CWLObjectType],
hints: List[CWLObjectType],
name: str,
) -> None:
"""Initialize a command line builder using the Podman software container engine."""
super().__init__(builder, joborder, make_path_mapper, requirements, hints, name)
self.docker_exec = "podman"
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/docker_id.py 0000644 0001750 0001750 00000010273 14621376047 020252 0 ustar 00michael michael """Helper functions for docker."""
import subprocess # nosec
from typing import List, Optional, Tuple
def docker_vm_id() -> Tuple[Optional[int], Optional[int]]:
"""
Return the User ID and Group ID of the default docker user inside the VM.
When a host is using boot2docker or docker-machine to run docker with
boot2docker.iso (as on Mac OS X), the UID that mounts the shared filesystem
inside the VirtualBox VM is likely different than the user's UID on the host.
:return: A tuple containing numeric User ID and Group ID of the docker account inside
the boot2docker VM
"""
if boot2docker_running():
return boot2docker_id()
if docker_machine_running():
return docker_machine_id()
return (None, None)
def check_output_and_strip(cmd: List[str]) -> Optional[str]:
"""
Pass a command list to :py:func:`subprocess.check_output`.
Return ``None`` if an expected exception is raised.
:param cmd: The command to execute
:return: Stripped string output of the command, or ``None`` if error
"""
try:
result = subprocess.check_output(cmd, stderr=subprocess.STDOUT, text=True) # nosec
return result.strip()
except (OSError, subprocess.CalledProcessError, TypeError, AttributeError):
# OSError is raised if command doesn't exist
# CalledProcessError is raised if command returns nonzero
# AttributeError is raised if result cannot be strip()ped
return None
def docker_machine_name() -> Optional[str]:
"""
Get the machine name of the active docker-machine machine.
:return: Name of the active machine or ``None`` if error
"""
return check_output_and_strip(["docker-machine", "active"])
def cmd_output_matches(check_cmd: List[str], expected_status: str) -> bool:
"""
Run a command and compare its output to the expected value.
:param check_cmd: Command list to execute
:param expected_status: Expected output, e.g. "Running" or "poweroff"
:return: Boolean value, indicating whether or not command result matched
"""
return check_output_and_strip(check_cmd) == expected_status
def boot2docker_running() -> bool:
"""
Check if boot2docker CLI reports that boot2docker vm is running.
:return: ``True`` if vm is running, ``False`` otherwise
"""
return cmd_output_matches(["boot2docker", "status"], "running")
def docker_machine_running() -> bool:
"""
Ask docker-machine for the active machine and check if its VM is running.
:return: ``True`` if vm is running, ``False`` otherwise
"""
machine_name = docker_machine_name()
if not machine_name:
return False
return cmd_output_matches(["docker-machine", "status", machine_name], "Running")
def cmd_output_to_int(cmd: List[str]) -> Optional[int]:
"""
Run the provided command and return the integer value of the result.
:param cmd: The command to run
:return: Integer value of result, or None if an error occurred
"""
result = check_output_and_strip(cmd) # may return None
if result is not None:
try:
return int(result)
except ValueError:
# ValueError is raised if int conversion fails
return None
return None
def boot2docker_id() -> Tuple[Optional[int], Optional[int]]:
"""
Get the UID and GID of the docker user inside a running boot2docker vm.
:return: Tuple (UID, GID), or (None, None) if error (e.g. boot2docker not present or stopped)
"""
uid = cmd_output_to_int(["boot2docker", "ssh", "id", "-"])
gid = cmd_output_to_int(["boot2docker", "ssh", "id", "-g"])
return (uid, gid)
def docker_machine_id() -> Tuple[Optional[int], Optional[int]]:
"""
Ask docker-machine for the active machine and get the UID and GID
of the docker user inside the VM.
:return: tuple (UID, GID), or (None, None) if error (e.g. docker-machine not present or stopped)
"""
machine_name = docker_machine_name()
if not machine_name:
return (None, None)
uid = cmd_output_to_int(["docker-machine", "ssh", machine_name, "id -"])
gid = cmd_output_to_int(["docker-machine", "ssh", machine_name, "id -g"])
return (uid, gid)
if __name__ == "__main__":
print(docker_vm_id())
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/env_to_stdout.py 0000644 0001750 0001750 00000001637 14621376047 021227 0 ustar 00michael michael r"""Python script that acts like (GNU coreutils) env -0.
When run as a script, it prints the environment as
`(VARNAME=value\0)*`.
Ideally we would just use `env -0`, because python (thanks to PEPs 538
and 540) will set zero to two environment variables to better handle
Unicode-locale interactions, however BSD family implementations of
`env` do not all support the `-0` flag so we supply this script that
produces equivalent output.
"""
import os
from typing import Dict
def deserialize_env(data: str) -> Dict[str, str]:
"""Deserialize the output of `env -0` to dictionary."""
result = {}
for item in data.strip("\0").split("\0"):
key, val = item.split("=", 1)
result[key] = val
return result
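# e.g. deserialize_env("PATH=/usr/bin\0HOME=/home/user\0")
#      == {"PATH": "/usr/bin", "HOME": "/home/user"}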
def main() -> None:
"""Print the null-separated environment to stdout."""
for k, v in os.environ.items():
print(f"{k}={v}", end="\0")
if __name__ == "__main__":
main()
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/errors.py 0000644 0001750 0001750 00000000532 14621376047 017640 0 ustar 00michael michael class WorkflowException(Exception):
pass
class UnsupportedRequirement(WorkflowException):
pass
class ArgumentException(Exception):
"""Mismatched command line arguments provided."""
class GraphTargetMissingException(WorkflowException):
"""When a ``$graph`` is encountered and there is no target and no ``main``/``#main``."""
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/executors.py 0000644 0001750 0001750 00000047043 14621376047 020355 0 ustar 00michael michael """Single and multi-threaded executors."""
import datetime
import functools
import logging
import math
import os
import threading
from abc import ABCMeta, abstractmethod
from threading import Lock
from typing import (
Dict,
Iterable,
List,
MutableSequence,
Optional,
Set,
Tuple,
Union,
cast,
)
import psutil
from mypy_extensions import mypyc_attr
from schema_salad.exceptions import ValidationException
from schema_salad.sourceline import SourceLine
from .command_line_tool import CallbackJob, ExpressionJob
from .context import RuntimeContext, getdefault
from .cuda import cuda_version_and_device_count
from .cwlprov.provenance_profile import ProvenanceProfile
from .errors import WorkflowException
from .job import JobBase
from .loghandler import _logger
from .mutation import MutationManager
from .process import Process, cleanIntermediate, relocateOutputs
from .task_queue import TaskQueue
from .update import ORIGINAL_CWLVERSION
from .utils import CWLObjectType, JobsType
from .workflow import Workflow
from .workflow_job import WorkflowJob, WorkflowJobStep
TMPDIR_LOCK = Lock()
@mypyc_attr(allow_interpreted_subclasses=True)
class JobExecutor(metaclass=ABCMeta):
"""Abstract base job executor."""
def __init__(self) -> None:
"""Initialize."""
self.final_output: MutableSequence[Optional[CWLObjectType]] = []
self.final_status: List[str] = []
self.output_dirs: Set[str] = set()
def __call__(
self,
process: Process,
job_order_object: CWLObjectType,
runtime_context: RuntimeContext,
logger: logging.Logger = _logger,
) -> Tuple[Optional[CWLObjectType], str]:
return self.execute(process, job_order_object, runtime_context, logger)
def output_callback(self, out: Optional[CWLObjectType], process_status: str) -> None:
"""Collect the final status and outputs."""
self.final_status.append(process_status)
self.final_output.append(out)
@abstractmethod
def run_jobs(
self,
process: Process,
job_order_object: CWLObjectType,
logger: logging.Logger,
runtime_context: RuntimeContext,
) -> None:
"""Execute the jobs for the given Process."""
def execute(
self,
process: Process,
job_order_object: CWLObjectType,
runtime_context: RuntimeContext,
logger: logging.Logger = _logger,
) -> Tuple[Union[Optional[CWLObjectType]], str]:
"""Execute the process."""
self.final_output = []
self.final_status = []
if not runtime_context.basedir:
raise WorkflowException("Must provide 'basedir' in runtimeContext")
def check_for_abstract_op(tool: CWLObjectType) -> None:
if tool["class"] == "Operation":
raise SourceLine(tool, "class", WorkflowException, runtime_context.debug).makeError(
"Workflow has unrunnable abstract Operation"
)
process.visit(check_for_abstract_op)
finaloutdir = None # Type: Optional[str]
original_outdir = runtime_context.outdir
if isinstance(original_outdir, str):
finaloutdir = os.path.abspath(original_outdir)
runtime_context = runtime_context.copy()
outdir = runtime_context.create_outdir()
self.output_dirs.add(outdir)
runtime_context.outdir = outdir
runtime_context.mutation_manager = MutationManager()
runtime_context.toplevel = True
runtime_context.workflow_eval_lock = threading.Condition(threading.RLock())
job_reqs: Optional[List[CWLObjectType]] = None
if "https://w3id.org/cwl/cwl#requirements" in job_order_object:
if process.metadata.get(ORIGINAL_CWLVERSION) == "v1.0":
raise WorkflowException(
"`cwl:requirements` in the input object is not part of CWL "
"v1.0. You can adjust to use `cwltool:overrides` instead; or you "
"can set the cwlVersion to v1.1"
)
job_reqs = cast(
List[CWLObjectType],
job_order_object["https://w3id.org/cwl/cwl#requirements"],
)
elif "cwl:defaults" in process.metadata and "https://w3id.org/cwl/cwl#requirements" in cast(
CWLObjectType, process.metadata["cwl:defaults"]
):
if process.metadata.get(ORIGINAL_CWLVERSION) == "v1.0":
raise WorkflowException(
"`cwl:requirements` in the input object is not part of CWL "
"v1.0. You can adjust to use `cwltool:overrides` instead; or you "
"can set the cwlVersion to v1.1"
)
job_reqs = cast(
Optional[List[CWLObjectType]],
cast(CWLObjectType, process.metadata["cwl:defaults"])[
"https://w3id.org/cwl/cwl#requirements"
],
)
if job_reqs is not None:
for req in job_reqs:
process.requirements.append(req)
self.run_jobs(process, job_order_object, logger, runtime_context)
if runtime_context.validate_only is True:
return (None, "ValidationSuccess")
if self.final_output and self.final_output[0] is not None and finaloutdir is not None:
self.final_output[0] = relocateOutputs(
self.final_output[0],
finaloutdir,
self.output_dirs,
runtime_context.move_outputs,
runtime_context.make_fs_access(""),
getdefault(runtime_context.compute_checksum, True),
path_mapper=runtime_context.path_mapper,
)
if runtime_context.rm_tmpdir:
if not runtime_context.cachedir:
output_dirs: Iterable[str] = self.output_dirs
else:
output_dirs = filter(
lambda x: not x.startswith(runtime_context.cachedir), # type: ignore
self.output_dirs,
)
cleanIntermediate(output_dirs)
if self.final_output and self.final_status:
if (
runtime_context.research_obj is not None
and isinstance(process, (JobBase, Process, WorkflowJobStep, WorkflowJob))
and process.parent_wf
):
process_run_id: Optional[str] = None
name = "primary"
process.parent_wf.generate_output_prov(self.final_output[0], process_run_id, name)
process.parent_wf.document.wasEndedBy(
process.parent_wf.workflow_run_uri,
None,
process.parent_wf.engine_uuid,
datetime.datetime.now(),
)
process.parent_wf.finalize_prov_profile(name=None)
return (self.final_output[0], self.final_status[0])
return (None, "permanentFail")
@mypyc_attr(allow_interpreted_subclasses=True)
class SingleJobExecutor(JobExecutor):
"""Default single-threaded CWL reference executor."""
def run_jobs(
self,
process: Process,
job_order_object: CWLObjectType,
logger: logging.Logger,
runtime_context: RuntimeContext,
) -> None:
process_run_id: Optional[str] = None
# define provenance profile for single commandline tool
if not isinstance(process, Workflow) and runtime_context.research_obj is not None:
process.provenance_object = ProvenanceProfile(
runtime_context.research_obj,
full_name=runtime_context.cwl_full_name,
host_provenance=False,
user_provenance=False,
orcid=runtime_context.orcid,
# single tool execution, so RO UUID = wf UUID = tool UUID
run_uuid=runtime_context.research_obj.ro_uuid,
fsaccess=runtime_context.make_fs_access(""),
)
process.parent_wf = process.provenance_object
jobiter = process.job(job_order_object, self.output_callback, runtime_context)
try:
for job in jobiter:
if job is not None:
if runtime_context.builder is not None and hasattr(job, "builder"):
job.builder = runtime_context.builder
if job.outdir is not None:
self.output_dirs.add(job.outdir)
if runtime_context.research_obj is not None:
if not isinstance(process, Workflow):
prov_obj = process.provenance_object
else:
prov_obj = job.prov_obj
if prov_obj:
runtime_context.prov_obj = prov_obj
prov_obj.fsaccess = runtime_context.make_fs_access("")
prov_obj.evaluate(
process,
job,
job_order_object,
runtime_context.research_obj,
)
process_run_id = prov_obj.record_process_start(process, job)
runtime_context = runtime_context.copy()
runtime_context.process_run_id = process_run_id
if runtime_context.validate_only is True:
if isinstance(job, WorkflowJob):
name = job.tool.lc.filename
else:
name = getattr(job, "name", str(job))
print(
f"{name} is valid CWL. No errors detected in the inputs.",
file=runtime_context.validate_stdout,
)
return
job.run(runtime_context)
else:
logger.error("Workflow cannot make any more progress.")
break
except (
ValidationException,
WorkflowException,
): # pylint: disable=try-except-raise
raise
except Exception as err:
logger.exception("Got workflow error")
raise WorkflowException(str(err)) from err
class MultithreadedJobExecutor(JobExecutor):
"""
Experimental multi-threaded CWL executor.
Does simple resource accounting: it will not start a job unless
enough cores and RAM are available, but it makes no attempt to
optimize usage.
"""
def __init__(self) -> None:
"""Initialize."""
super().__init__()
self.exceptions: List[WorkflowException] = []
self.pending_jobs: List[JobsType] = []
self.pending_jobs_lock = threading.Lock()
self.max_ram = int(psutil.virtual_memory().available / 2**20)
self.max_cores = float(psutil.cpu_count())
self.max_cuda = cuda_version_and_device_count()[1]
self.allocated_ram = float(0)
self.allocated_cores = float(0)
self.allocated_cuda: int = 0
def select_resources(
self, request: Dict[str, Union[int, float]], runtime_context: RuntimeContext
) -> Dict[str, Union[int, float]]: # pylint: disable=unused-argument
"""Naïve check for available cpu cores and memory."""
result: Dict[str, Union[int, float]] = {}
maxrsc = {"cores": self.max_cores, "ram": self.max_ram}
resources_types = {"cores", "ram"}
if "cudaDeviceCountMin" in request or "cudaDeviceCountMax" in request:
maxrsc["cudaDeviceCount"] = self.max_cuda
resources_types.add("cudaDeviceCount")
for rsc in resources_types:
rsc_min = request[rsc + "Min"]
if rsc_min > maxrsc[rsc]:
raise WorkflowException(
f"Requested at least {rsc_min} {rsc} but only " f"{maxrsc[rsc]} available"
)
rsc_max = request[rsc + "Max"]
if rsc_max < maxrsc[rsc]:
result[rsc] = math.ceil(rsc_max)
else:
result[rsc] = maxrsc[rsc]
result["tmpdirSize"] = math.ceil(request["tmpdirMin"])
result["outdirSize"] = math.ceil(request["outdirMin"])
return result
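# Worked example (illustrative numbers): with max_cores = 8, a request of
# {"coresMin": 2, "coresMax": 4, "ramMin": 256, "ramMax": 1024, ...} yields
# result["cores"] = 4 (the requested maximum fits), while a coresMax of 16
# would be clamped to all 8 available cores; a coresMin above max_cores
# raises WorkflowException instead.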
def _runner(
self,
job: Union[JobBase, WorkflowJob, CallbackJob, ExpressionJob],
runtime_context: RuntimeContext,
TMPDIR_LOCK: threading.Lock,
) -> None:
"""Job running thread."""
try:
_logger.debug(
"job: %s, runtime_context: %s, TMPDIR_LOCK: %s",
job,
runtime_context,
TMPDIR_LOCK,
)
job.run(runtime_context, TMPDIR_LOCK)
except WorkflowException as err:
_logger.exception(f"Got workflow error: {err}")
self.exceptions.append(err)
except Exception as err: # pylint: disable=broad-except
_logger.exception(f"Got workflow error: {err}")
self.exceptions.append(WorkflowException(str(err)))
finally:
if runtime_context.workflow_eval_lock:
with runtime_context.workflow_eval_lock:
if isinstance(job, JobBase):
ram = job.builder.resources["ram"]
self.allocated_ram -= ram
cores = job.builder.resources["cores"]
self.allocated_cores -= cores
cudaDevices: int = cast(
int, job.builder.resources.get("cudaDeviceCount", 0)
)
self.allocated_cuda -= cudaDevices
runtime_context.workflow_eval_lock.notify_all()
def run_job(
self,
job: Optional[JobsType],
runtime_context: RuntimeContext,
) -> None:
"""Execute a single Job in a separate thread."""
if job is not None:
with self.pending_jobs_lock:
self.pending_jobs.append(job)
with self.pending_jobs_lock:
n = 0
while n < len(self.pending_jobs):
# Simple greedy resource allocation strategy. Go
# through pending jobs in the order they were
# generated and add them to the queue only if there
# are resources available.
job = self.pending_jobs[n]
if isinstance(job, JobBase):
ram = job.builder.resources["ram"]
cores = job.builder.resources["cores"]
cudaDevices = cast(int, job.builder.resources.get("cudaDeviceCount", 0))
if ram > self.max_ram or cores > self.max_cores or cudaDevices > self.max_cuda:
_logger.error(
'Job "%s" cannot be run, requests more resources (%s) '
"than available on this host (already allocated ram is %d, "
"allocated cores is %d, allocated CUDA is %d, "
"max ram %d, max cores %d, max CUDA %d).",
job.name,
job.builder.resources,
self.allocated_ram,
self.allocated_cores,
self.allocated_cuda,
self.max_ram,
self.max_cores,
self.max_cuda,
)
self.pending_jobs.remove(job)
return
if (
self.allocated_ram + ram > self.max_ram
or self.allocated_cores + cores > self.max_cores
or self.allocated_cuda + cudaDevices > self.max_cuda
):
_logger.debug(
'Job "%s" cannot run yet, resources (%s) are not '
"available (already allocated ram is %d, allocated cores is %d, "
"allocated CUDA devices is %d, "
"max ram %d, max cores %d, max CUDA %d).",
job.name,
job.builder.resources,
self.allocated_ram,
self.allocated_cores,
self.allocated_cuda,
self.max_ram,
self.max_cores,
self.max_cuda,
)
n += 1
continue
if isinstance(job, JobBase):
ram = job.builder.resources["ram"]
self.allocated_ram += ram
cores = job.builder.resources["cores"]
self.allocated_cores += cores
cuda = cast(int, job.builder.resources.get("cudaDeviceCount", 0))  # same key as the accounting in _runner
self.allocated_cuda += cuda
self.taskqueue.add(
functools.partial(self._runner, job, runtime_context, TMPDIR_LOCK),
runtime_context.workflow_eval_lock,
)
self.pending_jobs.remove(job)
def wait_for_next_completion(self, runtime_context: RuntimeContext) -> None:
"""Wait for jobs to finish."""
if runtime_context.workflow_eval_lock is not None:
runtime_context.workflow_eval_lock.wait(timeout=3)
if self.exceptions:
raise self.exceptions[0]
def run_jobs(
self,
process: Process,
job_order_object: CWLObjectType,
logger: logging.Logger,
runtime_context: RuntimeContext,
) -> None:
self.taskqueue: TaskQueue = TaskQueue(threading.Lock(), psutil.cpu_count())
try:
jobiter = process.job(job_order_object, self.output_callback, runtime_context)
if runtime_context.workflow_eval_lock is None:
raise WorkflowException("runtimeContext.workflow_eval_lock must not be None")
runtime_context.workflow_eval_lock.acquire()
for job in jobiter:
if job is not None:
if isinstance(job, JobBase):
job.builder = runtime_context.builder or job.builder
if job.outdir is not None:
self.output_dirs.add(job.outdir)
self.run_job(job, runtime_context)
if job is None:
if self.taskqueue.in_flight > 0:
self.wait_for_next_completion(runtime_context)
else:
logger.error("Workflow cannot make any more progress.")
break
self.run_job(None, runtime_context)
while self.taskqueue.in_flight > 0:
self.wait_for_next_completion(runtime_context)
self.run_job(None, runtime_context)
runtime_context.workflow_eval_lock.release()
finally:
self.taskqueue.drain()
self.taskqueue.join()
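# Usage sketch: this executor backs the ``--parallel`` command line option
# and can also be passed to the factory API directly (paths and input names
# below are hypothetical):
#
#     from cwltool.executors import MultithreadedJobExecutor
#     from cwltool.factory import Factory
#
#     fac = Factory(executor=MultithreadedJobExecutor())
#     wf = fac.make("wf.cwl")        # hypothetical workflow document
#     result = wf(inp="whale.txt")   # independent steps may run concurrently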
class NoopJobExecutor(JobExecutor):
"""Do nothing executor, for testing purposes only."""
def run_jobs(
self,
process: Process,
job_order_object: CWLObjectType,
logger: logging.Logger,
runtime_context: RuntimeContext,
) -> None:
pass
def execute(
self,
process: Process,
job_order_object: CWLObjectType,
runtime_context: RuntimeContext,
logger: Optional[logging.Logger] = None,
) -> Tuple[Optional[CWLObjectType], str]:
return {}, "success"
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/extensions-v1.1.yml 0000644 0001750 0001750 00000010101 14621376047 021350 0 ustar 00michael michael $base: http://commonwl.org/cwltool#
$namespaces:
cwl: "https://w3id.org/cwl/cwl#"
cwltool: "http://commonwl.org/cwltool#"
$graph:
- $import: https://w3id.org/cwl/CommonWorkflowLanguage.yml
- name: Secrets
type: record
inVocab: false
extends: cwl:ProcessRequirement
fields:
class:
type: string
doc: "Always 'Secrets'"
jsonldPredicate:
"_id": "@type"
"_type": "@vocab"
secrets:
type: string[]
doc: |
List one or more input parameters that are sensitive (such as
passwords), which will be deliberately obscured from logging.
jsonldPredicate:
"_type": "@id"
refScope: 0
- name: ProcessGenerator
type: record
inVocab: true
extends: cwl:Process
documentRoot: true
fields:
- name: class
jsonldPredicate:
"_id": "@type"
"_type": "@vocab"
type: string
- name: run
type: [string, cwl:Process]
jsonldPredicate:
_id: "cwl:run"
_type: "@id"
subscope: run
doc: |
Specifies the process to run.
- name: MPIRequirement
type: record
inVocab: false
extends: cwl:ProcessRequirement
doc: |
Indicates that a process requires an MPI runtime.
fields:
- name: class
type: string
doc: "Always 'MPIRequirement'"
jsonldPredicate:
"_id": "@type"
"_type": "@vocab"
- name: processes
type: [int, cwl:Expression]
doc: |
The number of MPI processes to start. If you give a string,
this will be evaluated as a CWL Expression and it must
evaluate to an integer.
- name: CUDARequirement
type: record
extends: cwl:ProcessRequirement
inVocab: false
doc: |
Require support for NVIDIA CUDA (GPU hardware acceleration).
fields:
class:
type: string
doc: 'cwltool:CUDARequirement'
jsonldPredicate:
_id: "@type"
_type: "@vocab"
cudaVersionMin:
type: string
doc: |
Minimum CUDA version to run the software, in X.Y format. This
corresponds to a CUDA SDK release. When running directly on
the host (not in a container) the host must have a compatible
CUDA SDK (matching the exact version, or, starting with CUDA
11.3, matching major version). When run in a container, the
container image should provide the CUDA runtime, and the host
driver is injected into the container. In this case, because
CUDA drivers are backwards compatible, it is possible to
use an older SDK with a newer driver across major versions.
See https://docs.nvidia.com/deploy/cuda-compatibility/ for
details.
cudaComputeCapability:
type:
- 'string'
- 'string[]'
doc: |
CUDA hardware capability required to run the software, in X.Y
format.
* If this is a single value, it defines only the minimum
compute capability. GPUs with higher capability are also
accepted.
* If it is an array value, then only select GPUs with compute
capabilities that explicitly appear in the array.
cudaDeviceCountMin:
type: ['null', int, cwl:Expression]
default: 1
doc: |
Minimum number of GPU devices to request. If not specified,
same as `cudaDeviceCountMax`. If neither is specified, the
default is 1.
cudaDeviceCountMax:
type: ['null', int, cwl:Expression]
doc: |
Maximum number of GPU devices to request. If not specified,
same as `cudaDeviceCountMin`.
- name: ShmSize
type: record
extends: cwl:ProcessRequirement
inVocab: false
fields:
class:
type: string
doc: 'cwltool:ShmSize'
jsonldPredicate:
"_id": "@type"
"_type": "@vocab"
shmSize:
type: string
doc: |
Size of /dev/shm. The format is `<number><unit>`. `<number>` must be
greater than 0. Unit is optional and can be `b` (bytes), `k` (kilobytes),
`m` (megabytes), or `g` (gigabytes). If you omit the unit, the default is
bytes. If you omit the size entirely, the value is `64m`.
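# Usage sketch (a CWL document that uses this extension, not part of the
# schema itself; the tool details are hypothetical):
#
#   cwlVersion: v1.1
#   class: CommandLineTool
#   $namespaces:
#     cwltool: "http://commonwl.org/cwltool#"
#   requirements:
#     cwltool:CUDARequirement:
#       cudaVersionMin: "11.4"
#       cudaComputeCapability: "3.0"
#   baseCommand: nvidia-smi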
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/extensions-v1.2.yml 0000644 0001750 0001750 00000020565 14621376047 021370 0 ustar 00michael michael $base: http://commonwl.org/cwltool#
$namespaces:
cwl: "https://w3id.org/cwl/cwl#"
cwltool: "http://commonwl.org/cwltool#"
$graph:
- $import: https://w3id.org/cwl/CommonWorkflowLanguage.yml
- name: Secrets
type: record
inVocab: false
extends: cwl:ProcessRequirement
fields:
class:
type: string
doc: "Always 'Secrets'"
jsonldPredicate:
"_id": "@type"
"_type": "@vocab"
secrets:
type: string[]
doc: |
List one or more input parameters that are sensitive (such as
passwords), which will be deliberately obscured from logging.
jsonldPredicate:
"_type": "@id"
refScope: 0
- name: ProcessGenerator
type: record
inVocab: true
extends: cwl:Process
documentRoot: true
fields:
- name: class
jsonldPredicate:
"_id": "@type"
"_type": "@vocab"
type: string
- name: run
type: [string, cwl:Process]
jsonldPredicate:
_id: "cwl:run"
_type: "@id"
subscope: run
doc: |
Specifies the process to run.
- name: MPIRequirement
type: record
inVocab: false
extends: cwl:ProcessRequirement
doc: |
Indicates that a process requires an MPI runtime.
fields:
- name: class
type: string
doc: "Always 'MPIRequirement'"
jsonldPredicate:
"_id": "@type"
"_type": "@vocab"
- name: processes
type: [int, cwl:Expression]
doc: |
The number of MPI processes to start. If you give a string,
this will be evaluated as a CWL Expression and it must
evaluate to an integer.
- name: CUDARequirement
type: record
extends: cwl:ProcessRequirement
inVocab: false
doc: |
Require support for NVIDIA CUDA (GPU hardware acceleration).
fields:
class:
type: string
doc: 'cwltool:CUDARequirement'
jsonldPredicate:
_id: "@type"
_type: "@vocab"
cudaVersionMin:
type: string
doc: |
Minimum CUDA version to run the software, in X.Y format. This
corresponds to a CUDA SDK release. When running directly on
the host (not in a container) the host must have a compatible
CUDA SDK (matching the exact version, or, starting with CUDA
11.3, matching major version). When run in a container, the
container image should provide the CUDA runtime, and the host
driver is injected into the container. In this case, because
CUDA drivers are backwards compatible, it is possible to
use an older SDK with a newer driver across major versions.
See https://docs.nvidia.com/deploy/cuda-compatibility/ for
details.
cudaComputeCapability:
type:
- 'string'
- 'string[]'
doc: |
CUDA hardware capability required to run the software, in X.Y
format.
* If this is a single value, it defines only the minimum
compute capability. GPUs with higher capability are also
accepted.
* If it is an array value, then only select GPUs with compute
capabilities that explicitly appear in the array.
cudaDeviceCountMin:
type: ['null', int, cwl:Expression]
default: 1
doc: |
Minimum number of GPU devices to request. If not specified,
same as `cudaDeviceCountMax`. If neither is specified, the
default is 1.
cudaDeviceCountMax:
type: ['null', int, cwl:Expression]
doc: |
Maximum number of GPU devices to request. If not specified,
same as `cudaDeviceCountMin`.
- name: LoopInput
type: record
fields:
id:
type: string?
jsonldPredicate: "@id"
doc: "It must reference the `id` of one of the elements in the `in` field of the step."
loopSource:
doc: |
Specifies one or more of the step output parameters that will
provide input to the loop iterations after the first one (inputs
of the first iteration are the step input parameters).
type:
- string?
- string[]?
jsonldPredicate:
"_type": "@id"
refScope: 1
linkMerge:
type: cwl:LinkMergeMethod?
jsonldPredicate: "cwl:linkMerge"
default: merge_nested
doc: |
The method to use to merge multiple inbound links into a single array.
If not specified, the default method is "merge_nested".
pickValue:
type: ["null", cwl:PickValueMethod]
jsonldPredicate: "cwl:pickValue"
doc: |
The method to use to choose non-null elements among multiple sources.
default:
type: ["null", Any]
doc: |
The default value for this parameter to use if either there is no
`source` field, or the value produced by the `source` is `null`. The
default must be applied prior to scattering or evaluating `valueFrom`.
jsonldPredicate:
_id: "sld:default"
noLinkCheck: true
valueFrom:
type:
- "null"
- string
- cwl:Expression
jsonldPredicate: "cwl:valueFrom"
doc: |
To use valueFrom, [StepInputExpressionRequirement](#StepInputExpressionRequirement) must
be specified in the workflow or workflow step requirements.
If `valueFrom` is a constant string value, use this as the value for
this input parameter.
If `valueFrom` is a parameter reference or expression, it must be
evaluated to yield the actual value to be assigned to the input field.
The `self` value in the parameter reference or expression must be
`null` if there is no `loopSource` field, or the value of the
parameter(s) specified in the `loopSource` field.
The value of `inputs` in the parameter reference or expression must be
the input object to the previous iteration of the workflow step (or the initial
inputs for the first iteration).
- name: Loop
type: record
extends: cwl:ProcessRequirement
inVocab: false
doc: |
Prototype to enable workflow-level looping of a step.
Valid only under `requirements` of a https://www.commonwl.org/v1.2/Workflow.html#WorkflowStep.
Unlike other CWL requirements, the Loop requirement is not propagated to inner steps.
`loopWhen` is an expansion of the CWL v1.2 `when` construct which controls
conditional execution.
Using `loopWhen` and `when` for the same step will produce an error.
`loopWhen` is not compatible with `scatter` at this time and combining the
two in the same step will produce an error.
fields:
class:
type: string
doc: 'cwltool:Loop'
jsonldPredicate:
_id: "@type"
_type: "@vocab"
loopWhen:
type: cwl:Expression
doc: |
Only run the step while the expression evaluates to `true`.
If `false` and no iteration has been performed, the step is skipped.
A skipped step produces a `null` on each output.
The `inputs` value in the expression must be the step input object.
It is an error if this expression returns a value other than `true` or `false`.
loop:
type: LoopInput[]
jsonldPredicate:
_id: "cwltool:loop"
mapSubject: id
mapPredicate: loopSource
doc: |
Defines the input parameters of the loop iterations after the first one
(inputs of the first iteration are the step input parameters). If no
`loop` rule is specified for a given step `in` field, the initial value
is kept constant among all iterations.
outputMethod:
type:
type: enum
name: LoopOutputModes
symbols: [ last, all ]
default: last
doc:
- Specify the desired method of dealing with loop outputs.
- "`last`: (the default) Propagates only the last computed element to the subsequent steps when the loop terminates."
- "`all`: Propagates a single array with all output values to the subsequent steps when the loop terminates."
- name: ShmSize
type: record
extends: cwl:ProcessRequirement
inVocab: false
fields:
class:
type: string
doc: 'cwltool:ShmSize'
jsonldPredicate:
"_id": "@type"
"_type": "@vocab"
shmSize:
type: string
doc: |
Size of /dev/shm. The format is `<number><unit>`. `<number>` must be
greater than 0. Unit is optional and can be `b` (bytes), `k` (kilobytes),
`m` (megabytes), or `g` (gigabytes). If you omit the unit, the default is
bytes. If you omit the size entirely, the value is `64m`.
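# Usage sketch (a workflow step that uses the Loop prototype; step and
# parameter names are hypothetical): iterate until the counter reaches 10,
# feeding output o1 back into input i1 on each iteration.
#
#   steps:
#     loop_step:
#       run: increment.cwl
#       in:
#         i1: counter
#       out: [o1]
#       requirements:
#         cwltool:Loop:
#           loopWhen: $(inputs.i1 < 10)
#           loop:
#             i1: o1
#           outputMethod: last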
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/extensions.yml 0000644 0001750 0001750 00000016354 14621376047 020705 0 ustar 00michael michael $base: http://commonwl.org/cwltool#
$namespaces:
cwl: "https://w3id.org/cwl/cwl#"
cwltool: "http://commonwl.org/cwltool#"
$graph:
- $import: https://w3id.org/cwl/CommonWorkflowLanguage.yml
- name: LoadListingRequirement
type: record
extends: cwl:ProcessRequirement
inVocab: false
fields:
class:
type: string
doc: "Always 'LoadListingRequirement'"
jsonldPredicate:
"_id": "@type"
"_type": "@vocab"
loadListing:
type:
- type: enum
name: LoadListingEnum
symbols: [no_listing, shallow_listing, deep_listing]
- name: InplaceUpdateRequirement
type: record
inVocab: false
extends: cwl:ProcessRequirement
fields:
class:
type: string
doc: "Always 'InplaceUpdateRequirement'"
jsonldPredicate:
"_id": "@type"
"_type": "@vocab"
inplaceUpdate:
type: boolean
- name: Secrets
type: record
inVocab: false
extends: cwl:ProcessRequirement
fields:
class:
type: string
doc: "Always 'Secrets'"
jsonldPredicate:
"_id": "@type"
"_type": "@vocab"
secrets:
type: string[]
doc: |
List one or more input parameters that are sensitive (such as
passwords), which will be deliberately obscured from logging.
jsonldPredicate:
"_type": "@id"
refScope: 0
- name: TimeLimit
type: record
inVocab: false
extends: cwl:ProcessRequirement
doc: |
Set an upper limit on the execution time of a CommandLineTool or
ExpressionTool. A tool execution which exceeds the time limit may
be preemptively terminated and considered failed. May also be
used by batch systems to make scheduling decisions.
fields:
- name: class
type: string
doc: "Always 'TimeLimit'"
jsonldPredicate:
"_id": "@type"
"_type": "@vocab"
- name: timelimit
type: [long, string]
doc: |
The time limit, in seconds. A time limit of zero means no
time limit. Negative time limits are an error.
- name: WorkReuse
type: record
inVocab: false
extends: cwl:ProcessRequirement
doc: |
For implementations that support reusing output from past work (on
the assumption that same code and same input produce same
results), control whether to enable or disable the reuse behavior
for a particular tool or step (to accommodate situations where that
assumption is incorrect). A reused step is not executed but
instead returns the same output as the original execution.
If `enableReuse` is not specified, correct tools should assume it
is enabled by default.
fields:
- name: class
type: string
doc: "Always 'WorkReuse'"
jsonldPredicate:
"_id": "@type"
"_type": "@vocab"
- name: enableReuse
type: [boolean, string]
#default: true
- name: NetworkAccess
type: record
inVocab: false
extends: cwl:ProcessRequirement
doc: |
Indicate whether a process requires outgoing IPv4/IPv6 network
access. Choice of IPv4 or IPv6 is implementation and site
specific, correct tools must support both.
If `networkAccess` is false or not specified, tools must not
assume network access, except for localhost (the loopback device).
If `networkAccess` is true, the tool must be able to make outgoing
connections to network resources. Resources may be on a private
subnet or the public Internet. However, implementations and sites
may apply their own security policies to restrict what is
accessible by the tool.
Enabling network access does not imply a publicly routable IP
address or the ability to accept inbound connections.
fields:
- name: class
type: string
doc: "Always 'NetworkAccess'"
jsonldPredicate:
"_id": "@type"
"_type": "@vocab"
- name: networkAccess
type: [boolean, string]
- name: ProcessGenerator
type: record
inVocab: true
extends: cwl:Process
documentRoot: true
fields:
- name: class
jsonldPredicate:
"_id": "@type"
"_type": "@vocab"
type: string
- name: run
type: [string, cwl:Process]
jsonldPredicate:
_id: "cwl:run"
_type: "@id"
doc: |
Specifies the process to run.
- name: MPIRequirement
type: record
inVocab: false
extends: cwl:ProcessRequirement
doc: |
Indicates that a process requires an MPI runtime.
fields:
- name: class
type: string
doc: "Always 'MPIRequirement'"
jsonldPredicate:
"_id": "@type"
"_type": "@vocab"
- name: processes
type: [int, cwl:Expression]
doc: |
The number of MPI processes to start. If you give a string,
this will be evaluated as a CWL Expression and it must
evaluate to an integer.
- name: CUDARequirement
type: record
extends: cwl:ProcessRequirement
inVocab: false
doc: |
Require support for NVIDIA CUDA (GPU hardware acceleration).
fields:
class:
type: string
doc: 'cwltool:CUDARequirement'
jsonldPredicate:
_id: "@type"
_type: "@vocab"
cudaVersionMin:
type: string
doc: |
Minimum CUDA version to run the software, in X.Y format. This
corresponds to a CUDA SDK release. When running directly on
the host (not in a container) the host must have a compatible
CUDA SDK (matching the exact version, or, starting with CUDA
11.3, matching major version). When run in a container, the
container image should provide the CUDA runtime, and the host
driver is injected into the container. In this case, because
CUDA drivers are backwards compatible, it is possible to
use an older SDK with a newer driver across major versions.
See https://docs.nvidia.com/deploy/cuda-compatibility/ for
details.
cudaComputeCapability:
type:
- 'string'
- 'string[]'
doc: |
CUDA hardware capability required to run the software, in X.Y
format.
* If this is a single value, it defines only the minimum
compute capability. GPUs with higher capability are also
accepted.
* If it is an array value, then only select GPUs with compute
capabilities that explicitly appear in the array.
cudaDeviceCountMin:
type: ['null', int, cwl:Expression]
default: 1
doc: |
Minimum number of GPU devices to request. If not specified,
same as `cudaDeviceCountMax`. If neither is specified, the
default is 1.
cudaDeviceCountMax:
type: ['null', int, cwl:Expression]
doc: |
Maximum number of GPU devices to request. If not specified,
same as `cudaDeviceCountMin`.
- name: ShmSize
type: record
extends: cwl:ProcessRequirement
inVocab: false
fields:
class:
type: string
doc: 'cwltool:ShmSize'
jsonldPredicate:
"_id": "@type"
"_type": "@vocab"
shmSize:
type: string
doc: |
Size of /dev/shm. The format is `<number><unit>`. `<number>` must be
greater than 0. Unit is optional and can be `b` (bytes), `k` (kilobytes),
`m` (megabytes), or `g` (gigabytes). If you omit the unit, the default is
bytes. If you omit the size entirely, the value is `64m`.
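# Usage sketch (hypothetical tool fragment): marking an input as sensitive
# so cwltool obscures it from logging; extensions like this are enabled by
# running cwltool with --enable-ext.
#
#   hints:
#     cwltool:Secrets:
#       secrets: [password]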
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/factory.py 0000644 0001750 0001750 00000004542 14621376047 020000 0 ustar 00michael michael import os
from typing import Any, Dict, Optional, Union
from . import load_tool
from .context import LoadingContext, RuntimeContext
from .errors import WorkflowException
from .executors import JobExecutor, SingleJobExecutor
from .process import Process
from .utils import CWLObjectType
class WorkflowStatus(Exception):
def __init__(self, out: Optional[CWLObjectType], status: str) -> None:
"""Signaling exception for the status of a Workflow."""
super().__init__("Completed %s" % status)
self.out = out
self.status = status
class Callable:
"""Result of ::py:func:`Factory.make`."""
def __init__(self, t: Process, factory: "Factory") -> None:
"""Initialize."""
self.t = t
self.factory = factory
def __call__(self, **kwargs: Any) -> Union[str, Optional[CWLObjectType]]:
runtime_context = self.factory.runtime_context.copy()
runtime_context.basedir = os.getcwd()
out, status = self.factory.executor(self.t, kwargs, runtime_context)
if status != "success":
raise WorkflowStatus(out, status)
else:
return out
class Factory:
"""Easy way to load a CWL document for execution."""
loading_context: LoadingContext
runtime_context: RuntimeContext
def __init__(
self,
executor: Optional[JobExecutor] = None,
loading_context: Optional[LoadingContext] = None,
runtime_context: Optional[RuntimeContext] = None,
) -> None:
if executor is None:
executor = SingleJobExecutor()
self.executor = executor
if runtime_context is None:
self.runtime_context = RuntimeContext()
else:
self.runtime_context = runtime_context
if loading_context is None:
self.loading_context = LoadingContext()
self.loading_context.singularity = self.runtime_context.singularity
self.loading_context.podman = self.runtime_context.podman
else:
self.loading_context = loading_context
def make(self, cwl: Union[str, Dict[str, Any]]) -> Callable:
"""Instantiate a CWL object from a CWl document."""
load = load_tool.load_tool(cwl, self.loading_context)
if isinstance(load, int):
raise WorkflowException("Error loading tool")
return Callable(load, self)
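# Usage sketch (the document path and input/output names are hypothetical):
#
#     import cwltool.factory
#
#     fac = cwltool.factory.Factory()
#     echo = fac.make("echo.cwl")   # returns a Callable wrapping the tool
#     result = echo(inp="foo")      # keyword arguments form the CWL input object
#     print(result["out"])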
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/flatten.py 0000644 0001750 0001750 00000001300 14621376047 017753 0 ustar 00michael michael from typing import Any, Callable, List, cast
# http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html
def flatten(thing: Any, ltypes: Any = (list, tuple)) -> List[Any]:
"""Flatten arbitrarily nested lists and tuples into a single flat list."""
if thing is None:
return []
if not isinstance(thing, ltypes):
return [thing]
ltype = type(thing)
thing_list = list(thing)
i = 0
while i < len(thing_list):
while isinstance(thing_list[i], ltypes):
if not thing_list[i]:
# Drop empty nested sequences and re-check the same index.
thing_list.pop(i)
i -= 1
break
else:
# Splice the nested sequence's items in place of the sequence itself.
thing_list[i : i + 1] = thing_list[i]
i += 1
# Rebuild using the outermost sequence type (list or tuple).
return cast(Callable[[Any], List[Any]], ltype)(thing_list)
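# Examples (illustrative): nested lists and tuples collapse into one flat
# list built from the outermost sequence type:
#
#     flatten([1, [2, [3, 4]], (5,)])  # -> [1, 2, 3, 4, 5]
#     flatten(None)                    # -> []
#     flatten("abc")                   # -> ["abc"]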
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1715862567.0
cwltool-3.1.20240508115724/cwltool/hello.simg 0000755 0001750 0001750 00000010037 14621376047 017742 0 ustar 00michael michael #!/usr/bin/env run-singularity
[binary SquashFS payload of the Singularity image omitted]