pax_global_header00006660000000000000000000000064147010723440014514gustar00rootroot0000000000000052 comment=04e3e936451919e3481c35c8e2781981e80c96c7 scitrack-2024.10.8/000077500000000000000000000000001470107234400136355ustar00rootroot00000000000000scitrack-2024.10.8/.gitattributes000066400000000000000000000000241470107234400165240ustar00rootroot00000000000000tests/*fasta binary scitrack-2024.10.8/.github/000077500000000000000000000000001470107234400151755ustar00rootroot00000000000000scitrack-2024.10.8/.github/dependabot.yml000066400000000000000000000004631470107234400200300ustar00rootroot00000000000000version: 2 updates: - package-ecosystem: pip directory: "/" schedule: interval: weekly time: "19:00" open-pull-requests-limit: 10 - package-ecosystem: "github-actions" directory: "/" schedule: interval: "weekly" time: "19:00" open-pull-requests-limit: 10 scitrack-2024.10.8/.github/workflows/000077500000000000000000000000001470107234400172325ustar00rootroot00000000000000scitrack-2024.10.8/.github/workflows/release.yml000066400000000000000000000045121470107234400213770ustar00rootroot00000000000000name: Release on: [workflow_dispatch] jobs: test: name: "Test on Python ${{ matrix.python-version }} (${{ matrix.os }})" runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: "actions/checkout@v4" with: fetch-depth: 0 - uses: "actions/setup-python@v5" with: python-version: "${{ matrix.python-version }}" - name: "Installs for ${{ matrix.python-version }}" run: | pip install --upgrade pip pip install nox uv - name: "Run nox for Python ${{ matrix.python-version }}" run: "nox -db uv -s test-${{ matrix.python-version }}" build: name: Build wheel and sdist needs: test runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 with: fetch-depth: 0 - uses: actions/setup-python@v5 with: python-version: '3.12' - name: Install build dependency run: | pip install build pip install --upgrade pip - name: Build sdist and wheel run: python -m build --wheel --sdist - name: Upload sdist and wheel uses: actions/upload-artifact@v4 with: name: scitrack-wheel-sdist path: | ./dist/*.whl ./dist/*.tar.gz release_test: name: Release to Test PyPI needs: build environment: release_test runs-on: ubuntu-latest permissions: id-token: write steps: - name: Download sdist and wheel uses: actions/download-artifact@v4 with: name: scitrack-wheel-sdist path: ./dist - name: Publish package distributions to Test PyPI uses: pypa/gh-action-pypi-publish@release/v1 with: repository-url: https://test.pypi.org/legacy/ release: name: Release to PyPI needs: release_test environment: release runs-on: ubuntu-latest permissions: id-token: write steps: - name: Download sdist and wheel uses: actions/download-artifact@v4 with: name: scitrack-wheel-sdist path: ./dist - name: Publish package distributions to PyPI uses: pypa/gh-action-pypi-publish@release/v1scitrack-2024.10.8/.github/workflows/testing_develop.yml000066400000000000000000000030621470107234400231510ustar00rootroot00000000000000name: CI on: push: branches-ignore: - master pull_request: branches-ignore: - master jobs: tests: name: "Python ${{ matrix.python-version }} (${{ matrix.os }})" runs-on: ${{ matrix.os }} strategy: matrix: os: [ubuntu-latest, macos-latest, windows-latest] python-version: ["3.9", "3.10", "3.11", "3.12"] steps: - uses: "actions/checkout@v4" with: fetch-depth: 0 # Setup env - uses: "actions/setup-python@v5" with: python-version: "${{ matrix.python-version }}" - name: "Installs for ${{ matrix.python-version }}" run: | python --version pip install --upgrade pip wheel setuptools flit pip install --upgrade nox - name: "Run nox for ${{ matrix.python-version }}" run: "nox -s test-${{ matrix.python-version }} -- --cov-report lcov:lcov-${{matrix.os}}-${{matrix.python-version}}.lcov --cov-report term --cov-append --cov scitrack" - name: Coveralls Parallel uses: coverallsapp/github-action@v2 with: parallel: true github-token: ${{ secrets.github_token }} flag-name: run-${{matrix.python-version}}-${{matrix.os}} file: "tests/lcov-${{matrix.os}}-${{matrix.python-version}}.lcov" finish: name: "Finish Coveralls" needs: tests runs-on: ubuntu-latest steps: - name: Coveralls Finished uses: coverallsapp/github-action@v2 with: github-token: ${{ secrets.github_token }} parallel-finished: truescitrack-2024.10.8/.gitignore000066400000000000000000000006211470107234400156240ustar00rootroot00000000000000*.py[cod] # C extensions *.so # Packages *.egg *.egg-info dist build eggs parts bin var sdist develop-eggs .installed.cfg lib lib64 # Installer logs pip-log.txt # Unit test / coverage reports .coverage .tox *.nox nosetests.xml tests/draw_results *.pytest_cache tests/junit-*.xml tests/coverage.xml # Translations *.mo # Mr Developer .mr.developer.cfg .project .pydevproject *.vscode # vi .*.swp scitrack-2024.10.8/.hgignore000066400000000000000000000004121470107234400154350ustar00rootroot00000000000000syntax:glob .svn *.pyc *.pyo *.so *.o *.DS_Store *.tmproj *.rej *.orig *.wpr *.pdf _build/* build *htmlcov* *.idea *.coverage *trackcomp.egg-info* dist/* *.cache scitrack.egg* *.sublime-* *.wpu *.pytest_cache *.tox *.nox tests/junit-*.xml tests/coverage.xml *.vscodescitrack-2024.10.8/.hgtags000066400000000000000000000004421470107234400151130ustar00rootroot000000000000002c80657fecfe617eab8b5e071da8b4b494ca3636 0.1.6 402b7daea661f3904f85f6f248f46c8d9f588704 0.1.7 038183f48645c7ba0417fa98946689f86efca803 0.1.8 d8aa7076747c991858750c0ecfa5915c6d421d5f 0.1.8.1 d223af41764757c47b24f6594a4597818f18e785 2020.6.5 def51b1fa1ac0ab7f68d4e97dc7c187079d45e4e 2021.5.3 scitrack-2024.10.8/LICENSE000066400000000000000000000026671470107234400146550ustar00rootroot00000000000000Copyright 2019-2020 Gavin Huttley Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. scitrack-2024.10.8/README.rst000066400000000000000000000165031470107234400153310ustar00rootroot00000000000000|CI| |coverall| |Using Ruff| |Python 3.9+| .. |CI| image:: https://github.com/HuttleyLab/scitrack/actions/workflows/testing_develop.yml/badge.svg :target: https://github.com/HuttleyLab/scitrack/actions/workflows/testing_develop.yml .. |coverall| image:: https://coveralls.io/repos/github/GavinHuttley/scitrack/badge.svg?branch=develop :target: https://coveralls.io/github/GavinHuttley/scitrack?branch=develop .. |Using Ruff| image:: https://img.shields.io/endpoint?url=https://raw.githubusercontent.com/astral-sh/ruff/main/assets/badge/v2.json :target: https://github.com/astral-sh/ruff .. |Python 3.9+| image:: https://img.shields.io/badge/python-3.9+-blue.svg :target: https://www.python.org/downloads/release/python-390/ ################## About ``scitrack`` ################## One of the critical challenges in scientific analysis is to track all the elements involved. This includes the arguments provided to a specific application (including default values), input data files referenced by those arguments and output data generated by the application. In addition to this, tracking a minimal set of system specific information. ``scitrack`` is a simple package aimed at researchers writing scripts, or more substantial scientific software, to support the tracking of scientific computation. The package provides elementary functionality to support logging. The primary capabilities concern generating checksums on input and output files and facilitating logging of the computational environment. To see some projects using ``scitrack``, see the "Used by" link at the top of the `project GitHub page `_. ********** Installing ********** For the released version:: $ pip install scitrack For the very latest version:: $ pip install git+https://github.com/HuttleyLab/scitrack Or clone it:: $ git clone git@github.com:HuttleyLab/scitrack.git And then install:: $ pip install ~/path/to/scitrack ***************** ``CachingLogger`` ***************** There is a single object provided by ``scitrack``, ``CachingLogger``. This object is basically a wrapper around the Python standard library ``logging`` module. On invocation, ``CachingLogger`` captures basic information regarding the system and the command line call that was made to invoke the application. In addition, the class provides convenience methods for logging both the path and the md5 hexdigest checksum [1]_ of input/output files. A method is also provided for producing checksums of text data. The latter is useful for the case when data are from a stream or a database, for instance. All logging calls are cached until a path for a logfile is provided. The logger can also, optionally, create directories. ********************************** Simple instantiation of the logger ********************************** Creating the logger. Setting ``create_dir=True`` means on creation of the logfile, the directory path will be created also. .. code:: python from scitrack import CachingLogger LOGGER = CachingLogger(create_dir=True) LOGGER.log_file_path = "somedir/some_path.log" The last assignment triggers creation of ``somedir/some_path.log``. .. warning:: Once set, a loggers ``.log_file_path`` cannot be changed. ****************************************** Capturing a programs arguments and options ****************************************** ``scitrack`` will write the contents of ``sys.argv`` to the log file, prefixed by ``command_string``. However, this only captures arguments specified on the command line. Tracking the value of optional arguments not specified, which may have default values, is critical to tracking the full command set. Doing this is now easy with the simple statement ``LOGGER.log_args()``. The logger can also record the versions of named dependencies. Here's one approach to incorporating ``scitrack`` into a command line application built using the ``click`` `command line interface library `_. Below we create a simple ``click`` app and capture the required and optional argument values. .. note:: ``LOGGER.log_args()`` should be called immediately after the function definition, or after "true" default values have been configured. .. code:: python import click from scitrack import CachingLogger LOGGER = CachingLogger() @click.command() @click.option("-i", "--infile", type=click.Path(exists=True)) @click.option("-t", "--test", is_flag=True, help="Run test.") def main(infile, test): # capture the local variables, at this point just provided arguments LOGGER.log_args() LOGGER.log_versions("numpy") LOGGER.input_file(infile) LOGGER.log_file_path = "some_path.log" if __name__ == "__main__": main() The ``CachingLogger.write()`` method takes a message and a label. All other logging methods wrap ``log_message()``, providing a specific label. For instance, the method ``input_file()`` writes out two lines in the log. - ``input_file_path``, the absolute path to the intput file - ``input_file_path md5sum``, the hex digest of the file ``output_file()`` behaves analogously. An additional method ``text_data()`` is useful for other data input/output sources (e.g. records from a database). For this to have value for arbitrary data types requires a systematic approach to ensuring the text conversion is robust across platforms. The ``log_args()`` method captures all local variables within a scope. The ``log_versions()`` method captures versions for the current file and that of a list of named packages, e.g. ``LOGGER.log_versions(['numpy', 'sklearn'])``. Some sample output ================== :: 2020-05-25 13:32:07 Eratosthenes:98447 INFO system_details : system=Darwin Kernel Version 19.4.0: Wed Mar 4 22:28:40 PST 2020; root:xnu-6153.101.6~15/RELEASE_X86_64 2020-05-25 13:32:07 Eratosthenes:98447 INFO python : 3.8.2 2020-05-25 13:32:07 Eratosthenes:98447 INFO user : gavin 2020-05-25 13:32:07 Eratosthenes:98447 INFO command_string : ./demo.py -i /Users/gavin/repos/SciTrack/tests/sample-lf.fasta 2020-05-25 13:32:07 Eratosthenes:98447 INFO params : {'infile': '/Users/gavin/repos/SciTrack/tests/sample-lf.fasta', 'test': False} 2020-05-25 13:32:07 Eratosthenes:98447 INFO version : __main__==None 2020-05-25 13:32:07 Eratosthenes:98447 INFO version : numpy==1.18.4 2020-05-25 13:32:07 Eratosthenes:98447 INFO input_file_path : /Users/gavin/repos/SciTrack/tests/sample-lf.fasta 2020-05-25 13:32:07 Eratosthenes:98447 INFO input_file_path md5sum : 96eb2c2632bae19eb65ea9224aaafdad ********************** Other useful functions ********************** Two other useful functions are ``get_file_hexdigest`` and ``get_text_hexdigest``. **************** Reporting issues **************** Use the project `issue tracker `_. ************** For Developers ************** We use flit_ for package building. Having cloned the repository onto your machine. Install ``flit``:: $ python3 -m pip install flit Do a developer install of ``scitrack`` using flit as:: $ cd path/to/cloned/repo $ flit install -s --python `which python` .. note:: This installs a symlink into ``site-packages`` of the python identified by ``which python``. .. [1] The hexdigest serves as a unique signature of a files contents. .. _flit: https://flit.readthedocs.io/en/latest/ scitrack-2024.10.8/noxfile.py000066400000000000000000000003571470107234400156600ustar00rootroot00000000000000import nox @nox.session(python=[f"3.{v}" for v in range(9, 14)]) def test(session): session.install(".[test]") session.chdir("tests") session.run( "pytest", "-s", "-x", *session.posargs, ) scitrack-2024.10.8/pyproject.toml000066400000000000000000000061061470107234400165540ustar00rootroot00000000000000[build-system] requires = ["flit_core >=2,<4"] build-backend = "flit_core.buildapi" [tool.flit.metadata] module = "scitrack" author = "Gavin Huttley" author-email = "Gavin.Huttley@anu.edu.au" home-page = "https://github.com/HuttleyLab/scitrack" license = "BSD-3" description-file = "README.rst" keywords = "science, logging" classifiers = [ "Development Status :: 4 - Beta", "Intended Audience :: Science/Research", "License :: OSI Approved :: BSD License", "Topic :: Scientific/Engineering :: Bio-Informatics", "Topic :: Software Development :: Libraries :: Python Modules", "Operating System :: OS Independent", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", ] requires-python = ">=3.9" [tool.flit.sdist] include = ["src/*", "pyproject.toml", "*.rst"] exclude = ["*.xml"] [tool.flit.metadata.urls] Documentation = "https://github.com/HuttleyLab/scitrack" "Bug Tracker" = "https://github.com/HuttleyLab/scitrack/issues" "Source Code" = "https://github.com/HuttleyLab/scitrack" [tool.flit.metadata.requires-extra] test = [ "numpy", "pytest", "pytest-cov", "ruff==0.6.9", "nox"] [tool.ruff] exclude = [ ".direnv", ".eggs", ".git", ".git-rewrite", ".hg", ".ipynb_checkpoints", ".mypy_cache", ".nox", ".pants.d", ".pyenv", ".pytest_cache", ".pytype", ".ruff_cache", ".svn", ".tox", ".venv", ".vscode", "__pypackages__", "_build", "build", "dist", "site-packages", "venv", ] # Same as Black. line-length = 88 indent-width = 4 target-version = "py39" [tool.ruff.lint] # Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. # Unlike Flake8, Ruff doesn't enable pycodestyle warnings (`W`) or # McCabe complexity (`C901`) by default. select = ["ALL"] ignore = ["EXE002", "FA100", "E501", "D"] # Allow fix for all enabled rules (when `--fix`) is provided. fixable = ["ALL"] unfixable = [] # Allow unused variables when underscore-prefixed. dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$" [tool.ruff.lint.per-file-ignores] "tests/**/*.py" = [ "S101", # asserts allowed in tests... "INP001", # __init__.py files are not required... "ANN", "N802", "N803" ] "noxfile.py" = [ "S101", # asserts allowed in tests... "INP001", # __init__.py files are not required... "ANN", "N802", "N803" ] [tool.ruff.format] # Like Black, use double quotes for strings. quote-style = "double" # Like Black, indent with spaces, rather than tabs. indent-style = "space" # Like Black, respect magic trailing commas. skip-magic-trailing-comma = false # Like Black, automatically detect the appropriate line ending. line-ending = "lf" docstring-code-format = true # Set the line length limit used when formatting code snippets in # docstrings. # # This only has an effect when the `docstring-code-format` setting is # enabled. docstring-code-line-length = "dynamic"scitrack-2024.10.8/src/000077500000000000000000000000001470107234400144245ustar00rootroot00000000000000scitrack-2024.10.8/src/scitrack/000077500000000000000000000000001470107234400162275ustar00rootroot00000000000000scitrack-2024.10.8/src/scitrack/__init__.py000066400000000000000000000204601470107234400203420ustar00rootroot00000000000000""" SciTrack provides basic logging capabilities to track scientific computations. """ import contextlib import hashlib import importlib import inspect import logging import os import platform import socket import sys from getpass import getuser __version__ = "2024.10.8" VERSION_ATTRS = ["__version__", "version", "VERSION"] def abspath(path): """returns an expanded, absolute path""" return os.path.abspath(os.path.expanduser(path)) def _create_path(path): """creates path""" if os.path.exists(path): return os.makedirs(path, exist_ok=True) def get_package_name(object): """returns the package name for the provided object""" name = inspect.getmodule(object).__name__ return name.split(".")[0] def get_version_for_package(package): """returns the version of package""" if type(package) == str: try: mod = importlib.import_module(package) except ModuleNotFoundError as e: raise ValueError(f"Unknown package {package}") from e elif inspect.ismodule(package): mod = package else: raise ValueError(f"Unknown type, package {package}") vn = None for v in VERSION_ATTRS: with contextlib.suppress(AttributeError): vn = getattr(mod, v) if callable(vn): vn = vn() break if type(vn) in (tuple, list): vn = vn[0] del mod return vn create_path = _create_path FileHandler = logging.FileHandler class CachingLogger: """stores log messages until a log filename is provided""" def __init__(self, log_file_path=None, create_dir=True, mode="w"): super(CachingLogger, self).__init__() self._log_file_path = None self._logfile = None self._started = False self.create_dir = create_dir self._messages = [] self._hostname = socket.gethostname() self._mode = mode if log_file_path: self.log_file_path = log_file_path def _reset(self, mode="w"): self._mode = mode self._started = False self._messages = [] if self._logfile is not None: self._logfile.flush() self._logfile.close() self._logfile = None self._log_file_path = None @property def log_file_path(self): return self._log_file_path @log_file_path.setter def log_file_path(self, path): """set the log file path and then dump cached log messages""" if self._log_file_path is not None: raise AttributeError( f"log_file_path already defined as {self._log_file_path}", ) path = abspath(path) if self.create_dir: dirname = os.path.dirname(path) create_path(dirname) self._log_file_path = path self._logfile = set_logger(self._log_file_path, mode=self.mode) for m in self._messages: logging.info(m) self._messages = [] self._started = True @property def mode(self): """the logfile opening mode""" return self._mode @mode.setter def mode(self, mode): """the logfile file opening mode""" self._mode = mode def _record_file(self, file_class, file_path): """writes the file path and md5 checksum to log file""" file_path = abspath(file_path) md5sum = get_file_hexdigest(file_path) self.log_message(file_path, label=file_class) self.log_message(md5sum, label=f"{file_class} md5sum") def input_file(self, file_path, label="input_file_path"): """logs path and md5 checksum Argument: - label is inserted before the message""" self._record_file(label, file_path) def output_file(self, file_path, label="output_file_path"): """logs path and md5 checksum Argument: - label is inserted before the message""" self._record_file(label, file_path) def text_data(self, data, label=None): """logs md5 checksum for input text data. Argument: - label is inserted before the message For this to be useful you must ensure the text order is persistent.""" assert label is not None, "You must provide a data label" md5sum = get_text_hexdigest(data) self.log_message(md5sum, label=label) def log_message(self, msg, label=None): """writes a log message Argument: - label is inserted before the message""" label = label or "misc" data = [label, msg] msg = " : ".join(data) if not self._started: self._messages.append(msg) else: logging.info(msg) def log_args(self, args=None): """save arguments to file using label='params' Argument: - args: if None, uses inspect module to get locals from the calling frame""" if args is None: parent = inspect.currentframe().f_back args = inspect.getargvalues(parent).locals result = { k: args[k] for k in list(args) if type(args[k]) != self.__class__ and type(args[k]).__name__ != "module" } self.log_message(str(result), label="params") def shutdown(self): """safely shutdown the logger""" logging.getLogger().removeHandler(self._logfile) self._reset() def log_versions(self, packages=None): """logs version from the global namespace where method is invoked, plus from any named packages""" if type(packages) == str or inspect.ismodule(packages): packages = [packages] elif packages is None: packages = [] for i, p in enumerate(packages): if inspect.ismodule(p): packages[i] = p.__name__ parent = inspect.currentframe().f_back g = parent.f_globals name = g.get("__package__", g.get("__name__", "")) if name: vn = get_version_for_package(name) else: vn = [g.get(v, None) for v in VERSION_ATTRS if g.get(v, None)] vn = vn[0] if vn else None name = get_package_name(parent) versions = [(name, vn)] for package in packages: vn = get_version_for_package(package) versions.append((package, vn)) for n_v in versions: self.log_message("%s==%s" % n_v, label="version") def set_logger(log_file_path, level=logging.DEBUG, mode="w"): """setup logging""" handler = FileHandler(log_file_path, mode) handler.setLevel(level) hostpid = f"{socket.gethostname()}:{os.getpid()}" fmt = "%(asctime)s\t" + hostpid + "\t%(levelname)s\t%(message)s" formatter = logging.Formatter(fmt, datefmt="%Y-%m-%d %H:%M:%S") handler.setFormatter(formatter) logging.root.addHandler(handler) logging.root.setLevel(level) logging.info(f"system_details : system={platform.version()}") logging.info(f"python : {platform.python_version()}") logging.info(f"user : {getuser()}") logging.info(f'command_string : {" ".join(sys.argv)}') return handler def get_file_hexdigest(filename): """returns the md5 hexadecimal checksum of the file NOTE ---- The md5 sum of get_text_hexdigest can differ from get_file_hexdigest. This will occur if the line ending character differs from being read in 'rb' versus 'r' modes. """ # from # http://stackoverflow.com/questions/1131220/get-md5-hash-of-big-files-in-python with open(filename, "rb") as infile: md5 = hashlib.md5() while True: if data := infile.read(128): md5.update(data) else: break return md5.hexdigest() def get_text_hexdigest(data): """returns md5 hexadecimal checksum of string/unicode data NOTE ---- The md5 sum of get_text_hexdigest can differ from get_file_hexdigest. This will occur if the line ending character differs from being read in 'rb' versus 'r' modes. """ data_class = data.__class__ # fmt: off if data_class in ("".__class__, "".__class__): data = data.encode("utf-8") elif data.__class__ != b"".__class__: raise TypeError("can only checksum string, unicode or bytes data") # fmt: on md5 = hashlib.md5() md5.update(data) return md5.hexdigest() scitrack-2024.10.8/tests/000077500000000000000000000000001470107234400147775ustar00rootroot00000000000000scitrack-2024.10.8/tests/sample-crlf.fasta000066400000000000000000000030711470107234400202250ustar00rootroot00000000000000>Rhesus with extra words tgtggcacaaatactcatgccagctcattacagcatgagaac---agtttgttactcact aaagacagaatgaatgtagaaaaggctgaattctgtaataaaagcaaacagcctggcttg gcaaggagccaacataacagatggactggaagtaaggaaacatgtaatgataggcagact cccagcacagagaaaaaggtagatctgaatgctaatgccctgtatgagagaaaagaatgg aataagcaaaaactgccatgctctgagaatcctagagacactgaagatgttccttgg >Manatee tgtggcacaaatactcatgccagctcattacagcatgagaatagcagtttattactcact aaagacagaatgaatgtagaaaaggctgaattctgtcataaaagcaaacagcctggctta acaaggagccagcagagcagatgggctgaaagtaaggaaacatgtaatgataggcagact cctagcacagagaaaaaggtagatatgaatgctaatccattgtatgagagaaaagaagtg aataagcagaaacctccatgctccgagagtgttagagatacacaagatattccttgg >Pig tgtggcacagatactcatgccagctcgttacagcatgagaacagcagtttattactcact aaagacagaatgaatgtagaaaaggctgaattttgtaataaaagcaagcagcctgtctta gcaaagagccaacagagcagatgggctgaaagtaagggcacatgtaatgataggcagact cctaacacagagaaaaaggtagttctgaatactgatctcctgtatgggagaaacgaactg aataagcagaaacctgcgtgctctgacagtcctagagattcccaagatgttccttgg >GoldenMol tgtggcacaaatactcatgccagctcattacagcatgagaacagcagtttattactcact aaagacagaatgaatgtagaaaaggctgaattctgtaataaaaacaaacagtctggctta gcgaggagccagcagagcagatgggctggaagtaaggcagcgtgcaatgacaagcagact cctagcacacagacagagctatataggagtgctggtcccatgcacaggagaaaagaagta aataagctgaaatctccatggtctgagagtcctggagctacccaagagattccttgg >Rat tgtggcacagatgctcgtgccagctcattacagcgtgggacccgcagtttattgttcact gaggacagactggatgcagaaaaggctgaattctgtgatagaagcaaacagtctggcgca gcagtgagccagcagagcagatgggctgacagtaaagaaacatgtaatggcaggccggtt ccccgcactgagggaaaggcagatccaaatgtggattccctctgtggtagaaagcagtgg aatcatccgaaaagcctgtgccctgagaattctggagctaccactgacgttccttggscitrack-2024.10.8/tests/sample-lf.fasta000066400000000000000000000030341470107234400176770ustar00rootroot00000000000000>Rhesus with extra words tgtggcacaaatactcatgccagctcattacagcatgagaac---agtttgttactcact aaagacagaatgaatgtagaaaaggctgaattctgtaataaaagcaaacagcctggcttg gcaaggagccaacataacagatggactggaagtaaggaaacatgtaatgataggcagact cccagcacagagaaaaaggtagatctgaatgctaatgccctgtatgagagaaaagaatgg aataagcaaaaactgccatgctctgagaatcctagagacactgaagatgttccttgg >Manatee tgtggcacaaatactcatgccagctcattacagcatgagaatagcagtttattactcact aaagacagaatgaatgtagaaaaggctgaattctgtcataaaagcaaacagcctggctta acaaggagccagcagagcagatgggctgaaagtaaggaaacatgtaatgataggcagact cctagcacagagaaaaaggtagatatgaatgctaatccattgtatgagagaaaagaagtg aataagcagaaacctccatgctccgagagtgttagagatacacaagatattccttgg >Pig tgtggcacagatactcatgccagctcgttacagcatgagaacagcagtttattactcact aaagacagaatgaatgtagaaaaggctgaattttgtaataaaagcaagcagcctgtctta gcaaagagccaacagagcagatgggctgaaagtaagggcacatgtaatgataggcagact cctaacacagagaaaaaggtagttctgaatactgatctcctgtatgggagaaacgaactg aataagcagaaacctgcgtgctctgacagtcctagagattcccaagatgttccttgg >GoldenMol tgtggcacaaatactcatgccagctcattacagcatgagaacagcagtttattactcact aaagacagaatgaatgtagaaaaggctgaattctgtaataaaaacaaacagtctggctta gcgaggagccagcagagcagatgggctggaagtaaggcagcgtgcaatgacaagcagact cctagcacacagacagagctatataggagtgctggtcccatgcacaggagaaaagaagta aataagctgaaatctccatggtctgagagtcctggagctacccaagagattccttgg >Rat tgtggcacagatgctcgtgccagctcattacagcgtgggacccgcagtttattgttcact gaggacagactggatgcagaaaaggctgaattctgtgatagaagcaaacagtctggcgca gcagtgagccagcagagcagatgggctgacagtaaagaaacatgtaatggcaggccggtt ccccgcactgagggaaaggcagatccaaatgtggattccctctgtggtagaaagcagtgg aatcatccgaaaagcctgtgccctgagaattctggagctaccactgacgttccttggscitrack-2024.10.8/tests/test_logging.py000066400000000000000000000252011470107234400200360ustar00rootroot00000000000000import contextlib import sys from collections import Counter from pathlib import Path import pytest from scitrack import ( CachingLogger, get_file_hexdigest, get_package_name, get_text_hexdigest, get_version_for_package, ) __version__ = "2024.10.8" TEST_ROOTDIR = Path(__file__).parent DIRNAME = "delme" LOGFILE_NAME = "delme.log" @pytest.fixture def logfile(tmp_path): return tmp_path / LOGFILE_NAME def test_creates_path(logfile): """creates a log path""" LOGGER = CachingLogger(create_dir=True) LOGGER.log_file_path = logfile LOGGER.input_file(TEST_ROOTDIR / "sample-lf.fasta") LOGGER.shutdown() assert logfile.exists() def test_set_path_if_exists(logfile): """cannot change an existing logging path""" LOGGER = CachingLogger(create_dir=True) LOGGER.log_file_path = logfile LOGGER.input_file(TEST_ROOTDIR / "sample-lf.fasta") with pytest.raises(AttributeError): LOGGER.log_file_path = logfile.parent / "invalid.log" LOGGER.shutdown() def test_tracks_args(logfile): """details on host, python version should be present in log""" LOGGER = CachingLogger(create_dir=True) LOGGER.log_file_path = logfile LOGGER.input_file(TEST_ROOTDIR / "sample-lf.fasta") LOGGER.shutdown() contents = logfile.read_text() for label in ["system_details", "python", "user", "command_string"]: assert contents.count(f"\t{label}") == 1, ( label, contents.count(label), ) def test_tracks_locals(logfile): """details on local arguments should be present in log""" LOGGER = CachingLogger(create_dir=True) LOGGER.log_file_path = logfile def track_func(a=1, b="abc"): LOGGER.log_args() track_func() LOGGER.shutdown() log_data = logfile.read_text().splitlines() for line in log_data: index = line.find("params :") if index > 0: got = eval(line.split("params :")[1]) break assert got == dict(a=1, b="abc") def test_tracks_locals_skip_module(logfile): """local arguments should exclude modules""" LOGGER = CachingLogger(create_dir=True) LOGGER.log_file_path = logfile def track_func(a=1, b="abc"): import gzip # noqa LOGGER.log_args() track_func() LOGGER.shutdown() for line in logfile.read_text().splitlines(): index = line.find("params :") if index > 0: got = eval(line.split("params :")[1]) break assert got == dict(a=1, b="abc") def test_package_inference(): """correctly identify the package name""" name = get_package_name(CachingLogger) assert name == "scitrack" def test_package_versioning(): """correctly identify versions for specified packages""" vn = get_version_for_package("numpy") assert type(vn) is str with contextlib.suppress(ValueError): get_version_for_package("gobbledygook") with contextlib.suppress(ValueError): get_version_for_package(1) def test_tracks_versions(logfile): """should track versions""" LOGGER = CachingLogger(create_dir=True) LOGGER.log_file_path = logfile LOGGER.input_file(TEST_ROOTDIR / "sample-lf.fasta") LOGGER.log_versions(["numpy"]) LOGGER.shutdown() contents = logfile.read_text() for label in ["system_details", "python", "user", "command_string"]: assert contents.count(f"\t{label}") == 1, ( label, contents.count(label), ) for line in contents.splitlines(): if "version :" in line: if "numpy" not in line: assert f"=={__version__}" in line, line else: assert "numpy" in line, line def test_caching(logfile): """should cache calls prior to logging""" LOGGER = CachingLogger(create_dir=True) LOGGER.input_file(TEST_ROOTDIR / "sample-lf.fasta") assert ( "sample-lf.fasta" in LOGGER._messages[-2] and "md5sum" in LOGGER._messages[-1] ) LOGGER.log_versions(["numpy"]) assert "numpy==" in LOGGER._messages[-1] LOGGER.log_file_path = logfile LOGGER.shutdown() def test_shutdown(logfile): """correctly purges contents""" LOGGER = CachingLogger(create_dir=True) LOGGER.log_file_path = logfile LOGGER.input_file(TEST_ROOTDIR / "sample-lf.fasta") LOGGER.shutdown() def test_tracks_versions_empty(logfile): """should track version of scitrack""" LOGGER = CachingLogger(create_dir=True) LOGGER.log_file_path = logfile LOGGER.input_file(TEST_ROOTDIR / "sample-lf.fasta") LOGGER.log_versions() LOGGER.shutdown() contents = logfile.read_text() for label in ["system_details", "python", "user", "command_string"]: assert contents.count(f"\t{label}") == 1, ( label, contents.count(label), ) for line in contents.splitlines(): if "version :" in line: assert f"=={__version__}" in line, line def test_tracks_versions_string(logfile): """should track version if package name is a string""" LOGGER = CachingLogger(create_dir=True) LOGGER.log_file_path = logfile LOGGER.log_versions("numpy") LOGGER.shutdown() import numpy expect = f"numpy=={numpy.__version__}" del numpy for line in logfile.read_text().splitlines(): if "version :" in line and "numpy" in line: assert expect in line, line def test_get_version_for_package(): """should track version if package is a module""" import numpy got = get_version_for_package(numpy) assert got == numpy.__version__ # one with a callable pyfile = TEST_ROOTDIR / "delme.py" pyfile.write_text("\n".join(["def version():", " return 'my-version'"])) sys.path.append(TEST_ROOTDIR) import delme got = get_version_for_package("delme") assert got == "my-version" pyfile.unlink() # func returns a list pyfile.write_text("version = ['my-version']\n") from importlib import reload got = get_version_for_package(reload(delme)) assert got == "my-version" pyfile.unlink() def test_tracks_versions_module(logfile): """should track version if package is a module""" LOGGER = CachingLogger(create_dir=True) LOGGER.log_file_path = logfile import numpy expect = f"numpy=={numpy.__version__}" LOGGER.log_versions(numpy) LOGGER.shutdown() del numpy for line in logfile.read_text().splitlines(): if "version :" in line and "numpy" in line: assert expect in line, line def test_appending(logfile): """appending to an existing logfile should work""" LOGGER = CachingLogger(create_dir=True) LOGGER.log_file_path = logfile LOGGER.input_file(TEST_ROOTDIR / "sample-lf.fasta") LOGGER.shutdown() records = Counter() for line in logfile.read_text().splitlines(): records[line] += 1 vals = set(list(records.values())) assert vals == {1} LOGGER = CachingLogger(create_dir=True) LOGGER.mode = "a" LOGGER.log_file_path = logfile LOGGER.input_file(TEST_ROOTDIR / "sample-lf.fasta") LOGGER.shutdown() records = Counter() for line in logfile.read_text().splitlines(): records[line] += 1 vals = set(list(records.values())) assert vals == {2} def test_mdsum_input(logfile): """md5 sum of input file should be correct""" LOGGER = CachingLogger(create_dir=True) LOGGER.log_file_path = logfile # first file has LF, second has CRLF line endings hex_path = [ ("96eb2c2632bae19eb65ea9224aaafdad", "sample-lf.fasta"), ("e7e219f66be15d8afc7cdb85303305a7", "sample-crlf.fasta"), ] LOGGER.input_file(TEST_ROOTDIR / "sample-lf.fasta") LOGGER.input_file(TEST_ROOTDIR / "sample-crlf.fasta") LOGGER.shutdown() with open(logfile) as infile: num = 0 for line in infile: for h, p in hex_path: if p in line: assert "input_file" in line line = next(infile) assert h in line num += 1 assert num == len(hex_path) def test_mdsum_output(logfile): """md5 sum of output file should be correct""" LOGGER = CachingLogger(create_dir=True) LOGGER.log_file_path = logfile # first file has LF, second has CRLF line endings hex_path = [ ("96eb2c2632bae19eb65ea9224aaafdad", "sample-lf.fasta"), ] LOGGER.output_file(TEST_ROOTDIR / "sample-lf.fasta") LOGGER.shutdown() with open(logfile) as infile: num = 0 for line in infile: for h, p in hex_path: if p in line: line = next(infile) assert h in line num += 1 assert num == len(hex_path) def test_logging_text(logfile): """correctly logs text data""" text = "abcde\nedfgu\nyhbnd" hexd = "f06597f8a983dfc93744192b505a8af9" LOGGER = CachingLogger(create_dir=True) LOGGER.log_file_path = logfile LOGGER.text_data(text, label="UNIQUE") LOGGER.shutdown() contents = logfile.read_text().splitlines() unique = next((line for line in contents if "UNIQUE" in line), None) assert hexd in unique def test_logfile_path(logfile): """correctly assigned""" LOGGER = CachingLogger(create_dir=True, log_file_path=logfile) assert LOGGER.log_file_path == str(logfile) LOGGER.shutdown() def test_md5sum_text(): """md5 sum for text data should be computed""" data = "åbcde" s = get_text_hexdigest(data) assert s data = "abcde" s = get_text_hexdigest(data) assert s # loading contents from files with diff line-endings and check hex_path = [ ("96eb2c2632bae19eb65ea9224aaafdad", "sample-lf.fasta"), ("e7e219f66be15d8afc7cdb85303305a7", "sample-crlf.fasta"), ] for h, p in hex_path: p = TEST_ROOTDIR / p data = p.read_bytes() got = get_text_hexdigest(data) assert got == h, (p, repr(data)) def test_get_text_hexdigest_invalid(): """raises TypeError when invalid data provided""" with pytest.raises(TypeError): get_text_hexdigest(None) with pytest.raises(TypeError): get_text_hexdigest([]) def test_read_from_written(tmp_path): """create files with different line endings dynamically""" text = "abcdeENDedfguENDyhbnd" for ex, lf in ( ("f06597f8a983dfc93744192b505a8af9", "\n"), ("39db5cc2f7749f02e0c712a3ece12ffc", "\r\n"), ): p = tmp_path / "test.txt" data = text.replace("END", lf) p.write_bytes(data.encode("utf-8")) expect = get_text_hexdigest(data) assert expect == ex, (expect, ex) got = get_file_hexdigest(p) assert got == expect, f"FAILED: {lf!r}, {(ex, got)}"