scantree-0.0.2/

scantree-0.0.2/LICENSE

MIT License

Copyright (c) 2019 Anders Huss

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

scantree-0.0.2/MANIFEST.in

include README.md LICENSE

scantree-0.0.2/PKG-INFO

Metadata-Version: 2.1
Name: scantree
Version: 0.0.2
Summary: Flexible recursive directory iterator: scandir meets glob("**", recursive=True)
Home-page: https://github.com/andhus/scantree
Author: Anders Huss
Author-email: andhus@kth.se
License: MIT
Description-Content-Type: text/markdown
License-File: LICENSE
Requires-Dist: attrs>=18.0.0
Requires-Dist: pathspec>=0.5.9
scantree-0.0.2/README.md

[![codecov](https://codecov.io/gh/andhus/scantree/branch/master/graph/badge.svg)](https://codecov.io/gh/andhus/scantree)

# `scantree`

Recursive directory iterator supporting:

- flexible filtering including wildcard path matching
- in memory representation of file-tree (for repeated access)
- efficient access to directory entry properties (`posix.DirEntry` interface)
  extended with real path and path relative to the recursion root directory
- detection and handling of cyclic symlinks

## Installation

```commandline
pip install scantree
```

## Usage

See source code for full documentation, some generic examples below.
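In addition to the examples that follow, symlinked directories can be left unexpanded by passing `follow_links=False`; they then show up as `LinkedDir` nodes. A minimal sketch (the `/path/to/dir` path is hypothetical):

```python
from scantree import scantree, LinkedDir

# do not recurse into symlinked directories; they become LinkedDir nodes
tree = scantree('/path/to/dir', follow_links=False)

# top-level entries only; LinkedDir nodes mark links that were not followed
linked = [d for d in tree.directories if isinstance(d, LinkedDir)]
print([d.path.relative for d in linked])
```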
Get matching file paths:

```python
from scantree import scantree, RecursionFilter

tree = scantree('/path/to/dir', RecursionFilter(match=['*.txt']))
print([path.relative for path in tree.filepaths()])
print([path.real for path in tree.filepaths()])
```

```
['d1/d2/file3.txt', 'd1/file2.txt', 'file1.txt']
['/path/to/other_dir/file3.txt', '/path/to/dir/d1/file2.txt', '/path/to/dir/file1.txt']
```

Access metadata of directory entries in file tree:

```python
d2 = tree.directories[0].directories[0]
print(type(d2))
print(d2.path.absolute)
print(d2.path.real)
print(d2.path.is_symlink())
print(d2.files[0].relative)
```

```
scantree._node.DirNode
/path/to/dir/d1/d2
/path/to/other_dir
True
d1/d2/file3.txt
```

Aggregate information by operating on tree:

```python
hello_count = tree.apply(
    file_apply=lambda path: sum([
        w.lower() == 'hello' for w in
        path.as_pathlib().read_text().split()
    ]),
    dir_apply=lambda dir_: sum(dir_.entries),
)
print(hello_count)
```

```
3
```

```python
hello_count_tree = tree.apply(
    file_apply=lambda path: {
        'name': path.name,
        'count': sum([
            w.lower() == 'hello' for w in
            path.as_pathlib().read_text().split()
        ])
    },
    dir_apply=lambda dir_: {
        'name': dir_.path.name,
        'count': sum(e['count'] for e in dir_.entries),
        'sub_counts': [e for e in dir_.entries]
    },
)
from pprint import pprint
pprint(hello_count_tree)
```

```
{'count': 3,
 'name': 'dir',
 'sub_counts': [{'count': 2, 'name': 'file1.txt'},
                {'count': 1,
                 'name': 'd1',
                 'sub_counts': [{'count': 1, 'name': 'file2.txt'},
                                {'count': 0,
                                 'name': 'd2',
                                 'sub_counts': [{'count': 0, 'name': 'file3.txt'}]}]}]}
```

Flexible filtering:

```python
without_hidden_files = scantree('.', RecursionFilter(match=['*', '!.*']))

without_palindrome_linked_dirs = scantree(
    '.',
    lambda paths: [
        p for p in paths if not (
            p.is_dir() and
            p.is_symlink() and
            p.name == p.name[::-1]
        )
    ]
)
```

Comparison:

```python
tree = scantree('path/to/dir')
# make some operations on filesystem, make sure file tree is the same:
assert tree == scantree('path/to/dir')

# tree contains absolute/real path info:
import shutil
shutil.copytree('path/to/dir', 'path/to/other_dir')
new_tree = scantree('path/to/other_dir')
assert tree != new_tree
assert (
    [p.relative for p in tree.leafpaths()] ==
    [p.relative for p in new_tree.leafpaths()]
)
```

Inspect symlinks:

```python
from scantree import CyclicLinkedDir

file_links = []
dir_links = []
cyclic_links = []

def file_apply(path):
    if path.is_symlink():
        file_links.append(path)

def dir_apply(dir_node):
    if dir_node.path.is_symlink():
        dir_links.append(dir_node.path)
    if isinstance(dir_node, CyclicLinkedDir):
        cyclic_links.append((dir_node.path, dir_node.target_path))

scantree('.', file_apply=file_apply, dir_apply=dir_apply)
```

scantree-0.0.2/pyproject.toml

[build-system]
requires = ["setuptools", "versioneer==0.29"]
build-backend = "setuptools.build_meta"

scantree-0.0.2/setup.cfg

[versioneer]
VCS = git
style = pep440
versionfile_source = src/scantree/_version.py
versionfile_build = scantree/_version.py
tag_prefix = v
parentdir_prefix = scantree-

[egg_info]
tag_build = 
tag_date = 0
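The `[versioneer]` section above, together with the generated `src/scantree/_version.py`, is what resolves the package version at build and run time; `setup.py` (next) calls `versioneer.get_version()` and `src/scantree/__init__.py` exposes the result as `__version__`. A quick check, assuming the package is installed:

```python
import scantree

# resolved by versioneer via _version.get_versions()['version'],
# e.g. "0.0.2" for this source distribution
print(scantree.__version__)
```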
scantree-0.0.2/setup.py0000644000175100001770000000172514551320304014436 0ustar00runnerdockerimport io import os from setuptools import setup, find_packages import versioneer DESCRIPTION = ( 'Flexible recursive directory iterator: scandir meets glob("**", recursive=True)' ) PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__)) try: with io.open(os.path.join(PROJECT_ROOT, 'README.md'), encoding='utf-8') as f: long_description = '\n' + f.read() except IOError: long_description = DESCRIPTION setup( name='scantree', version=versioneer.get_version(), description=DESCRIPTION, long_description=long_description, long_description_content_type="text/markdown", url='https://github.com/andhus/scantree', author="Anders Huss", author_email="andhus@kth.se", license='MIT', install_requires=[ 'attrs>=18.0.0', 'pathspec>=0.5.9' ], packages=find_packages('src'), package_dir={'': 'src'}, include_package_data=True, entry_points={}, tests_require=['pytest', 'pytest-cov'] ) ././@PaxHeader0000000000000000000000000000003300000000000010211 xustar0027 mtime=1705353420.652876 scantree-0.0.2/src/0000755000175100001770000000000014551320315013510 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1705353420.6568758 scantree-0.0.2/src/scantree/0000755000175100001770000000000014551320315015314 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353412.0 scantree-0.0.2/src/scantree/__init__.py0000644000175100001770000000054714551320304017431 0ustar00runnerdockerfrom __future__ import print_function, division from ._path import ( RecursionPath, DirEntryReplacement ) from ._node import ( DirNode, LinkedDir, CyclicLinkedDir ) from ._filter import RecursionFilter from ._scan import ( scantree, SymlinkRecursionError ) from . import _version __version__ = _version.get_versions()['version'] ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353412.0 scantree-0.0.2/src/scantree/_filter.py0000644000175100001770000000545114551320304017315 0ustar00runnerdockerfrom __future__ import print_function, division from pathspec import PathSpec from pathspec.util import normalize_file, match_file from pathspec.patterns import GitWildMatchPattern class RecursionFilter(object): """Callable object for filtering of sequence of `RecursionPath`:s. Intended for use as `recursion_filter` argument in `scantree`. # Arguments: linked_dirs (bool): Whether to include linked directories. Default True. linked_files (bool): Whether to include linked files. Default True. match ([str] | None): List of gitignore-style wildcard match patterns. The `RecursionPath.relative` path must match at least one of the patterns not starting with `'!'` and none of the patterns starting with `'!'`. Matching is done based on the `pathspec` library implementation (https://github.com/cpburnz/python-path-specification). Default `None` which is equivalent to ['*'] matching all file paths. 
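# Example: an illustrative sketch (paths are hypothetical); include python files, excluding hidden ones:

```
>>> from scantree import scantree, RecursionFilter
>>> tree = scantree('/path/to/project', RecursionFilter(match=['*.py', '!.*']))
>>> [path.relative for path in tree.filepaths()]
```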
""" def __init__( self, linked_dirs=True, linked_files=True, match=None, ): self.linked_dirs = linked_dirs self.linked_files = linked_files self._match_patterns = tuple('*') if match is None else tuple(match) if self._match_patterns != tuple('*'): self._path_spec = PathSpec.from_lines( GitWildMatchPattern, self.match_patterns ) else: self._path_spec = None @property def match_patterns(self): return self._match_patterns def include(self, recursion_path): if recursion_path.is_symlink(): if recursion_path.is_dir() and not self.linked_dirs: return False if recursion_path.is_file() and not self.linked_files: return False if recursion_path.is_dir(): # only filepaths are matched against patterns return True return self.match_file(recursion_path.relative) def match_file(self, filepath): """Match file against match patterns. NOTE: only match patterns are considered, not the `linked_files` argument of this class. # Arguments: filepath (str): the path to match. # Returns: Boolean, whether the path is a match or not. """ if self._path_spec is None: return True return match_file(self._path_spec.patterns, normalize_file(filepath)) def __call__(self, paths): """Filter recursion paths. # Arguments: paths ([RecursionPath]): The recursion paths to filter. # Returns: A generator of (filtered) recursion paths. """ for path in paths: if self.include(path): yield path ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353412.0 scantree-0.0.2/src/scantree/_node.py0000644000175100001770000001606614551320304016761 0ustar00runnerdockerfrom __future__ import print_function, division import attr from ._path import RecursionPath @attr.s(slots=True, frozen=True) class DirNode(object): """A directory node in a Directed Acyclic Graph (DAG) representing a file system tree. NOTE: this class is normally only ever instantiated by the `scantree` function. # Arguments: path (RecursionPath): The recursion path to the directory. directories ([object]): The result of `scantree` `dir_apply` argument applied to the subdirectories of this directory. files ([object]): The result of `scantree` `file_apply` argument applied to the files of this directory. """ path = attr.ib(validator=attr.validators.instance_of(RecursionPath)) files = attr.ib(default=tuple(), converter=tuple) directories = attr.ib(default=tuple(), converter=tuple) @property def empty(self): """Boolean: does this directory node have any files or subdirectories.""" return not (self.files or self.directories) @property def entries(self): """Tuple of files followed by directories.""" return self.files + self.directories def apply(self, dir_apply, file_apply): """Operate on the file tree under this directory node recursively. # Arguments: file_apply (f: f(object) -> object): The function to apply to the to each file. Default "identity", i.e. `lambda x: x`. dir_apply (f: f(DirNode) -> object): The function to apply to the `DirNode` for each (sub) directory. Default "identity", i.e. `lambda x: x`. # Returns: The `object` returned by `dir_apply` on this `DirNode` after recursive application of `file_apply` and `dir_apply` on its subdirectories and files. """ dir_node = DirNode( self.path, [dir_.apply(dir_apply, file_apply) for dir_ in self.directories], [file_apply(file_) for file_ in self.files] ) return dir_apply(dir_node) def leafpaths(self): """Get the leafs of the file tree under this directory node. # Returns: A list of `RecursionPaths` sorted on relative path. 
If the tree contains empty directories, `LinkedDir` or `CyclicLinkedDir` nodes these will be included. If none of these are present (which is the case for the result of `scantree('.', include_empty=False, follow_links=True, allow_cyclic_links=False)`) this method will only return paths to the files, i.e. the same as the `filepaths` method. NOTE: `LinkedDir` and `CyclicLinkedDir` nodes are considered leafs since they are leafs in the actual DAG data structure, even though they are not necessarily leafs in terms of the underlying file-system structure that they represent. """ leafs = [] def file_apply(path): leafs.append(path) def dir_apply(dir_node): if isinstance(dir_node, (LinkedDir, CyclicLinkedDir)) or dir_node.empty: leafs.append(dir_node.path) self.apply(dir_apply=dir_apply, file_apply=file_apply) return sorted(leafs, key=lambda path: path.relative) def filepaths(self): """Get the filepaths of the file tree under this directory. # Returns: A list of `RecursionPaths` sorted on relative path. """ files = [] def file_apply(path): files.append(path) self.apply(dir_apply=identity, file_apply=file_apply) return sorted(files, key=lambda path: path.relative) @attr.s(slots=True, frozen=True) class LinkedDir(object): """This node represents a symbolic link to a directory. It is created by `scantree` to represent a linked directory when the argument `follow_links` is set tot `False`. NOTE: this class is normally only ever instantiated by the `scantree` function. # Arguments: path (RecursionPath): The recursion path to the *link* to a directory. """ path = attr.ib(validator=attr.validators.instance_of(RecursionPath)) @property def directories(self): raise AttributeError( '`directories` is undefined for `LinkedDir` nodes. Use e.g. ' '`[de for de in scandir(linked_dir.path.real) if de.is_dir()]` ' 'to get a list of the sub directories of the linked directory' ) @property def files(self): raise AttributeError( '`files` is undefined for `LinkedDir` nodes. Use e.g. ' '`[de for de in scandir(linked_dir.path.real) if de.is_file()]` ' ' to get a list of the files of the linked directory' ) @property def entries(self): raise AttributeError( '`entries` is undefined for `LinkedDir` nodes. Use e.g. ' '`scandir(linked_dir.path.real)` to get the entries of the linked ' 'directory' ) @property def empty(self): raise AttributeError('`empty` is undefined for `LinkedDir` nodes.') def apply(self, dir_apply, file_apply=None): return dir_apply(self) @attr.s(slots=True, frozen=True) class CyclicLinkedDir(object): """This node represents a symbolic link causing a cycle of symlinks. It is created by `scantree` to represent a cyclic links when the argument `allow_cyclic_links` is set tot `True`. NOTE: this class is normally only ever instantiated by the `scantree` function. # Arguments: path (RecursionPath): The recursion path to the *symlink* to a directory (which is a parent of this directory). target_path (RecursionPath): The recursion path to the target directory of the link (which is a parent of this directory). """ path = attr.ib(validator=attr.validators.instance_of(RecursionPath)) target_path = attr.ib(validator=attr.validators.instance_of(RecursionPath)) @property def directories(self): raise AttributeError( '`directories` is undefined for `CyclicLinkedDir` to avoid infinite ' 'recursion. `target_path` property contains the `RecursionPath` for the ' 'target directory.' ) @property def files(self): raise AttributeError( '`files` is undefined for `CyclicLinkedDir` to avoid infinite ' 'recursion. 
`target_path` property contains the `RecursionPath` for the ' 'target directory.' ) @property def entries(self): raise AttributeError( '`entries` is undefined for `CyclicLinkedDir` to avoid infinite ' 'recursion. `target_path` property contains the `RecursionPath` for the ' 'target directory.' ) @property def empty(self): """A cyclic linked dir is never empty.""" return False def apply(self, dir_apply, file_apply=None): return dir_apply(self) def is_empty_dir_node(dir_node): return isinstance(dir_node, DirNode) and dir_node.empty def identity(x): return x ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353412.0 scantree-0.0.2/src/scantree/_path.py0000644000175100001770000001570714551320304016771 0ustar00runnerdockerfrom __future__ import print_function, division import os from os import scandir from pathlib import Path from posix import DirEntry import attr from .compat import fspath @attr.s(slots=True) # TODO consider make frozen. class RecursionPath(object): """Caches the properties of directory entries including the path relative to the root directory for recursion. NOTE: this class is normally only ever instantiated by the `scantree` function. The class provides the `DirEntry` interface (found in the external `scandir` module in Python < 3.5 or builtin `posix` module in Python >= 3.5). """ root = attr.ib() relative = attr.ib() real = attr.ib() _dir_entry = attr.ib(eq=False, order=False) @classmethod def from_root(cls, directory): """Instantiate a `RecursionPath` from given directory.""" if isinstance(directory, (DirEntry, DirEntryReplacement)): dir_entry = directory else: dir_entry = DirEntryReplacement.from_path(directory) return cls( root=dir_entry.path, relative='', real=os.path.realpath(dir_entry.path), dir_entry=dir_entry ) def scandir(self): """Scan the underlying directory. # Returns: A generator of `RecursionPath`:s representing the directory entries. """ return (self._join(dir_entry) for dir_entry in scandir(self.absolute)) def _join(self, dir_entry): relative = os.path.join(self.relative, dir_entry.name) real = os.path.join(self.real, dir_entry.name) if dir_entry.is_symlink(): # For large number of files/directories it improves performance # significantly to only call `os.realpath` when we are actually # encountering a symlink. real = os.path.realpath(real) return attr.evolve(self, relative=relative, real=real, dir_entry=dir_entry) @property def absolute(self): """The absolute path to this entry""" if self.relative == '': return self.root # don't join in this case as that appends trailing '/' return os.path.join(self.root, self.relative) @property def path(self): """The path property according `DirEntry` interface. NOTE: this property is only here to fully implement the `DirEntry` interface (which is useful in comparison etc.). It is recommended to use one on of (the well defined) `real`, `relative` or `absolute` properties instead. 
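# Example: an illustrative sketch with hypothetical values, where `rpath` is a `RecursionPath` yielded by a scan rooted at '/path/to/dir':

```
>>> rpath.relative      # path relative to the recursion root
'd1/file2.txt'
>>> rpath.absolute      # root joined with the relative path
'/path/to/dir/d1/file2.txt'
>>> rpath.real          # absolute path with any symlinks resolved
'/path/to/dir/d1/file2.txt'
```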
""" return self._dir_entry.path @property def name(self): return self._dir_entry.name def is_dir(self, follow_symlinks=True): return self._dir_entry.is_dir(follow_symlinks=follow_symlinks) def is_file(self, follow_symlinks=True): return self._dir_entry.is_file(follow_symlinks=follow_symlinks) def is_symlink(self): return self._dir_entry.is_symlink() def stat(self, follow_symlinks=True): return self._dir_entry.stat(follow_symlinks=follow_symlinks) def inode(self): return self._dir_entry.inode() def __fspath__(self): return self.absolute def as_pathlib(self): """Get a pathlib version of this path.""" return Path(self.absolute) @staticmethod def _getstate(self): return ( self.root, self.relative, self.real, DirEntryReplacement.from_dir_entry(self._dir_entry) ) @staticmethod def _setstate(self, state): self.root, self.relative, self.real, self._dir_entry = state # Attrs overrides __get/setstate__ for slotted classes, see: # https://github.com/python-attrs/attrs/issues/512 RecursionPath.__getstate__ = RecursionPath._getstate RecursionPath.__setstate__ = RecursionPath._setstate @attr.s(slots=True, eq=False, order=False) class DirEntryReplacement(object): """Pure python implementation of the `DirEntry` interface (found in the external `scandir` module in Python < 3.5 or builtin `posix` module in Python >= 3.5) A `DirEntry` cannot be instantiated directly (only returned from a call to `scandir`). This class offers a drop in replacement. Useful in testing and for representing the root directory for `scantree` implementation. """ path = attr.ib(converter=fspath) name = attr.ib() _is_dir = attr.ib(init=False, default=None) _is_file = attr.ib(init=False, default=None) _is_symlink = attr.ib(init=False, default=None) _stat_sym = attr.ib(init=False, default=None) _stat_nosym = attr.ib(init=False, default=None) @classmethod def from_path(cls, path): path = fspath(path) if not os.path.exists(path): raise IOError('{} does not exist'.format(path)) basename = os.path.basename(path) if basename in ['', '.', '..']: name = os.path.basename(os.path.realpath(path)) else: name = basename return cls(path, name) @classmethod def from_dir_entry(cls, dir_entry): return cls(dir_entry.path, dir_entry.name) def is_dir(self, follow_symlinks=True): if self._is_dir is None: self._is_dir = os.path.isdir(self.path) if follow_symlinks: return self._is_dir else: return self._is_dir and not self.is_symlink() def is_file(self, follow_symlinks=True): if self._is_file is None: self._is_file = os.path.isfile(self.path) if follow_symlinks: return self._is_file else: return self._is_file and not self.is_symlink() def is_symlink(self): if self._is_symlink is None: self._is_symlink = os.path.islink(self.path) return self._is_symlink def stat(self, follow_symlinks=True): if follow_symlinks: if self._stat_sym is None: self._stat_sym = os.stat(self.path) return self._stat_sym if self._stat_nosym is None: self._stat_nosym = os.lstat(self.path) return self._stat_nosym def inode(self): return self.stat(follow_symlinks=False).st_ino def __eq__(self, other): if not isinstance(other, (DirEntryReplacement, DirEntry)): return False if not self.path == other.path: return False if not self.name == other.name: return False for method, kwargs in [ ('is_dir', {'follow_symlinks': True}), ('is_dir', {'follow_symlinks': False}), ('is_file', {'follow_symlinks': True}), ('is_file', {'follow_symlinks': False}), ('is_symlink', {}), ('stat', {'follow_symlinks': True}), ('stat', {'follow_symlinks': False}), ('inode', {}) ]: this_res = getattr(self, 
method)(**kwargs) other_res = getattr(other, method)(**kwargs) if not this_res == other_res: return False return True ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353412.0 scantree-0.0.2/src/scantree/_scan.py0000644000175100001770000003330114551320304016747 0ustar00runnerdockerfrom __future__ import print_function, division import os from multiprocessing.pool import Pool from pathspec import RecursionError as _RecursionError from .compat import fspath from ._node import ( DirNode, LinkedDir, CyclicLinkedDir, identity, is_empty_dir_node ) from ._path import RecursionPath def scantree( directory, recursion_filter=identity, file_apply=identity, dir_apply=identity, follow_links=True, allow_cyclic_links=True, cache_file_apply=False, include_empty=False, jobs=1 ): """Recursively scan the file tree under the given directory. The files and subdirectories in each directory will be used to initialize a the object: `DirNode(path=..., files=[...], directories=[...])`, where `path` is the `RecursionPath` to the directory (relative to the root directory of the recursion), `files` is a list of the results of `file_apply` called on the recursion path of each file, and `directories` is a list of the results of `dir_apply` called on each `DirNode` obtained (recursively) for each subdirectory. Hence, with the default value (identity function) for `file_apply` and `dir_apply`, a tree-like data structure is returned representing the file tree of the scanned directory, with all relevant metadata *cached in memory*. This example illustrates the core concepts: ``` >>> tree = scantree('/path/to/dir') >>> tree.directories[0].directories[0].path.absolute '/path/to/dir/sub_dir_0/sub_sub_dir_0' >>> tree.directories[0].directories[0].path.relative 'sub_dir_0/sub_sub_dir_0' >>> tree.directories[0].files[0].relative 'sub_dir_0/file_0' >>> tree.directories[0].path.real '/path/to/linked_dir/' >>> tree.directories[0].path.is_symlink() # already cached, no OS call needed True ``` By providing a different `dir_apply` and `file_apply` function, you can operate on the paths and/or data of files while scanning the directory recursively. If `dir_apply` returns some aggregate or nothing (i.e. `None`) the full tree will never be stored in memory. The same result can be obtained by calling `tree.apply(dir_apply=..., file_apply=...)` but this can be done repeatedly without having to rerun expensive OS calls. # Arguments: directory (str | os.PathLike): The directory to scan. recursion_filter (f: f([RecursionPath]) -> [RecursionPath]): A filter function, defining which files to include and which subdirectories to scan, e.g. an instance of `scantree.RecursionFilter`. The `RecursionPath` implements the `DirEntry` interface (found in the external `scandir` module in Python < 3.5 or builtin `posix` module in Python >= 3.5). It caches metadata efficiently and, in addition to DirEntry, provides real path and path relative to the root directory for the recursion as properties, see `scantree.RecursionPath` for further details. file_apply (f: f(RecursionPath) -> object): The function to apply to the `RecursionPath` for each file. Default "identity", i.e. `lambda x: x`. dir_apply (f: f(DirNode) -> object): The function to apply to the `DirNode` for each (sub) directory. Default "identity", i.e. `lambda x: x`. follow_links (bool): Whether to follow symbolic links for not, i.e. to continue the recursive scanning in linked directories. 
If False, linked directories are represented by the `LinkedDir` object which does e.g. not have the `files` and `directories` properties (as these cannot be known without following the link). Default `True`. allow_cyclic_links (bool): If set to `False`, a `SymlinkRecursionError` is raised on detection of cyclic symbolic links, if `True` (default), the cyclic link is represented by a `CyclicLinkedDir` object. See "Cyclic Links Handling" section below for further details. cache_file_apply: If set to `True`, the `file_apply` result will be cached by *real* path. Default `False`. include_empty (bool): If set to `True`, empty directories are included in the result of the recursive scanning, represented by an empty directory node: `DirNode(directories=[], files=[])`. If `False` (default), empty directories are not included in the parent directory node (and subsequently never passed to `dir_apply`). jobs (int | None): If `1` (default), no multiprocessing is used. If jobs > 1, the number of processes to use for parallelizing `file_apply` over included files. If `None`, `os.cpu_count()` number of processes are used. NOTE: if jobs is `None` or > 1, the entire file tree will first be stored in memory before applying `file_apply` and `dir_apply`. # Returns: The `object` returned by `dir_apply` on the `DirNode` for the top level `directory`. If the default value ("identity" function: `lambda x: x`) is used for `dir_apply`, it will be the `DirNode` representing the root node of the file tree. # Raises: SymlinkRecursionError: if `allow_cyclic_links=False` and any cyclic symbolic links are detected. # Cyclic Links Handling: Symbolically linked directories can create cycles in the, otherwise acyclic, graph representing the file tree. If not handled properly, this leads to infinite recursion when traversing the file tree (this is e.g. the case for Python's built-in `os.walk(directory, followlinks=True)`). Sometimes multiple links form cycles together, therefore - without loss of generality - cyclic links are defined as: The first occurrence of a link to a directory that has already been visited on the current branch of recursion. With `allow_cyclic_links=True` any link to such a directory is represented by the object `CyclicLinkedDir(path=..., target_path=...)` where `path` is the `RecursionPath` to the link and `target_path` the `RecursionPath` to the parent directory that is the target of the link. In the example below there are cycles on all branches A/B, A/C and D. root/ |__A/ | |__B/ | | |__toA@ -> .. | |__C/ | |__toA@ -> .. |__D/ |__toB@ -> ../A/B In this case, the symlinks with relative paths A/B/toA, A/C/toA and D/toB/toA/B/toA will be represented by a `CyclicLinkedDir` object. Note that for the third branch, the presence of cyclic links can be *detected* already at D/toB/toA/B (since B is already visited) but it is D/toB/toA/B/toA which is considered a cyclic link (and gets represented by a `CyclicLinkedDir`). This reflects the fact that it is the toA that's "causing" the cycle, not D/toB or D/toB/toA/B (which is not even a link), and at D/toB/toA/ the cycle can not yet be detected. Below is another example where multiple links are involved in forming cycles as well as links which absolute path is external to the root directory for the recursion. In this case the symlinks with relative paths A/toB/toA, B/toA/toB and C/toD/toC are considered cyclic links for `scandir('/path/to/root')`. 
/path/to/root/ |__A/ | |__toB@ -> ../B |__B/ | |__toA@ -> /path/to/root/A |__C/ |__toD@ -> /path/to/D /path/to/D/ |__toC@ -> /path/to/root/C """ _verify_is_directory(directory) if jobs is None or jobs > 1: return _scantree_multiprocess(**vars()) path = RecursionPath.from_root(directory) if cache_file_apply: file_apply = _cached_by_realpath(file_apply) root_dir_node = _scantree_recursive( path=path, recursion_filter=recursion_filter, file_apply=file_apply, dir_apply=dir_apply, follow_links=follow_links, allow_cyclic_links=allow_cyclic_links, include_empty=include_empty, parents={path.real: path}, ) result = dir_apply(root_dir_node) return result def _scantree_multiprocess(**kwargs): """Multiprocess implementation of scantree. Note that it is only the `file_apply` function that is parallelized. """ file_apply = kwargs.pop('file_apply') dir_apply = kwargs.pop('dir_apply') jobs = kwargs.pop('jobs') file_paths = [] def extract_paths(path): result_idx = len(file_paths) file_paths.append(path) return result_idx root_dir_node = scantree(file_apply=extract_paths, dir_apply=identity, **kwargs) pool = Pool(jobs) try: file_results = pool.map(file_apply, file_paths) finally: pool.close() def fetch_result(result_idx): return file_results[result_idx] return root_dir_node.apply(dir_apply=dir_apply, file_apply=fetch_result) def _verify_is_directory(directory): """Verify that `directory` path exists and is a directory, otherwise raise ValueError""" directory = fspath(directory) if not os.path.exists(directory): raise ValueError('{}: No such directory'.format(directory)) if not os.path.isdir(directory): raise ValueError('{}: Is not a directory'.format(directory)) def _cached_by_realpath(file_apply): """Wrapps the `file_apply` function with a cache, if `path.real` is already in the cache, the cached value is returned""" cache = {} def file_apply_cached(path): if path.real not in cache: cache[path.real] = file_apply(path) return cache[path.real] return file_apply_cached def _scantree_recursive( path, recursion_filter, file_apply, dir_apply, follow_links, allow_cyclic_links, include_empty, parents, ): """The underlying recursive implementation of scantree. # Arguments: path (RecursionPath): the recursion path relative the directory where recursion was initialized. recursion_filter (f: f([RecursionPath]) -> [RecursionPath]): A filter function, defining which files to include and which subdirectories to scan, e.g. an instance of `scantree.RecursionFilter`. file_apply (f: f(RecursionPath) -> object): The function to apply to the `RecursionPath` for each file. Default "identity", i.e. `lambda x: x`. dir_apply (f: f(DirNode) -> object): The function to apply to the `DirNode` for each (sub) directory. Default "identity", i.e. `lambda x: x`. follow_links (bool): Whether to follow symbolic links for not, i.e. to continue the recursive scanning in linked directories. If False, linked directories are represented by the `LinkedDir` object which does e.g. not have the `files` and `directories` properties (as these cannot be known without following the link). Default `True`. allow_cyclic_links (bool): If set to `False`, a `SymlinkRecursionError` is raised on detection of cyclic symbolic links, if `True` (default), the cyclic link is represented by a `CyclicLinkedDir` object. include_empty (bool): If set to `True`, empty directories are included in the result of the recursive scanning, represented by an empty directory node: `DirNode(directories=[], files=[])`. 
If `False` (default), empty directories are not included in the parent directory node (and subsequently never passed to `dir_apply`). parents ({str: RecursionPath}): Mapping from real path (`str`) to `RecursionPath` of parent directories. # Returns: `DirNode` for the directory at `path`. # Raises: SymlinkRecursionError: if `allow_cyclic_links=False` and any cyclic symbolic links are detected. """ fwd_kwargs = vars() del fwd_kwargs['path'] if path.is_symlink(): if not follow_links: return LinkedDir(path) previous_path = parents.get(path.real, None) if previous_path is not None: if allow_cyclic_links: return CyclicLinkedDir(path, previous_path) else: raise SymlinkRecursionError(path, previous_path) if follow_links: parents[path.real] = path dirs = [] files = [] for subpath in sorted(recursion_filter(path.scandir())): if subpath.is_dir(): dir_node = _scantree_recursive(subpath, **fwd_kwargs) if include_empty or not is_empty_dir_node(dir_node): dirs.append(dir_apply(dir_node)) if subpath.is_file(): files.append(file_apply(subpath)) if follow_links: del parents[path.real] return DirNode(path=path, directories=dirs, files=files) class SymlinkRecursionError(_RecursionError): """Raised when symlinks cause a cyclic graph of directories. Extends the `pathspec.util.RecursionError` but with a different name (avoid overriding the built-in error!) and with a more informative string representation (used in `dirhash.cli`). """ def __init__(self, path, target_path): super(SymlinkRecursionError, self).__init__( real_path=path.real, first_path=os.path.join(target_path.root, target_path.relative), second_path=os.path.join(path.root, path.relative) ) def __str__(self): # _RecursionError.__str__ prints args without context return 'Symlink recursion: {}'.format(self.message) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353412.0 scantree-0.0.2/src/scantree/_version.py0000644000175100001770000005767314551320304017532 0ustar00runnerdocker # This file helps to compute a version number in source trees obtained from # git-archive tarball (such as those provided by githubs download-from-tag # feature). Distribution tarballs (built by setup.py sdist) and build # directories (produced by setup.py build) will contain a much shorter file # that just contains the computed version number. # This file is released into the public domain. # Generated by versioneer-0.29 # https://github.com/python-versioneer/python-versioneer """Git implementation of _version.py.""" import errno import os import re import subprocess import sys from typing import Any, Callable, Dict, List, Optional, Tuple import functools def get_keywords() -> Dict[str, str]: """Get the keywords needed to look up the version information.""" # these strings will be replaced by git during git-archive. # setup.py/versioneer.py will grep for the variable names, so they must # each be defined on a line of their own. _version.py will just call # get_keywords(). 
git_refnames = "$Format:%d$" git_full = "$Format:%H$" git_date = "$Format:%ci$" keywords = {"refnames": git_refnames, "full": git_full, "date": git_date} return keywords class VersioneerConfig: """Container for Versioneer configuration parameters.""" VCS: str style: str tag_prefix: str parentdir_prefix: str versionfile_source: str verbose: bool def get_config() -> VersioneerConfig: """Create, populate and return the VersioneerConfig() object.""" # these strings are filled in when 'setup.py versioneer' creates # _version.py cfg = VersioneerConfig() cfg.VCS = "git" cfg.style = "pep440" cfg.tag_prefix = "" cfg.parentdir_prefix = "scantree-" cfg.versionfile_source = "src/scantree/_version.py" cfg.verbose = False return cfg class NotThisMethod(Exception): """Exception raised if a method is not valid for the current scenario.""" LONG_VERSION_PY: Dict[str, str] = {} HANDLERS: Dict[str, Dict[str, Callable]] = {} def register_vcs_handler(vcs: str, method: str) -> Callable: # decorator """Create decorator to mark a method as the handler of a VCS.""" def decorate(f: Callable) -> Callable: """Store f in HANDLERS[vcs][method].""" if vcs not in HANDLERS: HANDLERS[vcs] = {} HANDLERS[vcs][method] = f return f return decorate def run_command( commands: List[str], args: List[str], cwd: Optional[str] = None, verbose: bool = False, hide_stderr: bool = False, env: Optional[Dict[str, str]] = None, ) -> Tuple[Optional[str], Optional[int]]: """Call the given command(s).""" assert isinstance(commands, list) process = None popen_kwargs: Dict[str, Any] = {} if sys.platform == "win32": # This hides the console window if pythonw.exe is used startupinfo = subprocess.STARTUPINFO() startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW popen_kwargs["startupinfo"] = startupinfo for command in commands: try: dispcmd = str([command] + args) # remember shell=False, so use git.cmd on windows, not just git process = subprocess.Popen([command] + args, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=(subprocess.PIPE if hide_stderr else None), **popen_kwargs) break except OSError as e: if e.errno == errno.ENOENT: continue if verbose: print("unable to run %s" % dispcmd) print(e) return None, None else: if verbose: print("unable to find command, tried %s" % (commands,)) return None, None stdout = process.communicate()[0].strip().decode() if process.returncode != 0: if verbose: print("unable to run %s (error)" % dispcmd) print("stdout was %s" % stdout) return None, process.returncode return stdout, process.returncode def versions_from_parentdir( parentdir_prefix: str, root: str, verbose: bool, ) -> Dict[str, Any]: """Try to determine the version from the parent directory name. Source tarballs conventionally unpack into a directory that includes both the project name and a version string. 
We will also support searching up two directory levels for an appropriately named parent directory """ rootdirs = [] for _ in range(3): dirname = os.path.basename(root) if dirname.startswith(parentdir_prefix): return {"version": dirname[len(parentdir_prefix):], "full-revisionid": None, "dirty": False, "error": None, "date": None} rootdirs.append(root) root = os.path.dirname(root) # up a level if verbose: print("Tried directories %s but none started with prefix %s" % (str(rootdirs), parentdir_prefix)) raise NotThisMethod("rootdir doesn't start with parentdir_prefix") @register_vcs_handler("git", "get_keywords") def git_get_keywords(versionfile_abs: str) -> Dict[str, str]: """Extract version information from the given file.""" # the code embedded in _version.py can just fetch the value of these # keywords. When used from setup.py, we don't want to import _version.py, # so we do it with a regexp instead. This function is not used from # _version.py. keywords: Dict[str, str] = {} try: with open(versionfile_abs, "r") as fobj: for line in fobj: if line.strip().startswith("git_refnames ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["refnames"] = mo.group(1) if line.strip().startswith("git_full ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["full"] = mo.group(1) if line.strip().startswith("git_date ="): mo = re.search(r'=\s*"(.*)"', line) if mo: keywords["date"] = mo.group(1) except OSError: pass return keywords @register_vcs_handler("git", "keywords") def git_versions_from_keywords( keywords: Dict[str, str], tag_prefix: str, verbose: bool, ) -> Dict[str, Any]: """Get version information from git keywords.""" if "refnames" not in keywords: raise NotThisMethod("Short version file found") date = keywords.get("date") if date is not None: # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] # git-2.2.0 added "%cI", which expands to an ISO-8601 -compliant # datestamp. However we prefer "%ci" (which expands to an "ISO-8601 # -like" string, which we must then edit to make compliant), because # it's been around since git-1.5.3, and it's too difficult to # discover which version we're using, or to work around using an # older one. date = date.strip().replace(" ", "T", 1).replace(" ", "", 1) refnames = keywords["refnames"].strip() if refnames.startswith("$Format"): if verbose: print("keywords are unexpanded, not using") raise NotThisMethod("unexpanded keywords, not a git-archive tarball") refs = {r.strip() for r in refnames.strip("()").split(",")} # starting in git-1.8.3, tags are listed as "tag: foo-1.0" instead of # just "foo-1.0". If we see a "tag: " prefix, prefer those. TAG = "tag: " tags = {r[len(TAG):] for r in refs if r.startswith(TAG)} if not tags: # Either we're using git < 1.8.3, or there really are no tags. We use # a heuristic: assume all version tags have a digit. The old git %d # expansion behaves like git log --decorate=short and strips out the # refs/heads/ and refs/tags/ prefixes that would let us distinguish # between branches and tags. By ignoring refnames without digits, we # filter out many common branch names like "release" and # "stabilization", as well as "HEAD" and "master". tags = {r for r in refs if re.search(r'\d', r)} if verbose: print("discarding '%s', no digits" % ",".join(refs - tags)) if verbose: print("likely tags: %s" % ",".join(sorted(tags))) for ref in sorted(tags): # sorting will prefer e.g. 
"2.0" over "2.0rc1" if ref.startswith(tag_prefix): r = ref[len(tag_prefix):] # Filter out refs that exactly match prefix or that don't start # with a number once the prefix is stripped (mostly a concern # when prefix is '') if not re.match(r'\d', r): continue if verbose: print("picking %s" % r) return {"version": r, "full-revisionid": keywords["full"].strip(), "dirty": False, "error": None, "date": date} # no suitable tags, so version is "0+unknown", but full hex is still there if verbose: print("no suitable tags, using unknown + full revision id") return {"version": "0+unknown", "full-revisionid": keywords["full"].strip(), "dirty": False, "error": "no suitable tags", "date": None} @register_vcs_handler("git", "pieces_from_vcs") def git_pieces_from_vcs( tag_prefix: str, root: str, verbose: bool, runner: Callable = run_command ) -> Dict[str, Any]: """Get version from 'git describe' in the root of the source tree. This only gets called if the git-archive 'subst' keywords were *not* expanded, and _version.py hasn't already been rewritten with a short version string, meaning we're inside a checked out source tree. """ GITS = ["git"] if sys.platform == "win32": GITS = ["git.cmd", "git.exe"] # GIT_DIR can interfere with correct operation of Versioneer. # It may be intended to be passed to the Versioneer-versioned project, # but that should not change where we get our version from. env = os.environ.copy() env.pop("GIT_DIR", None) runner = functools.partial(runner, env=env) _, rc = runner(GITS, ["rev-parse", "--git-dir"], cwd=root, hide_stderr=not verbose) if rc != 0: if verbose: print("Directory %s not under git control" % root) raise NotThisMethod("'git rev-parse --git-dir' returned error") # if there is a tag matching tag_prefix, this yields TAG-NUM-gHEX[-dirty] # if there isn't one, this yields HEX[-dirty] (no NUM) describe_out, rc = runner(GITS, [ "describe", "--tags", "--dirty", "--always", "--long", "--match", f"{tag_prefix}[[:digit:]]*" ], cwd=root) # --long was added in git-1.5.5 if describe_out is None: raise NotThisMethod("'git describe' failed") describe_out = describe_out.strip() full_out, rc = runner(GITS, ["rev-parse", "HEAD"], cwd=root) if full_out is None: raise NotThisMethod("'git rev-parse' failed") full_out = full_out.strip() pieces: Dict[str, Any] = {} pieces["long"] = full_out pieces["short"] = full_out[:7] # maybe improved later pieces["error"] = None branch_name, rc = runner(GITS, ["rev-parse", "--abbrev-ref", "HEAD"], cwd=root) # --abbrev-ref was added in git-1.6.3 if rc != 0 or branch_name is None: raise NotThisMethod("'git rev-parse --abbrev-ref' returned error") branch_name = branch_name.strip() if branch_name == "HEAD": # If we aren't exactly on a branch, pick a branch which represents # the current commit. If all else fails, we are on a branchless # commit. branches, rc = runner(GITS, ["branch", "--contains"], cwd=root) # --contains was added in git-1.5.4 if rc != 0 or branches is None: raise NotThisMethod("'git branch --contains' returned error") branches = branches.split("\n") # Remove the first line if we're running detached if "(" in branches[0]: branches.pop(0) # Strip off the leading "* " from the list of branches. branches = [branch[2:] for branch in branches] if "master" in branches: branch_name = "master" elif not branches: branch_name = None else: # Pick the first branch that is returned. Good or bad. branch_name = branches[0] pieces["branch"] = branch_name # parse describe_out. It will be like TAG-NUM-gHEX[-dirty] or HEX[-dirty] # TAG might have hyphens. 
git_describe = describe_out # look for -dirty suffix dirty = git_describe.endswith("-dirty") pieces["dirty"] = dirty if dirty: git_describe = git_describe[:git_describe.rindex("-dirty")] # now we have TAG-NUM-gHEX or HEX if "-" in git_describe: # TAG-NUM-gHEX mo = re.search(r'^(.+)-(\d+)-g([0-9a-f]+)$', git_describe) if not mo: # unparsable. Maybe git-describe is misbehaving? pieces["error"] = ("unable to parse git-describe output: '%s'" % describe_out) return pieces # tag full_tag = mo.group(1) if not full_tag.startswith(tag_prefix): if verbose: fmt = "tag '%s' doesn't start with prefix '%s'" print(fmt % (full_tag, tag_prefix)) pieces["error"] = ("tag '%s' doesn't start with prefix '%s'" % (full_tag, tag_prefix)) return pieces pieces["closest-tag"] = full_tag[len(tag_prefix):] # distance: number of commits since tag pieces["distance"] = int(mo.group(2)) # commit: short hex revision ID pieces["short"] = mo.group(3) else: # HEX: no tags pieces["closest-tag"] = None out, rc = runner(GITS, ["rev-list", "HEAD", "--left-right"], cwd=root) pieces["distance"] = len(out.split()) # total number of commits # commit date: see ISO-8601 comment in git_versions_from_keywords() date = runner(GITS, ["show", "-s", "--format=%ci", "HEAD"], cwd=root)[0].strip() # Use only the last line. Previous lines may contain GPG signature # information. date = date.splitlines()[-1] pieces["date"] = date.strip().replace(" ", "T", 1).replace(" ", "", 1) return pieces def plus_or_dot(pieces: Dict[str, Any]) -> str: """Return a + if we don't already have one, else return a .""" if "+" in pieces.get("closest-tag", ""): return "." return "+" def render_pep440(pieces: Dict[str, Any]) -> str: """Build up version string, with post-release "local version identifier". Our goal: TAG[+DISTANCE.gHEX[.dirty]] . Note that if you get a tagged build and then dirty it, you'll get TAG+0.gHEX.dirty Exceptions: 1: no tags. git_describe was just HEX. 0+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_branch(pieces: Dict[str, Any]) -> str: """TAG[[.dev0]+DISTANCE.gHEX[.dirty]] . The ".dev0" means not master branch. Note that .dev0 sorts backwards (a feature branch will appear "older" than the master branch). Exceptions: 1: no tags. 0[.dev0]+untagged.DISTANCE.gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: if pieces["branch"] != "master": rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0" if pieces["branch"] != "master": rendered += ".dev0" rendered += "+untagged.%d.g%s" % (pieces["distance"], pieces["short"]) if pieces["dirty"]: rendered += ".dirty" return rendered def pep440_split_post(ver: str) -> Tuple[str, Optional[int]]: """Split pep440 version string at the post-release segment. Returns the release segments before the post-release and the post-release version number (or -1 if no post-release segment is present). 
""" vc = str.split(ver, ".post") return vc[0], int(vc[1] or 0) if len(vc) == 2 else None def render_pep440_pre(pieces: Dict[str, Any]) -> str: """TAG[.postN.devDISTANCE] -- No -dirty. Exceptions: 1: no tags. 0.post0.devDISTANCE """ if pieces["closest-tag"]: if pieces["distance"]: # update the post release segment tag_version, post_version = pep440_split_post(pieces["closest-tag"]) rendered = tag_version if post_version is not None: rendered += ".post%d.dev%d" % (post_version + 1, pieces["distance"]) else: rendered += ".post0.dev%d" % (pieces["distance"]) else: # no commits, use the tag as the version rendered = pieces["closest-tag"] else: # exception #1 rendered = "0.post0.dev%d" % pieces["distance"] return rendered def render_pep440_post(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX] . The ".dev0" means dirty. Note that .dev0 sorts backwards (a dirty tree will appear "older" than the corresponding clean one), but you shouldn't be releasing software with -dirty anyways. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" rendered += "+g%s" % pieces["short"] return rendered def render_pep440_post_branch(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]+gHEX[.dirty]] . The ".dev0" means not master branch. Exceptions: 1: no tags. 0.postDISTANCE[.dev0]+gHEX[.dirty] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["branch"] != "master": rendered += ".dev0" rendered += plus_or_dot(pieces) rendered += "g%s" % pieces["short"] if pieces["dirty"]: rendered += ".dirty" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["branch"] != "master": rendered += ".dev0" rendered += "+g%s" % pieces["short"] if pieces["dirty"]: rendered += ".dirty" return rendered def render_pep440_old(pieces: Dict[str, Any]) -> str: """TAG[.postDISTANCE[.dev0]] . The ".dev0" means dirty. Exceptions: 1: no tags. 0.postDISTANCE[.dev0] """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"] or pieces["dirty"]: rendered += ".post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" else: # exception #1 rendered = "0.post%d" % pieces["distance"] if pieces["dirty"]: rendered += ".dev0" return rendered def render_git_describe(pieces: Dict[str, Any]) -> str: """TAG[-DISTANCE-gHEX][-dirty]. Like 'git describe --tags --dirty --always'. Exceptions: 1: no tags. HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] if pieces["distance"]: rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render_git_describe_long(pieces: Dict[str, Any]) -> str: """TAG-DISTANCE-gHEX[-dirty]. Like 'git describe --tags --dirty --always -long'. The distance/hash is unconditional. Exceptions: 1: no tags. 
HEX[-dirty] (note: no 'g' prefix) """ if pieces["closest-tag"]: rendered = pieces["closest-tag"] rendered += "-%d-g%s" % (pieces["distance"], pieces["short"]) else: # exception #1 rendered = pieces["short"] if pieces["dirty"]: rendered += "-dirty" return rendered def render(pieces: Dict[str, Any], style: str) -> Dict[str, Any]: """Render the given version pieces into the requested style.""" if pieces["error"]: return {"version": "unknown", "full-revisionid": pieces.get("long"), "dirty": None, "error": pieces["error"], "date": None} if not style or style == "default": style = "pep440" # the default if style == "pep440": rendered = render_pep440(pieces) elif style == "pep440-branch": rendered = render_pep440_branch(pieces) elif style == "pep440-pre": rendered = render_pep440_pre(pieces) elif style == "pep440-post": rendered = render_pep440_post(pieces) elif style == "pep440-post-branch": rendered = render_pep440_post_branch(pieces) elif style == "pep440-old": rendered = render_pep440_old(pieces) elif style == "git-describe": rendered = render_git_describe(pieces) elif style == "git-describe-long": rendered = render_git_describe_long(pieces) else: raise ValueError("unknown style '%s'" % style) return {"version": rendered, "full-revisionid": pieces["long"], "dirty": pieces["dirty"], "error": None, "date": pieces.get("date")} def get_versions() -> Dict[str, Any]: """Get version information or return default if unable to do so.""" # I am in _version.py, which lives at ROOT/VERSIONFILE_SOURCE. If we have # __file__, we can work backwards from there to the root. Some # py2exe/bbfreeze/non-CPython implementations don't do __file__, in which # case we can only use expanded keywords. cfg = get_config() verbose = cfg.verbose try: return git_versions_from_keywords(get_keywords(), cfg.tag_prefix, verbose) except NotThisMethod: pass try: root = os.path.realpath(__file__) # versionfile_source is the relative path from the top of the source # tree (where the .git directory might live) to this file. Invert # this to find the root from __file__. for _ in cfg.versionfile_source.split('/'): root = os.path.dirname(root) except NameError: return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to find root of source tree", "date": None} try: pieces = git_pieces_from_vcs(cfg.tag_prefix, root, verbose) return render(pieces, cfg.style) except NotThisMethod: pass try: if cfg.parentdir_prefix: return versions_from_parentdir(cfg.parentdir_prefix, root, verbose) except NotThisMethod: pass return {"version": "0+unknown", "full-revisionid": None, "dirty": None, "error": "unable to compute version", "date": None} ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353412.0 scantree-0.0.2/src/scantree/compat.py0000644000175100001770000000042514551320304017150 0ustar00runnerdockerdef fspath(path): """In python 2: os.path... 
and scandir does not support PathLike objects""" if isinstance(path, str): return path if hasattr(path, '__fspath__'): return path.__fspath__() raise TypeError('Object {} is not a path'.format(path)) ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353412.0 scantree-0.0.2/src/scantree/test_utils.py0000644000175100001770000000513114551320304020063 0ustar00runnerdockerfrom __future__ import print_function, division import os from ._node import LinkedDir, CyclicLinkedDir from ._path import RecursionPath, DirEntryReplacement def assert_dir_entry_equal(de1, de2): # TODO check has attributes assert de1.path == de2.path assert de1.name == de2.name for method, kwargs in [ ('is_dir', {'follow_symlinks': True}), ('is_dir', {'follow_symlinks': False}), ('is_file', {'follow_symlinks': True}), ('is_file', {'follow_symlinks': False}), ('is_symlink', {}), ('stat', {'follow_symlinks': True}), ('stat', {'follow_symlinks': False}), ('inode', {}) ]: for attempt in [1, 2]: # done two times to verify caching! res1 = getattr(de1, method)(**kwargs) res2 = getattr(de2, method)(**kwargs) if not res1 == res2: raise AssertionError( '\nde1.{method}(**{kwargs}) == {res1} != ' '\nde2.{method}(**{kwargs}) == {res2} ' '\n(attempt: {attempt})' '\nde1: {de1}' '\nde2: {de2}'.format( method=method, kwargs=kwargs, res1=res1, res2=res2, attempt=attempt, de1=de1, de2=de2 ) ) def assert_recursion_path_equal(p1, p2): assert p1.root == p2.root assert p1.relative == p2.relative assert p1.real == p2.real assert p1.absolute == p2.absolute assert_dir_entry_equal(p1, p2) def assert_dir_node_equal(dn1, dn2): assert_recursion_path_equal(dn1.path, dn2.path) if isinstance(dn1, LinkedDir): assert isinstance(dn2, LinkedDir) elif isinstance(dn1, CyclicLinkedDir): assert isinstance(dn2, CyclicLinkedDir) assert_recursion_path_equal(dn1.target_path, dn2.target_path) else: for path1, path2 in zip(dn1.files, dn2.files): assert_recursion_path_equal(path1, path2) for sub_dn1, sub_dn2 in zip(dn1.directories, dn2.directories): assert_dir_node_equal(sub_dn1, sub_dn2) def get_mock_recursion_path(relative, root=None, is_dir=False, is_symlink=False): dir_entry = DirEntryReplacement( path=relative, name=os.path.basename(relative) ) dir_entry._is_dir = is_dir dir_entry._is_file = not is_dir dir_entry._is_symlink = is_symlink return RecursionPath( root=root, relative=relative, real=None, dir_entry=dir_entry ) ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1705353420.6568758 scantree-0.0.2/src/scantree.egg-info/0000755000175100001770000000000014551320315017006 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353420.0 scantree-0.0.2/src/scantree.egg-info/PKG-INFO0000644000175100001770000001011214551320314020075 0ustar00runnerdockerMetadata-Version: 2.1 Name: scantree Version: 0.0.2 Summary: Flexible recursive directory iterator: scandir meets glob("**", recursive=True) Home-page: https://github.com/andhus/scantree Author: Anders Huss Author-email: andhus@kth.se License: MIT Description-Content-Type: text/markdown License-File: LICENSE Requires-Dist: attrs>=18.0.0 Requires-Dist: pathspec>=0.5.9 [![codecov](https://codecov.io/gh/andhus/scantree/branch/master/graph/badge.svg)](https://codecov.io/gh/andhus/scantree) # `scantree` Recursive directory iterator supporting: - flexible filtering including wildcard path matching - in memory representation of file-tree (for repeated access) - efficient access to directory entry properties 
(`posix.DirEntry` interface) extended with real path and path relative to the recursion root directory - detection and handling of cyclic symlinks ## Installation ```commandline pip install scantree ``` ## Usage See source code for full documentation, some generic examples below. Get matching file paths: ```python from scantree import scantree, RecursionFilter tree = scantree('/path/to/dir', RecursionFilter(match=['*.txt'])) print([path.relative for path in tree.filepaths()]) print([path.real for path in tree.filepaths()]) ``` ``` ['d1/d2/file3.txt', 'd1/file2.txt', 'file1.txt'] ['/path/to/other_dir/file3.txt', '/path/to/dir/d1/file2.txt', '/path/to/dir/file1.txt'] ``` Access metadata of directory entries in file tree: ```python d2 = tree.directories[0].directories[0] print(type(d2)) print(d2.path.absolute) print(d2.path.real) print(d2.path.is_symlink()) print(d2.files[0].relative) ``` ``` scantree._node.DirNode /path/to/dir/d1/d2 /path/to/other_dir True d1/d2/file3.txt ``` Aggregate information by operating on tree: ```python hello_count = tree.apply( file_apply=lambda path: sum([ w.lower() == 'hello' for w in path.as_pathlib().read_text().split() ]), dir_apply=lambda dir_: sum(dir_.entries), ) print(hello_count) ``` ``` 3 ``` ```python hello_count_tree = tree.apply( file_apply=lambda path: { 'name': path.name, 'count': sum([ w.lower() == 'hello' for w in path.as_pathlib().read_text().split() ]) }, dir_apply=lambda dir_: { 'name': dir_.path.name, 'count': sum(e['count'] for e in dir_.entries), 'sub_counts': [e for e in dir_.entries] }, ) from pprint import pprint pprint(hello_count_tree) ``` ``` {'count': 3, 'name': 'dir', 'sub_counts': [{'count': 2, 'name': 'file1.txt'}, {'count': 1, 'name': 'd1', 'sub_counts': [{'count': 1, 'name': 'file2.txt'}, {'count': 0, 'name': 'd2', 'sub_counts': [{'count': 0, 'name': 'file3.txt'}]}]}]} ``` Flexible filtering: ```python without_hidden_files = scantree('.', RecursionFilter(match=['*', '!.*'])) without_palindrome_linked_dirs = scantree( '.', lambda paths: [ p for p in paths if not ( p.is_dir() and p.is_symlink() and p.name == p.name[::-1] ) ] ) ``` Comparison: ```python tree = scandir('path/to/dir') # make some operations on filesystem, make sure file tree is the same: assert tree == scandir('path/to/dir') # tree contains absolute/real path info: import shutil shutil.copytree('path/to/dir', 'path/to/other_dir') new_tree = scandir('path/to/other_dir') assert tree != new_tree assert ( [p.relative for p in tree.leafpaths()] == [p.relative for p in new_tree.leafpaths()] ) ``` Inspect symlinks: ```python from scantree import CyclicLinkedDir file_links = [] dir_links = [] cyclic_links = [] def file_apply(path): if path.is_symlink(): file_links.append(path) def dir_apply(dir_node): if dir_node.path.is_symlink(): dir_links.append(dir_node.path) if isinstance(dir_node, CyclicLinkedDir): cyclic_links.append((dir_node.path, dir_node.target_path)) scantree('.', file_apply=file_apply, dir_apply=dir_apply) ``` ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353420.0 scantree-0.0.2/src/scantree.egg-info/SOURCES.txt0000644000175100001770000000106014551320314020666 0ustar00runnerdockerLICENSE MANIFEST.in README.md pyproject.toml setup.cfg setup.py src/scantree/__init__.py src/scantree/_filter.py src/scantree/_node.py src/scantree/_path.py src/scantree/_scan.py src/scantree/_version.py src/scantree/compat.py src/scantree/test_utils.py src/scantree.egg-info/PKG-INFO src/scantree.egg-info/SOURCES.txt 
src/scantree.egg-info/dependency_links.txt src/scantree.egg-info/requires.txt src/scantree.egg-info/top_level.txt tests/test_compat.py tests/test_filter.py tests/test_node.py tests/test_path.py tests/test_scantree.py tests/test_test_utils.py././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353420.0 scantree-0.0.2/src/scantree.egg-info/dependency_links.txt0000644000175100001770000000000114551320314023053 0ustar00runnerdocker ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353420.0 scantree-0.0.2/src/scantree.egg-info/requires.txt0000644000175100001770000000003614551320314021404 0ustar00runnerdockerattrs>=18.0.0 pathspec>=0.5.9 ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353420.0 scantree-0.0.2/src/scantree.egg-info/top_level.txt0000644000175100001770000000001114551320314021527 0ustar00runnerdockerscantree ././@PaxHeader0000000000000000000000000000003400000000000010212 xustar0028 mtime=1705353420.6568758 scantree-0.0.2/tests/0000755000175100001770000000000014551320315014063 5ustar00runnerdocker././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353412.0 scantree-0.0.2/tests/test_compat.py0000644000175100001770000000212714551320304016757 0ustar00runnerdockerfrom __future__ import print_function, division from os import scandir from posix import DirEntry import pytest from scantree.compat import fspath class TestFSPath(object): def test_string(self): assert fspath('path/to') == 'path/to' def test__fspath__(self): class Path(object): def __init__(self, path): self.path = path def __fspath__(self): return self.path assert fspath(Path('path/to/this')) == 'path/to/this' def test_not_supported(self): with pytest.raises(TypeError): fspath(1) class TestScandir(object): def test_basic(self, tmpdir): tmpdir.join('file').ensure() for path_like in [tmpdir, str(tmpdir)]: [de] = list(scandir(path_like)) assert isinstance(de, DirEntry) assert de.name == 'file' def test_none_path(self, tmpdir): tmpdir.join('file').ensure() with tmpdir.as_cwd(): [de] = list(scandir(None)) assert isinstance(de, DirEntry) assert de.name == 'file' ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353412.0 scantree-0.0.2/tests/test_filter.py0000644000175100001770000000426214551320304016763 0ustar00runnerdockerfrom __future__ import print_function, division import pytest from scantree.test_utils import get_mock_recursion_path class TestRecursionFilterBase(object): from scantree import RecursionFilter as test_class @pytest.mark.parametrize( 'description, filter_kwargs, expected_output', [ ( 'include all', {'linked_dirs': True, 'linked_files': True}, ['dir', 'dir/file.txt', 'ldir', 'dir/lfile'] ), ( 'default include all', {}, ['dir', 'dir/file.txt', 'ldir', 'dir/lfile'] ), ( 'exclude linked dirs', {'linked_dirs': False, 'linked_files': True}, ['dir', 'dir/file.txt', 'dir/lfile'] ), ( 'exclude linked files', {'linked_dirs': True, 'linked_files': False}, ['dir', 'dir/file.txt', 'ldir'] ), ( 'exclude linked files and dirs', {'linked_dirs': False, 'linked_files': False}, ['dir', 'dir/file.txt'] ), ( 'include only .txt files (dirs always included)', {'match': ['*.txt']}, ['dir', 'dir/file.txt', 'ldir'] ), ( 'exclude .txt files (dirs always included)', {'match': ['*', '!*.txt']}, ['dir', 'ldir', 'dir/lfile'] ), ] ) def test_call( self, description, filter_kwargs, expected_output ): paths = [ get_mock_recursion_path('dir', is_dir=True), 
get_mock_recursion_path('dir/file.txt'), get_mock_recursion_path('ldir', is_dir=True, is_symlink=True), get_mock_recursion_path('dir/lfile', is_symlink=True), ] relpath_to_path = {path.relative: path for path in paths} rfilter = self.test_class(**filter_kwargs) filtered_paths = list(rfilter(paths)) assert filtered_paths == [ relpath_to_path[relpath] for relpath in expected_output ] ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353412.0 scantree-0.0.2/tests/test_node.py0000644000175100001770000000720014551320304016416 0ustar00runnerdockerfrom __future__ import print_function, division import pytest from scantree import ( RecursionPath, DirNode, LinkedDir, CyclicLinkedDir ) from scantree.test_utils import get_mock_recursion_path def create_basic_entries(local_path): d1 = local_path.join('d1') d1.mkdir() f1 = local_path.join('f1') f1.write('file1') local_path.join('ld1').mksymlinkto(d1) local_path.join('lf1').mksymlinkto(f1) class TestDirNode(object): test_class = DirNode def test_init(self): dn = self.test_class(RecursionPath.from_root('.'), [], [None]) assert dn.directories == (None,) assert dn.files == tuple() def test_empty(self): dn = self.test_class(RecursionPath.from_root('.'), [], []) assert dn.empty def test_apply(self, tmpdir): create_basic_entries(tmpdir) root = RecursionPath.from_root(tmpdir) d1 = next((rp for rp in root.scandir() if rp.name == 'd1')) dn = self.test_class( path=root, directories=[self.test_class(d1, files=[1., 2.])], files=[0.5] ) dn_new = dn.apply( file_apply=lambda x: x*2, dir_apply=lambda dn_: sum(dn_.directories) ** 2 + sum(dn_.files) ) assert dn_new == ((2 + 4) ** 2 + 1) def test_leafpaths_filepaths(self): rp_file1 = get_mock_recursion_path('file1') rp_dir1 = get_mock_recursion_path('dir1') rp_file2 = get_mock_recursion_path('dir1/file2') rp_linked_dir = get_mock_recursion_path('linked_dir') rp_cyclic = get_mock_recursion_path('cyclic') rp_cyclic_target = get_mock_recursion_path('cyclic_target') ordered_leafpaths = [rp_cyclic, rp_file2, rp_file1, rp_linked_dir] ordered_filepaths = [rp_file2, rp_file1] tree = self.test_class( path=get_mock_recursion_path(''), files=[rp_file1], directories=[ CyclicLinkedDir(path=rp_cyclic, target_path=rp_cyclic_target), self.test_class( path=rp_dir1, files=[rp_file2] ), LinkedDir(path=rp_linked_dir), ] ) assert tree.leafpaths() == ordered_leafpaths assert tree.filepaths() == ordered_filepaths def test_entries(self): dn = self.test_class( RecursionPath.from_root('.'), files=[None], directories=['d1', 'd2'] ) assert dn.entries == dn.files + dn.directories class TestLinkedDir(object): test_class = LinkedDir @staticmethod def get_default_kwargs(): return {'path': get_mock_recursion_path('path/to/ld')} def test_undefined_attributes(self): ld = self.test_class(**self.get_default_kwargs()) for attribute in ['files', 'directories', 'entries']: with pytest.raises(AttributeError): getattr(ld, attribute) def test_empty(self): ld = self.test_class(**self.get_default_kwargs()) with pytest.raises(AttributeError): ld.empty def test_apply(self): ld = self.test_class(**self.get_default_kwargs()) res = ld.apply(dir_apply=lambda x: (x, 1), file_apply=None) assert res == (ld, 1) class TestCyclicLinkedDir(TestLinkedDir): test_class = CyclicLinkedDir @staticmethod def get_default_kwargs(): return { 'path': get_mock_recursion_path('path/to/ld'), 'target_path': get_mock_recursion_path('target') } def test_empty(self): cld = self.test_class(**self.get_default_kwargs()) assert cld.empty is False 
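# A minimal sketch of the DirNode.apply() contract exercised by
# TestDirNode.test_apply above: `file_apply` maps every file entry,
# `dir_apply` folds the already-mapped entries of a single directory, and
# apply() returns the dir_apply result for the root node. The helper name
# `count_tree_entries` and the counting logic are illustrative assumptions,
# not scantree API, and the sketch assumes the scanned tree contains no
# LinkedDir/CyclicLinkedDir nodes (those do not expose `entries`).
def count_tree_entries(directory):
    """Hypothetical helper: count all entries under `directory`, root included."""
    from scantree import scantree

    tree = scantree(directory)
    return tree.apply(
        # every file is mapped to 1 ...
        file_apply=lambda path: 1,
        # ... and every directory folds its mapped entries, plus 1 for itself
        dir_apply=lambda dir_: sum(dir_.entries) + 1,
    )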
././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353412.0 scantree-0.0.2/tests/test_path.py0000644000175100001770000001053714551320304016434 0ustar00runnerdockerfrom __future__ import print_function, division from os import scandir from pathlib import Path import pytest from scantree.compat import fspath from scantree import DirEntryReplacement from scantree.test_utils import assert_dir_entry_equal def create_basic_entries(local_path): d1 = local_path.join('d1') d1.mkdir() f1 = local_path.join('f1') f1.write('file1') local_path.join('ld1').mksymlinkto(d1) local_path.join('lf1').mksymlinkto(f1) class TestDirEntryReplacement(object): test_class = DirEntryReplacement def test_equivalence(self, tmpdir): create_basic_entries(tmpdir) for de_true in scandir(tmpdir): de_rep_from_entry = self.test_class.from_dir_entry(de_true) de_rep_from_path = self.test_class.from_path(tmpdir.join(de_true.name)) assert_dir_entry_equal(de_rep_from_entry, de_true) assert de_rep_from_entry == de_true assert_dir_entry_equal(de_rep_from_path, de_true) assert de_rep_from_path == de_true # test not equal de_rep = self.test_class.from_dir_entry(de_true) assert de_rep != 'other type' for attribute in ['path', 'name']: de_rep = self.test_class.from_dir_entry(de_true) setattr(de_rep, attribute, "wrong value") assert de_rep != de_true for bool_attr in ['_is_dir', '_is_file', '_is_symlink']: de_rep = self.test_class.from_dir_entry(de_true) assert de_rep == de_true # must load cache values before negating setattr(de_rep, bool_attr, not getattr(de_rep, bool_attr)) assert de_rep != de_true de_rep = self.test_class.from_dir_entry(de_true) assert de_rep == de_true de_rep._stat_sym = "wrong_value" assert de_rep != de_true de_rep = self.test_class.from_dir_entry(de_true) assert de_rep == de_true de_rep._stat_nosym = "wrong_value" assert de_rep != de_true def test_raise_on_not_exists(self, tmpdir): with pytest.raises(IOError): self.test_class.from_path(tmpdir.join('no such entry')) class TestRecursionPath(object): from scantree import RecursionPath as test_class def test_from_root(self, tmpdir): create_basic_entries(tmpdir) rpath = self.test_class.from_root(tmpdir.realpath()) assert rpath.root == rpath.real == tmpdir.realpath() assert rpath.relative == '' d1 = rpath._join(DirEntryReplacement.from_path(tmpdir.join('d1'))) assert d1.relative == 'd1' assert d1.real == tmpdir.join('d1').realpath() assert d1.root == rpath.root ld1 = rpath._join(DirEntryReplacement.from_path(tmpdir.join('ld1'))) assert ld1.relative == 'ld1' assert ld1.real == tmpdir.join('d1').realpath() assert d1.root == rpath.root def test_dir_entry_interface(self, tmpdir): create_basic_entries(tmpdir) for de_true in scandir(tmpdir): de_repl = DirEntryReplacement.from_path(de_true.path) rpath_from_de_true = self.test_class.from_root(de_true) rpath_from_de_repl = self.test_class.from_root(de_repl) rpath_from_path = self.test_class.from_root(de_true.path) assert_dir_entry_equal(de_true, rpath_from_de_true) assert_dir_entry_equal(de_true, rpath_from_de_repl) assert_dir_entry_equal(de_true, rpath_from_path) def test_scandir(self, tmpdir): create_basic_entries(tmpdir) rpath = self.test_class.from_root(tmpdir) sub_rpaths = list(rpath.scandir()) sub_des = list(scandir(rpath)) assert len(sub_rpaths) == len(sub_des) for sub_de, sub_rpath in zip(sub_des, sub_rpaths): assert_dir_entry_equal(sub_de, sub_rpath) def test_picklable(self, tmpdir): rpath = self.test_class.from_root(tmpdir) state = rpath.__getstate__() dir_entry = state[-1] assert 
isinstance(dir_entry, DirEntryReplacement) rpath.__setstate__(state) assert rpath._dir_entry is dir_entry def test_as_pathlib(self, tmpdir): rpath = self.test_class.from_root(tmpdir) pathlib_path = rpath.as_pathlib() assert isinstance(pathlib_path, Path) assert fspath(pathlib_path) == rpath.absolute ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353412.0 scantree-0.0.2/tests/test_scantree.py0000644000175100001770000002634314551320304017306 0ustar00runnerdockerfrom __future__ import print_function, division import os import re from time import sleep, time from functools import partial import pytest from scantree import ( DirNode, scantree, RecursionPath, RecursionFilter, CyclicLinkedDir, SymlinkRecursionError, LinkedDir ) from scantree.test_utils import assert_dir_node_equal class TestScantree(object): def test_basic(self, tmpdir): tmpdir.ensure('root/f1') tmpdir.ensure('root/d1/f1') tmpdir.ensure('root/d1/d11/f1') tmpdir.ensure('root/d2/f1') root = tmpdir.join('root') tree = scantree(root) def rp(relative): recursion_path = RecursionPath.from_root(root.join(relative)) recursion_path.relative = relative recursion_path.root = root.strpath return recursion_path tree_expected = DirNode( path=rp(''), files=[rp('f1')], directories=[ DirNode( path=rp('d1'), files=[rp('d1/f1')], directories=[ DirNode( path=rp('d1/d11'), files=[rp('d1/d11/f1')] ) ] ), DirNode( path=rp('d2'), files=[rp('d2/f1')] ) ] ) assert_dir_node_equal(tree, tree_expected) def test_not_a_directory(self, tmpdir): tmpdir.ensure('root/f1') # does not exist with pytest.raises(ValueError): scantree(tmpdir.join('wrong_root')) # is a file with pytest.raises(ValueError): scantree(tmpdir.join('root/f1')) @pytest.mark.parametrize('include_empty', [True, False]) def test_cyclic_links(self, tmpdir, include_empty): root = tmpdir.join('root') d1 = root.join('d1') d1.ensure(dir=True) d1.join('link_back_d1').mksymlinkto(d1) d1.join('link_back_root').mksymlinkto(root) tree = scantree(root, include_empty=include_empty) def rp(relative): recursion_path = RecursionPath.from_root(root.join(relative)) recursion_path.relative = relative recursion_path.root = root.strpath return recursion_path tree_expected = DirNode( path=rp(''), directories=[ DirNode( path=rp('d1'), directories=[ CyclicLinkedDir( path=rp('d1/link_back_d1'), target_path=rp('d1') ), CyclicLinkedDir( path=rp('d1/link_back_root'), target_path=rp('') ) ] ) ] ) assert_dir_node_equal(tree, tree_expected) with pytest.raises(SymlinkRecursionError) as exc_info: scantree(root, allow_cyclic_links=False) assert re.match( re.compile( "Symlink recursion: Real path .*root/d1' " "was encountered at .*root/d1' " "and then .*root/d1/link_back_d1'."), str(exc_info.value) ) @pytest.mark.parametrize('include_empty', [True, False]) def test_follow_links(self, tmpdir, include_empty): root = tmpdir.join('root') root.join('f1').ensure(dir=False) external_d1 = tmpdir.join('d1') external_d1.join('f2').ensure(dir=False) root.join('link_to_d1').mksymlinkto(external_d1) def rp(relative): recursion_path = RecursionPath.from_root(root.join(relative)) recursion_path.relative = relative recursion_path.root = root.strpath return recursion_path tree_follow_false = scantree( root, include_empty=include_empty, follow_links=False ) tree_follow_true = scantree( root, include_empty=include_empty, follow_links=True ) tree_follow_false_expected = DirNode( path=rp(''), files=[rp('f1')], directories=[ LinkedDir(path=rp('link_to_d1')) ] ) tree_follow_true_expected = DirNode( path=rp(''), 
files=[rp('f1')], directories=[ DirNode( path=rp('link_to_d1'), files=[rp('link_to_d1/f2')] ) ] ) assert_dir_node_equal(tree_follow_false, tree_follow_false_expected) assert_dir_node_equal(tree_follow_true, tree_follow_true_expected) def test_include_empty(self, tmpdir): root = tmpdir.join('root') root.join('d1').ensure(dir=True) tree_empty_true = scantree(root, include_empty=True) def rp(relative): recursion_path = RecursionPath.from_root(root.join(relative)) recursion_path.relative = relative recursion_path.root = root.strpath return recursion_path tree_empty_true_expected = DirNode( path=rp(''), directories=[DirNode(path=rp('d1'))] ) assert_dir_node_equal(tree_empty_true, tree_empty_true_expected) tree_empty_false = scantree(root, include_empty=False) tree_empty_false_expected = DirNode(path=rp('')) assert tree_empty_false == tree_empty_false_expected def test_multiprocess_speedup(self, tmpdir): num_files = 4 for i in range(num_files): tmpdir.join('file_{}'.format(i)).ensure() wait_time = 0.5 expected_min_elapsed = wait_time * num_files slow_file_apply = get_slow_identity_f(wait_time) start = time() scantree(tmpdir, file_apply=slow_file_apply) end = time() elapsed_sequential = end - start assert elapsed_sequential > expected_min_elapsed start = time() scantree(tmpdir, file_apply=slow_file_apply, jobs=num_files) end = time() elapsed_muliproc = end - start assert elapsed_muliproc < expected_min_elapsed / 2 # just require at least half to account for multiprocessing overhead def test_cache_by_real_path_speedup(self, tmpdir): target_file = tmpdir.join('target_file') target_file.ensure() num_links = 10 for i in range(num_links): tmpdir.join('link_{}'.format(i)).mksymlinkto(target_file) wait_time = 0.1 expected_min_elapsed = wait_time * (num_links + 1) slow_file_apply = get_slow_identity_f(wait_time) start = time() scantree(tmpdir, file_apply=slow_file_apply) end = time() elapsed_sequential = end - start assert elapsed_sequential > expected_min_elapsed overhead = elapsed_sequential - expected_min_elapsed overhead_margin_factor = 1.5 expected_max_elapsed = overhead * overhead_margin_factor + wait_time assert expected_max_elapsed < expected_min_elapsed start = time() scantree(tmpdir, file_apply=slow_file_apply, cache_file_apply=True) end = time() elapsed_cache = end - start assert elapsed_cache < expected_max_elapsed def test_cache_together_with_multiprocess_speedup(self, tmpdir): target_file_names = ['target_file_1', 'target_file_2'] num_links_per_file = 10 for i, target_file_name in enumerate(target_file_names): target_file = tmpdir.join(target_file_name) target_file.ensure() for j in range(num_links_per_file): tmpdir.join('link_{}_{}'.format(i, j)).mksymlinkto(target_file) num_links = num_links_per_file * len(target_file_names) wait_time = 0.1 jobs = 2 expected_min_elapsed = ( wait_time * (num_links + len(target_file_names)) ) / jobs slow_file_apply = get_slow_identity_f(wait_time) start = time() scantree(tmpdir, file_apply=slow_file_apply, jobs=2) end = time() elapsed_mp = end - start assert elapsed_mp > expected_min_elapsed overhead = elapsed_mp - expected_min_elapsed overhead_margin_factor = 1.5 expected_max_elapsed = overhead * overhead_margin_factor + wait_time * 2 assert expected_max_elapsed < expected_min_elapsed start = time() scantree(tmpdir, file_apply=slow_file_apply, cache_file_apply=True, jobs=2) end = time() elapsed_mp_cache = end - start assert elapsed_mp_cache < expected_max_elapsed def _slow_identity(x, wait_time): sleep(wait_time) return x def 
get_slow_identity_f(wait_time): return partial(_slow_identity, wait_time=wait_time) class TestIncludedPaths(object): """Verify included leafpaths given combinations of options""" @staticmethod def get_leafpaths(directory, **kwargs): """Extract relative paths to leafs (with extra "/." for directories)""" return [ path.relative if path.is_file() else os.path.join(path.relative, '.') for path in scantree(directory, **kwargs).leafpaths() ] def test_basic(self, tmpdir): tmpdir.ensure('root/f1') tmpdir.ensure('root/d1/f1') tmpdir.ensure('root/d1/d11/f1') tmpdir.ensure('root/d2/f1') expected_filepaths = ['d1/d11/f1', 'd1/f1', 'd2/f1', 'f1'] filepaths = self.get_leafpaths(tmpdir.join('root')) assert filepaths == expected_filepaths # test pure string path as well filepaths = self.get_leafpaths(tmpdir.join('root').strpath) assert filepaths == expected_filepaths def test_symlinked_file(self, tmpdir): tmpdir.ensure('root/f1') tmpdir.ensure('linked_file') tmpdir.join('root/f2').mksymlinkto(tmpdir.join('linked_file')) root = tmpdir.join('root') # NOTE `follow_links` has no effect if linked files are included filepaths = self.get_leafpaths(root, follow_links=False) assert filepaths == ['f1', 'f2'] filepaths = self.get_leafpaths(root, follow_links=True) assert filepaths == ['f1', 'f2'] filepaths = self.get_leafpaths( root, recursion_filter=RecursionFilter(linked_files=False), ) assert filepaths == ['f1'] def test_symlinked_dir(self, tmpdir): tmpdir.ensure('root/f1') tmpdir.ensure('linked_dir/f1') tmpdir.ensure('linked_dir/f2') tmpdir.join('root/d1').mksymlinkto(tmpdir.join('linked_dir')) root = tmpdir.join('root') filepaths = self.get_leafpaths(root, follow_links=True) assert filepaths == ['d1/f1', 'd1/f2', 'f1'] # default is `follow_links=True` filepaths = self.get_leafpaths(root) assert filepaths == ['d1/f1', 'd1/f2', 'f1'] filepaths = self.get_leafpaths(root, follow_links=False) assert filepaths == ['d1/.', 'f1'] # correct way to ignore linked dirs completely: filepaths = self.get_leafpaths( root, recursion_filter=RecursionFilter(linked_dirs=False), ) assert filepaths == ['f1'] ././@PaxHeader0000000000000000000000000000002600000000000010213 xustar0022 mtime=1705353412.0 scantree-0.0.2/tests/test_test_utils.py0000644000175100001770000000243614551320304017676 0ustar00runnerdockerfrom __future__ import print_function, division import pytest import attr from scantree.test_utils import assert_dir_entry_equal from scantree import DirEntryReplacement class MockStat(object): def __init__(self, st_ino=None): self.st_ino = st_ino class TestAssertDirEntryEqual(object): def get_mock_dir_entry(self): de = DirEntryReplacement(path='/path/to/mock', name='mock') de._is_dir = True de._is_file = False de._is_symlink = False de._stat_sym = MockStat(1) de._stat_nosym = MockStat(0) return de def test_equal(self): de = self.get_mock_dir_entry() assert_dir_entry_equal(de, de) @pytest.mark.parametrize( 'kwargs', [ {'path': 'other/path'}, {'name': 'other_name'}, {'_is_dir': False}, {'_is_file': True}, {'_is_symlink': True}, {'_stat_sym': MockStat(11)}, {'_stat_nosym': MockStat(22)}, ] ) def test_not_equal(self, kwargs): de = self.get_mock_dir_entry() de_different = attr.evolve(de) for k, v in kwargs.items(): setattr(de_different, k, v) with pytest.raises(AssertionError): assert_dir_entry_equal(de, de_different)
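# A minimal usage sketch for the helpers verified above: it assumes a caller
# wants to check that entries rebuilt from plain paths report the same
# metadata as the os.DirEntry objects produced by os.scandir, mirroring
# TestDirEntryReplacement.test_equivalence in tests/test_path.py. The function
# name `check_snapshot_matches_scandir` is an illustrative assumption, not
# part of scantree or its test suite.
def check_snapshot_matches_scandir(directory):
    """Hypothetical helper: compare rebuilt entries against live scandir output."""
    from os import scandir

    from scantree import DirEntryReplacement
    from scantree.test_utils import assert_dir_entry_equal

    for entry in scandir(directory):
        rebuilt = DirEntryReplacement.from_path(entry.path)
        # assert_dir_entry_equal raises AssertionError on the first mismatch,
        # checking each DirEntry method twice to also exercise result caching
        assert_dir_entry_equal(rebuilt, entry)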