pax_global_header00006660000000000000000000000064144547633470014533gustar00rootroot0000000000000052 comment=4654928ad3b669be69ffbe733408fd816d14653e thombashi-DataProperty-4654928/000077500000000000000000000000001445476334700163345ustar00rootroot00000000000000thombashi-DataProperty-4654928/.github/000077500000000000000000000000001445476334700176745ustar00rootroot00000000000000thombashi-DataProperty-4654928/.github/workflows/000077500000000000000000000000001445476334700217315ustar00rootroot00000000000000thombashi-DataProperty-4654928/.github/workflows/ci.yml000066400000000000000000000070101445476334700230450ustar00rootroot00000000000000name: CI on: push: paths-ignore: - 'misc/**' - '.gitignore' - 'README.rst' pull_request: paths-ignore: - 'misc/**' - '.gitignore' - 'README.rst' env: PYTEST_DISCORD_WEBHOOK: ${{ secrets.PYTEST_DISCORD_WEBHOOK }} jobs: build-package: runs-on: ubuntu-latest concurrency: group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref_name }}-build cancel-in-progress: true timeout-minutes: 20 container: image: ghcr.io/thombashi/python-ci:3.11 steps: - uses: actions/checkout@v3 - run: make build lint: runs-on: ubuntu-latest concurrency: group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref_name }}-lint cancel-in-progress: true timeout-minutes: 20 container: image: ghcr.io/thombashi/python-ci:3.11 steps: - uses: actions/checkout@v3 - run: make check unit-test: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: python-version: ['3.7', '3.8', '3.9', '3.10', '3.11', 'pypy-3.10'] os: [ubuntu-latest, macos-latest, windows-latest] concurrency: group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref_name }}-ut-${{ matrix.os }}-${{ matrix.python-version }} cancel-in-progress: true timeout-minutes: 20 steps: - uses: actions/checkout@v3 - name: Setup Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} cache: pip cache-dependency-path: | setup.py **/*requirements.txt tox.ini - name: Install pip run: python -m pip install --upgrade --disable-pip-version-check "pip>=21.1" - run: make setup-ci - run: tox -e py coverage: runs-on: ubuntu-latest concurrency: group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref_name }}-coverage cancel-in-progress: true timeout-minutes: 20 steps: - uses: actions/checkout@v3 - name: Setup Python uses: actions/setup-python@v4 with: python-version: '3.10' cache: pip cache-dependency-path: | setup.py **/*requirements.txt tox.ini - run: make setup-ci - run: tox -e cov # tox -e cov -- --discord-verbose=0 # reduce verbosity level of summary that send to discord - name: Upload coverage report env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} run: | python -m pip install --upgrade --disable-pip-version-check coveralls tomli coveralls --service=github smoke-examples: runs-on: ubuntu-latest concurrency: group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref_name }}-ex cancel-in-progress: true timeout-minutes: 20 steps: - uses: actions/checkout@v3 - name: Setup Python uses: actions/setup-python@v4 with: python-version: '3.11' cache: pip cache-dependency-path: | setup.py **/*requirements.txt - name: Install dependencies run: python -m pip install --upgrade --disable-pip-version-check . jupyter - name: Run tests run: | set -eux for f in examples/py/*.py; do python "$f" done for f in examples/ipynb/*.ipynb; do jupyter execute "$f" done thombashi-DataProperty-4654928/.gitignore000066400000000000000000000026011445476334700203230ustar00rootroot00000000000000# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # User settings _sandbox/ *_profile Untitled.ipynb .pytype/ thombashi-DataProperty-4654928/LICENSE000066400000000000000000000020741445476334700173440ustar00rootroot00000000000000The MIT License (MIT) Copyright (c) 2016 Tsuyoshi Hombashi Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. thombashi-DataProperty-4654928/MANIFEST.in000066400000000000000000000003371445476334700200750ustar00rootroot00000000000000include LICENSE include README.rst include setup.cfg include tox.ini include */py.typed recursive-include requirements * recursive-include test * recursive-include misc * global-exclude __pycache__/* global-exclude *.pyc thombashi-DataProperty-4654928/Makefile000066400000000000000000000017471445476334700200050ustar00rootroot00000000000000AUTHOR := thombashi PACKAGE := DataProperty BUILD_WORK_DIR := _work PKG_BUILD_DIR := $(BUILD_WORK_DIR)/$(PACKAGE) PYTHON := python3 .PHONY: build-remote build-remote: clean @mkdir -p $(BUILD_WORK_DIR) @cd $(BUILD_WORK_DIR) && \ git clone https://github.com/$(AUTHOR)/$(PACKAGE).git --depth 1 && \ cd $(PACKAGE) && \ $(PYTHON) -m tox -e build ls -lh $(PKG_BUILD_DIR)/dist/* .PHONY: build build: clean @$(PYTHON) -m tox -e build ls -lh dist/* .PHONY: check check: @$(PYTHON) -m tox -e lint .PHONY: clean clean: @rm -rf $(BUILD_WORK_DIR) @$(PYTHON) -m tox -e clean .PHONY: fmt fmt: @$(PYTHON) -m tox -e fmt .PHONY: release release: cd $(PKG_BUILD_DIR) && $(PYTHON) setup.py release --verbose --search-dir dataproperty $(MAKE) clean .PHONY: setup-ci setup-ci: @$(PYTHON) -m pip install -q --disable-pip-version-check --upgrade tox .PHONY: setup setup: setup-ci @$(PYTHON) -m pip install -q --disable-pip-version-check --upgrade -e .[test] releasecmd @$(PYTHON) -m pip check thombashi-DataProperty-4654928/README.rst000066400000000000000000000230521445476334700200250ustar00rootroot00000000000000.. contents:: **DataProperty** :backlinks: top :local: Summary ======= A Python library for extract property from data. .. image:: https://badge.fury.io/py/DataProperty.svg :target: https://badge.fury.io/py/DataProperty :alt: PyPI package version .. image:: https://anaconda.org/conda-forge/DataProperty/badges/version.svg :target: https://anaconda.org/conda-forge/DataProperty :alt: conda-forge package version .. image:: https://img.shields.io/pypi/pyversions/DataProperty.svg :target: https://pypi.org/project/DataProperty :alt: Supported Python versions .. image:: https://img.shields.io/pypi/implementation/DataProperty.svg :target: https://pypi.org/project/DataProperty :alt: Supported Python implementations .. image:: https://github.com/thombashi/DataProperty/actions/workflows/ci.yml/badge.svg :target: https://github.com/thombashi/DataProperty/actions/workflows/ci.yml :alt: CI status of Linux/macOS/Windows .. image:: https://coveralls.io/repos/github/thombashi/DataProperty/badge.svg?branch=master :target: https://coveralls.io/github/thombashi/DataProperty?branch=master :alt: Test coverage .. image:: https://github.com/thombashi/DataProperty/actions/workflows/github-code-scanning/codeql/badge.svg :target: https://github.com/thombashi/DataProperty/actions/workflows/github-code-scanning/codeql :alt: CodeQL Installation ============ Installation: pip ------------------------------ :: pip install DataProperty Installation: conda ------------------------------ :: conda install -c conda-forge dataproperty Installation: apt ------------------------------ :: sudo add-apt-repository ppa:thombashi/ppa sudo apt update sudo apt install python3-dataproperty Usage ===== Extract property of data ------------------------ e.g. Extract a ``float`` value property ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: python >>> from dataproperty import DataProperty >>> DataProperty(-1.1) data=-1.1, type=REAL_NUMBER, align=right, ascii_width=4, int_digits=1, decimal_places=1, extra_len=1 e.g. Extract a ``int`` value property ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: python >>> from dataproperty import DataProperty >>> DataProperty(123456789) data=123456789, type=INTEGER, align=right, ascii_width=9, int_digits=9, decimal_places=0, extra_len=0 e.g. Extract a ``str`` (ascii) value property ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: python >>> from dataproperty import DataProperty >>> DataProperty("sample string") data=sample string, type=STRING, align=left, length=13, ascii_width=13, extra_len=0 e.g. Extract a ``str`` (multi-byte) value property ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: python >>> from dataproperty import DataProperty >>> str(DataProperty("吾輩は猫である")) data=吾輩は猫である, type=STRING, align=left, length=7, ascii_width=14, extra_len=0 e.g. Extract a time (``datetime``) value property ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: python >>> import datetime >>> from dataproperty import DataProperty >>> DataProperty(datetime.datetime(2017, 1, 1, 0, 0, 0)) data=2017-01-01 00:00:00, type=DATETIME, align=left, ascii_width=19, extra_len=0 e.g. Extract a ``bool`` value property ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. code:: python >>> from dataproperty import DataProperty >>> DataProperty(True) data=True, type=BOOL, align=left, ascii_width=4, extra_len=0 Extract data property for each element from a matrix ---------------------------------------------------- ``DataPropertyExtractor.to_dp_matrix`` method returns a matrix of ``DataProperty`` instances from a data matrix. An example data set and the result are as follows: :Sample Code: .. code:: python import datetime from dataproperty import DataPropertyExtractor dp_extractor = DataPropertyExtractor() dt = datetime.datetime(2017, 1, 1, 0, 0, 0) inf = float("inf") nan = float("nan") dp_matrix = dp_extractor.to_dp_matrix([ [1, 1.1, "aa", 1, 1, True, inf, nan, dt], [2, 2.2, "bbb", 2.2, 2.2, False, "inf", "nan", dt], [3, 3.33, "cccc", -3, "ccc", "true", inf, "NAN", "2017-01-01T01:23:45+0900"], ]) for row, dp_list in enumerate(dp_matrix): for col, dp in enumerate(dp_list): print("row={:d}, col={:d}, {}".format(row, col, str(dp))) :Output: :: row=0, col=0, data=1, type=INTEGER, align=right, ascii_width=1, int_digits=1, decimal_places=0, extra_len=0 row=0, col=1, data=1.1, type=REAL_NUMBER, align=right, ascii_width=3, int_digits=1, decimal_places=1, extra_len=0 row=0, col=2, data=aa, type=STRING, align=left, ascii_width=2, length=2, extra_len=0 row=0, col=3, data=1, type=INTEGER, align=right, ascii_width=1, int_digits=1, decimal_places=0, extra_len=0 row=0, col=4, data=1, type=INTEGER, align=right, ascii_width=1, int_digits=1, decimal_places=0, extra_len=0 row=0, col=5, data=True, type=BOOL, align=left, ascii_width=4, extra_len=0 row=0, col=6, data=Infinity, type=INFINITY, align=left, ascii_width=8, extra_len=0 row=0, col=7, data=NaN, type=NAN, align=left, ascii_width=3, extra_len=0 row=0, col=8, data=2017-01-01 00:00:00, type=DATETIME, align=left, ascii_width=19, extra_len=0 row=1, col=0, data=2, type=INTEGER, align=right, ascii_width=1, int_digits=1, decimal_places=0, extra_len=0 row=1, col=1, data=2.2, type=REAL_NUMBER, align=right, ascii_width=3, int_digits=1, decimal_places=1, extra_len=0 row=1, col=2, data=bbb, type=STRING, align=left, ascii_width=3, length=3, extra_len=0 row=1, col=3, data=2.2, type=REAL_NUMBER, align=right, ascii_width=3, int_digits=1, decimal_places=1, extra_len=0 row=1, col=4, data=2.2, type=REAL_NUMBER, align=right, ascii_width=3, int_digits=1, decimal_places=1, extra_len=0 row=1, col=5, data=False, type=BOOL, align=left, ascii_width=5, extra_len=0 row=1, col=6, data=Infinity, type=INFINITY, align=left, ascii_width=8, extra_len=0 row=1, col=7, data=NaN, type=NAN, align=left, ascii_width=3, extra_len=0 row=1, col=8, data=2017-01-01 00:00:00, type=DATETIME, align=left, ascii_width=19, extra_len=0 row=2, col=0, data=3, type=INTEGER, align=right, ascii_width=1, int_digits=1, decimal_places=0, extra_len=0 row=2, col=1, data=3.33, type=REAL_NUMBER, align=right, ascii_width=4, int_digits=1, decimal_places=2, extra_len=0 row=2, col=2, data=cccc, type=STRING, align=left, ascii_width=4, length=4, extra_len=0 row=2, col=3, data=-3, type=INTEGER, align=right, ascii_width=2, int_digits=1, decimal_places=0, extra_len=1 row=2, col=4, data=ccc, type=STRING, align=left, ascii_width=3, length=3, extra_len=0 row=2, col=5, data=True, type=BOOL, align=left, ascii_width=4, extra_len=0 row=2, col=6, data=Infinity, type=INFINITY, align=left, ascii_width=8, extra_len=0 row=2, col=7, data=NaN, type=NAN, align=left, ascii_width=3, extra_len=0 row=2, col=8, data=2017-01-01T01:23:45+0900, type=STRING, align=left, ascii_width=24, length=24, extra_len=0 Full example source code can be found at *examples/py/to_dp_matrix.py* Extract properties for each column from a matrix ------------------------------------------------------ ``DataPropertyExtractor.to_column_dp_list`` method returns a list of ``DataProperty`` instances from a data matrix. The list represents the properties for each column. An example data set and the result are as follows: Example data set and result are as follows: :Sample Code: .. code:: python import datetime from dataproperty import DataPropertyExtractor dp_extractor = DataPropertyExtractor() dt = datetime.datetime(2017, 1, 1, 0, 0, 0) inf = float("inf") nan = float("nan") data_matrix = [ [1, 1.1, "aa", 1, 1, True, inf, nan, dt], [2, 2.2, "bbb", 2.2, 2.2, False, "inf", "nan", dt], [3, 3.33, "cccc", -3, "ccc", "true", inf, "NAN", "2017-01-01T01:23:45+0900"], ] dp_extractor.headers = ["int", "float", "str", "num", "mix", "bool", "inf", "nan", "time"] col_dp_list = dp_extractor.to_column_dp_list(dp_extractor.to_dp_matrix(dp_matrix)) for col_idx, col_dp in enumerate(col_dp_list): print(str(col_dp)) :Output: :: column=0, type=INTEGER, align=right, ascii_width=3, bit_len=2, int_digits=1, decimal_places=0 column=1, type=REAL_NUMBER, align=right, ascii_width=5, int_digits=1, decimal_places=(min=1, max=2) column=2, type=STRING, align=left, ascii_width=4 column=3, type=REAL_NUMBER, align=right, ascii_width=4, int_digits=1, decimal_places=(min=0, max=1), extra_len=(min=0, max=1) column=4, type=STRING, align=left, ascii_width=3, int_digits=1, decimal_places=(min=0, max=1) column=5, type=BOOL, align=left, ascii_width=5 column=6, type=INFINITY, align=left, ascii_width=8 column=7, type=NAN, align=left, ascii_width=3 column=8, type=STRING, align=left, ascii_width=24 Full example source code can be found at *examples/py/to_column_dp_list.py* Dependencies ============ - Python 3.7+ - `Python package dependencies (automatically installed) `__ Optional dependencies --------------------- - `loguru `__ - Used for logging if the package installed thombashi-DataProperty-4654928/dataproperty/000077500000000000000000000000001445476334700210525ustar00rootroot00000000000000thombashi-DataProperty-4654928/dataproperty/__init__.py000066400000000000000000000024341445476334700231660ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ from .__version__ import __author__, __copyright__, __email__, __license__, __version__ from ._align import Align from ._align_getter import align_getter from ._column import ColumnDataProperty from ._common import MAX_STRICT_LEVEL_MAP, MIN_STRICT_LEVEL_MAP, NOT_QUOTING_FLAGS, DefaultValue from ._container import MinMaxContainer from ._dataproperty import DataProperty from ._extractor import DataPropertyExtractor, DataPropertyMatrix, MatrixFormatting from ._formatter import Format from ._function import calc_ascii_char_width, get_integer_digit, get_number_of_digit from ._line_break import LineBreakHandling from ._preprocessor import Preprocessor from .logger import set_logger __all__ = ( "Align", "align_getter", "ColumnDataProperty", "DataProperty", "DataPropertyExtractor", "DataPropertyMatrix", "Format", "LineBreakHandling", "MatrixFormatting", "MinMaxContainer", "Preprocessor", "calc_ascii_char_width", "get_integer_digit", "get_number_of_digit", "MAX_STRICT_LEVEL_MAP", "MIN_STRICT_LEVEL_MAP", "NOT_QUOTING_FLAGS", "DefaultValue", "set_logger", "__author__", "__copyright__", "__email__", "__license__", "__version__", ) thombashi-DataProperty-4654928/dataproperty/__version__.py000066400000000000000000000003111445476334700237000ustar00rootroot00000000000000__author__ = "Tsuyoshi Hombashi" __copyright__ = f"Copyright 2016, {__author__}" __license__ = "MIT License" __version__ = "1.0.1" __maintainer__ = __author__ __email__ = "tsuyoshi.hombashi@gmail.com" thombashi-DataProperty-4654928/dataproperty/_align.py000066400000000000000000000010271445476334700226550ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import enum @enum.unique class Align(enum.Enum): AUTO = (1 << 0, "auto") LEFT = (1 << 1, "left") RIGHT = (1 << 2, "right") CENTER = (1 << 3, "center") @property def align_code(self) -> int: return self.__align_code @property def align_string(self) -> str: return self.__align_string def __init__(self, code: int, string: str) -> None: self.__align_code = code self.__align_string = string thombashi-DataProperty-4654928/dataproperty/_align_getter.py000066400000000000000000000015011445476334700242240ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ from typing import Dict from typepy import Typecode from ._align import Align class AlignGetter: @property def typecode_align_table(self): raise NotImplementedError() @typecode_align_table.setter def typecode_align_table(self, x: Dict[Typecode, Align]) -> None: self.__typecode_align_table = x def get_align_from_typecode(self, typecode: Typecode) -> Align: return self.__typecode_align_table.get(typecode, self.default_align) def __init__(self) -> None: self.typecode_align_table = { Typecode.STRING: Align.LEFT, Typecode.INTEGER: Align.RIGHT, Typecode.REAL_NUMBER: Align.RIGHT, } self.default_align = Align.LEFT align_getter = AlignGetter() thombashi-DataProperty-4654928/dataproperty/_base.py000066400000000000000000000047221445476334700225020ustar00rootroot00000000000000from typing import Dict, Optional, Type from typepy import ( Bool, DateTime, Dictionary, Infinity, Integer, IpAddress, List, Nan, NoneType, NullString, RealNumber, String, Typecode, ) from typepy.type import AbstractType from ._formatter import Formatter from ._interface import DataPeropertyInterface class DataPeropertyBase(DataPeropertyInterface): __slots__ = ( "_datetime_format_str", "_decimal_places", "_east_asian_ambiguous_width", "_formatter", "_typecode", "__format_str", ) __TYPE_CLASS_TABLE: Dict[Typecode, AbstractType] = { Typecode.BOOL: Bool, Typecode.DATETIME: DateTime, Typecode.DICTIONARY: Dictionary, Typecode.INTEGER: Integer, Typecode.INFINITY: Infinity, Typecode.IP_ADDRESS: IpAddress, Typecode.LIST: List, Typecode.NAN: Nan, Typecode.NONE: NoneType, Typecode.NULL_STRING: NullString, Typecode.REAL_NUMBER: RealNumber, Typecode.STRING: String, } @property def type_class(self) -> Type[AbstractType]: return self.__TYPE_CLASS_TABLE[self.typecode] @property def typecode(self) -> Typecode: """ ``typepy.Typecode`` that corresponds to the type of the ``data``. :return: One of the Enum value that are defined ``typepy.Typecode``. :rtype: typepy.Typecode """ assert self._typecode return self._typecode @property def typename(self) -> str: return self.typecode.name def __init__( self, format_flags: Optional[int], is_formatting_float: bool, datetime_format_str: str, east_asian_ambiguous_width: int, ) -> None: self._decimal_places: Optional[int] = None self._east_asian_ambiguous_width = east_asian_ambiguous_width self._typecode: Optional[Typecode] = None self._datetime_format_str = datetime_format_str self.__format_str = "" self._formatter = Formatter( format_flags=format_flags, datetime_format_str=self._datetime_format_str, is_formatting_float=is_formatting_float, ) @property def format_str(self) -> str: if self.__format_str: return self.__format_str self.__format_str = self._formatter.make_format_str(self.typecode, self.decimal_places) return self.__format_str thombashi-DataProperty-4654928/dataproperty/_column.py000066400000000000000000000266051445476334700230710ustar00rootroot00000000000000from typing import Any, Dict, List, Optional from mbstrdecoder import MultiByteStrDecoder from typepy import Integer, StrictLevel, Typecode, TypeConversionError from ._align import Align from ._align_getter import align_getter from ._base import DataPeropertyBase from ._common import DefaultValue from ._container import ListContainer, MinMaxContainer from ._dataproperty import DataProperty from ._function import calc_ascii_char_width from .typing import FloatType class ColumnDataProperty(DataPeropertyBase): __slots__ = ( "__header_ascii_char_width", "__body_ascii_char_width", "__column_index", "__dp_list", "__float_type", "__format_map", "__is_calculate", "__max_precision", "__minmax_integer_digits", "__minmax_decimal_places", "__minmax_additional_format_len", "__typecode_bitmap", ) @property def align(self) -> Align: return align_getter.get_align_from_typecode(self.typecode) @property def bit_length(self) -> Optional[int]: if self.typecode != Typecode.INTEGER: return None bit_length = 0 for value_dp in self.__dp_list: try: bit_length = max(bit_length, int.bit_length(value_dp.data)) except TypeError: pass return bit_length @property def column_index(self) -> int: return self.__column_index @property def decimal_places(self) -> Optional[int]: return self._decimal_places @property def ascii_char_width(self) -> int: return max(self.__header_ascii_char_width, self.__body_ascii_char_width) @property def minmax_integer_digits(self) -> MinMaxContainer: return self.__minmax_integer_digits @property def minmax_decimal_places(self) -> ListContainer: return self.__minmax_decimal_places @property def minmax_additional_format_len(self) -> MinMaxContainer: return self.__minmax_additional_format_len def __init__( self, column_index: int, float_type: Optional[FloatType], min_width: int = 0, format_flags: Optional[int] = None, is_formatting_float: bool = True, datetime_format_str: str = DefaultValue.DATETIME_FORMAT, east_asian_ambiguous_width: int = 1, max_precision: int = DefaultValue.MAX_PRECISION, ) -> None: super().__init__( format_flags=format_flags, is_formatting_float=is_formatting_float, datetime_format_str=datetime_format_str, east_asian_ambiguous_width=east_asian_ambiguous_width, ) self.__header_ascii_char_width = 0 self.__body_ascii_char_width = min_width self.__column_index = column_index self.__float_type = float_type self.__is_calculate = True self.__dp_list: List[DataProperty] = [] self.__minmax_integer_digits = MinMaxContainer() self.__minmax_decimal_places = ListContainer() self.__minmax_additional_format_len = MinMaxContainer() self.__max_precision = max_precision self.__typecode_bitmap = Typecode.NONE.value self.__calc_typecode_from_bitmap() self.__format_map: Dict[Typecode, str] = self._formatter.make_format_map( decimal_places=self._decimal_places ) def __repr__(self) -> str: element_list = [] if self.column_index is not None: element_list.append(f"column={self.column_index}") element_list.extend( [ f"type={self.typename}", f"align={self.align.align_string}", f"ascii_width={self.ascii_char_width}", ] ) if Integer(self.bit_length).is_type(): element_list.append(f"bit_len={self.bit_length}") if self.minmax_integer_digits.has_value(): if self.minmax_integer_digits.is_same_value(): value = f"int_digits={self.minmax_integer_digits.min_value}" else: value = f"int_digits=({self.minmax_integer_digits})" element_list.append(value) if self.minmax_decimal_places.has_value(): if self.minmax_decimal_places.is_same_value(): value = f"decimal_places={self.minmax_decimal_places.min_value}" else: value = f"decimal_places=({self.minmax_decimal_places})" element_list.append(value) if not self.minmax_additional_format_len.is_zero(): if self.minmax_additional_format_len.is_same_value(): value = f"extra_len={self.minmax_additional_format_len.min_value}" else: value = f"extra_len=({self.minmax_additional_format_len})" element_list.append(value) return ", ".join(element_list) def dp_to_str(self, value_dp: DataProperty) -> str: if value_dp.typecode == Typecode.STRING: return str(value_dp.data) try: value = self.__preprocess_value_before_tostring(value_dp) except TypeConversionError: return self.__format_map.get(value_dp.typecode, "{:s}").format(value_dp.data) to_string_format_str = self.__get_tostring_format(value_dp) try: return to_string_format_str.format(value) except (ValueError, TypeError): pass try: return MultiByteStrDecoder(value).unicode_str except ValueError: pass return str(value) def extend_width(self, ascii_char_width: int) -> None: self.extend_header_width(ascii_char_width) self.extend_body_width(ascii_char_width) def extend_header_width(self, ascii_char_width: int) -> None: self.__header_ascii_char_width += ascii_char_width def extend_body_width(self, ascii_char_width: int) -> None: self.__body_ascii_char_width += ascii_char_width def update_header(self, header_db: DataProperty) -> None: self.__header_ascii_char_width = header_db.ascii_char_width def update_body(self, value_dp: DataProperty) -> None: if value_dp.is_include_ansi_escape: assert value_dp.no_ansi_escape_dp value_dp = value_dp.no_ansi_escape_dp self.__typecode_bitmap |= value_dp.typecode.value self.__calc_typecode_from_bitmap() if value_dp.typecode in (Typecode.REAL_NUMBER, Typecode.INTEGER): self.__minmax_integer_digits.update(value_dp.integer_digits) self.__minmax_decimal_places.update(value_dp.decimal_places) self.__update_decimal_places() self.__minmax_additional_format_len.update(value_dp.additional_format_len) self.__dp_list.append(value_dp) self.__update_ascii_char_width() def merge(self, column_dp: "ColumnDataProperty") -> None: self.__typecode_bitmap |= column_dp.typecode.value self.__calc_typecode_from_bitmap() self.__minmax_integer_digits.merge(column_dp.minmax_integer_digits) self.__minmax_decimal_places.merge(column_dp.minmax_decimal_places) self.__update_decimal_places() self.__minmax_additional_format_len.merge(column_dp.minmax_additional_format_len) self.__body_ascii_char_width = max(self.__body_ascii_char_width, column_dp.ascii_char_width) self.__update_ascii_char_width() def begin_update(self) -> None: self.__is_calculate = False def end_update(self) -> None: self.__is_calculate = True self.__calc_typecode_from_bitmap() self.__update_decimal_places() self.__update_ascii_char_width() def __is_not_single_typecode(self, typecode_bitmap: int) -> bool: return bool( self.__typecode_bitmap & typecode_bitmap and self.__typecode_bitmap & ~typecode_bitmap ) def __is_float_typecode(self) -> bool: FLOAT_TYPECODE_BMP = ( Typecode.REAL_NUMBER.value | Typecode.INFINITY.value | Typecode.NAN.value ) NUMBER_TYPECODE_BMP = FLOAT_TYPECODE_BMP | Typecode.INTEGER.value if self.__is_not_single_typecode(NUMBER_TYPECODE_BMP | Typecode.NULL_STRING.value): return False if ( bin(self.__typecode_bitmap & (FLOAT_TYPECODE_BMP | Typecode.NULL_STRING.value)).count( "1" ) >= 2 ): return True if bin(self.__typecode_bitmap & NUMBER_TYPECODE_BMP).count("1") >= 2: return True return False def __calc_body_ascii_char_width(self) -> int: width_list = [self.__body_ascii_char_width] for value_dp in self.__dp_list: if value_dp.is_include_ansi_escape: assert value_dp.no_ansi_escape_dp value_dp = value_dp.no_ansi_escape_dp width_list.append( calc_ascii_char_width(self.dp_to_str(value_dp), self._east_asian_ambiguous_width) ) return max(width_list) def __calc_decimal_places(self) -> Optional[int]: if self.minmax_decimal_places.max_value is None: return None return min(self.__max_precision, int(self.minmax_decimal_places.max_value)) def __get_tostring_format(self, value_dp: DataProperty) -> str: if self.typecode == Typecode.STRING: return self.__format_map.get(value_dp.typecode, "{:s}") return self.__format_map.get(self.typecode, "{:s}") def __get_typecode_from_bitmap(self) -> Typecode: if self.__is_float_typecode(): return Typecode.REAL_NUMBER if any( [ self.__is_not_single_typecode(Typecode.BOOL.value), self.__is_not_single_typecode(Typecode.DATETIME.value), ] ): return Typecode.STRING typecode_list = [ Typecode.STRING, Typecode.REAL_NUMBER, Typecode.INTEGER, Typecode.DATETIME, Typecode.DICTIONARY, Typecode.IP_ADDRESS, Typecode.LIST, Typecode.BOOL, Typecode.INFINITY, Typecode.NAN, Typecode.NULL_STRING, ] for typecode in typecode_list: if self.__typecode_bitmap & typecode.value: return typecode if self.__typecode_bitmap == Typecode.NONE.value: return Typecode.NONE return Typecode.STRING def __update_ascii_char_width(self) -> None: if not self.__is_calculate: return self.__body_ascii_char_width = self.__calc_body_ascii_char_width() def __update_decimal_places(self) -> None: if not self.__is_calculate: return self._decimal_places = self.__calc_decimal_places() self.__format_map = self._formatter.make_format_map(decimal_places=self._decimal_places) def __calc_typecode_from_bitmap(self) -> None: if not self.__is_calculate: return self._typecode = self.__get_typecode_from_bitmap() def __preprocess_value_before_tostring(self, value_dp: DataProperty) -> Any: if self.typecode == value_dp.typecode or self.typecode in [ Typecode.STRING, Typecode.BOOL, Typecode.DATETIME, ]: return value_dp.data return self.type_class( value_dp.data, strict_level=StrictLevel.MIN, float_type=self.__float_type, strip_ansi_escape=False, ).convert() thombashi-DataProperty-4654928/dataproperty/_common.py000066400000000000000000000035731445476334700230630ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import copy import itertools from datetime import datetime from decimal import Decimal from typepy import StrictLevel, Typecode from .typing import StrictLevelMap, TypeValueMap NOT_QUOTING_FLAGS = { Typecode.BOOL: False, Typecode.DATETIME: False, Typecode.DICTIONARY: False, Typecode.INFINITY: False, Typecode.INTEGER: False, Typecode.IP_ADDRESS: False, Typecode.LIST: False, Typecode.NAN: False, Typecode.NULL_STRING: False, Typecode.NONE: False, Typecode.REAL_NUMBER: False, Typecode.STRING: False, } MAX_STRICT_LEVEL_MAP: StrictLevelMap = dict(itertools.product(list(Typecode), [StrictLevel.MAX])) MIN_STRICT_LEVEL_MAP: StrictLevelMap = dict(itertools.product(list(Typecode), [StrictLevel.MIN])) class DefaultValue: DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S%z" FLOAT_TYPE = Decimal INF_VALUE = FLOAT_TYPE("inf") NAN_VALUE = FLOAT_TYPE("nan") QUOTING_FLAGS = copy.deepcopy(NOT_QUOTING_FLAGS) STRICT_LEVEL_MAP: StrictLevelMap = { "default": StrictLevel.MAX, Typecode.BOOL: StrictLevel.MAX, Typecode.DATETIME: StrictLevel.MAX, Typecode.DICTIONARY: StrictLevel.MAX, Typecode.REAL_NUMBER: 1, Typecode.INFINITY: StrictLevel.MIN, Typecode.INTEGER: 1, Typecode.IP_ADDRESS: StrictLevel.MAX, Typecode.LIST: StrictLevel.MAX, Typecode.NAN: StrictLevel.MIN, Typecode.NONE: StrictLevel.MAX, Typecode.NULL_STRING: StrictLevel.MIN, Typecode.STRING: StrictLevel.MIN, } TYPE_VALUE_MAP: TypeValueMap = { Typecode.NONE: None, Typecode.INFINITY: INF_VALUE, Typecode.NAN: NAN_VALUE, } MAX_WORKERS = 1 MAX_PRECISION = 100 def default_datetime_formatter(value: datetime) -> str: return value.strftime(DefaultValue.DATETIME_FORMAT) thombashi-DataProperty-4654928/dataproperty/_container.py000066400000000000000000000120121445476334700235410ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import abc from decimal import Decimal from typing import Any, List, Optional, Sequence, Union from typepy import RealNumber T = Union[int, float, Decimal] NAN = Decimal("nan") class AbstractContainer(metaclass=abc.ABCMeta): @abc.abstractproperty def min_value(self) -> Optional[Decimal]: # pragma: no cover pass @abc.abstractproperty def max_value(self) -> Optional[Decimal]: # pragma: no cover pass @abc.abstractmethod def mean(self) -> Decimal: # pragma: no cover pass @abc.abstractmethod def update(self, value: Optional[T]) -> None: # pragma: no cover pass @abc.abstractmethod def merge(self, value: "AbstractContainer") -> None: # pragma: no cover pass def __repr__(self) -> str: if not self.has_value(): return "None" return ", ".join([f"min={self.min_value}", f"max={self.max_value}"]) def has_value(self) -> bool: return self.min_value is not None and self.max_value is not None def is_same_value(self) -> bool: return self.has_value() and self.min_value == self.max_value def is_zero(self) -> bool: return self.has_value() and self.min_value == 0 and self.max_value == 0 class ListContainer(AbstractContainer): __slots__ = ("__value_list",) @property def min_value(self) -> Optional[Decimal]: try: return min(self.__value_list) except ValueError: return None @property def max_value(self) -> Optional[Decimal]: try: return max(self.__value_list) except ValueError: return None @property def value_list(self) -> List[Decimal]: return self.__value_list def __init__(self, value_list: Optional[List[Decimal]] = None) -> None: if value_list is None: self.__value_list: List[Decimal] = [] return for value in value_list: self.update(value) def mean(self) -> Decimal: try: return Decimal(sum(self.__value_list) / len(self.__value_list)) except ZeroDivisionError: return NAN def update(self, value: Union[int, float, Decimal, None]) -> None: if value is None: return store_value = RealNumber(value).try_convert() if store_value is None: return self.__value_list.append(store_value) def merge(self, value: "AbstractContainer") -> None: if not isinstance(value, ListContainer): return for v in value.value_list: self.update(v) class MinMaxContainer(AbstractContainer): __slots__ = ("__min_value", "__max_value") def __init__(self, value_list: Optional[Sequence[Decimal]] = None) -> None: self.__min_value: Optional[Decimal] = None self.__max_value: Optional[Decimal] = None if value_list is None: return for value in value_list: self.update(value) @property def min_value(self) -> Optional[Decimal]: return self.__min_value @property def max_value(self) -> Optional[Decimal]: return self.__max_value def __eq__(self, other: Any) -> bool: if not isinstance(other, MinMaxContainer): return False return all([self.min_value == other.min_value, self.max_value == other.max_value]) def __ne__(self, other: Any) -> bool: if not isinstance(other, MinMaxContainer): return True return any([self.min_value != other.min_value, self.max_value != other.max_value]) def __contains__(self, x: T) -> bool: if self.min_value is None: return False if self.max_value is None: return False return self.min_value <= x <= self.max_value def diff(self) -> Decimal: if self.min_value is None: return NAN if self.max_value is None: return NAN try: return self.max_value - self.min_value except TypeError: return NAN def mean(self) -> Decimal: if self.min_value is None: return NAN if self.max_value is None: return NAN try: return (self.max_value + self.min_value) * Decimal("0.5") except TypeError: return NAN def update(self, value: Optional[T]) -> None: if value is None: return decimal_value = Decimal(value) if self.__min_value is None: self.__min_value = decimal_value else: self.__min_value = min(self.__min_value, decimal_value) if self.__max_value is None: self.__max_value = decimal_value else: self.__max_value = max(self.__max_value, decimal_value) def merge(self, value: "AbstractContainer") -> None: if not isinstance(value, MinMaxContainer): return self.update(value.min_value) self.update(value.max_value) thombashi-DataProperty-4654928/dataproperty/_converter.py000066400000000000000000000063051445476334700235760ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import re from typing import Any, Dict, Optional from typepy import Typecode, TypeConversionError from ._common import MAX_STRICT_LEVEL_MAP, DefaultValue from ._dataproperty import DataProperty from ._preprocessor import Preprocessor from .typing import DateTimeFormatter, FloatType, StrictLevelMap, TypeValueMap class DataPropertyConverter: __RE_QUOTE_LINE = re.compile(r"^\s*[\"'].*[\"']\s*$") # noqa: w605 __RE_QUOTE_CHAR = re.compile("[\"']") def __init__( self, preprocessor: Preprocessor, datetime_format_str: str, datetime_formatter: Optional[DateTimeFormatter] = None, type_value_map: Optional[TypeValueMap] = None, quoting_flags: Optional[Dict[Typecode, bool]] = None, float_type: Optional[FloatType] = None, strict_level_map: Optional[StrictLevelMap] = None, ) -> None: self.__preprocessor = preprocessor self.__type_value_map: TypeValueMap = ( type_value_map if type_value_map else DefaultValue.TYPE_VALUE_MAP ) self.__quoting_flags: Dict[Typecode, bool] = ( quoting_flags if quoting_flags else DefaultValue.QUOTING_FLAGS ) self.__datetime_formatter = datetime_formatter self.__datetime_format_str = datetime_format_str self.__float_type = float_type self.__strict_level_map = strict_level_map def convert(self, dp_value: DataProperty) -> DataProperty: try: return self.__create_dataproperty(self.__convert_value(dp_value)) except TypeConversionError: pass if not self.__quoting_flags.get(dp_value.typecode): if self.__preprocessor.is_escape_html_tag: return self.__create_dataproperty(dp_value.to_str()) return dp_value return self.__create_dataproperty(self.__apply_quote(dp_value.typecode, dp_value.to_str())) def __create_dataproperty(self, value: Any) -> DataProperty: return DataProperty( value, preprocessor=self.__preprocessor, float_type=self.__float_type, datetime_format_str=self.__datetime_format_str, strict_level_map=MAX_STRICT_LEVEL_MAP, ) def __apply_quote(self, typecode: Typecode, data: Any) -> Any: if not self.__quoting_flags.get(typecode): return data try: if self.__RE_QUOTE_LINE.search(data): return data except TypeError: return data return '"{}"'.format(self.__RE_QUOTE_CHAR.sub('\\"', data.replace("\\", "\\\\"))) def __convert_value(self, dp_value: DataProperty) -> Any: if dp_value.typecode in self.__type_value_map: return self.__apply_quote(dp_value.typecode, self.__type_value_map[dp_value.typecode]) if dp_value.typecode == Typecode.DATETIME and self.__datetime_formatter: try: return self.__apply_quote( dp_value.typecode, self.__datetime_formatter(dp_value.data) ) except TypeError: raise TypeConversionError raise TypeConversionError("no need to convert") thombashi-DataProperty-4654928/dataproperty/_dataproperty.py000066400000000000000000000260711445476334700243070ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import typing from decimal import Decimal from typing import Any, Optional, cast import typepy from mbstrdecoder import MultiByteStrDecoder from typepy import ( Bool, DateTime, Dictionary, Infinity, Integer, IpAddress, Nan, NoneType, NullString, RealNumber, StrictLevel, String, Typecode, TypeConversionError, ) from typepy.type import AbstractType from ._align import Align from ._align_getter import align_getter from ._base import DataPeropertyBase from ._common import DefaultValue from ._function import calc_ascii_char_width, get_number_of_digit from ._preprocessor import Preprocessor from .typing import FloatType, StrictLevelMap, TypeHint class DataProperty(DataPeropertyBase): __slots__ = ( "__data", "__no_ansi_escape_data", "__align", "__integer_digits", "__additional_format_len", "__length", "__ascii_char_width", ) __type_class_list: typing.List[AbstractType] = [ NoneType, Integer, Infinity, Nan, IpAddress, RealNumber, Bool, typepy.List, Dictionary, DateTime, NullString, String, ] def __init__( self, data: Any, preprocessor: Optional[Preprocessor] = None, type_hint: TypeHint = None, float_type: Optional[FloatType] = None, format_flags: Optional[int] = None, datetime_format_str: str = DefaultValue.DATETIME_FORMAT, strict_level_map: Optional[StrictLevelMap] = None, east_asian_ambiguous_width: int = 1, ) -> None: super().__init__( format_flags=format_flags, is_formatting_float=True, datetime_format_str=datetime_format_str, east_asian_ambiguous_width=east_asian_ambiguous_width, ) self.__additional_format_len: Optional[int] = None self.__align: Optional[Align] = None self.__ascii_char_width: Optional[int] = None self.__integer_digits: Optional[int] = None self.__length: Optional[int] = None if preprocessor is None: preprocessor = Preprocessor() data, no_ansi_escape_data = preprocessor.preprocess(data) self.__set_data(data, type_hint, float_type, strict_level_map) if no_ansi_escape_data is None or len(data) == len(no_ansi_escape_data): self.__no_ansi_escape_data: Optional[DataProperty] = None else: self.__no_ansi_escape_data = DataProperty(no_ansi_escape_data, float_type=float_type) def __eq__(self, other: Any) -> bool: if not isinstance(other, DataProperty): return False if self.typecode != other.typecode: return False if self.typecode == Typecode.NAN: return True return self.data == other.data def __ne__(self, other: Any) -> bool: if not isinstance(other, DataProperty): return True if self.typecode != other.typecode: return True if self.typecode == Typecode.NAN: return False return self.data != other.data def __repr__(self) -> str: element_list = [] if self.typecode == Typecode.DATETIME: element_list.append(f"data={str(self.data):s}") else: try: element_list.append("data=" + self.to_str()) except UnicodeEncodeError: element_list.append(f"data={MultiByteStrDecoder(self.data).unicode_str}") element_list.extend( [ f"type={self.typename:s}", f"align={self.align.align_string}", f"ascii_width={self.ascii_char_width:d}", ] ) if Integer(self.length).is_type(): element_list.append(f"length={self.length}") if Integer(self.integer_digits).is_type(): element_list.append(f"int_digits={self.integer_digits}") if Integer(self.decimal_places).is_type(): element_list.append(f"decimal_places={self.decimal_places}") if Integer(self.additional_format_len).is_type(): element_list.append(f"extra_len={self.additional_format_len}") return ", ".join(element_list) @property def align(self) -> Align: if not self.__align: if self.is_include_ansi_escape: assert self.no_ansi_escape_dp self.__align = self.no_ansi_escape_dp.align else: self.__align = align_getter.get_align_from_typecode(self.typecode) assert self.__align return self.__align @property def decimal_places(self) -> Optional[int]: """ :return: Decimal places if the ``data`` type either ``float`` or ``decimal.Decimal``. Returns ``0`` if the ``data`` type is ``int``. Otherwise, returns ``float("nan")``. :rtype: int """ if self._decimal_places is None: self.__set_digit() return self._decimal_places @property def data(self) -> Any: """ :return: Original data value. :rtype: Original data type. """ return self.__data @property def is_include_ansi_escape(self) -> bool: if self.no_ansi_escape_dp is None: return False return self.length != self.no_ansi_escape_dp.length @property def no_ansi_escape_dp(self) -> Optional["DataProperty"]: return self.__no_ansi_escape_data @property def length(self) -> Optional[int]: """ :return: Length of the ``data``. :rtype: int """ if self.__length is None: self.__length = self.__get_length() return self.__length @property def ascii_char_width(self) -> int: if self.__ascii_char_width is None: self.__ascii_char_width = self.__calc_ascii_char_width() return self.__ascii_char_width @property def integer_digits(self) -> Optional[int]: """ :return: Integer digits if the ``data`` type either ``int``/``float``/``decimal.Decimal``. Otherwise, returns ``None``. :rtype: int """ if self.__integer_digits is None: self.__set_digit() return self.__integer_digits @property def additional_format_len(self) -> int: if self.__additional_format_len is None: self.__additional_format_len = self.__get_additional_format_len() return self.__additional_format_len def get_padding_len(self, ascii_char_width: int) -> int: if self.typecode in (Typecode.LIST, Typecode.DICTIONARY): unicode_str_len = DataProperty(MultiByteStrDecoder(str(self.data)).unicode_str).length assert unicode_str_len return max( ascii_char_width - (self.ascii_char_width - unicode_str_len), 0, ) try: return max(ascii_char_width - (self.ascii_char_width - cast(int, self.length)), 0) except TypeError: return ascii_char_width def to_str(self) -> str: return self.format_str.format(self.data) def __get_additional_format_len(self) -> int: if not RealNumber(self.data, strip_ansi_escape=False).is_type(): return 0 format_len = 0 if Decimal(self.data) < 0: # for minus character format_len += 1 return format_len def __get_base_float_len(self) -> int: assert self.integer_digits is not None assert self.decimal_places is not None if any([self.integer_digits < 0, self.decimal_places < 0]): raise ValueError("integer digits and decimal places must be greater or equals to zero") float_len = self.integer_digits + self.decimal_places if self.decimal_places > 0: # for dot float_len += 1 return float_len def __get_length(self) -> Optional[int]: if self.typecode in (Typecode.DICTIONARY, Typecode.LIST, Typecode.STRING): return len(self.data) return None def __calc_ascii_char_width(self) -> int: if self.typecode == Typecode.INTEGER: return cast(int, self.integer_digits) + self.additional_format_len if self.typecode == Typecode.REAL_NUMBER: return self.__get_base_float_len() + self.additional_format_len if self.typecode == Typecode.DATETIME: try: return len(self.to_str()) except ValueError: # reach to this line if the year <1900. # the datetime strftime() methods require year >= 1900. return len(str(self.data)) if self.is_include_ansi_escape: assert self.no_ansi_escape_dp return self.no_ansi_escape_dp.ascii_char_width try: unicode_str = MultiByteStrDecoder(self.data).unicode_str except ValueError: unicode_str = self.to_str() return calc_ascii_char_width(unicode_str, self._east_asian_ambiguous_width) def __set_data( self, data: Any, type_hint: TypeHint, float_type: Optional[FloatType], strict_level_map: Optional[StrictLevelMap], ) -> None: if float_type is None: float_type = DefaultValue.FLOAT_TYPE if strict_level_map is None: strict_level_map = DefaultValue.STRICT_LEVEL_MAP if type_hint: type_obj = type_hint( data, strict_level=StrictLevel.MIN, float_type=float_type, strip_ansi_escape=False ) self._typecode = type_obj.typecode self.__data = type_obj.try_convert() if type_hint( self.__data, strict_level=StrictLevel.MAX, float_type=float_type, strip_ansi_escape=False, ).is_type(): return for type_class in self.__type_class_list: strict_level = strict_level_map.get( type_class(None).typecode, strict_level_map.get("default", StrictLevel.MAX) ) if self.__try_convert_type(data, type_class, strict_level, float_type): return raise TypeConversionError( f"failed to convert: data={data}, strict_level={strict_level_map}" ) def __set_digit(self) -> None: integer_digits, decimal_places = get_number_of_digit(self.__data) self.__integer_digits = integer_digits self._decimal_places = decimal_places def __try_convert_type( self, data: Any, type_class: AbstractType, strict_level: int, float_type: Optional[FloatType], ) -> bool: type_obj = type_class(data, strict_level, float_type=float_type, strip_ansi_escape=False) try: self.__data = type_obj.convert() except TypeConversionError: return False self._typecode = type_obj.typecode return True thombashi-DataProperty-4654928/dataproperty/_extractor.py000066400000000000000000000624531445476334700236100ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import copy import enum import sys import typing from collections import Counter from decimal import Decimal from typing import Any, Dict, List, Mapping, Optional, Sequence, Tuple, Type, Union, cast import typepy from typepy import ( Bool, DateTime, Dictionary, Infinity, Integer, IpAddress, Nan, NoneType, NullString, RealNumber, StrictLevel, String, Typecode, is_empty_sequence, ) from typepy.type import AbstractType from ._column import ColumnDataProperty from ._common import MIN_STRICT_LEVEL_MAP, DefaultValue from ._converter import DataPropertyConverter from ._dataproperty import DataProperty from ._formatter import Format from ._preprocessor import Preprocessor from .logger import logger from .typing import ( DateTimeFormatter, StrictLevelMap, TransFunc, TypeHint, TypeValueMap, normalize_type_hint, ) DataPropertyMatrix = List[List[DataProperty]] @enum.unique class MatrixFormatting(enum.Enum): # raise exception if the matrix is not properly formatted EXCEPTION = 1 << 1 # trim to the minimum size column TRIM = 1 << 2 # Append None values to columns so that it is the same as the maximum # column size. FILL_NONE = 1 << 3 HEADER_ALIGNED = 1 << 4 class DataPropertyExtractor: """ .. py:attribute:: quoting_flags Configurations to add double quote to for each items in a matrix, where |Typecode| of table-value is |True| in the ``quote_flag_table`` mapping table. ``quote_flag_table`` should be a dictionary. And is ``{ Typecode : bool }``. Defaults to: .. code-block:: json :caption: The default values { Typecode.BOOL: False, Typecode.DATETIME: False, Typecode.DICTIONARY: False, Typecode.INFINITY: False, Typecode.INTEGER: False, Typecode.IP_ADDRESS: False, Typecode.LIST: False, Typecode.NAN: False, Typecode.NULL_STRING: False, Typecode.NONE: False, Typecode.REAL_NUMBER: False, Typecode.STRING: False, } """ def __init__(self, max_precision: Optional[int] = None) -> None: self.max_workers = DefaultValue.MAX_WORKERS if max_precision is None: self.__max_precision = DefaultValue.MAX_PRECISION else: self.__max_precision = max_precision self.__headers: Sequence[str] = [] self.__default_type_hint: TypeHint = None self.__col_type_hints: List[TypeHint] = [] self.__strip_str_header: Optional[str] = None self.__is_formatting_float = True self.__min_col_ascii_char_width = 0 self.__default_format_flags = Format.NONE self.__format_flags_list: Sequence[int] = [] self.__float_type: Union[Type[float], Type[Decimal], None] = None self.__datetime_format_str = DefaultValue.DATETIME_FORMAT self.__strict_level_map = copy.deepcopy( cast(Dict[Union[Typecode, str], int], DefaultValue.STRICT_LEVEL_MAP) ) self.__east_asian_ambiguous_width = 1 self.__preprocessor = Preprocessor() self.__type_value_map: Mapping[Typecode, Union[float, Decimal, None]] = copy.deepcopy( DefaultValue.TYPE_VALUE_MAP ) self.__trans_func_list: List[TransFunc] = [] self.__quoting_flags = copy.deepcopy(DefaultValue.QUOTING_FLAGS) self.__datetime_formatter: Optional[DateTimeFormatter] = None self.__matrix_formatting = MatrixFormatting.TRIM self.__dp_converter: DataPropertyConverter self.__clear_cache() def __clear_cache(self) -> None: self.__update_dp_converter() self.__dp_cache_zero = self.__to_dp_raw(0) self.__dp_cache_one = self.__to_dp_raw(1) self.__dp_cache_true = self.__to_dp_raw(True) self.__dp_cache_false = self.__to_dp_raw(False) self.__dp_cache_map = {None: self.__to_dp_raw(None), "": self.__to_dp_raw("")} @property def headers(self) -> Sequence[str]: return self.__headers @headers.setter def headers(self, value: Sequence[str]) -> None: if self.__headers == value: return self.__headers = value self.__clear_cache() @property def default_type_hint(self) -> TypeHint: return self.__default_type_hint @default_type_hint.setter def default_type_hint(self, value: TypeHint) -> None: if self.__default_type_hint == value: return self.__default_type_hint = value self.__clear_cache() @property def column_type_hints(self) -> List[TypeHint]: return self.__col_type_hints @column_type_hints.setter def column_type_hints(self, value: Sequence[Union[str, TypeHint]]) -> None: normalized_type_hints: List[TypeHint] = [] for type_hint in value: type_hint = normalize_type_hint(type_hint) if type_hint not in ( Bool, DateTime, Dictionary, Infinity, Integer, IpAddress, typepy.List, Nan, NoneType, RealNumber, String, NullString, None, ): raise ValueError(f"invalid type hint: {type(type_hint)}") normalized_type_hints.append(type_hint) if self.__col_type_hints == normalized_type_hints: return self.__col_type_hints = normalized_type_hints self.__clear_cache() @property def is_formatting_float(self) -> bool: return self.__is_formatting_float @is_formatting_float.setter def is_formatting_float(self, value: bool) -> None: self.__is_formatting_float = value @property def max_precision(self) -> int: return self.__max_precision @max_precision.setter def max_precision(self, value: int) -> None: if self.__max_precision == value: return self.__max_precision = value self.__clear_cache() @property def preprocessor(self) -> Preprocessor: return self.__preprocessor @preprocessor.setter def preprocessor(self, value: Preprocessor) -> None: if self.preprocessor == value: return self.__preprocessor = value self.__update_dp_converter() @property def strip_str_header(self) -> Optional[str]: return self.__strip_str_header @strip_str_header.setter def strip_str_header(self, value: str) -> None: if self.__strip_str_header == value: return self.__strip_str_header = value self.__clear_cache() @property def min_column_width(self) -> int: return self.__min_col_ascii_char_width @min_column_width.setter def min_column_width(self, value: int) -> None: if self.__min_col_ascii_char_width == value: return self.__min_col_ascii_char_width = value self.__clear_cache() @property def default_format_flags(self) -> int: return self.__default_format_flags @default_format_flags.setter def default_format_flags(self, value: int) -> None: if self.__default_format_flags == value: return self.__default_format_flags = value self.__clear_cache() @property def format_flags_list(self) -> Sequence[int]: return self.__format_flags_list @format_flags_list.setter def format_flags_list(self, value: Sequence[int]) -> None: if self.__format_flags_list == value: return self.__format_flags_list = value self.__clear_cache() @property def float_type(self) -> Union[Type[float], Type[Decimal], None]: return self.__float_type @float_type.setter def float_type(self, value: Union[Type[float], Type[Decimal]]) -> None: if self.__float_type == value: return self.__float_type = value self.__clear_cache() @property def datetime_format_str(self) -> str: return self.__datetime_format_str @datetime_format_str.setter def datetime_format_str(self, value: str) -> None: if self.__datetime_format_str == value: return self.__datetime_format_str = value self.__clear_cache() @property def strict_level_map(self) -> StrictLevelMap: return self.__strict_level_map @strict_level_map.setter def strict_level_map(self, value: StrictLevelMap) -> None: if self.__strict_level_map == value: return self.__strict_level_map = cast(Dict[Union[Typecode, str], int], value) self.__clear_cache() @property def east_asian_ambiguous_width(self) -> int: return self.__east_asian_ambiguous_width @east_asian_ambiguous_width.setter def east_asian_ambiguous_width(self, value: int) -> None: if self.__east_asian_ambiguous_width == value: return self.__east_asian_ambiguous_width = value self.__clear_cache() @property def type_value_map(self) -> TypeValueMap: return self.__type_value_map @type_value_map.setter def type_value_map(self, value: TypeValueMap) -> None: if self.__type_value_map == value: return self.__type_value_map = value self.__clear_cache() def register_trans_func(self, trans_func: TransFunc) -> None: self.__trans_func_list.insert(0, trans_func) self.__clear_cache() @property def quoting_flags(self) -> Dict[Typecode, bool]: return self.__quoting_flags @quoting_flags.setter def quoting_flags(self, value: Dict[Typecode, bool]) -> None: if self.__quoting_flags == value: return self.__quoting_flags = value self.__clear_cache() @property def datetime_formatter(self) -> Optional[DateTimeFormatter]: return self.__datetime_formatter @datetime_formatter.setter def datetime_formatter(self, value: Optional[DateTimeFormatter]) -> None: if self.__datetime_formatter == value: return self.__datetime_formatter = value self.__clear_cache() @property def matrix_formatting(self) -> MatrixFormatting: return self.__matrix_formatting @matrix_formatting.setter def matrix_formatting(self, value: MatrixFormatting) -> None: if self.__matrix_formatting == value: return self.__matrix_formatting = value self.__clear_cache() @property def max_workers(self) -> int: assert self.__max_workers return self.__max_workers @max_workers.setter def max_workers(self, value: Optional[int]) -> None: try: from _multiprocessing import SemLock, sem_unlink # noqa except ImportError: logger.debug("This platform lacks a functioning sem_open implementation") value = 1 if "pytest" in sys.modules and value != 1: logger.debug("set max_workers to 1 to avoid deadlock when executed from pytest") value = 1 self.__max_workers = value if not self.__max_workers: self.__max_workers = DefaultValue.MAX_WORKERS def to_dp(self, value: Any) -> DataProperty: self.__update_dp_converter() return self.__to_dp(value) def to_dp_list(self, values: Sequence[Any]) -> List[DataProperty]: if is_empty_sequence(values): return [] self.__update_dp_converter() return self._to_dp_list(values) def to_column_dp_list( self, value_dp_matrix: Any, previous_column_dp_list: Optional[Sequence[ColumnDataProperty]] = None, ) -> List[ColumnDataProperty]: col_dp_list = self.__get_col_dp_list_base() logger.debug("converting to column dataproperty:") logs = [" params:"] if self.headers: logs.append(f" headers={len(self.headers)}") logs.extend( [ " prev_col_count={}".format( len(previous_column_dp_list) if previous_column_dp_list else None ), f" matrix_formatting={self.matrix_formatting}", ] ) if self.column_type_hints: logs.append( " column_type_hints=({})".format( ", ".join( [ type_hint.__name__ if type_hint else "none" for type_hint in self.column_type_hints ] ) ) ) else: logs.append(" column_type_hints=()") for log in logs: logger.debug(log) logger.debug(" results:") for col_idx, value_dp_list in enumerate(zip(*value_dp_matrix)): try: col_dp_list[col_idx] except IndexError: col_dp_list.append( ColumnDataProperty( column_index=col_idx, float_type=self.float_type, min_width=self.min_column_width, format_flags=self.__get_format_flags(col_idx), is_formatting_float=self.is_formatting_float, datetime_format_str=self.datetime_format_str, east_asian_ambiguous_width=self.east_asian_ambiguous_width, max_precision=self.__max_precision, ) ) col_dp = col_dp_list[col_idx] col_dp.begin_update() try: col_dp.merge(previous_column_dp_list[col_idx]) # type: ignore except (TypeError, IndexError): pass for value_dp in value_dp_list: col_dp.update_body(value_dp) col_dp.end_update() logger.debug(f" {str(col_dp):s}") return col_dp_list def to_dp_matrix(self, value_matrix: Sequence[Sequence[Any]]) -> DataPropertyMatrix: self.__update_dp_converter() logger.debug(f"max_workers={self.max_workers}, preprocessor={self.__preprocessor}") value_matrix = self.__strip_data_matrix(value_matrix) if self.__is_dp_matrix(value_matrix): logger.debug("already a dataproperty matrix") return value_matrix # type: ignore if self.max_workers <= 1: return self.__to_dp_matrix_st(value_matrix) return self.__to_dp_matrix_mt(value_matrix) def to_header_dp_list(self) -> List[DataProperty]: self.__update_dp_converter() preprocessor = copy.deepcopy(self.__preprocessor) preprocessor.strip_str = self.strip_str_header return self._to_dp_list( self.headers, type_hint=String, preprocessor=preprocessor, strict_level_map=MIN_STRICT_LEVEL_MAP, ) def update_preprocessor(self, **kwargs: Any) -> bool: is_updated = self.__preprocessor.update(**kwargs) self.__update_dp_converter() return is_updated def update_strict_level_map(self, value: StrictLevelMap) -> bool: org = copy.deepcopy(self.__strict_level_map) self.__strict_level_map.update(value) if org == self.__strict_level_map: return False self.__clear_cache() return True """ def update_dict(self, lhs: Mapping, rhs: Mapping) -> bool: is_updated = False for key, value in rhs.items(): if key not in lhs: lhs[] continue if getattr(lhs, key) == value: continue setattr(lhs, key, value) is_updated = True return is_updated """ @staticmethod def __is_dp_matrix(value: Any) -> bool: try: return isinstance(value[0][0], DataProperty) except (TypeError, IndexError): return False def __get_col_type_hint(self, col_idx: int) -> TypeHint: try: return self.column_type_hints[col_idx] except (TypeError, IndexError): return self.default_type_hint def __get_format_flags(self, col_idx: int) -> int: try: return self.format_flags_list[col_idx] except (TypeError, IndexError): return self.__default_format_flags def __to_dp( self, data: Any, type_hint: TypeHint = None, preprocessor: Optional[Preprocessor] = None, strict_level_map: Optional[StrictLevelMap] = None, ) -> DataProperty: for trans_func in self.__trans_func_list: data = trans_func(data) if type_hint: return self.__to_dp_raw( data, type_hint=type_hint, preprocessor=preprocessor, strict_level_map=strict_level_map, ) try: if data in self.__dp_cache_map: return self.__dp_cache_map[data] except TypeError: # unhashable type pass if data == 0: if data is False: return self.__dp_cache_false return self.__dp_cache_zero if data == 1: if data is True: return self.__dp_cache_true return self.__dp_cache_one return self.__to_dp_raw( data, type_hint=type_hint, preprocessor=preprocessor, strict_level_map=strict_level_map ) def __to_dp_raw( self, data: Any, type_hint: TypeHint = None, preprocessor: Optional[Preprocessor] = None, strict_level_map: Optional[StrictLevelMap] = None, ) -> DataProperty: if preprocessor: preprocessor = Preprocessor( dequote=preprocessor.dequote, line_break_handling=preprocessor.line_break_handling, line_break_repl=preprocessor.line_break_repl, strip_str=preprocessor.strip_str, is_escape_formula_injection=preprocessor.is_escape_formula_injection, ) else: preprocessor = Preprocessor( dequote=self.preprocessor.dequote, line_break_handling=self.preprocessor.line_break_handling, line_break_repl=self.preprocessor.line_break_repl, strip_str=self.preprocessor.strip_str, is_escape_formula_injection=self.__preprocessor.is_escape_formula_injection, ) value_dp = DataProperty( data, preprocessor=preprocessor, type_hint=(type_hint if type_hint is not None else self.default_type_hint), float_type=self.float_type, datetime_format_str=self.datetime_format_str, strict_level_map=(strict_level_map if type_hint is not None else self.strict_level_map), east_asian_ambiguous_width=self.east_asian_ambiguous_width, ) return self.__dp_converter.convert(value_dp) def __to_dp_matrix_st(self, value_matrix: Sequence[Sequence[Any]]) -> DataPropertyMatrix: return list( zip( # type: ignore *( _to_dp_list_helper( self, col_idx, values, self.__get_col_type_hint(col_idx), self.__preprocessor, )[1] for col_idx, values in enumerate(zip(*value_matrix)) ) ) ) def __to_dp_matrix_mt(self, value_matrix: Sequence[Sequence[Any]]) -> DataPropertyMatrix: from concurrent import futures col_data_map = {} with futures.ProcessPoolExecutor(self.max_workers) as executor: future_list = [ executor.submit( _to_dp_list_helper, self, col_idx, values, self.__get_col_type_hint(col_idx), self.__preprocessor, ) for col_idx, values in enumerate(zip(*value_matrix)) ] for future in futures.as_completed(future_list): col_idx, value_dp_list = future.result() col_data_map[col_idx] = value_dp_list return list( zip(*(col_data_map[col_idx] for col_idx in sorted(col_data_map))) # type: ignore ) def _to_dp_list( self, data_list: Sequence[Any], type_hint: TypeHint = None, preprocessor: Optional[Preprocessor] = None, strict_level_map: Optional[StrictLevelMap] = None, ) -> List[DataProperty]: if is_empty_sequence(data_list): return [] type_counter: typing.Counter[Type[AbstractType]] = Counter() dp_list = [] for data in data_list: expect_type_hint: TypeHint = type_hint if type_hint is None: try: expect_type_hint, _count = type_counter.most_common(1)[0] if not expect_type_hint( data, float_type=self.float_type, strict_level=StrictLevel.MAX ).is_type(): expect_type_hint = None except IndexError: pass dataprop = self.__to_dp( data=data, type_hint=expect_type_hint, preprocessor=preprocessor if preprocessor else self.__preprocessor, strict_level_map=strict_level_map, ) type_counter[dataprop.type_class] += 1 dp_list.append(dataprop) return dp_list def __strip_data_matrix(self, data_matrix: Sequence[Sequence[Any]]) -> Sequence[Sequence[Any]]: header_col_size = len(self.headers) if self.headers else 0 try: col_size_list = [len(data_list) for data_list in data_matrix] except TypeError: return [] if self.headers: min_col_size = min([header_col_size] + col_size_list) max_col_size = max([header_col_size] + col_size_list) elif col_size_list: min_col_size = min(col_size_list) max_col_size = max(col_size_list) else: min_col_size = 0 max_col_size = 0 if self.matrix_formatting == MatrixFormatting.EXCEPTION: if min_col_size != max_col_size: raise ValueError( "nonuniform column size found: min={}, max={}".format( min_col_size, max_col_size ) ) return data_matrix if self.matrix_formatting == MatrixFormatting.HEADER_ALIGNED: if header_col_size > 0: format_col_size = header_col_size else: format_col_size = max_col_size elif self.matrix_formatting == MatrixFormatting.TRIM: format_col_size = min_col_size elif self.matrix_formatting == MatrixFormatting.FILL_NONE: format_col_size = max_col_size else: raise ValueError(f"unknown matrix formatting: {self.matrix_formatting}") return [ list(data_matrix[row_idx][:format_col_size]) + [None] * (format_col_size - col_size) for row_idx, col_size in enumerate(col_size_list) ] def __get_col_dp_list_base(self) -> List[ColumnDataProperty]: header_dp_list = self.to_header_dp_list() col_dp_list = [] for col_idx, header_dp in enumerate(header_dp_list): col_dp = ColumnDataProperty( column_index=col_idx, float_type=self.float_type, min_width=self.min_column_width, format_flags=self.__get_format_flags(col_idx), is_formatting_float=self.is_formatting_float, datetime_format_str=self.datetime_format_str, east_asian_ambiguous_width=self.east_asian_ambiguous_width, max_precision=self.__max_precision, ) col_dp.update_header(header_dp) col_dp_list.append(col_dp) return col_dp_list def __update_dp_converter(self) -> None: preprocessor = Preprocessor( line_break_handling=self.__preprocessor.line_break_handling, line_break_repl=self.preprocessor.line_break_repl, is_escape_html_tag=self.__preprocessor.is_escape_html_tag, is_escape_formula_injection=self.__preprocessor.is_escape_formula_injection, ) self.__dp_converter = DataPropertyConverter( preprocessor=preprocessor, type_value_map=self.type_value_map, quoting_flags=self.quoting_flags, datetime_formatter=self.datetime_formatter, datetime_format_str=self.datetime_format_str, float_type=self.float_type, strict_level_map=self.strict_level_map, ) def _to_dp_list_helper( extractor: DataPropertyExtractor, col_idx: int, data_list: Sequence[Any], type_hint: TypeHint, preprocessor: Preprocessor, ) -> Tuple[int, List[DataProperty]]: return ( col_idx, extractor._to_dp_list(data_list, type_hint=type_hint, preprocessor=preprocessor), ) thombashi-DataProperty-4654928/dataproperty/_formatter.py000066400000000000000000000056701445476334700235760ustar00rootroot00000000000000import copy from decimal import Decimal from typing import Dict, Optional, Union from typepy import Nan, Typecode DecimalPlaces = Union[float, Decimal] class Format: NONE = 0 THOUSAND_SEPARATOR = 1 class Formatter: __slots__ = ("__is_formatting_float", "__format_flags", "__datetime_format_str") _BLANK_CURLY_BRACES_FORMAT_MAP: Dict[Typecode, str] = { Typecode.NONE: "{}", Typecode.IP_ADDRESS: "{}", Typecode.BOOL: "{}", Typecode.DICTIONARY: "{}", Typecode.LIST: "{}", } def __init__( self, datetime_format_str: str, is_formatting_float: Optional[bool] = True, format_flags: Optional[int] = None, ) -> None: if format_flags is not None: self.__format_flags = format_flags else: self.__format_flags = Format.NONE self.__datetime_format_str = datetime_format_str self.__is_formatting_float = is_formatting_float def make_format_map( self, decimal_places: Optional[DecimalPlaces] = None ) -> Dict[Typecode, str]: format_map = copy.copy(self._BLANK_CURLY_BRACES_FORMAT_MAP) format_map.update( { Typecode.INTEGER: self.make_format_str(Typecode.INTEGER), Typecode.REAL_NUMBER: self.make_format_str(Typecode.REAL_NUMBER, decimal_places), Typecode.INFINITY: self.make_format_str(Typecode.INFINITY), Typecode.NAN: self.make_format_str(Typecode.NAN), Typecode.DATETIME: self.make_format_str(Typecode.DATETIME), } ) return format_map def make_format_str( self, typecode: Typecode, decimal_places: Optional[DecimalPlaces] = None ) -> str: format_str = self._BLANK_CURLY_BRACES_FORMAT_MAP.get(typecode) if format_str is not None: return format_str if typecode == Typecode.INTEGER: return self.__get_integer_format() if typecode in (Typecode.REAL_NUMBER, Typecode.INFINITY, Typecode.NAN): return self.__get_realnumber_format(decimal_places) if typecode == Typecode.DATETIME: return "{:" + self.__datetime_format_str + "}" return "{:s}" def __get_base_format_str(self) -> str: if self.__format_flags & Format.THOUSAND_SEPARATOR: return "," return "" def __get_integer_format(self) -> str: return "{:" + self.__get_base_format_str() + "d}" def __get_realnumber_format(self, decimal_places: Optional[DecimalPlaces]) -> str: if not self.__is_formatting_float: return "{}" base_format = self.__get_base_format_str() if decimal_places is None or Nan(decimal_places).is_type(): return "{:" + base_format + "f}" try: return "{:" + f"{base_format:s}.{decimal_places:d}f" + "}" except ValueError: pass return "{:" + base_format + "f}" thombashi-DataProperty-4654928/dataproperty/_function.py000066400000000000000000000060531445476334700234140ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import decimal import re from decimal import Decimal from typing import Any, Optional, Tuple, Union from typepy import Integer, RealNumber, TypeConversionError decimal.setcontext(decimal.Context(prec=60, rounding=decimal.ROUND_HALF_DOWN)) _ansi_escape = re.compile(r"(\x9b|\x1b\[)[0-?]*[ -\/]*[@-~]", re.IGNORECASE) def get_integer_digit(value: Any) -> int: float_type = RealNumber(value) try: abs_value = abs(float_type.convert()) except TypeConversionError: try: abs_value = abs(Integer(value).convert()) except TypeConversionError: raise ValueError(f"the value must be a number: value='{value}' type='{type(value)}'") return len(str(abs_value)) if abs_value.is_zero(): return 1 try: return len(str(abs_value.quantize(Decimal("1."), rounding=decimal.ROUND_DOWN))) except decimal.InvalidOperation: return len(str(abs_value)) class DigitCalculator: REGEXP_COMMON_LOG = re.compile(r"[\d\.]+[eE]\-\d+") REGEXP_SPLIT = re.compile(r"[eE]\-") def get_decimal_places(self, value: Union[str, float, int, Decimal]) -> int: if Integer(value).is_type(): return 0 float_digit_len = 0 abs_value = abs(float(value)) text_value = str(abs_value) float_text = "0" if text_value.find(".") != -1: float_text = text_value.split(".")[1] float_digit_len = len(float_text) elif self.REGEXP_COMMON_LOG.search(text_value): float_text = self.REGEXP_SPLIT.split(text_value)[1] float_digit_len = int(float_text) return float_digit_len _digit_calculator = DigitCalculator() def get_number_of_digit( value: Any, max_decimal_places: int = 99 ) -> Tuple[Optional[int], Optional[int]]: try: integer_digits = get_integer_digit(value) except (ValueError, TypeError, OverflowError): return (None, None) try: decimal_places: Optional[int] = min( _digit_calculator.get_decimal_places(value), max_decimal_places ) except (ValueError, TypeError): decimal_places = None return (integer_digits, decimal_places) def _validate_eaaw(east_asian_ambiguous_width: int) -> None: if east_asian_ambiguous_width in (1, 2): return raise ValueError( "invalid east_asian_ambiguous_width: expected=1 or 2, actual={}".format( east_asian_ambiguous_width ) ) def strip_ansi_escape(unicode_str: str) -> str: return _ansi_escape.sub("", unicode_str) def calc_ascii_char_width(unicode_str: str, east_asian_ambiguous_width: int = 1) -> int: import unicodedata width = 0 for char in unicode_str: char_width = unicodedata.east_asian_width(char) if char_width in "WF": width += 2 elif char_width == "A": _validate_eaaw(east_asian_ambiguous_width) width += east_asian_ambiguous_width else: width += 1 return width thombashi-DataProperty-4654928/dataproperty/_interface.py000066400000000000000000000011621445476334700235230ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import abc from typing import Optional from typepy import Typecode from ._align import Align class DataPeropertyInterface(metaclass=abc.ABCMeta): __slots__ = () @abc.abstractproperty def align(self) -> Align: # pragma: no cover pass @abc.abstractproperty def decimal_places(self) -> Optional[int]: # pragma: no cover pass @abc.abstractproperty def typecode(self) -> Typecode: # pragma: no cover pass @abc.abstractproperty def typename(self) -> str: # pragma: no cover pass thombashi-DataProperty-4654928/dataproperty/_line_break.py000066400000000000000000000001621445476334700236550ustar00rootroot00000000000000from enum import Enum, unique @unique class LineBreakHandling(Enum): NOP = 0 REPLACE = 1 ESCAPE = 2 thombashi-DataProperty-4654928/dataproperty/_preprocessor.py000066400000000000000000000125331445476334700243150ustar00rootroot00000000000000import html import re from typing import Any, Optional, Tuple, Union from mbstrdecoder import MultiByteStrDecoder from ._function import strip_ansi_escape from ._line_break import LineBreakHandling _RE_LINE_BREAK = re.compile(r"\r\n|\n") _RE_FORMULA_PREFIX = re.compile(r"^[-\+=@]") def normalize_lbh(value: Optional[LineBreakHandling]) -> LineBreakHandling: if isinstance(value, LineBreakHandling): return value if value is None: return LineBreakHandling.NOP return LineBreakHandling[value.upper()] # type: ignore class Preprocessor: @property def line_break_handling(self) -> Optional[LineBreakHandling]: return self.__line_break_handling @line_break_handling.setter def line_break_handling(self, value: Optional[LineBreakHandling]) -> None: self.__line_break_handling = normalize_lbh(value) def __init__( self, strip_str: Optional[Union[str, bytes]] = None, replace_tabs_with_spaces: bool = True, tab_length: int = 2, line_break_handling: Optional[LineBreakHandling] = None, line_break_repl: str = " ", dequote: bool = False, is_escape_html_tag: bool = False, is_escape_formula_injection: bool = False, ) -> None: self.strip_str = strip_str self.replace_tabs_with_spaces = replace_tabs_with_spaces self.tab_length = tab_length self.line_break_handling = line_break_handling self.line_break_repl = line_break_repl self.dequote = dequote self.is_escape_html_tag = is_escape_html_tag self.is_escape_formula_injection = is_escape_formula_injection def __repr__(self) -> str: return ", ".join( [ f"strip_str={self.strip_str!r}", f"replace_tabs_with_spaces={self.replace_tabs_with_spaces}", f"tab_length={self.tab_length}", f"line_break_handling={self.line_break_handling}", f"line_break_repl={self.line_break_repl}", f"escape_html_tag={self.is_escape_html_tag}", f"escape_formula_injection={self.is_escape_formula_injection}", ] ) def preprocess(self, data: Any) -> Tuple: data, no_ansi_escape_data = self.__preprocess_string( self.__preprocess_data(data, self.strip_str), ) return (data, no_ansi_escape_data) def update(self, **kwargs: Any) -> bool: is_updated = False for key, value in kwargs.items(): if not hasattr(self, key): continue if getattr(self, key) == value: continue setattr(self, key, value) is_updated = True return is_updated def __preprocess_string(self, raw_data: Any) -> Tuple[Any, Optional[str]]: data = raw_data if not isinstance(data, str): return (data, None) if self.replace_tabs_with_spaces: try: data = data.replace("\t", " " * self.tab_length) except (TypeError, AttributeError, ValueError): pass if self.is_escape_html_tag: try: data = html.escape(data) except AttributeError: return (data, None) data = self.__process_line_break(data) data = self.__escape_formula_injection(data) data = self.__dequote(data) try: return (data, strip_ansi_escape(data)) except TypeError: return (data, None) @staticmethod def __preprocess_data(data: Any, strip_str: Optional[Union[str, bytes]]) -> Any: if strip_str is None: return data try: return data.strip(strip_str) except AttributeError: return data except UnicodeDecodeError: return MultiByteStrDecoder(data).unicode_str.strip(strip_str) except TypeError: # reach here when data and strip_str type are different if isinstance(data, bytes): return MultiByteStrDecoder(data).unicode_str.strip(strip_str) elif isinstance(strip_str, bytes): return data.strip(MultiByteStrDecoder(strip_str).unicode_str) def __dequote(self, s: str) -> str: if not self.dequote or not s: return s try: if (s[0] == s[-1]) and s.startswith(("'", '"')): if s.count(s[0]) == 2: return s[1:-1] except TypeError: pass return s def __process_line_break(self, data: str) -> str: lbh = self.line_break_handling if lbh == LineBreakHandling.NOP: return data try: if lbh == LineBreakHandling.REPLACE: return _RE_LINE_BREAK.sub(self.line_break_repl, data) if lbh == LineBreakHandling.ESCAPE: return data.replace("\n", "\\n").replace("\r", "\\r") except (TypeError, AttributeError): return data raise ValueError(f"unexpected line_break_handling: {lbh}") def __escape_formula_injection(self, data: str) -> str: if not self.is_escape_formula_injection: return data try: if _RE_FORMULA_PREFIX.search(data): return "'" + data except (TypeError, AttributeError): return data return data thombashi-DataProperty-4654928/dataproperty/logger/000077500000000000000000000000001445476334700223315ustar00rootroot00000000000000thombashi-DataProperty-4654928/dataproperty/logger/__init__.py000066400000000000000000000001301445476334700244340ustar00rootroot00000000000000from ._logger import logger, set_logger __all__ = ( "logger", "set_logger", ) thombashi-DataProperty-4654928/dataproperty/logger/_logger.py000066400000000000000000000006721445476334700243260ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ from ._null_logger import NullLogger MODULE_NAME = "dataproperty" try: from loguru import logger logger.disable(MODULE_NAME) except ImportError: logger = NullLogger() # type: ignore def set_logger(is_enable: bool, propagation_depth: int = 1) -> None: if is_enable: logger.enable(MODULE_NAME) else: logger.disable(MODULE_NAME) thombashi-DataProperty-4654928/dataproperty/logger/_null_logger.py000066400000000000000000000020571445476334700253570ustar00rootroot00000000000000class NullLogger: level_name = None def remove(self, handler_id=None): # pragma: no cover pass def add(self, sink, **kwargs): # pragma: no cover pass def disable(self, name): # pragma: no cover pass def enable(self, name): # pragma: no cover pass def critical(self, __message, *args, **kwargs): # pragma: no cover pass def debug(self, __message, *args, **kwargs): # pragma: no cover pass def error(self, __message, *args, **kwargs): # pragma: no cover pass def exception(self, __message, *args, **kwargs): # pragma: no cover pass def info(self, __message, *args, **kwargs): # pragma: no cover pass def log(self, __level, __message, *args, **kwargs): # pragma: no cover pass def success(self, __message, *args, **kwargs): # pragma: no cover pass def trace(self, __message, *args, **kwargs): # pragma: no cover pass def warning(self, __message, *args, **kwargs): # pragma: no cover pass thombashi-DataProperty-4654928/dataproperty/py.typed000066400000000000000000000000001445476334700225370ustar00rootroot00000000000000thombashi-DataProperty-4654928/dataproperty/typing.py000066400000000000000000000025731445476334700227450ustar00rootroot00000000000000from datetime import datetime from decimal import Decimal from typing import Any, Callable, Mapping, Optional, Type, Union from typepy import ( Bool, DateTime, Dictionary, Infinity, Integer, IpAddress, List, Nan, NoneType, NullString, RealNumber, String, Typecode, ) from typepy.type import AbstractType TypeHint = Optional[Type[AbstractType]] TransFunc = Callable[[Any], Any] DateTimeFormatter = Callable[[datetime], str] FloatType = Union[Type[Decimal], Type[float]] StrictLevelMap = Mapping[Union[str, Typecode], int] TypeValueMap = Mapping[Typecode, Union[float, Decimal, None]] _type_hint_map = { # high frequently used types "int": Integer, "float": RealNumber, "realnumber": RealNumber, "str": String, # low frequently used types "bool": Bool, "datetime": DateTime, "dict": Dictionary, "inf": Infinity, "ip": IpAddress, "list": List, "nan": Nan, "none": NoneType, "nullstr": NullString, } def normalize_type_hint(type_hint: Union[str, TypeHint]) -> TypeHint: if not type_hint: return None if not isinstance(type_hint, str): return type_hint type_hint = type_hint.strip().casefold() for key, value in _type_hint_map.items(): if type_hint.startswith(key): return value raise ValueError(f"unknown typehint: {type_hint}") thombashi-DataProperty-4654928/examples/000077500000000000000000000000001445476334700201525ustar00rootroot00000000000000thombashi-DataProperty-4654928/examples/ipynb/000077500000000000000000000000001445476334700212735ustar00rootroot00000000000000thombashi-DataProperty-4654928/examples/ipynb/DataProperty.ipynb000066400000000000000000000203511445476334700247550ustar00rootroot00000000000000{ "cells": [ { "cell_type": "code", "execution_count": 1, "source": [ "import datetime\n", "from dataproperty import DataProperty, DataPropertyExtractor\n", "from typepy import Typecode\n", "\n", "dt = datetime.datetime(2017, 1, 1, 0, 0, 0)\n", "inf = float(\"inf\")\n", "nan = float(\"nan\")\n", "data_matrix = [\n", " [1, 1.1, \"aa\", 1, 1, True, inf, nan, dt],\n", " [2, 2.2, \"bbb\", 2.2, 2.2, False, \"inf\", \"nan\", dt],\n", " [3, 3.33, \"cccc\", -3, \"ccc\", \"true\", inf, \"NAN\", \"2017-01-01T01:23:45+0900\"],\n", "]\n", "dp_extractor = DataPropertyExtractor()" ], "outputs": [], "metadata": {} }, { "cell_type": "code", "execution_count": 2, "source": [ "print(DataProperty(-1.1))" ], "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "data=-1.1, type=REAL_NUMBER, align=right, ascii_width=4, int_digits=1, decimal_places=1, extra_len=1\n" ] } ], "metadata": {} }, { "cell_type": "code", "execution_count": 3, "source": [ "print(DataProperty(123456789))" ], "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "data=123456789, type=INTEGER, align=right, ascii_width=9, int_digits=9, decimal_places=0, extra_len=0\n" ] } ], "metadata": {} }, { "cell_type": "code", "execution_count": 4, "source": [ "print(DataProperty(\"sample string\"))" ], "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "data=sample string, type=STRING, align=left, ascii_width=13, length=13, extra_len=0\n" ] } ], "metadata": { "scrolled": true } }, { "cell_type": "code", "execution_count": 5, "source": [ "print(DataProperty(\"吾輩は猫である\"))" ], "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "data=吾輩は猫である, type=STRING, align=left, ascii_width=14, length=7, extra_len=0\n" ] } ], "metadata": {} }, { "cell_type": "code", "execution_count": 6, "source": [ "import datetime\n", "\n", "print(DataProperty(datetime.datetime(2017, 1, 1, 0, 0, 0)))" ], "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "data=2017-01-01 00:00:00, type=DATETIME, align=left, ascii_width=19, extra_len=0\n" ] } ], "metadata": {} }, { "cell_type": "code", "execution_count": 7, "source": [ "print(DataProperty(\"2017-01-01T01:23:45+0900\"))" ], "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "data=2017-01-01T01:23:45+0900, type=STRING, align=left, ascii_width=24, length=24, extra_len=0\n" ] } ], "metadata": {} }, { "cell_type": "code", "execution_count": 8, "source": [ "print(DataProperty(True))" ], "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "data=True, type=BOOL, align=left, ascii_width=4, extra_len=0\n" ] } ], "metadata": {} }, { "cell_type": "code", "execution_count": 9, "source": [ "dp_matrix = dp_extractor.to_dp_matrix(data_matrix)\n", "\n", "for row, dp_list in enumerate(dp_matrix):\n", " for col, dp in enumerate(dp_list):\n", " print(\"row={:d}, col={:d}, {}\".format(row, col, str(dp)))" ], "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "row=0, col=0, data=1, type=INTEGER, align=right, ascii_width=1, int_digits=1, decimal_places=0, extra_len=0\n", "row=0, col=1, data=1.1, type=REAL_NUMBER, align=right, ascii_width=3, int_digits=1, decimal_places=1, extra_len=0\n", "row=0, col=2, data=aa, type=STRING, align=left, ascii_width=2, length=2, extra_len=0\n", "row=0, col=3, data=1, type=INTEGER, align=right, ascii_width=1, int_digits=1, decimal_places=0, extra_len=0\n", "row=0, col=4, data=1, type=INTEGER, align=right, ascii_width=1, int_digits=1, decimal_places=0, extra_len=0\n", "row=0, col=5, data=True, type=BOOL, align=left, ascii_width=4, extra_len=0\n", "row=0, col=6, data=Infinity, type=INFINITY, align=left, ascii_width=8, extra_len=0\n", "row=0, col=7, data=NaN, type=NAN, align=left, ascii_width=3, extra_len=0\n", "row=0, col=8, data=2017-01-01 00:00:00, type=DATETIME, align=left, ascii_width=19, extra_len=0\n", "row=1, col=0, data=2, type=INTEGER, align=right, ascii_width=1, int_digits=1, decimal_places=0, extra_len=0\n", "row=1, col=1, data=2.2, type=REAL_NUMBER, align=right, ascii_width=3, int_digits=1, decimal_places=1, extra_len=0\n", "row=1, col=2, data=bbb, type=STRING, align=left, ascii_width=3, length=3, extra_len=0\n", "row=1, col=3, data=2.2, type=REAL_NUMBER, align=right, ascii_width=3, int_digits=1, decimal_places=1, extra_len=0\n", "row=1, col=4, data=2.2, type=REAL_NUMBER, align=right, ascii_width=3, int_digits=1, decimal_places=1, extra_len=0\n", "row=1, col=5, data=False, type=BOOL, align=left, ascii_width=5, extra_len=0\n", "row=1, col=6, data=Infinity, type=INFINITY, align=left, ascii_width=8, extra_len=0\n", "row=1, col=7, data=NaN, type=NAN, align=left, ascii_width=3, extra_len=0\n", "row=1, col=8, data=2017-01-01 00:00:00, type=DATETIME, align=left, ascii_width=19, extra_len=0\n", "row=2, col=0, data=3, type=INTEGER, align=right, ascii_width=1, int_digits=1, decimal_places=0, extra_len=0\n", "row=2, col=1, data=3.33, type=REAL_NUMBER, align=right, ascii_width=4, int_digits=1, decimal_places=2, extra_len=0\n", "row=2, col=2, data=cccc, type=STRING, align=left, ascii_width=4, length=4, extra_len=0\n", "row=2, col=3, data=-3, type=INTEGER, align=right, ascii_width=2, int_digits=1, decimal_places=0, extra_len=1\n", "row=2, col=4, data=ccc, type=STRING, align=left, ascii_width=3, length=3, extra_len=0\n", "row=2, col=5, data=true, type=STRING, align=left, ascii_width=4, length=4, extra_len=0\n", "row=2, col=6, data=Infinity, type=INFINITY, align=left, ascii_width=8, extra_len=0\n", "row=2, col=7, data=NaN, type=NAN, align=left, ascii_width=3, extra_len=0\n", "row=2, col=8, data=2017-01-01T01:23:45+0900, type=STRING, align=left, ascii_width=24, length=24, extra_len=0\n" ] } ], "metadata": { "scrolled": false } }, { "cell_type": "code", "execution_count": 10, "source": [ "dp_extractor.headers = [\"int\", \"float\", \"str\", \"num\", \"mix\", \"bool\", \"inf\", \"nan\", \"time\"]\n", "col_dp_list = dp_extractor.to_column_dp_list(dp_matrix)\n", "\n", "for col_idx, col_dp in enumerate(col_dp_list):\n", " print(str(col_dp))" ], "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "column=0, type=INTEGER, align=right, ascii_width=3, bit_len=2, int_digits=1, decimal_places=0\n", "column=1, type=REAL_NUMBER, align=right, ascii_width=5, int_digits=1, decimal_places=(min=1, max=2)\n", "column=2, type=STRING, align=left, ascii_width=4\n", "column=3, type=REAL_NUMBER, align=right, ascii_width=4, int_digits=1, decimal_places=(min=0, max=1), extra_len=(min=0, max=1)\n", "column=4, type=STRING, align=left, ascii_width=3, int_digits=1, decimal_places=(min=0, max=1)\n", "column=5, type=STRING, align=left, ascii_width=5\n", "column=6, type=INFINITY, align=left, ascii_width=8\n", "column=7, type=NAN, align=left, ascii_width=3\n", "column=8, type=STRING, align=left, ascii_width=24\n" ] } ], "metadata": {} }, { "cell_type": "code", "execution_count": null, "source": [], "outputs": [], "metadata": {} } ], "metadata": { "kernelspec": { "name": "python3", "display_name": "Python 3.8.11 64-bit ('3.8.11')" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.11" }, "interpreter": { "hash": "e1c230d0d7838db35080269a6918dd30585979c6f8544df43693174aeb261eb3" } }, "nbformat": 4, "nbformat_minor": 1 }thombashi-DataProperty-4654928/examples/ipynb/README.rst000066400000000000000000000001511445476334700227570ustar00rootroot00000000000000https://nbviewer.jupyter.org/github/thombashi/DataProperty/tree/master/examples/ipynb/DataProperty.ipynb thombashi-DataProperty-4654928/examples/py/000077500000000000000000000000001445476334700206025ustar00rootroot00000000000000thombashi-DataProperty-4654928/examples/py/to_column_dp_list.py000077500000000000000000000025551445476334700247030ustar00rootroot00000000000000#!/usr/bin/env python3 """ .. codeauthor:: Tsuyoshi Hombashi """ import datetime import sys from dataproperty import DataPropertyExtractor def display_col_dp(dp_list, attr_name): """ show a value associated with an attribute for each DataProperty instance in the dp_list """ print() print(f"---------- {attr_name:s} ----------") print([getattr(dp, attr_name) for dp in dp_list]) def main(): # sample data definitions dt = datetime.datetime(2017, 1, 1, 0, 0, 0) inf = float("inf") nan = float("nan") data_matrix = [ [1, 1.1, "aa", 1, 1, True, inf, nan, dt], [2, 2.2, "bbb", 2.2, 2.2, False, "inf", "nan", dt], [3, 3.33, "cccc", -3, "ccc", "true", inf, "NAN", "2017-01-01T01:23:45+0900"], ] # extract property for each column from a matrix dp_extractor = DataPropertyExtractor() dp_extractor.headers = ["int", "float", "str", "num", "mix", "bool", "inf", "nan", "time"] col_dp_list = dp_extractor.to_column_dp_list(dp_extractor.to_dp_matrix(data_matrix)) print("---------- typename ----------") print([dp.typecode.name for dp in col_dp_list]) display_col_dp(col_dp_list, "align") display_col_dp(col_dp_list, "ascii_char_width") display_col_dp(col_dp_list, "decimal_places") return 0 if __name__ == "__main__": sys.exit(main()) thombashi-DataProperty-4654928/examples/py/to_dp_matrix.py000077500000000000000000000026661445476334700236620ustar00rootroot00000000000000#!/usr/bin/env python3 """ .. codeauthor:: Tsuyoshi Hombashi """ import datetime import sys from dataproperty import DataPropertyExtractor def display_dp_matrix_attr(dp_matrix, attr_name): """ show a value associated with an attribute for each DataProperty instance in the dp_matrix """ print() print(f"---------- {attr_name:s} ----------") for dp_list in dp_matrix: print([getattr(dp, attr_name) for dp in dp_list]) def main(): # sample data definitions dt = datetime.datetime(2017, 1, 1, 0, 0, 0) inf = float("inf") nan = float("nan") # extract data property for each element from a matrix dp_extractor = DataPropertyExtractor() dp_matrix = dp_extractor.to_dp_matrix( [ [1, 1.1, "aa", 1, 1, True, inf, nan, dt], [2, 2.2, "bbb", 2.2, 2.2, False, "inf", "nan", dt], [3, 3.33, "cccc", -3, "ccc", "true", inf, "NAN", "2017-01-01T01:23:45+0900"], ] ) print("---------- typename ----------") for dp_list in dp_matrix: print([dp.typecode.name for dp in dp_list]) display_dp_matrix_attr(dp_matrix, "data") display_dp_matrix_attr(dp_matrix, "align") display_dp_matrix_attr(dp_matrix, "ascii_char_width") display_dp_matrix_attr(dp_matrix, "integer_digits") display_dp_matrix_attr(dp_matrix, "decimal_places") return 0 if __name__ == "__main__": sys.exit(main()) thombashi-DataProperty-4654928/misc/000077500000000000000000000000001445476334700172675ustar00rootroot00000000000000thombashi-DataProperty-4654928/misc/summary.txt000066400000000000000000000000571445476334700215270ustar00rootroot00000000000000Python library for extract property from data. thombashi-DataProperty-4654928/pylama.ini000066400000000000000000000005701445476334700203220ustar00rootroot00000000000000[pylama] skip = .eggs/*,.tox/*,*/.env/*,_sandbox/*,build/*,docs/conf.py [pylama:mccabe] max-complexity = 15 [pylama:pycodestyle] max_line_length = 100 [pylama:pylint] max_line_length = 100 [pylama:*/__init__.py] # W0611: imported but unused [pyflakes] ignore = W0611 [pylama:test/test_logger.py] # E402: module level import not at top of file [pycodestyle] ignore = E402 thombashi-DataProperty-4654928/pyproject.toml000066400000000000000000000022421445476334700212500ustar00rootroot00000000000000[build-system] requires = ["setuptools>=61.0"] build-backend = "setuptools.build_meta" [tool.black] line-length = 100 exclude = ''' /( \.eggs | \.git | \.mypy_cache | \.tox | \.venv | \.pytype | _build | buck-out | build | dist )/ | docs/conf.py ''' target-version = ['py37', 'py38', 'py39', 'py310', 'py311'] [tool.isort] known_third_party = [ 'pytest', ] include_trailing_comma = true line_length = 100 lines_after_imports = 2 multi_line_output = 3 skip_glob = [ '*/.eggs/*', '*/.pytype/*', '*/.tox/*', ] [tool.coverage.run] source = ['dataproperty'] branch = true [tool.coverage.report] show_missing = true precision = 1 exclude_lines = [ 'except ImportError', 'raise NotImplementedError', 'pass', 'ABCmeta', 'abstractmethod', 'abstractproperty', 'abstractclassmethod', 'warnings.warn', ] [tool.mypy] ignore_missing_imports = true python_version = 3.7 pretty = true show_error_codes = true show_error_context = true warn_unreachable = true warn_unused_configs = true [tool.pytest.ini_options] testpaths = [ "test", ] md_report = true md_report_verbose = 0 md_report_color = "auto" thombashi-DataProperty-4654928/requirements/000077500000000000000000000000001445476334700210575ustar00rootroot00000000000000thombashi-DataProperty-4654928/requirements/docs_requirements.txt000066400000000000000000000000301445476334700253440ustar00rootroot00000000000000sphinx_rtd_theme Sphinx thombashi-DataProperty-4654928/requirements/requirements.txt000066400000000000000000000000621445476334700243410ustar00rootroot00000000000000mbstrdecoder>=1.0.0,<2 typepy[datetime]>=1.2.0,<2 thombashi-DataProperty-4654928/requirements/test_requirements.txt000066400000000000000000000000641445476334700254020ustar00rootroot00000000000000pytest>=6.0.1 pytest-md-report>=0.3 tcolorpy>=0.1.2 thombashi-DataProperty-4654928/setup.py000066400000000000000000000055011445476334700200470ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import os.path from typing import Dict, Type import setuptools MODULE_NAME = "DataProperty" REPOSITORY_URL = f"https://github.com/thombashi/{MODULE_NAME:s}" MISC_DIR = "misc" REQUIREMENT_DIR = "requirements" pkg_info: Dict[str, str] = {} def get_release_command_class() -> Dict[str, Type[setuptools.Command]]: try: from releasecmd import ReleaseCommand except ImportError: return {} return {"release": ReleaseCommand} with open(os.path.join(MODULE_NAME.lower(), "__version__.py")) as f: exec(f.read(), pkg_info) with open("README.rst", encoding="utf8") as f: long_description = f.read() with open(os.path.join(MISC_DIR, "summary.txt"), encoding="utf8") as f: summary = f.read().strip() with open(os.path.join(REQUIREMENT_DIR, "requirements.txt")) as f: install_requires = [line.strip() for line in f if line.strip()] with open(os.path.join(REQUIREMENT_DIR, "test_requirements.txt")) as f: tests_requires = [line.strip() for line in f if line.strip()] setuptools.setup( name=MODULE_NAME, version=pkg_info["__version__"], url=REPOSITORY_URL, author=pkg_info["__author__"], author_email=pkg_info["__email__"], description=summary, include_package_data=True, keywords=[ "data", "library", "property", ], license=pkg_info["__license__"], long_description=long_description, long_description_content_type="text/x-rst", maintainer=pkg_info["__author__"], maintainer_email=pkg_info["__email__"], packages=setuptools.find_packages(exclude=["test*"]), package_data={MODULE_NAME: ["py.typed"]}, project_urls={ "Source": REPOSITORY_URL, "Tracker": f"{REPOSITORY_URL:s}/issues", }, python_requires=">=3.7", install_requires=install_requires, extras_require={ "logging": ["loguru>=0.4.1,<1"], "test": tests_requires, }, classifiers=[ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Software Development :: Libraries", "Topic :: Software Development :: Libraries :: Python Modules", ], cmdclass=get_release_command_class(), zip_safe=False, ) thombashi-DataProperty-4654928/test/000077500000000000000000000000001445476334700173135ustar00rootroot00000000000000thombashi-DataProperty-4654928/test/__init__.py000066400000000000000000000000001445476334700214120ustar00rootroot00000000000000thombashi-DataProperty-4654928/test/common.py000066400000000000000000000004031445476334700211520ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ from dataproperty._common import MAX_STRICT_LEVEL_MAP, MIN_STRICT_LEVEL_MAP def get_strict_level_map(is_strict): return MAX_STRICT_LEVEL_MAP if is_strict else MIN_STRICT_LEVEL_MAP thombashi-DataProperty-4654928/test/test_align_getter.py000066400000000000000000000033241445476334700233720ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import pytest from typepy import Typecode from dataproperty import Align from dataproperty._align_getter import AlignGetter @pytest.fixture def align_getter(): return AlignGetter() class Test_AlignGetter_get_align_from_typecode: @pytest.mark.parametrize( ["value", "expected"], [ [Typecode.STRING, Align.LEFT], [Typecode.INTEGER, Align.RIGHT], [Typecode.REAL_NUMBER, Align.RIGHT], [Typecode.NONE, Align.LEFT], ], ) def test_normal(self, align_getter, value, expected): assert align_getter.get_align_from_typecode(value) == expected @pytest.mark.parametrize( ["value", "expected"], [ [Typecode.STRING, Align.RIGHT], [Typecode.INTEGER, Align.LEFT], [Typecode.REAL_NUMBER, Align.CENTER], [Typecode.NONE, Align.LEFT], ], ) def test_setter(self, align_getter, value, expected): align_getter.typecode_align_table = { Typecode.STRING: Align.RIGHT, Typecode.INTEGER: Align.LEFT, Typecode.REAL_NUMBER: Align.CENTER, } assert align_getter.get_align_from_typecode(value) == expected @pytest.mark.parametrize( ["value", "expected"], [ [Typecode.STRING, Align.LEFT], [Typecode.INTEGER, Align.RIGHT], [Typecode.REAL_NUMBER, Align.RIGHT], [Typecode.NONE, Align.CENTER], ], ) def test_default_align(self, align_getter, value, expected): align_getter.default_align = Align.CENTER assert align_getter.get_align_from_typecode(value) == expected thombashi-DataProperty-4654928/test/test_column_dataproperty.py000066400000000000000000000552121445476334700250240ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import datetime import sys from decimal import Decimal from ipaddress import ip_address import pytest from tcolorpy import tcolor from typepy import ( Bool, DateTime, Dictionary, Infinity, Integer, IpAddress, List, Nan, NoneType, NullString, RealNumber, String, Typecode, ) from dataproperty import Align, ColumnDataProperty, DataProperty, Format nan = float("nan") inf = float("inf") class Test_ColumnDataPeroperty: DATATIME_DATA = datetime.datetime(2017, 1, 1, 1, 2, 3) @pytest.mark.parametrize( ["values", "expected_typecode", "expected_class"], [ # single type values [[None, None], Typecode.NONE, NoneType], [[0, sys.maxsize, str(sys.maxsize), -sys.maxsize], Typecode.INTEGER, Integer], [[0, 1.1, "0.01", -sys.maxsize], Typecode.REAL_NUMBER, RealNumber], [["-0.538882625371217", "0.268624155343302", ""], Typecode.REAL_NUMBER, RealNumber], [[ip_address("127.0.0.1"), ip_address("::1")], Typecode.IP_ADDRESS, IpAddress], [[0, 1.1, -sys.maxsize, "test"], Typecode.STRING, String], [["", ""], Typecode.NULL_STRING, NullString], [[True, True, False], Typecode.BOOL, Bool], [[True, "True", False], Typecode.STRING, String], [[DATATIME_DATA, DATATIME_DATA], Typecode.DATETIME, DateTime], [["2017-01-01 00:00:00", "2017-01-02 03:04:05+09:00"], Typecode.STRING, String], [[inf, "inf", "infinity", "INF"], Typecode.INFINITY, Infinity], [[nan, "nan", "NAN"], Typecode.NAN, Nan], [[{"a": 1}, {"b": 2}], Typecode.DICTIONARY, Dictionary], [[[1, 2], ["a", "b"]], Typecode.LIST, List], # not mixed types with None value [[None, sys.maxsize, str(-sys.maxsize)], Typecode.INTEGER, Integer], [[1, None, ""], Typecode.INTEGER, Integer], [[1.1, None], Typecode.REAL_NUMBER, RealNumber], [[1.1, None, ""], Typecode.REAL_NUMBER, RealNumber], [[0, 1.1, Decimal("0.1"), None, ""], Typecode.REAL_NUMBER, RealNumber], [ [ip_address("192.168.0.1"), None, ip_address("::1"), None], Typecode.IP_ADDRESS, IpAddress, ], [[None, "test"], Typecode.STRING, String], [[None, True, False], Typecode.BOOL, Bool], [[None, True, "False"], Typecode.STRING, String], [[None, DATATIME_DATA, None], Typecode.DATETIME, DateTime], [[None, inf], Typecode.INFINITY, Infinity], [[None, nan], Typecode.NAN, Nan], # mixed types [[True, 1], Typecode.STRING, String], [[DATATIME_DATA, "test"], Typecode.STRING, String], [[inf, 0.1], Typecode.REAL_NUMBER, RealNumber], [[inf, "test"], Typecode.STRING, String], [[nan, 0.1], Typecode.REAL_NUMBER, RealNumber], [[nan, "test"], Typecode.STRING, String], [[sys.maxsize, inf, nan], Typecode.REAL_NUMBER, RealNumber], [[DATATIME_DATA, str(DATATIME_DATA), DATATIME_DATA], Typecode.STRING, String], [[1, 1.1, DATATIME_DATA, "test", None, True, inf, Nan], Typecode.STRING, String], ], ) def test_normal_typecode_type_class(self, values, expected_typecode, expected_class): col_dp = ColumnDataProperty(0, float_type=Decimal) col_dp.update_header(DataProperty("dummy")) for value in values: col_dp.update_body(DataProperty(value)) assert col_dp.typecode == expected_typecode assert col_dp.type_class == expected_class def test_normal_number_0(self): col_dp = ColumnDataProperty(0, float_type=Decimal) col_dp.update_header(DataProperty("abc")) for value in [0, -1.234, 55.55]: col_dp.update_body(DataProperty(value)) assert col_dp.align == Align.RIGHT assert col_dp.decimal_places == 3 assert col_dp.typecode == Typecode.REAL_NUMBER assert col_dp.ascii_char_width == 6 assert col_dp.minmax_integer_digits.min_value == 1 assert col_dp.minmax_integer_digits.max_value == 2 assert col_dp.minmax_decimal_places.min_value == 0 assert col_dp.minmax_decimal_places.max_value == 3 assert col_dp.minmax_additional_format_len.min_value == 0 assert col_dp.minmax_additional_format_len.max_value == 1 assert str(col_dp) == ( "column=0, type=REAL_NUMBER, align=right, ascii_width=6, " "int_digits=(min=1, max=2), decimal_places=(min=0, max=3), " "extra_len=(min=0, max=1)" ) def test_normal_number_1(self): col_dp = ColumnDataProperty(0, float_type=Decimal) col_dp.update_header(DataProperty("abc")) for value in [0, inf, nan]: col_dp.update_body(DataProperty(value)) assert col_dp.align == Align.RIGHT assert col_dp.decimal_places == 0 assert col_dp.typecode == Typecode.REAL_NUMBER assert col_dp.ascii_char_width == 8 assert col_dp.minmax_integer_digits.min_value == 1 assert col_dp.minmax_integer_digits.max_value == 1 assert col_dp.minmax_decimal_places.min_value == 0 assert col_dp.minmax_decimal_places.max_value == 0 assert col_dp.minmax_additional_format_len.min_value == 0 assert col_dp.minmax_additional_format_len.max_value == 0 assert str(col_dp) == ( "column=0, type=REAL_NUMBER, align=right, ascii_width=8, " "int_digits=1, decimal_places=0" ) def test_normal_number_2(self): col_dp = ColumnDataProperty(0, float_type=Decimal) col_dp.update_header(DataProperty("abc")) for value in [1, 2.2, -3]: col_dp.update_body(DataProperty(value)) assert col_dp.align == Align.RIGHT assert col_dp.decimal_places == 1 assert col_dp.typecode == Typecode.REAL_NUMBER assert col_dp.ascii_char_width == 4 assert col_dp.minmax_integer_digits.min_value == 1 assert col_dp.minmax_integer_digits.max_value == 1 assert col_dp.minmax_decimal_places.min_value == 0 assert col_dp.minmax_decimal_places.max_value == 1 assert col_dp.minmax_additional_format_len.min_value == 0 assert col_dp.minmax_additional_format_len.max_value == 1 assert str(col_dp) == ( "column=0, type=REAL_NUMBER, align=right, ascii_width=4, " "int_digits=1, decimal_places=(min=0, max=1), " "extra_len=(min=0, max=1)" ) def test_normal_number_3(self): col_dp = ColumnDataProperty(0, float_type=Decimal) col_dp.update_header(DataProperty("abc")) for value in [0.01, 2.2, None]: col_dp.update_body(DataProperty(value)) assert col_dp.align == Align.RIGHT assert col_dp.decimal_places == 2 assert col_dp.typecode == Typecode.REAL_NUMBER assert col_dp.ascii_char_width == 4 assert col_dp.minmax_integer_digits.min_value == 1 assert col_dp.minmax_integer_digits.max_value == 1 assert col_dp.minmax_decimal_places.min_value == 1 assert col_dp.minmax_decimal_places.max_value == 2 assert col_dp.minmax_additional_format_len.min_value == 0 assert col_dp.minmax_additional_format_len.max_value == 0 assert str(col_dp) == ( "column=0, type=REAL_NUMBER, align=right, ascii_width=4, " "int_digits=1, decimal_places=(min=1, max=2)" ) def test_normal_number_4(self): col_dp = ColumnDataProperty(0, float_type=Decimal) col_dp.update_header(DataProperty("abc")) for value in [0.01, 1.0, 1.2]: col_dp.update_body(DataProperty(value)) assert col_dp.align == Align.RIGHT assert col_dp.decimal_places == 2 assert col_dp.typecode == Typecode.REAL_NUMBER assert col_dp.ascii_char_width == 4 assert col_dp.minmax_integer_digits.min_value == 1 assert col_dp.minmax_integer_digits.max_value == 1 assert col_dp.minmax_decimal_places.min_value == 0 assert col_dp.minmax_decimal_places.max_value == 2 assert col_dp.minmax_additional_format_len.min_value == 0 assert col_dp.minmax_additional_format_len.max_value == 0 assert str(col_dp) == ( "column=0, type=REAL_NUMBER, align=right, ascii_width=4, " "int_digits=1, decimal_places=(min=0, max=2)" ) def test_normal_number_5(self): col_dp = ColumnDataProperty(0, float_type=Decimal) col_dp.update_header(DataProperty("abc")) for value in [1.1, 2.2, 3.33]: col_dp.update_body(DataProperty(value)) assert col_dp.align == Align.RIGHT assert col_dp.decimal_places == 2 assert col_dp.typecode == Typecode.REAL_NUMBER assert col_dp.ascii_char_width == 4 assert col_dp.minmax_integer_digits.min_value == 1 assert col_dp.minmax_integer_digits.max_value == 1 assert col_dp.minmax_decimal_places.min_value == 1 assert col_dp.minmax_decimal_places.max_value == 2 assert col_dp.minmax_additional_format_len.min_value == 0 assert col_dp.minmax_additional_format_len.max_value == 0 assert str(col_dp) == ( "column=0, type=REAL_NUMBER, align=right, ascii_width=4, " "int_digits=1, decimal_places=(min=1, max=2)" ) def test_normal_text_number(self): col_dp = ColumnDataProperty(0, float_type=Decimal) col_dp.update_header(DataProperty("abc")) for value in ["1,000,000,000,000", "1"]: col_dp.update_body(DataProperty(value)) assert col_dp.align == Align.RIGHT assert col_dp.decimal_places == 0 assert col_dp.typecode == Typecode.INTEGER assert col_dp.ascii_char_width == 13 assert col_dp.minmax_integer_digits.min_value == 1 assert col_dp.minmax_integer_digits.max_value == 13 assert col_dp.minmax_decimal_places.min_value == 0 assert col_dp.minmax_decimal_places.max_value == 0 assert col_dp.minmax_additional_format_len.min_value == 0 assert col_dp.minmax_additional_format_len.max_value == 0 assert str(col_dp) == ( "column=0, type=INTEGER, align=right, ascii_width=13, " "bit_len=40, int_digits=(min=1, max=13), decimal_places=0" ) def test_normal_inf(self): col_dp = ColumnDataProperty(0, float_type=Decimal) col_dp.update_header(DataProperty("inf")) for value in [inf, None, inf, "inf"]: col_dp.update_body(DataProperty(value)) assert col_dp.align == Align.LEFT assert col_dp.decimal_places is None assert col_dp.typecode == Typecode.INFINITY assert col_dp.ascii_char_width == 8 assert col_dp.minmax_integer_digits.min_value is None assert col_dp.minmax_integer_digits.max_value is None assert col_dp.minmax_decimal_places.min_value is None assert col_dp.minmax_decimal_places.max_value is None assert col_dp.minmax_additional_format_len.min_value == 0 assert col_dp.minmax_additional_format_len.max_value == 0 assert str(col_dp) == ("column=0, type=INFINITY, align=left, ascii_width=8") def test_normal_mix_0(self): col_dp = ColumnDataProperty(0, float_type=Decimal) col_dp.update_header(DataProperty("abc")) for value in [0, -1.234, 55.55, "abcdefg"]: col_dp.update_body(DataProperty(value)) assert col_dp.align == Align.LEFT assert col_dp.decimal_places == 3 assert col_dp.typecode == Typecode.STRING assert col_dp.ascii_char_width == 7 assert col_dp.minmax_integer_digits.min_value == 1 assert col_dp.minmax_integer_digits.max_value == 2 assert col_dp.minmax_decimal_places.min_value == 0 assert col_dp.minmax_decimal_places.max_value == 3 assert col_dp.minmax_additional_format_len.min_value == 0 assert col_dp.minmax_additional_format_len.max_value == 1 assert str(col_dp) == ( "column=0, type=STRING, align=left, ascii_width=7, " "int_digits=(min=1, max=2), decimal_places=(min=0, max=3), " "extra_len=(min=0, max=1)" ) def test_normal_number_ansi_escape(self): col_dp = ColumnDataProperty(0, float_type=Decimal) col_dp.update_header(DataProperty("abc")) for value in [ tcolor("1", color="red"), tcolor("2.2", color="green"), tcolor("-3", color="blue"), ]: col_dp.update_body(DataProperty(value)) assert col_dp.align == Align.RIGHT assert col_dp.decimal_places == 1 assert col_dp.typecode == Typecode.REAL_NUMBER assert col_dp.ascii_char_width == 4 assert col_dp.minmax_integer_digits.min_value == 1 assert col_dp.minmax_integer_digits.max_value == 1 assert col_dp.minmax_decimal_places.min_value == 0 assert col_dp.minmax_decimal_places.max_value == 1 assert col_dp.minmax_additional_format_len.min_value == 0 assert col_dp.minmax_additional_format_len.max_value == 1 assert str(col_dp) == ( "column=0, type=REAL_NUMBER, align=right, ascii_width=4, " "int_digits=1, decimal_places=(min=0, max=1), " "extra_len=(min=0, max=1)" ) def test_normal_mix_ansi_escape(self): col_dp = ColumnDataProperty(0, float_type=Decimal) col_dp.update_header(DataProperty("abc")) for value in [ tcolor("0", color="red"), tcolor("-1.234", color="yellow"), tcolor("55.55", color="green"), tcolor("abcdefg", color="blue"), ]: col_dp.update_body(DataProperty(value)) assert col_dp.align == Align.LEFT assert col_dp.decimal_places == 3 assert col_dp.typecode == Typecode.STRING assert col_dp.ascii_char_width == 7 assert col_dp.minmax_integer_digits.min_value == 1 assert col_dp.minmax_integer_digits.max_value == 2 assert col_dp.minmax_decimal_places.min_value == 0 assert col_dp.minmax_decimal_places.max_value == 3 assert col_dp.minmax_additional_format_len.min_value == 0 assert col_dp.minmax_additional_format_len.max_value == 1 assert str(col_dp) == ( "column=0, type=STRING, align=left, ascii_width=7, " "int_digits=(min=1, max=2), decimal_places=(min=0, max=3), " "extra_len=(min=0, max=1)" ) @pytest.mark.parametrize( ["values", "expected"], [ [[0, 1, 0, 1], 1], [[-128, 0, 127, None], 8], ], ) def test_normal_bit_length(self, values, expected): col_dp = ColumnDataProperty(0, float_type=Decimal) col_dp.update_header(DataProperty("dummy")) for value in values: col_dp.update_body(DataProperty(value)) assert col_dp.typecode == Typecode.INTEGER assert col_dp.bit_length == expected @pytest.mark.parametrize( ["values", "expected"], [ [[0.1, 1], None], [["aaa", "0.0.0.0"], None], ], ) def test_abnormal_bit_length(self, values, expected): col_dp = ColumnDataProperty(0, float_type=Decimal) col_dp.update_header(DataProperty("dummy")) for value in values: col_dp.update_body(DataProperty(value)) assert col_dp.bit_length == expected def test_normal_multibyte_char(self): col_dp = ColumnDataProperty(0, float_type=Decimal) col_dp.update_header(DataProperty("abc")) for value in ["いろは", "abcde"]: col_dp.update_body(DataProperty(value)) assert col_dp.align == Align.LEFT assert col_dp.decimal_places is None assert col_dp.typecode == Typecode.STRING assert col_dp.ascii_char_width == 6 assert col_dp.minmax_integer_digits.min_value is None assert col_dp.minmax_integer_digits.max_value is None assert col_dp.minmax_decimal_places.min_value is None assert col_dp.minmax_decimal_places.max_value is None assert col_dp.minmax_additional_format_len.min_value == 0 assert col_dp.minmax_additional_format_len.max_value == 0 assert str(col_dp) == ("column=0, type=STRING, align=left, ascii_width=6") @pytest.mark.parametrize( ["ambiguous_width", "ascii_char_width"], [ [2, 6], [1, 3], ], ) def test_normal_east_asian_ambiguous_width(self, ambiguous_width, ascii_char_width): col_dp = ColumnDataProperty( 0, float_type=Decimal, east_asian_ambiguous_width=ambiguous_width ) col_dp.update_header(DataProperty("abc")) for value in ["ØØØ", "α", "ββ"]: col_dp.update_body(DataProperty(value, east_asian_ambiguous_width=ambiguous_width)) assert col_dp.align == Align.LEFT assert col_dp.decimal_places is None assert col_dp.typecode == Typecode.STRING assert col_dp.ascii_char_width == ascii_char_width assert col_dp.minmax_integer_digits.min_value is None assert col_dp.minmax_integer_digits.max_value is None assert col_dp.minmax_decimal_places.min_value is None assert col_dp.minmax_decimal_places.max_value is None assert col_dp.minmax_additional_format_len.min_value == 0 assert col_dp.minmax_additional_format_len.max_value == 0 def test_normal_max_precision(self): col_dp = ColumnDataProperty(column_index=0, float_type=Decimal, max_precision=3) col_dp.update_header(DataProperty("abc")) for value in ["0.0000000000001", "0.1", "0"]: col_dp.update_body(DataProperty(value)) assert col_dp.align == Align.RIGHT assert col_dp.decimal_places == 3 assert col_dp.typecode == Typecode.REAL_NUMBER assert col_dp.ascii_char_width == 5 assert col_dp.minmax_integer_digits.min_value == 1 assert col_dp.minmax_integer_digits.max_value == 1 assert col_dp.minmax_decimal_places.min_value == 0 assert col_dp.minmax_decimal_places.max_value == 13 assert col_dp.minmax_additional_format_len.min_value == 0 assert col_dp.minmax_additional_format_len.max_value == 0 def test_min_width(self): min_width = 100 col_dp = ColumnDataProperty(0, min_width=min_width, float_type=Decimal) col_dp.update_header(DataProperty("abc")) for value in [0, -1.234, 55.55]: col_dp.update_body(DataProperty(value)) assert col_dp.align == Align.RIGHT assert col_dp.decimal_places == 3 assert col_dp.typecode == Typecode.REAL_NUMBER assert col_dp.ascii_char_width == min_width assert col_dp.minmax_integer_digits.min_value == 1 assert col_dp.minmax_integer_digits.max_value == 2 assert col_dp.minmax_decimal_places.min_value == 0 assert col_dp.minmax_decimal_places.max_value == 3 assert col_dp.minmax_additional_format_len.min_value == 0 assert col_dp.minmax_additional_format_len.max_value == 1 assert str(col_dp) == ( "column=0, type=REAL_NUMBER, align=right, ascii_width=100, " "int_digits=(min=1, max=2), decimal_places=(min=0, max=3), " "extra_len=(min=0, max=1)" ) def test_extend_width(self): col_dp = ColumnDataProperty(0, float_type=Decimal) col_dp.update_header(DataProperty("abc")) assert col_dp.ascii_char_width == 3 col_dp.extend_width(2) assert col_dp.ascii_char_width == 5 def test_null(self): col_dp = ColumnDataProperty(0, float_type=Decimal) assert col_dp.align == Align.LEFT assert col_dp.decimal_places is None assert col_dp.typecode == Typecode.NONE assert col_dp.ascii_char_width == 0 class Test_ColumnDataPeroperty_dp_to_str: def test_normal_0(self): col_dp = ColumnDataProperty(0, float_type=Decimal) values = [0.1, 3.4375, 65.5397978633, 189.74439359, 10064.0097539, "abcd"] expected_list = [ "0.1000000000", "3.4375000000", "65.5397978633", "189.7443935900", "10064.0097539000", "abcd", ] col_dp.update_header(DataProperty("abc")) for value in values: col_dp.update_body(DataProperty(value)) assert len(values) == len(expected_list) for value, expected in zip(values, expected_list): assert col_dp.dp_to_str(DataProperty(value)) == expected def test_normal_1(self): col_dp = ColumnDataProperty(0, float_type=Decimal) values = [0, 0.1] expected_list = ["0", "0.1"] col_dp.update_header(DataProperty("abc")) for value in ["abcd", "efg"]: col_dp.update_body(DataProperty(value)) assert len(values) == len(expected_list) for value, expected in zip(values, expected_list): assert col_dp.dp_to_str(DataProperty(value)) == expected def test_normal_2(self): col_dp = ColumnDataProperty(0, float_type=Decimal) values = [1.1, 2.2, 3.33] expected_list = ["1.10", "2.20", "3.33"] col_dp.update_header(DataProperty("abc")) for value in values: col_dp.update_body(DataProperty(value)) assert len(values) == len(expected_list) for value, expected in zip(values, expected_list): assert col_dp.dp_to_str(DataProperty(value)) == expected def test_normal_dict(self): col_dp = ColumnDataProperty(0, float_type=Decimal) values = ["abc", {"1": 1}] expected_list = ["abc", "{'1': 1}"] col_dp.update_header(DataProperty("dict item")) for value in values: col_dp.update_body(DataProperty(value)) assert len(values) == len(expected_list) for value, expected in zip(values, expected_list): assert col_dp.dp_to_str(DataProperty(value)) == expected @pytest.mark.parametrize( ["values", "expected_list"], [ [[1234, 223, 1234567], ["1,234", "223", "1,234,567"]], [[1234.1, 223.33, 1234567.33], ["1,234.10", "223.33", "1,234,567.33"]], ], ) def test_normal_format(self, values, expected_list): col_dp = ColumnDataProperty(0, format_flags=Format.THOUSAND_SEPARATOR, float_type=Decimal) col_dp.update_header(DataProperty("format test")) for value in values: col_dp.update_body(DataProperty(value)) assert len(values) == len(expected_list) for value, expected in zip(values, expected_list): assert col_dp.dp_to_str(DataProperty(value)) == expected thombashi-DataProperty-4654928/test/test_container.py000066400000000000000000000101171445476334700227060ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import sys import pytest from typepy import Nan from dataproperty import MinMaxContainer @pytest.fixture def container(): return MinMaxContainer() class Test_MinMaxContainer_property: def test_null(self, container): assert container.min_value is None assert container.max_value is None class Test_MinMaxContainer_repr: @pytest.mark.parametrize( ["values", "expected"], [[[1, 3], "min=1, max=3"], [[1], "min=1, max=1"], [[None, None], "None"]], ) def test_normal(self, values, expected): assert str(MinMaxContainer(values)) == expected class Test_MinMaxContainer_eq_ne: @pytest.mark.parametrize( ["lhs", "rhs", "expected"], [ [MinMaxContainer([1, 3]), MinMaxContainer([1, 3]), True], [MinMaxContainer([1, 3]), MinMaxContainer([1, 4]), False], [MinMaxContainer([1, 3]), MinMaxContainer([0, 3]), False], [MinMaxContainer([1, 3]), MinMaxContainer([0, 4]), False], ], ) def test_normal(self, lhs, rhs, expected): assert (lhs == rhs) == expected assert (lhs != rhs) == (not expected) class Test_MinMaxContainer_contains: @pytest.mark.parametrize( ["lhs", "rhs", "expected"], [ [1, MinMaxContainer([1, 3]), True], [3, MinMaxContainer([1, 3]), True], [0, MinMaxContainer([1, 3]), False], [4, MinMaxContainer([1, 3]), False], ], ) def test_normal(self, lhs, rhs, expected): assert (lhs in rhs) == expected class Test_MinMaxContainer_mean: def test_normal(self, container): for value in [1, 3]: container.update(value) assert container.has_value() assert not container.is_zero() assert container.mean() == 2 def test_null(self, container): assert Nan(container.mean()).is_type() class Test_MinMaxContainer_diff: def test_normal(self, container): for value in [1, 3]: container.update(value) assert container.has_value() assert not container.is_zero() assert container.diff() == 2 def test_null(self, container): assert Nan(container.diff()).is_type() class Test_MinMaxContainer_update: def test_normal_0(self, container): for value in [1, 2, 3]: container.update(value) assert container.has_value() assert not container.is_zero() assert container.min_value == 1 assert container.max_value == 3 def test_normal_1(self, container): for value in [None, -sys.maxsize, 0, None, sys.maxsize, None]: container.update(value) assert container.has_value() assert not container.is_zero() assert container.min_value == -sys.maxsize assert container.max_value == sys.maxsize class Test_MinMaxContainer_merge: def test_normal(self, container): for value in [1, 2, 3]: container.update(value) other = MinMaxContainer([0, 10]) container.merge(other) assert container.has_value() assert not container.is_zero() assert container.min_value == 0 assert container.max_value == 10 class Test_MinMaxContainer_is_zero: @pytest.mark.parametrize( ["values", "expected"], [ [[0, 0], True], [[0, 0, 0], True], [[0, 1], False], [[1, 0], False], [[1, 1, 1], False], [[None, None], False], ], ) def test_normal(self, container, values, expected): assert MinMaxContainer(values).is_zero() == expected class Test_MinMaxContainer_is_same_value: @pytest.mark.parametrize( ["values", "expected"], [ [[0, 0], True], [[0, 0, 0], True], [[1, 1, 1], True], [[0, 1], False], [[1, 0], False], [[None, None], False], ], ) def test_normal(self, container, values, expected): assert MinMaxContainer(values).is_same_value() == expected thombashi-DataProperty-4654928/test/test_dataproperty.py000066400000000000000000000632651445476334700234560ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import datetime import ipaddress import sys from decimal import Decimal import pytest from tcolorpy import tcolor from typepy import ( Bool, DateTime, Dictionary, Integer, Nan, RealNumber, StrictLevel, String, Typecode, ) from dataproperty import ( MIN_STRICT_LEVEL_MAP, Align, DataProperty, DefaultValue, Format, LineBreakHandling, Preprocessor, ) from .common import get_strict_level_map dateutil = pytest.importorskip("dateutil", minversion="2.7") DATATIME_DATA = datetime.datetime(2017, 1, 2, 3, 4, 5) nan = float("nan") inf = float("inf") class Test_DataPeroperty_eq: @pytest.mark.parametrize( ["lhs", "rhs", "expected"], [ [1, 1, True], [1, 2, False], [1, 0.1, False], [1, True, False], [1.1, 1.1, True], [1, nan, False], [nan, nan, True], [None, None, True], ], ) def test_normal(self, lhs, rhs, expected): lhs = DataProperty(lhs) rhs = DataProperty(rhs) assert (lhs == rhs) == expected assert (lhs != rhs) == (not expected) class Test_DataPeroperty_data_typecode: @pytest.mark.parametrize( ["value", "expected_data", "expected_typecode"], [ ["-0.00284241876820074", Decimal("-0.00284241876820074"), Typecode.REAL_NUMBER], ], ) def test_normal(self, value, expected_data, expected_typecode): dp = DataProperty(value) assert dp == dp assert dp.data == expected_data assert dp.typecode == expected_typecode @pytest.mark.parametrize( ["value", "is_convert", "expected_data", "expected_typecode"], [ [1.0, True, 1, Typecode.INTEGER], [sys.maxsize, True, sys.maxsize, Typecode.INTEGER], [-sys.maxsize, False, -sys.maxsize, Typecode.INTEGER], [str(-sys.maxsize), True, -sys.maxsize, Typecode.INTEGER], [str(sys.maxsize), False, str(sys.maxsize), Typecode.STRING], [1.1, True, 1, Typecode.INTEGER], [-1.1, False, Decimal("-1.1"), Typecode.REAL_NUMBER], [Decimal("1.1"), False, Decimal("1.1"), Typecode.REAL_NUMBER], ["1.1", True, 1, Typecode.INTEGER], ["-1.1", False, "-1.1", Typecode.STRING], ["a", True, "a", Typecode.STRING], ["a", False, "a", Typecode.STRING], ["", True, "", Typecode.NULL_STRING], ["", False, "", Typecode.NULL_STRING], [" ", True, " ", Typecode.NULL_STRING], [" ", False, " ", Typecode.NULL_STRING], ["3.3.5", True, "3.3.5", Typecode.STRING], ["51.0.2704.106", True, "51.0.2704.106", Typecode.STRING], [True, True, 1, Typecode.INTEGER], [False, False, False, Typecode.BOOL], ["100-0002", False, "100-0002", Typecode.STRING], ["127.0.0.1", True, ipaddress.IPv4Address("127.0.0.1"), Typecode.IP_ADDRESS], ["127.0.0.1", False, "127.0.0.1", Typecode.STRING], ["::1", True, ipaddress.IPv6Address("::1"), Typecode.IP_ADDRESS], ["::1", False, "::1", Typecode.STRING], [[], True, [], Typecode.LIST], [[], False, [], Typecode.LIST], [{}, True, {}, Typecode.DICTIONARY], [{}, False, {}, Typecode.DICTIONARY], [ "2017-01-02 03:04:05", True, datetime.datetime(2017, 1, 2, 3, 4, 5), Typecode.DATETIME, ], [DATATIME_DATA, True, DATATIME_DATA, Typecode.DATETIME], ["2017-01-02 03:04:05", False, "2017-01-02 03:04:05", Typecode.STRING], [None, True, None, Typecode.NONE], [None, False, None, Typecode.NONE], ["None", True, "None", Typecode.STRING], ["None", False, "None", Typecode.STRING], [inf, True, inf, Typecode.INFINITY], [inf, False, Decimal(inf), Typecode.INFINITY], ["inf", True, Decimal(inf), Typecode.INFINITY], ["inf", False, "inf", Typecode.STRING], ["nan", False, "nan", Typecode.STRING], [ "Høgskolen i Østfold er et eksempel...", True, "Høgskolen i Østfold er et eksempel...", Typecode.STRING, ], [ "Høgskolen i Østfold er et eksempel...", False, "Høgskolen i Østfold er et eksempel...", Typecode.STRING, ], ["新しいテキスト ドキュメント.txt".encode(), True, "新しいテキスト ドキュメント.txt", Typecode.STRING], ], ) def test_normal_strict_map(self, value, is_convert, expected_data, expected_typecode): dp = DataProperty(value, strict_level_map=get_strict_level_map(not is_convert)) assert dp == dp assert dp != DataProperty("test for __ne__") assert dp.data == expected_data assert dp.typecode == expected_typecode @pytest.mark.parametrize( ["value", "strip_str", "is_strict", "expected_data", "expected_typecode"], [ ['"1"', '"', False, 1, Typecode.INTEGER], ['"1"', "", False, '"1"', Typecode.STRING], ['"1"', '"', True, "1", Typecode.STRING], ['"1"', "", False, '"1"', Typecode.STRING], [b"hoge", "a", False, "hoge", Typecode.STRING], ["hogea", b"a", False, "hoge", Typecode.STRING], ], ) def test_normal_strip_str(self, value, strip_str, is_strict, expected_data, expected_typecode): dp = DataProperty( value, preprocessor=Preprocessor(strip_str=strip_str), strict_level_map=get_strict_level_map(is_strict), ) assert dp.data == expected_data assert dp.typecode == expected_typecode @pytest.mark.parametrize( ["value", "type_hint", "strict_level", "expected_typecode"], [ ["2017-01-02 03:04:05", None, StrictLevel.MIN, Typecode.DATETIME], ["2017-01-02 03:04:05", None, StrictLevel.MAX, Typecode.STRING], ["2017-01-02 03:04:05", DateTime, StrictLevel.MIN, Typecode.DATETIME], ["2017-01-02 03:04:05", DateTime, StrictLevel.MAX, Typecode.DATETIME], ["2017-01-02 03:04:05", Integer, StrictLevel.MIN, Typecode.DATETIME], ["2017-01-02 03:04:05", Integer, StrictLevel.MAX, Typecode.STRING], ["1,000,000,000,000", None, StrictLevel.MAX, Typecode.STRING], ["1,000,000,000,000", None, StrictLevel.MIN + 1, Typecode.INTEGER], ["1,000,000,000,000", None, StrictLevel.MIN, Typecode.INTEGER], ["1,000,000,000,000", Integer, StrictLevel.MAX, Typecode.INTEGER], ["1,000,000,000,000", Integer, StrictLevel.MIN, Typecode.INTEGER], [DATATIME_DATA, None, StrictLevel.MIN, Typecode.DATETIME], [DATATIME_DATA, None, StrictLevel.MAX, Typecode.DATETIME], [DATATIME_DATA, String, StrictLevel.MIN, Typecode.STRING], [DATATIME_DATA, String, StrictLevel.MAX, Typecode.STRING], ["100-0002", None, StrictLevel.MIN, Typecode.STRING], ["45e76582", None, StrictLevel.MIN, Typecode.INTEGER], ["45e76582", None, StrictLevel.MAX, Typecode.STRING], ["4.5e-4", None, StrictLevel.MIN, Typecode.INTEGER], ["4.5e-4", None, StrictLevel.MIN + 1, Typecode.REAL_NUMBER], ["4.5e-4", None, StrictLevel.MAX, Typecode.STRING], [1, String, StrictLevel.MAX, Typecode.STRING], [1, String, StrictLevel.MIN, Typecode.STRING], [float("inf"), RealNumber, StrictLevel.MAX, Typecode.INFINITY], [float("inf"), RealNumber, StrictLevel.MIN, Typecode.INFINITY], [1, RealNumber, StrictLevel.MAX, Typecode.INTEGER], [1, RealNumber, StrictLevel.MIN, Typecode.INTEGER], [1.1, Integer, StrictLevel.MAX, Typecode.INTEGER], [1.1, Integer, StrictLevel.MIN, Typecode.INTEGER], ["true", None, StrictLevel.MAX, Typecode.STRING], ["false", None, StrictLevel.MAX, Typecode.STRING], ["true", None, StrictLevel.MIN, Typecode.BOOL], ["false", None, StrictLevel.MIN, Typecode.BOOL], ["true", Bool, StrictLevel.MIN, Typecode.BOOL], ["false", Bool, StrictLevel.MIN, Typecode.BOOL], [b"hoge", None, StrictLevel.MAX, Typecode.STRING], [b"hoge", None, StrictLevel.MIN, Typecode.STRING], ['{"foo": 10}', None, StrictLevel.MAX, Typecode.STRING], ['{"foo": 10}', None, StrictLevel.MIN, Typecode.DICTIONARY], ['{"foo": 10}', Dictionary, StrictLevel.MAX, Typecode.DICTIONARY], ['{"foo": 10}', Dictionary, StrictLevel.MIN, Typecode.DICTIONARY], [{"foo": 10}, None, StrictLevel.MAX, Typecode.DICTIONARY], [{"foo": 10}, None, StrictLevel.MIN, Typecode.LIST], ], ) def test_normal_type_hint(self, value, type_hint, strict_level, expected_typecode): dp = DataProperty(value, type_hint=type_hint, strict_level_map={"default": strict_level}) assert dp.typecode == expected_typecode @pytest.mark.parametrize( ["value", "is_convert", "expected_data", "expected_typecode"], [ [nan, True, nan, Typecode.NAN], [nan, False, nan, Typecode.NAN], ["nan", True, nan, Typecode.NAN], ], ) def test_normal_nan(self, value, is_convert, expected_data, expected_typecode): dp = DataProperty(value, strict_level_map=get_strict_level_map(not is_convert)) assert Nan(dp.data).is_type() assert dp.typecode == expected_typecode class Test_DataPeroperty_to_str: @pytest.mark.parametrize( ["value", "type_hint", "is_strict", "expected_data", "expected_str"], [ [float("inf"), None, True, Decimal("inf"), "Infinity"], [float("inf"), None, False, Decimal("inf"), "Infinity"], [float("inf"), RealNumber, True, Decimal("inf"), "Infinity"], [float("inf"), RealNumber, False, Decimal("inf"), "Infinity"], [float("inf"), String, False, "inf", "inf"], ], ) def test_normal(self, value, type_hint, is_strict, expected_data, expected_str): dp = DataProperty( value, type_hint=type_hint, strict_level_map=get_strict_level_map(is_strict) ) assert dp.data == expected_data assert dp.to_str() == expected_str @pytest.mark.parametrize( ["value", "format_flags", "expected"], [[1234567, Format.THOUSAND_SEPARATOR, "1,234,567"]] ) def test_normal_format_str(self, value, format_flags, expected): dp = DataProperty(value, format_flags=format_flags) assert dp.to_str() == expected class Test_DataPeroperty_set_data: @pytest.mark.parametrize( ["value", "is_convert", "replace_tabs_with_spaces", "tab_length", "expected"], [ ["a\tb", True, True, 2, "a b"], ["\ta\t\tb\tc\t", True, True, 2, " a b c "], ["a\tb", True, True, 4, "a b"], ["a\tb", True, False, 4, "a\tb"], ["a\tb", True, True, None, "a\tb"], ], ) def test_normal_tab(self, value, is_convert, replace_tabs_with_spaces, tab_length, expected): preprocessor = Preprocessor( replace_tabs_with_spaces=replace_tabs_with_spaces, tab_length=tab_length, ) dp = DataProperty( value, preprocessor=preprocessor, strict_level_map=get_strict_level_map(not is_convert) ) assert dp.data == expected class Test_DataPeroperty_is_escape_html_tag: @pytest.mark.parametrize( ["value", "is_escape_html_tag", "expected"], [ [ "test", True, "<a href='https://google.com'>test</a>", ], [ "test", False, "test", ], [True, True, True], ], ) def test_normal_tab(self, value, is_escape_html_tag, expected): dp = DataProperty(value, preprocessor=Preprocessor(is_escape_html_tag=is_escape_html_tag)) assert dp.data == expected class Test_DataPeroperty_float_type: @pytest.mark.parametrize( ["value", "float_type", "expected"], [[1.1, float, 1.1], [1.1, Decimal, Decimal("1.1")]] ) def test_normal_tab(self, value, float_type, expected): dp = DataProperty(value, float_type=float_type) assert isinstance(dp.data, float_type) assert dp.data == expected class Test_DataPeroperty_align: @pytest.mark.parametrize( ["value", "expected"], [ [1, Align.RIGHT], [1.1, Align.RIGHT], ["a", Align.LEFT], [True, Align.LEFT], [DATATIME_DATA, Align.LEFT], [None, Align.LEFT], [inf, Align.LEFT], [nan, Align.LEFT], ], ) def test_normal(self, value, expected): dp = DataProperty(value) assert dp.align == expected class Test_DataPeroperty_len: @pytest.mark.parametrize( ["value", "expected_acw", "expected_len"], [ [1, 1, None], [-1, 2, None], [1.0, 1, None], [-1.0, 2, None], [1.1, 3, None], [-1.1, 4, None], [12.34, 5, None], ["000", 1, None], ["123456789", 9, None], ["-123456789", 10, None], ["45e76582", 8, 8], ["a", 1, 1], ["a" * 1000, 1000, 1000], ["あ", 2, 1], [True, 4, None], [None, 4, None], [inf, 8, None], [nan, 3, None], ], ) def test_normal(self, value, expected_acw, expected_len): dp = DataProperty(value) assert dp.ascii_char_width == expected_acw assert dp.length == expected_len @pytest.mark.parametrize( ["value", "expected_acw", "expected_len"], [ [tcolor("0", color="red"), 1, 10], [tcolor("12.34", color="red"), 5, 14], [tcolor("abc", color="green"), 3, 12], ], ) def test_normal_ascii_escape_sequence(self, value, expected_acw, expected_len): dp = DataProperty(value) assert dp.ascii_char_width == expected_acw assert dp.length == expected_len @pytest.mark.parametrize( ["value", "eaaw", "expected_acw", "expected_len"], [["øø", 1, 2, 2], ["øø", 2, 4, 2]] ) def test_normal_eaaw(self, value, eaaw, expected_acw, expected_len): dp = DataProperty(value, east_asian_ambiguous_width=eaaw) assert dp.ascii_char_width == expected_acw assert dp.length == expected_len @pytest.mark.parametrize(["value", "expected"], [[nan, nan]]) def test_abnormal(self, value, expected): Nan(DataProperty(value).length).is_type() @pytest.mark.parametrize( ["value", "eaaw", "expected"], [["øø", None, ValueError], ["øø", 0, ValueError], ["øø", 3, ValueError]], ) def test_exception_eaaw(self, value, eaaw, expected): with pytest.raises(expected): DataProperty(value, east_asian_ambiguous_width=eaaw).ascii_char_width class Test_DataPeroperty_is_include_ansi_escape: @pytest.mark.parametrize( ["value", "expected_acw"], [ [0, False], [tcolor("0", color="red"), True], [12.34, False], [tcolor("12.34", color="red"), True], ["abc", False], [tcolor("abc", color="green"), True], ], ) def test_normal(self, value, expected_acw): assert DataProperty(value).is_include_ansi_escape == expected_acw class Test_DataPeroperty_line_break_handling: @pytest.mark.parametrize( ["value", "line_break_handling", "expected"], [ ["a\nb", LineBreakHandling.NOP, "a\nb"], ["a\nb", LineBreakHandling.REPLACE, "a b"], ["a\nb", LineBreakHandling.ESCAPE, "a\\nb"], ["a\r\nb", LineBreakHandling.ESCAPE, "a\\r\\nb"], [123, LineBreakHandling.ESCAPE, 123], ], ) def test_normal(self, value, line_break_handling, expected): preprocessor = Preprocessor(line_break_handling=line_break_handling) assert DataProperty(value, preprocessor=preprocessor).data == expected class Test_DataPeroperty_line_break_repl: @pytest.mark.parametrize( ["value", "line_break_handling", "line_break_repl", "expected"], [ ["a\nb", LineBreakHandling.REPLACE, "
", "a
b"], ["a\n\r\n\nb", LineBreakHandling.REPLACE, "
", "a


b"], ["a\nb", LineBreakHandling.NOP, "
", "a\nb"], ], ) def test_normal(self, value, line_break_handling, line_break_repl, expected): preprocessor = Preprocessor( line_break_handling=line_break_handling, line_break_repl=line_break_repl ) assert DataProperty(value, preprocessor=preprocessor).data == expected class Test_DataPeroperty_escape_formula_injection: @pytest.mark.parametrize( ["value", "escape_formula_injection", "expected"], [ ["a+b", True, "a+b"], ["=a+b", True, "'=a+b"], ["=a+b", False, "=a+b"], ["-a+b", True, "'-a+b"], ["-a+b", False, "-a+b"], ["+a+b", True, "'+a+b"], ["+a+b", False, "+a+b"], ["@a+b", True, "'@a+b"], ["@a+b", False, "@a+b"], ], ) def test_normal(self, value, escape_formula_injection, expected): preprocessor = Preprocessor(is_escape_formula_injection=escape_formula_injection) assert DataProperty(value, preprocessor=preprocessor).data == expected @pytest.mark.parametrize( ["value", "expected"], [[0, 0], [None, None]], ) def test_abnormal(self, value, expected): preprocessor = Preprocessor(is_escape_formula_injection=True) assert DataProperty(value, preprocessor=preprocessor).data == expected class Test_DataPeroperty_get_padding_len: @pytest.mark.parametrize( ["value", "ascii_char_width", "expected"], [ [1, 8, 8], ["000", 8, 8], ["a" * 1000, 8, 8], ["あ", 8, 7], ["あ" * 100, 8, 0], ["いろは", 8, 5], [["side", "where"], 20, 20], [["い" * 100, "ろは"], 8, 0], [["い", "ろは"], 20, 17], [{"1", 1}, 20, 20], ], ) def test_normal(self, value, ascii_char_width, expected): assert DataProperty(value).get_padding_len(ascii_char_width) == expected @pytest.mark.parametrize( ["value", "ascii_char_width", "ambiguous_width", "expected"], [ ["aøb", 4, 1, 4], ["aøb", 4, 2, 3], ], ) def test_normal_east_asian_ambiguous_width( self, value, ascii_char_width, ambiguous_width, expected ): dp = DataProperty(value, east_asian_ambiguous_width=ambiguous_width) assert dp.get_padding_len(ascii_char_width) == expected class Test_DataPeroperty_integer_digits: @pytest.mark.parametrize( ["value", "expected"], [ [1, 1], [1.0, 1], [12.34, 2], ], ) def test_normal(self, value, expected): dp = DataProperty(value) assert dp.integer_digits == expected @pytest.mark.parametrize(["value"], [[None], [True], [DATATIME_DATA], ["a"], [inf], [nan]]) def test_abnormal(self, value): dp = DataProperty(value) Nan(dp.integer_digits).is_type() class Test_DataPeroperty_decimal_places: @pytest.mark.parametrize( ["value", "expected"], [ [1, 0], [1.0, 0], [1.1, 1], [12.34, 2], ], ) def test_normal(self, value, expected): dp = DataProperty(value) assert dp.decimal_places == expected @pytest.mark.parametrize(["value"], [[None], [True], [DATATIME_DATA], ["a"], [inf], [nan]]) def test_abnormal(self, value): dp = DataProperty(value) Nan(dp.decimal_places).is_type() class Test_DataPeroperty_additional_format_len: @pytest.mark.parametrize( ["value", "expected"], [ [2147483648, 0], [0, 0], [-1, 1], [-0.01, 1], ["2147483648", 0], ["1", 0], ["-1", 1], ["-0.01", 1], [None, 0], [True, 0], [DATATIME_DATA, 0], ["a", 0], [inf, 0], [nan, 0], ], ) def test_normal(self, value, expected): dp = DataProperty(value) assert dp.additional_format_len == expected class Test_DataPeroperty_repr: @pytest.mark.parametrize( ["value", "strict_level_map", "expected"], [ ["100-0004", MIN_STRICT_LEVEL_MAP, 75], [{"a": 1}, DefaultValue.STRICT_LEVEL_MAP, 75], ["新しいテキスト ドキュメント.txt", DefaultValue.STRICT_LEVEL_MAP, 80], ], ) def test_smoke(self, value, strict_level_map, expected): dp = DataProperty(value, strict_level_map=strict_level_map) assert len(dp.__repr__()) > expected @pytest.mark.parametrize( ["value", "strict_level_map", "expected"], [ [ 0, DefaultValue.STRICT_LEVEL_MAP, "data=0, type=INTEGER, align=right, " "ascii_width=1, int_digits=1, decimal_places=0, extra_len=0", ], [ tcolor("0", color="red"), DefaultValue.STRICT_LEVEL_MAP, ( ("data={}, type=STRING, align=right, ".format(tcolor("0", color="red"))) + "ascii_width=1, length=10, int_digits=1, decimal_places=0, " + "extra_len=0" ), ], [ -1.0, DefaultValue.STRICT_LEVEL_MAP, "data=-1, type=INTEGER, align=right, " "ascii_width=2, int_digits=1, decimal_places=0, extra_len=1", ], [ -1.1, DefaultValue.STRICT_LEVEL_MAP, "data=-1.1, type=REAL_NUMBER, align=right, " "ascii_width=4, int_digits=1, decimal_places=1, extra_len=1", ], [ -12.234, DefaultValue.STRICT_LEVEL_MAP, "data=-12.234, type=REAL_NUMBER, align=right, " "ascii_width=7, int_digits=2, decimal_places=3, extra_len=1", ], [ 0.01, DefaultValue.STRICT_LEVEL_MAP, "data=0.01, type=REAL_NUMBER, align=right, " "ascii_width=4, int_digits=1, decimal_places=2, extra_len=0", ], [ "0.000000000000001", DefaultValue.STRICT_LEVEL_MAP, "data=0.000000000000001, type=REAL_NUMBER, align=right, " "ascii_width=17, int_digits=1, decimal_places=15, extra_len=0", ], [ "abcdefg", DefaultValue.STRICT_LEVEL_MAP, "data=abcdefg, type=STRING, align=left, ascii_width=7, length=7, extra_len=0", ], [ "いろは", DefaultValue.STRICT_LEVEL_MAP, "data=いろは, type=STRING, align=left, ascii_width=6, length=3, extra_len=0", ], [ None, DefaultValue.STRICT_LEVEL_MAP, "data=None, type=NONE, align=left, ascii_width=4, extra_len=0", ], [ True, DefaultValue.STRICT_LEVEL_MAP, "data=True, type=BOOL, align=left, ascii_width=4, extra_len=0", ], [ DATATIME_DATA, DefaultValue.STRICT_LEVEL_MAP, "data=2017-01-02 03:04:05, type=DATETIME, align=left, " "ascii_width=19, extra_len=0", ], [ "2017-01-02 03:04:05", DefaultValue.STRICT_LEVEL_MAP, "data=2017-01-02 03:04:05, type=STRING, align=left, " "ascii_width=19, length=19, extra_len=0", ], [ "2017-01-02 03:04:05+0900", MIN_STRICT_LEVEL_MAP, "data=2017-01-02 03:04:05+09:00, type=DATETIME, align=left, " "ascii_width=24, extra_len=0", ], [ inf, DefaultValue.STRICT_LEVEL_MAP, "data=Infinity, type=INFINITY, align=left, ascii_width=8, extra_len=0", ], [ nan, DefaultValue.STRICT_LEVEL_MAP, "data=NaN, type=NAN, align=left, ascii_width=3, extra_len=0", ], [ ["side", "where"], DefaultValue.STRICT_LEVEL_MAP, "data=['side', 'where'], type=LIST, align=left, " "ascii_width=17, length=2, extra_len=0", ], [ ["い", "ろは"], DefaultValue.STRICT_LEVEL_MAP, "data=['い', 'ろは'], type=LIST, align=left, ascii_width=14, length=2, extra_len=0", ], ], ) def test_normal(self, value, strict_level_map, expected): dp = DataProperty(value, strict_level_map=strict_level_map) print(f"[expected] {expected}") print(f"[actual] {dp}") assert str(dp) == expected thombashi-DataProperty-4654928/test/test_extractor.py000066400000000000000000000752021445476334700227450ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import datetime from decimal import Decimal import pytest from typepy import DateTime, RealNumber, String, Typecode from dataproperty import ( Align, DataPropertyExtractor, Format, LineBreakHandling, MatrixFormatting, Preprocessor, ) from .common import get_strict_level_map DATATIME_DATA = datetime.datetime(2017, 1, 2, 3, 4, 5) nan = float("nan") inf = float("inf") @pytest.fixture def dp_extractor(): return DataPropertyExtractor() def datetime_formatter_test(value): return value.strftime("%Y%m%d %H%M%S") def datetime_formatter_tostr_0(value): return value.strftime("%Y-%m-%d %H:%M:%S%z") def datetime_formatter_tostr_1(value): return value.strftime("%Y/%m/%d %H:%M:%S") def trans_func_1(v): if v is None: return "" if v is False: return "false" if v == 0: return 123 return v def trans_func_2(v): if v == 123: return 321 return v def nop(v): return v class Test_DataPropertyExtractor_to_dp: @pytest.mark.parametrize( ["value", "type_value_map", "is_strict", "expected_value", "expected_typecode"], [ [None, {Typecode.NONE: None}, True, None, Typecode.NONE], [None, {Typecode.NONE: "null"}, False, "null", Typecode.STRING], [None, {Typecode.NONE: ""}, True, "", Typecode.NULL_STRING], [None, {Typecode.NONE: 0}, False, 0, Typecode.INTEGER], [inf, {Typecode.INFINITY: "INF_1"}, False, "INF_1", Typecode.STRING], [inf, {Typecode.INFINITY: "INF_2"}, True, "INF_2", Typecode.STRING], [inf, {Typecode.INFINITY: None}, True, None, Typecode.NONE], ["inf", {Typecode.INFINITY: "INF_3"}, False, "INF_3", Typecode.STRING], ["inf", {Typecode.INFINITY: "INF_4"}, True, "inf", Typecode.STRING], ["inf", {Typecode.INFINITY: inf}, False, Decimal("Infinity"), Typecode.INFINITY], [nan, {Typecode.NAN: "NAN_1"}, False, "NAN_1", Typecode.STRING], [nan, {Typecode.NAN: "NAN_2"}, True, "NAN_2", Typecode.STRING], [nan, {Typecode.NAN: None}, True, None, Typecode.NONE], ["nan", {Typecode.NAN: "NAN_4"}, False, "NAN_4", Typecode.STRING], ["nan", {Typecode.NAN: "NAN_5"}, True, "nan", Typecode.STRING], ], ) def test_normal_type_value_map( self, dp_extractor, value, type_value_map, is_strict, expected_value, expected_typecode ): dp_extractor.type_value_map = type_value_map dp_extractor.strict_level_map = get_strict_level_map(is_strict) dp = dp_extractor.to_dp(value) assert dp.data == expected_value assert dp.typecode == expected_typecode assert isinstance(dp.to_str(), str) @pytest.mark.parametrize( ["value", "datetime_formatter", "datetime_format_str", "is_strict", "expected"], [ [DATATIME_DATA, datetime_formatter_tostr_0, "s", False, "2017-01-02 03:04:05"], ["2017-01-01 00:00:00", datetime_formatter_tostr_1, "s", False, "2017/01/01 00:00:00"], [ "2017-01-01 00:00:00", None, "%Y-%m-%dT%H:%M:%S", False, datetime.datetime(2017, 1, 1, 0, 0, 0), ], ["2017-01-01 00:00:00", None, "s", True, "2017-01-01 00:00:00"], ], ) def test_normal_datetime( self, dp_extractor, value, datetime_formatter, datetime_format_str, is_strict, expected ): dp_extractor.datetime_formatter = datetime_formatter dp_extractor.datetime_format_str = datetime_format_str dp_extractor.strict_level_map = get_strict_level_map(is_strict) dp = dp_extractor.to_dp(value) assert dp.data == expected @pytest.mark.parametrize( ["value", "type_hint", "trans_func", "expected"], [ [1, String, nop, "1"], [0, String, nop, "0"], [None, String, nop, "None"], [0, String, trans_func_1, "123"], [False, String, trans_func_1, "false"], [None, String, trans_func_1, ""], ], ) def test_normal_type_hint(self, dp_extractor, value, type_hint, trans_func, expected): dp_extractor.register_trans_func(trans_func) dp = dp_extractor._DataPropertyExtractor__to_dp(value, type_hint=type_hint) assert dp.data == expected @pytest.mark.parametrize( ["value", "type_hint", "trans_funcs", "expected"], [ [0, String, [trans_func_2, trans_func_1], "321"], [0, String, [trans_func_1, trans_func_2], "123"], ], ) def test_normal_trans_funcs(self, dp_extractor, value, type_hint, trans_funcs, expected): for trans_func in trans_funcs: dp_extractor.register_trans_func(trans_func) dp = dp_extractor._DataPropertyExtractor__to_dp(value, type_hint=type_hint) assert dp.data == expected class Test_DataPropertyExtractor_to_dp_quoting_flags: ALWAYS_QUOTE_FLAG_MAP = { Typecode.NONE: True, Typecode.INTEGER: True, Typecode.REAL_NUMBER: True, Typecode.STRING: True, Typecode.NULL_STRING: True, Typecode.DATETIME: True, Typecode.REAL_NUMBER: True, Typecode.NAN: True, Typecode.BOOL: True, } @pytest.mark.parametrize( ["value", "quoting_flags", "expected"], [ ["string", ALWAYS_QUOTE_FLAG_MAP, '"string"'], ['"string"', ALWAYS_QUOTE_FLAG_MAP, '"string"'], [' "123"', ALWAYS_QUOTE_FLAG_MAP, ' "123"'], ['"string" ', ALWAYS_QUOTE_FLAG_MAP, '"string" '], [' "12 345" ', ALWAYS_QUOTE_FLAG_MAP, ' "12 345" '], ], ) def test_normal_always_quote(self, dp_extractor, value, quoting_flags, expected): dp_extractor.quoting_flags = quoting_flags dp = dp_extractor.to_dp(value) assert dp.data == expected class Test_DataPropertyExtractor_to_dp_matrix: @pytest.mark.parametrize( ["value"], [ [ [ ["山田", "太郎", "2001/1/1", "100-0002", "東京都千代田区皇居外苑", "03-1234-5678"], ["山田", "次郎", "2001/1/2", "251-0036", "神奈川県藤沢市江の島1丁目", "03-9999-9999"], ] ] ], ) def test_smoke(self, dp_extractor, value): assert len(list(dp_extractor.to_dp_matrix(value))) > 0 @pytest.mark.parametrize( ["value", "type_value_map", "datetime_formatter"], [ [ [[None, "1"], [1.1, "a"], [nan, inf], ["false", DATATIME_DATA]], {Typecode.NONE: "null", Typecode.INFINITY: "INFINITY", Typecode.NAN: "NAN"}, datetime_formatter_test, ] ], ) def test_normal(self, dp_extractor, value, type_value_map, datetime_formatter): dp_extractor.type_value_map = type_value_map dp_extractor.datetime_formatter = datetime_formatter dp_matrix = list(dp_extractor.to_dp_matrix(dp_extractor.to_dp_matrix(value))) assert len(dp_matrix) == 4 dp = dp_matrix[0][0] assert dp.data == "null" assert dp.typecode == Typecode.STRING assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.decimal_places is None assert dp.format_str == "{:s}" dp = dp_matrix[0][1] assert dp.data == 1 assert dp.typecode == Typecode.INTEGER assert dp.align.align_code == Align.RIGHT.align_code assert dp.align.align_string == Align.RIGHT.align_string assert dp.decimal_places == 0 assert dp.format_str == "{:d}" dp = dp_matrix[1][0] assert dp.data == Decimal("1.1") assert dp.typecode == Typecode.REAL_NUMBER assert dp.align.align_code == Align.RIGHT.align_code assert dp.align.align_string == Align.RIGHT.align_string assert dp.decimal_places == 1 assert dp.format_str == "{:.1f}" dp = dp_matrix[1][1] assert dp.data == "a" assert dp.typecode == Typecode.STRING assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.decimal_places is None assert dp.format_str == "{:s}" dp = dp_matrix[2][0] assert dp.data == "NAN" assert dp.typecode == Typecode.STRING assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.decimal_places is None assert dp.format_str == "{:s}" dp = dp_matrix[2][1] assert dp.data == "INFINITY" assert dp.typecode == Typecode.STRING assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.decimal_places is None assert dp.format_str == "{:s}" dp = dp_matrix[3][0] assert dp.data == "false" assert dp.typecode == Typecode.STRING assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.decimal_places is None assert dp.format_str == "{:s}" dp = dp_matrix[3][1] assert dp.data == "20170102 030405" assert dp.typecode == Typecode.STRING assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.decimal_places is None assert dp.format_str == "{:s}" @pytest.mark.parametrize(["value", "expected"], [[None, []], [[], []], [(), []]]) def test_empty(self, dp_extractor, value, expected): assert dp_extractor.to_dp_matrix(value) == expected class Test_DataPropertyExtractor_to_dp_list: @pytest.mark.parametrize( ["value", "float_type"], [[[0.1, Decimal("1.1")], float], [[0.1, Decimal("1.1")], Decimal]] ) def test_normal_float(self, dp_extractor, value, float_type): dp_extractor.float_type = float_type dp_list = dp_extractor.to_dp_list(value) for dp in dp_list: assert isinstance(dp.data, float_type) @pytest.mark.parametrize( ["value", "type_hint", "expected_list"], [ [ ["2017-01-02 03:04:05", datetime.datetime(2017, 1, 2, 3, 4, 5)], None, [Typecode.STRING, Typecode.DATETIME], ], [ ["2017-01-02 03:04:05", datetime.datetime(2017, 1, 2, 3, 4, 5)], DateTime, [Typecode.DATETIME, Typecode.DATETIME], ], ], ) def test_normal_type_hint(self, dp_extractor, value, type_hint, expected_list): dp_extractor.default_type_hint = type_hint dp_list = dp_extractor.to_dp_list(value) for dp, expected in zip(dp_list, expected_list): assert dp.typecode == expected @pytest.mark.parametrize( ["value", "strip_str_header", "strip_str_value", "expected"], [ [['"1"', '"-1.1"', '"abc"'], "", '"', [1, Decimal("-1.1"), "abc"]], [['"1"', '"-1.1"', '"abc"'], '"', "", ['"1"', '"-1.1"', '"abc"']], [['"1"', '"-1.1"', '"abc"'], None, None, ['"1"', '"-1.1"', '"abc"']], ], ) def test_normal_strip_str( self, dp_extractor, value, strip_str_header, strip_str_value, expected ): dp_extractor.strip_str_header = strip_str_header dp_extractor.preprocessor = Preprocessor(strip_str=strip_str_value) dp_list = dp_extractor.to_dp_list(value) for dp, expected_value in zip(dp_list, expected): assert dp.data == expected_value dp_matrix = dp_extractor.to_dp_matrix([value]) for dp, expected_value in zip(dp_matrix[0], expected): assert dp.data == expected_value @pytest.mark.parametrize( ["value", "line_break_handling", "expected"], [ [["a\nb", "a\r\nb"], LineBreakHandling.NOP, ["a\nb", "a\r\nb"]], [["a\nb", "a\r\nb"], LineBreakHandling.REPLACE, ["a b", "a b"]], [["a\nb", "a\r\nb"], LineBreakHandling.ESCAPE, ["a\\nb", "a\\r\\nb"]], ], ) def test_normal_line_break_handling(self, dp_extractor, value, line_break_handling, expected): dp_extractor.preprocessor = Preprocessor(line_break_handling=line_break_handling) dp_list = dp_extractor.to_dp_list(value) for dp, value in zip(dp_list, expected): assert dp.data == value @pytest.mark.parametrize( ["value", "line_break_handling", "line_break_repl", "expected"], [ [["a\nb", "a\r\nb"], LineBreakHandling.NOP, "
", ["a\nb", "a\r\nb"]], [ ["a\nb", "a\r\nb", "a\r\n\nb"], LineBreakHandling.REPLACE, "
", ["a
b", "a
b", "a

b"], ], ], ) def test_normal_line_break_repl( self, dp_extractor, value, line_break_handling, line_break_repl, expected ): dp_extractor.preprocessor = Preprocessor( line_break_handling=line_break_handling, line_break_repl=line_break_repl ) dp_list = dp_extractor.to_dp_list(value) for dp, value in zip(dp_list, expected): assert dp.data == value, value @pytest.mark.parametrize( ["value", "escape_formula_injection", "expected"], [ [ ["a+b", "=a+b", "-a+b", "+a+b", "@a+b"], True, ["a+b", "'=a+b", "'-a+b", "'+a+b", "'@a+b"], ], [ ["a+b", "=a+b", "-a+b", "+a+b", "@a+b"], False, ["a+b", "=a+b", "-a+b", "+a+b", "@a+b"], ], ], ) def test_normal_escape_formula_injection( self, dp_extractor, value, escape_formula_injection, expected ): dp_extractor.preprocessor = Preprocessor( is_escape_formula_injection=escape_formula_injection ) dp_list = dp_extractor.to_dp_list(value) for dp, value in zip(dp_list, expected): assert dp.data == value, value @pytest.mark.parametrize( ["value", "expected"], [[[0, None], [0, None]]], ) def test_exception_escape_formula_injection(self, dp_extractor, value, expected): dp_extractor.preprocessor = Preprocessor(is_escape_formula_injection=True) dp_list = dp_extractor.to_dp_list(value) for dp, value in zip(dp_list, expected): assert dp.data == value, value class Test_DataPropertyExtractor_to_column_dp_list: TEST_DATA_MATRIX = [ [1, 1.1, "aa", 1, 1, True, inf, nan, datetime.datetime(2017, 1, 1, 0, 0, 0)], [2, 2.2, "bbb", 2.2, 2.2, False, "inf", "nan", "2017-01-01T01:23:45+0900"], [3, 3.33, "cccc", -3, "ccc", True, "infinity", "NAN", "2017-11-01 01:23:45+0900"], ] TEST_DATA_MATRIX_TUPLE = ( (1, 1.1, "aa", 1, 1, True, inf, nan, datetime.datetime(2017, 1, 1, 0, 0, 0)), (2, 2.2, "bbb", 2.2, 2.2, False, "inf", "nan", "2017-01-01T01:23:45+0900"), (3, 3.33, "cccc", -3, "ccc", True, "infinity", "NAN", "2017-11-01 01:23:45+0900"), ) @pytest.mark.parametrize( ["max_workers", "headers", "value"], [ [1, ["i", "f", "s", "if", "mix", "bool", "inf", "nan", "time"], TEST_DATA_MATRIX], [4, ["i", "f", "s", "if", "mix", "bool", "inf", "nan", "time"], TEST_DATA_MATRIX], [None, None, TEST_DATA_MATRIX], [None, [], TEST_DATA_MATRIX], [ None, ("i", "f", "s", "if", "mix", "bool", "inf", "nan", "time"), TEST_DATA_MATRIX_TUPLE, ], ], ) def test_normal_default(self, dp_extractor, max_workers, headers, value): dp_extractor.max_workers = max_workers dp_extractor.headers = headers col_dp_list = dp_extractor.to_column_dp_list(dp_extractor.to_dp_matrix(value)) assert len(col_dp_list) == 9 col_idx = 0 dp = col_dp_list[col_idx] assert dp.column_index == col_idx assert dp.typecode == Typecode.INTEGER assert dp.align.align_code == Align.RIGHT.align_code assert dp.align.align_string == Align.RIGHT.align_string assert dp.ascii_char_width == 1 assert dp.decimal_places == 0 assert dp.format_str == "{:d}" assert str(dp) == ( "column=0, type=INTEGER, align=right, " "ascii_width=1, bit_len=2, int_digits=1, decimal_places=0" ) col_idx += 1 dp = col_dp_list[col_idx] assert dp.column_index == col_idx assert dp.typecode == Typecode.REAL_NUMBER assert dp.align.align_code == Align.RIGHT.align_code assert dp.align.align_string == Align.RIGHT.align_string assert dp.ascii_char_width == 4 assert dp.decimal_places == 2 assert dp.format_str == "{:.2f}" col_idx += 1 dp = col_dp_list[col_idx] assert dp.column_index == col_idx assert dp.typecode == Typecode.STRING assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.ascii_char_width == 4 assert dp.decimal_places is None assert dp.format_str == "{:s}" col_idx += 1 dp = col_dp_list[col_idx] assert dp.column_index == col_idx assert dp.typecode == Typecode.REAL_NUMBER assert dp.align.align_code == Align.RIGHT.align_code assert dp.align.align_string == Align.RIGHT.align_string assert dp.ascii_char_width == 4 assert dp.decimal_places == 1 assert dp.format_str == "{:.1f}" col_idx += 1 dp = col_dp_list[col_idx] assert dp.column_index == col_idx assert dp.typecode == Typecode.STRING assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.ascii_char_width == 3 assert dp.decimal_places == 1 assert dp.format_str == "{:s}" col_idx += 1 dp = col_dp_list[col_idx] assert dp.column_index == col_idx assert dp.typecode == Typecode.BOOL assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.ascii_char_width == 5 assert dp.decimal_places is None assert dp.format_str == "{}" col_idx += 1 dp = col_dp_list[col_idx] assert dp.column_index == col_idx assert dp.typecode == Typecode.INFINITY assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.ascii_char_width == 8 assert dp.decimal_places is None assert dp.format_str == "{:f}" col_idx += 1 dp = col_dp_list[col_idx] assert dp.column_index == col_idx assert dp.typecode == Typecode.NAN assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.ascii_char_width == 3 assert dp.decimal_places is None assert dp.format_str == "{:f}" col_idx += 1 dp = col_dp_list[col_idx] assert dp.column_index == col_idx assert dp.typecode == Typecode.STRING assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.ascii_char_width == 24 assert dp.decimal_places is None assert dp.format_str == "{:s}" @pytest.mark.parametrize( ["headers", "value"], [ [ ["i", "f"], [ [1234, 1234.5], [1234567, 34.5], ], ], [ [], [ [1234, 1234.5], [1234567, 34.5], ], ], ], ) def test_normal_format_str(self, dp_extractor, headers, value): dp_extractor.format_flags_list = [Format.THOUSAND_SEPARATOR, Format.THOUSAND_SEPARATOR] dp_extractor.max_workers = 1 dp_extractor.headers = headers col_dp_list = dp_extractor.to_column_dp_list(dp_extractor.to_dp_matrix(value)) assert len(col_dp_list) == 2 col_idx = 0 dp = col_dp_list[col_idx] assert dp.column_index == col_idx assert dp.typecode == Typecode.INTEGER assert dp.format_str == "{:,d}" assert dp.ascii_char_width == 9 col_idx += 1 dp = col_dp_list[col_idx] assert dp.column_index == col_idx assert dp.typecode == Typecode.REAL_NUMBER assert dp.format_str == "{:,.1f}" assert dp.ascii_char_width == 7 @pytest.mark.parametrize( ["headers", "value"], [ [["i", "f", "s", "if", "mix", "bool", "inf", "nan", "time"], TEST_DATA_MATRIX], [None, TEST_DATA_MATRIX], [[], TEST_DATA_MATRIX], ], ) def test_normal_not_strict(self, dp_extractor, headers, value): dp_extractor.headers = headers col_dp_list = dp_extractor.to_column_dp_list(dp_extractor.to_dp_matrix(value)) assert len(col_dp_list) == 9 dp = col_dp_list[0] assert dp.typecode == Typecode.INTEGER assert dp.align.align_code == Align.RIGHT.align_code assert dp.align.align_string == Align.RIGHT.align_string assert dp.ascii_char_width == 1 assert dp.decimal_places == 0 assert dp.format_str == "{:d}" dp = col_dp_list[1] assert dp.typecode == Typecode.REAL_NUMBER assert dp.align.align_code == Align.RIGHT.align_code assert dp.align.align_string == Align.RIGHT.align_string assert dp.ascii_char_width == 4 assert dp.decimal_places == 2 assert dp.format_str == "{:.2f}" def test_normal_column_type_hints(self, dp_extractor): data_matrix = [ [1, "1.1", 1, "2017-01-02 03:04:05"], [2, "2.2", 0.1, "2017-01-02 03:04:05"], ] dp_extractor.headers = ["none", "to_float", "to_str", "to_datetime"] dp_extractor.column_type_hints = [None, RealNumber, String, DateTime] assert dp_extractor.column_type_hints == [None, RealNumber, String, DateTime] col_dp_list = dp_extractor.to_column_dp_list(dp_extractor.to_dp_matrix(data_matrix)) assert len(col_dp_list) == 4 assert col_dp_list[0].typecode == Typecode.INTEGER assert col_dp_list[1].typecode == Typecode.REAL_NUMBER assert col_dp_list[2].typecode == Typecode.STRING assert col_dp_list[3].typecode == Typecode.DATETIME dp_extractor.column_type_hints = ["", "float", "str", "datetime"] assert dp_extractor.column_type_hints == [None, RealNumber, String, DateTime] col_dp_list = dp_extractor.to_column_dp_list(dp_extractor.to_dp_matrix(data_matrix)) assert len(col_dp_list) == 4 assert col_dp_list[0].typecode == Typecode.INTEGER assert col_dp_list[1].typecode == Typecode.REAL_NUMBER assert col_dp_list[2].typecode == Typecode.STRING assert col_dp_list[3].typecode == Typecode.DATETIME def test_normal_max_precision(self): extractor = DataPropertyExtractor(max_precision=3) extractor.headers = ["i", "f"] value = [ [1234, 0.0000000001], [1234567, 34.5], ] col_dp_list = extractor.to_column_dp_list(extractor.to_dp_matrix(value)) assert len(col_dp_list) == 2 col_idx = 0 dp = col_dp_list[col_idx] assert dp.column_index == col_idx assert dp.typecode == Typecode.INTEGER assert dp.decimal_places == 0 col_idx += 1 dp = col_dp_list[col_idx] assert dp.column_index == col_idx assert dp.typecode == Typecode.REAL_NUMBER assert dp.decimal_places == 3 # test setter extractor.max_precision = 1 col_dp_list = extractor.to_column_dp_list(extractor.to_dp_matrix(value)) assert len(col_dp_list) == 2 col_idx = 0 dp = col_dp_list[col_idx] assert dp.column_index == col_idx assert dp.typecode == Typecode.INTEGER assert dp.decimal_places == 0 col_idx += 1 dp = col_dp_list[col_idx] assert dp.column_index == col_idx assert dp.typecode == Typecode.REAL_NUMBER assert dp.decimal_places == 1 def test_normal_nan_inf(self, dp_extractor): dp_extractor.headers = ["n", "i"] col_dp_list = dp_extractor.to_column_dp_list( dp_extractor.to_dp_matrix([[nan, inf], ["nan", "inf"]]) ) assert len(col_dp_list) == 2 dp = col_dp_list[0] assert dp.typecode == Typecode.NAN assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.ascii_char_width == 3 assert dp.decimal_places is None dp = col_dp_list[1] assert dp.typecode == Typecode.INFINITY assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.ascii_char_width == 8 assert dp.decimal_places is None @pytest.mark.parametrize(["ambiguous_width"], [[2], [1]]) def test_normal_east_asian_ambiguous_width(self, dp_extractor, ambiguous_width): dp_extractor.headers = ["ascii", "eaa"] dp_extractor.east_asian_ambiguous_width = ambiguous_width col_dp_list = dp_extractor.to_column_dp_list( dp_extractor.to_dp_matrix([["abcdefg", "Øαββ"], ["abcdefghij", "ØØ"]]) ) assert len(col_dp_list) == 2 dp = col_dp_list[0] assert dp.typecode == Typecode.STRING assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.ascii_char_width == 10 assert dp.decimal_places is None dp = col_dp_list[1] assert dp.typecode == Typecode.STRING assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.ascii_char_width == 4 * ambiguous_width assert dp.decimal_places is None def test_normal_empty_value(self, dp_extractor): dp_extractor.headers = ["a", "22", "cccc"] col_dp_list = dp_extractor.to_column_dp_list(dp_extractor.to_dp_matrix(None)) dp = col_dp_list[0] assert dp.typecode == Typecode.NONE assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.ascii_char_width == 1 assert dp.decimal_places is None assert dp.format_str == "{}" dp = col_dp_list[1] assert dp.typecode == Typecode.NONE assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.ascii_char_width == 2 assert dp.decimal_places is None assert dp.format_str == "{}" dp = col_dp_list[2] assert dp.typecode == Typecode.NONE assert dp.align.align_code == Align.LEFT.align_code assert dp.align.align_string == Align.LEFT.align_string assert dp.ascii_char_width == 4 assert dp.decimal_places is None assert dp.format_str == "{}" class Test_DataPropertyExtractor_matrix_formatting: TEST_DATA_MATRIX_NORMAL_COL3 = [["a", 0, "aa"], ["b", 1, "bb"], ["c", 2, "ccc"]] TEST_DATA_MATRIX_NOUNIFORM_COL1 = [["a", 0], ["b", 1, "bb"], ["c", 2, "ccc", 0.1], ["d"]] @pytest.mark.parametrize( ["headers", "value", "matrix_formatting", "expected"], [ [None, TEST_DATA_MATRIX_NOUNIFORM_COL1, MatrixFormatting.TRIM, 1], [["a", "b"], TEST_DATA_MATRIX_NORMAL_COL3, MatrixFormatting.TRIM, 2], [None, TEST_DATA_MATRIX_NOUNIFORM_COL1, MatrixFormatting.FILL_NONE, 4], [["a", "b", "c"], TEST_DATA_MATRIX_NORMAL_COL3, MatrixFormatting.FILL_NONE, 3], [["a", "b", "c"], TEST_DATA_MATRIX_NOUNIFORM_COL1, MatrixFormatting.HEADER_ALIGNED, 3], [ ["a", "b", "c", "d", "e"], TEST_DATA_MATRIX_NOUNIFORM_COL1, MatrixFormatting.HEADER_ALIGNED, 5, ], ], ) def test_normal_matrix_formatting( self, dp_extractor, headers, value, matrix_formatting, expected ): dp_extractor.headers = headers dp_extractor.matrix_formatting = matrix_formatting col_dp_list = dp_extractor.to_column_dp_list(dp_extractor.to_dp_matrix(value)) assert len(col_dp_list) == expected @pytest.mark.parametrize( ["headers", "value", "matrix_formatting", "expected"], [ [ ["i", "f", "s", "if", "mix"], TEST_DATA_MATRIX_NOUNIFORM_COL1, MatrixFormatting.EXCEPTION, ValueError, ] ], ) def test_exception_matrix_formatting( self, dp_extractor, headers, value, matrix_formatting, expected ): dp_extractor.headers = headers dp_extractor.matrix_formatting = matrix_formatting with pytest.raises(expected): dp_extractor.to_column_dp_list(dp_extractor.to_dp_matrix(value)) class Test_DataPropertyExtractor_update_preprocessor: def test_normal(self, dp_extractor): assert dp_extractor.preprocessor.strip_str is None assert dp_extractor.preprocessor.replace_tabs_with_spaces is True assert dp_extractor.preprocessor.tab_length == 2 assert dp_extractor.preprocessor.line_break_handling is LineBreakHandling.NOP assert dp_extractor.preprocessor.line_break_repl == " " assert dp_extractor.preprocessor.is_escape_html_tag is False assert dp_extractor.preprocessor.is_escape_formula_injection is False dp_extractor.update_preprocessor( strip_str='"', replace_tabs_with_spaces=False, tab_length=4, line_break_handling=LineBreakHandling.REPLACE, line_break_repl="
", is_escape_html_tag=True, is_escape_formula_injection=True, ) assert dp_extractor.preprocessor.strip_str == '"' assert dp_extractor.preprocessor.replace_tabs_with_spaces is False assert dp_extractor.preprocessor.tab_length == 4 assert dp_extractor.preprocessor.line_break_handling is LineBreakHandling.REPLACE assert dp_extractor.preprocessor.line_break_repl == "
" assert dp_extractor.preprocessor.is_escape_html_tag is True assert dp_extractor.preprocessor.is_escape_formula_injection is True thombashi-DataProperty-4654928/test/test_formatter.py000066400000000000000000000052471445476334700227370ustar00rootroot00000000000000import pytest from typepy import Typecode from dataproperty import Format from dataproperty._common import DefaultValue from dataproperty._formatter import Formatter dt_format = DefaultValue.DATETIME_FORMAT class TestFormatter_make_format_str: @pytest.mark.parametrize( ["format_flags", "datetime_format_str", "decimal_places", "typecode", "expected"], [ [None, dt_format, None, Typecode.STRING, "{:s}"], [Format.THOUSAND_SEPARATOR, dt_format, None, Typecode.STRING, "{:s}"], [None, dt_format, None, Typecode.INTEGER, "{:d}"], [Format.THOUSAND_SEPARATOR, dt_format, None, Typecode.INTEGER, "{:,d}"], [None, dt_format, 2, Typecode.INTEGER, "{:d}"], [None, dt_format, None, Typecode.REAL_NUMBER, "{:f}"], [Format.THOUSAND_SEPARATOR, dt_format, None, Typecode.REAL_NUMBER, "{:,f}"], [Format.THOUSAND_SEPARATOR, dt_format, 2, Typecode.REAL_NUMBER, "{:,.2f}"], [None, dt_format, 2, Typecode.REAL_NUMBER, "{:.2f}"], [None, dt_format, None, Typecode.INFINITY, "{:f}"], [None, dt_format, None, Typecode.NAN, "{:f}"], [None, dt_format, None, Typecode.DATETIME, "{:%Y-%m-%dT%H:%M:%S%z}"], [None, "%Y-%m-%d", None, Typecode.DATETIME, "{:%Y-%m-%d}"], [None, None, None, Typecode.NONE, "{}"], [None, None, None, Typecode.IP_ADDRESS, "{}"], [None, None, None, Typecode.BOOL, "{}"], [None, None, None, Typecode.DICTIONARY, "{}"], [None, None, None, Typecode.LIST, "{}"], ], ) def test_normal(self, format_flags, datetime_format_str, decimal_places, typecode, expected): formatter = Formatter(format_flags=format_flags, datetime_format_str=datetime_format_str) assert formatter.make_format_str(typecode, decimal_places) == expected class TestFormatter_make_format_map: @pytest.mark.parametrize( ["format_flags", "datetime_format_str", "decimal_places", "expected"], [[None, dt_format, "", {}]], ) def test_normal(self, format_flags, datetime_format_str, decimal_places, expected): formatter = Formatter(format_flags=format_flags, datetime_format_str=datetime_format_str) assert formatter.make_format_map(decimal_places) == { Typecode.INTEGER: "{:d}", Typecode.REAL_NUMBER: "{:f}", Typecode.INFINITY: "{:f}", Typecode.NAN: "{:f}", Typecode.DATETIME: "{:%Y-%m-%dT%H:%M:%S%z}", Typecode.NONE: "{}", Typecode.IP_ADDRESS: "{}", Typecode.BOOL: "{}", Typecode.DICTIONARY: "{}", Typecode.LIST: "{}", } thombashi-DataProperty-4654928/test/test_function.py000066400000000000000000000113061445476334700225520ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import pytest from dataproperty import get_integer_digit, get_number_of_digit nan = float("nan") inf = float("inf") class Test_get_integer_digit: @pytest.mark.parametrize( ["value", "expected"], [ [0, 1], [-0, 1], [0.99, 1], [-0.99, 1], [".99", 1], ["-.99", 1], [1.01, 1], [-1.01, 1], [9.99, 1], [-9.99, 1], ["9.99", 1], ["-9.99", 1], ["0", 1], ["-0", 1], [10, 2], [-10, 2], [99.99, 2], [-99.99, 2], ["10", 2], ["-10", 2], ["99.99", 2], ["-99.99", 2], [100, 3], [-100, 3], [999.99, 3], [-999.99, 3], ["100", 3], ["-100", 3], ["999.99", 3], ["-999.99", 3], [10000000000000000000, 20], [-10000000000000000000, 20], # float not enough precision [10000000000000000000.99, 20], [-10000000000000000000.99, 20], ["10000000000000000000", 20], ["-10000000000000000000", 20], ["99999999999999099999.99", 20], ["-99999999999999099999.99", 20], ], ) def test_normal(self, value, expected): assert get_integer_digit(value) == expected @pytest.mark.parametrize( ["value", "expected"], [ [999999999999999999999999999999.9999999999, 31], [-999999999999999999999999999999.9999999999, 31], ["999999999999999999999999999999.9999999999", 30], ["-999999999999999999999999999999.9999999999", 30], ], ) def test_abnormal(self, value, expected): assert get_integer_digit(value) == expected @pytest.mark.parametrize( ["value", "exception"], [ [True, ValueError], [False, ValueError], [None, ValueError], ["test", ValueError], ["a", ValueError], ["0xff", ValueError], [nan, ValueError], [inf, ValueError], ], ) def test_exception(self, value, exception): with pytest.raises(exception): get_integer_digit(value) class Test_get_number_of_digit: @pytest.mark.parametrize( ["value", "expected"], [ [0, (1, 0)], [-0, (1, 0)], ["0", (1, 0)], ["-0", (1, 0)], [10, (2, 0)], [-10, (2, 0)], ["10", (2, 0)], ["-10", (2, 0)], [10.1, (2, 1)], [-10.1, (2, 1)], ["10.1", (2, 1)], ["-10.1", (2, 1)], [10.01, (2, 2)], [-10.01, (2, 2)], [10.001, (2, 3)], [-10.001, (2, 3)], [100.1, (3, 1)], [-100.1, (3, 1)], [100.01, (3, 2)], [-100.01, (3, 2)], [0.1, (1, 1)], [-0.1, (1, 1)], ["0.1", (1, 1)], ["-0.1", (1, 1)], [0.99, (1, 2)], [-0.99, (1, 2)], [".99", (1, 2)], ["-.99", (1, 2)], [0.01, (1, 2)], [-0.01, (1, 2)], ["0.01", (1, 2)], ["-0.01", (1, 2)], [0.001, (1, 3)], [-0.001, (1, 3)], ["0.001", (1, 3)], ["-0.001", (1, 3)], [0.0001, (1, 4)], [-0.0001, (1, 4)], ["0.0001", (1, 4)], ["-0.0001", (1, 4)], [0.00001, (1, 5)], [-0.00001, (1, 5)], ["0.00001", (1, 5)], ["-0.00001", (1, 5)], [2e-05, (1, 5)], [-2e-05, (1, 5)], ["2e-05", (1, 5)], ["-2e-05", (1, 5)], ["0.000000000000001", (1, 15)], ["1e+15", (16, 0)], ], ) def test_normal(self, value, expected): assert get_number_of_digit(value) == expected @pytest.mark.parametrize( ["value", "max_decimal_places", "expected"], [ [0, 5, (1, 0)], ["0.000000000000001", 5, (1, 5)], ], ) def test_normal_max_decimal_places(self, value, max_decimal_places, expected): assert get_number_of_digit(value, max_decimal_places=max_decimal_places) == expected @pytest.mark.parametrize( ["value"], [[None], [True], [inf], [nan], ["0xff"], ["test"], ["いろは".encode()]] ) def test_nan(self, value): integer_digits, decimal_places = get_number_of_digit(value) assert integer_digits is None assert decimal_places is None thombashi-DataProperty-4654928/test/test_logger.py000066400000000000000000000010571445476334700222060ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import pytest from dataproperty import set_logger from dataproperty.logger._null_logger import NullLogger class Test_set_logger: @pytest.mark.parametrize(["value"], [[True], [False]]) def test_smoke(self, value): set_logger(value) class Test_NullLogger: @pytest.mark.parametrize(["value"], [[True], [False]]) def test_smoke(self, value, monkeypatch): monkeypatch.setattr("dataproperty.logger._logger.logger", NullLogger()) set_logger(value) thombashi-DataProperty-4654928/test/test_preprocessor.py000066400000000000000000000050151445476334700234530ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import pytest from dataproperty import LineBreakHandling, Preprocessor class Test_Preprocessor_update: def test_normal(self): preprocessor = Preprocessor() assert preprocessor.strip_str is None assert preprocessor.replace_tabs_with_spaces is True assert preprocessor.tab_length == 2 assert preprocessor.line_break_handling is LineBreakHandling.NOP assert preprocessor.line_break_repl == " " assert preprocessor.dequote is False assert preprocessor.is_escape_html_tag is False assert preprocessor.is_escape_formula_injection is False assert preprocessor.update( strip_str='"', replace_tabs_with_spaces=False, tab_length=4, line_break_handling=LineBreakHandling.REPLACE, line_break_repl="
", dequote=True, is_escape_html_tag=True, is_escape_formula_injection=True, ) assert preprocessor.strip_str == '"' assert preprocessor.replace_tabs_with_spaces is False assert preprocessor.tab_length == 4 assert preprocessor.line_break_handling is LineBreakHandling.REPLACE assert preprocessor.line_break_repl == "
" assert preprocessor.dequote is True assert preprocessor.is_escape_html_tag is True assert preprocessor.is_escape_formula_injection is True assert not preprocessor.update(strip_str='"') assert preprocessor.update(strip_str="") class Test_Preprocessor_preprocess: @pytest.mark.parametrize( ["value", "expected"], [ ['abc "efg"', 'abc "efg"'], ['"abc efg"', "abc efg"], ["'abc efg'", "abc efg"], ['"abc" "efg"', '"abc" "efg"'], ["'abc' 'efg'", "'abc' 'efg'"], ["\"abc 'efg'\"", "abc 'efg'"], ], ) def test_normal_dequote(self, value, expected): preprocessor = Preprocessor( dequote=True, ) data, no_ansi_escape_data = preprocessor.preprocess(value) assert data == expected class Test_Preprocessor_preprocess_string: @pytest.mark.parametrize( ["value", "expected"], [ [{"1": 1}, {"1": 1}], [{"1"}, {"1"}], ], ) def test_not_str(self, value, expected): preprocessor = Preprocessor(dequote=True) data, _ = preprocessor._Preprocessor__preprocess_string(value) assert data == expected thombashi-DataProperty-4654928/test/test_str_function.py000066400000000000000000000022121445476334700234360ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import itertools import pytest from dataproperty import calc_ascii_char_width nan = float("nan") inf = float("inf") class Test_calc_ascii_char_width: @pytest.mark.parametrize( ["value", "expected"], [["吾輩は猫である", 14], ["いaろbはc", 9], ["abcdef", 6], ["", 0]] ) def test_normal(self, value, expected): assert calc_ascii_char_width(value) == expected @pytest.mark.parametrize( ["value", "ambiguous_width"], itertools.product(["Ø", "α", "β", "γ", "θ", "κ", "λ", "π", "ǎ"], [1, 2]), ) def test_normal_east_asian_ambiguous(self, value, ambiguous_width): assert calc_ascii_char_width(value, ambiguous_width) == ambiguous_width @pytest.mark.parametrize( ["value", "expected"], [ [b"abcdef", TypeError], [None, TypeError], [True, TypeError], [1, TypeError], [nan, TypeError], ], ) def test_exception(self, value, expected): with pytest.raises(expected): calc_ascii_char_width(value) thombashi-DataProperty-4654928/test/test_typing.py000066400000000000000000000024401445476334700222360ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import pytest from dataproperty.typing import ( Bool, DateTime, Dictionary, Infinity, Integer, IpAddress, List, Nan, NoneType, NullString, RealNumber, String, normalize_type_hint, ) class Test_normalize_type_hint: @pytest.mark.parametrize( ["value", "expected"], [ ["bool", Bool], ["datetime", DateTime], ["dict", Dictionary], ["dictionary", Dictionary], ["inf", Infinity], ["infinity", Infinity], ["int", Integer], ["int ", Integer], ["int_", Integer], ["integer", Integer], ["ip", IpAddress], ["ipaddr", IpAddress], ["ipaddress", IpAddress], ["list", List], ["nan", Nan], ["none", NoneType], ["nullstr", NullString], ["nullstring", NullString], ["float", RealNumber], ["realnumber", RealNumber], ["str", String], ["string", String], ["", None], [None, None], ], ) def test_normal(self, value, expected): assert normalize_type_hint(value) == expected thombashi-DataProperty-4654928/tox.ini000066400000000000000000000016741445476334700176570ustar00rootroot00000000000000[tox] envlist = py{37,38,39,310,311} pypy3 build cov fmt lint [testenv] passenv = * extras = test commands = pytest {posargs} [testenv:build] deps = build>=0.10 twine wheel commands = python -m build twine check dist/*.whl dist/*.tar.gz [testenv:clean] skip_install = true deps = cleanpy>=0.4 commands = cleanpy --all --exclude-envs . [testenv:cov] extras = test deps = coverage[toml] commands = coverage run -m pytest {posargs:-vv} coverage report -m [testenv:fmt] skip_install = true deps = autoflake>=2 black[jupyter]>=23.1 isort>=5 commands = autoflake --in-place --recursive --remove-all-unused-imports . isort . black setup.py examples test dataproperty [testenv:lint] skip_install = true deps = codespell>=2 mypy>=1 pylama>=8.4.1 commands = -codespell -q2 dataproperty examples test README.rst mypy dataproperty pylama