pax_global_header00006660000000000000000000000064145013256010014507gustar00rootroot0000000000000052 comment=dfe60399ac8561f30efe5cb0fd00897186083adc thombashi-tabledata-bc6359d/000077500000000000000000000000001450132560100160255ustar00rootroot00000000000000thombashi-tabledata-bc6359d/.github/000077500000000000000000000000001450132560100173655ustar00rootroot00000000000000thombashi-tabledata-bc6359d/.github/workflows/000077500000000000000000000000001450132560100214225ustar00rootroot00000000000000thombashi-tabledata-bc6359d/.github/workflows/ci.yml000066400000000000000000000041551450132560100225450ustar00rootroot00000000000000name: CI on: [push, pull_request] jobs: build-package: runs-on: ubuntu-latest concurrency: group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref_name }}-build cancel-in-progress: true timeout-minutes: 20 container: image: ghcr.io/thombashi/python-ci:3.11 steps: - uses: actions/checkout@v4 - run: make build build-docs: runs-on: ubuntu-latest concurrency: group: ${{ github.event_name }}-${{ github.workflow }}-${{ github.ref_name }}-docs cancel-in-progress: true timeout-minutes: 20 container: image: ghcr.io/thombashi/python-ci:3.11 steps: - uses: actions/checkout@v4 - run: make docs unit-test: runs-on: ${{ matrix.os }} concurrency: group: ${{ github.event_name }}-${{ github.workflow }}-${{ matrix.os }}-${{ matrix.python-version }} cancel-in-progress: true strategy: fail-fast: false matrix: python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12-dev", "pypy-3.10"] os: [ubuntu-latest, macos-latest, windows-latest] timeout-minutes: 20 steps: - uses: actions/checkout@v4 - name: Setup Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} cache: pip cache-dependency-path: | setup.py **/requirements.txt tox.ini - name: Install pip run: python -m pip install --upgrade --disable-pip-version-check 'pip>=21.1' - name: Install dependencies run: python -m pip install --upgrade --disable-pip-version-check tox - name: Run tests run: tox -e cov env: PYTEST_DISCORD_WEBHOOK: ${{ secrets.PYTEST_DISCORD_WEBHOOK }} - name: Upload coverage report if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.8' run: | python -m pip install --upgrade --disable-pip-version-check coveralls tomli coveralls --service=github env: GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} thombashi-tabledata-bc6359d/.gitignore000066400000000000000000000025701450132560100200210ustar00rootroot00000000000000# Byte-compiled / optimized / DLL files __pycache__/ *.py[cod] *$py.class # C extensions *.so # Distribution / packaging .Python build/ develop-eggs/ dist/ downloads/ eggs/ .eggs/ lib/ lib64/ parts/ sdist/ var/ wheels/ pip-wheel-metadata/ share/python-wheels/ *.egg-info/ .installed.cfg *.egg MANIFEST # PyInstaller # Usually these files are written by a python script from a template # before PyInstaller builds the exe, so as to inject date/other infos into it. *.manifest *.spec # Installer logs pip-log.txt pip-delete-this-directory.txt # Unit test / coverage reports htmlcov/ .tox/ .nox/ .coverage .coverage.* .cache nosetests.xml coverage.xml *.cover .hypothesis/ .pytest_cache/ # Translations *.mo *.pot # Django stuff: *.log local_settings.py db.sqlite3 # Flask stuff: instance/ .webassets-cache # Scrapy stuff: .scrapy # Sphinx documentation docs/_build/ # PyBuilder target/ # Jupyter Notebook .ipynb_checkpoints # IPython profile_default/ ipython_config.py # pyenv .python-version # celery beat schedule file celerybeat-schedule # SageMath parsed files *.sage.py # Environments .env .venv env/ venv/ ENV/ env.bak/ venv.bak/ # Spyder project settings .spyderproject .spyproject # Rope project settings .ropeproject # mkdocs documentation /site # mypy .mypy_cache/ .dmypy.json dmypy.json # Pyre type checker .pyre/ # User settings _sandbox/ *_profile Untitled.ipynb thombashi-tabledata-bc6359d/LICENSE000066400000000000000000000020621450132560100170320ustar00rootroot00000000000000MIT License Copyright (c) 2017 Tsuyoshi Hombashi Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. thombashi-tabledata-bc6359d/MANIFEST.in000066400000000000000000000003621450132560100175640ustar00rootroot00000000000000include docs/pages/introduction/summary.txt include LICENSE include README.rst include setup.cfg include tox.ini include */py.typed recursive-include requirements * recursive-include test * global-exclude __pycache__/* global-exclude *.pyc thombashi-tabledata-bc6359d/Makefile000066400000000000000000000022101450132560100174600ustar00rootroot00000000000000AUTHOR := thombashi PACKAGE := tabledata BUILD_WORK_DIR := _work DOCS_DIR := docs DOCS_BUILD_DIR := $(DOCS_DIR)/_build PKG_BUILD_DIR := $(BUILD_WORK_DIR)/$(PACKAGE) PYTHON := python3 .PHONY: build-remote build-remote: clean @mkdir -p $(BUILD_WORK_DIR) @cd $(BUILD_WORK_DIR) && \ git clone https://github.com/$(AUTHOR)/$(PACKAGE).git --depth 1 && \ cd $(PACKAGE) && \ $(PYTHON) -m tox -e build ls -lh $(PKG_BUILD_DIR)/dist/* .PHONY: build build: clean @$(PYTHON) -m tox -e build ls -lh dist/* .PHONY: check check: @$(PYTHON) -m tox -e lint .PHONY: clean clean: @rm -rf $(BUILD_WORK_DIR) @$(PYTHON) -m tox -e clean .PHONY: idocs idocs: @$(PYTHON) -m pip install -q --disable-pip-version-check --upgrade . @$(MAKE) docs .PHONY: docs docs: @$(PYTHON) -m tox -e docs .PHONY: fmt fmt: @$(PYTHON) -m tox -e fmt .PHONY: readme readme: @$(PYTHON) -m tox -e readme .PHONY: release release: @cd $(PKG_BUILD_DIR) && $(PYTHON) setup.py release --sign --verbose --search-dir $(PACKAGE) @$(MAKE) clean .PHONY: setup setup: @$(PYTHON) -m pip install -q --disable-pip-version-check --upgrade -e .[test] releasecmd tox @$(PYTHON) -m pip check thombashi-tabledata-bc6359d/README.rst000066400000000000000000000034671450132560100175260ustar00rootroot00000000000000.. contents:: **tabledata** :backlinks: top :depth: 2 Summary --------- `tabledata `__ is a Python library to represent tabular data. Used for pytablewriter/pytablereader/SimpleSQLite/etc. .. image:: https://badge.fury.io/py/tabledata.svg :target: https://badge.fury.io/py/tabledata :alt: PyPI package version .. image:: https://img.shields.io/pypi/pyversions/tabledata.svg :target: https://pypi.org/project/tabledata :alt: Supported Python versions .. image:: https://img.shields.io/pypi/implementation/tabledata.svg :target: https://pypi.org/project/tabledata :alt: Supported Python implementations .. image:: https://github.com/thombashi/tabledata/actions/workflows/ci.yml/badge.svg :target: https://github.com/thombashi/tabledata/actions/workflows/ci.yml :alt: Linux/macOS/Windows CI status .. image:: https://coveralls.io/repos/github/thombashi/tabledata/badge.svg?branch=master :target: https://coveralls.io/github/thombashi/tabledata?branch=master :alt: Test coverage Installation ============ Install from PyPI ------------------------------ :: pip install tabledata Install from PPA (for Ubuntu) ------------------------------ :: sudo add-apt-repository ppa:thombashi/ppa sudo apt update sudo apt install python3-tabledata Dependencies ============ - Python 3.7+ - `Mandatory Python package dependencies (automatically installed) `__ Optional Python packages ------------------------------------------------ - `loguru `__ - Used for logging if the package installed - `pandas `__ - required to get table data as a pandas data frame Documentation =============== https://tabledata.rtfd.io/ thombashi-tabledata-bc6359d/docs/000077500000000000000000000000001450132560100167555ustar00rootroot00000000000000thombashi-tabledata-bc6359d/docs/Makefile000066400000000000000000000011361450132560100204160ustar00rootroot00000000000000# Minimal makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build SPHINXPROJ = tabledata SOURCEDIR = . BUILDDIR = _build # Put it first so that "make" without argument is like "make help". help: @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) .PHONY: help Makefile # Catch-all target: route all unknown targets to Sphinx using the new # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)thombashi-tabledata-bc6359d/docs/conf.py000066400000000000000000000126711450132560100202630ustar00rootroot00000000000000import os import sys from tabledata import __author__, __copyright__, __name__, __version__ try: import sphinx_rtd_theme RTD_THEME_INSTALLED = True except ImportError: RTD_THEME_INSTALLED = False sys.path.insert(0, os.path.abspath('../tabledata')) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. # # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.viewcode', 'sphinx.ext.napoleon', ] intersphinx_mapping = {'python': ('https://docs.python.org/', None)} # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # # source_suffix = ['.rst', '.md'] source_suffix = '.rst' # The master toctree document. master_doc = 'index' # General information about the project. project = __name__ copyright = __copyright__ author = __author__ # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. version = __version__ # The full version, including alpha/beta/rc tags. release = version # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # # This is also used if you do content translation via gettext catalogs. # Usually you set "language" from the command line for these cases. language = None # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. # This patterns also effect to html_static_path and html_extra_path exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = False # -- Options for HTML output ---------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. # if RTD_THEME_INSTALLED: html_theme = 'sphinx_rtd_theme' html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # # html_theme_options = {} # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # Custom sidebar templates, must be a dictionary that maps document names # to template names. # # This is required for the alabaster theme # refs: http://alabaster.readthedocs.io/en/latest/installation.html#sidebars html_sidebars = { '**': [ 'relations.html', # needs 'show_related': True theme option to display 'searchbox.html', ] } # -- Options for HTMLHelp output ------------------------------------------ # Output file base name for HTML help builder. htmlhelp_basename = 'tabledatadoc' # -- Options for LaTeX output --------------------------------------------- latex_elements = { # The paper size ('letterpaper' or 'a4paper'). # # 'papersize': 'letterpaper', # The font size ('10pt', '11pt' or '12pt'). # # 'pointsize': '10pt', # Additional stuff for the LaTeX preamble. # # 'preamble': '', # Latex figure (float) alignment # # 'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ (master_doc, 'tabledata.tex', 'tabledata Documentation', 'Tsuyoshi Hombashi', 'manual'), ] # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ (master_doc, 'tabledata', 'tabledata Documentation', [author], 1) ] # -- Options for Texinfo output ------------------------------------------- # Grouping the document tree into Texinfo files. List of tuples # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ (master_doc, 'tabledata', 'tabledata Documentation', author, 'tabledata', 'One line description of project.', 'Miscellaneous'), ] # --------------------------------------------- rp_builtin = """ .. |False| replace:: :py:obj:`False` .. |True| replace:: :py:obj:`True` .. |None| replace:: :py:obj:`None` .. |inf| replace:: :py:obj:`inf` .. |nan| replace:: :py:obj:`nan` .. |bool| replace:: :py:class:`bool` .. |dict| replace:: :py:class:`dict` .. |int| replace:: :py:class:`int` .. |list| replace:: :py:class:`list` .. |namedtuple| replace:: :py:class:`namedtuple` .. |float| replace:: :py:class:`float` .. |str| replace:: :py:class:`str` .. |tuple| replace:: :py:obj:`tuple` """ rp_class = """ .. |TableData| replace:: :py:class:`~tabledata.TableData` """ rst_prolog = ( rp_class + rp_builtin ) thombashi-tabledata-bc6359d/docs/index.rst000066400000000000000000000010771450132560100206230ustar00rootroot00000000000000Welcome to tabledata's documentation! ===================================== .. raw:: html


.. toctree:: :caption: Table of Contents :maxdepth: 4 :numbered: pages/introduction/index pages/reference/index pages/links Indices and tables ================== * :ref:`genindex` * :ref:`modindex` * :ref:`search` thombashi-tabledata-bc6359d/docs/make.bat000066400000000000000000000014111450132560100203570ustar00rootroot00000000000000@ECHO OFF pushd %~dp0 REM Command file for Sphinx documentation if "%SPHINXBUILD%" == "" ( set SPHINXBUILD=sphinx-build ) set SOURCEDIR=. set BUILDDIR=_build set SPHINXPROJ=tabledata if "%1" == "" goto help %SPHINXBUILD% >NUL 2>NUL if errorlevel 9009 ( echo. echo.The 'sphinx-build' command was not found. Make sure you have Sphinx echo.installed, then set the SPHINXBUILD environment variable to point echo.to the full path of the 'sphinx-build' executable. Alternatively you echo.may add the Sphinx directory to PATH. echo. echo.If you don't have Sphinx installed, grab it from echo.http://sphinx-doc.org/ exit /b 1 ) %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% goto end :help %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% :end popd thombashi-tabledata-bc6359d/docs/make_readme.py000066400000000000000000000014231450132560100215610ustar00rootroot00000000000000#!/usr/bin/env python """ .. codeauthor:: Tsuyoshi Hombashi """ import sys from readmemaker import ReadmeMaker PROJECT_NAME = "tabledata" OUTPUT_DIR = ".." def main(): maker = ReadmeMaker( PROJECT_NAME, OUTPUT_DIR, is_make_toc=True, project_url=f"https://github.com/thombashi/{PROJECT_NAME}", ) maker.inc_indent_level() maker.write_chapter("Summary") maker.write_introduction_file("summary.txt") maker.write_introduction_file("badges.txt") maker.write_introduction_file("installation.rst") maker.set_indent_level(0) maker.write_chapter("Documentation") maker.write_lines([f"https://{PROJECT_NAME:s}.rtfd.io/"]) return 0 if __name__ == "__main__": sys.exit(main()) thombashi-tabledata-bc6359d/docs/pages/000077500000000000000000000000001450132560100200545ustar00rootroot00000000000000thombashi-tabledata-bc6359d/docs/pages/genindex.rst000066400000000000000000000000701450132560100224040ustar00rootroot00000000000000Indices and tables ================== * :ref:`genindex`thombashi-tabledata-bc6359d/docs/pages/introduction/000077500000000000000000000000001450132560100225755ustar00rootroot00000000000000thombashi-tabledata-bc6359d/docs/pages/introduction/badges.txt000066400000000000000000000014761450132560100245730ustar00rootroot00000000000000.. image:: https://badge.fury.io/py/tabledata.svg :target: https://badge.fury.io/py/tabledata :alt: PyPI package version .. image:: https://img.shields.io/pypi/pyversions/tabledata.svg :target: https://pypi.org/project/tabledata :alt: Supported Python versions .. image:: https://img.shields.io/pypi/implementation/tabledata.svg :target: https://pypi.org/project/tabledata :alt: Supported Python implementations .. image:: https://github.com/thombashi/tabledata/actions/workflows/ci.yml/badge.svg :target: https://github.com/thombashi/tabledata/actions/workflows/ci.yml :alt: Linux/macOS/Windows CI status .. image:: https://coveralls.io/repos/github/thombashi/tabledata/badge.svg?branch=master :target: https://coveralls.io/github/thombashi/tabledata?branch=master :alt: Test coverage thombashi-tabledata-bc6359d/docs/pages/introduction/feature.txt000066400000000000000000000012731450132560100247740ustar00rootroot00000000000000Features -------- - Extract structured tabular data from various data format: - CSV - Microsoft Excel :superscript:`TM` file - `Google Sheets `_ - HTML - JSON - `Labeled Tab-separated Values (LTSV) `__ - Markdown - MediaWiki - SQLite database file - Tab separated values (TSV) - Supported data sources are: - Files on a local file system - Accessible URLs - ``str`` instances - Loaded table data can be converted as: - `pandas.DataFrame `__ instance - ``dict`` instance - rows of the tuple thombashi-tabledata-bc6359d/docs/pages/introduction/index.rst000066400000000000000000000006031450132560100244350ustar00rootroot00000000000000tabledata =============== .. include:: badges.txt Summary ------- .. include:: summary.txt .. raw:: html


.. include:: installation.rst thombashi-tabledata-bc6359d/docs/pages/introduction/installation.rst000066400000000000000000000013201450132560100260240ustar00rootroot00000000000000Installation ============ Install from PyPI ------------------------------ :: pip install tabledata Install from PPA (for Ubuntu) ------------------------------ :: sudo add-apt-repository ppa:thombashi/ppa sudo apt update sudo apt install python3-tabledata Dependencies ============ - Python 3.7+ - `Mandatory Python package dependencies (automatically installed) `__ Optional Python packages ------------------------------------------------ - `loguru `__ - Used for logging if the package installed - `pandas `__ - required to get table data as a pandas data frame thombashi-tabledata-bc6359d/docs/pages/introduction/summary.txt000066400000000000000000000001571450132560100250360ustar00rootroot00000000000000tabledata is a Python library to represent tabular data. Used for pytablewriter/pytablereader/SimpleSQLite/etc.thombashi-tabledata-bc6359d/docs/pages/links.rst000066400000000000000000000003771450132560100217350ustar00rootroot00000000000000.. include:: genindex.rst Links ===== - `GitHub repository `__ - `Issue tracker `__ - `pip: A tool for installing Python packages `__ thombashi-tabledata-bc6359d/docs/pages/reference/000077500000000000000000000000001450132560100220125ustar00rootroot00000000000000thombashi-tabledata-bc6359d/docs/pages/reference/data.rst000066400000000000000000000002771450132560100234630ustar00rootroot00000000000000Data Structure ---------------------------- TableData ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .. autoclass:: tabledata.TableData :members: :exclude-members: record_list :undoc-members: thombashi-tabledata-bc6359d/docs/pages/reference/error.rst000066400000000000000000000005071450132560100236770ustar00rootroot00000000000000Exceptions ---------------------------- .. autoexception:: tabledata.NameValidationError :show-inheritance: .. autoexception:: tabledata.InvalidTableNameError :show-inheritance: .. autoexception:: tabledata.InvalidHeaderNameError :show-inheritance: .. autoexception:: tabledata.DataError :show-inheritance: thombashi-tabledata-bc6359d/docs/pages/reference/index.rst000066400000000000000000000001051450132560100236470ustar00rootroot00000000000000Reference ========= .. toctree:: :maxdepth: 3 data error thombashi-tabledata-bc6359d/examples/000077500000000000000000000000001450132560100176435ustar00rootroot00000000000000thombashi-tabledata-bc6359d/examples/tabledata.ipynb000066400000000000000000000031751450132560100226350ustar00rootroot00000000000000{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'sample': [OrderedDict([('a', 1), ('b', 2)]),\n", " OrderedDict([('a', Decimal('3.3')), ('b', Decimal('4.4'))])]}" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from tabledata import TableData\n", "\n", "TableData(\n", " \"sample\",\n", " [\"a\", \"b\"],\n", " [[1, 2], [3.3, 4.4]]\n", ").as_dict()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Row(a=1, b=2)\n", "Row(a=Decimal('3.3'), b=Decimal('4.4'))\n" ] } ], "source": [ "from tabledata import TableData\n", "\n", "records = TableData(\n", " \"sample\",\n", " [\"a\", \"b\"],\n", " [[1, 2], [3.3, 4.4]]\n", ").as_tuple()\n", "for record in records:\n", " print(record)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.1" } }, "nbformat": 4, "nbformat_minor": 2 } thombashi-tabledata-bc6359d/invoke_pytest.py000066400000000000000000000005471450132560100213100ustar00rootroot00000000000000""" Unit tests at Windows environments required to invoke from py module, because of multiprocessing: https://py.rtfd.io/en/latest/faq.html?highlight=cmdline#issues-with-py-test-multiprocess-and-setuptools """ import multiprocessing import sys import py if __name__ == "__main__": multiprocessing.freeze_support() sys.exit(py.test.cmdline.main()) thombashi-tabledata-bc6359d/pylama.ini000066400000000000000000000005231450132560100200110ustar00rootroot00000000000000[pylama] skip = .eggs/*,.tox/*,*/.env/*,_sandbox/*,build/*,docs/conf.py [pylama:pycodestyle] max_line_length = 100 [pylama:pylint] max_line_length = 100 [pylama:*/__init__.py] # W0611: imported but unused [pyflakes] ignore = W0611 [pylama:test/test_logger.py] # E402: module level import not at top of file [pycodestyle] ignore = E402 thombashi-tabledata-bc6359d/pyproject.toml000066400000000000000000000023471450132560100207470ustar00rootroot00000000000000[build-system] build-backend = "setuptools.build_meta" requires = ["setuptools>=61.0"] [tool.black] exclude = ''' /( \.eggs | \.git | \.mypy_cache | \.tox | \.venv | \.pytype | _build | buck-out | build | dist )/ | docs/conf.py ''' line-length = 100 target-version = ['py37', 'py38', 'py39', 'py310', 'py311'] [tool.coverage.run] branch = true source = ['tabledata'] [tool.coverage.report] exclude_lines = [ 'except ImportError', 'raise NotImplementedError', 'pass', 'ABCmeta', 'abstractmethod', 'abstractproperty', 'abstractclassmethod', 'warnings.warn', ] precision = 1 show_missing = true [tool.isort] include_trailing_comma = true known_third_party = [ "dataproperty", "pytest", "typepy", ] line_length = 100 lines_after_imports = 2 multi_line_output = 3 skip_glob = [ '*/.eggs/*', '*/.pytype/*', '*/.tox/*', ] [tool.mypy] ignore_missing_imports = true python_version = 3.7 pretty = true check_untyped_defs = true disallow_incomplete_defs = true disallow_untyped_defs = true no_implicit_optional = true show_error_codes = true show_error_context = true #strict = true warn_redundant_casts = true warn_unreachable = true warn_unused_configs = true warn_unused_ignores = true thombashi-tabledata-bc6359d/requirements/000077500000000000000000000000001450132560100205505ustar00rootroot00000000000000thombashi-tabledata-bc6359d/requirements/docs_requirements.txt000066400000000000000000000000371450132560100250440ustar00rootroot00000000000000sphinx_rtd_theme>=1.2.2 Sphinx thombashi-tabledata-bc6359d/requirements/requirements.txt000066400000000000000000000000501450132560100240270ustar00rootroot00000000000000DataProperty>=1.0.1,<2 typepy>=1.2.0,<2 thombashi-tabledata-bc6359d/requirements/test_requirements.txt000066400000000000000000000000331450132560100250670ustar00rootroot00000000000000pytablewriter>=0.46 pytest thombashi-tabledata-bc6359d/setup.py000066400000000000000000000055751450132560100175530ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import os.path from typing import Dict, Type import setuptools MODULE_NAME = "tabledata" REPOSITORY_URL = f"https://github.com/thombashi/{MODULE_NAME:s}" REQUIREMENT_DIR = "requirements" ENCODING = "utf8" pkg_info: Dict[str, str] = {} def get_release_command_class() -> Dict[str, Type[setuptools.Command]]: try: from releasecmd import ReleaseCommand except ImportError: return {} return {"release": ReleaseCommand} with open(os.path.join(MODULE_NAME, "__version__.py")) as f: exec(f.read(), pkg_info) with open("README.rst", encoding=ENCODING) as f: LONG_DESCRIPTION = f.read() with open(os.path.join("docs", "pages", "introduction", "summary.txt"), encoding=ENCODING) as f: SUMMARY = f.read().strip() with open(os.path.join(REQUIREMENT_DIR, "requirements.txt")) as f: INSTALL_REQUIRES = [line.strip() for line in f if line.strip()] with open(os.path.join(REQUIREMENT_DIR, "test_requirements.txt")) as f: TESTS_REQUIRES = [line.strip() for line in f if line.strip()] setuptools.setup( name=MODULE_NAME, version=pkg_info["__version__"], url=REPOSITORY_URL, author=pkg_info["__author__"], author_email=pkg_info["__email__"], description=SUMMARY, include_package_data=True, keywords=["table"], license=pkg_info["__license__"], long_description=LONG_DESCRIPTION, long_description_content_type="text/x-rst", packages=setuptools.find_packages(exclude=["test*"]), package_data={MODULE_NAME: ["py.typed"]}, project_urls={ "Changlog": f"{REPOSITORY_URL:s}/releases", "Documentation": f"https://{MODULE_NAME:s}.rtfd.io/", "Source": REPOSITORY_URL, "Tracker": f"{REPOSITORY_URL:s}/issues", }, install_requires=INSTALL_REQUIRES, tests_require=TESTS_REQUIRES, python_requires=">=3.7", extras_require={"logging": ["loguru>=0.4.1,<1"], "test": TESTS_REQUIRES}, classifiers=[ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Intended Audience :: Information Technology", "License :: OSI Approved :: MIT License", "Operating System :: OS Independent", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Topic :: Software Development :: Libraries", "Topic :: Software Development :: Libraries :: Python Modules", ], cmdclass=get_release_command_class(), zip_safe=False, ) thombashi-tabledata-bc6359d/tabledata/000077500000000000000000000000001450132560100177465ustar00rootroot00000000000000thombashi-tabledata-bc6359d/tabledata/__init__.py000066400000000000000000000012531450132560100220600ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ from .__version__ import __author__, __copyright__, __email__, __license__, __version__ from ._common import convert_idx_to_alphabet from ._constant import PatternMatch from ._converter import to_value_matrix from ._core import TableData from ._logger import set_log_level, set_logger from .error import DataError, InvalidHeaderNameError, InvalidTableNameError, NameValidationError __all__ = ( "convert_idx_to_alphabet", "set_logger", "to_value_matrix", "PatternMatch", "TableData", "DataError", "InvalidHeaderNameError", "InvalidTableNameError", "NameValidationError", ) thombashi-tabledata-bc6359d/tabledata/__version__.py000066400000000000000000000003111450132560100225740ustar00rootroot00000000000000__author__ = "Tsuyoshi Hombashi" __copyright__ = f"Copyright 2017, {__author__}" __license__ = "MIT License" __version__ = "1.3.3" __maintainer__ = __author__ __email__ = "tsuyoshi.hombashi@gmail.com" thombashi-tabledata-bc6359d/tabledata/_common.py000066400000000000000000000004221450132560100217450ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ def convert_idx_to_alphabet(idx: int) -> str: if idx < 26: return chr(65 + idx) div, mod = divmod(idx, 26) return convert_idx_to_alphabet(div - 1) + convert_idx_to_alphabet(mod) thombashi-tabledata-bc6359d/tabledata/_constant.py000066400000000000000000000002321450132560100223050ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import enum @enum.unique class PatternMatch(enum.Enum): OR = 0 AND = 1 thombashi-tabledata-bc6359d/tabledata/_converter.py000066400000000000000000000015771450132560100225000ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ from typing import Any, List, Sequence, Tuple from .error import DataError Row = Tuple[int, Any] def to_value_matrix(headers: Sequence[str], value_matrix: Sequence[Any]) -> List[Row]: if not value_matrix: return [] return [_to_row(headers, values, row_idx)[1] for row_idx, values in enumerate(value_matrix)] def _to_row(headers: Sequence[str], values: Any, row_idx: int) -> Row: if headers: try: values = values._asdict() except AttributeError: pass try: return (row_idx, [values.get(header) for header in headers]) except (TypeError, AttributeError): pass if not isinstance(values, (tuple, list)): raise DataError(f"row must be a list or tuple: actual={type(values)}") return (row_idx, values) thombashi-tabledata-bc6359d/tabledata/_core.py000066400000000000000000000343021450132560100214110ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import copy import re from collections import OrderedDict, namedtuple from typing import TYPE_CHECKING, Any, Dict, Iterator, List, Optional, Sequence, Tuple, Union import dataproperty as dp import typepy from dataproperty import DataPropertyMatrix from dataproperty.typing import TypeHint from typepy import Nan from ._constant import PatternMatch from ._converter import to_value_matrix from ._logger import logger if TYPE_CHECKING: import pandas class TableData: """ Class to represent a table data structure. :param table_name: Name of the table. :param headers: Table header names. :param rows: Data of the table. """ def __init__( self, table_name: Optional[str], headers: Sequence[str], rows: Sequence, dp_extractor: Optional[dp.DataPropertyExtractor] = None, type_hints: Optional[Sequence[Union[str, TypeHint]]] = None, max_workers: Optional[int] = None, max_precision: Optional[int] = None, ) -> None: self.__table_name = table_name self.__value_matrix: List[List[Any]] = [] self.__value_dp_matrix: Optional[DataPropertyMatrix] = None if rows: self.__rows = rows else: self.__rows = [] if dp_extractor: self.__dp_extractor = copy.deepcopy(dp_extractor) else: self.__dp_extractor = dp.DataPropertyExtractor(max_precision=max_precision) if type_hints: self.__dp_extractor.column_type_hints = type_hints self.__dp_extractor.strip_str_header = '"' if max_workers: self.__dp_extractor.max_workers = max_workers if not headers: self.__dp_extractor.headers = [] else: self.__dp_extractor.headers = headers def __repr__(self) -> str: element_list = [f"table_name={self.table_name}"] try: element_list.append("headers=[{}]".format(", ".join(self.headers))) except TypeError: element_list.append("headers=None") element_list.extend([f"cols={self.num_columns}", f"rows={self.num_rows}"]) return ", ".join(element_list) def __eq__(self, other: Any) -> bool: if not isinstance(other, TableData): return False return self.equals(other, cmp_by_dp=False) def __ne__(self, other: Any) -> bool: if not isinstance(other, TableData): return True return not self.equals(other, cmp_by_dp=False) @property def table_name(self) -> Optional[str]: """str: Name of the table.""" return self.__table_name @table_name.setter def table_name(self, value: Optional[str]) -> None: self.__table_name = value @property def headers(self) -> Sequence[str]: """Sequence[str]: Table header names.""" return self.__dp_extractor.headers @property def rows(self) -> Sequence: """Sequence: Original rows of tabular data.""" return self.__rows @property def value_matrix(self) -> DataPropertyMatrix: """DataPropertyMatrix: Converted rows of tabular data.""" if self.__value_matrix: return self.__value_matrix self.__value_matrix = [ [value_dp.data for value_dp in value_dp_list] for value_dp_list in self.value_dp_matrix ] return self.__value_matrix @property def has_value_dp_matrix(self) -> bool: return self.__value_dp_matrix is not None @property def max_workers(self) -> int: return self.__dp_extractor.max_workers @max_workers.setter def max_workers(self, value: Optional[int]) -> None: self.__dp_extractor.max_workers = value @property def num_rows(self) -> Optional[int]: """Optional[int]: Number of rows in the tabular data. |None| if the ``rows`` is neither list nor tuple. """ try: return len(self.rows) except TypeError: return None @property def num_columns(self) -> Optional[int]: if typepy.is_not_empty_sequence(self.headers): return len(self.headers) try: return len(self.rows[0]) except TypeError: return None except IndexError: return 0 @property def value_dp_matrix(self) -> DataPropertyMatrix: """DataPropertyMatrix: DataProperty for table data.""" if self.__value_dp_matrix is None: self.__value_dp_matrix = self.__dp_extractor.to_dp_matrix( to_value_matrix(self.headers, self.rows) ) return self.__value_dp_matrix @property def header_dp_list(self) -> List[dp.DataProperty]: return self.__dp_extractor.to_header_dp_list() @property def column_dp_list(self) -> List[dp.ColumnDataProperty]: return self.__dp_extractor.to_column_dp_list(self.value_dp_matrix) @property def dp_extractor(self) -> dp.DataPropertyExtractor: return self.__dp_extractor def is_empty_header(self) -> bool: """bool: |True| if the data :py:attr:`.headers` is empty.""" return typepy.is_empty_sequence(self.headers) def is_empty_rows(self) -> bool: """ :return: |True| if the tabular data has no rows. :rtype: bool """ return self.num_rows == 0 def is_empty(self) -> bool: """ :return: |True| if the data :py:attr:`.headers` or :py:attr:`.value_matrix` is empty. :rtype: bool """ return any([self.is_empty_header(), self.is_empty_rows()]) def equals(self, other: "TableData", cmp_by_dp: bool = True) -> bool: if cmp_by_dp: return self.__equals_dp(other) return self.__equals_raw(other) def __equals_base(self, other: "TableData") -> bool: compare_item_list = [self.table_name == other.table_name] if self.num_rows is not None: compare_item_list.append(self.num_rows == other.num_rows) return all(compare_item_list) def __equals_raw(self, other: "TableData") -> bool: if not self.__equals_base(other): return False if self.headers != other.headers: return False for lhs_row, rhs_row in zip(self.rows, other.rows): if len(lhs_row) != len(rhs_row): return False if not all( [ lhs == rhs for lhs, rhs in zip(lhs_row, rhs_row) if not Nan(lhs).is_type() and not Nan(rhs).is_type() ] ): return False return True def __equals_dp(self, other: "TableData") -> bool: if not self.__equals_base(other): return False if self.header_dp_list != other.header_dp_list: return False if self.value_dp_matrix is None or other.value_dp_matrix is None: return False for lhs_list, rhs_list in zip(self.value_dp_matrix, other.value_dp_matrix): if len(lhs_list) != len(rhs_list): return False if any([lhs != rhs for lhs, rhs in zip(lhs_list, rhs_list)]): return False return True def in_tabledata_list(self, other: Sequence["TableData"], cmp_by_dp: bool = True) -> bool: for table_data in other: if self.equals(table_data, cmp_by_dp=cmp_by_dp): return True return False def validate_rows(self) -> None: """ :raises ValueError: """ invalid_row_idx_list = [] for row_idx, row in enumerate(self.rows): if isinstance(row, (list, tuple)) and len(self.headers) != len(row): invalid_row_idx_list.append(row_idx) if isinstance(row, dict): if not all([header in row for header in self.headers]): invalid_row_idx_list.append(row_idx) if not invalid_row_idx_list: return for invalid_row_idx in invalid_row_idx_list: logger.debug(f"invalid row (line={invalid_row_idx}): {self.rows[invalid_row_idx]}") raise ValueError( "table header length and row length are mismatch:\n" + f" header(len={len(self.headers)}): {self.headers}\n" + " # of miss match rows: {} ouf of {}\n".format( len(invalid_row_idx_list), self.num_rows ) ) def as_dict(self, default_key: str = "table") -> Dict[str, List["OrderedDict[str, Any]"]]: """ Args: default_key: Key of a returning dictionary when the ``table_name`` is empty. Returns: dict: Table data as a |dict| instance. Sample Code: .. code:: python from tabledata import TableData TableData( "sample", ["a", "b"], [[1, 2], [3.3, 4.4]] ).as_dict() Output: .. code:: json {'sample': [OrderedDict([('a', 1), ('b', 2)]), OrderedDict([('a', 3.3), ('b', 4.4)])]} """ # noqa dict_body = [] for row in self.value_matrix: if not row: continue values = [ (header, value) for header, value in zip(self.headers, row) if value is not None ] if not values: continue dict_body.append(OrderedDict(values)) table_name = self.table_name if not table_name: table_name = default_key return {table_name: dict_body} def as_tuple(self) -> Iterator[Tuple]: """ :return: Rows of the tuple. :rtype: list of |namedtuple| :Sample Code: .. code:: python from tabledata import TableData records = TableData( "sample", ["a", "b"], [[1, 2], [3.3, 4.4]] ).as_tuple() for record in records: print(record) :Output: .. code-block:: none Row(a=1, b=2) Row(a=Decimal('3.3'), b=Decimal('4.4')) """ Row = namedtuple("Row", self.headers) # type: ignore for value_dp_list in self.value_dp_matrix: if typepy.is_empty_sequence(value_dp_list): continue row = Row(*(value_dp.data for value_dp in value_dp_list)) yield row def as_dataframe(self) -> "pandas.DataFrame": """ :return: Table data as a ``pandas.DataFrame`` instance. :rtype: pandas.DataFrame :Sample Code: .. code-block:: python from tabledata import TableData TableData( "sample", ["a", "b"], [[1, 2], [3.3, 4.4]] ).as_dataframe() :Output: .. code-block:: none a b 0 1 2 1 3.3 4.4 :Dependency Packages: - `pandas `__ """ try: from pandas import DataFrame except ImportError: raise RuntimeError("required 'pandas' package to execute as_dataframe method") dataframe = DataFrame(self.value_matrix) if not self.is_empty_header(): dataframe.columns = self.headers return dataframe def transpose(self) -> "TableData": return TableData( self.table_name, self.headers, [row for row in zip(*self.rows)], max_workers=self.max_workers, ) def filter_column( self, patterns: Optional[str] = None, is_invert_match: bool = False, is_re_match: bool = False, pattern_match: PatternMatch = PatternMatch.OR, ) -> "TableData": logger.debug( "filter_column: patterns={}, is_invert_match={}, " "is_re_match={}, pattern_match={}".format( patterns, is_invert_match, is_re_match, pattern_match ) ) if not patterns: return self match_header_list = [] match_column_matrix = [] if pattern_match == PatternMatch.OR: match_method = any elif pattern_match == PatternMatch.AND: match_method = all else: raise ValueError(f"unknown matching: {pattern_match}") for header, column in zip(self.headers, zip(*self.rows)): is_match_list = [] for pattern in patterns: is_match = self.__is_match(header, pattern, is_re_match) is_match_list.append( any([is_match and not is_invert_match, not is_match and is_invert_match]) ) if match_method(is_match_list): match_header_list.append(header) match_column_matrix.append(column) logger.debug( "filter_column: table={}, match_header_list={}".format( self.table_name, match_header_list ) ) return TableData( self.table_name, match_header_list, list(zip(*match_column_matrix)), max_workers=self.max_workers, ) @staticmethod def from_dataframe( dataframe: "pandas.DataFrame", table_name: str = "", type_hints: Optional[Sequence[TypeHint]] = None, max_workers: Optional[int] = None, ) -> "TableData": """ Initialize TableData instance from a pandas.DataFrame instance. :param pandas.DataFrame dataframe: :param str table_name: Table name to create. """ return TableData( table_name, list(dataframe.columns.values), dataframe.values.tolist(), type_hints=type_hints, max_workers=max_workers, ) @staticmethod def __is_match(header: str, pattern: str, is_re_match: bool) -> bool: if is_re_match: return re.search(pattern, header) is not None return header == pattern thombashi-tabledata-bc6359d/tabledata/_logger/000077500000000000000000000000001450132560100213645ustar00rootroot00000000000000thombashi-tabledata-bc6359d/tabledata/_logger/__init__.py000066400000000000000000000000671450132560100235000ustar00rootroot00000000000000from ._logger import logger, set_log_level, set_logger thombashi-tabledata-bc6359d/tabledata/_logger/_logger.py000066400000000000000000000014171450132560100233570ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import warnings import dataproperty from ._null_logger import NullLogger # type: ignore MODULE_NAME = "tabledata" try: from loguru import logger logger.disable(MODULE_NAME) except ImportError: logger = NullLogger() def set_logger(is_enable: bool, propagation_depth: int = 1) -> None: if is_enable: logger.enable(MODULE_NAME) else: logger.disable(MODULE_NAME) if propagation_depth <= 0: return dataproperty.set_logger(is_enable, propagation_depth - 1) def set_log_level(log_level): # type: ignore warnings.warn( "'set_log_level' method is deprecated and will be removed in the future. ", DeprecationWarning, ) return thombashi-tabledata-bc6359d/tabledata/_logger/_null_logger.py000066400000000000000000000021001450132560100243770ustar00rootroot00000000000000# type: ignore class NullLogger: level_name = None def remove(self, handler_id=None): # pragma: no cover pass def add(self, sink, **kwargs): # pragma: no cover pass def disable(self, name): # pragma: no cover pass def enable(self, name): # pragma: no cover pass def critical(self, __message, *args, **kwargs): # pragma: no cover pass def debug(self, __message, *args, **kwargs): # pragma: no cover pass def error(self, __message, *args, **kwargs): # pragma: no cover pass def exception(self, __message, *args, **kwargs): # pragma: no cover pass def info(self, __message, *args, **kwargs): # pragma: no cover pass def log(self, __level, __message, *args, **kwargs): # pragma: no cover pass def success(self, __message, *args, **kwargs): # pragma: no cover pass def trace(self, __message, *args, **kwargs): # pragma: no cover pass def warning(self, __message, *args, **kwargs): # pragma: no cover pass thombashi-tabledata-bc6359d/tabledata/error.py000066400000000000000000000010161450132560100214470ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ class NameValidationError(ValueError): """ Exception raised when a name is invalid. """ class InvalidTableNameError(NameValidationError): """ Exception raised when a table name is invalid. """ class InvalidHeaderNameError(NameValidationError): """ Exception raised when a table header name is invalid. """ class DataError(ValueError): """ Exception raised when data is invalid as tabular data. """ thombashi-tabledata-bc6359d/tabledata/normalizer.py000066400000000000000000000145011450132560100225030ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import abc import warnings from typing import List, Sequence import typepy from dataproperty.typing import TypeHint from ._core import TableData from ._logger import logger from .error import InvalidHeaderNameError, InvalidTableNameError class TableDataNormalizerInterface(metaclass=abc.ABCMeta): """ Interface class to validate and normalize data of |TableData|. """ @abc.abstractmethod def validate(self) -> None: # pragma: no cover pass @abc.abstractmethod def normalize(self) -> TableData: # pragma: no cover pass class AbstractTableDataNormalizer(TableDataNormalizerInterface): @property def _type_hints(self) -> List[TypeHint]: return self._tabledata.dp_extractor.column_type_hints def __init__(self, tabledata: TableData) -> None: self._tabledata = tabledata def validate(self) -> None: if not self._tabledata.table_name: raise ValueError("table_name must not be empty") self._validate_table_name(self._tabledata.table_name) self._validate_headers() def sanitize(self): # type: ignore warnings.warn( "'sanitize' method is deprecated and will be removed in the future." " use 'normalize' method instead.", DeprecationWarning, ) return self.normalize() def normalize(self) -> TableData: """ :return: Sanitized table data. :rtype: tabledata.TableData """ logger.debug(f"normalize: {type(self).__name__}") normalize_headers = self._normalize_headers() return TableData( self.__normalize_table_name(), normalize_headers, self._normalize_rows(normalize_headers), dp_extractor=self._tabledata.dp_extractor, type_hints=self._type_hints, max_workers=self._tabledata.max_workers, ) @abc.abstractmethod def _preprocess_table_name(self) -> str: """ This method is always called before table name validation. You must return preprocessed table name. """ @abc.abstractmethod def _validate_table_name(self, table_name: str) -> None: """ Must raise :py:class:`~.InvalidTableNameError` when you consider the table name invalid. :param str header: Table name to validate. :raises tabledata.InvalidTableNameError: If the table name is invalid. |raises_validate_table_name| """ @abc.abstractmethod def _normalize_table_name(self, table_name: str) -> str: """ Must return a valid table name. The table name must be considered to be a valid name by :py:meth:`~._validate_table_name` method. This method called when :py:meth:`~._validate_table_name` method raise :py:class:`~.InvalidTableNameError`. :param str table_name: Table name to normalize. :return: Sanitized table name. :rtype: str """ @abc.abstractmethod def _preprocess_header(self, col_idx: int, header: str) -> str: """ This method is always called before a header validation. You must return preprocessed header. """ @abc.abstractmethod def _validate_header(self, header: str) -> None: """ No operation. This method called for each table header. Override this method in a subclass if you want to detect invalid table header elements. Raise :py:class:`~.InvalidHeaderNameError` if an invalid header element found. :param str header: Table header name. :raises tabledata.InvalidHeaderNameError: If the ``header`` is invalid. """ @abc.abstractmethod def _normalize_header(self, header: str) -> str: """ Must return a valid header name. This method called when :py:meth:`~._validate_header` method raise :py:class:`~.InvalidHeaderNameError`. Override this method in subclass if you want to rename invalid table header element. :param str header: Header name to normalize. :return: Renamed header name. :rtype: str """ def _normalize_rows(self, normalize_headers: Sequence[str]) -> List: return list(self._tabledata.rows) def _validate_headers(self) -> None: for header in self._tabledata.headers: self._validate_header(header) def __normalize_table_name(self) -> str: preprocessed_table_name = self._preprocess_table_name() try: self._validate_table_name(preprocessed_table_name) new_table_name = preprocessed_table_name except InvalidTableNameError: new_table_name = self._normalize_table_name(preprocessed_table_name) self._validate_table_name(new_table_name) return new_table_name def _normalize_headers(self) -> List[str]: new_header_list = [] for col_idx, header in enumerate(self._tabledata.headers): header = self._preprocess_header(col_idx, header) try: self._validate_header(header) new_header = header except InvalidHeaderNameError: new_header = self._normalize_header(header) self._validate_header(new_header) new_header_list.append(new_header) return new_header_list class TableDataNormalizer(AbstractTableDataNormalizer): def _preprocess_table_name(self) -> str: if not self._tabledata.table_name: return "" return self._tabledata.table_name def _validate_table_name(self, table_name: str) -> None: try: typepy.String(table_name).validate() except TypeError as e: raise InvalidTableNameError(e) def _normalize_table_name(self, table_name: str) -> str: return str(typepy.String(table_name).force_convert()) def _preprocess_header(self, col_idx: int, header: str) -> str: return header def _validate_header(self, header: str) -> None: try: typepy.String(header).validate() except TypeError as e: raise InvalidHeaderNameError(e) def _normalize_header(self, header: str) -> str: return str(typepy.String(header).force_convert()) thombashi-tabledata-bc6359d/tabledata/py.typed000066400000000000000000000000001450132560100214330ustar00rootroot00000000000000thombashi-tabledata-bc6359d/test/000077500000000000000000000000001450132560100170045ustar00rootroot00000000000000thombashi-tabledata-bc6359d/test/test_common.py000066400000000000000000000041041450132560100217040ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import pytest from tabledata._common import convert_idx_to_alphabet class Test_convert_idx_to_alphabet: @pytest.mark.parametrize( ["value", "expected"], [ [ range(30), [ "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "AA", "AB", "AC", "AD", ], ], [ range(0, 900, 30), [ "A", "AE", "BI", "CM", "DQ", "EU", "FY", "HC", "IG", "JK", "KO", "LS", "MW", "OA", "PE", "QI", "RM", "SQ", "TU", "UY", "WC", "XG", "YK", "ZO", "AAS", "ABW", "ADA", "AEE", "AFI", "AGM", ], ], ], ) def test_normal(self, value, expected): assert [convert_idx_to_alphabet(v) for v in value] == expected thombashi-tabledata-bc6359d/test/test_converter.py000066400000000000000000000026571450132560100224360ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ from collections import OrderedDict from tabledata import to_value_matrix class Test_to_value_matrix: def test_normal_dict_rows(self): expect = [ [1, None, None], [None, 2.1, "hoge"], [0, 0.1, "foo"], [None, None, None], [-1, -0.1, "bar"], ] assert ( to_value_matrix( ["A", "B", "C"], [ {"A": 1}, {"B": 2.1, "C": "hoge"}, {"A": 0, "B": 0.1, "C": "foo"}, {}, {"A": -1, "B": -0.1, "C": "bar", "D": "extra"}, ], ) == expect ) def test_normal_OrderedDict_rows(self): expect = [ [1, None, None], [None, 2.1, "hoge"], [0, 0.1, "foo"], [None, None, None], [-1, -0.1, "bar"], ] assert ( to_value_matrix( ["A", "B", "C"], [ OrderedDict({"A": 1}), OrderedDict({"B": 2.1, "C": "hoge"}), OrderedDict({"A": 0, "B": 0.1, "C": "foo"}), OrderedDict({}), OrderedDict({"A": -1, "B": -0.1, "C": "bar", "D": "extra"}), ], ) == expect ) thombashi-tabledata-bc6359d/test/test_logger.py000066400000000000000000000007371450132560100217030ustar00rootroot00000000000000import pytest from tabledata import set_logger from tabledata._logger._null_logger import NullLogger class Test_set_logger: @pytest.mark.parametrize(["value"], [[True], [False]]) def test_smoke(self, value): set_logger(value) class Test_NullLogger: @pytest.mark.parametrize(["value"], [[True], [False]]) def test_smoke(self, value, monkeypatch): monkeypatch.setattr("tabledata._logger._logger.logger", NullLogger()) set_logger(value) thombashi-tabledata-bc6359d/test/test_normalizer.py000066400000000000000000000035671450132560100226120ustar00rootroot00000000000000import pytest from tabledata import TableData from tabledata.normalizer import TableDataNormalizer class Test_TableDataNormalizer: @pytest.mark.parametrize( ["table_name", "headers", "rows", "expected"], [ [ "normal", ["a", "b_c"], [[1, 2], [3, 4]], TableData("normal", ["a", "b_c"], [[1, 2], [3, 4]]), ], [ "underscore_char", ["data", "_data", "data_", "_data_"], [[1, 2, 3, 4], [11, 12, 13, 14]], TableData( "underscore_char", ["data", "_data", "data_", "_data_"], [[1, 2, 3, 4], [11, 12, 13, 14]], ), ], [ "multibyte csv", ["姓", "名", "生年月日", "郵便番号", "住所", "電話番号"], [ ["山田", "太郎", "2001/1/1", "100-0002", "東京都千代田区皇居外苑", "03-1234-5678"], ["山田", "次郎", "2001/1/2", "251-0036", "神奈川県藤沢市江の島1丁目", "03-9999-9999"], ], TableData( "multibyte csv", ["姓", "名", "生年月日", "郵便番号", "住所", "電話番号"], [ ["山田", "太郎", "2001/1/1", "100-0002", "東京都千代田区皇居外苑", "03-1234-5678"], ["山田", "次郎", "2001/1/2", "251-0036", "神奈川県藤沢市江の島1丁目", "03-9999-9999"], ], ), ], ], ) def test_normal(self, table_name, headers, rows, expected): new_tabledata = TableDataNormalizer(TableData(table_name, headers, rows)).normalize() assert new_tabledata.equals(expected) thombashi-tabledata-bc6359d/test/test_tabledata.py000066400000000000000000000453741450132560100223530ustar00rootroot00000000000000""" .. codeauthor:: Tsuyoshi Hombashi """ import itertools import sys from collections import OrderedDict, namedtuple from decimal import Decimal import pytest from typepy import Integer, String from tabledata import DataError, PatternMatch, TableData attr_list_2 = ["attr_a", "attr_b"] NamedTuple2 = namedtuple("NamedTuple2", " ".join(attr_list_2)) def dumps_results(expected=None, actual=None): try: from pytablewriter import dumps_tabledata except ImportError: return if expected: print(f"expected: {dumps_tabledata(expected)}") if actual: print(f"actual: {dumps_tabledata(actual)}") class Test_TableData_constructor: @pytest.mark.parametrize( ["table_name", "headers", "rows", "expected"], [ [ "normal", ["a", "b"], [[1, 2], [3, 4]], TableData("normal", ["a", "b"], [[1, 2], [3, 4]]), ], ["empty_records", ["a", "b"], [], TableData("empty_records", ["a", "b"], [])], ["empty_header", [], [[1, 2], [3, 4]], TableData("empty_header", [], [[1, 2], [3, 4]])], ], ) def test_normal(self, table_name, headers, rows, expected): tabledata = TableData(table_name, headers, rows) dumps_results(expected=expected, actual=tabledata) assert tabledata == expected @pytest.mark.parametrize( ["table_name", "headers", "rows", "expected"], [ [ "none_header", None, [[1, 2], [3, 4]], TableData("none_header", None, [[1, 2], [3, 4]]), ], ["none_records", ["a", "b"], None, TableData("none_records", ["a", "b"], [])], ["none_data", None, None, TableData("none_data", [], [])], ], ) def test_normal_with_none_value(self, table_name, headers, rows, expected): tabledata = TableData(table_name, headers, rows) assert tabledata == expected def test_normal_type_hints(self): type_hints = [Integer, String] tabledata = TableData("type hints", ["a", "b"], [[1, 2], [1, 2]], type_hints=type_hints) for col_dp in tabledata.column_dp_list: print(col_dp) dumps_results(actual=tabledata) for row_dp in tabledata.value_dp_matrix: for dp, type_hint in zip(row_dp, type_hints): print(dp) assert dp.type_class == type_hint @pytest.mark.parametrize( ["table_name", "headers", "rows", "expected"], [["invalid_data", ["a", "b"], [1, 2], DataError]], ) def test_exception(self, table_name, headers, rows, expected): with pytest.raises(expected): TableData(table_name, headers, rows).value_matrix def yield_rows(): rows = [[1, 2], [3, 4]] yield from rows class Test_TableData_num_rows: @pytest.mark.parametrize( ["table_name", "headers", "rows", "expected"], [ ["normal", ["a", "b"], [[1, 2], [3, 4]], 2], ["empty", ["a", "b"], [], 0], ["zip", ["a", "b"], zip(["a", 1], ["b", 2]), None], ["empty", ["a", "b"], yield_rows(), None], ["empty", ["a", "b"], itertools.product([[1, 2], [3, 4]]), None], ], ) def test_normal(self, table_name, headers, rows, expected): table_data = TableData(table_name, headers, rows) assert table_data.num_columns == 2 assert table_data.num_rows == expected class Test_TableData_eq: __DATA_0 = TableData( "Sheet1", ["i", "f", "c", "if", "ifc", "bool", "inf", "nan", "mix_num", "time"], [ [1, "1.1", "aa", 1, 1, "True", float("inf"), "nan", 1, "2017-01-01T00:00:00"], [ 2, "2.2", "bbb", "2.2", "2.2", "False", float("inf"), float("NaN"), float("inf"), "2017-01-02 03:04:05+09:00", ], [ 3, "3.33", "cccc", -3, "ccc", "True", float("inf"), float("NaN"), float("NaN"), "2017-01-01T00:00:00", ], ], ) __DATA_10 = TableData("tablename", ["a", "b"], []) __DATA_11 = TableData("tablename", ["a", "b"], [[1, 2], [11, 12]]) @pytest.mark.parametrize( ["lhs", "rhs", "expected"], [[__DATA_0, __DATA_0, True], [__DATA_0, __DATA_10, False], [__DATA_10, __DATA_11, False]], ) def test_normal(self, lhs, rhs, expected): assert (lhs == rhs) == expected assert (lhs != rhs) == (not expected) class Test_TableData_equals: __LHS = TableData("tablename", ["a", "b"], [{"a": 1, "b": 2}, {"a": 11, "b": 12}]) __RHS = TableData("tablename", ["a", "b"], [[1, 2], [11, 12]]) @pytest.mark.parametrize( ["lhs", "rhs", "cmp_by_dp", "expected"], [[__LHS, __RHS, True, True], [__LHS, __RHS, False, False]], ) def test_normal(self, lhs, rhs, cmp_by_dp, expected): empty_td = TableData("tablename", ["a", "b"], None) assert lhs.equals(rhs, cmp_by_dp=cmp_by_dp) == expected assert lhs.equals(empty_td, cmp_by_dp=cmp_by_dp) is False assert empty_td.equals(rhs, cmp_by_dp=cmp_by_dp) is False assert (lhs == rhs) is False assert (lhs != rhs) is True assert lhs.in_tabledata_list([rhs, empty_td], cmp_by_dp=cmp_by_dp) == expected assert lhs.in_tabledata_list([lhs, empty_td], cmp_by_dp=cmp_by_dp) assert lhs.in_tabledata_list([rhs, lhs, empty_td], cmp_by_dp=cmp_by_dp) assert empty_td.in_tabledata_list([rhs, lhs], cmp_by_dp=cmp_by_dp) is False class Test_TableData_repr: @pytest.mark.parametrize( ["table_name", "headers", "rows", "expected"], [ [ "normal", ["a", "b"], [[1, 2], [3, 4]], "table_name=normal, headers=[a, b], cols=2, rows=2", ], [ "null_header", None, [[1, 2], [3, 4]], "table_name=null_header, headers=[], cols=2, rows=2", ], [ "null_header", [], [[1, 2], [3, 4]], "table_name=null_header, headers=[], cols=2, rows=2", ], ["null_body", ["a", "b"], [], "table_name=null_body, headers=[a, b], cols=2, rows=0"], ["マルチバイト", ["いろは", "漢字"], [], "table_name=マルチバイト, headers=[いろは, 漢字], cols=2, rows=0"], ], ) def test_normal(self, table_name, headers, rows, expected): tabledata = TableData(table_name, headers, rows) assert str(tabledata) == expected class Test_TableData_as_dict: @pytest.mark.parametrize( ["table_name", "headers", "rows", "expected"], [ [ "normal", ["a", "b"], [[1, 2], [3, 4]], {"normal": [OrderedDict([("a", 1), ("b", 2)]), OrderedDict([("a", 3), ("b", 4)])]}, ], [ None, ["a", "b"], [[1, 2], [3, 4]], {"table": [OrderedDict([("a", 1), ("b", 2)]), OrderedDict([("a", 3), ("b", 4)])]}, ], [ "number", ["a", "b"], [[1, 2.0], [3.3, Decimal("4.4")]], { "number": [ OrderedDict([("a", 1), ("b", 2)]), OrderedDict([("a", Decimal("3.3")), ("b", Decimal("4.4"))]), ] }, ], [ "include_none", ["a", "b"], [[None, 2], [None, None], [3, None], [None, None]], {"include_none": [OrderedDict([("b", 2)]), OrderedDict([("a", 3)])]}, ], ["empty_records", ["a", "b"], [], {"empty_records": []}], ], ) def test_normal(self, table_name, headers, rows, expected): assert TableData(table_name, headers, rows).as_dict() == expected def test_normal_default_key(self): headers = ["a", "b"] assert TableData(None, headers, []).as_dict() == {"table": []} assert TableData("", headers, []).as_dict(default_key="dummy") == {"dummy": []} class Test_TableData_as_tuple: @pytest.mark.parametrize( ["table_name", "headers", "rows", "expected"], [ ["normal", ["a", "b"], [[1, 2], [3, 4]], [(1, 2), (3, 4)]], [None, ["a", "b"], [[1, 2], [3, 4]], [(1, 2), (3, 4)]], [ "number", ["a", "b"], [[1, 2.0], [3.3, Decimal("4.4")]], [(1, 2.0), (Decimal("3.3"), Decimal("4.4"))], ], [ "include_none", ["a", "b"], [[None, 2], [None, None], [3, None], [None, None]], [(None, 2), (None, None), (3, None), (None, None)], ], ["empty_records", ["a", "b"], [], []], ], ) def test_normal(self, table_name, headers, rows, expected): for lhs, rhs in zip(TableData(table_name, headers, rows).as_tuple(), expected): print(f"lhs: {lhs}", file=sys.stderr) print(f"rhs: {rhs}", file=sys.stderr) assert tuple(lhs) == rhs class Test_TableData_transpose: @pytest.mark.parametrize( ["value", "expected"], [ [ TableData("tablename", ["a", "b"], [[1, 2, 3], [1, 2, 3]]), TableData("tablename", ["a", "b"], [[1, 1], [2, 2], [3, 3]]), ] ], ) def test_normal(self, value, expected): assert value.transpose() == expected class Test_TableData_value_dp_matrix: __MIXED_DATA = [ [1, 2], (3, 4), {"attr_a": 5, "attr_b": 6}, {"attr_a": 7, "attr_b": 8, "not_exist_attr": 100}, {"attr_a": 9}, {"attr_b": 10}, {}, NamedTuple2(11, None), ] @pytest.mark.parametrize( ["table_name", "headers", "rows", "expected"], [ [ "mixdata", attr_list_2, __MIXED_DATA, TableData( "mixdata", attr_list_2, [ [1, 2], [3, 4], [5, 6], [7, 8], [9, None], [None, 10], [None, None], [11, None], ], ), ], [ "none_header", None, [[1, 2], [3, 4]], TableData("none_header", None, [[1, 2], [3, 4]]), ], ["none_records", ["a", "b"], None, TableData("none_records", ["a", "b"], [])], ["none_data", None, None, TableData("none_data", [], [])], ], ) def test_normal(self, table_name, headers, rows, expected): tabledata = TableData(table_name, headers, rows) assert not tabledata.has_value_dp_matrix assert tabledata.value_dp_matrix == expected.value_dp_matrix assert tabledata.has_value_dp_matrix class Test_TableData_is_empty_header: @pytest.mark.parametrize( ["table_name", "headers", "rows", "expected"], [["tablename", [], [], True], ["tablename", ["a", "b"], [], False]], ) def test_normal(self, table_name, headers, rows, expected): tabledata = TableData(table_name, headers, rows) assert tabledata.is_empty_header() == expected class Test_TableData_is_empty_rows: @pytest.mark.parametrize( ["table_name", "headers", "rows", "expected"], [ ["tablename", [], [], True], ["tablename", ["a", "b"], [], True], ["tablename", ["a", "b"], [[1, 2]], False], ], ) def test_normal(self, table_name, headers, rows, expected): tabledata = TableData(table_name, headers, rows) assert tabledata.is_empty_rows() == expected class Test_TableData_is_empty: @pytest.mark.parametrize( ["table_name", "headers", "rows", "expected"], [ ["tablename", [], [], True], ["tablename", ["a", "b"], [], True], ["tablename", ["a", "b"], [[1, 2]], False], ], ) def test_normal(self, table_name, headers, rows, expected): tabledata = TableData(table_name, headers, rows) assert tabledata.is_empty() == expected class Test_TableData_validate_rows: @pytest.mark.parametrize( ["table_name", "headers", "rows"], [["tablename", [], []], ["tablename", ["a", "b"], []], ["tablename", ["a", "b"], [[1, 2]]]], ) def test_normal(self, table_name, headers, rows): TableData(table_name, headers, rows).validate_rows() @pytest.mark.parametrize( ["table_name", "headers", "rows", "expected"], [ ["tablename", ["a", "b"], [[1]], ValueError], ["tablename", ["a", "b"], [[1, 2, 3]], ValueError], ], ) def test_exception(self, table_name, headers, rows, expected): with pytest.raises(expected): TableData(table_name, headers, rows).validate_rows() class Test_TableData_filter_column: HEADERS = ["abcde", "test"] VALUE_MATRIX = [[1, 2], [3, 4]] @pytest.mark.parametrize( ["table_name", "headers", "rows", "pattern", "is_invert_match", "expected"], [ [ "match", HEADERS, VALUE_MATRIX, ["abcde"], False, TableData("match", ["abcde"], [[1], [3]]), ], [ "multiple_match", HEADERS, VALUE_MATRIX, ["abcde", "test"], False, TableData("multiple_match", ["abcde", "test"], [[1, 2], [3, 4]]), ], [ "invert_match", HEADERS, VALUE_MATRIX, ["abcde"], True, TableData("invert_match", ["test"], [[2], [4]]), ], ["none", HEADERS, VALUE_MATRIX, None, False, TableData("none", HEADERS, VALUE_MATRIX)], ["empty", HEADERS, VALUE_MATRIX, [], False, TableData("empty", HEADERS, VALUE_MATRIX)], ], ) def test_normal_match(self, table_name, headers, rows, pattern, is_invert_match, expected): tabledata = TableData(table_name, headers, rows) actual = tabledata.filter_column(patterns=pattern, is_invert_match=is_invert_match) dumps_results(expected=expected, actual=tabledata) assert actual == expected @pytest.mark.parametrize( ["table_name", "headers", "rows", "pattern", "is_invert_match", "expected"], [ [ "multiple_patterns", ["test001_AAA", "AAA_test1234", "foo", "AAA_hoge"], [[1, 2, 3, 4], [11, 12, 13, 14]], ["test[0-9]+", "AAA_[a-z]+"], False, TableData( "multiple_patterns", ["test001_AAA", "AAA_test1234", "AAA_hoge"], [[1, 2, 4], [11, 12, 14]], ), ], [ "re_match_pattern", HEADERS, VALUE_MATRIX, ["abc*"], False, TableData("re_match_pattern", ["abcde"], [[1], [3]]), ], [ "re_invert_match_pattern", HEADERS, VALUE_MATRIX, ["abc*"], True, TableData("re_invert_match_pattern", ["test"], [[2], [4]]), ], [ "re_invert_unmatch_pattern", HEADERS, VALUE_MATRIX, ["unmatch_pattern"], True, TableData("re_invert_unmatch_pattern", HEADERS, VALUE_MATRIX), ], ], ) def test_normal_re_match(self, table_name, headers, rows, pattern, is_invert_match, expected): tabledata = TableData(table_name, headers, rows) actual = tabledata.filter_column( patterns=pattern, is_invert_match=is_invert_match, is_re_match=True ) dumps_results(expected=expected, actual=tabledata) assert actual == expected @pytest.mark.parametrize( ["table_name", "headers", "rows", "pattern", "is_invert_match", "expected"], [ [ "match_and", ["test001_AAA", "AAA_test1234", "foo", "AAA_hoge"], [[1, 2, 3, 4], [11, 12, 13, 14]], ["[0-9]+", "AAA"], False, TableData("match_and", ["test001_AAA", "AAA_test1234"], [[1, 2], [11, 12]]), ], [ "unmatch_and", ["test001_AAA", "AAA_test1234", "foo", "AAA_hoge"], [[1, 2, 3, 4], [11, 12, 13, 14]], ["1234", "hoge"], True, TableData("unmatch_and", ["test001_AAA", "foo"], [[1, 3], [11, 13]]), ], ], ) def test_normal_pattern_match( self, table_name, headers, rows, pattern, is_invert_match, expected ): tabledata = TableData(table_name, headers, rows) actual = tabledata.filter_column( patterns=pattern, is_invert_match=is_invert_match, is_re_match=True, pattern_match=PatternMatch.AND, ) dumps_results(expected=expected, actual=tabledata) assert actual == expected @pytest.mark.parametrize( ["table_name", "headers", "rows", "pattern", "is_invert_match", "is_re_match", "expected"], [ [ "unmatch_pattern", HEADERS, VALUE_MATRIX, ["abc"], False, False, TableData("unmatch_pattern", [], []), ], [ "none_pattern", HEADERS, VALUE_MATRIX, None, False, False, TableData("none_pattern", HEADERS, VALUE_MATRIX), ], ], ) def test_normal_unmatch( self, table_name, headers, rows, pattern, is_invert_match, is_re_match, expected ): tabledata = TableData(table_name, headers, rows) actual = tabledata.filter_column( patterns=pattern, is_invert_match=is_invert_match, is_re_match=is_re_match ) assert actual == expected thombashi-tabledata-bc6359d/tox.ini000066400000000000000000000023651450132560100173460ustar00rootroot00000000000000[tox] envlist = py{37,38,39,310,311,312} build cov docs fmt lint readme [testenv] extras = test commands = python --version pytest {posargs} [testenv:build] deps = build>=0.10 twine wheel commands = python -m build twine check dist/*.whl dist/*.tar.gz [testenv:clean] skip_install = true deps = cleanpy>=0.4 commands = cleanpy --all --exclude-envs . [testenv:cov] extras = test deps = coverage[toml] commands = coverage run -m pytest {posargs:-vv} coverage report -m [testenv:docs] deps = -r{toxinidir}/requirements/docs_requirements.txt commands = sphinx-build docs/ docs/_build [testenv:fmt] skip_install = true deps = autoflake>=2 black>=23.1 isort>=5 commands = autoflake --in-place --recursive --remove-all-unused-imports --ignore-init-module-imports --exclude ".pytype" . isort . black setup.py test tabledata [testenv:lint] skip_install = true deps = codespell>=2 mypy>=1 pylama>=8.4.1 commands = mypy tabledata setup.py pylama codespell tabledata docs examples -q2 --check-filenames [testenv:readme] skip_install = true changedir = docs deps = path readmemaker>=1.1.0 commands = python make_readme.py