pax_global_header00006660000000000000000000000064144413765750014532gustar00rootroot0000000000000052 comment=d48407c08b6235bec6f1b48a3e2df09c3cb73750 tablib-3.5.0/000077500000000000000000000000001444137657500127745ustar00rootroot00000000000000tablib-3.5.0/.coveragerc000066400000000000000000000004101444137657500151100ustar00rootroot00000000000000# .coveragerc to control coverage.py [report] # Regexes for lines to exclude from consideration exclude_lines = # Have to re-enable the standard pragma: pragma: no cover # Don't complain if non-runnable code isn't run: if __name__ == .__main__.: tablib-3.5.0/.flake8000066400000000000000000000000731444137657500141470ustar00rootroot00000000000000[flake8] max_line_length = 99 exclude = .tox *env/ tablib-3.5.0/.github/000077500000000000000000000000001444137657500143345ustar00rootroot00000000000000tablib-3.5.0/.github/CONTRIBUTING.md000066400000000000000000000011041444137657500165610ustar00rootroot00000000000000[![Jazzband](https://jazzband.co/static/img/jazzband.svg)](https://jazzband.co/) This is a [Jazzband](https://jazzband.co/) project. By contributing you agree to abide by the [Contributor Code of Conduct](https://jazzband.co/about/conduct) and follow the [guidelines](https://jazzband.co/about/guidelines). If you'd like to contribute, simply fork [the repository](https://github.com/jazzband/tablib), commit your changes to a feature branch, and send a pull request to `master`. Make sure you add yourself to [AUTHORS](https://github.com/jazzband/tablib/blob/master/AUTHORS). tablib-3.5.0/.github/workflows/000077500000000000000000000000001444137657500163715ustar00rootroot00000000000000tablib-3.5.0/.github/workflows/docs-lint.yml000066400000000000000000000012231444137657500210060ustar00rootroot00000000000000name: Docs and lint on: [push, pull_request, workflow_dispatch] env: FORCE_COLOR: 1 jobs: build: runs-on: ubuntu-latest strategy: matrix: env: - TOXENV: docs - TOXENV: lint steps: - uses: actions/checkout@v3 - name: Set up Python uses: actions/setup-python@v4 with: python-version: "3.10" cache: pip cache-dependency-path: "pyproject.toml" - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install --upgrade tox - name: Tox run: tox env: ${{ matrix.env }} tablib-3.5.0/.github/workflows/release.yml000066400000000000000000000017671444137657500205470ustar00rootroot00000000000000name: Release on: push: branches: - master release: types: - published workflow_dispatch: jobs: build: if: github.repository_owner == 'jazzband' runs-on: ubuntu-latest steps: - uses: actions/checkout@v3 with: fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v4 with: python-version: "3.x" cache: pip cache-dependency-path: "pyproject.toml" - name: Install dependencies run: | python -m pip install -U pip python -m pip install build twine - name: Build package run: | python -m build twine check dist/* - name: Upload packages to Jazzband if: github.event.action == 'published' uses: pypa/gh-action-pypi-publish@release/v1 with: user: jazzband password: ${{ secrets.JAZZBAND_RELEASE_KEY }} repository-url: https://jazzband.co/projects/tablib/upload tablib-3.5.0/.github/workflows/test.yml000066400000000000000000000017321444137657500200760ustar00rootroot00000000000000name: Test on: [push, pull_request, workflow_dispatch] env: FORCE_COLOR: 1 jobs: build: runs-on: ${{ matrix.os }} strategy: fail-fast: false matrix: python-version: ["3.8", "3.9", "3.10", "3.11", "3.12"] os: [ubuntu-latest, macOS-latest, windows-latest] steps: - uses: 
actions/checkout@v3 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v4 with: python-version: ${{ matrix.python-version }} allow-prereleases: true cache: pip cache-dependency-path: "pyproject.toml" - name: Install dependencies run: | python -m pip install --upgrade pip python -m pip install --upgrade tox python -m pip install -e . - name: Tox tests shell: bash run: | tox -e py - name: Upload coverage uses: codecov/codecov-action@v3 with: name: ${{ matrix.os }} Python ${{ matrix.python-version }} tablib-3.5.0/.gitignore000066400000000000000000000006531444137657500147700ustar00rootroot00000000000000# application builds build/* dist/* MANIFEST # python skin *.pyc *.pyo # osx noise .DS_Store profile # pycharm noise .idea .idea/* # vi noise *.swp docs/_build/* coverage.xml nosetests.xml junit-py25.xml junit-py26.xml junit-py27.xml # tox noise .tox # pyenv noise .python-version tablib.egg-info/* # Coverage .coverage htmlcov # setuptools noise .eggs *.egg-info # generated by setuptools-scm /src/tablib/_version.py tablib-3.5.0/.pre-commit-config.yaml000066400000000000000000000013551444137657500172610ustar00rootroot00000000000000repos: - repo: https://github.com/asottile/pyupgrade rev: v3.3.1 hooks: - id: pyupgrade args: [--py38-plus] - repo: https://github.com/PyCQA/isort rev: 5.12.0 hooks: - id: isort - repo: https://github.com/PyCQA/flake8 rev: 6.0.0 hooks: - id: flake8 - repo: https://github.com/isidentical/teyit rev: 0.4.3 hooks: - id: teyit - repo: https://github.com/pre-commit/pygrep-hooks rev: v1.10.0 hooks: - id: python-check-blanket-noqa - id: rst-backticks - repo: https://github.com/pre-commit/pre-commit-hooks rev: v4.4.0 hooks: - id: check-merge-conflict - id: check-toml - id: check-yaml ci: autoupdate_schedule: quarterly tablib-3.5.0/.readthedocs.yaml000066400000000000000000000003071444137657500162230ustar00rootroot00000000000000version: 2 build: os: ubuntu-20.04 tools: python: "3.10" sphinx: configuration: docs/conf.py python: install: - requirements: docs/requirements.txt - method: pip path: . tablib-3.5.0/AUTHORS000066400000000000000000000012771444137657500140530ustar00rootroot00000000000000Tablib was originally written by Kenneth Reitz and is now maintained by the Jazzband GitHub team. Here is a list of past and present much-appreciated contributors: Alex Gaynor Andrii Soldatenko Benjamin Wohlwend Bruno Soares Claude Paroz Daniel Santos Erik Youngren Hugo van Kemenade Iuri de Silvio Jakub Janoszek James Douglass Joel Friedly Josh Ourisman Kenneth Reitz Luca Beltrame Luke Lee Marc Abramowitz Marco Dallagiacoma Maris Nartiss Mark Rogers Mark Walling Mathias Loesch Matthew Hegarty Mike Waldner Peyman Salehi Rabin Nankhwa Tommy Anthony Tsuyoshi Hombashi Tushar Makkar tablib-3.5.0/CODE_OF_CONDUCT.md000066400000000000000000000045071444137657500156010ustar00rootroot00000000000000# Code of Conduct As contributors and maintainers of the Jazzband projects, and in the interest of fostering an open and welcoming community, we pledge to respect all people who contribute through reporting issues, posting feature requests, updating documentation, submitting pull requests or patches, and other activities. We are committed to making participation in the Jazzband a harassment-free experience for everyone, regardless of the level of experience, gender, gender identity and expression, sexual orientation, disability, personal appearance, body size, race, ethnicity, age, religion, or nationality. 
Examples of unacceptable behavior by participants include: - The use of sexualized language or imagery - Personal attacks - Trolling or insulting/derogatory comments - Public or private harassment - Publishing other's private information, such as physical or electronic addresses, without explicit permission - Other unethical or unprofessional conduct The Jazzband roadies have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. By adopting this Code of Conduct, the roadies commit themselves to fairly and consistently applying these principles to every aspect of managing the jazzband projects. Roadies who do not follow or enforce the Code of Conduct may be permanently removed from the Jazzband roadies. This code of conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the roadies at `roadies@jazzband.co`. All complaints will be reviewed and investigated and will result in a response that is deemed necessary and appropriate to the circumstances. Roadies are obligated to maintain confidentiality with regard to the reporter of an incident. This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.3.0, available at [https://contributor-covenant.org/version/1/3/0/][version] [homepage]: https://contributor-covenant.org [version]: https://contributor-covenant.org/version/1/3/0/ tablib-3.5.0/HISTORY.md000066400000000000000000000222571444137657500144670ustar00rootroot00000000000000# History ## 3.5.0 (2023-06-11) ### Improvements - Add support for Python 3.12 (#550) - Drop support for EOL Python 3.7 (#551) - Allow importing 'ragged' .xlsx files through dataset (#547) - Release: replace deprecated `repository_url` with `repository-url` (#545) ## 3.4.0 (2023-03-24) ### Improvements - Move setup to `pyproject.toml` (#542) - xlsx export: remove redundant code (#541) - xlsx export: support escape of formulae (#540) - Add <tbody> tags to HTML output (#539) - Check for type list and improve error msg (#524) ### Bugfixes - Fix bug when yaml file is empty (#535) - Fix linting issues raised by Flake8 (#536) ## 3.3.0 (2022-12-10) ### Improvements - Add support for Python 3.11 (#525). - ODS export: integers/floats/decimals are exported as numbers (#527). ## 3.2.1 (2022-04-09) ### Bugfixes - Support solo CR in text input imports (#518). ## 3.2.0 (2022-01-27) ### Changes - Dropped Python 3.6 support (#513). ### Bugfixes - Corrected order of arguments to a regex call in `safe_xlsx_sheet_title` (#510). ## 3.1.0 (2021-10-26) ### Improvements - Add support for Python 3.10 (#504). - The csv, xls, and xlsx formats gained support for the `skip_lines` keyword argument for their `import_set()` method to be able to skip the nth first lines of a read file (#497). ### Bugfixes - Avoided mutable parameter defaults (#494). - Specify build backend for editable installs (#501). - Doubled sample size passed to `csv.Sniffer()` in `_csv.detect()` (#503). ## 3.0.0 (2020-12-05) ### Breaking changes - Dropped Python 3.5 support. - JSON-exported data is no longer forced to ASCII characters. - YAML-exported data is no longer forced to ASCII characters. 
### Improvements

- Added Python 3.9 support.
- Added read_only option to xlsx file reader (#482).

### Bugfixes

- Prevented crash in rst export with only-space strings (#469).

## 2.0.0 (2020-05-16)

### Breaking changes

- The `Row.lpush/rpush` logic was reversed. `lpush` was appending while `rpush` and `append` were prepending. This was fixed (reversed behavior). If you counted on the broken behavior, please update your code (#453).

### Bugfixes

- Fixed minimal openpyxl dependency version to 2.6.0 (#457).
- Dates from xls files are now read as Python datetime objects (#373).
- Allow import of "ragged" xlsx files (#465).

### Improvements

- When importing an xlsx file, Tablib will now read cell values instead of formulas (#462).

## 1.1.0 (2020-02-13)

### Deprecations

- Upcoming breaking change in Tablib 2.0.0: the `Row.lpush/rpush` logic is reversed. `lpush` is appending while `rpush` and `append` are prepending. The broken behavior will remain in Tablib 1.x and will be fixed (reversed) in Tablib 2.0.0 (#453). If you count on the broken behavior, please update your code when you upgrade to Tablib 2.x.

### Improvements

- Tablib is now able to import CSV content where not all rows have the same length. Missing columns on any line receive the empty string (#226).

## 1.0.0 (2020-01-13)

### Breaking changes

- Dropped Python 2 support
- Dependencies are now all optional. To install `tablib` as before with all possible supported formats, run `pip install tablib[all]`

### Improvements

- Formats can now be dynamically registered through the `tablib.formats.registry.register` API (#256).
- Tablib methods expecting data input (`detect_format`, `import_set`, `Dataset.load`, `Databook.load`) now accept file-like objects in addition to raw strings and bytestrings (#440).

### Bugfixes

- Fixed a crash when exporting an empty string with the ReST format (#368)
- Error cells from imported .xls files now contain the error string (#202)

## 0.14.0 (2019-10-19)

### Deprecations

- The 0.14.x series will be the last to support Python 2

### Breaking changes

- Dropped Python 3.4 support

### Improvements

- Added Python 3.7 and 3.8 support
- The project is now maintained by the Jazzband team, https://jazzband.co
- Improved format autodetection and added autodetection for the odf format.
- Added search to all documentation pages
- Open xlsx workbooks in read-only mode (#316)
- Unpin requirements
- Only install backports.csv on Python 2

### Bugfixes

- Fixed `DataBook().load` parameter ordering (first stream, then format).
- Fixed a regression for xlsx exports where non-string values were forced to strings (#314)
- Fixed xlsx format detection (which was often detected as `xls` format)

## 0.13.0 (2019-03-08)

- Added reStructuredText output capability (#336)
- Added Jira output capability
- Stopped calling openpyxl deprecated methods (accessing cells, removing sheets) (openpyxl minimal version is now 2.4.0)
- Fixed a circular dependency issue in JSON output (#332)
- Fixed Unicode error for the CSV export on Python 2 (#215)
- Removed usage of optional `ujson` (#311)
- Dropped Python 3.3 support

## 0.12.1 (2017-09-01)

- Favor `Dataset.export()` over `Dataset.` syntax in docs
- Make Panda dependency optional

## 0.12.0 (2017-08-27)

- Add initial Panda DataFrame support
- Dropped Python 2.6 support

## 0.11.5 (2017-06-13)

- Use `yaml.safe_load` for importing yaml.
## 0.11.4 (2017-01-23) - Use built-in `json` package if available - Support Python 3.5+ in classifiers ### Bugfixes - Fixed textual representation for Dataset with no headers - Handle decimal types ## 0.11.3 (2016-02-16) - Release fix. ## 0.11.2 (2016-02-16) ### Bugfixes - Fix export only formats. - Fix for xlsx output. ## 0.11.1 (2016-02-07) ### Bugfixes - Fixed packaging error on Python 3. ## 0.11.0 (2016-02-07) ### New Formats! - Added LaTeX table export format (`Dataset.latex`). - Support for dBase (DBF) files (`Dataset.dbf`). ### Improvements - New import/export interface (`Dataset.export()`, `Dataset.load()`). - CSV custom delimiter support (`Dataset.export('csv', delimiter='$')`). - Adding ability to remove duplicates to all rows in a dataset (`Dataset.remove_duplicates()`). - Added a mechanism to avoid `datetime.datetime` issues when serializing data. - New `detect_format()` function (mostly for internal use). - Update the vendored unicodecsv to fix `None` handling. - Only freeze the headers row, not the headers columns (xls). ### Breaking Changes - `detect()` function removed. ### Bugfixes - Fix XLSX import. - Bugfix for `Dataset.transpose().transpose()`. ## 0.10.0 (2014-05-27) * Unicode Column Headers * ALL the bugfixes! ## 0.9.11 (2011-06-30) * Bugfixes ## 0.9.10 (2011-06-22) * Bugfixes ## 0.9.9 (2011-06-21) * Dataset API Changes * `stack_rows` => `stack`, `stack_columns` => `stack_cols` * column operations have their own methods now (`append_col`, `insert_col`) * List-style `pop()` * Redis-style `rpush`, `lpush`, `rpop`, `lpop`, `rpush_col`, and `lpush_col` ## 0.9.8 (2011-05-22) * OpenDocument Spreadsheet support (.ods) * Full Unicode TSV support ## 0.9.7 (2011-05-12) * Full XLSX Support! * Pickling Bugfix * Compat Module ## 0.9.6 (2011-05-12) * `seperators` renamed to `separators` * Full unicode CSV support ## 0.9.5 (2011-03-24) * Python 3.1, Python 3.2 Support (same code base!) * Formatter callback support * Various bug fixes ## 0.9.4 (2011-02-18) * Python 2.5 Support! * Tox Testing for 2.5, 2.6, 2.7 * AnyJSON Integrated * OrderedDict support * Caved to community pressure (spaces) ## 0.9.3 (2011-01-31) * Databook duplication leak fix. * HTML Table output. * Added column sorting. ## 0.9.2 (2010-11-17) * Transpose method added to Datasets. * New frozen top row in Excel output. * Pickling support for Datasets and Rows. * Support for row/column stacking. ## 0.9.1 (2010-11-04) * Minor reference shadowing bugfix. ## 0.9.0 (2010-11-04) * Massive documentation update! * Tablib.org! * Row tagging and Dataset filtering! * Column insert/delete support * Column append API change (header required) * Internal Changes (Row object and use thereof) ## 0.8.5 (2010-10-06) * New import system. All dependencies attempt to load from site-packages, then fallback on tenderized modules. ## 0.8.4 (2010-10-04) * Updated XLS output: Only wrap if '\\n' in cell. ## 0.8.3 (2010-10-04) * Ability to append new column passing a callable as the value that will be applied to every row. ## 0.8.2 (2010-10-04) * Added alignment wrapping to written cells. * Added separator support to XLS. ## 0.8.1 (2010-09-28) * Packaging Fix ## 0.8.0 (2010-09-25) * New format plugin system! * Imports! ELEGANT Imports! * Tests. Lots of tests. ## 0.7.1 (2010-09-20) * Reverting methods back to properties. * Windows bug compensated in documentation. ## 0.7.0 (2010-09-20) * Renamed DataBook Databook for consistency. * Export properties changed to methods (XLS filename / StringIO bug). 
* Optional Dataset.xls(path='filename') support (for writing on windows). * Added utf-8 on the worksheet level. ## 0.6.4 (2010-09-19) * Updated unicode export for XLS. * More exhaustive unit tests. ## 0.6.3 (2010-09-14) * Added Dataset.append() support for columns. ## 0.6.2 (2010-09-13) * Fixed Dataset.append() error on empty dataset. * Updated Dataset.headers property w/ validation. * Added Testing Fixtures. ## 0.6.1 (2010-09-12) * Packaging hotfixes. ## 0.6.0 (2010-09-11) * Public Release. * Export Support for XLS, JSON, YAML, and CSV. * DataBook Export for XLS, JSON, and YAML. * Python Dict Property Support. tablib-3.5.0/LICENSE000066400000000000000000000020651444137657500140040ustar00rootroot00000000000000Copyright 2016 Kenneth Reitz Copyright 2019 Jazzband Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. tablib-3.5.0/README.md000066400000000000000000000033351444137657500142570ustar00rootroot00000000000000# Tablib: format-agnostic tabular dataset library [![Jazzband](https://jazzband.co/static/img/badge.svg)](https://jazzband.co/) [![PyPI version](https://img.shields.io/pypi/v/tablib.svg)](https://pypi.org/project/tablib/) [![Supported Python versions](https://img.shields.io/pypi/pyversions/tablib.svg)](https://pypi.org/project/tablib/) [![PyPI downloads](https://img.shields.io/pypi/dm/tablib.svg)](https://pypistats.org/packages/tablib) [![GitHub Actions status](https://github.com/jazzband/tablib/workflows/Test/badge.svg)](https://github.com/jazzband/tablib/actions) [![codecov](https://codecov.io/gh/jazzband/tablib/branch/master/graph/badge.svg)](https://codecov.io/gh/jazzband/tablib) [![GitHub](https://img.shields.io/github/license/jazzband/tablib.svg)](LICENSE) _____ ______ ___________ ______ __ /_______ ____ /_ ___ /___(_)___ /_ _ __/_ __ `/__ __ \__ / __ / __ __ \ / /_ / /_/ / _ /_/ /_ / _ / _ /_/ / \__/ \__,_/ /_.___/ /_/ /_/ /_.___/ Tablib is a format-agnostic tabular dataset library, written in Python. Output formats supported: - Excel (Sets + Books) - JSON (Sets + Books) - YAML (Sets + Books) - Pandas DataFrames (Sets) - HTML (Sets) - Jira (Sets) - TSV (Sets) - ODS (Sets) - CSV (Sets) - DBF (Sets) Note that tablib *purposefully* excludes XML support. It always will. (Note: This is a joke. Pull requests are welcome.) Tablib documentation is graciously hosted on https://tablib.readthedocs.io It is also available in the ``docs`` directory of the source distribution. Make sure to check out [Tablib on PyPI](https://pypi.org/project/tablib/)! 
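## Quick example

A minimal sketch of what working with a `Dataset` looks like, adapted from the documentation below (the `Dataset`, `headers`, `append`, and `export` APIs are the documented ones; the sample names and values are purely illustrative):

```python
import tablib

# Build a small dataset with headers and a couple of rows.
data = tablib.Dataset(headers=['First Name', 'Last Name', 'Age'])
data.append(('Kenneth', 'Reitz', 22))
data.append(('Bessie', 'Monke', 21))

# Export to any registered format, e.g. JSON or CSV.
print(data.export('json'))
print(data.export('csv'))
```

See the [documentation](https://tablib.readthedocs.io) for importing files, dynamic columns, tagging and filtering rows, and more.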
## Contribute Please see the [contributing guide](https://github.com/jazzband/tablib/blob/master/.github/CONTRIBUTING.md). tablib-3.5.0/RELEASING.md000066400000000000000000000021231444137657500146250ustar00rootroot00000000000000# Release checklist Jazzband guidelines: https://jazzband.co/about/releases * [ ] Get master to the appropriate code release state. [GitHub Actions](https://github.com/jazzband/tablib/actions) should pass on master. [![GitHub Actions status](https://github.com/jazzband/tablib/workflows/Test/badge.svg)](https://github.com/jazzband/tablib/actions) * [ ] Check [HISTORY.md](https://github.com/jazzband/tablib/blob/master/HISTORY.md), update version number and release date * [ ] Create new GitHub release: https://github.com/jazzband/tablib/releases/new * Tag: * Click "Choose a tag" * Enter new tag: "v3.4.0" * Click "**Create new tag: v3.4.0** on publish" * Title: Leave blank, will be same as tag * Click "Generate release notes" and edit as required * Click "Publish release" * [ ] Once GitHub Actions has built and uploaded distributions, check files at [Jazzband](https://jazzband.co/projects/tablib) and release to [PyPI](https://pypi.org/pypi/tablib) * [ ] Check installation: ```bash pip uninstall -y tablib && pip install -U tablib ``` tablib-3.5.0/docs/000077500000000000000000000000001444137657500137245ustar00rootroot00000000000000tablib-3.5.0/docs/Makefile000066400000000000000000000107561444137657500153750ustar00rootroot00000000000000# Makefile for Sphinx documentation # # You can set these variables from the command line. SPHINXOPTS = SPHINXBUILD = sphinx-build PAPER = BUILDDIR = _build # Internal variables. PAPEROPT_a4 = -D latex_paper_size=a4 PAPEROPT_letter = -D latex_paper_size=letter ALLSPHINXOPTS = -d $(BUILDDIR)/doctrees $(PAPEROPT_$(PAPER)) $(SPHINXOPTS) . .PHONY: help clean html dirhtml singlehtml pickle json htmlhelp qthelp devhelp epub latex latexpdf text man changes linkcheck doctest help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " dirhtml to make HTML files named index.html in directories" @echo " singlehtml to make a single large HTML file" @echo " pickle to make pickle files" @echo " json to make JSON files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " qthelp to make HTML files and a qthelp project" @echo " devhelp to make HTML files and a Devhelp project" @echo " epub to make an epub" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " latexpdf to make LaTeX files and run them through pdflatex" @echo " text to make text files" @echo " man to make manual pages" @echo " changes to make an overview of all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" @echo " doctest to run all doctests embedded in the documentation (if enabled)" clean: -rm -rf $(BUILDDIR)/* html: $(SPHINXBUILD) -b html $(ALLSPHINXOPTS) $(BUILDDIR)/html @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/html." dirhtml: $(SPHINXBUILD) -b dirhtml $(ALLSPHINXOPTS) $(BUILDDIR)/dirhtml @echo @echo "Build finished. The HTML pages are in $(BUILDDIR)/dirhtml." singlehtml: $(SPHINXBUILD) -b singlehtml $(ALLSPHINXOPTS) $(BUILDDIR)/singlehtml @echo @echo "Build finished. The HTML page is in $(BUILDDIR)/singlehtml." pickle: $(SPHINXBUILD) -b pickle $(ALLSPHINXOPTS) $(BUILDDIR)/pickle @echo @echo "Build finished; now you can process the pickle files." 
json: $(SPHINXBUILD) -b json $(ALLSPHINXOPTS) $(BUILDDIR)/json @echo @echo "Build finished; now you can process the JSON files." htmlhelp: $(SPHINXBUILD) -b htmlhelp $(ALLSPHINXOPTS) $(BUILDDIR)/htmlhelp @echo @echo "Build finished; now you can run HTML Help Workshop with the" \ ".hhp project file in $(BUILDDIR)/htmlhelp." qthelp: $(SPHINXBUILD) -b qthelp $(ALLSPHINXOPTS) $(BUILDDIR)/qthelp @echo @echo "Build finished; now you can run "qcollectiongenerator" with the" \ ".qhcp project file in $(BUILDDIR)/qthelp, like this:" @echo "# qcollectiongenerator $(BUILDDIR)/qthelp/Tablib.qhcp" @echo "To view the help file:" @echo "# assistant -collectionFile $(BUILDDIR)/qthelp/Tablib.qhc" devhelp: $(SPHINXBUILD) -b devhelp $(ALLSPHINXOPTS) $(BUILDDIR)/devhelp @echo @echo "Build finished." @echo "To view the help file:" @echo "# mkdir -p $$HOME/.local/share/devhelp/Tablib" @echo "# ln -s $(BUILDDIR)/devhelp $$HOME/.local/share/devhelp/Tablib" @echo "# devhelp" epub: $(SPHINXBUILD) -b epub $(ALLSPHINXOPTS) $(BUILDDIR)/epub @echo @echo "Build finished. The epub file is in $(BUILDDIR)/epub." latex: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo @echo "Build finished; the LaTeX files are in $(BUILDDIR)/latex." @echo "Run \`make' in that directory to run these through (pdf)latex" \ "(use \`make latexpdf' here to do that automatically)." latexpdf: $(SPHINXBUILD) -b latex $(ALLSPHINXOPTS) $(BUILDDIR)/latex @echo "Running LaTeX files through pdflatex..." make -C $(BUILDDIR)/latex all-pdf @echo "pdflatex finished; the PDF files are in $(BUILDDIR)/latex." text: $(SPHINXBUILD) -b text $(ALLSPHINXOPTS) $(BUILDDIR)/text @echo @echo "Build finished. The text files are in $(BUILDDIR)/text." man: $(SPHINXBUILD) -b man $(ALLSPHINXOPTS) $(BUILDDIR)/man @echo @echo "Build finished. The manual pages are in $(BUILDDIR)/man." changes: $(SPHINXBUILD) -b changes $(ALLSPHINXOPTS) $(BUILDDIR)/changes @echo @echo "The overview file is in $(BUILDDIR)/changes." linkcheck: $(SPHINXBUILD) -b linkcheck $(ALLSPHINXOPTS) $(BUILDDIR)/linkcheck @echo @echo "Link check complete; look for any errors in the above output " \ "or in $(BUILDDIR)/linkcheck/output.txt." doctest: $(SPHINXBUILD) -b doctest $(ALLSPHINXOPTS) $(BUILDDIR)/doctest @echo "Testing of doctests in the sources finished, look at the " \ "results in $(BUILDDIR)/doctest/output.txt." tablib-3.5.0/docs/__init__.py000066400000000000000000000000001444137657500160230ustar00rootroot00000000000000tablib-3.5.0/docs/_templates/000077500000000000000000000000001444137657500160615ustar00rootroot00000000000000tablib-3.5.0/docs/_templates/sidebarintro.html000066400000000000000000000014021444137657500214310ustar00rootroot00000000000000

About Tablib

Tablib is an MIT Licensed format-agnostic tabular dataset library, written in Python. It allows you to import, export, and manipulate tabular data sets. Advanced features include segregation, dynamic columns, tags & filtering, and seamless format import & export.

Useful Links

tablib-3.5.0/docs/_templates/sidebarlogo.html

About Tablib

Tablib is an MIT Licensed format-agnostic tabular dataset library, written in Python. It allows you to import, export, and manipulate tabular data sets. Advanced features include segregation, dynamic columns, tags & filtering, and seamless format import & export.

tablib-3.5.0/docs/api.rst000066400000000000000000000012671444137657500152350ustar00rootroot00000000000000.. _api: === API === .. module:: tablib This part of the documentation covers all the interfaces of Tablib. For parts where Tablib depends on external libraries, we document the most important right here and provide links to the canonical documentation. -------------- Dataset Object -------------- .. autoclass:: Dataset :inherited-members: --------------- Databook Object --------------- .. autoclass:: Databook :inherited-members: --------- Functions --------- .. autofunction:: detect_format .. autofunction:: import_set ---------- Exceptions ---------- .. automodule:: tablib.exceptions :members: Now, go start some :ref:`Tablib Development `. tablib-3.5.0/docs/conf.py000066400000000000000000000166051444137657500152330ustar00rootroot00000000000000# # Tablib documentation build configuration file, created by # sphinx-quickstart on Tue Oct 5 15:25:21 2010. # # This file is execfile()d with the current directory set to its containing dir. # # Note that not all possible configuration values are present in this # autogenerated file. # # All configuration values have a default; values that are commented out # serve to show the default. import tablib # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. # sys.path.insert(0, os.path.abspath('..')) # -- General configuration ----------------------------------------------------- # If your documentation needs a minimal Sphinx version, state it here. # needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = [ 'sphinx.ext.autodoc', 'sphinx.ext.todo', 'sphinx.ext.coverage', 'sphinx.ext.viewcode', 'sphinx.ext.intersphinx' ] intersphinx_mapping = {'python': ('https://docs.python.org/3', None)} # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. # source_encoding = 'utf-8-sig' # The master toctree document. master_doc = 'index' # General information about the project. project = 'Tablib' copyright = '2019 Jazzband' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The full version, including alpha/beta/rc tags. release = tablib.__version__ # The short X.Y version. version = '.'.join(tablib.__version__.split('.')[:2]) # for example take major/minor # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. # language = None # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: # today = '' # Else, today_fmt is used as the format for a strftime call. # today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. exclude_patterns = ['_build'] # The reST default role (used for this markup: `text`) to use for all documents. # default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. 
add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). # add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. # show_authors = False # The name of the Pygments (syntax highlighting) style to use. # pygments_style = '' # A list of ignored prefixes for module index sorting. # modindex_common_prefix = [] # -- Options for HTML output --------------------------------------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = 'alabaster' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. # html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. # html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". # html_title = None # A shorter title for the navigation bar. Default is the same as html_title. # html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. # html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. # html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". # html_static_path = ['static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. # html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. html_use_smartypants = True # Custom sidebar templates, maps document names to template names. html_sidebars = { 'index': ['sidebarintro.html', 'sourcelink.html', 'searchbox.html'], '**': ['sidebarlogo.html', 'localtoc.html', 'relations.html', 'sourcelink.html', 'searchbox.html'] } # Additional templates that should be rendered to pages, maps page names to # template names. # html_additional_pages = {} # If false, no module index is generated. # html_domain_indices = True # If false, no index is generated. # html_use_index = True # If true, the index is split into individual pages for each letter. # html_split_index = False # If true, links to the reST sources are added to the pages. html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. html_show_sphinx = False # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. # html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. # html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). # html_file_suffix = None # Output file base name for HTML help builder. htmlhelp_basename = 'Tablibdoc' # -- Options for LaTeX output -------------------------------------------------- # The paper size ('letter' or 'a4'). # latex_paper_size = 'letter' # The font size ('10pt', '11pt' or '12pt'). 
# latex_font_size = '10pt' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, documentclass [howto/manual]). latex_documents = [ ('index', 'Tablib.tex', 'Tablib Documentation', 'Jazzband', 'manual'), ] latex_use_modindex = False latex_elements = { 'papersize': 'a4paper', 'pointsize': '12pt', } latex_use_parts = True # The name of an image file (relative to this directory) to place at the top of # the title page. # latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. # latex_use_parts = False # If true, show page references after internal links. # latex_show_pagerefs = False # If true, show URL addresses after external links. # latex_show_urls = False # Additional stuff for the LaTeX preamble. # latex_preamble = '' # Documents to append as an appendix to all manuals. # latex_appendices = [] # If false, no module index is generated. # latex_domain_indices = True # -- Options for manual page output -------------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). man_pages = [ ('index', 'tablib', 'Tablib Documentation', ['Jazzband'], 1) ] tablib-3.5.0/docs/development.rst000066400000000000000000000130421444137657500170000ustar00rootroot00000000000000.. _development: Development =========== Tablib is under active development, and contributors are welcome. If you have a feature request, suggestion, or bug report, please open a new issue on GitHub_. To submit patches, please send a pull request on GitHub_. .. _GitHub: https://github.com/jazzband/tablib/ .. _design: --------------------- Design Considerations --------------------- Tablib was developed with a few :pep:`20` idioms in mind. #. Beautiful is better than ugly. #. Explicit is better than implicit. #. Simple is better than complex. #. Complex is better than complicated. #. Readability counts. A few other things to keep in mind: #. Keep your code DRY. #. Strive to be as simple (to use) as possible. .. _scm: -------------- Source Control -------------- Tablib source is controlled with Git_, the lean, mean, distributed source control machine. The repository is publicly accessible. .. code-block:: console git clone git://github.com/jazzband/tablib.git The project is hosted on **GitHub**. GitHub: https://github.com/jazzband/tablib Git Branch Structure ++++++++++++++++++++ Feature / Hotfix / Release branches follow a `Successful Git Branching Model`_ . Git-flow_ is a great tool for managing the repository. I highly recommend it. ``master`` Current production release (|version|) on PyPi. Each release is tagged. When submitting patches, please place your feature/change in its own branch prior to opening a pull request on GitHub_. .. _Git: https://git-scm.org .. _`Successful Git Branching Model`: https://nvie.com/posts/a-successful-git-branching-model/ .. _git-flow: https://github.com/nvie/gitflow .. _newformats: ------------------ Adding New Formats ------------------ Tablib welcomes new format additions! Format suggestions include: * MySQL Dump Coding by Convention ++++++++++++++++++++ Tablib features a micro-framework for adding format support. The easiest way to understand it is to use it. So, let's define our own format, named *xxx*. From version 1.0, Tablib formats are class-based and can be dynamically registered. 1. Write your custom format class:: class MyXXXFormatClass: title = 'xxx' @classmethod def export_set(cls, dset): .... 
# returns string representation of given dataset @classmethod def export_book(cls, dbook): .... # returns string representation of given databook @classmethod def import_set(cls, dset, in_stream): ... # populates given Dataset with given datastream @classmethod def import_book(cls, dbook, in_stream): ... # returns Databook instance @classmethod def detect(cls, stream): ... # returns True if given stream is parsable as xxx .. admonition:: Excluding Support If the format excludes support for an import/export mechanism (*e.g.* :class:`csv ` excludes :class:`Databook ` support), simply don't define the respective class methods. Appropriate errors will be raised. 2. Register your class:: from tablib.formats import registry registry.register('xxx', MyXXXFormatClass()) 3. From then on, you should be able to use your new custom format as if it were a built-in Tablib format, e.g. using ``dataset.export('xxx')`` will use the ``MyXXXFormatClass.export_set`` method. .. _testing: -------------- Testing Tablib -------------- Testing is crucial to Tablib's stability. This stable project is used in production by many companies and developers, so it is important to be certain that every version released is fully operational. When developing a new feature for Tablib, be sure to write proper tests for it as well. When developing a feature for Tablib, the easiest way to test your changes for potential issues is to simply run the test suite directly. .. code-block:: console $ tox ---------------------- Continuous Integration ---------------------- Every pull request is automatically tested and inspected upon receipt with `GitHub Actions`_. If you broke the build, you will receive an email accordingly. Anyone may view the build status and history at any time. https://github.com/jazzband/tablib/actions Additional reports will also be included here in the future, including :pep:`8` checks and stress reports for extremely large datasets. .. _`GitHub Actions`: https://github.com/jazzband/tablib/actions .. _docs: ----------------- Building the Docs ----------------- Documentation is written in the powerful, flexible, and standard Python documentation format, `reStructured Text`_. Documentation builds are powered by the powerful Pocoo project, Sphinx_. The :ref:`API Documentation ` is mostly documented inline throughout the module. The Docs live in ``tablib/docs``. In order to build them, you will first need to install Sphinx. .. code-block:: console $ pip install sphinx Then, to build an HTML version of the docs, simply run the following from the ``docs`` directory: .. code-block:: console $ make html Your ``docs/_build/html`` directory will then contain an HTML representation of the documentation, ready for publication on most web servers. You can also generate the documentation in **epub**, **latex**, **json**, *&c* similarly. .. _`reStructured Text`: http://docutils.sourceforge.net/rst.html .. _Sphinx: http://sphinx.pocoo.org .. _`GitHub Pages`: https://pages.github.com ---------- Make sure to check out the :ref:`API Documentation `. tablib-3.5.0/docs/formats.rst000066400000000000000000000174021444137657500161350ustar00rootroot00000000000000.. _formats: ======= Formats ======= Tablib supports a wide variety of different tabular formats, both for input and output. Moreover, you can :ref:`register your own formats `. cli === The ``cli`` format is currently export-only. The exports produce a representation table suited to a terminal. 
When exporting to a CLI you can pass the table format with the ``tablefmt`` parameter, the supported formats are:: >>> import tabulate >>> list(tabulate._table_formats) ['simple', 'plain', 'grid', 'fancy_grid', 'github', 'pipe', 'orgtbl', 'jira', 'presto', 'psql', 'rst', 'mediawiki', 'moinmoin', 'youtrack', 'html', 'latex', 'latex_raw', 'latex_booktabs', 'tsv', 'textile'] For example:: dataset.export("cli", tablefmt="github") dataset.export("cli", tablefmt="grid") This format is optional, install Tablib with ``pip install "tablib[cli]"`` to make the format available. csv === When you import CSV data, you can specify if the first line of your data source is headers with the ``headers`` boolean parameter (defaults to ``True``):: import tablib tablib.import_set(your_data_stream, format='csv', headers=False) It is also possible to provide the ``skip_lines`` parameter for the number of lines that should be skipped before starting to read data. .. versionchanged:: 3.1.0 The ``skip_lines`` parameter was added. When exporting with the ``csv`` format, the top row will contain headers, if they have been set. Otherwise, the top row will contain the first row of the dataset. When importing a CSV data source or exporting a dataset as CSV, you can pass any parameter supported by the :py:func:`csv.reader` and :py:func:`csv.writer` functions. For example:: tablib.import_set(your_data_stream, format='csv', dialect='unix') dataset.export('csv', delimiter=' ', quotechar='|') .. admonition:: Line endings Exporting uses \\r\\n line endings by default so, make sure to include ``newline=''`` otherwise you will get a blank line between each row when you open the file in Excel:: with open('output.csv', 'w', newline='') as f: f.write(dataset.export('csv')) If you do not do this, and you export the file on Windows, your CSV file will open in Excel with a blank line between each row. dbf === Import/export using the dBASE_ format. .. admonition:: Binary Warning The ``dbf`` format contains binary data, so make sure to write in binary mode:: with open('output.dbf', 'wb') as f: f.write(dataset.export('dbf') .. _dBASE: https://en.wikipedia.org/wiki/DBase df (DataFrame) ============== Import/export using the pandas_ DataFrame format. This format is optional, install Tablib with ``pip install "tablib[pandas]"`` to make the format available. .. _pandas: https://pandas.pydata.org/ html ==== The ``html`` format is currently export-only. The exports produce an HTML page with the data in a ````. If headers have been set, they will be used as table headers. This format is optional, install Tablib with ``pip install "tablib[html]"`` to make the format available. jira ==== The ``jira`` format is currently export-only. Exports format the dataset according to the Jira table syntax:: ||heading 1||heading 2||heading 3|| |col A1|col A2|col A3| |col B1|col B2|col B3| json ==== Import/export using the JSON_ format. If headers have been set, a JSON list of objects will be returned. If no headers have been set, a JSON list of lists (rows) will be returned instead. Import assumes (for now) that headers exist. .. _JSON: http://json.org/ latex ===== Import/export using the LaTeX_ format. This format is export-only. If a title has been set, it will be exported as the table caption. .. _LaTeX: https://www.latex-project.org/ ods === Export data in OpenDocument Spreadsheet format. The ``ods`` format is currently export-only. This format is optional, install Tablib with ``pip install "tablib[ods]"`` to make the format available. .. 
admonition:: Binary Warning :class:`Dataset.ods` contains binary data, so make sure to write in binary mode:: with open('output.ods', 'wb') as f: f.write(data.ods) rst === Export data as a reStructuredText_ table representation of a dataset. The ``rst`` format is export-only. Exporting returns a simple table if the text in the first column is never wrapped, otherwise returns a grid table:: >>> from tablib import Dataset >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1)) >>> data = Dataset() >>> data.headers = ['A', 'B', 'A and B'] >>> for a, b in bits: ... data.append([bool(a), bool(b), bool(a * b)]) >>> table = data.export('rst') >>> table.split('\\n') == [ ... '===== ===== =====', ... ' A B A and', ... ' B ', ... '===== ===== =====', ... 'False False False', ... 'True False False', ... 'False True False', ... 'True True True ', ... '===== ===== =====', ... ] True .. _reStructuredText: http://docutils.sourceforge.net/rst.html tsv === A variant of the csv_ format with tabulators as fields separators. xls === Import/export data in Legacy Excel Spreadsheet representation. This format is optional, install Tablib with ``pip install "tablib[xls]"`` to make the format available. Its ``import_set()`` method also supports a ``skip_lines`` parameter that you can set to a number of lines that should be skipped before starting to read data. .. versionchanged:: 3.1.0 The ``skip_lines`` parameter for ``import_set()`` was added. .. note:: XLS files are limited to a maximum of 65,000 rows. Use xlsx_ to avoid this limitation. .. admonition:: Binary Warning The ``xls`` file format is binary, so make sure to write in binary mode:: with open('output.xls', 'wb') as f: f.write(data.export('xls')) xlsx ==== Import/export data in Excel 07+ Spreadsheet representation. This format is optional, install Tablib with ``pip install "tablib[xlsx]"`` to make the format available. The ``import_set()`` and ``import_book()`` methods accept keyword argument ``read_only``. If its value is ``True`` (the default), the XLSX data source is read lazily. Lazy reading generally reduces time and memory consumption, especially for large spreadsheets. However, it relies on the XLSX data source declaring correct dimensions. Some programs generate XLSX files with incorrect dimensions. Such files may need to be loaded with this optimization turned off by passing ``read_only=False``. The ``import_set()`` method also supports a ``skip_lines`` parameter that you can set to a number of lines that should be skipped before starting to read data. .. versionchanged:: 3.1.0 The ``skip_lines`` parameter for ``import_set()`` was added. .. note:: When reading an ``xlsx`` file containing formulas in its cells, Tablib will read the cell values, not the cell formulas. .. versionchanged:: 2.0.0 Reads cell values instead of formulas. You can export data to xlsx format by calling :meth:`export('xlsx') <.export>`. There are optional parameters to control the export. For available parameters, see :meth:`tablib.formats._xlsx.XLSXFormat.export_set`. .. admonition:: Binary Warning The ``xlsx`` file format is binary, so make sure to write in binary mode:: with open('output.xlsx', 'wb') as f: f.write(data.export('xlsx')) yaml ==== Import/export data in the YAML_ format. When exporting, if headers have been set, a YAML list of objects will be returned. If no headers have been set, a YAML list of lists (rows) will be returned instead. Import assumes (for now) that headers exist. 
This format is optional, install Tablib with ``pip install "tablib[yaml]"`` to make the format available. .. _YAML: https://yaml.org tablib-3.5.0/docs/index.rst000066400000000000000000000056401444137657500155720ustar00rootroot00000000000000.. Tablib documentation master file, created by sphinx-quickstart on Tue Oct 5 15:25:21 2010. You can adapt this file completely to your liking, but it should at least contain the root ``toctree`` directive. Tablib: Pythonic Tabular Datasets ================================= Release v\ |version|. (:ref:`Installation `) .. Contents: .. .. .. toctree:: .. :maxdepth: 2 .. .. Indices and tables .. ================== .. .. * :ref:`genindex` .. * :ref:`modindex` .. * :ref:`search` Tablib is an `MIT Licensed `_ format-agnostic tabular dataset library, written in Python. It allows you to import, export, and manipulate tabular data sets. Advanced features include segregation, dynamic columns, tags & filtering, and seamless format import & export. :: >>> data = tablib.Dataset(headers=['First Name', 'Last Name', 'Age']) >>> for i in [('Kenneth', 'Reitz', 22), ('Bessie', 'Monke', 21)]: ... data.append(i) >>> print(data.export('json')) [{"Last Name": "Reitz", "First Name": "Kenneth", "Age": 22}, {"Last Name": "Monke", "First Name": "Bessie", "Age": 21}] >>> print(data.export('yaml')) - {Age: 22, First Name: Kenneth, Last Name: Reitz} - {Age: 21, First Name: Bessie, Last Name: Monke} >>> data.export('xlsx') >>> data.export('df') First Name Last Name Age 0 Kenneth Reitz 22 1 Bessie Monke 21 Testimonials ------------ `National Geographic `_, `Digg, Inc `_, `Northrop Grumman `_, `Discovery Channel `_, and `The Sunlight Foundation `_ use Tablib internally. **Greg Thorton** Tablib by @kennethreitz saved my life. I had to consolidate like 5 huge poorly maintained lists of domains and data. It was a breeze! **Dave Coutts** It's turning into one of my most used modules of 2010. You really hit a sweet spot for managing tabular data with a minimal amount of code and effort. **Joshua Ourisman** Tablib has made it so much easier to deal with the inevitable 'I want an Excel file!' requests from clients... **Brad Montgomery** I think you nailed the "Python Zen" with tablib. Thanks again for an awesome lib! User's Guide ------------ This part of the documentation, which is mostly prose, begins with some background information about Tablib, then focuses on step-by-step instructions for getting the most out of your datasets. .. toctree:: :maxdepth: 2 intro .. toctree:: :maxdepth: 2 install .. toctree:: :maxdepth: 2 tutorial .. toctree:: :maxdepth: 2 formats .. toctree:: :maxdepth: 2 development API Reference ------------- If you are looking for information on a specific function, class or method, this part of the documentation is for you. .. toctree:: :maxdepth: 2 api tablib-3.5.0/docs/install.rst000066400000000000000000000032451444137657500161300ustar00rootroot00000000000000.. _install: Installation ============ This part of the documentation covers the installation of Tablib. The first step to using any software package is getting it properly installed. .. _installing: ----------------- Installing Tablib ----------------- Distribute & Pip ---------------- Of course, the recommended way to install Tablib is with `pip `_: .. code-block:: console $ pip install tablib You can also choose to install more dependencies to have more import/export formats available: .. code-block:: console $ pip install "tablib[xlsx]" Or all possible formats: .. 
code-block:: console $ pip install "tablib[all]" which is equivalent to: .. code-block:: console $ pip install "tablib[html, pandas, ods, xls, xlsx, yaml]" ------------------- Download the Source ------------------- You can also install Tablib from source. The latest release (|version|) is available from GitHub. * tarball_ * zipball_ .. _ Once you have a copy of the source, you can embed it in your Python package, or install it into your site-packages easily. .. code-block:: console $ python setup.py install To download the full source history from Git, see :ref:`Source Control `. .. _tarball: https://github.com/jazzband/tablib/tarball/master .. _zipball: https://github.com/jazzband/tablib/zipball/master .. _updates: Staying Updated --------------- The latest version of Tablib will always be available here: * PyPI: https://pypi.org/project/tablib/ * GitHub: https://github.com/jazzband/tablib/ When a new version is available, upgrading is simple:: $ pip install tablib --upgrade Now, go get a :ref:`Quick Start `. tablib-3.5.0/docs/intro.rst000066400000000000000000000040051444137657500156100ustar00rootroot00000000000000.. _intro: Introduction ============ This part of the documentation covers all the interfaces of Tablib. Tablib is a format-agnostic tabular dataset library, written in Python. It allows you to Pythonically import, export, and manipulate tabular data sets. Advanced features include segregation, dynamic columns, tags/filtering, and seamless format import/export. Philosophy ---------- Tablib was developed with a few :pep:`20` idioms in mind. #. Beautiful is better than ugly. #. Explicit is better than implicit. #. Simple is better than complex. #. Complex is better than complicated. #. Readability counts. All contributions to Tablib should keep these important rules in mind. .. _license: Tablib License -------------- Tablib is released under terms of `The MIT License`_. Copyright 2017 Kenneth Reitz Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. .. _`The MIT License`: https://opensource.org/licenses/mit-license.php .. _pythonsupport: Pythons Supported ----------------- Python 3.6+ is officially supported. Now, go :ref:`install Tablib `. 
tablib-3.5.0/docs/krstyle.sty000066400000000000000000000060421444137657500161640ustar00rootroot00000000000000\definecolor{TitleColor}{rgb}{0,0,0} \definecolor{InnerLinkColor}{rgb}{0,0,0} \renewcommand{\maketitle}{% \begin{titlepage}% \let\footnotesize\small \let\footnoterule\relax \ifsphinxpdfoutput \begingroup % This \def is required to deal with multi-line authors; it % changes \\ to ', ' (comma-space), making it pass muster for % generating document info in the PDF file. \def\\{, } \pdfinfo{ /Author (\@author) /Title (\@title) } \endgroup \fi \begin{flushright}% %\sphinxlogo% {\center \vspace*{3cm} \includegraphics{logo.pdf} \vspace{3cm} \par {\rm\Huge \@title \par}% {\em\LARGE \py@release\releaseinfo \par} {\large \@date \par \py@authoraddress \par }}% \end{flushright}%\par \@thanks \end{titlepage}% \cleardoublepage% \setcounter{footnote}{0}% \let\thanks\relax\let\maketitle\relax %\gdef\@thanks{}\gdef\@author{}\gdef\@title{} } \fancypagestyle{normal}{ \fancyhf{} \fancyfoot[LE,RO]{{\thepage}} \fancyfoot[LO]{{\nouppercase{\rightmark}}} \fancyfoot[RE]{{\nouppercase{\leftmark}}} \fancyhead[LE,RO]{{ \@title, \py@release}} \renewcommand{\headrulewidth}{0.4pt} \renewcommand{\footrulewidth}{0.4pt} } \fancypagestyle{plain}{ \fancyhf{} \fancyfoot[LE,RO]{{\thepage}} \renewcommand{\headrulewidth}{0pt} \renewcommand{\footrulewidth}{0.4pt} } \titleformat{\section}{\Large}% {\py@TitleColor\thesection}{0.5em}{\py@TitleColor}{\py@NormalColor} \titleformat{\subsection}{\large}% {\py@TitleColor\thesubsection}{0.5em}{\py@TitleColor}{\py@NormalColor} \titleformat{\subsubsection}{}% {\py@TitleColor\thesubsubsection}{0.5em}{\py@TitleColor}{\py@NormalColor} \titleformat{\paragraph}{\large}% {\py@TitleColor}{0em}{\py@TitleColor}{\py@NormalColor} \ChNameVar{\raggedleft\normalsize} \ChNumVar{\raggedleft \bfseries\Large} \ChTitleVar{\raggedleft \rm\Huge} \renewcommand\thepart{\@Roman\c@part} \renewcommand\part{% \pagestyle{empty} \if@noskipsec \leavevmode \fi \cleardoublepage \vspace*{6cm}% \@afterindentfalse \secdef\@part\@spart} \def\@part[#1]#2{% \ifnum \c@secnumdepth >\m@ne \refstepcounter{part}% \addcontentsline{toc}{part}{\thepart\hspace{1em}#1}% \else \addcontentsline{toc}{part}{#1}% \fi {\parindent \z@ %\center \interlinepenalty \@M \normalfont \ifnum \c@secnumdepth >\m@ne \rm\Large \partname~\thepart \par\nobreak \fi \MakeUppercase{\rm\Huge #2}% \markboth{}{}\par}% \nobreak \vskip 8ex \@afterheading} \def\@spart#1{% {\parindent \z@ %\center \interlinepenalty \@M \normalfont \huge \bfseries #1\par}% \nobreak \vskip 3ex \@afterheading} % use inconsolata font \usepackage{inconsolata} % fix single quotes, for inconsolata. (does not work) %%\usepackage{textcomp} %%\begingroup %% \catcode`'=\active %% \g@addto@macro\@noligs{\let'\textsinglequote} %% \endgroup %%\endinput tablib-3.5.0/docs/requirements.txt000066400000000000000000000000161444137657500172050ustar00rootroot00000000000000sphinx==6.1.3 tablib-3.5.0/docs/tutorial.rst000066400000000000000000000250151444137657500163240ustar00rootroot00000000000000.. _quickstart: ========== Quickstart ========== Eager to get started? This page gives a good introduction in how to get started with Tablib. This assumes you already have Tablib installed. If you do not, head over to the :ref:`Installation ` section. First, make sure that: * Tablib is :ref:`installed ` * Tablib is :ref:`up-to-date ` Let's get started with some simple use cases and examples. 
------------------ Creating a Dataset ------------------ A :class:`Dataset ` is nothing more than what its name implies—a set of data. Creating your own instance of the :class:`tablib.Dataset` object is simple. :: data = tablib.Dataset() You can now start filling this :class:`Dataset ` object with data. .. admonition:: Example Context From here on out, if you see ``data``, assume that it's a fresh :class:`Dataset ` object. ----------- Adding Rows ----------- Let's say you want to collect a simple list of names. :: # collection of names names = ['Kenneth Reitz', 'Bessie Monke'] for name in names: # split name appropriately fname, lname = name.split() # add names to Dataset data.append([fname, lname]) You can get a nice, Pythonic view of the dataset at any time with :class:`Dataset.dict`:: >>> data.dict [('Kenneth', 'Reitz'), ('Bessie', 'Monke')] -------------- Adding Headers -------------- It's time to enhance our :class:`Dataset` by giving our columns some titles. To do so, set :class:`Dataset.headers`. :: data.headers = ['First Name', 'Last Name'] Now our data looks a little different. :: >>> data.dict [{'Last Name': 'Reitz', 'First Name': 'Kenneth'}, {'Last Name': 'Monke', 'First Name': 'Bessie'}] -------------- Adding Columns -------------- Now that we have a basic :class:`Dataset` in place, let's add a column of **ages** to it. :: data.append_col([22, 20], header='Age') Let's view the data now. :: >>> data.dict [{'Last Name': 'Reitz', 'First Name': 'Kenneth', 'Age': 22}, {'Last Name': 'Monke', 'First Name': 'Bessie', 'Age': 20}] It's that easy. -------------- Importing Data -------------- Creating a :class:`tablib.Dataset` object by importing a pre-existing file is simple. :: with open('data.csv', 'r') as fh: imported_data = Dataset().load(fh) This detects what sort of data is being passed in, and uses an appropriate formatter to do the import. So you can import from a variety of different file types. .. admonition:: Source without headers When the format is :class:`csv `, :class:`tsv `, :class:`dbf `, :class:`xls ` or :class:`xlsx `, and the data source does not have headers, the import should be done as follows :: with open('data.csv', 'r') as fh: imported_data = Dataset().load(fh, headers=False) -------------- Exporting Data -------------- Tablib's killer feature is the ability to export your :class:`Dataset` objects into a number of formats. **Comma-Separated Values** :: >>> data.export('csv') Last Name,First Name,Age Reitz,Kenneth,22 Monke,Bessie,20 **JavaScript Object Notation** :: >>> data.export('json') [{"Last Name": "Reitz", "First Name": "Kenneth", "Age": 22}, {"Last Name": "Monke", "First Name": "Bessie", "Age": 20}] **YAML Ain't Markup Language** :: >>> data.export('yaml') - {Age: 22, First Name: Kenneth, Last Name: Reitz} - {Age: 20, First Name: Bessie, Last Name: Monke} **Microsoft Excel** :: >>> data.export('xls') **Pandas DataFrame** :: >>> data.export('df') First Name Last Name Age 0 Kenneth Reitz 22 1 Bessie Monke 21 ------------------------ Selecting Rows & Columns ------------------------ You can slice and dice your data, just like a standard Python list. :: >>> data[0] ('Kenneth', 'Reitz', 22) If we had a set of data consisting of thousands of rows, it could be useful to get a list of values in a column. To do so, we access the :class:`Dataset` as if it were a standard Python dictionary. :: >>> data['First Name'] ['Kenneth', 'Bessie'] You can also access the column using its index. 
:: >>> data.headers ['Last Name', 'First Name', 'Age'] >>> data.get_col(1) ['Kenneth', 'Bessie'] Let's find the average age. :: >>> ages = data['Age'] >>> float(sum(ages)) / len(ages) 21.0 ----------------------- Removing Rows & Columns ----------------------- It's easier than you could imagine. Delete a column:: >>> del data['Col Name'] Delete a range of rows:: >>> del data[0:12] ============== Advanced Usage ============== This part of the documentation services to give you an idea that are otherwise hard to extract from the :ref:`API Documentation `. And now for something completely different. .. _dyncols: --------------- Dynamic Columns --------------- .. versionadded:: 0.8.3 Thanks to Josh Ourisman, Tablib now supports adding dynamic columns. A dynamic column is a single callable object (*e.g.* a function). Let's add a dynamic column to our :class:`Dataset` object. In this example, we have a function that generates a random grade for our students. :: import random def random_grade(row): """Returns a random integer for entry.""" return (random.randint(60,100)/100.0) data.append_col(random_grade, header='Grade') Let's have a look at our data. :: >>> data.export('yaml') - {Age: 22, First Name: Kenneth, Grade: 0.6, Last Name: Reitz} - {Age: 20, First Name: Bessie, Grade: 0.75, Last Name: Monke} Let's remove that column. :: >>> del data['Grade'] When you add a dynamic column, the first argument that is passed in to the given callable is the current data row. You can use this to perform calculations against your data row. For example, we can use the data available in the row to guess the gender of a student. :: def guess_gender(row): """Calculates gender of given student data row.""" m_names = ('Kenneth', 'Mike', 'Yuri') f_names = ('Bessie', 'Samantha', 'Heather') name = row[0] if name in m_names: return 'Male' elif name in f_names: return 'Female' else: return 'Unknown' Adding this function to our dataset as a dynamic column would result in: :: >>> data.export('yaml') - {Age: 22, First Name: Kenneth, Gender: Male, Last Name: Reitz} - {Age: 20, First Name: Bessie, Gender: Female, Last Name: Monke} .. _tags: ---------------------------- Filtering Datasets with Tags ---------------------------- .. versionadded:: 0.9.0 When constructing a :class:`Dataset` object, you can add tags to rows by specifying the ``tags`` parameter. This allows you to filter your :class:`Dataset` later. This can be useful to separate rows of data based on arbitrary criteria (*e.g.* origin) that you don't want to include in your :class:`Dataset`. Let's tag some students. :: students = tablib.Dataset() students.headers = ['first', 'last'] students.rpush(['Kenneth', 'Reitz'], tags=['male', 'technical']) students.rpush(['Daniel', 'Dupont'], tags=['male', 'creative' ]) students.rpush(['Bessie', 'Monke'], tags=['female', 'creative']) Now that we have extra meta-data on our rows, we can easily filter our :class:`Dataset`. Let's just see Female students. :: >>> students.filter(['female']).yaml - {first: Bessie, Last: Monke} By default, when you pass a list of tags you get filter type or. :: >>> students.filter(['female', 'creative']).yaml - {first: Daniel, Last: Dupont} - {first: Bessie, Last: Monke} Using chaining you can get a filter type and. :: >>> students.filter(['female']).filter(['creative']).yaml - {first: Bessie, Last: Monke} It's that simple. The original :class:`Dataset` is untouched. 
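To make that last point concrete, here is a minimal sketch (reusing the students data from above) showing that ``filter()`` hands back a new, smaller :class:`Dataset` while the original keeps all of its rows::

    students = tablib.Dataset()
    students.headers = ['first', 'last']

    students.rpush(['Kenneth', 'Reitz'], tags=['male', 'technical'])
    students.rpush(['Daniel', 'Dupont'], tags=['male', 'creative'])
    students.rpush(['Bessie', 'Monke'], tags=['female', 'creative'])

    creative = students.filter(['creative'])

    print(students.height)   # 3 -- the original is untouched
    print(creative.height)   # 2 -- only the rows tagged 'creative'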
Open an Excel Workbook and read first sheet ------------------------------------------- Open an Excel 2007 and later workbook with a single sheet (or a workbook with multiple sheets but you just want the first sheet). :: data = tablib.Dataset() with open('my_excel_file.xlsx', 'rb') as fh: data.load(fh, 'xlsx') print(data) Excel Workbook With Multiple Sheets ------------------------------------ When dealing with a large number of :class:`Datasets ` in spreadsheet format, it's quite common to group multiple spreadsheets into a single Excel file, known as a Workbook. Tablib makes it extremely easy to build workbooks with the handy :class:`Databook` class. Let's say we have 3 different :class:`Datasets `. All we have to do is add them to a :class:`Databook` object... :: book = tablib.Databook((data1, data2, data3)) ... and export to Excel just like :class:`Datasets `. :: with open('students.xls', 'wb') as f: f.write(book.export('xls')) The resulting ``students.xls`` file will contain a separate spreadsheet for each :class:`Dataset` object in the :class:`Databook`. .. admonition:: Binary Warning Make sure to open the output file in binary mode. .. _separators: ---------- Separators ---------- .. versionadded:: 0.8.2 When constructing a spreadsheet, it's often useful to create a blank row containing information on the upcoming data. So, :: daniel_tests = [ ('11/24/09', 'Math 101 Mid-term Exam', 56.), ('05/24/10', 'Math 101 Final Exam', 62.) ] suzie_tests = [ ('11/24/09', 'Math 101 Mid-term Exam', 56.), ('05/24/10', 'Math 101 Final Exam', 62.) ] # Create new dataset tests = tablib.Dataset() tests.headers = ['Date', 'Test Name', 'Grade'] # Daniel's Tests tests.append_separator('Daniel\'s Scores') for test_row in daniel_tests: tests.append(test_row) # Susie's Tests tests.append_separator('Susie\'s Scores') for test_row in suzie_tests: tests.append(test_row) # Write spreadsheet to disk with open('grades.xls', 'wb') as f: f.write(tests.export('xls')) The resulting **tests.xls** will have the following layout: Daniel's Scores: * '11/24/09', 'Math 101 Mid-term Exam', 56. * '05/24/10', 'Math 101 Final Exam', 62. Suzie's Scores: * '11/24/09', 'Math 101 Mid-term Exam', 56. * '05/24/10', 'Math 101 Final Exam', 62. .. admonition:: Format Support At this time, only :class:`Excel ` output supports separators. ---- Now, go check out the :ref:`API Documentation ` or begin :ref:`Tablib Development `. 
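One last sketch before you go: assuming ``data1``, ``data2`` and ``data3`` are the :class:`Datasets ` from the workbook example above (and that the ``xls`` extra is installed), you can inspect a :class:`Databook` before writing it out::

    book = tablib.Databook((data1, data2, data3))

    print(book.size)                 # number of Dataset objects in the book
    for sheet in book.sheets():
        print(sheet.title, sheet.height)

    with open('students.xls', 'wb') as f:
        f.write(book.export('xls'))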
tablib-3.5.0/pyproject.toml000066400000000000000000000033221444137657500157100ustar00rootroot00000000000000[build-system] requires = ["setuptools>=58", "setuptools_scm[toml]>=6.2"] build-backend = "setuptools.build_meta" [project] name = "tablib" description = "Format agnostic tabular data library (XLS, JSON, YAML, CSV, etc.)" readme = "README.md" license = {text = "MIT License"} authors = [ {name = "Kenneth Reitz", email = "me@kennethreitz.org"} ] maintainers = [ {name = "Jazzband Team", email = "roadies@jazzband.co"}, {name = "Hugo van Kemenade"}, {name = "Claude Paroz", email = "claude@2xlibre.net"}, ] requires-python = ">=3.8" classifiers = [ "Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "Natural Language :: English", "License :: OSI Approved :: MIT License", "Programming Language :: Python", "Programming Language :: Python :: 3 :: Only", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", ] dynamic = ["version"] [project.optional-dependencies] all = [ "markuppy", "odfpy", "openpyxl>=2.6.0", "pandas", "pyyaml", "tabulate", "xlrd", "xlwt", ] cli = ["tabulate"] html = ["markuppy"] ods = ["odfpy"] pandas = ["pandas"] xls = ["xlrd", "xlwt"] xlsx = ["openpyxl>=2.6.0"] yaml = ["pyyaml"] [project.urls] homepage = "https://tablib.readthedocs.io" documentation = "https://tablib.readthedocs.io" repository = "https://github.com/jazzband/tablib" changelog = "https://github.com/jazzband/tablib/blob/master/HISTORY.md" [tool.setuptools_scm] write_to = "src/tablib/_version.py" [tool.isort] profile = "black" tablib-3.5.0/pytest.ini000066400000000000000000000002301444137657500150200ustar00rootroot00000000000000[pytest] norecursedirs = .git .* addopts = -rsxX --showlocals --tb=native --cov=tablib --cov=tests --cov-report xml --cov-report term --cov-report html tablib-3.5.0/src/000077500000000000000000000000001444137657500135635ustar00rootroot00000000000000tablib-3.5.0/src/tablib/000077500000000000000000000000001444137657500150205ustar00rootroot00000000000000tablib-3.5.0/src/tablib/__init__.py000066400000000000000000000005651444137657500171370ustar00rootroot00000000000000""" Tablib. """ try: # Generated by setuptools-scm. from ._version import version as __version__ except ImportError: # Some broken installation. __version__ = None from tablib.core import ( # noqa: F401 Databook, Dataset, InvalidDatasetType, InvalidDimensions, UnsupportedFormat, detect_format, import_book, import_set, ) tablib-3.5.0/src/tablib/core.py000066400000000000000000000655141444137657500163350ustar00rootroot00000000000000""" tablib.core ~~~~~~~~~~~ This module implements the central Tablib objects. :copyright: (c) 2016 by Kenneth Reitz. 2019 Jazzband. :license: MIT, see LICENSE for more details. """ from collections import OrderedDict from copy import copy from operator import itemgetter from tablib.exceptions import ( HeadersNeeded, InvalidDatasetIndex, InvalidDatasetType, InvalidDimensions, UnsupportedFormat, ) from tablib.formats import registry from tablib.utils import normalize_input __title__ = 'tablib' __author__ = 'Kenneth Reitz' __license__ = 'MIT' __copyright__ = 'Copyright 2017 Kenneth Reitz. 2019 Jazzband.' __docformat__ = 'restructuredtext' class Row: """Internal Row object. 
Mainly used for filtering.""" __slots__ = ['_row', 'tags'] def __init__(self, row=(), tags=()): self._row = list(row) self.tags = list(tags) def __iter__(self): return (col for col in self._row) def __len__(self): return len(self._row) def __repr__(self): return repr(self._row) def __getitem__(self, i): return self._row[i] def __setitem__(self, i, value): self._row[i] = value def __delitem__(self, i): del self._row[i] def __getstate__(self): return self._row, self.tags def __setstate__(self, state): self._row, self.tags = state def rpush(self, value): self.insert(len(self._row), value) def lpush(self, value): self.insert(0, value) def append(self, value): self.rpush(value) def insert(self, index, value): self._row.insert(index, value) def __contains__(self, item): return (item in self._row) @property def tuple(self): """Tuple representation of :class:`Row`.""" return tuple(self._row) @property def list(self): """List representation of :class:`Row`.""" return list(self._row) def has_tag(self, tag): """Returns true if current row contains tag.""" if tag is None: return False elif isinstance(tag, str): return (tag in self.tags) else: return bool(len(set(tag) & set(self.tags))) class Dataset: """The :class:`Dataset` object is the heart of Tablib. It provides all core functionality. Usually you create a :class:`Dataset` instance in your main module, and append rows as you collect data. :: data = tablib.Dataset() data.headers = ('name', 'age') for (name, age) in some_collector(): data.append((name, age)) Setting columns is similar. The column data length must equal the current height of the data and headers must be set. :: data = tablib.Dataset() data.headers = ('first_name', 'last_name') data.append(('John', 'Adams')) data.append(('George', 'Washington')) data.append_col((90, 67), header='age') You can also set rows and headers upon instantiation. This is useful if dealing with dozens or hundreds of :class:`Dataset` objects. :: headers = ('first_name', 'last_name') data = [('John', 'Adams'), ('George', 'Washington')] data = tablib.Dataset(*data, headers=headers) :param \\*args: (optional) list of rows to populate Dataset :param headers: (optional) list strings for Dataset header row :param title: (optional) string to use as title of the Dataset .. admonition:: Format Attributes Definition If you look at the code, the various output/import formats are not defined within the :class:`Dataset` object. To add support for a new format, see :ref:`Adding New Formats `. 
""" def __init__(self, *args, **kwargs): self._data = list(Row(arg) for arg in args) self.__headers = None # ('title', index) tuples self._separators = [] # (column, callback) tuples self._formatters = [] self.headers = kwargs.get('headers') self.title = kwargs.get('title') def __len__(self): return self.height def __getitem__(self, key): if isinstance(key, str): if key in self.headers: pos = self.headers.index(key) # get 'key' index from each data return [row[pos] for row in self._data] else: raise KeyError else: _results = self._data[key] if isinstance(_results, Row): return _results.tuple else: return [result.tuple for result in _results] def __setitem__(self, key, value): self._validate(value) self._data[key] = Row(value) def __delitem__(self, key): if isinstance(key, str): if key in self.headers: pos = self.headers.index(key) del self.headers[pos] for i, row in enumerate(self._data): del row[pos] self._data[i] = row else: raise KeyError else: del self._data[key] def __repr__(self): try: return '<%s dataset>' % (self.title.lower()) except AttributeError: return '' def __str__(self): result = [] # Add str representation of headers. if self.__headers: result.append([str(h) for h in self.__headers]) # Add str representation of rows. result.extend(list(map(str, row)) for row in self._data) lens = [list(map(len, row)) for row in result] field_lens = list(map(max, zip(*lens))) # delimiter between header and data if self.__headers: result.insert(1, ['-' * length for length in field_lens]) format_string = '|'.join('{%s:%s}' % item for item in enumerate(field_lens)) return '\n'.join(format_string.format(*row) for row in result) # --------- # Internals # --------- def _get_in_format(self, fmt_key, **kwargs): return registry.get_format(fmt_key).export_set(self, **kwargs) def _set_in_format(self, fmt_key, in_stream, **kwargs): in_stream = normalize_input(in_stream) return registry.get_format(fmt_key).import_set(self, in_stream, **kwargs) def _validate(self, row=None, col=None, safety=False): """Assures size of every row in dataset is of proper proportions.""" if row: is_valid = (len(row) == self.width) if self.width else True elif col: if len(col) < 1: is_valid = True else: is_valid = (len(col) == self.height) if self.height else True else: is_valid = all(len(x) == self.width for x in self._data) if is_valid: return True else: if not safety: raise InvalidDimensions return False def _package(self, dicts=True, ordered=True): """Packages Dataset into lists of dictionaries for transmission.""" # TODO: Dicts default to false? _data = list(self._data) if ordered: dict_pack = OrderedDict else: dict_pack = dict # Execute formatters if self._formatters: for row_i, row in enumerate(_data): for col, callback in self._formatters: try: if col is None: for j, c in enumerate(row): _data[row_i][j] = callback(c) else: _data[row_i][col] = callback(row[col]) except IndexError: raise InvalidDatasetIndex if self.headers: if dicts: data = [dict_pack(list(zip(self.headers, data_row))) for data_row in _data] else: data = [list(self.headers)] + list(_data) else: data = [list(row) for row in _data] return data def _get_headers(self): """An *optional* list of strings to be used for header rows and attribute names. This must be set manually. The given list length must equal :attr:`Dataset.width`. 
""" return self.__headers def _set_headers(self, collection): """Validating headers setter.""" self._validate(collection) if collection: try: self.__headers = list(collection) except TypeError: raise TypeError else: self.__headers = None headers = property(_get_headers, _set_headers) def _get_dict(self): """A native Python representation of the :class:`Dataset` object. If headers have been set, a list of Python dictionaries will be returned. If no headers have been set, a list of tuples (rows) will be returned instead. A dataset object can also be imported by setting the `Dataset.dict` attribute: :: data = tablib.Dataset() data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}] """ return self._package() def _set_dict(self, pickle): """A native Python representation of the Dataset object. If headers have been set, a list of Python dictionaries will be returned. If no headers have been set, a list of tuples (rows) will be returned instead. A dataset object can also be imported by setting the :attr:`Dataset.dict` attribute. :: data = tablib.Dataset() data.dict = [{'age': 90, 'first_name': 'Kenneth', 'last_name': 'Reitz'}] """ error_details = ( "Please check format documentation " "https://tablib.readthedocs.io/en/stable/formats.html#yaml" ) if not pickle: return if not isinstance(pickle, list): # sometimes pickle is a dict and len(pickle) returns True. # since we access index 0 we should check if the type is list raise UnsupportedFormat(error_details) # if list of rows if isinstance(pickle[0], list): self.wipe() for row in pickle: self.append(Row(row)) # if list of objects elif isinstance(pickle[0], dict): self.wipe() self.headers = list(pickle[0].keys()) for row in pickle: self.append(Row(list(row.values()))) else: raise UnsupportedFormat(error_details) dict = property(_get_dict, _set_dict) def _clean_col(self, col): """Prepares the given column for insert/append.""" col = list(col) if self.headers: header = [col.pop(0)] else: header = [] if len(col) == 1 and hasattr(col[0], '__call__'): col = list(map(col[0], self._data)) col = tuple(header + col) return col @property def height(self): """The number of rows currently in the :class:`Dataset`. Cannot be directly modified. """ return len(self._data) @property def width(self): """The number of columns currently in the :class:`Dataset`. Cannot be directly modified. """ try: return len(self._data[0]) except IndexError: try: return len(self.headers) except TypeError: return 0 def load(self, in_stream, format=None, **kwargs): """ Import `in_stream` to the :class:`Dataset` object using the `format`. `in_stream` can be a file-like object, a string, or a bytestring. :param \\*\\*kwargs: (optional) custom configuration to the format `import_set`. """ stream = normalize_input(in_stream) if not format: format = detect_format(stream) fmt = registry.get_format(format) if not hasattr(fmt, 'import_set'): raise UnsupportedFormat(f'Format {format} cannot be imported.') if not import_set: raise UnsupportedFormat(f'Format {format} cannot be imported.') fmt.import_set(self, stream, **kwargs) return self def export(self, format, **kwargs): """ Export :class:`Dataset` object to `format`. :param \\*\\*kwargs: (optional) custom configuration to the format `export_set`. 
""" fmt = registry.get_format(format) if not hasattr(fmt, 'export_set'): raise UnsupportedFormat(f'Format {format} cannot be exported.') return fmt.export_set(self, **kwargs) # ---- # Rows # ---- def insert(self, index, row, tags=()): """Inserts a row to the :class:`Dataset` at the given index. Rows inserted must be the correct size (height or width). The default behaviour is to insert the given row to the :class:`Dataset` object at the given index. """ self._validate(row) self._data.insert(index, Row(row, tags=tags)) def rpush(self, row, tags=()): """Adds a row to the end of the :class:`Dataset`. See :method:`Dataset.insert` for additional documentation. """ self.insert(self.height, row=row, tags=tags) def lpush(self, row, tags=()): """Adds a row to the top of the :class:`Dataset`. See :method:`Dataset.insert` for additional documentation. """ self.insert(0, row=row, tags=tags) def append(self, row, tags=()): """Adds a row to the :class:`Dataset`. See :method:`Dataset.insert` for additional documentation. """ self.rpush(row, tags) def extend(self, rows, tags=()): """Adds a list of rows to the :class:`Dataset` using :method:`Dataset.append` """ for row in rows: self.append(row, tags) def lpop(self): """Removes and returns the first row of the :class:`Dataset`.""" cache = self[0] del self[0] return cache def rpop(self): """Removes and returns the last row of the :class:`Dataset`.""" cache = self[-1] del self[-1] return cache def pop(self): """Removes and returns the last row of the :class:`Dataset`.""" return self.rpop() # ------- # Columns # ------- def insert_col(self, index, col=None, header=None): """Inserts a column to the :class:`Dataset` at the given index. Columns inserted must be the correct height. You can also insert a column of a single callable object, which will add a new column with the return values of the callable each as an item in the column. :: data.append_col(col=random.randint) If inserting a column, and :attr:`Dataset.headers` is set, the header attribute must be set, and will be considered the header for that row. See :ref:`dyncols` for an in-depth example. .. versionchanged:: 0.9.0 If inserting a column, and :attr:`Dataset.headers` is set, the header attribute must be set, and will be considered the header for that row. .. versionadded:: 0.9.0 If inserting a row, you can add :ref:`tags ` to the row you are inserting. This gives you the ability to :method:`filter ` your :class:`Dataset` later. """ if col is None: col = [] # Callable Columns... if hasattr(col, '__call__'): col = list(map(col, self._data)) col = self._clean_col(col) self._validate(col=col) if self.headers: # pop the first item off, add to headers if not header: raise HeadersNeeded() # corner case - if header is set without data elif header and self.height == 0 and len(col): raise InvalidDimensions self.headers.insert(index, header) if self.height and self.width: for i, row in enumerate(self._data): row.insert(index, col[i]) self._data[i] = row else: self._data = [Row([row]) for row in col] def rpush_col(self, col, header=None): """Adds a column to the end of the :class:`Dataset`. See :method:`Dataset.insert` for additional documentation. """ self.insert_col(self.width, col, header=header) def lpush_col(self, col, header=None): """Adds a column to the top of the :class:`Dataset`. See :method:`Dataset.insert` for additional documentation. 
""" self.insert_col(0, col, header=header) def insert_separator(self, index, text='-'): """Adds a separator to :class:`Dataset` at given index.""" sep = (index, text) self._separators.append(sep) def append_separator(self, text='-'): """Adds a :ref:`separator ` to the :class:`Dataset`.""" # change offsets if headers are or aren't defined if not self.headers: index = self.height if self.height else 0 else: index = (self.height + 1) if self.height else 1 self.insert_separator(index, text) def append_col(self, col, header=None): """Adds a column to the :class:`Dataset`. See :method:`Dataset.insert_col` for additional documentation. """ self.rpush_col(col, header) def get_col(self, index): """Returns the column from the :class:`Dataset` at the given index.""" return [row[index] for row in self._data] # ---- # Misc # ---- def add_formatter(self, col, handler): """Adds a formatter to the :class:`Dataset`. .. versionadded:: 0.9.5 :param col: column to. Accepts index int or header str. :param handler: reference to callback function to execute against each cell value. """ if isinstance(col, str): if col in self.headers: col = self.headers.index(col) # get 'key' index from each data else: raise KeyError if not col > self.width: self._formatters.append((col, handler)) else: raise InvalidDatasetIndex return True def filter(self, tag): """Returns a new instance of the :class:`Dataset`, excluding any rows that do not contain the given :ref:`tags `. """ _dset = copy(self) _dset._data = [row for row in _dset._data if row.has_tag(tag)] return _dset def sort(self, col, reverse=False): """Sort a :class:`Dataset` by a specific column, given string (for header) or integer (for column index). The order can be reversed by setting ``reverse`` to ``True``. Returns a new :class:`Dataset` instance where columns have been sorted. """ if isinstance(col, str): if not self.headers: raise HeadersNeeded _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse) _dset = Dataset(headers=self.headers, title=self.title) for item in _sorted: row = [item[key] for key in self.headers] _dset.append(row=row) else: if self.headers: col = self.headers[col] _sorted = sorted(self.dict, key=itemgetter(col), reverse=reverse) _dset = Dataset(headers=self.headers, title=self.title) for item in _sorted: if self.headers: row = [item[key] for key in self.headers] else: row = item _dset.append(row=row) return _dset def transpose(self): """Transpose a :class:`Dataset`, turning rows into columns and vice versa, returning a new ``Dataset`` instance. 
The first row of the original instance becomes the new header row.""" # Don't transpose if there is no data if not self: return _dset = Dataset() # The first element of the headers stays in the headers, # it is our "hinge" on which we rotate the data new_headers = [self.headers[0]] + self[self.headers[0]] _dset.headers = new_headers for index, column in enumerate(self.headers): if column == self.headers[0]: # It's in the headers, so skip it continue # Adding the column name as now they're a regular column # Use `get_col(index)` in case there are repeated values row_data = [column] + self.get_col(index) row_data = Row(row_data) _dset.append(row=row_data) return _dset def stack(self, other): """Stack two :class:`Dataset` instances together by joining at the row level, and return new combined ``Dataset`` instance.""" if not isinstance(other, Dataset): return if self.width != other.width: raise InvalidDimensions # Copy the source data _dset = copy(self) rows_to_stack = [row for row in _dset._data] other_rows = [row for row in other._data] rows_to_stack.extend(other_rows) _dset._data = rows_to_stack return _dset def stack_cols(self, other): """Stack two :class:`Dataset` instances together by joining at the column level, and return a new combined ``Dataset`` instance. If either ``Dataset`` has headers set, than the other must as well.""" if not isinstance(other, Dataset): return if self.headers or other.headers: if not self.headers or not other.headers: raise HeadersNeeded if self.height != other.height: raise InvalidDimensions try: new_headers = self.headers + other.headers except TypeError: new_headers = None _dset = Dataset() for column in self.headers: _dset.append_col(col=self[column]) for column in other.headers: _dset.append_col(col=other[column]) _dset.headers = new_headers return _dset def remove_duplicates(self): """Removes all duplicate rows from the :class:`Dataset` object while maintaining the original order.""" seen = set() self._data[:] = [ row for row in self._data if not (tuple(row) in seen or seen.add(tuple(row))) ] def wipe(self): """Removes all content and headers from the :class:`Dataset` object.""" self._data = list() self.__headers = None def subset(self, rows=None, cols=None): """Returns a new instance of the :class:`Dataset`, including only specified rows and columns. """ # Don't return if no data if not self: return if rows is None: rows = list(range(self.height)) if cols is None: cols = list(self.headers) # filter out impossible rows and columns rows = [row for row in rows if row in range(self.height)] cols = [header for header in cols if header in self.headers] _dset = Dataset() # filtering rows and columns _dset.headers = list(cols) _dset._data = [] for row_no, row in enumerate(self._data): data_row = [] for key in _dset.headers: if key in self.headers: pos = self.headers.index(key) data_row.append(row[pos]) else: raise KeyError if row_no in rows: _dset.append(row=Row(data_row)) return _dset class Databook: """A book of :class:`Dataset` objects. 
""" def __init__(self, sets=None): self._datasets = sets or [] def __repr__(self): try: return '<%s databook>' % (self.title.lower()) except AttributeError: return '' def wipe(self): """Removes all :class:`Dataset` objects from the :class:`Databook`.""" self._datasets = [] def sheets(self): return self._datasets def add_sheet(self, dataset): """Adds given :class:`Dataset` to the :class:`Databook`.""" if isinstance(dataset, Dataset): self._datasets.append(dataset) else: raise InvalidDatasetType def _package(self, ordered=True): """Packages :class:`Databook` for delivery.""" collector = [] if ordered: dict_pack = OrderedDict else: dict_pack = dict for dset in self._datasets: collector.append(dict_pack( title=dset.title, data=dset._package(ordered=ordered) )) return collector @property def size(self): """The number of the :class:`Dataset` objects within :class:`Databook`.""" return len(self._datasets) def load(self, in_stream, format, **kwargs): """ Import `in_stream` to the :class:`Databook` object using the `format`. `in_stream` can be a file-like object, a string, or a bytestring. :param \\*\\*kwargs: (optional) custom configuration to the format `import_book`. """ stream = normalize_input(in_stream) if not format: format = detect_format(stream) fmt = registry.get_format(format) if not hasattr(fmt, 'import_book'): raise UnsupportedFormat(f'Format {format} cannot be loaded.') fmt.import_book(self, stream, **kwargs) return self def export(self, format, **kwargs): """ Export :class:`Databook` object to `format`. :param \\*\\*kwargs: (optional) custom configuration to the format `export_book`. """ fmt = registry.get_format(format) if not hasattr(fmt, 'export_book'): raise UnsupportedFormat(f'Format {format} cannot be exported.') return fmt.export_book(self, **kwargs) def detect_format(stream): """Return format name of given stream (file-like object, string, or bytestring).""" stream = normalize_input(stream) fmt_title = None for fmt in registry.formats(): try: if fmt.detect(stream): fmt_title = fmt.title break except AttributeError: pass finally: if hasattr(stream, 'seek'): stream.seek(0) return fmt_title def import_set(stream, format=None, **kwargs): """Return dataset of given stream (file-like object, string, or bytestring).""" return Dataset().load(normalize_input(stream), format, **kwargs) def import_book(stream, format=None, **kwargs): """Return dataset of given stream (file-like object, string, or bytestring).""" return Databook().load(normalize_input(stream), format, **kwargs) registry.register_builtins() tablib-3.5.0/src/tablib/exceptions.py000066400000000000000000000011731444137657500175550ustar00rootroot00000000000000class TablibException(Exception): """Tablib common exception.""" class InvalidDatasetType(TablibException, TypeError): """Only Datasets can be added to a Databook.""" class InvalidDimensions(TablibException, ValueError): """The size of the column or row doesn't fit the table dimensions.""" class InvalidDatasetIndex(TablibException, IndexError): """Outside of Dataset size.""" class HeadersNeeded(TablibException, AttributeError): """Header parameter must be given when appending a column to this Dataset.""" class UnsupportedFormat(TablibException, NotImplementedError): """Format not supported.""" tablib-3.5.0/src/tablib/formats/000077500000000000000000000000001444137657500164735ustar00rootroot00000000000000tablib-3.5.0/src/tablib/formats/__init__.py000066400000000000000000000123001444137657500206000ustar00rootroot00000000000000""" Tablib - formats """ from collections 
import OrderedDict from functools import partialmethod from importlib import import_module from importlib.util import find_spec from tablib.exceptions import UnsupportedFormat from tablib.utils import normalize_input from ._csv import CSVFormat from ._json import JSONFormat from ._tsv import TSVFormat uninstalled_format_messages = { "cli": {"package_name": "tabulate package", "extras_name": "cli"}, "df": {"package_name": "pandas package", "extras_name": "pandas"}, "html": {"package_name": "MarkupPy package", "extras_name": "html"}, "ods": {"package_name": "odfpy package", "extras_name": "ods"}, "xls": {"package_name": "xlrd and xlwt packages", "extras_name": "xls"}, "xlsx": {"package_name": "openpyxl package", "extras_name": "xlsx"}, "yaml": {"package_name": "pyyaml package", "extras_name": "yaml"}, } def load_format_class(dotted_path): try: module_path, class_name = dotted_path.rsplit('.', 1) return getattr(import_module(module_path), class_name) except (ValueError, AttributeError) as err: raise ImportError(f"Unable to load format class '{dotted_path}' ({err})") class FormatDescriptorBase: def __init__(self, key, format_or_path): self.key = key self._format_path = None if isinstance(format_or_path, str): self._format = None self._format_path = format_or_path else: self._format = format_or_path def ensure_format_loaded(self): if self._format is None: self._format = load_format_class(self._format_path) class ImportExportBookDescriptor(FormatDescriptorBase): def __get__(self, obj, cls, **kwargs): self.ensure_format_loaded() return self._format.export_book(obj, **kwargs) def __set__(self, obj, val): self.ensure_format_loaded() return self._format.import_book(obj, normalize_input(val)) class ImportExportSetDescriptor(FormatDescriptorBase): def __get__(self, obj, cls, **kwargs): self.ensure_format_loaded() return self._format.export_set(obj, **kwargs) def __set__(self, obj, val): self.ensure_format_loaded() return self._format.import_set(obj, normalize_input(val)) class Registry: _formats = OrderedDict() def register(self, key, format_or_path): from tablib.core import Databook, Dataset # Create Databook. read or read/write properties setattr(Databook, key, ImportExportBookDescriptor(key, format_or_path)) # Create Dataset. read or read/write properties, # and Dataset.get_/set_ methods. setattr(Dataset, key, ImportExportSetDescriptor(key, format_or_path)) try: setattr(Dataset, 'get_%s' % key, partialmethod(Dataset._get_in_format, key)) setattr(Dataset, 'set_%s' % key, partialmethod(Dataset._set_in_format, key)) except AttributeError: setattr(Dataset, 'get_%s' % key, partialmethod(Dataset._get_in_format, key)) self._formats[key] = format_or_path def register_builtins(self): # Registration ordering matters for autodetection. 
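        # detect_format() walks the registered formats in this order and picks
        # the first one whose detect() accepts the stream, so more specific
        # formats need to be registered before more permissive ones.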
self.register('json', JSONFormat()) # xlsx before as xls (xlrd) can also read xlsx if find_spec('openpyxl'): self.register('xlsx', 'tablib.formats._xlsx.XLSXFormat') if find_spec('xlrd') and find_spec('xlwt'): self.register('xls', 'tablib.formats._xls.XLSFormat') if find_spec('yaml'): self.register('yaml', 'tablib.formats._yaml.YAMLFormat') self.register('csv', CSVFormat()) self.register('tsv', TSVFormat()) if find_spec('odf'): self.register('ods', 'tablib.formats._ods.ODSFormat') self.register('dbf', 'tablib.formats._dbf.DBFFormat') if find_spec('MarkupPy'): self.register('html', 'tablib.formats._html.HTMLFormat') self.register('jira', 'tablib.formats._jira.JIRAFormat') self.register('latex', 'tablib.formats._latex.LATEXFormat') if find_spec('pandas'): self.register('df', 'tablib.formats._df.DataFrameFormat') self.register('rst', 'tablib.formats._rst.ReSTFormat') if find_spec('tabulate'): self.register('cli', 'tablib.formats._cli.CLIFormat') def formats(self): for key, frm in self._formats.items(): if isinstance(frm, str): self._formats[key] = load_format_class(frm) yield self._formats[key] def get_format(self, key): if key not in self._formats: if key in uninstalled_format_messages: raise UnsupportedFormat( "The '{key}' format is not available. You may want to install the " "{package_name} (or `pip install \"tablib[{extras_name}]\"`).".format( **uninstalled_format_messages[key], key=key ) ) raise UnsupportedFormat("Tablib has no format '%s' or it is not registered." % key) if isinstance(self._formats[key], str): self._formats[key] = load_format_class(self._formats[key]) return self._formats[key] registry = Registry() tablib-3.5.0/src/tablib/formats/_cli.py000066400000000000000000000011431444137657500177520ustar00rootroot00000000000000"""Tablib - Command-line Interface table export support. Generates a representation for CLI from the dataset. Wrapper for tabulate library. """ from tabulate import tabulate as Tabulate class CLIFormat: """ Class responsible to export to CLI Format """ title = 'cli' DEFAULT_FMT = 'plain' @classmethod def export_set(cls, dataset, **kwargs): """Returns CLI representation of a Dataset.""" if dataset.headers: kwargs.setdefault('headers', dataset.headers) kwargs.setdefault('tablefmt', cls.DEFAULT_FMT) return Tabulate(dataset, **kwargs) tablib-3.5.0/src/tablib/formats/_csv.py000066400000000000000000000032251444137657500200010ustar00rootroot00000000000000""" Tablib - *SV Support. 
""" import csv from io import StringIO class CSVFormat: title = 'csv' extensions = ('csv',) DEFAULT_DELIMITER = ',' @classmethod def export_stream_set(cls, dataset, **kwargs): """Returns CSV representation of Dataset as file-like.""" stream = StringIO() kwargs.setdefault('delimiter', cls.DEFAULT_DELIMITER) _csv = csv.writer(stream, **kwargs) for row in dataset._package(dicts=False): _csv.writerow(row) stream.seek(0) return stream @classmethod def export_set(cls, dataset, **kwargs): """Returns CSV representation of Dataset.""" stream = cls.export_stream_set(dataset, **kwargs) return stream.getvalue() @classmethod def import_set(cls, dset, in_stream, headers=True, skip_lines=0, **kwargs): """Returns dataset from CSV stream.""" dset.wipe() kwargs.setdefault('delimiter', cls.DEFAULT_DELIMITER) rows = csv.reader(in_stream, **kwargs) for i, row in enumerate(rows): if i < skip_lines: continue if i == skip_lines and headers: dset.headers = row elif row: if i > 0 and len(row) < dset.width: row += [''] * (dset.width - len(row)) dset.append(row) @classmethod def detect(cls, stream, delimiter=None): """Returns True if given stream is valid CSV.""" try: csv.Sniffer().sniff(stream.read(2048), delimiters=delimiter or cls.DEFAULT_DELIMITER) return True except Exception: return False tablib-3.5.0/src/tablib/formats/_dbf.py000066400000000000000000000036301444137657500177410ustar00rootroot00000000000000""" Tablib - DBF Support. """ import io import os import tempfile from tablib.packages.dbfpy import dbf, dbfnew from tablib.packages.dbfpy import record as dbfrecord class DBFFormat: title = 'dbf' extensions = ('csv',) DEFAULT_ENCODING = 'utf-8' @classmethod def export_set(cls, dataset): """Returns DBF representation of a Dataset""" new_dbf = dbfnew.dbf_new() temp_file, temp_uri = tempfile.mkstemp() # create the appropriate fields based on the contents of the first row first_row = dataset[0] for fieldname, field_value in zip(dataset.headers, first_row): if type(field_value) in [int, float]: new_dbf.add_field(fieldname, 'N', 10, 8) else: new_dbf.add_field(fieldname, 'C', 80) new_dbf.write(temp_uri) dbf_file = dbf.Dbf(temp_uri, readOnly=0) for row in dataset: record = dbfrecord.DbfRecord(dbf_file) for fieldname, field_value in zip(dataset.headers, row): record[fieldname] = field_value record.store() dbf_file.close() dbf_stream = open(temp_uri, 'rb') stream = io.BytesIO(dbf_stream.read()) dbf_stream.close() os.close(temp_file) os.remove(temp_uri) return stream.getvalue() @classmethod def import_set(cls, dset, in_stream, headers=True): """Returns a dataset from a DBF stream.""" dset.wipe() _dbf = dbf.Dbf(in_stream) dset.headers = _dbf.fieldNames for record in range(_dbf.recordCount): row = [_dbf[record][f] for f in _dbf.fieldNames] dset.append(row) @classmethod def detect(cls, stream): """Returns True if the given stream is valid DBF""" try: dbf.Dbf(stream, readOnly=True) return True except Exception: return False tablib-3.5.0/src/tablib/formats/_df.py000066400000000000000000000021441444137657500175760ustar00rootroot00000000000000""" Tablib - DataFrame Support. 
""" try: from pandas import DataFrame except ImportError: DataFrame = None class DataFrameFormat: title = 'df' extensions = ('df',) @classmethod def detect(cls, stream): """Returns True if given stream is a DataFrame.""" if DataFrame is None: return False elif isinstance(stream, DataFrame): return True try: DataFrame(stream.read()) return True except ValueError: return False @classmethod def export_set(cls, dset, index=None): """Returns DataFrame representation of DataBook.""" if DataFrame is None: raise NotImplementedError( 'DataFrame Format requires `pandas` to be installed.' ' Try `pip install "tablib[pandas]"`.') dataframe = DataFrame(dset.dict, columns=dset.headers) return dataframe @classmethod def import_set(cls, dset, in_stream): """Returns dataset from DataFrame.""" dset.wipe() dset.dict = in_stream.to_dict(orient='records') tablib-3.5.0/src/tablib/formats/_html.py000066400000000000000000000031661444137657500201560ustar00rootroot00000000000000""" Tablib - HTML export support. """ import codecs from io import BytesIO from MarkupPy import markup class HTMLFormat: BOOK_ENDINGS = 'h3' title = 'html' extensions = ('html', ) @classmethod def export_set(cls, dataset): """HTML representation of a Dataset.""" stream = BytesIO() page = markup.page() page.table.open() if dataset.headers is not None: new_header = [item if item is not None else '' for item in dataset.headers] page.thead.open() headers = markup.oneliner.th(new_header) page.tr(headers) page.thead.close() page.tbody.open() for row in dataset: new_row = [item if item is not None else '' for item in row] html_row = markup.oneliner.td(new_row) page.tr(html_row) page.tbody.close() page.table.close() # Allow unicode characters in output wrapper = codecs.getwriter("utf8")(stream) wrapper.writelines(str(page)) return stream.getvalue().decode('utf-8') @classmethod def export_book(cls, databook): """HTML representation of a Databook.""" stream = BytesIO() # Allow unicode characters in output wrapper = codecs.getwriter("utf8")(stream) for i, dset in enumerate(databook._datasets): title = (dset.title if dset.title else 'Set %s' % (i)) wrapper.write(f'<{cls.BOOK_ENDINGS}>{title}\n') wrapper.write(dset.html) wrapper.write('\n') return stream.getvalue().decode('utf-8') tablib-3.5.0/src/tablib/formats/_jira.py000066400000000000000000000020771444137657500201370ustar00rootroot00000000000000"""Tablib - Jira table export support. Generates a Jira table from the dataset. 
""" class JIRAFormat: title = 'jira' @classmethod def export_set(cls, dataset): """Formats the dataset according to the Jira table syntax: ||heading 1||heading 2||heading 3|| |col A1|col A2|col A3| |col B1|col B2|col B3| :param dataset: dataset to serialize :type dataset: tablib.core.Dataset """ header = cls._get_header(dataset.headers) if dataset.headers else '' body = cls._get_body(dataset) return f'{header}\n{body}' if header else body @classmethod def _get_body(cls, dataset): return '\n'.join([cls._serialize_row(row) for row in dataset]) @classmethod def _get_header(cls, headers): return cls._serialize_row(headers, delimiter='||') @classmethod def _serialize_row(cls, row, delimiter='|'): return '{}{}{}'.format( delimiter, delimiter.join([str(item) if item else ' ' for item in row]), delimiter ) tablib-3.5.0/src/tablib/formats/_json.py000066400000000000000000000030301444137657500201510ustar00rootroot00000000000000""" Tablib - JSON Support """ import decimal import json from uuid import UUID import tablib def serialize_objects_handler(obj): if isinstance(obj, (decimal.Decimal, UUID)): return str(obj) elif hasattr(obj, 'isoformat'): return obj.isoformat() else: return obj class JSONFormat: title = 'json' extensions = ('json', 'jsn') @classmethod def export_set(cls, dataset): """Returns JSON representation of Dataset.""" return json.dumps( dataset.dict, default=serialize_objects_handler, ensure_ascii=False ) @classmethod def export_book(cls, databook): """Returns JSON representation of Databook.""" return json.dumps( databook._package(), default=serialize_objects_handler, ensure_ascii=False ) @classmethod def import_set(cls, dset, in_stream): """Returns dataset from JSON stream.""" dset.wipe() dset.dict = json.load(in_stream) @classmethod def import_book(cls, dbook, in_stream): """Returns databook from JSON stream.""" dbook.wipe() for sheet in json.load(in_stream): data = tablib.Dataset() data.title = sheet['title'] data.dict = sheet['data'] dbook.add_sheet(data) @classmethod def detect(cls, stream): """Returns True if given stream is valid JSON.""" try: json.load(stream) return True except (TypeError, ValueError): return False tablib-3.5.0/src/tablib/formats/_latex.py000066400000000000000000000100761444137657500203250ustar00rootroot00000000000000"""Tablib - LaTeX table export support. Generates a LaTeX booktabs-style table from the dataset. 
""" import re class LATEXFormat: title = 'latex' extensions = ('tex',) TABLE_TEMPLATE = """\ %% Note: add \\usepackage{booktabs} to your preamble %% \\begin{table}[!htbp] \\centering %(CAPTION)s \\begin{tabular}{%(COLSPEC)s} \\toprule %(HEADER)s %(MIDRULE)s %(BODY)s \\bottomrule \\end{tabular} \\end{table} """ TEX_RESERVED_SYMBOLS_MAP = dict([ ('\\', '\\textbackslash{}'), ('{', '\\{'), ('}', '\\}'), ('$', '\\$'), ('&', '\\&'), ('#', '\\#'), ('^', '\\textasciicircum{}'), ('_', '\\_'), ('~', '\\textasciitilde{}'), ('%', '\\%'), ]) TEX_RESERVED_SYMBOLS_RE = re.compile( '(%s)' % '|'.join(map(re.escape, TEX_RESERVED_SYMBOLS_MAP.keys()))) @classmethod def export_set(cls, dataset): """Returns LaTeX representation of dataset :param dataset: dataset to serialize :type dataset: tablib.core.Dataset """ caption = '\\caption{%s}' % dataset.title if dataset.title else '%' colspec = cls._colspec(dataset.width) header = cls._serialize_row(dataset.headers) if dataset.headers else '' midrule = cls._midrule(dataset.width) body = '\n'.join([cls._serialize_row(row) for row in dataset]) return cls.TABLE_TEMPLATE % dict(CAPTION=caption, COLSPEC=colspec, HEADER=header, MIDRULE=midrule, BODY=body) @classmethod def _colspec(cls, dataset_width): """Generates the column specification for the LaTeX `tabular` environment based on the dataset width. The first column is justified to the left, all further columns are aligned to the right. .. note:: This is only a heuristic and most probably has to be fine-tuned post export. Column alignment should depend on the data type, e.g., textual content should usually be aligned to the left while numeric content almost always should be aligned to the right. :param dataset_width: width of the dataset """ spec = 'l' for _ in range(1, dataset_width): spec += 'r' return spec @classmethod def _midrule(cls, dataset_width): """Generates the table `midrule`, which may be composed of several `cmidrules`. :param dataset_width: width of the dataset to serialize """ if not dataset_width or dataset_width == 1: return '\\midrule' return ' '.join([cls._cmidrule(colindex, dataset_width) for colindex in range(1, dataset_width + 1)]) @classmethod def _cmidrule(cls, colindex, dataset_width): """Generates the `cmidrule` for a single column with appropriate trimming based on the column position. :param colindex: Column index :param dataset_width: width of the dataset """ rule = '\\cmidrule(%s){%d-%d}' if colindex == 1: # Rule of first column is trimmed on the right return rule % ('r', colindex, colindex) if colindex == dataset_width: # Rule of last column is trimmed on the left return rule % ('l', colindex, colindex) # Inner columns are trimmed on the left and right return rule % ('lr', colindex, colindex) @classmethod def _serialize_row(cls, row): """Returns string representation of a single row. :param row: single dataset row """ new_row = [cls._escape_tex_reserved_symbols(str(item)) if item else '' for item in row] return 6 * ' ' + ' & '.join(new_row) + ' \\\\' @classmethod def _escape_tex_reserved_symbols(cls, input): """Escapes all TeX reserved symbols ('_', '~', etc.) in a string. :param input: String to escape """ def replace(match): return cls.TEX_RESERVED_SYMBOLS_MAP[match.group()] return cls.TEX_RESERVED_SYMBOLS_RE.sub(replace, input) tablib-3.5.0/src/tablib/formats/_ods.py000066400000000000000000000047461444137657500200040ustar00rootroot00000000000000""" Tablib - ODF Support. 
""" import numbers from io import BytesIO from odf import opendocument, style, table, text bold = style.Style(name="bold", family="paragraph") bold.addElement(style.TextProperties( fontweight="bold", fontweightasian="bold", fontweightcomplex="bold", )) class ODSFormat: title = 'ods' extensions = ('ods',) @classmethod def export_set(cls, dataset): """Returns ODF representation of Dataset.""" wb = opendocument.OpenDocumentSpreadsheet() wb.automaticstyles.addElement(bold) ws = table.Table(name=dataset.title if dataset.title else 'Tablib Dataset') wb.spreadsheet.addElement(ws) cls.dset_sheet(dataset, ws) stream = BytesIO() wb.save(stream) return stream.getvalue() @classmethod def export_book(cls, databook): """Returns ODF representation of DataBook.""" wb = opendocument.OpenDocumentSpreadsheet() wb.automaticstyles.addElement(bold) for i, dset in enumerate(databook._datasets): ws = table.Table(name=dset.title if dset.title else 'Sheet%s' % (i)) wb.spreadsheet.addElement(ws) cls.dset_sheet(dset, ws) stream = BytesIO() wb.save(stream) return stream.getvalue() @classmethod def dset_sheet(cls, dataset, ws): """Completes given worksheet from given Dataset.""" _package = dataset._package(dicts=False) for i, sep in enumerate(dataset._separators): _offset = i _package.insert((sep[0] + _offset), (sep[1],)) for row_number, row in enumerate(_package, start=1): is_header = row_number == 1 and dataset.headers style = bold if is_header else None odf_row = table.TableRow(stylename=style) ws.addElement(odf_row) for j, col in enumerate(row): if isinstance(col, numbers.Number): cell = table.TableCell(valuetype="float", value=col) else: cell = table.TableCell(valuetype="string") cell.addElement(text.P(text=str(col), stylename=style)) odf_row.addElement(cell) @classmethod def detect(cls, stream): if isinstance(stream, bytes): # load expects a file-like object. stream = BytesIO(stream) try: opendocument.load(stream) return True except Exception: return False tablib-3.5.0/src/tablib/formats/_rst.py000066400000000000000000000220121444137657500200110ustar00rootroot00000000000000""" Tablib - reStructuredText Support """ from itertools import zip_longest from statistics import median from textwrap import TextWrapper JUSTIFY_LEFT = 'left' JUSTIFY_CENTER = 'center' JUSTIFY_RIGHT = 'right' JUSTIFY_VALUES = (JUSTIFY_LEFT, JUSTIFY_CENTER, JUSTIFY_RIGHT) def to_str(value): if isinstance(value, bytes): return value.decode('utf-8') return str(value) def _max_word_len(text): """ Return the length of the longest word in `text`. >>> _max_word_len('Python Module for Tabular Datasets') 8 """ return max((len(word) for word in text.split()), default=0) if text else 0 class ReSTFormat: title = 'rst' extensions = ('rst',) MAX_TABLE_WIDTH = 80 # Roughly. It may be wider to avoid breaking words. @classmethod def _get_column_string_lengths(cls, dataset): """ Returns a list of string lengths of each column, and a list of maximum word lengths. 
""" if dataset.headers: column_lengths = [[len(h)] for h in dataset.headers] word_lens = [_max_word_len(h) for h in dataset.headers] else: column_lengths = [[] for _ in range(dataset.width)] word_lens = [0 for _ in range(dataset.width)] for row in dataset.dict: values = iter(row.values() if hasattr(row, 'values') else row) for i, val in enumerate(values): text = to_str(val) column_lengths[i].append(len(text)) word_lens[i] = max(word_lens[i], _max_word_len(text)) return column_lengths, word_lens @classmethod def _row_to_lines(cls, values, widths, wrapper, sep='|', justify=JUSTIFY_LEFT): """ Returns a table row of wrapped values as a list of lines """ if justify not in JUSTIFY_VALUES: raise ValueError('Value of "justify" must be one of "{}"'.format( '", "'.join(JUSTIFY_VALUES) )) def just(text_, width_): if justify == JUSTIFY_LEFT: return text_.ljust(width_) elif justify == JUSTIFY_CENTER: return text_.center(width_) else: return text_.rjust(width_) lpad = sep + ' ' if sep else '' rpad = ' ' + sep if sep else '' pad = ' ' + sep + ' ' cells = [] for value, width in zip(values, widths): wrapper.width = width text = to_str(value) cell = wrapper.wrap(text) cells.append(cell) lines = zip_longest(*cells, fillvalue='') lines = ( (just(cell_line, widths[i]) for i, cell_line in enumerate(line)) for line in lines ) lines = [''.join((lpad, pad.join(line), rpad)) for line in lines] return lines @classmethod def _get_column_widths(cls, dataset, max_table_width=MAX_TABLE_WIDTH, pad_len=3): """ Returns a list of column widths proportional to the median length of the text in their cells. """ str_lens, word_lens = cls._get_column_string_lengths(dataset) median_lens = [int(median(lens)) for lens in str_lens] total = sum(median_lens) if total > max_table_width - (pad_len * len(median_lens)): column_widths = (max_table_width * lens // total for lens in median_lens) else: column_widths = (lens for lens in median_lens) # Allow for separator and padding: column_widths = (w - pad_len if w > pad_len else w for w in column_widths) # Rather widen table than break words: column_widths = [max(w, l) for w, l in zip(column_widths, word_lens)] return column_widths @classmethod def export_set_as_simple_table(cls, dataset, column_widths=None): """ Returns reStructuredText grid table representation of dataset. """ lines = [] wrapper = TextWrapper() if column_widths is None: column_widths = cls._get_column_widths(dataset, pad_len=2) border = ' '.join(['=' * w for w in column_widths]) lines.append(border) if dataset.headers: lines.extend(cls._row_to_lines( dataset.headers, column_widths, wrapper, sep='', justify=JUSTIFY_CENTER, )) lines.append(border) for row in dataset.dict: values = iter(row.values() if hasattr(row, 'values') else row) lines.extend(cls._row_to_lines(values, column_widths, wrapper, '')) lines.append(border) return '\n'.join(lines) @classmethod def export_set_as_grid_table(cls, dataset, column_widths=None): """ Returns reStructuredText grid table representation of dataset. >>> from tablib import Dataset >>> from tablib.formats import registry >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1)) >>> data = Dataset() >>> data.headers = ['A', 'B', 'A and B'] >>> for a, b in bits: ... 
data.append([bool(a), bool(b), bool(a * b)]) >>> rst = registry.get_format('rst') >>> print(rst.export_set(data, force_grid=True)) +-------+-------+-------+ | A | B | A and | | | | B | +=======+=======+=======+ | False | False | False | +-------+-------+-------+ | True | False | False | +-------+-------+-------+ | False | True | False | +-------+-------+-------+ | True | True | True | +-------+-------+-------+ """ lines = [] wrapper = TextWrapper() if column_widths is None: column_widths = cls._get_column_widths(dataset) header_sep = '+=' + '=+='.join(['=' * w for w in column_widths]) + '=+' row_sep = '+-' + '-+-'.join(['-' * w for w in column_widths]) + '-+' lines.append(row_sep) if dataset.headers: lines.extend(cls._row_to_lines( dataset.headers, column_widths, wrapper, justify=JUSTIFY_CENTER, )) lines.append(header_sep) for row in dataset.dict: values = iter(row.values() if hasattr(row, 'values') else row) lines.extend(cls._row_to_lines(values, column_widths, wrapper)) lines.append(row_sep) return '\n'.join(lines) @classmethod def _use_simple_table(cls, head0, col0, width0): """ Use a simple table if the text in the first column is never wrapped >>> from tablib.formats import registry >>> rst = registry.get_format('rst') >>> rst._use_simple_table('menu', ['egg', 'bacon'], 10) True >>> rst._use_simple_table(None, ['lobster thermidor', 'spam'], 10) False """ if head0 is not None: head0 = to_str(head0) if len(head0) > width0: return False for cell in col0: cell = to_str(cell) if len(cell) > width0: return False return True @classmethod def export_set(cls, dataset, **kwargs): """ Returns reStructuredText table representation of dataset. Returns a simple table if the text in the first column is never wrapped, otherwise returns a grid table. >>> from tablib import Dataset >>> bits = ((0, 0), (1, 0), (0, 1), (1, 1)) >>> data = Dataset() >>> data.headers = ['A', 'B', 'A and B'] >>> for a, b in bits: ... data.append([bool(a), bool(b), bool(a * b)]) >>> table = data.rst >>> table.split('\\n') == [ ... '===== ===== =====', ... ' A B A and', ... ' B ', ... '===== ===== =====', ... 'False False False', ... 'True False False', ... 'False True False', ... 'True True True ', ... '===== ===== =====', ... ] True """ if not dataset.dict: return '' force_grid = kwargs.get('force_grid', False) max_table_width = kwargs.get('max_table_width', cls.MAX_TABLE_WIDTH) column_widths = cls._get_column_widths(dataset, max_table_width) use_simple_table = cls._use_simple_table( dataset.headers[0] if dataset.headers else None, dataset.get_col(0), column_widths[0], ) if use_simple_table and not force_grid: return cls.export_set_as_simple_table(dataset, column_widths) else: return cls.export_set_as_grid_table(dataset, column_widths) @classmethod def export_book(cls, databook): """ reStructuredText representation of a Databook. Tables are separated by a blank line. All tables use the grid format. """ return '\n\n'.join(cls.export_set(dataset, force_grid=True) for dataset in databook._datasets) tablib-3.5.0/src/tablib/formats/_tsv.py000066400000000000000000000002721444137657500200210ustar00rootroot00000000000000""" Tablib - TSV (Tab Separated Values) Support. """ from ._csv import CSVFormat class TSVFormat(CSVFormat): title = 'tsv' extensions = ('tsv',) DEFAULT_DELIMITER = '\t' tablib-3.5.0/src/tablib/formats/_xls.py000066400000000000000000000101641444137657500200140ustar00rootroot00000000000000""" Tablib - XLS Support. 
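A minimal round-trip sketch (requires the optional xlwt/xlrd dependencies; the
values are illustrative, not part of the format itself):

    import tablib

    data = tablib.Dataset(('John', 'Adams'), headers=['first', 'last'], title='People')
    xls_bytes = data.export('xls')                   # bytes of a .xls workbook
    clone = tablib.Dataset().load(xls_bytes, 'xls')  # headers come back from the first row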
""" from io import BytesIO import xlrd import xlwt from xlrd.xldate import xldate_as_datetime import tablib # special styles wrap = xlwt.easyxf("alignment: wrap on") bold = xlwt.easyxf("font: bold on") class XLSFormat: title = 'xls' extensions = ('xls',) @classmethod def detect(cls, stream): """Returns True if given stream is a readable excel file.""" try: xlrd.open_workbook(file_contents=stream) return True except Exception: pass try: xlrd.open_workbook(file_contents=stream.read()) return True except Exception: pass try: xlrd.open_workbook(filename=stream) return True except Exception: return False @classmethod def export_set(cls, dataset): """Returns XLS representation of Dataset.""" wb = xlwt.Workbook(encoding='utf8') ws = wb.add_sheet(dataset.title if dataset.title else 'Tablib Dataset') cls.dset_sheet(dataset, ws) stream = BytesIO() wb.save(stream) return stream.getvalue() @classmethod def export_book(cls, databook): """Returns XLS representation of DataBook.""" wb = xlwt.Workbook(encoding='utf8') for i, dset in enumerate(databook._datasets): ws = wb.add_sheet(dset.title if dset.title else 'Sheet%s' % (i)) cls.dset_sheet(dset, ws) stream = BytesIO() wb.save(stream) return stream.getvalue() @classmethod def import_set(cls, dset, in_stream, headers=True, skip_lines=0): """Returns databook from XLS stream.""" dset.wipe() xls_book = xlrd.open_workbook(file_contents=in_stream.read()) sheet = xls_book.sheet_by_index(0) dset.title = sheet.name def cell_value(value, type_): if type_ == xlrd.XL_CELL_ERROR: return xlrd.error_text_from_code[value] elif type_ == xlrd.XL_CELL_DATE: return xldate_as_datetime(value, xls_book.datemode) return value for i in range(sheet.nrows): if i < skip_lines: continue if i == skip_lines and headers: dset.headers = sheet.row_values(i) else: dset.append([ cell_value(val, typ) for val, typ in zip(sheet.row_values(i), sheet.row_types(i)) ]) @classmethod def import_book(cls, dbook, in_stream, headers=True): """Returns databook from XLS stream.""" dbook.wipe() xls_book = xlrd.open_workbook(file_contents=in_stream) for sheet in xls_book.sheets(): data = tablib.Dataset() data.title = sheet.name for i in range(sheet.nrows): if i == 0 and headers: data.headers = sheet.row_values(0) else: data.append(sheet.row_values(i)) dbook.add_sheet(data) @classmethod def dset_sheet(cls, dataset, ws): """Completes given worksheet from given Dataset.""" _package = dataset._package(dicts=False) for i, sep in enumerate(dataset._separators): _offset = i _package.insert((sep[0] + _offset), (sep[1],)) for i, row in enumerate(_package): for j, col in enumerate(row): # bold headers if (i == 0) and dataset.headers: ws.write(i, j, col, bold) # frozen header row ws.panes_frozen = True ws.horz_split_pos = 1 # bold separators elif len(row) < dataset.width: ws.write(i, j, col, bold) # wrap the rest else: try: if '\n' in col: ws.write(i, j, col, wrap) else: ws.write(i, j, col) except TypeError: ws.write(i, j, col) tablib-3.5.0/src/tablib/formats/_xlsx.py000066400000000000000000000125341444137657500202070ustar00rootroot00000000000000""" Tablib - XLSX Support. 
""" import re from io import BytesIO from openpyxl.reader.excel import ExcelReader, load_workbook from openpyxl.styles import Alignment, Font from openpyxl.utils import get_column_letter from openpyxl.workbook import Workbook import tablib INVALID_TITLE_REGEX = re.compile(r'[\\*?:/\[\]]') def safe_xlsx_sheet_title(s, replace="-"): return re.sub(INVALID_TITLE_REGEX, replace, s)[:31] class XLSXFormat: title = 'xlsx' extensions = ('xlsx',) @classmethod def detect(cls, stream): """Returns True if given stream is a readable excel file.""" try: # No need to fully load the file, it should be enough to be able to # read the manifest. reader = ExcelReader(stream, read_only=False) reader.read_manifest() return True except Exception: return False @classmethod def export_set(cls, dataset, freeze_panes=True, invalid_char_subst="-", escape=False): """Returns XLSX representation of Dataset. If ``freeze_panes`` is True, Export will freeze panes only after first line. If ``dataset.title`` contains characters which are considered invalid for an XLSX file sheet name (http://www.excelcodex.com/2012/06/worksheets-naming-conventions/), they will be replaced with ``invalid_char_subst``. If ``escape`` is True, formulae will have the leading '=' character removed. This is a security measure to prevent formulae from executing by default in exported XLSX files. """ wb = Workbook() ws = wb.worksheets[0] ws.title = ( safe_xlsx_sheet_title(dataset.title, invalid_char_subst) if dataset.title else 'Tablib Dataset' ) cls.dset_sheet(dataset, ws, freeze_panes=freeze_panes, escape=escape) stream = BytesIO() wb.save(stream) return stream.getvalue() @classmethod def export_book(cls, databook, freeze_panes=True, invalid_char_subst="-", escape=False): """Returns XLSX representation of DataBook. See export_set(). 
""" wb = Workbook() for sheet in wb.worksheets: wb.remove(sheet) for i, dset in enumerate(databook._datasets): ws = wb.create_sheet() ws.title = ( safe_xlsx_sheet_title(dset.title, invalid_char_subst) if dset.title else 'Sheet%s' % (i) ) cls.dset_sheet(dset, ws, freeze_panes=freeze_panes, escape=escape) stream = BytesIO() wb.save(stream) return stream.getvalue() @classmethod def import_sheet(cls, dset, sheet, headers=True, skip_lines=0): """Populates dataset with sheet.""" dset.title = sheet.title for i, row in enumerate(sheet.rows): if i < skip_lines: continue row_vals = [c.value for c in row] if i == skip_lines and headers: dset.headers = row_vals else: if i > skip_lines and len(row_vals) < dset.width: row_vals += [''] * (dset.width - len(row_vals)) dset.append(row_vals) @classmethod def import_set(cls, dset, in_stream, headers=True, read_only=True, skip_lines=0): """Returns databook from XLS stream.""" dset.wipe() xls_book = load_workbook(in_stream, read_only=read_only, data_only=True) sheet = xls_book.active cls.import_sheet(dset, sheet, headers, skip_lines) @classmethod def import_book(cls, dbook, in_stream, headers=True, read_only=True): """Returns databook from XLS stream.""" dbook.wipe() xls_book = load_workbook(in_stream, read_only=read_only, data_only=True) for sheet in xls_book.worksheets: dset = tablib.Dataset() cls.import_sheet(dset, sheet, headers) dbook.add_sheet(dset) @classmethod def dset_sheet(cls, dataset, ws, freeze_panes=True, escape=False): """Completes given worksheet from given Dataset.""" _package = dataset._package(dicts=False) for i, sep in enumerate(dataset._separators): _offset = i _package.insert((sep[0] + _offset), (sep[1],)) bold = Font(bold=True) wrap_text = Alignment(wrap_text=True) for i, row in enumerate(_package): row_number = i + 1 for j, col in enumerate(row): col_idx = get_column_letter(j + 1) cell = ws[f'{col_idx}{row_number}'] # bold headers if (row_number == 1) and dataset.headers: cell.font = bold if freeze_panes: # Export Freeze only after first Line ws.freeze_panes = 'A2' # bold separators elif len(row) < dataset.width: cell.font = bold # wrap the rest else: if '\n' in str(col): cell.alignment = wrap_text try: cell.value = col except ValueError: cell.value = str(col) if escape and cell.data_type == 'f' and cell.value.startswith('='): cell.value = cell.value.replace("=", "") tablib-3.5.0/src/tablib/formats/_yaml.py000066400000000000000000000030021444137657500201410ustar00rootroot00000000000000""" Tablib - YAML Support. 
""" import yaml import tablib class YAMLFormat: title = 'yaml' extensions = ('yaml', 'yml') @classmethod def export_set(cls, dataset): """Returns YAML representation of Dataset.""" return yaml.safe_dump( dataset._package(ordered=False), default_flow_style=None, allow_unicode=True ) @classmethod def export_book(cls, databook): """Returns YAML representation of Databook.""" return yaml.safe_dump( databook._package(ordered=False), default_flow_style=None, allow_unicode=True ) @classmethod def import_set(cls, dset, in_stream): """Returns dataset from YAML stream.""" dset.wipe() dset.dict = yaml.safe_load(in_stream) @classmethod def import_book(cls, dbook, in_stream): """Returns databook from YAML stream.""" dbook.wipe() for sheet in yaml.safe_load(in_stream): data = tablib.Dataset() data.title = sheet['title'] data.dict = sheet['data'] dbook.add_sheet(data) @classmethod def detect(cls, stream): """Returns True if given stream is valid YAML.""" try: _yaml = yaml.safe_load(stream) if isinstance(_yaml, (list, tuple, dict)): return True else: return False except (yaml.parser.ParserError, yaml.reader.ReaderError, yaml.scanner.ScannerError): return False tablib-3.5.0/src/tablib/packages/000077500000000000000000000000001444137657500165765ustar00rootroot00000000000000tablib-3.5.0/src/tablib/packages/__init__.py000066400000000000000000000000001444137657500206750ustar00rootroot00000000000000tablib-3.5.0/src/tablib/packages/dbfpy/000077500000000000000000000000001444137657500177025ustar00rootroot00000000000000tablib-3.5.0/src/tablib/packages/dbfpy/__init__.py000066400000000000000000000000001444137657500220010ustar00rootroot00000000000000tablib-3.5.0/src/tablib/packages/dbfpy/dbf.py000066400000000000000000000220061444137657500210070ustar00rootroot00000000000000#! /usr/bin/env python from . import header, record from .utils import INVALID_VALUE __version__ = "$Revision: 1.7 $"[11:-2] __date__ = "$Date: 2007/02/11 09:23:13 $"[7:-2] __author__ = "Jeff Kunce " __all__ = ["Dbf"] """DBF accessing helpers. FIXME: more documentation needed Examples: Create new table, setup structure, add records: dbf = Dbf(filename, new=True) dbf.addField( ("NAME", "C", 15), ("SURNAME", "C", 25), ("INITIALS", "C", 10), ("BIRTHDATE", "D"), ) for (n, s, i, b) in ( ("John", "Miller", "YC", (1980, 10, 11)), ("Andy", "Larkin", "", (1980, 4, 11)), ): rec = dbf.newRecord() rec["NAME"] = n rec["SURNAME"] = s rec["INITIALS"] = i rec["BIRTHDATE"] = b rec.store() dbf.close() Open existed dbf, read some data: dbf = Dbf(filename, True) for rec in dbf: for fldName in dbf.fieldNames: print('%s:\t %s (%s)' % (fldName, rec[fldName], type(rec[fldName]))) print() dbf.close() """ """History (most recent first): 11-feb-2007 [als] export INVALID_VALUE; Dbf: added .ignoreErrors, .INVALID_VALUE 04-jul-2006 [als] added export declaration 20-dec-2005 [yc] removed fromStream and newDbf methods: use argument of __init__ call must be used instead; added class fields pointing to the header and record classes. 17-dec-2005 [yc] split to several modules; reimplemented 13-dec-2005 [yc] adapted to the changes of the `strutil` module. 13-sep-2002 [als] support FoxPro Timestamp datatype 15-nov-1999 [jjk] documentation updates, add demo 24-aug-1998 [jjk] add some encodeValue methods (not tested), other tweaks 08-jun-1998 [jjk] fix problems, add more features 20-feb-1998 [jjk] fix problems, add more features 19-feb-1998 [jjk] add create/write capabilities 18-feb-1998 [jjk] from dbfload.py """ class Dbf: """DBF accessor. 
FIXME: docs and examples needed (dont' forget to tell about problems adding new fields on the fly) Implementation notes: ``_new`` field is used to indicate whether this is a new data table. `addField` could be used only for the new tables! If at least one record was appended to the table it's structure couldn't be changed. """ __slots__ = ("name", "header", "stream", "_changed", "_new", "_ignore_errors") HeaderClass = header.DbfHeader RecordClass = record.DbfRecord INVALID_VALUE = INVALID_VALUE # initialization and creation helpers def __init__(self, f, readOnly=False, new=False, ignoreErrors=False): """Initialize instance. Arguments: f: Filename or file-like object. new: True if new data table must be created. Assume data table exists if this argument is False. readOnly: if ``f`` argument is a string file will be opend in read-only mode; in other cases this argument is ignored. This argument is ignored even if ``new`` argument is True. headerObj: `header.DbfHeader` instance or None. If this argument is None, new empty header will be used with the all fields set by default. ignoreErrors: if set, failing field value conversion will return ``INVALID_VALUE`` instead of raising conversion error. """ if isinstance(f, str): # a filename self.name = f if new: # new table (table file must be # created or opened and truncated) self.stream = open(f, "w+b") else: # table file must exist self.stream = open(f, ("r+b", "rb")[bool(readOnly)]) else: # a stream self.name = getattr(f, "name", "") self.stream = f if new: # if this is a new table, header will be empty self.header = self.HeaderClass() else: # or instantiated using stream self.header = self.HeaderClass.fromStream(self.stream) self.ignoreErrors = ignoreErrors self._new = bool(new) self._changed = False # properties closed = property(lambda self: self.stream.closed) recordCount = property(lambda self: self.header.recordCount) fieldNames = property( lambda self: [_fld.name for _fld in self.header.fields]) fieldDefs = property(lambda self: self.header.fields) changed = property(lambda self: self._changed or self.header.changed) def ignoreErrors(self, value): """Update `ignoreErrors` flag on the header object and self""" self.header.ignoreErrors = self._ignore_errors = bool(value) ignoreErrors = property( lambda self: self._ignore_errors, ignoreErrors, doc="""Error processing mode for DBF field value conversion if set, failing field value conversion will return ``INVALID_VALUE`` instead of raising conversion error. """) # protected methods def _fixIndex(self, index): """Return fixed index. This method fails if index isn't a numeric object (long or int). Or index isn't in a valid range (less or equal to the number of records in the db). If ``index`` is a negative number, it will be treated as a negative indexes for list objects. Return: Return value is numeric object maning valid index. 
""" if not isinstance(index, int): raise TypeError("Index must be a numeric object") if index < 0: # index from the right side # fix it to the left-side index index += len(self) + 1 if index >= len(self): raise IndexError("Record index out of range") return index # interface methods def close(self): self.flush() self.stream.close() def flush(self): """Flush data to the associated stream.""" if self.changed: self.header.setCurrentDate() self.header.write(self.stream) self.stream.flush() self._changed = False def indexOfFieldName(self, name): """Index of field named ``name``.""" # FIXME: move this to header class names = [f.name for f in self.header.fields] return names.index(name.upper()) def newRecord(self): """Return new record, which belong to this table.""" return self.RecordClass(self) def append(self, record): """Append ``record`` to the database.""" record.index = self.header.recordCount record._write() self.header.recordCount += 1 self._changed = True self._new = False def addField(self, *defs): """Add field definitions. For more information see `header.DbfHeader.addField`. """ if self._new: self.header.addField(*defs) else: raise TypeError("At least one record was added, " "structure can't be changed") # 'magic' methods (representation and sequence interface) def __repr__(self): return "Dbf stream '%s'\n" % self.stream + repr(self.header) def __len__(self): """Return number of records.""" return self.recordCount def __getitem__(self, index): """Return `DbfRecord` instance.""" return self.RecordClass.fromStream(self, self._fixIndex(index)) def __setitem__(self, index, record): """Write `DbfRecord` instance to the stream.""" record.index = self._fixIndex(index) record._write() self._changed = True self._new = False # def __del__(self): # """Flush stream upon deletion of the object.""" # self.flush() def demo_read(filename): _dbf = Dbf(filename, True) for _rec in _dbf: print() print(repr(_rec)) _dbf.close() def demo_create(filename): _dbf = Dbf(filename, new=True) _dbf.addField( ("NAME", "C", 15), ("SURNAME", "C", 25), ("INITIALS", "C", 10), ("BIRTHDATE", "D"), ) for (_n, _s, _i, _b) in ( ("John", "Miller", "YC", (1981, 1, 2)), ("Andy", "Larkin", "AL", (1982, 3, 4)), ("Bill", "Clinth", "", (1983, 5, 6)), ("Bobb", "McNail", "", (1984, 7, 8)), ): _rec = _dbf.newRecord() _rec["NAME"] = _n _rec["SURNAME"] = _s _rec["INITIALS"] = _i _rec["BIRTHDATE"] = _b _rec.store() print(repr(_dbf)) _dbf.close() if __name__ == '__main__': import sys _name = len(sys.argv) > 1 and sys.argv[1] or "county.dbf" demo_create(_name) demo_read(_name) tablib-3.5.0/src/tablib/packages/dbfpy/dbfnew.py000066400000000000000000000123551444137657500215270ustar00rootroot00000000000000#!/usr/bin/python __version__ = "$Revision: 1.4 $"[11:-2] __date__ = "$Date: 2006/07/04 08:18:18 $"[7:-2] __all__ = ["dbf_new"] from .dbf import Dbf from .fields import ( DbfCharacterFieldDef, DbfDateFieldDef, DbfDateTimeFieldDef, DbfLogicalFieldDef, DbfNumericFieldDef, ) from .header import DbfHeader from .record import DbfRecord """.DBF creation helpers. Note: this is a legacy interface. New code should use Dbf class for table creation (see examples in dbf.py) TODO: - handle Memo fields. 
- check length of the fields according to the `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` """ """History (most recent first) 04-jul-2006 [als] added export declaration; updated for dbfpy 2.0 15-dec-2005 [yc] define dbf_new.__slots__ 14-dec-2005 [yc] added vim modeline; retab'd; added doc-strings; dbf_new now is a new class (inherited from object) ??-jun-2000 [--] added by Hans Fiby """ class _FieldDefinition: """Field definition. This is a simple structure, which contains ``name``, ``type``, ``len``, ``dec`` and ``cls`` fields. Objects also implement get/setitem magic functions, so fields could be accessed via sequence interface, where 'name' has index 0, 'type' index 1, 'len' index 2, 'dec' index 3 and 'cls' could be located at index 4. """ __slots__ = "name", "type", "len", "dec", "cls" # WARNING: be attentive - dictionaries are mutable! FLD_TYPES = { # type: (cls, len) "C": (DbfCharacterFieldDef, None), "N": (DbfNumericFieldDef, None), "L": (DbfLogicalFieldDef, 1), # FIXME: support memos # "M": (DbfMemoFieldDef), "D": (DbfDateFieldDef, 8), # FIXME: I'm not sure length should be 14 characters! # but temporary I use it, cuz date is 8 characters # and time 6 (hhmmss) "T": (DbfDateTimeFieldDef, 14), } def __init__(self, name, type, len=None, dec=0): _cls, _len = self.FLD_TYPES[type] if _len is None: if len is None: raise ValueError("Field length must be defined") _len = len self.name = name self.type = type self.len = _len self.dec = dec self.cls = _cls def getDbfField(self): "Return `DbfFieldDef` instance from the current definition." return self.cls(self.name, self.len, self.dec) def appendToHeader(self, dbfh): """Create a `DbfFieldDef` instance and append it to the dbf header. Arguments: dbfh: `DbfHeader` instance. """ _dbff = self.getDbfField() dbfh.addField(_dbff) class dbf_new: """New .DBF creation helper. Example Usage: dbfn = dbf_new() dbfn.add_field("name",'C',80) dbfn.add_field("price",'N',10,2) dbfn.add_field("date",'D',8) dbfn.write("tst.dbf") Note: This module cannot handle Memo-fields, they are special. """ __slots__ = ("fields",) FieldDefinitionClass = _FieldDefinition def __init__(self): self.fields = [] def add_field(self, name, typ, len, dec=0): """Add field definition. Arguments: name: field name (str object). field name must not contain ASCII NULs and it's length shouldn't exceed 10 characters. typ: type of the field. this must be a single character from the "CNLMDT" set meaning character, numeric, logical, memo, date and date/time respectively. len: length of the field. this argument is used only for the character and numeric fields. all other fields have fixed length. FIXME: use None as a default for this argument? dec: decimal precision. used only for the numric fields. 
""" self.fields.append(self.FieldDefinitionClass(name, typ, len, dec)) def write(self, filename): """Create empty .DBF file using current structure.""" _dbfh = DbfHeader() _dbfh.setCurrentDate() for _fldDef in self.fields: _fldDef.appendToHeader(_dbfh) _dbfStream = open(filename, "wb") _dbfh.write(_dbfStream) _dbfStream.close() if __name__ == '__main__': # create a new DBF-File dbfn = dbf_new() dbfn.add_field("name", 'C', 80) dbfn.add_field("price", 'N', 10, 2) dbfn.add_field("date", 'D', 8) dbfn.write("tst.dbf") # test new dbf print("*** created tst.dbf: ***") dbft = Dbf('tst.dbf', readOnly=0) print(repr(dbft)) # add a record rec = DbfRecord(dbft) rec['name'] = 'something' rec['price'] = 10.5 rec['date'] = (2000, 1, 12) rec.store() # add another record rec = DbfRecord(dbft) rec['name'] = 'foo and bar' rec['price'] = 12234 rec['date'] = (1992, 7, 15) rec.store() # show the records print("*** inserted 2 records into tst.dbf: ***") print(repr(dbft)) for i1 in range(len(dbft)): rec = dbft[i1] for fldName in dbft.fieldNames: print(f'{fldName}:\t {rec[fldName]}') print() dbft.close() tablib-3.5.0/src/tablib/packages/dbfpy/fields.py000066400000000000000000000343071444137657500215310ustar00rootroot00000000000000import datetime import struct from functools import total_ordering from . import utils __version__ = "$Revision: 1.14 $"[11:-2] __date__ = "$Date: 2009/05/26 05:16:51 $"[7:-2] __all__ = ["lookupFor"] # field classes added at the end of the module """DBF fields definitions. TODO: - make memos work """ """History (most recent first): 26-may-2009 [als] DbfNumericFieldDef.decodeValue: strip zero bytes 05-feb-2009 [als] DbfDateFieldDef.encodeValue: empty arg produces empty date 16-sep-2008 [als] DbfNumericFieldDef decoding looks for decimal point in the value to select float or integer return type 13-mar-2008 [als] check field name length in constructor 11-feb-2007 [als] handle value conversion errors 10-feb-2007 [als] DbfFieldDef: added .rawFromRecord() 01-dec-2006 [als] Timestamp columns use None for empty values 31-oct-2006 [als] support field types 'F' (float), 'I' (integer) and 'Y' (currency); automate export and registration of field classes 04-jul-2006 [als] added export declaration 10-mar-2006 [als] decode empty values for Date and Logical fields; show field name in errors 10-mar-2006 [als] fix Numeric value decoding: according to spec, value always is string representation of the number; ensure that encoded Numeric value fits into the field 20-dec-2005 [yc] use field names in upper case 15-dec-2005 [yc] field definitions moved from `dbf`. """ # abstract definitions @total_ordering class DbfFieldDef: """Abstract field definition. Child classes must override ``type`` class attribute to provide datatype information of the field definition. For more info about types visit `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` Also child classes must override ``defaultValue`` field to provide default value for the field value. If child class has fixed length ``length`` class attribute must be overridden and set to the valid value. None value means, that field isn't of fixed length. Note: ``name`` field must not be changed after instantiation. """ __slots__ = ("name", "decimalCount", "start", "end", "ignoreErrors") # length of the field, None in case of variable-length field, # or a number if this field is a fixed-length field length = None # field type. 
for more information about fields types visit # `http://www.clicketyclick.dk/databases/xbase/format/data_types.html` # must be overridden in child classes typeCode = None # default value for the field. this field must be # overridden in child classes defaultValue = None def __init__(self, name, length=None, decimalCount=None, start=None, stop=None, ignoreErrors=False): """Initialize instance.""" assert self.typeCode is not None, "Type code must be overridden" assert self.defaultValue is not None, "Default value must be overridden" # fix arguments if len(name) > 10: raise ValueError("Field name \"%s\" is too long" % name) name = str(name).upper() if self.__class__.length is None: if length is None: raise ValueError("[%s] Length isn't specified" % name) length = int(length) if length <= 0: raise ValueError("[%s] Length must be a positive integer" % name) else: length = self.length if decimalCount is None: decimalCount = 0 # set fields self.name = name # FIXME: validate length according to the specification at # http://www.clicketyclick.dk/databases/xbase/format/data_types.html self.length = length self.decimalCount = decimalCount self.ignoreErrors = ignoreErrors self.start = start self.end = stop def __eq__(self, other): return repr(self) == repr(other) def __ne__(self, other): return repr(self) != repr(other) def __lt__(self, other): return repr(self) < repr(other) def __hash__(self): return hash(self.name) def fromString(cls, string, start, ignoreErrors=False): """Decode dbf field definition from the string data. Arguments: string: a string, dbf definition is decoded from. length of the string must be 32 bytes. start: position in the database file. ignoreErrors: initial error processing mode for the new field (boolean) """ assert len(string) == 32 _length = string[16] return cls(utils.unzfill(string)[:11].decode('utf-8'), _length, string[17], start, start + _length, ignoreErrors=ignoreErrors) fromString = classmethod(fromString) def toString(self): """Return encoded field definition. Return: Return value is a string object containing encoded definition of this field. """ _name = self.name.ljust(11, '\0') return ( _name + self.typeCode + # data address chr(0) * 4 + chr(self.length) + chr(self.decimalCount) + chr(0) * 14 ) def __repr__(self): return "%-10s %1s %3d %3d" % self.fieldInfo() def fieldInfo(self): """Return field information. Return: Return value is a (name, type, length, decimals) tuple. """ return (self.name, self.typeCode, self.length, self.decimalCount) def rawFromRecord(self, record): """Return a "raw" field value from the record string.""" return record[self.start:self.end] def decodeFromRecord(self, record): """Return decoded field value from the record string.""" try: return self.decodeValue(self.rawFromRecord(record)) except Exception: if self.ignoreErrors: return utils.INVALID_VALUE else: raise def decodeValue(self, value): """Return decoded value from string value. This method shouldn't be used publicly. It's called from the `decodeFromRecord` method. This is an abstract method and it must be overridden in child classes. """ raise NotImplementedError def encodeValue(self, value): """Return str object containing encoded field value. This is an abstract method and it must be overridden in child classes. """ raise NotImplementedError # real classes class DbfCharacterFieldDef(DbfFieldDef): """Definition of the character field.""" typeCode = "C" defaultValue = b'' def decodeValue(self, value): """Return string object. 
Return value is a ``value`` argument with stripped right spaces. """ return value.rstrip(b' ').decode('utf-8') def encodeValue(self, value): """Return raw data string encoded from a ``value``.""" return str(value)[:self.length].ljust(self.length) class DbfNumericFieldDef(DbfFieldDef): """Definition of the numeric field.""" typeCode = "N" # XXX: now I'm not sure it was a good idea to make a class field # `defaultValue` instead of a generic method as it was implemented # previously -- it's ok with all types except number, cuz # if self.decimalCount is 0, we should return 0 and 0.0 otherwise. defaultValue = 0 def decodeValue(self, value): """Return a number decoded from ``value``. If decimals is zero, value will be decoded as an integer; or as a float otherwise. Return: Return value is a int (long) or float instance. """ value = value.strip(b' \0') if b'.' in value: # a float (has decimal separator) return float(value) elif value: # must be an integer return int(value) else: return 0 def encodeValue(self, value): """Return string containing encoded ``value``.""" _rv = ("%*.*f" % (self.length, self.decimalCount, value)) if len(_rv) > self.length: _ppos = _rv.find(".") if 0 <= _ppos <= self.length: _rv = _rv[:self.length] else: raise ValueError( f"[{self.name}] Numeric overflow: {_rv} (field width: {self.length})" ) return _rv class DbfFloatFieldDef(DbfNumericFieldDef): """Definition of the float field - same as numeric.""" typeCode = "F" class DbfIntegerFieldDef(DbfFieldDef): """Definition of the integer field.""" typeCode = "I" length = 4 defaultValue = 0 def decodeValue(self, value): """Return an integer number decoded from ``value``.""" return struct.unpack("= 1: _rv = datetime.datetime.fromordinal(_jdn - self.JDN_GDN_DIFF) _rv += datetime.timedelta(0, _msecs / 1000.0) else: # empty date _rv = None return _rv def encodeValue(self, value): """Return a string-encoded ``value``.""" if value: value = utils.getDateTime(value) # LE byteorder _rv = struct.pack("<2I", value.toordinal() + self.JDN_GDN_DIFF, (value.hour * 3600 + value.minute * 60 + value.second) * 1000) else: _rv = "\0" * self.length assert len(_rv) == self.length return _rv _fieldsRegistry = {} def registerField(fieldCls): """Register field definition class. ``fieldCls`` should be subclass of the `DbfFieldDef`. Use `lookupFor` to retrieve field definition class by the type code. """ assert fieldCls.typeCode is not None, "Type code isn't defined" # XXX: use fieldCls.typeCode.upper()? in case of any decign # don't forget to look to the same comment in ``lookupFor`` method _fieldsRegistry[fieldCls.typeCode] = fieldCls def lookupFor(typeCode): """Return field definition class for the given type code. ``typeCode`` must be a single character. That type should be previously registered. Use `registerField` to register new field class. Return: Return value is a subclass of the `DbfFieldDef`. """ # XXX: use typeCode.upper()? in case of any decign don't # forget to look to the same comment in ``registerField`` return _fieldsRegistry[chr(typeCode)] # register generic types for (_name, _val) in list(globals().items()): if isinstance(_val, type) and issubclass(_val, DbfFieldDef) \ and (_name != "DbfFieldDef"): __all__.append(_name) registerField(_val) del _name, _val tablib-3.5.0/src/tablib/packages/dbfpy/header.py000066400000000000000000000222221444137657500215040ustar00rootroot00000000000000import datetime import io import struct import sys from . 
import fields from .utils import getDate __version__ = "$Revision: 1.6 $"[11:-2] __date__ = "$Date: 2010/09/16 05:06:39 $"[7:-2] __all__ = ["DbfHeader"] """DBF header definition. TODO: - handle encoding of the character fields (encoding information stored in the DBF header) """ """History (most recent first): 16-sep-2010 [als] fromStream: fix century of the last update field 11-feb-2007 [als] added .ignoreErrors 10-feb-2007 [als] added __getitem__: return field definitions by field name or field number (zero-based) 04-jul-2006 [als] added export declaration 15-dec-2005 [yc] created """ class DbfHeader: """Dbf header definition. For more information about dbf header format visit `http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_STRUCT` Examples: Create an empty dbf header and add some field definitions: dbfh = DbfHeader() dbfh.addField(("name", "C", 10)) dbfh.addField(("date", "D")) dbfh.addField(DbfNumericFieldDef("price", 5, 2)) Create a dbf header with field definitions: dbfh = DbfHeader([ ("name", "C", 10), ("date", "D"), DbfNumericFieldDef("price", 5, 2), ]) """ __slots__ = ("signature", "fields", "lastUpdate", "recordLength", "recordCount", "headerLength", "changed", "_ignore_errors") # instance construction and initialization methods def __init__(self, fields=None, headerLength=0, recordLength=0, recordCount=0, signature=0x03, lastUpdate=None, ignoreErrors=False): """Initialize instance. Arguments: fields: a list of field definitions; recordLength: size of the records; headerLength: size of the header; recordCount: number of records stored in DBF; signature: version number (aka signature). using 0x03 as a default meaning "File without DBT". for more information about this field visit ``http://www.clicketyclick.dk/databases/xbase/format/dbf.html#DBF_NOTE_1_TARGET`` lastUpdate: date of the DBF's update. this could be a string ('yymmdd' or 'yyyymmdd'), timestamp (int or float), datetime/date value, a sequence (assuming (yyyy, mm, dd, ...)) or an object having callable ``ticks`` field. ignoreErrors: error processing mode for DBF fields (boolean) """ self.signature = signature if fields is None: self.fields = [] else: self.fields = list(fields) self.lastUpdate = getDate(lastUpdate) self.recordLength = recordLength self.headerLength = headerLength self.recordCount = recordCount self.ignoreErrors = ignoreErrors # XXX: I'm not sure this is safe to # initialize `self.changed` in this way self.changed = bool(self.fields) # @classmethod def fromString(cls, string): """Return header instance from the string object.""" return cls.fromStream(io.StringIO(str(string))) fromString = classmethod(fromString) # @classmethod def fromStream(cls, stream): """Return header object from the stream.""" stream.seek(0) first_32 = stream.read(32) if type(first_32) != bytes: _data = bytes(first_32, sys.getfilesystemencoding()) _data = first_32 (_cnt, _hdrLen, _recLen) = struct.unpack(" DbfRecord._write(); added delete() method. 16-dec-2005 [yc] record definition moved from `dbf`. """ class DbfRecord: """DBF record. Instances of this class shouldn't be created manually, use `dbf.Dbf.newRecord` instead. Class implements mapping/sequence interface, so fields could be accessed via their names or indexes (names is a preferred way to access fields). Hint: Use `store` method to save modified record. 
Examples: Add new record to the database: db = Dbf(filename) rec = db.newRecord() rec["FIELD1"] = value1 rec["FIELD2"] = value2 rec.store() Or the same, but modify existed (second in this case) record: db = Dbf(filename) rec = db[2] rec["FIELD1"] = value1 rec["FIELD2"] = value2 rec.store() """ __slots__ = "dbf", "index", "deleted", "fieldData" # creation and initialization def __init__(self, dbf, index=None, deleted=False, data=None): """Instance initialization. Arguments: dbf: A `Dbf.Dbf` instance this record belongs to. index: An integer record index or None. If this value is None, record will be appended to the DBF. deleted: Boolean flag indicating whether this record is a deleted record. data: A sequence or None. This is a data of the fields. If this argument is None, default values will be used. """ self.dbf = dbf # XXX: I'm not sure ``index`` is necessary self.index = index self.deleted = deleted if data is None: self.fieldData = [_fd.defaultValue for _fd in dbf.header.fields] else: self.fieldData = list(data) # XXX: validate self.index before calculating position? position = property(lambda self: self.dbf.header.headerLength + self.index * self.dbf.header.recordLength) def rawFromStream(cls, dbf, index): """Return raw record contents read from the stream. Arguments: dbf: A `Dbf.Dbf` instance containing the record. index: Index of the record in the records' container. This argument can't be None in this call. Return value is a string containing record data in DBF format. """ # XXX: may be write smth assuming, that current stream # position is the required one? it could save some # time required to calculate where to seek in the file dbf.stream.seek(dbf.header.headerLength + index * dbf.header.recordLength) return dbf.stream.read(dbf.header.recordLength) rawFromStream = classmethod(rawFromStream) def fromStream(cls, dbf, index): """Return a record read from the stream. Arguments: dbf: A `Dbf.Dbf` instance new record should belong to. index: Index of the record in the records' container. This argument can't be None in this call. Return value is an instance of the current class. """ return cls.fromString(dbf, cls.rawFromStream(dbf, index), index) fromStream = classmethod(fromStream) def fromString(cls, dbf, string, index=None): """Return record read from the string object. Arguments: dbf: A `Dbf.Dbf` instance new record should belong to. string: A string new record should be created from. index: Index of the record in the container. If this argument is None, record will be appended. Return value is an instance of the current class. """ return cls(dbf, index, string[0] == "*", [_fd.decodeFromRecord(string) for _fd in dbf.header.fields]) fromString = classmethod(fromString) # object representation def __repr__(self): _template = "%%%ds: %%s (%%s)" % max(len(_fld) for _fld in self.dbf.fieldNames) _rv = [] for _fld in self.dbf.fieldNames: _val = self[_fld] if _val is utils.INVALID_VALUE: _rv.append(_template % (_fld, "None", "value cannot be decoded")) else: _rv.append(_template % (_fld, _val, type(_val))) return "\n".join(_rv) # protected methods def _write(self): """Write data to the dbf stream. Note: This isn't a public method, it's better to use 'store' instead publicly. Be design ``_write`` method should be called only from the `Dbf` instance. """ self._validateIndex(False) self.dbf.stream.seek(self.position) self.dbf.stream.write(bytes(self.toString(), sys.getfilesystemencoding())) # FIXME: may be move this write somewhere else? # why we should check this condition for each record? 
if self.index == len(self.dbf): # this is the last record, # we should write SUB (ASCII 26) self.dbf.stream.write(b"\x1A") # utility methods def _validateIndex(self, allowUndefined=True, checkRange=False): """Valid ``self.index`` value. If ``allowUndefined`` argument is True functions does nothing in case of ``self.index`` pointing to None object. """ if self.index is None: if not allowUndefined: raise ValueError("Index is undefined") elif self.index < 0: raise ValueError("Index can't be negative (%s)" % self.index) elif checkRange and self.index <= self.dbf.header.recordCount: raise ValueError("There are only %d records in the DBF" % self.dbf.header.recordCount) # interface methods def store(self): """Store current record in the DBF. If ``self.index`` is None, this record will be appended to the records of the DBF this records belongs to; or replaced otherwise. """ self._validateIndex() if self.index is None: self.index = len(self.dbf) self.dbf.append(self) else: self.dbf[self.index] = self def delete(self): """Mark method as deleted.""" self.deleted = True def toString(self): """Return string packed record values.""" return "".join([" *"[self.deleted]] + [ _def.encodeValue(_dat) for (_def, _dat) in zip(self.dbf.header.fields, self.fieldData) ]) def asList(self): """Return a flat list of fields. Note: Change of the list's values won't change real values stored in this object. """ return self.fieldData[:] def asDict(self): """Return a dictionary of fields. Note: Change of the dicts's values won't change real values stored in this object. """ return dict([_i for _i in zip(self.dbf.fieldNames, self.fieldData)]) def __getitem__(self, key): """Return value by field name or field index.""" if isinstance(key, int): # integer index of the field return self.fieldData[key] # assuming string field name return self.fieldData[self.dbf.indexOfFieldName(key)] def __setitem__(self, key, value): """Set field value by integer index of the field or string name.""" if isinstance(key, int): # integer index of the field return self.fieldData[key] # assuming string field name self.fieldData[self.dbf.indexOfFieldName(key)] = value tablib-3.5.0/src/tablib/packages/dbfpy/utils.py000066400000000000000000000113441444137657500214170ustar00rootroot00000000000000import datetime import time __version__ = "$Revision: 1.4 $"[11:-2] __date__ = "$Date: 2007/02/11 08:57:17 $"[7:-2] """String utilities. TODO: - allow strings in getDateTime routine; """ """History (most recent first): 11-feb-2007 [als] added INVALID_VALUE 10-feb-2007 [als] allow date strings padded with spaces instead of zeroes 20-dec-2005 [yc] handle long objects in getDate/getDateTime 16-dec-2005 [yc] created from ``strutil`` module. """ def unzfill(str): """Return a string without ASCII NULs. This function searchers for the first NUL (ASCII 0) occurrence and truncates string till that position. """ try: return str[:str.index(b'\0')] except ValueError: return str def getDate(date=None): """Return `datetime.date` instance. Type of the ``date`` argument could be one of the following: None: use current date value; datetime.date: this value will be returned; datetime.datetime: the result of the date.date() will be returned; string: assuming "%Y%m%d" or "%y%m%dd" format; number: assuming it's a timestamp (returned for example by the time.time() call; sequence: assuming (year, month, day, ...) sequence; Additionally, if ``date`` has callable ``ticks`` attribute, it will be used and result of the called would be treated as a timestamp value. 
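    For example (the input values are illustrative):

        >>> getDate("20230115")
        datetime.date(2023, 1, 15)
        >>> getDate((2023, 1, 15))
        datetime.date(2023, 1, 15)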
""" if date is None: # use current value return datetime.date.today() if isinstance(date, datetime.date): return date if isinstance(date, datetime.datetime): return date.date() if isinstance(date, (int, float)): # date is a timestamp return datetime.date.fromtimestamp(date) if isinstance(date, str): date = date.replace(" ", "0") if len(date) == 6: # yymmdd return datetime.date(*time.strptime(date, "%y%m%d")[:3]) # yyyymmdd return datetime.date(*time.strptime(date, "%Y%m%d")[:3]) if hasattr(date, "__getitem__"): # a sequence (assuming date/time tuple) return datetime.date(*date[:3]) return datetime.date.fromtimestamp(date.ticks()) def getDateTime(value=None): """Return `datetime.datetime` instance. Type of the ``value`` argument could be one of the following: None: use current date value; datetime.date: result will be converted to the `datetime.datetime` instance using midnight; datetime.datetime: ``value`` will be returned as is; string: *** CURRENTLY NOT SUPPORTED ***; number: assuming it's a timestamp (returned for example by the time.time() call; sequence: assuming (year, month, day, ...) sequence; Additionally, if ``value`` has callable ``ticks`` attribute, it will be used and result of the called would be treated as a timestamp value. """ if value is None: # use current value return datetime.datetime.today() if isinstance(value, datetime.datetime): return value if isinstance(value, datetime.date): return datetime.datetime.fromordinal(value.toordinal()) if isinstance(value, (int, float)): # value is a timestamp return datetime.datetime.fromtimestamp(value) if isinstance(value, str): raise NotImplementedError("Strings aren't currently implemented") if hasattr(value, "__getitem__"): # a sequence (assuming date/time tuple) return datetime.datetime(*tuple(value)[:6]) return datetime.datetime.fromtimestamp(value.ticks()) class classproperty(property): """Works in the same way as a ``property``, but for the classes.""" def __get__(self, obj, cls): return self.fget(cls) class _InvalidValue: """Value returned from DBF records when field validation fails The value is not equal to anything except for itself and equal to all empty values: None, 0, empty string etc. In other words, invalid value is equal to None and not equal to None at the same time. This value yields zero upon explicit conversion to a number type, empty string for string types, and False for boolean. """ def __eq__(self, other): return not other def __ne__(self, other): return not (other is self) def __bool__(self): return False def __int__(self): return 0 __long__ = __int__ def __float__(self): return 0.0 def __str__(self): return "" def __repr__(self): return "" # invalid value is a constant singleton INVALID_VALUE = _InvalidValue() # vim: set et sts=4 sw=4 : tablib-3.5.0/src/tablib/utils.py000066400000000000000000000005261444137657500165350ustar00rootroot00000000000000from io import BytesIO, StringIO def normalize_input(stream): """ Accept either a str/bytes stream or a file-like object and always return a file-like object. 
""" if isinstance(stream, str): return StringIO(stream, newline='') elif isinstance(stream, bytes): return BytesIO(stream) return stream tablib-3.5.0/tests/000077500000000000000000000000001444137657500141365ustar00rootroot00000000000000tablib-3.5.0/tests/files/000077500000000000000000000000001444137657500152405ustar00rootroot00000000000000tablib-3.5.0/tests/files/bad_dimensions.xlsx000066400000000000000000000220041444137657500211340ustar00rootroot00000000000000PKQ _rels/.relsJ1}{wDdЛH}a70u}{ZI~7CfGFoZ+{kW#VJ$cʪl n0\QX^:`dd{m]_dhVFw^F9W-(F/3ODSUNl/w{N([qTu͹3+Y8 *ׅP}{H3YR+,Y[9dZIe:xk:[mċV.7Zʘ9^Q"vP<$TO1]O$ hlnA+,GCݷ$g0]©gzE o:r;OOaKλ*; $f!(`!0nN}TD!l9(? քv.jo@"' [X8G*Ij&)2~w4"f?Oi2Fha >ƓAMS\IOC]/[4Cjϝ\;f>%w c PKҧu|uPKQ xl/styles.xmlXn0}WX~_4T]'L՚J=8`I~6IilRؾs`b]R$;r!,)ayo3B,E3 "zHXd #H 3}%DJEJ`J*3s])a0 تK%AWL |I6⩖3,γ'd4'ȭ&iTEAV@'fLz)@񖅡۴+DRPIƆ묤@Bj/-_]ةCy)5` uc#v͠ DA‚zbST4uos6]wEv[كm!z[0CTb؅>GLibA´W!QbLiutSzc]{Wy^~F{ӵLUܐ(Mc2]R$^ pe^5:wNMb^3zmMb&1I$f!})A!9bn{x6~+KmOQlSɶ1_\F߲kri>gs¯漖[Uّe|d8~?:?ςVB`l:3fPl;}wEA9@QL=G?PKaS#PKQ#xl/worksheets/_rels/sheet1.xml.rels 0E~Eօ4Fn~@LiyDѿ7 .\}VN!N (yrdsw\WՉfr$,wl0QddΓ)A*5(P//uOO0LN &y)eДpV>F30źPKZVPKQxl/_rels/workbook.xml.relsMk0 FIc8AQ6ֵ~.[ eГH4C vV͢h%؀I/Z8Y.b;]JNɺQ/IӜS[n+Psc _{0 r3Rb+/2m=?"#ʯR6w _fz o ۭP~$E|PKOz%PKQxl/sharedStrings.xmlj0 :-b;KGk YJY~n6Z~I'ꌅc& e )>qxc&pAֽfQu 2m0`R|咼XNQҨMGLbaj==;љۉ-O>uc9#`8!\Wbkj'PKD8PKQxl/tables/table1.xmlmn0 FG Lӄ"TKYkhĩbwotQ]~߉|6PY c(Cq].áJ"Jk[gCJ7bp/.Ew<zoPKPKQdocProps/core.xmlRN0 XM*JHmfmjHvq&-١*=h#j(\"CܿCTrZ2tf͔)j /V9#iSZƆm&p M+jV}p\Z[C_dhv 8PB>k-\}1Jأҁ#Fa4AtRnT_vU P>=5Bj{΀5y|ZQqGƫhB{'S}kǵ[ \0-u7 K*[x_/;ɘjOYRc<䆎S#ݒ0%Iz1`UְӮۮG%!oPK*pdPKQdocProps/app.xmlj0} #rc +-UZ'*CZ|3lXS &]OUM p+=y ) zrDvF TdK=9"-ITeeg 2&-قCuGaAp T.@GܞP}8l@^0u6.=`;{fn^ƏM7vmq0_ qF 0FoPKЛv|PKQ[Content_Types].xmlUMO0 WT&P8$eۆIdc8(j({&MUFkNj5&#:$T 1y?d:$arcRxonsiwTPɴǫ͙.+jA2I G|ntN&/H 7)f!ZqJ\+qT]"q4g#Y{;@; R@4? kўiSڔ]B%EMgLAtU!:c WI+. }%k_t^2 ]_=ڰ?QG$Wp [0!:<_4G&Gjp7Z$eBvwA_PK4tYPKQxl/worksheets/sheet1.xmlMwG c}c;@fkIi%n/\jS]z ~j9)mUr2W˳/|ǣm[Ų^gv?No,Q`=_d]b{Rou~gQ7Ϳ6)yrөj=9Ţ\IrYy۫j7mzxa>Vլ=ի 2M}VEz]xouڷϛѢZesQ,mEo_*oaw죦\ʃF|weUy=z=^շO:v?m?SS͟V2zsސWwn?TWyOE{-^(-ǟ{v.sȋ0\˶[BM^xm2OYoGrx4%xW/f2oLGn[o~[M7;n7E[xTћrﻏ&w8ny?9v"«j^uajDa\v[WmN]>׻}~Zޔ|}yO]mw~wήm[\g*պs۾N4yz<9_>%CC9GT]jJ澝;]+mq~Էߕ]VZ?|]\{?ͣ7M+~]ax]^apá_C9z:6z6z>pЋá?CCg8`pУ/}1..7K3K`r7^,}t@w@s/w[/y{2< O`;dOd~p,pl8K6%Α Ȇsd9pl8G6#Α Ȇdy{~Z?u~q7|x}v~iGG̓*?V/~y&uPK|E PKQf; _rels/.relsPKQҧu|uxl/workbook.xmlPKQaS# Sxl/styles.xmlPKQZV#dxl/worksheets/_rels/sheet1.xml.relsPKQOz%\xl/_rels/workbook.xml.relsPKQD8vxl/sharedStrings.xmlPKQx xl/tables/table1.xmlPKQ*pd docProps/core.xmlPKQЛv|H docProps/app.xmlPKQ4tYq [Content_Types].xmlPKQ|E &xl/worksheets/sheet1.xmlPK !tablib-3.5.0/tests/files/dates.xls000066400000000000000000000130001444137657500170620ustar00rootroot00000000000000ࡱ;  Root Entry  "  \pCalc Ba==@ 8@"1Arial1Arial1Arial1Arial General DD/MM/YY                + ) , *    `Tablib Dataset))TZR3  @@   birth_date q cc   dMbP?_%%*+&C&P&C&F&333333?'333333?(-؂-؂?)[[?" 
d,,??U }  %% ~ PH0(  >@A gg  FMicrosoft Excel 97-TabelleBiff8Oh+'0|8 @ L X d p2@,@@@]՜.+,D՜.+,\Root EntryFWorkbookCompObjIOle SummaryInformation(DocumentSummaryInformation8!ttablib-3.5.0/tests/files/errors.xls000066400000000000000000000130001444137657500172760ustar00rootroot00000000000000ࡱ;  Root Entry !$%'  \pCalc Ba==@ 8@"1Arial1Arial1Arial1Arial General                + ) , *    `Feuille1))TZR3  @@  Udiv by 0 name unknownnot available (formula)not available (static) Z cc   dMbP?_%*+# &C&"Times New Roman,Normal"&12&A(%&C&"Times New Roman,Normal"&12Page &P&333333?'333333?(-؂-?)-؂-?" d,,333333?333333?U } }  } b} }      +**A **PH0(  >@gg  FMicrosoft Excel 97-TabelleBiff8Oh+'0|8 @ L X d p3@>6@@(*@՜.+,D՜.+,\Root EntryF WorkbookCompObj IOle "SummaryInformation(#DocumentSummaryInformation8&ttablib-3.5.0/tests/files/founders.xlsx000066400000000000000000000114111444137657500200030ustar00rootroot00000000000000PKQZOxl/_rels/workbook.xml.relsMk0 FIc8AQ6ֵ~.[ eГH4C vV͢h%؀I/Z8Y.b;]JNɺQ/IӜS[n+Psc _{0 r3Rb+/2m=?"#ʯR6w _fz o ۭP~$E|PKOz%PKQZOxl/sharedStrings.xmlMK1 YKE+%diZ΢*CxcqUg*[Pc/O7"O&& $ptFVbךZ)R&n9eђ $U_ õԘ6nAm6wd[!TnE+c$3 _?{J tԎ7 o7ɧfM^4~ fPKJ*PKQZOxl/worksheets/sheet1.xmlVn8+zڵdǎTVum4zE"BqT|IYV`Qq8cg)=S\QLf@-//@") "|d:|Jt 0ԋ0"$z8*ERP$ϣpwb@]CVLD1A ^0M\;\>Ųs"4ù؆/7pt&Q8M( @|^/SrvН@pXa +Nodh5n@ߙ֠ oXnڐl7L0]$r @YNjal ѾNJ MdBXaYߏ|O&#Iqg|{e `M6nlPXz+bUSE@p ݈tYn@?0*6~BaTް=RvmȤG"M6~-Tڶ @SSSy]ҒMfٴ6RCǣ೻!i('C?m:08㟡WczD7yapAag=pCi\C:7^٠Y㼇l|< j =.zt~f]9m QTO;)C߂ u P !bTRA6<ND^>X8G]ɼC`f 'Z9錣Vb o + 2Q+mVkqsEm6Z`r P p/EX2pԑ Õ nZ"5PUPڧ(VkWouxZ ,TtllGy}?t4)wi7Bۥ}_':ŗg(˫snwD(6ZF}60PKK^ PKQZOxl/workbook.xmlSn0+m-^jWəFke)C8 ߼yZޜkXUaz}ecV!6 ߷y5C݀HM͑LmcZQL : ]"5(@ HVl,!7g^˜W_|hRʎYɥjҧ/wȑ:R2<)@hL*C$d[!i#^B.R Zٶzg }|rgQ`E fwbWaf UMPv2~/nHMzQe-kjPK - zPKQZOdocProps/core.xmlRN0*%DQŁJH{bDz$MZn;;ٗ^Vbb5rޢ:8j9:E*c:e'Sk0N )9Z;S-[6 Um$ukʾh 81(0ԃ#:Zr6XꍩZ0T A9IDIH{A˜)p =9V nFO5YTdFRf:7Hr=:/Hb2 I&%IRr=ÿ7]\aO,3B;Î𸢪+ZɐjNYQ+=.1nlޠl`+WFpt>oPKbPKQZO[Content_Types].xmlTO W4\Ma`iG]<6G8Nc{/m6]7u9I@ #.sbY \iÕ>5aA}o-Μ!cB$"%Sq{]ֈxG9PK "]WPKQZOOz%xl/_rels/workbook.xml.relsPKQZOJ*xl/sharedStrings.xmlPKQZOK^ 7xl/worksheets/sheet1.xmlPKQZO^4wp?xl/workbook.xmlPKQZOOc vxl/styles.xmlPKQZOf; 4 _rels/.relsPKQZO - zM docProps/app.xmlPKQZObu docProps/core.xmlPKQZO "]W[Content_Types].xmlPK ?tablib-3.5.0/tests/files/issue_524.yaml000066400000000000000000000000361444137657500176450ustar00rootroot00000000000000title: Voice of Miki Vanoušektablib-3.5.0/tests/files/ragged.xlsx000066400000000000000000000111321444137657500174070ustar00rootroot00000000000000PK\Pxl/drawings/drawing1.xml]n0 UiZC^N0%n~J6i{m?ntDb|#z]#o(8`F\n5ϼ"{^}ZJV=:2 Ӵ : IvVΫE@{ĸ&Mׄ5 A8!bf଩Q=P36*)HB< 8R_O,Czȇ&^eFwhȸ8ݱ*6(+l^ޭ̳"_PKbiPK\Pxl/worksheets/sheet1.xmlN0`!qR$hB4V}lҷ VK7;??;(E3cVJELQ]q+ #"B+VmØl cK&u|m #o7+t$df C5AN2qo//pS]LI`@1)B7gB.1v,>/PgT63VAc+z|@E1\sޫo=;xjZ]~{]/|Z?lggxg7נ~mnktrexBΩGɀ|*H)%=^#/xutީ PhRi}UAz[Anb̜l=dnoq@VqOR8?Ա#)-)NW]ei6AO(`vɊtwocIU [C}ݹZl%bk %;9ḗ]r'n4r6'lpRSNȭYOG ې*:4 KWœy#.FUPÕ%7B0\Y*N\ xx<}TFipF +R83@PhB;!#ϗz-Jx&LXL]MrsKw-{?5|Z\L$33bۇŮa1 ꨁ$\~vg ۍ&NA R舑P*  E߆M_PKMʢG&PK\Pxl/_rels/workbook.xml.relsAj0EO;ײRJM(dۦ2%!M6:B^hz)w+zl['a[m^3u1 Y#Rfpй=4! 
XVFmE*GP_dUq#';4Mgpi@OW*$,rN-_y6@^gX-ɐi 'U^/xN1o<, 1;DT>ȋWPKSPK\P _rels/.relsA0xfRpa1&l C!@iR ~yba +r`h= QZ-'(`u)/8ɘnB?b>Fw<g2rhӦ#?˘Foj.ܿP}| `nPx"u_$Ko0 X&$?ވ,.dOhrRj!gAeZjK7Tgy[1>b k~h̾$i`o]?m>13:) 5&*$_BqJI1!hj6U75 pL*wV~$0)5Pč!/?ӇNe!D%{Åw93 ~h9}:gϾPKmP5PK\Pbixl/drawings/drawing1.xmlPK\P*E޳0Kxl/worksheets/sheet1.xmlPK\PM*#Dxl/worksheets/_rels/sheet1.xml.relsPK\Pea(Hxl/theme/theme1.xmlPK\Pxl/sharedStrings.xmlPK\P޾ xl/styles.xmlPK\PMʢG& xl/workbook.xmlPK\PS xl/_rels/workbook.xml.relsPK\Po ( I _rels/.relsPK\PmP54[Content_Types].xmlPK tablib-3.5.0/tests/files/xlsx_cell_values.xlsx000066400000000000000000000220131444137657500215320ustar00rootroot00000000000000PK!{ De[Content_Types].xml (n0EUb袪* .[\{B,PNcQ {2l)Q{Wa5`8v}gV`N l &| vX69phEe\ R,?O\z2ul1t}@]ʖ+L^yPK!^e _rels/.rels (MK1!̽;*"l/EMd1`7FAtzwyfx{vE fӻVKrFH"l3*>⢄.%uGVł=\i8XrZJ%\P4H;s>67Mizoɥ#+DΐYB5V$~"c'ZkRRF%8EsܙF|02Xn/ɢ1=cW7޲PK!= /xl/workbook.xmlU]o0}xH@MVik_*U8*`f&Uk~iڡƾ_ueQ!o6>@Eۿ3gd[R oؾ>|rvۥRm2/iMoi%5Q07l%,)UuznMXcx_.YNSw5mT"hEЗ%kWq۵N bހV7 dQkXkAmOӫj .Rۓ~?F.B~!wLpJd=FF -A`ͳ'KVы^i۟֙l"R h10+D&yd'RT4DoHmC2سSc ;В<& yBTiu۳JѺRBu˫'*$%:$ a"jD ޏ3rчy Ń&1~؟gIq:~6:;IdIN9'*7c0 cŞ<_4[ۣvX FWr/=֗)J > s()14Q3qt)@Vc~ ({dt6InIKwfatKezzG!|"ϙ7t:?ۯfYosJ !l4~3r$ yj,8#DupS%5Xfv7'6yzvT}o 7qMtkn7PK!I xl/_rels/workbook.xml.rels (J0nӮ""A*B:m¦IɌ[7|/CLbPP% &. ޚ:ǀ &$WWY7.XQJ2ME1IӠ9QQn^Px} [4c9ѼL$|~ht"3<Y3YpIkuZ3YD^8-"ݚ0F{d ̱{yoPK!]^>Zxl/worksheets/sheet1.xmlKo0,/ $4dQs1T i9_&?hEv`4]A(:a*5ayWqe:(/߾n\ :W>c̉4wÙX=vm\oWCZ4o沣#!0L]K+#:?B,(ѿke&4mFXK%qREt܇ rxx]M2gj!߱;L:&fv2l RY vJ f[YOXon6=2$pHE,PVCw/7P <@BI͵1 bĹaAq>RHŘ(6+lOjC5oj ֣3"ɪ D(]ζW2fOpѥyn EX œ.#Z7w\eh2ߕ0fxf4>s;F#l{km7z #?uۛʿPK!u>ixl/theme/theme1.xmlY[F~/?;IlN&!qlɎ4F3ލ N\OA?{v_O=:ys޷re]AL{W}/߿|uiѼNaj/38<4{FY)=ģ,Jt25CIv@![%YI) =Ztv i2{d HA49vFtDPb~*8!Y4'3BVߩ)]@QP֎Y&(gɋV#LcPK!#_xl/calcChain.xmlTdN> =o2!dݚ07mHE$[% I!By*2Y/PU/PK-!{ De[Content_Types].xmlPK-!^e _rels/.relsPK-!= /xl/workbook.xmlPK-!I - xl/_rels/workbook.xml.relsPK-!]^>Z_ xl/worksheets/sheet1.xmlPK-!u>ixl/theme/theme1.xmlPK-!n+e| axl/styles.xmlPK-!#_xl/calcChain.xmlPK-!bxGYdocProps/core.xmlPK-!aI 2docProps/app.xmlPK-!docProps/custom.xmlPK 8!tablib-3.5.0/tests/requirements.txt000066400000000000000000000001131444137657500174150ustar00rootroot00000000000000pytest pytest-cov MarkupPy odfpy openpyxl>=2.6.0 pyyaml tabulate xlrd xlwt tablib-3.5.0/tests/test_tablib.py000077500000000000000000001461551444137657500170230ustar00rootroot00000000000000#!/usr/bin/env python """Tests for Tablib.""" import datetime import doctest import json import pickle import tempfile import unittest from collections import OrderedDict from decimal import Decimal from io import BytesIO, StringIO from pathlib import Path from uuid import uuid4 import pytest from MarkupPy import markup from openpyxl.reader.excel import load_workbook import tablib from tablib.core import Row, detect_format from tablib.exceptions import UnsupportedFormat from tablib.formats import registry try: import pandas except ImportError: # pragma: no cover pandas = None class BaseTestCase(unittest.TestCase): def setUp(self): """Create simple data set with headers.""" global data, book data = tablib.Dataset() book = tablib.Databook() self.headers = ('first_name', 'last_name', 'gpa') self.john = ('John', 'Adams', 90) self.george = ('George', 'Washington', 67) self.tom = ('Thomas', 'Jefferson', 50) self.founders = tablib.Dataset(headers=self.headers, title='Founders') self.founders.append(self.john) self.founders.append(self.george) self.founders.append(self.tom) class TablibTestCase(BaseTestCase): """Tablib test 
cases.""" def _test_export_data_in_all_formats(self, dataset, exclude=()): all_formats = [ 'json', 'yaml', 'csv', 'tsv', 'xls', 'xlsx', 'ods', 'html', 'jira', 'latex', 'df', 'rst', ] for format_ in all_formats: if format_ in exclude: continue dataset.export(format_) def test_unknown_format(self): with self.assertRaises(UnsupportedFormat): data.export('??') # A known format but uninstalled del registry._formats['ods'] msg = (r"The 'ods' format is not available. You may want to install the " "odfpy package \\(or `pip install \"tablib\\[ods\\]\"`\\).") with self.assertRaisesRegex(UnsupportedFormat, msg): data.export('ods') def test_empty_append(self): """Verify append() correctly adds tuple with no headers.""" new_row = (1, 2, 3) data.append(new_row) # Verify width/data self.assertEqual(data.width, len(new_row)) self.assertEqual(data[0], new_row) def test_empty_append_with_headers(self): """Verify append() correctly detects mismatch of number of headers and data. """ data.headers = ['first', 'second'] new_row = (1, 2, 3, 4) self.assertRaises(tablib.InvalidDimensions, data.append, new_row) def test_set_headers_with_incorrect_dimension(self): """Verify headers correctly detects mismatch of number of headers and data. """ data.append(self.john) def set_header_callable(): data.headers = ['first_name'] self.assertRaises(tablib.InvalidDimensions, set_header_callable) def test_add_column(self): """Verify adding column works with/without headers.""" data.append(['kenneth']) data.append(['bessie']) new_col = ['reitz', 'monke'] data.append_col(new_col) self.assertEqual(data[0], ('kenneth', 'reitz')) self.assertEqual(data.width, 2) # With Headers data.headers = ('fname', 'lname') new_col = [21, 22] data.append_col(new_col, header='age') self.assertEqual(data['age'], new_col) def test_add_column_no_data_no_headers(self): """Verify adding new column with no headers.""" new_col = ('reitz', 'monke') data.append_col(new_col) self.assertEqual(data[0], tuple([new_col[0]])) self.assertEqual(data.width, 1) self.assertEqual(data.height, len(new_col)) def test_add_column_with_header_ignored(self): """Verify append_col() ignores the header if data.headers has not previously been set """ new_col = ('reitz', 'monke') data.append_col(new_col, header='first_name') self.assertEqual(data[0], tuple([new_col[0]])) self.assertEqual(data.width, 1) self.assertEqual(data.height, len(new_col)) self.assertEqual(data.headers, None) def test_add_column_with_header_and_headers_only_exist(self): """Verify append_col() with header correctly detects mismatch when headers exist but there is no existing row data """ data.headers = ['first_name'] # no data new_col = ('allen') def append_col_callable(): data.append_col(new_col, header='middle_name') self.assertRaises(tablib.InvalidDimensions, append_col_callable) def test_add_column_with_header_and_data_exists(self): """Verify append_col() works when headers and rows exists""" data.headers = self.headers data.append(self.john) new_col = [10] data.append_col(new_col, header='age') self.assertEqual(data.height, 1) self.assertEqual(data.width, len(self.john) + 1) self.assertEqual(data['age'], new_col) self.assertEqual(len(data.headers), len(self.headers) + 1) def test_add_callable_column(self): """Verify adding column with values specified as callable.""" def new_col(x): return x[0] self.founders.append_col(new_col, header='first_again') def test_header_slicing(self): """Verify slicing by headers.""" self.assertEqual(self.founders['first_name'], [self.john[0], self.george[0], self.tom[0]]) 
self.assertEqual(self.founders['last_name'], [self.john[1], self.george[1], self.tom[1]]) self.assertEqual(self.founders['gpa'], [self.john[2], self.george[2], self.tom[2]]) def test_get_col(self): """Verify getting columns by index""" self.assertEqual( self.founders.get_col(list(self.headers).index('first_name')), [self.john[0], self.george[0], self.tom[0]]) self.assertEqual( self.founders.get_col(list(self.headers).index('last_name')), [self.john[1], self.george[1], self.tom[1]]) self.assertEqual( self.founders.get_col(list(self.headers).index('gpa')), [self.john[2], self.george[2], self.tom[2]]) def test_data_slicing(self): """Verify slicing by data.""" # Slice individual rows self.assertEqual(self.founders[0], self.john) self.assertEqual(self.founders[:1], [self.john]) self.assertEqual(self.founders[1:2], [self.george]) self.assertEqual(self.founders[-1], self.tom) self.assertEqual(self.founders[3:], []) # Slice multiple rows self.assertEqual(self.founders[:], [self.john, self.george, self.tom]) self.assertEqual(self.founders[0:2], [self.john, self.george]) self.assertEqual(self.founders[1:3], [self.george, self.tom]) self.assertEqual(self.founders[2:], [self.tom]) def test_row_slicing(self): """Verify Row slicing. Issue #184.""" john = Row(self.john) self.assertEqual(john[:], list(self.john[:])) self.assertEqual(john[0:], list(self.john[0:])) self.assertEqual(john[:2], list(self.john[:2])) self.assertEqual(john[0:2], list(self.john[0:2])) self.assertEqual(john[0:-1], list(self.john[0:-1])) def test_delete(self): """Verify deleting from dataset works.""" # Delete from front of object del self.founders[0] self.assertEqual(self.founders[:], [self.george, self.tom]) # Verify dimensions, width should NOT change self.assertEqual(self.founders.height, 2) self.assertEqual(self.founders.width, 3) # Delete from back of object del self.founders[1] self.assertEqual(self.founders[:], [self.george]) # Verify dimensions, width should NOT change self.assertEqual(self.founders.height, 1) self.assertEqual(self.founders.width, 3) # Delete from invalid index self.assertRaises(IndexError, self.founders.__delitem__, 3) def test_str_no_columns(self): d = tablib.Dataset(['a', 1], ['b', 2], ['c', 3]) output = '%s' % d self.assertEqual(output.splitlines(), [ 'a|1', 'b|2', 'c|3' ]) @pytest.mark.skipif(pandas is None, reason="pandas is not installed") def test_unicode_append(self): """Passes in a single unicode character and exports.""" new_row = ('å', 'é') data.append(new_row) self._test_export_data_in_all_formats(data) @pytest.mark.skipif(pandas is None, reason="pandas is not installed") def test_datetime_append(self): """Passes in a single datetime and a single date and exports.""" new_row = ( datetime.datetime.now(), datetime.datetime.today(), ) data.append(new_row) self._test_export_data_in_all_formats(data) @pytest.mark.skipif(pandas is None, reason="pandas is not installed") def test_separator_append(self): for a in range(3): data.append_separator('foobar') for a in range(5): data.append(['asdf', 'asdf', 'asdf']) self._test_export_data_in_all_formats(data) def test_book_export_no_exceptions(self): """Test that various exports don't error out.""" book = tablib.Databook() book.add_sheet(data) # These formats don't implement the book abstraction. 
unsupported = ['csv', 'tsv', 'jira', 'latex', 'df'] self._test_export_data_in_all_formats(book, exclude=unsupported) def test_book_unsupported_loading(self): with self.assertRaises(UnsupportedFormat): tablib.Databook().load('Any stream', 'csv') def test_book_unsupported_export(self): book = tablib.Databook().load( '[{"title": "first", "data": [{"first_name": "John"}]}]', 'json', ) with self.assertRaises(UnsupportedFormat): book.export('csv') def test_book_import_from_file(self): xlsx_source = Path(__file__).parent / 'files' / 'founders.xlsx' with xlsx_source.open('rb') as fh: book = tablib.Databook().load(fh, 'xlsx') self.assertEqual(eval(book.json)[0]['title'], 'Feuille1') def test_dataset_import_from_file(self): xlsx_source = Path(__file__).parent / 'files' / 'founders.xlsx' with xlsx_source.open('rb') as fh: dset = tablib.Dataset().load(fh, 'xlsx') self.assertEqual(eval(dset.json)[0]['last_name'], 'Adams') def test_empty_file(self): tmp_file = tempfile.NamedTemporaryFile() dset = tablib.Dataset().load(tmp_file, 'yaml') self.assertEqual(dset.json, '[]') @pytest.mark.skipif(pandas is None, reason="pandas is not installed") def test_auto_format_detect(self): """Test auto format detection.""" # html, jira, latex, rst are export only. _xls = self.founders.export('xls') self.assertEqual(tablib.detect_format(_xls), 'xls') _xlsx = self.founders.export('xlsx') self.assertEqual(tablib.detect_format(_xlsx), 'xlsx') _ods = self.founders.export('ods') self.assertEqual(tablib.detect_format(_ods), 'ods') _df = self.founders.export('df') self.assertEqual(tablib.detect_format(_df), 'df') _yaml = '- {age: 90, first_name: John, last_name: Adams}' self.assertEqual(tablib.detect_format(_yaml), 'yaml') _json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]' self.assertEqual(tablib.detect_format(_json), 'json') _csv = '1,2,3\n4,5,6\n7,8,9\n' self.assertEqual(tablib.detect_format(_csv), 'csv') _tsv = '1\t2\t3\n4\t5\t6\n7\t8\t9\n' self.assertEqual(tablib.detect_format(_tsv), 'tsv') _bunk = StringIO( '¡¡¡¡¡¡---///\n\n\n' + '¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' ) self.assertEqual(tablib.detect_format(_bunk), None) def test_transpose(self): """Transpose a dataset.""" transposed_founders = self.founders.transpose() first_row = transposed_founders[0] second_row = transposed_founders[1] self.assertEqual(transposed_founders.headers, ["first_name", "John", "George", "Thomas"]) self.assertEqual(first_row, ("last_name", "Adams", "Washington", "Jefferson")) self.assertEqual(second_row, ("gpa", 90, 67, 50)) def test_transpose_multiple_headers(self): data = tablib.Dataset() data.headers = ("first_name", "last_name", "age") data.append(('John', 'Adams', 90)) data.append(('George', 'Washington', 67)) data.append(('John', 'Tyler', 71)) self.assertEqual(data.transpose().transpose().dict, data.dict) def test_row_stacking(self): """Row stacking.""" to_join = tablib.Dataset(headers=self.founders.headers) for row in self.founders: to_join.append(row=row) row_stacked = self.founders.stack(to_join) for column in row_stacked.headers: original_data = self.founders[column] expected_data = original_data + original_data self.assertEqual(row_stacked[column], expected_data) def test_column_stacking(self): """Column stacking""" to_join = tablib.Dataset(headers=self.founders.headers) for row in self.founders: to_join.append(row=row) column_stacked = self.founders.stack_cols(to_join) for index, row in enumerate(column_stacked): original_data = self.founders[index] expected_data = original_data + 
original_data self.assertEqual(row, expected_data) self.assertEqual(column_stacked[0], ("John", "Adams", 90, "John", "Adams", 90)) def test_sorting(self): """Sort columns.""" sorted_data = self.founders.sort(col="first_name") self.assertEqual(sorted_data.title, 'Founders') first_row = sorted_data[0] second_row = sorted_data[2] third_row = sorted_data[1] expected_first = self.founders[1] expected_second = self.founders[2] expected_third = self.founders[0] self.assertEqual(first_row, expected_first) self.assertEqual(second_row, expected_second) self.assertEqual(third_row, expected_third) def test_remove_duplicates(self): """Unique Rows.""" self.founders.append(self.john) self.founders.append(self.george) self.founders.append(self.tom) self.assertEqual(self.founders[0], self.founders[3]) self.assertEqual(self.founders[1], self.founders[4]) self.assertEqual(self.founders[2], self.founders[5]) self.assertEqual(self.founders.height, 6) self.founders.remove_duplicates() self.assertEqual(self.founders[0], self.john) self.assertEqual(self.founders[1], self.george) self.assertEqual(self.founders[2], self.tom) self.assertEqual(self.founders.height, 3) def test_wipe(self): """Purge a dataset.""" new_row = (1, 2, 3) data.append(new_row) # Verify width/data self.assertEqual(data.width, len(new_row)) self.assertEqual(data[0], new_row) data.wipe() new_row = (1, 2, 3, 4) data.append(new_row) self.assertEqual(data.width, len(new_row)) self.assertEqual(data[0], new_row) def test_subset(self): """Create a subset of a dataset""" rows = (0, 2) columns = ('first_name', 'gpa') data.headers = self.headers data.append(self.john) data.append(self.george) data.append(self.tom) # Verify data is truncated subset = data.subset(rows=rows, cols=columns) self.assertEqual(type(subset), tablib.Dataset) self.assertEqual(subset.headers, list(columns)) self.assertEqual(subset._data[0].list, ['John', 90]) self.assertEqual(subset._data[1].list, ['Thomas', 50]) def test_formatters(self): """Confirm formatters are being triggered.""" def _formatter(cell_value): return str(cell_value).upper() self.founders.add_formatter('last_name', _formatter) for name in [r['last_name'] for r in self.founders.dict]: self.assertTrue(name.isupper()) def test_unicode_renders_markdown_table(self): # add another entry to test right field width for # integer self.founders.append(('Old', 'Man', 100500)) self.assertEqual('first_name|last_name |gpa ', str(self.founders).split('\n')[0]) def test_pickle_unpickle_dataset(self): before_pickle = self.founders.export('json') founders = pickle.loads(pickle.dumps(self.founders)) self.assertEqual(founders.export('json'), before_pickle) def test_databook_add_sheet_accepts_only_dataset_instances(self): class NotDataset: def append(self, item): pass dataset = NotDataset() dataset.append(self.john) self.assertRaises(tablib.InvalidDatasetType, book.add_sheet, dataset) def test_databook_add_sheet_accepts_dataset_subclasses(self): class DatasetSubclass(tablib.Dataset): pass # just checking if subclass of tablib.Dataset can be added to Databook dataset = DatasetSubclass() dataset.append(self.john) dataset.append(self.tom) try: book.add_sheet(dataset) except tablib.InvalidDatasetType: self.fail("Subclass of tablib.Dataset should be accepted by Databook.add_sheet") def test_databook_formatter_support_kwargs(self): """Test XLSX export with formatter configuration.""" self.founders.export('xlsx', freeze_panes=False) def test_databook_formatter_with_new_lines(self): """Test XLSX export with new line in content.""" 
self.founders.append(('First\nSecond', 'Name', 42)) self.founders.export('xlsx') def test_row_repr(self): """Row repr.""" # Arrange john = Row(self.john) # Act output = str(john) # Assert self.assertEqual(output, "['John', 'Adams', 90]") def test_row_pickle_unpickle(self): """Row __setstate__ and __getstate__.""" # Arrange before_pickle = Row(self.john) # Act output = pickle.loads(pickle.dumps(before_pickle)) # Assert self.assertEqual(output[0], before_pickle[0]) self.assertEqual(output[1], before_pickle[1]) self.assertEqual(output[2], before_pickle[2]) def test_row_lpush(self): """Row lpush.""" john = Row(self.john) john.lpush(53) self.assertEqual(john.list, [53, 'John', 'Adams', 90]) def test_row_append(self): """Row append.""" john = Row(self.john) john.append('stuff') self.assertEqual(john.list, ['John', 'Adams', 90, 'stuff']) def test_row_contains(self): """Row __contains__.""" # Arrange john = Row(self.john) # Act / Assert self.assertIn("John", john) def test_row_no_tag(self): """Row has_tag.""" # Arrange john = Row(self.john) # Act / Assert self.assertFalse(john.has_tag("not found")) self.assertFalse(john.has_tag(None)) def test_row_has_tag(self): """Row has_tag.""" # Arrange john = Row(self.john, tags=["tag1"]) # Act / Assert self.assertTrue(john.has_tag("tag1")) def test_row_has_tags(self): """Row has_tag.""" # Arrange john = Row(self.john, tags=["tag1", "tag2"]) # Act / Assert self.assertTrue(john.has_tag(["tag2", "tag1"])) class HTMLTests(BaseTestCase): def test_html_export(self): """HTML export""" html = markup.page() html.table.open() html.thead.open() html.tr(markup.oneliner.th(self.founders.headers)) html.thead.close() html.tbody.open() for founder in self.founders: html.tr(markup.oneliner.td(founder)) html.tbody.close() html.table.close() html = str(html) self.assertEqual(html, self.founders.html) def test_html_export_none_value(self): """HTML export""" html = markup.page() html.table.open() html.thead.open() html.tr(markup.oneliner.th(['foo', '', 'bar'])) html.thead.close() html.tbody.open() html.tr(markup.oneliner.td(['foo', '', 'bar'])) html.tbody.close() html.table.close() html = str(html) headers = ['foo', None, 'bar'] d = tablib.Dataset(['foo', None, 'bar'], headers=headers) self.assertEqual(html, d.html) class RSTTests(BaseTestCase): def test_rst_force_grid(self): data = tablib.Dataset() data.append(self.john) data.append(self.george) data.headers = self.headers fmt = registry.get_format('rst') simple = fmt.export_set(data) grid = fmt.export_set(data, force_grid=True) self.assertNotEqual(simple, grid) self.assertNotIn('+', simple) self.assertIn('+', grid) def test_empty_string(self): data = tablib.Dataset() data.headers = self.headers data.append(self.john) data.append(('Wendy', '', 43)) data.append(('Esther', ' ', 31)) self.assertEqual( data.export('rst'), '========== ========= ===\n' 'first_name last_name gpa\n' '========== ========= ===\n' 'John Adams 90 \n' 'Wendy 43 \n' 'Esther 31 \n' '========== ========= ===' ) def test_rst_export_set(self): # Arrange data = tablib.Dataset() data.append(self.john) data.headers = self.headers fmt = registry.get_format("rst") # Act out1 = fmt.export_set(data) out2 = fmt.export_set_as_simple_table(data) # Assert self.assertEqual(out1, out2) self.assertEqual( out1, "========== ========= ===\n" "first_name last_name gpa\n" "========== ========= ===\n" "John Adams 90 \n" "========== ========= ===", ) class CSVTests(BaseTestCase): def test_csv_format_detect(self): """Test CSV format detection.""" _csv = StringIO( '1,2,3\n' '4,5,6\n' 
'7,8,9\n' ) _bunk = StringIO( '¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' ) fmt = registry.get_format('csv') self.assertTrue(fmt.detect(_csv)) self.assertFalse(fmt.detect(_bunk)) def test_csv_import_set(self): """Generate and import CSV set serialization.""" data.append(self.john) data.append(self.george) data.headers = self.headers _csv = data.csv data.csv = _csv self.assertEqual(_csv, data.csv) def test_csv_import_set_semicolons(self): """Test for proper output with semicolon separated CSV.""" data.append(self.john) data.append(self.george) data.headers = self.headers _csv = data.get_csv(delimiter=';') data.set_csv(_csv, delimiter=';') self.assertEqual(_csv, data.get_csv(delimiter=';')) def test_csv_import_set_with_spaces(self): """Generate and import CSV set serialization when row values have spaces.""" data.append(('Bill Gates', 'Microsoft')) data.append(('Steve Jobs', 'Apple')) data.headers = ('Name', 'Company') _csv = data.csv data.csv = _csv self.assertEqual(_csv, data.csv) def test_csv_import_set_semicolon_with_spaces(self): """Generate and import semicolon separated CSV set serialization when row values have spaces.""" data.append(('Bill Gates', 'Microsoft')) data.append(('Steve Jobs', 'Apple')) data.headers = ('Name', 'Company') _csv = data.get_csv(delimiter=';') data.set_csv(_csv, delimiter=';') self.assertEqual(_csv, data.get_csv(delimiter=';')) def test_csv_import_set_with_newlines(self): """Generate and import CSV set serialization when row values have newlines.""" data.append(('Markdown\n=======', 'A cool language\n\nwith paragraphs')) data.append(('reStructedText\n==============', 'Another cool language\n\nwith paragraphs')) data.headers = ('title', 'body') _csv = data.csv data.csv = _csv self.assertEqual(_csv, data.csv) def test_csv_import_set_commas_embedded(self): """Comma-separated CSV can include commas inside quoted string.""" csv_text = ( 'id,name,description,count\r\n' '12,Smith,"Red, rounded",4\r\n' ) data.csv = csv_text self.assertEqual(data[0][2], 'Red, rounded') self.assertEqual(data.csv, csv_text) def test_csv_import_set_with_unicode_str(self): """Import CSV set with non-ascii characters in unicode literal""" csv_text = ( "id,givenname,surname,loginname,email,pref_firstname,pref_lastname\n" "13765,Ævar,Arnfjörð,testing,test@example.com,Ævar,Arnfjörð" ) data.csv = csv_text self.assertEqual(data.width, 7) def test_csv_import_set_ragged(self): """Import CSV set when not all rows have the same length.""" csv_text = ( "H1,H2,H3\n" "A,B\n" "C,D,E\n" "\n" "F\n" ) dataset = tablib.import_set(csv_text, format="csv") self.assertEqual( str(dataset), 'H1|H2|H3\n' '--|--|--\n' 'A |B | \n' 'C |D |E \n' 'F | | ' ) def test_csv_import_set_skip_lines(self): csv_text = ( 'garbage,line\n' '\n' 'id,name,description\n' '12,Smith,rounded\n' ) dataset = tablib.import_set(csv_text, format="csv", skip_lines=2) self.assertEqual(dataset.headers, ['id', 'name', 'description']) def test_csv_import_mac_os_lf(self): csv_text = ( 'id,name,description\r' '12,Smith,rounded\r' ) dataset = tablib.import_set(csv_text, format="csv") self.assertEqual('id,name,description\r\n12,Smith,rounded\r\n', dataset.csv) def test_csv_export(self): """Verify exporting dataset object as CSV.""" # Build up the csv string with headers first, followed by each row csv = '' for col in self.headers: csv += col + ',' csv = csv.strip(',') + '\r\n' for founder in self.founders: for col in founder: csv += str(col) + ',' csv = csv.strip(',') + '\r\n' self.assertEqual(csv, 
self.founders.csv) def test_csv_export_options(self): """Exporting support csv.writer() parameters.""" data.append(('1. a', '2. b', '3. c')) result = data.export('csv', delimiter=' ', quotechar='|') self.assertEqual(result, '|1. a| |2. b| |3. c|\r\n') def test_csv_stream_export(self): """Verify exporting dataset object as CSV from file object.""" # Build up the csv string with headers first, followed by each row csv = '' for col in self.headers: csv += col + ',' csv = csv.strip(',') + '\r\n' for founder in self.founders: for col in founder: csv += str(col) + ',' csv = csv.strip(',') + '\r\n' frm = registry.get_format('csv') csv_stream = frm.export_stream_set(self.founders) self.assertEqual(csv, csv_stream.getvalue()) def test_unicode_csv(self): """Check if unicode in csv export doesn't raise.""" data = tablib.Dataset() data.append(['\xfc', '\xfd']) data.csv def test_csv_column_select(self): """Build up a CSV and test selecting a column""" data = tablib.Dataset() data.csv = self.founders.csv headers = data.headers self.assertIsInstance(headers[0], str) orig_first_name = self.founders[self.headers[0]] csv_first_name = data[headers[0]] self.assertEqual(orig_first_name, csv_first_name) def test_csv_column_delete(self): """Build up a CSV and test deleting a column""" data = tablib.Dataset() data.csv = self.founders.csv target_header = data.headers[0] self.assertIsInstance(target_header, str) del data[target_header] self.assertNotIn(target_header, data.headers) def test_csv_column_sort(self): """Build up a CSV and test sorting a column by name""" data = tablib.Dataset() data.csv = self.founders.csv orig_target_header = self.founders.headers[1] target_header = data.headers[1] self.founders.sort(orig_target_header) data.sort(target_header) self.assertEqual(self.founders[orig_target_header], data[target_header]) def test_csv_formatter_support_kwargs(self): """Test CSV import and export with formatter configuration.""" data.append(self.john) data.append(self.george) data.headers = self.headers expected = 'first_name;last_name;gpa\nJohn;Adams;90\nGeorge;Washington;67\n' kwargs = dict(delimiter=';', lineterminator='\n') _csv = data.export('csv', **kwargs) self.assertEqual(expected, _csv) # the import works but consider default delimiter=',' d1 = tablib.import_set(_csv, format="csv") self.assertEqual(1, len(d1.headers)) d2 = tablib.import_set(_csv, format="csv", **kwargs) self.assertEqual(3, len(d2.headers)) class TSVTests(BaseTestCase): def test_tsv_import_set(self): """Generate and import TSV set serialization.""" data.append(self.john) data.append(self.george) data.headers = self.headers _tsv = data.tsv data.tsv = _tsv self.assertEqual(_tsv, data.tsv) def test_tsv_format_detect(self): """Test TSV format detection.""" _tsv = StringIO( '1\t2\t3\n' '4\t5\t6\n' '7\t8\t9\n' ) _bunk = StringIO( '¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' ) fmt = registry.get_format('tsv') self.assertTrue(fmt.detect(_tsv)) self.assertFalse(fmt.detect(_bunk)) def test_tsv_export(self): """Verify exporting dataset object as TSV.""" # Build up the tsv string with headers first, followed by each row tsv = '' for col in self.headers: tsv += col + '\t' tsv = tsv.strip('\t') + '\r\n' for founder in self.founders: for col in founder: tsv += str(col) + '\t' tsv = tsv.strip('\t') + '\r\n' self.assertEqual(tsv, self.founders.tsv) class ODSTests(BaseTestCase): def test_ods_export_datatypes(self): date_time = datetime.datetime(2019, 10, 4, 12, 30, 8) data.append(('string', '004', 42, 21.55, 
Decimal('34.5'), date_time)) data.headers = ('string', 'start0', 'integer', 'float', 'decimal', 'date/time') # ODS is currently write-only, just test that output doesn't crash. assert data.ods is not None assert len(data.ods) class XLSTests(BaseTestCase): def test_xls_format_detect(self): """Test the XLS format detection.""" in_stream = self.founders.xls self.assertEqual(detect_format(in_stream), 'xls') def test_xls_date_import(self): xls_source = Path(__file__).parent / 'files' / 'dates.xls' with xls_source.open('rb') as fh: dset = tablib.Dataset().load(fh, 'xls') self.assertEqual(dset.dict[0]['birth_date'], datetime.datetime(2015, 4, 12, 0, 0)) def test_xlsx_import_set_skip_lines(self): data.append(('garbage', 'line', '')) data.append(('', '', '')) data.append(('id', 'name', 'description')) _xls = data.xls new_data = tablib.Dataset().load(_xls, skip_lines=2) self.assertEqual(new_data.headers, ['id', 'name', 'description']) def test_xls_import_with_errors(self): """Errors from imported files are kept as errors.""" xls_source = Path(__file__).parent / 'files' / 'errors.xls' with xls_source.open('rb') as fh: data = tablib.Dataset().load(fh.read()) self.assertEqual( data.dict[0], OrderedDict([ ('div by 0', '#DIV/0!'), ('name unknown', '#NAME?'), ('not available (formula)', '#N/A'), ('not available (static)', '#N/A') ]) ) class XLSXTests(BaseTestCase): def test_xlsx_format_detect(self): """Test the XLSX format detection.""" in_stream = self.founders.xlsx self.assertEqual(detect_format(in_stream), 'xlsx') def test_xlsx_import_set(self): date_time = datetime.datetime(2019, 10, 4, 12, 30, 8) data.append(('string', '004', 42, 21.55, date_time)) data.headers = ('string', 'start0', 'integer', 'float', 'date/time') _xlsx = data.xlsx data.xlsx = _xlsx self.assertEqual(data.dict[0]['string'], 'string') self.assertEqual(data.dict[0]['start0'], '004') self.assertEqual(data.dict[0]['integer'], 42) self.assertEqual(data.dict[0]['float'], 21.55) self.assertEqual(data.dict[0]['date/time'], date_time) def test_xlsx_import_set_skip_lines(self): data.append(('garbage', 'line', '')) data.append(('', '', '')) data.append(('id', 'name', 'description')) _xlsx = data.xlsx new_data = tablib.Dataset().load(_xlsx, skip_lines=2) self.assertEqual(new_data.headers, ['id', 'name', 'description']) def test_xlsx_bad_chars_sheet_name(self): """ Sheet names are limited to 30 chars and the following chars are not permitted: \\ / * ? 
: [ ] """ _dataset = tablib.Dataset( title='bad name \\/*?:[]qwertyuiopasdfghjklzxcvbnm' ) _xlsx = _dataset.export('xlsx') new_data = tablib.Dataset().load(_xlsx) self.assertEqual(new_data.title, 'bad name -------qwertyuiopasdfg') _book = tablib.Databook() _book.add_sheet(_dataset) _xlsx = _book.export('xlsx') new_data = tablib.Databook().load(_xlsx, 'xlsx') self.assertEqual(new_data.sheets()[0].title, 'bad name -------qwertyuiopasdfg') def test_xlsx_import_book_ragged(self): """Import XLSX file through databook when not all rows have the same length.""" xlsx_source = Path(__file__).parent / 'files' / 'ragged.xlsx' with xlsx_source.open('rb') as fh: book = tablib.Databook().load(fh, 'xlsx') self.assertEqual(book.sheets()[0].pop(), (1.0, '')) def test_xlsx_import_set_ragged(self): """Import XLSX file through dataset when not all rows have the same length.""" xlsx_source = Path(__file__).parent / 'files' / 'ragged.xlsx' with xlsx_source.open('rb') as fh: dataset = tablib.Dataset().load(fh, 'xlsx') self.assertEqual(dataset.pop(), (1.0, '')) def test_xlsx_wrong_char(self): """Bad characters are not silently ignored. We let the exception bubble up.""" from openpyxl.utils.exceptions import IllegalCharacterError with self.assertRaises(IllegalCharacterError): data.append(('string', b'\x0cf')) data.xlsx def test_xlsx_cell_values(self): """Test cell values are read and not formulas""" xls_source = Path(__file__).parent / 'files' / 'xlsx_cell_values.xlsx' with xls_source.open('rb') as fh: data = tablib.Dataset().load(fh) self.assertEqual(data.headers[0], 'Hello World') def test_xlsx_export_set_escape_formulae(self): """ Test that formulae are sanitised on export. """ data.append(('=SUM(1+1)',)) _xlsx = data.export('xlsx') # read back using openpyxl because tablib reads formulae as values wb = load_workbook(filename=BytesIO(_xlsx)) self.assertEqual('=SUM(1+1)', wb.active['A1'].value) _xlsx = data.export('xlsx', escape=True) wb = load_workbook(filename=BytesIO(_xlsx)) self.assertEqual('SUM(1+1)', wb.active['A1'].value) def test_xlsx_export_book_escape_formulae(self): """ Test that formulae are sanitised on export. """ data.append(('=SUM(1+1)',)) _book = tablib.Databook() _book.add_sheet(data) _xlsx = _book.export('xlsx') # read back using openpyxl because tablib reads formulae as values wb = load_workbook(filename=BytesIO(_xlsx)) self.assertEqual('=SUM(1+1)', wb.active['A1'].value) _xlsx = _book.export('xlsx', escape=True) wb = load_workbook(filename=BytesIO(_xlsx)) self.assertEqual('SUM(1+1)', wb.active['A1'].value) def test_xlsx_export_set_escape_formulae_in_header(self): data.headers = ('=SUM(1+1)',) _xlsx = data.export('xlsx') wb = load_workbook(filename=BytesIO(_xlsx)) self.assertEqual('=SUM(1+1)', wb.active['A1'].value) _xlsx = data.export('xlsx', escape=True) wb = load_workbook(filename=BytesIO(_xlsx)) self.assertEqual('SUM(1+1)', wb.active['A1'].value) def test_xlsx_export_book_escape_formulae_in_header(self): data.headers = ('=SUM(1+1)',) _book = tablib.Databook() _book.add_sheet(data) _xlsx = _book.export('xlsx') wb = load_workbook(filename=BytesIO(_xlsx)) self.assertEqual('=SUM(1+1)', wb.active['A1'].value) _xlsx = _book.export('xlsx', escape=True) wb = load_workbook(filename=BytesIO(_xlsx)) self.assertEqual('SUM(1+1)', wb.active['A1'].value) def test_xlsx_bad_dimensions(self): """Test loading file with bad dimension. 
Must be done with read_only=False.""" xls_source = Path(__file__).parent / 'files' / 'bad_dimensions.xlsx' with xls_source.open('rb') as fh: data = tablib.Dataset().load(fh, read_only=False) self.assertEqual(data.height, 3) def test_xlsx_raise_ValueError_on_cell_write_during_export(self): """Test that the process handles errors which might be raised when calling cell setter.""" # openpyxl does not handle array type, so will raise ValueError, # which results in the array being cast to string data.append(([1],)) _xlsx = data.export('xlsx') wb = load_workbook(filename=BytesIO(_xlsx)) self.assertEqual('[1]', wb.active['A1'].value) class JSONTests(BaseTestCase): def test_json_format_detect(self): """Test JSON format detection.""" _json = StringIO('[{"last_name": "Adams","age": 90,"first_name": "John"}]') _bunk = StringIO( '¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' ) fmt = registry.get_format('json') self.assertTrue(fmt.detect(_json)) self.assertFalse(fmt.detect(_bunk)) def test_json_import_book(self): """Generate and import JSON book serialization.""" data.append(self.john) data.append(self.george) data.headers = self.headers book.add_sheet(data) _json = book.json book.json = _json self.assertEqual(json.loads(_json), json.loads(book.json)) # Same with the load interface book2 = tablib.Databook().load(_json, None) self.assertEqual(json.loads(book.json), json.loads(book2.json)) def test_json_import_set(self): """Generate and import JSON set serialization.""" data.append(self.john) data.append(self.george) data.headers = self.headers _json = data.json data.json = _json self.assertEqual(json.loads(_json), json.loads(data.json)) def test_json_export(self): """Verify exporting dataset object as JSON""" address_id = uuid4() headers = self.headers + ('address_id',) founders = tablib.Dataset(headers=headers, title='Founders') founders.append(('John', 'Adams', 90, address_id)) founders.append(('名字', '李', 60, '')) founders_json = founders.export('json') expected_json = ( '[{"first_name": "John", "last_name": "Adams", "gpa": 90, ' '"address_id": "%s"}, {"first_name": "名字", "last_name": "李", ' '"gpa": 60, "address_id": ""}]' % str(address_id) ) self.assertEqual(founders_json, expected_json) def test_json_list_of_lists(self): input_json = "[[1,2],[3,4]]" expected_yaml = "- [1, 2]\n- [3, 4]\n" dset = tablib.Dataset().load(in_stream=input_json, format="json") self.assertEqual(dset.export("yaml"), expected_yaml) class YAMLTests(BaseTestCase): def test_yaml_format_detect(self): """Test YAML format detection.""" _yaml = '- {age: 90, first_name: John, last_name: Adams}' _tsv = 'foo\tbar' _bunk = ( '¡¡¡¡¡¡---///\n\n\n¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†' 'ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' ) fmt = registry.get_format('yaml') self.assertTrue(fmt.detect(_yaml)) self.assertFalse(fmt.detect(_bunk)) self.assertFalse(fmt.detect(_tsv)) def test_yaml_import_book(self): """Generate and import YAML book serialization.""" data.append(self.john) data.append(self.george) data.headers = self.headers book.add_sheet(data) _yaml = book.yaml book.yaml = _yaml self.assertEqual(_yaml, book.yaml) # Same with the load interface book2 = tablib.Databook().load(_yaml, None) self.assertEqual(_yaml, book2.yaml) def test_yaml_import_set(self): """Generate and import YAML set serialization.""" data.append(self.john) data.append(self.george) data.headers = self.headers _yaml = data.yaml data.yaml = _yaml self.assertEqual(_yaml, data.yaml) def test_yaml_export(self): """YAML export""" 
self.founders.append(('名字', '李', 60)) expected = """\ - {first_name: John, gpa: 90, last_name: Adams} - {first_name: George, gpa: 67, last_name: Washington} - {first_name: Thomas, gpa: 50, last_name: Jefferson} - {first_name: 名字, gpa: 60, last_name: 李} """ output = self.founders.yaml self.assertEqual(output, expected) def test_yaml_load(self): """ test issue 524: invalid format """ yaml_source = Path(__file__).parent / 'files' / 'issue_524.yaml' with yaml_source.open('rb') as fh: with self.assertRaises(UnsupportedFormat): tablib.Dataset().load(fh, 'yaml') class LatexTests(BaseTestCase): def test_latex_export(self): """LaTeX export""" expected = """\ % Note: add \\usepackage{booktabs} to your preamble % \\begin{table}[!htbp] \\centering \\caption{Founders} \\begin{tabular}{lrr} \\toprule first\\_name & last\\_name & gpa \\\\ \\cmidrule(r){1-1} \\cmidrule(lr){2-2} \\cmidrule(l){3-3} John & Adams & 90 \\\\ George & Washington & 67 \\\\ Thomas & Jefferson & 50 \\\\ \\bottomrule \\end{tabular} \\end{table} """ output = self.founders.latex self.assertEqual(output, expected) def test_latex_export_empty_dataset(self): self.assertIsNotNone(tablib.Dataset().latex) def test_latex_export_no_headers(self): d = tablib.Dataset() d.append(('one', 'two', 'three')) self.assertIn('one', d.latex) def test_latex_export_caption(self): d = tablib.Dataset() d.append(('one', 'two', 'three')) self.assertNotIn('caption', d.latex) d.title = 'Title' self.assertIn('\\caption{Title}', d.latex) def test_latex_export_none_values(self): headers = ['foo', None, 'bar'] d = tablib.Dataset(['foo', None, 'bar'], headers=headers) output = d.latex self.assertIn('foo', output) self.assertNotIn('None', output) def test_latex_escaping(self): d = tablib.Dataset(['~', '^']) output = d.latex self.assertNotIn('~', output) self.assertIn('textasciitilde', output) self.assertNotIn('^', output) self.assertIn('textasciicircum', output) class DBFTests(BaseTestCase): def test_dbf_import_set(self): data.append(self.john) data.append(self.george) data.headers = self.headers _dbf = data.dbf data.dbf = _dbf # self.assertEqual(_dbf, data.dbf) try: self.assertEqual(_dbf, data.dbf) except AssertionError: index = 0 so_far = '' for reg_char, data_char in zip(_dbf, data.dbf): so_far += chr(data_char) if reg_char != data_char and index not in [1, 2, 3]: raise AssertionError('Failing at char {}: {} vs {} {}'.format( index, reg_char, data_char, so_far)) index += 1 def test_dbf_export_set(self): """Test DBF import.""" data.append(self.john) data.append(self.george) data.append(self.tom) data.headers = self.headers _regression_dbf = (b'\x03r\x06\x06\x03\x00\x00\x00\x81\x00\xab\x00\x00' b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' b'\x00\x00\x00FIRST_NAME\x00C\x00\x00\x00\x00P\x00\x00\x00\x00\x00' b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00LAST_NAME\x00\x00C\x00' b'\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' b'\x00\x00GPA\x00\x00\x00\x00\x00\x00\x00\x00N\x00\x00\x00\x00\n' b'\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\r' ) _regression_dbf += b' John' + (b' ' * 75) _regression_dbf += b' Adams' + (b' ' * 74) _regression_dbf += b' 90.0000000' _regression_dbf += b' George' + (b' ' * 73) _regression_dbf += b' Washington' + (b' ' * 69) _regression_dbf += b' 67.0000000' _regression_dbf += b' Thomas' + (b' ' * 73) _regression_dbf += b' Jefferson' + (b' ' * 70) _regression_dbf += b' 50.0000000' _regression_dbf += b'\x1a' # If in python3, decode regression string to binary. 
# _regression_dbf = bytes(_regression_dbf, 'utf-8') # _regression_dbf = _regression_dbf.replace(b'\n', b'\r') try: self.assertEqual(_regression_dbf, data.dbf) except AssertionError: index = 0 found_so_far = '' for reg_char, data_char in zip(_regression_dbf, data.dbf): # found_so_far += chr(data_char) if reg_char != data_char and index not in [1, 2, 3]: raise AssertionError( 'Failing at char {}: {} vs {} (found {})'.format( index, reg_char, data_char, found_so_far)) index += 1 def test_dbf_format_detect(self): """Test the DBF format detection.""" _dbf = (b'\x03r\x06\x03\x03\x00\x00\x00\x81\x00\xab\x00\x00' b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' b'\x00\x00\x00FIRST_NAME\x00C\x00\x00\x00\x00P\x00\x00\x00\x00\x00' b'\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00LAST_NAME\x00\x00C\x00' b'\x00\x00\x00P\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00' b'\x00\x00GPA\x00\x00\x00\x00\x00\x00\x00\x00N\x00\x00\x00\x00\n' b'\x08\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\r' ) _dbf += b' John' + (b' ' * 75) _dbf += b' Adams' + (b' ' * 74) _dbf += b' 90.0000000' _dbf += b' George' + (b' ' * 73) _dbf += b' Washington' + (b' ' * 69) _dbf += b' 67.0000000' _dbf += b' Thomas' + (b' ' * 73) _dbf += b' Jefferson' + (b' ' * 70) _dbf += b' 50.0000000' _dbf += b'\x1a' _dbf = BytesIO(_dbf) _yaml = '- {age: 90, first_name: John, last_name: Adams}' _tsv = 'foo\tbar' _csv = '1,2,3\n4,5,6\n7,8,9\n' _json = '[{"last_name": "Adams","age": 90,"first_name": "John"}]' _bunk = ( '¡¡¡¡¡¡¡¡£™∞¢£§∞§¶•¶ª∞¶•ªº••ª–º§•†•§º¶•†¥ª–º•§ƒø¥¨©πƒø†ˆ¥ç©¨√øˆ¥≈†ƒ¥ç©ø¨çˆ¥ƒçø¶' ) fmt = registry.get_format('dbf') self.assertTrue(fmt.detect(_dbf)) self.assertFalse(fmt.detect(_yaml)) self.assertFalse(fmt.detect(_tsv)) self.assertFalse(fmt.detect(_csv)) self.assertFalse(fmt.detect(_json)) self.assertFalse(fmt.detect(_bunk)) class JiraTests(BaseTestCase): def test_jira_export(self): expected = """||first_name||last_name||gpa|| |John|Adams|90| |George|Washington|67| |Thomas|Jefferson|50|""" self.assertEqual(expected, self.founders.jira) def test_jira_export_no_headers(self): self.assertEqual('|a|b|c|', tablib.Dataset(['a', 'b', 'c']).jira) def test_jira_export_none_and_empty_values(self): self.assertEqual('| | |c|', tablib.Dataset(['', None, 'c']).jira) def test_jira_export_empty_dataset(self): self.assertIsNotNone(tablib.Dataset().jira) class DocTests(unittest.TestCase): def test_rst_formatter_doctests(self): import tablib.formats._rst results = doctest.testmod(tablib.formats._rst) self.assertEqual(results.failed, 0) class CliTests(BaseTestCase): def test_cli_export_github(self): self.assertEqual( '|---|---|---|\n| a | b | c |', tablib.Dataset(['a', 'b', 'c']).export('cli', tablefmt='github') ) def test_cli_export_simple(self): self.assertEqual( '- - -\na b c\n- - -', tablib.Dataset(['a', 'b', 'c']).export('cli', tablefmt='simple') ) def test_cli_export_grid(self): self.assertEqual( '+---+---+---+\n| a | b | c |\n+---+---+---+', tablib.Dataset(['a', 'b', 'c']).export('cli', tablefmt='grid') ) tablib-3.5.0/tests/test_tablib_dbfpy_packages_fields.py000066400000000000000000000022701444137657500233550ustar00rootroot00000000000000#!/usr/bin/env python """Tests for tablib.packages.dbfpy.""" import unittest from tablib.packages.dbfpy import fields class DbfFieldDefTestCompareCase(unittest.TestCase): """dbfpy.fields.DbfFieldDef comparison test cases, via child classes.""" def setUp(self) -> None: self.length = 10 self.a = fields.DbfCharacterFieldDef("abc", self.length) self.z = fields.DbfCharacterFieldDef("xyz", 
self.length) self.a2 = fields.DbfCharacterFieldDef("abc", self.length) def test_compare__eq__(self): # Act / Assert self.assertEqual(self.a, self.a2) def test_compare__ne__(self): # Act / Assert self.assertNotEqual(self.a, self.z) def test_compare__lt__(self): # Act / Assert self.assertLess(self.a, self.z) def test_compare__le__(self): # Act / Assert self.assertLessEqual(self.a, self.a2) self.assertLessEqual(self.a, self.z) def test_compare__gt__(self): # Act / Assert self.assertGreater(self.z, self.a) def test_compare__ge__(self): # Act / Assert self.assertGreaterEqual(self.a2, self.a) self.assertGreaterEqual(self.z, self.a) tablib-3.5.0/tests/test_tablib_dbfpy_packages_utils.py000066400000000000000000000101321444137657500232430ustar00rootroot00000000000000#!/usr/bin/env python """Tests for tablib.packages.dbfpy.""" import datetime import unittest from tablib.packages.dbfpy import utils class UtilsUnzfillTestCase(unittest.TestCase): """dbfpy.utils.unzfill test cases.""" def test_unzfill_with_nul(self): # Arrange text = b"abc\0xyz" # Act output = utils.unzfill(text) # Assert self.assertEqual(output, b"abc") def test_unzfill_without_nul(self): # Arrange text = b"abcxyz" # Act output = utils.unzfill(text) # Assert self.assertEqual(output, b"abcxyz") class UtilsGetDateTestCase(unittest.TestCase): """dbfpy.utils.getDate test cases.""" def test_getDate_none(self): # Arrange value = None # Act output = utils.getDate(value) # Assert self.assertIsInstance(output, datetime.date) def test_getDate_datetime_date(self): # Arrange value = datetime.date(2019, 10, 19) # Act output = utils.getDate(value) # Assert self.assertIsInstance(output, datetime.date) self.assertEqual(output, value) def test_getDate_datetime_datetime(self): # Arrange value = datetime.datetime(2019, 10, 19, 12, 00, 00) # Act output = utils.getDate(value) # Assert self.assertIsInstance(output, datetime.date) self.assertEqual(output, value) def test_getDate_datetime_timestamp(self): # Arrange value = 1571515306 # Act output = utils.getDate(value) # Assert self.assertIsInstance(output, datetime.date) self.assertEqual(output, datetime.date(2019, 10, 19)) def test_getDate_datetime_string_yyyy_mm_dd(self): # Arrange value = "20191019" # Act output = utils.getDate(value) # Assert self.assertIsInstance(output, datetime.date) self.assertEqual(output, datetime.date(2019, 10, 19)) def test_getDate_datetime_string_yymmdd(self): # Arrange value = "191019" # Act output = utils.getDate(value) # Assert self.assertIsInstance(output, datetime.date) self.assertEqual(output, datetime.date(2019, 10, 19)) class UtilsGetDateTimeTestCase(unittest.TestCase): """dbfpy.utils.getDateTime test cases.""" def test_getDateTime_none(self): # Arrange value = None # Act output = utils.getDateTime(value) # Assert self.assertIsInstance(output, datetime.datetime) def test_getDateTime_datetime_datetime(self): # Arrange value = datetime.datetime(2019, 10, 19, 12, 00, 00) # Act output = utils.getDateTime(value) # Assert self.assertIsInstance(output, datetime.date) self.assertEqual(output, value) def test_getDateTime_datetime_date(self): # Arrange value = datetime.date(2019, 10, 19) # Act output = utils.getDateTime(value) # Assert self.assertIsInstance(output, datetime.date) self.assertEqual(output, datetime.datetime(2019, 10, 19, 00, 00)) def test_getDateTime_datetime_timestamp(self): # Arrange value = 1571515306 # Act output = utils.getDateTime(value) # Assert self.assertIsInstance(output, datetime.datetime) def test_getDateTime_datetime_string(self): # Arrange value = 
"20191019" # Act / Assert with self.assertRaises(NotImplementedError): utils.getDateTime(value) class InvalidValueTestCase(unittest.TestCase): """dbfpy.utils._InvalidValue test cases.""" def test_sanity(self): # Arrange INVALID_VALUE = utils.INVALID_VALUE # Act / Assert self.assertEqual(INVALID_VALUE, INVALID_VALUE) self.assertNotEqual(INVALID_VALUE, 123) self.assertEqual(int(INVALID_VALUE), 0) self.assertEqual(float(INVALID_VALUE), 0.0) self.assertEqual(str(INVALID_VALUE), "") self.assertEqual(repr(INVALID_VALUE), "") tablib-3.5.0/tox.ini000066400000000000000000000010631444137657500143070ustar00rootroot00000000000000[tox] usedevelop = true minversion = 2.4 envlist = docs lint py{37,38,39,310,311,312} [testenv] deps = -rtests/requirements.txt commands_pre = - {envpython} -m pip install --only-binary :all: pandas passenv = FORCE_COLOR commands = pytest {posargs:tests} [testenv:docs] deps = sphinx commands = sphinx-build -b html -d {envtmpdir}/doctrees docs {envtmpdir}/html [testenv:lint] deps = pre-commit build twine commands = pre-commit run --all-files python -m build twine check dist/* skip_install = true