pax_global_header00006660000000000000000000000064150555576000014522gustar00rootroot0000000000000052 comment=15d12005bc830d65daa82c18d8d0636fb4583a81 scrapy-itemadapter-7cce401/000077500000000000000000000000001505555760000157305ustar00rootroot00000000000000scrapy-itemadapter-7cce401/.editorconfig000066400000000000000000000002651505555760000204100ustar00rootroot00000000000000[*] trim_trailing_whitespace = true insert_final_newline = true indent_style = space [Makefile] indent_style = tab [*.py] indent_size = 4 charset = utf-8 [*.yml] indent_size = 2 scrapy-itemadapter-7cce401/.git-blame-ignore-revs000066400000000000000000000001231505555760000220240ustar00rootroot00000000000000# applying pre-commit hooks to the project 106a4e0af9e9ac07defef3c9a781d2fe0ac4640fscrapy-itemadapter-7cce401/.github/000077500000000000000000000000001505555760000172705ustar00rootroot00000000000000scrapy-itemadapter-7cce401/.github/workflows/000077500000000000000000000000001505555760000213255ustar00rootroot00000000000000scrapy-itemadapter-7cce401/.github/workflows/checks.yml000066400000000000000000000016031505555760000233100ustar00rootroot00000000000000name: Checks on: [push, pull_request] jobs: checks: runs-on: ubuntu-latest strategy: fail-fast: false matrix: include: - python-version: 3.13 env: TOXENV: typing - python-version: 3.13 env: TOXENV: docs - python-version: 3.13 env: TOXENV: twinecheck - python-version: 3.13 env: TOXENV: pylint steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Run check env: ${{ matrix.env }} run: | pip install -U pip pip install -U tox tox pre-commit: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: pre-commit/action@v3.0.1 scrapy-itemadapter-7cce401/.github/workflows/publish.yml000066400000000000000000000010251505555760000235140ustar00rootroot00000000000000name: Publish on: push: tags: - 'v[0-9]+.[0-9]+.[0-9]+' jobs: publish: runs-on: ubuntu-latest environment: name: pypi url: https://pypi.org/p/${{ github.event.repository.name }} permissions: id-token: write steps: - uses: actions/checkout@v4 - uses: actions/setup-python@v5 with: python-version: 3.13 - run: | python -m pip install --upgrade build python -m build - name: Publish to PyPI uses: pypa/gh-action-pypi-publish@release/v1 scrapy-itemadapter-7cce401/.github/workflows/tests.yml000066400000000000000000000044731505555760000232220ustar00rootroot00000000000000name: Tests on: [push, pull_request] jobs: tests-ubuntu: name: "Test: ${{ matrix.python-version }}, Ubuntu" runs-on: ubuntu-latest strategy: fail-fast: false matrix: include: - python-version: "3.9" env: TOXENV: min-attrs - python-version: "3.9" env: TOXENV: min-pydantic - python-version: "3.9" env: TOXENV: min-scrapy - python-version: "3.9" env: TOXENV: min-extra - python-version: "3.9" env: TOXENV: py - python-version: "3.10" env: TOXENV: py - python-version: "3.11" env: TOXENV: py - python-version: "3.12" env: TOXENV: py - python-version: "3.13" env: TOXENV: py - python-version: "3.13" env: TOXENV: attrs - python-version: "3.13" env: TOXENV: pydantic1 - python-version: "3.13" env: TOXENV: pydantic - python-version: "3.13" env: TOXENV: scrapy - python-version: "3.13" env: TOXENV: extra - python-version: "3.13" env: TOXENV: extra-pydantic1 - python-version: "pypy3.10" env: TOXENV: py - python-version: "pypy3.11" env: TOXENV: py steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} uses: 
actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install tox run: pip install tox - name: Run tests env: ${{ matrix.env }} run: tox - name: Upload coverage report uses: codecov/codecov-action@v5 tests-other-os: name: "Test: py39, ${{ matrix.os }}" runs-on: "${{ matrix.os }}" strategy: matrix: os: [macos-latest, windows-latest] steps: - uses: actions/checkout@v4 - name: Set up Python uses: actions/setup-python@v5 with: python-version: 3.9 - name: Install tox run: pip install tox - name: Run tests run: tox -e py - name: Upload coverage report uses: codecov/codecov-action@v5 scrapy-itemadapter-7cce401/.gitignore000066400000000000000000000001651505555760000177220ustar00rootroot00000000000000*.pyc .~lock* .DS_Store .mypy_cache/ *.egg-info/ .tox/ .coverage htmlcov/ coverage.xml /dist/ .venv/ .idea/ .vscode/ scrapy-itemadapter-7cce401/.pre-commit-config.yaml000066400000000000000000000003771505555760000222200ustar00rootroot00000000000000repos: - repo: https://github.com/astral-sh/ruff-pre-commit rev: v0.12.1 hooks: - id: ruff-check args: [ --fix ] - id: ruff-format - repo: https://github.com/pre-commit/pre-commit-hooks rev: v5.0.0 hooks: - id: trailing-whitespace scrapy-itemadapter-7cce401/Changelog.md000066400000000000000000000133331505555760000201440ustar00rootroot00000000000000# Changelog ### 0.12.2 (2025-09-02) The return value of `get_json_schema()` is now deterministic (deterministic order of dict keys and list items). ### 0.12.1 (2025-08-08) `get_json_schema()` now supports inherited field docstrings. ### 0.12.0 (2025-07-24) Added support for [PyPy](https://pypy.org/) 3.11 ([#97](https://github.com/scrapy/itemadapter/pull/97)). Added a new `get_json_schema()` class method to `ItemAdapter` and all built-in adapters to output a [JSON Schema](https://json-schema.org/) for a given item class ([#101](https://github.com/scrapy/itemadapter/pull/101)). Modernized the code base, now making full use of [pyproject.toml](https://packaging.python.org/en/latest/guides/writing-pyproject-toml/) and using [ruff](https://docs.astral.sh/ruff/) ([#96](https://github.com/scrapy/itemadapter/pull/96), [#98](https://github.com/scrapy/itemadapter/pull/98)). ### 0.11.0 (2025-01-29) Removed functions deprecated in 0.5.0: * `itemadapter.utils.is_attrs_instance()` * `itemadapter.utils.is_dataclass_instance()` * `itemadapter.utils.is_pydantic_instance()` * `itemadapter.utils.is_scrapy_item()` ([#93](https://github.com/scrapy/itemadapter/pull/93)). Added support for Pydantic 2 ([#91](https://github.com/scrapy/itemadapter/pull/91)). Added `__all__` to the top-level module to improve type checks ([#90](https://github.com/scrapy/itemadapter/pull/90)). Improved `pre-commit` and CI configuration ([#91](https://github.com/scrapy/itemadapter/pull/91), [#92](https://github.com/scrapy/itemadapter/pull/92)). ### 0.10.0 (2024-11-29) Dropped Python 3.8 support, added official Python 3.13 and PyPy 3.10 support ([#79](https://github.com/scrapy/itemadapter/pull/79), [#87](https://github.com/scrapy/itemadapter/pull/87)). Fixed the typing check when run with Scrapy 2.12.0+ ([#88](https://github.com/scrapy/itemadapter/pull/88)). Fixed `MANIFEST.in` that was missing some files ([#84](https://github.com/scrapy/itemadapter/pull/84)). Enabled `pre-commit` ([#85](https://github.com/scrapy/itemadapter/pull/85)). ### 0.9.0 (2024-05-07) Dropped Python 3.7 support, added official Python 3.12 support ([#75](https://github.com/scrapy/itemadapter/pull/75), [#77](https://github.com/scrapy/itemadapter/pull/77)). 
Updated the documentation and the type hint about `ItemAdapter.ADAPTER_CLASSES` to say that subclasses can use any iterable, not just `collections.deque` ([#74](https://github.com/scrapy/itemadapter/pull/74)). Documented that `Pydantic >= 2` is not supported yet ([#73](https://github.com/scrapy/itemadapter/pull/73)). Updated CI configuration ([#77](https://github.com/scrapy/itemadapter/pull/77), [#80](https://github.com/scrapy/itemadapter/pull/80)). ### 0.8.0 (2023-03-30) Dropped Python 3.6 support, and made Python 3.11 support official ([#65](https://github.com/scrapy/itemadapter/pull/65), [#66](https://github.com/scrapy/itemadapter/pull/66), [#69](https://github.com/scrapy/itemadapter/pull/69)). It is now possible to declare custom `ItemAdapter` subclasses with their own `ADAPTER_CLASSES` attribute, making it possible to support different item types in different parts of the same code base ([#68](https://github.com/scrapy/itemadapter/pull/68)). Improved type hint support ([#67](https://github.com/scrapy/itemadapter/pull/67)). ### 0.7.0 (2022-08-02) Added `ItemAdapter.get_field_names_from_class` ([#64](https://github.com/scrapy/itemadapter/pull/64)) ### 0.6.0 (2022-05-12) Slight performance improvement ([#62](https://github.com/scrapy/itemadapter/pull/62)) ### 0.5.0 (2022-03-18) Improved performance by removing imports inside functions ([#60](https://github.com/scrapy/itemadapter/pull/60)) ### 0.4.0 (2021-08-26) Added `ItemAdapter.is_item_class` and `ItemAdapter.get_field_meta_from_class` ([#54](https://github.com/scrapy/itemadapter/pull/54)) ### 0.3.0 (2021-07-15) Added built-in support for `pydantic` models ([#53](https://github.com/scrapy/itemadapter/pull/53)) ### 0.2.0 (2020-11-06) Adapter interface: added the ability to support arbitrary types, by implementing a MutableMapping-based interface. By way of this change, any type can now be used as a Scrapy item. ### 0.1.1 (2020-09-28) Dropped support for Python 3.5 (#38). The new `get_field_meta_from_class` function offers the same functionality as `ItemAdapter.get_field_meta` but for an item class, as opposed to an item object (#34, #35). `ItemAdapter.__repr__` no longer raises exceptions caused by the underlying item (#31, #41). Minor improvement to the release process (#37), and cleanup of test warnings (#40). ### 0.1.0 (2020-06-10) Added `ItemAdapter.asdict`, which allows converting an item and all of its nested items into `dict` objects (#27, #29). Improved `ItemAdapter` performance by reducing time complexity for lookups and traversals for dataclass and attrs items (#28). ### 0.0.8 (2020-05-22) `ItemAdapter.field_names` now returns a `KeysView` instead of a `list`. Minor CI and test changes. ### 0.0.7 (2020-05-22) `ItemAdapter.get_field_meta` now returns an empty `MappingProxyType` object for items without metadata support, instead of raising `TypeError`. Improved the README and some docstrings. Provided full test coverage, and refactored CI configuration, test configuration and tests themselves. ### 0.0.6 (2020-05-09) Added support for Scrapy’s `BaseItem`. Refactored and extended tests. Code style and documentation fixes. ### 0.0.5 (2020-04-28) Removed support for `MutableMapping`. ### 0.0.4 (2020-04-28) Removed metadata support for arbitrary mutable mappings. ### 0.0.3 (2020-04-27) Rebuild for the Python Package Index. ### 0.0.2 (2020-04-27) Split the implementation into several files for better code organization, and without an impact on the existing API import paths. Also improved the README. ### 0.0.1 (2020-04-25) Initial release. 
scrapy-itemadapter-7cce401/LICENSE000066400000000000000000000026651505555760000167460ustar00rootroot00000000000000Copyright 2020 Eugenio Lacuesta Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. scrapy-itemadapter-7cce401/README.md000066400000000000000000000511731505555760000172160ustar00rootroot00000000000000# itemadapter [![version](https://img.shields.io/pypi/v/itemadapter.svg)](https://pypi.python.org/pypi/itemadapter) [![pyversions](https://img.shields.io/pypi/pyversions/itemadapter.svg)](https://pypi.python.org/pypi/itemadapter) [![actions](https://github.com/scrapy/itemadapter/workflows/Tests/badge.svg)](https://github.com/scrapy/itemadapter/actions) [![codecov](https://codecov.io/gh/scrapy/itemadapter/branch/master/graph/badge.svg)](https://codecov.io/gh/scrapy/itemadapter) The `ItemAdapter` class is a wrapper for data container objects, providing a common interface to handle objects of different types in an uniform manner, regardless of their underlying implementation. Currently supported types are: * [`scrapy.item.Item`](https://docs.scrapy.org/en/latest/topics/items.html#scrapy.item.Item) * [`dict`](https://docs.python.org/3/library/stdtypes.html#dict) * [`dataclass`](https://docs.python.org/3/library/dataclasses.html)-based classes * [`attrs`](https://www.attrs.org)-based classes * [`pydantic`](https://pydantic-docs.helpmanual.io/)-based classes Additionally, interaction with arbitrary types is supported, by implementing a pre-defined interface (see [extending `itemadapter`](#extending-itemadapter)). 
--- ## Requirements * Python 3.9+, either the CPython implementation (default) or the PyPy implementation * [`scrapy`](https://scrapy.org/) 2.2+: optional, needed to interact with `scrapy` items * [`attrs`](https://pypi.org/project/attrs/) 20.1.0+: optional, needed to interact with `attrs`-based items * [`pydantic`](https://pypi.org/project/pydantic/) 1.8+: optional, needed to interact with `pydantic`-based items --- ## Installation `itemadapter` is available on [`PyPI`](https://pypi.python.org/pypi/itemadapter) and can be installed with `pip`: ``` pip install itemadapter ``` For `attrs`, `pydantic` and `scrapy` support, install the corresponding extra to ensure that a supported version of the corresponding dependencies is installed. For example: ``` pip install itemadapter[scrapy] ``` Mind that you can install multiple extras as needed. For example: ``` pip install itemadapter[attrs,pydantic,scrapy] ``` --- ## License `itemadapter` is distributed under a [BSD-3](https://opensource.org/licenses/BSD-3-Clause) license. --- ## Basic usage The following is a simple example using a `dataclass` object. Consider the following type definition: ```python >>> from dataclasses import dataclass >>> from itemadapter import ItemAdapter >>> @dataclass ... class InventoryItem: ... name: str ... price: float ... stock: int >>> ``` An `ItemAdapter` object can be treated much like a dictionary: ```python >>> obj = InventoryItem(name='foo', price=20.5, stock=10) >>> ItemAdapter.is_item(obj) True >>> adapter = ItemAdapter(obj) >>> len(adapter) 3 >>> adapter["name"] 'foo' >>> adapter.get("price") 20.5 >>> ``` The wrapped object is modified in-place: ```python >>> adapter["name"] = "bar" >>> adapter.update({"price": 12.7, "stock": 9}) >>> adapter.item InventoryItem(name='bar', price=12.7, stock=9) >>> adapter.item is obj True >>> ``` ### Converting to dict The `ItemAdapter` class provides the `asdict` method, which converts nested items recursively. Consider the following example: ```python >>> from dataclasses import dataclass >>> from itemadapter import ItemAdapter >>> @dataclass ... class Price: ... value: int ... currency: str >>> @dataclass ... class Product: ... name: str ... price: Price >>> ``` ```python >>> item = Product("Stuff", Price(42, "UYU")) >>> adapter = ItemAdapter(item) >>> adapter.asdict() {'name': 'Stuff', 'price': {'value': 42, 'currency': 'UYU'}} >>> ``` Note that just passing an adapter object to the `dict` built-in also works, but it doesn't traverse the object recursively converting nested items: ```python >>> dict(adapter) {'name': 'Stuff', 'price': Price(value=42, currency='UYU')} >>> ``` --- ## API reference ### Built-in adapters The following adapters are included by default: * `itemadapter.adapter.ScrapyItemAdapter`: handles `Scrapy` items * `itemadapter.adapter.DictAdapter`: handles `Python` dictionaries * `itemadapter.adapter.DataclassAdapter`: handles `dataclass` objects * `itemadapter.adapter.AttrsAdapter`: handles `attrs` objects * `itemadapter.adapter.PydanticAdapter`: handles `pydantic` objects ### class `itemadapter.adapter.ItemAdapter(item: Any)` This is the main entrypoint for the package. Typically, user code wraps an item using this class, and proceeds to handle it with the provided interface. `ItemAdapter` implements the [`MutableMapping`](https://docs.python.org/3/library/collections.abc.html#collections.abc.MutableMapping) interface, providing a `dict`-like API to manipulate data for the object it wraps (which is modified in-place). 
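For instance, here is a small illustrative sketch using a plain `dict` (any supported item type behaves the same way):

```python
>>> from itemadapter import ItemAdapter
>>> adapter = ItemAdapter({"name": "foo", "price": 10})
>>> sorted(adapter.keys())
['name', 'price']
>>> "price" in adapter
True
>>> del adapter["price"]
>>> dict(adapter)
{'name': 'foo'}
```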
**Attributes** #### class attribute `ADAPTER_CLASSES: Iterable` Stores the currently registered adapter classes. The order in which the adapters are registered is important. When an `ItemAdapter` object is created for a specific item, the registered adapters are traversed in order and the first adapter class to return `True` for the `is_item` class method is used for all subsequent operations. The default order is the one defined in the [built-in adapters](#built-in-adapters) section. The default implementation uses a [`collections.deque`](https://docs.python.org/3/library/collections.html#collections.deque) to support efficient addition/deletion of adapter classes at both ends, but if you are deriving a subclass (see the section on [extending itemadapter](#extending-itemadapter) for additional information), any other iterable (e.g. `list`, `tuple`) will work. **Methods** #### class method `is_item(item: Any) -> bool` Return `True` if any of the registered adapters can handle the item (i.e. if any of them returns `True` for its `is_item` method with `item` as argument), `False` otherwise. #### class method `is_item_class(item_class: type) -> bool` Return `True` if any of the registered adapters can handle the item class (i.e. if any of them returns `True` for its `is_item_class` method with `item_class` as argument), `False` otherwise. #### class method `get_field_meta_from_class(item_class: type, field_name: str) -> MappingProxyType` Return a [`types.MappingProxyType`](https://docs.python.org/3/library/types.html#types.MappingProxyType) object, which is a read-only mapping with metadata about the given field. If the item class does not support field metadata, or there is no metadata for the given field, an empty object is returned. The returned value is taken from the following sources, depending on the item type: * [`scrapy.item.Field`](https://docs.scrapy.org/en/latest/topics/items.html#item-fields) for `scrapy.item.Item`s * [`dataclasses.field.metadata`](https://docs.python.org/3/library/dataclasses.html#dataclasses.field) for `dataclass`-based items * [`attr.Attribute.metadata`](https://www.attrs.org/en/stable/examples.html#metadata) for `attrs`-based items * [`pydantic.fields.FieldInfo`](https://pydantic-docs.helpmanual.io/usage/schema/#field-customisation) for `pydantic`-based items #### class method `get_field_names_from_class(item_class: type) -> Optional[list[str]]` Return a list with the names of all the fields defined for the item class. If an item class doesn't support defining fields upfront, None is returned. #### class method `get_json_schema(item_class: type) -> dict[str, Any]` Return a dict with a [JSON Schema](https://json-schema.org/) representation of the item class. The generated JSON Schema reflects field type hints, attribute docstrings and class and field metadata of any supported item class. It also supports using item classes in field types of other item classes. 
For example, given: ```python from dataclasses import dataclass import attrs @dataclass class Brand: name: str @attrs.define class Product: name: str """Product name""" brand: Brand | None in_stock: bool = True ``` `ItemAdapter.get_json_schema(Product)` returns: ```python { "type": "object", "additionalProperties": False, "properties": { "name": {"type": "string", "description": "Product name"}, "brand": { "anyOf": [ {"type": "null"}, { "type": "object", "additionalProperties": False, "properties": {"name": {"type": "string"}}, "required": ["name"], }, ] }, "in_stock": {"default": True, "type": "boolean"}, }, "required": ["name", "brand"], } ``` You can also extend or override JSON Schema data at the item class or field level: - Set `json_schema_extra` in field metadata to extend or override the JSON Schema data for that field. For example: ```python >>> from scrapy.item import Item, Field >>> from itemadapter import ItemAdapter >>> class MyItem(Item): ... name: str = Field(json_schema_extra={"minLength": 1}) ... >>> ItemAdapter.get_json_schema(MyItem) {'type': 'object', 'additionalProperties': False, 'properties': {'name': {'minLength': 1, 'type': 'string'}}, 'required': ['name']} ``` - Define a `__json_schema_extra__` class attribute dict to extend or override JSON Schema data for the entire class. For example: ```python >>> from dataclasses import dataclass >>> from itemadapter import ItemAdapter >>> @dataclass ... class MyItem: ... __json_schema_extra__ = {"additionalProperties": True} ... name: str ... >>> ItemAdapter.get_json_schema(MyItem) {'additionalProperties': True, 'type': 'object', 'properties': {'name': {'type': 'string'}}, 'required': ['name']} ``` Note that, for Pydantic items, itemadapter does not use [`model_json_schema()`](https://docs.pydantic.dev/latest/api/base_model/#pydantic.BaseModel.model_json_schema) and instead uses its own implementation. That way, the output JSON Schema is consistent across different item types. It also makes it possible to generate JSON Schemas for Pydantic models that have nested non-Pydantic item classes as fields. The downside is that JSON Schema support in itemadapter may not be as advanced as Pydantic’s. The following are some known limitations of JSON Schema generation in itemadapter: - Attribute docstrings are read with [`inspect.getsource()`](https://docs.python.org/3/library/inspect.html#inspect.getsource), and may not be readable at run time in some cases. For such cases, define `description` within `json_schema_extra` instead (see above). - String pattern constraints are silently ignored if they are not compatible with JSON Schema. No effort is made to make them compatible. - Recursion is silently ignored: if you have an item class that has an attribute with that same item class as a type or as part of its type, a simple `{"type": "object"}` is used to map the nested instances of that item class. #### `get_field_meta(field_name: str) -> MappingProxyType` Return metadata for the given field, if available. Unless overridden in a custom adapter class, by default this method calls the adapter's `get_field_meta_from_class` method, passing the wrapped item's class. #### `field_names() -> collections.abc.KeysView` Return a [keys view](https://docs.python.org/3/library/collections.abc.html#collections.abc.KeysView) with the names of all the defined fields for the item. #### `asdict() -> dict` Return a `dict` object with the contents of the adapter. 
This works slightly differently from calling `dict(adapter)`, because it's applied recursively to nested items (if there are any). ### function `itemadapter.utils.is_item(obj: Any) -> bool` Return `True` if the given object belongs to (at least) one of the supported types, `False` otherwise. This is an alias; using the `itemadapter.adapter.ItemAdapter.is_item` class method is encouraged for better performance. ### function `itemadapter.utils.get_field_meta_from_class(item_class: type, field_name: str) -> types.MappingProxyType` Alias for `itemadapter.adapter.ItemAdapter.get_field_meta_from_class` --- ## Metadata support `scrapy.item.Item`, `dataclass`, `attrs`, and `pydantic` objects allow the definition of arbitrary field metadata. This can be accessed through a [`MappingProxyType`](https://docs.python.org/3/library/types.html#types.MappingProxyType) object, which can be retrieved from an item instance with `itemadapter.adapter.ItemAdapter.get_field_meta`, or from an item class with the `itemadapter.adapter.ItemAdapter.get_field_meta_from_class` method (or its alias `itemadapter.utils.get_field_meta_from_class`). The source of the data depends on the underlying type (see the docs for `ItemAdapter.get_field_meta_from_class`). #### `scrapy.item.Item` objects ```python >>> from scrapy.item import Item, Field >>> from itemadapter import ItemAdapter >>> class InventoryItem(Item): ... name = Field(serializer=str) ... value = Field(serializer=int, limit=100) ... >>> adapter = ItemAdapter(InventoryItem(name="foo", value=10)) >>> adapter.get_field_meta("name") mappingproxy({'serializer': <class 'str'>}) >>> adapter.get_field_meta("value") mappingproxy({'serializer': <class 'int'>, 'limit': 100}) >>> ``` #### `dataclass` objects ```python >>> from dataclasses import dataclass, field >>> @dataclass ... class InventoryItem: ... name: str = field(metadata={"serializer": str}) ... value: int = field(metadata={"serializer": int, "limit": 100}) ... >>> adapter = ItemAdapter(InventoryItem(name="foo", value=10)) >>> adapter.get_field_meta("name") mappingproxy({'serializer': <class 'str'>}) >>> adapter.get_field_meta("value") mappingproxy({'serializer': <class 'int'>, 'limit': 100}) >>> ``` #### `attrs` objects ```python >>> import attr >>> @attr.s ... class InventoryItem: ... name = attr.ib(metadata={"serializer": str}) ... value = attr.ib(metadata={"serializer": int, "limit": 100}) ... >>> adapter = ItemAdapter(InventoryItem(name="foo", value=10)) >>> adapter.get_field_meta("name") mappingproxy({'serializer': <class 'str'>}) >>> adapter.get_field_meta("value") mappingproxy({'serializer': <class 'int'>, 'limit': 100}) >>> ``` #### `pydantic` objects ```python >>> from pydantic import BaseModel, Field >>> class InventoryItem(BaseModel): ... name: str = Field(json_schema_extra={"serializer": str}) ... value: int = Field(json_schema_extra={"serializer": int, "limit": 100}) ... >>> adapter = ItemAdapter(InventoryItem(name="foo", value=10)) >>> adapter.get_field_meta("name") mappingproxy({'annotation': <class 'str'>, 'json_schema_extra': {'serializer': <class 'str'>}, 'repr': True}) >>> adapter.get_field_meta("value") mappingproxy({'annotation': <class 'int'>, 'json_schema_extra': {'serializer': <class 'int'>, 'limit': 100}, 'repr': True}) >>> ``` --- ## Extending `itemadapter` This package makes it possible to handle arbitrary item classes by implementing an adapter interface: _class `itemadapter.adapter.AdapterInterface(item: Any)`_ Abstract Base Class for adapters. An adapter that handles a specific type of item must inherit from this class and implement the abstract methods defined on it. 
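For illustration, the following is a minimal sketch of such an adapter for a hypothetical attribute-bag item type (`BagItem` and `BagAdapter` are made-up names, not part of itemadapter); the full method reference follows below:

```python
from collections.abc import Iterator
from typing import Any

from itemadapter.adapter import AdapterInterface


class BagItem:
    """Hypothetical item type: a plain attribute bag."""

    def __init__(self, **fields: Any) -> None:
        self.__dict__.update(fields)


class BagAdapter(AdapterInterface):
    @classmethod
    def is_item_class(cls, item_class: type) -> bool:
        return issubclass(item_class, BagItem)

    # MutableMapping abstract methods, delegating to the item's __dict__:
    def __getitem__(self, field_name: str) -> Any:
        return self.item.__dict__[field_name]

    def __setitem__(self, field_name: str, value: Any) -> None:
        self.item.__dict__[field_name] = value

    def __delitem__(self, field_name: str) -> None:
        del self.item.__dict__[field_name]

    def __iter__(self) -> Iterator:
        return iter(self.item.__dict__)

    def __len__(self) -> int:
        return len(self.item.__dict__)
```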
`AdapterInterface` inherits from [`collections.abc.MutableMapping`](https://docs.python.org/3/library/collections.abc.html#collections.abc.MutableMapping), so all methods from the `MutableMapping` interface must be implemented as well. * _class method `is_item_class(cls, item_class: type) -> bool`_ Return `True` if the adapter can handle the given item class, `False` otherwise. Abstract (mandatory). * _class method `is_item(cls, item: Any) -> bool`_ Return `True` if the adapter can handle the given item, `False` otherwise. The default implementation calls `cls.is_item_class(item.__class__)`. * _class method `get_field_meta_from_class(cls, item_class: type, field_name: str) -> types.MappingProxyType`_ Return metadata for the given item class and field name, if available. By default, this method returns an empty `MappingProxyType` object. Please supply your own method definition if you want to handle field metadata based on custom logic. See the [section on metadata support](#metadata-support) for additional information. * _method `get_field_meta(self, field_name: str) -> types.MappingProxyType`_ Return metadata for the given field name, if available. It's usually not necessary to override this method, since the `itemadapter.adapter.AdapterInterface` base class provides a default implementation that calls `ItemAdapter.get_field_meta_from_class` with the wrapped item's class as argument. See the [section on metadata support](#metadata-support) for additional information. * _method `field_names(self) -> collections.abc.KeysView`_: Return a [dynamic view](https://docs.python.org/3/library/collections.abc.html#collections.abc.KeysView) of the item's field names. By default, this method returns the result of calling `keys()` on the current adapter, i.e., its return value depends on the implementation of the methods from the `MutableMapping` interface (more specifically, it depends on the return value of `__iter__`). You might want to override this method if you want a way to get all fields for an item, whether or not they are populated. For instance, Scrapy uses this method to define column names when exporting items to CSV. ### Registering an adapter Add your custom adapter class to the `itemadapter.adapter.ItemAdapter.ADAPTER_CLASSES` class attribute in order to handle custom item classes. **Example** ``` pip install zyte-common-items ``` ```python >>> from itemadapter.adapter import ItemAdapter >>> from zyte_common_items import Item, ZyteItemAdapter >>> >>> ItemAdapter.ADAPTER_CLASSES.appendleft(ZyteItemAdapter) >>> item = Item() >>> adapter = ItemAdapter(item) >>> adapter <ItemAdapter for Item()> >>> ``` ### Multiple adapter classes If you need to have different handlers and/or priorities for different cases, you can subclass the `ItemAdapter` class and set the `ADAPTER_CLASSES` attribute as needed: **Example** ```python >>> from itemadapter.adapter import ( ... ItemAdapter, ... AttrsAdapter, ... DataclassAdapter, ... DictAdapter, ... PydanticAdapter, ... ScrapyItemAdapter, ... ) >>> from scrapy.item import Item, Field >>> >>> class BuiltinTypesItemAdapter(ItemAdapter): ... ADAPTER_CLASSES = [DictAdapter, DataclassAdapter] ... >>> class ThirdPartyTypesItemAdapter(ItemAdapter): ... ADAPTER_CLASSES = [AttrsAdapter, PydanticAdapter, ScrapyItemAdapter] ... >>> class ScrapyItem(Item): ... foo = Field() ... 
>>> BuiltinTypesItemAdapter.is_item(dict()) True >>> ThirdPartyTypesItemAdapter.is_item(dict()) False >>> BuiltinTypesItemAdapter.is_item(ScrapyItem(foo="bar")) False >>> ThirdPartyTypesItemAdapter.is_item(ScrapyItem(foo="bar")) True >>> ``` --- ## More examples ### `scrapy.item.Item` objects ```python >>> from scrapy.item import Item, Field >>> from itemadapter import ItemAdapter >>> class InventoryItem(Item): ... name = Field() ... price = Field() ... >>> item = InventoryItem(name="foo", price=10) >>> adapter = ItemAdapter(item) >>> adapter.item is item True >>> adapter["name"] 'foo' >>> adapter["name"] = "bar" >>> adapter["price"] = 5 >>> item {'name': 'bar', 'price': 5} >>> ``` ### `dict` ```python >>> from itemadapter import ItemAdapter >>> item = dict(name="foo", price=10) >>> adapter = ItemAdapter(item) >>> adapter.item is item True >>> adapter["name"] 'foo' >>> adapter["name"] = "bar" >>> adapter["price"] = 5 >>> item {'name': 'bar', 'price': 5} >>> ``` ### `dataclass` objects ```python >>> from dataclasses import dataclass >>> from itemadapter import ItemAdapter >>> @dataclass ... class InventoryItem: ... name: str ... price: int ... >>> item = InventoryItem(name="foo", price=10) >>> adapter = ItemAdapter(item) >>> adapter.item is item True >>> adapter["name"] 'foo' >>> adapter["name"] = "bar" >>> adapter["price"] = 5 >>> item InventoryItem(name='bar', price=5) >>> ``` ### `attrs` objects ```python >>> import attr >>> from itemadapter import ItemAdapter >>> @attr.s ... class InventoryItem: ... name = attr.ib() ... price = attr.ib() ... >>> item = InventoryItem(name="foo", price=10) >>> adapter = ItemAdapter(item) >>> adapter.item is item True >>> adapter["name"] 'foo' >>> adapter["name"] = "bar" >>> adapter["price"] = 5 >>> item InventoryItem(name='bar', price=5) >>> ``` ### `pydantic` objects ```python >>> from pydantic import BaseModel >>> from itemadapter import ItemAdapter >>> class InventoryItem(BaseModel): ... name: str ... price: int ... 
>>> item = InventoryItem(name="foo", price=10) >>> adapter = ItemAdapter(item) >>> adapter.item is item True >>> adapter["name"] 'foo' >>> adapter["name"] = "bar" >>> adapter["price"] = 5 >>> item InventoryItem(name='bar', price=5) >>> ``` ## Changelog See the [full changelog](Changelog.md) scrapy-itemadapter-7cce401/itemadapter/000077500000000000000000000000001505555760000202275ustar00rootroot00000000000000scrapy-itemadapter-7cce401/itemadapter/__init__.py000066400000000000000000000003011505555760000223320ustar00rootroot00000000000000from .adapter import ItemAdapter from .utils import get_field_meta_from_class, is_item __version__ = "0.12.2" __all__ = [ "ItemAdapter", "get_field_meta_from_class", "is_item", ] scrapy-itemadapter-7cce401/itemadapter/_imports.py000066400000000000000000000032101505555760000224310ustar00rootroot00000000000000from __future__ import annotations from typing import Any # attempt the following imports only once, # to be imported from itemadapter's submodules _scrapy_item_classes: tuple scrapy: Any try: import scrapy except ImportError: _scrapy_item_classes = () scrapy = None else: try: # handle deprecated base classes _base_item_cls = getattr( scrapy.item, "_BaseItem", scrapy.item.BaseItem, ) except AttributeError: _scrapy_item_classes = (scrapy.item.Item,) else: _scrapy_item_classes = (scrapy.item.Item, _base_item_cls) attr: Any try: import attr except ImportError: attr = None pydantic_v1: Any = None pydantic: Any = None try: import pydantic except ImportError: # No pydantic pass else: try: import pydantic.v1 as pydantic_v1 except ImportError: # Pydantic <1.10.17 pydantic_v1 = pydantic pydantic = None else: # Pydantic 1.10.17+ if not hasattr(pydantic.BaseModel, "model_fields"): # Pydantic <2 pydantic_v1 = pydantic pydantic = None try: from pydantic.v1.fields import Undefined as PydanticV1Undefined from pydantic_core import PydanticUndefined except ImportError: # < Pydantic 2.0 try: from pydantic.fields import ( # type: ignore[attr-defined,no-redef] Undefined as PydanticUndefined, ) from pydantic.fields import ( # type: ignore[attr-defined,no-redef] Undefined as PydanticV1Undefined, ) except ImportError: PydanticUndefined = PydanticV1Undefined = None # type: ignore[assignment] scrapy-itemadapter-7cce401/itemadapter/_json_schema.py000066400000000000000000000533061505555760000232400ustar00rootroot00000000000000from __future__ import annotations import ast import dataclasses import inspect import operator from collections.abc import Iterator, Mapping, Sequence from collections.abc import Set as AbstractSet from copy import copy from enum import Enum from textwrap import dedent from typing import ( TYPE_CHECKING, Any, Protocol, Union, get_args, get_origin, get_type_hints, runtime_checkable, ) from ._imports import PydanticUndefined, PydanticV1Undefined, attr from .utils import _is_pydantic_model if TYPE_CHECKING: from types import MappingProxyType from .adapter import AdapterInterface, ItemAdapter SIMPLE_TYPES = { type(None): "null", bool: "boolean", int: "integer", float: "number", str: "string", } @dataclasses.dataclass class _JsonSchemaState: adapter: type[ItemAdapter | AdapterInterface] """ItemAdapter class or AdapterInterface implementation used on the initial get_json_schema() call. On types for which adapter.is_item_class() returns True, adapter.get_json_schema() is used to get the corresponding, nested JSON Schema. 
""" containers: set[type] = dataclasses.field(default_factory=set) """Used to keep track of item classes that are being processed, to avoid recursion.""" def dedupe_types(types: Sequence[type]) -> list[type]: seen = set() result = [] for t in types: key = float if t in (int, float) else t if key not in seen: seen.add(key) result.append(t) return result def update_prop_from_union(prop: dict[str, Any], prop_type: Any, state: _JsonSchemaState) -> None: prop_types = dedupe_types(get_args(prop_type)) simple_types = [v for k, v in SIMPLE_TYPES.items() if k in prop_types] complex_types = sorted([t for t in prop_types if t not in SIMPLE_TYPES]) # type: ignore[type-var] if not complex_types: prop.setdefault("type", simple_types) return new_any_of: list[dict[str, Any]] = [] any_of = prop.setdefault("anyOf", new_any_of) if any_of is not new_any_of: return any_of.append({"type": simple_types if len(simple_types) > 1 else simple_types[0]}) for complex_type in complex_types: complex_prop: dict[str, Any] = {} update_prop_from_type(complex_prop, complex_type, state) any_of.append(complex_prop) @runtime_checkable class ArrayProtocol(Protocol): def __iter__(self) -> Iterator[Any]: ... def __len__(self) -> int: ... def __contains__(self, item: Any) -> bool: ... @runtime_checkable class ObjectProtocol(Protocol): # noqa: PLW1641 def __getitem__(self, key: str) -> Any: ... def __iter__(self) -> Iterator[str]: ... def __len__(self) -> int: ... def __contains__(self, key: str) -> bool: ... def keys(self): ... def items(self): ... def values(self): ... def get(self, key: str, default: Any = ...): ... def __eq__(self, other): ... def __ne__(self, other): ... INVALID_PATTERN_SUBSTRINGS = [ "(?P<", # named groups "(?<=", # lookbehind "(?", # atomic group "\\A", # start of string "\\Z", # end of string "(?i)", # inline flags (case-insensitive, etc.) "(?m)", # multiline "(?s)", # dotall "(?x)", # verbose "(?#", # comments ] def is_valid_pattern(pattern: str) -> bool: # https://ecma-international.org/publications-and-standards/standards/ecma-262/ # # Note: We allow word boundaries (\b, \B) in patterns even thought there is # a difference in behavior: in Python, they work with Unicode; in JSON # Schema, they only work with ASCII. 
return not any(sub in pattern for sub in INVALID_PATTERN_SUBSTRINGS) def array_type(type_hint): """Given the type hint of a Python type that maps to a JSON Schema array, such as a list, a tuple or a set, return the type of the items in that array.""" args = get_args(type_hint) if not args: return Any if args[-1] is Ellipsis: args = args[:-1] unique_args = set(args) if len(unique_args) == 1: return next(iter(unique_args)) return Union[tuple(unique_args)] def update_prop_from_pattern(prop: dict[str, Any], pattern: str) -> None: if is_valid_pattern(pattern): prop.setdefault("pattern", pattern) try: from types import UnionType except ImportError: # Python < 3.10 UNION_TYPES: set[Any] = {Union} else: UNION_TYPES = {Union, UnionType} def update_prop_from_origin( prop: dict[str, Any], origin: Any, prop_type: Any, state: _JsonSchemaState ) -> None: if isinstance(origin, type): if issubclass(origin, (Sequence, AbstractSet)): prop.setdefault("type", "array") if issubclass(origin, AbstractSet): prop.setdefault("uniqueItems", True) had_items = "items" in prop items = prop.setdefault("items", {}) item_type = array_type(prop_type) update_prop_from_type(items, item_type, state) if not items and not had_items: del prop["items"] return if issubclass(origin, Mapping): prop.setdefault("type", "object") args = get_args(prop_type) if args: assert len(args) == 2 value_type = args[1] props = prop.setdefault("additionalProperties", {}) update_prop_from_type(props, value_type, state) return if origin in UNION_TYPES: update_prop_from_union(prop, prop_type, state) def update_prop_from_type(prop: dict[str, Any], prop_type: Any, state: _JsonSchemaState) -> None: if (origin := get_origin(prop_type)) is not None: update_prop_from_origin(prop, origin, prop_type, state) return if isinstance(prop_type, type): if state.adapter.is_item_class(prop_type): if prop_type in state.containers: prop.setdefault("type", "object") return state.containers.add(prop_type) subschema = state.adapter.get_json_schema( prop_type, _state=state, ) state.containers.remove(prop_type) for k, v in subschema.items(): prop.setdefault(k, v) return if issubclass(prop_type, Enum): values = [item.value for item in prop_type] value_types = tuple({type(v) for v in values}) prop_type = value_types[0] if len(value_types) == 1 else Union[value_types] update_prop_from_type(prop, prop_type, state) prop.setdefault("enum", values) return if not issubclass(prop_type, str): if isinstance(prop_type, ObjectProtocol): prop.setdefault("type", "object") return if isinstance(prop_type, ArrayProtocol): prop.setdefault("type", "array") if issubclass(prop_type, AbstractSet): prop.setdefault("uniqueItems", True) return json_schema_type = SIMPLE_TYPES.get(prop_type) if json_schema_type is not None: prop.setdefault("type", json_schema_type) def _setdefault_attribute_types_on_json_schema( schema: dict[str, Any], item_class: type, state: _JsonSchemaState ) -> None: """Inspect the type hints of the class attributes of the item class and, for any matching JSON Schema property that has no type set, set the type based on the type hint.""" props = schema.get("properties", {}) attribute_type_hints = get_type_hints(item_class) for prop_name, prop in props.items(): if prop_name not in attribute_type_hints: continue prop_type = attribute_type_hints[prop_name] update_prop_from_type(prop, prop_type, state) def iter_docstrings(item_class: type, attr_names: AbstractSet[str]) -> Iterator[tuple[str, str]]: try: source = inspect.getsource(item_class) except (OSError, TypeError): return tree = 
ast.parse(dedent(source)) try: class_node = tree.body[0] except IndexError: # pragma: no cover # This can be reproduced with the doctests of the README, but the # coverage data does not seem to include those. return assert isinstance(class_node, ast.ClassDef) for node in ast.iter_child_nodes(class_node): if isinstance(node, ast.Assign) and isinstance(node.targets[0], ast.Name): attr_name = node.targets[0].id elif isinstance(node, ast.AnnAssign) and isinstance(node.target, ast.Name): attr_name = node.target.id else: continue if attr_name not in attr_names: continue next_idx = class_node.body.index(node) + 1 if next_idx >= len(class_node.body): continue next_node = class_node.body[next_idx] if ( isinstance(next_node, ast.Expr) and isinstance(next_node.value, ast.Constant) and isinstance(next_node.value.value, str) ): yield attr_name, next_node.value.value def get_inherited_attr_docstring(item_class: type, attr_name: str) -> str | None: """Recursively search the MRO for a docstring for the given attribute name.""" for cls in item_class.__mro__: for name, doc in iter_docstrings(cls, {attr_name}): if name == attr_name: return doc return None def _setdefault_attribute_docstrings_on_json_schema( schema: dict[str, Any], item_class: type ) -> None: """Inspect the docstrings after each class attribute of the item class and its bases and, for any matching JSON Schema property that has no description set, set the description to the contents of the docstring.""" props = schema.get("properties", {}) attr_names = set(props) if not attr_names: return for attr_name in attr_names: prop = props.setdefault(attr_name, {}) if "description" not in prop: doc = get_inherited_attr_docstring(item_class, attr_name) if doc: prop["description"] = doc def base_json_schema_from_item_class(item_class: type) -> dict[str, Any]: json_schema_extra = getattr(item_class, "__json_schema_extra__", {}) schema = copy(json_schema_extra) schema.setdefault("type", "object") schema.setdefault("additionalProperties", False) return schema def _json_schema_from_item_class( adapter: type[AdapterInterface], item_class: type, state: _JsonSchemaState | None = None ) -> dict[str, Any]: state = state or _JsonSchemaState(adapter=adapter, containers={item_class}) schema = base_json_schema_from_item_class(item_class) fields_meta = { field_name: adapter.get_field_meta_from_class(item_class, field_name) for field_name in adapter.get_field_names_from_class(item_class) or () } if not fields_meta: return schema schema["properties"] = { field_name: copy(field_meta.get("json_schema_extra", {})) for field_name, field_meta in fields_meta.items() } required = [ field_name for field_name, field_data in schema["properties"].items() if "default" not in field_data ] if required: schema.setdefault("required", required) return schema def update_required_fields( schema: dict[str, Any], optional_fields: set[str] | None = None ) -> None: optional_fields = optional_fields or set() if "required" in schema: return required = [ field for field, metadata in schema["properties"].items() if field not in optional_fields and "default" not in metadata ] if required: schema["required"] = required def _json_schema_from_attrs(item_class: type, state: _JsonSchemaState) -> dict[str, Any]: schema = base_json_schema_from_item_class(item_class) fields = attr.fields(item_class) if not fields: return schema from attr import resolve_types resolve_types(item_class) # Ensure field.type annotations are resolved schema["properties"] = { field.name: 
copy(field.metadata.get("json_schema_extra", {})) for field in fields } default_factory_fields: set[str] = set() for field in fields: prop = schema["properties"][field.name] _update_attrs_prop(prop, field, state, default_factory_fields) update_required_fields(schema, default_factory_fields) _setdefault_attribute_docstrings_on_json_schema(schema, item_class) return schema def _update_attrs_prop( prop: dict[str, Any], field: attr.Attribute, state: _JsonSchemaState, default_factory_fields: set[str], ) -> None: update_prop_from_type(prop, field.type, state) if isinstance(field.default, attr.Factory): default_factory_fields.add(field.name) elif field.default is not attr.NOTHING: prop.setdefault("default", field.default) _update_attrs_prop_validation(prop, field) ATTRS_NUMBER_VALIDATORS = { operator.ge: "minimum", operator.gt: "exclusiveMinimum", operator.le: "maximum", operator.lt: "exclusiveMaximum", } def _update_attrs_prop_validation( prop: dict[str, Any], field: attr.Attribute, ) -> None: if not field.validator: return if type(field.validator).__name__ == "_AndValidator": validators = field.validator._validators else: validators = [field.validator] for validator in validators: validator_type_name = type(validator).__name__ if validator_type_name == "_NumberValidator": key = ATTRS_NUMBER_VALIDATORS.get(validator.compare_func) if not key: # pragma: no cover continue prop.setdefault(key, validator.bound) elif validator_type_name == "_InValidator": prop.setdefault("enum", list(validator.options)) elif validator_type_name == "_MinLengthValidator": key = "minLength" if field.type is str else "minItems" prop.setdefault(key, validator.min_length) elif validator_type_name == "_MaxLengthValidator": key = "maxLength" if field.type is str else "maxItems" prop.setdefault(key, validator.max_length) elif validator_type_name == "_MatchesReValidator": pattern_obj = getattr(validator, "pattern", None) or validator.regex update_prop_from_pattern(prop, pattern_obj.pattern) def _json_schema_from_dataclass(item_class: type, state: _JsonSchemaState) -> dict[str, Any]: schema = base_json_schema_from_item_class(item_class) fields = dataclasses.fields(item_class) resolved_field_types = get_type_hints(item_class) default_factory_fields = set() if fields: schema["properties"] = { field.name: copy(field.metadata.get("json_schema_extra", {})) for field in fields } for field in fields: prop = schema["properties"][field.name] field_type = resolved_field_types.get(field.name) if field_type is not None: update_prop_from_type(prop, field_type, state) if field.default_factory is not dataclasses.MISSING: default_factory_fields.add(field.name) elif field.default is not dataclasses.MISSING: prop.setdefault("default", field.default) update_required_fields(schema, default_factory_fields) _setdefault_attribute_docstrings_on_json_schema(schema, item_class) return schema def _json_schema_from_pydantic( adapter: type[AdapterInterface], item_class: type, state: _JsonSchemaState | None = None ) -> dict[str, Any]: state = state or _JsonSchemaState(adapter=adapter, containers={item_class}) if not _is_pydantic_model(item_class): return _json_schema_from_pydantic_v1(adapter, item_class, state) schema = copy( item_class.model_config.get("json_schema_extra", {}) # type: ignore[attr-defined] ) extra = item_class.model_config.get("extra") # type: ignore[attr-defined] schema.setdefault("type", "object") if extra == "forbid": schema.setdefault("additionalProperties", False) fields = { name: adapter.get_field_meta_from_class(item_class, name) for 
name in adapter.get_field_names_from_class(item_class) or () } if not fields: return schema schema["properties"] = { name: copy(metadata.get("json_schema_extra", {})) for name, metadata in fields.items() } default_factory_fields: set[str] = set() for name, metadata in fields.items(): prop = schema["properties"][name] _update_pydantic_prop(prop, name, metadata, state, default_factory_fields) update_required_fields(schema, default_factory_fields) _setdefault_attribute_docstrings_on_json_schema(schema, item_class) return schema def _update_pydantic_prop( prop: dict[str, Any], name: str, metadata: MappingProxyType, _state: _JsonSchemaState, default_factory_fields: set[str], ) -> None: if "annotation" in metadata: field_type = metadata["annotation"] if field_type is not None: update_prop_from_type(prop, field_type, _state) if "default_factory" in metadata: default_factory_fields.add(name) elif "default" in metadata and metadata["default"] is not PydanticUndefined: prop.setdefault("default", metadata["default"]) if "metadata" in metadata: _update_pydantic_prop_validation(prop, metadata["metadata"], field_type) for metadata_key, json_schema_field in ( ("title", "title"), ("description", "description"), ("examples", "examples"), ): if metadata_key in metadata: prop.setdefault(json_schema_field, metadata[metadata_key]) if "deprecated" in metadata: prop.setdefault("deprecated", bool(metadata["deprecated"])) def _update_pydantic_prop_validation( prop: dict[str, Any], metadata: Sequence[Any], field_type: type, ) -> None: for metadata_item in metadata: metadata_item_type = type(metadata_item).__name__ if metadata_item_type == "_PydanticGeneralMetadata": if "pattern" in metadata_item.__dict__: pattern = metadata_item.__dict__["pattern"] update_prop_from_pattern(prop, pattern) elif metadata_item_type == "MinLen": key = "minLength" if field_type is str else "minItems" prop.setdefault(key, metadata_item.min_length) elif metadata_item_type == "MaxLen": key = "maxLength" if field_type is str else "maxItems" prop.setdefault(key, metadata_item.max_length) else: for metadata_key, json_schema_field in ( ("ge", "minimum"), ("gt", "exclusiveMinimum"), ("le", "maximum"), ("lt", "exclusiveMaximum"), ): if metadata_item_type == metadata_key.capitalize(): prop.setdefault(json_schema_field, getattr(metadata_item, metadata_key)) def _json_schema_from_pydantic_v1( adapter: type[AdapterInterface], item_class: type, state: _JsonSchemaState ) -> dict[str, Any]: schema = copy( getattr(item_class.Config, "schema_extra", {}) # type: ignore[attr-defined] ) extra = getattr(item_class.Config, "extra", None) # type: ignore[attr-defined] schema.setdefault("type", "object") if extra == "forbid": schema.setdefault("additionalProperties", False) fields = { name: adapter.get_field_meta_from_class(item_class, name) for name in adapter.get_field_names_from_class(item_class) or () } if not fields: return schema schema["properties"] = { name: copy(metadata.get("json_schema_extra", {})) for name, metadata in fields.items() } default_factory_fields: set[str] = set() field_type_hints = get_type_hints(item_class) for name, metadata in fields.items(): prop = schema["properties"][name] _update_pydantic_v1_prop( prop, name, metadata, field_type_hints, default_factory_fields, state ) update_required_fields(schema, default_factory_fields) _setdefault_attribute_docstrings_on_json_schema(schema, item_class) return schema def _update_pydantic_v1_prop( # pylint: disable=too-many-positional-arguments,too-many-arguments prop: dict[str, Any], name: str, 
metadata: Mapping[str, Any], field_type_hints: dict[str, Any], default_factory_fields: set[str], state: _JsonSchemaState, ) -> None: field_type = field_type_hints[name] if field_type is not None: update_prop_from_type(prop, field_type, state) if "default_factory" in metadata: default_factory_fields.add(name) elif "default" in metadata and metadata["default"] not in ( Ellipsis, PydanticV1Undefined, ): prop.setdefault("default", metadata["default"]) for metadata_key, json_schema_field in ( ("title", "title"), ("description", "description"), ("examples", "examples"), ("ge", "minimum"), ("gt", "exclusiveMinimum"), ("le", "maximum"), ("lt", "exclusiveMaximum"), ): if metadata_key in metadata: prop.setdefault(json_schema_field, metadata[metadata_key]) for prefix in ("min", "max"): if f"{prefix}_length" in metadata: key = f"{prefix}Length" if field_type is str else f"{prefix}Items" prop.setdefault(key, metadata[f"{prefix}_length"]) elif f"{prefix}_items" in metadata: prop.setdefault(f"{prefix}Items", metadata[f"{prefix}_items"]) for metadata_key in ("pattern", "regex"): if metadata_key in metadata: pattern = metadata[metadata_key] update_prop_from_pattern(prop, pattern) break if "deprecated" in metadata: prop.setdefault("deprecated", bool(metadata["deprecated"])) scrapy-itemadapter-7cce401/itemadapter/adapter.py000066400000000000000000000413431505555760000222260ustar00rootroot00000000000000from __future__ import annotations import dataclasses from abc import ABCMeta, abstractmethod from collections import deque from collections.abc import Iterable, Iterator, KeysView, MutableMapping from types import MappingProxyType from typing import Any from itemadapter._imports import _scrapy_item_classes, attr from itemadapter._json_schema import ( _json_schema_from_attrs, _json_schema_from_dataclass, _json_schema_from_item_class, _json_schema_from_pydantic, _JsonSchemaState, _setdefault_attribute_docstrings_on_json_schema, _setdefault_attribute_types_on_json_schema, ) from itemadapter.utils import ( _get_pydantic_model_metadata, _get_pydantic_v1_model_metadata, _is_attrs_class, _is_pydantic_model, _is_pydantic_v1_model, ) __all__ = [ "AdapterInterface", "AttrsAdapter", "DataclassAdapter", "DictAdapter", "ItemAdapter", "PydanticAdapter", "ScrapyItemAdapter", ] class AdapterInterface(MutableMapping, metaclass=ABCMeta): """Abstract Base Class for adapters. An adapter that handles a specific type of item should inherit from this class and implement the abstract methods defined here, plus the abstract methods inherited from the MutableMapping base class. """ def __init__(self, item: Any) -> None: self.item = item @classmethod @abstractmethod def is_item_class(cls, item_class: type) -> bool: """Return True if the adapter can handle the given item class, False otherwise.""" raise NotImplementedError @classmethod def is_item(cls, item: Any) -> bool: """Return True if the adapter can handle the given item, False otherwise.""" return cls.is_item_class(item.__class__) @classmethod def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: return MappingProxyType({}) @classmethod def get_field_names_from_class(cls, item_class: type) -> list[str] | None: """Return a list of fields defined for ``item_class``. 
If a class doesn't support fields, None is returned.""" return None @classmethod def get_json_schema( cls, item_class: type, *, _state: _JsonSchemaState | None = None ) -> dict[str, Any]: return _json_schema_from_item_class(cls, item_class, _state) def get_field_meta(self, field_name: str) -> MappingProxyType: """Return metadata for the given field name, if available.""" return self.get_field_meta_from_class(self.item.__class__, field_name) def field_names(self) -> KeysView: """Return a dynamic view of the item's field names.""" return self.keys() # type: ignore[return-value] class _MixinAttrsDataclassAdapter: _fields_dict: dict item: Any def get_field_meta(self, field_name: str) -> MappingProxyType: return self._fields_dict[field_name].metadata def field_names(self) -> KeysView: return KeysView(self._fields_dict) def __getitem__(self, field_name: str) -> Any: if field_name in self._fields_dict: return getattr(self.item, field_name) raise KeyError(field_name) def __setitem__(self, field_name: str, value: Any) -> None: if field_name in self._fields_dict: setattr(self.item, field_name, value) else: raise KeyError(f"{self.item.__class__.__name__} does not support field: {field_name}") def __delitem__(self, field_name: str) -> None: if field_name in self._fields_dict: try: if hasattr(self.item, field_name): delattr(self.item, field_name) else: raise AttributeError except AttributeError as ex: raise KeyError(field_name) from ex else: raise KeyError(f"{self.item.__class__.__name__} does not support field: {field_name}") def __iter__(self) -> Iterator: return iter(attr for attr in self._fields_dict if hasattr(self.item, attr)) def __len__(self) -> int: return len(list(iter(self))) class AttrsAdapter(_MixinAttrsDataclassAdapter, AdapterInterface): def __init__(self, item: Any) -> None: super().__init__(item) if attr is None: raise RuntimeError("attr module is not available") # store a reference to the item's fields to avoid O(n) lookups and O(n^2) traversals self._fields_dict = attr.fields_dict(self.item.__class__) @classmethod def is_item(cls, item: Any) -> bool: return _is_attrs_class(item) and not isinstance(item, type) @classmethod def is_item_class(cls, item_class: type) -> bool: return _is_attrs_class(item_class) @classmethod def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: if attr is None: raise RuntimeError("attr module is not available") try: return attr.fields_dict(item_class)[field_name].metadata except KeyError as ex: raise KeyError(f"{item_class.__name__} does not support field: {field_name}") from ex @classmethod def get_field_names_from_class(cls, item_class: type) -> list[str] | None: if attr is None: raise RuntimeError("attr module is not available") return [a.name for a in attr.fields(item_class)] @classmethod def get_json_schema( cls, item_class: type, *, _state: _JsonSchemaState | None = None ) -> dict[str, Any]: _state = _state or _JsonSchemaState(adapter=cls, containers={item_class}) return _json_schema_from_attrs(item_class, _state) class DataclassAdapter(_MixinAttrsDataclassAdapter, AdapterInterface): def __init__(self, item: Any) -> None: super().__init__(item) # store a reference to the item's fields to avoid O(n) lookups and O(n^2) traversals self._fields_dict = {field.name: field for field in dataclasses.fields(self.item)} @classmethod def is_item(cls, item: Any) -> bool: return dataclasses.is_dataclass(item) and not isinstance(item, type) @classmethod def is_item_class(cls, item_class: type) -> bool: return 
dataclasses.is_dataclass(item_class) @classmethod def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: for field in dataclasses.fields(item_class): if field.name == field_name: return field.metadata raise KeyError(f"{item_class.__name__} does not support field: {field_name}") @classmethod def get_field_names_from_class(cls, item_class: type) -> list[str] | None: return [a.name for a in dataclasses.fields(item_class)] @classmethod def get_json_schema( cls, item_class: type, *, _state: _JsonSchemaState | None = None ) -> dict[str, Any]: _state = _state or _JsonSchemaState(adapter=cls, containers={item_class}) return _json_schema_from_dataclass(item_class, _state) class PydanticAdapter(AdapterInterface): item: Any @classmethod def is_item_class(cls, item_class: type) -> bool: return _is_pydantic_model(item_class) or _is_pydantic_v1_model(item_class) @classmethod def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: try: try: return _get_pydantic_model_metadata(item_class, field_name) except AttributeError: return _get_pydantic_v1_model_metadata(item_class, field_name) except KeyError as ex: raise KeyError(f"{item_class.__name__} does not support field: {field_name}") from ex @classmethod def get_field_names_from_class(cls, item_class: type) -> list[str] | None: try: return list(item_class.model_fields.keys()) # type: ignore[attr-defined] except AttributeError: return list(item_class.__fields__.keys()) # type: ignore[attr-defined] @classmethod def get_json_schema( cls, item_class: type, *, _state: _JsonSchemaState | None = None ) -> dict[str, Any]: return _json_schema_from_pydantic(cls, item_class, _state) def field_names(self) -> KeysView: try: return KeysView(self.item.__class__.model_fields) except AttributeError: return KeysView(self.item.__fields__) def __getitem__(self, field_name: str) -> Any: try: self.item.__class__.model_fields # noqa: B018 except AttributeError: if field_name in self.item.__fields__: return getattr(self.item, field_name) else: if field_name in self.item.__class__.model_fields: return getattr(self.item, field_name) raise KeyError(field_name) def __setitem__(self, field_name: str, value: Any) -> None: try: self.item.__class__.model_fields # noqa: B018 except AttributeError: if field_name in self.item.__fields__: setattr(self.item, field_name, value) return else: if field_name in self.item.__class__.model_fields: setattr(self.item, field_name, value) return raise KeyError(f"{self.item.__class__.__name__} does not support field: {field_name}") def __delitem__(self, field_name: str) -> None: try: self.item.__class__.model_fields # noqa: B018 except AttributeError as ex: if field_name in self.item.__fields__: try: if hasattr(self.item, field_name): delattr(self.item, field_name) return raise AttributeError from ex except AttributeError as ex2: raise KeyError(field_name) from ex2 else: if field_name in self.item.__class__.model_fields: try: if hasattr(self.item, field_name): delattr(self.item, field_name) return raise AttributeError except AttributeError as ex: raise KeyError(field_name) from ex raise KeyError(f"{self.item.__class__.__name__} does not support field: {field_name}") def __iter__(self) -> Iterator: try: return iter( attr for attr in self.item.__class__.model_fields if hasattr(self.item, attr) ) except AttributeError: return iter(attr for attr in self.item.__fields__ if hasattr(self.item, attr)) def __len__(self) -> int: return len(list(iter(self))) class _MixinDictScrapyItemAdapter: 
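# Shared plumbing for DictAdapter and ScrapyItemAdapter below: the wrapped
# objects already implement the mapping protocol themselves, so every dunder
# method here delegates straight to the underlying item.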
_fields_dict: dict item: Any def __getitem__(self, field_name: str) -> Any: return self.item[field_name] def __setitem__(self, field_name: str, value: Any) -> None: self.item[field_name] = value def __delitem__(self, field_name: str) -> None: del self.item[field_name] def __iter__(self) -> Iterator: return iter(self.item) def __len__(self) -> int: return len(self.item) class DictAdapter(_MixinDictScrapyItemAdapter, AdapterInterface): @classmethod def is_item(cls, item: Any) -> bool: return isinstance(item, dict) @classmethod def is_item_class(cls, item_class: type) -> bool: return issubclass(item_class, dict) @classmethod def get_json_schema( cls, item_class: type, *, _state: _JsonSchemaState | None = None ) -> dict[str, Any]: return {"type": "object"} def field_names(self) -> KeysView: return KeysView(self.item) class ScrapyItemAdapter(_MixinDictScrapyItemAdapter, AdapterInterface): @classmethod def is_item(cls, item: Any) -> bool: return isinstance(item, _scrapy_item_classes) @classmethod def is_item_class(cls, item_class: type) -> bool: return issubclass(item_class, _scrapy_item_classes) @classmethod def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: return MappingProxyType(item_class.fields[field_name]) # type: ignore[attr-defined] @classmethod def get_field_names_from_class(cls, item_class: type) -> list[str] | None: return list(item_class.fields.keys()) # type: ignore[attr-defined] @classmethod def get_json_schema( cls, item_class: type, *, _state: _JsonSchemaState | None = None ) -> dict[str, Any]: _state = _state or _JsonSchemaState(adapter=cls, containers={item_class}) schema = super().get_json_schema(item_class, _state=_state) _setdefault_attribute_types_on_json_schema(schema, item_class, _state) _setdefault_attribute_docstrings_on_json_schema(schema, item_class) return schema def field_names(self) -> KeysView: return KeysView(self.item.fields) class ItemAdapter(MutableMapping): """Wrapper class to interact with data container objects. It provides a common interface to extract and set data without having to take the object's type into account. 
""" ADAPTER_CLASSES: Iterable[type[AdapterInterface]] = deque( [ ScrapyItemAdapter, DictAdapter, DataclassAdapter, AttrsAdapter, PydanticAdapter, ] ) def __init__(self, item: Any) -> None: for cls in self.ADAPTER_CLASSES: if cls.is_item(item): self.adapter = cls(item) break else: raise TypeError(f"No adapter found for objects of type: {type(item)} ({item})") @classmethod def is_item(cls, item: Any) -> bool: return any(adapter_class.is_item(item) for adapter_class in cls.ADAPTER_CLASSES) @classmethod def is_item_class(cls, item_class: type) -> bool: return any( adapter_class.is_item_class(item_class) for adapter_class in cls.ADAPTER_CLASSES ) @classmethod def _get_adapter_class(cls, item_class: type) -> type[AdapterInterface]: for adapter_class in cls.ADAPTER_CLASSES: if adapter_class.is_item_class(item_class): return adapter_class raise TypeError(f"{item_class} is not a valid item class") @classmethod def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: adapter_class = cls._get_adapter_class(item_class) return adapter_class.get_field_meta_from_class(item_class, field_name) @classmethod def get_field_names_from_class(cls, item_class: type) -> list[str] | None: adapter_class = cls._get_adapter_class(item_class) return adapter_class.get_field_names_from_class(item_class) @classmethod def get_json_schema( cls, item_class: type, *, _state: _JsonSchemaState | None = None ) -> dict[str, Any]: _state = _state or _JsonSchemaState(adapter=cls, containers={item_class}) adapter_class = cls._get_adapter_class(item_class) return adapter_class.get_json_schema(item_class, _state=_state) @property def item(self) -> Any: return self.adapter.item def __repr__(self) -> str: values = ", ".join([f"{key}={value!r}" for key, value in self.items()]) return f"<{self.__class__.__name__} for {self.item.__class__.__name__}({values})>" def __getitem__(self, field_name: str) -> Any: return self.adapter.__getitem__(field_name) def __setitem__(self, field_name: str, value: Any) -> None: self.adapter.__setitem__(field_name, value) def __delitem__(self, field_name: str) -> None: self.adapter.__delitem__(field_name) def __iter__(self) -> Iterator: return self.adapter.__iter__() def __len__(self) -> int: return self.adapter.__len__() def get_field_meta(self, field_name: str) -> MappingProxyType: """Return metadata for the given field name.""" return self.adapter.get_field_meta(field_name) def field_names(self) -> KeysView: """Return read-only key view with the names of all the defined fields for the item.""" return self.adapter.field_names() def asdict(self) -> dict: """Return a dict object with the contents of the adapter. This works slightly different than calling `dict(adapter)`: it's applied recursively to nested items (if there are any). 
""" return {key: self._asdict(value) for key, value in self.items()} @classmethod def _asdict(cls, obj: Any) -> Any: if isinstance(obj, dict): return {key: cls._asdict(value) for key, value in obj.items()} if isinstance(obj, (list, set, tuple)): return obj.__class__(cls._asdict(x) for x in obj) if isinstance(obj, cls): return obj.asdict() if cls.is_item(obj): return cls(obj).asdict() return obj scrapy-itemadapter-7cce401/itemadapter/py.typed000066400000000000000000000000001505555760000217140ustar00rootroot00000000000000scrapy-itemadapter-7cce401/itemadapter/utils.py000066400000000000000000000102661505555760000217460ustar00rootroot00000000000000from __future__ import annotations from types import MappingProxyType from typing import Any from itemadapter._imports import ( PydanticUndefined, PydanticV1Undefined, attr, pydantic, pydantic_v1, ) __all__ = ["get_field_meta_from_class", "is_item"] def _is_attrs_class(obj: Any) -> bool: if attr is None: return False return attr.has(obj) def _is_pydantic_model(obj: Any) -> bool: if pydantic is None: return False return issubclass(obj, pydantic.BaseModel) def _is_pydantic_v1_model(obj: Any) -> bool: if pydantic_v1 is None: return False return issubclass(obj, pydantic_v1.BaseModel) def _get_pydantic_model_metadata(item_model: Any, field_name: str) -> MappingProxyType: metadata = {} field = item_model.model_fields[field_name] for attribute in [ "alias_priority", "alias", "allow_inf_nan", "annotation", "coerce_numbers_to_str", "decimal_places", "default_factory", "deprecated", "description", "discriminator", "examples", "exclude", "fail_fast", "field_title_generator", "frozen", "ge", "gt", "init_var", "init", "json_schema_extra", "kw_only", "le", "lt", "max_digits", "max_length", "min_length", "multiple_of", "pattern", "repr", "serialization_alias", "strict", "title", "union_mode", "validate_default", "validation_alias", ]: if hasattr(field, attribute) and (value := getattr(field, attribute)) is not None: metadata[attribute] = value for attribute, default_value in [ ("default", PydanticUndefined), ("metadata", []), ]: if hasattr(field, attribute) and (value := getattr(field, attribute)) != default_value: metadata[attribute] = value return MappingProxyType(metadata) def _get_pydantic_v1_model_metadata(item_model: Any, field_name: str) -> MappingProxyType: metadata = {} field = item_model.__fields__[field_name] field_info = field.field_info for attribute in [ "alias", "const", "description", "ge", "gt", "le", "lt", "max_items", "max_length", "min_items", "min_length", "multiple_of", "regex", "title", ]: value = getattr(field_info, attribute) if value is not None: metadata[attribute] = value if (value := field_info.default) not in (PydanticV1Undefined, Ellipsis): metadata["default"] = value if value := field.default_factory is not None: metadata["default_factory"] = value if not field_info.allow_mutation: metadata["allow_mutation"] = field_info.allow_mutation metadata.update(field_info.extra) return MappingProxyType(metadata) def is_item(obj: Any) -> bool: """Return True if the given object belongs to one of the supported types, False otherwise. Alias for ItemAdapter.is_item """ from itemadapter.adapter import ItemAdapter return ItemAdapter.is_item(obj) def get_field_meta_from_class(item_class: type, field_name: str) -> MappingProxyType: """Return a read-only mapping with metadata for the given field name, within the given item class. If there is no metadata for the field, or the item class does not support field metadata, an empty object is returned. 
Field metadata is taken from different sources, depending on the item type: * scrapy.item.Item: corresponding scrapy.item.Field object * dataclass items: "metadata" attribute for the corresponding field * attrs items: "metadata" attribute for the corresponding field * pydantic models: corresponding pydantic.field.FieldInfo/ModelField object The returned value is an instance of types.MappingProxyType, i.e. a dynamic read-only view of the original mapping, which gets automatically updated if the original mapping changes. """ from itemadapter.adapter import ItemAdapter return ItemAdapter.get_field_meta_from_class(item_class, field_name) scrapy-itemadapter-7cce401/pyproject.toml000066400000000000000000000130531505555760000206460ustar00rootroot00000000000000[build-system] requires = ["hatchling"] build-backend = "hatchling.build" [project] name = "itemadapter" description = "Common interface for data container classes" authors = [ { name = "Eugenio Lacuesta", email = "eugenio.lacuesta@gmail.com" }, ] readme = "README.md" license = "BSD-3-Clause" license-files = ["LICENSE"] requires-python = ">=3.9" classifiers = [ "Development Status :: 3 - Alpha", "Intended Audience :: Developers", "Programming Language :: Python", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.9", "Programming Language :: Python :: 3.10", "Programming Language :: Python :: 3.11", "Programming Language :: Python :: 3.12", "Programming Language :: Python :: 3.13", "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", "Operating System :: OS Independent", "Framework :: Scrapy", "Topic :: Internet :: WWW/HTTP", "Topic :: Software Development :: Libraries :: Application Frameworks", "Topic :: Software Development :: Libraries :: Python Modules", ] dynamic = ["version"] [project.optional-dependencies] attrs = [ "attrs>=20.1.0", ] pydantic = [ "pydantic>=1.8", ] scrapy = [ "scrapy>=2.2", ] [project.urls] Homepage = "https://github.com/scrapy/itemadapter" Source = "https://github.com/scrapy/itemadapter" Tracker = "https://github.com/scrapy/itemadapter/issues" "Release notes" = "https://github.com/scrapy/itemadapter/blob/master/Changelog.md" [tool.hatch.version] path = "itemadapter/__init__.py" [tool.hatch.build.targets.sdist] include = [ "/itemadapter", "/tests", "/Changelog.md", "/tox.ini", ] [tool.bumpversion] current_version = "0.12.2" commit = true tag = true [[tool.bumpversion.files]] filename = "Changelog.md" search = "\\(unreleased\\)$" replace = "({now:%Y-%m-%d})" regex = true [[tool.bumpversion.files]] filename = "itemadapter/__init__.py" [tool.pylint.MASTER] persistent = "no" load-plugins=[ "pylint_per_file_ignores", ] [tool.pylint."MESSAGES CONTROL"] enable = [ "useless-suppression", ] disable = [ "cyclic-import", "duplicate-code", "import-error", "import-outside-toplevel", "invalid-name", "line-too-long", # https://docs.astral.sh/ruff/rules/line-too-long/ "missing-class-docstring", "missing-function-docstring", "missing-module-docstring", "not-callable", "pointless-statement", "protected-access", "too-few-public-methods", "too-many-return-statements", "unused-argument", ] per-file-ignores = [ "/tests/:disallowed-name,too-many-instance-attributes", "/itemadapter/_imports.py:unused-import,ungrouped-imports", ] [tool.pylint.FORMAT] expected-line-ending-format = "LF" max-line-length = 99 [tool.pylint.IMPORTS] allow-any-import-level = [ "attr", "dataclasses", "scrapy" ] [tool.pytest.ini_options] filterwarnings = [ 
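# Ignore warnings whose message mentions Scrapy's deprecated BaseItem class.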
"ignore:.*BaseItem.*", ] [tool.ruff] line-length = 99 [tool.ruff.lint] extend-select = [ # flake8-bugbear "B", # flake8-comprehensions "C4", # pydocstyle "D", # flake8-future-annotations "FA", # flynt "FLY", # refurb "FURB", # isort "I", # flake8-implicit-str-concat "ISC", # flake8-logging "LOG", # Perflint "PERF", # pygrep-hooks "PGH", # flake8-pie "PIE", # pylint "PL", # flake8-use-pathlib "PTH", # flake8-pyi "PYI", # flake8-quotes "Q", # flake8-return "RET", # flake8-raise "RSE", # Ruff-specific rules "RUF", # flake8-bandit "S", # flake8-simplify "SIM", # flake8-slots "SLOT", # flake8-debugger "T10", # flake8-type-checking "TC", # pyupgrade "UP", # pycodestyle warnings "W", # flake8-2020 "YTT", ] ignore = [ # Missing docstring in public module "D100", # Missing docstring in public class "D101", # Missing docstring in public method "D102", # Missing docstring in public function "D103", # Missing docstring in public package "D104", # Missing docstring in magic method "D105", # Missing docstring in public nested class "D106", # Missing docstring in __init__ "D107", # One-line docstring should fit on one line with quotes "D200", # No blank lines allowed after function docstring "D202", # 1 blank line required between summary line and description "D205", # Multi-line docstring closing quotes should be on a separate line "D209", # First line should end with a period "D400", # First line should be in imperative mood; try rephrasing "D401", # First line should not be the function's "signature" "D402", # First word of the first line should be properly capitalized "D403", # `import` should be at the top-level of a file "PLC0415", # Too many return statements "PLR0911", # Too many branches "PLR0912", # Too many arguments in function definition "PLR0913", # Too many statements "PLR0915", # Magic value used in comparison "PLR2004", # String contains ambiguous {}. "RUF001", # Docstring contains ambiguous {}. "RUF002", # Comment contains ambiguous {}. 
"RUF003", # Mutable class attributes should be annotated with `typing.ClassVar` "RUF012", # Use of `assert` detected "S101", ] [tool.ruff.lint.pydocstyle] convention = "pep257" [tool.ruff.lint.pyupgrade] # for Pydantic annotations while we support Python 3.92 keep-runtime-typing = true scrapy-itemadapter-7cce401/tests/000077500000000000000000000000001505555760000170725ustar00rootroot00000000000000scrapy-itemadapter-7cce401/tests/__init__.py000066400000000000000000000247601505555760000212140ustar00rootroot00000000000000from __future__ import annotations import importlib import sys from contextlib import contextmanager from dataclasses import dataclass, field from enum import Enum from typing import TYPE_CHECKING, Any, Callable, Optional, Union from itemadapter import ItemAdapter from itemadapter._imports import pydantic, pydantic_v1 if TYPE_CHECKING: from collections.abc import Generator def make_mock_import(block_name: str) -> Callable: def mock_import(name: str, *args, **kwargs): """Prevent importing a specific module, let everything else pass.""" if name.split(".")[0] == block_name: raise ImportError(name) return importlib.__import__(name, *args, **kwargs) return mock_import @contextmanager def clear_itemadapter_imports() -> Generator[None]: backup = {} for key in sys.modules.copy(): if key.startswith("itemadapter"): backup[key] = sys.modules.pop(key) try: yield finally: sys.modules.update(backup) class Color(Enum): RED = "red" GREEN = "green" BLUE = "blue" @dataclass class DataClassItem: name: str = field(default_factory=lambda: None, metadata={"serializer": str}) value: int = field(default_factory=lambda: None, metadata={"serializer": int}) @dataclass class DataClassItemNested: nested: DataClassItem adapter: ItemAdapter dict_: dict list_: list set_: set tuple_: tuple int_: int @dataclass(init=False) class DataClassWithoutInit: name: str = field(metadata={"serializer": str}) value: int = field(metadata={"serializer": int}) @dataclass class DataClassItemSubclassed(DataClassItem): subclassed: bool = True @dataclass class DataClassItemEmpty: pass @dataclass class DataClassItemJsonSchemaNested: is_nested: bool = True @dataclass class DataClassItemJsonSchema: __json_schema_extra__ = { "llmHint": "Hi model!", } name: str = field(metadata={"json_schema_extra": {"title": "Name"}}) """Display name""" color: Color answer: Union[str, float, int, None] numbers: list[float] aliases: dict[str, str] nested: DataClassItemJsonSchemaNested nested_list: list[DataClassItemJsonSchemaNested] nested_dict: dict[str, DataClassItemJsonSchemaNested] nested_dict_list: list[dict[str, DataClassItemJsonSchemaNested]] value: Any = None produced: bool = field(default_factory=lambda: True) try: import attr except ImportError: AttrsItem = None AttrsItemNested = None AttrsItemWithoutInit = None AttrsItemSubclassed = None AttrsItemEmpty = None AttrsItemJsonSchema = None AttrsItemJsonSchemaNested = None else: @attr.s class AttrsItem: name = attr.ib(default=None, metadata={"serializer": str}) value = attr.ib(default=None, metadata={"serializer": int}) @attr.s class AttrsItemNested: nested = attr.ib(type=AttrsItem) adapter = attr.ib(type=ItemAdapter) dict_ = attr.ib(type=dict) list_ = attr.ib(type=list) set_ = attr.ib(type=set) tuple_ = attr.ib(type=tuple) int_ = attr.ib(type=int) @attr.s(init=False) class AttrsItemWithoutInit: name = attr.ib(default=None, metadata={"serializer": str}) value = attr.ib(default=None, metadata={"serializer": int}) @attr.s(init=False) class AttrsItemSubclassed(AttrsItem): subclassed = 
attr.ib(default=True, type=bool) @attr.s class AttrsItemEmpty: pass @attr.s class AttrsItemJsonSchemaNested: is_nested: bool = attr.ib(default=True) @attr.s class AttrsItemJsonSchema: __json_schema_extra__ = { "llmHint": "Hi model!", } name: str = attr.ib(metadata={"json_schema_extra": {"title": "Name"}}) """Display name""" color: Color = attr.ib() answer: Union[str, float, int, None] = attr.ib() numbers: list[float] = attr.ib() aliases: dict[str, str] = attr.ib() nested: AttrsItemJsonSchemaNested = attr.ib() nested_list: list[AttrsItemJsonSchemaNested] = attr.ib() nested_dict: dict[str, AttrsItemJsonSchemaNested] = attr.ib() nested_dict_list: list[dict[str, AttrsItemJsonSchemaNested]] = attr.ib() value: Any = attr.ib(default=None) produced: bool = attr.ib(factory=lambda: True) if pydantic_v1 is None: PydanticV1Model = None PydanticV1SpecialCasesModel = None PydanticV1ModelNested = None PydanticV1ModelSubclassed = None PydanticV1ModelEmpty = None PydanticV1ModelJsonSchema = None PydanticV1ModelJsonSchemaNested = None else: class PydanticV1Model(pydantic_v1.BaseModel): name: Optional[str] = pydantic_v1.Field( default_factory=lambda: None, serializer=str, ) value: Optional[int] = pydantic_v1.Field( default_factory=lambda: None, serializer=int, ) class PydanticV1SpecialCasesModel(pydantic_v1.BaseModel): special_cases: Optional[int] = pydantic_v1.Field( default_factory=lambda: None, alias="special_cases", allow_mutation=False, ) class Config: validate_assignment = True class PydanticV1ModelNested(pydantic_v1.BaseModel): nested: PydanticV1Model adapter: ItemAdapter dict_: dict list_: list set_: set tuple_: tuple int_: int class Config: arbitrary_types_allowed = True class PydanticV1ModelSubclassed(PydanticV1Model): subclassed: bool = pydantic_v1.Field( default_factory=lambda: True, ) class PydanticV1ModelEmpty(pydantic_v1.BaseModel): pass class PydanticV1ModelJsonSchemaNested(pydantic_v1.BaseModel): is_nested: bool = True class PydanticV1ModelJsonSchema(pydantic_v1.BaseModel): name: str = pydantic_v1.Field(title="Name", description="Display name") value: Any = None color: Color produced: bool answer: Union[str, float, int, None] numbers: list[float] aliases: dict[str, str] nested: PydanticV1ModelJsonSchemaNested nested_list: list[PydanticV1ModelJsonSchemaNested] nested_dict: dict[str, PydanticV1ModelJsonSchemaNested] nested_dict_list: list[dict[str, PydanticV1ModelJsonSchemaNested]] class Config: schema_extra = { "llmHint": "Hi model!", } if pydantic is None: PydanticModel = None PydanticSpecialCasesModel = None PydanticModelNested = None PydanticModelSubclassed = None PydanticModelEmpty = None PydanticModelJsonSchema = None PydanticModelJsonSchemaNested = None else: class PydanticModel(pydantic.BaseModel): name: Optional[str] = pydantic.Field( default_factory=lambda: None, json_schema_extra={"serializer": str}, ) value: Optional[int] = pydantic.Field( default_factory=lambda: None, json_schema_extra={"serializer": int}, ) class PydanticSpecialCasesModel(pydantic.BaseModel): special_cases: Optional[int] = pydantic.Field( default_factory=lambda: None, alias="special_cases", frozen=True, ) model_config = { "validate_assignment": True, } class PydanticModelNested(pydantic.BaseModel): nested: PydanticModel adapter: ItemAdapter dict_: dict list_: list set_: set tuple_: tuple int_: int model_config = { "arbitrary_types_allowed": True, } class PydanticModelSubclassed(PydanticModel): subclassed: bool = pydantic.Field( default_factory=lambda: True, ) class PydanticModelEmpty(pydantic.BaseModel): pass 
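# A minimal, hedged usage sketch for the Pydantic fixtures defined above. It
# is not a fixture used by the test suite, and the helper name
# _pydantic_adapter_example is made up for illustration only.
def _pydantic_adapter_example():
    if pydantic is None:  # keep the sketch import-safe when pydantic is absent
        return None
    adapter = ItemAdapter(PydanticModel(name="foo", value=1))
    assert adapter["name"] == "foo"  # mapping-style read of a model field
    adapter["value"] = 2  # mapping-style write; unknown names raise KeyError
    return adapter.asdict()  # {"name": "foo", "value": 2}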
class PydanticModelJsonSchemaNested(pydantic.BaseModel): is_nested: bool = True class PydanticModelJsonSchema(pydantic.BaseModel): name: str = pydantic.Field(description="Display name", title="Name") value: Any = None color: Color produced: bool = pydantic.Field(default_factory=lambda: True) answer: Union[str, float, int, None] numbers: list[float] aliases: dict[str, str] nested: PydanticModelJsonSchemaNested nested_list: list[PydanticModelJsonSchemaNested] nested_dict: dict[str, PydanticModelJsonSchemaNested] nested_dict_list: list[dict[str, PydanticModelJsonSchemaNested]] model_config = { "json_schema_extra": { "llmHint": "Hi model!", }, } try: from scrapy.item import Field from scrapy.item import Item as ScrapyItem except ImportError: ScrapyItem = None ScrapySubclassedItem = None ScrapySubclassedItemNested = None ScrapySubclassedItemSubclassed = None ScrapySubclassedItemEmpty = None ScrapySubclassedItemJsonSchema = None ScrapySubclassedItemJsonSchemaNested = None else: class ScrapySubclassedItem(ScrapyItem): name = Field(serializer=str) value = Field(serializer=int) class ScrapySubclassedItemNested(ScrapyItem): nested = Field() adapter = Field() dict_ = Field() list_ = Field() set_ = Field() tuple_ = Field() int_ = Field() class ScrapySubclassedItemSubclassed(ScrapySubclassedItem): subclassed = Field() class ScrapySubclassedItemEmpty(ScrapyItem): pass class ScrapySubclassedItemJsonSchemaNested(ScrapyItem): is_nested: bool = Field( json_schema_extra={ "default": True, }, ) class ScrapySubclassedItemJsonSchema(ScrapyItem): __json_schema_extra__ = { "llmHint": "Hi model!", } name: str = Field( json_schema_extra={ "title": "Name", }, ) """Display name""" value = Field( json_schema_extra={ "default": None, }, ) color: Color = Field() produced = Field() answer: Union[str, float, int, None] = Field() numbers: list[float] = Field() aliases: dict[str, str] = Field() nested: ScrapySubclassedItemJsonSchemaNested = Field() nested_list: list[ScrapySubclassedItemJsonSchemaNested] = Field() nested_dict: dict[str, ScrapySubclassedItemJsonSchemaNested] = Field() nested_dict_list: list[dict[str, ScrapySubclassedItemJsonSchemaNested]] = Field() scrapy-itemadapter-7cce401/tests/test_adapter.py000066400000000000000000000475571505555760000221450ustar00rootroot00000000000000from __future__ import annotations import unittest from collections.abc import KeysView from types import MappingProxyType from itemadapter.adapter import ItemAdapter, PydanticAdapter from tests import ( AttrsItem, AttrsItemEmpty, AttrsItemJsonSchema, AttrsItemNested, AttrsItemSubclassed, AttrsItemWithoutInit, DataClassItem, DataClassItemEmpty, DataClassItemJsonSchema, DataClassItemNested, DataClassItemSubclassed, DataClassWithoutInit, PydanticModel, PydanticModelEmpty, PydanticModelJsonSchema, PydanticModelNested, PydanticModelSubclassed, PydanticV1Model, PydanticV1ModelEmpty, PydanticV1ModelJsonSchema, PydanticV1ModelNested, PydanticV1ModelSubclassed, ScrapySubclassedItem, ScrapySubclassedItemEmpty, ScrapySubclassedItemJsonSchema, ScrapySubclassedItemNested, ScrapySubclassedItemSubclassed, ) from tests.test_json_schema import check_schemas class ItemAdapterReprTestCase(unittest.TestCase): def test_repr_dict(self): item = {"name": "asdf", "value": 1234} adapter = ItemAdapter(item) self.assertEqual(repr(adapter), "<ItemAdapter for dict(name='asdf', value=1234)>") @unittest.skipIf(not ScrapySubclassedItem, "scrapy module is not available") def test_repr_scrapy_item(self): item = ScrapySubclassedItem(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual( repr(adapter), "<ItemAdapter for ScrapySubclassedItem(name='asdf', value=1234)>", ) @unittest.skipIf(not DataClassItem, "dataclasses module is not available") def test_repr_dataclass(self): item = DataClassItem(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual( repr(adapter), "<ItemAdapter for DataClassItem(name='asdf', value=1234)>", ) @unittest.skipIf(not DataClassWithoutInit, "dataclasses module is not available") def test_repr_dataclass_init_false(self): item = DataClassWithoutInit() adapter = ItemAdapter(item) self.assertEqual(repr(adapter), "<ItemAdapter for DataClassWithoutInit()>") adapter["name"] = "set after init" self.assertEqual( repr(adapter), "<ItemAdapter for DataClassWithoutInit(name='set after init')>", ) @unittest.skipIf(not AttrsItem, "attrs module is not available") def test_repr_attrs(self): item = AttrsItem(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual( repr(adapter), "<ItemAdapter for AttrsItem(name='asdf', value=1234)>", ) @unittest.skipIf(not AttrsItemWithoutInit, "attrs module is not available") def test_repr_attrs_init_false(self): item = AttrsItemWithoutInit() adapter = ItemAdapter(item) self.assertEqual(repr(adapter), "<ItemAdapter for AttrsItemWithoutInit()>") adapter["name"] = "set after init" self.assertEqual( repr(adapter), "<ItemAdapter for AttrsItemWithoutInit(name='set after init')>", ) @unittest.skipIf(not PydanticV1Model, "pydantic module is not available") def test_repr_pydantic(self): item = PydanticV1Model(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual( repr(adapter), "<ItemAdapter for PydanticV1Model(name='asdf', value=1234)>", ) class ItemAdapterInitError(unittest.TestCase): def test_non_item(self): with self.assertRaises(TypeError): ItemAdapter(ScrapySubclassedItem) with self.assertRaises(TypeError): ItemAdapter(dict) with self.assertRaises(TypeError): ItemAdapter(1234) class BaseTestMixin: maxDiff = None item_class = None item_class_nested = None item_class_json_schema = None def setUp(self): if self.item_class is None: raise unittest.SkipTest def test_get_set_value(self): item = self.item_class() adapter = ItemAdapter(item) self.assertEqual(adapter.get("name"), None) self.assertEqual(adapter.get("value"), None) adapter["name"] = "asdf" adapter["value"] = 1234 self.assertEqual(adapter.get("name"), "asdf") self.assertEqual(adapter.get("value"), 1234) self.assertEqual(adapter["name"], "asdf") self.assertEqual(adapter["value"], 1234) item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual(adapter.get("name"), "asdf") self.assertEqual(adapter.get("value"), 1234) self.assertEqual(adapter["name"], "asdf") self.assertEqual(adapter["value"], 1234) def test_get_value_keyerror(self): item = self.item_class() adapter = ItemAdapter(item) with self.assertRaises(KeyError): adapter["undefined_field"] def test_as_dict(self): item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual({"name": "asdf", "value": 1234}, dict(adapter)) def test_as_dict_nested(self): item = self.item_class_nested( nested=self.item_class(name="asdf", value=1234), adapter=ItemAdapter({"foo": "bar", "nested_list": [1, 2, 3, 4, 5]}), dict_={"foo": "bar", "answer": 42, "nested_dict": {"a": "b"}}, list_=[1, 2, 3], set_={1, 2, 3}, tuple_=(1, 2, 3), int_=123, ) adapter = ItemAdapter(item) self.assertEqual( adapter.asdict(), { "nested": {"name": "asdf", "value": 1234}, "adapter": {"foo": "bar", "nested_list": [1, 2, 3, 4, 5]}, "dict_": {"foo": "bar", "answer": 42, "nested_dict": {"a": "b"}}, "list_": [1, 2, 3], "set_": {1, 2, 3}, "tuple_": (1, 2, 3), "int_": 123, }, ) def test_field_names(self): item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertIsInstance(adapter.field_names(), KeysView) self.assertEqual(sorted(adapter.field_names()), ["name", "value"]) def test_json_schema(self): item_class = self.item_class_json_schema actual =
ItemAdapter.get_json_schema(item_class) check_schemas(self.expected_json_schema, actual) def test_json_schema_empty(self): actual = ItemAdapter.get_json_schema(self.item_class_empty) expected = {"type": "object"} if self.item_class_empty is not dict and not PydanticAdapter.is_item_class( self.item_class_empty ): expected["additionalProperties"] = False check_schemas(actual, expected) _NESTED_JSON_SCHEMA = { "type": "object", "additionalProperties": False, "properties": { "is_nested": {"type": "boolean", "default": True}, }, } class NonDictTestMixin(BaseTestMixin): item_class_subclassed = None item_class_empty = None expected_json_schema = { "llmHint": "Hi model!", "type": "object", "additionalProperties": False, "properties": { "name": { "type": "string", "title": "Name", "description": "Display name", }, "value": { "default": None, }, "color": { "type": "string", "enum": ["red", "green", "blue"], }, "produced": {"type": "boolean"}, "answer": { "type": ["null", "number", "string"], }, "numbers": {"type": "array", "items": {"type": "number"}}, "aliases": { "type": "object", "additionalProperties": {"type": "string"}, }, "nested": _NESTED_JSON_SCHEMA, "nested_list": { "type": "array", "items": _NESTED_JSON_SCHEMA, }, "nested_dict": { "type": "object", "additionalProperties": _NESTED_JSON_SCHEMA, }, "nested_dict_list": { "type": "array", "items": { "type": "object", "additionalProperties": _NESTED_JSON_SCHEMA, }, }, }, "required": [ "name", "color", "answer", "numbers", "aliases", "nested", "nested_list", "nested_dict", "nested_dict_list", ], } def test_set_value_keyerror(self): item = self.item_class() adapter = ItemAdapter(item) with self.assertRaises(KeyError): adapter["undefined_field"] = "some value" def test_metadata_common(self): adapter = ItemAdapter(self.item_class()) self.assertIsInstance(adapter.get_field_meta("name"), MappingProxyType) self.assertIsInstance(adapter.get_field_meta("value"), MappingProxyType) with self.assertRaises(KeyError): adapter.get_field_meta("undefined_field") def test_get_field_meta_defined_fields(self): adapter = ItemAdapter(self.item_class()) self.assertEqual(adapter.get_field_meta("name"), MappingProxyType({"serializer": str})) self.assertEqual(adapter.get_field_meta("value"), MappingProxyType({"serializer": int})) def test_delitem_len_iter(self): item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual(len(adapter), 2) self.assertEqual(sorted(iter(adapter)), ["name", "value"]) del adapter["name"] self.assertEqual(len(adapter), 1) self.assertEqual(sorted(iter(adapter)), ["value"]) del adapter["value"] self.assertEqual(len(adapter), 0) self.assertEqual(sorted(iter(adapter)), []) with self.assertRaises(KeyError): del adapter["name"] with self.assertRaises(KeyError): del adapter["value"] with self.assertRaises(KeyError): del adapter["undefined_field"] def test_field_names_from_class(self): field_names = ItemAdapter.get_field_names_from_class(self.item_class) assert isinstance(field_names, list) self.assertEqual(sorted(field_names), ["name", "value"]) def test_field_names_from_class_nested(self): field_names = ItemAdapter.get_field_names_from_class(self.item_class_subclassed) assert isinstance(field_names, list) self.assertEqual(sorted(field_names), ["name", "subclassed", "value"]) def test_field_names_from_class_empty(self): field_names = ItemAdapter.get_field_names_from_class(self.item_class_empty) assert isinstance(field_names, list) self.assertEqual(field_names, []) class DictTestCase(unittest.TestCase, BaseTestMixin): 
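# Plain dicts accept arbitrary keys and carry no per-field metadata, so the
# expectations below differ from the item-class cases: metadata is empty and
# get_field_names_from_class() returns None.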
item_class = dict item_class_nested = dict item_class_json_schema = dict item_class_empty = dict expected_json_schema = {"type": "object"} def test_get_value_keyerror_item_dict(self): """Instantiate without default values.""" adapter = ItemAdapter(self.item_class()) with self.assertRaises(KeyError): adapter["name"] def test_empty_metadata(self): adapter = ItemAdapter(self.item_class(name="foo", value=5)) for field_name in ("name", "value", "undefined_field"): self.assertEqual(adapter.get_field_meta(field_name), MappingProxyType({})) def test_field_names_updated(self): item = self.item_class(name="asdf") field_names = ItemAdapter(item).field_names() self.assertEqual(sorted(field_names), ["name"]) item["value"] = 1234 self.assertEqual(sorted(field_names), ["name", "value"]) def test_field_names_from_class(self): assert ItemAdapter.get_field_names_from_class(dict) is None _SCRAPY_NESTED_JSON_SCHEMA = { "type": "object", "additionalProperties": False, "properties": { "is_nested": {"default": True, "type": "boolean"}, }, } class ScrapySubclassedItemTestCase(NonDictTestMixin, unittest.TestCase): item_class = ScrapySubclassedItem item_class_nested = ScrapySubclassedItemNested item_class_subclassed = ScrapySubclassedItemSubclassed item_class_empty = ScrapySubclassedItemEmpty item_class_json_schema = ScrapySubclassedItemJsonSchema expected_json_schema = { "llmHint": "Hi model!", "type": "object", "additionalProperties": False, "properties": { **{ k: NonDictTestMixin.expected_json_schema["properties"][k] for k in sorted(NonDictTestMixin.expected_json_schema["properties"]) }, # Different order since stuff defined in json_schema_extra comes # first. "name": { "title": "Name", "type": "string", "description": "Display name", }, "nested": _SCRAPY_NESTED_JSON_SCHEMA, "nested_list": { "type": "array", "items": _SCRAPY_NESTED_JSON_SCHEMA, }, "nested_dict": { "type": "object", "additionalProperties": _SCRAPY_NESTED_JSON_SCHEMA, }, "nested_dict_list": { "type": "array", "items": { "type": "object", "additionalProperties": _SCRAPY_NESTED_JSON_SCHEMA, }, }, # No type, since none was specified in json_schema_extra. "produced": {}, # value comes last due to Scrapy items sorting fields # alphabetically. https://github.com/scrapy/scrapy/issues/7015 "value": NonDictTestMixin.expected_json_schema["properties"]["value"], }, # Scrapy items seem to sort fields alphabetically. produced is required # because there is no default factory support in Scrapy. 
"required": sorted(NonDictTestMixin.expected_json_schema["required"] + ["produced"]), } def test_get_value_keyerror_item_dict(self): """Instantiate without default values.""" adapter = ItemAdapter(self.item_class()) with self.assertRaises(KeyError): adapter["name"] _PYDANTIC_NESTED_JSON_SCHEMA = { k: v for k, v in _NESTED_JSON_SCHEMA.items() if k != "additionalProperties" } class PydanticV1ModelTestCase(NonDictTestMixin, unittest.TestCase): item_class = PydanticV1Model item_class_nested = PydanticV1ModelNested item_class_subclassed = PydanticV1ModelSubclassed item_class_empty = PydanticV1ModelEmpty item_class_json_schema = PydanticV1ModelJsonSchema expected_json_schema = { **{ k: v for k, v in NonDictTestMixin.expected_json_schema.items() if k not in {"additionalProperties"} }, "properties": { **{ k: v for k, v in NonDictTestMixin.expected_json_schema["properties"].items() if k not in {"nested", "nested_list", "nested_dict", "nested_dict_list"} }, "nested": _PYDANTIC_NESTED_JSON_SCHEMA, "nested_list": { "type": "array", "items": _PYDANTIC_NESTED_JSON_SCHEMA, }, "nested_dict": { "type": "object", "additionalProperties": _PYDANTIC_NESTED_JSON_SCHEMA, }, "nested_dict_list": { "type": "array", "items": { "type": "object", "additionalProperties": _PYDANTIC_NESTED_JSON_SCHEMA, }, }, }, "required": [ *NonDictTestMixin.expected_json_schema["required"][:2], "produced", *NonDictTestMixin.expected_json_schema["required"][2:], ], } def test_get_field_meta_defined_fields(self): adapter = ItemAdapter(self.item_class()) name_actual = adapter.get_field_meta("name") name_expected = MappingProxyType( { "serializer": str, "default_factory": name_actual["default_factory"], } ) self.assertEqual(name_expected, name_actual) value_actual = adapter.get_field_meta("value") value_expected = MappingProxyType( { "serializer": int, "default_factory": value_actual["default_factory"], } ) self.assertEqual(value_expected, value_actual) class PydanticModelTestCase(NonDictTestMixin, unittest.TestCase): item_class = PydanticModel item_class_nested = PydanticModelNested item_class_subclassed = PydanticModelSubclassed item_class_empty = PydanticModelEmpty item_class_json_schema = PydanticModelJsonSchema expected_json_schema = { **{ k: v for k, v in NonDictTestMixin.expected_json_schema.items() if k not in {"additionalProperties"} }, "properties": { **{ k: v for k, v in NonDictTestMixin.expected_json_schema["properties"].items() if k not in {"nested", "nested_list", "nested_dict", "nested_dict_list"} }, "nested": _PYDANTIC_NESTED_JSON_SCHEMA, "nested_list": { "type": "array", "items": _PYDANTIC_NESTED_JSON_SCHEMA, }, "nested_dict": { "type": "object", "additionalProperties": _PYDANTIC_NESTED_JSON_SCHEMA, }, "nested_dict_list": { "type": "array", "items": { "type": "object", "additionalProperties": _PYDANTIC_NESTED_JSON_SCHEMA, }, }, }, "required": NonDictTestMixin.expected_json_schema["required"], } def test_get_field_meta_defined_fields(self): adapter = ItemAdapter(self.item_class()) self.assertEqual( adapter.get_field_meta("name")["json_schema_extra"], MappingProxyType({"serializer": str}), ) self.assertEqual( adapter.get_field_meta("value")["json_schema_extra"], MappingProxyType({"serializer": int}), ) class DataClassItemTestCase(NonDictTestMixin, unittest.TestCase): item_class = DataClassItem item_class_nested = DataClassItemNested item_class_subclassed = DataClassItemSubclassed item_class_empty = DataClassItemEmpty item_class_json_schema = DataClassItemJsonSchema expected_json_schema = { 
**NonDictTestMixin.expected_json_schema, "properties": { **{ k: v for k, v in NonDictTestMixin.expected_json_schema["properties"].items() if k not in {"value", "produced"} }, # Title is set through json_schema_extra, so it comes first. "name": {"title": "Name", "type": "string", "description": "Display name"}, # value and produced come last because they have a default value, # and dataclass does not support values without a default after # values with a default. "value": NonDictTestMixin.expected_json_schema["properties"]["value"], "produced": NonDictTestMixin.expected_json_schema["properties"]["produced"], }, "required": NonDictTestMixin.expected_json_schema["required"], } class AttrsItemTestCase(NonDictTestMixin, unittest.TestCase): item_class = AttrsItem item_class_nested = AttrsItemNested item_class_subclassed = AttrsItemSubclassed item_class_empty = AttrsItemEmpty item_class_json_schema = AttrsItemJsonSchema expected_json_schema = DataClassItemTestCase.expected_json_schema scrapy-itemadapter-7cce401/tests/test_adapter_attrs.py000066400000000000000000000163071505555760000233470ustar00rootroot00000000000000import importlib import importlib.metadata import unittest from types import MappingProxyType from unittest import mock from packaging.version import Version from itemadapter.adapter import ItemAdapter from itemadapter.utils import get_field_meta_from_class from tests import ( AttrsItem, DataClassItem, PydanticModel, PydanticV1Model, ScrapyItem, ScrapySubclassedItem, clear_itemadapter_imports, make_mock_import, ) from tests.test_json_schema import check_schemas class AttrsTestCase(unittest.TestCase): def test_false(self): from itemadapter.adapter import AttrsAdapter self.assertFalse(AttrsAdapter.is_item(int)) self.assertFalse(AttrsAdapter.is_item(sum)) self.assertFalse(AttrsAdapter.is_item(1234)) self.assertFalse(AttrsAdapter.is_item(object())) self.assertFalse(AttrsAdapter.is_item(DataClassItem())) self.assertFalse(AttrsAdapter.is_item("a string")) self.assertFalse(AttrsAdapter.is_item(b"some bytes")) self.assertFalse(AttrsAdapter.is_item({"a": "dict"})) self.assertFalse(AttrsAdapter.is_item(["a", "list"])) self.assertFalse(AttrsAdapter.is_item(("a", "tuple"))) self.assertFalse(AttrsAdapter.is_item({"a", "set"})) self.assertFalse(AttrsAdapter.is_item(AttrsItem)) if PydanticModel is not None: self.assertFalse(AttrsAdapter.is_item(PydanticModel())) if PydanticV1Model is not None: self.assertFalse(AttrsAdapter.is_item(PydanticV1Model())) try: import scrapy # noqa: F401 # pylint: disable=unused-import except ImportError: pass else: self.assertFalse(AttrsAdapter.is_item(ScrapyItem())) self.assertFalse(AttrsAdapter.is_item(ScrapySubclassedItem())) @unittest.skipIf(not AttrsItem, "attrs module is not available") @mock.patch("builtins.__import__", make_mock_import("attr")) def test_module_import_error(self): with clear_itemadapter_imports(): from itemadapter.adapter import AttrsAdapter self.assertFalse(AttrsAdapter.is_item(AttrsItem(name="asdf", value=1234))) with self.assertRaises(RuntimeError, msg="attr module is not available"): AttrsAdapter(AttrsItem(name="asdf", value=1234)) with self.assertRaises(RuntimeError, msg="attr module is not available"): AttrsAdapter.get_field_meta_from_class(AttrsItem, "name") with self.assertRaises(RuntimeError, msg="attr module is not available"): AttrsAdapter.get_field_names_from_class(AttrsItem) with self.assertRaises(TypeError, msg="AttrsItem is not a valid item class"): get_field_meta_from_class(AttrsItem, "name") @unittest.skipIf(not AttrsItem, "attrs 
module is not available") @mock.patch("itemadapter.utils.attr", None) def test_module_not_available(self): from itemadapter.adapter import AttrsAdapter self.assertFalse(AttrsAdapter.is_item(AttrsItem(name="asdf", value=1234))) with self.assertRaises(TypeError, msg="AttrsItem is not a valid item class"): get_field_meta_from_class(AttrsItem, "name") @unittest.skipIf(not AttrsItem, "attrs module is not available") def test_true(self): from itemadapter.adapter import AttrsAdapter self.assertTrue(AttrsAdapter.is_item(AttrsItem())) self.assertTrue(AttrsAdapter.is_item(AttrsItem(name="asdf", value=1234))) # field metadata self.assertEqual( get_field_meta_from_class(AttrsItem, "name"), MappingProxyType({"serializer": str}), ) self.assertEqual( get_field_meta_from_class(AttrsItem, "value"), MappingProxyType({"serializer": int}), ) with self.assertRaises(KeyError, msg="AttrsItem does not support field: non_existent"): get_field_meta_from_class(AttrsItem, "non_existent") @unittest.skipIf(not AttrsItem, "attrs module is not available") def test_json_schema_validators(self): import attr from attr import validators ATTRS_VERSION = Version(importlib.metadata.version("attrs")) @attr.s class ItemClass: # String with min/max length and regex pattern name: str = attr.ib( validator=[ *( validators.min_len(3) for _ in range(1) if Version("22.1.0") <= ATTRS_VERSION ), *( validators.max_len(10) for _ in range(1) if Version("21.3.0") <= ATTRS_VERSION ), validators.matches_re(r"^[A-Za-z]+$"), ], ) # Integer with minimum, maximum, exclusive minimum, exclusive maximum age: int = attr.ib( validator=[ validators.ge(18), validators.le(99), validators.gt(17), validators.lt(100), ] if Version("21.3.0") <= ATTRS_VERSION else [], ) # Enum (membership) color: str = attr.ib(validator=validators.in_(["red", "green", "blue"])) # Unsupported pattern [(?i)] year: str = attr.ib( validator=[ validators.matches_re(r"(?i)\bY\d{4}\b"), ], ) # Len limits on sequences/sets. 
tags: set[str] = attr.ib( validator=validators.max_len(50) if Version("21.3.0") <= ATTRS_VERSION else [], ) actual = ItemAdapter.get_json_schema(ItemClass) expected = { "type": "object", "additionalProperties": False, "properties": { "name": { "type": "string", **({"minLength": 3} if Version("22.1.0") <= ATTRS_VERSION else {}), **({"maxLength": 10} if Version("21.3.0") <= ATTRS_VERSION else {}), "pattern": "^[A-Za-z]+$", }, "age": { "type": "integer", **( { "minimum": 18, "maximum": 99, "exclusiveMinimum": 17, "exclusiveMaximum": 100, } if Version("21.3.0") <= ATTRS_VERSION else {} ), }, "color": {"type": "string", "enum": ["red", "green", "blue"]}, "year": { "type": "string", }, "tags": { "type": "array", "uniqueItems": True, "items": { "type": "string", }, **({"maxItems": 50} if Version("21.3.0") <= ATTRS_VERSION else {}), }, }, "required": ["name", "age", "color", "year", "tags"], } check_schemas(actual, expected) scrapy-itemadapter-7cce401/tests/test_adapter_dataclasses.py000066400000000000000000000047501505555760000245000ustar00rootroot00000000000000from types import MappingProxyType from unittest import TestCase from itemadapter.utils import get_field_meta_from_class from tests import ( AttrsItem, DataClassItem, PydanticModel, PydanticV1Model, ScrapyItem, ScrapySubclassedItem, ) class DataclassTestCase(TestCase): def test_false(self): from itemadapter.adapter import DataclassAdapter self.assertFalse(DataclassAdapter.is_item(int)) self.assertFalse(DataclassAdapter.is_item(sum)) self.assertFalse(DataclassAdapter.is_item(1234)) self.assertFalse(DataclassAdapter.is_item(object())) self.assertFalse(DataclassAdapter.is_item("a string")) self.assertFalse(DataclassAdapter.is_item(b"some bytes")) self.assertFalse(DataclassAdapter.is_item({"a": "dict"})) self.assertFalse(DataclassAdapter.is_item(["a", "list"])) self.assertFalse(DataclassAdapter.is_item(("a", "tuple"))) self.assertFalse(DataclassAdapter.is_item({"a", "set"})) self.assertFalse(DataclassAdapter.is_item(DataClassItem)) try: import attrs # noqa: F401 # pylint: disable=unused-import except ImportError: pass else: self.assertFalse(DataclassAdapter.is_item(AttrsItem())) if PydanticModel is not None: self.assertFalse(DataclassAdapter.is_item(PydanticModel())) if PydanticV1Model is not None: self.assertFalse(DataclassAdapter.is_item(PydanticV1Model())) try: import scrapy # noqa: F401 # pylint: disable=unused-import except ImportError: pass else: self.assertFalse(DataclassAdapter.is_item(ScrapyItem())) self.assertFalse(DataclassAdapter.is_item(ScrapySubclassedItem())) def test_true(self): from itemadapter.adapter import DataclassAdapter self.assertTrue(DataclassAdapter.is_item(DataClassItem())) self.assertTrue(DataclassAdapter.is_item(DataClassItem(name="asdf", value=1234))) # field metadata self.assertEqual( get_field_meta_from_class(DataClassItem, "name"), MappingProxyType({"serializer": str}), ) self.assertEqual( get_field_meta_from_class(DataClassItem, "value"), MappingProxyType({"serializer": int}), ) with self.assertRaises(KeyError, msg="DataClassItem does not support field: non_existent"): get_field_meta_from_class(DataClassItem, "non_existent") scrapy-itemadapter-7cce401/tests/test_adapter_pydantic.py000066400000000000000000000204411505555760000240170ustar00rootroot00000000000000import unittest from types import MappingProxyType from typing import Optional from unittest import mock from itemadapter.adapter import ItemAdapter from itemadapter.utils import get_field_meta_from_class from tests import ( AttrsItem, DataClassItem, 
PydanticModel, PydanticSpecialCasesModel, ScrapyItem, ScrapySubclassedItem, clear_itemadapter_imports, make_mock_import, ) from tests.test_json_schema import check_schemas class PydanticTestCase(unittest.TestCase): maxDiff = None def test_false(self): from itemadapter.adapter import PydanticAdapter self.assertFalse(PydanticAdapter.is_item(int)) self.assertFalse(PydanticAdapter.is_item(sum)) self.assertFalse(PydanticAdapter.is_item(1234)) self.assertFalse(PydanticAdapter.is_item(object())) self.assertFalse(PydanticAdapter.is_item(DataClassItem())) self.assertFalse(PydanticAdapter.is_item("a string")) self.assertFalse(PydanticAdapter.is_item(b"some bytes")) self.assertFalse(PydanticAdapter.is_item({"a": "dict"})) self.assertFalse(PydanticAdapter.is_item(["a", "list"])) self.assertFalse(PydanticAdapter.is_item(("a", "tuple"))) self.assertFalse(PydanticAdapter.is_item({"a", "set"})) self.assertFalse(PydanticAdapter.is_item(PydanticModel)) try: import attrs # noqa: F401 # pylint: disable=unused-import except ImportError: pass else: self.assertFalse(PydanticAdapter.is_item(AttrsItem())) try: import scrapy # noqa: F401 # pylint: disable=unused-import except ImportError: pass else: self.assertFalse(PydanticAdapter.is_item(ScrapyItem())) self.assertFalse(PydanticAdapter.is_item(ScrapySubclassedItem())) @unittest.skipIf(not PydanticModel, "pydantic <2 module is not available") @mock.patch("builtins.__import__", make_mock_import("pydantic")) def test_module_import_error(self): with clear_itemadapter_imports(): from itemadapter.adapter import PydanticAdapter self.assertFalse(PydanticAdapter.is_item(PydanticModel(name="asdf", value=1234))) with self.assertRaises(TypeError, msg="PydanticModel is not a valid item class"): get_field_meta_from_class(PydanticModel, "name") @unittest.skipIf(not PydanticModel, "pydantic module is not available") @mock.patch("itemadapter.utils.pydantic", None) @mock.patch("itemadapter.utils.pydantic_v1", None) def test_module_not_available(self): from itemadapter.adapter import PydanticAdapter self.assertFalse(PydanticAdapter.is_item(PydanticModel(name="asdf", value=1234))) with self.assertRaises(TypeError, msg="PydanticModel is not a valid item class"): get_field_meta_from_class(PydanticModel, "name") @unittest.skipIf(not PydanticModel, "pydantic module is not available") def test_true(self): from itemadapter.adapter import PydanticAdapter self.assertTrue(PydanticAdapter.is_item(PydanticModel())) self.assertTrue(PydanticAdapter.is_item(PydanticModel(name="asdf", value=1234))) # field metadata mapping_proxy_type = get_field_meta_from_class(PydanticModel, "name") self.assertEqual( mapping_proxy_type, MappingProxyType( { "annotation": Optional[str], "default_factory": mapping_proxy_type["default_factory"], "json_schema_extra": {"serializer": str}, "repr": True, } ), ) mapping_proxy_type = get_field_meta_from_class(PydanticModel, "value") self.assertEqual( get_field_meta_from_class(PydanticModel, "value"), MappingProxyType( { "annotation": Optional[int], "default_factory": mapping_proxy_type["default_factory"], "json_schema_extra": {"serializer": int}, "repr": True, } ), ) mapping_proxy_type = get_field_meta_from_class(PydanticSpecialCasesModel, "special_cases") self.assertEqual( mapping_proxy_type, MappingProxyType( { "annotation": Optional[int], "alias": "special_cases", "alias_priority": 2, "default_factory": mapping_proxy_type["default_factory"], "validation_alias": "special_cases", "serialization_alias": "special_cases", "frozen": True, "repr": True, } ), ) with 
self.assertRaises(KeyError, msg="PydanticModel does not support field: non_existent"): get_field_meta_from_class(PydanticModel, "non_existent") @unittest.skipIf(not PydanticModel, "pydantic module is not available") def test_json_schema_forbid(self): from itemadapter._imports import pydantic class Item(pydantic.BaseModel): foo: str model_config = { "extra": "forbid", } actual = ItemAdapter.get_json_schema(Item) expected = { "type": "object", "additionalProperties": False, "properties": { "foo": {"type": "string"}, }, "required": ["foo"], } check_schemas(actual, expected) @unittest.skipIf(not PydanticModel, "pydantic module is not available") def test_json_schema_field_deprecated_bool(self): from itemadapter._imports import pydantic class Item(pydantic.BaseModel): foo: str = pydantic.Field(deprecated=True) actual = ItemAdapter.get_json_schema(Item) expected = { "type": "object", "properties": { "foo": {"type": "string", "deprecated": True}, }, "required": ["foo"], } check_schemas(actual, expected) @unittest.skipIf(not PydanticModel, "pydantic module is not available") def test_json_schema_field_deprecated_str(self): from itemadapter._imports import pydantic class Item(pydantic.BaseModel): foo: str = pydantic.Field(deprecated="Use something else") actual = ItemAdapter.get_json_schema(Item) expected = { "type": "object", "properties": { "foo": {"type": "string", "deprecated": True}, }, "required": ["foo"], } check_schemas(actual, expected) @unittest.skipIf(not PydanticModel, "pydantic module is not available") def test_json_schema_validators(self): from itemadapter._imports import pydantic class Model(pydantic.BaseModel): # String with min/max length and regex pattern name: str = pydantic.Field( min_length=3, max_length=10, pattern=r"^[A-Za-z]+$", ) # Integer with minimum, maximum, exclusive minimum, exclusive maximum age: int = pydantic.Field( ge=18, le=99, gt=17, lt=100, ) # Sequence with max_items tags: set[str] = pydantic.Field(max_length=50) actual = ItemAdapter.get_json_schema(Model) expected = { "type": "object", "properties": { "name": { "type": "string", "minLength": 3, "maxLength": 10, "pattern": "^[A-Za-z]+$", }, "age": { "type": "integer", "exclusiveMinimum": 17, "minimum": 18, "exclusiveMaximum": 100, "maximum": 99, }, "tags": { "type": "array", "uniqueItems": True, "items": { "type": "string", }, "maxItems": 50, }, }, "required": ["name", "age", "tags"], } check_schemas(actual, expected) scrapy-itemadapter-7cce401/tests/test_adapter_pydantic_v1.py000066400000000000000000000204041505555760000244240ustar00rootroot00000000000000import unittest from types import MappingProxyType from unittest import mock from itemadapter.adapter import ItemAdapter from itemadapter.utils import get_field_meta_from_class from tests import ( AttrsItem, DataClassItem, PydanticV1Model, PydanticV1SpecialCasesModel, ScrapyItem, ScrapySubclassedItem, clear_itemadapter_imports, make_mock_import, ) from tests.test_json_schema import check_schemas class PydanticTestCase(unittest.TestCase): def test_false(self): from itemadapter.adapter import PydanticAdapter self.assertFalse(PydanticAdapter.is_item(int)) self.assertFalse(PydanticAdapter.is_item(sum)) self.assertFalse(PydanticAdapter.is_item(1234)) self.assertFalse(PydanticAdapter.is_item(object())) self.assertFalse(PydanticAdapter.is_item(DataClassItem())) self.assertFalse(PydanticAdapter.is_item("a string")) self.assertFalse(PydanticAdapter.is_item(b"some bytes")) self.assertFalse(PydanticAdapter.is_item({"a": "dict"})) 
self.assertFalse(PydanticAdapter.is_item(["a", "list"])) self.assertFalse(PydanticAdapter.is_item(("a", "tuple"))) self.assertFalse(PydanticAdapter.is_item({"a", "set"})) self.assertFalse(PydanticAdapter.is_item(PydanticV1Model)) try: import attrs # noqa: F401 # pylint: disable=unused-import except ImportError: pass else: self.assertFalse(PydanticAdapter.is_item(AttrsItem())) try: import scrapy # noqa: F401 # pylint: disable=unused-import except ImportError: pass else: self.assertFalse(PydanticAdapter.is_item(ScrapyItem())) self.assertFalse(PydanticAdapter.is_item(ScrapySubclassedItem())) @unittest.skipIf(not PydanticV1Model, "pydantic <2 module is not available") @mock.patch("builtins.__import__", make_mock_import("pydantic")) def test_module_import_error(self): with clear_itemadapter_imports(): from itemadapter.adapter import PydanticAdapter self.assertFalse(PydanticAdapter.is_item(PydanticV1Model(name="asdf", value=1234))) with self.assertRaises(TypeError, msg="PydanticV1Model is not a valid item class"): get_field_meta_from_class(PydanticV1Model, "name") @unittest.skipIf(not PydanticV1Model, "pydantic module is not available") @mock.patch("itemadapter.utils.pydantic", None) @mock.patch("itemadapter.utils.pydantic_v1", None) def test_module_not_available(self): from itemadapter.adapter import PydanticAdapter self.assertFalse(PydanticAdapter.is_item(PydanticV1Model(name="asdf", value=1234))) with self.assertRaises(TypeError, msg="PydanticV1Model is not a valid item class"): get_field_meta_from_class(PydanticV1Model, "name") @unittest.skipIf(not PydanticV1Model, "pydantic module is not available") def test_true(self): from itemadapter.adapter import PydanticAdapter self.assertTrue(PydanticAdapter.is_item(PydanticV1Model())) self.assertTrue(PydanticAdapter.is_item(PydanticV1Model(name="asdf", value=1234))) # field metadata actual = get_field_meta_from_class(PydanticV1Model, "name") self.assertEqual( actual, MappingProxyType({"serializer": str, "default_factory": actual["default_factory"]}), ) actual = get_field_meta_from_class(PydanticV1Model, "value") self.assertEqual( actual, MappingProxyType({"serializer": int, "default_factory": actual["default_factory"]}), ) actual = get_field_meta_from_class(PydanticV1SpecialCasesModel, "special_cases") self.assertEqual( actual, MappingProxyType( { "alias": "special_cases", "allow_mutation": False, "default_factory": actual["default_factory"], } ), ) with self.assertRaises( KeyError, msg="PydanticV1Model does not support field: non_existent" ): get_field_meta_from_class(PydanticV1Model, "non_existent") @unittest.skipIf(not PydanticV1Model, "pydantic module is not available") def test_json_schema_forbid(self): from itemadapter._imports import pydantic_v1 class Item(pydantic_v1.BaseModel): foo: str class Config: extra = "forbid" actual = ItemAdapter.get_json_schema(Item) expected = { "type": "object", "additionalProperties": False, "properties": { "foo": {"type": "string"}, }, "required": ["foo"], } check_schemas(actual, expected) @unittest.skipIf(not PydanticV1Model, "pydantic module is not available") def test_json_schema_field_deprecated_bool(self): from itemadapter._imports import pydantic_v1 class Item(pydantic_v1.BaseModel): foo: str = pydantic_v1.Field(deprecated=True) actual = ItemAdapter.get_json_schema(Item) expected = { "type": "object", "properties": { "foo": {"type": "string", "deprecated": True}, }, "required": ["foo"], } check_schemas(actual, expected) @unittest.skipIf(not PydanticV1Model, "pydantic module is not available") def 
test_json_schema_field_deprecated_str(self): from itemadapter._imports import pydantic_v1 class Item(pydantic_v1.BaseModel): foo: str = pydantic_v1.Field(deprecated="Use something else") actual = ItemAdapter.get_json_schema(Item) expected = { "type": "object", "properties": { "foo": {"type": "string", "deprecated": True}, }, "required": ["foo"], } check_schemas(actual, expected) @unittest.skipIf(not PydanticV1Model, "pydantic module is not available") def test_json_schema_field_default_factory(self): from itemadapter._imports import pydantic_v1 class Item(pydantic_v1.BaseModel): foo: str = pydantic_v1.Field(default_factory=lambda: "bar") actual = ItemAdapter.get_json_schema(Item) expected = { "type": "object", "properties": { "foo": {"type": "string"}, }, } check_schemas(actual, expected) @unittest.skipIf(not PydanticV1Model, "pydantic module is not available") def test_json_schema_validators(self): from itemadapter._imports import pydantic_v1 class Model(pydantic_v1.BaseModel): # String with min/max length and regex pattern name: str = pydantic_v1.Field( min_length=3, max_length=10, pattern=r"^[A-Za-z]+$", ) # Integer with minimum, maximum, exclusive minimum, exclusive maximum age1: int = pydantic_v1.Field( gt=17, lt=100, ) age2: int = pydantic_v1.Field( ge=18, le=99, ) # Sequence with max_items tags: set[str] = pydantic_v1.Field(max_items=50) actual = ItemAdapter.get_json_schema(Model) expected = { "type": "object", "properties": { "name": { "type": "string", "minLength": 3, "maxLength": 10, "pattern": "^[A-Za-z]+$", }, "age1": { "type": "integer", "exclusiveMinimum": 17, "exclusiveMaximum": 100, }, "age2": { "type": "integer", "minimum": 18, "maximum": 99, }, "tags": { "type": "array", "uniqueItems": True, "items": { "type": "string", }, "maxItems": 50, }, }, "required": ["name", "age1", "age2", "tags"], } check_schemas(actual, expected) scrapy-itemadapter-7cce401/tests/test_adapter_scrapy.py000066400000000000000000000125761505555760000235170ustar00rootroot00000000000000import unittest from types import MappingProxyType from unittest import mock from itemadapter.utils import get_field_meta_from_class from tests import ( AttrsItem, DataClassItem, PydanticModel, PydanticV1Model, ScrapyItem, ScrapySubclassedItem, clear_itemadapter_imports, make_mock_import, ) class ScrapyItemTestCase(unittest.TestCase): def test_false(self): from itemadapter.adapter import ScrapyItemAdapter self.assertFalse(ScrapyItemAdapter.is_item(int)) self.assertFalse(ScrapyItemAdapter.is_item(sum)) self.assertFalse(ScrapyItemAdapter.is_item(1234)) self.assertFalse(ScrapyItemAdapter.is_item(object())) self.assertFalse(ScrapyItemAdapter.is_item(DataClassItem())) self.assertFalse(ScrapyItemAdapter.is_item("a string")) self.assertFalse(ScrapyItemAdapter.is_item(b"some bytes")) self.assertFalse(ScrapyItemAdapter.is_item({"a": "dict"})) self.assertFalse(ScrapyItemAdapter.is_item(["a", "list"])) self.assertFalse(ScrapyItemAdapter.is_item(("a", "tuple"))) self.assertFalse(ScrapyItemAdapter.is_item({"a", "set"})) self.assertFalse(ScrapyItemAdapter.is_item(ScrapySubclassedItem)) try: import attrs # noqa: F401 # pylint: disable=unused-import except ImportError: pass else: self.assertFalse(ScrapyItemAdapter.is_item(AttrsItem())) if PydanticModel is not None: self.assertFalse(ScrapyItemAdapter.is_item(PydanticModel())) if PydanticV1Model is not None: self.assertFalse(ScrapyItemAdapter.is_item(PydanticV1Model())) @unittest.skipIf(not ScrapySubclassedItem, "scrapy module is not available") @mock.patch("builtins.__import__", 
make_mock_import("scrapy")) def test_module_import_error(self): with clear_itemadapter_imports(): from itemadapter.adapter import ScrapyItemAdapter self.assertFalse( ScrapyItemAdapter.is_item(ScrapySubclassedItem(name="asdf", value=1234)) ) with self.assertRaises( TypeError, msg="ScrapySubclassedItem is not a valid item class" ): get_field_meta_from_class(ScrapySubclassedItem, "name") @unittest.skipIf(not ScrapySubclassedItem, "scrapy module is not available") @mock.patch("itemadapter.adapter._scrapy_item_classes", ()) def test_module_not_available(self): from itemadapter.adapter import ScrapyItemAdapter self.assertFalse(ScrapyItemAdapter.is_item(ScrapySubclassedItem(name="asdf", value=1234))) with self.assertRaises(TypeError, msg="ScrapySubclassedItem is not a valid item class"): get_field_meta_from_class(ScrapySubclassedItem, "name") @unittest.skipIf(not ScrapySubclassedItem, "scrapy module is not available") def test_true(self): from itemadapter.adapter import ScrapyItemAdapter self.assertTrue(ScrapyItemAdapter.is_item(ScrapyItem())) self.assertTrue(ScrapyItemAdapter.is_item(ScrapySubclassedItem())) self.assertTrue(ScrapyItemAdapter.is_item(ScrapySubclassedItem(name="asdf", value=1234))) # field metadata self.assertEqual( get_field_meta_from_class(ScrapySubclassedItem, "name"), MappingProxyType({"serializer": str}), ) self.assertEqual( get_field_meta_from_class(ScrapySubclassedItem, "value"), MappingProxyType({"serializer": int}), ) try: import scrapy except ImportError: scrapy = None class ScrapyDeprecatedBaseItemTestCase(unittest.TestCase): """Tests for deprecated classes. These will go away once the upstream classes are removed.""" @unittest.skipIf( scrapy is None or not hasattr(scrapy.item, "_BaseItem"), "scrapy.item._BaseItem not available", ) def test_deprecated_underscore_baseitem(self): from itemadapter.adapter import ScrapyItemAdapter class SubClassed_BaseItem(scrapy.item._BaseItem): pass self.assertTrue(ScrapyItemAdapter.is_item(scrapy.item._BaseItem())) self.assertTrue(ScrapyItemAdapter.is_item(SubClassed_BaseItem())) @unittest.skipIf( scrapy is None or not hasattr(scrapy.item, "BaseItem"), "scrapy.item.BaseItem not available", ) def test_deprecated_baseitem(self): from itemadapter.adapter import ScrapyItemAdapter class SubClassedBaseItem(scrapy.item.BaseItem): pass self.assertTrue(ScrapyItemAdapter.is_item(scrapy.item.BaseItem())) self.assertTrue(ScrapyItemAdapter.is_item(SubClassedBaseItem())) @unittest.skipIf(scrapy is None, "scrapy module is not available") def test_removed_baseitem(self): """Mock the scrapy.item module so it does not contain the deprecated _BaseItem class.""" from itemadapter.adapter import ScrapyItemAdapter class MockItemModule: Item = ScrapyItem with mock.patch("scrapy.item", MockItemModule): self.assertFalse(ScrapyItemAdapter.is_item({})) self.assertEqual( get_field_meta_from_class(ScrapySubclassedItem, "name"), MappingProxyType({"serializer": str}), ) self.assertEqual( get_field_meta_from_class(ScrapySubclassedItem, "value"), MappingProxyType({"serializer": int}), ) scrapy-itemadapter-7cce401/tests/test_interface.py000066400000000000000000000174761505555760000224620ustar00rootroot00000000000000from __future__ import annotations import unittest from collections.abc import Iterator, KeysView from types import MappingProxyType from typing import Any from unittest import mock from itemadapter.adapter import AdapterInterface, ItemAdapter class AdapterInterfaceTest(unittest.TestCase): @mock.patch.multiple(AdapterInterface, __abstractmethods__=set()) def 
test_interface_class_methods(self): with self.assertRaises(NotImplementedError): AdapterInterface.is_item(object()) with self.assertRaises(NotImplementedError): AdapterInterface.is_item_class(object) class FakeItemClass: _fields = { "name": {"serializer": str}, "value": {"serializer": int}, } def __init__(self, **kwargs) -> None: self._values = {**kwargs} class BaseFakeItemAdapter(AdapterInterface): """An adapter that only implements the required methods.""" @classmethod def is_item_class(cls, item_class: type) -> bool: return issubclass(item_class, FakeItemClass) def __getitem__(self, field_name: str) -> Any: if field_name in self.item._fields: return self.item._values[field_name] raise KeyError(field_name) def __setitem__(self, field_name: str, value: Any) -> None: if field_name in self.item._fields: self.item._values[field_name] = value else: raise KeyError(field_name) def __delitem__(self, field_name: str) -> None: if field_name in self.item._fields and field_name in self.item._values: del self.item._values[field_name] else: raise KeyError(field_name) def __iter__(self) -> Iterator: return iter(self.item._values) def __len__(self) -> int: return len(self.item._values) class FieldNamesFakeItemAdapter(BaseFakeItemAdapter): """An adapter that also implements the field_names method.""" def field_names(self) -> KeysView: return KeysView({key.upper(): value for key, value in self.item._fields.items()}) class MetadataFakeItemAdapter(BaseFakeItemAdapter): """An adapter that also implements metadata-related methods.""" @classmethod def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: return MappingProxyType(item_class._fields.get(field_name) or {}) class BaseFakeItemAdapterTest(unittest.TestCase): item_class = FakeItemClass adapter_class = BaseFakeItemAdapter def setUp(self): ItemAdapter.ADAPTER_CLASSES.appendleft(self.adapter_class) def tearDown(self): ItemAdapter.ADAPTER_CLASSES.popleft() def test_repr(self): item = self.item_class() adapter = ItemAdapter(item) self.assertEqual(repr(adapter), "") adapter["name"] = "asdf" adapter["value"] = 1234 self.assertEqual(repr(adapter), "") def test_get_set_value(self): item = self.item_class() adapter = ItemAdapter(item) self.assertEqual(adapter.get("name"), None) self.assertEqual(adapter.get("value"), None) adapter["name"] = "asdf" adapter["value"] = 1234 self.assertEqual(adapter.get("name"), "asdf") self.assertEqual(adapter.get("value"), 1234) self.assertEqual(adapter["name"], "asdf") self.assertEqual(adapter["value"], 1234) def test_get_set_value_init(self): item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual(adapter.get("name"), "asdf") self.assertEqual(adapter.get("value"), 1234) self.assertEqual(adapter["name"], "asdf") self.assertEqual(adapter["value"], 1234) def test_get_value_keyerror(self): item = self.item_class() adapter = ItemAdapter(item) with self.assertRaises(KeyError): adapter["_undefined_"] def test_as_dict(self): item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual({"name": "asdf", "value": 1234}, dict(adapter)) def test_set_value_keyerror(self): item = self.item_class() adapter = ItemAdapter(item) with self.assertRaises(KeyError): adapter["_undefined_"] = "some value" def test_delitem_len_iter(self): item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual(len(adapter), 2) self.assertEqual(sorted(iter(adapter)), ["name", "value"]) del adapter["name"] self.assertEqual(len(adapter), 
1) self.assertEqual(sorted(iter(adapter)), ["value"]) del adapter["value"] self.assertEqual(len(adapter), 0) self.assertEqual(sorted(iter(adapter)), []) with self.assertRaises(KeyError): del adapter["name"] with self.assertRaises(KeyError): del adapter["value"] with self.assertRaises(KeyError): del adapter["_undefined_"] def test_get_value_keyerror_item_dict(self): """Instantiate without default values.""" adapter = ItemAdapter(self.item_class()) with self.assertRaises(KeyError): adapter["name"] def test_get_field_meta(self): """Metadata is always empty for the default implementation.""" adapter = ItemAdapter(self.item_class()) self.assertEqual(adapter.get_field_meta("_undefined_"), MappingProxyType({})) self.assertEqual(adapter.get_field_meta("name"), MappingProxyType({})) self.assertEqual(adapter.get_field_meta("value"), MappingProxyType({})) def test_get_field_meta_from_class(self): """Metadata is always empty for the default implementation.""" self.assertEqual( ItemAdapter.get_field_meta_from_class(self.item_class, "_undefined_"), MappingProxyType({}), ) self.assertEqual( ItemAdapter.get_field_meta_from_class(self.item_class, "name"), MappingProxyType({}), ) self.assertEqual( ItemAdapter.get_field_meta_from_class(self.item_class, "value"), MappingProxyType({}), ) def test_field_names(self): item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertIsInstance(adapter.field_names(), KeysView) self.assertEqual(sorted(adapter.field_names()), ["name", "value"]) class MetadataFakeItemAdapterTest(BaseFakeItemAdapterTest): item_class = FakeItemClass adapter_class = MetadataFakeItemAdapter def test_get_field_meta(self): adapter = ItemAdapter(self.item_class()) self.assertEqual(adapter.get_field_meta("_undefined_"), MappingProxyType({})) self.assertEqual(adapter.get_field_meta("name"), MappingProxyType({"serializer": str})) self.assertEqual(adapter.get_field_meta("value"), MappingProxyType({"serializer": int})) def test_get_field_meta_from_class(self): self.assertEqual( ItemAdapter.get_field_meta_from_class(self.item_class, "_undefined_"), MappingProxyType({}), ) self.assertEqual( ItemAdapter.get_field_meta_from_class(self.item_class, "name"), MappingProxyType({"serializer": str}), ) self.assertEqual( ItemAdapter.get_field_meta_from_class(self.item_class, "value"), MappingProxyType({"serializer": int}), ) class FieldNamesFakeItemAdapterTest(BaseFakeItemAdapterTest): item_class = FakeItemClass adapter_class = FieldNamesFakeItemAdapter def test_field_names(self): item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertIsInstance(adapter.field_names(), KeysView) self.assertEqual(sorted(adapter.field_names()), ["NAME", "VALUE"]) scrapy-itemadapter-7cce401/tests/test_itemadapter.py000066400000000000000000000010121505555760000227740ustar00rootroot00000000000000import unittest from itemadapter.adapter import DictAdapter, ItemAdapter class DictOnlyItemAdapter(ItemAdapter): ADAPTER_CLASSES = [DictAdapter] class ItemAdapterTestCase(unittest.TestCase): def test_repr(self): adapter = ItemAdapter({"foo": "bar"}) self.assertEqual(repr(adapter), "") def test_repr_subclass(self): adapter = DictOnlyItemAdapter({"foo": "bar"}) self.assertEqual(repr(adapter), "") scrapy-itemadapter-7cce401/tests/test_json_schema.py000066400000000000000000000540441505555760000230030ustar00rootroot00000000000000from __future__ import annotations import json import sys import typing import unittest from collections.abc import Mapping, Sequence # noqa: TC003 from dataclasses 
import dataclass, field from enum import Enum from typing import Any, Optional, Union import pytest from itemadapter._imports import pydantic from itemadapter.adapter import AttrsAdapter, ItemAdapter, PydanticAdapter, ScrapyItemAdapter from tests import ( AttrsItem, AttrsItemJsonSchemaNested, DataClassItemJsonSchemaNested, PydanticModel, PydanticModelJsonSchemaNested, PydanticV1Model, PydanticV1ModelJsonSchemaNested, ScrapySubclassedItem, ScrapySubclassedItemJsonSchemaNested, ) PYTHON_VERSION = sys.version_info[:2] if ScrapySubclassedItem and AttrsItem: from scrapy import Field as ScrapyField from scrapy import Item as ScrapyItem class ScrapySubclassedItemCrossNested(ScrapyItem): nested: AttrsItemJsonSchemaNested = ScrapyField() @dataclass class Brand: name: str @dataclass class OptionalItemListNestedItem: is_nested: bool = True @dataclass class OptionalItemListItem: foo: Optional[list[OptionalItemListNestedItem]] = None @dataclass class RecursionItem: child: RecursionNestedItem sibling: RecursionItem @dataclass class RecursionNestedItem: parent: RecursionItem sibling: RecursionNestedItem @dataclass class SimpleItem: foo: str class CustomMapping: # noqa: PLW1641 def __init__(self, data): self._data = dict(data) def __getitem__(self, key): return self._data[key] def __iter__(self): return iter(self._data) def __len__(self): return len(self._data) def __contains__(self, key): return key in self._data def keys(self): return self._data.keys() def items(self): return self._data.items() def values(self): return self._data.values() def get(self, key, default=None): return self._data.get(key, default) def __eq__(self, other): if isinstance(other, CustomMapping): return self._data == other._data if isinstance(other, dict): return self._data == other return NotImplemented def __ne__(self, other): eq = self.__eq__(other) if eq is NotImplemented: return NotImplemented return not eq class SimpleEnum(Enum): foo = "foo" if PydanticModel: class PydanticEnumModel(pydantic.BaseModel): enum: SimpleEnum def check_schemas(actual, expected): """Helper function to check if the actual JSON schema matches the expected one. It uses json.dumps() WITHOUT sorting the keys, to ensure key sorting matches in both schemas as well. Maintaining the source order of fields can be important The indentation is set for better readability or mismatch output. 
""" assert json.dumps(actual, indent=2) == json.dumps(expected, indent=2) class JsonSchemaTestCase(unittest.TestCase): maxDiff = None @unittest.skipIf(not AttrsItem, "attrs module is not available") @unittest.skipIf(not PydanticModel, "pydantic module is not available") def test_attrs_pydantic_enum(self): """This test exists to ensure that we do not let the JSON Schema generation of Pydantic item classes generate nested $defs (which we don’t since we do not run Pydantic’s JSON Schema generation but our own).""" import attrs @attrs.define class TestAttrsItem: pydantic: PydanticEnumModel actual = ItemAdapter.get_json_schema(TestAttrsItem) expected = { "type": "object", "additionalProperties": False, "properties": { "pydantic": { "type": "object", "properties": { "enum": {"type": "string", "enum": ["foo"]}, }, "required": ["enum"], } }, "required": ["pydantic"], } check_schemas(actual, expected) @unittest.skipIf(not ScrapySubclassedItem, "scrapy module is not available") @unittest.skipIf( PYTHON_VERSION >= (3, 13), "It seems inspect can get the class code in Python 3.13+" ) def test_unreachable_source(self): """Using inspect to get the item class source and find attribute docstrings is not always a possibility, e.g. when the item class is defined within a (test) method. In those cases, only the extraction of those docstrings should fail.""" class ScrapySubclassedItemUnreachable(ScrapyItem): name: str = ScrapyField(json_schema_extra={"example": "Foo"}) """Display name""" actual = ItemAdapter.get_json_schema(ScrapySubclassedItemUnreachable) expected = { "type": "object", "additionalProperties": False, "properties": { "name": { "example": "Foo", "type": "string", } }, "required": ["name"], } check_schemas(actual, expected) def test_recursion(self): actual = ItemAdapter.get_json_schema(RecursionItem) expected = { "type": "object", "additionalProperties": False, "properties": { "child": { "type": "object", "additionalProperties": False, "properties": { "parent": { "type": "object", }, "sibling": { "type": "object", }, }, "required": ["parent", "sibling"], }, "sibling": { "type": "object", }, }, "required": ["child", "sibling"], } check_schemas(actual, expected) def test_nested_dict(self): @dataclass class TestItem: foo: dict actual = ItemAdapter.get_json_schema(TestItem) expected = { "type": "object", "additionalProperties": False, "properties": { "foo": { "type": "object", }, }, "required": ["foo"], } check_schemas(actual, expected) def test_optional_item_list(self): actual = ItemAdapter.get_json_schema(OptionalItemListItem) expected = { "type": "object", "additionalProperties": False, "properties": { "foo": { "anyOf": [ { "type": "null", }, { "type": "array", "items": { "type": "object", "additionalProperties": False, "properties": { "is_nested": { "type": "boolean", "default": True, }, }, }, }, ], "default": None, }, }, } check_schemas(actual, expected) def test_sequence_untyped(self): @dataclass class TestItem: foo: Sequence actual = ItemAdapter.get_json_schema(TestItem) expected = { "type": "object", "additionalProperties": False, "properties": { "foo": { "type": "array", }, }, "required": ["foo"], } check_schemas(actual, expected) def test_tuple_ellipsis(self): @dataclass class TestItem: foo: tuple[Any, ...] 
actual = ItemAdapter.get_json_schema(TestItem) expected = { "type": "object", "additionalProperties": False, "properties": { "foo": { "type": "array", }, }, "required": ["foo"], } check_schemas(actual, expected) def test_tuple_multiple_types(self): @dataclass class TestItem: foo: tuple[str, int, int] actual = ItemAdapter.get_json_schema(TestItem) expected = { "type": "object", "additionalProperties": False, "properties": { "foo": { "type": "array", "items": { "type": ["integer", "string"], }, }, }, "required": ["foo"], } check_schemas(actual, expected) def test_union_single(self): @dataclass class TestItem: foo: Union[str] actual = ItemAdapter.get_json_schema(TestItem) expected = { "type": "object", "additionalProperties": False, "properties": { "foo": {"type": "string"}, }, "required": ["foo"], } check_schemas(actual, expected) def test_custom_any_of(self): @dataclass class TestItem: foo: Union[str, SimpleItem] = field( metadata={"json_schema_extra": {"anyOf": []}}, ) actual = ItemAdapter.get_json_schema(TestItem) expected = { "type": "object", "additionalProperties": False, "properties": { "foo": {"anyOf": []}, }, "required": ["foo"], } check_schemas(actual, expected) def test_set_untyped(self): @dataclass class TestItem: foo: set actual = ItemAdapter.get_json_schema(TestItem) expected = { "type": "object", "additionalProperties": False, "properties": { "foo": {"type": "array", "uniqueItems": True}, }, "required": ["foo"], } check_schemas(actual, expected) def test_mapping_untyped(self): @dataclass class TestItem: foo: Mapping actual = ItemAdapter.get_json_schema(TestItem) expected = { "type": "object", "additionalProperties": False, "properties": { "foo": {"type": "object"}, }, "required": ["foo"], } check_schemas(actual, expected) def test_custom_mapping(self): @dataclass class TestItem: foo: CustomMapping actual = ItemAdapter.get_json_schema(TestItem) expected = { "type": "object", "additionalProperties": False, "properties": { "foo": {"type": "object"}, }, "required": ["foo"], } check_schemas(actual, expected) def test_item_without_attributes(self): @dataclass class TestItem: pass actual = ItemAdapter.get_json_schema(TestItem) expected = { "type": "object", "additionalProperties": False, } check_schemas(actual, expected) def test_typing_sequence_untyped(self): @dataclass class TestItem: foo: typing.Sequence actual = ItemAdapter.get_json_schema(TestItem) expected = { "type": "object", "additionalProperties": False, "properties": { "foo": { "type": "array", }, }, "required": ["foo"], } check_schemas(actual, expected) def test_custom_items(self): @dataclass class TestItem: foo: typing.Sequence = field(metadata={"json_schema_extra": {"items": {}}}) actual = ItemAdapter.get_json_schema(TestItem) expected = { "type": "object", "additionalProperties": False, "properties": { "foo": { "items": {}, "type": "array", }, }, "required": ["foo"], } check_schemas(actual, expected) @unittest.skipIf(not AttrsItem, "attrs module is not available") @unittest.skipIf(PYTHON_VERSION < (3, 10), "Modern optional annotations require Python 3.10+") def test_modern_optional_annotations(self): import attr @attr.define class Product: name: str """Product name""" brand: Brand | None in_stock: bool = True actual = ItemAdapter.get_json_schema(Product) expected = { "type": "object", "additionalProperties": False, "properties": { "name": {"type": "string", "description": "Product name"}, "brand": { "anyOf": [ {"type": "null"}, { "type": "object", "additionalProperties": False, "properties": {"name": {"type": 
"string"}}, "required": ["name"], }, ] }, "in_stock": {"type": "boolean", "default": True}, }, "required": ["name", "brand"], } check_schemas(actual, expected) def test_field_docstring_inheritance(self): """Test that field docstrings are inherited from parent classes.""" @dataclass class ParentItem: foo: str """Parent item foo""" @dataclass class ChildItem(ParentItem): pass actual = ItemAdapter.get_json_schema(ChildItem) expected = { "type": "object", "additionalProperties": False, "properties": { "foo": {"type": "string", "description": "Parent item foo"}, }, "required": ["foo"], } check_schemas(actual, expected) class CrossNestingTestCase(unittest.TestCase): """Test item nesting across different item types, with all supported types acting as parent or child in one test.""" maxDiff = None @unittest.skipIf(not PydanticV1Model, "pydantic module is not available") def test_dataclass_pydantic1(self): @dataclass class TestItem: nested: PydanticV1ModelJsonSchemaNested actual = ItemAdapter.get_json_schema(TestItem) expected = { "type": "object", "additionalProperties": False, "properties": { "nested": { "type": "object", "properties": { "is_nested": {"type": "boolean", "default": True}, }, } }, "required": ["nested"], } check_schemas(actual, expected) @unittest.skipIf(not PydanticModel, "pydantic module is not available") @unittest.skipIf(not AttrsItem, "attrs module is not available") def test_attrs_pydantic2(self): import attrs @attrs.define class TestItem: nested: PydanticModelJsonSchemaNested actual = ItemAdapter.get_json_schema(TestItem) expected = { "type": "object", "additionalProperties": False, "properties": { "nested": { "type": "object", "properties": { "is_nested": {"type": "boolean", "default": True}, }, } }, "required": ["nested"], } check_schemas(actual, expected) actual = AttrsAdapter.get_json_schema(TestItem) expected = { "type": "object", "additionalProperties": False, "properties": {"nested": {}}, "required": ["nested"], } check_schemas(actual, expected) @unittest.skipIf(not ScrapySubclassedItem, "scrapy module is not available") @unittest.skipIf(not AttrsItem, "attrs module is not available") def test_scrapy_attrs(self): actual = ItemAdapter.get_json_schema(ScrapySubclassedItemCrossNested) expected = { "type": "object", "additionalProperties": False, "properties": { "nested": { "type": "object", "additionalProperties": False, "properties": { "is_nested": {"type": "boolean", "default": True}, }, } }, "required": ["nested"], } check_schemas(actual, expected) actual = ScrapyItemAdapter.get_json_schema(ScrapySubclassedItemCrossNested) expected = { "type": "object", "additionalProperties": False, "properties": {"nested": {}}, "required": ["nested"], } check_schemas(actual, expected) @unittest.skipIf(not PydanticV1Model, "pydantic module is not available") @unittest.skipIf(not ScrapySubclassedItem, "scrapy module is not available") def test_pydantic1_scrapy(self): from . 
import pydantic_v1 class TestItem(pydantic_v1.BaseModel): nested: ScrapySubclassedItemJsonSchemaNested class Config: arbitrary_types_allowed = True actual = ItemAdapter.get_json_schema(TestItem) expected = { "type": "object", "properties": { "nested": { "type": "object", "additionalProperties": False, "properties": { "is_nested": {"default": True, "type": "boolean"}, }, } }, "required": ["nested"], } check_schemas(actual, expected) actual = PydanticAdapter.get_json_schema(TestItem) expected = { "type": "object", "properties": { # Scrapy item classes implement the Mapping interface, so # they are correctly recognized as objects even when there is # no access to ScrapyItemAdapter. "nested": {"type": "object"} }, "required": ["nested"], } check_schemas(actual, expected) @unittest.skipIf(not PydanticModel, "pydantic module is not available") def test_pydantic_dataclass(self): class TestItem(pydantic.BaseModel): nested: DataClassItemJsonSchemaNested actual = ItemAdapter.get_json_schema(TestItem) expected = { "type": "object", "properties": { "nested": { "type": "object", "additionalProperties": False, "properties": { "is_nested": {"type": "boolean", "default": True}, }, }, }, "required": ["nested"], } check_schemas(actual, expected) actual = PydanticAdapter.get_json_schema(TestItem) expected = { "type": "object", "properties": { "nested": {}, }, "required": ["nested"], } check_schemas(actual, expected) @unittest.skipIf(not PydanticModel, "pydantic module is not available") @unittest.skipIf(not ScrapySubclassedItem, "scrapy module is not available") def test_pydantic_scrapy(self): class TestItem(pydantic.BaseModel): nested: ScrapySubclassedItemJsonSchemaNested model_config = { "arbitrary_types_allowed": True, } actual = ItemAdapter.get_json_schema(TestItem) expected = { "type": "object", "properties": { "nested": { "type": "object", "additionalProperties": False, "properties": { "is_nested": {"default": True, "type": "boolean"}, }, }, }, "required": ["nested"], } check_schemas(actual, expected) actual = PydanticAdapter.get_json_schema(TestItem) expected = { "type": "object", "properties": { "nested": {"type": "object"}, }, "required": ["nested"], } check_schemas(actual, expected) @unittest.skipIf(not PydanticModel, "pydantic module is not available") @pytest.mark.filterwarnings("ignore:Mixing V1 models and V2 models") def test_pydantics(self): class TestItem(pydantic.BaseModel): nested: PydanticV1ModelJsonSchemaNested actual = ItemAdapter.get_json_schema(TestItem) expected = { "type": "object", "properties": { "nested": { "type": "object", "properties": { "is_nested": {"type": "boolean", "default": True}, }, }, }, "required": ["nested"], } check_schemas(actual, expected) # Since PydanticAdapter is not version-specific, it works with both # Pydantic V1 and V2+ models. 
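
The same cross-nesting point can be made outside the test suite with a hedged sketch (Inner and Outer are hypothetical; pydantic 2 assumed): ItemAdapter consults every registered adapter, so it can expand a dataclass nested inside a pydantic model, whereas PydanticAdapter on its own leaves the nested schema unexpanded, exactly as the assertions above show.

from dataclasses import dataclass

import pydantic

from itemadapter import ItemAdapter
from itemadapter.adapter import PydanticAdapter


@dataclass
class Inner:
    is_nested: bool = True


class Outer(pydantic.BaseModel):
    nested: Inner


# ItemAdapter can fall back on the dataclass adapter for the nested field...
full = ItemAdapter.get_json_schema(Outer)
# ...while PydanticAdapter alone cannot describe the nested dataclass.
partial = PydanticAdapter.get_json_schema(Outer)
assert full["properties"]["nested"] != partial["properties"]["nested"]
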
actual = PydanticAdapter.get_json_schema(TestItem) expected = { "type": "object", "properties": { "nested": { "type": "object", "properties": { "is_nested": {"type": "boolean", "default": True}, }, }, }, "required": ["nested"], } check_schemas(actual, expected) scrapy-itemadapter-7cce401/tests/test_utils.py000066400000000000000000000060301505555760000216420ustar00rootroot00000000000000import unittest from types import MappingProxyType from itemadapter import ItemAdapter from itemadapter.utils import get_field_meta_from_class, is_item from tests import ( AttrsItem, DataClassItem, PydanticV1Model, ScrapyItem, ScrapySubclassedItem, ) class FieldMetaFromClassTestCase(unittest.TestCase): def test_invalid_item_class(self): with self.assertRaises(TypeError, msg="1 is not a valid item class"): get_field_meta_from_class(1, "field") with self.assertRaises(TypeError, msg="list is not a valid item class"): get_field_meta_from_class(list, "field") def test_empty_meta_for_dict(self): class DictSubclass(dict): pass self.assertEqual(get_field_meta_from_class(DictSubclass, "name"), MappingProxyType({})) self.assertEqual(get_field_meta_from_class(dict, "name"), MappingProxyType({})) class ItemLikeTestCase(unittest.TestCase): def test_false(self): self.assertFalse(is_item(int)) self.assertFalse(is_item(sum)) self.assertFalse(is_item(1234)) self.assertFalse(is_item(object())) self.assertFalse(is_item("a string")) self.assertFalse(is_item(b"some bytes")) self.assertFalse(is_item(["a", "list"])) self.assertFalse(is_item(("a", "tuple"))) self.assertFalse(is_item({"a", "set"})) self.assertFalse(is_item(dict)) self.assertFalse(is_item(ScrapyItem)) self.assertFalse(is_item(DataClassItem)) self.assertFalse(is_item(ScrapySubclassedItem)) self.assertFalse(is_item(AttrsItem)) self.assertFalse(is_item(PydanticV1Model)) self.assertFalse(ItemAdapter.is_item_class(list)) self.assertFalse(ItemAdapter.is_item_class(int)) self.assertFalse(ItemAdapter.is_item_class(tuple)) def test_true_dict(self): self.assertTrue(is_item({"a": "dict"})) self.assertTrue(ItemAdapter.is_item_class(dict)) @unittest.skipIf(not ScrapySubclassedItem, "scrapy module is not available") def test_true_scrapy(self): self.assertTrue(is_item(ScrapyItem())) self.assertTrue(is_item(ScrapySubclassedItem(name="asdf", value=1234))) self.assertTrue(ItemAdapter.is_item_class(ScrapyItem)) self.assertTrue(ItemAdapter.is_item_class(ScrapySubclassedItem)) @unittest.skipIf(not DataClassItem, "dataclasses module is not available") def test_true_dataclass(self): self.assertTrue(is_item(DataClassItem(name="asdf", value=1234))) self.assertTrue(ItemAdapter.is_item_class(DataClassItem)) @unittest.skipIf(not AttrsItem, "attrs module is not available") def test_true_attrs(self): self.assertTrue(is_item(AttrsItem(name="asdf", value=1234))) self.assertTrue(ItemAdapter.is_item_class(AttrsItem)) @unittest.skipIf(not PydanticV1Model, "pydantic module is not available") def test_true_pydantic(self): self.assertTrue(is_item(PydanticV1Model(name="asdf", value=1234))) self.assertTrue(ItemAdapter.is_item_class(PydanticV1Model)) scrapy-itemadapter-7cce401/tox.ini000066400000000000000000000031611505555760000172440ustar00rootroot00000000000000[tox] envlist = min-attrs,min-pydantic,min-scrapy,min-extra,py39,py310,py311,py312,py313,attrs,pydantic1,pydantic,scrapy,extra,extra-pydantic1,pre-commit,typing,docs,twinecheck,pylint [testenv] basepython = min-attrs,min-pydantic,min-scrapy,min-extra: python3.9 deps = pytest>=5.4 pytest-cov>=2.8 packaging min-attrs,min-extra: attrs==20.1.0 
min-pydantic,min-extra: pydantic==1.8 min-scrapy,min-extra: scrapy==2.2 pydantic1,extra-pydantic1: pydantic<2 extras = min-attrs,attrs,min-extra,extra,extra-pydantic1: attrs min-pydantic,pydantic1,pydantic,min-extra,extra,extra-pydantic1: pydantic min-scrapy,scrapy,min-extra,extra,extra-pydantic1: scrapy commands = pytest -vv --cov=itemadapter --cov-report=term-missing --cov-report=html --cov-report=xml {posargs: itemadapter tests} [testenv:docs] deps = {[testenv]deps} zyte-common-items extras = attrs pydantic scrapy commands = pytest -vv --cov=itemadapter --cov-report=term-missing --cov-report=html --cov-report=xml --doctest-glob=README.md {posargs:README.md} [testenv:typing] basepython = python3 deps = mypy==1.16.1 attrs==25.3.0 pydantic==2.11.7 scrapy==2.13.2 commands = mypy {posargs:itemadapter} [testenv:pylint] deps = pylint==3.3.7 pylint-per-file-ignores==1.4.0 commands = pylint {posargs:itemadapter tests} [testenv:twinecheck] basepython = python3 deps = twine==6.1.0 build==1.2.2.post1 commands = python -m build --sdist twine check dist/* [testenv:pre-commit] deps = pre-commit commands = pre-commit run --all-files --show-diff-on-failure skip_install = true
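
Finally, a brief hedged usage sketch of the helpers exercised in tests/test_utils.py above (MyItem is a hypothetical Scrapy item; scrapy must be installed): is_item() is true only for item instances, never for item classes, while ItemAdapter.is_item_class() performs the class-level check.

import scrapy

from itemadapter import ItemAdapter
from itemadapter.utils import is_item


class MyItem(scrapy.Item):
    name = scrapy.Field()


assert not is_item(MyItem)             # classes are not items...
assert is_item(MyItem(name="foo"))     # ...but instances are
assert is_item({"a": "dict"})          # plain dicts count as items too
assert ItemAdapter.is_item_class(MyItem)
assert ItemAdapter.is_item_class(dict)
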