pax_global_header00006660000000000000000000000064141117176770014526gustar00rootroot0000000000000052 comment=bb722cf68165e26b20393a0b3de7c00514a22a05 itemadapter-0.4.0/000077500000000000000000000000001411171767700140265ustar00rootroot00000000000000itemadapter-0.4.0/.bumpversion.cfg000066400000000000000000000002101411171767700171270ustar00rootroot00000000000000[bumpversion] current_version = 0.4.0 commit = True tag = True [bumpversion:file:itemadapter/__init__.py] [bumpversion:file:setup.py] itemadapter-0.4.0/.editorconfig000066400000000000000000000002651411171767700165060ustar00rootroot00000000000000[*] trim_trailing_whitespace = true insert_final_newline = true indent_style = space [Makefile] indent_style = tab [*.py] indent_size = 4 charset = utf-8 [*.yml] indent_size = 2 itemadapter-0.4.0/.github/000077500000000000000000000000001411171767700153665ustar00rootroot00000000000000itemadapter-0.4.0/.github/workflows/000077500000000000000000000000001411171767700174235ustar00rootroot00000000000000itemadapter-0.4.0/.github/workflows/checks.yml000066400000000000000000000013451411171767700214110ustar00rootroot00000000000000name: Checks on: [push, pull_request] jobs: checks: runs-on: ubuntu-latest strategy: matrix: include: - python-version: 3 env: TOXENV: black - python-version: 3 env: TOXENV: bandit - python-version: 3 env: TOXENV: flake8 - python-version: 3.8 env: TOXENV: typing steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Run check env: ${{ matrix.env }} run: | pip install -U pip pip install -U tox tox itemadapter-0.4.0/.github/workflows/publish.yml000066400000000000000000000010371411171767700216150ustar00rootroot00000000000000name: Publish on: release: types: [published] jobs: publish: runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - name: Set up Python 3.8 uses: actions/setup-python@v2 with: python-version: 3.8 - name: Publish to PyPI run: | pip install --upgrade pip pip install --upgrade setuptools wheel twine python setup.py sdist bdist_wheel export TWINE_USERNAME=__token__ export TWINE_PASSWORD=${{ secrets.PYPI_TOKEN }} twine upload dist/* itemadapter-0.4.0/.github/workflows/tests.yml000066400000000000000000000017651411171767700213210ustar00rootroot00000000000000name: Tests on: [push, pull_request] jobs: tests-ubuntu: name: "Test: py${{ matrix.python-version }}, Ubuntu" runs-on: ubuntu-latest strategy: matrix: python-version: [3.6, 3.7, 3.8, 3.9] steps: - uses: actions/checkout@v2 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - name: Install tox run: pip install tox - name: Run tests run: tox -e py - name: Upload coverage report run: bash <(curl -s https://codecov.io/bash) tests-other-os: name: "Test: py3.8, ${{ matrix.os }}" runs-on: "${{ matrix.os }}" strategy: matrix: os: [macos-latest, windows-latest] steps: - uses: actions/checkout@v2 - name: Set up Python 3.8 uses: actions/setup-python@v2 with: python-version: 3.8 - name: Install tox run: pip install tox - name: Run tests run: tox -e py itemadapter-0.4.0/.gitignore000066400000000000000000000001271411171767700160160ustar00rootroot00000000000000*.pyc .~lock* .DS_Store .mypy_cache/ *.egg-info/ .tox/ .coverage htmlcov/ coverage.xml itemadapter-0.4.0/Changelog.md000066400000000000000000000042371411171767700162450ustar00rootroot00000000000000# Changelog ### 0.4.0 (2021-08-26) Added `ItemAdapter.is_item_class` and 
`ItemAdapter.get_field_meta_from_class` ([#54](https://github.com/scrapy/itemadapter/pull/54)) ### 0.3.0 (2021-07-15) Added built-in support for `pydantic` models ([#53](https://github.com/scrapy/itemadapter/pull/53)) ### 0.2.0 (2020-11-06) Adapter interface: added the ability to support arbitrary types, by implementing a MutableMapping-based interface. By way of this change, now any type can be used as a Scrapy item. ### 0.1.1 (2020-09-28) Dropped support for Python 3.5 (#38). The new `get_field_meta_from_class` function offers the same functionality as `ItemAdapter.get_field_meta` but for an item class, as opposed to an item object (#34, #35). `ItemAdapter.__repr__` no longer raises exceptions caused by the underlying item (#31, #41). Minor improvement to the release process (#37), and cleanup of test warnings (#40). ### 0.1.0 (2020-06-10) Added `ItemAdapter.asdict`, which allows converting an item and all of its nested items into `dict` objects (#27, #29). Improved `ItemAdapter` performance by reducing time complexity for lookups and traversals for dataclass and attrs items (#28). ### 0.0.8 (2020-05-22) `ItemAdapter.field_names` now returns a `KeysView` instead of a `list`. Minor CI and test changes. ### 0.0.7 (2020-05-22) `ItemAdapter.get_field_meta` now returns an empty `MappingProxyType` object for items without metadata support, instead of raising `TypeError`. Improved the README and some docstrings. Provided full test coverage, and refactored CI configuration, test configuration and tests themselves. ### 0.0.6 (2020-05-09) Added support for Scrapy’s `BaseItem`. Refactored and extended tests. Code style and documentation fixes. ### 0.0.5 (2020-04-28) Removed support for `MutableMapping`. ### 0.0.4 (2020-04-28) Removed metadata support for arbitrary mutable mappings. ### 0.0.3 (2020-04-27) Rebuild for the Python Package Index. ### 0.0.2 (2020-04-27) Split the implementation into several files for better code organization, and without an impact on the existing API import paths. Also improved the README. ### 0.0.1 (2020-04-25) Initial release. itemadapter-0.4.0/LICENSE000066400000000000000000000026651411171767700150440ustar00rootroot00000000000000Copyright 2020 Eugenio Lacuesta Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. itemadapter-0.4.0/MANIFEST.in000066400000000000000000000000421411171767700155600ustar00rootroot00000000000000include LICENSE include README.md itemadapter-0.4.0/README.md000066400000000000000000000364551411171767700153220ustar00rootroot00000000000000# itemadapter [![version](https://img.shields.io/pypi/v/itemadapter.svg)](https://pypi.python.org/pypi/itemadapter) [![pyversions](https://img.shields.io/pypi/pyversions/itemadapter.svg)](https://pypi.python.org/pypi/itemadapter) [![actions](https://github.com/scrapy/itemadapter/workflows/Build/badge.svg)](https://github.com/scrapy/itemadapter/actions) [![codecov](https://codecov.io/gh/scrapy/itemadapter/branch/master/graph/badge.svg)](https://codecov.io/gh/scrapy/itemadapter) The `ItemAdapter` class is a wrapper for data container objects, providing a common interface to handle objects of different types in an uniform manner, regardless of their underlying implementation. Currently supported types are: * [`scrapy.item.Item`](https://docs.scrapy.org/en/latest/topics/items.html#scrapy.item.Item) * [`dict`](https://docs.python.org/3/library/stdtypes.html#dict) * [`dataclass`](https://docs.python.org/3/library/dataclasses.html)-based classes * [`attrs`](https://www.attrs.org)-based classes * [`pydantic`](https://pydantic-docs.helpmanual.io/)-based classes Additionally, interaction with arbitrary types is supported, by implementing a pre-defined interface (see [extending `itemadapter`](#extending-itemadapter)). --- ## Requirements * Python 3.6+ * [`scrapy`](https://scrapy.org/): optional, needed to interact with `scrapy` items * `dataclasses` ([stdlib](https://docs.python.org/3/library/dataclasses.html) in Python 3.7+, or its [backport](https://pypi.org/project/dataclasses/) in Python 3.6): optional, needed to interact with `dataclass`-based items * [`attrs`](https://pypi.org/project/attrs/): optional, needed to interact with `attrs`-based items * [`pydantic`](https://pypi.org/project/pydantic/): optional, needed to interact with `pydantic`-based items --- ## Installation `itemadapter` is available on [`PyPI`](https://pypi.python.org/pypi/itemadapter), it can be installed with `pip`: ``` pip install itemadapter ``` --- ## License `itemadapter` is distributed under a [BSD-3](https://opensource.org/licenses/BSD-3-Clause) license. --- ## Basic usage The following is a simple example using a `dataclass` object. Consider the following type definition: ```python >>> from dataclasses import dataclass >>> from itemadapter import ItemAdapter, is_item >>> @dataclass ... class InventoryItem: ... name: str ... price: float ... 
stock: int
>>>
```

An `ItemAdapter` object can be treated much like a dictionary:

```python
>>> obj = InventoryItem(name='foo', price=20.5, stock=10)
>>> is_item(obj)
True
>>> adapter = ItemAdapter(obj)
>>> len(adapter)
3
>>> adapter["name"]
'foo'
>>> adapter.get("price")
20.5
>>>
```

The wrapped object is modified in-place:

```python
>>> adapter["name"] = "bar"
>>> adapter.update({"price": 12.7, "stock": 9})
>>> adapter.item
InventoryItem(name='bar', price=12.7, stock=9)
>>> adapter.item is obj
True
>>>
```

### Converting to dict

The `ItemAdapter` class provides the `asdict` method, which converts nested items recursively.
Consider the following example:

```python
>>> from dataclasses import dataclass
>>> from itemadapter import ItemAdapter
>>> @dataclass
... class Price:
...     value: int
...     currency: str
>>> @dataclass
... class Product:
...     name: str
...     price: Price
>>>
```

```python
>>> item = Product("Stuff", Price(42, "UYU"))
>>> adapter = ItemAdapter(item)
>>> adapter.asdict()
{'name': 'Stuff', 'price': {'value': 42, 'currency': 'UYU'}}
>>>
```

Note that just passing an adapter object to the `dict` built-in also works, but it doesn't
traverse the object recursively converting nested items:

```python
>>> dict(adapter)
{'name': 'Stuff', 'price': Price(value=42, currency='UYU')}
>>>
```

---

## API reference

### Built-in adapters

The following adapters are included by default:

* `itemadapter.adapter.ScrapyItemAdapter`: handles `Scrapy` items
* `itemadapter.adapter.DictAdapter`: handles `Python` dictionaries
* `itemadapter.adapter.DataclassAdapter`: handles `dataclass` objects
* `itemadapter.adapter.AttrsAdapter`: handles `attrs` objects
* `itemadapter.adapter.PydanticAdapter`: handles `pydantic` objects

### class `itemadapter.adapter.ItemAdapter(item: Any)`

This is the main entry point for the package. Typically, user code wraps an item using this
class and handles it through the provided interface. `ItemAdapter` implements the
[`MutableMapping`](https://docs.python.org/3/library/collections.abc.html#collections.abc.MutableMapping)
interface, providing a `dict`-like API to manipulate data for the object it wraps
(which is modified in-place).

**Attributes**

#### class attribute `ADAPTER_CLASSES: collections.deque`

Stores the currently registered adapter classes. Being a
[`collections.deque`](https://docs.python.org/3/library/collections.html#collections.deque),
it supports efficient addition/deletion of adapter classes at both ends.

The order in which the adapters are registered is important. When an `ItemAdapter` object is
created for a specific item, the registered adapters are traversed in order and the first
adapter class to return `True` for the `is_item` class method is used for all subsequent
operations. The default order is the one defined in the
[built-in adapters](#built-in-adapters) section.

See the section on [extending itemadapter](#extending-itemadapter) for additional information.

**Methods**

#### class method `is_item(item: Any) -> bool`

Return `True` if any of the registered adapters can handle the item
(i.e. if any of them returns `True` for its `is_item` method with `item` as argument),
`False` otherwise.

#### class method `is_item_class(item_class: type) -> bool`

Return `True` if any of the registered adapters can handle the item class
(i.e. if any of them returns `True` for its `is_item_class` method with `item_class` as argument),
`False` otherwise.
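For illustration (this session is not taken from the upstream docs), assuming the
`dataclass`-based `InventoryItem` from the basic usage section above is defined, the two
checks behave roughly as follows:

```python
>>> from itemadapter import ItemAdapter
>>> ItemAdapter.is_item({"name": "foo"})      # dict instances are items
True
>>> ItemAdapter.is_item("not an item")        # arbitrary objects are not
False
>>> ItemAdapter.is_item_class(dict)
True
>>> ItemAdapter.is_item_class(InventoryItem)  # dataclass-based item class
True
>>> ItemAdapter.is_item_class(int)
False
>>>
```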
#### class method `get_field_meta_from_class(item_class: type, field_name: str) -> MappingProxyType`

Return a [`types.MappingProxyType`](https://docs.python.org/3/library/types.html#types.MappingProxyType)
object, which is a read-only mapping with metadata about the given field. If the item class does not
support field metadata, or there is no metadata for the given field, an empty object is returned.

The returned value is taken from the following sources, depending on the item type:

* [`scrapy.item.Field`](https://docs.scrapy.org/en/latest/topics/items.html#item-fields) for `scrapy.item.Item`s
* [`dataclasses.field.metadata`](https://docs.python.org/3/library/dataclasses.html#dataclasses.field) for `dataclass`-based items
* [`attr.Attribute.metadata`](https://www.attrs.org/en/stable/examples.html#metadata) for `attrs`-based items
* [`pydantic.fields.FieldInfo`](https://pydantic-docs.helpmanual.io/usage/schema/#field-customisation) for `pydantic`-based items

#### `get_field_meta(field_name: str) -> MappingProxyType`

Return metadata for the given field, if available. Unless overridden in a custom adapter class,
by default this method calls the adapter's `get_field_meta_from_class` method, passing the
wrapped item's class.

#### `field_names() -> collections.abc.KeysView`

Return a [keys view](https://docs.python.org/3/library/collections.abc.html#collections.abc.KeysView)
with the names of all the defined fields for the item.

#### `asdict() -> dict`

Return a `dict` object with the contents of the adapter. This works slightly differently from
calling `dict(adapter)`, because it's applied recursively to nested items (if there are any).

### function `itemadapter.utils.is_item(obj: Any) -> bool`

Return `True` if the given object belongs to (at least) one of the supported types,
`False` otherwise. This is an alias for `itemadapter.adapter.ItemAdapter.is_item`.

### function `itemadapter.utils.get_field_meta_from_class(item_class: type, field_name: str) -> types.MappingProxyType`

Alias for `itemadapter.adapter.ItemAdapter.get_field_meta_from_class`

---

## Metadata support

`scrapy.item.Item`, `dataclass`, `attrs`, and `pydantic` objects allow the definition of
arbitrary field metadata. This can be accessed through a
[`MappingProxyType`](https://docs.python.org/3/library/types.html#types.MappingProxyType)
object, which can be retrieved from an item instance with
`itemadapter.adapter.ItemAdapter.get_field_meta`, or from an item class with the
`itemadapter.adapter.ItemAdapter.get_field_meta_from_class` method
(or its alias `itemadapter.utils.get_field_meta_from_class`). The source of the data depends
on the underlying type (see the docs for `ItemAdapter.get_field_meta_from_class`).

#### `scrapy.item.Item` objects

```python
>>> from scrapy.item import Item, Field
>>> from itemadapter import ItemAdapter
>>> class InventoryItem(Item):
...     name = Field(serializer=str)
...     value = Field(serializer=int, limit=100)
...
>>> adapter = ItemAdapter(InventoryItem(name="foo", value=10))
>>> adapter.get_field_meta("name")
mappingproxy({'serializer': <class 'str'>})
>>> adapter.get_field_meta("value")
mappingproxy({'serializer': <class 'int'>, 'limit': 100})
>>>
```

#### `dataclass` objects

```python
>>> from dataclasses import dataclass, field
>>> @dataclass
... class InventoryItem:
...     name: str = field(metadata={"serializer": str})
...     value: int = field(metadata={"serializer": int, "limit": 100})
...
>>> adapter = ItemAdapter(InventoryItem(name="foo", value=10))
>>> adapter.get_field_meta("name")
mappingproxy({'serializer': <class 'str'>})
>>> adapter.get_field_meta("value")
mappingproxy({'serializer': <class 'int'>, 'limit': 100})
>>>
```

#### `attrs` objects

```python
>>> import attr
>>> @attr.s
... class InventoryItem:
...     name = attr.ib(metadata={"serializer": str})
...     value = attr.ib(metadata={"serializer": int, "limit": 100})
...
>>> adapter = ItemAdapter(InventoryItem(name="foo", value=10))
>>> adapter.get_field_meta("name")
mappingproxy({'serializer': <class 'str'>})
>>> adapter.get_field_meta("value")
mappingproxy({'serializer': <class 'int'>, 'limit': 100})
>>>
```

#### `pydantic` objects

```python
>>> from pydantic import BaseModel, Field
>>> class InventoryItem(BaseModel):
...     name: str = Field(serializer=str)
...     value: int = Field(serializer=int, limit=100)
...
>>> adapter = ItemAdapter(InventoryItem(name="foo", value=10))
>>> adapter.get_field_meta("name")
mappingproxy({'serializer': <class 'str'>})
>>> adapter.get_field_meta("value")
mappingproxy({'serializer': <class 'int'>, 'limit': 100})
>>>
```

---

## Extending `itemadapter`

This package allows handling arbitrary item classes, by implementing an adapter interface:

_class `itemadapter.adapter.AdapterInterface(item: Any)`_

Abstract Base Class for adapters. An adapter that handles a specific type of item must inherit
from this class and implement the abstract methods defined on it. `AdapterInterface` inherits from
[`collections.abc.MutableMapping`](https://docs.python.org/3/library/collections.abc.html#collections.abc.MutableMapping),
so all methods from the `MutableMapping` interface must be implemented as well.

* _class method `is_item_class(cls, item_class: type) -> bool`_

    Return `True` if the adapter can handle the given item class, `False` otherwise.
    Abstract (mandatory).

* _class method `is_item(cls, item: Any) -> bool`_

    Return `True` if the adapter can handle the given item, `False` otherwise.
    The default implementation calls `cls.is_item_class(item.__class__)`.

* _class method `get_field_meta_from_class(cls, item_class: type, field_name: str) -> types.MappingProxyType`_

    Return metadata for the given item class and field name, if available.
    By default, this method returns an empty `MappingProxyType` object.
    Please supply your own method definition if you want to handle field metadata based on custom logic.
    See the [section on metadata support](#metadata-support) for additional information.

* _method `get_field_meta(self, field_name: str) -> types.MappingProxyType`_

    Return metadata for the given field name, if available. It's usually not necessary to override
    this method, since the `itemadapter.adapter.AdapterInterface` base class provides a default
    implementation that calls the adapter's `get_field_meta_from_class` method with the wrapped
    item's class as argument.
    See the [section on metadata support](#metadata-support) for additional information.

* _method `field_names(self) -> collections.abc.KeysView`_:

    Return a [dynamic view](https://docs.python.org/3/library/collections.abc.html#collections.abc.KeysView)
    of the item's field names. By default, this method returns the result of calling `keys()` on the
    current adapter, i.e., its return value depends on the implementation of the methods from the
    `MutableMapping` interface (more specifically, it depends on the return value of `__iter__`).

    You might want to override this method if you want a way to get all fields for an item, whether
    or not they are populated. For instance, Scrapy uses this method to define column names when
    exporting items to CSV.
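For illustration only, a minimal adapter for a hypothetical custom item class could look roughly
like the following sketch (the `ArbitraryItem` class, its `_data` attribute and the adapter name
are made up for this example and are not part of the package):

```python
from typing import Any, Iterator

from itemadapter.adapter import AdapterInterface


class ArbitraryItem:
    """Hypothetical item class that keeps its values in a plain dict."""

    def __init__(self, **kwargs) -> None:
        self._data = dict(kwargs)


class ArbitraryItemAdapter(AdapterInterface):
    """Sketch of an adapter for ArbitraryItem: only the abstract methods are implemented."""

    @classmethod
    def is_item_class(cls, item_class: type) -> bool:
        # handle ArbitraryItem and any of its subclasses
        return issubclass(item_class, ArbitraryItem)

    # MutableMapping interface, delegating to the wrapped item's _data dict

    def __getitem__(self, field_name: str) -> Any:
        return self.item._data[field_name]

    def __setitem__(self, field_name: str, value: Any) -> None:
        self.item._data[field_name] = value

    def __delitem__(self, field_name: str) -> None:
        del self.item._data[field_name]

    def __iter__(self) -> Iterator:
        return iter(self.item._data)

    def __len__(self) -> int:
        return len(self.item._data)
```

Such an adapter still needs to be registered before `ItemAdapter` can use it, as shown in the
next section.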
### Registering an adapter Add your custom adapter class to the `itemadapter.adapter.ItemAdapter.ADAPTER_CLASSES` class attribute in order to handle custom item classes: **Example** ```python >>> from itemadapter.adapter import ItemAdapter >>> from tests.test_interface import BaseFakeItemAdapter, FakeItemClass >>> >>> ItemAdapter.ADAPTER_CLASSES.appendleft(BaseFakeItemAdapter) >>> item = FakeItemClass() >>> adapter = ItemAdapter(item) >>> adapter >>> ``` --- ## More examples ### `scrapy.item.Item` objects ```python >>> from scrapy.item import Item, Field >>> from itemadapter import ItemAdapter >>> class InventoryItem(Item): ... name = Field() ... price = Field() ... >>> item = InventoryItem(name="foo", price=10) >>> adapter = ItemAdapter(item) >>> adapter.item is item True >>> adapter["name"] 'foo' >>> adapter["name"] = "bar" >>> adapter["price"] = 5 >>> item {'name': 'bar', 'price': 5} >>> ``` ### `dict` ```python >>> from itemadapter import ItemAdapter >>> item = dict(name="foo", price=10) >>> adapter = ItemAdapter(item) >>> adapter.item is item True >>> adapter["name"] 'foo' >>> adapter["name"] = "bar" >>> adapter["price"] = 5 >>> item {'name': 'bar', 'price': 5} >>> ``` ### `dataclass` objects ```python >>> from dataclasses import dataclass >>> from itemadapter import ItemAdapter >>> @dataclass ... class InventoryItem: ... name: str ... price: int ... >>> item = InventoryItem(name="foo", price=10) >>> adapter = ItemAdapter(item) >>> adapter.item is item True >>> adapter["name"] 'foo' >>> adapter["name"] = "bar" >>> adapter["price"] = 5 >>> item InventoryItem(name='bar', price=5) >>> ``` ### `attrs` objects ```python >>> import attr >>> from itemadapter import ItemAdapter >>> @attr.s ... class InventoryItem: ... name = attr.ib() ... price = attr.ib() ... >>> item = InventoryItem(name="foo", price=10) >>> adapter = ItemAdapter(item) >>> adapter.item is item True >>> adapter["name"] 'foo' >>> adapter["name"] = "bar" >>> adapter["price"] = 5 >>> item InventoryItem(name='bar', price=5) >>> ``` ### `pydantic` objects ```python >>> from pydantic import BaseModel >>> from itemadapter import ItemAdapter >>> class InventoryItem(BaseModel): ... name: str ... price: int ... 
>>> item = InventoryItem(name="foo", price=10) >>> adapter = ItemAdapter(item) >>> adapter.item is item True >>> adapter["name"] 'foo' >>> adapter["name"] = "bar" >>> adapter["price"] = 5 >>> item InventoryItem(name='bar', price=5) >>> ``` ## Changelog See the [full changelog](Changelog.md) itemadapter-0.4.0/itemadapter/000077500000000000000000000000001411171767700163255ustar00rootroot00000000000000itemadapter-0.4.0/itemadapter/__init__.py000066400000000000000000000002131411171767700204320ustar00rootroot00000000000000from .adapter import ItemAdapter # noqa: F401 from .utils import get_field_meta_from_class, is_item # noqa: F401 __version__ = "0.4.0" itemadapter-0.4.0/itemadapter/adapter.py000066400000000000000000000261521411171767700203250ustar00rootroot00000000000000from abc import abstractmethod, ABCMeta from collections import deque from collections.abc import KeysView, MutableMapping from types import MappingProxyType from typing import Any, Deque, Iterator, Type from itemadapter.utils import ( _get_pydantic_model_metadata, _get_scrapy_item_classes, _is_attrs_class, _is_dataclass, _is_pydantic_model, is_attrs_instance, is_dataclass_instance, is_pydantic_instance, is_scrapy_item, ) __all__ = [ "AdapterInterface", "AttrsAdapter", "DataclassAdapter", "DictAdapter", "ItemAdapter", "PydanticAdapter", "ScrapyItemAdapter", ] class AdapterInterface(MutableMapping, metaclass=ABCMeta): """Abstract Base Class for adapters. An adapter that handles a specific type of item should inherit from this class and implement the abstract methods defined here, plus the abtract methods inherited from the MutableMapping base class. """ def __init__(self, item: Any) -> None: self.item = item @classmethod @abstractmethod def is_item_class(cls, item_class: type) -> bool: """Return True if the adapter can handle the given item class, False otherwise.""" raise NotImplementedError() @classmethod def is_item(cls, item: Any) -> bool: """Return True if the adapter can handle the given item, False otherwise.""" return cls.is_item_class(item.__class__) @classmethod def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: return MappingProxyType({}) def get_field_meta(self, field_name: str) -> MappingProxyType: """Return metadata for the given field name, if available.""" return self.get_field_meta_from_class(self.item.__class__, field_name) def field_names(self) -> KeysView: """Return a dynamic view of the item's field names.""" return self.keys() # type: ignore[return-value] class _MixinAttrsDataclassAdapter: _fields_dict: dict item: Any def get_field_meta(self, field_name: str) -> MappingProxyType: return self._fields_dict[field_name].metadata def field_names(self) -> KeysView: return KeysView(self._fields_dict) def __getitem__(self, field_name: str) -> Any: if field_name in self._fields_dict: return getattr(self.item, field_name) raise KeyError(field_name) def __setitem__(self, field_name: str, value: Any) -> None: if field_name in self._fields_dict: setattr(self.item, field_name, value) else: raise KeyError(f"{self.item.__class__.__name__} does not support field: {field_name}") def __delitem__(self, field_name: str) -> None: if field_name in self._fields_dict: try: delattr(self.item, field_name) except AttributeError: raise KeyError(field_name) else: raise KeyError(f"{self.item.__class__.__name__} does not support field: {field_name}") def __iter__(self) -> Iterator: return iter(attr for attr in self._fields_dict if hasattr(self.item, attr)) def __len__(self) -> int: return 
len(list(iter(self))) class AttrsAdapter(_MixinAttrsDataclassAdapter, AdapterInterface): def __init__(self, item: Any) -> None: super().__init__(item) import attr # store a reference to the item's fields to avoid O(n) lookups and O(n^2) traversals self._fields_dict = attr.fields_dict(self.item.__class__) @classmethod def is_item(cls, item: Any) -> bool: return is_attrs_instance(item) @classmethod def is_item_class(cls, item_class: type) -> bool: return _is_attrs_class(item_class) @classmethod def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: from attr import fields_dict try: return fields_dict(item_class)[field_name].metadata # type: ignore except KeyError: raise KeyError(f"{item_class.__name__} does not support field: {field_name}") class DataclassAdapter(_MixinAttrsDataclassAdapter, AdapterInterface): def __init__(self, item: Any) -> None: super().__init__(item) import dataclasses # store a reference to the item's fields to avoid O(n) lookups and O(n^2) traversals self._fields_dict = {field.name: field for field in dataclasses.fields(self.item)} @classmethod def is_item(cls, item: Any) -> bool: return is_dataclass_instance(item) @classmethod def is_item_class(cls, item_class: type) -> bool: return _is_dataclass(item_class) @classmethod def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: from dataclasses import fields for field in fields(item_class): if field.name == field_name: return field.metadata # type: ignore raise KeyError(f"{item_class.__name__} does not support field: {field_name}") class PydanticAdapter(AdapterInterface): item: Any @classmethod def is_item(cls, item: Any) -> bool: return is_pydantic_instance(item) @classmethod def is_item_class(cls, item_class: type) -> bool: return _is_pydantic_model(item_class) @classmethod def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: try: return _get_pydantic_model_metadata(item_class, field_name) except KeyError: raise KeyError(f"{item_class.__name__} does not support field: {field_name}") def field_names(self) -> KeysView: return KeysView(self.item.__fields__) def __getitem__(self, field_name: str) -> Any: if field_name in self.item.__fields__: return getattr(self.item, field_name) raise KeyError(field_name) def __setitem__(self, field_name: str, value: Any) -> None: if field_name in self.item.__fields__: setattr(self.item, field_name, value) else: raise KeyError(f"{self.item.__class__.__name__} does not support field: {field_name}") def __delitem__(self, field_name: str) -> None: if field_name in self.item.__fields__: try: delattr(self.item, field_name) except AttributeError: raise KeyError(field_name) else: raise KeyError(f"{self.item.__class__.__name__} does not support field: {field_name}") def __iter__(self) -> Iterator: return iter(attr for attr in self.item.__fields__ if hasattr(self.item, attr)) def __len__(self) -> int: return len(list(iter(self))) class _MixinDictScrapyItemAdapter: _fields_dict: dict item: Any def __getitem__(self, field_name: str) -> Any: return self.item[field_name] def __setitem__(self, field_name: str, value: Any) -> None: self.item[field_name] = value def __delitem__(self, field_name: str) -> None: del self.item[field_name] def __iter__(self) -> Iterator: return iter(self.item) def __len__(self) -> int: return len(self.item) class DictAdapter(_MixinDictScrapyItemAdapter, AdapterInterface): @classmethod def is_item_class(cls, item_class: type) -> bool: return issubclass(item_class, 
dict) def field_names(self) -> KeysView: return KeysView(self.item) class ScrapyItemAdapter(_MixinDictScrapyItemAdapter, AdapterInterface): @classmethod def is_item(cls, item: Any) -> bool: return is_scrapy_item(item) @classmethod def is_item_class(cls, item_class: type) -> bool: return issubclass(item_class, _get_scrapy_item_classes()) @classmethod def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: return MappingProxyType(item_class.fields[field_name]) # type: ignore def field_names(self) -> KeysView: return KeysView(self.item.fields) class ItemAdapter(MutableMapping): """Wrapper class to interact with data container objects. It provides a common interface to extract and set data without having to take the object's type into account. """ ADAPTER_CLASSES: Deque[Type[AdapterInterface]] = deque( [ ScrapyItemAdapter, DictAdapter, DataclassAdapter, AttrsAdapter, PydanticAdapter, ] ) def __init__(self, item: Any) -> None: for cls in self.ADAPTER_CLASSES: if cls.is_item(item): self.adapter = cls(item) break else: raise TypeError(f"No adapter found for objects of type: {type(item)} ({item})") @classmethod def is_item(cls, item: Any) -> bool: return any(adapter_class.is_item(item) for adapter_class in cls.ADAPTER_CLASSES) @classmethod def is_item_class(cls, item_class: type) -> bool: return any( adapter_class.is_item_class(item_class) for adapter_class in cls.ADAPTER_CLASSES ) @classmethod def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: for adapter_class in cls.ADAPTER_CLASSES: if adapter_class.is_item_class(item_class): return adapter_class.get_field_meta_from_class(item_class, field_name) raise TypeError(f"{item_class} is not a valid item class") @property def item(self) -> Any: return self.adapter.item def __repr__(self) -> str: values = ", ".join([f"{key}={value!r}" for key, value in self.items()]) return f"" def __getitem__(self, field_name: str) -> Any: return self.adapter.__getitem__(field_name) def __setitem__(self, field_name: str, value: Any) -> None: self.adapter.__setitem__(field_name, value) def __delitem__(self, field_name: str) -> None: self.adapter.__delitem__(field_name) def __iter__(self) -> Iterator: return self.adapter.__iter__() def __len__(self) -> int: return self.adapter.__len__() def get_field_meta(self, field_name: str) -> MappingProxyType: """Return metadata for the given field name.""" return self.adapter.get_field_meta(field_name) def field_names(self) -> KeysView: """Return read-only key view with the names of all the defined fields for the item.""" return self.adapter.field_names() def asdict(self) -> dict: """Return a dict object with the contents of the adapter. This works slightly different than calling `dict(adapter)`: it's applied recursively to nested items (if there are any). 
""" return {key: _asdict(value) for key, value in self.items()} def _asdict(obj: Any) -> Any: """Helper for ItemAdapter.asdict().""" if isinstance(obj, dict): return {key: _asdict(value) for key, value in obj.items()} elif isinstance(obj, (list, set, tuple)): return obj.__class__(_asdict(x) for x in obj) elif isinstance(obj, ItemAdapter): return obj.asdict() elif ItemAdapter.is_item(obj): return ItemAdapter(obj).asdict() else: return obj itemadapter-0.4.0/itemadapter/utils.py000066400000000000000000000100001411171767700200260ustar00rootroot00000000000000from types import MappingProxyType from typing import Any def _get_scrapy_item_classes() -> tuple: try: import scrapy except ImportError: return () else: try: _base_item_cls = getattr(scrapy.item, "_BaseItem", scrapy.item.BaseItem) # deprecated return (scrapy.item.Item, _base_item_cls) except AttributeError: return (scrapy.item.Item,) def _is_dataclass(obj: Any) -> bool: try: import dataclasses except ImportError: return False return dataclasses.is_dataclass(obj) def _is_attrs_class(obj: Any) -> bool: try: import attr except ImportError: return False return attr.has(obj) def _is_pydantic_model(obj: Any) -> bool: try: from pydantic import BaseModel except ImportError: return False return issubclass(obj, BaseModel) def _get_pydantic_model_metadata(item_model: Any, field_name: str) -> MappingProxyType: metadata = {} field = item_model.__fields__[field_name].field_info for attr in [ "alias", "title", "description", "const", "gt", "ge", "lt", "le", "multiple_of", "min_items", "max_items", "min_length", "max_length", "regex", ]: value = getattr(field, attr) if value is not None: metadata[attr] = value if not field.allow_mutation: metadata["allow_mutation"] = field.allow_mutation metadata.update(field.extra) return MappingProxyType(metadata) def is_dataclass_instance(obj: Any) -> bool: """Return True if the given object is a dataclass object, False otherwise. In py36, this function returns False if the "dataclasses" backport is not available. Taken from https://docs.python.org/3/library/dataclasses.html#dataclasses.is_dataclass. """ return _is_dataclass(obj) and not isinstance(obj, type) def is_pydantic_instance(obj: Any) -> bool: """Return True if the given object is a Pydantic model, False otherwise.""" return _is_pydantic_model(type(obj)) and not isinstance(obj, type) def is_attrs_instance(obj: Any) -> bool: """Return True if the given object is a attrs-based object, False otherwise.""" return _is_attrs_class(obj) and not isinstance(obj, type) def is_scrapy_item(obj: Any) -> bool: """Return True if the given object is a Scrapy item, False otherwise.""" try: import scrapy except ImportError: return False if isinstance(obj, scrapy.item.Item): return True try: # handle deprecated BaseItem BaseItem = getattr(scrapy.item, "_BaseItem", scrapy.item.BaseItem) return isinstance(obj, BaseItem) except AttributeError: return False def is_item(obj: Any) -> bool: """Return True if the given object belongs to one of the supported types, False otherwise. Alias for ItemAdapter.is_item """ from itemadapter.adapter import ItemAdapter return ItemAdapter.is_item(obj) def get_field_meta_from_class(item_class: type, field_name: str) -> MappingProxyType: """Return a read-only mapping with metadata for the given field name, within the given item class. If there is no metadata for the field, or the item class does not support field metadata, an empty object is returned. 
Field metadata is taken from different sources, depending on the item type: * scrapy.item.Item: corresponding scrapy.item.Field object * dataclass items: "metadata" attribute for the corresponding field * attrs items: "metadata" attribute for the corresponding field * pydantic models: corresponding pydantic.field.FieldInfo/ModelField object The returned value is an instance of types.MappingProxyType, i.e. a dynamic read-only view of the original mapping, which gets automatically updated if the original mapping changes. """ from itemadapter.adapter import ItemAdapter return ItemAdapter.get_field_meta_from_class(item_class, field_name) itemadapter-0.4.0/pyproject.toml000066400000000000000000000000361411171767700167410ustar00rootroot00000000000000[tool.black] line-length = 99 itemadapter-0.4.0/pytest.ini000066400000000000000000000001351411171767700160560ustar00rootroot00000000000000[pytest] filterwarnings = ignore:.*BaseItem.*:scrapy.exceptions.ScrapyDeprecationWarning itemadapter-0.4.0/setup.cfg000066400000000000000000000000361411171767700156460ustar00rootroot00000000000000[flake8] max-line-length = 99 itemadapter-0.4.0/setup.py000066400000000000000000000021601411171767700155370ustar00rootroot00000000000000import setuptools with open("README.md", "r") as fh: long_description = fh.read() setuptools.setup( name="itemadapter", version="0.4.0", license="BSD", description="Common interface for data container classes", long_description=long_description, long_description_content_type="text/markdown", author="Eugenio Lacuesta", author_email="eugenio.lacuesta@gmail.com", url="https://github.com/scrapy/itemadapter", packages=["itemadapter"], python_requires=">=3.6", classifiers=[ "Development Status :: 3 - Alpha", "License :: OSI Approved :: BSD License", "Programming Language :: Python", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", "Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.9", "Framework :: Scrapy", "Intended Audience :: Developers", "Topic :: Internet :: WWW/HTTP", "Topic :: Software Development :: Libraries :: Application Frameworks", "Topic :: Software Development :: Libraries :: Python Modules", ], ) itemadapter-0.4.0/tests/000077500000000000000000000000001411171767700151705ustar00rootroot00000000000000itemadapter-0.4.0/tests/__init__.py000066400000000000000000000061331411171767700173040ustar00rootroot00000000000000from typing import Optional from itemadapter.adapter import ItemAdapter try: import attr except ImportError: AttrsItem = None AttrsItemNested = None AttrsItemWithoutInit = None else: @attr.s class AttrsItem: name = attr.ib(default=None, metadata={"serializer": str}) value = attr.ib(default=None, metadata={"serializer": int}) @attr.s class AttrsItemNested: nested = attr.ib(type=AttrsItem) adapter = attr.ib(type=ItemAdapter) dict_ = attr.ib(type=dict) list_ = attr.ib(type=list) set_ = attr.ib(type=set) tuple_ = attr.ib(type=tuple) int_ = attr.ib(type=int) @attr.s(init=False) class AttrsItemWithoutInit: name = attr.ib(default=None, metadata={"serializer": str}) value = attr.ib(default=None, metadata={"serializer": int}) try: from dataclasses import dataclass, field except ImportError: DataClassItem = None DataClassItemNested = None DataClassWithoutInit = None else: @dataclass class DataClassItem: name: str = field(default_factory=lambda: None, metadata={"serializer": str}) value: int = field(default_factory=lambda: None, metadata={"serializer": int}) @dataclass class DataClassItemNested: nested: DataClassItem 
adapter: ItemAdapter dict_: dict list_: list set_: set tuple_: tuple int_: int @dataclass(init=False) class DataClassWithoutInit: name: str = field(metadata={"serializer": str}) value: int = field(metadata={"serializer": int}) try: from pydantic import BaseModel, Field as PydanticField except ImportError: PydanticModel = None PydanticSpecialCasesModel = None PydanticModelNested = None else: class PydanticModel(BaseModel): name: Optional[str] = PydanticField( default_factory=lambda: None, serializer=str, ) value: Optional[int] = PydanticField( default_factory=lambda: None, serializer=int, ) class PydanticSpecialCasesModel(BaseModel): special_cases: Optional[int] = PydanticField( default_factory=lambda: None, alias="special_cases", allow_mutation=False, ) class Config: validate_assignment = True class PydanticModelNested(BaseModel): nested: PydanticModel adapter: ItemAdapter dict_: dict list_: list set_: set tuple_: tuple int_: int class Config: arbitrary_types_allowed = True try: from scrapy.item import Item as ScrapyItem, Field except ImportError: ScrapyItem = None ScrapySubclassedItem = None ScrapySubclassedItemNested = None else: class ScrapySubclassedItem(ScrapyItem): name = Field(serializer=str) value = Field(serializer=int) class ScrapySubclassedItemNested(ScrapyItem): nested = Field() adapter = Field() dict_ = Field() list_ = Field() set_ = Field() tuple_ = Field() int_ = Field() itemadapter-0.4.0/tests/requirements.txt000066400000000000000000000001341411171767700204520ustar00rootroot00000000000000attrs dataclasses; python_version == "3.6" pydantic pytest-cov>=2.8 pytest>=5.4 scrapy>=2.0 itemadapter-0.4.0/tests/test_adapter.py000066400000000000000000000214101411171767700202170ustar00rootroot00000000000000import unittest from types import MappingProxyType from typing import KeysView from itemadapter.adapter import ItemAdapter from tests import ( AttrsItem, AttrsItemNested, AttrsItemWithoutInit, DataClassItem, DataClassItemNested, DataClassWithoutInit, PydanticModel, PydanticModelNested, ScrapySubclassedItem, ScrapySubclassedItemNested, ) class ItemAdapterReprTestCase(unittest.TestCase): def test_repr_dict(self): item = dict(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual(repr(adapter), "") @unittest.skipIf(not ScrapySubclassedItem, "scrapy module is not available") def test_repr_scrapy_item(self): item = ScrapySubclassedItem(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual( repr(adapter), "" ) @unittest.skipIf(not DataClassItem, "dataclasses module is not available") def test_repr_dataclass(self): item = DataClassItem(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual( repr(adapter), "", ) @unittest.skipIf(not DataClassWithoutInit, "dataclasses module is not available") def test_repr_dataclass_init_false(self): item = DataClassWithoutInit() adapter = ItemAdapter(item) self.assertEqual(repr(adapter), "") adapter["name"] = "set after init" self.assertEqual( repr(adapter), "" ) @unittest.skipIf(not AttrsItem, "attrs module is not available") def test_repr_attrs(self): item = AttrsItem(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual( repr(adapter), "", ) @unittest.skipIf(not AttrsItemWithoutInit, "attrs module is not available") def test_repr_attrs_init_false(self): item = AttrsItemWithoutInit() adapter = ItemAdapter(item) self.assertEqual(repr(adapter), "") adapter["name"] = "set after init" self.assertEqual( repr(adapter), "" ) @unittest.skipIf(not PydanticModel, "pydantic module is not available") def 
test_repr_pydantic(self): item = PydanticModel(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual( repr(adapter), "", ) class ItemAdapterInitError(unittest.TestCase): def test_non_item(self): with self.assertRaises(TypeError): ItemAdapter(ScrapySubclassedItem) with self.assertRaises(TypeError): ItemAdapter(dict) with self.assertRaises(TypeError): ItemAdapter(1234) class BaseTestMixin: item_class = None item_class_nested = None def setUp(self): if self.item_class is None: raise unittest.SkipTest() def test_get_set_value(self): item = self.item_class() adapter = ItemAdapter(item) self.assertEqual(adapter.get("name"), None) self.assertEqual(adapter.get("value"), None) adapter["name"] = "asdf" adapter["value"] = 1234 self.assertEqual(adapter.get("name"), "asdf") self.assertEqual(adapter.get("value"), 1234) self.assertEqual(adapter["name"], "asdf") self.assertEqual(adapter["value"], 1234) item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual(adapter.get("name"), "asdf") self.assertEqual(adapter.get("value"), 1234) self.assertEqual(adapter["name"], "asdf") self.assertEqual(adapter["value"], 1234) def test_get_value_keyerror(self): item = self.item_class() adapter = ItemAdapter(item) with self.assertRaises(KeyError): adapter["undefined_field"] def test_as_dict(self): item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual(dict(name="asdf", value=1234), dict(adapter)) def test_as_dict_nested(self): item = self.item_class_nested( nested=self.item_class(name="asdf", value=1234), adapter=ItemAdapter(dict(foo="bar", nested_list=[1, 2, 3, 4, 5])), dict_={"foo": "bar", "answer": 42, "nested_dict": {"a": "b"}}, list_=[1, 2, 3], set_={1, 2, 3}, tuple_=(1, 2, 3), int_=123, ) adapter = ItemAdapter(item) self.assertEqual( adapter.asdict(), dict( nested=dict(name="asdf", value=1234), adapter=dict(foo="bar", nested_list=[1, 2, 3, 4, 5]), dict_={"foo": "bar", "answer": 42, "nested_dict": {"a": "b"}}, list_=[1, 2, 3], set_={1, 2, 3}, tuple_=(1, 2, 3), int_=123, ), ) def test_field_names(self): item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertIsInstance(adapter.field_names(), KeysView) self.assertEqual(sorted(adapter.field_names()), ["name", "value"]) class NonDictTestMixin(BaseTestMixin): def test_set_value_keyerror(self): item = self.item_class() adapter = ItemAdapter(item) with self.assertRaises(KeyError): adapter["undefined_field"] = "some value" def test_metadata_common(self): adapter = ItemAdapter(self.item_class()) self.assertIsInstance(adapter.get_field_meta("name"), MappingProxyType) self.assertIsInstance(adapter.get_field_meta("value"), MappingProxyType) with self.assertRaises(KeyError): adapter.get_field_meta("undefined_field") def test_get_field_meta_defined_fields(self): adapter = ItemAdapter(self.item_class()) self.assertEqual(adapter.get_field_meta("name"), MappingProxyType({"serializer": str})) self.assertEqual(adapter.get_field_meta("value"), MappingProxyType({"serializer": int})) def test_delitem_len_iter(self): item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual(len(adapter), 2) self.assertEqual(sorted(list(iter(adapter))), ["name", "value"]) del adapter["name"] self.assertEqual(len(adapter), 1) self.assertEqual(sorted(list(iter(adapter))), ["value"]) del adapter["value"] self.assertEqual(len(adapter), 0) self.assertEqual(sorted(list(iter(adapter))), []) with self.assertRaises(KeyError): del adapter["name"] with 
self.assertRaises(KeyError): del adapter["value"] with self.assertRaises(KeyError): del adapter["undefined_field"] class DictTestCase(unittest.TestCase, BaseTestMixin): item_class = dict item_class_nested = dict def test_get_value_keyerror_item_dict(self): """Instantiate without default values.""" adapter = ItemAdapter(self.item_class()) with self.assertRaises(KeyError): adapter["name"] def test_empty_metadata(self): adapter = ItemAdapter(self.item_class(name="foo", value=5)) for field_name in ("name", "value", "undefined_field"): self.assertEqual(adapter.get_field_meta(field_name), MappingProxyType({})) def test_field_names_updated(self): item = self.item_class(name="asdf") field_names = ItemAdapter(item).field_names() self.assertEqual(sorted(field_names), ["name"]) item["value"] = 1234 self.assertEqual(sorted(field_names), ["name", "value"]) class ScrapySubclassedItemTestCase(NonDictTestMixin, unittest.TestCase): item_class = ScrapySubclassedItem item_class_nested = ScrapySubclassedItemNested def test_get_value_keyerror_item_dict(self): """Instantiate without default values.""" adapter = ItemAdapter(self.item_class()) with self.assertRaises(KeyError): adapter["name"] class PydanticModelTestCase(NonDictTestMixin, unittest.TestCase): item_class = PydanticModel item_class_nested = PydanticModelNested class DataClassItemTestCase(NonDictTestMixin, unittest.TestCase): item_class = DataClassItem item_class_nested = DataClassItemNested class AttrsItemTestCase(NonDictTestMixin, unittest.TestCase): item_class = AttrsItem item_class_nested = AttrsItemNested itemadapter-0.4.0/tests/test_interface.py000066400000000000000000000174121411171767700205460ustar00rootroot00000000000000import unittest from types import MappingProxyType from typing import Any, Iterator, KeysView from unittest import mock from itemadapter.adapter import AdapterInterface, ItemAdapter class AdapterInterfaceTest(unittest.TestCase): @mock.patch.multiple(AdapterInterface, __abstractmethods__=set()) def test_interface_class_methods(self): with self.assertRaises(NotImplementedError): AdapterInterface.is_item(object()) with self.assertRaises(NotImplementedError): AdapterInterface.is_item_class(object) class FakeItemClass: _fields = { "name": {"serializer": str}, "value": {"serializer": int}, } def __init__(self, **kwargs) -> None: self._values = {**kwargs} class BaseFakeItemAdapter(AdapterInterface): """An adapter that only implements the required methods.""" @classmethod def is_item_class(cls, item_class: type) -> bool: return issubclass(item_class, FakeItemClass) def __getitem__(self, field_name: str) -> Any: if field_name in self.item._fields: return self.item._values[field_name] else: raise KeyError(field_name) def __setitem__(self, field_name: str, value: Any) -> None: if field_name in self.item._fields: self.item._values[field_name] = value else: raise KeyError(field_name) def __delitem__(self, field_name: str) -> None: if field_name in self.item._fields and field_name in self.item._values: del self.item._values[field_name] else: raise KeyError(field_name) def __iter__(self) -> Iterator: return iter(self.item._values) def __len__(self) -> int: return len(self.item._values) class FieldNamesFakeItemAdapter(BaseFakeItemAdapter): """An adapter that also implements the field_names method.""" def field_names(self) -> KeysView: return KeysView({key.upper(): value for key, value in self.item._fields.items()}) class MetadataFakeItemAdapter(BaseFakeItemAdapter): """An adapter that also implements metadata-related methods.""" @classmethod 
def get_field_meta_from_class(cls, item_class: type, field_name: str) -> MappingProxyType: return MappingProxyType(item_class._fields.get(field_name) or {}) class BaseFakeItemAdapterTest(unittest.TestCase): item_class = FakeItemClass adapter_class = BaseFakeItemAdapter def setUp(self): ItemAdapter.ADAPTER_CLASSES.appendleft(self.adapter_class) def tearDown(self): ItemAdapter.ADAPTER_CLASSES.popleft() def test_repr(self): item = self.item_class() adapter = ItemAdapter(item) self.assertEqual(repr(adapter), "") adapter["name"] = "asdf" adapter["value"] = 1234 self.assertEqual(repr(adapter), "") def test_get_set_value(self): item = self.item_class() adapter = ItemAdapter(item) self.assertEqual(adapter.get("name"), None) self.assertEqual(adapter.get("value"), None) adapter["name"] = "asdf" adapter["value"] = 1234 self.assertEqual(adapter.get("name"), "asdf") self.assertEqual(adapter.get("value"), 1234) self.assertEqual(adapter["name"], "asdf") self.assertEqual(adapter["value"], 1234) def test_get_set_value_init(self): item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual(adapter.get("name"), "asdf") self.assertEqual(adapter.get("value"), 1234) self.assertEqual(adapter["name"], "asdf") self.assertEqual(adapter["value"], 1234) def test_get_value_keyerror(self): item = self.item_class() adapter = ItemAdapter(item) with self.assertRaises(KeyError): adapter["_undefined_"] def test_as_dict(self): item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual(dict(name="asdf", value=1234), dict(adapter)) def test_set_value_keyerror(self): item = self.item_class() adapter = ItemAdapter(item) with self.assertRaises(KeyError): adapter["_undefined_"] = "some value" def test_delitem_len_iter(self): item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertEqual(len(adapter), 2) self.assertEqual(sorted(list(iter(adapter))), ["name", "value"]) del adapter["name"] self.assertEqual(len(adapter), 1) self.assertEqual(sorted(list(iter(adapter))), ["value"]) del adapter["value"] self.assertEqual(len(adapter), 0) self.assertEqual(sorted(list(iter(adapter))), []) with self.assertRaises(KeyError): del adapter["name"] with self.assertRaises(KeyError): del adapter["value"] with self.assertRaises(KeyError): del adapter["_undefined_"] def test_get_value_keyerror_item_dict(self): """Instantiate without default values.""" adapter = ItemAdapter(self.item_class()) with self.assertRaises(KeyError): adapter["name"] def test_get_field_meta(self): """Metadata is always empty for the default implementation.""" adapter = ItemAdapter(self.item_class()) self.assertEqual(adapter.get_field_meta("_undefined_"), MappingProxyType({})) self.assertEqual(adapter.get_field_meta("name"), MappingProxyType({})) self.assertEqual(adapter.get_field_meta("value"), MappingProxyType({})) def test_get_field_meta_from_class(self): """Metadata is always empty for the default implementation.""" self.assertEqual( ItemAdapter.get_field_meta_from_class(self.item_class, "_undefined_"), MappingProxyType({}), ) self.assertEqual( ItemAdapter.get_field_meta_from_class(self.item_class, "name"), MappingProxyType({}) ) self.assertEqual( ItemAdapter.get_field_meta_from_class(self.item_class, "value"), MappingProxyType({}) ) def test_field_names(self): item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertIsInstance(adapter.field_names(), KeysView) self.assertEqual(sorted(adapter.field_names()), ["name", "value"]) class 
MetadataFakeItemAdapterTest(BaseFakeItemAdapterTest): item_class = FakeItemClass adapter_class = MetadataFakeItemAdapter def test_get_field_meta(self): adapter = ItemAdapter(self.item_class()) self.assertEqual(adapter.get_field_meta("_undefined_"), MappingProxyType({})) self.assertEqual(adapter.get_field_meta("name"), MappingProxyType({"serializer": str})) self.assertEqual(adapter.get_field_meta("value"), MappingProxyType({"serializer": int})) def test_get_field_meta_from_class(self): self.assertEqual( ItemAdapter.get_field_meta_from_class(self.item_class, "_undefined_"), MappingProxyType({}), ) self.assertEqual( ItemAdapter.get_field_meta_from_class(self.item_class, "name"), MappingProxyType({"serializer": str}), ) self.assertEqual( ItemAdapter.get_field_meta_from_class(self.item_class, "value"), MappingProxyType({"serializer": int}), ) class FieldNamesFakeItemAdapterTest(BaseFakeItemAdapterTest): item_class = FakeItemClass adapter_class = FieldNamesFakeItemAdapter def test_field_names(self): item = self.item_class(name="asdf", value=1234) adapter = ItemAdapter(item) self.assertIsInstance(adapter.field_names(), KeysView) self.assertEqual(sorted(adapter.field_names()), ["NAME", "VALUE"]) itemadapter-0.4.0/tests/test_utils.py000066400000000000000000000324171411171767700177500ustar00rootroot00000000000000import importlib import unittest from unittest import mock from types import MappingProxyType from itemadapter.utils import ( get_field_meta_from_class, is_attrs_instance, is_dataclass_instance, is_item, is_pydantic_instance, is_scrapy_item, ) from itemadapter import ItemAdapter from tests import ( AttrsItem, DataClassItem, PydanticModel, PydanticSpecialCasesModel, ScrapyItem, ScrapySubclassedItem, ) def mocked_import(name, *args, **kwargs): """Allow only internal itemadapter imports.""" if name.split(".")[0] == "itemadapter": return importlib.__import__(name, *args, **kwargs) raise ImportError(name) class FieldMetaFromClassTestCase(unittest.TestCase): def test_invalid_item_class(self): with self.assertRaises(TypeError, msg="1 is not a valid item class"): get_field_meta_from_class(1, "field") with self.assertRaises(TypeError, msg="list is not a valid item class"): get_field_meta_from_class(list, "field") def test_empty_meta_for_dict(self): class DictSubclass(dict): pass self.assertEqual(get_field_meta_from_class(DictSubclass, "name"), MappingProxyType({})) self.assertEqual(get_field_meta_from_class(dict, "name"), MappingProxyType({})) class ItemLikeTestCase(unittest.TestCase): def test_false(self): self.assertFalse(is_item(int)) self.assertFalse(is_item(sum)) self.assertFalse(is_item(1234)) self.assertFalse(is_item(object())) self.assertFalse(is_item("a string")) self.assertFalse(is_item(b"some bytes")) self.assertFalse(is_item(["a", "list"])) self.assertFalse(is_item(("a", "tuple"))) self.assertFalse(is_item({"a", "set"})) self.assertFalse(is_item(dict)) self.assertFalse(is_item(ScrapyItem)) self.assertFalse(is_item(DataClassItem)) self.assertFalse(is_item(ScrapySubclassedItem)) self.assertFalse(is_item(AttrsItem)) self.assertFalse(is_item(PydanticModel)) self.assertFalse(ItemAdapter.is_item_class(list)) self.assertFalse(ItemAdapter.is_item_class(int)) self.assertFalse(ItemAdapter.is_item_class(tuple)) def test_true_dict(self): self.assertTrue(is_item({"a": "dict"})) self.assertTrue(ItemAdapter.is_item_class(dict)) @unittest.skipIf(not ScrapySubclassedItem, "scrapy module is not available") def test_true_scrapy(self): self.assertTrue(is_item(ScrapyItem())) 
class ItemLikeTestCase(unittest.TestCase):
    def test_false(self):
        self.assertFalse(is_item(int))
        self.assertFalse(is_item(sum))
        self.assertFalse(is_item(1234))
        self.assertFalse(is_item(object()))
        self.assertFalse(is_item("a string"))
        self.assertFalse(is_item(b"some bytes"))
        self.assertFalse(is_item(["a", "list"]))
        self.assertFalse(is_item(("a", "tuple")))
        self.assertFalse(is_item({"a", "set"}))
        self.assertFalse(is_item(dict))
        self.assertFalse(is_item(ScrapyItem))
        self.assertFalse(is_item(DataClassItem))
        self.assertFalse(is_item(ScrapySubclassedItem))
        self.assertFalse(is_item(AttrsItem))
        self.assertFalse(is_item(PydanticModel))
        self.assertFalse(ItemAdapter.is_item_class(list))
        self.assertFalse(ItemAdapter.is_item_class(int))
        self.assertFalse(ItemAdapter.is_item_class(tuple))

    def test_true_dict(self):
        self.assertTrue(is_item({"a": "dict"}))
        self.assertTrue(ItemAdapter.is_item_class(dict))

    @unittest.skipIf(not ScrapySubclassedItem, "scrapy module is not available")
    def test_true_scrapy(self):
        self.assertTrue(is_item(ScrapyItem()))
        self.assertTrue(is_item(ScrapySubclassedItem(name="asdf", value=1234)))
        self.assertTrue(ItemAdapter.is_item_class(ScrapyItem))
        self.assertTrue(ItemAdapter.is_item_class(ScrapySubclassedItem))

    @unittest.skipIf(not DataClassItem, "dataclasses module is not available")
    def test_true_dataclass(self):
        self.assertTrue(is_item(DataClassItem(name="asdf", value=1234)))
        self.assertTrue(ItemAdapter.is_item_class(DataClassItem))

    @unittest.skipIf(not AttrsItem, "attrs module is not available")
    def test_true_attrs(self):
        self.assertTrue(is_item(AttrsItem(name="asdf", value=1234)))
        self.assertTrue(ItemAdapter.is_item_class(AttrsItem))

    @unittest.skipIf(not PydanticModel, "pydantic module is not available")
    def test_true_pydantic(self):
        self.assertTrue(is_item(PydanticModel(name="asdf", value=1234)))
        self.assertTrue(ItemAdapter.is_item_class(PydanticModel))


class AttrsTestCase(unittest.TestCase):
    def test_false(self):
        self.assertFalse(is_attrs_instance(int))
        self.assertFalse(is_attrs_instance(sum))
        self.assertFalse(is_attrs_instance(1234))
        self.assertFalse(is_attrs_instance(object()))
        self.assertFalse(is_attrs_instance(ScrapyItem()))
        self.assertFalse(is_attrs_instance(DataClassItem()))
        self.assertFalse(is_attrs_instance(PydanticModel()))
        self.assertFalse(is_attrs_instance(ScrapySubclassedItem()))
        self.assertFalse(is_attrs_instance("a string"))
        self.assertFalse(is_attrs_instance(b"some bytes"))
        self.assertFalse(is_attrs_instance({"a": "dict"}))
        self.assertFalse(is_attrs_instance(["a", "list"]))
        self.assertFalse(is_attrs_instance(("a", "tuple")))
        self.assertFalse(is_attrs_instance({"a", "set"}))
        self.assertFalse(is_attrs_instance(AttrsItem))

    @unittest.skipIf(not AttrsItem, "attrs module is not available")
    @mock.patch("builtins.__import__", mocked_import)
    def test_module_not_available(self):
        self.assertFalse(is_attrs_instance(AttrsItem(name="asdf", value=1234)))
        with self.assertRaises(TypeError, msg="AttrsItem is not a valid item class"):
            get_field_meta_from_class(AttrsItem, "name")

    @unittest.skipIf(not AttrsItem, "attrs module is not available")
    def test_true(self):
        self.assertTrue(is_attrs_instance(AttrsItem()))
        self.assertTrue(is_attrs_instance(AttrsItem(name="asdf", value=1234)))
        # field metadata
        self.assertEqual(
            get_field_meta_from_class(AttrsItem, "name"), MappingProxyType({"serializer": str})
        )
        self.assertEqual(
            get_field_meta_from_class(AttrsItem, "value"), MappingProxyType({"serializer": int})
        )
        with self.assertRaises(KeyError, msg="AttrsItem does not support field: non_existent"):
            get_field_meta_from_class(AttrsItem, "non_existent")
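
# Illustrative sketch, not part of the upstream test suite: the attrs metadata
# asserted above is the kind declared through attr.ib(metadata={...}). The class
# below is hypothetical (named so it does not clash with the imported AttrsItem)
# and only shows the declaration shape; the guard skips it when attrs is missing.
if AttrsItem:
    import attr

    @attr.s
    class _ExampleAttrsItem:
        name = attr.ib(default=None, metadata={"serializer": str})
        value = attr.ib(default=None, metadata={"serializer": int})
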
class DataclassTestCase(unittest.TestCase):
    def test_false(self):
        self.assertFalse(is_dataclass_instance(int))
        self.assertFalse(is_dataclass_instance(sum))
        self.assertFalse(is_dataclass_instance(1234))
        self.assertFalse(is_dataclass_instance(object()))
        self.assertFalse(is_dataclass_instance(ScrapyItem()))
        self.assertFalse(is_dataclass_instance(AttrsItem()))
        self.assertFalse(is_dataclass_instance(PydanticModel()))
        self.assertFalse(is_dataclass_instance(ScrapySubclassedItem()))
        self.assertFalse(is_dataclass_instance("a string"))
        self.assertFalse(is_dataclass_instance(b"some bytes"))
        self.assertFalse(is_dataclass_instance({"a": "dict"}))
        self.assertFalse(is_dataclass_instance(["a", "list"]))
        self.assertFalse(is_dataclass_instance(("a", "tuple")))
        self.assertFalse(is_dataclass_instance({"a", "set"}))
        self.assertFalse(is_dataclass_instance(DataClassItem))

    @unittest.skipIf(not DataClassItem, "dataclasses module is not available")
    @mock.patch("builtins.__import__", mocked_import)
    def test_module_not_available(self):
        self.assertFalse(is_dataclass_instance(DataClassItem(name="asdf", value=1234)))
        with self.assertRaises(TypeError, msg="DataClassItem is not a valid item class"):
            get_field_meta_from_class(DataClassItem, "name")

    @unittest.skipIf(not DataClassItem, "dataclasses module is not available")
    def test_true(self):
        self.assertTrue(is_dataclass_instance(DataClassItem()))
        self.assertTrue(is_dataclass_instance(DataClassItem(name="asdf", value=1234)))
        # field metadata
        self.assertEqual(
            get_field_meta_from_class(DataClassItem, "name"), MappingProxyType({"serializer": str})
        )
        self.assertEqual(
            get_field_meta_from_class(DataClassItem, "value"),
            MappingProxyType({"serializer": int}),
        )
        with self.assertRaises(KeyError, msg="DataClassItem does not support field: non_existent"):
            get_field_meta_from_class(DataClassItem, "non_existent")


class PydanticTestCase(unittest.TestCase):
    def test_false(self):
        self.assertFalse(is_pydantic_instance(int))
        self.assertFalse(is_pydantic_instance(sum))
        self.assertFalse(is_pydantic_instance(1234))
        self.assertFalse(is_pydantic_instance(object()))
        self.assertFalse(is_pydantic_instance(ScrapyItem()))
        self.assertFalse(is_pydantic_instance(AttrsItem()))
        self.assertFalse(is_pydantic_instance(DataClassItem()))
        self.assertFalse(is_pydantic_instance(ScrapySubclassedItem()))
        self.assertFalse(is_pydantic_instance("a string"))
        self.assertFalse(is_pydantic_instance(b"some bytes"))
        self.assertFalse(is_pydantic_instance({"a": "dict"}))
        self.assertFalse(is_pydantic_instance(["a", "list"]))
        self.assertFalse(is_pydantic_instance(("a", "tuple")))
        self.assertFalse(is_pydantic_instance({"a", "set"}))
        self.assertFalse(is_pydantic_instance(PydanticModel))

    @unittest.skipIf(not PydanticModel, "pydantic module is not available")
    @mock.patch("builtins.__import__", mocked_import)
    def test_module_not_available(self):
        self.assertFalse(is_pydantic_instance(PydanticModel(name="asdf", value=1234)))
        with self.assertRaises(TypeError, msg="PydanticModel is not a valid item class"):
            get_field_meta_from_class(PydanticModel, "name")

    @unittest.skipIf(not PydanticModel, "pydantic module is not available")
    def test_true(self):
        self.assertTrue(is_pydantic_instance(PydanticModel()))
        self.assertTrue(is_pydantic_instance(PydanticModel(name="asdf", value=1234)))
        # field metadata
        self.assertEqual(
            get_field_meta_from_class(PydanticModel, "name"),
            MappingProxyType({"serializer": str}),
        )
        self.assertEqual(
            get_field_meta_from_class(PydanticModel, "value"),
            MappingProxyType({"serializer": int}),
        )
        self.assertEqual(
            get_field_meta_from_class(PydanticSpecialCasesModel, "special_cases"),
            MappingProxyType({"alias": "special_cases", "allow_mutation": False}),
        )
        with self.assertRaises(KeyError, msg="PydanticModel does not support field: non_existent"):
            get_field_meta_from_class(PydanticModel, "non_existent")
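
# Illustrative sketch, not part of the upstream test suite: for dataclasses, the
# metadata asserted in DataclassTestCase above is the mapping passed to
# dataclasses.field(metadata=...), which get_field_meta_from_class() exposes as a
# read-only view. The class and helper names below are ours.
def _example_dataclass_field_meta():
    from dataclasses import dataclass, field

    @dataclass
    class _ExampleDataClassItem:
        name: str = field(default=None, metadata={"serializer": str})
        value: int = field(default=None, metadata={"serializer": int})

    assert get_field_meta_from_class(_ExampleDataClassItem, "name") == MappingProxyType(
        {"serializer": str}
    )
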
class ScrapyItemTestCase(unittest.TestCase):
    def test_false(self):
        self.assertFalse(is_scrapy_item(int))
        self.assertFalse(is_scrapy_item(sum))
        self.assertFalse(is_scrapy_item(1234))
        self.assertFalse(is_scrapy_item(object()))
        self.assertFalse(is_scrapy_item(AttrsItem()))
        self.assertFalse(is_scrapy_item(DataClassItem()))
        self.assertFalse(is_scrapy_item(PydanticModel()))
        self.assertFalse(is_scrapy_item("a string"))
        self.assertFalse(is_scrapy_item(b"some bytes"))
        self.assertFalse(is_scrapy_item({"a": "dict"}))
        self.assertFalse(is_scrapy_item(["a", "list"]))
        self.assertFalse(is_scrapy_item(("a", "tuple")))
        self.assertFalse(is_scrapy_item({"a", "set"}))
        self.assertFalse(is_scrapy_item(ScrapySubclassedItem))

    @unittest.skipIf(not ScrapySubclassedItem, "scrapy module is not available")
    @mock.patch("builtins.__import__", mocked_import)
    def test_module_not_available(self):
        self.assertFalse(is_scrapy_item(ScrapySubclassedItem(name="asdf", value=1234)))
        with self.assertRaises(TypeError, msg="ScrapySubclassedItem is not a valid item class"):
            get_field_meta_from_class(ScrapySubclassedItem, "name")

    @unittest.skipIf(not ScrapySubclassedItem, "scrapy module is not available")
    def test_true(self):
        self.assertTrue(is_scrapy_item(ScrapyItem()))
        self.assertTrue(is_scrapy_item(ScrapySubclassedItem()))
        self.assertTrue(is_scrapy_item(ScrapySubclassedItem(name="asdf", value=1234)))
        # field metadata
        self.assertEqual(
            get_field_meta_from_class(ScrapySubclassedItem, "name"),
            MappingProxyType({"serializer": str}),
        )
        self.assertEqual(
            get_field_meta_from_class(ScrapySubclassedItem, "value"),
            MappingProxyType({"serializer": int}),
        )


try:
    import scrapy
except ImportError:
    scrapy = None


class ScrapyDeprecatedBaseItemTestCase(unittest.TestCase):
    """Tests for deprecated classes. These will go away once the upstream classes are removed."""

    @unittest.skipIf(
        scrapy is None or not hasattr(scrapy.item, "_BaseItem"),
        "scrapy.item._BaseItem not available",
    )
    def test_deprecated_underscore_baseitem(self):
        class SubClassed_BaseItem(scrapy.item._BaseItem):
            pass

        self.assertTrue(is_scrapy_item(scrapy.item._BaseItem()))
        self.assertTrue(is_scrapy_item(SubClassed_BaseItem()))

    @unittest.skipIf(
        scrapy is None or not hasattr(scrapy.item, "BaseItem"),
        "scrapy.item.BaseItem not available",
    )
    def test_deprecated_baseitem(self):
        class SubClassedBaseItem(scrapy.item.BaseItem):
            pass

        self.assertTrue(is_scrapy_item(scrapy.item.BaseItem()))
        self.assertTrue(is_scrapy_item(SubClassedBaseItem()))

    @unittest.skipIf(scrapy is None, "scrapy module is not available")
    def test_removed_baseitem(self):
        """Mock the scrapy.item module so it does not contain the deprecated _BaseItem class."""

        class MockItemModule:
            Item = ScrapyItem

        with mock.patch("scrapy.item", MockItemModule):
            self.assertFalse(is_scrapy_item(dict()))
            self.assertEqual(
                get_field_meta_from_class(ScrapySubclassedItem, "name"),
                MappingProxyType({"serializer": str}),
            )
            self.assertEqual(
                get_field_meta_from_class(ScrapySubclassedItem, "value"),
                MappingProxyType({"serializer": int}),
            )


itemadapter-0.4.0/tox.ini

[tox]
envlist = bandit,flake8,typing,black,py

[testenv]
deps =
    -rtests/requirements.txt
commands =
    pytest --verbose --cov=itemadapter --cov-report=term-missing --cov-report=html --cov-report=xml --doctest-glob=README.md {posargs: itemadapter README.md tests}

[testenv:bandit]
basepython = python3
deps =
    bandit
commands =
    bandit -r {posargs:itemadapter}

[testenv:flake8]
basepython = python3
deps =
    flake8>=3.7.9
commands =
    flake8 --exclude=.git,.tox,venv* {posargs:itemadapter tests}

[testenv:typing]
basepython = python3
deps =
    mypy==0.770
commands =
    mypy --show-error-codes --ignore-missing-imports --follow-imports=skip {posargs:itemadapter}

[testenv:black]
basepython = python3
deps =
    black>=19.10b0
commands =
    black --check {posargs:itemadapter tests}
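
# Usage note, not part of the upstream file: running plain `tox` executes every
# environment in `envlist`, while `tox -e py` or `tox -e flake8` runs a single
# one; anything placed after `--` on the command line is substituted for the
# {posargs} placeholders above.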