==> scrapy-2.13.3/.git-blame-ignore-revs <==
# .git-blame-ignore-revs
# adding black formatter to all the code
e211ec0aa26ecae0da8ae55d064ea60e1efe4d0d
# reapplying black to the code with default line length
303f0a70fcf8067adf0a909c2096a5009162383a
# reapplying black again and removing line length on pre-commit black config
c5cdd0d30ceb68ccba04af0e71d1b8e6678e2962

==> scrapy-2.13.3/.gitattributes <==
tests/sample_data/** binary

==> scrapy-2.13.3/.github/ISSUE_TEMPLATE/bug_report.md <==
---
name: Bug report
about: Report a problem to help us improve
---

### Description

[Description of the issue]

### Steps to Reproduce

1. [First Step]
2. [Second Step]
3. [and so on...]

**Expected behavior:** [What you expect to happen]

**Actual behavior:** [What actually happens]

**Reproduces how often:** [What percentage of the time does it reproduce?]

### Versions

Please paste here the output of executing `scrapy version --verbose` in the command line.

### Additional context

Any additional information, configuration, data or output from commands that might be necessary to reproduce or understand the issue. Please try not to include screenshots of code or the command line; paste the contents as text instead. You can use [GitHub Flavored Markdown](https://help.github.com/en/articles/creating-and-highlighting-code-blocks) to make the text look better.

==> scrapy-2.13.3/.github/ISSUE_TEMPLATE/feature_request.md <==
---
name: Feature request
about: Suggest an idea for an enhancement or new feature
---

## Summary

One paragraph explanation of the feature.

## Motivation

Why are we doing this? What use cases does it support? What is the expected outcome?

## Describe alternatives you've considered

A clear and concise description of the alternative solutions you've considered. Be sure to explain why Scrapy's existing customizability isn't suitable for this feature.

## Additional context

Any additional information about the feature request here.

==> scrapy-2.13.3/.github/ISSUE_TEMPLATE/question.md <==
---
name: Question / Help
about: Ask a question about Scrapy or ask for help with your Scrapy code.
---

Thanks for taking an interest in Scrapy!

The Scrapy GitHub issue tracker is not meant for questions or help. Please ask for help in the [Scrapy community resources](https://scrapy.org/community/) instead.

The GitHub issue tracker's purpose is to deal with bug reports and feature requests for the project itself.
==> scrapy-2.13.3/.github/workflows/checks.yml <==
name: Checks
on:
  push:
    branches:
      - master
      - '[0-9]+.[0-9]+'
  pull_request:

concurrency:
  group: ${{github.workflow}}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  checks:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          - python-version: "3.13"
            env:
              TOXENV: pylint
          - python-version: "3.9"
            env:
              TOXENV: typing
          - python-version: "3.9"
            env:
              TOXENV: typing-tests
          - python-version: "3.13"  # Keep in sync with .readthedocs.yml
            env:
              TOXENV: docs
          - python-version: "3.13"
            env:
              TOXENV: twinecheck

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run check
        env: ${{ matrix.env }}
        run: |
          pip install -U tox
          tox

  pre-commit:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: pre-commit/action@v3.0.1

==> scrapy-2.13.3/.github/workflows/publish.yml <==
name: Publish
on:
  push:
    tags:
      - '[0-9]+.[0-9]+.[0-9]+'

concurrency:
  group: ${{github.workflow}}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  publish:
    name: Upload release to PyPI
    runs-on: ubuntu-latest
    environment:
      name: pypi
      url: https://pypi.org/p/Scrapy
    permissions:
      id-token: write
    steps:
      - uses: actions/checkout@v4
      - uses: actions/setup-python@v5
        with:
          python-version: "3.13"
      - run: |
          python -m pip install --upgrade build
          python -m build
      - name: Publish to PyPI
        uses: pypa/gh-action-pypi-publish@release/v1

==> scrapy-2.13.3/.github/workflows/tests-macos.yml <==
name: macOS
on:
  push:
    branches:
      - master
      - '[0-9]+.[0-9]+'
  pull_request:

concurrency:
  group: ${{github.workflow}}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  tests:
    runs-on: macos-latest
    strategy:
      fail-fast: false
      matrix:
        python-version: ["3.9", "3.10", "3.11", "3.12", "3.13"]

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run tests
        run: |
          pip install -U tox
          tox -e py

      - name: Upload coverage report
        uses: codecov/codecov-action@v5

      - name: Upload test results
        if: ${{ !cancelled() }}
        uses: codecov/test-results-action@v1
==> scrapy-2.13.3/.github/workflows/tests-ubuntu.yml <==
name: Ubuntu
on:
  push:
    branches:
      - master
      - '[0-9]+.[0-9]+'
  pull_request:

concurrency:
  group: ${{github.workflow}}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  tests:
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          - python-version: "3.9"
            env:
              TOXENV: py
          - python-version: "3.10"
            env:
              TOXENV: py
          - python-version: "3.11"
            env:
              TOXENV: py
          - python-version: "3.12"
            env:
              TOXENV: py
          - python-version: "3.13"
            env:
              TOXENV: py
          - python-version: "3.13"
            env:
              TOXENV: default-reactor
          - python-version: pypy3.10
            env:
              TOXENV: pypy3
          - python-version: pypy3.11
            env:
              TOXENV: pypy3

          # pinned deps
          - python-version: "3.9.21"
            env:
              TOXENV: pinned
          - python-version: "3.9.21"
            env:
              TOXENV: default-reactor-pinned
          - python-version: pypy3.10
            env:
              TOXENV: pypy3-pinned
          - python-version: "3.9.21"
            env:
              TOXENV: extra-deps-pinned
          - python-version: "3.9.21"
            env:
              TOXENV: botocore-pinned

          - python-version: "3.13"
            env:
              TOXENV: extra-deps
          - python-version: pypy3.11
            env:
              TOXENV: pypy3-extra-deps
          - python-version: "3.13"
            env:
              TOXENV: botocore

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install system libraries
        if: contains(matrix.python-version, 'pypy') || contains(matrix.env.TOXENV, 'pinned')
        run: |
          sudo apt-get update
          sudo apt-get install libxml2-dev libxslt-dev

      - name: Run tests
        env: ${{ matrix.env }}
        run: |
          pip install -U tox
          tox

      - name: Upload coverage report
        uses: codecov/codecov-action@v5

      - name: Upload test results
        if: ${{ !cancelled() }}
        uses: codecov/test-results-action@v1

==> scrapy-2.13.3/.github/workflows/tests-windows.yml <==
name: Windows
on:
  push:
    branches:
      - master
      - '[0-9]+.[0-9]+'
  pull_request:

concurrency:
  group: ${{github.workflow}}-${{ github.ref }}
  cancel-in-progress: true

jobs:
  tests:
    runs-on: windows-latest
    strategy:
      fail-fast: false
      matrix:
        include:
          - python-version: "3.9"
            env:
              TOXENV: py
          - python-version: "3.10"
            env:
              TOXENV: py
          - python-version: "3.11"
            env:
              TOXENV: py
          - python-version: "3.12"
            env:
              TOXENV: py
          - python-version: "3.13"
            env:
              TOXENV: py
          - python-version: "3.13"
            env:
              TOXENV: default-reactor

          # pinned deps
          - python-version: "3.9.13"
            env:
              TOXENV: pinned
          - python-version: "3.9.13"
            env:
              TOXENV: extra-deps-pinned

          - python-version: "3.13"
            env:
              TOXENV: extra-deps

    steps:
      - uses: actions/checkout@v4

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v5
        with:
          python-version: ${{ matrix.python-version }}

      - name: Run tests
        env: ${{ matrix.env }}
        run: |
          pip install -U tox
          tox

      - name: Upload coverage report
        uses: codecov/codecov-action@v5

      - name: Upload test results
        if: ${{ !cancelled() }}
        uses: codecov/test-results-action@v1

==> scrapy-2.13.3/.gitignore <==
/.vagrant
/scrapy.iml
*.pyc
_trial_temp*
dropin.cache
docs/build
*egg-info
.tox/
venv/
.venv/
build/
dist/
.idea/
.vscode/
htmlcov/
.pytest_cache/
.coverage
.coverage.*
coverage.*
*.junit.xml
test-output.*
.cache/
.mypy_cache/
/tests/keys/localhost.crt
/tests/keys/localhost.key

# Windows
Thumbs.db

# OSX miscellaneous
.DS_Store

==> scrapy-2.13.3/.pre-commit-config.yaml <==
repos:
  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.9.3
    hooks:
      - id: ruff
        args: [ --fix ]
      - id: ruff-format
  - repo: https://github.com/adamchainz/blacken-docs
    rev: 1.19.1
    hooks:
      - id: blacken-docs
        additional_dependencies:
          - black==24.10.0
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v5.0.0
    hooks:
      - id: trailing-whitespace

==> scrapy-2.13.3/.readthedocs.yml <==
version: 2
formats: all
sphinx:
  configuration: docs/conf.py
  fail_on_warning: true
build:
  os: ubuntu-24.04
  tools:
    # For available versions, see:
    # https://docs.readthedocs.io/en/stable/config-file/v2.html#build-tools-python
    python: "3.13"  # Keep in sync with .github/workflows/checks.yml
python:
  install:
    - requirements: docs/requirements.txt
    - path: .
==> scrapy-2.13.3/AUTHORS <==
Scrapy was brought to life by Shane Evans while hacking a scraping framework
prototype for Mydeco (mydeco.com). It soon became maintained, extended and
improved by Insophia (insophia.com), with the initial sponsorship of Mydeco
to bootstrap the project. In mid-2011, Scrapinghub (now Zyte) became the new
official maintainer.

Here is the list of the primary authors & contributors:

 * Pablo Hoffman
 * Daniel Graña
 * Martin Olveyra
 * Gabriel García
 * Michael Cetrulo
 * Artem Bogomyagkov
 * Damian Canabal
 * Andres Moreira
 * Ismael Carnales
 * Matías Aguirre
 * German Hoffmann
 * Anibal Pacheco
 * Bruno Deferrari
 * Shane Evans
 * Ezequiel Rivero
 * Patrick Mezard
 * Rolando Espinoza
 * Ping Yin
 * Lucian Ursu
 * Shuaib Khan
 * Didier Deshommes
 * Vikas Dhiman
 * Jochen Maes
 * Darian Moody
 * Jordi Lonch
 * Zuhao Wan
 * Steven Almeroth
 * Tom Mortimer-Jones
 * Chris Tilden
 * Alexandr N Zamaraev
 * Emanuel Schorsch
 * Michal Danilak
 * Natan Lao
 * Hasnain Lakhani
 * Pedro Faustino
 * Alex Cepoi
 * Ilya Baryshev
 * Libor Nenadál
 * Jae-Myoung Yu
 * Vladislav Poluhin
 * Marc Abramowitz
 * Valentin-Costel Hăloiu
 * Jason Yeo
 * Сергей Прохоров
 * Simon Ratne
 * Julien Duponchelle
 * Jochen Maes
 * Vikas Dhiman
 * Juan Picca
 * Nicolás Ramírez

==> scrapy-2.13.3/CODE_OF_CONDUCT.md <==
# Contributor Covenant Code of Conduct

## Our Pledge

We as members, contributors, and leaders pledge to make participation in our
community a harassment-free experience for everyone, regardless of age, body
size, visible or invisible disability, ethnicity, sex characteristics, gender
identity and expression, level of experience, education, socio-economic status,
nationality, personal appearance, race, caste, color, religion, or sexual
identity and orientation.

We pledge to act and interact in ways that contribute to an open, welcoming,
diverse, inclusive, and healthy community.

## Our Standards

Examples of behavior that contributes to a positive environment for our
community include:

* Demonstrating empathy and kindness toward other people
* Being respectful of differing opinions, viewpoints, and experiences
* Giving and gracefully accepting constructive feedback
* Accepting responsibility and apologizing to those affected by our mistakes,
  and learning from the experience
* Focusing on what is best not just for us as individuals, but for the overall
  community

Examples of unacceptable behavior include:

* The use of sexualized language or imagery, and sexual attention or advances
  of any kind
* Trolling, insulting or derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or email address,
  without their explicit permission
* Other conduct which could reasonably be considered inappropriate in a
  professional setting

## Enforcement Responsibilities

Community leaders are responsible for clarifying and enforcing our standards of
acceptable behavior and will take appropriate and fair corrective action in
response to any behavior that they deem inappropriate, threatening, offensive,
or harmful.

Community leaders have the right and responsibility to remove, edit, or reject
comments, commits, code, wiki edits, issues, and other contributions that are
not aligned to this Code of Conduct, and will communicate reasons for moderation
decisions when appropriate.
## Scope

This Code of Conduct applies within all community spaces, and also applies when
an individual is officially representing the community in public spaces.
Examples of representing our community include using an official e-mail
address, posting via an official social media account, or acting as an
appointed representative at an online or offline event.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported to the community leaders responsible for enforcement at
opensource@zyte.com. All complaints will be reviewed and investigated promptly
and fairly.

All community leaders are obligated to respect the privacy and security of the
reporter of any incident.

## Enforcement Guidelines

Community leaders will follow these Community Impact Guidelines in determining
the consequences for any action they deem in violation of this Code of Conduct:

### 1. Correction

**Community Impact**: Use of inappropriate language or other behavior deemed
unprofessional or unwelcome in the community.

**Consequence**: A private, written warning from community leaders, providing
clarity around the nature of the violation and an explanation of why the
behavior was inappropriate. A public apology may be requested.

### 2. Warning

**Community Impact**: A violation through a single incident or series of
actions.

**Consequence**: A warning with consequences for continued behavior. No
interaction with the people involved, including unsolicited interaction with
those enforcing the Code of Conduct, for a specified period of time. This
includes avoiding interactions in community spaces as well as external channels
like social media. Violating these terms may lead to a temporary or permanent
ban.

### 3. Temporary Ban

**Community Impact**: A serious violation of community standards, including
sustained inappropriate behavior.

**Consequence**: A temporary ban from any sort of interaction or public
communication with the community for a specified period of time. No public or
private interaction with the people involved, including unsolicited interaction
with those enforcing the Code of Conduct, is allowed during this period.
Violating these terms may lead to a permanent ban.

### 4. Permanent Ban

**Community Impact**: Demonstrating a pattern of violation of community
standards, including sustained inappropriate behavior, harassment of an
individual, or aggression toward or disparagement of classes of individuals.

**Consequence**: A permanent ban from any sort of public interaction within the
community.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 2.1, available at
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].

Community Impact Guidelines were inspired by
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].

For answers to common questions about this code of conduct, see the FAQ at
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
[https://www.contributor-covenant.org/translations][translations].
[homepage]: https://www.contributor-covenant.org
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
[Mozilla CoC]: https://github.com/mozilla/diversity
[FAQ]: https://www.contributor-covenant.org/faq
[translations]: https://www.contributor-covenant.org/translations

==> scrapy-2.13.3/CONTRIBUTING.md <==
The guidelines for contributing are available here:
https://docs.scrapy.org/en/master/contributing.html

Please do not abuse the issue tracker for support questions.
If your issue topic can be rephrased to "How to ...?", please use the
support channels to get it answered: https://scrapy.org/community/

==> scrapy-2.13.3/INSTALL.md <==
For information about installing Scrapy see:

* [Local docs](docs/intro/install.rst)
* [Online docs](https://docs.scrapy.org/en/latest/intro/install.html)

==> scrapy-2.13.3/LICENSE <==
Copyright (c) Scrapy developers.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice,
   this list of conditions, and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions, and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

3. Neither the name of Scrapy nor the names of its contributors may be used
   to endorse or promote products derived from this software without specific
   prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

==> scrapy-2.13.3/NEWS <==
See docs/news.rst
==> scrapy-2.13.3/README.rst <==
|logo|

.. |logo| image:: https://raw.githubusercontent.com/scrapy/scrapy/master/docs/_static/logo.svg
   :target: https://scrapy.org
   :alt: Scrapy
   :width: 480px

|version| |python_version| |ubuntu| |macos| |windows| |coverage| |conda| |deepwiki|

.. |version| image:: https://img.shields.io/pypi/v/Scrapy.svg
   :target: https://pypi.org/pypi/Scrapy
   :alt: PyPI Version

.. |python_version| image:: https://img.shields.io/pypi/pyversions/Scrapy.svg
   :target: https://pypi.org/pypi/Scrapy
   :alt: Supported Python Versions

.. |ubuntu| image:: https://github.com/scrapy/scrapy/workflows/Ubuntu/badge.svg
   :target: https://github.com/scrapy/scrapy/actions?query=workflow%3AUbuntu
   :alt: Ubuntu

.. |macos| image:: https://github.com/scrapy/scrapy/workflows/macOS/badge.svg
   :target: https://github.com/scrapy/scrapy/actions?query=workflow%3AmacOS
   :alt: macOS

.. |windows| image:: https://github.com/scrapy/scrapy/workflows/Windows/badge.svg
   :target: https://github.com/scrapy/scrapy/actions?query=workflow%3AWindows
   :alt: Windows

.. |coverage| image:: https://img.shields.io/codecov/c/github/scrapy/scrapy/master.svg
   :target: https://codecov.io/github/scrapy/scrapy?branch=master
   :alt: Coverage report

.. |conda| image:: https://anaconda.org/conda-forge/scrapy/badges/version.svg
   :target: https://anaconda.org/conda-forge/scrapy
   :alt: Conda Version

.. |deepwiki| image:: https://deepwiki.com/badge.svg
   :target: https://deepwiki.com/scrapy/scrapy
   :alt: Ask DeepWiki

Scrapy_ is a web scraping framework to extract structured data from websites.
It is cross-platform, and requires Python 3.9+. It is maintained by Zyte_
(formerly Scrapinghub) and `many other contributors`_.

.. _many other contributors: https://github.com/scrapy/scrapy/graphs/contributors
.. _Scrapy: https://scrapy.org/
.. _Zyte: https://www.zyte.com/

Install with:

.. code:: bash

    pip install scrapy

And follow the documentation_ to learn how to use it.

.. _documentation: https://docs.scrapy.org/en/latest/

If you wish to contribute, see Contributing_.

.. _Contributing: https://docs.scrapy.org/en/master/contributing.html
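The README above stops at installation. As a quick illustration of the API that the linked documentation teaches, here is a minimal spider sketch. This is hypothetical example code, not a file from this archive; `quotes.toscrape.com` is a public demo site commonly used for Scrapy tutorials.

```python
# Hypothetical illustration, not part of the scrapy-2.13.3 source tree.
# Run with: scrapy runspider quotes_spider.py -o quotes.jsonl
import scrapy


class QuotesSpider(scrapy.Spider):
    name = "quotes"
    start_urls = ["https://quotes.toscrape.com/"]

    def parse(self, response):
        # Each div.quote holds one quotation; yield a dict per item.
        for quote in response.css("div.quote"):
            yield {
                "text": quote.css("span.text::text").get(),
                "author": quote.css("small.author::text").get(),
            }
```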
==> scrapy-2.13.3/SECURITY.md <==
# Security Policy

## Supported Versions

| Version  | Supported          |
| -------- | ------------------ |
| 2.13.x   | :white_check_mark: |
| < 2.13.x | :x:                |

## Reporting a Vulnerability

Please report the vulnerability using
https://github.com/scrapy/scrapy/security/advisories/new.

==> scrapy-2.13.3/codecov.yml <==
comment:
  layout: "header, diff, tree"

coverage:
  status:
    project: false

==> scrapy-2.13.3/conftest.py <==
from pathlib import Path

import pytest
from twisted.web.http import H2_ENABLED

from scrapy.utils.reactor import install_reactor
from tests.keys import generate_keys


def _py_files(folder):
    return (str(p) for p in Path(folder).rglob("*.py"))


collect_ignore = [
    # not a test, but looks like a test
    "scrapy/utils/testproc.py",
    "scrapy/utils/testsite.py",
    "tests/ftpserver.py",
    "tests/mockserver.py",
    "tests/pipelines.py",
    "tests/spiders.py",
    # contains scripts to be run by tests/test_crawler.py::CrawlerProcessSubprocess
    *_py_files("tests/CrawlerProcess"),
    # contains scripts to be run by tests/test_crawler.py::CrawlerRunnerSubprocess
    *_py_files("tests/CrawlerRunner"),
]

base_dir = Path(__file__).parent
ignore_file_path = base_dir / "tests" / "ignores.txt"
with ignore_file_path.open(encoding="utf-8") as reader:
    for line in reader:
        file_path = line.strip()
        if file_path and file_path[0] != "#":
            collect_ignore.append(file_path)

if not H2_ENABLED:
    collect_ignore.extend(
        (
            "scrapy/core/downloader/handlers/http2.py",
            *_py_files("scrapy/core/http2"),
        )
    )


@pytest.fixture
def chdir(tmpdir):
    """Change to pytest-provided temporary directory"""
    tmpdir.chdir()


def pytest_addoption(parser):
    parser.addoption(
        "--reactor",
        default="asyncio",
        choices=["default", "asyncio"],
    )


@pytest.fixture(scope="class")
def reactor_pytest(request):
    if not request.cls:
        # doctests
        return None
    request.cls.reactor_pytest = request.config.getoption("--reactor")
    return request.cls.reactor_pytest


@pytest.fixture(autouse=True)
def only_asyncio(request, reactor_pytest):
    if request.node.get_closest_marker("only_asyncio") and reactor_pytest == "default":
        pytest.skip("This test is only run without --reactor=default")


@pytest.fixture(autouse=True)
def only_not_asyncio(request, reactor_pytest):
    if (
        request.node.get_closest_marker("only_not_asyncio")
        and reactor_pytest != "default"
    ):
        pytest.skip("This test is only run with --reactor=default")


@pytest.fixture(autouse=True)
def requires_uvloop(request):
    if not request.node.get_closest_marker("requires_uvloop"):
        return
    try:
        import uvloop

        del uvloop
    except ImportError:
        pytest.skip("uvloop is not installed")


@pytest.fixture(autouse=True)
def requires_botocore(request):
    if not request.node.get_closest_marker("requires_botocore"):
        return
    try:
        import botocore

        del botocore
    except ImportError:
        pytest.skip("botocore is not installed")


@pytest.fixture(autouse=True)
def requires_boto3(request):
    if not request.node.get_closest_marker("requires_boto3"):
        return
    try:
        import boto3

        del boto3
    except ImportError:
        pytest.skip("boto3 is not installed")


def pytest_configure(config):
    if config.getoption("--reactor") != "default":
        install_reactor("twisted.internet.asyncioreactor.AsyncioSelectorReactor")
    else:
        # install the reactor explicitly
        from twisted.internet import reactor  # noqa: F401


# Generate localhost certificate files, needed by some tests
generate_keys()
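The autouse fixtures in conftest.py above turn markers such as `only_asyncio`, `only_not_asyncio`, and `requires_uvloop` into automatic skip conditions, keyed off the custom `--reactor` option. A hedged sketch of how a test module might apply them (hypothetical test code, not a file from this archive; marker registration is assumed to live in a pytest config file not shown here):

```python
# Hypothetical illustration of the conftest.py markers above.
import pytest


@pytest.mark.only_asyncio
def test_asyncio_reactor_behavior():
    # Skipped automatically when the suite runs with --reactor=default.
    ...


@pytest.mark.requires_uvloop
def test_uvloop_event_loop():
    # Skipped automatically when uvloop is not installed.
    ...
```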
==> scrapy-2.13.3/docs/Makefile <==
# Minimal makefile for Sphinx documentation
#

# You can set these variables from the command line, and also
# from the environment for the first two.
SPHINXOPTS    ?=
SPHINXBUILD   ?= sphinx-build
SOURCEDIR     = .
BUILDDIR      = build

# Put it first so that "make" without argument is like "make help".
help:
	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

.PHONY: help Makefile

# Catch-all target: route all unknown targets to Sphinx using the new
# "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS).
%: Makefile
	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)

==> scrapy-2.13.3/docs/README.rst <==
:orphan:

======================================
Scrapy documentation quick start guide
======================================

This file provides a quick guide on how to compile the Scrapy documentation.

Set up the environment
----------------------

To compile the documentation you need the Sphinx Python library. To install it
and all its dependencies, run the following command from this dir::

    pip install -r requirements.txt

Compile the documentation
-------------------------

To compile the documentation (to classic HTML output) run the following command
from this dir::

    make html

Documentation will be generated (in HTML format) inside the ``build/html`` dir.

View the documentation
----------------------

To view the documentation run the following command::

    make htmlview

This command will fire up your default browser and open the main page of your
(previously generated) HTML documentation.

Start over
----------

To clean up all generated documentation files and start from scratch run::

    make clean

Keep in mind that this command won't touch any documentation source files.

Recreating documentation on the fly
-----------------------------------

To recreate the documentation automatically as you make changes, install
watchdog (``pip install watchdog``) and then use::

    make watch

Alternative method using tox
----------------------------

To compile the documentation to HTML run the following command::

    tox -e docs

Documentation will be generated (in HTML format) inside the
``.tox/docs/tmp/html`` dir.
==> scrapy-2.13.3/docs/_ext/scrapydocs.py <==
# pylint: disable=import-error
from collections.abc import Sequence
from operator import itemgetter
from typing import Any, TypedDict

from docutils import nodes
from docutils.nodes import Element, General, Node, document
from docutils.parsers.rst import Directive
from sphinx.application import Sphinx
from sphinx.util.nodes import make_refnode


class SettingData(TypedDict):
    docname: str
    setting_name: str
    refid: str


class SettingslistNode(General, Element):
    pass


class SettingsListDirective(Directive):
    def run(self) -> Sequence[Node]:
        return [SettingslistNode()]


def is_setting_index(node: Node) -> bool:
    if node.tagname == "index" and node["entries"]:  # type: ignore[index,attr-defined]
        # index entries for setting directives look like:
        # [('pair', 'SETTING_NAME; setting', 'std:setting-SETTING_NAME', '')]
        entry_type, info, refid = node["entries"][0][:3]  # type: ignore[index]
        return entry_type == "pair" and info.endswith("; setting")
    return False


def get_setting_name_and_refid(node: Node) -> tuple[str, str]:
    """Extract setting name from directive index node"""
    entry_type, info, refid = node["entries"][0][:3]  # type: ignore[index]
    return info.replace("; setting", ""), refid


def collect_scrapy_settings_refs(app: Sphinx, doctree: document) -> None:
    env = app.builder.env

    if not hasattr(env, "scrapy_all_settings"):
        emptyList: list[SettingData] = []
        env.scrapy_all_settings = emptyList  # type: ignore[attr-defined]

    for node in doctree.findall(is_setting_index):
        setting_name, refid = get_setting_name_and_refid(node)

        env.scrapy_all_settings.append(  # type: ignore[attr-defined]
            SettingData(
                docname=env.docname,
                setting_name=setting_name,
                refid=refid,
            )
        )


def make_setting_element(
    setting_data: SettingData, app: Sphinx, fromdocname: str
) -> Any:
    refnode = make_refnode(
        app.builder,
        fromdocname,
        todocname=setting_data["docname"],
        targetid=setting_data["refid"],
        child=nodes.Text(setting_data["setting_name"]),
    )
    p = nodes.paragraph()
    p += refnode

    item = nodes.list_item()
    item += p
    return item


def replace_settingslist_nodes(
    app: Sphinx, doctree: document, fromdocname: str
) -> None:
    env = app.builder.env

    for node in doctree.findall(SettingslistNode):
        settings_list = nodes.bullet_list()
        settings_list.extend(
            [
                make_setting_element(d, app, fromdocname)
                for d in sorted(env.scrapy_all_settings, key=itemgetter("setting_name"))  # type: ignore[attr-defined]
                if fromdocname != d["docname"]
            ]
        )
        node.replace_self(settings_list)


def source_role(
    name, rawtext, text: str, lineno, inliner, options=None, content=None
) -> tuple[list[Any], list[Any]]:
    ref = "https://github.com/scrapy/scrapy/blob/master/" + text
    node = nodes.reference(rawtext, text, refuri=ref, **options)
    return [node], []


def issue_role(
    name, rawtext, text: str, lineno, inliner, options=None, content=None
) -> tuple[list[Any], list[Any]]:
    ref = "https://github.com/scrapy/scrapy/issues/" + text
    node = nodes.reference(rawtext, "issue " + text, refuri=ref)
    return [node], []


def commit_role(
    name, rawtext, text: str, lineno, inliner, options=None, content=None
) -> tuple[list[Any], list[Any]]:
    ref = "https://github.com/scrapy/scrapy/commit/" + text
    node = nodes.reference(rawtext, "commit " + text, refuri=ref)
    return [node], []


def rev_role(
    name, rawtext, text: str, lineno, inliner, options=None, content=None
) -> tuple[list[Any], list[Any]]:
    ref = "http://hg.scrapy.org/scrapy/changeset/" + text
    node = nodes.reference(rawtext, "r" + text, refuri=ref)
    return [node], []


def setup(app: Sphinx) -> None:
    app.add_crossref_type(
        directivename="setting",
        rolename="setting",
        indextemplate="pair: %s; setting",
    )
    app.add_crossref_type(
        directivename="signal",
        rolename="signal",
        indextemplate="pair: %s; signal",
    )
    app.add_crossref_type(
        directivename="command",
        rolename="command",
        indextemplate="pair: %s; command",
    )
    app.add_crossref_type(
        directivename="reqmeta",
        rolename="reqmeta",
        indextemplate="pair: %s; reqmeta",
    )
    app.add_role("source", source_role)
    app.add_role("commit", commit_role)
    app.add_role("issue", issue_role)
    app.add_role("rev", rev_role)
    app.add_node(SettingslistNode)
    app.add_directive("settingslist", SettingsListDirective)

    app.connect("doctree-read", collect_scrapy_settings_refs)
    app.connect("doctree-resolved", replace_settingslist_nodes)
==> scrapy-2.13.3/docs/_ext/scrapyfixautodoc.py <==
"""
Must be included after 'sphinx.ext.autodoc'. Fixes unwanted 'alias of' behavior.
https://github.com/sphinx-doc/sphinx/issues/4422
"""

# pylint: disable=import-error
from sphinx.application import Sphinx


def maybe_skip_member(app: Sphinx, what, name: str, obj, skip: bool, options) -> bool:
    if not skip:
        # autodoc was generating an "alias of" text for the following members
        return name in {"default_item_class", "default_selector_class"}
    return skip


def setup(app: Sphinx) -> None:
    app.connect("autodoc-skip-member", maybe_skip_member)
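Neither `docs/conf.py` nor the wiring of these two extensions is included in this dump. A hedged sketch of how a Sphinx `conf.py` could load them, assuming the common pattern of putting `docs/_ext` on `sys.path`; the ordering follows the docstring note above that `scrapyfixautodoc` must be included after `sphinx.ext.autodoc`:

```python
# Hypothetical docs/conf.py fragment; the real conf.py is not part of this dump.
import sys
from pathlib import Path

# Make docs/_ext importable so Sphinx can find the local extensions.
sys.path.append(str(Path(__file__).parent / "_ext"))

extensions = [
    "sphinx.ext.autodoc",
    "scrapydocs",        # adds :setting:, :signal:, :issue:, ... roles and settingslist
    "scrapyfixautodoc",  # must come after sphinx.ext.autodoc
]
```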
==> scrapy-2.13.3/docs/_static/custom.css <==
/* Move lists closer to their introducing paragraph */
.rst-content .section ol p,
.rst-content .section ul p {
    margin-bottom: 0px;
}
.rst-content p + ol,
.rst-content p + ul {
    margin-top: -18px;  /* Compensates margin-top: 24px of p */
}
.rst-content dl p + ol,
.rst-content dl p + ul {
    margin-top: -6px;  /* Compensates margin-top: 12px of p */
}

/* override some styles in sphinx-rtd-dark-mode/static/dark_mode_css/general.css */
.theme-switcher {
    right: 0.4em !important;
    top: 0.6em !important;
    -webkit-box-shadow: 0px 3px 14px 4px rgba(0, 0, 0, 0.30) !important;
    box-shadow: 0px 3px 14px 4px rgba(0, 0, 0, 0.30) !important;
    height: 2em !important;
    width: 2em !important;
}

/* place the toggle button for dark mode at the bottom right corner on small screens */
@media (max-width: 768px) {
    .theme-switcher {
        right: 0.4em !important;
        bottom: 2.6em !important;
        top: auto !important;
    }
}

/* persist the blue color at the top left used in the default RTD theme */
html[data-theme="dark"] .wy-side-nav-search,
html[data-theme="dark"] .wy-nav-top {
    background-color: #1d577d !important;
}

/* all the styles below are used to present API objects nicely in dark mode */
html[data-theme="dark"] .sig.sig-object {
    border-left-color: #3e4446 !important;
    background-color: #202325 !important
}
html[data-theme="dark"] .sig-name,
html[data-theme="dark"] .sig-prename,
html[data-theme="dark"] .property,
html[data-theme="dark"] .sig-param,
html[data-theme="dark"] .sig-paren,
html[data-theme="dark"] .sig-return-icon,
html[data-theme="dark"] .sig-return-typehint,
html[data-theme="dark"] .optional {
    color: #e8e6e3 !important
}

==> scrapy-2.13.3/docs/_static/logo.svg <==
(SVG image data not included in this dump)

==> scrapy-2.13.3/docs/_static/selectors-sample1.html <==
(HTML content truncated in this dump)