scrapy-1.7.3/.bumpversion.cfg
=============================

[bumpversion]
current_version = 1.7.3
commit = True
tag = True
tag_name = {new_version}

[bumpversion:file:scrapy/VERSION]

scrapy-1.7.3/.coveragerc
========================

[run]
branch = true
include = scrapy/*
omit =
    tests/*
    scrapy/xlib/*

scrapy-1.7.3/.gitignore
=======================

/.vagrant
/scrapy.iml
*.pyc
_trial_temp*
dropin.cache
docs/build
*egg-info
.tox
venv
build
dist
.idea
htmlcov/
.coverage
.pytest_cache/
.coverage.*
.cache/

# Windows
Thumbs.db

scrapy-1.7.3/.travis.yml
========================

language: python
dist: trusty
branches:
  only:
    - master
    - /^\d\.\d+$/
    - /^\d\.\d+\.\d+(rc\d+|\.dev\d+)?$/
matrix:
  include:
    - python: 2.7
      env: TOXENV=py27
    - python: 2.7
      env: TOXENV=jessie
    - python: 2.7
      env: TOXENV=pypy
    - python: 2.7
      env: TOXENV=pypy3
    - python: 3.4
      env: TOXENV=py34
    - python: 3.5
      env: TOXENV=py35
    - python: 3.6
      env: TOXENV=py36
    - python: 3.7
      env: TOXENV=py37
      dist: xenial
      sudo: true
    - python: 3.6
      env: TOXENV=docs
install:
  - |
    if [ "$TOXENV" = "pypy" ]; then
      export PYPY_VERSION="pypy-6.0.0-linux_x86_64-portable"
      wget "https://bitbucket.org/squeaky/portable-pypy/downloads/${PYPY_VERSION}.tar.bz2"
      tar -jxf ${PYPY_VERSION}.tar.bz2
      virtualenv --python="$PYPY_VERSION/bin/pypy" "$HOME/virtualenvs/$PYPY_VERSION"
      source "$HOME/virtualenvs/$PYPY_VERSION/bin/activate"
    fi
    if [ "$TOXENV" = "pypy3" ]; then
      export PYPY_VERSION="pypy3.5-5.9-beta-linux_x86_64-portable"
      wget "https://bitbucket.org/squeaky/portable-pypy/downloads/${PYPY_VERSION}.tar.bz2"
      tar -jxf ${PYPY_VERSION}.tar.bz2
      virtualenv --python="$PYPY_VERSION/bin/pypy3" "$HOME/virtualenvs/$PYPY_VERSION"
      source "$HOME/virtualenvs/$PYPY_VERSION/bin/activate"
    fi
  - pip install -U tox twine wheel codecov
script: tox
after_success:
  - codecov
notifications:
  irc:
    use_notice: true
    skip_join: true
    channels:
      - irc.freenode.org#scrapy
cache:
  directories:
    - $HOME/.cache/pip
deploy:
  provider: pypi
  distributions: "sdist bdist_wheel"
  user: scrapy
  password:
    secure: JaAKcy1AXWXDK3LXdjOtKyaVPCSFoCGCnW15g4f65E/8Fsi9ZzDfmBa4Equs3IQb/vs/if2SVrzJSr7arN7r9Z38Iv1mUXHkFAyA3Ym8mThfABBzzcUWEQhIHrCX0Tdlx9wQkkhs+PZhorlmRS4gg5s6DzPaeA2g8SCgmlRmFfA=
  on:
    tags: true
    repo: scrapy/scrapy
    condition: "$TOXENV == py27 && $TRAVIS_TAG =~ ^[0-9]+[.][0-9]+[.][0-9]+(rc[0-9]+|[.]dev[0-9]+)?$"

scrapy-1.7.3/AUTHORS
====================

Scrapy was brought to life by Shane Evans while hacking a scraping framework
prototype for Mydeco (mydeco.com). It soon became maintained, extended and
improved by Insophia (insophia.com), with the initial sponsorship of Mydeco to
bootstrap the project. In mid-2011, Scrapinghub became the new official
maintainer.
Here is the list of the primary authors & contributors:

* Pablo Hoffman
* Daniel Graña
* Martin Olveyra
* Gabriel García
* Michael Cetrulo
* Artem Bogomyagkov
* Damian Canabal
* Andres Moreira
* Ismael Carnales
* Matías Aguirre
* German Hoffmann
* Anibal Pacheco
* Bruno Deferrari
* Shane Evans
* Ezequiel Rivero
* Patrick Mezard
* Rolando Espinoza
* Ping Yin
* Lucian Ursu
* Shuaib Khan
* Didier Deshommes
* Vikas Dhiman
* Jochen Maes
* Darian Moody
* Jordi Lonch
* Zuhao Wan
* Steven Almeroth
* Tom Mortimer-Jones
* Chris Tilden
* Alexandr N Zamaraev
* Emanuel Schorsch
* Michal Danilak
* Natan Lao
* Hasnain Lakhani
* Pedro Faustino
* Alex Cepoi
* Ilya Baryshev
* Libor Nenadál
* Jae-Myoung Yu
* Vladislav Poluhin
* Marc Abramowitz
* Valentin-Costel Hăloiu
* Jason Yeo
* Сергей Прохоров
* Simon Ratne
* Julien Duponchelle
* Jochen Maes
* Vikas Dhiman
* Juan Picca
* Nicolás Ramírez

scrapy-1.7.3/CODE_OF_CONDUCT.md
=============================

# Contributor Covenant Code of Conduct

## Our Pledge

In the interest of fostering an open and welcoming environment, we as
contributors and maintainers pledge to make participation in our project and
our community a harassment-free experience for everyone, regardless of age,
body size, disability, ethnicity, gender identity and expression, level of
experience, nationality, personal appearance, race, religion, or sexual
identity and orientation.

## Our Standards

Examples of behavior that contributes to creating a positive environment
include:

* Using welcoming and inclusive language
* Being respectful of differing viewpoints and experiences
* Gracefully accepting constructive criticism
* Focusing on what is best for the community
* Showing empathy towards other community members

Examples of unacceptable behavior by participants include:

* The use of sexualized language or imagery and unwelcome sexual attention or
  advances
* Trolling, insulting/derogatory comments, and personal or political attacks
* Public or private harassment
* Publishing others' private information, such as a physical or electronic
  address, without explicit permission
* Other conduct which could reasonably be considered inappropriate in a
  professional setting

## Our Responsibilities

Project maintainers are responsible for clarifying the standards of acceptable
behavior and are expected to take appropriate and fair corrective action in
response to any instances of unacceptable behavior.

Project maintainers have the right and responsibility to remove, edit, or
reject comments, commits, code, wiki edits, issues, and other contributions
that are not aligned to this Code of Conduct, or to ban temporarily or
permanently any contributor for other behaviors that they deem inappropriate,
threatening, offensive, or harmful.

## Scope

This Code of Conduct applies both within project spaces and in public spaces
when an individual is representing the project or its community. Examples of
representing a project or community include using an official project e-mail
address, posting via an official social media account, or acting as an
appointed representative at an online or offline event. Representation of a
project may be further defined and clarified by project maintainers.

## Enforcement

Instances of abusive, harassing, or otherwise unacceptable behavior may be
reported by contacting the project team at opensource@scrapinghub.com.
All complaints will be reviewed and investigated and will result in a response
that is deemed necessary and appropriate to the circumstances. The project team
is obligated to maintain confidentiality with regard to the reporter of an
incident. Further details of specific enforcement policies may be posted
separately.

Project maintainers who do not follow or enforce the Code of Conduct in good
faith may face temporary or permanent repercussions as determined by other
members of the project's leadership.

## Attribution

This Code of Conduct is adapted from the [Contributor Covenant][homepage],
version 1.4, available at [http://contributor-covenant.org/version/1/4][version]

[homepage]: http://contributor-covenant.org
[version]: http://contributor-covenant.org/version/1/4/

scrapy-1.7.3/CONTRIBUTING.md
===========================

The guidelines for contributing are available here:
https://docs.scrapy.org/en/master/contributing.html

Please do not abuse the issue tracker for support questions.
If your issue topic can be rephrased to "How to ...?", please use the
support channels to get it answered: https://scrapy.org/community/

scrapy-1.7.3/INSTALL
===================

For information about installing Scrapy see:

* docs/intro/install.rst (local file)
* https://docs.scrapy.org/en/latest/intro/install.html (online version)

scrapy-1.7.3/LICENSE
===================

Copyright (c) Scrapy developers.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

1. Redistributions of source code must retain the above copyright notice, this
   list of conditions, and the following disclaimer.

2. Redistributions in binary form must reproduce the above copyright notice,
   this list of conditions, and the following disclaimer in the documentation
   and/or other materials provided with the distribution.

3. Neither the name of Scrapy nor the names of its contributors may be used to
   endorse or promote products derived from this software without specific
   prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
scrapy-1.7.3/MANIFEST.in
========================

include README.rst
include AUTHORS
include INSTALL
include LICENSE
include MANIFEST.in
include NEWS
include scrapy/VERSION
include scrapy/mime.types
include codecov.yml
include conftest.py
include pytest.ini
include requirements-*.txt
include tox.ini
recursive-include scrapy/templates *
recursive-include scrapy license.txt
recursive-include docs *
prune docs/build
recursive-include extras *
recursive-include bin *
recursive-include tests *
global-exclude __pycache__ *.py[cod]

scrapy-1.7.3/Makefile.buildbot
==============================

TRIAL := $(shell which trial)
BRANCH := $(shell git rev-parse --abbrev-ref HEAD)

export PYTHONPATH=$(PWD)

test:
	coverage run --branch $(TRIAL) --reporter=text tests
	rm -rf htmlcov && coverage html
	-s3cmd sync -P htmlcov/ s3://static.scrapy.org/coverage-scrapy-$(BRANCH)/

build:
	git describe --tags --match '[0-9]*' |sed 's/-/.post/;s/-g/+g/' >scrapy/VERSION
	debchange -m -D unstable --force-distribution -v \
	    $$(python setup.py --version |sed -r 's/([0-9]+.[0-9]+.[0-9]+)(a|b|rc|dev)([0-9]*)/\1~\2\3/')-$$(date +%s) \
	    "Automatic build"
	debuild -us -uc -b

clean:
	git checkout debian scrapy/VERSION
	git clean -dfq

pypi:
	umask 0022 && chmod -R a+rX . && python setup.py sdist upload

.PHONY: clean test build

scrapy-1.7.3/NEWS
================

See docs/news.rst

scrapy-1.7.3/README.rst
=======================

======
Scrapy
======

.. image:: https://img.shields.io/pypi/v/Scrapy.svg
   :target: https://pypi.python.org/pypi/Scrapy
   :alt: PyPI Version

.. image:: https://img.shields.io/pypi/pyversions/Scrapy.svg
   :target: https://pypi.python.org/pypi/Scrapy
   :alt: Supported Python Versions

.. image:: https://img.shields.io/travis/scrapy/scrapy/master.svg
   :target: https://travis-ci.org/scrapy/scrapy
   :alt: Build Status

.. image:: https://img.shields.io/badge/wheel-yes-brightgreen.svg
   :target: https://pypi.python.org/pypi/Scrapy
   :alt: Wheel Status

.. image:: https://img.shields.io/codecov/c/github/scrapy/scrapy/master.svg
   :target: https://codecov.io/github/scrapy/scrapy?branch=master
   :alt: Coverage report

.. image:: https://anaconda.org/conda-forge/scrapy/badges/version.svg
   :target: https://anaconda.org/conda-forge/scrapy
   :alt: Conda Version

Overview
========

Scrapy is a fast high-level web crawling and web scraping framework, used to
crawl websites and extract structured data from their pages. It can be used
for a wide range of purposes, from data mining to monitoring and automated
testing.

For more information including a list of features check the Scrapy homepage
at: https://scrapy.org

Requirements
============

* Python 2.7 or Python 3.4+
* Works on Linux, Windows, Mac OSX, BSD

Install
=======

The quick way::

    pip install scrapy

For more details see the install section in the documentation:
https://docs.scrapy.org/en/latest/intro/install.html

Documentation
=============

Documentation is available online at https://docs.scrapy.org/ and in the
``docs`` directory.
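Quick example
=============

As a minimal sketch of what a spider looks like (the target site and the CSS
selectors below are illustrative assumptions, not something shipped with
Scrapy), save the following as ``quotes_spider.py``::

    import scrapy


    class QuotesSpider(scrapy.Spider):
        # Name used by Scrapy's command-line tools to refer to this spider.
        name = "quotes"
        # Crawling starts from these URLs.
        start_urls = ["http://quotes.toscrape.com/"]

        def parse(self, response):
            # Each div.quote element on the page becomes one scraped item
            # (a plain dict yielded back to the engine).
            for quote in response.css("div.quote"):
                yield {
                    "text": quote.css("span.text::text").get(),
                    "author": quote.css("small.author::text").get(),
                }

Then run it with ``scrapy runspider quotes_spider.py -o quotes.json`` to write
the scraped items to a JSON file.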
Releases
========

You can find release notes at https://docs.scrapy.org/en/latest/news.html

Community (blog, twitter, mail list, IRC)
=========================================

See https://scrapy.org/community/

Contributing
============

See https://docs.scrapy.org/en/master/contributing.html

Code of Conduct
---------------

Please note that this project is released with a Contributor Code of Conduct
(see https://github.com/scrapy/scrapy/blob/master/CODE_OF_CONDUCT.md).

By participating in this project you agree to abide by its terms.
Please report unacceptable behavior to opensource@scrapinghub.com.

Companies using Scrapy
======================

See https://scrapy.org/companies/

Commercial Support
==================

See https://scrapy.org/support/

scrapy-1.7.3/appveyor.yml
=========================

platform: x86

version: '{branch}-{build}'

environment:
  matrix:
    - PYTHON: "C:\\Python36"
      TOX_ENV: py36

branches:
  only:
    - master
    - /d+\.\d+\.\d+[\w\-]*$/

install:
  - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%"
  - "SET PYTHONPATH=%APPVEYOR_BUILD_FOLDER%"
  - "SET TOX_TESTENV_PASSENV=HOME HOMEDRIVE HOMEPATH PYTHONPATH USERPROFILE"
  - "pip install -U tox"

build: false
skip_tags: true

test_script:
  - "tox -e %TOX_ENV%"

cache:
  - '%LOCALAPPDATA%\pip\cache'

scrapy-1.7.3/artwork/README.rst
================================

:orphan:

Scrapy artwork
==============

This folder contains Scrapy artwork resources such as logos and fonts.

scrapy-logo.jpg
---------------

Main Scrapy logo, in JPEG format.

qlassik.zip
-----------

Font used for Scrapy logo.

Homepage: https://www.dafont.com/qlassik.font

scrapy-blog.logo.xcf
--------------------

The logo used in Scrapy blog, in Gimp format.

scrapy-1.7.3/artwork/qlassik.zip
================================

[Binary ZIP archive of the Qlassik fonts (Qlassik_TB.ttf, QlassikBold_TB.ttf,
Qlassik_TB.otf, QlassikBold_TB.otf, plus thebend.txt and
VERY IMPORTANT NOTE.txt); binary content omitted.]
scrapy-1.7.3/artwork/scrapy-blog-logo.xcf
=========================================

[Binary GIMP (XCF) image of the Scrapy blog logo; it contains a text layer
reading "Scrapy Blog" set in Qlassik Medium. Binary content omitted.]

scrapy-1.7.3/artwork/scrapy-logo.jpg
====================================

[Binary JPEG image of the main Scrapy logo; binary content omitted.]
M׿R~O&U ={NR\J'CӌnX:o˟ίe'Ud3Hp g2~1]Fz9cѬN"o-<}'BIOjP/z DEU؉T6]6Dr owœf/G#(|[FZvxxoշqiUb C+7yΞ6q+[~\s|MGmgOG4( "M :G.M[]u+-[L~LQ:G2֋tr^۳;|Ʀ9ZӹTYOe~3` q/Rϡ~P ]OH_⪭_]]|/ 6DMb͋qp?4ZڏX }52+[{eѫx^z䴅YtxͲgQ|ל[%t6rpuڮU(Vċø)281jTtQpWX">KW/j*F%*Ͻ*odh🹒ꗑSĒd*_.[Kֽ=))?ڌ>KDbA\gꊐ\K;㤯kY|F-\~*:;3Rݺ3~9vDDXƦƱ쵩+<\8)JF)}{yvwO"MU3{So9%-6fmJKxm[ mֱ֧"؍,eQ%Zj*)Ưy˙ě +0ovC %=Y9\EXi6i;B$>i\G+{ZTtƇmco5>Gm:\󼖾9XҮDV;}+Lܑ5O֊^#XbSQQvS wPXRMjk'˦rw> T㸶X]}ߛ7Ėw Jnt'Dizl`]Kh.\䪵nVV'kIɲώt'Y\czfdZ_Mtdmhͼǎ9>nӤgڪW2䜖K;}0cS-e~Sq=}kms ùٕ=v9)O^ói]Syl31[}ֱ"5J5 Sjb_H~+Z>+=y\9o_7*rHؾ+֑n.i?\-PFyzР4]/|ӚQ!{^pii7Uɼp62668ڍkQ(R8%cܔsWrcb[?ڂEjW3ZI_(T^ mP(zE˿bUWwr/hoʕ7'E_kx_\l/Rܱʹ\>d]VVy\3BӠ h@=)uA>\VuJE{Y%z,bvCfƹJ8cU|6mKzeݮ")TF۳⋰1=6|6j79Enei W\Hs[-ʹIMSk=56Ϳy}?rN<{h<@/:*xh@ *UTQw*xᣄXވ^mHAOmJy8H近 8ұy%>ף 3Mu ?/w~R ]JQN<Ö՜3xjeEKKHOi݇F6m*GlMZiUi[}1b[ k"#}7n[8kX|y$ިoqj3zg+r孤S7J#LZ9(azgkb:έOgq/2x9'eԘ˹s|m#Wh5)֯f;Z="ܵ0l+z"ўs:Ë*Tg"6LzmSşJ3MyGW϶swk]ب{ose;mRosU6n2jռ q9,j.]ހ8ܩZشzIxYZP=&!ꭿjl0gsik;LWGugpՎ{y"حs\ܧ1N:ߣ~[)d:ZEW,6ԹA*f^kV-_ώ4E蟥\6kIV{I>ʲV8mrG)Mcł}>[Rt; mHiM_w:;z9xm8v[ʩ'W3{kl[ƸԳ_m}.$|iT-jxdG?B+?;O t<~Hbbt& ]d[Y%r})du^xJc\qcEgqɞlmsnk57_)fXXJ)/\4jrෛm}a$}ǢeN$<X7zZ"tȟvVl2S_Lpc{yjK;5sq4._TQ>znXNv-z9[V9q>]&|NߔX,qp+yח;TM֢5;l;zb+ }smo> phrǚ:>Gdof]KϹk"-""#Sji>(_,v[e0CzшYeoj$vw2JkGt})\y{e9Zvū]EMO~=:N[YiX~ 02a1f64Uq6WҚ"rͲǓ)M}y}O,sX3 qVG'G[_аS)I 'u މu].eu<cb|g/0q쭾Qнh9 cSDii`Oƹzx+өYEnpeihq?4kw\2|T&x*66DTaf<:q2,<ҚN9Sd`s$j*v{ڧ^ZN4H^ɖ&[vJ7_Ty28jSw~RPj$'$:wsAd ,rOdcn-_ AĪW5ROkԙI]=VL_/y˳賚orco(~yީĨ2M%?pZ?Rӣ^q\J\Mu$z"z#c;/9y?FtC;}_kV☶:JS쵋u;LizgԓgJbP{vw˵rʪ)ZF!U͞ 6S6Hتs^[)6F ZGLCSI4>w+ΒW{׉U)Wb*yf*?_M1uX k-/OvƮԱr=jϗHzwm9g#+^gd92yEF/Ъvm0\Q]Su>֎s&'v+am25ڴSpVވȇfhŒuމW ft^+;襤E5?6z :~OSʼ5Qh=NƟ|'ϯe&uT23cxYlJXy3ǎ״VX<-4KUHK\Ec FuVG~Sns|[;~6bb-WrmS!a+rA&s+u?^|6v#- ;>m778ze %x߱wc BKtJmZz[=1ڹWzn6?sYlI'vDSyXcqV̥v^SDΣxK7:7zUy/~?l0*#9N[r;ۉ1#wz+UR6=RfXccm+Em3?NJ/PWΗ~DlO*+\}zԜ:ŭT,Z[tڑ%_45L˯r_R4NH=Cʚ# 2=Do#Z|lZ:U7i)Ky֭z|:Os`p;"rlZ%Ke5}[7DQG m&#bQjQ c&Y>5+YF't3JETkQ^Ԫ*&|vfJ>lMZ6;kir1yZZܢY_-)2]'}K9w{ˋdrCgqDy0=UV;v[9F{s>xs戮 %t-na2,w)TIlrƫTjT.4\˳i$d8C:D涞plb*SJ{8'shܽ;-go#I*r'/U|o3~YHiī1rǣG%ٛñQJogDȚ.sMι8Gr{c˃O|4^kEc!㍻>5]X+XiL-kN.XQZJ9SvFRDD@*scrapy-1.7.3/codecov.yml000066400000000000000000000001201352060011200151470ustar00rootroot00000000000000comment: layout: "header, diff, tree" coverage: status: project: false scrapy-1.7.3/conftest.py000066400000000000000000000013121352060011200152050ustar00rootroot00000000000000import glob import six import pytest from twisted import version as twisted_version def _py_files(folder): return glob.glob(folder + "/*.py") + glob.glob(folder + "/*/*.py") collect_ignore = [ # not a test, but looks like a test "scrapy/utils/testsite.py", ] if (twisted_version.major, twisted_version.minor, twisted_version.micro) >= (15, 5, 0): collect_ignore += _py_files("scrapy/xlib/tx") if six.PY3: for line in open('tests/py3-ignores.txt'): file_path = line.strip() if file_path and file_path[0] != '#': collect_ignore.append(file_path) @pytest.fixture() def chdir(tmpdir): """Change to pytest-provided temporary directory""" tmpdir.chdir() scrapy-1.7.3/debian/000077500000000000000000000000001352060011200142335ustar00rootroot00000000000000scrapy-1.7.3/debian/changelog000066400000000000000000000002101352060011200160760ustar00rootroot00000000000000scrapy (0.11) unstable; urgency=low * Initial release. 
-- Scrapinghub Team Thu, 10 Jun 2010 17:24:02 -0300 scrapy-1.7.3/debian/compat000066400000000000000000000000021352060011200154310ustar00rootroot000000000000007 scrapy-1.7.3/debian/control000066400000000000000000000015601352060011200156400ustar00rootroot00000000000000Source: scrapy Section: python Priority: optional Maintainer: Scrapinghub Team Build-Depends: debhelper (>= 7.0.50), python (>=2.7), python-twisted, python-w3lib, python-lxml, python-six (>=1.5.2) Standards-Version: 3.8.4 Homepage: https://scrapy.org/ Package: scrapy Architecture: all Depends: ${python:Depends}, python-lxml, python-twisted, python-openssl, python-w3lib (>= 1.8.0), python-queuelib, python-cssselect (>= 0.9), python-six (>=1.5.2) Recommends: python-setuptools Conflicts: python-scrapy, scrapy-0.25 Provides: python-scrapy, scrapy-0.25 Description: Python web crawling and web scraping framework Scrapy is a fast high-level web crawling and web scraping framework, used to crawl websites and extract structured data from their pages. It can be used for a wide range of purposes, from data mining to monitoring and automated testing. scrapy-1.7.3/debian/copyright000066400000000000000000000035151352060011200161720ustar00rootroot00000000000000This package was debianized by the Scrapinghub team . It was downloaded from https://scrapy.org Upstream Author: Scrapy Developers Copyright: 2007-2013 Scrapy Developers License: bsd Copyright (c) Scrapy developers. All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. Neither the name of Scrapy nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. The Debian packaging is (C) 2010-2013, Scrapinghub and is licensed under the BSD, see `/usr/share/common-licenses/BSD'. 
scrapy-1.7.3/debian/pyversions000066400000000000000000000000041352060011200163710ustar00rootroot000000000000002.7 scrapy-1.7.3/debian/rules000077500000000000000000000000611352060011200153100ustar00rootroot00000000000000#!/usr/bin/make -f # -*- makefile -*- %: dh $@ scrapy-1.7.3/debian/scrapy.docs000066400000000000000000000000231352060011200164010ustar00rootroot00000000000000README.rst AUTHORS scrapy-1.7.3/debian/scrapy.install000066400000000000000000000001741352060011200171260ustar00rootroot00000000000000extras/scrapy_bash_completion etc/bash_completion.d/ extras/scrapy_zsh_completion /usr/share/zsh/vendor-completions/_scrapy scrapy-1.7.3/debian/scrapy.lintian-overrides000066400000000000000000000001521352060011200211120ustar00rootroot00000000000000new-package-should-close-itp-bug extra-license-file usr/share/pyshared/scrapy/xlib/pydispatch/license.txt scrapy-1.7.3/debian/scrapy.manpages000066400000000000000000000000201352060011200172410ustar00rootroot00000000000000extras/scrapy.1 scrapy-1.7.3/docs/000077500000000000000000000000001352060011200137415ustar00rootroot00000000000000scrapy-1.7.3/docs/Makefile000066400000000000000000000053641352060011200154110ustar00rootroot00000000000000# # Makefile for Scrapy documentation [based on Python documentation Makefile] # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # # You can set these variables from the command line. PYTHON = python SPHINXOPTS = PAPER = SOURCES = SHELL = /bin/bash ALLSPHINXOPTS = -b $(BUILDER) -d build/doctrees \ -D latex_elements.papersize=$(PAPER) \ $(SPHINXOPTS) . build/$(BUILDER) $(SOURCES) .PHONY: help update build html htmlhelp clean help: @echo "Please use \`make ' where is one of" @echo " html to make standalone HTML files" @echo " htmlhelp to make HTML files and a HTML help project" @echo " latex to make LaTeX files, you can set PAPER=a4 or PAPER=letter" @echo " text to make plain text files" @echo " changes to make an overview over all changed/added/deprecated items" @echo " linkcheck to check all external links for integrity" @echo " watch build HTML docs, open in browser and watch for changes" build-dirs: mkdir -p build/$(BUILDER) build/doctrees build: build-dirs sphinx-build $(ALLSPHINXOPTS) @echo build-ignore-errors: build-dirs -sphinx-build $(ALLSPHINXOPTS) @echo html: BUILDER = html html: build @echo "Build finished. The HTML pages are in build/html." htmlhelp: BUILDER = htmlhelp htmlhelp: build @echo "Build finished; now you can run HTML Help Workshop with the" \ "build/htmlhelp/pydoc.hhp project file." latex: BUILDER = latex latex: build @echo "Build finished; the LaTeX files are in build/latex." @echo "Run \`make all-pdf' or \`make all-ps' in that directory to" \ "run these through (pdf)latex." text: BUILDER = text text: build @echo "Build finished; the text files are in build/text." changes: BUILDER = changes changes: build @echo "The overview file is in build/changes." 
linkcheck: BUILDER = linkcheck linkcheck: build @echo "Link check complete; look for any errors in the above output " \ "or in build/$(BUILDER)/output.txt" linkfix: BUILDER = linkcheck linkfix: build-ignore-errors $(PYTHON) utils/linkfix.py @echo "Fixing redirecting links in docs has finished; check all " \ "replacements before committing them" doctest: BUILDER = doctest doctest: build @echo "Testing of doctests in the sources finished, look at the " \ "results in build/doctest/output.txt" pydoc-topics: BUILDER = pydoc-topics pydoc-topics: build @echo "Building finished; now copy build/pydoc-topics/pydoc_topics.py " \ "into the Lib/ directory" coverage: BUILDER = coverage coverage: build htmlview: html $(PYTHON) -c "import webbrowser, os; webbrowser.open('file://' + \ os.path.realpath('build/html/index.html'))" clean: -rm -rf build/* watch: htmlview watchmedo shell-command -p '*.rst' -c 'make html' -R -D scrapy-1.7.3/docs/README.rst000066400000000000000000000025051352060011200154320ustar00rootroot00000000000000:orphan: ====================================== Scrapy documentation quick start guide ====================================== This file provides a quick guide on how to compile the Scrapy documentation. Setup the environment --------------------- To compile the documentation you need Sphinx Python library. To install it and all its dependencies run the following command from this dir :: pip install -r requirements.txt Compile the documentation ------------------------- To compile the documentation (to classic HTML output) run the following command from this dir:: make html Documentation will be generated (in HTML format) inside the ``build/html`` dir. View the documentation ---------------------- To view the documentation run the following command:: make htmlview This command will fire up your default browser and open the main page of your (previously generated) HTML documentation. Start over ---------- To cleanup all generated documentation files and start from scratch run:: make clean Keep in mind that this command won't touch any documentation source files. 
Recreating documentation on the fly ----------------------------------- There is a way to recreate the doc automatically when you make changes, you need to install watchdog (``pip install watchdog``) and then use:: make watch scrapy-1.7.3/docs/_ext/000077500000000000000000000000001352060011200147005ustar00rootroot00000000000000scrapy-1.7.3/docs/_ext/scrapydocs.py000066400000000000000000000107201352060011200174240ustar00rootroot00000000000000from docutils.parsers.rst.roles import set_classes from docutils import nodes from docutils.parsers.rst import Directive from sphinx.util.nodes import make_refnode from operator import itemgetter class settingslist_node(nodes.General, nodes.Element): pass class SettingsListDirective(Directive): def run(self): return [settingslist_node('')] def is_setting_index(node): if node.tagname == 'index': # index entries for setting directives look like: # [(u'pair', u'SETTING_NAME; setting', u'std:setting-SETTING_NAME', '')] entry_type, info, refid = node['entries'][0][:3] return entry_type == 'pair' and info.endswith('; setting') return False def get_setting_target(node): # target nodes are placed next to the node in the doc tree return node.parent[node.parent.index(node) + 1] def get_setting_name_and_refid(node): """Extract setting name from directive index node""" entry_type, info, refid = node['entries'][0][:3] return info.replace('; setting', ''), refid def collect_scrapy_settings_refs(app, doctree): env = app.builder.env if not hasattr(env, 'scrapy_all_settings'): env.scrapy_all_settings = [] for node in doctree.traverse(is_setting_index): targetnode = get_setting_target(node) assert isinstance(targetnode, nodes.target), "Next node is not a target" setting_name, refid = get_setting_name_and_refid(node) env.scrapy_all_settings.append({ 'docname': env.docname, 'setting_name': setting_name, 'refid': refid, }) def make_setting_element(setting_data, app, fromdocname): refnode = make_refnode(app.builder, fromdocname, todocname=setting_data['docname'], targetid=setting_data['refid'], child=nodes.Text(setting_data['setting_name'])) p = nodes.paragraph() p += refnode item = nodes.list_item() item += p return item def replace_settingslist_nodes(app, doctree, fromdocname): env = app.builder.env for node in doctree.traverse(settingslist_node): settings_list = nodes.bullet_list() settings_list.extend([make_setting_element(d, app, fromdocname) for d in sorted(env.scrapy_all_settings, key=itemgetter('setting_name')) if fromdocname != d['docname']]) node.replace_self(settings_list) def setup(app): app.add_crossref_type( directivename = "setting", rolename = "setting", indextemplate = "pair: %s; setting", ) app.add_crossref_type( directivename = "signal", rolename = "signal", indextemplate = "pair: %s; signal", ) app.add_crossref_type( directivename = "command", rolename = "command", indextemplate = "pair: %s; command", ) app.add_crossref_type( directivename = "reqmeta", rolename = "reqmeta", indextemplate = "pair: %s; reqmeta", ) app.add_role('source', source_role) app.add_role('commit', commit_role) app.add_role('issue', issue_role) app.add_role('rev', rev_role) app.add_node(settingslist_node) app.add_directive('settingslist', SettingsListDirective) app.connect('doctree-read', collect_scrapy_settings_refs) app.connect('doctree-resolved', replace_settingslist_nodes) def source_role(name, rawtext, text, lineno, inliner, options={}, content=[]): ref = 'https://github.com/scrapy/scrapy/blob/master/' + text set_classes(options) node = nodes.reference(rawtext, text, refuri=ref, 
**options) return [node], [] def issue_role(name, rawtext, text, lineno, inliner, options={}, content=[]): ref = 'https://github.com/scrapy/scrapy/issues/' + text set_classes(options) node = nodes.reference(rawtext, 'issue ' + text, refuri=ref, **options) return [node], [] def commit_role(name, rawtext, text, lineno, inliner, options={}, content=[]): ref = 'https://github.com/scrapy/scrapy/commit/' + text set_classes(options) node = nodes.reference(rawtext, 'commit ' + text, refuri=ref, **options) return [node], [] def rev_role(name, rawtext, text, lineno, inliner, options={}, content=[]): ref = 'http://hg.scrapy.org/scrapy/changeset/' + text set_classes(options) node = nodes.reference(rawtext, 'r' + text, refuri=ref, **options) return [node], [] scrapy-1.7.3/docs/_static/000077500000000000000000000000001352060011200153675ustar00rootroot00000000000000scrapy-1.7.3/docs/_static/selectors-sample1.html000066400000000000000000000010651352060011200216220ustar00rootroot00000000000000 Example website scrapy-1.7.3/docs/_templates/000077500000000000000000000000001352060011200160765ustar00rootroot00000000000000scrapy-1.7.3/docs/_templates/layout.html000066400000000000000000000024071352060011200203040ustar00rootroot00000000000000{% extends "!layout.html" %} {% block footer %} {{ super() }} {% endblock %} scrapy-1.7.3/docs/conf.py000066400000000000000000000204301352060011200152370ustar00rootroot00000000000000# -*- coding: utf-8 -*- # # Scrapy documentation build configuration file, created by # sphinx-quickstart on Mon Nov 24 12:02:52 2008. # # This file is execfile()d with the current directory set to its containing dir. # # The contents of this file are pickled, so don't put values in the namespace # that aren't pickleable (module imports are okay, they're removed automatically). # # All configuration values have a default; values that are commented out # serve to show the default. import sys from os import path # If your extensions are in another directory, add it here. If the directory # is relative to the documentation root, use os.path.abspath to make it # absolute, like shown here. sys.path.append(path.join(path.dirname(__file__), "_ext")) sys.path.insert(0, path.dirname(path.dirname(__file__))) # General configuration # --------------------- # Add any Sphinx extension module names here, as strings. They can be extensions # coming with Sphinx (named 'sphinx.ext.*') or your custom ones. extensions = [ 'scrapydocs', 'sphinx.ext.autodoc', 'sphinx.ext.coverage', 'sphinx.ext.intersphinx', ] # Add any paths that contain templates here, relative to this directory. templates_path = ['_templates'] # The suffix of source filenames. source_suffix = '.rst' # The encoding of source files. #source_encoding = 'utf-8' # The master toctree document. master_doc = 'index' # General information about the project. project = u'Scrapy' copyright = u'2008–2018, Scrapy developers' # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # # The short X.Y version. try: import scrapy version = '.'.join(map(str, scrapy.version_info[:2])) release = scrapy.__version__ except ImportError: version = '' release = '' # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. 
language = 'en' # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: #today = '' # Else, today_fmt is used as the format for a strftime call. #today_fmt = '%B %d, %Y' # List of documents that shouldn't be included in the build. #unused_docs = [] exclude_patterns = ['build'] # List of directories, relative to source directory, that shouldn't be searched # for source files. exclude_trees = ['.build'] # The reST default role (used for this markup: `text`) to use for all documents. #default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. #add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). #add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. #show_authors = False # The name of the Pygments (syntax highlighting) style to use. pygments_style = 'sphinx' # Options for HTML output # ----------------------- # The theme to use for HTML and HTML Help pages. See the documentation for # a list of builtin themes. html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. #html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. # Add path to the RTD explicitly to robustify builds (otherwise might # fail in a clean Debian build env) import sphinx_rtd_theme html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] # The style sheet to use for HTML and HTML Help pages. A file of that name # must exist either in Sphinx' static/ path, or in one of the custom paths # given in html_static_path. # html_style = 'scrapydoc.css' # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". #html_title = None # A shorter title for the navigation bar. Default is the same as html_title. #html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. #html_logo = None # The name of an image file (within the static path) to use as favicon of the # docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. #html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. html_last_updated_fmt = '%b %d, %Y' # Custom sidebar templates, maps document names to template names. #html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. #html_additional_pages = {} # If false, no module index is generated. #html_use_modindex = True # If false, no index is generated. #html_use_index = True # If true, the index is split into individual pages for each letter. #html_split_index = False # If true, the reST sources are included in the HTML build as _sources/. html_copy_source = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. 
#html_use_opensearch = '' # If nonempty, this is the file name suffix for HTML files (e.g. ".xhtml"). #html_file_suffix = '' # Output file base name for HTML help builder. htmlhelp_basename = 'Scrapydoc' # Options for LaTeX output # ------------------------ # The paper size ('letter' or 'a4'). #latex_paper_size = 'letter' # The font size ('10pt', '11pt' or '12pt'). #latex_font_size = '10pt' # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, author, document class [howto/manual]). latex_documents = [ ('index', 'Scrapy.tex', u'Scrapy Documentation', u'Scrapy developers', 'manual'), ] # The name of an image file (relative to this directory) to place at the top of # the title page. #latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. #latex_use_parts = False # Additional stuff for the LaTeX preamble. #latex_preamble = '' # Documents to append as an appendix to all manuals. #latex_appendices = [] # If false, no module index is generated. #latex_use_modindex = True # Options for the linkcheck builder # --------------------------------- # A list of regular expressions that match URIs that should not be checked when # doing a linkcheck build. linkcheck_ignore = [ 'http://localhost:\d+', 'http://hg.scrapy.org', 'http://directory.google.com/' ] # Options for the Coverage extension # ---------------------------------- coverage_ignore_pyobjects = [ # Contract’s add_pre_hook and add_post_hook are not documented because # they should be transparent to contract developers, for whom pre_hook and # post_hook should be the actual concern. r'\bContract\.add_(pre|post)_hook$', # ContractsManager is an internal class, developers are not expected to # interact with it directly in any way. r'\bContractsManager\b$', # For default contracts we only want to document their general purpose in # their constructor, the methods they reimplement to achieve that purpose # should be irrelevant to developers using those contracts. r'\w+Contract\.(adjust_request_args|(pre|post)_process)$', # Methods of downloader middlewares are not documented, only the classes # themselves, since downloader middlewares are controlled through Scrapy # settings. r'^scrapy\.downloadermiddlewares\.\w*?\.(\w*?Middleware|DownloaderStats)\.', # Base classes of downloader middlewares are implementation details that # are not meant for users. r'^scrapy\.downloadermiddlewares\.\w*?\.Base\w*?Middleware', # Private exception used by the command-line interface implementation. r'^scrapy\.exceptions\.UsageError', ] # Options for the InterSphinx extension # ------------------------------------- intersphinx_mapping = { 'python': ('https://docs.python.org/3', None), } scrapy-1.7.3/docs/contributing.rst000066400000000000000000000255521352060011200172130ustar00rootroot00000000000000.. _topics-contributing: ====================== Contributing to Scrapy ====================== .. important:: Double check that you are reading the most recent version of this document at https://docs.scrapy.org/en/master/contributing.html There are many ways to contribute to Scrapy. Here are some of them: * Blog about Scrapy. Tell the world how you're using Scrapy. This will help newcomers with more examples and will help the Scrapy project to increase its visibility. * Report bugs and request features in the `issue tracker`_, trying to follow the guidelines detailed in `Reporting bugs`_ below. * Submit patches for new functionalities and/or bug fixes. 
Please read :ref:`writing-patches` and `Submitting patches`_ below for details on how to write and submit a patch. * Join the `Scrapy subreddit`_ and share your ideas on how to improve Scrapy. We're always open to suggestions. * Answer Scrapy questions at `Stack Overflow `__. Reporting bugs ============== .. note:: Please report security issues **only** to scrapy-security@googlegroups.com. This is a private list only open to trusted Scrapy developers, and its archives are not public. Well-written bug reports are very helpful, so keep in mind the following guidelines when you're going to report a new bug. * check the :ref:`FAQ ` first to see if your issue is addressed in a well-known question * if you have a general question about scrapy usage, please ask it at `Stack Overflow `__ (use "scrapy" tag). * check the `open issues`_ to see if the issue has already been reported. If it has, don't dismiss the report, but check the ticket history and comments. If you have additional useful information, please leave a comment, or consider :ref:`sending a pull request ` with a fix. * search the `scrapy-users`_ list and `Scrapy subreddit`_ to see if it has been discussed there, or if you're not sure if what you're seeing is a bug. You can also ask in the ``#scrapy`` IRC channel. * write **complete, reproducible, specific bug reports**. The smaller the test case, the better. Remember that other developers won't have your project to reproduce the bug, so please include all relevant files required to reproduce it. See for example StackOverflow's guide on creating a `Minimal, Complete, and Verifiable example`_ exhibiting the issue. * the most awesome way to provide a complete reproducible example is to send a pull request which adds a failing test case to the Scrapy testing suite (see :ref:`submitting-patches`). This is helpful even if you don't have an intention to fix the issue yourselves. * include the output of ``scrapy version -v`` so developers working on your bug know exactly which version and platform it occurred on, which is often very helpful for reproducing it, or knowing if it was already fixed. .. _Minimal, Complete, and Verifiable example: https://stackoverflow.com/help/mcve .. _writing-patches: Writing patches =============== The better a patch is written, the higher the chances that it'll get accepted and the sooner it will be merged. Well-written patches should: * contain the minimum amount of code required for the specific change. Small patches are easier to review and merge. So, if you're doing more than one change (or bug fix), please consider submitting one patch per change. Do not collapse multiple changes into a single patch. For big changes consider using a patch queue. * pass all unit-tests. See `Running tests`_ below. * include one (or more) test cases that check the bug fixed or the new functionality added. See `Writing tests`_ below. * if you're adding or changing a public (documented) API, please include the documentation changes in the same patch. See `Documentation policies`_ below. * if you're adding a private API, please add a regular expression to the ``coverage_ignore_pyobjects`` variable of ``docs/conf.py`` to exclude the new private API from documentation coverage checks. To see if your private API is skipped properly, generate a documentation coverage report as follows:: tox -e docs-coverage .. _submitting-patches: Submitting patches ================== The best way to submit a patch is to issue a `pull request`_ on GitHub, optionally creating a new issue first. 
Remember to explain what was fixed or the new functionality (what it is, why it's needed, etc). The more info you include, the easier will be for core developers to understand and accept your patch. You can also discuss the new functionality (or bug fix) before creating the patch, but it's always good to have a patch ready to illustrate your arguments and show that you have put some additional thought into the subject. A good starting point is to send a pull request on GitHub. It can be simple enough to illustrate your idea, and leave documentation/tests for later, after the idea has been validated and proven useful. Alternatively, you can start a conversation in the `Scrapy subreddit`_ to discuss your idea first. Sometimes there is an existing pull request for the problem you'd like to solve, which is stalled for some reason. Often the pull request is in a right direction, but changes are requested by Scrapy maintainers, and the original pull request author hasn't had time to address them. In this case consider picking up this pull request: open a new pull request with all commits from the original pull request, as well as additional changes to address the raised issues. Doing so helps a lot; it is not considered rude as soon as the original author is acknowledged by keeping his/her commits. You can pull an existing pull request to a local branch by running ``git fetch upstream pull/$PR_NUMBER/head:$BRANCH_NAME_TO_CREATE`` (replace 'upstream' with a remote name for scrapy repository, ``$PR_NUMBER`` with an ID of the pull request, and ``$BRANCH_NAME_TO_CREATE`` with a name of the branch you want to create locally). See also: https://help.github.com/articles/checking-out-pull-requests-locally/#modifying-an-inactive-pull-request-locally. When writing GitHub pull requests, try to keep titles short but descriptive. E.g. For bug #411: "Scrapy hangs if an exception raises in start_requests" prefer "Fix hanging when exception occurs in start_requests (#411)" instead of "Fix for #411". Complete titles make it easy to skim through the issue tracker. Finally, try to keep aesthetic changes (:pep:`8` compliance, unused imports removal, etc) in separate commits from functional changes. This will make pull requests easier to review and more likely to get merged. Coding style ============ Please follow these coding conventions when writing code for inclusion in Scrapy: * Unless otherwise specified, follow :pep:`8`. * It's OK to use lines longer than 80 chars if it improves the code readability. * Don't put your name in the code you contribute; git provides enough metadata to identify author of the code. See https://help.github.com/articles/setting-your-username-in-git/ for setup instructions. .. _documentation-policies: Documentation policies ====================== For reference documentation of API members (classes, methods, etc.) use docstrings and make sure that the Sphinx documentation uses the autodoc_ extension to pull the docstrings. API reference documentation should follow docstring conventions (`PEP 257`_) and be IDE-friendly: short, to the point, and it may provide short examples. Other types of documentation, such as tutorials or topics, should be covered in files within the ``docs/`` directory. This includes documentation that is specific to an API member, but goes beyond API reference documentation. 
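For illustration, here is the kind of short, PEP 257-style docstring that the guidelines above describe. The ``normalize_url`` helper below is made up for this example and is **not** part of the Scrapy API::

    from six.moves.urllib.parse import urlsplit, urlunsplit

    def normalize_url(url, keep_fragments=False):
        """Return a canonical form of *url*.

        Lowercase the scheme and network location and drop the fragment
        unless *keep_fragments* is True.

        >>> normalize_url('HTTP://Example.com/path#top')
        'http://example.com/path'
        """
        parts = urlsplit(url)
        fragment = parts.fragment if keep_fragments else ''
        return urlunsplit((parts.scheme.lower(), parts.netloc.lower(),
                           parts.path, parts.query, fragment))

Keeping the docstring this short makes it useful both in the generated API reference and as an IDE tooltip.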
In any case, if something is covered in a docstring, use the autodoc_ extension to pull the docstring into the documentation instead of duplicating the docstring in files within the ``docs/`` directory. .. _autodoc: http://www.sphinx-doc.org/en/stable/ext/autodoc.html Tests ===== Tests are implemented using the `Twisted unit-testing framework`_, running tests requires `tox`_. .. _running-tests: Running tests ------------- Make sure you have a recent enough `tox`_ installation: ``tox --version`` If your version is older than 1.7.0, please update it first: ``pip install -U tox`` To run all tests go to the root directory of Scrapy source code and run: ``tox`` To run a specific test (say ``tests/test_loader.py``) use: ``tox -- tests/test_loader.py`` To run the tests on a specific tox_ environment, use ``-e `` with an environment name from ``tox.ini``. For example, to run the tests with Python 3.6 use:: tox -e py36 You can also specify a comma-separated list of environmets, and use `tox’s parallel mode`_ to run the tests on multiple environments in parallel:: tox -e py27,py36 -p auto To pass command-line options to pytest_, add them after ``--`` in your call to tox_. Using ``--`` overrides the default positional arguments defined in ``tox.ini``, so you must include those default positional arguments (``scrapy tests``) after ``--`` as well:: tox -- scrapy tests -x # stop after first failure You can also use the `pytest-xdist`_ plugin. For example, to run all tests on the Python 3.6 tox_ environment using all your CPU cores:: tox -e py36 -- scrapy tests -n auto To see coverage report install `coverage`_ (``pip install coverage``) and run: ``coverage report`` see output of ``coverage --help`` for more options like html or xml report. .. _coverage: https://pypi.python.org/pypi/coverage Writing tests ------------- All functionality (including new features and bug fixes) must include a test case to check that it works as expected, so please include tests for your patches if you want them to get accepted sooner. Scrapy uses unit-tests, which are located in the `tests/`_ directory. Their module name typically resembles the full path of the module they're testing. For example, the item loaders code is in:: scrapy.loader And their unit-tests are in:: tests/test_loader.py .. _issue tracker: https://github.com/scrapy/scrapy/issues .. _scrapy-users: https://groups.google.com/forum/#!forum/scrapy-users .. _Scrapy subreddit: https://reddit.com/r/scrapy .. _Twisted unit-testing framework: https://twistedmatrix.com/documents/current/core/development/policy/test-standard.html .. _AUTHORS: https://github.com/scrapy/scrapy/blob/master/AUTHORS .. _tests/: https://github.com/scrapy/scrapy/tree/master/tests .. _open issues: https://github.com/scrapy/scrapy/issues .. _PEP 257: https://www.python.org/dev/peps/pep-0257/ .. _pull request: https://help.github.com/en/articles/creating-a-pull-request .. _pytest: https://docs.pytest.org/en/latest/usage.html .. _pytest-xdist: https://docs.pytest.org/en/3.0.0/xdist.html .. _tox: https://pypi.python.org/pypi/tox .. _tox’s parallel mode: https://tox.readthedocs.io/en/latest/example/basic.html#parallel-mode scrapy-1.7.3/docs/faq.rst000066400000000000000000000316441352060011200152520ustar00rootroot00000000000000.. _faq: Frequently Asked Questions ========================== .. _faq-scrapy-bs-cmp: How does Scrapy compare to BeautifulSoup or lxml? ------------------------------------------------- `BeautifulSoup`_ and `lxml`_ are libraries for parsing HTML and XML. 
Scrapy is an application framework for writing web spiders that crawl web sites and extract data from them. Scrapy provides a built-in mechanism for extracting data (called :ref:`selectors `) but you can easily use `BeautifulSoup`_ (or `lxml`_) instead, if you feel more comfortable working with them. After all, they're just parsing libraries which can be imported and used from any Python code. In other words, comparing `BeautifulSoup`_ (or `lxml`_) to Scrapy is like comparing `jinja2`_ to `Django`_. .. _BeautifulSoup: https://www.crummy.com/software/BeautifulSoup/ .. _lxml: http://lxml.de/ .. _jinja2: http://jinja.pocoo.org/ .. _Django: https://www.djangoproject.com/ Can I use Scrapy with BeautifulSoup? ------------------------------------ Yes, you can. As mentioned :ref:`above `, `BeautifulSoup`_ can be used for parsing HTML responses in Scrapy callbacks. You just have to feed the response's body into a ``BeautifulSoup`` object and extract whatever data you need from it. Here's an example spider using BeautifulSoup API, with ``lxml`` as the HTML parser:: from bs4 import BeautifulSoup import scrapy class ExampleSpider(scrapy.Spider): name = "example" allowed_domains = ["example.com"] start_urls = ( 'http://www.example.com/', ) def parse(self, response): # use lxml to get decent HTML parsing speed soup = BeautifulSoup(response.text, 'lxml') yield { "url": response.url, "title": soup.h1.string } .. note:: ``BeautifulSoup`` supports several HTML/XML parsers. See `BeautifulSoup's official documentation`_ on which ones are available. .. _BeautifulSoup's official documentation: https://www.crummy.com/software/BeautifulSoup/bs4/doc/#specifying-the-parser-to-use .. _faq-python-versions: What Python versions does Scrapy support? ----------------------------------------- Scrapy is supported under Python 2.7 and Python 3.4+ under CPython (default Python implementation) and PyPy (starting with PyPy 5.9). Python 2.6 support was dropped starting at Scrapy 0.20. Python 3 support was added in Scrapy 1.1. PyPy support was added in Scrapy 1.4, PyPy3 support was added in Scrapy 1.5. .. note:: For Python 3 support on Windows, it is recommended to use Anaconda/Miniconda as :ref:`outlined in the installation guide `. Did Scrapy "steal" X from Django? --------------------------------- Probably, but we don't like that word. We think Django_ is a great open source project and an example to follow, so we've used it as an inspiration for Scrapy. We believe that, if something is already done well, there's no need to reinvent it. This concept, besides being one of the foundations for open source and free software, not only applies to software but also to documentation, procedures, policies, etc. So, instead of going through each problem ourselves, we choose to copy ideas from those projects that have already solved them properly, and focus on the real problems we need to solve. We'd be proud if Scrapy serves as an inspiration for other projects. Feel free to steal from us! Does Scrapy work with HTTP proxies? ----------------------------------- Yes. Support for HTTP proxies is provided (since Scrapy 0.8) through the HTTP Proxy downloader middleware. See :class:`~scrapy.downloadermiddlewares.httpproxy.HttpProxyMiddleware`. How can I scrape an item with attributes in different pages? ------------------------------------------------------------ See :ref:`topics-request-response-ref-request-callback-arguments`. 
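As a quick, self-contained sketch of that technique (the URLs, CSS selectors and field names below are invented for the example), the usual approach is to pass the partially populated item to the callback of the follow-up request. ``Request.cb_kwargs`` requires Scrapy 1.7 or later; on older versions ``request.meta`` can carry the item instead::

    import scrapy

    class ProductSpider(scrapy.Spider):
        name = 'product_example'
        start_urls = ['http://www.example.com/products.html']

        def parse(self, response):
            for href in response.css('a.product::attr(href)').getall():
                item = {'url': response.urljoin(href)}
                # fill in the rest of the item on the detail page
                yield response.follow(href, callback=self.parse_details,
                                      cb_kwargs={'item': item})

        def parse_details(self, response, item):
            item['title'] = response.css('h1::text').get()
            yield item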
Scrapy crashes with: ImportError: No module named win32api ---------------------------------------------------------- You need to install `pywin32`_ because of `this Twisted bug`_. .. _pywin32: https://sourceforge.net/projects/pywin32/ .. _this Twisted bug: https://twistedmatrix.com/trac/ticket/3707 How can I simulate a user login in my spider? --------------------------------------------- See :ref:`topics-request-response-ref-request-userlogin`. .. _faq-bfo-dfo: Does Scrapy crawl in breadth-first or depth-first order? -------------------------------------------------------- By default, Scrapy uses a `LIFO`_ queue for storing pending requests, which basically means that it crawls in `DFO order`_. This order is more convenient in most cases. If you do want to crawl in true `BFO order`_, you can do it by setting the following settings:: DEPTH_PRIORITY = 1 SCHEDULER_DISK_QUEUE = 'scrapy.squeues.PickleFifoDiskQueue' SCHEDULER_MEMORY_QUEUE = 'scrapy.squeues.FifoMemoryQueue' While pending requests are below the configured values of :setting:`CONCURRENT_REQUESTS`, :setting:`CONCURRENT_REQUESTS_PER_DOMAIN` or :setting:`CONCURRENT_REQUESTS_PER_DOMAIN`, those requests are sent concurrently. As a result, the first few requests of a crawl rarely follow the desired order. Lowering those settings to ``1`` enforces the desired order, but it significantly slows down the crawl as a whole. My Scrapy crawler has memory leaks. What can I do? -------------------------------------------------- See :ref:`topics-leaks`. Also, Python has a builtin memory leak issue which is described in :ref:`topics-leaks-without-leaks`. How can I make Scrapy consume less memory? ------------------------------------------ See previous question. Can I use Basic HTTP Authentication in my spiders? -------------------------------------------------- Yes, see :class:`~scrapy.downloadermiddlewares.httpauth.HttpAuthMiddleware`. Why does Scrapy download pages in English instead of my native language? ------------------------------------------------------------------------ Try changing the default `Accept-Language`_ request header by overriding the :setting:`DEFAULT_REQUEST_HEADERS` setting. .. _Accept-Language: https://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.4 Where can I find some example Scrapy projects? ---------------------------------------------- See :ref:`intro-examples`. Can I run a spider without creating a project? ---------------------------------------------- Yes. You can use the :command:`runspider` command. For example, if you have a spider written in a ``my_spider.py`` file you can run it with:: scrapy runspider my_spider.py See :command:`runspider` command for more info. I get "Filtered offsite request" messages. How can I fix them? -------------------------------------------------------------- Those messages (logged with ``DEBUG`` level) don't necessarily mean there is a problem, so you may not need to fix them. Those messages are thrown by the Offsite Spider Middleware, which is a spider middleware (enabled by default) whose purpose is to filter out requests to domains outside the ones covered by the spider. For more info see: :class:`~scrapy.spidermiddlewares.offsite.OffsiteMiddleware`. What is the recommended way to deploy a Scrapy crawler in production? --------------------------------------------------------------------- See :ref:`topics-deploy`. Can I use JSON for large exports? --------------------------------- It'll depend on how large your output is. 
See :ref:`this warning ` in :class:`~scrapy.exporters.JsonItemExporter` documentation. Can I return (Twisted) deferreds from signal handlers? ------------------------------------------------------ Some signals support returning deferreds from their handlers, others don't. See the :ref:`topics-signals-ref` to know which ones. What does the response status code 999 means? --------------------------------------------- 999 is a custom response status code used by Yahoo sites to throttle requests. Try slowing down the crawling speed by using a download delay of ``2`` (or higher) in your spider:: class MySpider(CrawlSpider): name = 'myspider' download_delay = 2 # [ ... rest of the spider code ... ] Or by setting a global download delay in your project with the :setting:`DOWNLOAD_DELAY` setting. Can I call ``pdb.set_trace()`` from my spiders to debug them? ------------------------------------------------------------- Yes, but you can also use the Scrapy shell which allows you to quickly analyze (and even modify) the response being processed by your spider, which is, quite often, more useful than plain old ``pdb.set_trace()``. For more info see :ref:`topics-shell-inspect-response`. Simplest way to dump all my scraped items into a JSON/CSV/XML file? ------------------------------------------------------------------- To dump into a JSON file:: scrapy crawl myspider -o items.json To dump into a CSV file:: scrapy crawl myspider -o items.csv To dump into a XML file:: scrapy crawl myspider -o items.xml For more information see :ref:`topics-feed-exports` What's this huge cryptic ``__VIEWSTATE`` parameter used in some forms? ---------------------------------------------------------------------- The ``__VIEWSTATE`` parameter is used in sites built with ASP.NET/VB.NET. For more info on how it works see `this page`_. Also, here's an `example spider`_ which scrapes one of these sites. .. _this page: http://search.cpan.org/~ecarroll/HTML-TreeBuilderX-ASP_NET-0.09/lib/HTML/TreeBuilderX/ASP_NET.pm .. _example spider: https://github.com/AmbientLighter/rpn-fas/blob/master/fas/spiders/rnp.py What's the best way to parse big XML/CSV data feeds? ---------------------------------------------------- Parsing big feeds with XPath selectors can be problematic since they need to build the DOM of the entire feed in memory, and this can be quite slow and consume a lot of memory. In order to avoid parsing all the entire feed at once in memory, you can use the functions ``xmliter`` and ``csviter`` from ``scrapy.utils.iterators`` module. In fact, this is what the feed spiders (see :ref:`topics-spiders`) use under the cover. Does Scrapy manage cookies automatically? ----------------------------------------- Yes, Scrapy receives and keeps track of cookies sent by servers, and sends them back on subsequent requests, like any regular web browser does. For more info see :ref:`topics-request-response` and :ref:`cookies-mw`. How can I see the cookies being sent and received from Scrapy? -------------------------------------------------------------- Enable the :setting:`COOKIES_DEBUG` setting. How can I instruct a spider to stop itself? ------------------------------------------- Raise the :exc:`~scrapy.exceptions.CloseSpider` exception from a callback. For more info see: :exc:`~scrapy.exceptions.CloseSpider`. How can I prevent my Scrapy bot from getting banned? ---------------------------------------------------- See :ref:`bans`. Should I use spider arguments or settings to configure my spider? 
----------------------------------------------------------------- Both :ref:`spider arguments ` and :ref:`settings ` can be used to configure your spider. There is no strict rule that mandates to use one or the other, but settings are more suited for parameters that, once set, don't change much, while spider arguments are meant to change more often, even on each spider run and sometimes are required for the spider to run at all (for example, to set the start url of a spider). To illustrate with an example, assuming you have a spider that needs to log into a site to scrape data, and you only want to scrape data from a certain section of the site (which varies each time). In that case, the credentials to log in would be settings, while the url of the section to scrape would be a spider argument. I'm scraping a XML document and my XPath selector doesn't return any items -------------------------------------------------------------------------- You may need to remove namespaces. See :ref:`removing-namespaces`. .. _faq-split-item: How to split an item into multiple items in an item pipeline? ------------------------------------------------------------- :ref:`Item pipelines ` cannot yield multiple items per input item. :ref:`Create a spider middleware ` instead, and use its :meth:`~scrapy.spidermiddlewares.SpiderMiddleware.process_spider_output` method for this puspose. For example:: from copy import deepcopy from scrapy.item import BaseItem class MultiplyItemsMiddleware: def process_spider_output(self, response, result, spider): for item in result: if isinstance(item, (BaseItem, dict)): for _ in range(item['multiply_by']): yield deepcopy(item) .. _user agents: https://en.wikipedia.org/wiki/User_agent .. _LIFO: https://en.wikipedia.org/wiki/Stack_(abstract_data_type) .. _DFO order: https://en.wikipedia.org/wiki/Depth-first_search .. _BFO order: https://en.wikipedia.org/wiki/Breadth-first_search scrapy-1.7.3/docs/index.rst000066400000000000000000000143221352060011200156040ustar00rootroot00000000000000.. _topics-index: ============================== Scrapy |version| documentation ============================== Scrapy is a fast high-level `web crawling`_ and `web scraping`_ framework, used to crawl websites and extract structured data from their pages. It can be used for a wide range of purposes, from data mining to monitoring and automated testing. .. _web crawling: https://en.wikipedia.org/wiki/Web_crawler .. _web scraping: https://en.wikipedia.org/wiki/Web_scraping Getting help ============ Having trouble? We'd like to help! * Try the :doc:`FAQ ` -- it's got answers to some common questions. * Looking for specific information? Try the :ref:`genindex` or :ref:`modindex`. * Ask or search questions in `StackOverflow using the scrapy tag`_. * Ask or search questions in the `Scrapy subreddit`_. * Search for questions on the archives of the `scrapy-users mailing list`_. * Ask a question in the `#scrapy IRC channel`_, * Report bugs with Scrapy in our `issue tracker`_. .. _scrapy-users mailing list: https://groups.google.com/forum/#!forum/scrapy-users .. _Scrapy subreddit: https://www.reddit.com/r/scrapy/ .. _StackOverflow using the scrapy tag: https://stackoverflow.com/tags/scrapy .. _#scrapy IRC channel: irc://irc.freenode.net/scrapy .. _issue tracker: https://github.com/scrapy/scrapy/issues First steps =========== .. toctree:: :caption: First steps :hidden: intro/overview intro/install intro/tutorial intro/examples :doc:`intro/overview` Understand what Scrapy is and how it can help you. 
:doc:`intro/install` Get Scrapy installed on your computer. :doc:`intro/tutorial` Write your first Scrapy project. :doc:`intro/examples` Learn more by playing with a pre-made Scrapy project. .. _section-basics: Basic concepts ============== .. toctree:: :caption: Basic concepts :hidden: topics/commands topics/spiders topics/selectors topics/items topics/loaders topics/shell topics/item-pipeline topics/feed-exports topics/request-response topics/link-extractors topics/settings topics/exceptions :doc:`topics/commands` Learn about the command-line tool used to manage your Scrapy project. :doc:`topics/spiders` Write the rules to crawl your websites. :doc:`topics/selectors` Extract the data from web pages using XPath. :doc:`topics/shell` Test your extraction code in an interactive environment. :doc:`topics/items` Define the data you want to scrape. :doc:`topics/loaders` Populate your items with the extracted data. :doc:`topics/item-pipeline` Post-process and store your scraped data. :doc:`topics/feed-exports` Output your scraped data using different formats and storages. :doc:`topics/request-response` Understand the classes used to represent HTTP requests and responses. :doc:`topics/link-extractors` Convenient classes to extract links to follow from pages. :doc:`topics/settings` Learn how to configure Scrapy and see all :ref:`available settings `. :doc:`topics/exceptions` See all available exceptions and their meaning. Built-in services ================= .. toctree:: :caption: Built-in services :hidden: topics/logging topics/stats topics/email topics/telnetconsole topics/webservice :doc:`topics/logging` Learn how to use Python's builtin logging on Scrapy. :doc:`topics/stats` Collect statistics about your scraping crawler. :doc:`topics/email` Send email notifications when certain events occur. :doc:`topics/telnetconsole` Inspect a running crawler using a built-in Python console. :doc:`topics/webservice` Monitor and control a crawler using a web service. Solving specific problems ========================= .. toctree:: :caption: Solving specific problems :hidden: faq topics/debug topics/contracts topics/practices topics/broad-crawls topics/developer-tools topics/dynamic-content topics/leaks topics/media-pipeline topics/deploy topics/autothrottle topics/benchmarking topics/jobs :doc:`faq` Get answers to most frequently asked questions. :doc:`topics/debug` Learn how to debug common problems of your scrapy spider. :doc:`topics/contracts` Learn how to use contracts for testing your spiders. :doc:`topics/practices` Get familiar with some Scrapy common practices. :doc:`topics/broad-crawls` Tune Scrapy for crawling a lot domains in parallel. :doc:`topics/developer-tools` Learn how to scrape with your browser's developer tools. :doc:`topics/dynamic-content` Read webpage data that is loaded dynamically. :doc:`topics/leaks` Learn how to find and get rid of memory leaks in your crawler. :doc:`topics/media-pipeline` Download files and/or images associated with your scraped items. :doc:`topics/deploy` Deploying your Scrapy spiders and run them in a remote server. :doc:`topics/autothrottle` Adjust crawl rate dynamically based on load. :doc:`topics/benchmarking` Check how Scrapy performs on your hardware. :doc:`topics/jobs` Learn how to pause and resume crawls for large spiders. .. _extending-scrapy: Extending Scrapy ================ .. 
toctree:: :caption: Extending Scrapy :hidden: topics/architecture topics/downloader-middleware topics/spider-middleware topics/extensions topics/api topics/signals topics/exporters :doc:`topics/architecture` Understand the Scrapy architecture. :doc:`topics/downloader-middleware` Customize how pages get requested and downloaded. :doc:`topics/spider-middleware` Customize the input and output of your spiders. :doc:`topics/extensions` Extend Scrapy with your custom functionality :doc:`topics/api` Use it on extensions and middlewares to extend Scrapy functionality :doc:`topics/signals` See all available signals and how to work with them. :doc:`topics/exporters` Quickly export your scraped items to a file (XML, CSV, etc). All the rest ============ .. toctree:: :caption: All the rest :hidden: news contributing versioning :doc:`news` See what has changed in recent Scrapy versions. :doc:`contributing` Learn how to contribute to the Scrapy project. :doc:`versioning` Understand Scrapy versioning and API stability. scrapy-1.7.3/docs/intro/000077500000000000000000000000001352060011200150745ustar00rootroot00000000000000scrapy-1.7.3/docs/intro/examples.rst000066400000000000000000000013501352060011200174430ustar00rootroot00000000000000.. _intro-examples: ======== Examples ======== The best way to learn is with examples, and Scrapy is no exception. For this reason, there is an example Scrapy project named quotesbot_, that you can use to play and learn more about Scrapy. It contains two spiders for http://quotes.toscrape.com, one using CSS selectors and another one using XPath expressions. The quotesbot_ project is available at: https://github.com/scrapy/quotesbot. You can find more information about it in the project's README. If you're familiar with git, you can checkout the code. Otherwise you can download the project as a zip file by clicking `here `_. .. _quotesbot: https://github.com/scrapy/quotesbot scrapy-1.7.3/docs/intro/install.rst000066400000000000000000000250431352060011200173000ustar00rootroot00000000000000.. _intro-install: ================== Installation guide ================== Installing Scrapy ================= Scrapy runs on Python 2.7 and Python 3.4 or above under CPython (default Python implementation) and PyPy (starting with PyPy 5.9). If you're using `Anaconda`_ or `Miniconda`_, you can install the package from the `conda-forge`_ channel, which has up-to-date packages for Linux, Windows and OS X. To install Scrapy using ``conda``, run:: conda install -c conda-forge scrapy Alternatively, if you’re already familiar with installation of Python packages, you can install Scrapy and its dependencies from PyPI with:: pip install Scrapy Note that sometimes this may require solving compilation issues for some Scrapy dependencies depending on your operating system, so be sure to check the :ref:`intro-install-platform-notes`. We strongly recommend that you install Scrapy in :ref:`a dedicated virtualenv `, to avoid conflicting with your system packages. For more detailed and platform specifics instructions, as well as troubleshooting information, read on. 
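If you want a quick sanity check that the installation worked, you can import Scrapy from a Python shell. This is only an illustrative session; the version string you see will depend on the release you installed::

    >>> import scrapy
    >>> scrapy.__version__
    '1.7.3'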
Things that are good to know ---------------------------- Scrapy is written in pure Python and depends on a few key Python packages (among others): * `lxml`_, an efficient XML and HTML parser * `parsel`_, an HTML/XML data extraction library written on top of lxml, * `w3lib`_, a multi-purpose helper for dealing with URLs and web page encodings * `twisted`_, an asynchronous networking framework * `cryptography`_ and `pyOpenSSL`_, to deal with various network-level security needs The minimal versions which Scrapy is tested against are: * Twisted 14.0 * lxml 3.4 * pyOpenSSL 0.14 Scrapy may work with older versions of these packages but it is not guaranteed it will continue working because it’s not being tested against them. Some of these packages themselves depends on non-Python packages that might require additional installation steps depending on your platform. Please check :ref:`platform-specific guides below `. In case of any trouble related to these dependencies, please refer to their respective installation instructions: * `lxml installation`_ * `cryptography installation`_ .. _lxml installation: http://lxml.de/installation.html .. _cryptography installation: https://cryptography.io/en/latest/installation/ .. _intro-using-virtualenv: Using a virtual environment (recommended) ----------------------------------------- TL;DR: We recommend installing Scrapy inside a virtual environment on all platforms. Python packages can be installed either globally (a.k.a system wide), or in user-space. We do not recommend installing scrapy system wide. Instead, we recommend that you install scrapy within a so-called "virtual environment" (`virtualenv`_). Virtualenvs allow you to not conflict with already-installed Python system packages (which could break some of your system tools and scripts), and still install packages normally with ``pip`` (without ``sudo`` and the likes). To get started with virtual environments, see `virtualenv installation instructions`_. To install it globally (having it globally installed actually helps here), it should be a matter of running:: $ [sudo] pip install virtualenv Check this `user guide`_ on how to create your virtualenv. .. note:: If you use Linux or OS X, `virtualenvwrapper`_ is a handy tool to create virtualenvs. Once you have created a virtualenv, you can install scrapy inside it with ``pip``, just like any other Python package. (See :ref:`platform-specific guides ` below for non-Python dependencies that you may need to install beforehand). Python virtualenvs can be created to use Python 2 by default, or Python 3 by default. * If you want to install scrapy with Python 3, install scrapy within a Python 3 virtualenv. * And if you want to install scrapy with Python 2, install scrapy within a Python 2 virtualenv. .. _virtualenv: https://virtualenv.pypa.io .. _virtualenv installation instructions: https://virtualenv.pypa.io/en/stable/installation/ .. _virtualenvwrapper: https://virtualenvwrapper.readthedocs.io/en/latest/install.html .. _user guide: https://virtualenv.pypa.io/en/stable/userguide/ .. _intro-install-platform-notes: Platform specific installation notes ==================================== .. _intro-install-windows: Windows ------- Though it's possible to install Scrapy on Windows using pip, we recommend you to install `Anaconda`_ or `Miniconda`_ and use the package from the `conda-forge`_ channel, which will avoid most installation issues. Once you've installed `Anaconda`_ or `Miniconda`_, install Scrapy with:: conda install -c conda-forge scrapy .. 
_intro-install-ubuntu: Ubuntu 14.04 or above --------------------- Scrapy is currently tested with recent-enough versions of lxml, twisted and pyOpenSSL, and is compatible with recent Ubuntu distributions. But it should support older versions of Ubuntu too, like Ubuntu 14.04, albeit with potential issues with TLS connections. **Don't** use the ``python-scrapy`` package provided by Ubuntu, they are typically too old and slow to catch up with latest Scrapy. To install scrapy on Ubuntu (or Ubuntu-based) systems, you need to install these dependencies:: sudo apt-get install python-dev python-pip libxml2-dev libxslt1-dev zlib1g-dev libffi-dev libssl-dev - ``python-dev``, ``zlib1g-dev``, ``libxml2-dev`` and ``libxslt1-dev`` are required for ``lxml`` - ``libssl-dev`` and ``libffi-dev`` are required for ``cryptography`` If you want to install scrapy on Python 3, you’ll also need Python 3 development headers:: sudo apt-get install python3 python3-dev Inside a :ref:`virtualenv `, you can install Scrapy with ``pip`` after that:: pip install scrapy .. note:: The same non-Python dependencies can be used to install Scrapy in Debian Jessie (8.0) and above. .. _intro-install-macos: Mac OS X -------- Building Scrapy's dependencies requires the presence of a C compiler and development headers. On OS X this is typically provided by Apple’s Xcode development tools. To install the Xcode command line tools open a terminal window and run:: xcode-select --install There's a `known issue `_ that prevents ``pip`` from updating system packages. This has to be addressed to successfully install Scrapy and its dependencies. Here are some proposed solutions: * *(Recommended)* **Don't** use system python, install a new, updated version that doesn't conflict with the rest of your system. Here's how to do it using the `homebrew`_ package manager: * Install `homebrew`_ following the instructions in https://brew.sh/ * Update your ``PATH`` variable to state that homebrew packages should be used before system packages (Change ``.bashrc`` to ``.zshrc`` accordantly if you're using `zsh`_ as default shell):: echo "export PATH=/usr/local/bin:/usr/local/sbin:$PATH" >> ~/.bashrc * Reload ``.bashrc`` to ensure the changes have taken place:: source ~/.bashrc * Install python:: brew install python * Latest versions of python have ``pip`` bundled with them so you won't need to install it separately. If this is not the case, upgrade python:: brew update; brew upgrade python * *(Optional)* Install Scrapy inside an isolated python environment. This method is a workaround for the above OS X issue, but it's an overall good practice for managing dependencies and can complement the first method. `virtualenv`_ is a tool you can use to create virtual environments in python. We recommended reading a tutorial like http://docs.python-guide.org/en/latest/dev/virtualenvs/ to get started. After any of these workarounds you should be able to install Scrapy:: pip install Scrapy PyPy ---- We recommend using the latest PyPy version. The version tested is 5.9.0. For PyPy3, only Linux installation was tested. Most scrapy dependencides now have binary wheels for CPython, but not for PyPy. This means that these dependecies will be built during installation. On OS X, you are likely to face an issue with building Cryptography dependency, solution to this problem is described `here `_, that is to ``brew install openssl`` and then export the flags that this command recommends (only needed when installing scrapy). 
Installing on Linux has no special issues besides installing build dependencies. Installing scrapy with PyPy on Windows is not tested. You can check that scrapy is installed correctly by running ``scrapy bench``. If this command gives errors such as ``TypeError: ... got 2 unexpected keyword arguments``, this means that setuptools was unable to pick up one PyPy-specific dependency. To fix this issue, run ``pip install 'PyPyDispatcher>=2.1.0'``. .. _intro-install-troubleshooting: Troubleshooting =============== AttributeError: 'module' object has no attribute 'OP_NO_TLSv1_1' ---------------------------------------------------------------- After you install or upgrade Scrapy, Twisted or pyOpenSSL, you may get an exception with the following traceback:: […] File "[…]/site-packages/twisted/protocols/tls.py", line 63, in from twisted.internet._sslverify import _setAcceptableProtocols File "[…]/site-packages/twisted/internet/_sslverify.py", line 38, in TLSVersion.TLSv1_1: SSL.OP_NO_TLSv1_1, AttributeError: 'module' object has no attribute 'OP_NO_TLSv1_1' The reason you get this exception is that your system or virtual environment has a version of pyOpenSSL that your version of Twisted does not support. To install a version of pyOpenSSL that your version of Twisted supports, reinstall Twisted with the :code:`tls` extra option:: pip install twisted[tls] For details, see `Issue #2473 `_. .. _Python: https://www.python.org/ .. _pip: https://pip.pypa.io/en/latest/installing/ .. _lxml: http://lxml.de/ .. _parsel: https://pypi.python.org/pypi/parsel .. _w3lib: https://pypi.python.org/pypi/w3lib .. _twisted: https://twistedmatrix.com/ .. _cryptography: https://cryptography.io/ .. _pyOpenSSL: https://pypi.python.org/pypi/pyOpenSSL .. _setuptools: https://pypi.python.org/pypi/setuptools .. _AUR Scrapy package: https://aur.archlinux.org/packages/scrapy/ .. _homebrew: https://brew.sh/ .. _zsh: https://www.zsh.org/ .. _Scrapinghub: https://scrapinghub.com .. _Anaconda: https://docs.anaconda.com/anaconda/ .. _Miniconda: https://conda.io/docs/user-guide/install/index.html .. _conda-forge: https://conda-forge.org/ scrapy-1.7.3/docs/intro/overview.rst000066400000000000000000000150501352060011200174750ustar00rootroot00000000000000.. _intro-overview: ================== Scrapy at a glance ================== Scrapy is an application framework for crawling web sites and extracting structured data which can be used for a wide range of useful applications, like data mining, information processing or historical archival. Even though Scrapy was originally designed for `web scraping`_, it can also be used to extract data using APIs (such as `Amazon Associates Web Services`_) or as a general purpose web crawler. Walk-through of an example spider ================================= In order to show you what Scrapy brings to the table, we'll walk you through an example of a Scrapy Spider using the simplest way to run a spider. 
Here's the code for a spider that scrapes famous quotes from website http://quotes.toscrape.com, following the pagination:: import scrapy class QuotesSpider(scrapy.Spider): name = 'quotes' start_urls = [ 'http://quotes.toscrape.com/tag/humor/', ] def parse(self, response): for quote in response.css('div.quote'): yield { 'text': quote.css('span.text::text').get(), 'author': quote.xpath('span/small/text()').get(), } next_page = response.css('li.next a::attr("href")').get() if next_page is not None: yield response.follow(next_page, self.parse) Put this in a text file, name it to something like ``quotes_spider.py`` and run the spider using the :command:`runspider` command:: scrapy runspider quotes_spider.py -o quotes.json When this finishes you will have in the ``quotes.json`` file a list of the quotes in JSON format, containing text and author, looking like this (reformatted here for better readability):: [{ "author": "Jane Austen", "text": "\u201cThe person, be it gentleman or lady, who has not pleasure in a good novel, must be intolerably stupid.\u201d" }, { "author": "Groucho Marx", "text": "\u201cOutside of a dog, a book is man's best friend. Inside of a dog it's too dark to read.\u201d" }, { "author": "Steve Martin", "text": "\u201cA day without sunshine is like, you know, night.\u201d" }, ...] What just happened? ------------------- When you ran the command ``scrapy runspider quotes_spider.py``, Scrapy looked for a Spider definition inside it and ran it through its crawler engine. The crawl started by making requests to the URLs defined in the ``start_urls`` attribute (in this case, only the URL for quotes in *humor* category) and called the default callback method ``parse``, passing the response object as an argument. In the ``parse`` callback, we loop through the quote elements using a CSS Selector, yield a Python dict with the extracted quote text and author, look for a link to the next page and schedule another request using the same ``parse`` method as callback. Here you notice one of the main advantages about Scrapy: requests are :ref:`scheduled and processed asynchronously `. This means that Scrapy doesn't need to wait for a request to be finished and processed, it can send another request or do other things in the meantime. This also means that other requests can keep going even if some request fails or an error happens while handling it. While this enables you to do very fast crawls (sending multiple concurrent requests at the same time, in a fault-tolerant way) Scrapy also gives you control over the politeness of the crawl through :ref:`a few settings `. You can do things like setting a download delay between each request, limiting amount of concurrent requests per domain or per IP, and even :ref:`using an auto-throttling extension ` that tries to figure out these automatically. .. note:: This is using :ref:`feed exports ` to generate the JSON file, you can easily change the export format (XML or CSV, for example) or the storage backend (FTP or `Amazon S3`_, for example). You can also write an :ref:`item pipeline ` to store the items in a database. .. _topics-whatelse: What else? ========== You've seen how to extract and store items from a website using Scrapy, but this is just the surface. Scrapy provides a lot of powerful features for making scraping easy and efficient, such as: * Built-in support for :ref:`selecting and extracting ` data from HTML/XML sources using extended CSS selectors and XPath expressions, with helper methods to extract using regular expressions. 
* An :ref:`interactive shell console ` (IPython aware) for trying out the CSS and XPath expressions to scrape data, very useful when writing or debugging your spiders. * Built-in support for :ref:`generating feed exports ` in multiple formats (JSON, CSV, XML) and storing them in multiple backends (FTP, S3, local filesystem) * Robust encoding support and auto-detection, for dealing with foreign, non-standard and broken encoding declarations. * :ref:`Strong extensibility support `, allowing you to plug in your own functionality using :ref:`signals ` and a well-defined API (middlewares, :ref:`extensions `, and :ref:`pipelines `). * Wide range of built-in extensions and middlewares for handling: - cookies and session handling - HTTP features like compression, authentication, caching - user-agent spoofing - robots.txt - crawl depth restriction - and more * A :ref:`Telnet console ` for hooking into a Python console running inside your Scrapy process, to introspect and debug your crawler * Plus other goodies like reusable spiders to crawl sites from `Sitemaps`_ and XML/CSV feeds, a media pipeline for :ref:`automatically downloading images ` (or any other media) associated with the scraped items, a caching DNS resolver, and much more! What's next? ============ The next steps for you are to :ref:`install Scrapy `, :ref:`follow through the tutorial ` to learn how to create a full-blown Scrapy project and `join the community`_. Thanks for your interest! .. _join the community: https://scrapy.org/community/ .. _web scraping: https://en.wikipedia.org/wiki/Web_scraping .. _Amazon Associates Web Services: https://affiliate-program.amazon.com/gp/advertising/api/detail/main.html .. _Amazon S3: https://aws.amazon.com/s3/ .. _Sitemaps: https://www.sitemaps.org/index.html scrapy-1.7.3/docs/intro/tutorial.rst000066400000000000000000000713251352060011200175010ustar00rootroot00000000000000.. _intro-tutorial: =============== Scrapy Tutorial =============== In this tutorial, we'll assume that Scrapy is already installed on your system. If that's not the case, see :ref:`intro-install`. We are going to scrape `quotes.toscrape.com `_, a website that lists quotes from famous authors. This tutorial will walk you through these tasks: 1. Creating a new Scrapy project 2. Writing a :ref:`spider ` to crawl a site and extract data 3. Exporting the scraped data using the command line 4. Changing spider to recursively follow links 5. Using spider arguments Scrapy is written in Python_. If you're new to the language you might want to start by getting an idea of what the language is like, to get the most out of Scrapy. If you're already familiar with other languages, and want to learn Python quickly, the `Python Tutorial`_ is a good resource. If you're new to programming and want to start with Python, the following books may be useful to you: * `Automate the Boring Stuff With Python`_ * `How To Think Like a Computer Scientist`_ * `Learn Python 3 The Hard Way`_ You can also take a look at `this list of Python resources for non-programmers`_, as well as the `suggested resources in the learnpython-subreddit`_. .. _Python: https://www.python.org/ .. _this list of Python resources for non-programmers: https://wiki.python.org/moin/BeginnersGuide/NonProgrammers .. _Python Tutorial: https://docs.python.org/3/tutorial .. _Automate the Boring Stuff With Python: https://automatetheboringstuff.com/ .. _How To Think Like a Computer Scientist: http://openbookproject.net/thinkcs/python/english3e/ .. 
_Learn Python 3 The Hard Way: https://learnpythonthehardway.org/python3/ .. _suggested resources in the learnpython-subreddit: https://www.reddit.com/r/learnpython/wiki/index#wiki_new_to_python.3F Creating a project ================== Before you start scraping, you will have to set up a new Scrapy project. Enter a directory where you'd like to store your code and run:: scrapy startproject tutorial This will create a ``tutorial`` directory with the following contents:: tutorial/ scrapy.cfg # deploy configuration file tutorial/ # project's Python module, you'll import your code from here __init__.py items.py # project items definition file middlewares.py # project middlewares file pipelines.py # project pipelines file settings.py # project settings file spiders/ # a directory where you'll later put your spiders __init__.py Our first Spider ================ Spiders are classes that you define and that Scrapy uses to scrape information from a website (or a group of websites). They must subclass :class:`scrapy.Spider` and define the initial requests to make, optionally how to follow links in the pages, and how to parse the downloaded page content to extract data. This is the code for our first Spider. Save it in a file named ``quotes_spider.py`` under the ``tutorial/spiders`` directory in your project:: import scrapy class QuotesSpider(scrapy.Spider): name = "quotes" def start_requests(self): urls = [ 'http://quotes.toscrape.com/page/1/', 'http://quotes.toscrape.com/page/2/', ] for url in urls: yield scrapy.Request(url=url, callback=self.parse) def parse(self, response): page = response.url.split("/")[-2] filename = 'quotes-%s.html' % page with open(filename, 'wb') as f: f.write(response.body) self.log('Saved file %s' % filename) As you can see, our Spider subclasses :class:`scrapy.Spider ` and defines some attributes and methods: * :attr:`~scrapy.spiders.Spider.name`: identifies the Spider. It must be unique within a project, that is, you can't set the same name for different Spiders. * :meth:`~scrapy.spiders.Spider.start_requests`: must return an iterable of Requests (you can return a list of requests or write a generator function) which the Spider will begin to crawl from. Subsequent requests will be generated successively from these initial requests. * :meth:`~scrapy.spiders.Spider.parse`: a method that will be called to handle the response downloaded for each of the requests made. The response parameter is an instance of :class:`~scrapy.http.TextResponse` that holds the page content and has further helpful methods to handle it. The :meth:`~scrapy.spiders.Spider.parse` method usually parses the response, extracting the scraped data as dicts and also finding new URLs to follow and creating new requests (:class:`~scrapy.http.Request`) from them. How to run our spider --------------------- To put our spider to work, go to the project's top level directory and run:: scrapy crawl quotes This command runs the spider with name ``quotes`` that we've just added, that will send some requests for the ``quotes.toscrape.com`` domain. You will get an output similar to this:: ... 
(omitted for brevity) 2016-12-16 21:24:05 [scrapy.core.engine] INFO: Spider opened 2016-12-16 21:24:05 [scrapy.extensions.logstats] INFO: Crawled 0 pages (at 0 pages/min), scraped 0 items (at 0 items/min) 2016-12-16 21:24:05 [scrapy.extensions.telnet] DEBUG: Telnet console listening on 127.0.0.1:6023 2016-12-16 21:24:05 [scrapy.core.engine] DEBUG: Crawled (404) (referer: None) 2016-12-16 21:24:05 [scrapy.core.engine] DEBUG: Crawled (200) (referer: None) 2016-12-16 21:24:05 [scrapy.core.engine] DEBUG: Crawled (200) (referer: None) 2016-12-16 21:24:05 [quotes] DEBUG: Saved file quotes-1.html 2016-12-16 21:24:05 [quotes] DEBUG: Saved file quotes-2.html 2016-12-16 21:24:05 [scrapy.core.engine] INFO: Closing spider (finished) ... Now, check the files in the current directory. You should notice that two new files have been created: *quotes-1.html* and *quotes-2.html*, with the content for the respective URLs, as our ``parse`` method instructs. .. note:: If you are wondering why we haven't parsed the HTML yet, hold on, we will cover that soon. What just happened under the hood? ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Scrapy schedules the :class:`scrapy.Request ` objects returned by the ``start_requests`` method of the Spider. Upon receiving a response for each one, it instantiates :class:`~scrapy.http.Response` objects and calls the callback method associated with the request (in this case, the ``parse`` method) passing the response as argument. A shortcut to the start_requests method --------------------------------------- Instead of implementing a :meth:`~scrapy.spiders.Spider.start_requests` method that generates :class:`scrapy.Request ` objects from URLs, you can just define a :attr:`~scrapy.spiders.Spider.start_urls` class attribute with a list of URLs. This list will then be used by the default implementation of :meth:`~scrapy.spiders.Spider.start_requests` to create the initial requests for your spider:: import scrapy class QuotesSpider(scrapy.Spider): name = "quotes" start_urls = [ 'http://quotes.toscrape.com/page/1/', 'http://quotes.toscrape.com/page/2/', ] def parse(self, response): page = response.url.split("/")[-2] filename = 'quotes-%s.html' % page with open(filename, 'wb') as f: f.write(response.body) The :meth:`~scrapy.spiders.Spider.parse` method will be called to handle each of the requests for those URLs, even though we haven't explicitly told Scrapy to do so. This happens because :meth:`~scrapy.spiders.Spider.parse` is Scrapy's default callback method, which is called for requests without an explicitly assigned callback. Extracting data --------------- The best way to learn how to extract data with Scrapy is trying selectors using the :ref:`Scrapy shell `. Run:: scrapy shell 'http://quotes.toscrape.com/page/1/' .. note:: Remember to always enclose urls in quotes when running Scrapy shell from command-line, otherwise urls containing arguments (ie. ``&`` character) will not work. On Windows, use double quotes instead:: scrapy shell "http://quotes.toscrape.com/page/1/" You will see something like:: [ ... Scrapy log here ... 
] 2016-09-19 12:09:27 [scrapy.core.engine] DEBUG: Crawled (200) (referer: None) [s] Available Scrapy objects: [s] scrapy scrapy module (contains scrapy.Request, scrapy.Selector, etc) [s] crawler [s] item {} [s] request [s] response <200 http://quotes.toscrape.com/page/1/> [s] settings [s] spider [s] Useful shortcuts: [s] shelp() Shell help (print this help) [s] fetch(req_or_url) Fetch request (or URL) and update local objects [s] view(response) View response in a browser >>> Using the shell, you can try selecting elements using `CSS`_ with the response object:: >>> response.css('title') [] The result of running ``response.css('title')`` is a list-like object called :class:`~scrapy.selector.SelectorList`, which represents a list of :class:`~scrapy.selector.Selector` objects that wrap around XML/HTML elements and allow you to run further queries to fine-grain the selection or extract the data. To extract the text from the title above, you can do:: >>> response.css('title::text').getall() ['Quotes to Scrape'] There are two things to note here: one is that we've added ``::text`` to the CSS query, to mean we want to select only the text elements directly inside ```` element. If we don't specify ``::text``, we'd get the full title element, including its tags:: >>> response.css('title').getall() ['<title>Quotes to Scrape'] The other thing is that the result of calling ``.getall()`` is a list: it is possible that a selector returns more than one result, so we extract them all. When you know you just want the first result, as in this case, you can do:: >>> response.css('title::text').get() 'Quotes to Scrape' As an alternative, you could've written:: >>> response.css('title::text')[0].get() 'Quotes to Scrape' However, using ``.get()`` directly on a :class:`~scrapy.selector.SelectorList` instance avoids an ``IndexError`` and returns ``None`` when it doesn't find any element matching the selection. There's a lesson here: for most scraping code, you want it to be resilient to errors due to things not being found on a page, so that even if some parts fail to be scraped, you can at least get **some** data. Besides the :meth:`~scrapy.selector.SelectorList.getall` and :meth:`~scrapy.selector.SelectorList.get` methods, you can also use the :meth:`~scrapy.selector.SelectorList.re` method to extract using `regular expressions`_:: >>> response.css('title::text').re(r'Quotes.*') ['Quotes to Scrape'] >>> response.css('title::text').re(r'Q\w+') ['Quotes'] >>> response.css('title::text').re(r'(\w+) to (\w+)') ['Quotes', 'Scrape'] In order to find the proper CSS selectors to use, you might find useful opening the response page from the shell in your web browser using ``view(response)``. You can use your browser's developer tools to inspect the HTML and come up with a selector (see :ref:`topics-developer-tools`). `Selector Gadget`_ is also a nice tool to quickly find CSS selector for visually selected elements, which works in many browsers. .. _regular expressions: https://docs.python.org/3/library/re.html .. _Selector Gadget: http://selectorgadget.com/ XPath: a brief intro ^^^^^^^^^^^^^^^^^^^^ Besides `CSS`_, Scrapy selectors also support using `XPath`_ expressions:: >>> response.xpath('//title') [] >>> response.xpath('//title/text()').get() 'Quotes to Scrape' XPath expressions are very powerful, and are the foundation of Scrapy Selectors. In fact, CSS selectors are converted to XPath under-the-hood. You can see that if you read closely the text representation of the selector objects in the shell. 
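For example, here is a small illustrative session that constructs a Selector directly and shows the XPath expression a CSS query is translated into (the exact object representation may differ slightly between versions)::

    >>> from scrapy import Selector
    >>> Selector(text='<title>Quotes to Scrape</title>').css('title')
    [<Selector xpath='descendant-or-self::title' data='<title>Quotes to Scrape</title>'>]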
While perhaps not as popular as CSS selectors, XPath expressions offer more power because besides navigating the structure, it can also look at the content. Using XPath, you're able to select things like: *select the link that contains the text "Next Page"*. This makes XPath very fitting to the task of scraping, and we encourage you to learn XPath even if you already know how to construct CSS selectors, it will make scraping much easier. We won't cover much of XPath here, but you can read more about :ref:`using XPath with Scrapy Selectors here `. To learn more about XPath, we recommend `this tutorial to learn XPath through examples `_, and `this tutorial to learn "how to think in XPath" `_. .. _XPath: https://www.w3.org/TR/xpath .. _CSS: https://www.w3.org/TR/selectors Extracting quotes and authors ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Now that you know a bit about selection and extraction, let's complete our spider by writing the code to extract the quotes from the web page. Each quote in http://quotes.toscrape.com is represented by HTML elements that look like this: .. code-block:: html
    <div class="quote">
        <span class="text">“The world as we have created it is a process of our
        thinking. It cannot be changed without changing our thinking.”</span>
        <span>
            by <small class="author">Albert Einstein</small>
            <a href="/author/Albert-Einstein">(about)</a>
        </span>
        <div class="tags">
            Tags:
            <a class="tag" href="/tag/change/page/1/">change</a>
            <a class="tag" href="/tag/deep-thoughts/page/1/">deep-thoughts</a>
            <a class="tag" href="/tag/thinking/page/1/">thinking</a>
            <a class="tag" href="/tag/world/page/1/">world</a>
        </div>
    </div>
Let's open up scrapy shell and play a bit to find out how to extract the data we want:: $ scrapy shell 'http://quotes.toscrape.com' We get a list of selectors for the quote HTML elements with:: >>> response.css("div.quote") Each of the selectors returned by the query above allows us to run further queries over their sub-elements. Let's assign the first selector to a variable, so that we can run our CSS selectors directly on a particular quote:: >>> quote = response.css("div.quote")[0] Now, let's extract ``text``, ``author`` and the ``tags`` from that quote using the ``quote`` object we just created:: >>> text = quote.css("span.text::text").get() >>> text '“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”' >>> author = quote.css("small.author::text").get() >>> author 'Albert Einstein' Given that the tags are a list of strings, we can use the ``.getall()`` method to get all of them:: >>> tags = quote.css("div.tags a.tag::text").getall() >>> tags ['change', 'deep-thoughts', 'thinking', 'world'] Having figured out how to extract each bit, we can now iterate over all the quotes elements and put them together into a Python dictionary:: >>> for quote in response.css("div.quote"): ... text = quote.css("span.text::text").get() ... author = quote.css("small.author::text").get() ... tags = quote.css("div.tags a.tag::text").getall() ... print(dict(text=text, author=author, tags=tags)) {'tags': ['change', 'deep-thoughts', 'thinking', 'world'], 'author': 'Albert Einstein', 'text': '“The world as we have created it is a process of our thinking. It cannot be changed without changing our thinking.”'} {'tags': ['abilities', 'choices'], 'author': 'J.K. Rowling', 'text': '“It is our choices, Harry, that show what we truly are, far more than our abilities.”'} ... a few more of these, omitted for brevity >>> Extracting data in our spider ----------------------------- Let's get back to our spider. Until now, it doesn't extract any data in particular, just saves the whole HTML page to a local file. Let's integrate the extraction logic above into our spider. A Scrapy spider typically generates many dictionaries containing the data extracted from the page. To do that, we use the ``yield`` Python keyword in the callback, as you can see below:: import scrapy class QuotesSpider(scrapy.Spider): name = "quotes" start_urls = [ 'http://quotes.toscrape.com/page/1/', 'http://quotes.toscrape.com/page/2/', ] def parse(self, response): for quote in response.css('div.quote'): yield { 'text': quote.css('span.text::text').get(), 'author': quote.css('small.author::text').get(), 'tags': quote.css('div.tags a.tag::text').getall(), } If you run this spider, it will output the extracted data with the log:: 2016-09-19 18:57:19 [scrapy.core.scraper] DEBUG: Scraped from <200 http://quotes.toscrape.com/page/1/> {'tags': ['life', 'love'], 'author': 'André Gide', 'text': '“It is better to be hated for what you are than to be loved for what you are not.”'} 2016-09-19 18:57:19 [scrapy.core.scraper] DEBUG: Scraped from <200 http://quotes.toscrape.com/page/1/> {'tags': ['edison', 'failure', 'inspirational', 'paraphrased'], 'author': 'Thomas A. Edison', 'text': "“I have not failed. I've just found 10,000 ways that won't work.”"} .. 
_storing-data: Storing the scraped data ======================== The simplest way to store the scraped data is by using :ref:`Feed exports `, with the following command:: scrapy crawl quotes -o quotes.json That will generate an ``quotes.json`` file containing all scraped items, serialized in `JSON`_. For historic reasons, Scrapy appends to a given file instead of overwriting its contents. If you run this command twice without removing the file before the second time, you'll end up with a broken JSON file. You can also use other formats, like `JSON Lines`_:: scrapy crawl quotes -o quotes.jl The `JSON Lines`_ format is useful because it's stream-like, you can easily append new records to it. It doesn't have the same problem of JSON when you run twice. Also, as each record is a separate line, you can process big files without having to fit everything in memory, there are tools like `JQ`_ to help doing that at the command-line. In small projects (like the one in this tutorial), that should be enough. However, if you want to perform more complex things with the scraped items, you can write an :ref:`Item Pipeline `. A placeholder file for Item Pipelines has been set up for you when the project is created, in ``tutorial/pipelines.py``. Though you don't need to implement any item pipelines if you just want to store the scraped items. .. _JSON Lines: http://jsonlines.org .. _JQ: https://stedolan.github.io/jq Following links =============== Let's say, instead of just scraping the stuff from the first two pages from http://quotes.toscrape.com, you want quotes from all the pages in the website. Now that you know how to extract data from pages, let's see how to follow links from them. First thing is to extract the link to the page we want to follow. Examining our page, we can see there is a link to the next page with the following markup: .. code-block:: html We can try extracting it in the shell:: >>> response.css('li.next a').get() 'Next ' This gets the anchor element, but we want the attribute ``href``. For that, Scrapy supports a CSS extension that lets you select the attribute contents, like this:: >>> response.css('li.next a::attr(href)').get() '/page/2/' There is also an ``attrib`` property available (see :ref:`selecting-attributes` for more):: >>> response.css('li.next a').attrib['href'] '/page/2' Let's see now our spider modified to recursively follow the link to the next page, extracting data from it:: import scrapy class QuotesSpider(scrapy.Spider): name = "quotes" start_urls = [ 'http://quotes.toscrape.com/page/1/', ] def parse(self, response): for quote in response.css('div.quote'): yield { 'text': quote.css('span.text::text').get(), 'author': quote.css('small.author::text').get(), 'tags': quote.css('div.tags a.tag::text').getall(), } next_page = response.css('li.next a::attr(href)').get() if next_page is not None: next_page = response.urljoin(next_page) yield scrapy.Request(next_page, callback=self.parse) Now, after extracting the data, the ``parse()`` method looks for the link to the next page, builds a full absolute URL using the :meth:`~scrapy.http.Response.urljoin` method (since the links can be relative) and yields a new request to the next page, registering itself as callback to handle the data extraction for the next page and to keep the crawling going through all the pages. 
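As a quick illustration of what that ``urljoin`` call does, here is an example shell session; the output is simply the result of joining these particular URLs::

    >>> # with response.url == 'http://quotes.toscrape.com/page/1/'
    >>> response.urljoin('/page/2/')
    'http://quotes.toscrape.com/page/2/'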
What you see here is Scrapy's mechanism of following links: when you yield a Request in a callback method, Scrapy will schedule that request to be sent and register a callback method to be executed when that request finishes. Using this, you can build complex crawlers that follow links according to rules you define, and extract different kinds of data depending on the page it's visiting. In our example, it creates a sort of loop, following all the links to the next page until it doesn't find one -- handy for crawling blogs, forums and other sites with pagination. .. _response-follow-example: A shortcut for creating Requests -------------------------------- As a shortcut for creating Request objects you can use :meth:`response.follow `:: import scrapy class QuotesSpider(scrapy.Spider): name = "quotes" start_urls = [ 'http://quotes.toscrape.com/page/1/', ] def parse(self, response): for quote in response.css('div.quote'): yield { 'text': quote.css('span.text::text').get(), 'author': quote.css('span small::text').get(), 'tags': quote.css('div.tags a.tag::text').getall(), } next_page = response.css('li.next a::attr(href)').get() if next_page is not None: yield response.follow(next_page, callback=self.parse) Unlike scrapy.Request, ``response.follow`` supports relative URLs directly - no need to call urljoin. Note that ``response.follow`` just returns a Request instance; you still have to yield this Request. You can also pass a selector to ``response.follow`` instead of a string; this selector should extract necessary attributes:: for href in response.css('li.next a::attr(href)'): yield response.follow(href, callback=self.parse) For ```` elements there is a shortcut: ``response.follow`` uses their href attribute automatically. So the code can be shortened further:: for a in response.css('li.next a'): yield response.follow(a, callback=self.parse) .. note:: ``response.follow(response.css('li.next a'))`` is not valid because ``response.css`` returns a list-like object with selectors for all results, not a single selector. A ``for`` loop like in the example above, or ``response.follow(response.css('li.next a')[0])`` is fine. More examples and patterns -------------------------- Here is another spider that illustrates callbacks and following links, this time for scraping author information:: import scrapy class AuthorSpider(scrapy.Spider): name = 'author' start_urls = ['http://quotes.toscrape.com/'] def parse(self, response): # follow links to author pages for href in response.css('.author + a::attr(href)'): yield response.follow(href, self.parse_author) # follow pagination links for href in response.css('li.next a::attr(href)'): yield response.follow(href, self.parse) def parse_author(self, response): def extract_with_css(query): return response.css(query).get(default='').strip() yield { 'name': extract_with_css('h3.author-title::text'), 'birthdate': extract_with_css('.author-born-date::text'), 'bio': extract_with_css('.author-description::text'), } This spider will start from the main page, it will follow all the links to the authors pages calling the ``parse_author`` callback for each of them, and also the pagination links with the ``parse`` callback as we saw before. Here we're passing callbacks to ``response.follow`` as positional arguments to make the code shorter; it also works for ``scrapy.Request``. The ``parse_author`` callback defines a helper function to extract and cleanup the data from a CSS query and yields the Python dict with the author data. 
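The ``default=''`` argument in that helper deserves a quick note: without it, ``.get()`` returns ``None`` when nothing matches, which would break the ``.strip()`` call. Here is a small illustrative session, where ``.missing`` is a made-up selector that matches nothing::

    >>> response.css('.missing::text').get()      # no match: returns None, nothing is printed
    >>> response.css('.missing::text').get(default='')
    ''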
Another interesting thing this spider demonstrates is that, even if there are many quotes from the same author, we don't need to worry about visiting the same author page multiple times. By default, Scrapy filters out duplicated requests to URLs already visited, avoiding the problem of hitting servers too much because of a programming mistake. This can be configured by the setting :setting:`DUPEFILTER_CLASS`. Hopefully by now you have a good understanding of how to use the mechanism of following links and callbacks with Scrapy. As yet another example spider that leverages the mechanism of following links, check out the :class:`~scrapy.spiders.CrawlSpider` class for a generic spider that implements a small rules engine that you can use to write your crawlers on top of it. Also, a common pattern is to build an item with data from more than one page, using a :ref:`trick to pass additional data to the callbacks `. Using spider arguments ====================== You can provide command line arguments to your spiders by using the ``-a`` option when running them:: scrapy crawl quotes -o quotes-humor.json -a tag=humor These arguments are passed to the Spider's ``__init__`` method and become spider attributes by default. In this example, the value provided for the ``tag`` argument will be available via ``self.tag``. You can use this to make your spider fetch only quotes with a specific tag, building the URL based on the argument:: import scrapy class QuotesSpider(scrapy.Spider): name = "quotes" def start_requests(self): url = 'http://quotes.toscrape.com/' tag = getattr(self, 'tag', None) if tag is not None: url = url + 'tag/' + tag yield scrapy.Request(url, self.parse) def parse(self, response): for quote in response.css('div.quote'): yield { 'text': quote.css('span.text::text').get(), 'author': quote.css('small.author::text').get(), } next_page = response.css('li.next a::attr(href)').get() if next_page is not None: yield response.follow(next_page, self.parse) If you pass the ``tag=humor`` argument to this spider, you'll notice that it will only visit URLs from the ``humor`` tag, such as ``http://quotes.toscrape.com/tag/humor``. You can :ref:`learn more about handling spider arguments here `. Next steps ========== This tutorial covered only the basics of Scrapy, but there's a lot of other features not mentioned here. Check the :ref:`topics-whatelse` section in :ref:`intro-overview` chapter for a quick overview of the most important ones. You can continue from the section :ref:`section-basics` to know more about the command-line tool, spiders, selectors and other things the tutorial hasn't covered like modeling the scraped data. If you prefer to play with an example project, check the :ref:`intro-examples` section. .. _JSON: https://en.wikipedia.org/wiki/JSON scrapy-1.7.3/docs/news.rst000066400000000000000000004103601352060011200154530ustar00rootroot00000000000000.. _news: Release notes ============= .. note:: Scrapy 1.x will be the last series supporting Python 2. Scrapy 2.0, planned for Q4 2019 or Q1 2020, will support **Python 3 only**. Scrapy 1.7.3 (2019-08-01) ------------------------- Enforce lxml 4.3.5 or lower for Python 3.4 (:issue:`3912`, :issue:`3918`). Scrapy 1.7.2 (2019-07-23) ------------------------- Fix Python 2 support (:issue:`3889`, :issue:`3893`, :issue:`3896`). Scrapy 1.7.1 (2019-07-18) ------------------------- Re-packaging of Scrapy 1.7.0, which was missing some changes in PyPI. .. _release-1.7.0: Scrapy 1.7.0 (2019-07-18) ------------------------- .. 
note:: Make sure you install Scrapy 1.7.1. The Scrapy 1.7.0 package in PyPI is the result of an erroneous commit tagging and does not include all the changes described below. Highlights: * Improvements for crawls targeting multiple domains * A cleaner way to pass arguments to callbacks * A new class for JSON requests * Improvements for rule-based spiders * New features for feed exports Backward-incompatible changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * ``429`` is now part of the :setting:`RETRY_HTTP_CODES` setting by default This change is **backward incompatible**. If you don’t want to retry ``429``, you must override :setting:`RETRY_HTTP_CODES` accordingly. * :class:`~scrapy.crawler.Crawler`, :class:`CrawlerRunner.crawl ` and :class:`CrawlerRunner.create_crawler ` no longer accept a :class:`~scrapy.spiders.Spider` subclass instance, they only accept a :class:`~scrapy.spiders.Spider` subclass now. :class:`~scrapy.spiders.Spider` subclass instances were never meant to work, and they were not working as one would expect: instead of using the passed :class:`~scrapy.spiders.Spider` subclass instance, their :class:`~scrapy.spiders.Spider.from_crawler` method was called to generate a new instance. * Non-default values for the :setting:`SCHEDULER_PRIORITY_QUEUE` setting may stop working. Scheduler priority queue classes now need to handle :class:`~scrapy.http.Request` objects instead of arbitrary Python data structures. See also :ref:`1.7-deprecation-removals` below. New features ~~~~~~~~~~~~ * A new scheduler priority queue, :class:`scrapy.pqueues.DownloaderAwarePriorityQueue`, may be :ref:`enabled ` for a significant scheduling improvement on crawls targetting multiple web domains, at the cost of no :setting:`CONCURRENT_REQUESTS_PER_IP` support (:issue:`3520`) * A new :attr:`Request.cb_kwargs ` attribute provides a cleaner way to pass keyword arguments to callback methods (:issue:`1138`, :issue:`3563`) * A new :class:`~scrapy.http.JSONRequest` class offers a more convenient way to build JSON requests (:issue:`3504`, :issue:`3505`) * A ``process_request`` callback passed to the :class:`~scrapy.spiders.Rule` constructor now receives the :class:`~scrapy.http.Response` object that originated the request as its second argument (:issue:`3682`) * A new ``restrict_text`` parameter for the :attr:`LinkExtractor ` constructor allows filtering links by linking text (:issue:`3622`, :issue:`3635`) * A new :setting:`FEED_STORAGE_S3_ACL` setting allows defining a custom ACL for feeds exported to Amazon S3 (:issue:`3607`) * A new :setting:`FEED_STORAGE_FTP_ACTIVE` setting allows using FTP’s active connection mode for feeds exported to FTP servers (:issue:`3829`) * A new :setting:`METAREFRESH_IGNORE_TAGS` setting allows overriding which HTML tags are ignored when searching a response for HTML meta tags that trigger a redirect (:issue:`1422`, :issue:`3768`) * A new :reqmeta:`redirect_reasons` request meta key exposes the reason (status code, meta refresh) behind every followed redirect (:issue:`3581`, :issue:`3687`) * The ``SCRAPY_CHECK`` variable is now set to the ``true`` string during runs of the :command:`check` command, which allows :ref:`detecting contract check runs from code ` (:issue:`3704`, :issue:`3739`) * A new :meth:`Item.deepcopy() ` method makes it easier to :ref:`deep-copy items ` (:issue:`1493`, :issue:`3671`) * :class:`~scrapy.extensions.corestats.CoreStats` also logs ``elapsed_time_seconds`` now (:issue:`3638`) * Exceptions from :class:`~scrapy.loader.ItemLoader` :ref:`input and output processors ` 
are now more verbose (:issue:`3836`, :issue:`3840`) * :class:`~scrapy.crawler.Crawler`, :class:`CrawlerRunner.crawl ` and :class:`CrawlerRunner.create_crawler ` now fail gracefully if they receive a :class:`~scrapy.spiders.Spider` subclass instance instead of the subclass itself (:issue:`2283`, :issue:`3610`, :issue:`3872`) Bug fixes ~~~~~~~~~ * :meth:`~scrapy.spidermiddlewares.SpiderMiddleware.process_spider_exception` is now also invoked for generators (:issue:`220`, :issue:`2061`) * System exceptions like KeyboardInterrupt_ are no longer caught (:issue:`3726`) * :meth:`ItemLoader.load_item() ` no longer makes later calls to :meth:`ItemLoader.get_output_value() ` or :meth:`ItemLoader.load_item() ` return empty data (:issue:`3804`, :issue:`3819`) * The images pipeline (:class:`~scrapy.pipelines.images.ImagesPipeline`) no longer ignores these Amazon S3 settings: :setting:`AWS_ENDPOINT_URL`, :setting:`AWS_REGION_NAME`, :setting:`AWS_USE_SSL`, :setting:`AWS_VERIFY` (:issue:`3625`) * Fixed a memory leak in :class:`~scrapy.pipelines.media.MediaPipeline` affecting, for example, non-200 responses and exceptions from custom middlewares (:issue:`3813`) * Requests with private callbacks are now correctly unserialized from disk (:issue:`3790`) * :meth:`FormRequest.from_response() ` now handles invalid methods like major web browsers (:issue:`3777`, :issue:`3794`) Documentation ~~~~~~~~~~~~~ * A new topic, :ref:`topics-dynamic-content`, covers recommended approaches to read dynamically-loaded data (:issue:`3703`) * :ref:`topics-broad-crawls` now features information about memory usage (:issue:`1264`, :issue:`3866`) * The documentation of :class:`~scrapy.spiders.Rule` now covers how to access the text of a link when using :class:`~scrapy.spiders.CrawlSpider` (:issue:`3711`, :issue:`3712`) * A new section, :ref:`httpcache-storage-custom`, covers writing a custom cache storage backend for :class:`~scrapy.downloadermiddlewares.httpcache.HttpCacheMiddleware` (:issue:`3683`, :issue:`3692`) * A new :ref:`FAQ ` entry, :ref:`faq-split-item`, explains what to do when you want to split an item into multiple items from an item pipeline (:issue:`2240`, :issue:`3672`) * Updated the :ref:`FAQ entry about crawl order ` to explain why the first few requests rarely follow the desired order (:issue:`1739`, :issue:`3621`) * The :setting:`LOGSTATS_INTERVAL` setting (:issue:`3730`), the :meth:`FilesPipeline.file_path ` and :meth:`ImagesPipeline.file_path ` methods (:issue:`2253`, :issue:`3609`) and the :meth:`Crawler.stop() ` method (:issue:`3842`) are now documented * Some parts of the documentation that were confusing or misleading are now clearer (:issue:`1347`, :issue:`1789`, :issue:`2289`, :issue:`3069`, :issue:`3615`, :issue:`3626`, :issue:`3668`, :issue:`3670`, :issue:`3673`, :issue:`3728`, :issue:`3762`, :issue:`3861`, :issue:`3882`) * Minor documentation fixes (:issue:`3648`, :issue:`3649`, :issue:`3662`, :issue:`3674`, :issue:`3676`, :issue:`3694`, :issue:`3724`, :issue:`3764`, :issue:`3767`, :issue:`3791`, :issue:`3797`, :issue:`3806`, :issue:`3812`) .. 
_1.7-deprecation-removals: Deprecation removals ~~~~~~~~~~~~~~~~~~~~ The following deprecated APIs have been removed (:issue:`3578`): * ``scrapy.conf`` (use :attr:`Crawler.settings `) * From ``scrapy.core.downloader.handlers``: * ``http.HttpDownloadHandler`` (use ``http10.HTTP10DownloadHandler``) * ``scrapy.loader.ItemLoader._get_values`` (use ``_get_xpathvalues``) * ``scrapy.loader.XPathItemLoader`` (use :class:`~scrapy.loader.ItemLoader`) * ``scrapy.log`` (see :ref:`topics-logging`) * From ``scrapy.pipelines``: * ``files.FilesPipeline.file_key`` (use ``file_path``) * ``images.ImagesPipeline.file_key`` (use ``file_path``) * ``images.ImagesPipeline.image_key`` (use ``file_path``) * ``images.ImagesPipeline.thumb_key`` (use ``thumb_path``) * From both ``scrapy.selector`` and ``scrapy.selector.lxmlsel``: * ``HtmlXPathSelector`` (use :class:`~scrapy.selector.Selector`) * ``XmlXPathSelector`` (use :class:`~scrapy.selector.Selector`) * ``XPathSelector`` (use :class:`~scrapy.selector.Selector`) * ``XPathSelectorList`` (use :class:`~scrapy.selector.Selector`) * From ``scrapy.selector.csstranslator``: * ``ScrapyGenericTranslator`` (use parsel.csstranslator.GenericTranslator_) * ``ScrapyHTMLTranslator`` (use parsel.csstranslator.HTMLTranslator_) * ``ScrapyXPathExpr`` (use parsel.csstranslator.XPathExpr_) * From :class:`~scrapy.selector.Selector`: * ``_root`` (both the constructor argument and the object property, use ``root``) * ``extract_unquoted`` (use ``getall``) * ``select`` (use ``xpath``) * From :class:`~scrapy.selector.SelectorList`: * ``extract_unquoted`` (use ``getall``) * ``select`` (use ``xpath``) * ``x`` (use ``xpath``) * ``scrapy.spiders.BaseSpider`` (use :class:`~scrapy.spiders.Spider`) * From :class:`~scrapy.spiders.Spider` (and subclasses): * ``DOWNLOAD_DELAY`` (use :ref:`download_delay `) * ``set_crawler`` (use :meth:`~scrapy.spiders.Spider.from_crawler`) * ``scrapy.spiders.spiders`` (use :class:`~scrapy.spiderloader.SpiderLoader`) * ``scrapy.telnet`` (use :mod:`scrapy.extensions.telnet`) * From ``scrapy.utils.python``: * ``str_to_unicode`` (use ``to_unicode``) * ``unicode_to_str`` (use ``to_bytes``) * ``scrapy.utils.response.body_or_str`` The following deprecated settings have also been removed (:issue:`3578`): * ``SPIDER_MANAGER_CLASS`` (use :setting:`SPIDER_LOADER_CLASS`) Deprecations ~~~~~~~~~~~~ * The ``queuelib.PriorityQueue`` value for the :setting:`SCHEDULER_PRIORITY_QUEUE` setting is deprecated. Use :class:`scrapy.pqueues.ScrapyPriorityQueue` instead. * ``process_request`` callbacks passed to :class:`~scrapy.spiders.Rule` that do not accept two arguments are deprecated. * The following modules are deprecated: * ``scrapy.utils.http`` (use `w3lib.http`_) * ``scrapy.utils.markup`` (use `w3lib.html`_) * ``scrapy.utils.multipart`` (use `urllib3`_) * The ``scrapy.utils.datatypes.MergeDict`` class is deprecated for Python 3 code bases. Use :class:`~collections.ChainMap` instead. (:issue:`3878`) * The ``scrapy.utils.gz.is_gzipped`` function is deprecated. Use ``scrapy.utils.gz.gzip_magic_number`` instead. .. _urllib3: https://urllib3.readthedocs.io/en/latest/index.html .. _w3lib.html: https://w3lib.readthedocs.io/en/latest/w3lib.html#module-w3lib.html .. 
_w3lib.http: https://w3lib.readthedocs.io/en/latest/w3lib.html#module-w3lib.http Other changes ~~~~~~~~~~~~~ * It is now possible to run all tests from the same tox_ environment in parallel; the documentation now covers :ref:`this and other ways to run tests ` (:issue:`3707`) * It is now possible to generate an API documentation coverage report (:issue:`3806`, :issue:`3810`, :issue:`3860`) * The :ref:`documentation policies ` now require docstrings_ (:issue:`3701`) that follow `PEP 257`_ (:issue:`3748`) * Internal fixes and cleanup (:issue:`3629`, :issue:`3643`, :issue:`3684`, :issue:`3698`, :issue:`3734`, :issue:`3735`, :issue:`3736`, :issue:`3737`, :issue:`3809`, :issue:`3821`, :issue:`3825`, :issue:`3827`, :issue:`3833`, :issue:`3857`, :issue:`3877`) .. _release-1.6.0: Scrapy 1.6.0 (2019-01-30) ------------------------- Highlights: * better Windows support; * Python 3.7 compatibility; * big documentation improvements, including a switch from ``.extract_first()`` + ``.extract()`` API to ``.get()`` + ``.getall()`` API; * feed exports, FilePipeline and MediaPipeline improvements; * better extensibility: :signal:`item_error` and :signal:`request_reached_downloader` signals; ``from_crawler`` support for feed exporters, feed storages and dupefilters. * ``scrapy.contracts`` fixes and new features; * telnet console security improvements, first released as a backport in :ref:`release-1.5.2`; * clean-up of the deprecated code; * various bug fixes, small new features and usability improvements across the codebase. Selector API changes ~~~~~~~~~~~~~~~~~~~~ While these are not changes in Scrapy itself, but rather in the parsel_ library which Scrapy uses for xpath/css selectors, these changes are worth mentioning here. Scrapy now depends on parsel >= 1.5, and Scrapy documentation is updated to follow recent ``parsel`` API conventions. Most visible change is that ``.get()`` and ``.getall()`` selector methods are now preferred over ``.extract_first()`` and ``.extract()``. We feel that these new methods result in a more concise and readable code. See :ref:`old-extraction-api` for more details. .. note:: There are currently **no plans** to deprecate ``.extract()`` and ``.extract_first()`` methods. Another useful new feature is the introduction of ``Selector.attrib`` and ``SelectorList.attrib`` properties, which make it easier to get attributes of HTML elements. See :ref:`selecting-attributes`. CSS selectors are cached in parsel >= 1.5, which makes them faster when the same CSS path is used many times. This is very common in case of Scrapy spiders: callbacks are usually called several times, on different pages. If you're using custom ``Selector`` or ``SelectorList`` subclasses, a **backward incompatible** change in parsel may affect your code. See `parsel changelog`_ for a detailed description, as well as for the full list of improvements. .. _parsel changelog: https://parsel.readthedocs.io/en/latest/history.html Telnet console ~~~~~~~~~~~~~~ **Backward incompatible**: Scrapy's telnet console now requires username and password. See :ref:`topics-telnetconsole` for more details. This change fixes a **security issue**; see :ref:`release-1.5.2` release notes for details. New extensibility features ~~~~~~~~~~~~~~~~~~~~~~~~~~ * ``from_crawler`` support is added to feed exporters and feed storages. This, among other things, allows to access Scrapy settings from custom feed storages and exporters (:issue:`1605`, :issue:`3348`). 
* ``from_crawler`` support is added to dupefilters (:issue:`2956`); this allows to access e.g. settings or a spider from a dupefilter. * :signal:`item_error` is fired when an error happens in a pipeline (:issue:`3256`); * :signal:`request_reached_downloader` is fired when Downloader gets a new Request; this signal can be useful e.g. for custom Schedulers (:issue:`3393`). * new SitemapSpider :meth:`~.SitemapSpider.sitemap_filter` method which allows to select sitemap entries based on their attributes in SitemapSpider subclasses (:issue:`3512`). * Lazy loading of Downloader Handlers is now optional; this enables better initialization error handling in custom Downloader Handlers (:issue:`3394`). New FilePipeline and MediaPipeline features ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Expose more options for S3FilesStore: :setting:`AWS_ENDPOINT_URL`, :setting:`AWS_USE_SSL`, :setting:`AWS_VERIFY`, :setting:`AWS_REGION_NAME`. For example, this allows to use alternative or self-hosted AWS-compatible providers (:issue:`2609`, :issue:`3548`). * ACL support for Google Cloud Storage: :setting:`FILES_STORE_GCS_ACL` and :setting:`IMAGES_STORE_GCS_ACL` (:issue:`3199`). ``scrapy.contracts`` improvements ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Exceptions in contracts code are handled better (:issue:`3377`); * ``dont_filter=True`` is used for contract requests, which allows to test different callbacks with the same URL (:issue:`3381`); * ``request_cls`` attribute in Contract subclasses allow to use different Request classes in contracts, for example FormRequest (:issue:`3383`). * Fixed errback handling in contracts, e.g. for cases where a contract is executed for URL which returns non-200 response (:issue:`3371`). Usability improvements ~~~~~~~~~~~~~~~~~~~~~~ * more stats for RobotsTxtMiddleware (:issue:`3100`) * INFO log level is used to show telnet host/port (:issue:`3115`) * a message is added to IgnoreRequest in RobotsTxtMiddleware (:issue:`3113`) * better validation of ``url`` argument in ``Response.follow`` (:issue:`3131`) * non-zero exit code is returned from Scrapy commands when error happens on spider inititalization (:issue:`3226`) * Link extraction improvements: "ftp" is added to scheme list (:issue:`3152`); "flv" is added to common video extensions (:issue:`3165`) * better error message when an exporter is disabled (:issue:`3358`); * ``scrapy shell --help`` mentions syntax required for local files (``./file.html``) - :issue:`3496`. * Referer header value is added to RFPDupeFilter log messages (:issue:`3588`) Bug fixes ~~~~~~~~~ * fixed issue with extra blank lines in .csv exports under Windows (:issue:`3039`); * proper handling of pickling errors in Python 3 when serializing objects for disk queues (:issue:`3082`) * flags are now preserved when copying Requests (:issue:`3342`); * FormRequest.from_response clickdata shouldn't ignore elements with ``input[type=image]`` (:issue:`3153`). * FormRequest.from_response should preserve duplicate keys (:issue:`3247`) Documentation improvements ~~~~~~~~~~~~~~~~~~~~~~~~~~ * Docs are re-written to suggest .get/.getall API instead of .extract/.extract_first. Also, :ref:`topics-selectors` docs are updated and re-structured to match latest parsel docs; they now contain more topics, such as :ref:`selecting-attributes` or :ref:`topics-selectors-css-extensions` (:issue:`3390`). * :ref:`topics-developer-tools` is a new tutorial which replaces old Firefox and Firebug tutorials (:issue:`3400`). 
* SCRAPY_PROJECT environment variable is documented (:issue:`3518`);
* troubleshooting section is added to install instructions (:issue:`3517`);
* improved links to beginner resources in the tutorial
  (:issue:`3367`, :issue:`3468`);
* fixed :setting:`RETRY_HTTP_CODES` default values in docs (:issue:`3335`);
* remove unused ``DEPTH_STATS`` option from docs (:issue:`3245`);
* other cleanups (:issue:`3347`, :issue:`3350`, :issue:`3445`, :issue:`3544`,
  :issue:`3605`).

Deprecation removals
~~~~~~~~~~~~~~~~~~~~

Compatibility shims for pre-1.0 Scrapy module names are removed
(:issue:`3318`):

* ``scrapy.command``
* ``scrapy.contrib`` (with all submodules)
* ``scrapy.contrib_exp`` (with all submodules)
* ``scrapy.dupefilter``
* ``scrapy.linkextractor``
* ``scrapy.project``
* ``scrapy.spider``
* ``scrapy.spidermanager``
* ``scrapy.squeue``
* ``scrapy.stats``
* ``scrapy.statscol``
* ``scrapy.utils.decorator``

See :ref:`module-relocations` for more information, or use suggestions
from Scrapy 1.5.x deprecation warnings to update your code.

Other deprecation removals:

* Deprecated ``scrapy.interfaces.ISpiderManager`` is removed; please use
  ``scrapy.interfaces.ISpiderLoader``.
* Deprecated ``CrawlerSettings`` class is removed (:issue:`3327`).
* Deprecated ``Settings.overrides`` and ``Settings.defaults`` attributes
  are removed (:issue:`3327`, :issue:`3359`).

Other improvements, cleanups
~~~~~~~~~~~~~~~~~~~~~~~~~~~~

* All Scrapy tests now pass on Windows; the Scrapy testing suite is executed
  in a Windows environment on CI (:issue:`3315`).
* Python 3.7 support (:issue:`3326`, :issue:`3150`, :issue:`3547`).
* Testing and CI fixes (:issue:`3526`, :issue:`3538`, :issue:`3308`,
  :issue:`3311`, :issue:`3309`, :issue:`3305`, :issue:`3210`, :issue:`3299`)
* ``scrapy.http.cookies.CookieJar.clear`` accepts "domain", "path" and "name"
  optional arguments (:issue:`3231`).
* additional files are included in sdist (:issue:`3495`);
* code style fixes (:issue:`3405`, :issue:`3304`);
* unneeded .strip() call is removed (:issue:`3519`);
* collections.deque is used to store MiddlewareManager methods instead
  of a list (:issue:`3476`)

.. _release-1.5.2:

Scrapy 1.5.2 (2019-01-22)
-------------------------

* *Security bugfix*: the Telnet console extension could be easily exploited by
  rogue websites POSTing content to http://localhost:6023. We haven't found a
  way to exploit it from Scrapy, but it is very easy to trick a browser into
  doing so, which elevates the risk for local development environments.

  *The fix is backward incompatible*: it enables telnet user-password
  authentication by default, with a randomly generated password. If you can't
  upgrade right away, please consider setting :setting:`TELNET_CONSOLE_PORT`
  out of its default value.

  See the :ref:`telnet console <topics-telnetconsole>` documentation for more
  info.

* Backported fix for a CI build failure under the GCE environment caused by a
  boto import error.

.. _release-1.5.1:

Scrapy 1.5.1 (2018-07-12)
-------------------------

This is a maintenance release with important bug fixes, but no new
features:

* ``O(N^2)`` gzip decompression issue which affected Python 3 and PyPy
  is fixed (:issue:`3281`);
* skipping of TLS validation errors is improved (:issue:`3166`);
* Ctrl-C handling is fixed in Python 3.5+ (:issue:`3096`);
* testing fixes (:issue:`3092`, :issue:`3263`);
* documentation improvements (:issue:`3058`, :issue:`3059`, :issue:`3089`,
  :issue:`3123`, :issue:`3127`, :issue:`3189`, :issue:`3224`, :issue:`3280`,
  :issue:`3279`, :issue:`3201`, :issue:`3260`, :issue:`3284`, :issue:`3298`,
  :issue:`3294`).

..
_release-1.5.0: Scrapy 1.5.0 (2017-12-29) ------------------------- This release brings small new features and improvements across the codebase. Some highlights: * Google Cloud Storage is supported in FilesPipeline and ImagesPipeline. * Crawling with proxy servers becomes more efficient, as connections to proxies can be reused now. * Warnings, exception and logging messages are improved to make debugging easier. * ``scrapy parse`` command now allows to set custom request meta via ``--meta`` argument. * Compatibility with Python 3.6, PyPy and PyPy3 is improved; PyPy and PyPy3 are now supported officially, by running tests on CI. * Better default handling of HTTP 308, 522 and 524 status codes. * Documentation is improved, as usual. Backward Incompatible Changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * Scrapy 1.5 drops support for Python 3.3. * Default Scrapy User-Agent now uses https link to scrapy.org (:issue:`2983`). **This is technically backward-incompatible**; override :setting:`USER_AGENT` if you relied on old value. * Logging of settings overridden by ``custom_settings`` is fixed; **this is technically backward-incompatible** because the logger changes from ``[scrapy.utils.log]`` to ``[scrapy.crawler]``. If you're parsing Scrapy logs, please update your log parsers (:issue:`1343`). * LinkExtractor now ignores ``m4v`` extension by default, this is change in behavior. * 522 and 524 status codes are added to ``RETRY_HTTP_CODES`` (:issue:`2851`) New features ~~~~~~~~~~~~ - Support ```` tags in ``Response.follow`` (:issue:`2785`) - Support for ``ptpython`` REPL (:issue:`2654`) - Google Cloud Storage support for FilesPipeline and ImagesPipeline (:issue:`2923`). - New ``--meta`` option of the "scrapy parse" command allows to pass additional request.meta (:issue:`2883`) - Populate spider variable when using ``shell.inspect_response`` (:issue:`2812`) - Handle HTTP 308 Permanent Redirect (:issue:`2844`) - Add 522 and 524 to ``RETRY_HTTP_CODES`` (:issue:`2851`) - Log versions information at startup (:issue:`2857`) - ``scrapy.mail.MailSender`` now works in Python 3 (it requires Twisted 17.9.0) - Connections to proxy servers are reused (:issue:`2743`) - Add template for a downloader middleware (:issue:`2755`) - Explicit message for NotImplementedError when parse callback not defined (:issue:`2831`) - CrawlerProcess got an option to disable installation of root log handler (:issue:`2921`) - LinkExtractor now ignores ``m4v`` extension by default - Better log messages for responses over :setting:`DOWNLOAD_WARNSIZE` and :setting:`DOWNLOAD_MAXSIZE` limits (:issue:`2927`) - Show warning when a URL is put to ``Spider.allowed_domains`` instead of a domain (:issue:`2250`). Bug fixes ~~~~~~~~~ - Fix logging of settings overridden by ``custom_settings``; **this is technically backward-incompatible** because the logger changes from ``[scrapy.utils.log]`` to ``[scrapy.crawler]``, so please update your log parsers if needed (:issue:`1343`) - Default Scrapy User-Agent now uses https link to scrapy.org (:issue:`2983`). **This is technically backward-incompatible**; override :setting:`USER_AGENT` if you relied on old value. 
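  If you relied on the old value, you can pin it explicitly in your project
  settings; the version string below is only an approximation of the pre-1.5
  default, not an exact copy::

      # settings.py
      USER_AGENT = 'Scrapy/1.4.0 (+http://scrapy.org)'
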
- Fix PyPy and PyPy3 test failures, support them officially
  (:issue:`2793`, :issue:`2935`, :issue:`2990`, :issue:`3050`, :issue:`2213`,
  :issue:`3048`)
- Fix DNS resolver when ``DNSCACHE_ENABLED=False`` (:issue:`2811`)
- Add ``cryptography`` for Debian Jessie tox test env (:issue:`2848`)
- Add verification to check if Request callback is callable (:issue:`2766`)
- Port ``extras/qpsclient.py`` to Python 3 (:issue:`2849`)
- Use ``getfullargspec`` under the hood for Python 3 to stop
  ``DeprecationWarning`` (:issue:`2862`)
- Update deprecated test aliases (:issue:`2876`)
- Fix ``SitemapSpider`` support for alternate links (:issue:`2853`)

Docs
~~~~

- Added missing bullet point for the ``AUTOTHROTTLE_TARGET_CONCURRENCY``
  setting (:issue:`2756`)
- Update Contributing docs, document new support channels
  (:issue:`2762`, :issue:`3038`)
- Include references to Scrapy subreddit in the docs
- Fix broken links; use https:// for external links
  (:issue:`2978`, :issue:`2982`, :issue:`2958`)
- Document CloseSpider extension better (:issue:`2759`)
- Use ``pymongo.collection.Collection.insert_one()`` in MongoDB example
  (:issue:`2781`)
- Spelling mistakes and typos (:issue:`2828`, :issue:`2837`, :issue:`2884`,
  :issue:`2924`)
- Clarify ``CSVFeedSpider.headers`` documentation (:issue:`2826`)
- Document ``DontCloseSpider`` exception and clarify ``spider_idle``
  (:issue:`2791`)
- Update "Releases" section in README (:issue:`2764`)
- Fix rst syntax in ``DOWNLOAD_FAIL_ON_DATALOSS`` docs (:issue:`2763`)
- Small fix in description of startproject arguments (:issue:`2866`)
- Clarify data types in Response.body docs (:issue:`2922`)
- Add a note about ``request.meta['depth']`` to DepthMiddleware docs
  (:issue:`2374`)
- Add a note about ``request.meta['dont_merge_cookies']`` to
  CookiesMiddleware docs (:issue:`2999`)
- Up-to-date example of project structure (:issue:`2964`, :issue:`2976`)
- A better example of ItemExporters usage (:issue:`2989`)
- Document ``from_crawler`` methods for spider and downloader middlewares
  (:issue:`3019`)

.. _release-1.4.0:

Scrapy 1.4.0 (2017-05-18)
-------------------------

Scrapy 1.4 does not bring that many breathtaking new features
but quite a few handy improvements nonetheless.

Scrapy now supports anonymous FTP sessions with customizable user and
password via the new :setting:`FTP_USER` and :setting:`FTP_PASSWORD`
settings. And if you're using Twisted version 17.1.0 or above, FTP is now
available with Python 3.

There's a new :meth:`response.follow <scrapy.http.TextResponse.follow>`
method for creating requests; **it is now the recommended way to create
Requests in Scrapy spiders**. This method makes it easier to write correct
spiders; ``response.follow`` has several advantages over creating
``scrapy.Request`` objects directly:

* it handles relative URLs;
* it works properly with non-ASCII URLs on non-UTF8 pages;
* in addition to absolute and relative URLs it supports Selectors;
  for ``<a>`` elements it can also extract their href values.

For example, instead of this::

    for href in response.css('li.page a::attr(href)').extract():
        url = response.urljoin(href)
        yield scrapy.Request(url, self.parse, encoding=response.encoding)

One can now write this::

    for a in response.css('li.page a'):
        yield response.follow(a, self.parse)

Link extractors are also improved. They work similarly to what a regular
modern browser would do: leading and trailing whitespace is removed from
attributes (think ``href=" http://example.com"``) when building ``Link``
objects. This whitespace-stripping also happens for ``action`` attributes
with ``FormRequest``.
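As a quick, hedged illustration of the stripping (the markup and URLs below
are made up for the example)::

    from scrapy.http import HtmlResponse
    from scrapy.linkextractors import LinkExtractor

    body = b'<html><body><a href="  /page?id=1  ">next</a></body></html>'
    response = HtmlResponse('http://example.com/', body=body, encoding='utf-8')

    links = LinkExtractor().extract_links(response)
    # The surrounding whitespace is stripped before the href is joined with
    # the page URL, so links[0].url should come out as
    # 'http://example.com/page?id=1'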
**Please also note that link extractors do not canonicalize URLs by default anymore.** This was puzzling users every now and then, and it's not what browsers do in fact, so we removed that extra transformation on extracted links. For those of you wanting more control on the ``Referer:`` header that Scrapy sends when following links, you can set your own ``Referrer Policy``. Prior to Scrapy 1.4, the default ``RefererMiddleware`` would simply and blindly set it to the URL of the response that generated the HTTP request (which could leak information on your URL seeds). By default, Scrapy now behaves much like your regular browser does. And this policy is fully customizable with W3C standard values (or with something really custom of your own if you wish). See :setting:`REFERRER_POLICY` for details. To make Scrapy spiders easier to debug, Scrapy logs more stats by default in 1.4: memory usage stats, detailed retry stats, detailed HTTP error code stats. A similar change is that HTTP cache path is also visible in logs now. Last but not least, Scrapy now has the option to make JSON and XML items more human-readable, with newlines between items and even custom indenting offset, using the new :setting:`FEED_EXPORT_INDENT` setting. Enjoy! (Or read on for the rest of changes in this release.) Deprecations and Backward Incompatible Changes ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Default to ``canonicalize=False`` in :class:`scrapy.linkextractors.LinkExtractor` (:issue:`2537`, fixes :issue:`1941` and :issue:`1982`): **warning, this is technically backward-incompatible** - Enable memusage extension by default (:issue:`2539`, fixes :issue:`2187`); **this is technically backward-incompatible** so please check if you have any non-default ``MEMUSAGE_***`` options set. - ``EDITOR`` environment variable now takes precedence over ``EDITOR`` option defined in settings.py (:issue:`1829`); Scrapy default settings no longer depend on environment variables. **This is technically a backward incompatible change**. - ``Spider.make_requests_from_url`` is deprecated (:issue:`1728`, fixes :issue:`1495`). 
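If your spiders still override ``make_requests_from_url``, the migration
suggested by the deprecation warning is to generate the initial requests from
``start_requests`` instead. A minimal sketch (the spider name and URL are
placeholders)::

    import scrapy

    class QuotesSpider(scrapy.Spider):
        name = 'quotes'
        start_urls = ['http://quotes.toscrape.com/']

        def start_requests(self):
            # replaces the deprecated make_requests_from_url() hook;
            # dont_filter=True mirrors its old behavior for start requests
            for url in self.start_urls:
                yield scrapy.Request(url, callback=self.parse, dont_filter=True)

        def parse(self, response):
            for text in response.css('div.quote span.text::text').extract():
                yield {'text': text}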
New Features ~~~~~~~~~~~~ - Accept proxy credentials in :reqmeta:`proxy` request meta key (:issue:`2526`) - Support `brotli`_-compressed content; requires optional `brotlipy`_ (:issue:`2535`) - New :ref:`response.follow ` shortcut for creating requests (:issue:`1940`) - Added ``flags`` argument and attribute to :class:`Request ` objects (:issue:`2047`) - Support Anonymous FTP (:issue:`2342`) - Added ``retry/count``, ``retry/max_reached`` and ``retry/reason_count/`` stats to :class:`RetryMiddleware ` (:issue:`2543`) - Added ``httperror/response_ignored_count`` and ``httperror/response_ignored_status_count/`` stats to :class:`HttpErrorMiddleware ` (:issue:`2566`) - Customizable :setting:`Referrer policy ` in :class:`RefererMiddleware ` (:issue:`2306`) - New ``data:`` URI download handler (:issue:`2334`, fixes :issue:`2156`) - Log cache directory when HTTP Cache is used (:issue:`2611`, fixes :issue:`2604`) - Warn users when project contains duplicate spider names (fixes :issue:`2181`) - :class:`CaselessDict` now accepts ``Mapping`` instances and not only dicts (:issue:`2646`) - :ref:`Media downloads `, with :class:`FilesPipelines` or :class:`ImagesPipelines`, can now optionally handle HTTP redirects using the new :setting:`MEDIA_ALLOW_REDIRECTS` setting (:issue:`2616`, fixes :issue:`2004`) - Accept non-complete responses from websites using a new :setting:`DOWNLOAD_FAIL_ON_DATALOSS` setting (:issue:`2590`, fixes :issue:`2586`) - Optional pretty-printing of JSON and XML items via :setting:`FEED_EXPORT_INDENT` setting (:issue:`2456`, fixes :issue:`1327`) - Allow dropping fields in ``FormRequest.from_response`` formdata when ``None`` value is passed (:issue:`667`) - Per-request retry times with the new :reqmeta:`max_retry_times` meta key (:issue:`2642`) - ``python -m scrapy`` as a more explicit alternative to ``scrapy`` command (:issue:`2740`) .. _brotli: https://github.com/google/brotli .. _brotlipy: https://github.com/python-hyper/brotlipy/ Bug fixes ~~~~~~~~~ - LinkExtractor now strips leading and trailing whitespaces from attributes (:issue:`2547`, fixes :issue:`1614`) - Properly handle whitespaces in action attribute in :class:`FormRequest` (:issue:`2548`) - Buffer CONNECT response bytes from proxy until all HTTP headers are received (:issue:`2495`, fixes :issue:`2491`) - FTP downloader now works on Python 3, provided you use Twisted>=17.1 (:issue:`2599`) - Use body to choose response type after decompressing content (:issue:`2393`, fixes :issue:`2145`) - Always decompress ``Content-Encoding: gzip`` at :class:`HttpCompressionMiddleware ` stage (:issue:`2391`) - Respect custom log level in ``Spider.custom_settings`` (:issue:`2581`, fixes :issue:`1612`) - 'make htmlview' fix for macOS (:issue:`2661`) - Remove "commands" from the command list (:issue:`2695`) - Fix duplicate Content-Length header for POST requests with empty body (:issue:`2677`) - Properly cancel large downloads, i.e. 
above :setting:`DOWNLOAD_MAXSIZE` (:issue:`1616`) - ImagesPipeline: fixed processing of transparent PNG images with palette (:issue:`2675`) Cleanups & Refactoring ~~~~~~~~~~~~~~~~~~~~~~ - Tests: remove temp files and folders (:issue:`2570`), fixed ProjectUtilsTest on OS X (:issue:`2569`), use portable pypy for Linux on Travis CI (:issue:`2710`) - Separate building request from ``_requests_to_follow`` in CrawlSpider (:issue:`2562`) - Remove “Python 3 progress” badge (:issue:`2567`) - Add a couple more lines to ``.gitignore`` (:issue:`2557`) - Remove bumpversion prerelease configuration (:issue:`2159`) - Add codecov.yml file (:issue:`2750`) - Set context factory implementation based on Twisted version (:issue:`2577`, fixes :issue:`2560`) - Add omitted ``self`` arguments in default project middleware template (:issue:`2595`) - Remove redundant ``slot.add_request()`` call in ExecutionEngine (:issue:`2617`) - Catch more specific ``os.error`` exception in :class:`FSFilesStore` (:issue:`2644`) - Change "localhost" test server certificate (:issue:`2720`) - Remove unused ``MEMUSAGE_REPORT`` setting (:issue:`2576`) Documentation ~~~~~~~~~~~~~ - Binary mode is required for exporters (:issue:`2564`, fixes :issue:`2553`) - Mention issue with :meth:`FormRequest.from_response ` due to bug in lxml (:issue:`2572`) - Use single quotes uniformly in templates (:issue:`2596`) - Document :reqmeta:`ftp_user` and :reqmeta:`ftp_password` meta keys (:issue:`2587`) - Removed section on deprecated ``contrib/`` (:issue:`2636`) - Recommend Anaconda when installing Scrapy on Windows (:issue:`2477`, fixes :issue:`2475`) - FAQ: rewrite note on Python 3 support on Windows (:issue:`2690`) - Rearrange selector sections (:issue:`2705`) - Remove ``__nonzero__`` from :class:`SelectorList` docs (:issue:`2683`) - Mention how to disable request filtering in documentation of :setting:`DUPEFILTER_CLASS` setting (:issue:`2714`) - Add sphinx_rtd_theme to docs setup readme (:issue:`2668`) - Open file in text mode in JSON item writer example (:issue:`2729`) - Clarify ``allowed_domains`` example (:issue:`2670`) .. _release-1.3.3: Scrapy 1.3.3 (2017-03-10) ------------------------- Bug fixes ~~~~~~~~~ - Make ``SpiderLoader`` raise ``ImportError`` again by default for missing dependencies and wrong :setting:`SPIDER_MODULES`. These exceptions were silenced as warnings since 1.3.0. A new setting is introduced to toggle between warning or exception if needed ; see :setting:`SPIDER_LOADER_WARN_ONLY` for details. .. _release-1.3.2: Scrapy 1.3.2 (2017-02-13) ------------------------- Bug fixes ~~~~~~~~~ - Preserve request class when converting to/from dicts (utils.reqser) (:issue:`2510`). - Use consistent selectors for author field in tutorial (:issue:`2551`). - Fix TLS compatibility in Twisted 17+ (:issue:`2558`) .. _release-1.3.1: Scrapy 1.3.1 (2017-02-08) ------------------------- New features ~~~~~~~~~~~~ - Support ``'True'`` and ``'False'`` string values for boolean settings (:issue:`2519`); you can now do something like ``scrapy crawl myspider -s REDIRECT_ENABLED=False``. - Support kwargs with ``response.xpath()`` to use :ref:`XPath variables ` and ad-hoc namespaces declarations ; this requires at least Parsel v1.1 (:issue:`2457`). - Add support for Python 3.6 (:issue:`2485`). - Run tests on PyPy (warning: some tests still fail, so PyPy is not supported yet). Bug fixes ~~~~~~~~~ - Enforce ``DNS_TIMEOUT`` setting (:issue:`2496`). - Fix :command:`view` command ; it was a regression in v1.3.0 (:issue:`2503`). 
- Fix tests regarding ``*_EXPIRES`` settings with Files/Images pipelines
  (:issue:`2460`).
- Fix name of generated pipeline class when using basic project template
  (:issue:`2466`).
- Fix compatibility with Twisted 17+ (:issue:`2496`, :issue:`2528`).
- Fix ``scrapy.Item`` inheritance on Python 3.6 (:issue:`2511`).
- Enforce numeric values for components order in ``SPIDER_MIDDLEWARES``,
  ``DOWNLOADER_MIDDLEWARES``, ``EXTENSIONS`` and ``SPIDER_CONTRACTS``
  (:issue:`2420`).

Documentation
~~~~~~~~~~~~~

- Reword Code of Conduct section and upgrade to Contributor Covenant v1.4
  (:issue:`2469`).
- Clarify that passing spider arguments converts them to spider attributes
  (:issue:`2483`).
- Document ``formid`` argument on ``FormRequest.from_response()``
  (:issue:`2497`).
- Add .rst extension to README files (:issue:`2507`).
- Mention LevelDB cache storage backend (:issue:`2525`).
- Use ``yield`` in sample callback code (:issue:`2533`).
- Add note about HTML entities decoding with ``.re()/.re_first()``
  (:issue:`1704`).
- Typos (:issue:`2512`, :issue:`2534`, :issue:`2531`).

Cleanups
~~~~~~~~

- Remove redundant check in ``MetaRefreshMiddleware`` (:issue:`2542`).
- Faster checks in ``LinkExtractor`` for allow/deny patterns (:issue:`2538`).
- Remove dead code supporting old Twisted versions (:issue:`2544`).

.. _release-1.3.0:

Scrapy 1.3.0 (2016-12-21)
-------------------------

This release comes rather soon after 1.2.2 for one main reason: it was found
that releases from 0.18 up to and including 1.2.2 use some backported code
from Twisted (``scrapy.xlib.tx.*``), even if newer Twisted modules are
available. Scrapy now uses ``twisted.web.client`` and
``twisted.internet.endpoints`` directly. (See also cleanups below.)

As it is a major change, we wanted to get the bug fix out quickly while not
breaking any projects using the 1.2 series.

New Features
~~~~~~~~~~~~

- ``MailSender`` now accepts single strings as values for ``to`` and ``cc``
  arguments (:issue:`2272`)
- ``scrapy fetch url``, ``scrapy shell url`` and ``fetch(url)`` inside
  scrapy shell now follow HTTP redirections by default (:issue:`2290`);
  see :command:`fetch` and :command:`shell` for details.
- ``HttpErrorMiddleware`` now logs errors with ``INFO`` level instead of
  ``DEBUG``; this is technically **backward incompatible** so please check
  your log parsers.
- By default, logger names now use a long-form path, e.g.
  ``[scrapy.extensions.logstats]``, instead of the shorter "top-level"
  variant of prior releases (e.g. ``[scrapy]``); this is
  **backward incompatible** if you have log parsers expecting the short
  logger name part. You can switch back to short logger names by setting
  :setting:`LOG_SHORT_NAMES` to ``True``.

Dependencies & Cleanups
~~~~~~~~~~~~~~~~~~~~~~~

- Scrapy now requires Twisted >= 13.1, which is the case for many Linux
  distributions already.
- As a consequence, we got rid of the ``scrapy.xlib.tx.*`` modules, which
  copied some Twisted code for users stuck with an "old" Twisted version.
- ``ChunkedTransferMiddleware`` is deprecated and removed from the default
  downloader middlewares.

.. _release-1.2.3:

Scrapy 1.2.3 (2017-03-03)
-------------------------

- Packaging fix: disallow unsupported Twisted versions in setup.py

..
_release-1.2.2: Scrapy 1.2.2 (2016-12-06) ------------------------- Bug fixes ~~~~~~~~~ - Fix a cryptic traceback when a pipeline fails on ``open_spider()`` (:issue:`2011`) - Fix embedded IPython shell variables (fixing :issue:`396` that re-appeared in 1.2.0, fixed in :issue:`2418`) - A couple of patches when dealing with robots.txt: - handle (non-standard) relative sitemap URLs (:issue:`2390`) - handle non-ASCII URLs and User-Agents in Python 2 (:issue:`2373`) Documentation ~~~~~~~~~~~~~ - Document ``"download_latency"`` key in ``Request``'s ``meta`` dict (:issue:`2033`) - Remove page on (deprecated & unsupported) Ubuntu packages from ToC (:issue:`2335`) - A few fixed typos (:issue:`2346`, :issue:`2369`, :issue:`2369`, :issue:`2380`) and clarifications (:issue:`2354`, :issue:`2325`, :issue:`2414`) Other changes ~~~~~~~~~~~~~ - Advertize `conda-forge`_ as Scrapy's official conda channel (:issue:`2387`) - More helpful error messages when trying to use ``.css()`` or ``.xpath()`` on non-Text Responses (:issue:`2264`) - ``startproject`` command now generates a sample ``middlewares.py`` file (:issue:`2335`) - Add more dependencies' version info in ``scrapy version`` verbose output (:issue:`2404`) - Remove all ``*.pyc`` files from source distribution (:issue:`2386`) .. _conda-forge: https://anaconda.org/conda-forge/scrapy .. _release-1.2.1: Scrapy 1.2.1 (2016-10-21) ------------------------- Bug fixes ~~~~~~~~~ - Include OpenSSL's more permissive default ciphers when establishing TLS/SSL connections (:issue:`2314`). - Fix "Location" HTTP header decoding on non-ASCII URL redirects (:issue:`2321`). Documentation ~~~~~~~~~~~~~ - Fix JsonWriterPipeline example (:issue:`2302`). - Various notes: :issue:`2330` on spider names, :issue:`2329` on middleware methods processing order, :issue:`2327` on getting multi-valued HTTP headers as lists. Other changes ~~~~~~~~~~~~~ - Removed ``www.`` from ``start_urls`` in built-in spider templates (:issue:`2299`). .. _release-1.2.0: Scrapy 1.2.0 (2016-10-03) ------------------------- New Features ~~~~~~~~~~~~ - New :setting:`FEED_EXPORT_ENCODING` setting to customize the encoding used when writing items to a file. This can be used to turn off ``\uXXXX`` escapes in JSON output. This is also useful for those wanting something else than UTF-8 for XML or CSV output (:issue:`2034`). - ``startproject`` command now supports an optional destination directory to override the default one based on the project name (:issue:`2005`). - New :setting:`SCHEDULER_DEBUG` setting to log requests serialization failures (:issue:`1610`). - JSON encoder now supports serialization of ``set`` instances (:issue:`2058`). - Interpret ``application/json-amazonui-streaming`` as ``TextResponse`` (:issue:`1503`). - ``scrapy`` is imported by default when using shell tools (:command:`shell`, :ref:`inspect_response `) (:issue:`2248`). Bug fixes ~~~~~~~~~ - DefaultRequestHeaders middleware now runs before UserAgent middleware (:issue:`2088`). **Warning: this is technically backward incompatible**, though we consider this a bug fix. - HTTP cache extension and plugins that use the ``.scrapy`` data directory now work outside projects (:issue:`1581`). **Warning: this is technically backward incompatible**, though we consider this a bug fix. - ``Selector`` does not allow passing both ``response`` and ``text`` anymore (:issue:`2153`). - Fixed logging of wrong callback name with ``scrapy parse`` (:issue:`2169`). - Fix for an odd gzip decompression bug (:issue:`1606`). 
- Fix for selected callbacks when using ``CrawlSpider`` with
  :command:`scrapy parse <parse>` (:issue:`2225`).
- Fix for invalid JSON and XML files when spider yields no items
  (:issue:`872`).
- Implement ``flush()`` for ``StreamLogger``, avoiding a warning in the logs
  (:issue:`2125`).

Refactoring
~~~~~~~~~~~

- ``canonicalize_url`` has been moved to `w3lib.url`_ (:issue:`2168`).

.. _w3lib.url: https://w3lib.readthedocs.io/en/latest/w3lib.html#w3lib.url.canonicalize_url

Tests & Requirements
~~~~~~~~~~~~~~~~~~~~

Scrapy's new requirements baseline is Debian 8 "Jessie". It was previously
Ubuntu 12.04 Precise. What this means in practice is that we run continuous
integration tests with these (main) package versions at a minimum:
Twisted 14.0, pyOpenSSL 0.14, lxml 3.4.

Scrapy may very well work with older versions of these packages (the code
base still has switches for older Twisted versions for example) but it is
not guaranteed (because it's not tested anymore).

Documentation
~~~~~~~~~~~~~

- Grammar fixes: :issue:`2128`, :issue:`1566`.
- Download stats badge removed from README (:issue:`2160`).
- New scrapy :ref:`architecture diagram <topics-architecture>`
  (:issue:`2165`).
- Updated ``Response`` parameters documentation (:issue:`2197`).
- Reworded misleading :setting:`RANDOMIZE_DOWNLOAD_DELAY` description
  (:issue:`2190`).
- Add StackOverflow as a support channel (:issue:`2257`).

.. _release-1.1.4:

Scrapy 1.1.4 (2017-03-03)
-------------------------

- Packaging fix: disallow unsupported Twisted versions in setup.py

.. _release-1.1.3:

Scrapy 1.1.3 (2016-09-22)
-------------------------

Bug fixes
~~~~~~~~~

- Class attributes for subclasses of ``ImagesPipeline`` and ``FilesPipeline``
  work as they did before 1.1.1 (:issue:`2243`, fixes :issue:`2198`)

Documentation
~~~~~~~~~~~~~

- :ref:`Overview <intro-overview>` and :ref:`tutorial <intro-tutorial>`
  rewritten to use http://toscrape.com websites
  (:issue:`2236`, :issue:`2249`, :issue:`2252`).

.. _release-1.1.2:

Scrapy 1.1.2 (2016-08-18)
-------------------------

Bug fixes
~~~~~~~~~

- Introduce a missing :setting:`IMAGES_STORE_S3_ACL` setting to override
  the default ACL policy in ``ImagesPipeline`` when uploading images to S3
  (note that the default ACL policy is "private" -- instead of
  "public-read" -- since Scrapy 1.1.0)
- :setting:`IMAGES_EXPIRES` default value set back to 90
  (the regression was introduced in 1.1.1)

..
_release-1.1.1: Scrapy 1.1.1 (2016-07-13) ------------------------- Bug fixes ~~~~~~~~~ - Add "Host" header in CONNECT requests to HTTPS proxies (:issue:`2069`) - Use response ``body`` when choosing response class (:issue:`2001`, fixes :issue:`2000`) - Do not fail on canonicalizing URLs with wrong netlocs (:issue:`2038`, fixes :issue:`2010`) - a few fixes for ``HttpCompressionMiddleware`` (and ``SitemapSpider``): - Do not decode HEAD responses (:issue:`2008`, fixes :issue:`1899`) - Handle charset parameter in gzip Content-Type header (:issue:`2050`, fixes :issue:`2049`) - Do not decompress gzip octet-stream responses (:issue:`2065`, fixes :issue:`2063`) - Catch (and ignore with a warning) exception when verifying certificate against IP-address hosts (:issue:`2094`, fixes :issue:`2092`) - Make ``FilesPipeline`` and ``ImagesPipeline`` backward compatible again regarding the use of legacy class attributes for customization (:issue:`1989`, fixes :issue:`1985`) New features ~~~~~~~~~~~~ - Enable genspider command outside project folder (:issue:`2052`) - Retry HTTPS CONNECT ``TunnelError`` by default (:issue:`1974`) Documentation ~~~~~~~~~~~~~ - ``FEED_TEMPDIR`` setting at lexicographical position (:commit:`9b3c72c`) - Use idiomatic ``.extract_first()`` in overview (:issue:`1994`) - Update years in copyright notice (:commit:`c2c8036`) - Add information and example on errbacks (:issue:`1995`) - Use "url" variable in downloader middleware example (:issue:`2015`) - Grammar fixes (:issue:`2054`, :issue:`2120`) - New FAQ entry on using BeautifulSoup in spider callbacks (:issue:`2048`) - Add notes about scrapy not working on Windows with Python 3 (:issue:`2060`) - Encourage complete titles in pull requests (:issue:`2026`) Tests ~~~~~ - Upgrade py.test requirement on Travis CI and Pin pytest-cov to 2.2.1 (:issue:`2095`) .. _release-1.1.0: Scrapy 1.1.0 (2016-05-11) ------------------------- This 1.1 release brings a lot of interesting features and bug fixes: - Scrapy 1.1 has beta Python 3 support (requires Twisted >= 15.5). See :ref:`news_betapy3` for more details and some limitations. - Hot new features: - Item loaders now support nested loaders (:issue:`1467`). - ``FormRequest.from_response`` improvements (:issue:`1382`, :issue:`1137`). - Added setting :setting:`AUTOTHROTTLE_TARGET_CONCURRENCY` and improved AutoThrottle docs (:issue:`1324`). - Added ``response.text`` to get body as unicode (:issue:`1730`). - Anonymous S3 connections (:issue:`1358`). - Deferreds in downloader middlewares (:issue:`1473`). This enables better robots.txt handling (:issue:`1471`). - HTTP caching now follows RFC2616 more closely, added settings :setting:`HTTPCACHE_ALWAYS_STORE` and :setting:`HTTPCACHE_IGNORE_RESPONSE_CACHE_CONTROLS` (:issue:`1151`). - Selectors were extracted to the parsel_ library (:issue:`1409`). This means you can use Scrapy Selectors without Scrapy and also upgrade the selectors engine without needing to upgrade Scrapy. - HTTPS downloader now does TLS protocol negotiation by default, instead of forcing TLS 1.0. You can also set the SSL/TLS method using the new :setting:`DOWNLOADER_CLIENT_TLS_METHOD`. - These bug fixes may require your attention: - Don't retry bad requests (HTTP 400) by default (:issue:`1289`). If you need the old behavior, add ``400`` to :setting:`RETRY_HTTP_CODES`. - Fix shell files argument handling (:issue:`1710`, :issue:`1550`). If you try ``scrapy shell index.html`` it will try to load the URL http://index.html, use ``scrapy shell ./index.html`` to load a local file. 
- Robots.txt compliance is now enabled by default for newly-created projects (:issue:`1724`). Scrapy will also wait for robots.txt to be downloaded before proceeding with the crawl (:issue:`1735`). If you want to disable this behavior, update :setting:`ROBOTSTXT_OBEY` in ``settings.py`` file after creating a new project. - Exporters now work on unicode, instead of bytes by default (:issue:`1080`). If you use ``PythonItemExporter``, you may want to update your code to disable binary mode which is now deprecated. - Accept XML node names containing dots as valid (:issue:`1533`). - When uploading files or images to S3 (with ``FilesPipeline`` or ``ImagesPipeline``), the default ACL policy is now "private" instead of "public" **Warning: backward incompatible!**. You can use :setting:`FILES_STORE_S3_ACL` to change it. - We've reimplemented ``canonicalize_url()`` for more correct output, especially for URLs with non-ASCII characters (:issue:`1947`). This could change link extractors output compared to previous scrapy versions. This may also invalidate some cache entries you could still have from pre-1.1 runs. **Warning: backward incompatible!**. Keep reading for more details on other improvements and bug fixes. .. _news_betapy3: Beta Python 3 Support ~~~~~~~~~~~~~~~~~~~~~ We have been `hard at work to make Scrapy run on Python 3 `_. As a result, now you can run spiders on Python 3.3, 3.4 and 3.5 (Twisted >= 15.5 required). Some features are still missing (and some may never be ported). Almost all builtin extensions/middlewares are expected to work. However, we are aware of some limitations in Python 3: - Scrapy does not work on Windows with Python 3 - Sending emails is not supported - FTP download handler is not supported - Telnet console is not supported Additional New Features and Enhancements ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - Scrapy now has a `Code of Conduct`_ (:issue:`1681`). - Command line tool now has completion for zsh (:issue:`934`). - Improvements to ``scrapy shell``: - Support for bpython and configure preferred Python shell via ``SCRAPY_PYTHON_SHELL`` (:issue:`1100`, :issue:`1444`). - Support URLs without scheme (:issue:`1498`) **Warning: backward incompatible!** - Bring back support for relative file path (:issue:`1710`, :issue:`1550`). - Added :setting:`MEMUSAGE_CHECK_INTERVAL_SECONDS` setting to change default check interval (:issue:`1282`). - Download handlers are now lazy-loaded on first request using their scheme (:issue:`1390`, :issue:`1421`). - HTTPS download handlers do not force TLS 1.0 anymore; instead, OpenSSL's ``SSLv23_method()/TLS_method()`` is used allowing to try negotiating with the remote hosts the highest TLS protocol version it can (:issue:`1794`, :issue:`1629`). - ``RedirectMiddleware`` now skips the status codes from ``handle_httpstatus_list`` on spider attribute or in ``Request``'s ``meta`` key (:issue:`1334`, :issue:`1364`, :issue:`1447`). - Form submission: - now works with ``