pyquery-1.2.9/0000775000000000000000000000000012625300673011745 5ustar rootrootpyquery-1.2.9/CHANGES.rst0000644000000000000000000000477712375634422013570 0ustar rootroot1.2.9 (2014-08-22) ------------------ - Support for keyword arguments in PyQuery custom functions - Fixed #78: items must take care or the parent - Fixed #65 PyQuery.make_links_absolute() no longer creates 'href' attribute when it isn't there - Fixed #19. ``is_()`` was broken. - Fixed #9. ``.replaceWith(PyQuery element)`` raises error - Remove official python3.2 support (mostly because of 3rd party semi-deps) 1.2.8 (2013-12-21) ------------------ - Fixed #22: Open by filename fails when file contains invalid xml - Bug fix in .remove_class() 1.2.7 (2013-12-21) ------------------ - Use pep8 name for methods but keep an alias for camel case method. Eg: remove_attr and removeAttr works Fix #57 - .text() now return an empty string instead of None if there is no text node. Fix #45 - Fixed #23: removeClass adds class attribute to elements which previously lacked one 1.2.6 (2013-10-11) ------------------ README_fixt.py was not include in the release. Fix #54. 1.2.5 (2013-10-10) ------------------ cssselect compat. See https://github.com/SimonSapin/cssselect/pull/22 tests improvments. no longer require a eth connection. fix #55 1.2.4 ----- Moved to github. So a few files are renamed from .txt to .rst Added .xhtml_to_html() and .remove_namespaces() Use requests to fetch urls (if available) Use restkit's proxy instead of Paste (which will die with py3) Allow to open https urls python2.5 is no longer supported (may work, but tests are broken) 1.2.3 ----- Allow to pass this in .filter() callback Add .contents() .items() Add tox.ini Bug fixes: fix #35 #55 #64 #66 1.2.2 ----- Fix cssselectpatch to match the newer implementation of cssselect. Fixes issue #62, #52 and #59 (Haoyu Bai) Fix issue #37 (Caleb Burns) 1.2.1 ----- Allow to use a custom css translator. Fix issue 44: case problem with xml documents 1.2 --- PyQuery now use `cssselect `_. See issue 43. Fix issue 40: forward .html() extra arguments to ``lxml.etree.tostring`` 1.1.1 ----- Minor release. Include test file so you can run tests from the tarball. 1.1 --- fix issues 30, 31, 32 - py3 improvements / webob 1.2+ support 1.0 --- fix issues 24 0.7 --- Python 3 compatible Add __unicode__ method Add root and encoding attribute fix issues 19, 20, 22, 23 0.6.1 ------ Move README.txt at package root Add CHANGES.txt and add it to long_description 0.6 ---- Added PyQuery.outerHtml Added PyQuery.fn Added PyQuery.map Change PyQuery.each behavior to reflect jQuery api pyquery-1.2.9/README_fixt.py0000644000000000000000000000117312375634422014312 0ustar rootroot# -*- coding: utf-8 -*- import os from webtest import http from webtest.debugapp import debug_app try: from urllib import urlopen except ImportError: from urllib.request import urlopen def setup_test(test): server = http.StopableWSGIServer.create(debug_app) server.wait() path_to_html_file = os.path.join('tests', 'test.html') test.globs.update( urlopen=urlopen, server=server, your_url=server.application_url, path_to_html_file=path_to_html_file, ) setup_test.__test__ = False def teardown_test(test): test.globs['server'].shutdown() teardown_test.__test__ = False pyquery-1.2.9/setup.cfg0000644000000000000000000000043512375634432013573 0ustar rootroot[nosetests] verbosity = 2 detailed-errors = True with-doctest = True doctest-extension = rst doctest-fixtures = _fixt include = docs cover-package = pyquery with-coverage = 1 doctest-options = +ELLIPSIS,+NORMALIZE_WHITESPACE [egg_info] tag_build = tag_date = 0 tag_svn_revision = 0 pyquery-1.2.9/MANIFEST.in0000644000000000000000000000021312375634422013501 0ustar rootrootgraft docs prune docs/_build graft pyquery graft tests include *_fixt.py *.rst *.cfg *.ini global-exclude *.pyc global-exclude __pycache__ pyquery-1.2.9/PKG-INFO0000644000000000000000000001556612375634432013062 0ustar rootrootMetadata-Version: 1.1 Name: pyquery Version: 1.2.9 Summary: A jquery-like library for python Home-page: https://github.com/gawel/pyquery Author: Gael Pasgrimaud Author-email: gael@gawel.org License: BSD Description: pyquery: a jquery-like library for python ========================================= pyquery allows you to make jquery queries on xml documents. The API is as much as possible the similar to jquery. pyquery uses lxml for fast xml and html manipulation. This is not (or at least not yet) a library to produce or interact with javascript code. I just liked the jquery API and I missed it in python so I told myself "Hey let's make jquery in python". This is the result. The `project`_ is being actively developped on a git repository on Github. I have the policy of giving push access to anyone who wants it and then to review what he does. So if you want to contribute just email me. Please report bugs on the `github `_ issue tracker. .. _deliverance: http://www.gawel.org/weblog/en/2008/12/skinning-with-pyquery-and-deliverance .. _project: https://github.com/gawel/pyquery/ Quickstart ========== You can use the PyQuery class to load an xml document from a string, a lxml document, from a file or from an url:: >>> from pyquery import PyQuery as pq >>> from lxml import etree >>> import urllib >>> d = pq("") >>> d = pq(etree.fromstring("")) >>> d = pq(url=your_url) >>> d = pq(url=your_url, ... opener=lambda url, **kw: urlopen(url).read()) >>> d = pq(filename=path_to_html_file) Now d is like the $ in jquery:: >>> d("#hello") [] >>> p = d("#hello") >>> print(p.html()) Hello world ! >>> p.html("you know Python rocks") [] >>> print(p.html()) you know Python rocks >>> print(p.text()) you know Python rocks You can use some of the pseudo classes that are available in jQuery but that are not standard in css such as :first :last :even :odd :eq :lt :gt :checked :selected :file:: >>> d('p:first') [] See http://pyquery.rtfd.org/ for the full documentation News ==== 1.2.9 (2014-08-22) ------------------ - Support for keyword arguments in PyQuery custom functions - Fixed #78: items must take care or the parent - Fixed #65 PyQuery.make_links_absolute() no longer creates 'href' attribute when it isn't there - Fixed #19. ``is_()`` was broken. - Fixed #9. ``.replaceWith(PyQuery element)`` raises error - Remove official python3.2 support (mostly because of 3rd party semi-deps) 1.2.8 (2013-12-21) ------------------ - Fixed #22: Open by filename fails when file contains invalid xml - Bug fix in .remove_class() 1.2.7 (2013-12-21) ------------------ - Use pep8 name for methods but keep an alias for camel case method. Eg: remove_attr and removeAttr works Fix #57 - .text() now return an empty string instead of None if there is no text node. Fix #45 - Fixed #23: removeClass adds class attribute to elements which previously lacked one 1.2.6 (2013-10-11) ------------------ README_fixt.py was not include in the release. Fix #54. 1.2.5 (2013-10-10) ------------------ cssselect compat. See https://github.com/SimonSapin/cssselect/pull/22 tests improvments. no longer require a eth connection. fix #55 1.2.4 ----- Moved to github. So a few files are renamed from .txt to .rst Added .xhtml_to_html() and .remove_namespaces() Use requests to fetch urls (if available) Use restkit's proxy instead of Paste (which will die with py3) Allow to open https urls python2.5 is no longer supported (may work, but tests are broken) 1.2.3 ----- Allow to pass this in .filter() callback Add .contents() .items() Add tox.ini Bug fixes: fix #35 #55 #64 #66 1.2.2 ----- Fix cssselectpatch to match the newer implementation of cssselect. Fixes issue #62, #52 and #59 (Haoyu Bai) Fix issue #37 (Caleb Burns) 1.2.1 ----- Allow to use a custom css translator. Fix issue 44: case problem with xml documents 1.2 --- PyQuery now use `cssselect `_. See issue 43. Fix issue 40: forward .html() extra arguments to ``lxml.etree.tostring`` 1.1.1 ----- Minor release. Include test file so you can run tests from the tarball. 1.1 --- fix issues 30, 31, 32 - py3 improvements / webob 1.2+ support 1.0 --- fix issues 24 0.7 --- Python 3 compatible Add __unicode__ method Add root and encoding attribute fix issues 19, 20, 22, 23 0.6.1 ------ Move README.txt at package root Add CHANGES.txt and add it to long_description 0.6 ---- Added PyQuery.outerHtml Added PyQuery.fn Added PyQuery.map Change PyQuery.each behavior to reflect jQuery api Keywords: jquery html xml scraping Platform: UNKNOWN Classifier: Intended Audience :: Developers Classifier: Development Status :: 5 - Production/Stable Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.6 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 Classifier: Programming Language :: Python :: 3.3 Classifier: Programming Language :: Python :: 3.4 pyquery-1.2.9/buildout.cfg0000644000000000000000000000067712375634422014271 0ustar rootroot[buildout] newest = false parts = py2 docs develop = . [py3] recipe = zc.recipe.egg eggs = cssselect>0.7.9 WebOb>1.1.9 WebTest pyquery nose coverage [py2] recipe = zc.recipe.egg eggs = ${py3:eggs} unittest2 BeautifulSoup restkit [docs] recipe = zc.recipe.egg eggs = ${py2:eggs} Pygments Sphinx sphinx-pypi-upload interpreter = py scripts = sphinx-build [tox] recipe = gp.recipe.tox pyquery-1.2.9/setup.py0000644000000000000000000000344712375634422013471 0ustar rootroot#-*- coding:utf-8 -*- # # Copyright (C) 2008 - Olivier Lauzanne # # Distributed under the BSD license, see LICENSE.txt from setuptools import setup, find_packages import os def read(*names): values = dict() for name in names: filename = name + '.rst' if os.path.isfile(filename): fd = open(filename) value = fd.read() fd.close() else: value = '' values[name] = value return values long_description = """ %(README)s See http://pyquery.rtfd.org/ for the full documentation News ==== %(CHANGES)s """ % read('README', 'CHANGES') version = '1.2.9' setup(name='pyquery', version=version, description='A jquery-like library for python', long_description=long_description, classifiers=[ "Intended Audience :: Developers", "Development Status :: 5 - Production/Stable", "Programming Language :: Python :: 2", "Programming Language :: Python :: 2.6", "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", ], keywords='jquery html xml scraping', author='Olivier Lauzanne', author_email='olauzanne@gmail.com', maintainer='Gael Pasgrimaud', maintainer_email='gael@gawel.org', url='https://github.com/gawel/pyquery', license='BSD', packages=find_packages(exclude=[ 'bootstrap', 'bootstrap-py3k', 'docs', 'tests', 'README_fixt' ]), include_package_data=True, zip_safe=False, install_requires=[ 'lxml>=2.1', 'cssselect', ], entry_points=""" # -*- Entry points: -*- """, ) pyquery-1.2.9/tox.ini0000644000000000000000000000105312375634422013261 0ustar rootroot[tox] envlist=py26,py27,py33,py34 [testenv] commands = {envbindir}/nosetests [] deps = cssselect>0.7.9 requests WebOb>1.1.9 WebTest nose coverage unittest2 BeautifulSoup restkit [testenv:py33] changedir={toxinidir} commands = {envbindir}/nosetests [] deps = cssselect>0.7.9 requests WebOb>1.1.9 WebTest nose coverage [testenv:py34] changedir={toxinidir} commands = {envbindir}/nosetests [] deps = cssselect>0.7.9 requests WebOb>1.1.9 WebTest nose coverage pyquery-1.2.9/README.rst0000644000000000000000000000370412375634422013442 0ustar rootrootpyquery: a jquery-like library for python ========================================= pyquery allows you to make jquery queries on xml documents. The API is as much as possible the similar to jquery. pyquery uses lxml for fast xml and html manipulation. This is not (or at least not yet) a library to produce or interact with javascript code. I just liked the jquery API and I missed it in python so I told myself "Hey let's make jquery in python". This is the result. The `project`_ is being actively developped on a git repository on Github. I have the policy of giving push access to anyone who wants it and then to review what he does. So if you want to contribute just email me. Please report bugs on the `github `_ issue tracker. .. _deliverance: http://www.gawel.org/weblog/en/2008/12/skinning-with-pyquery-and-deliverance .. _project: https://github.com/gawel/pyquery/ Quickstart ========== You can use the PyQuery class to load an xml document from a string, a lxml document, from a file or from an url:: >>> from pyquery import PyQuery as pq >>> from lxml import etree >>> import urllib >>> d = pq("") >>> d = pq(etree.fromstring("")) >>> d = pq(url=your_url) >>> d = pq(url=your_url, ... opener=lambda url, **kw: urlopen(url).read()) >>> d = pq(filename=path_to_html_file) Now d is like the $ in jquery:: >>> d("#hello") [] >>> p = d("#hello") >>> print(p.html()) Hello world ! >>> p.html("you know Python rocks") [] >>> print(p.html()) you know Python rocks >>> print(p.text()) you know Python rocks You can use some of the pseudo classes that are available in jQuery but that are not standard in css such as :first :last :even :odd :eq :lt :gt :checked :selected :file:: >>> d('p:first') [] pyquery-1.2.9/pyquery/0000775000000000000000000000000012625300673013463 5ustar rootrootpyquery-1.2.9/pyquery/cssselectpatch.py0000644000000000000000000003072312375634422017054 0ustar rootroot#-*- coding:utf-8 -*- # # Copyright (C) 2008 - Olivier Lauzanne # # Distributed under the BSD license, see LICENSE.txt from __future__ import unicode_literals from cssselect import xpath as cssselect_xpath from cssselect.xpath import ExpressionError XPathExprOrig = cssselect_xpath.XPathExpr class XPathExpr(XPathExprOrig): def __init__(self, path='', element='*', condition='', star_prefix=False): self.path = path self.element = element self.condition = condition self.post_condition = None def add_post_condition(self, post_condition): if self.post_condition: self.post_condition = '%s and (%s)' % (self.post_condition, post_condition) else: self.post_condition = post_condition def __str__(self): path = XPathExprOrig.__str__(self) if self.post_condition: path = '%s[%s]' % (path, self.post_condition) return path def join(self, combiner, other): res = XPathExprOrig.join(self, combiner, other) self.post_condition = other.post_condition return res # keep cssselect < 0.8 compat for now class JQueryTranslator(cssselect_xpath.HTMLTranslator): """This class is used to implement the css pseudo classes (:first, :last, ...) that are not defined in the css standard, but are defined in the jquery API. """ xpathexpr_cls = XPathExpr def xpath_first_pseudo(self, xpath): """Matches the first selected element:: >>> from pyquery import PyQuery >>> d = PyQuery('

') >>> d('p:first') [] .. """ xpath.add_post_condition('position() = 1') return xpath def xpath_last_pseudo(self, xpath): """Matches the last selected element:: >>> from pyquery import PyQuery >>> d = PyQuery('

') >>> d('p:last') [] .. """ xpath.add_post_condition('position() = last()') return xpath def xpath_even_pseudo(self, xpath): """Matches even elements, zero-indexed:: >>> from pyquery import PyQuery >>> d = PyQuery('

') >>> d('p:even') [

] .. """ # the first element is 1 in xpath and 0 in python and js xpath.add_post_condition('position() mod 2 = 1') return xpath def xpath_odd_pseudo(self, xpath): """Matches odd elements, zero-indexed:: >>> from pyquery import PyQuery >>> d = PyQuery('

') >>> d('p:odd') [] .. """ xpath.add_post_condition('position() mod 2 = 0') return xpath def xpath_checked_pseudo(self, xpath): """Matches odd elements, zero-indexed:: >>> from pyquery import PyQuery >>> d = PyQuery('
') >>> d('input:checked') [] .. """ xpath.add_condition("@checked and name(.) = 'input'") return xpath def xpath_selected_pseudo(self, xpath): """Matches all elements that are selected:: >>> from pyquery import PyQuery >>> d = PyQuery('') >>> d('option:selected') [