BeautifulSoup-3.2.1/0000755000175000017500000000000011717203106015740 5ustar leonardrleonardr00000000000000BeautifulSoup-3.2.1/setup.py0000644000175000017500000000524111514065303017454 0ustar leonardrleonardr00000000000000from distutils.core import setup import unittest import warnings warnings.filterwarnings("ignore", "Unknown distribution option") import sys # patch distutils if it can't cope with the "classifiers" keyword if sys.version < '2.2.3': from distutils.dist import DistributionMetadata DistributionMetadata.classifiers = None DistributionMetadata.download_url = None from BeautifulSoup import __version__ #Make sure all the tests complete. import BeautifulSoupTests loader = unittest.TestLoader() result = unittest.TestResult() suite = loader.loadTestsFromModule(BeautifulSoupTests) suite.run(result) if not result.wasSuccessful(): print "Unit tests have failed!" for l in result.errors, result.failures: for case, error in l: print "-" * 80 desc = case.shortDescription() if desc: print desc print error print '''If you see an error like: "'ascii' codec can't encode character...", see\nthe Beautiful Soup documentation:\n http://www.crummy.com/software/BeautifulSoup/documentation.html#Why%20can't%20Beautiful%20Soup%20print%20out%20the%20non-ASCII%20characters%20I%20gave%20it?''' print "This might or might not be a problem depending on what you plan to do with\nBeautiful Soup." if sys.argv[1] == 'sdist': print print "I'm not going to make a source distribution since the tests don't pass." sys.exit(1) setup(name="BeautifulSoup", version=__version__, py_modules=['BeautifulSoup', 'BeautifulSoupTests'], description="HTML/XML parser for quick-turnaround applications like screen-scraping.", author="Leonard Richardson", author_email = "leonardr@segfault.org", long_description="""Beautiful Soup parses arbitrarily invalid SGML and provides a variety of methods and Pythonic idioms for iterating and searching the parse tree.""", classifiers=["Development Status :: 5 - Production/Stable", "Intended Audience :: Developers", "License :: OSI Approved :: Python Software Foundation License", "Programming Language :: Python", "Topic :: Text Processing :: Markup :: HTML", "Topic :: Text Processing :: Markup :: XML", "Topic :: Text Processing :: Markup :: SGML", "Topic :: Software Development :: Libraries :: Python Modules", ], url="http://www.crummy.com/software/BeautifulSoup/", license="BSD", download_url="http://www.crummy.com/software/BeautifulSoup/download/" ) # Send announce to: # python-announce@python.org # python-list@python.org BeautifulSoup-3.2.1/PKG-INFO0000644000175000017500000000171211717203106017036 0ustar leonardrleonardr00000000000000Metadata-Version: 1.0 Name: BeautifulSoup Version: 3.2.1 Summary: HTML/XML parser for quick-turnaround applications like screen-scraping. Home-page: http://www.crummy.com/software/BeautifulSoup/ Author: Leonard Richardson Author-email: leonardr@segfault.org License: BSD Download-URL: http://www.crummy.com/software/BeautifulSoup/download/ Description: Beautiful Soup parses arbitrarily invalid SGML and provides a variety of methods and Pythonic idioms for iterating and searching the parse tree. Platform: UNKNOWN Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: Python Software Foundation License Classifier: Programming Language :: Python Classifier: Topic :: Text Processing :: Markup :: HTML Classifier: Topic :: Text Processing :: Markup :: XML Classifier: Topic :: Text Processing :: Markup :: SGML Classifier: Topic :: Software Development :: Libraries :: Python Modules BeautifulSoup-3.2.1/BeautifulSoupTests.py0000644000175000017500000011132511717177620022142 0ustar leonardrleonardr00000000000000# -*- coding: utf-8 -*- """Unit tests for Beautiful Soup. These tests make sure the Beautiful Soup works as it should. If you find a bug in Beautiful Soup, the best way to express it is as a test case like this that fails.""" import unittest from BeautifulSoup import * class SoupTest(unittest.TestCase): def assertSoupEquals(self, toParse, rep=None, c=BeautifulSoup): """Parse the given text and make sure its string rep is the other given text.""" if rep == None: rep = toParse self.assertEqual(str(c(toParse)), rep) class FollowThatTag(SoupTest): "Tests the various ways of fetching tags from a soup." def setUp(self): ml = """ 1 2 3 4 4""" self.soup = BeautifulStoneSoup(ml) def testFindAllByName(self): matching = self.soup('a') self.assertEqual(len(matching), 2) self.assertEqual(matching[0].name, 'a') self.assertEqual(matching, self.soup.findAll('a')) self.assertEqual(matching, self.soup.findAll(SoupStrainer('a'))) def testFindAllByAttribute(self): matching = self.soup.findAll(id='x') self.assertEqual(len(matching), 2) self.assertEqual(matching[0].name, 'a') self.assertEqual(matching[1].name, 'b') matching2 = self.soup.findAll(attrs={'id' : 'x'}) self.assertEqual(matching, matching2) strainer = SoupStrainer(attrs={'id' : 'x'}) self.assertEqual(matching, self.soup.findAll(strainer)) self.assertEqual(len(self.soup.findAll(id=None)), 1) self.assertEqual(len(self.soup.findAll(width=100)), 1) self.assertEqual(len(self.soup.findAll(junk=None)), 5) self.assertEqual(len(self.soup.findAll(junk=[1, None])), 5) self.assertEqual(len(self.soup.findAll(junk=re.compile('.*'))), 0) self.assertEqual(len(self.soup.findAll(junk=True)), 0) self.assertEqual(len(self.soup.findAll(junk=True)), 0) self.assertEqual(len(self.soup.findAll(href=True)), 1) def testFindallByClass(self): soup = BeautifulSoup('FooBar') self.assertEqual(soup.find(attrs='foo').string, "Foo") self.assertEqual(soup.find('a', '1').string, "Bar") self.assertEqual(soup.find('a', '23').string, "Bar") self.assertEqual(soup.find('a', '4').string, "Bar") self.assertEqual(soup.find('a', '2'), None) def testFindAllByList(self): matching = self.soup(['a', 'ac']) self.assertEqual(len(matching), 3) def testFindAllByHash(self): matching = self.soup({'a' : True, 'b' : True}) self.assertEqual(len(matching), 4) def testFindAllText(self): soup = BeautifulSoup("\xbb") self.assertEqual(soup.findAll(text=re.compile('.*')), [u'\xbb']) def testFindAllByRE(self): import re r = re.compile('a.*') self.assertEqual(len(self.soup(r)), 3) def testFindAllByMethod(self): def matchTagWhereIDMatchesName(tag): return tag.name == tag.get('id') matching = self.soup.findAll(matchTagWhereIDMatchesName) self.assertEqual(len(matching), 2) self.assertEqual(matching[0].name, 'a') def testFindByIndex(self): """For when you have the tag and you want to know where it is.""" tag = self.soup.find('a', id="a") self.assertEqual(self.soup.index(tag), 3) # It works for NavigableStrings as well. s = tag.string self.assertEqual(tag.index(s), 0) # If the tag isn't present, a ValueError is raised. soup2 = BeautifulSoup("") tag2 = soup2.find('b') self.assertRaises(ValueError, self.soup.index, tag2) def testConflictingFindArguments(self): """The 'text' argument takes precedence.""" soup = BeautifulSoup('FooBarBaz') self.assertEqual(soup.find('b', text='Baz'), 'Baz') self.assertEqual(soup.findAll('b', text='Baz'), ['Baz']) self.assertEqual(soup.find(True, text='Baz'), 'Baz') self.assertEqual(soup.findAll(True, text='Baz'), ['Baz']) def testParents(self): soup = BeautifulSoup('