python-feedvalidator-0~svn1022/0000755000175000017500000000000011065534350014775 5ustar poxpoxpython-feedvalidator-0~svn1022/runtests.sh0000755000175000017500000000117610513170766017234 0ustar poxpox#!/bin/sh PYTHON=${1:-${PYTHON:-python}} # Run all project tests cd "`dirname "$0"`" ${PYTHON} validtest.py # Make sure XML encoding detection works ${PYTHON} tests/genXmlTestcases.py && python tests/testXmlEncoding.py # Confirm that XML is decoded correctly ${PYTHON} tests/testXmlEncodingDecode.py # Make sure media type checks are consistent ${PYTHON} tests/testMediaTypes.py # Test URI equivalence ${PYTHON} tests/testUri.py # Ensure check.cgi runs cleanly, at least for a GET PYTHONPATH="`pwd`/tests:." REQUEST_METHOD=GET FEEDVALIDATOR_HOME="`pwd`/.." python - <../check.cgi >/dev/null || echo >&2 "check.cgi failed to run" python-feedvalidator-0~svn1022/validtest.py0000755000175000017500000001233211015104552017342 0ustar poxpox#!/usr/bin/python """$Id: validtest.py 1014 2008-05-21 20:43:22Z joe.walton.gglcd $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 1014 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" import feedvalidator import unittest, new, os, sys, glob, re from feedvalidator.logging import Message,SelfDoesntMatchLocation,MissingSelf from feedvalidator import compatibility from feedvalidator.formatter.application_test import Formatter class TestCase(unittest.TestCase): def failIfNoMessage(self, theList): filterFunc = compatibility.AA events = filterFunc(theList) output = Formatter(events) for e in events: if not output.format(e): raise self.failureException, 'could not contruct message for %s' % e def failUnlessContainsInstanceOf(self, theClass, params, theList, msg=None): """Fail if there are no instances of theClass in theList with given params""" self.failIfNoMessage(theList) failure=(msg or 'no %s instances in %s' % (theClass.__name__, `theList`)) for item in theList: if issubclass(item.__class__, theClass): if not 
params: return for k, v in params.items(): if str(item.params[k]) <> v: failure=("%s.%s value was %s, expected %s" % (theClass.__name__, k, item.params[k], v)) break else: return raise self.failureException, failure def failIfContainsInstanceOf(self, theClass, params, theList, msg=None): """Fail if there are instances of theClass in theList with given params""" self.failIfNoMessage(theList) for item in theList: if theClass==Message and isinstance(item,SelfDoesntMatchLocation): continue if theClass==Message and isinstance(item,MissingSelf): continue if issubclass(item.__class__, theClass): if not params: raise self.failureException, \ (msg or 'unexpected %s' % (theClass.__name__)) allmatch = 1 for k, v in params.items(): if item.params[k] != v: allmatch = 0 if allmatch: raise self.failureException, \ "unexpected %s.%s with a value of %s" % \ (theClass.__name__, k, v) desc_re = re.compile("") validome_re = re.compile("", re.S) def getDescription(xmlfile): """Extract description and exception from XML file The deal here is that each test case is an XML file which contains not only a possibly invalid RSS feed but also the description of the test, i.e. the exception that we would expect the RSS validator to raise (or not) when it validates the feed. 
The expected exception and the human-readable description are placed into an XML comment like this: """ stream = open(xmlfile) xmldoc = stream.read() stream.close() search_results = desc_re.search(xmldoc) if search_results: description, cond, excName, plist = list(search_results.groups()) else: search_results = validome_re.search(xmldoc) if search_results: plist = '' description, cond, excName = list(search_results.groups()) excName = excName.capitalize() if excName=='Valid': cond,excName = '!', 'Message' else: raise RuntimeError, "can't parse %s" % xmlfile if cond == "": method = TestCase.failUnlessContainsInstanceOf else: method = TestCase.failIfContainsInstanceOf params = {} if plist: for entry in plist.split(','): name,value = entry.lstrip().split(':',1) params[name] = value exc = getattr(feedvalidator, excName) description = xmlfile + ": " + description return method, description, params, exc def buildTestCase(xmlfile, xmlBase, description, method, exc, params): """factory to create functions which validate `xmlfile` the returned function asserts that validating `xmlfile` (an XML file) will return a list of exceptions that include an instance of `exc` (an Exception class) """ func = lambda self, xmlfile=xmlfile, exc=exc, params=params: \ method(self, exc, params, feedvalidator.validateString(open(xmlfile).read(), fallback='US-ASCII', base=xmlBase)['loggedEvents']) func.__doc__ = description return func def buildTestSuite(): curdir = os.path.dirname(os.path.abspath(__file__)) basedir = os.path.split(curdir)[0] for xmlfile in sys.argv[1:] or (glob.glob(os.path.join(basedir, 'testcases', '**', '**', '*.xml')) + glob.glob(os.path.join(basedir, 'testcases', 'opml', '**', '*.opml'))): method, description, params, exc = getDescription(xmlfile) xmlBase = os.path.abspath(xmlfile).replace(basedir,"http://www.feedvalidator.org") testName = 'test_' + xmlBase.replace(os.path.sep, "/") testFunc = buildTestCase(xmlfile, xmlBase, description, method, exc, params) 
instanceMethod = new.instancemethod(testFunc, None, TestCase) setattr(TestCase, testName, instanceMethod) return unittest.TestLoader().loadTestsFromTestCase(TestCase) if __name__ == '__main__': suite = buildTestSuite() unittest.main(argv=sys.argv[:1]) python-feedvalidator-0~svn1022/tests/0000755000175000017500000000000011065534336016143 5ustar poxpoxpython-feedvalidator-0~svn1022/tests/testMediaTypes.py0000755000175000017500000000625610766017570021500 0ustar poxpox#!/usr/bin/python """$Id: testMediaTypes.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Joseph Walton " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2004 Joseph Walton" import os, sys curdir = os.path.abspath(os.path.dirname(sys.argv[0])) srcdir = os.path.split(curdir)[0] if srcdir not in sys.path: sys.path.insert(0, srcdir) basedir = os.path.split(srcdir)[0] import unittest from feedvalidator import mediaTypes from feedvalidator.logging import TYPE_RSS1, TYPE_RSS2, TYPE_ATOM def l(x): if x: return x.lower() else: return x class MediaTypesTest(unittest.TestCase): def testCheckValid(self): el = [] (t, c) = mediaTypes.checkValid(self.contentType, el) self.assertEqual(l(t), l(self.mediaType), 'Media type should be ' + self.mediaType) self.assertEqual(l(c), l(self.charset), 'Charset should be ' + str(self.charset) + ' for ' + self.mediaType + ' was ' + str(c)) if (self.error): self.assertEqual(len(el), 1, 'Expected errors to be logged') else: self.assertEqual(len(el), 0, 'Did not expect errors to be logged') def testCheckAgainstFeedType(self): FT=['Unknown', 'RSS 1.0', 'RSS 2.0', 'Atom', 'Atom 0.3'] el = [] r = mediaTypes.checkAgainstFeedType(self.mediaType, self.feedType, el) if (self.error): self.assertEqual(len(el), 1, 'Expected errors to be logged (' + self.mediaType + ',' + FT[self.feedType] + ')') else: self.assertEqual(len(el), 0, 'Did not expect errors to be logged (' + self.mediaType + ',' + FT[self.feedType] + ')') # Content-Type, Media type, Charset, Error? 
cvCases = [ ['text/xml', 'text/xml', None, False], ['text/xml; charset=UTF-8', 'text/xml', 'utf-8', False], ['application/xml', 'application/xml', None, False], ['text/plain', 'text/plain', None, True], ['application/octet-stream', 'application/octet-stream', None, True] ] # Media type, Feed type, Error? caftCases = [ ['text/xml', TYPE_RSS1, False], ['application/xml', TYPE_RSS1, False], ['application/rss+xml', TYPE_RSS1, False], ['application/rdf+xml', TYPE_RSS1, False], ['application/x.atom+xml', TYPE_RSS1, True], ['application/atom+xml', TYPE_RSS1, True], ['text/xml', TYPE_RSS2, False], ['application/xml', TYPE_RSS1, False], ['application/rss+xml', TYPE_RSS2, False], ['application/rdf+xml', TYPE_RSS2, True], ['application/x.atom+xml', TYPE_RSS2, True], ['application/atom+xml', TYPE_RSS2, True], ['text/xml', TYPE_ATOM, False], ['application/xml', TYPE_ATOM, False], ['application/rss+xml', TYPE_ATOM, True], ['application/rdf+xml', TYPE_ATOM, True], ['application/x.atom+xml', TYPE_ATOM, False], ['application/atom+xml', TYPE_ATOM, False], ] def buildTestSuite(): suite = unittest.TestSuite() for (ct, mt, cs, e) in cvCases: t = MediaTypesTest('testCheckValid') t.contentType = ct; t.mediaType = mt t.charset = cs t.error = e suite.addTest(t) for (mt, ft, e) in caftCases: t = MediaTypesTest('testCheckAgainstFeedType') t.mediaType = mt t.feedType = ft t.error = e suite.addTest(t) return suite if __name__ == "__main__": s = buildTestSuite() unittest.TextTestRunner().run(s) python-feedvalidator-0~svn1022/tests/testXmlEncodingDecode.py0000755000175000017500000001225110766017570022737 0ustar poxpox#!/usr/bin/python """$Id: testXmlEncodingDecode.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Joseph Walton " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2004 Joseph Walton" import os, sys curdir = os.path.abspath(os.path.dirname(sys.argv[0])) srcdir = os.path.split(curdir)[0] if srcdir not in sys.path: sys.path.insert(0, srcdir) basedir = 
os.path.split(srcdir)[0] import unittest from feedvalidator import xmlEncoding from feedvalidator.logging import * ctAX='application/xml' class TestDecode(unittest.TestCase): def _assertEqualUnicode(self, a, b): self.assertNotEqual(a, None, 'Decoded strings should not equal None') self.assertEqual(type(a), unicode, 'Decoded strings should be Unicode (was ' + str(type(a)) + ')') self.assertEqual(type(b), unicode, 'Test suite error: test strings must be Unicode') self.assertEqual(a, b) def testProvidedEncoding(self): loggedEvents=[] (encoding, decoded) = xmlEncoding.decode(ctAX, 'UTF-8', '', loggedEvents) self.assertEquals('UTF-8', encoding) self._assertEqualUnicode(decoded, u'') self.assertEqual(loggedEvents, []) loggedEvents=[] (encoding, decoded) = xmlEncoding.decode(ctAX, 'UTF-8', '', loggedEvents) self.assertEquals('UTF-8', encoding) self._assertEqualUnicode(decoded, u'') self.assertEquals(loggedEvents, []) def testNoDeclarationOrBOM(self): loggedEvents=[] self.assertEquals(xmlEncoding.decode(ctAX, None, '', loggedEvents)[-1], None) self.assertEquals(len(loggedEvents), 1) self.assertEquals(loggedEvents[0].__class__, MissingEncoding, "Must warn if there's no clue as to encoding") # This document is currently detected as UTF-8, rather than None. 
# # def testMissingEncodingDeclaration(self): # loggedEvents=[] # self._assertEqualUnicode(xmlEncoding.decode(ctAX, None, '', loggedEvents), u'') # self.assertEquals(len(loggedEvents), 1) # self.assertEquals(loggedEvents[0].__class__, MissingEncoding, "Must warn if there's no clue as to encoding") def testJustDeclaration(self): loggedEvents=[] (encoding, decoded) = xmlEncoding.decode(ctAX, None, '', loggedEvents) self.assertEquals(encoding, 'utf-8') self._assertEqualUnicode(decoded, u'') self.assertEquals(loggedEvents, []) def testSupplyUnknownEncoding(self): loggedEvents=[] self.assertEquals(xmlEncoding.decode(ctAX, 'X-FAKE', '', loggedEvents)[-1], None) self.assertEquals(len(loggedEvents), 1) self.assertEquals(loggedEvents[0].__class__, UnknownEncoding, 'Must fail if an unknown encoding is used') def testDeclareUnknownEncoding(self): loggedEvents=[] self.assertEquals(xmlEncoding.decode(ctAX, None, '', loggedEvents)[-1], None) self.assert_(loggedEvents) self.assertEquals(loggedEvents[-1].__class__, UnknownEncoding) def testWarnMismatch(self): loggedEvents=[] self.assertEquals(xmlEncoding.decode(ctAX, 'US-ASCII', '', loggedEvents)[-1], u'') self.assert_(loggedEvents) self.assertEquals(loggedEvents[-1].__class__, EncodingMismatch) def testDecodeUTF8(self): loggedEvents=[] self.assertEquals(xmlEncoding.decode(ctAX, 'utf-8', '\xc2\xa3', loggedEvents)[-1], u'\u00a3') self.assertEquals(loggedEvents, []) def testDecodeBadUTF8(self): """Ensure bad UTF-8 is flagged as such, but still decoded.""" loggedEvents=[] self.assertEquals(xmlEncoding.decode(ctAX, 'utf-8', '\xa3', loggedEvents)[-1], u'\ufffd') self.assert_(loggedEvents) self.assertEquals(loggedEvents[-1].__class__, UnicodeError) def testRemovedBOM(self): """Make sure the initial BOM signature is not in the decoded string.""" loggedEvents=[] self.assertEquals(xmlEncoding.decode(ctAX, 'UTF-16', '\xff\xfe\x3c\x00\x78\x00\x2f\x00\x3e\x00', loggedEvents)[-1], u'') self.assertEquals(loggedEvents, []) class 
TestRemoveDeclaration(unittest.TestCase): def testRemoveSimple(self): self.assertEqual(xmlEncoding.removeDeclaration( ''), '') self.assertEqual(xmlEncoding.removeDeclaration( ""), "") def testNotRemoved(self): """Make sure that invalid, or missing, declarations aren't affected.""" for x in [ '', # Missing version '', # No declaration ' ' # Space before declaration ]: self.assertEqual(xmlEncoding.removeDeclaration(x), x) def buildTestSuite(): suite = unittest.TestSuite() loader = unittest.TestLoader() suite.addTest(loader.loadTestsFromTestCase(TestDecode)) suite.addTest(loader.loadTestsFromTestCase(TestRemoveDeclaration)) return suite if __name__ == "__main__": unittest.main() python-feedvalidator-0~svn1022/tests/testUri.py0000755000175000017500000001736710766017570020200 0ustar poxpox#!/usr/bin/python """$Id: testUri.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Joseph Walton " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2004 Joseph Walton" import os, sys curdir = os.path.abspath(os.path.dirname(sys.argv[0])) srcdir = os.path.split(curdir)[0] if srcdir not in sys.path: sys.path.insert(0, srcdir) basedir = os.path.split(srcdir)[0] import unittest class UriTest(unittest.TestCase): pass testsEqual = [ ['http://example.com/', 'http://example.com'], ['HTTP://example.com/', 'http://example.com/'], ['http://example.com/', 'http://example.com:/'], ['http://example.com/', 'http://example.com:80/'], ['http://example.com/', 'http://Example.com/'], ['http://example.com/~smith/', 'http://example.com/%7Esmith/'], ['http://example.com/~smith/', 'http://example.com/%7esmith/'], ['http://example.com/%7Esmith/', 'http://example.com/%7esmith/'], ['http://example.com/%C3%87', 'http://example.com/C%CC%A7'], ['tag:example.com,2004:Test', 'TAG:example.com,2004:Test'], ['ftp://example.com/', 'ftp://EXAMPLE.COM/'], ['ftp://example.com/', 'ftp://example.com:21/'], ['mailto:user@example.com', 'mailto:user@EXAMPLE.COM'], ['../%C3%87', '../C%CC%A7'], ] 
# Pairs of URIs that must NOT compare as equivalent.
testsDifferent = [
  ['http://example.com/', 'http://example.org/'],
  ['http://example.com/index.html', 'http://example.com'],
  ['FTP://example.com/', 'http://example.com/'],
  ['http://example.com/', 'http://example.com:8080/'],
  ['http://example.com:8080/', 'http://example.com:80/'],
  ['http://example.com/index.html', 'http://example.com/INDEX.HTML'],
  ['http://example.com/~smith/', 'http://example.com/%7Esmith'],
  ['http://example.com/~smith/', 'http://example.com/%2fsmith/'],
  ['http://user:password@example.com/', 'http://USER:PASSWORD@example.com/'],
  # Not a valid HTTP URL
  ['http://example.com:x', 'http://example.com/'],
  ['tag:example.com,2004:Test', 'tag:EXAMPLE.COM,2004:Test'],
  ['tag:user@example.com,2004:Test', 'tag:user@EXAMPLE.COM,2004:Test'],
  ['tag:example.com,2004:test', 'Tag:example.com,2004:TEST'],
  ['tag:example.com,2004:Test', 'Tag:example.com,2004-01:Test'],
  ['tag:user@example.com,2004:Test', 'tag:USER@example.com,2004:Test'],
  ['ftp://example.com/', 'ftp://example.com/test'],
  ['mailto:user@example.com', 'mailto:USER@example.com'],
  ['mailto:user@example.com?subject=test', 'mailto:user@example.com?subject=TEST']
]

# Examples from PaceCanonicalIds
# One-element rows are already canonical; two-element rows map an input
# to its expected canonical form.
testsCanonical = [
  ['HTTP://example.com/', 'http://example.com/'],
  ['http://EXAMPLE.COM/', 'http://example.com/'],
  ['http://example.com/%7Ejane', 'http://example.com/~jane'],
  ['http://example.com/?q=1%2f2', 'http://example.com/?q=1%2F2'],
  ['http://example.com/?q=1/2'],
  ['http://example.com/a/./b', 'http://example.com/a/b'],
  ['http://example.com/a/../a/b', 'http://example.com/a/b'],
  ['http://user:password@example.com/', 'http://user:password@example.com/'],
  ['http://User:Password@Example.com/', 'http://User:Password@example.com/'],
  ['http://@example.com/', 'http://example.com/'],
  ['http://@Example.com/', 'http://example.com/'],
  ['http://:@example.com/', 'http://example.com/'],
  ['http://:@Example.com/', 'http://example.com/'],
  ['http://example.com', 'http://example.com/'],
  ['http://example.com:80/', 'http://example.com/'],
  ['http://www.w3.org/2000/01/rdf-schema#'],
  ['http://example.com/?q=C%CC%A7', 'http://example.com/?q=%C3%87'],
  ['http://example.com/?q=%E2%85%A0'],
  ['http://example.com/?'],
  [u'http://example.com/%C3%87'],
  # Other tests
  ['mailto:user@EXAMPLE.COM', 'mailto:user@example.com'],
  ['TAG:example.com,2004:Test', 'tag:example.com,2004:Test'],
  ['ftp://Example.Com:21/', 'ftp://example.com/'],
  ['http://example.com/?q=%E2%85%A0'],
  ['ldap://[2001:db8::7]/c=GB?objectClass?one'],
  ['mailto:John.Doe@example.com'],
  ['news:comp.infosystems.www.servers.unix'],
  ['tel:+1-816-555-1212'],
  ['telnet://192.0.2.16:80/'],
  ['urn:oasis:names:specification:docbook:dtd:xml:4.1.2'],
  ['http://example.com:081/', 'http://example.com:81/'],
  ['/test#test#test', '/test#test%23test'],
  ['http://com./'],
  ['http://example.com./', 'http://example.com/'],
  ['http://www.example.com//a//', 'http://www.example.com//a//'],
  ['http://www.example.com/./a//', 'http://www.example.com/a//'],
  ['http://www.example.com//a/./', 'http://www.example.com//a/'],
  ['http://example.com/%2F/'],
  ["aa1+-.:///?a1-._~!$&'()*+,;=:@/?#a1-._~!$&'()*+,;=:@/?"],
  ['http://example.com/?a+b'],
  ['http://a/b/c/../../../../g', 'http://a/g'],
  ['/.foo', '/.foo'],
  ['/foo/bar/.', '/foo/bar/'],
  ['/foo/bar/..', '/foo/'],
  ['http:test'],
  ['tag:'],
  ['file://', 'file:///'],
  ['telnet://example.com:23/', 'telnet://example.com/'],
  ['x://:@a/', 'x://a/'],
  ['tag:www.stanleysy.com,2005://1.119'],
  ['tag:timothy@hpl.hp.com,2001:web/externalHome'],
  ['http://xxx/read?id=abc%26x%3Dz&x=y'],
  ['tag:www.stanleysy.com,2005:%2F%2F1.119'],
  # IPv6 literals should be accepted
  ['http://[fe80::290:4bff:fe1e:4374]/tests/atom/ipv6/'],
  ['http://[fe80::290:4bff:fe1e:4374]:80/tests/atom/ipv6/',
   'http://[fe80::290:4bff:fe1e:4374]/tests/atom/ipv6/'],
  ['http://[fe80::290:4bff:fe1e:4374]:8080/tests/atom/ipv6/'],
  ['http://[fe80::290:4bff:fe1e:4374]:/tests/atom/ipv6/',
   'http://[fe80::290:4bff:fe1e:4374]/tests/atom/ipv6/'],
]

# These are invalid URI references, but we
# can still sensibly
# normalise them
testNormalisableBadUris = [
  ['http://example.com/\\/', 'http://example.com/%5C/'],
  ['http://example.com/?a b', 'http://example.com/?a%20b'],
]

# URI references for which canonicalForm() must return None.
testsInvalid = [
  # This URI is not in canonical form, and cannot be normalised
  # FIX: the comma after this literal was missing, so Python silently
  # concatenated it with 'foo/../bar' below, losing both test cases.
  'http://example.com/?q=%C7',
  # Don't try to deal with relative URI references
  'foo/../bar',
  './http://',
  './\\/',
  # Bad IPv6 literals
  'http://fe80::290:4bff:fe1e:4374]/tests/atom/ipv6/',
  'http://[fe80::290:4bff:fe1e:4374/tests/atom/ipv6/',
]

import feedvalidator.uri
from feedvalidator.validators import rfc2396

def buildTestSuite():
  """Synthesise one test method on UriTest per row of the tables above.

  Equality/difference rows exercise Uri comparison; canonical rows check
  canonicalForm(); invalid rows check canonicalForm() returns None.
  """
  i = 0
  for t in testsEqual:
    i += 1
    def tstEqual(self, a, b):
      self.assertEqual(feedvalidator.uri.Uri(a), feedvalidator.uri.Uri(b))
    # Bind loop values as defaults so each generated test keeps its own pair.
    func = lambda self, a=t[0], b=t[1]: tstEqual(self, a, b)
    func.__doc__ = 'Test ' + t[0] + " == " + t[1]
    setattr(UriTest, 'test' + str(i), func)
  for t in testsDifferent:
    i += 1
    def tstDifferent(self, a, b):
      self.assertNotEqual(feedvalidator.uri.Uri(a), feedvalidator.uri.Uri(b))
    func = lambda self, a=t[0], b=t[1]: tstDifferent(self, a, b)
    func.__doc__ = 'Test ' + t[0] + " != " + t[1]
    setattr(UriTest, 'test' + str(i), func)
  for t in testsCanonical + testNormalisableBadUris:
    i += 1
    o = t[0]
    # Single-element rows are expected to already be canonical.
    if len(t) > 1:
      c = t[1]
    else:
      c = o
    def tstCanonicalForm(self, a, b):
      cf = feedvalidator.uri.canonicalForm(a)
      self.assertEqual(cf, b, 'Became: ' + str(cf))
    func = lambda self, a=o, b=c: tstCanonicalForm(self, a, b)
    func.__doc__ = 'Test ' + o + ' becomes ' + c
    setattr(UriTest, 'test' + str(i), func)
  for a in testsInvalid:
    i += 1
    def tstCanFindCanonicalForm(self, a):
      # Consistency: use assertEqual like the other generated tests
      # (assertEquals is a deprecated alias).
      self.assertEqual(feedvalidator.uri.canonicalForm(a), None)
    func = lambda self, a=a: tstCanFindCanonicalForm(self, a)
    func.__doc__ = 'Test ' + a + ' cannot be canonicalised'
    setattr(UriTest, 'test' + str(i), func)

  # Test everything against the rfc2396 matcher
  r2 = feedvalidator.validators.rfc2396()
  for t in testsEqual + testsDifferent + testsCanonical:
    i += 1
    def tstMatchesRe(self, a):
      self.assertTrue(r2.rfc2396_re.match(a))
func = lambda self, a=t[0]: tstMatchesRe(self, a) func.__doc__ = 'Test ' + t[0] + ' is matched by the URI regular expression' setattr(UriTest, 'test' + str(i), func) return unittest.TestLoader().loadTestsFromTestCase(UriTest) if __name__ == '__main__': buildTestSuite() unittest.main() python-feedvalidator-0~svn1022/tests/config.py0000644000175000017500000000045410152373264017762 0ustar poxpoxfrom os import environ # This is a test config, used by the runtests script, to ensure check.cgi # runs without requiring a web server. HOMEURL = 'http://localhost/check' PYDIR = '/usr/lib/python/' WEBDIR = environ['FEEDVALIDATOR_HOME'] SRCDIR = WEBDIR + '/src' DOCSURL = 'docs' CSSURL = 'css' python-feedvalidator-0~svn1022/tests/testXmlEncoding.py0000755000175000017500000002100310766017570021626 0ustar poxpox#!/usr/bin/python """$Id: testXmlEncoding.py 988 2008-03-12 18:22:48Z sa3ruby $ Test XML character decoding against a range of encodings, valid and not.""" __author__ = "Joseph Walton " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2004, 2006 Joseph Walton" import os, sys import codecs import re curdir = os.path.abspath(os.path.dirname(__file__)) srcdir = os.path.split(curdir)[0] if srcdir not in sys.path: sys.path.insert(0, srcdir) basedir = os.path.split(srcdir)[0] skippedNames = [] import unittest, new, glob, re from feedvalidator import xmlEncoding class EncodingTestCase(unittest.TestCase): def testEncodingMatches(self): try: enc = xmlEncoding.detect(self.bytes) except UnicodeError,u: self.fail("'" + self.filename + "' should not cause an exception (" + str(u) + ")") self.assert_(enc, 'An encoding must be returned for all valid files (' + self.filename + ')') self.assertEqual(enc, self.expectedEncoding, 'Encoding for ' + self.filename + ' should be ' + self.expectedEncoding + ', but was ' + enc) def testEncodingFails(self): eventLog = [] try: encoding = xmlEncoding.detect(self.bytes, eventLog) except UnicodeError,u: self.fail("'" + self.filename + "' 
should not cause an exception (" + str(u) + ")") if encoding: self.fail("'" + self.filename + "' should not parse successfully (as " + encoding + ")") if not(eventLog): self.fail("'" + self.filename + "' should give a reason for parse failure") bom8='\xEF\xBB\xBF' bom16BE='\xFE\xFF' bom16LE='\xFF\xFE' bom32BE='\x00\x00\xFE\xFF' bom32LE='\xFF\xFE\x00\x00' # Some fairly typical Unicode text. It should survive XML roundtripping. docText=u'\u201c"This\uFEFF" is\na\r\u00A3t\u20Acst\u201D' validDecl = re.compile('[A-Za-z][-A-Za-z0-9._]*') def makeDecl(enc=None): if enc: assert validDecl.match(enc), "'" + enc + "' is not a valid encoding name" return "" else: return "" def encoded(enc, txt=docText): return codecs.getencoder(enc)(txt, 'xmlcharrefreplace')[0] def genValidXmlTestCases(): someFailed = False # Required yield('UTF-8', ['BOM', 'declaration'], bom8 + makeDecl('UTF-8') + encoded('UTF-8')) yield('UTF-8', [], encoded('UTF-8')) yield('UTF-8', ['noenc'], makeDecl() + encoded('UTF-8')) yield('UTF-8', ['declaration'], makeDecl('UTF-8') + encoded('UTF-8')) yield('UTF-8', ['BOM'], bom8 + encoded('UTF-8')) yield('UTF-8', ['BOM', 'noenc'], bom8 + makeDecl('UTF-8') + encoded('UTF-8')) yield('UTF-16', ['BOM', 'declaration', 'BE'], bom16BE + encoded('UTF-16BE', makeDecl('UTF-16') + docText)) yield('UTF-16', ['BOM', 'declaration', 'LE'], bom16LE + encoded('UTF-16LE', makeDecl('UTF-16') + docText)) yield('UTF-16', ['BOM', 'BE'], bom16BE + encoded('UTF-16BE')) yield('UTF-16', ['BOM', 'BE', 'noenc'], bom16BE + encoded('UTF-16BE', makeDecl() + docText)) yield('UTF-16', ['BOM', 'LE'], bom16LE + encoded('UTF-16LE')) yield('UTF-16', ['BOM', 'LE', 'noenc'], bom16LE + encoded('UTF-16LE', makeDecl() + docText)) yield('UTF-16', ['declaration', 'BE'], encoded('UTF-16BE', makeDecl('UTF-16') + docText)) yield('UTF-16', ['declaration', 'LE'], encoded('UTF-16LE', makeDecl('UTF-16') + docText)) # Standard wide encodings try: yield('ISO-10646-UCS-2', ['BOM', 'declaration', 'BE'], bom16BE + 
encoded('UCS-2BE', makeDecl('ISO-10646-UCS-2') + docText)) yield('ISO-10646-UCS-2', ['BOM', 'declaration', 'LE'], bom16LE + encoded('UCS-2LE', makeDecl('ISO-10646-UCS-2') + docText)) yield('UTF-32', ['BOM', 'declaration', 'BE'], bom32BE + encoded('UTF-32BE', makeDecl('UTF-32') + docText)) yield('UTF-32', ['BOM', 'declaration', 'LE'], bom32LE + encoded('UTF-32LE', makeDecl('UTF-32') + docText)) yield('UTF-32', ['declaration', 'BE'], encoded('UTF-32BE', makeDecl('UTF-32') + docText)) yield('UTF-32', ['declaration', 'LE'], encoded('UTF-32LE', makeDecl('UTF-32') + docText)) yield('ISO-10646-UCS-4', ['BOM', 'declaration', 'BE'], bom32BE + encoded('UCS-4BE', makeDecl('ISO-10646-UCS-4') + docText)) yield('ISO-10646-UCS-4', ['BOM', 'declaration', 'LE'], bom32LE + encoded('UCS-4LE', makeDecl('ISO-10646-UCS-4') + docText)) except LookupError, e: print e someFailed = True # Encodings that don't have BOMs, and require declarations withDeclarations = [ # Common ASCII-compatible encodings 'US-ASCII', 'ISO-8859-1', 'ISO-8859-15', 'WINDOWS-1252', # EBCDIC 'IBM037', 'IBM038', # Encodings with explicit endianness 'UTF-16BE', 'UTF-16LE', 'UTF-32BE', 'UTF-32LE', # (UCS doesn't seem to define endian'd encodings) ] for enc in withDeclarations: try: yield(enc, ['declaration'], encoded(enc, makeDecl(enc) + docText)) except LookupError, e: print e someFailed = True # 10646-UCS encodings, with no BOM but with a declaration try: yield('ISO-10646-UCS-2', ['declaration', 'BE'], encoded('UCS-2BE', makeDecl('ISO-10646-UCS-2') + docText)) yield('ISO-10646-UCS-2', ['declaration', 'LE'], encoded('UCS-2LE', makeDecl('ISO-10646-UCS-2') + docText)) yield('ISO-10646-UCS-4', ['declaration', 'BE'], encoded('UCS-4BE', makeDecl('ISO-10646-UCS-4') + docText)) yield('ISO-10646-UCS-4', ['declaration', 'LE'], bom32LE + encoded('UCS-4LE', makeDecl('ISO-10646-UCS-4') + docText)) except LookupError, e: print e someFailed = True # Files with aliases for declarations. 
The declared alias should be # reported back, rather than the canonical form. try: yield('csUnicode', ['alias', 'BOM', 'BE'], bom16BE + encoded('UCS-2BE', makeDecl('csUnicode') + docText)) yield('csUnicode', ['alias', 'LE'], encoded('UCS-2LE', makeDecl('csUnicode') + docText)) yield('csucs4', ['alias', 'BE'], encoded('csucs4', makeDecl('csucs4') + docText)) except LookupError, e: print e someFailed = True if someFailed: print "Unable to generate some tests; see README for details" def genInvalidXmlTestCases(): # Invalid files someFailed = False # UTF-32 with a non-four-byte declaration try: yield('UTF-32', ['BOM', 'BE', 'declaration'], encoded('UTF-32', makeDecl('US-ASCII') + docText)) except LookupError, e: print e someFailed = True # UTF-16 with a non-two-byte declaration yield('UTF-16', ['BOM', 'BE', 'declaration'], encoded('UTF-16', makeDecl('UTF-8') + docText)) # UTF-16BE, with a BOM yield('UTF-16BE', ['BOM', 'declaration'], bom16BE + encoded('UTF-16BE', makeDecl('UTF-16BE') + docText)) # UTF-8, with a BOM, declaring US-ASCII yield('UTF-8', ['BOM', 'declaration'], bom8 + encoded('UTF-8', makeDecl('US-ASCII') + docText)) try: # UTF-32, with a BOM, beginning without a declaration yield('UTF-32', ['BOM', 'BE'], bom32BE + encoded('UTF-32BE')) # UTF-32, with a BOM, and a declaration with no encoding yield('UTF-32', ['BOM', 'BE', 'noenc'], bom32BE + encoded('UTF-32BE', makeDecl() + docText)) except LookupError, e: print e someFailed = True # UTF-16, no BOM, no declaration # yield('UTF-16', ['BE'], encoded('UTF-16BE')) # This case falls through, and is identified as UTF-8; leave it out # until we're doing decoding as well as detection. 
if someFailed: print "Unable to generate some tests; see README for details" def genXmlTestCases(): for (enc, t, x) in genValidXmlTestCases(): yield (enc, t, x, True) for (enc, t, x) in genInvalidXmlTestCases(): yield (enc, t, x, False) def buildTestSuite(): import codecs suite = unittest.TestSuite() for (enc, t, x, valid) in genXmlTestCases(): t.sort() if valid: pfx = 'valid_' else: pfx = 'invalid_' name = pfx + '_'.join([enc] + t) + '.xml' # name, x is content try: alias = enc if enc.startswith('ISO-10646-'): alias = enc[10:] c = codecs.lookup(alias) if valid: t = EncodingTestCase('testEncodingMatches') t.expectedEncoding = enc else: t = EncodingTestCase('testEncodingFails') t.filename = name t.bytes = x suite.addTest(t) except LookupError,e: print "Skipping " + name + ": " + str(e) skippedNames.append(name) return suite if __name__ == "__main__": s = buildTestSuite() unittest.TextTestRunner().run(s) if skippedNames: print "Tests skipped:",len(skippedNames) print "Please see README for details" python-feedvalidator-0~svn1022/tests/testHowtoNs.py0000755000175000017500000000202110601211034020772 0ustar poxpox#!/usr/bin/python import os, sys, unittest curdir = os.path.abspath(os.path.dirname(sys.argv[0])) srcdir = os.path.split(curdir)[0] if srcdir not in sys.path: sys.path.insert(0, srcdir) basedir = os.path.split(srcdir)[0] from feedvalidator.base import namespaces from os.path import dirname,join class HowtoNsTest(unittest.TestCase): def test_howto_declare_namespaces(self): base=dirname(dirname(dirname(os.path.abspath(__file__)))) filename=join(join(join(base,'docs'),'howto'),'declare_namespaces.html') handle=open(filename) page=handle.read() handle.close() for uri,prefix in namespaces.items(): if prefix=='xml': continue if prefix=='soap': continue if uri.find('ModWiki')>0: continue xmlns = 'xmlns:%s="%s"' % (prefix,uri) self.assertTrue(page.find(xmlns)>=0,xmlns) def buildTestSuite(): suite = unittest.TestSuite() loader = unittest.TestLoader() 
suite.addTest(loader.loadTestsFromTestCase(HowtoNsTest)) return suite if __name__ == '__main__': unittest.main() python-feedvalidator-0~svn1022/ws-demo.py0000755000175000017500000000334710071652502016730 0ustar poxpox#!/usr/bin/python # This is a simple demo of validation through the web service. WS_HOST = 'www.feedvalidator.org' WS_URI = '/check.cgi' import urllib, httplib from xml.dom import minidom from sys import exit # Fetch the feed to validate rawData = open('../testcases/rss/may/image_height_recommended.xml').read() # Specify the content type, including the charset if known hdrs = {'Content-Type': 'application/xml'} # Simply POST the feed contents to the validator URL connection=httplib.HTTPConnection(WS_HOST, 80) connection.request('POST', WS_URI, rawData, hdrs) response=connection.getresponse() # The response is a SOAP message, as XML (otherwise there's a problem # with the validator) try: document=minidom.parseString(response.read()) except: print "Server error, unable to validate:",response.status,response.reason print "(Unable to parse response as XML.)" exit(20) # If the status is OK, validation took place. if response.status == 200: errors = document.getElementsByTagName("text") if not errors: print "The feed is valid!" exit(0) else: # Errors were found for node in errors: print "".join([child.data for child in node.childNodes]) exit(5) # If there was a problem on the server, show details elif response.status >= 500: errors = document.getElementsByTagName("faultstring") for node in errors: print "".join([child.data for child in node.childNodes]) traceback = document.getElementsByTagNameNS("http://www.python.org/doc/current/lib/module-traceback.html", "traceback") if traceback: print "".join([child.data for child in traceback[0].childNodes]) exit(10) # The unexpected happened... 
else: print "Unexpected server response:",response.status,response.reason exit(20) python-feedvalidator-0~svn1022/index.py0000644000175000017500000000407110346044323016455 0ustar poxpoximport feedvalidator import sys def escapeURL(url): import cgi, urllib, urlparse parts = map(urllib.quote, map(urllib.unquote, urlparse.urlparse(url))) return cgi.escape(urlparse.urlunparse(parts)) def sanitizeURL(url): # Allow feed: URIs, as described by draft-obasanjo-feed-URI-scheme-02 if url.lower().startswith('feed:'): url = url[5:] if url.startswith('//'): url = 'http:' + url if not url.split(':')[0].lower() in ['http','https']: url = 'http://%s' % url url = url.strip() # strip user and password import re url = re.sub(r'^(\w*://)[-+.\w]*(:[-+.\w]+)?@', r'\1' ,url) return url def index(req,url="",out="xml"): if not url: s = """Feed Validator Enter the URL to validate:

URL:
""" return s url = sanitizeURL(url) events = feedvalidator.validateURL(url, firstOccurrenceOnly=1)['loggedEvents'] # (optional) arg 2 is compatibility level # "A" is most basic level # "AA" mimics online validator # "AAA" is experimental; these rules WILL change or disappear in future versions from feedvalidator import compatibility filter = "AA" filterFunc = getattr(compatibility, filter) events = filterFunc(events) if out == "html": s = "

Validating " + escapeURL(url) + "...

"

    from feedvalidator.formatter.text_plain import Formatter
    output = Formatter(events)
    if output:
      s += "\n".join(output)
    else:
      s += "No errors or warnings"

    s += "
" return s else: from feedvalidator.formatter.text_xml import Formatter s = "\n".join(Formatter(events)) or "" s = '\n\n' + s + "" req.content_type = "application/xml" return s if __name__=="__main__": import sys for url in sys.argv[1:]: print index(0,url=url,out="html") python-feedvalidator-0~svn1022/feedvalidator/0000755000175000017500000000000011065534350017606 5ustar poxpoxpython-feedvalidator-0~svn1022/feedvalidator/root.py0000644000175000017500000001634211014372756021155 0ustar poxpox"""$Id: root.py 1013 2008-05-19 21:49:34Z joe.walton.gglcd $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 1013 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" from base import validatorBase rss11_namespace='http://purl.org/net/rss1.1#' purl1_namespace='http://purl.org/rss/1.0/' soap_namespace='http://feeds.archive.org/validator/' pie_namespace='http://purl.org/atom/ns#' atom_namespace='http://www.w3.org/2005/Atom' opensearch_namespace='http://a9.com/-/spec/opensearch/1.1/' xrds_namespace='xri://$xrds' kml20_namespace='http://earth.google.com/kml/2.0' kml21_namespace='http://earth.google.com/kml/2.1' kml22_namespace='http://earth.google.com/kml/2.2' # # Main document. 
# Supports rss, rdf, pie, kml, and ffkar # class root(validatorBase): def __init__(self, parent, base): validatorBase.__init__(self) self.parent = parent self.dispatcher = parent self.name = "root" self.xmlBase = base self.xmlLang = None def startElementNS(self, name, qname, attrs): if name=='rss': if qname: from logging import InvalidNamespace self.log(InvalidNamespace({"parent":"root", "element":name, "namespace":qname})) self.dispatcher.defaultNamespaces.append(qname) if name=='feed' or name=='entry': if self.namespace.has_key('atom'): from logging import AvoidNamespacePrefix self.log(AvoidNamespacePrefix({'prefix':'atom'})) if self.namespace.has_key('xhtml'): from logging import AvoidNamespacePrefix self.log(AvoidNamespacePrefix({'prefix':'xhtml'})) if qname==pie_namespace: from logging import ObsoleteNamespace self.log(ObsoleteNamespace({"element":"feed"})) self.dispatcher.defaultNamespaces.append(pie_namespace) from logging import TYPE_ATOM self.setFeedType(TYPE_ATOM) elif not qname: from logging import MissingNamespace self.log(MissingNamespace({"parent":"root", "element":name})) else: if name=='feed': from logging import TYPE_ATOM self.setFeedType(TYPE_ATOM) else: from logging import TYPE_ATOM_ENTRY self.setFeedType(TYPE_ATOM_ENTRY) self.dispatcher.defaultNamespaces.append(atom_namespace) if qname<>atom_namespace: from logging import InvalidNamespace self.log(InvalidNamespace({"parent":"root", "element":name, "namespace":qname})) self.dispatcher.defaultNamespaces.append(qname) if name=='Channel': if not qname: from logging import MissingNamespace self.log(MissingNamespace({"parent":"root", "element":name})) elif qname != rss11_namespace : from logging import InvalidNamespace self.log(InvalidNamespace({"parent":"root", "element":name, "namespace":qname})) else: self.dispatcher.defaultNamespaces.append(qname) from logging import TYPE_RSS1 self.setFeedType(TYPE_RSS1) if name=='kml': from logging import TYPE_KML20, TYPE_KML21, TYPE_KML22 
self.dispatcher.defaultNamespaces.append(qname) if not qname: from logging import MissingNamespace self.log(MissingNamespace({"parent":"root", "element":name})) qname = kml20_namespace feedType = TYPE_KML20 elif qname == kml20_namespace: feedType = TYPE_KML20 elif qname == kml21_namespace: feedType = TYPE_KML21 elif qname == kml22_namespace: feedType = TYPE_KML22 elif qname != kml20_namespace and qname != kml21_namespace and qname != kml22_namespace: from logging import InvalidNamespace self.log(InvalidNamespace({"element":name, "namespace":qname})) qname = kml22_namespace feedType = TYPE_KML22 self.setFeedType(feedType) if name=='OpenSearchDescription': if not qname: from logging import MissingNamespace self.log(MissingNamespace({"parent":"root", "element":name})) qname = opensearch_namespace elif qname != opensearch_namespace: from logging import InvalidNamespace self.log(InvalidNamespace({"element":name, "namespace":qname})) self.dispatcher.defaultNamespaces.append(qname) qname = opensearch_namespace if name=='XRDS': from logging import TYPE_XRD self.setFeedType(TYPE_XRD) if not qname: from logging import MissingNamespace self.log(MissingNamespace({"parent":"root", "element":name})) qname = xrds_namespace elif qname != xrds_namespace: from logging import InvalidNamespace self.log(InvalidNamespace({"element":name, "namespace":qname})) self.dispatcher.defaultNamespaces.append(qname) qname = xrds_namespace validatorBase.startElementNS(self, name, qname, attrs) def unknown_starttag(self, name, qname, attrs): from logging import ObsoleteNamespace,InvalidNamespace,UndefinedElement if qname in ['http://example.com/newformat#','http://purl.org/atom/ns#']: self.log(ObsoleteNamespace({"element":name, "namespace":qname})) elif name=='feed': self.log(InvalidNamespace({"element":name, "namespace":qname})) else: self.log(UndefinedElement({"parent":"root", "element":name})) from validators import any return any(self, name, qname, attrs) def do_rss(self): from rss import rss 
return rss() def do_feed(self): from feed import feed if pie_namespace in self.dispatcher.defaultNamespaces: from validators import eater return eater() return feed() def do_entry(self): from entry import entry return entry() def do_app_categories(self): from logging import TYPE_APP_CATEGORIES self.setFeedType(TYPE_APP_CATEGORIES) from categories import categories return categories() def do_app_service(self): from logging import TYPE_APP_SERVICE self.setFeedType(TYPE_APP_SERVICE) from service import service return service() def do_kml(self): from kml import kml return kml() def do_opml(self): from opml import opml return opml() def do_outlineDocument(self): from logging import ObsoleteVersion self.log(ObsoleteVersion({"element":"outlineDocument"})) from opml import opml return opml() def do_opensearch_OpenSearchDescription(self): import opensearch self.dispatcher.defaultNamespaces.append(opensearch_namespace) from logging import TYPE_OPENSEARCH self.setFeedType(TYPE_OPENSEARCH) return opensearch.OpenSearchDescription() def do_xrds_XRDS(self): from xrd import xrds return xrds() def do_rdf_RDF(self): from rdf import rdf self.dispatcher.defaultNamespaces.append(purl1_namespace) return rdf() def do_Channel(self): from channel import rss10Channel return rss10Channel() def do_soap_Envelope(self): return root(self, self.xmlBase) def do_soap_Body(self): self.dispatcher.defaultNamespaces.append(soap_namespace) return root(self, self.xmlBase) def do_request(self): return root(self, self.xmlBase) def do_xhtml_html(self): from logging import UndefinedElement self.log(UndefinedElement({"parent":"root", "element":"xhtml:html"})) from validators import eater return eater() python-feedvalidator-0~svn1022/feedvalidator/entry.py0000644000175000017500000001016010766017570021326 0ustar poxpox"""$Id: entry.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" 
from base import validatorBase from validators import * from logging import * from itunes import itunes_item from extension import extension_entry # # pie/echo entry element. # class entry(validatorBase, extension_entry, itunes_item): def getExpectedAttrNames(self): return [(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'parseType')] def prevalidate(self): self.links=[] self.content=None def validate(self): if not 'title' in self.children: self.log(MissingElement({"parent":self.name, "element":"title"})) if not 'author' in self.children and not 'author' in self.parent.children: self.log(MissingElement({"parent":self.name, "element":"author"})) if not 'id' in self.children: self.log(MissingElement({"parent":self.name, "element":"id"})) if not 'updated' in self.children: self.log(MissingElement({"parent":self.name, "element":"updated"})) if self.content: if not 'summary' in self.children: if self.content.attrs.has_key((None,"src")): self.log(MissingSummary({"parent":self.parent.name, "element":self.name})) ctype = self.content.type if ctype.find('/') > -1 and not ( ctype.endswith('+xml') or ctype.endswith('/xml') or ctype.startswith('text/')): self.log(MissingSummary({"parent":self.parent.name, "element":self.name})) else: if not 'summary' in self.children: self.log(MissingTextualContent({"parent":self.parent.name, "element":self.name})) for link in self.links: if link.rel == 'alternate': break else: self.log(MissingContentOrAlternate({"parent":self.parent.name, "element":self.name})) # can only have one alternate per type types={} for link in self.links: if not link.rel=='alternate': continue if not link.type in types: types[link.type]=[] if link.hreflang in types[link.type]: self.log(DuplicateAtomLink({"parent":self.name, "element":"link", "type":link.type, "hreflang":link.hreflang})) else: types[link.type] += [link.hreflang] if self.itunes: itunes_item.validate(self) def do_author(self): from author import author return author() def do_category(self): from 
category import category return category() def do_content(self): from content import content self.content=content() return self.content, noduplicates() def do_contributor(self): from author import author return author() def do_id(self): return canonicaluri(), nows(), noduplicates(), unique('id',self.parent,DuplicateEntries) def do_link(self): from link import link self.links += [link()] return self.links[-1] def do_published(self): return rfc3339(), nows(), noduplicates() def do_source(self): return source(), noduplicates() def do_rights(self): from content import textConstruct return textConstruct(), noduplicates() def do_summary(self): from content import textConstruct return textConstruct(), noduplicates() def do_title(self): from content import textConstruct return textConstruct(), noduplicates() def do_updated(self): return rfc3339(), nows(), noduplicates(), unique('updated',self.parent,DuplicateUpdated) def do_app_edited(self): return rfc3339(), nows(), noduplicates() def do_app_control(self): return app_control(), noduplicates() class app_control(validatorBase): def do_app_draft(self): return yesno(), noduplicates() from feed import feed class source(feed): def missingElement(self, params): self.log(MissingSourceElement(params)) def validate(self): self.validate_metadata() def do_author(self): if not 'author' in self.parent.children: self.parent.children.append('author') return feed.do_author(self) def do_entry(self): self.log(UndefinedElement({"parent":self.name, "element":"entry"})) return eater() python-feedvalidator-0~svn1022/feedvalidator/rss.py0000644000175000017500000000320410766017570020775 0ustar poxpox"""$Id: rss.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" from base import validatorBase from logging import * from validators import noduplicates # # Rss element. 
The only valid child element is "channel" # class rss(validatorBase): def do_channel(self): from channel import rss20Channel return rss20Channel(), noduplicates() def do_access_restriction(self): from extension import access_restriction return access_restriction(), noduplicates() def getExpectedAttrNames(self): return [(None, u'version')] def prevalidate(self): self.setFeedType(TYPE_RSS2) # could be anything in the 0.9x family, don't really care self.version = "2.0" if (None,'version') not in self.attrs.getNames(): self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":"version"})) elif [e for e in self.dispatcher.loggedEvents if e.__class__==ValidDoctype]: self.version = self.attrs[(None,'version')] if self.attrs[(None,'version')]<>'0.91': self.log(InvalidDoctype({"parent":self.parent.name, "element":self.name, "attr":"version"})) else: self.version = self.attrs[(None,'version')] if self.version not in ['0.91', '0.92', '2.0']: self.log(InvalidRSSVersion({"parent":self.parent.name, "element":self.name, "value":self.version})) def validate(self): if not "channel" in self.children: self.log(MissingElement({"parent":self.name, "element":"channel"})) python-feedvalidator-0~svn1022/feedvalidator/category.py0000644000175000017500000000135110766017570022004 0ustar poxpox"""$Id: category.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" from base import validatorBase from validators import * # # author element. 
# class category(validatorBase): def getExpectedAttrNames(self): return [(None,u'term'),(None,u'scheme'),(None,u'label')] def prevalidate(self): self.children.append(True) # force warnings about "mixed" content self.validate_required_attribute((None,'term'), nonblank) self.validate_optional_attribute((None,'scheme'), rfc3987_full) self.validate_optional_attribute((None,'label'), nonhtml) python-feedvalidator-0~svn1022/feedvalidator/service.py0000644000175000017500000000313010720344740021614 0ustar poxpoxfrom base import validatorBase from validators import * from extension import extension_everywhere class service(validatorBase, extension_everywhere): def getExpectedAttrNames(self): return [] # (None,u'scheme'),(None,u'fixed')] def validate(self): if not "app_workspace" in self.children: self.log(MissingElement({"parent":self.name, "element":"app:workspace"})) def do_app_workspace(self): return workspace() class workspace(validatorBase, extension_everywhere): def validate(self): if not "atom_title" in self.children: self.log(MissingElement({"parent":self.name, "element":"atom:title"})) def do_app_collection(self): return collection() def do_atom_title(self): from content import textConstruct return textConstruct(), noduplicates() class collection(validatorBase, extension_everywhere): def getExpectedAttrNames(self): return [(None,u'href')] def prevalidate(self): self.validate_required_attribute((None,'href'), rfc3987) def validate(self): if not "atom_title" in self.children: self.log(MissingElement({"parent":self.name, "element":"atom:title"})) def do_atom_title(self): from content import textConstruct return textConstruct(), noduplicates() def do_title(self): from root import atom_namespace assert(atom_namespace in self.dispatcher.defaultNamespaces) self.child = 'atom_title' return self.do_atom_title() def do_app_categories(self): from categories import categories return categories() def do_app_accept(self): from categories import categories return MediaRange() 
python-feedvalidator-0~svn1022/feedvalidator/skipHours.py0000644000175000017500000000264610766017570022166 0ustar poxpox"""$Id: skipHours.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" from base import validatorBase from validators import text from logging import * # # skipHours element # class skipHours(validatorBase): def __init__(self): self.hours = [] validatorBase.__init__(self) def validate(self): if "hour" not in self.children: self.log(MissingElement({"parent":self.name, "element":"hour"})) if len(self.children) > 24: self.log(NotEnoughHoursInTheDay({})) def do_hour(self): return hour() class hour(text): def validate(self): try: h = int(self.value) if h in self.parent.hours or (h in [0,24] and 24-h in self.parent.hours): self.log(DuplicateValue({"parent":self.parent.name, "element":self.name, "value":self.value})) if (h < 0) or (h > 23): raise ValueError else: self.parent.hours.append(h) self.log(ValidHour({"parent":self.parent.name, "element":self.name, "value":self.value})) except ValueError: if self.value == '24': self.log(UseZeroForMidnight({"parent":self.parent.name, "element":self.name, "value":self.value})) else: self.log(InvalidHour({"parent":self.parent.name, "element":self.name, "value":self.value})) python-feedvalidator-0~svn1022/feedvalidator/skipDays.py0000644000175000017500000000231510766017570021757 0ustar poxpox"""$Id: skipDays.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" from base import validatorBase from validators import text from logging import * # # skipDays element # class skipDays(validatorBase): def __init__(self): self.days = [] validatorBase.__init__(self) def validate(self): if "day" not in self.children: self.log(MissingElement({"parent":self.name, "element":"day"})) 
if len(self.children) > 7: self.log(EightDaysAWeek({})) def do_day(self): return day() class day(text): def validate(self): if self.value not in ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'): self.log(InvalidDay({"parent":self.parent.name, "element":self.name, "value":self.value})) elif self.value in self.parent.days: self.log(DuplicateValue({"parent":self.parent.name, "element":self.name, "value":self.value})) else: self.parent.days.append(self.value) self.log(ValidDay({"parent":self.parent.name, "element":self.name, "value":self.value})) python-feedvalidator-0~svn1022/feedvalidator/mediaTypes.py0000644000175000017500000001016010766017570022271 0ustar poxpox""" $Id: mediaTypes.py 988 2008-03-12 18:22:48Z sa3ruby $ This module deals with valid internet media types for feeds. """ __author__ = "Joseph Walton " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2004 Joseph Walton" from cgi import parse_header from logging import * FEED_TYPES = [ 'text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml', 'application/atom+xml', 'text/x-opml', 'application/xrds+xml', 'application/opensearchdescription+xml', 'application/vnd.google-earth.kml+xml', 'application/vnd.google-earth.kmz', 'application/atomsvc+xml', 'application/atomcat+xml', ] # Is the Content-Type correct? 
def checkValid(contentType, loggedEvents): (mediaType, params) = parse_header(contentType) if mediaType.lower() not in FEED_TYPES: loggedEvents.append(UnexpectedContentType({"type": "Feeds", "contentType": mediaType})) if 'charset' in params: charset = params['charset'] else: charset = None return (mediaType, charset) # Warn about mismatches between media type and feed version def checkAgainstFeedType(mediaType, feedType, loggedEvents): mtl = mediaType.lower() if mtl in ['application/x.atom+xml', 'application/atom+xml']: if feedType not in [TYPE_ATOM, TYPE_ATOM_ENTRY]: loggedEvents.append(UnexpectedContentType({"type": 'Non-Atom 1.0 feeds', "contentType": mediaType})) elif mtl == 'application/atomcat+xml': if feedType != TYPE_APP_CATEGORIES: loggedEvents.append(UnexpectedContentType({"type": 'Non-AtomPub Category document', "contentType": mediaType})) elif mtl == 'application/atomsvc+xml': if feedType != TYPE_APP_SERVICE: loggedEvents.append(UnexpectedContentType({"type": 'Non-AtomPub Service document', "contentType": mediaType})) elif mtl == 'application/rdf+xml': if feedType != TYPE_RSS1: loggedEvents.append(UnexpectedContentType({"type": 'Non-RSS 1.0 feeds', "contentType": mediaType})) elif mtl == 'application/rss+xml': if feedType not in [TYPE_RSS1, TYPE_RSS2]: loggedEvents.append(UnexpectedContentType({"type": 'Non-RSS feeds', "contentType": mediaType})) elif mtl == 'text/x-opml': if feedType not in [TYPE_OPML]: loggedEvents.append(UnexpectedContentType({"type": 'Non-OPML feeds', "contentType": mediaType})) elif mtl == 'application/opensearchdescription+xml': if feedType not in [TYPE_OPENSEARCH]: loggedEvents.append(UnexpectedContentType({"type": 'Non-OpenSearchDescription documents', "contentType": mediaType})) elif mtl == 'application/xrds+xml': if feedType not in [TYPE_XRD]: loggedEvents.append(UnexpectedContentType({"type": 'Non-Extensible Resource Descriptor documents', "contentType": mediaType})) elif mtl == 'application/vnd.google-earth.kml+xml': if 
feedType not in [TYPE_KML20, TYPE_KML21, TYPE_KML22]: loggedEvents.append(UnexpectedContentType({"type": 'Non-KML documents', "contentType": mediaType})) elif mtl == 'application/earthviewer': loggedEvents.append(InvalidKmlMediaType({"type": 'Non-KML documents', "contentType": mediaType})) # warn if a non-specific media type is used without a 'marker' def contentSniffing(mediaType, rawdata, loggedEvents): if mediaType not in FEED_TYPES: return if mediaType == 'application/atom+xml': return if mediaType == 'application/atomcat+xml': return if mediaType == 'application/atomsvc+xml': return if mediaType == 'application/rss+xml': return if mediaType == 'text/x-opml': return if mediaType == 'application/opensearchdescription+xml': return if mediaType == 'application/xrds+xml': return if mediaType == 'application/vnd.google-earth.kml+xml': return block = rawdata[:512] if block.find('= 0: return if block.find('= 0: return if block.find('= 0: return if block.find('= 0: return if block.find('= 0: return if (block.find('=0 and block.find('http://www.w3.org/1999/02/22-rdf-syntax-ns#') >= 0 and block.find( 'http://purl.org/rss/1.0/')): return from logging import NonSpecificMediaType loggedEvents.append(NonSpecificMediaType({"contentType": mediaType})) python-feedvalidator-0~svn1022/feedvalidator/compatibility.py0000644000175000017500000000214510766017570023042 0ustar poxpox"""$Id: compatibility.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" from logging import * def _must(event): return isinstance(event, Error) def _should(event): return isinstance(event, Warning) def _may(event): return isinstance(event, Info) def A(events): return [event for event in events if _must(event)] def AA(events): return [event for event in events if _must(event) or _should(event)] def AAA(events): return [event for event in events if _must(event) or _should(event) or 
_may(event)] def AAAA(events): return events def analyze(events, rawdata): block = rawdata[0:512].strip().upper() if block.startswith(' -1: self.log(InvalidTextType({"parent":self.parent.name, "element":self.name, "attr":"type", "value":self.type})) def prevalidate(self): nonhtml.start(self) if self.attrs.has_key((None,"src")): self.type='' else: self.type='text' if self.getFeedType() == TYPE_RSS2 and self.name != 'atom_summary': self.log(DuplicateDescriptionSemantics({"element":self.name})) if self.attrs.has_key((None,"type")): self.type=self.attrs.getValue((None,"type")) if not self.type: self.log(AttrNotBlank({"parent":self.parent.name, "element":self.name, "attr":"type"})) self.maptype() if self.attrs.has_key((None,"src")): self.children.append(True) # force warnings about "mixed" content self.value=self.attrs.getValue((None,"src")) rfc2396.validate(self, errorClass=InvalidURIAttribute, extraParams={"attr": "src"}) self.value="" if not self.attrs.has_key((None,"type")): self.log(MissingTypeAttr({"parent":self.parent.name, "element":self.name, "attr":"type"})) if self.type in ['text','html','xhtml'] and not self.attrs.has_key((None,"src")): pass elif self.type and not self.mime_re.match(self.type): self.log(InvalidMIMEType({"parent":self.parent.name, "element":self.name, "attr":"type", "value":self.type})) else: self.log(ValidMIMEAttribute({"parent":self.parent.name, "element":self.name, "attr":"type", "value":self.type})) if not self.xmlLang: self.log(MissingDCLanguage({"parent":self.name, "element":"xml:lang"})) def validate(self): if self.type in ['text','xhtml']: if self.type=='xhtml': nonhtml.validate(self, NotInline) else: nonhtml.validate(self, ContainsUndeclaredHTML) else: if self.type.find('/') > -1 and not ( self.type.endswith('+xml') or self.type.endswith('/xml') or self.type.startswith('text/')): import base64 try: self.value=base64.decodestring(self.value) if self.type.endswith('/html'): self.type='html' except: 
self.log(NotBase64({"parent":self.parent.name, "element":self.name,"value":self.value})) if self.type=='html' or self.type.endswith("/html"): self.validateSafe(self.value) if self.type.endswith("/html"): if self.value.find("'xhtml') and not ( self.type.endswith('+xml') or self.type.endswith('/xml')): self.log(UndefinedElement({"parent":self.name, "element":name})) if self.type=="xhtml": if name<>'div' and not self.value.strip(): self.log(MissingXhtmlDiv({"parent":self.parent.name, "element":self.name})) elif qname not in ["http://www.w3.org/1999/xhtml"]: self.log(NotHtml({"parent":self.parent.name, "element":self.name, "message":"unexpected namespace", "value": qname})) if self.type=="application/xhtml+xml": if name<>'html': self.log(HtmlFragment({"parent":self.parent.name, "element":self.name,"value":self.value, "type":self.type})) elif qname not in ["http://www.w3.org/1999/xhtml"]: self.log(NotHtml({"parent":self.parent.name, "element":self.name, "message":"unexpected namespace", "value":qname})) if self.attrs.has_key((None,"mode")): if self.attrs.getValue((None,"mode")) == 'escaped': self.log(NotEscaped({"parent":self.parent.name, "element":self.name})) if name=="div" and qname=="http://www.w3.org/1999/xhtml": handler=diveater() else: handler=eater() self.children.append(handler) self.push(handler, name, attrs) # treat xhtml:div as part of the content for purposes of detecting escaped html class diveater(eater): def __init__(self): eater.__init__(self) self.mixed = False def textOK(self): pass def characters(self, string): validatorBase.characters(self, string) def startElementNS(self, name, qname, attrs): if not qname: self.log(MissingNamespace({"parent":"xhtml:div", "element":name})) elif qname == 'http://www.w3.org/1999/xhtml': if name not in HTMLValidator.htmltags: self.log(NotHtml({'message':'Non-XHTML element', 'value':name})) elif name not in HTMLValidator.acceptable_elements: self.log(SecurityRisk({'tag':name})) for ns,attr in attrs.getNames(): if not ns 
and attr not in HTMLValidator.acceptable_attributes: if attr == 'style': for value in checkStyle(attrs.get((ns,attr))): self.log(DangerousStyleAttr({"attr":attr, "value":value})) else: self.log(SecurityRiskAttr({'attr':attr})) elif qname == 'http://www.w3.org/2000/svg': if name not in HTMLValidator.svg_elements: self.log(SecurityRisk({'tag':name})) for ns,attr in attrs.getNames(): if not ns and attr not in HTMLValidator.svg_attributes: self.log(SecurityRiskAttr({'attr':attr})) elif qname == 'http://www.w3.org/1998/Math/MathML': if name not in HTMLValidator.mathml_elements: self.log(SecurityRisk({'tag':name})) for ns,attr in attrs.getNames(): if not ns and attr not in HTMLValidator.mathml_attributes: self.log(SecurityRiskAttr({'attr':attr})) elif namespaces.has_key(qname): if self.name != 'metadata': self.log(UndefinedElement({"parent": self.name, "element":namespaces[qname] + ":" + name})) self.push(eater(), name, attrs) return self.mixed = True eater.startElementNS(self, name, qname, attrs) def validate(self): if not self.mixed: self.parent.value += self.value class content(textConstruct): def maptype(self): if self.type == 'multipart/alternative': self.log(InvalidMIMEType({"parent":self.parent.name, "element":self.name, "attr":"type", "value":self.type})) python-feedvalidator-0~svn1022/feedvalidator/kml.py0000644000175000017500000006431710766017570020765 0ustar poxpox"""$Id: kml.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Gregor J. Rothfuss and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" from base import validatorBase from validators import * import re # This code tries to mimic the structure of the canonical KML XSD as much as possible. 
# The KML XSD is at http://code.google.com/apis/kml/schema/kml21.xsd # FeatureType from the XSD schema # class FeatureType(validatorBase): def do_name(self): return text(),noduplicates() def do_visibility(self): return zeroone(),noduplicates() def do_open(self): return zeroone(),noduplicates() def do_address(self): return nonhtml(),noduplicates() def do_phoneNumber(self): return text(),noduplicates() # todo: implement full check from http://www.koders.com/perl/fid426DF448FE99166A1AD0162538E583A0FA956EEA.aspx def do_Snippet(self): return Snippet(), noduplicates() def do_description(self): return text(), noduplicates() def do_LookAt(self): return LookAt(),noduplicates() # TimePrimitive def do_TimeStamp(self): return TimeStamp(),noduplicates() def do_TimeSpan(self): return TimeSpan(),noduplicates() # /TimePrimitive def do_styleUrl(self): return text(), noduplicates() # StyleSelector def do_Style(self): return Style() def do_StyleMap(self): return StyleMap() # /StyleSelector # 2.0 only def do_View(self): return View(),noduplicates() # /2.0 only def do_Region(self): return Region(), noduplicates() def do_Metadata(self): return Metadata() def do_atom_link(self): from link import link return link() def do_atom_author(self): from author import author return author() # OverlayType from the XSD schema # class OverlayType(validatorBase): def do_color(self): return color(),noduplicates() def do_drawOrder(self): return Integer(),noduplicates() def do_Icon(self): return Icon(), noduplicates() # ColorStyleType from the XSD schema # class ColorStyleType(validatorBase): def do_color(self): return color(),noduplicates() def do_colorMode(self): return colorMode(),noduplicates() # # Container from the XSD schema # class Container(validatorBase): def do_Document(self): return Document() def do_Folder(self): return Folder() # # Feature from the XSD schema # class Feature(validatorBase): def do_Placemark(self): return Placemark() # # Geometry from the XSD schema # class 
Geometry(Feature): # TODO these should all be noduplicates(), but because they can appear # inside MultiGeometry, they are not. def do_Model(self): return Model() def do_LineString(self): return LineString() def do_LinearRing(self): return LinearRing() def do_Point(self): return Point() def do_Polygon(self): return Polygon() def do_MultiGeometry(self): return MultiGeometry() # # GeometryElements from the XSD schema # class GeometryElements(Geometry): def do_extrude(self): return zeroone(),noduplicates() def do_tessellate(self): return zeroone(),noduplicates() def do_altitudeMode(self): return altitudeMode(),noduplicates() # # LinkType from the XSD schema # class LinkType(validatorBase): def do_href(self): return text(),noduplicates() def do_refreshMode(self): return refreshMode(),noduplicates() def do_viewRefreshMode(self): return viewRefreshMode(),noduplicates() def do_viewRefreshTime(self): return Float(), noduplicates() def do_viewBoundScale(self): return Float(), noduplicates() def do_refreshVisibility(self): return refreshVisibility(),noduplicates() def do_refreshInterval(self): return Float(), noduplicates() def do_viewFormat(self): return text(),noduplicates() def do_httpQuery(self): return text(),noduplicates() # # LookAtType from the XSD schema # class LookAtType(Feature): def do_longitude(self): return longitude(),noduplicates() def do_latitude(self): return latitude(),noduplicates() def do_altitude(self): return FloatWithNegative(),noduplicates() def do_range(self): return Float(),noduplicates() def do_tilt(self): return latitude(),noduplicates() def do_heading(self): return angle360(),noduplicates() def do_altitudeMode(self): return altitudeMode(),noduplicates() # # KML element. 
# class kml(validatorBase, Container, Feature): from logging import TYPE_KML20, TYPE_KML21, TYPE_KML22 def do_NetworkLink(self): return NetworkLink() def do_GroundOverlay(self): return GroundOverlay() def do_ScreenOverlay(self): return ScreenOverlay() def do_NetworkLinkControl(self): return NetworkLinkControl() def do_atom_link(self): from link import link return link() def do_atom_author(self): from author import author return author() class NetworkLinkControl(validatorBase): def do_minRefreshPeriod(self): return Float(),noduplicates() def do_linkName(self): return text(),noduplicates() def do_linkDescription(self): return text(),noduplicates() def do_cookie(self): return text(),noduplicates() def do_message(self): return text(), noduplicates() def do_linkSnippet(self): return Snippet(), noduplicates() def do_expires(self): return w3cdtf(),noduplicates() def do_Update(self): return Update(),noduplicates() def do_LookAt(self): return LookAt(),noduplicates() def do_View(self): return View(),noduplicates() class Update(validatorBase): def validate(self): if not "targetHref" in self.children: self.log(MissingElement({"parent":self.name, "element":"targetHref"})) def do_targetHref(self): return text(),noduplicates() # todo: child validation def do_Change(self): return noduplicates() # todo: child validation def do_Update(self): return noduplicates() # todo: child validation def do_Delete(self): return noduplicates() class NetworkLink(validatorBase, FeatureType, Feature): def validate(self): if not "Link" in self.children and not "Url" in self.children: self.log(MissingElement({"parent":self.name, "element":"Link"})) def do_targetHref(self): return Update(),noduplicates() def getExpectedAttrNames(self): return [(None, u'id')] def do_refreshInterval(self): return Float(), noduplicates() def do_flyToView(self): return zeroone(),noduplicates() def do_Link(self): return Link(),noduplicates() def do_Url(self): return Url(),noduplicates() class Document(validatorBase, 
FeatureType, Container, Feature): def getExpectedAttrNames(self): return [(None, u'id')] def do_ScreenOverlay(self): return ScreenOverlay() def do_GroundOverlay(self): return GroundOverlay() def do_NetworkLink(self): return NetworkLink() def do_Schema(self): return Schema(), noduplicates() class Schema(validatorBase): def getExpectedAttrNames(self): return [(None, u'name'), (None, u'parent')] def do_SimpleField(self): return SchemaField() def do_SimpleArrayField(self): return SchemaField() def do_ObjField(self): return SchemaField() def do_ObjArrayField(self): return SchemaField() class SchemaField(validatorBase): def getExpectedAttrNames(self): return [ (None, u'name'), (None, u'type'), ] def validate(self): self.validate_required_attribute((None,'name'), text) self.validate_required_attribute((None,'type'), SchemaFieldType) class Placemark(validatorBase, FeatureType, Geometry): def prevalidate(self): if not self.attrs.has_key((None,"id")): self.log(MissingId({"parent":self.name, "element":"id"})) self.validate_optional_attribute((None,'id'), unique('id',self.parent)) def getExpectedAttrNames(self): return [(None, u'id')] def do_GeometryCollection(self): return GeometryCollection() class MultiGeometry(Geometry): # TODO: check for either geometry or multigeometry in feature, but not both? 
def getExpectedAttrNames(self): return [(None, u'id')] class ScreenOverlay(validatorBase, FeatureType, OverlayType): def getExpectedAttrNames(self): return [(None, u'id')] def do_geomColor(self): return geomColor(),noduplicates() def do_overlayXY(self): return overlayxy(), noduplicates() def do_screenXY(self): return overlayxy(), noduplicates() def do_rotationXY(self): return overlayxy(), noduplicates() def do_size(self): return overlayxy(), noduplicates() class GroundOverlay(validatorBase, FeatureType, OverlayType): def validate(self): if not "LatLonBox" in self.children: self.log(MissingElement({"parent":self.name, "element":"LatLonBox"})) def getExpectedAttrNames(self): return [(None, u'id')] def do_altitude(self): return FloatWithNegative(),noduplicates() def do_altitudeMode(self): return altitudeMode(),noduplicates() def do_geomColor(self): return geomColor(),noduplicates() def do_LatLonBox(self): return LatLonBox(), noduplicates() class overlayxy(validatorBase): def getExpectedAttrNames(self): return [ (None, u'x'), (None, u'y'), (None, u'xunits'), (None, u'yunits'), ] def validate(self): self.validate_required_attribute((None,'x'), FloatWithNegative) self.validate_required_attribute((None,'y'), FloatWithNegative) self.validate_required_attribute((None,'xunits'), kmlunits) self.validate_required_attribute((None,'yunits'), kmlunits) class Region(validatorBase): def validate(self): if not "LatLonAltBox" in self.children: self.log(MissingElement({"parent":self.name, "element":"LatLonAltBox"})) def do_LatLonAltBox(self): return LatLonAltBox(), noduplicates() def do_Lod(self): return Lod(), noduplicates() class LatLonBox(validatorBase): def getExpectedAttrNames(self): return [(None, u'id')] def validate(self): if not "north" in self.children: self.log(MissingElement({"parent":self.name, "element":"north"})) if not "south" in self.children: self.log(MissingElement({"parent":self.name, "element":"south"})) if not "east" in self.children: 
self.log(MissingElement({"parent":self.name, "element":"east"})) if not "west" in self.children: self.log(MissingElement({"parent":self.name, "element":"west"})) def do_north(self): return latitude(),noduplicates() def do_south(self): return latitude(),noduplicates() def do_east(self): return longitude(),noduplicates() def do_west(self): return longitude(),noduplicates() def do_rotation(self): return longitude(),noduplicates() class LatLonAltBox(validatorBase, LatLonBox): def do_minAltitude(self): return Float(),noduplicates() def do_maxAltitude(self): return Float(), noduplicates() def do_altitudeMode(self): return altitudeMode(),noduplicates() class Lod(validatorBase): def do_minLodPixels(self): return Float(),noduplicates() def do_maxLodPixels(self): return Float(),noduplicates() def do_minFadeExtent(self): return Float(),noduplicates() def do_maxFadeExtent(self): return Float(),noduplicates() class Metadata(validatorBase): # TODO do smarter validation here def validate(self): return noduplicates() class Snippet(text): def validate(self): return nonhtml(),noduplicates() def getExpectedAttrNames(self): return [(None, u'maxLines')] class Folder(validatorBase, FeatureType, Container, Feature): def getExpectedAttrNames(self): return [(None, u'id')] def do_NetworkLink(self): return NetworkLink() def do_GroundOverlay(self): return GroundOverlay() def do_ScreenOverlay(self): return ScreenOverlay() class LookAt(validatorBase, LookAtType): def getExpectedAttrNames(self): return [(None, u'id')] class StyleMap(validatorBase): def validate(self): if not "Pair" in self.children: self.log(MissingElement({"parent":self.name, "element":"Pair"})) def getExpectedAttrNames(self): return [(None, u'id')] def do_Pair(self): return Pair() class Style(validatorBase): def prevalidate(self): self.validate_optional_attribute((None,'id'), unique('id',self.parent)) def getExpectedAttrNames(self): return [(None, u'id')] def do_LineStyle(self): return LineStyle(), noduplicates() def 
do_PolyStyle(self): return PolyStyle(), noduplicates() def do_IconStyle(self): return IconStyle(), noduplicates() def do_ListStyle(self): return ListStyle(), noduplicates() def do_LabelStyle(self): return LabelStyle(), noduplicates() def do_BalloonStyle(self): return BalloonStyle(), noduplicates() def do_scale(self): return Float(),noduplicates() def do_labelColor(self): return labelColor(),noduplicates() class IconStyle(validatorBase, ColorStyleType): def prevalidate(self): self.validate_optional_attribute((None,'id'), unique('id',self.parent)) def getExpectedAttrNames(self): return [(None, u'id')] def do_heading(self): return angle360(),noduplicates() def do_Icon(self): return Icon(),noduplicates() def do_scale(self): return Float(),noduplicates() def do_hotSpot(self): return overlayxy(), noduplicates() class Icon(validatorBase): def validate(self): if not 'href' in self.children: self.log(MissingElement({"parent":self.name, "element":"href"})) def do_href(self): # if not self.getFeedType() == TYPE_KML20 and self.startswith('root://'): # self.log(DeprecatedRootHref()) return text(),noduplicates() # would be url, but has these weird root:// def do_x(self): return noiconoffset() def do_y(self): return noiconoffset() def do_w(self): return noiconoffset() def do_h(self): return noiconoffset() def do_refreshInterval(self): return Float(), noduplicates() def do_refreshMode(self): return refreshMode(), noduplicates() def do_viewRefreshMode(self): return viewRefreshMode(), noduplicates() def do_viewRefreshTime(self): return Float(), noduplicates() def do_viewBoundScale(self): return Float(), noduplicates() class BalloonStyle(validatorBase): def prevalidate(self): self.validate_optional_attribute((None,'id'), unique('id',self.parent)) def getExpectedAttrNames(self): return [(None, u'id')] def do_textColor(self): return color(),noduplicates() def do_bgColor(self): return color(),noduplicates() def do_color(self): return color(),noduplicates() def do_text(self): return 
text(),noduplicates() class ListStyle(validatorBase): def prevalidate(self): self.validate_optional_attribute((None,'id'), unique('id',self.parent)) def getExpectedAttrNames(self): return [(None, u'id')] def do_bgColor(self): return color(),noduplicates() def do_ItemIcon(self): return ItemIcon() def do_listItemType(self): return listItemType(),noduplicates() def do_scale(self): return Float(),noduplicates() class ItemIcon(validatorBase): def validate(self): if not 'href' in self.children: self.log(MissingElement({"parent":self.name, "element":"href"})) def do_href(self): return text(),noduplicates() def do_state(self): return itemIconState(),noduplicates() class LabelStyle(validatorBase, ColorStyleType): def prevalidate(self): self.validate_optional_attribute((None,'id'), unique('id',self.parent)) def getExpectedAttrNames(self): return [(None, u'id')] def do_labelColor(self): return labelColor(),noduplicates() def do_scale(self): return Float(),noduplicates() class LineStyle(validatorBase, ColorStyleType): def prevalidate(self): self.validate_optional_attribute((None,'id'), unique('id',self.parent)) def getExpectedAttrNames(self): return [(None, u'id')] def do_width(self): return Float(),noduplicates() class PolyStyle(validatorBase, ColorStyleType): def prevalidate(self): self.validate_optional_attribute((None,'id'), unique('id',self.parent)) def getExpectedAttrNames(self): return [(None, u'id')] def do_fill(self): return zeroone(), noduplicates() def do_outline(self): return zeroone(), noduplicates() class Link(validatorBase, LinkType): def getExpectedAttrNames(self): return [(None, u'id')] class Pair(validatorBase): def validate(self): if not 'key' in self.children: self.log(MissingElement({"parent":self.name, "element":"key"})) if not 'styleUrl' in self.children: self.log(MissingElement({"parent":self.name, "element":"styleUrl"})) def do_key(self): return styleState(),noduplicates() def do_styleUrl(self): return text(),noduplicates() class Point(validatorBase, 
GeometryElements): def validate(self): if not "coordinates" in self.children: self.log(MissingElement({"parent":self.name, "element":"coordinates"})) def getExpectedAttrNames(self): return [(None, u'id')] def do_coordinates(self): return coordinates() class Model(validatorBase): # TODO seems to me that Location and Orientation ought to be required? def validate(self): if not "Link" in self.children: self.log(MissingElement({"parent":self.name, "element":"Link"})) def getExpectedAttrNames(self): return [(None, u'id')] def do_altitudeMode(self): return altitudeMode(), noduplicates() def do_Location(self): return Location(), noduplicates() def do_Orientation(self): return Orientation(), noduplicates() def do_Scale(self): return Scale(), noduplicates() def do_Link(self): return Link(), noduplicates() def do_ResourceMap(self): return ResourceMap(), noduplicates() class ResourceMap(validatorBase): def do_Alias(self): return Alias() class Alias(validatorBase): def do_targetHref(self): return text(),noduplicates() def do_sourceHref(self): return text(),noduplicates() class Location(validatorBase): # TODO they are loosely defined in the schema, but 0,0,0 makes no sense. 
def validate(self): if not "longitude" in self.children: self.log(MissingElement({"parent":self.name, "element":"longitude"})) if not "latitude" in self.children: self.log(MissingElement({"parent":self.name, "element":"latitude"})) if not "altitude" in self.children: self.log(MissingElement({"parent":self.name, "element":"altitude"})) def do_longitude(self): return longitude(), noduplicates() def do_latitude(self): return latitude(), noduplicates() def do_altitude(self): return FloatWithNegative(), noduplicates() class Scale(validatorBase): def do_x(self): return Float(), noduplicates() def do_y(self): return Float(), noduplicates() def do_z(self): return Float(), noduplicates() class Orientation(validatorBase): def do_heading(self): return angle360(), noduplicates() def do_tilt(self): return angle360(), noduplicates() def do_roll(self): return angle360(), noduplicates() class Polygon(validatorBase, GeometryElements): def validate(self): if not "outerBoundaryIs" in self.children: self.log(MissingElement({"parent":self.name, "element":"outerBoundaryIs"})) def getExpectedAttrNames(self): return [(None, u'id')] def do_outerBoundaryIs(self): return boundary(), noduplicates() def do_innerBoundaryIs(self): return boundary() class boundary(validatorBase): def validate(self): if not "LinearRing" in self.children: self.log(MissingElement({"parent":self.name, "element":"LinearRing"})) def do_LinearRing(self): return LinearRing() class LineString(validatorBase, GeometryElements): def validate(self): if not "coordinates" in self.children: self.log(MissingElement({"parent":self.name, "element":"coordinates"})) def getExpectedAttrNames(self): return [(None, u'id')] def do_coordinates(self): return coordinates(), noduplicates() class LinearRing(validatorBase, GeometryElements): def validate(self): if not "coordinates" in self.children: self.log(MissingElement({"parent":self.name, "element":"coordinates"})) def getExpectedAttrNames(self): return [(None, u'id')] def 
do_coordinates(self): return coordinates(), noduplicates() class TimeSpan(validatorBase): def getExpectedAttrNames(self): return [(None, u'id')] def do_begin(self): return w3cdtf(),noduplicates() def do_end(self): return w3cdtf(),noduplicates() class TimeStamp(validatorBase): def validate(self): if not "when" in self.children: self.log(MissingElement({"parent":self.name, "element":"when"})) def getExpectedAttrNames(self): return [(None, u'id')] def do_when(self): return w3cdtf(),noduplicates() class kmlunits(enumeration): error = InvalidKmlUnits valuelist = [ "fraction", "pixels", "insetPixels" ] class colorMode(enumeration): error = InvalidColorMode valuelist = [ "normal", "random" ] class refreshMode(enumeration): error = InvalidRefreshMode valuelist = [ "onChange", "onInterval", "onExpire" ] class viewRefreshMode(enumeration): error = InvalidViewRefreshMode valuelist = [ "never", "onRequest", "onStop", "onRegion" ] class styleState(enumeration): error = InvalidStyleState valuelist = [ "normal", "highlight" ] class listItemType(enumeration): error = InvalidListItemType valuelist = [ "radioFolder", "check", "checkHideChildren", "checkOffOnly" ] class itemIconState(enumeration): error = InvalidItemIconState valuelist = [ "open", "closed", "error", "fetching0", "fetching1", "fetching2", "open error", "closed error", "fetching0 error", "fetching1 error", "fetching2 error" ] class altitudeMode(enumeration): error = InvalidAltitudeMode valuelist = [ "clampToGround", "relativeToGround", "absolute" ] class SchemaFieldType(enumeration): error = InvalidSchemaFieldType valuelist = [ "string", "int", "uint", "short", "ushort", "float", "double","bool" ] # # Deprecated in 2.0 # class antialias(validatorBase): def prevalidate(self): self.log(Deprecated({"element":self.name, "replacement":"none"})) def validate(self): return zeroone(),noduplicates() class View(validatorBase, LookAtType): def prevalidate(self): self.log(Deprecated({"element":self.name, "replacement":"LookAt"})) 
def getExpectedAttrNames(self): return [(None, u'id')] # # Deprecated in 2.1 # class labelColor(text): def prevalidate(self): if not self.getFeedType() == TYPE_KML20: self.log(Deprecated({"element":self.name, "replacement":"LabelStyle"})) def validate(self): if not re.match("([a-f]|[A-F]|[0-9]){8}",self.value): return self.log(InvalidColor({'value':self.value})) class geomColor(text): def prevalidate(self): if not self.getFeedType() == TYPE_KML20: self.log(Deprecated({"element":self.name, "replacement":"color"})) def validate(self): if not re.match("([a-f]|[A-F]|[0-9]){8}",self.value): return self.log(InvalidColor({'value':self.value})) class geomScale(text): def prevalidate(self): if not self.getFeedType() == TYPE_KML20: self.log(Deprecated({"element":self.name, "replacement":"scale"})) def validate(self): return Float() class GeometryCollection(validatorBase, Geometry): def prevalidate(self): if not self.getFeedType() == TYPE_KML20: self.log(Deprecated({"element":self.name, "replacement":"MultiGeometry"})) def getExpectedAttrNames(self): return [(None, u'id')] class Url(validatorBase, LinkType): def prevalidate(self): if not self.getFeedType() == TYPE_KML20: self.log(Deprecated({"element":self.name, "replacement":"Link"})) class refreshVisibility(validatorBase): def prevalidate(self): if not self.getFeedType() == TYPE_KML20: self.log(Deprecated({"element":self.name, "replacement":"Update"})) def validate(self): return zeroone, noduplicates() # In theory, the spec also supports things like .2 if unit is fractions. ugh. 
class noiconoffset(text): def validate(self): if not self.getFeedType() == TYPE_KML20: self.log(Deprecated({"element":self.name, "replacement":"Icon"})) return Integer(), noduplicates() # # Validators # class zeroone(text): def normalizeWhitespace(self): pass def validate(self): if not self.value.lower() in ['0','1']: self.log(InvalidZeroOne({"parent":self.parent.name, "element":self.name,"value":self.value})) class color(text): def validate(self): if not re.match("^([a-f]|[A-F]|[0-9]){8}$",self.value): return self.log(InvalidColor({'value':self.value})) class coordinates(text): def validate(self): values = self.value.strip().split() for value in values: # ensure that commas are only used to separate lat and long (and alt) if not re.match('^[-+.0-9]+,[-+.0-9]+(,[-+.0-9]+)?$', value.strip()): return self.log(InvalidKmlCoordList({'value':self.value})) # Now validate individual coordinates point = value.split(','); # First coordinate is longitude try: lon = float(point[0].strip()) if lon > 180 or lon < -180: raise ValueError else: self.log(ValidLongitude({"parent":self.parent.name, "element":self.name, "value":lon})) except ValueError: self.log(InvalidKmlLongitude({"parent":self.parent.name, "element":self.name, "value":lon})) # Second coordinate is latitude try: lat = float(point[1].strip()) if lat > 90 or lat < -90: raise ValueError else: self.log(ValidLatitude({"parent":self.parent.name, "element":self.name, "value":lat})) except ValueError: self.log(InvalidKmlLatitude({"parent":self.parent.name, "element":self.name, "value":lat})) # Third coordinate value (altitude) has to be float, if present if len(point) == 3: if not re.match('\d+\.?\d*$', point[2]): self.log(InvalidFloat({"attr":self.name, "value":point[2]})) class angle360(text): def validate(self): try: angle = float(self.value) if angle > 360 or angle < -360: raise ValueError else: self.log(ValidAngle({"parent":self.parent.name, "element":self.name, "value":self.value})) except ValueError: 
self.log(InvalidAngle({"parent":self.parent.name, "element":self.name, "value":self.value})) class FloatWithNegative(text): def validate(self, name=None): if not re.match('-?\d+\.?\d*$', self.value): self.log(InvalidFloat({"attr":name or self.name, "value":self.value})) python-feedvalidator-0~svn1022/feedvalidator/item.py0000644000175000017500000002452111021151354021111 0ustar poxpox"""$Id: item.py 1019 2008-06-03 05:13:16Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 1019 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" from base import validatorBase from validators import * from logging import * from itunes import itunes_item from extension import * # # item element. # class item(validatorBase, extension_item, itunes_item): def validate(self): if (not "title" in self.children) and (not "description" in self.children): self.log(ItemMustContainTitleOrDescription({})) if not "guid" in self.children: if self.getFeedType() == TYPE_RSS2: rss = self.parent.parent while rss and rss.name!='rss': rss=rss.parent if rss.version.startswith("2."): self.log(MissingGuid({"parent":self.name, "element":"guid"})) if "slash_comments" in self.children: if "lastBuildDate" not in self.parent.children and self.getFeedType()==TYPE_RSS2: self.log(SlashDate({})) if self.itunes: itunes_item.validate(self) def do_link(self): return rfc2396_full(), noduplicates() def do_title(self): return nonhtml(), nonblank(), noduplicates() def do_description(self): if self.getFeedType() == TYPE_RSS2: rss = self.parent.parent while rss and rss.name!='rss': rss=rss.parent if rss.version == "0.91": return nonhtml(), noduplicates() return safeHtml(), noduplicates() def do_content_encoded(self): if self.getFeedType() == TYPE_RSS2: if not 'description' in self.children: self.log(NeedDescriptionBeforeContent({})) return safeHtml(), noduplicates() def do_content_items(self): return ContentItems(), noduplicates() def do_xhtml_body(self): if self.getFeedType() == 
TYPE_RSS2: self.log(DuplicateDescriptionSemantics({"element":"xhtml:body"})) return htmlEater().setElement('xhtml:body',{},self) def do_atom_id(self): if "guid" in self.children: self.log(DuplicateItemSemantics({"core":"guid", "ext":"atom:id"})) return rfc2396_full(), noduplicates(), unique('atom_id',self.parent) def do_atom_link(self): from link import link return link() def do_atom_title(self): from content import content return content(), noduplicates() def do_atom_summary(self): from content import textConstruct return textConstruct(), noduplicates() def do_atom_author(self): from author import author return author(), noduplicates() def do_atom_contributor(self): from author import author return author() def do_atom_content(self): from content import content return content() def do_atom_published(self): if "published" in self.children: self.log(DuplicateItemSemantics({"core":"pubDate", "ext":"atom:published"})) return rfc3339(), noduplicates() def do_atom_updated(self): return rfc3339(), noduplicates() def do_dc_creator(self): if self.child.find('.')<0 and "author" in self.children: self.log(DuplicateItemSemantics({"core":"author", "ext":"dc:creator"})) return text() # duplicates allowed def do_dc_subject(self): if self.child.find('.')<0 and "category" in self.children: self.log(DuplicateItemSemantics({"core":"category", "ext":"dc:subject"})) return text() # duplicates allowed def do_dc_date(self): if self.child.find('.')<0 and "pubDate" in self.children: self.log(DuplicateItemSemantics({"core":"pubDate", "ext":"dc:date"})) return w3cdtf() def do_cc_license(self): if "creativeCommons_license" in self.children: self.log(DuplicateItemSemantics({"core":"creativeCommons:license", "ext":"cc:license"})) return eater() def do_creativeCommons_license(self): if "cc_license" in self.children: self.log(DuplicateItemSemantics({"core":"creativeCommons:license", "ext":"cc:license"})) return rfc2396_full() class rss20Item(item, extension_rss20_item): def do_comments(self): 
return rfc2396_full(), noduplicates() def do_enclosure(self): return enclosure(), noduplicates(DuplicateEnclosure) def do_pubDate(self): if "dc_date" in self.children: self.log(DuplicateItemSemantics({"core":"pubDate", "ext":"dc:date"})) if "atom_published" in self.children: self.log(DuplicateItemSemantics({"core":"pubDate", "ext":"atom:published"})) return rfc822(), noduplicates() def do_author(self): if "dc_creator" in self.children: self.log(DuplicateItemSemantics({"core":"author", "ext":"dc:creator"})) return email_with_name(), noduplicates() def do_category(self): if "dc_subject" in self.children: self.log(DuplicateItemSemantics({"core":"category", "ext":"dc:subject"})) return category(), nonblank() def do_guid(self): if "atom_id" in self.children: self.log(DuplicateItemSemantics({"core":"guid", "ext":"atom:id"})) return guid(), noduplicates(), unique('guid',self.parent) def do_source(self): if "dc_source" in self.children: self.log(DuplicateItemSemantics({"core":"source", "ext":"dc:source"})) return source(), noduplicates() class rss10Item(item, extension_rss10_item): def validate(self): if not "link" in self.children: self.log(MissingElement({"parent":self.name, "element":"link"})) if not "title" in self.children: self.log(MissingElement({"parent":self.name, "element":"title"})) def getExpectedAttrNames(self): return [(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'about')] def do_rdfs_label(self): return text() def do_rdfs_comment(self): return text() def prevalidate(self): if self.attrs.has_key((rdfNS,"about")): about = self.attrs[(rdfNS,"about")] if not "abouts" in self.dispatcher.__dict__: self.dispatcher.__dict__["abouts"] = [] if about in self.dispatcher.__dict__["abouts"]: self.log(DuplicateValue({"parent":self.name, "element":"rdf:about", "value":about})) else: self.dispatcher.__dict__["abouts"].append(about) # # items element. 
# class items(validatorBase): from root import rss11_namespace as rss11_ns def getExpectedAttrNames(self): return [(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'parseType')] def do_item(self): if self.rss11_ns not in self.dispatcher.defaultNamespaces: self.log(UndefinedElement({"element":"item","parent":"items"})) return rss10Item() def do_rdf_Seq(self): if self.rss11_ns in self.dispatcher.defaultNamespaces: self.log(UndefinedElement({"element":"rdf:Seq","parent":"items"})) return rdfSeq() class rdfSeq(validatorBase): def do_rdf_li(self): return rdfLi() class rdfLi(validatorBase): def getExpectedAttrNames(self): return [(None,u'resource'), (u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'resource')] class category(nonhtml): def getExpectedAttrNames(self): return [(None, u'domain')] class source(nonhtml): def getExpectedAttrNames(self): return [(None, u'url')] def prevalidate(self): self.validate_required_attribute((None,'url'), rfc2396_full) return text.prevalidate(self) class enclosure(validatorBase): from validators import mime_re def getExpectedAttrNames(self): return [(None, u'url'), (None, u'length'), (None, u'type')] def prevalidate(self): try: if int(self.attrs.getValue((None, 'length'))) < 0: if int(self.attrs.getValue((None, 'length'))) == -1: self.log(UseZeroForUnknown({"parent":self.name, "element":'length'})) else: self.log(InvalidNonNegativeInteger({"parent":self.name, "element":'length'})) else: self.log(ValidIntegerAttribute({"parent":self.parent.name, "element":self.name, "attr":'length'})) except KeyError: self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":'length'})) except ValueError: self.log(InvalidIntegerAttribute({"parent":self.parent.name, "element":self.name, "attr":'length'})) try: if not self.mime_re.match(self.attrs.getValue((None, 'type'))): self.log(InvalidMIMEAttribute({"parent":self.parent.name, "element":self.name, "attr":'type'})) else: self.log(ValidMIMEAttribute({"parent":self.parent.name, 
"element":self.name, "attr":'type'})) except KeyError: self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":'type'})) self.validate_required_attribute((None,'url'), httpURL) if self.attrs.has_key((None,u"url")): if hasattr(self.parent,'setEnclosure'): self.parent.setEnclosure(self.attrs.getValue((None, 'url'))) return validatorBase.prevalidate(self) class guid(rfc2396_full, noduplicates): def getExpectedAttrNames(self): return [(None, u'isPermaLink')] def validate(self): isPermalink = 1 try: isPermalinkStr = self.attrs.getValue((None, 'isPermaLink')) if isPermalinkStr not in ('true', 'false'): self.log(InvalidBooleanAttribute({"parent":self.parent.name, "element":self.name, "attr":"isPermaLink"})) else: self.log(ValidBooleanAttribute({"parent":self.parent.name, "element":self.name, "attr":"isPermaLink"})) isPermalink = (isPermalinkStr == 'true') except KeyError: pass if isPermalink: if not(rfc2396.validate(self, InvalidHttpGUID, ValidHttpGUID)): return 0 else: lu = self.value.lower() if lu.startswith("tag:") or lu.startswith("urn:uuid:"): self.log(InvalidPermalink({"parent":self.parent.name, "element":self.name})) return 0 else: return 1 elif len(self.value)<9 and self.value.isdigit(): self.log(NotSufficientlyUnique({"parent":self.parent.name, "element":self.name, "value":self.value})) return noduplicates.validate(self) else: self.log(ValidHttpGUID({"parent":self.parent.name, "element":self.name})) return noduplicates.validate(self) class ContentItems(validatorBase): def do_rdf_Bag(self): return ContentBag(), noduplicates() class ContentBag(validatorBase): def do_rdf_li(self): return ContentLi() class ContentLi(validatorBase): def do_content_item(self): return ContentItem() class ContentItem(validatorBase): def do_content_format(self): return rdfResourceURI(), noduplicates() def do_content_encoding(self): return rdfResourceURI(), noduplicates() def do_rdf_value(self): return text(), noduplicates() 
python-feedvalidator-0~svn1022/feedvalidator/timeoutsocket.py0000644000175000017500000003061210403213716023054 0ustar poxpox#$Id #### # Copyright 2000,2001 by Timothy O'Malley # # All Rights Reserved # # Permission to use, copy, modify, and distribute this software # and its documentation for any purpose and without fee is hereby # granted, provided that the above copyright notice appear in all # copies and that both that copyright notice and this permission # notice appear in supporting documentation, and that the name of # Timothy O'Malley not be used in advertising or publicity # pertaining to distribution of the software without specific, written # prior permission. # # Timothy O'Malley DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS # SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY # AND FITNESS, IN NO EVENT SHALL Timothy O'Malley BE LIABLE FOR # ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES # WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR # PERFORMANCE OF THIS SOFTWARE. # #### """Timeout Socket This module enables a timeout mechanism on all TCP connections. It does this by inserting a shim into the socket module. After this module has been imported, all socket creation goes through this shim. As a result, every TCP connection will support a timeout. The beauty of this method is that it immediately and transparently enables the entire python library to support timeouts on TCP sockets. As an example, if you wanted to SMTP connections to have a 20 second timeout: import timeoutsocket import smtplib timeoutsocket.setDefaultSocketTimeout(20) The timeout applies to the socket functions that normally block on execution: read, write, connect, and accept. If any of these operations exceeds the specified timeout, the exception Timeout will be raised. The default timeout value is set to None. 
As a result, importing this module does not change the default behavior of a socket. The timeout mechanism only activates when the timeout has been set to a numeric value. (This behavior mimics the behavior of the select.select() function.) This module implements two classes: TimeoutSocket and TimeoutFile. The TimeoutSocket class defines a socket-like object that attempts to avoid the condition where a socket may block indefinitely. The TimeoutSocket class raises a Timeout exception whenever the current operation delays too long. The TimeoutFile class defines a file-like object that uses the TimeoutSocket class. When the makefile() method of TimeoutSocket is called, it returns an instance of a TimeoutFile. Each of these objects adds two methods to manage the timeout value: get_timeout() --> returns the timeout of the socket or file set_timeout() --> sets the timeout of the socket or file As an example, one might use the timeout feature to create httplib connections that will timeout after 30 seconds: import timeoutsocket import httplib H = httplib.HTTP("www.python.org") H.sock.set_timeout(30) Note: When used in this manner, the connect() routine may still block because it happens before the timeout is set. To avoid this, use the 'timeoutsocket.setDefaultSocketTimeout()' function. Good Luck! """ __version__ = "$Revision: 511 $" __author__ = "Timothy O'Malley " # # Imports # import select, string import socket if not hasattr(socket, "_no_timeoutsocket"): _socket = socket.socket else: _socket = socket._no_timeoutsocket # # Set up constants to test for Connected and Blocking operations. # We delete 'os' and 'errno' to keep our namespace clean(er). # Thanks to Alex Martelli and G. Li for the Windows error codes. 
# import os if os.name == "nt": _IsConnected = ( 10022, 10056 ) _ConnectBusy = ( 10035, ) _AcceptBusy = ( 10035, ) else: import errno _IsConnected = ( errno.EISCONN, ) _ConnectBusy = ( errno.EINPROGRESS, errno.EALREADY, errno.EWOULDBLOCK ) _AcceptBusy = ( errno.EAGAIN, errno.EWOULDBLOCK ) del errno del os # # Default timeout value for ALL TimeoutSockets # _DefaultTimeout = None def setDefaultSocketTimeout(timeout): global _DefaultTimeout _DefaultTimeout = timeout def getDefaultSocketTimeout(): return _DefaultTimeout # # Exceptions for socket errors and timeouts # Error = socket.error class Timeout(Exception): pass # # Factory function # from socket import AF_INET, SOCK_STREAM def timeoutsocket(family=AF_INET, type=SOCK_STREAM, proto=None): if family != AF_INET or type != SOCK_STREAM: if proto: return _socket(family, type, proto) else: return _socket(family, type) return TimeoutSocket( _socket(family, type), _DefaultTimeout ) # end timeoutsocket # # The TimeoutSocket class definition # class TimeoutSocket: """TimeoutSocket object Implements a socket-like object that raises Timeout whenever an operation takes too long. The definition of 'too long' can be changed using the set_timeout() method. 
""" _copies = 0 _blocking = 1 def __init__(self, sock, timeout): self._sock = sock self._timeout = timeout # end __init__ def __getattr__(self, key): return getattr(self._sock, key) # end __getattr__ def get_timeout(self): return self._timeout # end set_timeout def set_timeout(self, timeout=None): self._timeout = timeout # end set_timeout def setblocking(self, blocking): self._blocking = blocking return self._sock.setblocking(blocking) # end set_timeout def connect_ex(self, addr): errcode = 0 try: self.connect(addr) except Error, why: errcode = why[0] return errcode # end connect_ex def connect(self, addr, port=None, dumbhack=None): # In case we were called as connect(host, port) if port != None: addr = (addr, port) # Shortcuts sock = self._sock timeout = self._timeout blocking = self._blocking # First, make a non-blocking call to connect try: sock.setblocking(0) sock.connect(addr) sock.setblocking(blocking) return except Error, why: # Set the socket's blocking mode back sock.setblocking(blocking) # If we are not blocking, re-raise if not blocking: raise # If we are already connected, then return success. # If we got a genuine error, re-raise it. errcode = why[0] if dumbhack and errcode in _IsConnected: return elif errcode not in _ConnectBusy: raise # Now, wait for the connect to happen # ONLY if dumbhack indicates this is pass number one. # If select raises an error, we pass it on. # Is this the right behavior? if not dumbhack: r,w,e = select.select([], [sock], [], timeout) if w: return self.connect(addr, dumbhack=1) # If we get here, then we should raise Timeout raise Timeout("Attempted connect to %s timed out." % str(addr) ) # end connect def accept(self, dumbhack=None): # Shortcuts sock = self._sock timeout = self._timeout blocking = self._blocking # First, make a non-blocking call to accept # If we get a valid result, then convert the # accept'ed socket into a TimeoutSocket. # Be carefult about the blocking mode of ourselves. 
try: sock.setblocking(0) newsock, addr = sock.accept() sock.setblocking(blocking) timeoutnewsock = self.__class__(newsock, timeout) timeoutnewsock.setblocking(blocking) return (timeoutnewsock, addr) except Error, why: # Set the socket's blocking mode back sock.setblocking(blocking) # If we are not supposed to block, then re-raise if not blocking: raise # If we got a genuine error, re-raise it. errcode = why[0] if errcode not in _AcceptBusy: raise # Now, wait for the accept to happen # ONLY if dumbhack indicates this is pass number one. # If select raises an error, we pass it on. # Is this the right behavior? if not dumbhack: r,w,e = select.select([sock], [], [], timeout) if r: return self.accept(dumbhack=1) # If we get here, then we should raise Timeout raise Timeout("Attempted accept timed out.") # end accept def send(self, data, flags=0): sock = self._sock if self._blocking: r,w,e = select.select([],[sock],[], self._timeout) if not w: raise Timeout("Send timed out") return sock.send(data, flags) # end send def recv(self, bufsize, flags=0): sock = self._sock if self._blocking: r,w,e = select.select([sock], [], [], self._timeout) if not r: raise Timeout("Recv timed out") return sock.recv(bufsize, flags) # end recv def makefile(self, flags="r", bufsize=-1): self._copies = self._copies +1 return TimeoutFile(self, flags, bufsize) # end makefile def close(self): if self._copies <= 0: self._sock.close() else: self._copies = self._copies -1 # end close # end TimeoutSocket class TimeoutFile: """TimeoutFile object Implements a file-like object on top of TimeoutSocket. 
""" def __init__(self, sock, mode="r", bufsize=4096): self._sock = sock self._bufsize = 4096 if bufsize > 0: self._bufsize = bufsize if not hasattr(sock, "_inqueue"): self._sock._inqueue = "" # end __init__ def __getattr__(self, key): return getattr(self._sock, key) # end __getattr__ def close(self): self._sock.close() self._sock = None # end close def write(self, data): self.send(data) # end write def read(self, size=-1): _sock = self._sock _bufsize = self._bufsize while 1: datalen = len(_sock._inqueue) if datalen >= size >= 0: break bufsize = _bufsize if size > 0: bufsize = min(bufsize, size - datalen ) buf = self.recv(bufsize) if not buf: break _sock._inqueue = _sock._inqueue + buf data = _sock._inqueue _sock._inqueue = "" if size > 0 and datalen > size: _sock._inqueue = data[size:] data = data[:size] return data # end read def readline(self, size=-1): _sock = self._sock _bufsize = self._bufsize while 1: idx = string.find(_sock._inqueue, "\n") if idx >= 0: break datalen = len(_sock._inqueue) if datalen >= size >= 0: break bufsize = _bufsize if size > 0: bufsize = min(bufsize, size - datalen ) buf = self.recv(bufsize) if not buf: break _sock._inqueue = _sock._inqueue + buf data = _sock._inqueue _sock._inqueue = "" if idx >= 0: idx = idx + 1 _sock._inqueue = data[idx:] data = data[:idx] elif size > 0 and datalen > size: _sock._inqueue = data[size:] data = data[:size] return data # end readline def readlines(self, sizehint=-1): result = [] data = self.read() while data: idx = string.find(data, "\n") if idx >= 0: idx = idx + 1 result.append( data[:idx] ) data = data[idx:] else: result.append( data ) data = "" return result # end readlines def flush(self): pass # end TimeoutFile # # Silently replace the socket() builtin function with # our timeoutsocket() definition. 
# if not hasattr(socket, "_no_timeoutsocket"): socket._no_timeoutsocket = socket.socket socket.socket = timeoutsocket del socket socket = timeoutsocket # Finis python-feedvalidator-0~svn1022/feedvalidator/uri.py0000644000175000017500000001145010766017570020767 0ustar poxpox"""$Id: uri.py 988 2008-03-12 18:22:48Z sa3ruby $""" """ Code to test URI references for validity, and give their normalized form, according to RFC 3986. """ __author__ = "Joseph Walton " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2004, 2007 Joseph Walton" from urlparse import urljoin from urllib import quote, quote_plus, unquote, unquote_plus from unicodedata import normalize from codecs import lookup import re (enc, dec) = lookup('UTF-8')[:2] SUBDELIMS='!$&\'()*+,;=' PCHAR='-._~' + SUBDELIMS + ':@' GENDELIMS=':/?#[]@' RESERVED=GENDELIMS + SUBDELIMS default_port = { 'ftp': 21, 'telnet': 23, 'http': 80, 'gopher': 70, 'news': 119, 'nntp': 119, 'prospero': 191, 'https': 443, 'snews': 563, 'snntp': 563, } class BadUri(Exception): pass def _n(s): return enc(normalize('NFC', dec(s)[0]))[0] octetRe = re.compile('([^%]|%[a-fA-F0-9]{2})') def asOctets(s): while (s): m = octetRe.match(s) if not(m): raise BadUri() c = m.group(1) if (c[0] == '%'): yield(c.upper(), chr(int(c[1:], 0x10))) else: yield(c, c) s = s[m.end(1):] def _qnu(s,safe=''): if s == None: return None # unquote{,_plus} leave high-bit octets unconverted in Unicode strings # This conversion will, correctly, cause UnicodeEncodeError if there are # non-ASCII characters present in the string s = str(s) res = '' b = '' for (c,x) in asOctets(s): if x in RESERVED and x in safe: res += quote(_n(unquote(b)), safe) b = '' res += c else: b += x res += quote(_n(unquote(b)), safe) return res # Match an optional port specification portRe = re.compile(':(\d*)$') def _normPort(netloc,defPort): nl = netloc.lower() p = defPort m = portRe.search(nl) if m: if m.group(1) != '': p = int(m.group(1)) nl = nl[:m.start(1) - 1] if nl and nl[-1] == 
'.' and nl.rfind('.', 0, -2) >= 0: nl = nl[:-1] # Square brackets are allowed, and only allowed, delimiting IPv6 addresses if nl.startswith('[') != nl.endswith(']'): raise BadUri() if p != defPort: nl = nl + ':' + str(p) return nl def _normAuth(auth,port): i = auth.rfind('@') if i >= 0: c = auth[:i] if c == ':': c = '' h = auth[i + 1:] else: c = None h = auth if c: return c + '@' + _normPort(h,port) else: return _normPort(h,port) def _normPath(p): l = p.split(u'/') i = 0 if l and l[0]: i = len(l) while i < len(l): c = l[i] if (c == '.'): if i < len(l) - 1: del l[i] else: l[i] = '' elif (c == '..'): if i < len(l) - 1: del l[i] else: l[i] = '' if i > 1 or (i > 0 and l[0]): i -= 1 del l[i] else: i += 1 if l == ['']: l = ['', ''] return u'/'.join([_qnu(c, PCHAR) for c in l]) # From RFC 2396bis, with added end-of-string marker uriRe = re.compile('^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?$') def _canonical(s): m = uriRe.match(s) if not(m): raise BadUri() # Check for a relative URI if m.group(2) is None: scheme = None else: scheme = m.group(2).lower() if m.group(4) is None: authority = None p = m.group(5) # Don't try to normalise URI references with relative paths if scheme is None and not p.startswith('/'): return None if scheme == 'mailto': # XXX From RFC 2368, mailto equivalence needs to be subtler than this i = p.find('@') if i > 0: j = p.find('?') if j < 0: j = len(p) p = _qnu(p[:i]) + '@' + _qnu(p[i + 1:].lower()) + _qnu(p[j:]) path = p else: if scheme is None or p.startswith('/'): path = _normPath(p) else: path = _qnu(p, PCHAR + '/') else: a = m.group(4) p = m.group(5) if scheme in default_port: a = _normAuth(a, default_port[scheme]) else: a = _normAuth(a, None) authority = a path = _normPath(p) query = _qnu(m.group(7), PCHAR + "/?") fragment = _qnu(m.group(9), PCHAR + "/?") s = u'' if scheme != None: s += scheme + ':' if authority != None: s += '//' + authority s += path if query != None: s += '?' 
+ query if fragment != None: s += '#' + fragment return s class Uri: """A Uri wraps a string and performs equality testing according to the rules for URI equivalence. """ def __init__(self,s): self.s = s self.n = _canonical(s) def __str__(self): return self.s def __repr__(self): return repr(self.s) def __eq__(self, a): return self.n == a.n def canonicalForm(u): """Give the canonical form for a URI, so char-by-char comparisons become valid tests for equivalence.""" try: return _canonical(u) except BadUri: return None except UnicodeError: return None python-feedvalidator-0~svn1022/feedvalidator/xrd.py0000644000175000017500000000113210547240711020751 0ustar poxpoxfrom base import validatorBase from validators import * class xrds(validatorBase): def do_xrd_XRD(self): return xrd() class xrd(validatorBase): def do_xrd_Service(self): return service() class service(validatorBase): def getExpectedAttrNames(self): return [(None,'priority')] def prevalidate(self): self.validate_optional_attribute((None,'priority'), nonNegativeInteger) def do_xrd_Type(self): return xrdtype() def do_xrd_URI(self): return xrdtype() def do_openid_Delegate(self): return delegate() xrdtype = rfc3987 URI = rfc3987 delegate = rfc3987 python-feedvalidator-0~svn1022/feedvalidator/opml.py0000644000175000017500000001271210766017570021141 0ustar poxpox"""$Id: opml.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" from base import validatorBase from validators import * from logging import * from extension import extension_everywhere import re # # Outline Processor Markup Language element. 
# class opml(validatorBase, extension_everywhere): versionList = ['1.0', '1.1', '2.0'] def validate(self): self.setFeedType(TYPE_OPML) if (None,'version') in self.attrs.getNames(): if self.attrs[(None,'version')] not in opml.versionList: self.log(InvalidOPMLVersion({"parent":self.parent.name, "element":self.name, "value":self.attrs[(None,'version')]})) elif self.name != 'outlineDocument': self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":"version"})) if 'head' not in self.children: self.log(MissingElement({"parent":self.name, "element":"head"})) if 'body' not in self.children: self.log(MissingElement({"parent":self.name, "element":"body"})) def getExpectedAttrNames(self): return [(None, u'version')] def do_head(self): return opmlHead() def do_body(self): return opmlBody() class opmlHead(validatorBase, extension_everywhere): def do_title(self): return safeHtml(), noduplicates() def do_dateCreated(self): return rfc822(), noduplicates() def do_dateModified(self): return rfc822(), noduplicates() def do_ownerName(self): return safeHtml(), noduplicates() def do_ownerEmail(self): return email(), noduplicates() def do_ownerId(self): return httpURL(), noduplicates() def do_expansionState(self): return commaSeparatedLines(), noduplicates() def do_vertScrollState(self): return positiveInteger(), nonblank(), noduplicates() def do_windowTop(self): return positiveInteger(), nonblank(), noduplicates() def do_windowLeft(self): return positiveInteger(), nonblank(), noduplicates() def do_windowBottom(self): return positiveInteger(), nonblank(), noduplicates() def do_windowRight(self): return positiveInteger(), nonblank(), noduplicates() class commaSeparatedLines(text): linenumbers_re=re.compile('^(\d+(,\s*\d+)*)?$') def validate(self): if not self.linenumbers_re.match(self.value): self.log(InvalidExpansionState({"parent":self.parent.name, "element":self.name, "value":self.value})) class opmlBody(validatorBase, extension_everywhere): def 
validate(self): if 'outline' not in self.children: self.log(MissingElement({"parent":self.name, "element":"outline"})) def do_outline(self): return opmlOutline() class opmlOutline(validatorBase, extension_everywhere): versionList = ['RSS', 'RSS1', 'RSS2', 'scriptingNews'] def getExpectedAttrNames(self): return [ (None, u'category'), (None, u'created'), (None, u'description'), (None, u'htmlUrl'), (None, u'isBreakpoint'), (None, u'isComment'), (None, u'language'), (None, u'text'), (None, u'title'), (None, u'type'), (None, u'url'), (None, u'version'), (None, u'xmlUrl'), ] def validate(self): if not (None,'text') in self.attrs.getNames(): self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":"text"})) if (None,'type') in self.attrs.getNames(): if self.attrs[(None,'type')].lower() == 'rss': if not (None,'xmlUrl') in self.attrs.getNames(): self.log(MissingXmlURL({"parent":self.parent.name, "element":self.name})) if not (None,'title') in self.attrs.getNames(): self.log(MissingTitleAttr({"parent":self.parent.name, "element":self.name})) elif self.attrs[(None,'type')].lower() == 'link': if not (None,'url') in self.attrs.getNames(): self.log(MissingUrlAttr({"parent":self.parent.name, "element":self.name})) else: self.log(InvalidOutlineType({"parent":self.parent.name, "element":self.name, "value":self.attrs[(None,'type')]})) if (None,'version') in self.attrs.getNames(): if self.attrs[(None,'version')] not in opmlOutline.versionList: self.log(InvalidOutlineVersion({"parent":self.parent.name, "element":self.name, "value":self.attrs[(None,'version')]})) if len(self.attrs)>1 and not (None,u'type') in self.attrs.getNames(): for name in u'description htmlUrl language title version xmlUrl'.split(): if (None, name) in self.attrs.getNames(): self.log(MissingOutlineType({"parent":self.parent.name, "element":self.name})) break self.validate_optional_attribute((None,'created'), rfc822) self.validate_optional_attribute((None,'description'), safeHtml) 
self.validate_optional_attribute((None,'htmlUrl'), rfc2396_full) self.validate_optional_attribute((None,'isBreakpoint'), truefalse) self.validate_optional_attribute((None,'isComment'), truefalse) self.validate_optional_attribute((None,'language'), iso639) self.validate_optional_attribute((None,'title'), safeHtml) self.validate_optional_attribute((None,'text'), safeHtml) self.validate_optional_attribute((None,'url'), rfc2396_full) def characters(self, string): if not self.value: if string.strip(): self.log(UnexpectedText({"element":self.name,"parent":self.parent.name})) self.value = string def do_outline(self): return opmlOutline() python-feedvalidator-0~svn1022/feedvalidator/extension.py0000644000175000017500000010465411020273026022175 0ustar poxpox"""$Id: extension.py 1018 2008-05-31 16:00:22Z sa3ruby $""" __author__ = "Sam Ruby , Mark Pilgrim and Phil Ringnalda " __version__ = "$Revision: 1018 $" __copyright__ = "Copyright (c) 2002 Sam Ruby, Mark Pilgrim and Phil Ringnalda" from validators import * from logging import * ######################################################################## # Extensions that are valid everywhere # ######################################################################## class extension_everywhere: def do_dc_title(self): return text(), noduplicates() def do_dc_description(self): return text(), noduplicates() def do_dc_publisher(self): if "webMaster" in self.children: self.log(DuplicateSemantics({"core":"webMaster", "ext":"dc:publisher"})) return text() # duplicates allowed def do_dc_contributor(self): return text() # duplicates allowed def do_dc_type(self): return text(), noduplicates() def do_dc_format(self): return text(), noduplicates() def do_dc_identifier(self): return text() def do_dc_source(self): if "source" in self.children: self.log(DuplicateItemSemantics({"core":"source", "ext":"dc:source"})) return text(), noduplicates() def do_dc_language(self): if "language" in self.children: 
self.log(DuplicateSemantics({"core":"language", "ext":"dc:language"})) return iso639(), noduplicates() def do_dc_relation(self): return text(), # duplicates allowed def do_dc_coverage(self): return text(), # duplicates allowed def do_dc_rights(self): if "copyright" in self.children: self.log(DuplicateSemantics({"core":"copyright", "ext":"dc:rights"})) return nonhtml(), noduplicates() def do_dcterms_alternative(self): return text() #duplicates allowed def do_dcterms_abstract(self): return text(), noduplicates() def do_dcterms_tableOfContents(self): return rdfResourceURI(), noduplicates() def do_dcterms_created(self): return w3cdtf(), noduplicates() def do_dcterms_valid(self): return eater() def do_dcterms_available(self): return eater() def do_dcterms_issued(self): return w3cdtf(), noduplicates() def do_dcterms_modified(self): if "lastBuildDate" in self.children: self.log(DuplicateSemantics({"core":"lastBuildDate", "ext":"dcterms:modified"})) return w3cdtf(), noduplicates() def do_dcterms_dateAccepted(self): return text(), noduplicates() def do_dcterms_dateCopyrighted(self): return text(), noduplicates() def do_dcterms_dateSubmitted(self): return text(), noduplicates() def do_dcterms_extent(self): return positiveInteger(), nonblank(), noduplicates() # def do_dcterms_medium(self): # spec defines it as something that should never be used # undefined element'll do for now def do_dcterms_isVersionOf(self): return rdfResourceURI() # duplicates allowed def do_dcterms_hasVersion(self): return rdfResourceURI() # duplicates allowed def do_dcterms_isReplacedBy(self): return rdfResourceURI() # duplicates allowed def do_dcterms_replaces(self): return rdfResourceURI() # duplicates allowed def do_dcterms_isRequiredBy(self): return rdfResourceURI() # duplicates allowed def do_dcterms_requires(self): return rdfResourceURI() # duplicates allowed def do_dcterms_isPartOf(self): return rdfResourceURI() # duplicates allowed def do_dcterms_hasPart(self): return rdfResourceURI() # 
duplicates allowed def do_dcterms_isReferencedBy(self): return rdfResourceURI() # duplicates allowed def do_dcterms_references(self): return rdfResourceURI() # duplicates allowed def do_dcterms_isFormatOf(self): return rdfResourceURI() # duplicates allowed def do_dcterms_hasFormat(self): return rdfResourceURI() # duplicates allowed def do_dcterms_conformsTo(self): return rdfResourceURI() # duplicates allowed def do_dcterms_spatial(self): return eater() def do_dcterms_temporal(self): return eater() def do_dcterms_audience(self): return text() def do_dcterms_mediator(self): return text(), noduplicates() # added to DMCI, but no XML mapping has been defined def do_dcterms_accessRights(self): return eater() def do_dcterms_accrualMethod(self): return eater() def do_dcterms_accrualPeriodicity(self): return eater() def do_dcterms_accrualPolicy(self): return eater() def do_dcterms_bibliographicCitation(self): return eater() def do_dcterms_educationLevel(self): return eater() def do_dcterms_instructionalMethod(self): return eater() def do_dcterms_license(self): return eater() def do_dcterms_provenance(self): return eater() def do_dcterms_rightsHolder(self): return eater() def do_rdf_RDF(self): return eater() def do_rdf_type(self): return eater() def do_rdf_Description(self): return eater() def do_rdfs_seeAlso(self): return rdfResourceURI() # duplicates allowed def do_geo_Point(self): return geo_point() def do_geo_lat(self): return latitude() def do_geo_long(self): return longitude() def do_geo_alt(self): return decimal() def do_geourl_latitude(self): return latitude() def do_geourl_longitude(self): return longitude() def do_georss_where(self): return georss_where() def do_georss_point(self): return gml_pos() def do_georss_line(self): return gml_posList() def do_georss_polygon(self): return gml_posList() def do_georss_featuretypetag(self): return text() def do_georss_relationshiptag(self): return text() def do_georss_featurename(self): return text() def do_georss_elev(self): 
return decimal() def do_georss_floor(self): return Integer() def do_georss_radius(self): return Float() def do_icbm_latitude(self): return latitude() def do_icbm_longitude(self): return longitude() def do_opml_dateCreated(self): return rfc822(), noduplicates() def do_opml_dateModified(self): return rfc822(), noduplicates() def do_opml_ownerName(self): return safeHtml(), noduplicates() def do_opml_ownerEmail(self): return email(), noduplicates() def do_opml_ownerId(self): return httpURL(), noduplicates() ######################################################################## # Extensions that are valid at either the channel or item levels # ######################################################################## from media import media_elements, media_content, media_group class extension_channel_item(extension_everywhere, media_elements): def do_taxo_topics(self): return eater() def do_l_link(self): return l_link() ######################################################################## # Extensions that are valid at only at the item level # ######################################################################## class extension_item(extension_channel_item): def do_annotate_reference(self): return rdfResourceURI(), noduplicates() def do_ag_source(self): return text(), noduplicates() def do_ag_sourceURL(self): return rfc2396_full(), noduplicates() def do_ag_timestamp(self): return iso8601(), noduplicates() def do_ev_startdate(self): return iso8601(), noduplicates() def do_ev_enddate(self): return iso8601(), noduplicates() def do_ev_location(self): return eater() def do_ev_organizer(self): return eater() def do_ev_type(self): return text(), noduplicates() def do_feedburner_awareness(self): return rfc2396_full(), noduplicates() def do_feedburner_origEnclosureLink(self): return rfc2396_full(), noduplicates() def do_feedburner_origLink(self): return rfc2396_full(), noduplicates() def do_foaf_maker(self): return eater() def do_foaf_primaryTopic(self): return eater() def 
do_slash_comments(self): return nonNegativeInteger(), noduplicates() def do_slash_section(self): return text() def do_slash_department(self): return text() def do_slash_hit_parade(self): return commaSeparatedIntegers(), noduplicates() def do_thr_children(self): if self.getFeedType() != TYPE_RSS1: self.log(UndefinedElement({'parent':self.name,"element":"thr:children"})) return eater() def do_thr_total(self): return nonNegativeInteger(), noduplicates() def do_thr_in_reply_to(self): return in_reply_to() def do_wfw_comment(self): return rfc2396_full(), noduplicates() def do_wfw_commentRss(self): return rfc2396_full(), noduplicates() def do_wfw_commentRSS(self): self.log(CommentRSS({"parent":self.parent.name, "element":self.name})) return rfc2396_full(), noduplicates() def do_wiki_diff(self): return text() def do_wiki_history(self): return text() def do_wiki_importance(self): return text() def do_wiki_status(self): return text() def do_wiki_version(self): return text() def do_g_actor(self): return nonhtml(), noduplicates() def do_g_age(self): return nonNegativeInteger(), noduplicates() def do_g_agent(self): return nonhtml(), noduplicates() def do_g_area(self): return nonhtml(), noduplicates() # intUnit def do_g_apparel_type(self): return nonhtml(), noduplicates() def do_g_artist(self): return nonhtml(), noduplicates() def do_g_author(self): return nonhtml(), noduplicates() def do_g_bathrooms(self): return nonNegativeInteger(), noduplicates() def do_g_bedrooms(self): return nonNegativeInteger(), noduplicates() def do_g_brand(self): return nonhtml(), noduplicates() def do_g_calories(self): return g_float(), noduplicates() def do_g_cholesterol(self): return g_float(), noduplicates() def do_g_color(self): return nonhtml(), noduplicates() def do_g_cooking_time(self): return g_float(), noduplicates() def do_g_condition(self): return nonhtml(), noduplicates() def do_g_course(self): return nonhtml(), noduplicates() def do_g_course_date_range(self): return g_dateTimeRange(), 
noduplicates() def do_g_course_number(self): return nonhtml(), noduplicates() def do_g_course_times(self): return nonhtml(), noduplicates() def do_g_cuisine(self): return nonhtml(), noduplicates() def do_g_currency(self): return iso4217(), noduplicates() def do_g_delivery_notes(self): return nonhtml(), noduplicates() def do_g_delivery_radius(self): return floatUnit(), noduplicates() def do_g_education(self): return nonhtml(), noduplicates() def do_g_employer(self): return nonhtml(), noduplicates() def do_g_ethnicity(self): return nonhtml(), noduplicates() def do_g_event_date_range(self): return g_dateTimeRange(), noduplicates() def do_g_expiration_date(self): return iso8601_date(), noduplicates() def do_g_expiration_date_time(self): return iso8601(), noduplicates() def do_g_fiber(self): return g_float(), noduplicates() def do_g_from_location(self): return g_locationType(), noduplicates() def do_g_gender(self): return g_genderEnumeration(), noduplicates() def do_g_hoa_dues(self): return g_float(), noduplicates() def do_g_format(self): return nonhtml(), noduplicates() def do_g_id(self): return nonhtml(), noduplicates() def do_g_image_link(self): return rfc2396_full(), maxten() def do_g_immigration_status(self): return nonhtml(), noduplicates() def do_g_interested_in(self): return nonhtml(), noduplicates() def do_g_isbn(self): return nonhtml(), noduplicates() def do_g_job_function(self): return nonhtml(), noduplicates() def do_g_job_industry(self): return nonhtml(), noduplicates() def do_g_job_type(self): return nonhtml(), noduplicates() def do_g_label(self): return g_labelType(), maxten() def do_g_listing_type(self): return truefalse(), noduplicates() def do_g_location(self): return g_full_locationType(), noduplicates() def do_g_main_ingredient(self): return nonhtml(), noduplicates() def do_g_make(self): return nonhtml(), noduplicates() def do_g_manufacturer(self): return nonhtml(), noduplicates() def do_g_manufacturer_id(self): return nonhtml(), noduplicates() def 
do_g_marital_status(self): return g_maritalStatusEnumeration(), noduplicates() def do_g_meal_type(self): return nonhtml(), noduplicates() def do_g_megapixels(self): return floatUnit(), noduplicates() def do_g_memory(self): return floatUnit(), noduplicates() def do_g_mileage(self): return g_intUnit(), noduplicates() def do_g_model(self): return nonhtml(), noduplicates() def do_g_model_number(self): return nonhtml(), noduplicates() def do_g_name_of_item_being_reviewed(self): return nonhtml(), noduplicates() def do_g_news_source(self): return nonhtml(), noduplicates() def do_g_occupation(self): return nonhtml(), noduplicates() def do_g_payment_notes(self): return nonhtml(), noduplicates() def do_g_pages(self): return positiveInteger(), nonblank(), noduplicates() def do_g_payment_accepted(self): return g_paymentMethodEnumeration() def do_g_pickup(self): return truefalse(), noduplicates() def do_g_preparation_time(self): return floatUnit(), noduplicates() def do_g_price(self): return floatUnit(), noduplicates() def do_g_price_type(self): return g_priceTypeEnumeration(), noduplicates() def do_g_processor_speed(self): return floatUnit(), noduplicates() def do_g_product_type(self): return nonhtml(), noduplicates() def do_g_property_type(self): return nonhtml(), noduplicates() def do_g_protein(self): return floatUnit(), noduplicates() def do_g_publication_name(self): return nonhtml(), noduplicates() def do_g_publication_volume(self): return nonhtml(), noduplicates() def do_g_publish_date(self): return iso8601_date(), noduplicates() def do_g_quantity(self): return nonNegativeInteger(), nonblank(), noduplicates() def do_g_rating(self): return g_ratingTypeEnumeration(), noduplicates() def do_g_review_type(self): return nonhtml(), noduplicates() def do_g_reviewer_type(self): return g_reviewerTypeEnumeration(), noduplicates() def do_g_salary(self): return g_float(), noduplicates() def do_g_salary_type(self): return g_salaryTypeEnumeration(), noduplicates() def 
do_g_saturated_fat(self): return g_float(), noduplicates() def do_g_school_district(self): return nonhtml(), noduplicates() def do_g_service_type(self): return nonhtml(), noduplicates() def do_g_servings(self): return g_float(), noduplicates() def do_g_sexual_orientation(self): return nonhtml(), noduplicates() def do_g_size(self): return nonhtml(), noduplicates() # TODO: expressed in either two or three dimensions. def do_g_shipping(self): return g_shipping(), noduplicates() def do_g_sodium(self): return g_float(), noduplicates() def do_g_subject(self): return nonhtml(), noduplicates() def do_g_subject_area(self): return nonhtml(), noduplicates() def do_g_tax_percent(self): return percentType(), noduplicates() def do_g_tax_region(self): return nonhtml(), noduplicates() def do_g_to_location(self): return g_locationType(), noduplicates() def do_g_total_carbs(self): return g_float(), noduplicates() def do_g_total_fat(self): return g_float(), noduplicates() def do_g_travel_date_range(self): return g_dateTimeRange(), noduplicates() def do_g_university(self): return nonhtml(), noduplicates() def do_g_upc(self): return nonhtml(), noduplicates() def do_g_url_of_item_being_reviewed(self): return rfc2396_full(), noduplicates() def do_g_vehicle_type(self): return nonhtml(), noduplicates() def do_g_vin(self): return nonhtml(), noduplicates() def do_g_weight(self): return floatUnit(), noduplicates() def do_g_year(self): return g_year(), noduplicates() def do_media_group(self): return media_group() def do_media_content(self): return media_content() def do_sx_sync(self): import sse return sse.Sync() def do_conversationsNetwork_introMilliseconds(self): return nonNegativeInteger(), noduplicates() class heisen_uri(rfc3987, rfc2396_full): def validate(self): if self.getFeedType() == TYPE_ATOM: rfc3987.validate(self) elif not rfc2396_full.rfc2396_re.match(self.value): self.log(ContainsRelRef({'parent':self.parent.name})) class feedFlare(nonhtml): def getExpectedAttrNames(self): return 
[(None,u'href'),(None,u'src')] def prevalidate(self): self.validate_required_attribute((None,'href'), heisen_uri) self.validate_required_attribute((None,'src'), heisen_uri) return text.prevalidate(self) class xmlView(validatorBase): def getExpectedAttrNames(self): return [(None,u'href')] def prevalidate(self): self.validate_required_attribute((None,'href'), rfc2396_full) class georss_where(validatorBase): def do_gml_Point(self): return gml_point() def do_gml_LineString(self): return gml_line() def do_gml_Polygon(self): return gml_polygon() def do_gml_Envelope(self): return gml_envelope() class geo_srsName(validatorBase): def getExpectedAttrNames(self): return [(None, u'srsName')] class gml_point(geo_srsName): def do_gml_pos(self): return gml_pos() class geo_point(validatorBase): def do_geo_lat(self): return latitude() def do_geo_long(self): return longitude() def validate(self): if "geo_lat" not in self.children: self.log(MissingElement({"parent":self.name.replace('_',':'), "element":"geo:lat"})) if "geo_long" not in self.children: self.log(MissingElement({"parent":self.name.replace('_',':'), "element":"geo:long"})) class gml_pos(text): def validate(self): if not re.match('^[-+]?\d+\.?\d*[ ,][-+]?\d+\.?\d*$', self.value): return self.log(InvalidCoord({'value':self.value})) if self.value.find(',')>=0: self.log(CoordComma({'value':self.value})) class gml_line(geo_srsName): def do_gml_posList(self): return gml_posList() class gml_posList(text): def validate(self): if self.value.find(',')>=0: # ensure that commas are only used to separate lat and long if not re.match('^[-+.0-9]+[, ][-+.0-9]( [-+.0-9]+[, ][-+.0-9])+$', value.strip()): return self.log(InvalidCoordList({'value':self.value})) self.log(CoordComma({'value':self.value})) self.value=self.value.replace(',',' ') values = self.value.strip().split() if len(values)<3 or len(values)%2 == 1: return self.log(InvalidCoordList({'value':self.value})) for value in values: if not re.match('^[-+]?\d+\.?\d*$', value): return 
# NOTE(review): whitespace-mangled chunk of feedvalidator/extension.py.
# The statement below is the tail of a method whose "def" precedes this
# chunk (it logs an invalid GML coordinate list); indentation reconstructed.
    self.log(InvalidCoordList({'value':value}))

# GML geometry wrappers: each container element simply dispatches to the
# validator for its single expected child element.
class gml_polygon(geo_srsName):
  def do_gml_exterior(self):
    return gml_exterior()

class gml_exterior(validatorBase):
  def do_gml_LinearRing(self):
    return gml_linearRing()

class gml_linearRing(geo_srsName):
  def do_gml_posList(self):
    return gml_posList()

class gml_envelope(geo_srsName):
  def do_gml_lowerCorner(self):
    return gml_pos()
  def do_gml_upperCorner(self):
    return gml_pos()

class access_restriction(enumeration):
  # <access:restriction> requires a relationship="allow|deny" attribute.
  error = InvalidAccessRestrictionRel
  valuelist = ["allow", "deny"]

  def getExpectedAttrNames(self):
    return [(None, u'relationship')]

  def prevalidate(self):
    self.children.append(True) # force warnings about "mixed" content
    if not self.attrs.has_key((None,"relationship")):
      self.log(MissingAttribute({"parent":self.parent.name,
          "element":self.name, "attr":"relationship"}))
    else:
      # enumeration.validate() checks self.value against valuelist
      self.value=self.attrs.getValue((None,"relationship"))

########################################################################
# Extensions that are valid at only at the RSS 2.0 item level          #
########################################################################

class extension_rss20_item(extension_item):
  def do_trackback_ping(self):
    return rfc2396_full(), noduplicates()
  def do_trackback_about(self):
    return rfc2396_full()
  # dcterms refinements are accepted but not further validated
  def do_dcterms_accessRights(self):
    return eater()
  def do_dcterms_accrualMethod(self):
    return eater()
  def do_dcterms_accrualPeriodicity(self):
    return eater()
  def do_dcterms_accrualPolicy(self):
    return eater()
  def do_dcterms_bibliographicCitation(self):
    return eater()
  def do_dcterms_educationLevel(self):
    return eater()
  def do_dcterms_instructionalMethod(self):
    return eater()
  def do_dcterms_license(self):
    return eater()
  def do_dcterms_provenance(self):
    return eater()
  def do_dcterms_rightsHolder(self):
    return eater()

########################################################################
# Extensions that are valid at only at the RSS 1.0 item level          #
########################################################################

class extension_rss10_item(extension_item):
  # RSS 1.0 uses rdf:resource references rather than plain URI content
  def do_trackback_ping(self):
    return rdfResourceURI(), noduplicates()
  def do_trackback_about(self):
    return rdfResourceURI()
  def do_l_permalink(self):
    return l_permalink()

class l_permalink(rdfResourceURI, MimeType):
  # <l:permalink>: an rdf:resource URI with an optional l:type MIME type.
  lNS = u'http://purl.org/rss/1.0/modules/link/'
  def getExpectedAttrNames(self):
    return rdfResourceURI.getExpectedAttrNames(self) + [(self.lNS, u'type')]
  def validate(self):
    if (self.lNS, 'type') in self.attrs.getNames():
      self.value=self.attrs.getValue((self.lNS, 'type'))
      MimeType.validate(self)
    return rdfResourceURI.validate(self)

class l_link(rdfResourceURI, MimeType):
  lNS = u'http://purl.org/rss/1.0/modules/link/'
  def getExpectedAttrNames(self):
    return rdfResourceURI.getExpectedAttrNames(self) + [
      (self.lNS, u'lang'), (self.lNS, u'rel'),
      (self.lNS, u'type'), (self.lNS, u'title') ]
  def prevalidate(self):
    self.validate_optional_attribute((self.lNS,'lang'), iso639)
    self.validate_required_attribute((self.lNS,'rel'), rfc2396_full)
    self.validate_optional_attribute((self.lNS,'title'), nonhtml)
    if self.attrs.has_key((self.lNS, "type")):
      # a value containing ':' is treated as a URI, otherwise as a MIME type
      if self.attrs.getValue((self.lNS, "type")).find(':') < 0:
        self.validate_optional_attribute((self.lNS,'type'), MimeType)
      else:
        self.validate_optional_attribute((self.lNS,'type'), rfc2396_full)

########################################################################
# Extensions that are valid at only at the Atom entry level            #
########################################################################

class extension_entry(extension_item):
  def do_dc_creator(self): # atom:creator
    return text() # duplicates allowed
  def do_dc_subject(self): # atom:category
    return text() # duplicates allowed
  def do_dc_date(self): # atom:published
    return w3cdtf(), noduplicates()
  def do_creativeCommons_license(self):
    return rfc2396_full()
  def do_trackback_ping(self):
    return rfc2396_full(), noduplicates()
  # XXX This should have duplicate semantics with link[@rel='related']
  def do_trackback_about(self):
    return rfc2396_full()

########################################################################
# Extensions that are valid at only at the channel level               #
########################################################################

class extension_channel(extension_channel_item):
  def do_admin_generatorAgent(self):
    if "generator" in self.children:
      self.log(DuplicateSemantics({"core":"generator",
          "ext":"admin:generatorAgent"}))
    return admin_generatorAgent(), noduplicates()
  def do_admin_errorReportsTo(self):
    return admin_errorReportsTo(), noduplicates()
  def do_blogChannel_blogRoll(self):
    return rfc2396_full(), noduplicates()
  def do_blogChannel_mySubscriptions(self):
    return rfc2396_full(), noduplicates()
  def do_blogChannel_blink(self):
    return rfc2396_full(), noduplicates()
  def do_blogChannel_changes(self):
    return rfc2396_full(), noduplicates()
  def do_sy_updatePeriod(self):
    return sy_updatePeriod(), noduplicates()
  def do_sy_updateFrequency(self):
    return positiveInteger(), nonblank(), noduplicates()
  def do_sy_updateBase(self):
    return w3cdtf(), noduplicates()
  def do_foaf_maker(self):
    return eater()
  def do_cp_server(self):
    return rdfResourceURI()
  def do_wiki_interwiki(self):
    return text()
  def do_thr_in_reply_to(self):
    return in_reply_to()
  def do_cf_listinfo(self):
    from cf import listinfo
    return listinfo()
  def do_cf_treatAs(self):
    from cf import treatAs
    return treatAs()
  def do_feedburner_awareness(self):
    return rfc2396_full(), noduplicates()
  def do_feedburner_browserFriendly(self):
    return nonhtml(), noduplicates()
  def do_feedburner_emailServiceId(self):
    return positiveInteger(), noduplicates()
  def do_feedburner_feedFlare(self):
    return feedFlare()
  def do_feedburner_feedburnerHostname(self):
    return rfc2396_full(), noduplicates()
  def do_opensearch_totalResults(self):
    return nonNegativeInteger(), noduplicates()
  # opensearch 1.0 element names alias the 1.1 handlers
  do_opensearch10_totalResults = do_opensearch_totalResults
  def do_opensearch_startIndex(self):
    return Integer(), noduplicates()
  do_opensearch10_startIndex = do_opensearch_startIndex
# NOTE(review): these methods continue class extension_channel from the
# previous mangled line; method-level indentation reconstructed.
  def do_opensearch_itemsPerPage(self):
    return nonNegativeInteger(), noduplicates()
  do_opensearch10_itemsPerPage = do_opensearch_itemsPerPage
  def do_opensearch_Query(self):
    from opensearch import Query
    return Query()
  def do_xhtml_div(self):
    return eater()
  def do_xhtml_meta(self):
    return xhtml_meta()
  def do_sx_sharing(self):
    import sse
    return sse.Sharing()
  def do_fh_archive(self):
    return validatorBase()
  def do_fh_complete(self):
    return validatorBase()

class xhtml_meta(validatorBase):
  # <xhtml:meta name="robots" content="..."> only
  def getExpectedAttrNames(self):
    return [ (None, u'name'), (None, u'content') ]
  def prevalidate(self):
    self.validate_required_attribute((None,'name'), xhtmlMetaEnumeration)
    self.validate_required_attribute((None,'content'), robotsEnumeration)

class xhtmlMetaEnumeration(caseinsensitive_enumeration):
  error = InvalidMetaName
  valuelist = ["robots"]

class robotsEnumeration(caseinsensitive_enumeration):
  error = InvalidMetaContent
  valuelist = [
    "all", "none",
    "index", "index,follow", "index,nofollow",
    "noindex", "noindex,follow", "noindex,nofollow",
    "follow", "follow,index", "follow,noindex",
    "nofollow", "nofollow,index", "nofollow,noindex"]

########################################################################
# Extensions that are valid at only at the Atom feed level             #
########################################################################

class extension_feed(extension_channel):
  def do_dc_creator(self): # atom:creator
    return text() # duplicates allowed
  def do_dc_subject(self): # atom:category
    return text() # duplicates allowed
  def do_dc_date(self): # atom:updated
    return w3cdtf(), noduplicates()
  def do_creativeCommons_license(self):
    return rfc2396_full()
  def do_access_restriction(self):
    return access_restriction()

########################################################################
# Validators                                                           #
########################################################################

class admin_generatorAgent(rdfResourceURI): pass
class admin_errorReportsTo(rdfResourceURI): pass

# NOTE(review): the bare "class" keyword below is completed on the next
# mangled line (class sy_updatePeriod(text)).
class
# NOTE(review): completes the "class" keyword from the end of the previous
# mangled line: class sy_updatePeriod(text).
sy_updatePeriod(text):
  def validate(self):
    # syndication module: only the five canonical period names are valid
    if self.value not in ('hourly', 'daily', 'weekly', 'monthly', 'yearly'):
      self.log(InvalidUpdatePeriod({"parent":self.parent.name,
          "element":self.name, "value":self.value}))
    else:
      self.log(ValidUpdatePeriod({"parent":self.parent.name,
          "element":self.name, "value":self.value}))

class g_complex_type(validatorBase):
  # Google Base complex types: RSS 1.0 feeds may carry rdf:parseType
  def getExpectedAttrNames(self):
    if self.getFeedType() == TYPE_RSS1:
      return [(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'parseType')]
    else:
      return []

class g_shipping(g_complex_type):
  def do_g_service(self):
    return g_serviceTypeEnumeration(), noduplicates()
  def do_g_country(self):
    return iso3166(), noduplicates()
  def do_g_price(self):
    return floatUnit(), noduplicates()

class g_dateTimeRange(g_complex_type):
  def do_g_start(self):
    return iso8601(), noduplicates()
  def do_g_end(self):
    return iso8601(), noduplicates()

class g_labelType(text):
  def validate(self):
    # labels may not contain commas
    if self.value.find(',')>=0:
      self.log(InvalidLabel({"parent":self.parent.name, "element":self.name,
          "attr": ':'.join(self.name.split('_',1)), "value":self.value}))

class g_locationType(text):
  def validate(self):
    # expect "city, state" or "city, state, country"
    if len(self.value.split(',')) not in [2,3]:
      self.log(InvalidLocation({"parent":self.parent.name,
          "element":self.name, "attr": ':'.join(self.name.split('_',1)),
          "value":self.value}))

class g_full_locationType(text):
  def validate(self):
    # exactly five non-blank comma-separated fields
    fields = self.value.split(',')
    if len(fields) != 5 or 0 in [len(f.strip()) for f in fields]:
      self.log(InvalidFullLocation({"parent":self.parent.name,
          "element":self.name, "attr": ':'.join(self.name.split('_',1)),
          "value":self.value}))

class g_genderEnumeration(enumeration):
  error = InvalidGender
  valuelist = ["Male", "M", "Female", "F"]

class g_maritalStatusEnumeration(enumeration):
  error = InvalidMaritalStatus
  valuelist = ["single", "divorced", "separated", "widowed", "married",
    "in relationship"]

class g_paymentMethodEnumeration(enumeration):
  error = InvalidPaymentMethod
  valuelist = ["Cash", "Check", "Visa", "MasterCard", "AmericanExpress",
    "Discover", "WireTransfer"]

class g_priceTypeEnumeration(enumeration):
  error = InvalidPriceType
  valuelist = ["negotiable", "starting"]

class g_ratingTypeEnumeration(enumeration):
  error = InvalidRatingType
  valuelist = ["1", "2", "3", "4", "5"]

class g_reviewerTypeEnumeration(enumeration):
  error = InvalidReviewerType
  valuelist = ["editorial", "user"]

class g_salaryTypeEnumeration(enumeration):
  error = InvalidSalaryType
  valuelist = ["starting", "negotiable"]

class g_serviceTypeEnumeration(enumeration):
  error = InvalidServiceType
  valuelist = ['FedEx', 'UPS', 'DHL', 'Mail', 'Other', 'Overnight',
    'Standard']

class g_float(text):
  def validate(self):
    import re
    # NOTE(review): unlike floatUnit below, this pattern has no '$' anchor,
    # so trailing garbage after a valid prefix (e.g. "1.2.3") passes
    # validation — confirm whether the anchor was omitted deliberately.
    if not re.match('\d+\.?\d*\s*\w*', self.value):
      self.log(InvalidFloat({"parent":self.parent.name, "element":self.name,
          "attr": ':'.join(self.name.split('_',1)), "value":self.value}))

class floatUnit(text):
  def validate(self):
    import re
    # a number optionally followed by a unit word, and nothing else
    if not re.match('\d+\.?\d*\s*\w*$', self.value):
      self.log(InvalidFloatUnit({"parent":self.parent.name,
          "element":self.name, "attr": ':'.join(self.name.split('_',1)),
          "value":self.value}))

class decimal(text):
  def validate(self):
    import re
    # NOTE(review): logs InvalidFloatUnit rather than a decimal-specific
    # event — presumably reusing the closest existing message; confirm.
    if not re.match('[-+]?\d+\.?\d*\s*$', self.value):
      self.log(InvalidFloatUnit({"parent":self.parent.name,
          "element":self.name, "attr": ':'.join(self.name.split('_',1)),
          "value":self.value}))

class g_year(text):
  def validate(self):
    import time
    try:
      year = int(self.value)
      # years outside 1900..(current year + 4) are rejected; the raised
      # class is caught immediately below and reported via self.log
      if year < 1900 or year > time.localtime()[0]+4: raise InvalidYear
    except:
      self.log(InvalidYear({"parent":self.parent.name, "element":self.name,
          "attr": ':'.join(self.name.split('_',1)), "value":self.value}))

class g_intUnit(text):
  def validate(self):
    try:
      # leading integer (commas allowed as grouping) must be non-negative
      if int(self.value.split(' ')[0].replace(',','')) < 0:
        raise InvalidIntUnit
    except:
      self.log(InvalidIntUnit({"parent":self.parent.name,
          "element":self.name, "attr": ':'.join(self.name.split('_',1)),
          "value":self.value}))

class maxten(validatorBase):
  # warn when an element occurs more than ten times within its parent
  def textOK(self):
    pass
  def prevalidate(self):
    # NOTE(review): the right-hand side of this comparison continues on the
    # next mangled line (a list comprehension counting same-named siblings).
    if 10 ==
# NOTE(review): continuation of maxten.prevalidate from the previous
# mangled line.
len([1 for child in self.parent.children if self.name==child]):
      self.log(TooMany({"parent":self.parent.name, "element":self.name}))

class in_reply_to(canonicaluri, xmlbase):
  # thr:in-reply-to: href/source are xml:base-resolved URIs, ref is a
  # canonical URI, type must look like a MIME type.
  def getExpectedAttrNames(self):
    return [(None, u'href'), (None, u'ref'), (None, u'source'),
      (None, u'type')]
  def validate(self):
    if self.attrs.has_key((None, "href")):
      self.value = self.attrs.getValue((None, "href"))
      self.name = "href"
      xmlbase.validate(self)
    if self.attrs.has_key((None, "ref")):
      self.value = self.attrs.getValue((None, "ref"))
      self.name = "ref"
      canonicaluri.validate(self)
    if self.attrs.has_key((None, "source")):
      self.value = self.attrs.getValue((None, "source"))
      self.name = "source"
      xmlbase.validate(self)
    if self.attrs.has_key((None, "type")):
      self.value = self.attrs.getValue((None, "type"))
      if not mime_re.match(self.value):
        self.log(InvalidMIMEType({"parent":self.parent.name,
            "element":self.name, "attr":"type", "value":self.value}))
      else:
        self.log(ValidMIMEAttribute({"parent":self.parent.name,
            "element":self.name, "attr":"type", "value":self.value}))

########################################################################
# Extensions that you just gotta question                              #
########################################################################

class Questionable(extension_everywhere):
  # Atom elements appearing as foreign markup in other feed formats.
  children = []
  def do_atom_author(self):
    from author import author
    return author()
  def do_atom_category(self):
    from category import category
    return category()
  def do_atom_content(self):
    from content import content
    return content()
  def do_atom_contributor(self):
    from author import author
    return author()
  def do_atom_generator(self):
    from generator import generator
    return generator()
  def do_atom_icon(self):
    return rfc2396(), noduplicates()
  def do_atom_id(self):
    return canonicaluri(), noduplicates()
  def do_atom_link(self):
    from link import link
    return link()
  def do_atom_logo(self):
    return rfc2396(), noduplicates()
  def do_atom_published(self):
    return rfc3339(), noduplicates()
  def do_atom_rights(self):
    # NOTE(review): this import statement continues on the next mangled line
    # ("content import textConstruct").
    from
# NOTE(review): completes "from" at the end of the previous mangled line:
# from content import textConstruct (inside Questionable.do_atom_rights).
content import textConstruct
    return textConstruct(), noduplicates()
  def do_atom_subtitle(self):
    from content import textConstruct
    return textConstruct(), noduplicates()
  def do_atom_summary(self):
    from content import textConstruct
    return textConstruct(), noduplicates()
  def do_atom_title(self):
    from content import textConstruct
    return textConstruct(), noduplicates()
  def do_atom_updated(self):
    return rfc3339(), noduplicates()
  def do_app_workspace(self):
    from service import workspace
    return workspace()
  def do_app_collection(self):
    from service import collection
    return collection()
  def do_app_categories(self):
    from categories import categories
    return categories()

# NOTE(review): tar member header (archive metadata, not Python);
# everything below belongs to feedvalidator/itunes.py.
python-feedvalidator-0~svn1022/feedvalidator/itunes.py0000644000175000017500000001751010766017570021501 0ustar  poxpox"""$Id: itunes.py 988 2008-03-12 18:22:48Z sa3ruby $"""

__author__ = "Sam Ruby and Mark Pilgrim "
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"

from validators import *

class itunes:
  # Mixin of itunes:* handlers shared by the channel and item levels.
  def do_itunes_author(self):
    return lengthLimitedText(255), noduplicates()
  def do_itunes_block(self):
    return yesnoclean(), noduplicates()
  def do_itunes_explicit(self):
    return yesnoclean(), noduplicates()
  def do_itunes_keywords(self):
    return lengthLimitedText(255), keywords(), noduplicates()
  def do_itunes_subtitle(self):
    return lengthLimitedText(255), noduplicates()
  def do_itunes_summary(self):
    return lengthLimitedText(4000), noduplicates()
  def do_itunes_image(self):
    return image(), noduplicates()

class itunes_channel(itunes):
  from logging import MissingItunesElement
  def validate(self):
    # iTunes requires language, category and explicit at the channel level
    if not 'language' in self.children and not self.xmlLang:
      self.log(MissingItunesElement({"parent":self.name,
          "element":'language'}))
    if not 'itunes_category' in self.children:
      self.log(MissingItunesElement({"parent":self.name,
          "element":'itunes:category'}))
    if not 'itunes_explicit' in self.children:
      self.log(MissingItunesElement({"parent":self.name,
          "element":'itunes:explicit'}))
    # NOTE(review): this check continues on the next mangled line
    # ('itunes_owner' in self.children).
    if not
# NOTE(review): completes the check started on the previous mangled line:
# if not 'itunes_owner' in self.children.
'itunes_owner' in self.children:
      # NOTE(review): logs MissingItunesEmail / itunes:email although the
      # missing element is itunes:owner — presumably intentional (the owner
      # exists to carry the email); confirm.
      self.log(MissingItunesEmail({"parent":self.name,
          "element":'itunes:email'}))

  def setItunes(self, value):
    if value and not self.itunes:
      # iTunes feeds must be UTF-8 encoded
      if self.dispatcher.encoding.lower() not in ['utf-8','utf8']:
        from logging import NotUTF8
        self.log(NotUTF8({"parent":self.parent.name, "element":self.name}))
      # entries already seen before the first itunes element: validate now
      if self.getFeedType() == TYPE_ATOM and 'entry' in self.children:
        self.validate()
    self.itunes |= value

  def do_itunes_owner(self):
    return owner(), noduplicates()
  def do_itunes_category(self):
    return category()
  def do_itunes_pubDate(self):
    return rfc822(), noduplicates()
  def do_itunes_new_feed_url(self):
    # the real element name contains dashes, so it arrives via this handler
    if self.child != 'itunes_new-feed-url':
      self.log(UndefinedElement({"parent":self.name.replace("_",":"),
          "element":self.child}))
    return rfc2396_full(), noduplicates()

class itunes_item(itunes):
  supported_formats = ['m4a', 'mp3', 'mov', 'mp4', 'm4v', 'pdf']
  def validate(self):
    pass
  def setItunes(self, value):
    if value and not self.itunes:
      self.parent.setItunes(True)
      self.itunes = value
      # re-check any enclosures seen before itunes-ness was established
      if hasattr(self, 'enclosures'):
        save, self.enclosures = self.enclosures, []
        for enclosure in save:
          self.setEnclosure(enclosure)
  def setEnclosure(self, url):
    if self.itunes:
      # http://www.apple.com/itunes/podcasts/techspecs.html#_Toc526931678
      ext = url.split('.')[-1]
      if ext not in itunes_item.supported_formats:
        from logging import UnsupportedItunesFormat
        self.log(UnsupportedItunesFormat({"parent":self.parent.name,
            "element":self.name, "extension":ext}))
    if not hasattr(self, 'enclosures'): self.enclosures = []
    self.enclosures.append(url)
  def do_itunes_duration(self):
    return duration(), noduplicates()

class owner(validatorBase):
  # <itunes:owner> must contain an <itunes:email>
  def validate(self):
    if not "itunes_email" in self.children:
      self.log(MissingElement({"parent":self.name.replace("_",":"),
          "element":"itunes:email"}))
  def do_itunes_email(self):
    return email(), noduplicates()
  def do_itunes_name(self):
    return lengthLimitedText(255), noduplicates()

class subcategory(validatorBase):
  # Validates a category's text attribute against the current list
  # (newlist) and the deprecated list (oldlist).
  def __init__(self, newlist, oldlist):
    validatorBase.__init__(self)
    self.newlist = newlist
    self.oldlist = oldlist
    self.text = None
  def getExpectedAttrNames(self):
    return [(None, u'text')]
  def prevalidate(self):
    try:
      self.text=self.attrs.getValue((None, "text"))
      if not self.text in self.newlist:
        if self.text in self.oldlist:
          self.log(ObsoleteItunesCategory({
              "parent":self.parent.name.replace("_",":"),
              "element":self.name.replace("_",":"), "text":self.text}))
        else:
          self.log(InvalidItunesCategory({
              "parent":self.parent.name.replace("_",":"),
              "element":self.name.replace("_",":"), "text":self.text}))
    except KeyError:
      self.log(MissingAttribute({
          "parent":self.parent.name.replace("_",":"),
          "element":self.name.replace("_",":"), "attr":"text"}))

class image(validatorBase):
  # <itunes:image href="..."> — href is required and must be an http URL
  def getExpectedAttrNames(self):
    return [(None, u'href')]
  def prevalidate(self):
    self.validate_required_attribute((None,'href'), httpURL)

class category(subcategory):
  def __init__(self):
    subcategory.__init__(self, valid_itunes_categories.keys(),
      old_itunes_categories.keys())
  def do_itunes_category(self):
    # unknown top-level category: swallow children silently
    if not self.text: return eater()
    return subcategory(valid_itunes_categories.get(self.text,[]),
      old_itunes_categories.get(self.text,[]))

# Current iTunes category -> subcategories map.
valid_itunes_categories = {
  "Arts": [
    "Design", "Fashion & Beauty", "Food", "Literature",
    "Performing Arts", "Visual Arts"],
  "Business": [
    "Business News", "Careers", "Investing",
    "Management & Marketing", "Shopping"],
  "Comedy": [],
  "Education": [
    "Education Technology", "Higher Education", "K-12",
    "Language Courses", "Training"],
  "Games & Hobbies": [
    "Automotive", "Aviation", "Hobbies", "Other Games", "Video Games"],
  "Government & Organizations": [
    "Local", "National", "Non-Profit", "Regional"],
  "Health": [
    "Alternative Health", "Fitness & Nutrition", "Self-Help", "Sexuality"],
  "Kids & Family": [],
  "Music": [],
  "News & Politics": [],
  "Religion & Spirituality": [
    "Buddhism", "Christianity", "Hinduism", "Islam", "Judaism",
    "Other", "Spirituality"],
  "Science & Medicine": [
    "Medicine", "Natural Sciences",
    # NOTE(review): the literal "Social Sciences" is split across the
    # mangled line break; the string continues on the next line.
    "Social
# NOTE(review): completes the string "Social Sciences" split across the
# mangled line break; these entries finish valid_itunes_categories.
Sciences"],
  "Society & Culture": [
    "History", "Personal Journals", "Philosophy", "Places & Travel"],
  "Sports & Recreation": [
    "Amateur", "College & High School", "Outdoor", "Professional"],
  "Technology": [
    "Gadgets", "Tech News", "Podcasting", "Software How-To"],
  "TV & Film": [],
}

# Categories accepted by earlier versions of the iTunes spec; their use is
# reported as "obsolete" rather than "invalid" (see subcategory.prevalidate).
old_itunes_categories = {
  "Arts & Entertainment": [
    "Architecture", "Books", "Design", "Entertainment", "Games",
    "Performing Arts", "Photography", "Poetry", "Science Fiction"],
  "Audio Blogs": [],
  "Business": [
    "Careers", "Finance", "Investing", "Management", "Marketing"],
  "Comedy": [],
  "Education": [
    "Higher Education", "K-12"],
  "Family": [],
  "Food": [],
  "Health": [
    "Diet & Nutrition", "Fitness", "Relationships", "Self-Help",
    "Sexuality"],
  "International": [
    "Australian", "Belgian", "Brazilian", "Canadian", "Chinese",
    "Dutch", "French", "German", "Hebrew", "Italian", "Japanese",
    "Norwegian", "Polish", "Portuguese", "Spanish", "Swedish"],
  "Movies & Television": [],
  "Music": [],
  "News": [],
  "Politics": [],
  "Public Radio": [],
  "Religion & Spirituality": [
    "Buddhism", "Christianity", "Islam", "Judaism", "New Age",
    "Philosophy", "Spirituality"],
  "Science": [],
  "Sports": [],
  "Talk Radio": [],
  "Technology": [
    "Computers", "Developers", "Gadgets", "Information Technology",
    "News", "Operating Systems", "Podcasting", "Smart Phones",
    "Text/Speech"],
  "Transportation": [
    "Automotive", "Aviation", "Bicycles", "Commuting"],
  "Travel": []
}

class yesnoclean(text):
  # itunes:block / itunes:explicit accept yes, no or clean (case-blind)
  def normalizeWhitespace(self):
    pass
  def validate(self):
    if not self.value.lower() in ['yes','no','clean']:
      self.log(InvalidYesNoClean({"parent":self.parent.name,
          "element":self.name,"value":self.value}))

# NOTE(review): tar member header (archive metadata, not Python);
# everything below belongs to feedvalidator/image.py.
python-feedvalidator-0~svn1022/feedvalidator/image.py0000644000175000017500000000736710766017570021262 0ustar  poxpox"""$Id: image.py 988 2008-03-12 18:22:48Z sa3ruby $"""

__author__ = "Sam Ruby and Mark Pilgrim "
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"

from base import validatorBase
# NOTE(review): this import continues on the next mangled line
# ("validators import *").
from
# NOTE(review): completes "from" at the end of the previous mangled line:
# from validators import *.
validators import *
from extension import extension_everywhere

#
# image element.
#
class image(validatorBase, extension_everywhere):
  def getExpectedAttrNames(self):
    return [(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'resource'),
      (u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'about'),
      (u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'parseType')]
  def validate(self):
    # <image> may not contain bare text
    if self.value.strip():
      self.log(UnexpectedText({"parent":self.parent.name,
          "element":"image"}))
    if self.attrs.has_key((rdfNS,"resource")):
      return # looks like an RSS 1.0 feed
    if not "title" in self.children:
      self.log(MissingTitle({"parent":self.name, "element":"title"}))
    if not "url" in self.children:
      self.log(MissingElement({"parent":self.name, "element":"url"}))
    if self.attrs.has_key((rdfNS,"parseType")):
      return # looks like an RSS 1.1 feed
    if not "link" in self.children:
      self.log(MissingLink({"parent":self.name, "element":"link"}))
  def do_title(self):
    return title(), noduplicates()
  def do_link(self):
    return link(), noduplicates()
  def do_url(self):
    return url(), noduplicates()
  def do_width(self):
    return width(), noduplicates()
  def do_height(self):
    return height(), noduplicates()
  def do_description(self):
    return nonhtml(), noduplicates()
  def do_dc_creator(self):
    return text()
  def do_dc_subject(self):
    return text() # duplicates allowed
  def do_dc_date(self):
    return w3cdtf(), noduplicates()
  def do_cc_license(self):
    return eater()

class link(rfc2396_full):
  def validate(self):
    rfc2396_full.validate(self)
    # image link should match the channel link recorded by channel.py
    if hasattr(self.parent.parent, 'link') and \
       self.parent.parent.link and self.parent.parent.link != self.value:
      self.log(ImageLinkDoesntMatch({"parent":self.parent.name,
          "element":self.name}))

class url(rfc2396_full):
  def validate(self):
    rfc2396_full.validate(self)
    import re
    # only warn when the last dot-segment looks like a file extension
    ext = self.value.split('.')[-1].lower()
    if re.match("^\w+$", ext) and ext not in ['jpg','jpeg','gif','png']:
      self.log(ImageUrlFormat({"parent":self.parent.name,
          "element":self.name}))

class title(nonhtml, noduplicates):
  def validate(self):
    if not self.value.strip():
      self.log(NotBlank({"parent":self.parent.name, "element":self.name}))
    else:
      self.log(ValidTitle({"parent":self.parent.name, "element":self.name}))
    nonhtml.validate(self)
    # image title should match the channel title recorded by channel.py
    if hasattr(self.parent.parent, 'title') and \
       self.parent.parent.title and self.parent.parent.title != self.value:
      self.log(ImageTitleDoesntMatch({"parent":self.parent.name,
          "element":self.name}))

class width(text, noduplicates):
  def validate(self):
    try:
      # RSS 2.0 caps image width at 144
      w = int(self.value)
      if (w <= 0) or (w > 144):
        self.log(InvalidWidth({"parent":self.parent.name,
            "element":self.name, "value":self.value}))
      else:
        self.log(ValidWidth({"parent":self.parent.name,
            "element":self.name}))
    except ValueError:
      self.log(InvalidWidth({"parent":self.parent.name,
          "element":self.name, "value":self.value}))

class height(text, noduplicates):
  def validate(self):
    try:
      # RSS 2.0 caps image height at 400
      h = int(self.value)
      if (h <= 0) or (h > 400):
        self.log(InvalidHeight({"parent":self.parent.name,
            "element":self.name, "value":self.value}))
      else:
        self.log(ValidHeight({"parent":self.parent.name,
            "element":self.name}))
    except ValueError:
      self.log(InvalidHeight({"parent":self.parent.name,
          "element":self.name, "value":self.value}))

# NOTE(review): tar member header (archive metadata, not Python);
# everything below belongs to feedvalidator/channel.py.
python-feedvalidator-0~svn1022/feedvalidator/channel.py0000644000175000017500000002650210766017570021604 0ustar  poxpox"""$Id: channel.py 988 2008-03-12 18:22:48Z sa3ruby $"""

__author__ = "Sam Ruby and Mark Pilgrim "
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"

from base import validatorBase
from logging import *
from validators import *
from itunes import itunes_channel
from extension import *

#
# channel element.
#
class channel(validatorBase, rfc2396, extension_channel, itunes_channel):
  # Validator for <channel>, shared by the RSS 2.0 and RSS 1.0 subclasses
  # defined below.
  def getExpectedAttrNames(self):
    return [(u'urn:atom-extension:indexing', u'index')]
  def prevalidate(self):
    self.validate_optional_attribute((u'urn:atom-extension:indexing',
      u'index'), yesno)
  def __init__(self):
    self.link=None     # set by the link validator below
    self.docs=''       # set by the docs validator below
    self.links = []    # atom:link children, checked for rel='self'
    self.title=None    # set by the title validator below
    validatorBase.__init__(self)
  def validate(self):
    # required core elements
    if not "description" in self.children:
      self.log(MissingDescription({"parent":self.name,
          "element":"description"}))
    if not "link" in self.children:
      self.log(MissingLink({"parent":self.name, "element":"link"}))
    if not "title" in self.children:
      self.log(MissingTitle({"parent":self.name, "element":"title"}))
    if not "dc_language" in self.children and not "language" in self.children:
      if not self.xmlLang:
        self.log(MissingDCLanguage({"parent":self.name,
            "element":"language"}))
    # elements that may appear at most once
    if self.children.count("image") > 1:
      self.log(DuplicateElement({"parent":self.name, "element":"image"}))
    if self.children.count("textInput") > 1:
      self.log(DuplicateElement({"parent":self.name,
          "element":"textInput"}))
    if self.children.count("skipHours") > 1:
      self.log(DuplicateElement({"parent":self.name,
          "element":"skipHours"}))
    if self.children.count("skipDays") > 1:
      self.log(DuplicateElement({"parent":self.name, "element":"skipDays"}))
    # RSS 1.0: rdf:about must be a valid URI and <items> is required
    if self.attrs.has_key((rdfNS,"about")):
      self.value = self.attrs.getValue((rdfNS, "about"))
      rfc2396.validate(self, extraParams={"attr": "rdf:about"})
      if not "items" in self.children:
        self.log(MissingElement({"parent":self.name, "element":"items"}))
    if self.parent.name == 'rss' and self.parent.version == '2.0':
      # RSS 2.0 feeds should carry an atom:link rel="self"
      for link in self.links:
        if link.rel=='self': break
      else:
        self.log(MissingAtomSelfLink({}))
    if self.itunes: itunes_channel.validate(self)
    # don't warn about use of extension attributes for rss-board compliant feeds
    if self.docs == 'http://www.rssboard.org/rss-specification':
      self.dispatcher.loggedEvents = [event for event in
        self.dispatcher.loggedEvents
        if not isinstance(event,UseOfExtensionAttr)]

  def metadata(self):
    # hook: rss20Channel overrides this to flag misplaced items
    pass

  def do_image(self):
    self.metadata()
    from image import image
    return image(), noduplicates()
  def do_textInput(self):
    self.metadata()
    from textInput import textInput
    return textInput(), noduplicates()
  def do_textinput(self):
    self.metadata()
    if not self.attrs.has_key((rdfNS,"about")):
      # optimize for RSS 2.0.  If it is not valid RDF, assume that it is
      # a simple misspelling (in other words, the error message will be
      # less than helpful on RSS 1.0 feeds.
      self.log(UndefinedElement({"parent":self.name,
          "element":"textinput"}))
    return eater(), noduplicates()
  def do_link(self):
    self.metadata()
    return link(), noduplicates()
  def do_title(self):
    self.metadata()
    return title(), noduplicates(), nonblank()
  def do_description(self):
    self.metadata()
    return nonhtml(), noduplicates()
  def do_blink(self):
    return blink(), noduplicates()
  def do_atom_author(self):
    from author import author
    return author()
  def do_atom_category(self):
    from category import category
    return category()
  def do_atom_contributor(self):
    from author import author
    return author()
  def do_atom_generator(self):
    from generator import generator
    return generator(), nonblank(), noduplicates()
  def do_atom_id(self):
    return rfc2396_full(), noduplicates()
  def do_atom_icon(self):
    return nonblank(), rfc2396(), noduplicates()
  def do_atom_link(self):
    from link import link
    self.links.append(link())
    return self.links[-1]
  def do_atom_logo(self):
    return nonblank(), rfc2396(), noduplicates()
  def do_atom_title(self):
    from content import textConstruct
    return textConstruct(), noduplicates()
  def do_atom_subtitle(self):
    from content import textConstruct
    return textConstruct(), noduplicates()
  def do_atom_rights(self):
    from content import textConstruct
    return textConstruct(), noduplicates()
  def do_atom_updated(self):
    return rfc3339(), noduplicates()
  def do_dc_creator(self):
    if "managingEditor" in self.children:
      self.log(DuplicateSemantics({"core":"managingEditor",
          "ext":"dc:creator"}))
    return text() # duplicates allowed
  def do_dc_subject(self):
    if "category" in self.children:
      self.log(DuplicateSemantics({"core":"category", "ext":"dc:subject"}))
    return text() # duplicates allowed
  def do_dc_date(self):
    if "pubDate" in self.children:
      self.log(DuplicateSemantics({"core":"pubDate", "ext":"dc:date"}))
    return w3cdtf(), noduplicates()
  def do_cc_license(self):
    if "creativeCommons_license" in self.children:
      self.log(DuplicateSemantics({"core":"creativeCommons:license",
          "ext":"cc:license"}))
    return eater()
  def do_creativeCommons_license(self):
    if "cc_license" in self.children:
      self.log(DuplicateSemantics({"core":"creativeCommons:license",
          "ext":"cc:license"}))
    return rfc2396_full()

class rss20Channel(channel):
  # RSS 2.0 <channel>: records item positions so that items appearing
  # before channel metadata can be flagged as misplaced.
  def __init__(self):
    self.itemlocs=[]
    channel.__init__(self)

  def metadata(self):
    # flag any <item> seen before this metadata element
    locator=self.dispatcher.locator
    for line,col in self.itemlocs:
      offset=(line - locator.getLineNumber(),
        col - locator.getColumnNumber())
      self.log(MisplacedItem({"parent":self.name, "element":"item"}),
        offset)
    self.itemlocs = []

  def do_textInput(self):
    self.log(AvoidTextInput({}))
    return channel.do_textInput(self)
  def do_item(self):
    # record position; misplacement is judged when metadata arrives later
    locator=self.dispatcher.locator
    self.itemlocs.append((locator.getLineNumber(),
      locator.getColumnNumber()))
    from item import rss20Item
    return rss20Item()
  def do_category(self):
    self.metadata()
    return category()
  def do_cloud(self):
    self.metadata()
    return cloud(), noduplicates()
  do_rating = validatorBase.leaf # TODO test cases?!?
def do_ttl(self): self.metadata() return positiveInteger(), nonblank(), noduplicates() def do_docs(self): self.metadata() return docs(), noduplicates() def do_generator(self): self.metadata() if "admin_generatorAgent" in self.children: self.log(DuplicateSemantics({"core":"generator", "ext":"admin:generatorAgent"})) return text(), noduplicates() def do_pubDate(self): self.metadata() if "dc_date" in self.children: self.log(DuplicateSemantics({"core":"pubDate", "ext":"dc:date"})) return rfc822(), noduplicates() def do_managingEditor(self): self.metadata() if "dc_creator" in self.children: self.log(DuplicateSemantics({"core":"managingEditor", "ext":"dc:creator"})) return email_with_name(), noduplicates() def do_webMaster(self): self.metadata() if "dc_publisher" in self.children: self.log(DuplicateSemantics({"core":"webMaster", "ext":"dc:publisher"})) return email_with_name(), noduplicates() def do_language(self): self.metadata() if "dc_language" in self.children: self.log(DuplicateSemantics({"core":"language", "ext":"dc:language"})) return iso639(), noduplicates() def do_copyright(self): self.metadata() if "dc_rights" in self.children: self.log(DuplicateSemantics({"core":"copyright", "ext":"dc:rights"})) return nonhtml(), noduplicates() def do_lastBuildDate(self): self.metadata() if "dcterms_modified" in self.children: self.log(DuplicateSemantics({"core":"lastBuildDate", "ext":"dcterms:modified"})) return rfc822(), noduplicates() def do_skipHours(self): self.metadata() from skipHours import skipHours return skipHours() def do_skipDays(self): self.metadata() from skipDays import skipDays return skipDays() class rss10Channel(channel): def getExpectedAttrNames(self): return [(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'about'), (u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'about')] def prevalidate(self): if self.attrs.has_key((rdfNS,"about")): if not "abouts" in self.dispatcher.__dict__: self.dispatcher.__dict__["abouts"] = [] 
self.dispatcher.__dict__["abouts"].append(self.attrs[(rdfNS,"about")]) def do_items(self): # this actually should be from the rss1.0 ns if not self.attrs.has_key((rdfNS,"about")): self.log(MissingAttribute({"parent":self.name, "element":self.name, "attr":"rdf:about"})) from item import items return items(), noduplicates() def do_rdfs_label(self): return text() def do_rdfs_comment(self): return text() class link(rfc2396_full): def validate(self): self.parent.link = self.value rfc2396_full.validate(self) class title(nonhtml): def validate(self): self.parent.title = self.value nonhtml.validate(self) class docs(rfc2396_full): def validate(self): self.parent.docs = self.value rfc2396_full.validate(self) class blink(text): def validate(self): self.log(NoBlink({})) class category(nonhtml): def getExpectedAttrNames(self): return [(None, u'domain')] class cloud(validatorBase): def getExpectedAttrNames(self): return [(None, u'domain'), (None, u'path'), (None, u'registerProcedure'), (None, u'protocol'), (None, u'port')] def prevalidate(self): if (None, 'domain') not in self.attrs.getNames(): self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":"domain"})) else: self.log(ValidCloud({"parent":self.parent.name, "element":self.name, "attr":"domain"})) try: if int(self.attrs.getValue((None, 'port'))) <= 0: self.log(InvalidIntegerAttribute({"parent":self.parent.name, "element":self.name, "attr":'port'})) else: self.log(ValidCloud({"parent":self.parent.name, "element":self.name, "attr":'port'})) except KeyError: self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":'port'})) except ValueError: self.log(InvalidIntegerAttribute({"parent":self.parent.name, "element":self.name, "attr":'port'})) if (None, 'path') not in self.attrs.getNames(): self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":"path"})) else: self.log(ValidCloud({"parent":self.parent.name, "element":self.name, "attr":"path"})) if 
(None, 'registerProcedure') not in self.attrs.getNames(): self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":"registerProcedure"})) else: self.log(ValidCloud({"parent":self.parent.name, "element":self.name, "attr":"registerProcedure"})) if (None, 'protocol') not in self.attrs.getNames(): self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":"protocol"})) else: self.log(ValidCloud({"parent":self.parent.name, "element":self.name, "attr":"protocol"})) ## TODO - is there a list of accepted protocols for this thing? return validatorBase.prevalidate(self) python-feedvalidator-0~svn1022/feedvalidator/generator.py0000644000175000017500000000150610766017570022157 0ustar poxpox"""$Id: generator.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" from base import validatorBase from validators import * # # Atom generator element # class generator(nonhtml,rfc2396): def getExpectedAttrNames(self): return [(None, u'uri'), (None, u'version')] def prevalidate(self): if self.attrs.has_key((None, "url")): self.value = self.attrs.getValue((None, "url")) rfc2396.validate(self, extraParams={"attr": "url"}) if self.attrs.has_key((None, "uri")): self.value = self.attrs.getValue((None, "uri")) rfc2396.validate(self, errorClass=InvalidURIAttribute, extraParams={"attr": "uri"}) self.value='' python-feedvalidator-0~svn1022/feedvalidator/iso639codes.py0000644000175000017500000004356710766017570022260 0ustar poxpox"""$Id: iso639codes.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" isoLang = \ {'aa': 'Afar', 'ab': 'Abkhazian', 'ae': 'Avestan', 'af': 'Afrikaans', 'ak': 'Akan', 'am': 'Amharic', 'an': 'Aragonese', 'ar': 'Arabic', 'as': 'Assamese', 'av': 'Avaric', 'ay': 'Aymara', 'az': 
'Azerbaijani', 'ba': 'Bashkir', 'be': 'Byelorussian', 'bg': 'Bulgarian', 'bh': 'Bihari', 'bi': 'Bislama', 'bm': 'Bambara', 'bn': 'Bengali;Bangla', 'bo': 'Tibetan', 'br': 'Breton', 'bs': 'Bosnian', 'ca': 'Catalan', 'ce': 'Chechen', 'ch': 'Chamorro', 'co': 'Corsican', 'cr': 'Cree', 'cs': 'Czech', 'cu': 'Church Slavic', 'cv': 'Chuvash', 'cy': 'Welsh', 'da': 'Danish', 'de': 'German', 'dv': 'Divehi', 'dz': 'Dzongkha', 'ee': 'Ewe', 'el': 'Greek', 'en': 'English', 'eo': 'Esperanto', 'es': 'Spanish', 'et': 'Estonian', 'eu': 'Basque', 'fa': 'Persian (Farsi)', 'ff': 'Fulah', 'fi': 'Finnish', 'fj': 'Fiji', 'fo': 'Faroese', 'fr': 'French', 'fy': 'Frisian, Western', 'ga': 'Irish', 'gd': 'Scots Gaelic', 'gl': 'Galician', 'gn': 'Guarani', 'gu': 'Gujarati', 'gv': 'Manx', 'ha': 'Hausa', 'he': 'Hebrew', 'hi': 'Hindi', 'ho': 'Hiri Motu', 'hr': 'Croatian', 'ht': 'Haitian', 'hu': 'Hungarian', 'hy': 'Armenian', 'hz': 'Herero', 'ia': 'Interlingua', 'id': 'Indonesian', 'ie': 'Interlingue', 'ig': 'Igbo', 'ii': 'Sichuan Yi', 'ik': 'Inupiak', 'io': 'Ido', 'is': 'Icelandic', 'it': 'Italian', 'iu': 'Inuktitut', 'ja': 'Japanese', 'jv': 'Javanese', 'ka': 'Georgian', 'kg': 'Kongo', 'ki': 'Kikuyu; Gikuyu', 'kj': 'Kuanyama; Kwanyama', 'kk': 'Kazakh', 'kl': 'Greenlandic', 'km': 'Cambodian', 'kn': 'Kannada', 'ko': 'Korean', 'kr': 'Kanuri', 'ks': 'Kashmiri', 'ku': 'Kurdish', 'kv': 'Komi', 'kw': 'Cornish', 'ky': 'Kirghiz', 'la': 'Latin', 'lb': 'Letzeburgesch; Luxembourgish', 'lg': 'Ganda', 'li': 'Limburgan; Limburger, Limburgish', 'ln': 'Lingala', 'lo': 'Lao', 'lt': 'Lithuanian', 'lu': 'Luba-Katanga', 'lv': 'Latvian', 'mg': 'Malagasy', 'mh': 'Marshallese', 'mi': 'Maori', 'mk': 'Macedonian', 'ml': 'Malayalam', 'mn': 'Mongolian', 'mo': 'Moldavian', 'mr': 'Marathi', 'ms': 'Malay', 'mt': 'Maltese', 'my': 'Burmese', 'na': 'Nauru', 'nb': 'Norwegian Bokmal', 'nd': 'Ndebele, North', 'ne': 'Nepali', 'ng': 'Ndonga', 'nl': 'Dutch', 'nn': 'Norwegian Nynorsk', 'no': 'Norwegian', 'nr': 'Ndebele, South', 'nv': 
'Navaho; Navajo', 'ny': 'Chewa; Chichewa; Nyanha', 'oc': 'Occitan', 'oj': 'Ojibwa', 'om': 'Afan (Oromo)', 'or': 'Oriya', 'os': 'Ossetian; Ossetic', 'pa': 'Punjabi', 'pi': 'Pali', 'pl': 'Polish', 'ps': 'Pushto', 'pt': 'Portuguese', 'qu': 'Quechua', 'rm': 'Rhaeto-Romance', 'rn': 'Kurundi', 'ro': 'Romanian', 'ru': 'Russian', 'rw': 'Kinyarwanda', 'sa': 'Sanskrit', 'sc': 'Sardinian', 'sd': 'Sindhi', 'se': 'Northern Sami', 'sg': 'Sangho', 'sh': 'Serbo-Croatian', 'si': 'Singhalese', 'sk': 'Slovak', 'sl': 'Slovenian', 'sm': 'Samoan', 'sn': 'Shona', 'so': 'Somali', 'sq': 'Albanian', 'sr': 'Serbian', 'ss': 'Swati', 'st': 'Sotho, Southern', 'su': 'Sundanese', 'sv': 'Swedish', 'sw': 'Swahili', 'ta': 'Tamil', 'te': 'Telugu', 'tg': 'Tajik', 'th': 'Thai', 'ti': 'Tigrinya', 'tk': 'Turkmen', 'tl': 'Tagalog', 'tn': 'Tswana', 'to': 'Tonga', 'tr': 'Turkish', 'ts': 'Tsonga', 'tt': 'Tatar', 'tw': 'Twi', 'ty': 'Tahitian', 'ug': 'Uigur', 'uk': 'Ukrainian', 'ur': 'Urdu', 'uz': 'Uzbek', 've': 'Venda', 'vi': 'Vietnamese', 'vo': 'Volapuk', 'wa': 'Walloon', 'wo': 'Wolof', 'xh': 'Xhosa', 'yi': 'Yiddish', 'yo': 'Yoruba', 'za': 'Zhuang', 'zh': 'Chinese', 'zu': 'Zulu', 'x' : 'a user-defined language', 'xx': 'a user-defined language', 'abk': 'Abkhazian', 'ace': 'Achinese', 'ach': 'Acoli', 'ada': 'Adangme', 'ady': 'Adygei', 'ady': 'Adyghe', 'aar': 'Afar', 'afh': 'Afrihili', 'afr': 'Afrikaans', 'afa': 'Afro-Asiatic (Other)', 'ain': 'Ainu', 'aka': 'Akan', 'akk': 'Akkadian', 'alb': 'Albanian', 'sqi': 'Albanian', 'gws': 'Alemanic', 'ale': 'Aleut', 'alg': 'Algonquian languages', 'tut': 'Altaic (Other)', 'amh': 'Amharic', 'anp': 'Angika', 'apa': 'Apache languages', 'ara': 'Arabic', 'arg': 'Aragonese', 'arc': 'Aramaic', 'arp': 'Arapaho', 'arn': 'Araucanian', 'arw': 'Arawak', 'arm': 'Armenian', 'hye': 'Armenian', 'rup': 'Aromanian', 'art': 'Artificial (Other)', 'asm': 'Assamese', 'ast': 'Asturian', 'ath': 'Athapascan languages', 'aus': 'Australian languages', 'map': 'Austronesian (Other)', 'ava': 'Avaric', 
'ave': 'Avestan', 'awa': 'Awadhi', 'aym': 'Aymara', 'aze': 'Azerbaijani', 'ast': 'Bable', 'ban': 'Balinese', 'bat': 'Baltic (Other)', 'bal': 'Baluchi', 'bam': 'Bambara', 'bai': 'Bamileke languages', 'bad': 'Banda', 'bnt': 'Bantu (Other)', 'bas': 'Basa', 'bak': 'Bashkir', 'baq': 'Basque', 'eus': 'Basque', 'btk': 'Batak (Indonesia)', 'bej': 'Beja', 'bel': 'Belarusian', 'bem': 'Bemba', 'ben': 'Bengali', 'ber': 'Berber (Other)', 'bho': 'Bhojpuri', 'bih': 'Bihari', 'bik': 'Bikol', 'byn': 'Bilin', 'bin': 'Bini', 'bis': 'Bislama', 'byn': 'Blin', 'nob': 'Bokmal, Norwegian', 'bos': 'Bosnian', 'bra': 'Braj', 'bre': 'Breton', 'bug': 'Buginese', 'bul': 'Bulgarian', 'bua': 'Buriat', 'bur': 'Burmese', 'mya': 'Burmese', 'cad': 'Caddo', 'car': 'Carib', 'spa': 'Castilian', 'cat': 'Catalan', 'cau': 'Caucasian (Other)', 'ceb': 'Cebuano', 'cel': 'Celtic (Other)', 'cai': 'Central American Indian (Other)', 'chg': 'Chagatai', 'cmc': 'Chamic languages', 'cha': 'Chamorro', 'che': 'Chechen', 'chr': 'Cherokee', 'nya': 'Chewa', 'chy': 'Cheyenne', 'chb': 'Chibcha', 'nya': 'Chichewa', 'chi': 'Chinese', 'zho': 'Chinese', 'chn': 'Chinook jargon', 'chp': 'Chipewyan', 'cho': 'Choctaw', 'zha': 'Chuang', 'chu': 'Church Slavic; Church Slavonic; Old Church Slavonic; Old Church Slavic; Old Bulgarian', 'chk': 'Chuukese', 'chv': 'Chuvash', 'nwc': 'Classical Nepal Bhasa; Classical Newari; Old Newari', 'cop': 'Coptic', 'cor': 'Cornish', 'cos': 'Corsican', 'cre': 'Cree', 'mus': 'Creek', 'crp': 'Creoles and pidgins(Other)', 'cpe': 'Creoles and pidgins, English-based (Other)', 'cpf': 'Creoles and pidgins, French-based (Other)', 'cpp': 'Creoles and pidgins, Portuguese-based (Other)', 'crh': 'Crimean Tatar; Crimean Turkish', 'scr': 'Croatian', 'hrv': 'Croatian', 'cus': 'Cushitic (Other)', 'cze': 'Czech', 'ces': 'Czech', 'dak': 'Dakota', 'dan': 'Danish', 'dar': 'Dargwa', 'day': 'Dayak', 'del': 'Delaware', 'din': 'Dinka', 'div': 'Divehi', 'doi': 'Dogri', 'dgr': 'Dogrib', 'dra': 'Dravidian (Other)', 'dua': 'Duala', 
'dut': 'Dutch', 'nld': 'Dutch', 'dum': 'Dutch, Middle (ca. 1050-1350)', 'dyu': 'Dyula', 'dzo': 'Dzongkha', 'efi': 'Efik', 'egy': 'Egyptian (Ancient)', 'eka': 'Ekajuk', 'elx': 'Elamite', 'eng': 'English', 'enm': 'English, Middle (1100-1500)', 'ang': 'English, Old (ca.450-1100)', 'myv': 'Erzya', 'epo': 'Esperanto', 'est': 'Estonian', 'ewe': 'Ewe', 'ewo': 'Ewondo', 'fan': 'Fang', 'fat': 'Fanti', 'fao': 'Faroese', 'fij': 'Fijian', 'fil': 'Filipino; Pilipino', 'fin': 'Finnish', 'fiu': 'Finno-Ugrian (Other)', 'fon': 'Fon', 'fre': 'French', 'fra': 'French', 'frm': 'French, Middle (ca.1400-1600)', 'fro': 'French, Old (842-ca.1400)', 'frs': 'Frisian, Eastern', 'fry': 'Frisian, Western', 'fur': 'Friulian', 'ful': 'Fulah', 'gaa': 'Ga', 'gla': 'Gaelic', 'glg': 'Gallegan', 'lug': 'Ganda', 'gay': 'Gayo', 'gba': 'Gbaya', 'gez': 'Geez', 'geo': 'Georgian', 'kat': 'Georgian', 'ger': 'German', 'deu': 'German', 'nds': 'German, Low', 'gmh': 'German, Middle High (ca.1050-1500)', 'goh': 'German, Old High (ca.750-1050)', 'gem': 'Germanic (Other)', 'kik': 'Gikuyu', 'gil': 'Gilbertese', 'gon': 'Gondi', 'gor': 'Gorontalo', 'got': 'Gothic', 'grb': 'Grebo', 'grc': 'Greek, Ancient (to 1453)', 'gre': 'Greek, Modern (1453-)', 'ell': 'Greek, Modern (1453-)', 'kal': 'Greenlandic; Kalaallisut', 'grn': 'Guarani', 'guj': 'Gujarati', 'gwi': 'Gwich\'in', 'hai': 'Haida', 'hat': 'Haitian', 'hau': 'Hausa', 'haw': 'Hawaiian', 'heb': 'Hebrew', 'her': 'Herero', 'hil': 'Hiligaynon', 'him': 'Himachali', 'hin': 'Hindi', 'hmo': 'Hiri Motu', 'hit': 'Hittite', 'hmn': 'Hmong', 'hun': 'Hungarian', 'hup': 'Hupa', 'iba': 'Iban', 'ice': 'Icelandic', 'isl': 'Icelandic', 'ido': 'Ido', 'ibo': 'Igbo', 'ijo': 'Ijo', 'ilo': 'Iloko', 'smn': 'Inari Sami', 'inc': 'Indic (Other)', 'ine': 'Indo-European (Other)', 'ind': 'Indonesian', 'inh': 'Ingush', 'ina': 'Interlingua (International Auxiliary Language Association)', 'ile': 'Interlingue', 'iku': 'Inuktitut', 'ipk': 'Inupiaq', 'ira': 'Iranian (Other)', 'gle': 'Irish', 'mga': 
'Irish, Middle (900-1200)', 'sga': 'Irish, Old (to 900)', 'iro': 'Iroquoian languages', 'ita': 'Italian', 'jpn': 'Japanese', 'jav': 'Javanese', 'jrb': 'Judeo-Arabic', 'jpr': 'Judeo-Persian', 'kbd': 'Kabardian', 'kab': 'Kabyle', 'kac': 'Kachin', 'kal': 'Kalaallisut', 'xal': 'Kalmyk', 'kam': 'Kamba', 'kan': 'Kannada', 'kau': 'Kanuri', 'krc': 'Karachay-Balkar', 'kaa': 'Kara-Kalpak', 'krl': 'Karelian', 'kar': 'Karen', 'kas': 'Kashmiri', 'csb': 'Kashubian', 'kaw': 'Kawi', 'kaz': 'Kazakh', 'kha': 'Khasi', 'khm': 'Khmer', 'khi': 'Khoisan (Other)', 'kho': 'Khotanese', 'kik': 'Kikuyu', 'kmb': 'Kimbundu', 'kin': 'Kinyarwanda', 'kir': 'Kirghiz', 'tlh': 'Klingon; tlhIngan-Hol', 'kom': 'Komi', 'kon': 'Kongo', 'kok': 'Konkani', 'kor': 'Korean', 'kos': 'Kosraean', 'kpe': 'Kpelle', 'kro': 'Kru', 'kua': 'Kuanyama', 'kum': 'Kumyk', 'kur': 'Kurdish', 'kru': 'Kurukh', 'kut': 'Kutenai', 'kua': 'Kwanyama', 'lad': 'Ladino', 'lah': 'Lahnda', 'lam': 'Lamba', 'lao': 'Lao', 'lat': 'Latin', 'lav': 'Latvian', 'ltz': 'Letzeburgesch', 'lez': 'Lezghian', 'lim': 'Limburgan', 'lin': 'Lingala', 'lit': 'Lithuanian', 'jbo': 'Lojban', 'nds': 'Low German', 'dsb': 'Lower Sorbian', 'loz': 'Lozi', 'lub': 'Luba-Katanga', 'lua': 'Luba-Lulua', 'lui': 'Luiseno', 'smj': 'Lule Sami', 'lun': 'Lunda', 'luo': 'Luo (Kenya and Tanzania)', 'lus': 'Lushai', 'ltz': 'Luxembourgish', 'mac': 'Macedonian', 'mkd': 'Macedonian', 'mad': 'Madurese', 'mag': 'Magahi', 'mai': 'Maithili', 'mak': 'Makasar', 'mlg': 'Malagasy', 'may': 'Malay', 'msa': 'Malay', 'mal': 'Malayalam', 'mlt': 'Maltese', 'mnc': 'Manchu', 'mdr': 'Mandar', 'man': 'Mandingo', 'mni': 'Manipuri', 'mno': 'Manobo languages', 'glv': 'Manx', 'mao': 'Maori', 'mri': 'Maori', 'mar': 'Marathi', 'chm': 'Mari', 'mah': 'Marshallese', 'mwr': 'Marwari', 'mas': 'Masai', 'myn': 'Mayan languages', 'men': 'Mende', 'mic': 'Micmac', 'min': 'Minangkabau', 'mwl': 'Mirandese', 'mis': 'Miscellaneous languages', 'moh': 'Mohawk', 'mdf': 'Moksha', 'mol': 'Moldavian', 'mkh': 'Mon-Khmer 
(Other)', 'lol': 'Mongo', 'mon': 'Mongolian', 'mos': 'Mossi', 'mul': 'Multiple languages', 'mun': 'Munda languages', 'nah': 'Nahuatl', 'nau': 'Nauru', 'nav': 'Navaho; Navajo', 'nde': 'Ndebele, North', 'nbl': 'Ndebele, South', 'ndo': 'Ndonga', 'nap': 'Neapolitan', 'nep': 'Nepali', 'new': 'Newari', 'nia': 'Nias', 'nic': 'Niger-Kordofanian (Other)', 'ssa': 'Nilo-Saharan (Other)', 'niu': 'Niuean', 'nog': 'Nogai', 'non': 'Norse, Old', 'nai': 'North American Indian (Other)', 'frr': 'Northern Frisian', 'sme': 'Northern Sami', 'nso': 'Northern Sotho; Pedi; Sepedi', 'nde': 'North Ndebele', 'nor': 'Norwegian', 'nob': 'Norwegian Bokmal', 'nno': 'Norwegian Nynorsk', 'nub': 'Nubian languages', 'nym': 'Nyamwezi', 'nya': 'Nyanja', 'nyn': 'Nyankole', 'nno': 'Nynorsk, Norwegian', 'nyo': 'Nyoro', 'nzi': 'Nzima', 'oci': 'Occitan (post 1500)', 'oji': 'Ojibwa', 'ori': 'Oriya', 'orm': 'Oromo', 'osa': 'Osage', 'oss': 'Ossetian; Ossetic', 'oto': 'Otomian languages', 'pal': 'Pahlavi', 'pau': 'Palauan', 'pli': 'Pali', 'pam': 'Pampanga', 'pag': 'Pangasinan', 'pan': 'Panjabi', 'pap': 'Papiamento', 'paa': 'Papuan (Other)', 'per': 'Persian', 'fas': 'Persian', 'peo': 'Persian, Old (ca.600-400)', 'phi': 'Philippine (Other)', 'phn': 'Phoenician', 'pon': 'Pohnpeian', 'pol': 'Polish', 'por': 'Portuguese', 'pra': 'Prakrit languages', 'oci': 'Provencal', 'pro': 'Provencal, Old (to 1500)', 'pan': 'Punjabi', 'pus': 'Pushto', 'que': 'Quechua', 'roh': 'Raeto-Romance', 'raj': 'Rajasthani', 'rap': 'Rapanui', 'rar': 'Rarotongan', 'qaa': 'Reserved for local use', 'qtz': 'Reserved for local use', 'roa': 'Romance (Other)', 'rum': 'Romanian', 'ron': 'Romanian', 'rom': 'Romany', 'run': 'Rundi', 'rus': 'Russian', 'sal': 'Salishan languages', 'sam': 'Samaritan Aramaic', 'smi': 'Sami languages (Other)', 'smo': 'Samoan', 'sad': 'Sandawe', 'sag': 'Sango', 'san': 'Sanskrit', 'sat': 'Santali', 'srd': 'Sardinian', 'sas': 'Sasak', 'nds': 'Saxon, Low', 'sco': 'Scots', 'gla': 'Scottish Gaelic', 'sel': 'Selkup', 'sem': 
'Semitic (Other)', 'nso': 'Sepedi; Northern Sotho; Pedi', 'scc': 'Serbian', 'srp': 'Serbian', 'srr': 'Serer', 'shn': 'Shan', 'sna': 'Shona', 'iii': 'Sichuan Yi', 'scn': 'Sicilian', 'sid': 'Sidamo', 'sgn': 'Sign languages', 'bla': 'Siksika', 'snd': 'Sindhi', 'sin': 'Sinhalese', 'sit': 'Sino-Tibetan (Other)', 'sio': 'Siouan languages', 'sms': 'Skolt Sami', 'den': 'Slave (Athapascan)', 'sla': 'Slavic (Other)', 'slo': 'Slovak', 'slk': 'Slovak', 'slv': 'Slovenian', 'sog': 'Sogdian', 'som': 'Somali', 'son': 'Songhai', 'snk': 'Soninke', 'wen': 'Sorbian languages', 'nso': 'Sotho, Northern', 'sot': 'Sotho, Southern', 'sai': 'South American Indian (Other)', 'alt': 'Southern Altai', 'sma': 'Southern Sami', 'nbl': 'South Ndebele', 'spa': 'Spanish', 'srn': 'Sranan Tongo', 'suk': 'Sukuma', 'sux': 'Sumerian', 'sun': 'Sundanese', 'sus': 'Susu', 'swa': 'Swahili', 'ssw': 'Swati', 'swe': 'Swedish', 'gsw': 'Swiss German; Alemanic', 'syr': 'Syriac', 'tgl': 'Tagalog', 'tah': 'Tahitian', 'tai': 'Tai (Other)', 'tgk': 'Tajik', 'tmh': 'Tamashek', 'tam': 'Tamil', 'tat': 'Tatar', 'tel': 'Telugu', 'ter': 'Tereno', 'tet': 'Tetum', 'tha': 'Thai', 'tib': 'Tibetan', 'bod': 'Tibetan', 'tig': 'Tigre', 'tir': 'Tigrinya', 'tem': 'Timne', 'tiv': 'Tiv', 'tlh': 'tlhIngan-Hol; Klingon', 'tli': 'Tlingit', 'tpi': 'Tok Pisin', 'tkl': 'Tokelau', 'tog': 'Tonga (Nyasa)', 'ton': 'Tonga (Tonga Islands)', 'tsi': 'Tsimshian', 'tso': 'Tsonga', 'tsn': 'Tswana', 'tum': 'Tumbuka', 'tup': 'Tupi languages', 'tur': 'Turkish', 'ota': 'Turkish, Ottoman (1500-1928)', 'tuk': 'Turkmen', 'tvl': 'Tuvalu', 'tyv': 'Tuvinian', 'twi': 'Twi', 'udm': 'Udmurt', 'uga': 'Ugaritic', 'uig': 'Uighur', 'ukr': 'Ukrainian', 'umb': 'Umbundu', 'und': 'Undetermined', 'hsb': 'Upper Sorbian', 'urd': 'Urdu', 'uzb': 'Uzbek', 'vai': 'Vai', 'cat': 'Valencian', 'ven': 'Venda', 'vie': 'Vietnamese', 'vol': 'Volapuk', 'vot': 'Votic', 'wak': 'Wakashan languages', 'wal': 'Walamo', 'wln': 'Walloon', 'war': 'Waray', 'was': 'Washo', 'wel': 'Welsh', 'cym': 
'Welsh', 'fry': 'Wester Frisian', 'wol': 'Wolof', 'xho': 'Xhosa', 'sah': 'Yakut', 'yao': 'Yao', 'yap': 'Yapese', 'yid': 'Yiddish', 'yor': 'Yoruba', 'ypk': 'Yupik languages', 'znd': 'Zande', 'zap': 'Zapotec', 'zen': 'Zenaga', 'zha': 'Zhuang', 'zul': 'Zulu', 'zun': 'Zuni' } python-feedvalidator-0~svn1022/feedvalidator/base.py0000644000175000017500000005241611037140553021077 0ustar poxpox"""$Id: base.py 1022 2008-07-15 15:25:31Z joe.walton.gglcd $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 1022 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" from xml.sax.handler import ContentHandler from xml.sax.xmlreader import Locator from logging import NonCanonicalURI, NotUTF8 import re # references: # http://web.resource.org/rss/1.0/modules/standard.html # http://web.resource.org/rss/1.0/modules/proposed.html # http://dmoz.org/Reference/Libraries/Library_and_Information_Science/Technical_Services/Cataloguing/Metadata/RDF/Applications/RSS/Specifications/RSS1.0_Modules/ namespaces = { "http://www.bloglines.com/about/specs/fac-1.0": "access", "http://webns.net/mvcb/": "admin", "http://purl.org/rss/1.0/modules/aggregation/": "ag", "http://purl.org/rss/1.0/modules/annotate/": "annotate", "http://www.w3.org/2007/app": "app", "http://media.tangent.org/rss/1.0/": "audio", "http://backend.userland.com/blogChannelModule": "blogChannel", "http://web.resource.org/cc/": "cc", "http://www.microsoft.com/schemas/rss/core/2005": "cf", "http://backend.userland.com/creativeCommonsRssModule": "creativeCommons", "http://purl.org/rss/1.0/modules/company": "company", "http://purl.org/rss/1.0/modules/content/": "content", "http://conversationsnetwork.org/rssNamespace-1.0/": "conversationsNetwork", "http://my.theinfo.org/changed/1.0/rss/": "cp", "http://purl.org/dc/elements/1.1/": "dc", "http://purl.org/dc/terms/": "dcterms", "http://purl.org/rss/1.0/modules/email/": "email", "http://purl.org/rss/1.0/modules/event/": "ev", 
"http://purl.org/syndication/history/1.0": "fh", "http://www.w3.org/2003/01/geo/wgs84_pos#": "geo", "http://geourl.org/rss/module/": "geourl", "http://www.georss.org/georss": "georss", "http://www.opengis.net/gml": "gml", "http://postneo.com/icbm": "icbm", "http://purl.org/rss/1.0/modules/image/": "image", "urn:atom-extension:indexing": "indexing", "http://www.itunes.com/dtds/podcast-1.0.dtd": "itunes", "http://rssnamespace.org/feedburner/ext/1.0": "feedburner", "http://xmlns.com/foaf/0.1/": "foaf", "http://purl.org/rss/1.0/modules/link/": "l", "http://search.yahoo.com/mrss/": "media", "http://www.w3.org/1998/Math/MathML": "mathml", "http://a9.com/-/spec/opensearchrss/1.0/": "opensearch10", "http://a9.com/-/spec/opensearch/1.1/": "opensearch", "http://www.opml.org/spec2": "opml", "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", "http://www.w3.org/2000/01/rdf-schema#": "rdfs", "http://purl.org/rss/1.0/modules/reference/": "ref", "http://purl.org/rss/1.0/modules/richequiv/": "reqv", "http://purl.org/rss/1.0/modules/rss091#": "rss091", "http://purl.org/rss/1.0/modules/search/": "search", "http://purl.org/rss/1.0/modules/slash/": "slash", "http://purl.org/rss/1.0/modules/servicestatus/": "ss", "http://hacks.benhammersley.com/rss/streaming/": "str", "http://purl.org/rss/1.0/modules/subscription/": "sub", "http://feedsync.org/2007/feedsync": "sx", "http://www.w3.org/2000/svg": "svg", "http://purl.org/rss/1.0/modules/syndication/": "sy", "http://purl.org/rss/1.0/modules/taxonomy/": "taxo", "http://purl.org/rss/1.0/modules/threading/": "thr", "http://purl.org/syndication/thread/1.0": "thr", "http://madskills.com/public/xml/rss/module/trackback/": "trackback", "http://wellformedweb.org/CommentAPI/": "wfw", "http://purl.org/rss/1.0/modules/wiki/": "wiki", "http://www.usemod.com/cgi-bin/mb.pl?ModWiki": "wiki", "http://schemas.xmlsoap.org/soap/envelope/": "soap", "http://www.w3.org/2005/Atom": "atom", "http://www.w3.org/1999/xhtml": "xhtml", 
"http://my.netscape.com/rdf/simple/0.9/": "rss090", "http://purl.org/rss/1.0/": "rss1", "http://purl.org/net/rss1.1#": "rss11", "http://base.google.com/ns/1.0": "g", "http://www.w3.org/XML/1998/namespace": "xml", "http://openid.net/xmlns/1.0": "openid", "http://earth.google.com/kml/2.0": "kml20", "http://earth.google.com/kml/2.1": "kml21", "http://earth.google.com/kml/2.2": "kml22", "http://www.w3.org/1999/xlink": "xlink", "xri://$xrd*($v*2.0)": "xrd", "xri://$xrds": "xrds", } def near_miss(ns): try: return re.match(".*\w", ns).group().lower() except: return ns nearly_namespaces = dict([(near_miss(u),p) for u,p in namespaces.items()]) stdattrs = [(u'http://www.w3.org/XML/1998/namespace', u'base'), (u'http://www.w3.org/XML/1998/namespace', u'id'), (u'http://www.w3.org/XML/1998/namespace', u'lang'), (u'http://www.w3.org/XML/1998/namespace', u'space')] # # From the SAX parser's point of view, this class is the one responsible for # handling SAX events. In actuality, all this class does is maintain a # pushdown stack of the *real* content handlers, and delegates sax events # to the current one. 
# class SAXDispatcher(ContentHandler): firstOccurrenceOnly = 0 def __init__(self, base, selfURIs, encoding): from root import root ContentHandler.__init__(self) self.lastKnownLine = 1 self.lastKnownColumn = 0 self.loggedEvents = [] self.feedType = 0 try: self.xmlBase = base.encode('idna') except: self.xmlBase = base self.selfURIs = selfURIs self.encoding = encoding self.handler_stack=[[root(self, base)]] self.defaultNamespaces = [] # experimental RSS-Profile support self.rssCharData = [] def setDocumentLocator(self, locator): self.locator = locator ContentHandler.setDocumentLocator(self, self.locator) def setFirstOccurrenceOnly(self, firstOccurrenceOnly=1): self.firstOccurrenceOnly = firstOccurrenceOnly def startPrefixMapping(self, prefix, uri): for handler in iter(self.handler_stack[-1]): handler.namespace[prefix] = uri if uri and len(uri.split())>1: from xml.sax import SAXException self.error(SAXException('Invalid Namespace: %s' % uri)) if prefix in namespaces.values(): if not namespaces.get(uri,'') == prefix and prefix: from logging import ReservedPrefix, MediaRssNamespace preferredURI = [key for key, value in namespaces.items() if value == prefix][0] if uri == 'http://search.yahoo.com/mrss': self.log(MediaRssNamespace({'prefix':prefix, 'ns':preferredURI})) else: self.log(ReservedPrefix({'prefix':prefix, 'ns':preferredURI})) elif prefix=='wiki' and uri.find('usemod')>=0: from logging import ObsoleteWikiNamespace self.log(ObsoleteWikiNamespace({'preferred':namespaces[uri], 'ns':uri})) elif prefix in ['atom','xhtml']: from logging import TYPE_ATOM, AvoidNamespacePrefix if self.getFeedType() == TYPE_ATOM: self.log(AvoidNamespacePrefix({'prefix':prefix})) elif namespaces.has_key(uri): if not namespaces[uri] == prefix and prefix: from logging import NonstdPrefix self.log(NonstdPrefix({'preferred':namespaces[uri], 'ns':uri})) if namespaces[uri] in ['atom', 'xhtml']: from logging import TYPE_UNKNOWN, TYPE_ATOM, AvoidNamespacePrefix if self.getFeedType() in 
[TYPE_ATOM,TYPE_UNKNOWN]: self.log(AvoidNamespacePrefix({'prefix':prefix})) else: from logging import UnknownNamespace self.log(UnknownNamespace({'namespace':uri})) def namespaceFor(self, prefix): return None def startElementNS(self, name, qname, attrs): self.lastKnownLine = self.locator.getLineNumber() self.lastKnownColumn = self.locator.getColumnNumber() qname, name = name for handler in iter(self.handler_stack[-1]): handler.startElementNS(name, qname, attrs) if len(attrs): present = attrs.getNames() unexpected = filter(lambda x: x not in stdattrs, present) for handler in iter(self.handler_stack[-1]): ean = handler.getExpectedAttrNames() if ean: unexpected = filter(lambda x: x not in ean, unexpected) for u in unexpected: if u[0] and near_miss(u[0]) not in nearly_namespaces: feedtype=self.getFeedType() if (not qname) and feedtype and (feedtype==TYPE_RSS2): from logging import UseOfExtensionAttr self.log(UseOfExtensionAttr({"attribute":u, "element":name})) continue from logging import UnexpectedAttribute if not u[0]: u=u[1] self.log(UnexpectedAttribute({"parent":name, "attribute":u, "element":name})) def resolveEntity(self, publicId, systemId): if not publicId and not systemId: import cStringIO return cStringIO.StringIO() try: def log(exception): from logging import SAXError self.log(SAXError({'exception':str(exception)})) if self.xmlvalidator: self.xmlvalidator(log) self.xmlvalidator=0 except: pass if (publicId=='-//Netscape Communications//DTD RSS 0.91//EN' and systemId=='http://my.netscape.com/publish/formats/rss-0.91.dtd'): from logging import ValidDoctype, DeprecatedDTD self.log(ValidDoctype({})) self.log(DeprecatedDTD({})) else: from logging import ContainsSystemEntity self.lastKnownLine = self.locator.getLineNumber() self.lastKnownColumn = self.locator.getColumnNumber() self.log(ContainsSystemEntity({})) from StringIO import StringIO return StringIO() def skippedEntity(self, name): from logging import ValidDoctype if [e for e in self.loggedEvents if 
e.__class__ == ValidDoctype]: from htmlentitydefs import name2codepoint if name in name2codepoint: return from logging import UndefinedNamedEntity self.log(UndefinedNamedEntity({'value':name})) def characters(self, string): self.lastKnownLine = self.locator.getLineNumber() self.lastKnownColumn = self.locator.getColumnNumber() for handler in iter(self.handler_stack[-1]): handler.characters(string) def endElementNS(self, name, qname): self.lastKnownLine = self.locator.getLineNumber() self.lastKnownColumn = self.locator.getColumnNumber() qname, name = name for handler in iter(self.handler_stack[-1]): handler.endElementNS(name, qname) del self.handler_stack[-1] def push(self, handlers, name, attrs, parent): if hasattr(handlers,'__iter__'): for handler in iter(handlers): handler.setElement(name, attrs, parent) handler.value="" handler.prevalidate() else: handlers.setElement(name, attrs, parent) handlers.value="" handlers.prevalidate() handlers = [handlers] self.handler_stack.append(handlers) def log(self, event, offset=(0,0)): def findDuplicate(self, event): duplicates = [e for e in self.loggedEvents if e.__class__ == event.__class__] if duplicates and (event.__class__ in [NonCanonicalURI]): return duplicates[0] for dup in duplicates: for k, v in event.params.items(): if k != 'value': if not k in dup.params or dup.params[k] != v: break else: return dup if event.params.has_key('element') and event.params['element']: if not isinstance(event.params['element'],tuple): event.params['element']=':'.join(event.params['element'].split('_', 1)) elif event.params['element'][0]==u'http://www.w3.org/XML/1998/namespace': event.params['element'] = 'xml:' + event.params['element'][-1] if self.firstOccurrenceOnly: dup = findDuplicate(self, event) if dup: dup.params['msgcount'] = dup.params['msgcount'] + 1 return event.params['msgcount'] = 1 try: line = self.locator.getLineNumber() + offset[0] backupline = self.lastKnownLine column = (self.locator.getColumnNumber() or 0) + offset[1] 
backupcolumn = self.lastKnownColumn except AttributeError: line = backupline = column = backupcolumn = 1 event.params['line'] = line event.params['backupline'] = backupline event.params['column'] = column event.params['backupcolumn'] = backupcolumn self.loggedEvents.append(event) def error(self, exception): from logging import SAXError self.log(SAXError({'exception':str(exception)})) raise exception fatalError=error warning=error def getFeedType(self): return self.feedType def setFeedType(self, feedType): self.feedType = feedType # # This base class for content handlers keeps track of such administrative # details as the parent of the current element, and delegating both log # and push events back up the stack. It will also concatenate up all of # the SAX events associated with character data into a value, handing such # things as CDATA and entities. # # Subclasses are expected to declare "do_name" methods for every # element that they support. These methods are expected to return the # appropriate handler for the element. # # The name of the element and the names of the children processed so # far are also maintained. # # Hooks are also provided for subclasses to do "prevalidation" and # "validation". 
# from logging import TYPE_RSS2 class validatorBase(ContentHandler): def __init__(self): ContentHandler.__init__(self) self.value = "" self.attrs = None self.children = [] self.isValid = 1 self.name = None self.itunes = False self.namespace = {} def setElement(self, name, attrs, parent): self.name = name self.attrs = attrs self.parent = parent self.dispatcher = parent.dispatcher self.line = self.dispatcher.locator.getLineNumber() self.col = self.dispatcher.locator.getColumnNumber() self.xmlLang = parent.xmlLang if attrs and attrs.has_key((u'http://www.w3.org/XML/1998/namespace', u'base')): self.xmlBase=attrs.getValue((u'http://www.w3.org/XML/1998/namespace', u'base')) from validators import rfc3987 self.validate_attribute((u'http://www.w3.org/XML/1998/namespace',u'base'), rfc3987) from urlparse import urljoin self.xmlBase = urljoin(parent.xmlBase, self.xmlBase) else: self.xmlBase = parent.xmlBase return self def simplename(self, name): if not name[0]: return name[1] return namespaces.get(name[0], name[0]) + ":" + name[1] def namespaceFor(self, prefix): if self.namespace.has_key(prefix): return self.namespace[prefix] elif self.parent: return self.parent.namespaceFor(prefix) else: return None def validate_attribute(self, name, rule): if not isinstance(rule,validatorBase): rule = rule() if isinstance(name,str): name = (None,name) rule.setElement(self.simplename(name), {}, self) rule.value=self.attrs.getValue(name) rule.validate() def validate_required_attribute(self, name, rule): if self.attrs and self.attrs.has_key(name): self.validate_attribute(name, rule) else: from logging import MissingAttribute self.log(MissingAttribute({"attr": self.simplename(name)})) def validate_optional_attribute(self, name, rule): if self.attrs and self.attrs.has_key(name): self.validate_attribute(name, rule) def getExpectedAttrNames(self): None def unknown_starttag(self, name, qname, attrs): from validators import any return any(self, name, qname, attrs) def startElementNS(self, name, 
qname, attrs): if attrs.has_key((u'http://www.w3.org/XML/1998/namespace', u'lang')): self.xmlLang=attrs.getValue((u'http://www.w3.org/XML/1998/namespace', u'lang')) if self.xmlLang: from validators import iso639_validate iso639_validate(self.log, self.xmlLang, "xml:lang", name) from validators import eater feedtype=self.getFeedType() if (not qname) and feedtype and (feedtype!=TYPE_RSS2): from logging import UndeterminableVocabulary self.log(UndeterminableVocabulary({"parent":self.name, "element":name, "namespace":'""'})) qname="null" if qname in self.dispatcher.defaultNamespaces: qname=None nm_qname = near_miss(qname) if nearly_namespaces.has_key(nm_qname): prefix = nearly_namespaces[nm_qname] qname, name = None, prefix + "_" + name if prefix == 'itunes' and not self.itunes and not self.parent.itunes: if hasattr(self, 'setItunes'): self.setItunes(True) # ensure all attribute namespaces are properly defined for (namespace,attr) in attrs.keys(): if ':' in attr and not namespace: from logging import MissingNamespace self.log(MissingNamespace({"parent":self.name, "element":attr})) if qname=='http://purl.org/atom/ns#': from logging import ObsoleteNamespace self.log(ObsoleteNamespace({"element":"feed"})) for key, string in attrs.items(): for c in string: if 0x80 <= ord(c) <= 0x9F or c == u'\ufffd': from validators import BadCharacters self.log(BadCharacters({"parent":name, "element":key[-1]})) if qname: handler = self.unknown_starttag(name, qname, attrs) name="unknown_"+name self.child=name else: try: self.child=name if name.startswith('dc_'): # handle "Qualified" Dublin Core handler = getattr(self, "do_" + name.replace("-","_").split('.')[0])() else: handler = getattr(self, "do_" + name.replace("-","_"))() except AttributeError: if name.find(':') != -1: from logging import MissingNamespace self.log(MissingNamespace({"parent":self.name, "element":name})) handler = eater() elif name.startswith('xhtml_'): from logging import MisplacedXHTMLContent 
self.log(MisplacedXHTMLContent({"parent": ':'.join(self.name.split("_",1)), "element":name})) handler = eater() else: try: from extension import Questionable # requalify the name with the default namespace qname = name from logging import TYPE_APP_CATEGORIES, TYPE_APP_SERVICE if self.getFeedType() in [TYPE_APP_CATEGORIES, TYPE_APP_SERVICE]: if qname.startswith('app_'): qname=qname[4:] if name.find('_')<0 and self.name.find('_')>=0: if 'http://www.w3.org/2005/Atom' in self.dispatcher.defaultNamespaces: qname='atom_'+qname # is this element questionable? handler = getattr(Questionable(), "do_" + qname.replace("-","_"))() from logging import QuestionableUsage self.log(QuestionableUsage({"parent": ':'.join(self.name.split("_",1)), "element":qname})) except AttributeError: from logging import UndefinedElement self.log(UndefinedElement({"parent": ':'.join(self.name.split("_",1)), "element":name})) handler = eater() self.push(handler, name, attrs) # MAP - always append name, even if already exists (we need this to # check for too many hour elements in skipHours, and it doesn't # hurt anything else) self.children.append(self.child) def normalizeWhitespace(self): self.value = self.value.strip() def endElementNS(self, name, qname): self.normalizeWhitespace() self.validate() if self.isValid and self.name: from validators import ValidElement self.log(ValidElement({"parent":self.parent.name, "element":name})) def textOK(self): from validators import UnexpectedText self.log(UnexpectedText({"element":self.name,"parent":self.parent.name})) def characters(self, string): if string.strip(): self.textOK() line=column=0 pc=' ' for c in string: # latin characters double encoded as utf-8 if 0x80 <= ord(c) <= 0xBF: if 0xC2 <= ord(pc) <= 0xC3: try: string.encode('iso-8859-1').decode('utf-8') from validators import BadCharacters self.log(BadCharacters({"parent":self.parent.name, "element":self.name}), offset=(line,max(1,column-1))) except: pass pc = c # win1252 if 0x80 <= ord(c) <= 0x9F or 
c == u'\ufffd': from validators import BadCharacters self.log(BadCharacters({"parent":self.parent.name, "element":self.name}), offset=(line,column)) column=column+1 if ord(c) in (10,13): column=0 line=line+1 self.value = self.value + string def log(self, event, offset=(0,0)): if not event.params.has_key('element'): event.params['element'] = self.name self.dispatcher.log(event, offset) self.isValid = 0 def setFeedType(self, feedType): self.dispatcher.setFeedType(feedType) def getFeedType(self): return self.dispatcher.getFeedType() def push(self, handler, name, value): self.dispatcher.push(handler, name, value, self) def leaf(self): from validators import text return text() def prevalidate(self): pass def validate(self): pass python-feedvalidator-0~svn1022/feedvalidator/media.py0000644000175000017500000003536510677775110021264 0ustar poxpoxfrom validators import * class media_elements: def do_media_adult(self): self.log(DeprecatedMediaAdult({"parent":self.name, "element":"media:adult"})) return truefalse(), noduplicates() def do_media_category(self): return media_category() def do_media_copyright(self): return media_copyright(), noduplicates() def do_media_credit(self): return media_credit() def do_media_description(self): return media_title(), noduplicates() def do_media_keywords(self): return text() def do_media_hash(self): return media_hash() def do_media_player(self): return media_player() def do_media_rating(self): return media_rating() def do_media_restriction(self): return media_restriction() def do_media_text(self): return media_text() def do_media_title(self): return media_title(), noduplicates() def do_media_thumbnail(self): return media_thumbnail() class media_category(nonhtml,rfc2396_full): def getExpectedAttrNames(self): return [(None,u'label'),(None, u'scheme')] def prevalidate(self): self.name = "label" self.value = self.attrs.get((None,u'label')) if self.value: nonhtml.validate(self) self.name = "scheme" self.value = self.attrs.get((None,u'scheme')) 
if self.value: rfc2396_full.validate(self) self.name = "media_category" self.value = "" class media_copyright(nonhtml,rfc2396_full): def getExpectedAttrNames(self): return [(None,u'url')] def prevalidate(self): self.name = "url" self.value = self.attrs.get((None,u'url')) if self.value: rfc2396_full.validate(self) self.name = "media_copyright" self.value = "" class media_credit(text,rfc2396_full): EBU = [ "actor", "adaptor", "anchor person", "animal trainer", "animator", "announcer", "armourer", "art director", "artist/performer", "assistant camera", "assistant chief lighting technician", "assistant director", "assistant producer", "assistant visual editor", "author", "broadcast assistant", "broadcast journalist", "camera operator", "carpenter", "casting", "causeur", "chief lighting technician", "choir", "choreographer", "clapper loader", "commentary or commentator", "commissioning broadcaster", "composer", "computer programmer", "conductor", "consultant", "continuity checker", "correspondent", "costume designer", "dancer", "dialogue coach", "director", "director of photography", "distribution company", "draughtsman", "dresser", "dubber", "editor/producer", "editor", "editor", "ensemble", "executive producer", "expert", "fight director", "floor manager", "focus puller", "foley artist", "foley editor", "foley mixer", "graphic assistant", "graphic designer", "greensman", "grip", "hairdresser", "illustrator", "interviewed guest", "interviewer", "key character", "key grip", "key talents", "leadman", "librettist", "lighting director", "lighting technician", "location manager", "lyricist", "make up artist", "manufacturer", "matte artist", "music arranger", "music group", "musician", "news reader", "orchestra", "participant", "photographer", "post", "producer", "production assistant", "production company", "production department", "production manager", "production secretary", "programme production researcher", "property manager", "publishing company", "puppeteer", 
"pyrotechnician", "reporter", "rigger", "runner", "scenario", "scenic operative", "script supervisor", "second assistant camera", "second assistant director", "second unit director", "set designer", "set dresser", "sign language", "singer", "sound designer", "sound mixer", "sound recordist", "special effects", "stunts", "subtitles", "technical director", "term", "translation", "transportation manager", "treatment/programme proposal", "vision mixer", "visual editor", "visual effects", "wardrobe", "witness", # awaiting confirmation "artist", "performer", "editor", "producer", "treatment", "treatment proposal", "programme proposal", ] def getExpectedAttrNames(self): return [(None, u'role'),(None,u'scheme')] def prevalidate(self): scheme = self.attrs.get((None, 'scheme')) or 'urn:ebu' role = self.attrs.get((None, 'role')) if role: if scheme=='urn:ebu' and role not in self.EBU: self.log(InvalidCreditRole({"parent":self.parent.name, "element":self.name, "attr":"role", "value":role})) elif role != role.lower(): self.log(InvalidCreditRole({"parent":self.parent.name, "element":self.name, "attr":"role", "value":role})) self.value = scheme self.name = "scheme" if scheme != 'urn:ebu': rfc2396_full.validate(self) self.name = "media_credit" self.value = "" class media_hash(text): def getExpectedAttrNames(self): return [(None,u'algo')] def prevalidate(self): self.algo = self.attrs.get((None, 'algo')) if self.algo and self.algo not in ['md5', 'sha-1']: self.log(InvalidMediaHash({"parent":self.parent.name, "element":self.name, "attr":"algo", "value":self.algo})) def validate(self): self.value = self.value.strip() if not re.match("^[0-9A-Za-z]+$",self.value): self.log(InvalidMediaHash({"parent":self.parent.name, "element":self.name, "value":self.value})) else: if self.algo == 'sha-1': if len(self.value) != 40: self.log(InvalidMediaHash({"parent":self.parent.name, "element":self.name, "algo":self.algo, "value":self.value})) else: if len(self.value) != 32: 
self.log(InvalidMediaHash({"parent":self.parent.name, "element":self.name, "algo":self.algo, "value":self.value})) class media_rating(rfc2396_full): def getExpectedAttrNames(self): return [(None, u'scheme')] def validate(self): scheme = self.attrs.get((None, 'scheme')) or 'urn:simple' if scheme == 'urn:simple': if self.value not in ['adult', 'nonadult']: self.log(InvalidMediaRating({"parent":self.parent.name, "element":self.name, "scheme":scheme, "value":self.value})) elif scheme == 'urn:mpaa': if self.value not in ['g', 'm', 'nc-17', 'pg', 'pg-13', 'r', 'x']: self.log(InvalidMediaRating({"parent":self.parent.name, "element":self.name, "scheme":scheme, "value":self.value})) elif scheme == 'urn:v-chip': if self.value not in ['14+', '18+', 'c', 'c8', 'g', 'pg', 'tv-14', 'tv-g', 'tv-ma', 'tv-pg', 'tv-y', 'tv-y7', 'tv-y7-fv']: self.log(InvalidMediaRating({"parent":self.parent.name, "element":self.name, "scheme":scheme, "value":self.value})) elif scheme == 'urn:icra': code = '([nsvlocx]z [01]|(n[a-c]|s[a-f]|v[a-j]|l[a-c]|o[a-h]|c[a-b]|x[a-e]) 1)' if not re.match(r"^r \(%s( %s)*\)$" %(code,code),self.value): self.log(InvalidMediaRating({"parent":self.parent.name, "element":self.name, "scheme":scheme, "value":self.value})) pass else: self.value = scheme self.name = 'scheme' rfc2396_full.validate(self) class media_restriction(text,rfc2396_full,iso3166): def getExpectedAttrNames(self): return [(None, u'relationship'),(None,u'type')] def validate(self): relationship = self.attrs.get((None, 'relationship')) if not relationship: self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":"relationship"})) elif relationship not in ['allow','disallow']: self.log(InvalidMediaRestrictionRel({"parent":self.parent.name, "element":self.name, "attr":"relationship", "value":relationship})) type = self.attrs.get((None, 'type')) if not type: if self.value and self.value not in ['all','none']: self.log(InvalidMediaRestriction({"parent":self.parent.name, 
"element":self.name, "value":self.value})) elif type == 'country': self.name = 'country' countries = self.value.upper().split(' ') for self.value in countries: iso3166.validate(self) elif type == 'uri': rfc2396_full.validate(self) else: self.log(InvalidMediaRestrictionType({"parent":self.parent.name, "element":self.name, "attr":"type", "value":type})) class media_player(validatorBase,positiveInteger,rfc2396_full): def getExpectedAttrNames(self): return [(None,u'height'),(None,u'url'),(None, u'width')] def validate(self): self.value = self.attrs.get((None, 'url')) if self.value: self.name = "url" rfc2396_full.validate(self) else: self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":"url"})) self.value = self.attrs.get((None, 'height')) self.name = "height" if self.value: positiveInteger.validate(self) self.value = self.attrs.get((None, 'width')) self.name = "width" if self.value: positiveInteger.validate(self) class media_text(nonhtml): def getExpectedAttrNames(self): return [(None,u'end'),(None,u'lang'),(None,u'start'),(None, u'type')] def prevalidate(self): self.type = self.attrs.get((None, 'type')) if self.type and self.type not in ['plain', 'html']: self.log(InvalidMediaTextType({"parent":self.parent.name, "element":self.name, "attr":"type", "value":self.type})) start = self.attrs.get((None, 'start')) if start and not media_thumbnail.npt_re.match(start): self.log(InvalidNPTTime({"parent":self.parent.name, "element":self.name, "attr":"start", "value":start})) else: self.log(ValidNPTTime({"parent":self.parent.name, "element":self.name, "attr":"start", "value":start})) end = self.attrs.get((None, 'end')) if end and not media_thumbnail.npt_re.match(end): self.log(InvalidNPTTime({"parent":self.parent.name, "element":self.name, "attr":"end", "value":end})) else: self.log(ValidNPTTime({"parent":self.parent.name, "element":self.name, "attr":"end", "value":end})) lang = self.attrs.get((None, 'lang')) if lang: 
iso639_validate(self.log,lang,'lang',self.parent) def validate(self): if self.type == 'html': self.validateSafe(self.value) else: nonhtml.validate(self, ContainsUndeclaredHTML) class media_title(nonhtml): def getExpectedAttrNames(self): return [(None, u'type')] def prevalidate(self): self.type = self.attrs.get((None, 'type')) if self.type and self.type not in ['plain', 'html']: self.log(InvalidMediaTextType({"parent":self.parent.name, "element":self.name, "attr":"type", "value":self.type})) def validate(self): if self.type == 'html': self.validateSafe(self.value) else: nonhtml.validate(self, ContainsUndeclaredHTML) class media_thumbnail(validatorBase,positiveInteger,rfc2396_full): npt_re = re.compile("^(now)|(\d+(\.\d+)?)|(\d+:\d\d:\d\d(\.\d+)?)$") def getExpectedAttrNames(self): return [(None,u'height'),(None,u'time'),(None,u'url'),(None, u'width')] def validate(self): time = self.attrs.get((None, 'time')) if time and not media_thumbnail.npt_re.match(time): self.log(InvalidNPTTime({"parent":self.parent.name, "element":self.name, "attr":"time", "value":time})) else: self.log(ValidNPTTime({"parent":self.parent.name, "element":self.name, "attr":"time", "value":time})) self.value = self.attrs.get((None, 'url')) if self.value: self.name = "url" rfc2396_full.validate(self) else: self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":"url"})) self.value = self.attrs.get((None, 'height')) self.name = "height" if self.value: positiveInteger.validate(self) self.value = self.attrs.get((None, 'width')) self.name = "width" if self.value: positiveInteger.validate(self) from extension import extension_everywhere class media_content(validatorBase, media_elements, extension_everywhere, positiveInteger, rfc2396_full, truefalse, nonNegativeInteger): def getExpectedAttrNames(self): return [ (None,u'bitrate'), (None,u'channels'), (None,u'duration'), (None,u'expression'), (None,u'fileSize'), (None,u'framerate'), (None,u'height'), (None,u'isDefault'), 
(None,u'lang'), (None,u'medium'), (None,u'samplingrate'), (None,u'type'), (None,u'url'), (None,u'width') ] def validate(self): self.value = self.attrs.get((None,u'bitrate')) if self.value and not re.match('\d+\.?\d*', self.value): self.log(InvalidFloat({"parent":self.parent.name, "element":self.name, "attr": 'bitrate', "value":self.value})) self.value = self.attrs.get((None, 'channels')) self.name = "channels" if self.value: nonNegativeInteger.validate(self) self.value = self.attrs.get((None,u'duration')) if self.value and not re.match('\d+\.?\d*', self.value): self.log(InvalidFloat({"parent":self.parent.name, "element":self.name, "attr": 'duration', "value":self.value})) self.value = self.attrs.get((None,u'expression')) if self.value and self.value not in ['sample', 'full', 'nonstop']: self.log(InvalidMediaExpression({"parent":self.parent.name, "element":self.name, "value": self.value})) self.value = self.attrs.get((None, 'fileSize')) self.name = "fileSize" if self.value: positiveInteger.validate(self) self.value = self.attrs.get((None,u'framerate')) if self.value and not re.match('\d+\.?\d*', self.value): self.log(InvalidFloat({"parent":self.parent.name, "element":self.name, "attr": 'framerate', "value":self.value})) self.value = self.attrs.get((None, 'height')) self.name = "height" if self.value: positiveInteger.validate(self) self.value = self.attrs.get((None, 'isDefault')) if self.value: truefalse.validate(self) self.value = self.attrs.get((None, 'lang')) if self.value: iso639_validate(self.log,self.value,'lang',self.parent) self.value = self.attrs.get((None,u'medium')) if self.value and self.value not in ['image', 'audio', 'video', 'document', 'executable']: self.log(InvalidMediaMedium({"parent":self.parent.name, "element":self.name, "value": self.value})) self.value = self.attrs.get((None,u'samplingrate')) if self.value and not re.match('\d+\.?\d*', self.value): self.log(InvalidFloat({"parent":self.parent.name, "element":self.name, "attr": 'samplingrate', 
"value":self.value})) self.value = self.attrs.get((None,u'type')) if self.value and not mime_re.match(self.value): self.log(InvalidMIMEAttribute({"parent":self.parent.name, "element":self.name, "attr":'type'})) self.name = "url" self.value = self.attrs.get((None,u'url')) if self.value: rfc2396_full.validate(self) self.value = self.attrs.get((None, 'width')) self.name = "width" if self.value: positiveInteger.validate(self) class media_group(validatorBase, media_elements): def do_media_content(self): return media_content() def validate(self): if len([child for child in self.children if child=='media_content']) < 2: self.log(MediaGroupWithoutAlternatives({})) python-feedvalidator-0~svn1022/feedvalidator/categories.py0000644000175000017500000000143010700220336022272 0ustar poxpoxfrom base import validatorBase from category import category from validators import yesno from logging import ConflictingCatAttr, ConflictingCatChildren class categories(validatorBase): def getExpectedAttrNames(self): return [(None,u'scheme'),(None,u'fixed'),(None,u'href')] def prevalidate(self): self.validate_optional_attribute((None,'fixed'), yesno) if self.attrs.has_key((None,'href')): if self.attrs.has_key((None,'fixed')): self.log(ConflictingCatAttr({'attr':'fixed'})) if self.attrs.has_key((None,'scheme')): self.log(ConflictingCatAttr({'attr':'scheme'})) def validate(self): if self.attrs.has_key((None,'href')) and self.children: self.log(ConflictingCatChildren({})) def do_atom_category(self): return category() python-feedvalidator-0~svn1022/feedvalidator/link.py0000644000175000017500000001531210766017570021126 0ustar poxpox"""$Id: link.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" from base import validatorBase from validators import * # # Atom link element # class link(nonblank,xmlbase,iso639,nonhtml,nonNegativeInteger,rfc3339,nonblank): validRelations = [ # 
http://www.iana.org/assignments/link-relations.html 'alternate', # RFC4287 'current', # RFC5005 'enclosure', # RFC4287 'edit', # RFC-ietf-atompub-protocol-17.txt 'edit-media', # RFC-ietf-atompub-protocol-17.txt 'first', # RFC5005 'last', # RFC5005 'license', # RFC4946 'next', # RFC5005 'next-archive', # RFC5005 'payment', # Kinberg 'prev-archive', # RFC5005 'previous', # RFC5005 'related', # RFC4287 'replies', # RFC4685 'self', # RFC4287 'via' # RFC4287 ] rfc5005 = [ 'current', # RFC5005 'first', # RFC5005 'last', # RFC5005 'next', # RFC5005 'next-archive', # RFC5005 'prev-archive', # RFC5005 'previous', # RFC5005 ] def getExpectedAttrNames(self): return [(None, u'type'), (None, u'title'), (None, u'rel'), (None, u'href'), (None, u'length'), (None, u'hreflang'), (u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'type'), (u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'resource'), (u'http://purl.org/syndication/thread/1.0', u'count'), (u'http://purl.org/syndication/thread/1.0', u'when'), (u'http://purl.org/syndication/thread/1.0', u'updated')] def validate(self): self.type = "" self.rel = "alternate" self.href = "" self.hreflang = "" self.title = "" if self.attrs.has_key((None, "rel")): self.value = self.rel = self.attrs.getValue((None, "rel")) if self.rel.startswith('http://www.iana.org/assignments/relation/'): self.rel=self.rel[len('http://www.iana.org/assignments/relation/'):] if self.rel in self.validRelations: self.log(ValidAtomLinkRel({"parent":self.parent.name, "element":self.name, "attr":"rel", "value":self.rel})) elif rfc2396_full.rfc2396_re.match(self.rel.encode('idna')): self.log(ValidAtomLinkRel({"parent":self.parent.name, "element":self.name, "attr":"rel", "value":self.rel})) else: self.log(UnregisteredAtomLinkRel({"parent":self.parent.name, "element":self.name, "attr":"rel", "value":self.rel})) nonblank.validate(self, errorClass=AttrNotBlank, extraParams={"attr": "rel"}) if self.rel in self.rfc5005 and self.parent.name == 'entry': 
self.log(FeedHistoryRelInEntry({"rel":self.rel})) if self.attrs.has_key((None, "type")): self.value = self.type = self.attrs.getValue((None, "type")) if not mime_re.match(self.type): self.log(InvalidMIMEType({"parent":self.parent.name, "element":self.name, "attr":"type", "value":self.type})) elif self.rel == "self" and self.type not in ["application/atom+xml", "application/rss+xml", "application/rdf+xml"]: self.log(SelfNotAtom({"parent":self.parent.name, "element":self.name, "attr":"type", "value":self.type})) else: self.log(ValidMIMEAttribute({"parent":self.parent.name, "element":self.name, "attr":"type", "value":self.type})) if self.attrs.has_key((None, "title")): self.log(ValidTitle({"parent":self.parent.name, "element":self.name, "attr":"title"})) self.value = self.title = self.attrs.getValue((None, "title")) nonblank.validate(self, errorClass=AttrNotBlank, extraParams={"attr": "title"}) nonhtml.validate(self) if self.attrs.has_key((None, "length")): self.name = 'length' self.value = self.attrs.getValue((None, "length")) nonNegativeInteger.validate(self) nonblank.validate(self) if self.attrs.has_key((None, "hreflang")): self.name = 'hreflang' self.value = self.hreflang = self.attrs.getValue((None, "hreflang")) iso639.validate(self) if self.attrs.has_key((None, "href")): self.name = 'href' self.value = self.href = self.attrs.getValue((None, "href")) xmlbase.validate(self, extraParams={"attr": "href"}) if self.rel == "self" and self.parent.name in ["feed","channel"]: # detect relative self values from urlparse import urlparse from xml.dom import XML_NAMESPACE absolute = urlparse(self.href)[1] element = self while not absolute and element and hasattr(element,'attrs'): pattrs = element.attrs if pattrs and pattrs.has_key((XML_NAMESPACE, u'base')): absolute=urlparse(pattrs.getValue((XML_NAMESPACE, u'base')))[1] element = element.parent if not absolute: self.log(RelativeSelf({"value":self.href})) from urlparse import urljoin if urljoin(self.xmlBase,self.value) not in 
self.dispatcher.selfURIs: if urljoin(self.xmlBase,self.value).split('#')[0] != self.xmlBase.split('#')[0]: from uri import Uri value = Uri(self.value) for docbase in self.dispatcher.selfURIs: if value == Uri(docbase): break else: self.log(SelfDoesntMatchLocation({"parent":self.parent.name, "element":self.name})) self.dispatcher.selfURIs.append(urljoin(self.xmlBase,self.value)) else: self.log(MissingHref({"parent":self.parent.name, "element":self.name, "attr":"href"})) if self.attrs.has_key((u'http://purl.org/syndication/thread/1.0', u'count')): if self.rel != "replies": self.log(UnexpectedAttribute({"parent":self.parent.name, "element":self.name, "attribute":"thr:count"})) self.value = self.attrs.getValue((u'http://purl.org/syndication/thread/1.0', u'count')) self.name="thr:count" nonNegativeInteger.validate(self) if self.attrs.has_key((u'http://purl.org/syndication/thread/1.0', u'when')): self.log(NoThrWhen({"parent":self.parent.name, "element":self.name, "attribute":"thr:when"})) if self.attrs.has_key((u'http://purl.org/syndication/thread/1.0', u'updated')): if self.rel != "replies": self.log(UnexpectedAttribute({"parent":self.parent.name, "element":self.name, "attribute":"thr:updated"})) self.value = self.attrs.getValue((u'http://purl.org/syndication/thread/1.0', u'updated')) self.name="thr:updated" rfc3339.validate(self) def startElementNS(self, name, qname, attrs): self.push(eater(), name, attrs) def characters(self, text): if text.strip(): self.log(AtomLinkNotEmpty({"parent":self.parent.name, "element":self.name})) python-feedvalidator-0~svn1022/feedvalidator/cf.py0000644000175000017500000000105210410000277020533 0ustar poxpox# http://msdn.microsoft.com/XML/rss/sle/default.aspx from base import validatorBase from validators import eater, text class sort(validatorBase): def getExpectedAttrNames(self): return [(None,u'data-type'),(None,u'default'),(None,u'element'),(None, u'label'),(None,u'ns')] class group(validatorBase): def getExpectedAttrNames(self): return 
[(None,u'element'),(None, u'label'),(None,u'ns')] class listinfo(validatorBase): def do_cf_sort(self): return sort() def do_cf_group(self): return group() class treatAs(text): pass python-feedvalidator-0~svn1022/feedvalidator/__init__.py0000644000175000017500000002652710766017570021742 0ustar poxpox"""$Id: __init__.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" import socket if hasattr(socket, 'setdefaulttimeout'): socket.setdefaulttimeout(10) Timeout = socket.timeout else: import timeoutsocket timeoutsocket.setDefaultSocketTimeout(10) Timeout = timeoutsocket.Timeout import urllib2 import logging from logging import * from xml.sax import SAXException from xml.sax.xmlreader import InputSource import re import xmlEncoding import mediaTypes from httplib import BadStatusLine MAXDATALENGTH = 2000000 def _validate(aString, firstOccurrenceOnly, loggedEvents, base, encoding, selfURIs=None, mediaType=None): """validate RSS from string, returns validator object""" from xml.sax import make_parser, handler from base import SAXDispatcher from exceptions import UnicodeError from cStringIO import StringIO if re.match("^\s+<\?xml",aString) and re.search("",aString): lt = aString.find('<'); gt = aString.find('>') if lt > 0 and gt > 0 and lt < gt: loggedEvents.append(logging.WPBlankLine({'line':1,'column':1})) # rearrange so that other errors can be found aString = aString[lt:gt+1]+aString[0:lt]+aString[gt+1:] # By now, aString should be Unicode source = InputSource() source.setByteStream(StringIO(xmlEncoding.asUTF8(aString))) validator = SAXDispatcher(base, selfURIs or [base], encoding) validator.setFirstOccurrenceOnly(firstOccurrenceOnly) if mediaType == 'application/atomsvc+xml': validator.setFeedType(TYPE_APP_SERVICE) elif mediaType == 'application/atomcat+xml': validator.setFeedType(TYPE_APP_CATEGORIES) validator.loggedEvents += loggedEvents # 
experimental RSS-Profile support validator.rssCharData = [s.find('&#x')>=0 for s in aString.split('\n')] xmlver = re.match("^<\?\s*xml\s+version\s*=\s*['\"]([-a-zA-Z0-9_.:]*)['\"]",aString) if xmlver and xmlver.group(1)<>'1.0': validator.log(logging.BadXmlVersion({"version":xmlver.group(1)})) try: from xml.sax.expatreader import ExpatParser class fake_dtd_parser(ExpatParser): def reset(self): ExpatParser.reset(self) self._parser.UseForeignDTD(1) parser = fake_dtd_parser() except: parser = make_parser() parser.setFeature(handler.feature_namespaces, 1) parser.setContentHandler(validator) parser.setErrorHandler(validator) parser.setEntityResolver(validator) if hasattr(parser, '_ns_stack'): # work around bug in built-in SAX parser (doesn't recognize xml: namespace) # PyXML doesn't have this problem, and it doesn't have _ns_stack either parser._ns_stack.append({'http://www.w3.org/XML/1998/namespace':'xml'}) def xmlvalidate(log): import libxml2 from StringIO import StringIO from random import random prefix="...%s..." 
% str(random()).replace('0.','') msg=[] libxml2.registerErrorHandler(lambda msg,str: msg.append(str), msg) input = libxml2.inputBuffer(StringIO(xmlEncoding.asUTF8(aString))) reader = input.newTextReader(prefix) reader.SetParserProp(libxml2.PARSER_VALIDATE, 1) ret = reader.Read() while ret == 1: ret = reader.Read() msg=''.join(msg) for line in msg.splitlines(): if line.startswith(prefix): log(line.split(':',4)[-1].strip()) validator.xmlvalidator=xmlvalidate try: parser.parse(source) except SAXException: pass except UnicodeError: import sys exctype, value = sys.exc_info()[:2] validator.log(logging.UnicodeError({"exception":value})) if validator.getFeedType() == TYPE_RSS1: try: from rdflib.syntax.parsers.RDFXMLHandler import RDFXMLHandler class Handler(RDFXMLHandler): ns_prefix_map = {} prefix_ns_map = {} def add(self, triple): pass def __init__(self, dispatcher): RDFXMLHandler.__init__(self, self) self.dispatcher=dispatcher def error(self, message): self.dispatcher.log(InvalidRDF({"message": message})) source.getByteStream().reset() parser.reset() parser.setContentHandler(Handler(parser.getContentHandler())) parser.setErrorHandler(handler.ErrorHandler()) parser.parse(source) except: pass return validator def validateStream(aFile, firstOccurrenceOnly=0, contentType=None, base=""): loggedEvents = [] if contentType: (mediaType, charset) = mediaTypes.checkValid(contentType, loggedEvents) else: (mediaType, charset) = (None, None) rawdata = aFile.read(MAXDATALENGTH) if aFile.read(1): raise ValidationFailure(logging.ValidatorLimit({'limit': 'feed length > ' + str(MAXDATALENGTH) + ' bytes'})) encoding, rawdata = xmlEncoding.decode(mediaType, charset, rawdata, loggedEvents, fallback='utf-8') validator = _validate(rawdata, firstOccurrenceOnly, loggedEvents, base, encoding, mediaType=mediaType) if mediaType and validator.feedType: mediaTypes.checkAgainstFeedType(mediaType, validator.feedType, validator.loggedEvents) return {"feedType":validator.feedType, 
"loggedEvents":validator.loggedEvents} def validateString(aString, firstOccurrenceOnly=0, fallback=None, base=""): loggedEvents = [] if type(aString) != unicode: encoding, aString = xmlEncoding.decode("", None, aString, loggedEvents, fallback) else: encoding = "utf-8" # setting a sane (?) default if aString is not None: validator = _validate(aString, firstOccurrenceOnly, loggedEvents, base, encoding) return {"feedType":validator.feedType, "loggedEvents":validator.loggedEvents} else: return {"loggedEvents": loggedEvents} def validateURL(url, firstOccurrenceOnly=1, wantRawData=0): """validate RSS from URL, returns events list, or (events, rawdata) tuple""" loggedEvents = [] request = urllib2.Request(url) request.add_header("Accept-encoding", "gzip, deflate") request.add_header("User-Agent", "FeedValidator/1.3") usock = None try: try: usock = urllib2.urlopen(request) rawdata = usock.read(MAXDATALENGTH) if usock.read(1): raise ValidationFailure(logging.ValidatorLimit({'limit': 'feed length > ' + str(MAXDATALENGTH) + ' bytes'})) # check for temporary redirects if usock.geturl()<>request.get_full_url(): from httplib import HTTPConnection spliturl=url.split('/',3) if spliturl[0]=="http:": conn=HTTPConnection(spliturl[2]) conn.request("GET",'/'+spliturl[3].split("#",1)[0]) resp=conn.getresponse() if resp.status<>301: loggedEvents.append(TempRedirect({})) except BadStatusLine, status: raise ValidationFailure(logging.HttpError({'status': status.__class__})) except urllib2.HTTPError, status: rawdata = status.read() if len(rawdata) > 512 and 'content-encoding' in status.headers: loggedEvents.append(logging.HttpError({'status': status})) usock = status else: rawdata=re.sub('','',rawdata) lastline = rawdata.strip().split('\n')[-1].strip() if lastline in ['','','', '']: loggedEvents.append(logging.HttpError({'status': status})) usock = status else: raise ValidationFailure(logging.HttpError({'status': status})) except urllib2.URLError, x: raise 
ValidationFailure(logging.HttpError({'status': x.reason})) except Timeout, x: raise ValidationFailure(logging.IOError({"message": 'Server timed out', "exception":x})) except Exception, x: raise ValidationFailure(logging.IOError({"message": x.__class__.__name__, "exception":x})) if usock.headers.get('content-encoding', None) == None: loggedEvents.append(Uncompressed({})) if usock.headers.get('content-encoding', None) == 'gzip': import gzip, StringIO try: rawdata = gzip.GzipFile(fileobj=StringIO.StringIO(rawdata)).read() except: import sys exctype, value = sys.exc_info()[:2] event=logging.IOError({"message": 'Server response declares Content-Encoding: gzip', "exception":value}) raise ValidationFailure(event) if usock.headers.get('content-encoding', None) == 'deflate': import zlib try: rawdata = zlib.decompress(rawdata, -zlib.MAX_WBITS) except: import sys exctype, value = sys.exc_info()[:2] event=logging.IOError({"message": 'Server response declares Content-Encoding: deflate', "exception":value}) raise ValidationFailure(event) if usock.headers.get('content-type', None) == 'application/vnd.google-earth.kmz': import tempfile, zipfile, os try: (fd, tempname) = tempfile.mkstemp() os.write(fd, rawdata) os.close(fd) zfd = zipfile.ZipFile(tempname) namelist = zfd.namelist() for name in namelist: if name.endswith('.kml'): rawdata = zfd.read(name) zfd.close() os.unlink(tempname) except: import sys value = sys.exc_info()[:1] event=logging.IOError({"message": 'Problem decoding KMZ', "exception":value}) raise ValidationFailure(event) mediaType = None charset = None # Is the Content-Type correct? 
contentType = usock.headers.get('content-type', None) if contentType: (mediaType, charset) = mediaTypes.checkValid(contentType, loggedEvents) # Check for malformed HTTP headers for (h, v) in usock.headers.items(): if (h.find(' ') >= 0): loggedEvents.append(HttpProtocolError({'header': h})) selfURIs = [request.get_full_url()] baseURI = usock.geturl() if not baseURI in selfURIs: selfURIs.append(baseURI) # Get baseURI from content-location and/or redirect information if usock.headers.get('content-location', None): from urlparse import urljoin baseURI=urljoin(baseURI,usock.headers.get('content-location', "")) elif usock.headers.get('location', None): from urlparse import urljoin baseURI=urljoin(baseURI,usock.headers.get('location', "")) if not baseURI in selfURIs: selfURIs.append(baseURI) usock.close() usock = None mediaTypes.contentSniffing(mediaType, rawdata, loggedEvents) encoding, rawdata = xmlEncoding.decode(mediaType, charset, rawdata, loggedEvents, fallback='utf-8') if rawdata is None: return {'loggedEvents': loggedEvents} rawdata = rawdata.replace('\r\n', '\n').replace('\r', '\n') # normalize EOL validator = _validate(rawdata, firstOccurrenceOnly, loggedEvents, baseURI, encoding, selfURIs, mediaType=mediaType) # Warn about mismatches between media type and feed version if mediaType and validator.feedType: mediaTypes.checkAgainstFeedType(mediaType, validator.feedType, validator.loggedEvents) params = {"feedType":validator.feedType, "loggedEvents":validator.loggedEvents} if wantRawData: params['rawdata'] = rawdata return params finally: try: if usock: usock.close() except: pass __all__ = ['base', 'channel', 'compatibility', 'image', 'item', 'logging', 'rdf', 'root', 'rss', 'skipHours', 'textInput', 'util', 'validators', 'validateURL', 'validateString'] python-feedvalidator-0~svn1022/feedvalidator/formatter/0000755000175000017500000000000011065534346021616 5ustar 
poxpoxpython-feedvalidator-0~svn1022/feedvalidator/formatter/text_html.py0000644000175000017500000000746110766017570024212 0ustar poxpox"""$Id: text_html.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" """Output class for HTML text output""" from base import BaseFormatter import feedvalidator from xml.sax.saxutils import escape from feedvalidator.logging import Message, Info, Warning, Error from config import DOCSURL def escapeAndMark(x): html = escape(x) # Double-escape, and highlight, illegal characters. for i in range(len(html)-1,-1,-1): c = ord(html[i]) if 0x80 <= c <= 0x9F or c == 0xfffd: if c == 0xfffd: e = '?' else: e = '\\x%02x' % (c) html = '%s%s%s' % (html[:i], e, html[i+1:]) return html.replace(" ","  ") class Formatter(BaseFormatter): FRAGMENTLEN = 80 def __init__(self, events, rawdata): BaseFormatter.__init__(self, events) self.rawdata = rawdata def getRootClass(self, aClass): base = aClass.__bases__[0] if base == Message: return aClass if base.__name__.split('.')[-1] == 'LoggedEvent': return aClass else: return self.getRootClass(base) def getHelpURL(self, event): rootClass = self.getRootClass(event.__class__).__name__ rootClass = rootClass.split('.')[-1] rootClass = rootClass.lower() # messageClass = self.getMessageClass(event).__name__.split('.')[-1] messageClass = event.__class__.__name__.split('.')[-1] return DOCSURL + '/' + rootClass + '/' + messageClass def mostSeriousClass(self): ms=0 for event in self.data: level = -1 if isinstance(event,Info): level = 1 if isinstance(event,Warning): level = 2 if isinstance(event,Error): level = 3 ms = max(ms, level) return [None, Info, Warning, Error][ms] def header(self): return '
    ' def footer(self): return '
' def format(self, event): if event.params.has_key('line'): line = event.params['line'] if line >= len(self.rawdata.split('\n')): # For some odd reason, UnicodeErrors tend to trigger a bug # in the SAX parser that misrepresents the current line number. # We try to capture the last known good line number/column as # we go along, and now it's time to fall back to that. line = event.params['line'] = event.params.get('backupline',0) column = event.params['column'] = event.params.get('backupcolumn',0) column = event.params['column'] codeFragment = self.rawdata.split('\n')[line-1] markerColumn = column if column > self.FRAGMENTLEN: codeFragment = '... ' + codeFragment[column-(self.FRAGMENTLEN/2):] markerColumn = 5 + (self.FRAGMENTLEN/2) if len(codeFragment) > self.FRAGMENTLEN: codeFragment = codeFragment[:(self.FRAGMENTLEN-4)] + ' ...' else: codeFragment = '' line = None markerColumn = None html = escapeAndMark(codeFragment) rc = u'
  • ' if line: rc += u'''''' % line rc += u'''%s, ''' % self.getLine(event) rc += u'''%s: ''' % self.getColumn(event) if 'value' in event.params: rc += u'''%s: %s''' % (escape(self.getMessage(event)), escape(unicode(event.params['value']))) else: rc += u'''%s''' % escape(self.getMessage(event)) rc += u'''%s ''' % self.getCount(event) rc += u'''[help]

    ''' % self.getHelpURL(event) rc += u'''
    ''' + html + '''
    ''' if markerColumn: rc += u' ' * markerColumn rc += u'''^''' rc += u'
  • ' return rc python-feedvalidator-0~svn1022/feedvalidator/formatter/application_test.py0000644000175000017500000000171110766017570025534 0ustar poxpox"""$Id: application_test.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" """Output class for testing that all output messages are defined properly""" from base import BaseFormatter import feedvalidator import os LANGUAGE = os.environ.get('LANGUAGE', 'en') lang = __import__('feedvalidator.i18n.%s' % LANGUAGE, globals(), locals(), LANGUAGE) class Formatter(BaseFormatter): def getMessage(self, event): classes = [event.__class__] while len(classes): if lang.messages.has_key(classes[0]): return lang.messages[classes[0]] % event.params classes = classes + list(classes[0].__bases__) del classes[0] return None def format(self, event): """returns the formatted representation of a single event""" return self.getMessage(event) python-feedvalidator-0~svn1022/feedvalidator/formatter/text_xml.py0000644000175000017500000000315410766017570024041 0ustar poxpox"""$Id: text_xml.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" """Output class for xml output""" from base import BaseFormatter from feedvalidator.logging import * import feedvalidator def xmlEncode(value): value = value.replace('&', '&') value = value.replace('<', '<') value = value.replace('>', '>') value = value.replace('"', '"') value = value.replace("'", ''') return value class Formatter(BaseFormatter): def format(self, event): params = event.params params['type'] = event.__class__.__name__ params['text'] = self.getMessage(event) # determine the level of severity level = 'unknown' if isinstance(event,Info): level = 'info' if isinstance(event,Warning): level = 'warning' if isinstance(event,Error): level = 'error' 
params['level'] = level # organize fixed elements into a known order order = params.keys() order.sort() for key in ['msgcount', 'text', 'column', 'line', 'type', 'level']: if key in order: order.remove(key) order.insert(0,key) # output the elements result = "<%s>\n" % level for key in order: value = xmlEncode(str(params[key])) pub_key = key if key == "backupcolumn": pubkey = "column" elif key == "backupline": pubkey = "line" result = result + (" <%s>%s\n" % (key, value, key)) result = result + "\n" % level return result python-feedvalidator-0~svn1022/feedvalidator/formatter/base.py0000644000175000017500000000421010766017570023101 0ustar poxpox"""$Id: base.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" """Base class for output classes""" from UserList import UserList import os LANGUAGE = os.environ.get('LANGUAGE', 'en') lang = __import__('feedvalidator.i18n.%s' % LANGUAGE, globals(), locals(), LANGUAGE) from feedvalidator.logging import Info, Warning, Error class BaseFormatter(UserList): def __getitem__(self, i): return self.format(self.data[i]) def getErrors(self): return [self.format(msg) for msg in self.data if isinstance(msg,Error)] def getWarnings(self): return [self.format(msg) for msg in self.data if isinstance(msg,Warning)] def getLine(self, event): if not event.params.has_key('line'): return '' return lang.line % event.params def getColumn(self, event): if not event.params.has_key('column'): return '' return lang.column % event.params def getLineAndColumn(self, event): line = self.getLine(event) if not line: return '' column = self.getColumn(event) return '%s, %s:' % (line, column) def getCount(self, event): if not event.params.has_key('msgcount'): return '' count = int(event.params['msgcount']) if count <= 1: return '' return lang.occurances % event.params def getMessageClass(self, event): classes = [event.__class__] while 
len(classes): if lang.messages.has_key(classes[0]): return classes[0] classes = classes + list(classes[0].__bases__) del classes[0] return "Undefined message: %s[%s]" % (event.__class__, event.params) def getMessage(self, event): classes = [event.__class__] while len(classes): if lang.messages.has_key(classes[0]): return lang.messages[classes[0]] % event.params classes = classes + list(classes[0].__bases__) del classes[0] return "Undefined message: %s[%s]" % (event.__class__, event.params) def format(self, event): """returns the formatted representation of a single event""" return `event` python-feedvalidator-0~svn1022/feedvalidator/formatter/text_plain.py0000644000175000017500000000100110766017570024331 0ustar poxpox"""$Id: text_plain.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" """Output class for plain text output""" from base import BaseFormatter import feedvalidator class Formatter(BaseFormatter): def format(self, event): return '%s %s%s' % (self.getLineAndColumn(event), self.getMessage(event), self.getCount(event)) python-feedvalidator-0~svn1022/feedvalidator/formatter/__init__.py0000644000175000017500000000045110766017570023731 0ustar poxpox"""$Id: __init__.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" __all__ = ['base', 'text_plain', 'text_html'] python-feedvalidator-0~svn1022/feedvalidator/i18n/0000755000175000017500000000000011065534342020366 5ustar poxpoxpython-feedvalidator-0~svn1022/feedvalidator/i18n/__init__.py0000644000175000017500000000037210766017570022507 0ustar poxpox"""$Id: __init__.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" 
python-feedvalidator-0~svn1022/feedvalidator/i18n/en.py0000644000175000017500000004372410775424171021361 0ustar poxpox"""$Id: en.py 999 2008-04-04 13:09:13Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 999 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" import feedvalidator from feedvalidator.logging import * line = "line %(line)s" column = "column %(column)s" occurances = " (%(msgcount)s occurrences)" messages = { SAXError: "XML parsing error: %(exception)s", WPBlankLine: "Blank line before XML declaration", NotHtml: "%(message)s", UnicodeError: "%(exception)s (maybe a high-bit character?)", UndefinedElement: "Undefined %(parent)s element: %(element)s", MissingNamespace: "Missing namespace for %(element)s", MissingElement: "Missing %(parent)s element: %(element)s", MissingRecommendedElement: "%(parent)s should contain a %(element)s element", MissingAttribute: "Missing %(element)s attribute: %(attr)s", MissingRecommendedAttribute: "Missing recommended %(element)s attribute: %(attr)s", UnexpectedAttribute: "Unexpected %(attribute)s attribute on %(element)s element", NoBlink: "There is no blink element in RSS; use blogChannel:blink instead", NoThrWhen: "thr:when attribute obsolete; use thr:updated instead", NoBlink: "There is no thr:when attribute in Atom; use thr:updated instead", InvalidWidth: "%(element)s must be between 1 and 144", InvalidHeight: "%(element)s must be between 1 and 400", InvalidHour: "%(element)s must be an integer between 0 and 23", InvalidDay: "%(element)s must be Monday, Tuesday, Wednesday, Thursday, Friday, Saturday, or Sunday", InvalidInteger: "%(element)s must be an integer", InvalidNonNegativeInteger: "%(element)s must be a non-negative integer", InvalidPositiveInteger: "%(element)s must be a positive integer", InvalidLatitude: "%(element)s must be between -90 and 90", InvalidLongitude: "%(element)s must be between -180 and 180", InvalidCommaSeparatedIntegers: "%(element)s must be 
comma-separated integers", InvalidHttpGUID: "guid must be a full URL, unless isPermaLink attribute is false", InvalidUpdatePeriod: "%(element)s must be hourly, daily, weekly, monthly, or yearly", NotBlank: "%(element)s should not be blank", AttrNotBlank: "The %(attr)s attribute of %(element)s should not be blank", DuplicateElement: "%(parent)s contains more than one %(element)s", DuplicateSemantics: "A channel should not include both %(core)s and %(ext)s", DuplicateItemSemantics: "An item should not include both %(core)s and %(ext)s", DuplicateValue: "%(element)s values must not be duplicated within a feed", NonstdPrefix: '"%(preferred)s" is the preferred prefix for the namespace "%(ns)s"', ReservedPrefix: 'The prefix "%(prefix)s" generally is associated with the namespace "%(ns)s"', MediaRssNamespace: 'The prefix "%(prefix)s" generally is associated with the namespace "%(ns)s"', InvalidContact: "Invalid email address", InvalidAddrSpec: "%(element)s must be an email address", InvalidLink: "%(element)s must be a valid URI", InvalidIRI: "%(element)s must be a valid IRI", InvalidFullLink: "%(element)s must be a full and valid URL", InvalidUriChar: "Invalid character in a URI", InvalidISO8601Date: "%(element)s must be an ISO8601 date", InvalidISO8601DateTime: "%(element)s must be an ISO8601 date-time", InvalidW3CDTFDate: "%(element)s must be an W3CDTF date", InvalidRFC2822Date: "%(element)s must be an RFC-822 date-time", IncorrectDOW: "Incorrect day of week", InvalidRFC3339Date: "%(element)s must be an RFC-3339 date-time", InvalidNPTTime: "%(attr)s must be an NPT-time", InvalidLanguage: "%(element)s must be an ISO-639 language code", InvalidURIAttribute: "%(attr)s attribute of %(element)s must be a valid URI", InvalidURLAttribute: "%(element)s must be a full URL", InvalidIntegerAttribute: "%(attr)s attribute of %(element)s must be a positive integer", InvalidBooleanAttribute: "%(attr)s attribute of %(element)s must be 'true' or 'false'", InvalidMIMEAttribute: "%(attr)s 
attribute of %(element)s must be a valid MIME type", ItemMustContainTitleOrDescription: "item must contain either title or description", ContainsHTML: "%(element)s should not contain HTML", ContainsEmail: "%(element)s should not include email address", ContainsUndeclaredHTML: "%(element)s should not contain HTML unless declared in the type attribute", NotEnoughHoursInTheDay: "skipHours can not contain more than 24 hour elements", EightDaysAWeek: "skipDays can not contain more than 7 day elements", SecurityRisk: "%(element)s should not contain %(tag)s tag", SecurityRiskAttr: "%(element)s should not contain %(attr)s attribute", ContainsRelRef: "%(element)s should not contain relative URL references", ContainsSystemEntity: "Feeds must not contain SYSTEM entities", InvalidContentMode: "mode must be 'xml', 'escaped', or 'base64'", InvalidMIMEType: "Not a valid MIME type", NotEscaped: "%(element)s claims to be escaped, but isn't", NotInline: "%(element)s claims to be inline, but may contain html", NotBase64: "%(element)s claims to be base64-encoded, but isn't", InvalidURN: "%(element)s is not a valid URN", InvalidTAG: "%(element)s is not a valid TAG", InvalidURI: "%(element)s is not a valid URI", ObsoleteVersion: "This feed is an obsolete version", ObsoleteNamespace: "This feed uses an obsolete namespace", InvalidNamespace: "%(element)s is in an invalid namespace: %(namespace)s", InvalidDoctype: "This feed contains conflicting DOCTYPE and version information", DuplicateAtomLink: "Duplicate alternate links with the same type and hreflang", MissingHref: "%(element)s must have an href attribute", AtomLinkNotEmpty: "%(element)s should not have text (all data is in attributes)", BadCharacters: '%(element)s contains bad characters', BadXmlVersion: "Incorrect XML Version: %(version)s", UnregisteredAtomLinkRel: "Unregistered link relationship", HttpError: "Server returned %(status)s", IOError: "%(exception)s (%(message)s; misconfigured server?)", ObscureEncoding: "Obscure XML 
character encoding: %(encoding)s", NonstdEncoding: "This encoding is not mandated by the XML specification: %(encoding)s", UnexpectedContentType: '%(type)s should not be served with the "%(contentType)s" media type', EncodingMismatch: 'Your feed appears to be encoded as "%(encoding)s", but your server is reporting "%(charset)s"', UnknownEncoding: "Unknown XML character encoding: %(encoding)s", NotSufficientlyUnique: "The specified guid is not sufficiently unique", MissingEncoding: "No character encoding was specified", UnexpectedText: "Unexpected Text", ValidatorLimit: "Unable to validate, due to hardcoded resource limits (%(limit)s)", TempRedirect: "Temporary redirect", TextXml: "Content type of text/xml with no charset", Uncompressed: "Response is not compressed", HttpProtocolError: 'Response includes bad HTTP header name: "%(header)s"', NonCanonicalURI: 'Identifier "%(uri)s" is not in canonical form (the canonical form would be "%(curi)s")', InvalidRDF: 'RDF parsing error: %(message)s', InvalidDuration: 'Invalid duration', InvalidYesNo: '%(element)s must be "yes", "no"', InvalidYesNoClean: '%(element)s must be "yes", "no", or "clean"', TooLong: 'length of %(len)d exceeds the maximum allowable for %(element)s of %(max)d', InvalidItunesCategory: '%(text)s is not one of the predefined iTunes categories or sub-categories', ObsoleteItunesCategory: '%(text)s is an obsolete iTunes category or sub-category', InvalidKeywords: 'Use commas to separate keywords', InvalidTextType: 'type attribute must be "text", "html", or "xhtml"', MissingXhtmlDiv: 'Missing xhtml:div element', MissingSelf: 'Missing atom:link with rel="self"', MissingAtomSelfLink: 'Missing atom:link with rel="self"', DuplicateEntries: 'Two entries with the same id', MisplacedMetadata: '%(element)s must appear before all entries', MissingSummary: 'Missing summary', MissingTextualContent: 'Missing textual content', MissingContentOrAlternate: 'Missing content or alternate link', MissingSourceElement: "Missing 
%(parent)s element: %(element)s", MissingTypeAttr: "Missing %(element)s attribute: %(attr)s", HtmlFragment: "%(type)s type used for a document fragment", DuplicateUpdated: "Two entries with the same value for atom:updated", UndefinedNamedEntity: "Undefined named entity", ImplausibleDate: "Implausible date", UnexpectedWhitespace: "Whitespace not permitted here", SameDocumentReference: "Same-document reference", SelfDoesntMatchLocation: "Self reference doesn't match document location", InvalidOPMLVersion: 'The "version" attribute for the opml element must be 1.0 or 1.1.', MissingXmlURL: 'An element whose type is "rss" must have an "xmlUrl" attribute.', InvalidOutlineVersion: 'An element whose type is "rss" may have a version attribute, whose value must be RSS, RSS1, RSS2, or scriptingNews.', InvalidOutlineType: 'The type attribute on an element should be a known type.', InvalidExpansionState: ' is a comma-separated list of line numbers.', InvalidTrueFalse: '%(element)s must be "true" or "false"', MissingOutlineType: 'An element with more than just a "text" attribute should have a "type" attribute indicating how the other attributes are to be interpreted.', MissingTitleAttr: 'Missing outline attribute: title', MissingUrlAttr: 'Missing outline attribute: url', NotUTF8: 'iTunes elements should only be present in feeds encoded as UTF-8', MissingItunesElement: 'Missing recommended iTunes %(parent)s element: %(element)s', UnsupportedItunesFormat: 'Format %(extension)s is not supported by iTunes', InvalidCountryCode: "Invalid country code: \"%(value)s\"", InvalidCurrencyUnit: "Invalid value for %(attr)s", InvalidFloat: "Invalid value for %(attr)s", InvalidFloatUnit: "Invalid value for %(attr)s", InvalidFullLocation: "Invalid value for %(attr)s", InvalidGender: "Invalid value for %(attr)s", InvalidIntUnit: "Invalid value for %(attr)s", InvalidLabel: "Invalid value for %(attr)s", InvalidLocation: "Invalid value for %(attr)s", InvalidMaritalStatus: "Invalid value for 
%(attr)s", InvalidPaymentMethod: "Invalid value for %(attr)s", InvalidPercentage: '%(element)s must be a percentage', InvalidPriceType: "Invalid value for %(attr)s", InvalidRatingType: "Invalid value for %(attr)s", InvalidReviewerType: "Invalid value for %(attr)s", InvalidSalaryType: "Invalid value for %(attr)s", InvalidServiceType: "Invalid value for %(attr)s", InvalidValue: "Invalid value for %(attr)s", InvalidYear: "Invalid value for %(attr)s", TooMany: "%(parent)s contains more than ten %(element)s elements", InvalidPermalink: "guid must be a full URL, unless isPermaLink attribute is false", NotInANamespace: "Missing namespace for %(element)s", UndeterminableVocabulary:"Missing namespace for %(element)s", SelfNotAtom: '"self" link references a non-Atom representation', InvalidFormComponentName: 'Invalid form component name', ImageLinkDoesntMatch: "Image link doesn't match channel link", ImageUrlFormat: "Image not in required format", ProblematicalRFC822Date: "Problematical RFC 822 date-time value", DuplicateEnclosure: "item contains more than one enclosure", MissingItunesEmail: "The recommended element is missing", MissingGuid: "%(parent)s should contain a %(element)s element", UriNotIri: "IRI found where URL expected", ObsoleteWikiNamespace: "Obsolete Wiki Namespace", DuplicateDescriptionSemantics: "Avoid %(element)s", InvalidCreditRole: "Invalid Credit Role", InvalidMediaTextType: 'type attribute must be "plain" or "html"', InvalidMediaHash: 'Invalid Media Hash', InvalidMediaRating: 'Invalid Media Rating', InvalidMediaRestriction: "media:restriction must be 'all' or 'none'", InvalidMediaRestrictionRel: "relationship must be 'allow' or 'disallow'", InvalidMediaRestrictionType: "type must be 'country' or 'uri'", InvalidMediaMedium: 'Invalid content medium: "%(value)s"', InvalidMediaExpression: 'Invalid content expression: "%(value)s"', DeprecatedMediaAdult: 'media:adult is deprecated', MediaGroupWithoutAlternatives: 'media:group must have multiple media:content 
children', CommentRSS: 'wfw:commentRSS should be wfw:commentRss', NonSpecificMediaType: '"%(contentType)s" media type is not specific enough', DangerousStyleAttr: "style attribute contains potentially dangerous content", NotURLEncoded: "%(element)s must be URL encoded", InvalidLocalRole: "Invalid local role", InvalidEncoding: "Invalid character encoding", ShouldIncludeExample: "OpenSearchDescription should include an example Query", InvalidAdultContent: "Non-boolean value for %(element)s", InvalidLocalParameter: "Invalid local parameter name", UndeclaredPrefix: "Undeclared %(element)s prefix", UseOfExtensionAttr: "Use of extension attribute on RSS 2.0 core element: %(attribute)s", DeprecatedDTD: "The use of this DTD has been deprecated by Netscape", MisplacedXHTMLContent: "Misplaced XHTML content", SchemeNotIANARegistered: "URI scheme not IANA registered", InvalidCoord: "Invalid coordinates", InvalidCoordList: "Invalid coordinate list", CoordComma: "Comma found in coordinate pair", AvoidNamespacePrefix: "Avoid Namespace Prefix: %(prefix)s", Deprecated: "%(element)s has been superceded by %(replacement)s.", DeprecatedRootHref: "root:// URLs have been superceded by full http:// URLs", InvalidAltitudeMode: "Invalid altitudeMode", InvalidAngle: "%(element)s must be between -360 and 360", InvalidColor: "Not a valid color", InvalidColorMode: "Invalid colorMode.", InvalidItemIconState: "Invalid state for Icon", InvalidListItemType: "Invalid list item type", InvalidKmlCoordList: "Invalid coordinate list. Make sure that coordinates are of the form longitude,latitude or longitude,latitude,altitude and seperated by a single space. It is also a good idea to avoid line breaks or other extraneous white space", InvalidKmlLatitude: "Invalid latitude found within coordinates. Latitudes have to be between -90 and 90.", InvalidKmlLongitude: "Invalid longitude found within coordinates. 
Longitudes have to be between -180 and 180.", InvalidKmlMediaType: "%(contentType)s is an invalid KML media type. Use application/vnd.google-earth.kml+xml or application/vnd.google-earth.kmz", InvalidKmlUnits: "Invalid units.", InvalidRefreshMode: "Invalid refreshMode", InvalidSchemaFieldType: "Invalid Schema field type", InvalidStyleState: "Invalid key for StyleMap.", InvalidViewRefreshMode: "Invalid viewRefreshMode.", InvalidZeroOne: "Invalid value. Should be 0 or 1.", MissingId: "%(parent)s should contain a %(element)s attribute. This is important if you want to link directly to features.", InvalidSseType: "sx:related type must be either 'aggregated' or 'compete'", FeedHistoryRelInEntry: "%(rel)s link relation found in entry", LinkPastEnd: "%(rel)s link in %(self)s entry in list", FeedRelInCompleteFeed: "%(rel)s link relation found in complete feed", MissingCurrentInArchive: "Current link not found in archive feed", CurrentNotSelfInCompleteFeed: "Current not self in complete feed", ArchiveIncomplete: "Archive incomplete", RelativeSelf: "Relative href value on self link", ConflictingCatAttr: "Categories can't have both href and %(attr)s attributes", ConflictingCatChildren: "Categories can't have both href attributes and children", UndefinedParam: "Undefined media-range parameter", CharacterData: 'Encode "&" and "<" in plain text using hexadecimal character references.', EmailFormat: 'Email address is not in the recommended format', MissingRealName: 'Email address is missing real name', MisplacedItem: 'Misplaced Item', ImageTitleDoesntMatch: "Image title doesn't match channel title", AvoidTextInput: "Avoid Text Input", NeedDescriptionBeforeContent: "Ensure description precedes content:encoded", SlashDate: "Ensure lastBuildDate is present when slash:comments is used", UseZeroForMidnight: "Use zero for midnight", UseZeroForUnknown: "Use zero for unknown length", UnknownHost: "Unknown host", UnknownNamespace: "Use of unknown namespace: %(namespace)s", 
IntegerOverflow: "%(element)s value too large", InvalidNSS: "Invalid Namespace Specific String: %(element)s", SinceAfterUntil: "Since After until", MissingByAndWhenAttrs: "Missing by and when attributes", QuestionableUsage: "Undocumented use of %(element)s", InvalidRSSVersion: "Invalid RSS Version", } python-feedvalidator-0~svn1022/feedvalidator/textInput.py0000644000175000017500000000260710766017570022200 0ustar poxpox"""$Id: textInput.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" from validators import * from extension import extension_everywhere # # textInput element. # class textInput(validatorBase, extension_everywhere): def getExpectedAttrNames(self): return [(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'about')] def validate(self): if not "title" in self.children: self.log(MissingTitle({"parent":self.name, "element":"title"})) if not "link" in self.children: self.log(MissingLink({"parent":self.name, "element":"link"})) if not "description" in self.children: self.log(MissingDescription({"parent":self.name,"element":"description"})) if not "name" in self.children: self.log(MissingElement({"parent":self.name, "element":"name"})) def do_title(self): return nonhtml(), noduplicates() def do_description(self): return text(), noduplicates() def do_name(self): return formname(), noduplicates() def do_link(self): return rfc2396_full(), noduplicates() def do_dc_creator(self): return text() # duplicates allowed def do_dc_subject(self): return text() # duplicates allowed def do_dc_date(self): return w3cdtf(), noduplicates() python-feedvalidator-0~svn1022/feedvalidator/author.py0000644000175000017500000000245110766017570021473 0ustar poxpox"""$Id: author.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" from 
base import validatorBase from validators import * # # author element. # class author(validatorBase): def getExpectedAttrNames(self): return [(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'parseType')] def validate(self): if not "name" in self.children and not "atom_name" in self.children: self.log(MissingElement({"parent":self.name, "element":"name"})) def do_name(self): return nonhtml(), nonemail(), nonblank(), noduplicates() def do_email(self): return addr_spec(), noduplicates() def do_uri(self): return nonblank(), rfc3987(), nows(), noduplicates() def do_foaf_workplaceHomepage(self): return rdfResourceURI() def do_foaf_homepage(self): return rdfResourceURI() def do_foaf_weblog(self): return rdfResourceURI() def do_foaf_plan(self): return text() def do_foaf_firstName(self): return text() def do_xhtml_div(self): from content import diveater return diveater() # RSS/Atom support do_atom_name = do_name do_atom_email = do_email do_atom_uri = do_uri python-feedvalidator-0~svn1022/feedvalidator/rdf.py0000644000175000017500000001154110766017570020744 0ustar poxpox"""$Id: rdf.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" from base import validatorBase from logging import * from validators import rdfAbout, noduplicates, text, eater from root import rss11_namespace as rss11_ns from extension import extension_everywhere rdfNS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" # # rdf:RDF element. 
The valid children include "channel", "item", "textinput", "image" # class rdf(validatorBase,object): def do_rss090_channel(self): from channel import channel self.dispatcher.defaultNamespaces.append("http://my.netscape.com/rdf/simple/0.9/") return channel(), noduplicates() def do_channel(self): from channel import rss10Channel return rdfAbout(), rss10Channel(), noduplicates() def _is_090(self): return "http://my.netscape.com/rdf/simple/0.9/" in self.dispatcher.defaultNamespaces def _withAbout(self,v): if self._is_090(): return v else: return v, rdfAbout() def do_item(self): from item import rss10Item return self._withAbout(rss10Item()) def do_textinput(self): from textInput import textInput return self._withAbout(textInput()) def do_image(self): return self._withAbout(rss10Image()) def do_cc_License(self): return eater() def do_taxo_topic(self): return eater() def do_rdf_Description(self): return eater() def prevalidate(self): self.setFeedType(TYPE_RSS1) def validate(self): if not "channel" in self.children and not "rss090_channel" in self.children: self.log(MissingElement({"parent":self.name.replace('_',':'), "element":"channel"})) from validators import rfc2396_full class rss10Image(validatorBase, extension_everywhere): def validate(self): if not "title" in self.children: self.log(MissingTitle({"parent":self.name, "element":"title"})) if not "link" in self.children: self.log(MissingLink({"parent":self.name, "element":"link"})) if not "url" in self.children: self.log(MissingElement({"parent":self.name, "element":"url"})) def do_title(self): from image import title return title(), noduplicates() def do_link(self): return rfc2396_full(), noduplicates() def do_url(self): return rfc2396_full(), noduplicates() def do_dc_creator(self): return text() def do_dc_subject(self): return text() # duplicates allowed def do_dc_date(self): from validators import w3cdtf return w3cdtf(), noduplicates() def do_cc_license(self): return eater() # # This class performs RSS 1.x 
specific validations on extensions. # class rdfExtension(validatorBase): def __init__(self, qname, literal=False): validatorBase.__init__(self) self.qname=qname self.literal=literal def textOK(self): pass def setElement(self, name, attrs, parent): validatorBase.setElement(self, name, attrs, parent) if attrs.has_key((rdfNS,"parseType")): if attrs[(rdfNS,"parseType")] == "Literal": self.literal=True if not self.literal: # ensure no rss11 children if self.qname==rss11_ns: from logging import UndefinedElement self.log(UndefinedElement({"parent":parent.name, "element":name})) # no duplicate rdf:abouts if attrs.has_key((rdfNS,"about")): about = attrs[(rdfNS,"about")] if not "abouts" in self.dispatcher.__dict__: self.dispatcher.__dict__["abouts"] = [] if about in self.dispatcher.__dict__["abouts"]: self.log(DuplicateValue( {"parent":parent.name, "element":"rdf:about", "value":about})) else: self.dispatcher.__dict__["abouts"].append(about) def getExpectedAttrNames(self): # no rss11 attributes if self.literal or not self.attrs: return self.attrs.keys() return [(ns,n) for ns,n in self.attrs.keys() if ns!=rss11_ns] def validate(self): # rdflib 2.0.5 does not catch mixed content errors if self.value.strip() and self.children and not self.literal: self.log(InvalidRDF({"message":"mixed content"})) def startElementNS(self, name, qname, attrs): # ensure element is "namespace well formed" if name.find(':') != -1: from logging import MissingNamespace self.log(MissingNamespace({"parent":self.name, "element":name})) # ensure all attribute namespaces are properly defined for (namespace,attr) in attrs.keys(): if ':' in attr and not namespace: from logging import MissingNamespace self.log(MissingNamespace({"parent":self.name, "element":attr})) # eat children self.children.append((qname,name)) self.push(rdfExtension(qname, self.literal), name, attrs) def characters(self, string): if not self.literal: validatorBase.characters(self, string) 
python-feedvalidator-0~svn1022/feedvalidator/logging.py0000644000175000017500000003320510775424171021617 0ustar poxpox"""$Id: logging.py 999 2008-04-04 13:09:13Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 999 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" # feed types TYPE_UNKNOWN = 0 TYPE_RSS1 = 1 TYPE_RSS2 = 2 TYPE_ATOM = 3 TYPE_ATOM_ENTRY = 4 TYPE_APP_CATEGORIES = 5 TYPE_APP_SERVICE = 6 TYPE_XRD = 7 TYPE_OPENSEARCH = 8 TYPE_OPML = 9 TYPE_KML20 = 10 TYPE_KML21 = 11 TYPE_KML22 = 12 FEEDTYPEDISPLAY = {0:"(unknown type)", 1:"RSS", 2:"RSS", 3:"Atom 1.0", 4:"Atom 1.0", 5:"Atom Publishing Protocol Category", 6:"Atom Publishing Protocol Service", 7:"XRD", 8:"OpenSearch", 9:"OPML", 10:"KML 2.0", 11:"KML 2.1", 12:"KML 2.2"} VALIDFEEDGRAPHIC = {0:"", 1:"valid-rss.png", 2:"valid-rss.png", 3:"valid-atom.png", 4:"valid-atom.png", 5:"valid-atom.png", 6:"valid-atom.png", 7:"valid-xrd.png", 8:"valid-opensearch.png", 9:"valid-opml.gif", 10:"valid-kml.png", 11:"valid-kml.png", 12:"valid-kml.png"} # # logging support # class LoggedEvent: def __init__(self, params): self.params = params class Info(LoggedEvent): pass class Message(LoggedEvent): pass class Warning(Message): pass class Error(Message): pass class ValidationFailure(Exception): def __init__(self, event): self.event = event ###################### error ###################### class SAXError(Error): pass class WPBlankLine(SAXError): pass class UnicodeError(Error): pass class MissingNamespace(SAXError): pass class NotInANamespace(MissingNamespace): pass class UseOfExtensionAttr(Warning): pass class UndefinedNamedEntity(SAXError): pass class InvalidRSSVersion(Error): pass class UndefinedElement(Error): pass class NoBlink(UndefinedElement): pass class NoThrWhen(UndefinedElement): pass class MissingAttribute(Error): pass class UnexpectedAttribute(Error): pass class DuplicateElement(Error): pass class NotEnoughHoursInTheDay(Error): pass class EightDaysAWeek(Error): pass 
class InvalidValue(Error): pass class InvalidContact(InvalidValue): pass class UnknownHost(Warning): pass class InvalidAddrSpec(InvalidContact): pass class InvalidLink(InvalidValue): pass class UriNotIri(InvalidLink): pass class InvalidIRI(InvalidLink): pass class InvalidFullLink(InvalidLink): pass class InvalidUriChar(InvalidLink): pass class InvalidISO8601Date(InvalidValue): pass class InvalidISO8601DateTime(InvalidValue): pass class InvalidW3CDTFDate(InvalidISO8601Date): pass class InvalidRFC2822Date(InvalidValue): pass class IncorrectDOW(InvalidRFC2822Date): pass class InvalidRFC3339Date(InvalidValue): pass class InvalidURIAttribute(InvalidLink): pass class InvalidURLAttribute(InvalidURIAttribute): pass class InvalidIntegerAttribute(InvalidValue): pass class InvalidBooleanAttribute(InvalidValue): pass class InvalidMIMEAttribute(InvalidValue): pass class InvalidInteger(InvalidValue): pass class InvalidPercentage(InvalidValue): pass class InvalidNonNegativeInteger(InvalidInteger): pass class InvalidPositiveInteger(InvalidInteger): pass class InvalidWidth(InvalidValue): pass class InvalidHeight(InvalidValue): pass class InvalidHour(InvalidValue): pass class InvalidDay(InvalidValue): pass class InvalidHttpGUID(InvalidValue): pass class InvalidLanguage(InvalidValue): pass class InvalidUpdatePeriod(InvalidValue): pass class InvalidItunesCategory(InvalidValue): pass class ObsoleteItunesCategory(Warning): pass class InvalidYesNo(InvalidValue): pass class InvalidYesNoClean(InvalidValue): pass class InvalidDuration(InvalidValue): pass class TooLong(InvalidValue): pass class InvalidKeywords(Warning): pass class InvalidTextType(InvalidValue): pass class InvalidCommaSeparatedIntegers(InvalidValue): pass class UndeterminableVocabulary(Warning): pass class InvalidFormComponentName(InvalidValue): pass class InvalidAccessRestrictionRel(InvalidValue): pass class NotURLEncoded(InvalidValue): pass class InvalidLocalRole(InvalidValue): pass class InvalidEncoding(InvalidValue): pass 
class InvalidSyndicationRight(InvalidValue): pass class InvalidLocalParameter(InvalidValue): pass class MissingElement(Error): pass class MissingDescription(MissingElement): pass class MissingLink(MissingElement): pass class MissingTitle(MissingElement): pass class ItemMustContainTitleOrDescription(MissingElement): pass class MissingXhtmlDiv(MissingElement): pass class MissingContentOrAlternate(MissingElement): pass class FatalSecurityRisk(Error): pass class ContainsSystemEntity(Info): pass class DuplicateValue(InvalidValue): pass class InvalidDoctype(Error): pass class BadXmlVersion(Error): pass class DuplicateAtomLink(Error): pass class MissingHref(MissingAttribute): pass class AtomLinkNotEmpty(Warning): pass class UnregisteredAtomLinkRel(Warning): pass class HttpError(Error): pass class IOError(Error): pass class UnknownEncoding(Error): pass class UnexpectedText(Error): pass class UnexpectedWhitespace(Error): pass class ValidatorLimit(Error): pass class HttpProtocolError(Error): pass class InvalidRDF(Error): pass class InvalidLatitude(Error): pass class InvalidLongitude(Error): pass class MisplacedMetadata(Error): pass class InvalidPermalink(Error): pass class InvalidCreditRole(Error): pass class InvalidMediaTextType(Error): pass class InvalidMediaHash(Error): pass class InvalidMediaRating(Error): pass class InvalidNPTTime(Error): pass class InvalidMediaRestriction(Error): pass class InvalidMediaRestrictionRel(Error): pass class InvalidMediaRestrictionType(Error): pass class InvalidMediaMedium(Error): pass class InvalidMediaExpression(Error): pass class DeprecatedMediaAdult(Warning): pass class MediaGroupWithoutAlternatives(Error): pass class InvalidSseType(Error): pass class InvalidNSS(Error): pass class IntegerOverflow(Error): pass class SinceAfterUntil(Error): pass class MissingByAndWhenAttrs(Error): pass ###################### warning ###################### class DuplicateSemantics(Warning): pass class DuplicateItemSemantics(DuplicateSemantics): pass class 
DuplicateDescriptionSemantics(DuplicateSemantics): pass class ImageLinkDoesntMatch(Warning): pass class ImageUrlFormat(Warning): pass class ContainsRelRef(Warning): pass class ReservedPrefix(Warning): pass class MediaRssNamespace(Error): pass class NotSufficientlyUnique(Warning): pass class ImplausibleDate(Warning): pass class ProblematicalRFC822Date(Warning): pass class SecurityRisk(Warning): pass class SecurityRiskAttr(SecurityRisk): pass class DangerousStyleAttr(SecurityRiskAttr): pass class BadCharacters(Warning): pass class ObscureEncoding(Warning): pass class UnexpectedContentType(Warning): pass class EncodingMismatch(Warning): pass class NonSpecificMediaType(Warning): pass class NonCanonicalURI(Warning): pass class SameDocumentReference(Warning): pass class ContainsEmail(Warning): pass class ContainsHTML(Warning): pass class ContainsUndeclaredHTML(ContainsHTML): pass class MissingSelf(Warning): pass class SelfDoesntMatchLocation(Warning): pass class RelativeSelf(Warning): pass class MissingSourceElement(Warning): pass class MissingTypeAttr(Warning): pass class DuplicateEntries(Warning): pass class DuplicateUpdated(Warning): pass class NotBlank(Warning): pass class AttrNotBlank(Warning): pass class MissingSummary(Error): pass class MissingTextualContent(Warning): pass class NotUTF8(Warning): pass class MissingItunesElement(Warning): pass class MissingItunesEmail(Warning): pass class UnsupportedItunesFormat(Warning): pass class SelfNotAtom(Warning): pass class DuplicateEnclosure(Warning): pass class MissingGuid(Warning): pass class ObsoleteWikiNamespace(Warning): pass class CommentRSS(Warning): pass class ShouldIncludeExample(Warning): pass class InvalidAdultContent(Warning): pass class InvalidSyndicationRight(InvalidValue): pass class UndeclaredPrefix(InvalidValue): pass class MisplacedXHTMLContent(Warning): pass class SchemeNotIANARegistered(Warning): pass class AvoidNamespacePrefix(Warning): pass class UnknownNamespace(Warning): pass class 
MissingRecommendedAttribute(Warning): pass class QuestionableUsage(Warning): pass ###################### info ###################### class BestPractices(Info): pass class MissingRecommendedElement(BestPractices): pass class MissingDCLanguage(MissingRecommendedElement): pass class NonstdPrefix(BestPractices): pass class NonstdEncoding(BestPractices): pass class MissingEncoding(BestPractices): pass class TempRedirect(Info): pass class TextXml(Info): pass class Uncompressed(Info): pass ## Atom-specific errors class ObsoleteVersion(Warning): pass class ObsoleteNamespace(Error): pass class ConflictingCatAttr(Error): pass class ConflictingCatChildren(Error): pass class InvalidMediaRange(Error): pass class UndefinedParam(Warning): pass class InvalidURI(InvalidValue) : pass class InvalidURN(InvalidValue): pass class InvalidTAG(InvalidValue): pass class InvalidContentMode(InvalidValue) : pass class InvalidMIMEType(InvalidMediaRange) : pass class InvalidNamespace(Error): pass class NotEscaped(InvalidValue): pass class NotBase64(InvalidValue): pass class NotInline(Warning): pass # this one can never be sure... 
class NotHtml(Warning): pass class HtmlFragment(Warning): pass class FeedHistoryRelInEntry(Warning): pass class FeedRelInCompleteFeed(Error): pass class CurrentNotSelfInCompleteFeed(Error): pass class LinkPastEnd(Error): pass class MissingCurrentInArchive(Warning): pass class ArchiveIncomplete(Warning): pass ############## non-errors (logging successes) ################### class Success(LoggedEvent): pass class ValidValue(Success): pass class ValidCloud(Success): pass class ValidURI(ValidValue): pass class ValidHttpGUID(ValidURI): pass class ValidURLAttribute(ValidURI): pass class ValidURN(ValidValue): pass class ValidTAG(ValidValue): pass class ValidTitle(ValidValue): pass class ValidDate(ValidValue): pass class ValidW3CDTFDate(ValidDate): pass class ValidRFC2822Date(ValidDate): pass class ValidAttributeValue(ValidValue): pass class ValidBooleanAttribute(ValidAttributeValue): pass class ValidLanguage(ValidValue): pass class ValidHeight(ValidValue): pass class ValidWidth(ValidValue): pass class ValidTitle(ValidValue): pass class ValidContact(ValidValue): pass class ValidIntegerAttribute(ValidValue): pass class ValidMIMEAttribute(ValidValue): pass class ValidDay(ValidValue): pass class ValidHour(ValidValue): pass class ValidInteger(ValidValue): pass class ValidPercentage(ValidValue): pass class ValidUpdatePeriod(ValidValue): pass class ValidContentMode(ValidValue): pass class ValidElement(ValidValue): pass class ValidCopyright(ValidValue): pass class ValidGeneratorName(ValidValue): pass class OptionalValueMissing(ValidValue): pass class ValidDoctype(ValidValue): pass class DeprecatedDTD(Info): pass class ValidHtml(ValidValue): pass class ValidAtomLinkRel(ValidValue): pass class ValidLatitude(ValidValue): pass class ValidLongitude(ValidValue): pass class ValidNPTTime(ValidValue): pass ###################### opml ###################### class InvalidOPMLVersion(Error): pass class MissingXmlURL(Warning): pass class InvalidOutlineVersion(Warning): pass class 
InvalidOutlineType(Warning): pass class InvalidExpansionState(Error): pass class InvalidTrueFalse(InvalidValue): pass class MissingOutlineType(Warning): pass class MissingTitleAttr(Warning): pass class MissingUrlAttr(Warning): pass ###################### gbase ###################### class InvalidCountryCode(InvalidValue): pass class InvalidCurrencyUnit(InvalidValue): pass class InvalidFloat(InvalidValue): pass class InvalidFloatUnit(InvalidValue): pass class InvalidFullLocation(InvalidValue): pass class InvalidGender(InvalidValue): pass class InvalidIntUnit(InvalidValue): pass class InvalidLabel(InvalidValue): pass class InvalidLocation(InvalidValue): pass class InvalidMaritalStatus(InvalidValue): pass class InvalidPaymentMethod(InvalidValue): pass class InvalidPriceType(InvalidValue): pass class InvalidRatingType(InvalidValue): pass class InvalidReviewerType(InvalidValue): pass class InvalidSalaryType(InvalidValue): pass class InvalidServiceType(InvalidValue): pass class InvalidYear(InvalidValue): pass class TooMany(DuplicateElement): pass ###################### georss ###################### class InvalidCoord(InvalidValue): pass class InvalidCoordList(InvalidValue): pass class CoordComma(Warning): pass ###################### meta ###################### class InvalidMetaName(InvalidValue): pass class InvalidMetaContent(InvalidValue): pass ###################### kml ###################### class Deprecated(Warning): pass class DeprecatedRootHref(Warning): pass class InvalidAltitudeMode(InvalidValue): pass class InvalidAngle(InvalidValue): pass class InvalidColor(InvalidValue): pass class InvalidColorMode(InvalidValue): pass class InvalidItemIconState(InvalidValue): pass class InvalidListItemType(InvalidValue): pass class InvalidKmlCoordList(InvalidValue): pass class InvalidKmlLatitude(InvalidValue): pass class InvalidKmlLongitude(InvalidValue): pass class InvalidKmlMediaType(Warning): pass class InvalidKmlUnits(InvalidValue): pass class 
InvalidRefreshMode(InvalidValue): pass class InvalidSchemaFieldType(InvalidValue): pass class InvalidStyleState(InvalidValue): pass class InvalidViewRefreshMode(InvalidValue): pass class InvalidZeroOne(InvalidValue): pass class MissingId(Warning): pass class ValidAngle(ValidValue): pass ###################### RSS 2.0 Profile ###################### class RSS20Profile(Warning): pass class CharacterData(ContainsHTML): pass class EmailFormat(RSS20Profile): pass class MissingRealName(EmailFormat): pass class MisplacedItem(RSS20Profile): pass class ImageTitleDoesntMatch(RSS20Profile): pass class AvoidTextInput(RSS20Profile): pass class NeedDescriptionBeforeContent(RSS20Profile): pass class SlashDate(RSS20Profile): pass class UseZeroForMidnight(RSS20Profile): pass class MissingAtomSelfLink(MissingSelf): pass class UseZeroForUnknown(InvalidNonNegativeInteger): pass python-feedvalidator-0~svn1022/feedvalidator/opensearch.py0000644000175000017500000001256110754323271022316 0ustar poxpoxfrom validators import * from logging import * import re class OpenSearchDescription(validatorBase): def __init__(self): self.exampleFound = 0 validatorBase.__init__(self) def validate(self): name=self.name.replace("opensearch_",'') if not "ShortName" in self.children: self.log(MissingElement({"parent":name, "element":"ShortName"})) if not "Description" in self.children: self.log(MissingElement({"parent":name, "element":"Description"})) if not "Url" in self.children: self.log(MissingElement({"parent":name, "element":"Url"})) if not self.exampleFound: self.log(ShouldIncludeExample({})) def do_ShortName(self): return lengthLimitedText(16), noduplicates() def do_Description(self): return lengthLimitedText(1024), noduplicates() def do_Url(self): return Url() def do_Contact(self): return addr_spec(), noduplicates() def do_Tags(self): return lengthLimitedText(256), noduplicates() def do_LongName(self): return lengthLimitedText(48), noduplicates() def do_Image(self): return Image() def 
do_Query(self): return Query() def do_Developer(self): return lengthLimitedText(64), noduplicates() def do_Attribution(self): return lengthLimitedText(256), noduplicates() def do_SyndicationRight(self): return SyndicationRight(), noduplicates() def do_AdultContent(self): return AdultContent(), noduplicates() def do_Language(self): return Language() def do_InputEncoding(self): return Charset() def do_OutputEncoding(self): return Charset() class Url(validatorBase): def getExpectedAttrNames(self): return [(None,attr) for attr in ['template', 'type', 'indexOffset', 'pageOffset']] def prevalidate(self): self.validate_required_attribute((None,'template'), Template()) self.validate_required_attribute((None,'type'), MimeType) self.validate_optional_attribute((None,'indexOffset'), Integer) self.validate_optional_attribute((None,'pageOffset'), Integer) class Template(rfc2396_full): tparam = re.compile("{((?:[-a-zA-Z0-9._~]|%[a-fA-F0-9]{2})+:?(?:[-a-zA-Z0-9._~]|%[a-fA-F0-9]{2})*)\??}") valuelist = ['searchTerms', 'count', 'startIndex', 'startPage', 'language', 'inputEncoding', 'outputEncoding'] def validate(self): for pname in self.tparam.findall(self.value): if pname.find(':')<0: if pname not in self.valuelist: self.log(InvalidLocalParameter({'value':pname})) else: prefix,name = pname.split(':',1) if not self.parent.namespaceFor(prefix): self.log(UndeclaredPrefix({'value':prefix})) self.value = self.tparam.sub(r'\1',self.value) rfc2396_full.validate(self) class Image(rfc2396_full): def getExpectedAttrNames(self): return [(None,attr) for attr in ['height', 'width', 'type']] def prevalidate(self): self.validate_required_attribute((None,'height'), nonNegativeInteger) self.validate_required_attribute((None,'width'), nonNegativeInteger) self.validate_required_attribute((None,'type'), MimeType) class Query(validatorBase): def getExpectedAttrNames(self): return [(None,attr) for attr in ['role', 'title', 'totalResults', 'searchTerms', 'count', 'startIndex', 'startPage', 'language', 
'inputEncoding', 'outputEncoding', 'parameter']] def prevalidate(self): self.validate_required_attribute((None,'role'), QueryRole) self.validate_optional_attribute((None,'title'), lengthLimitedText(256)) self.validate_optional_attribute((None,'title'), nonhtml) self.validate_optional_attribute((None,'totalResults'), nonNegativeInteger) self.validate_optional_attribute((None,'searchTerms'), UrlEncoded) self.validate_optional_attribute((None,'count'), nonNegativeInteger) self.validate_optional_attribute((None,'startIndex'), Integer) self.validate_optional_attribute((None,'startPage'), Integer) self.validate_optional_attribute((None,'language'), iso639) self.validate_optional_attribute((None,'inputEncoding'), Charset) self.validate_optional_attribute((None,'outputEncoding'), Charset) if self.attrs.has_key((None,"role")) and \ self.attrs.getValue((None,"role")) == "example": self.parent.exampleFound = 1 class QueryRole(enumeration): error = InvalidLocalRole valuelist = ['request', 'example', 'related', 'correction', 'subset', 'superset'] def validate(self): if self.value.find(':')<0: enumeration.validate(self) else: prefix,name = self.value.split(':',1) if not self.parent.namespaceFor(prefix): self.log(UndeclaredPrefix({'value':prefix})) class UrlEncoded(validatorBase): def validate(self): from urllib import quote, unquote import re for value in self.value.split(): if type(value) == unicode: value = value.encode('utf-8') value = re.sub('%\w\w', lambda x: x.group(0).upper(), value) if value != quote(unquote(value)): self.log(NotURLEncoded({})) break class SyndicationRight(enumeration): error = InvalidSyndicationRight valuelist = ['open','limited','private','closed'] def validate(self): self.value = self.value.lower() enumeration.validate(self) class AdultContent(enumeration): error = InvalidAdultContent valuelist = ['false', 'FALSE', '0', 'no', 'NO', 'true', 'TRUE', '1', 'yes', 'YES'] class Language(iso639): def validate(self): if self.value != '*': 
iso639.validate(self) python-feedvalidator-0~svn1022/feedvalidator/feed.py0000644000175000017500000001246310766017570021100 0ustar poxpox"""$Id: feed.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" from base import validatorBase from validators import * from logging import * from itunes import itunes_channel from extension import extension_feed # # Atom root element # class feed(validatorBase, extension_feed, itunes_channel): def getExpectedAttrNames(self): return [(u'urn:atom-extension:indexing', u'index')] def prevalidate(self): self.links = [] self.validate_optional_attribute((u'urn:atom-extension:indexing', u'index'), yesno) def missingElement(self, params): offset = [self.line - self.dispatcher.locator.getLineNumber(), self.col - self.dispatcher.locator.getColumnNumber()] self.log(MissingElement(params), offset) def validate_metadata(self): if not 'title' in self.children: self.missingElement({"parent":self.name, "element":"title"}) if not 'id' in self.children: self.missingElement({"parent":self.name, "element":"id"}) if not 'updated' in self.children: self.missingElement({"parent":self.name, "element":"updated"}) # complete feeds can only have current=self and no other links if 'fh_complete' in self.children: for link in self.links: if link.rel in link.rfc5005: if link.rel == "current": if link.href not in self.dispatcher.selfURIs: self.log(CurrentNotSelfInCompleteFeed({"rel":link.rel})) else: self.log(FeedRelInCompleteFeed({"rel":link.rel})) # ensure that there is a link rel="self" if self.name != 'source': for link in self.links: if link.rel=='self': break else: offset = [self.line - self.dispatcher.locator.getLineNumber(), self.col - self.dispatcher.locator.getColumnNumber()] self.log(MissingSelf({"parent":self.parent.name, "element":self.name}), offset) types={} archive=False current=False for link in self.links: if link.rel 
== 'current': current = True if link.rel in ['prev-archive', 'next-archive']: archive = True # attempts to link past the end of the list if link.rel == 'first' and link.href in self.dispatcher.selfURIs: for link2 in self.links: if link2.rel == 'previous': self.log(LinkPastEnd({"self":link.rel, "rel":link2.rel})) if link.rel == 'last' and link.href in self.dispatcher.selfURIs: for link2 in self.links: if link2.rel == 'next': self.log(LinkPastEnd({"self":link.rel, "rel":link2.rel})) # can only have one alternate per type if not link.rel=='alternate': continue if not link.type in types: types[link.type]={} if link.rel in types[link.type]: if link.hreflang in types[link.type][link.rel]: self.log(DuplicateAtomLink({"parent":self.name, "element":"link", "type":link.type, "hreflang":link.hreflang})) else: types[link.type][link.rel] += [link.hreflang] else: types[link.type][link.rel] = [link.hreflang] if 'fh_archive' in self.children: # archives should either have links or be marked complete if not archive and 'fh_complete' not in self.children: self.log(ArchiveIncomplete({})) # archives should have current links if not current and ('fh_complete' not in self.children): self.log(MissingCurrentInArchive({})) if self.itunes: itunes_channel.validate(self) def metadata(self): if 'entry' in self.children: self.log(MisplacedMetadata({"parent":self.name, "element":self.child})) def validate(self): if not 'entry' in self.children: self.validate_metadata() def do_author(self): self.metadata() from author import author return author() def do_category(self): self.metadata() from category import category return category() def do_contributor(self): self.metadata() from author import author return author() def do_generator(self): self.metadata() from generator import generator return generator(), nonblank(), noduplicates() def do_id(self): self.metadata() return canonicaluri(), nows(), noduplicates() def do_icon(self): self.metadata() return nonblank(), nows(), rfc2396(), noduplicates() 
def do_link(self): self.metadata() from link import link self.links.append(link()) return self.links[-1] def do_logo(self): self.metadata() return nonblank(), nows(), rfc2396(), noduplicates() def do_title(self): self.metadata() from content import textConstruct return textConstruct(), noduplicates() def do_subtitle(self): self.metadata() from content import textConstruct return textConstruct(), noduplicates() def do_rights(self): self.metadata() from content import textConstruct return textConstruct(), noduplicates() def do_updated(self): self.metadata() return rfc3339(), nows(), noduplicates() def do_entry(self): if not 'entry' in self.children: self.validate_metadata() from entry import entry return entry() def do_app_collection(self): from service import collection return collection(), noduplicates() python-feedvalidator-0~svn1022/feedvalidator/sse.py0000644000175000017500000000657010720106263020755 0ustar poxpoxfrom base import validatorBase from validators import * from logging import InvalidSseType, InvalidNSS, MissingElement, MissingByAndWhenAttrs import re class Sharing(validatorBase): def getExpectedAttrNames(self): return [ (None, u'expires'), (None, u'since'), (None, u'until') ] def prevalidate(self): if self.attrs.has_key((None,'until')): self.validate_required_attribute((None,'since'), rfc3339) else: self.validate_optional_attribute((None,'since'), rfc3339) if self.attrs.has_key((None,'since')): self.validate_required_attribute((None,'until'), rfc3339) else: self.validate_optional_attribute((None,'until'), rfc3339) self.validate_optional_attribute((None,'expires'), rfc3339) if self.attrs.has_key((None,'since')): if self.attrs.has_key((None,'until')): if self.attrs[(None,'since')]>self.attrs[(None,'until')]: self.log(SinceAfterUntil({})) def do_sx_related(self): return Related() class Sync(validatorBase): def getExpectedAttrNames(self): return [ (None, u'deleted'), (None, u'noconflicts'), (None, u'id'), (None, u'updates') ] def prevalidate(self): 
self.validate_optional_attribute((None,'deleted'), truefalsestrict) self.validate_optional_attribute((None,'noconflicts'), truefalsestrict) self.validate_required_attribute((None,'id'), unique('id',self.parent.parent)) self.validate_optional_attribute((None,'id'), rfc2141_nss) self.validate_required_attribute((None,'updates'), UINT31) def validate(self): if not 'sx_history' in self.children: self.log(MissingElement({'parent':self.name, 'element':'sx:history'})) def do_sx_history(self): return History() def do_sx_conflicts(self): return Conflicts() class Related(validatorBase): def getExpectedAttrNames(self): return [ (None, u'link'), (None, u'title'), (None, u'type') ] def prevalidate(self): self.validate_required_attribute((None,'link'), rfc2396_full) self.validate_optional_attribute((None,'title'), nonhtml) self.validate_optional_attribute((None,'title'), nonblank) self.validate_required_attribute((None,'type'), FeedType) class History(validatorBase): def getExpectedAttrNames(self): return [ (None, u'by'), (None, u'sequence'), (None, u'when') ] def prevalidate(self): self.validate_optional_attribute((None,'by'), nonhtml) self.validate_optional_attribute((None,'by'), nonblank) self.validate_optional_attribute((None,'by'), rfc2141_nss) self.validate_required_attribute((None,'sequence'), UINT31) self.validate_optional_attribute((None,'when'), rfc3339) if self.attrs.has_key((None,'when')): if not self.attrs.has_key((None,'by')): self.log(MissingRecommendedAttribute({"attr":"by"})) elif self.attrs.has_key((None,'by')): self.log(MissingRecommendedAttribute({"attr":"when"})) else: self.log(MissingByAndWhenAttrs({})) class FeedType(enumeration): error = InvalidSseType valuelist = ['complete', 'aggregated'] class rfc2141_nss(text): def validate(self): if not re.match("^([0-9a-zA-Z()+,\\-\\.:=@;$_!*'/?#]|%[0-9a-fA-F][0-9a-fA-F])+$", self.value): self.log(InvalidNSS({"element":self.name,"parent":self.parent.name})) class Conflicts(validatorBase): def do_entry(self): from 
entry import entry return entry() def do_item(self): from item import item return item() python-feedvalidator-0~svn1022/feedvalidator/xmlEncoding.py0000644000175000017500000002400310766017570022435 0ustar poxpox#!/usr/bin/python """ $Id: xmlEncoding.py 988 2008-03-12 18:22:48Z sa3ruby $ This module deals with detecting XML encodings, using both BOMs and explicit declarations. """ __author__ = "Joseph Walton " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2004 Joseph Walton" import codecs import re from logging import ObscureEncoding, NonstdEncoding import logging class FailingCodec: def __init__(self, name): self.name = name def fail(self, txt, errors='strict'): raise UnicodeError('No codec available for ' + self.name + ' in this installation of FeedValidator') # Don't die if the codec can't be found, but return # a decoder that will fail on use def getdecoder(codec): try: return codecs.getdecoder(codec) except: return FailingCodec(codec).fail # These are generic decoders that are only used # to decode the XML declaration, from which we can read # the real encoding _decUTF32BE = getdecoder('UTF-32BE') _decUTF32LE = getdecoder('UTF-32LE') _decUTF16BE = getdecoder('UTF-16BE') _decUTF16LE = getdecoder('UTF-16LE') _decEBCDIC = getdecoder('IBM037') # EBCDIC _decACE = getdecoder('ISO-8859-1') # An ASCII-compatible encoding # Given a character index into a string, calculate its 1-based row and column def _position(txt, idx): row = txt.count('\n', 0, idx) + 1 ln = txt.rfind('\n', 0, idx) + 1 column = 0 for c in txt[ln:idx]: if c == '\t': column = (column // 8 + 1) * 8 else: column += 1 column += 1 return (row, column) def _normaliseNewlines(txt): return txt.replace('\r\n', '\n').replace('\r', '\n') def _logEvent(loggedEvents, e, pos=None): if pos: e.params['line'], e.params['column'] = pos loggedEvents.append(e) # Return the encoding from the declaration, or 'None' # Return None if the 'permitted' list is passed in and the encoding # isn't found in it. 
This is so that, e.g., a 4-byte-character XML file # that claims to be US-ASCII will fail now. def _decodeDeclaration(sig, dec, permitted, loggedEvents): sig = _normaliseNewlines(dec(sig)[0]) eo = _encodingFromDecl(sig) if not(eo): _logEvent(loggedEvents, logging.UnicodeError({'exception': 'This XML file (apparently ' + permitted[0] + ') requires an encoding declaration'}), (1, 1)) elif permitted and not(eo[0].upper() in permitted): if _hasCodec(eo[0]): # see if the codec is an alias of one of the permitted encodings codec=codecs.lookup(eo[0]) for encoding in permitted: if _hasCodec(encoding) and codecs.lookup(encoding)[-1]==codec[-1]: break else: _logEvent(loggedEvents, logging.UnicodeError({'exception': 'This XML file claims an encoding of ' + eo[0] + ', but looks more like ' + permitted[0]}), eo[1]) return eo # Return the encoding from the declaration, or 'fallback' if none is # present. Return None if the 'permitted' list is passed in and # the encoding isn't found in it def _decodePostBOMDeclaration(sig, dec, permitted, loggedEvents, fallback=None): sig = _normaliseNewlines(dec(sig)[0]) eo = _encodingFromDecl(sig) if eo and not(eo[0].upper() in permitted): _logEvent(loggedEvents, logging.UnicodeError({'exception': 'Document starts with ' + permitted[0] + ' BOM marker but has incompatible declaration of ' + eo[0]}), eo[1]) return None else: return eo or (fallback, None) def isStandard(x): """ Is this encoding required by the XML 1.0 Specification, 4.3.3? 
""" return x.upper() in ['UTF-8', 'UTF-16'] def isCommon(x): """Is this encoding commonly used, according to (as of 2004-03-27)?""" return isStandard(x) or x.upper() in ['US-ASCII', 'ISO-8859-1', 'EUC-JP', 'ISO-8859-2', 'ISO-8859-15', 'ISO-8859-7', 'KOI8-R', 'SHIFT_JIS', 'WINDOWS-1250', 'WINDOWS-1251', 'WINDOWS-1252', 'WINDOWS-1254', 'WINDOWS-1255', 'WINDOWS-1256', # This doesn't seem to be popular, but is the Chinese # government's mandatory standard 'GB18030' ] # Inspired by xmlproc's autodetect_encoding, but rewritten def _detect(doc_start, loggedEvents=[], fallback='UTF-8'): """This is the logic from appendix F.1 of the XML 1.0 specification. Pass in the start of a document (>= 256 octets), and receive the encoding to use, or None if there is a problem with the document.""" sig = doc_start[:4] # With a BOM. We also check for a declaration, and make sure # it doesn't contradict (for 4-byte encodings, it's required) if sig == '\x00\x00\xFE\xFF': # UTF-32 BE eo = _decodeDeclaration(doc_start[4:], _decUTF32BE, ['UTF-32', 'ISO-10646-UCS-4', 'CSUCS4', 'UCS-4'], loggedEvents) elif sig == '\xFF\xFE\x00\x00': # UTF-32 LE eo = _decodeDeclaration(doc_start[4:], _decUTF32LE, ['UTF-32', 'ISO-10646-UCS-4', 'CSUCS4', 'UCS-4'], loggedEvents) elif sig == '\x00\x00\xFF\xFE' or sig == '\xFE\xFF\x00\x00': raise UnicodeError('Unable to process UCS-4 with unusual octet ordering') elif sig[:2] == '\xFE\xFF': # UTF-16 BE eo = _decodePostBOMDeclaration(doc_start[2:], _decUTF16BE, ['UTF-16', 'ISO-10646-UCS-2', 'CSUNICODE', 'UCS-2'], loggedEvents, fallback='UTF-16') elif sig[:2] == '\xFF\xFE': # UTF-16 LE eo = _decodePostBOMDeclaration(doc_start[2:], _decUTF16LE, ['UTF-16', 'ISO-10646-UCS-2', 'CSUNICODE', 'UCS-2'], loggedEvents, fallback='UTF-16') elif sig[:3] == '\xEF\xBB\xBF': eo = _decodePostBOMDeclaration(doc_start[3:], _decACE, ['UTF-8'], loggedEvents, fallback='UTF-8') # Without a BOM; we must read the declaration elif sig == '\x00\x00\x00\x3C': eo = _decodeDeclaration(doc_start, 
_decUTF32BE, ['UTF-32BE', 'UTF-32', 'ISO-10646-UCS-4', 'CSUCS4', 'UCS-4'], loggedEvents) elif sig == '\x3C\x00\x00\x00': eo = _decodeDeclaration(doc_start, _decUTF32LE, ['UTF-32LE', 'UTF-32', 'ISO-10646-UCS-4', 'CSUCS4', 'UCS-4'], loggedEvents) elif sig == '\x00\x3C\x00\x3F': eo = _decodeDeclaration(doc_start, _decUTF16BE, ['UTF-16BE', 'UTF-16', 'ISO-10646-UCS-2', 'CSUNICODE', 'UCS-2'], loggedEvents) elif sig == '\x3C\x00\x3F\x00': eo = _decodeDeclaration(doc_start, _decUTF16LE, ['UTF-16LE', 'UTF-16', 'ISO-10646-UCS-2', 'CSUNICODE', 'UCS-2'], loggedEvents) elif sig == '\x3C\x3F\x78\x6D': eo = _encodingFromDecl(_normaliseNewlines(_decACE(doc_start)[0])) or ('UTF-8', None) elif sig == '\x4C\x6F\xA7\x94': eo = _decodeDeclaration(doc_start, _decEBCDIC, ['IBM037', 'CP037', 'IBM038', 'EBCDIC-INT'], loggedEvents) # There's no BOM, and no declaration. It's UTF-8, or mislabelled. else: eo = (fallback, None) return eo def detect(doc_start, loggedEvents=[], fallback='UTF-8'): eo = _detect(doc_start, loggedEvents, fallback) if eo: return eo[0] else: return None _encRe = re.compile(r'<\?xml\s+version\s*=\s*(?:"[-a-zA-Z0-9_.:]+"|\'[-a-zA-Z0-9_.:]+\')\s+(encoding\s*=\s*(?:"([-A-Za-z0-9._]+)"|\'([-A-Za-z0-9._]+)\'))') def _encodingFromDecl(x): m = _encRe.match(x) if m: if m.group(2): return m.group(2), _position(x, m.start(2)) else: return m.group(3), _position(x, m.start(3)) else: return None def removeDeclaration(x): """Replace an XML document string's encoding declaration with the same number of spaces. 
Some XML parsers don't allow the encoding to be overridden, and this is a workaround.""" m = _encRe.match(x) if m: s = m.start(1) e = m.end(1) res = x[:s] + ' ' * (e - s) + x[e:] else: res = x return res def _hasCodec(enc): try: return codecs.lookup(enc) is not None except: return False def decode(mediaType, charset, bs, loggedEvents, fallback=None): eo = _detect(bs, loggedEvents, fallback=None) # Check declared encodings if eo and eo[1] and _hasCodec(eo[0]): if not(isCommon(eo[0])): _logEvent(loggedEvents, ObscureEncoding({"encoding": eo[0]}), eo[1]) elif not(isStandard(eo[0])): _logEvent(loggedEvents, NonstdEncoding({"encoding": eo[0]}), eo[1]) if eo: encoding = eo[0] else: encoding = None if charset and encoding and charset.lower() != encoding.lower(): # RFC 3023 requires us to use 'charset', but a number of aggregators # ignore this recommendation, so we should warn. loggedEvents.append(logging.EncodingMismatch({"charset": charset, "encoding": encoding})) if mediaType and mediaType.startswith("text/") and charset is None: loggedEvents.append(logging.TextXml({})) # RFC 3023 requires text/* to default to US-ASCII. Issue a warning # if this occurs, but continue validation using the detected encoding try: bs.decode("US-ASCII") except: if not encoding: try: bs.decode(fallback) encoding=fallback except: pass if encoding and encoding.lower() != 'us-ascii': loggedEvents.append(logging.EncodingMismatch({"charset": "US-ASCII", "encoding": encoding})) enc = charset or encoding if enc is None: loggedEvents.append(logging.MissingEncoding({})) enc = fallback elif not(_hasCodec(enc)): if eo: _logEvent(loggedEvents, logging.UnknownEncoding({'encoding': enc}), eo[1]) else: _logEvent(loggedEvents, logging.UnknownEncoding({'encoding': enc})) enc = fallback if enc is None: return enc, None dec = getdecoder(enc) try: return enc, dec(bs)[0] except UnicodeError, ue: salvage = dec(bs, 'replace')[0] if 'start' in ue.__dict__: # XXX 'start' is in bytes, not characters. 
This is wrong for multibyte # encodings pos = _position(salvage, ue.start) else: pos = None _logEvent(loggedEvents, logging.UnicodeError({"exception":ue}), pos) return enc, salvage _encUTF8 = codecs.getencoder('UTF-8') def asUTF8(x): """Accept a Unicode string and return a UTF-8 encoded string, with its encoding declaration removed, suitable for parsing.""" x = removeDeclaration(unicode(x)) return _encUTF8(x)[0] if __name__ == '__main__': from sys import argv from os.path import isfile for x in argv[1:]: if isfile(x): f = open(x, 'r') l = f.read(1024) log = [] eo = detect(l, log) if eo: print x,eo else: print repr(log) python-feedvalidator-0~svn1022/feedvalidator/validators.py0000644000175000017500000013246011024563150022331 0ustar poxpox"""$Id: validators.py 1020 2008-06-13 21:28:08Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 1020 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" from base import validatorBase from logging import * import re, time, datetime from uri import canonicalForm, urljoin from rfc822 import AddressList, parsedate, parsedate_tz, mktime_tz rdfNS = "http://www.w3.org/1999/02/22-rdf-syntax-ns#" def implausible_822(value): if value[0] < 1990: return True try: from rfc822 import parsedate_tz, mktime_tz except: # no time zone functions available, granularity is a day pvalue=parsedate(value) return value > time.gmtime(time.time()+86400) or pvalue[0]<1990 try: pvalue=parsedate_tz(value) zvalue=mktime_tz(pvalue) except: # outside of range of what parsedate supports: definitely problematic return True # when time zone functions are available, granularity is ten minutes return zvalue > time.time()+600 or pvalue[0]<1990 def implausible_8601(value): if value < '1990-01-01': return True try: import xml.utils.iso8601 except: # no time zone functions available, granularity is a day tomorrow=time.strftime("%Y-%m-%dT%H:%M:%SZ",time.gmtime(time.time()+86400)) return (value > tomorrow) try: zvalue = 
xml.utils.iso8601.parse(value) except: # outside of range of what parse supports: definitely problematic return True # when time zone functions are available, granularity is ten minutes return zvalue > time.time() + 600 # # Valid mime type # mime_re = re.compile('[^\s()<>,;:\\"/[\]?=]+/[^\s()<>,;:\\"/[\]?=]+(\s*;\s*[^\s()<>,;:\\"/[\]?=]+=("(\\"|[^"])*"|[^\s()<>,;:\\"/[\]?=]+))*$') # # Extensibility hook: logic varies based on type of feed # def any(self, name, qname, attrs): if self.getFeedType() != TYPE_RSS1: return eater() else: from rdf import rdfExtension return rdfExtension(qname) # # This class simply eats events. Useful to prevent cascading of errors # class eater(validatorBase): def getExpectedAttrNames(self): return self.attrs.getNames() def characters(self, string): for c in string: if 0x80 <= ord(c) <= 0x9F or c == u'\ufffd': from validators import BadCharacters self.log(BadCharacters({"parent":self.parent.name, "element":self.name})) def startElementNS(self, name, qname, attrs): # RSS 2.0 arbitrary restriction on extensions feedtype=self.getFeedType() if (not qname) and feedtype and (feedtype==TYPE_RSS2) and self.name.find('_')>=0: from logging import NotInANamespace self.log(NotInANamespace({"parent":self.name, "element":name, "namespace":'""'})) # ensure element is "namespace well formed" if name.find(':') != -1: from logging import MissingNamespace self.log(MissingNamespace({"parent":self.name, "element":name})) # ensure all attribute namespaces are properly defined for (namespace,attr) in attrs.keys(): if ':' in attr and not namespace: from logging import MissingNamespace self.log(MissingNamespace({"parent":self.name, "element":attr})) for c in attrs.get((namespace,attr)): if 0x80 <= ord(c) <= 0x9F or c == u'\ufffd': from validators import BadCharacters self.log(BadCharacters({"parent":name, "element":attr})) # eat children self.push(self.__class__(), name, attrs) from HTMLParser import HTMLParser, HTMLParseError class HTMLValidator(HTMLParser): 
htmltags = [ "a", "abbr", "acronym", "address", "applet", "area", "article", "aside", "audio", "b", "base", "basefont", "bdo", "big", "blockquote", "body", "br", "button", "canvas", "caption", "center", "cite", "code", "col", "colgroup", "command", "datagrid", "datalist", "dd", "del", "details", "dialog", "dir", "div", "dfn", "dl", "dt", "em", "event-source", "fieldset", "figure", "font", "footer", "form", "frame", "frameset", "h1", "h2", "h3", "h4", "h5", "h6", "head", "header", "hr", "html", "i", "iframe", "img", "input", "ins", "isindex", "kbd", "label", "legend", "li", "link", "m", "map", "menu", "meta", "meter", "nav", "noframes", "noscript", "object", "ol", "output", "optgroup", "option", "p", "param", "pre", "progress", "q", "s", "samp", "script", "section", "select", "small", "source", "span", "strike", "strong", "style", "sub", "sup", "table", "tbody", "td", "textarea", "tfoot", "th", "thead", "time", "title", "tr", "tt", "u", "ul", "var", "xmp", "plaintext", "embed", "comment", "listing", "video", "wbr"] acceptable_elements = ['a', 'abbr', 'acronym', 'address', 'area', 'article', 'aside', 'audio', 'b', 'big', 'blockquote', 'br', 'button', 'canvas', 'caption', 'center', 'cite', 'code', 'col', 'colgroup', 'command', 'datagrid', 'datalist', 'dd', 'del', 'details', 'dfn', 'dialog', 'dir', 'div', 'dl', 'dt', 'em', 'event-source', 'fieldset', 'figure', 'footer', 'font', 'form', 'header', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'hr', 'i', 'img', 'input', 'ins', 'keygen', 'kbd', 'label', 'legend', 'li', 'm', 'map', 'menu', 'meter', 'multicol', 'nav', 'nextid', 'ol', 'output', 'optgroup', 'option', 'p', 'pre', 'progress', 'q', 's', 'samp', 'section', 'select', 'small', 'sound', 'source', 'spacer', 'span', 'strike', 'strong', 'sub', 'sup', 'table', 'tbody', 'td', 'textarea', 'time', 'tfoot', 'th', 'thead', 'tr', 'tt', 'u', 'ul', 'var', 'video', 'noscript', 'wbr'] acceptable_attributes = ['abbr', 'accept', 'accept-charset', 'accesskey', 'action', 'align', 'alt', 
'autoplay', 'autocomplete', 'autofocus', 'axis', 'background', 'balance', 'bgcolor', 'bgproperties', 'border', 'bordercolor', 'bordercolordark', 'bordercolorlight', 'bottompadding', 'cellpadding', 'cellspacing', 'ch', 'challenge', 'char', 'charoff', 'choff', 'charset', 'checked', 'cite', 'class', 'clear', 'color', 'cols', 'colspan', 'compact', 'contenteditable', 'coords', 'data', 'datafld', 'datapagesize', 'datasrc', 'datetime', 'default', 'delay', 'dir', 'disabled', 'draggable', 'dynsrc', 'enctype', 'end', 'face', 'for', 'form', 'frame', 'galleryimg', 'gutter', 'headers', 'height', 'hidefocus', 'hidden', 'high', 'href', 'hreflang', 'hspace', 'icon', 'id', 'inputmode', 'ismap', 'keytype', 'label', 'leftspacing', 'lang', 'list', 'longdesc', 'loop', 'loopcount', 'loopend', 'loopstart', 'low', 'lowsrc', 'max', 'maxlength', 'media', 'method', 'min', 'multiple', 'name', 'nohref', 'noshade', 'nowrap', 'open', 'optimum', 'pattern', 'ping', 'point-size', 'prompt', 'pqg', 'radiogroup', 'readonly', 'rel', 'repeat-max', 'repeat-min', 'replace', 'required', 'rev', 'rightspacing', 'rows', 'rowspan', 'rules', 'scope', 'selected', 'shape', 'size', 'span', 'src', 'start', 'step', 'summary', 'suppress', 'tabindex', 'target', 'template', 'title', 'toppadding', 'type', 'unselectable', 'usemap', 'urn', 'valign', 'value', 'variable', 'volume', 'vspace', 'vrml', 'width', 'wrap', 'xml:lang', 'xmlns'] acceptable_css_properties = ['azimuth', 'background', 'background-color', 'border', 'border-bottom', 'border-bottom-color', 'border-bottom-style', 'border-bottom-width', 'border-collapse', 'border-color', 'border-left', 'border-left-color', 'border-left-style', 'border-left-width', 'border-right', 'border-right-color', 'border-right-style', 'border-right-width', 'border-spacing', 'border-style', 'border-top', 'border-top-color', 'border-top-style', 'border-top-width', 'border-width', 'clear', 'color', 'cursor', 'direction', 'display', 'elevation', 'float', 'font', 'font-family', 'font-size', 
'font-style', 'font-variant', 'font-weight', 'height', 'letter-spacing', 'line-height', 'margin', 'margin-bottom', 'margin-left', 'margin-right', 'margin-top', 'overflow', 'padding', 'padding-bottom', 'padding-left', 'padding-right', 'padding-top', 'pause', 'pause-after', 'pause-before', 'pitch', 'pitch-range', 'richness', 'speak', 'speak-header', 'speak-numeral', 'speak-punctuation', 'speech-rate', 'stress', 'text-align', 'text-decoration', 'text-indent', 'unicode-bidi', 'vertical-align', 'voice-family', 'volume', 'white-space', 'width'] # survey of common keywords found in feeds acceptable_css_keywords = ['aqua', 'auto', 'black', 'block', 'blue', 'bold', 'both', 'bottom', 'brown', 'center', 'collapse', 'dashed', 'dotted', 'fuchsia', 'gray', 'green', '!important', 'italic', 'left', 'lime', 'maroon', 'medium', 'none', 'navy', 'normal', 'nowrap', 'olive', 'pointer', 'purple', 'red', 'right', 'solid', 'silver', 'teal', 'top', 'transparent', 'underline', 'white', 'yellow'] valid_css_values = re.compile('^(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|' + '\d?\.?\d?\d(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)$') mathml_elements = ['annotation', 'annotation-xml', 'maction', 'math', 'merror', 'mfrac', 'mi', 'mmultiscripts', 'mn', 'mo', 'mover', 'mpadded', 'mphantom', 'mprescripts', 'mroot', 'mrow', 'mspace', 'msqrt', 'mstyle', 'msub', 'msubsup', 'msup', 'mtable', 'mtd', 'mtext', 'mtr', 'munder', 'munderover', 'none', 'semantics'] mathml_attributes = ['actiontype', 'align', 'columnalign', 'columnalign', 'columnalign', 'columnlines', 'columnspacing', 'columnspan', 'depth', 'display', 'displaystyle', 'encoding', 'equalcolumns', 'equalrows', 'fence', 'fontstyle', 'fontweight', 'frame', 'height', 'linethickness', 'lspace', 'mathbackground', 'mathcolor', 'mathvariant', 'mathvariant', 'maxsize', 'minsize', 'other', 'rowalign', 'rowalign', 'rowalign', 'rowlines', 'rowspacing', 'rowspan', 'rspace', 'scriptlevel', 'selection', 'separator', 'stretchy', 'width', 'width', 'xlink:href', 'xlink:show', 
'xlink:type', 'xmlns', 'xmlns:xlink'] # svgtiny - foreignObject + linearGradient + radialGradient + stop - image svg_elements = ['a', 'animate', 'animateColor', 'animateMotion', 'animateTransform', 'circle', 'defs', 'desc', 'ellipse', 'font-face', 'font-face-name', 'font-face-src', 'g', 'glyph', 'hkern', 'linearGradient', 'line', 'marker', 'metadata', 'missing-glyph', 'mpath', 'path', 'polygon', 'polyline', 'radialGradient', 'rect', 'set', 'stop', 'svg', 'switch', 'text', 'title', 'tspan', 'use'] # svgtiny + class + opacity + offset + xmlns + xmlns:xlink svg_attributes = ['accent-height', 'accumulate', 'additive', 'alphabetic', 'arabic-form', 'ascent', 'attributeName', 'attributeType', 'baseProfile', 'bbox', 'begin', 'by', 'calcMode', 'cap-height', 'class', 'color', 'color-rendering', 'content', 'cx', 'cy', 'd', 'dx', 'dy', 'descent', 'display', 'dur', 'end', 'fill', 'fill-opacity', 'fill-rule', 'font-family', 'font-size', 'font-stretch', 'font-style', 'font-variant', 'font-weight', 'from', 'fx', 'fy', 'g1', 'g2', 'glyph-name', 'gradientUnits', 'hanging', 'height', 'horiz-adv-x', 'horiz-origin-x', 'id', 'ideographic', 'k', 'keyPoints', 'keySplines', 'keyTimes', 'lang', 'mathematical', 'marker-end', 'marker-mid', 'marker-start', 'markerHeight', 'markerUnits', 'markerWidth', 'max', 'min', 'name', 'offset', 'opacity', 'orient', 'origin', 'overline-position', 'overline-thickness', 'panose-1', 'path', 'pathLength', 'points', 'preserveAspectRatio', 'r', 'refX', 'refY', 'repeatCount', 'repeatDur', 'requiredExtensions', 'requiredFeatures', 'restart', 'rotate', 'rx', 'ry', 'slope', 'stemh', 'stemv', 'stop-color', 'stop-opacity', 'strikethrough-position', 'strikethrough-thickness', 'stroke', 'stroke-dasharray', 'stroke-dashoffset', 'stroke-linecap', 'stroke-linejoin', 'stroke-miterlimit', 'stroke-opacity', 'stroke-width', 'systemLanguage', 'target', 'text-anchor', 'to', 'transform', 'type', 'u1', 'u2', 'underline-position', 'underline-thickness', 'unicode', 'unicode-range', 
'units-per-em', 'values', 'version', 'viewBox', 'visibility', 'width', 'widths', 'x', 'x-height', 'x1', 'x2', 'xlink:actuate', 'xlink:arcrole', 'xlink:href', 'xlink:role', 'xlink:show', 'xlink:title', 'xlink:type', 'xml:base', 'xml:lang', 'xml:space', 'xmlns', 'xmlns:xlink', 'y', 'y1', 'y2', 'zoomAndPan'] def log(self,msg): offset = [self.element.line + self.getpos()[0] - 1 - self.element.dispatcher.locator.getLineNumber(), -self.element.dispatcher.locator.getColumnNumber()] self.element.log(msg, offset) def __init__(self,value,element): self.element=element self.stack = [] self.valid = True HTMLParser.__init__(self) if value.lower().find('= 0: self.log(SecurityRisk({"parent":self.element.parent.name, "element":self.element.name, "tag":"?import"})) try: self.feed(value) self.close() if self.valid: self.log(ValidHtml({"parent":self.element.parent.name, "element":self.element.name})) except HTMLParseError, msg: element = self.element offset = [element.line - element.dispatcher.locator.getLineNumber(), - element.dispatcher.locator.getColumnNumber()] match = re.search(', at line (\d+), column (\d+)',str(msg)) if match: offset[0] += int(match.group(1))-1 element.log(NotHtml({"parent":element.parent.name, "element":element.name, "message":"Invalid HTML", "value": str(msg)}),offset) def handle_starttag(self, tag, attributes): if tag.lower() not in self.htmltags: self.log(NotHtml({"parent":self.element.parent.name, "element":self.element.name,"value":tag, "message": "Non-html tag"})) self.valid = False elif tag.lower() not in HTMLValidator.acceptable_elements: if not 'embed' in self.stack and not 'object' in self.stack: self.log(SecurityRisk({"parent":self.element.parent.name, "element":self.element.name, "tag":tag})) else: for (name,value) in attributes: if name.lower() == 'style': for evil in checkStyle(value): self.log(DangerousStyleAttr({"parent":self.element.parent.name, "element":self.element.name, "attr":"style", "value":evil})) elif name.lower() not in 
self.acceptable_attributes: self.log(SecurityRiskAttr({"parent":self.element.parent.name, "element":self.element.name, "attr":name})) self.stack.append(tag) def handle_endtag(self, tag): if tag in self.stack: while self.stack[-1] != tag: self.stack.pop() self.stack.pop() def handle_charref(self, name): if name.startswith('x'): value = int(name[1:],16) else: value = int(name) if 0x80 <= value <= 0x9F or value == 0xfffd: self.log(BadCharacters({"parent":self.element.parent.name, "element":self.element.name, "value":"&#" + name + ";"})) # # Scub CSS properties for potentially evil intent # def checkStyle(style): if not re.match("""^([:,;#%.\sa-zA-Z0-9!]|\w-\w|'[\s\w]+'|"[\s\w]+"|\([\d,\s]+\))*$""", style): return [style] if not re.match("^(\s*[-\w]+\s*:\s*[^:;]*(;|$))*$", style): return [style] unsafe = [] for prop,value in re.findall("([-\w]+)\s*:\s*([^:;]*)",style.lower()): if prop not in HTMLValidator.acceptable_css_properties: if prop not in unsafe: unsafe.append(prop) elif prop.split('-')[0] in ['background','border','margin','padding']: for keyword in value.split(): if keyword not in HTMLValidator.acceptable_css_keywords and \ not HTMLValidator.valid_css_values.match(keyword): if keyword not in unsafe: unsafe.append(keyword) return unsafe # # This class simply html events. 
Identifies unsafe events # class htmlEater(validatorBase): def getExpectedAttrNames(self): if self.attrs and len(self.attrs): return self.attrs.getNames() def textOK(self): pass def startElementNS(self, name, qname, attrs): for attr in attrs.getNames(): if attr[0]==None: if attr[1].lower() == 'style': for value in checkStyle(attrs.get(attr)): self.log(DangerousStyleAttr({"parent":self.parent.name, "element":self.name, "attr":attr[1], "value":value})) elif attr[1].lower() not in HTMLValidator.acceptable_attributes: self.log(SecurityRiskAttr({"parent":self.parent.name, "element":self.name, "attr":attr[1]})) self.push(htmlEater(), self.name, attrs) if name.lower() not in HTMLValidator.acceptable_elements: self.log(SecurityRisk({"parent":self.parent.name, "element":self.name, "tag":name})) def endElementNS(self,name,qname): pass # # text: i.e., no child elements allowed (except rdf:Description). # class text(validatorBase): def textOK(self): pass def getExpectedAttrNames(self): if self.getFeedType() == TYPE_RSS1: return [(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'parseType'), (u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'datatype'), (u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'resource')] else: return [] def startElementNS(self, name, qname, attrs): if self.getFeedType() == TYPE_RSS1: if self.value.strip() or self.children: if self.attrs.get((u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'parseType')) != 'Literal': self.log(InvalidRDF({"message":"mixed content"})) if name=="div" and qname=="http://www.w3.org/1999/xhtml": from content import diveater self.push(diveater(), name, attrs) else: from rdf import rdfExtension self.push(rdfExtension(qname), name, attrs) else: from base import namespaces ns = namespaces.get(qname, '') if name.find(':') != -1: from logging import MissingNamespace self.log(MissingNamespace({"parent":self.name, "element":name})) else: self.log(UndefinedElement({"parent":self.name, "element":name})) self.push(eater(), name, 
attrs) # # noduplicates: no child elements, no duplicate siblings # class noduplicates(validatorBase): def __init__(self, message=DuplicateElement): self.message=message validatorBase.__init__(self) def startElementNS(self, name, qname, attrs): pass def characters(self, string): pass def prevalidate(self): if self.name in self.parent.children: self.log(self.message({"parent":self.parent.name, "element":self.name})) # # valid e-mail addr-spec # class addr_spec(text): domains = """ AC AD AE AERO AF AG AI AL AM AN AO AQ AR ARPA AS ASIA AT AU AW AX AZ BA BB BD BE BF BG BH BI BIZ BJ BM BN BO BR BS BT BV BW BY BZ CA CAT CC CD CF CG CH CI CK CL CM CN CO COM COOP CR CU CV CX CY CZ DE DJ DK DM DO DZ EC EDU EE EG ER ES ET EU FI FJ FK FM FO FR GA GB GD GE GF GG GH GI GL GM GN GOV GP GQ GR GS GT GU GW GY HK HM HN HR HT HU ID IE IL IM IN INFO INT IO IQ IR IS IT JE JM JO JOBS JP KE KG KH KI KM KN KP KR KW KY KZ LA LB LC LI LK LR LS LT LU LV LY MA MC MD ME MG MH MIL MK ML MM MN MO MOBI MP MQ MR MS MT MU MUSEUM MV MW MX MY MZ NA NAME NC NE NET NF NG NI NL NO NP NR NU NZ OM ORG PA PE PF PG PH PK PL PM PN PR PRO PS PT PW PY QA RE RO RS RU RW SA SB SC SD SE SG SH SI SJ SK SL SM SN SO SR ST SU SV SY SZ TC TD TEL TF TG TH TJ TK TL TM TN TO TP TR TRAVEL TT TV TW TZ UA UG UK UM US UY UZ VA VC VE VG VI VN VU WF WS XN--0ZWM56D XN--11B5BS3A9AJ6G XN--80AKHBYKNJ4F XN--9T4B11YI5A XN--DEBA0AD XN--G6W251D XN--HGBK6AJ7F53BBA XN--HLCJ6AYA9ESC7A XN--JXALPDLP XN--KGBECHTV XN--ZCKZAH YE YT YU ZA ZM ZW """ # http://data.iana.org/TLD/tlds-alpha-by-domain.txt domain_re = '''(([0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.)|(([A-Z0-9\-]+\.)+))(%s|[0-9]{1,3})''' % '|'.join(domains.strip().split()) email_re = re.compile("([A-Z0-9_\-\+\.\']+)@" + domain_re + "$", re.I) simple_email_re = re.compile('^[\w._%+-]+@[A-Za-z][\w.-]+$') message = InvalidAddrSpec def validate(self, value=None): if not value: value=self.value if not self.email_re.match(value): if not self.simple_email_re.match(value): 
self.log(self.message({"parent":self.parent.name, "element":self.name, "value":self.value})) else: try: import socket socket.gethostbyname(value.split('@')[-1]) except: self.log(UnknownHost({"parent":self.parent.name, "element":self.name, "value":self.value})) else: self.log(ValidContact({"parent":self.parent.name, "element":self.name, "value":self.value})) # # iso639 language code # def iso639_validate(log,value,element,parent): import iso639codes if '-' in value: lang, sublang = value.split('-', 1) else: lang = value if not iso639codes.isoLang.has_key(unicode.lower(unicode(lang))): log(InvalidLanguage({"parent":parent, "element":element, "value":value})) else: log(ValidLanguage({"parent":parent, "element":element})) class iso639(text): def validate(self): iso639_validate(self.log, self.value, self.name, self.parent.name) # # Encoding charset # class Charset(text): def validate(self): try: import codecs codecs.lookup(self.value) except: self.log(InvalidEncoding({'value': self.value})) # # Mime type # class MimeType(text): def validate(self): if not mime_re.match(self.value): self.log(InvalidMIMEType({'attr':'type'})) class MediaRange(MimeType): def validate(self): if not self.value.strip(): return original_value = self.value for value in original_value.split(','): self.value = value.strip() if value.find(';q=')>=0: self.log(UndefinedParam({'param':'q'})) MimeType.validate(self) # # iso8601 dateTime # class iso8601(text): iso8601_re = re.compile("^\d\d\d\d(-\d\d(-\d\d(T\d\d:\d\d(:\d\d(\.\d*)?)?" 
+ "(Z|([+-]\d\d:\d\d))?)?)?)?$") message = InvalidISO8601DateTime def validate(self): if not self.iso8601_re.match(self.value): self.log(self.message({"parent":self.parent.name, "element":self.name, "value":self.value})) return work=self.value.split('T') date=work[0].split('-') year=int(date[0]) if len(date)>1: month=int(date[1]) try: if len(date)>2: datetime.date(year,month,int(date[2])) except ValueError, e: return self.log(self.message({"parent":self.parent.name, "element":self.name, "value":str(e)})) if len(work) > 1: time=work[1].split('Z')[0].split('+')[0].split('-')[0] time=time.split(':') if int(time[0])>23: self.log(self.message({"parent":self.parent.name, "element":self.name, "value":self.value})) return if len(time)>1 and int(time[1])>60: self.log(self.message({"parent":self.parent.name, "element":self.name, "value":self.value})) return if len(time)>2 and float(time[2])>60.0: self.log(self.message({"parent":self.parent.name, "element":self.name, "value":self.value})) return self.log(ValidW3CDTFDate({"parent":self.parent.name, "element":self.name, "value":self.value})) return 1 class w3cdtf(iso8601): # The same as in iso8601, except a timezone is not optional when # a time is present iso8601_re = re.compile("^\d\d\d\d(-\d\d(-\d\d(T\d\d:\d\d(:\d\d(\.\d*)?)?" + "(Z|([+-]\d\d:\d\d)))?)?)?$") message = InvalidW3CDTFDate class rfc3339(iso8601): # The same as in iso8601, except that the only thing that is optional # is the seconds iso8601_re = re.compile("^\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\d(\.\d*)?" 
+ "(Z|([+-]\d\d:\d\d))$") message = InvalidRFC3339Date def validate(self): if iso8601.validate(self): if implausible_8601(self.value): self.log(ImplausibleDate({"parent":self.parent.name, "element":self.name, "value":self.value})) return 0 return 1 return 0 class iso8601_date(iso8601): date_re = re.compile("^\d\d\d\d-\d\d-\d\d$") def validate(self): if iso8601.validate(self): if not self.date_re.search(self.value): self.log(InvalidISO8601Date({"parent":self.parent.name, "element":self.name, "value":self.value})) iana_schemes = [ # http://www.iana.org/assignments/uri-schemes.html "ftp", "http", "gopher", "mailto", "news", "nntp", "telnet", "wais", "file", "prospero", "z39.50s", "z39.50r", "cid", "mid", "vemmi", "service", "imap", "nfs", "acap", "rtsp", "tip", "pop", "data", "dav", "opaquelocktoken", "sip", "sips", "tel", "fax", "modem", "ldap", "https", "soap.beep", "soap.beeps", "xmlrpc.beep", "xmlrpc.beeps", "urn", "go", "h323", "ipp", "tftp", "mupdate", "pres", "im", "mtqp", "iris.beep", "dict", "snmp", "crid", "tag", "dns", "info" ] # # rfc2396 fully qualified (non-relative) uri # class rfc2396(text): rfc2396_re = re.compile("([a-zA-Z][0-9a-zA-Z+\\-\\.]*:)?/{0,2}" + "(\\[[0-9A-Fa-f:]+\\])?" 
+ "[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]*$") urn_re = re.compile(r"^[Uu][Rr][Nn]:[a-zA-Z0-9][a-zA-Z0-9-]{1,31}:([a-zA-Z0-9()+,\.:=@;$_!*'\-]|%[0-9A-Fa-f]{2})+$") tag_re = re.compile(r"^tag:([a-z0-9\-\._]+?@)?[a-z0-9\.\-]+?,\d{4}(-\d{2}(-\d{2})?)?:[0-9a-zA-Z;/\?:@&=+$\.\-_!~*'\(\)%,]*(#[0-9a-zA-Z;/\?:@&=+$\.\-_!~*'\(\)%,]*)?$") urichars_re=re.compile("[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]") def validate(self, errorClass=InvalidLink, successClass=ValidURI, extraParams={}): success = 0 scheme=self.value.split(':')[0].lower() if scheme=='tag': if self.tag_re.match(self.value): success = 1 logparams = {"parent":self.parent.name, "element":self.name, "value":self.value} logparams.update(extraParams) self.log(ValidTAG(logparams)) else: logparams = {"parent":self.parent.name, "element":self.name, "value":self.value} logparams.update(extraParams) self.log(InvalidTAG(logparams)) elif scheme=="urn": if self.urn_re.match(self.value): success = 1 logparams = {"parent":self.parent.name, "element":self.name, "value":self.value} logparams.update(extraParams) self.log(ValidURN(logparams)) else: logparams = {"parent":self.parent.name, "element":self.name, "value":self.value} logparams.update(extraParams) self.log(InvalidURN(logparams)) elif not self.rfc2396_re.match(self.value): logparams = {"parent":self.parent.name, "element":self.name, "value":self.value} logparams.update(extraParams) for c in self.value: if ord(c)<128 and not rfc2396.urichars_re.match(c): logparams['value'] = repr(str(c)) self.log(InvalidUriChar(logparams)) break else: try: if self.rfc2396_re.match(self.value.encode('idna')): errorClass=UriNotIri except: pass self.log(errorClass(logparams)) elif scheme in ['http','ftp']: if not re.match('^\w+://[^/].*',self.value): logparams = {"parent":self.parent.name, "element":self.name, "value":self.value} logparams.update(extraParams) self.log(errorClass(logparams)) else: success = 1 elif self.value.find(':')>=0 and scheme.isalpha() and scheme not in iana_schemes: 
self.log(SchemeNotIANARegistered({"parent":self.parent.name, "element":self.name, "value":scheme})) else: success = 1 if success: logparams = {"parent":self.parent.name, "element":self.name, "value":self.value} logparams.update(extraParams) self.log(successClass(logparams)) return success # # rfc3987 iri # class rfc3987(rfc2396): def validate(self, errorClass=InvalidIRI, successClass=ValidURI, extraParams={}): try: if self.value: self.value = self.value.encode('idna') except: pass # apparently '.' produces label too long return rfc2396.validate(self, errorClass, successClass, extraParams) class rfc2396_full(rfc2396): rfc2396_re = re.compile("[a-zA-Z][0-9a-zA-Z+\\-\\.]*:(//)?" + "[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%,#]+$") def validate(self, errorClass=InvalidFullLink, successClass=ValidURI, extraParams={}): return rfc2396.validate(self, errorClass, successClass, extraParams) # # URI reference resolvable relative to xml:base # class xmlbase(rfc3987): def validate(self, errorClass=InvalidIRI, successClass=ValidURI, extraParams={}): if rfc3987.validate(self, errorClass, successClass, extraParams): if self.dispatcher.xmlBase != self.xmlBase: docbase=canonicalForm(self.dispatcher.xmlBase).split('#')[0] elembase=canonicalForm(self.xmlBase).split('#')[0] value=canonicalForm(urljoin(elembase,self.value)).split('#')[0] if (value==elembase) and (elembase.encode('idna')!=docbase): self.log(SameDocumentReference({"parent":self.parent.name, "element":self.name, "value":self.value})) # # rfc822 dateTime (+Y2K extension) # class rfc822(text): rfc822_re = re.compile("(((mon)|(tue)|(wed)|(thu)|(fri)|(sat)|(sun))\s*,\s*)?" + "\d\d?\s+((jan)|(feb)|(mar)|(apr)|(may)|(jun)|(jul)|(aug)|(sep)|(oct)|" + "(nov)|(dec))\s+\d\d(\d\d)?\s+\d\d:\d\d(:\d\d)?\s+(([+-]\d\d\d\d)|" + "(ut)|(gmt)|(est)|(edt)|(cst)|(cdt)|(mst)|(mdt)|(pst)|(pdt)|[a-ik-z])?$", re.UNICODE) rfc2822_re = re.compile("(((Mon)|(Tue)|(Wed)|(Thu)|(Fri)|(Sat)|(Sun)), )?" + "\d\d? 
((Jan)|(Feb)|(Mar)|(Apr)|(May)|(Jun)|(Jul)|(Aug)|(Sep)|(Oct)|" + "(Nov)|(Dec)) \d\d\d\d \d\d:\d\d(:\d\d)? (([+-]?\d\d[03]0)|" + "(UT)|(GMT)|(EST)|(EDT)|(CST)|(CDT)|(MST)|(MDT)|(PST)|(PDT)|Z)$") def validate(self): if self.rfc2822_re.match(self.value): import calendar value = parsedate(self.value) try: if value[0] > 1900: dow = datetime.date(*value[:3]).strftime("%a") if self.value.find(',')>0 and dow.lower() != self.value[:3].lower(): self.log(IncorrectDOW({"parent":self.parent.name, "element":self.name, "value":self.value[:3]})) return except ValueError, e: self.log(InvalidRFC2822Date({"parent":self.parent.name, "element":self.name, "value":str(e)})) return if implausible_822(self.value): self.log(ImplausibleDate({"parent":self.parent.name, "element":self.name, "value":self.value})) else: self.log(ValidRFC2822Date({"parent":self.parent.name, "element":self.name, "value":self.value})) else: value1,value2 = '', self.value value2 = re.sub(r'[\\](.)','',value2) while value1!=value2: value1,value2=value2,re.sub('\([^(]*?\)',' ',value2) if not self.rfc822_re.match(value2.strip().lower()): self.log(InvalidRFC2822Date({"parent":self.parent.name, "element":self.name, "value":self.value})) else: self.log(ProblematicalRFC822Date({"parent":self.parent.name, "element":self.name, "value":self.value})) # # Decode html entityrefs # from htmlentitydefs import name2codepoint def decodehtml(data): chunks=re.split('&#?(\w+);',data) for i in range(1,len(chunks),2): if chunks[i].isdigit(): # print chunks[i] chunks[i]=unichr(int(chunks[i])) elif chunks[i] in name2codepoint: chunks[i]=unichr(name2codepoint[chunks[i]]) else: chunks[i]='&' + chunks[i] +';' # print repr(chunks) return u"".join(map(unicode,chunks)) # # Scan HTML for relative URLs # class absUrlMixin: anchor_re = re.compile(']', re.IGNORECASE) img_re = re.compile(']*src=(?:"(.*?)"|\'(.*?)\'|([\w-]+))[\s>]', re.IGNORECASE) absref_re = re.compile("\w+:") def validateAbsUrl(self,value): refs = self.img_re.findall(self.value) + 
self.anchor_re.findall(self.value) for ref in [reduce(lambda a,b: a or b, x) for x in refs]: ref = decodehtml(ref).strip() if not self.absref_re.match(ref): for c in ref: if ord(c)<128 and not rfc2396.urichars_re.match(c): self.log(InvalidUriChar({'value':repr(str(c))})) break else: self.log(ContainsRelRef({"parent":self.parent.name, "element":self.name, "value": ref})) # # Scan HTML for 'devious' content # class safeHtmlMixin: def validateSafe(self,value): HTMLValidator(value, self) class safeHtml(text, safeHtmlMixin, absUrlMixin): def prevalidate(self): self.children.append(True) # force warnings about "mixed" content def validate(self): self.validateSafe(self.value) self.validateAbsUrl(self.value) # # Elements for which email addresses are discouraged # class nonemail(text): email_re = re.compile("<" + addr_spec.email_re.pattern[:-1] + ">", re.I) def validate(self): if self.email_re.search(self.value): self.log(ContainsEmail({"parent":self.parent.name, "element":self.name})) # # Elements for which html is discouraged, also checks for relative URLs # class nonhtml(text,safeHtmlMixin):#,absUrlMixin): htmlEndTag_re = re.compile("") htmlEntity_re = re.compile("&(#?\w+)") def start(self): nonhtml.startline = self.__dict__['startline'] = self.line def prevalidate(self): self.start() self.children.append(True) # force warnings about "mixed" content def validate(self, message=ContainsHTML): tags = [t for t in self.htmlEndTag_re.findall(self.value) if t.lower() in HTMLValidator.htmltags] if tags: self.log(message({"parent":self.parent.name, "element":self.name, "value":tags[0]})) # experimental RSS-Profile support elif self.htmlEntity_re.search(self.value): for value in self.htmlEntity_re.findall(self.value): from htmlentitydefs import name2codepoint if value in name2codepoint or value == 'apos' or not value.isalpha(): if not hasattr(self,'startline'): self.startline=self.line lines = self.dispatcher.rssCharData[self.startline-1:self.line] if not [chardata for chardata 
in lines if chardata]: self.log(message({"parent":self.parent.name, "element":self.name, "value":'&'+value+';'})) # experimental RSS-Profile support # &#x � &ent 0: if not self.value.endswith(")"): if self.value.find(' ')>0: self.log(EmailFormat({})) else: self.log(MissingRealName({})) else: email.validate(self) else: email.validate(self) class nonNegativeInteger(text): def validate(self): try: t = int(self.value) if t < 0: raise ValueError else: self.log(ValidInteger({"parent":self.parent.name, "element":self.name, "value":self.value})) except ValueError: self.log(InvalidNonNegativeInteger({"parent":self.parent.name, "element":self.name, "value":self.value})) class positiveInteger(text): max = 0 def validate(self): try: t = int(self.value) if t <= 0: raise ValueError elif self.max and t>self.max: self.log(IntegerOverflow({"parent":self.parent.name, "element":self.name, "value":self.value})) else: self.log(ValidInteger({"parent":self.parent.name, "element":self.name, "value":self.value})) except ValueError: self.log(InvalidPositiveInteger({"parent":self.parent.name, "element":self.name, "value":self.value})) class UINT31(positiveInteger): max = 2147483647 class Integer(text): def validate(self): if self.value == '': return try: t = int(self.value) self.log(ValidInteger({"parent":self.parent.name, "element":self.name, "value":self.value})) except ValueError: self.log(InvalidInteger({"parent":self.parent.name, "element":self.name, "value":self.value})) class Float(text): def validate(self, name=None): if not re.match('\d+\.?\d*$', self.value): self.log(InvalidFloat({"attr":name or self.name, "value":self.value})) class percentType(text): def validate(self): try: t = float(self.value) if t < 0.0 or t > 100.0: raise ValueError else: self.log(ValidPercentage({"parent":self.parent.name, "element":self.name, "value":self.value})) except ValueError: self.log(InvalidPercentage({"parent":self.parent.name, "element":self.name, "value":self.value})) class latitude(text): def 
validate(self): try: lat = float(self.value) if lat > 90 or lat < -90: raise ValueError else: self.log(ValidLatitude({"parent":self.parent.name, "element":self.name, "value":self.value})) except ValueError: self.log(InvalidLatitude({"parent":self.parent.name, "element":self.name, "value":self.value})) class longitude(text): def validate(self): try: lon = float(self.value) if lon > 180 or lon < -180: raise ValueError else: self.log(ValidLongitude({"parent":self.parent.name, "element":self.name, "value":self.value})) except ValueError: self.log(InvalidLongitude({"parent":self.parent.name, "element":self.name, "value":self.value})) class httpURL(text): http_re = re.compile("http://" + addr_spec.domain_re + '(/|$)', re.IGNORECASE) def validate(self): if not self.http_re.match(self.value): self.log(InvalidURLAttribute({"parent":self.parent.name, "element":self.name, "value":self.value})) elif not rfc2396_full.rfc2396_re.match(self.value): self.log(InvalidURLAttribute({"parent":self.parent.name, "element":self.name, "value":self.value})) else: self.log(ValidURLAttribute({"parent":self.parent.name, "element":self.name, "value":self.value})) class rdfResourceURI(rfc2396): def getExpectedAttrNames(self): return [(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'resource'), (u'http://purl.org/dc/elements/1.1/', u'title')] def validate(self): if (rdfNS, 'resource') in self.attrs.getNames(): self.value=self.attrs.getValue((rdfNS, 'resource')) rfc2396.validate(self) elif self.getFeedType() == TYPE_RSS1: self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":"rdf:resource"})) class rdfAbout(validatorBase): def getExpectedAttrNames(self): return [(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'about')] def startElementNS(self, name, qname, attrs): pass def validate(self): if (rdfNS, 'about') not in self.attrs.getNames(): self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":"rdf:about"})) else: 
test=rfc2396().setElement(self.name, self.attrs, self) test.value=self.attrs.getValue((rdfNS, 'about')) test.validate() class nonblank(text): def validate(self, errorClass=NotBlank, extraParams={}): if not self.value: logparams={"parent":self.parent.name,"element":self.name} logparams.update(extraParams) self.log(errorClass(logparams)) class nows(text): def __init__(self): self.ok = 1 text.__init__(self) def characters(self, string): text.characters(self, string) if self.ok and (self.value != self.value.strip()): self.log(UnexpectedWhitespace({"parent":self.parent.name, "element":self.name})) self.ok = 0 class unique(nonblank): def __init__(self, name, scope, message=DuplicateValue): self.scope_name=name self.scope=scope self.message=message nonblank.__init__(self) if not name+'s' in self.scope.__dict__: self.scope.__dict__[name+'s']=[] def validate(self): nonblank.validate(self) list=self.scope.__dict__[self.scope_name+'s'] if self.value in list: self.log(self.message({"parent":self.parent.name, "element":self.name,"value":self.value})) elif self.value: list.append(self.value) class rfc3987_full(xmlbase): rfc2396_re = rfc2396_full.rfc2396_re def validate(self, errorClass=InvalidFullLink, successClass=ValidURI, extraParams={}): return rfc2396.validate(self, errorClass, successClass, extraParams) class canonicaluri(rfc3987_full): def validate(self): prestrip = self.value self.value = self.value.strip() if rfc3987_full.validate(self): c = canonicalForm(self.value) if c is None or c != prestrip: self.log(NonCanonicalURI({"parent":self.parent.name,"element":self.name,"uri":prestrip, "curi":c or 'N/A'})) class yesno(text): def normalizeWhitespace(self): pass def validate(self): if not self.value in ['yes','no']: self.log(InvalidYesNo({"parent":self.parent.name, "element":self.name,"value":self.value})) class truefalse(text): def normalizeWhitespace(self): pass def validate(self): if not self.value.lower() in ['true','false']: 
self.log(InvalidTrueFalse({"parent":self.parent.name, "element":self.name,"value":self.value})) class truefalsestrict(text): def normalizeWhitespace(self): pass def validate(self): if not self.value in ['true','false']: self.log(InvalidTrueFalse({"parent":self.parent.name, "element":self.name,"value":self.value})) class duration(text): duration_re = re.compile("\d+(:[0-5][0-9](:[0-5][0-9])?)?$") def validate(self): if not self.duration_re.match(self.value): self.log(InvalidDuration({"parent":self.parent.name, "element":self.name , "value":self.value})) class lengthLimitedText(nonhtml): def __init__(self, max): self.max = max text.__init__(self) def validate(self): if len(self.value)>self.max: self.log(TooLong({"parent":self.parent.name, "element":self.name, "len": len(self.value), "max": self.max})) nonhtml.validate(self) class keywords(text): def validate(self): if self.value.find(' ')>=0 and self.value.find(',')<0: self.log(InvalidKeywords({"parent":self.parent.name, "element":self.name})) class commaSeparatedIntegers(text): def validate(self): if not re.match("^\d+(,\s*\d+)*$", self.value): self.log(InvalidCommaSeparatedIntegers({"parent":self.parent.name, "element":self.name})) class formname(text): def validate(self): if not re.match("^[a-zA-z][a-zA-z0-9:._]*", self.value): self.log(InvalidFormComponentName({"parent":self.parent.name, "element":self.name, "value":self.value})) class enumeration(text): def validate(self): if self.value not in self.valuelist: self.log(self.error({"parent":self.parent.name, "element":self.name, "attr": ':'.join(self.name.split('_',1)), "value":self.value})) class caseinsensitive_enumeration(enumeration): def validate(self): self.value=self.value.lower() enumeration.validate(self) class iso3166(enumeration): error = InvalidCountryCode valuelist = [ "AD", "AE", "AF", "AG", "AI", "AM", "AN", "AO", "AQ", "AR", "AS", "AT", "AU", "AW", "AZ", "BA", "BB", "BD", "BE", "BF", "BG", "BH", "BI", "BJ", "BM", "BN", "BO", "BR", "BS", "BT", "BV", 
"BW", "BY", "BZ", "CA", "CC", "CD", "CF", "CG", "CH", "CI", "CK", "CL", "CM", "CN", "CO", "CR", "CU", "CV", "CX", "CY", "CZ", "DE", "DJ", "DK", "DM", "DO", "DZ", "EC", "EE", "EG", "EH", "ER", "ES", "ET", "FI", "FJ", "FK", "FM", "FO", "FR", "GA", "GB", "GD", "GE", "GF", "GH", "GI", "GL", "GM", "GN", "GP", "GQ", "GR", "GS", "GT", "GU", "GW", "GY", "HK", "HM", "HN", "HR", "HT", "HU", "ID", "IE", "IL", "IN", "IO", "IQ", "IR", "IS", "IT", "JM", "JO", "JP", "KE", "KG", "KH", "KI", "KM", "KN", "KP", "KR", "KW", "KY", "KZ", "LA", "LB", "LC", "LI", "LK", "LR", "LS", "LT", "LU", "LV", "LY", "MA", "MC", "MD", "MG", "MH", "MK", "ML", "MM", "MN", "MO", "MP", "MQ", "MR", "MS", "MT", "MU", "MV", "MW", "MX", "MY", "MZ", "NA", "NC", "NE", "NF", "NG", "NI", "NL", "NO", "NP", "NR", "NU", "NZ", "OM", "PA", "PE", "PF", "PG", "PH", "PK", "PL", "PM", "PN", "PR", "PS", "PT", "PW", "PY", "QA", "RE", "RO", "RU", "RW", "SA", "SB", "SC", "SD", "SE", "SG", "SH", "SI", "SJ", "SK", "SL", "SM", "SN", "SO", "SR", "ST", "SV", "SY", "SZ", "TC", "TD", "TF", "TG", "TH", "TJ", "TK", "TM", "TN", "TO", "TR", "TT", "TV", "TW", "TZ", "UA", "UG", "UM", "US", "UY", "UZ", "VA", "VC", "VE", "VG", "VI", "VN", "VU", "WF", "WS", "YE", "YT", "ZA", "ZM", "ZW"] class iso4217(enumeration): error = InvalidCurrencyUnit valuelist = [ "AED", "AFN", "ALL", "AMD", "ANG", "AOA", "ARS", "AUD", "AWG", "AZM", "BAM", "BBD", "BDT", "BGN", "BHD", "BIF", "BMD", "BND", "BOB", "BOV", "BRL", "BSD", "BTN", "BWP", "BYR", "BZD", "CAD", "CDF", "CHE", "CHF", "CHW", "CLF", "CLP", "CNY", "COP", "COU", "CRC", "CSD", "CUP", "CVE", "CYP", "CZK", "DJF", "DKK", "DOP", "DZD", "EEK", "EGP", "ERN", "ETB", "EUR", "FJD", "FKP", "GBP", "GEL", "GHC", "GIP", "GMD", "GNF", "GTQ", "GWP", "GYD", "HKD", "HNL", "HRK", "HTG", "HUF", "IDR", "ILS", "INR", "IQD", "IRR", "ISK", "JMD", "JOD", "JPY", "KES", "KGS", "KHR", "KMF", "KPW", "KRW", "KWD", "KYD", "KZT", "LAK", "LBP", "LKR", "LRD", "LSL", "LTL", "LVL", "LYD", "MAD", "MDL", "MGA", "MKD", "MMK", "MNT", "MOP", 
"MRO", "MTL", "MUR", "MWK", "MXN", "MXV", "MYR", "MZM", "NAD", "NGN", "NIO", "NOK", "NPR", "NZD", "OMR", "PAB", "PEN", "PGK", "PHP", "PKR", "PLN", "PYG", "QAR", "ROL", "RON", "RUB", "RWF", "SAR", "SBD", "SCR", "SDD", "SEK", "SGD", "SHP", "SIT", "SKK", "SLL", "SOS", "SRD", "STD", "SVC", "SYP", "SZL", "THB", "TJS", "TMM", "TND", "TOP", "TRL", "TRY", "TTD", "TWD", "TZS", "UAH", "UGX", "USD", "USN", "USS", "UYU", "UZS", "VEB", "VND", "VUV", "WST", "XAF", "XAG", "XAU", "XBA", "XBB", "XBC", "XBD", "XCD", "XDR", "XFO", "XFU", "XOF", "XPD", "XPF", "XPT", "XTS", "XXX", "YER", "ZAR", "ZMK", "ZWD"] python-feedvalidator-0~svn1022/demo.py0000755000175000017500000000327110766017570016310 0ustar poxpox#!/usr/bin/python """$Id: demo.py 988 2008-03-12 18:22:48Z sa3ruby $""" __author__ = "Sam Ruby and Mark Pilgrim " __version__ = "$Revision: 988 $" __copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim" import feedvalidator import sys import os import urllib import urllib2 import urlparse if __name__ == '__main__': # arg 1 is URL to validate link = sys.argv[1:] and sys.argv[1] or 'http://www.intertwingly.net/blog/index.atom' link = urlparse.urljoin('file:' + urllib.pathname2url(os.getcwd()) + '/', link) try: link = link.decode('utf-8').encode('idna') except: pass print 'Validating %s' % link curdir = os.path.abspath(os.path.dirname(sys.argv[0])) basedir = urlparse.urljoin('file:' + curdir, ".") try: if link.startswith(basedir): events = feedvalidator.validateStream(urllib.urlopen(link), firstOccurrenceOnly=1,base=link.replace(basedir,"http://www.feedvalidator.org/"))['loggedEvents'] else: events = feedvalidator.validateURL(link, firstOccurrenceOnly=1)['loggedEvents'] except feedvalidator.logging.ValidationFailure, vf: events = [vf.event] # (optional) arg 2 is compatibility level # "A" is most basic level # "AA" mimics online validator # "AAA" is experimental; these rules WILL change or disappear in future versions from feedvalidator import compatibility filter = sys.argv[2:] 
and sys.argv[2] or "AA" filterFunc = getattr(compatibility, filter) events = filterFunc(events) from feedvalidator.formatter.text_plain import Formatter output = Formatter(events) if output: print "\n".join(output) sys.exit(1) else: print "No errors or warnings" python-feedvalidator-0~svn1022/rdflib/0000755000175000017500000000000011065534340016236 5ustar poxpoxpython-feedvalidator-0~svn1022/rdflib/LICENSE0000755000175000017500000000310010224417257017243 0ustar poxpoxLICENSE AGREEMENT FOR RDFLIB 0.9.0 THROUGH 2.0.6 ------------------------------------------------ Copyright (c) 2002-2005, Daniel Krech, http://eikeon.com/ All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of Daniel Krech nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. python-feedvalidator-0~svn1022/rdflib/URIRef.py0000755000175000017500000000116710175761534017724 0ustar poxpoxfrom sys import version_info if version_info[0:2] > (2, 2): from unicodedata import normalize else: normalize = None from rdflib.Identifier import Identifier from rdflib.Literal import Literal class URIRef(Identifier): def __new__(cls, value): return Identifier.__new__(cls, value) def __init__(self, value): if normalize and value: if not isinstance(value, unicode): value = unicode(value) if value != normalize("NFC", value): raise Error("value must be in NFC normalized form.") def n3(self): return "<%s>" % self python-feedvalidator-0~svn1022/rdflib/Namespace.py0000755000175000017500000000021710224417257020512 0ustar poxpoxfrom rdflib.URIRef import URIRef class Namespace(URIRef): def __getitem__(self, key, default=None): return URIRef(self + key) python-feedvalidator-0~svn1022/rdflib/exceptions.py0000755000175000017500000000451310175761534021007 0ustar poxpox class Error(Exception): """Base class for rdflib exceptions.""" def __init__(self, msg=None): Exception.__init__(self, msg) self.msg = msg class TypeCheckError(Error): """Parts of assertions are subject to type checks.""" def __init__(self, node): Error.__init__(self, node) self.type = type(node) self.node = node class SubjectTypeError(TypeCheckError): """Subject of an assertion must be an instance of URIRef.""" def __init__(self, node): TypeCheckError.__init__(self, node) self.msg = "Subject must be 
instance of URIRef or BNode: %s(%s)" \ % (self.node, self.type) class PredicateTypeError(TypeCheckError): """Predicate of an assertion must be an instance of URIRef.""" def __init__(self, node): TypeCheckError.__init__(self, node) self.msg = "Predicate must be a URIRef instance: %s(%s)" \ % (self.node, self.type) class ObjectTypeError(TypeCheckError): """Object of an assertion must be an instance of URIRef, Literal, or BNode.""" def __init__(self, node): TypeCheckError.__init__(self, node) self.msg = "Object must be instance of URIRef, Literal, or BNode: %s(%s)" % \ (self.node, self.type) class ContextTypeError(TypeCheckError): """Context of an assertion must be an instance of URIRef.""" def __init__(self, node): TypeCheckError.__init__(self, node) self.msg = "Context must be instance of URIRef or BNode: %s(%s)" \ % (self.node, self.type) class ParserError(Error): """RDF Parser error.""" def __init__(self, msg): self.msg = msg def __str__(self): return self.msg class SerializerDispatchNameError(Error): """No name set...""" def __init__(self, msg): Error.__init__(self) self.msg = msg class SerializerDispatchNameClashError(Error): """Name clash...""" def __init(self, msg): Error.__init__(self) self.msg = msg class ParserDispatchNameError(Error): """No name set...""" def __init__(self, msg): Error.__init__(self) self.msg = msg class ParserDispatchNameClashError(Error): """Name clash...""" def __init(self, msg): Error.__init__(self) self.msg = msg python-feedvalidator-0~svn1022/rdflib/BNode.py0000755000175000017500000000117710175761534017620 0ustar poxpoxfrom string import ascii_letters from random import choice from rdflib.Identifier import Identifier from rdflib.Literal import Literal # Create a (hopefully) unique prefix so that BNode values do not # collide with ones created with a different instance of this module. 
prefix = "" for i in xrange(0,8): prefix += choice(ascii_letters) node_id = 0 class BNode(Identifier): def __new__(cls, value=None): if value==None: global node_id node_id += 1 value = "_:%s%s" % (prefix, node_id) return Identifier.__new__(cls, value) def n3(self): return str(self) python-feedvalidator-0~svn1022/rdflib/Literal.py0000755000175000017500000000366110224417257020220 0ustar poxpoxfrom sys import version_info if version_info[0:2] > (2, 2): from unicodedata import normalize else: normalize = None from rdflib.Identifier import Identifier from rdflib.exceptions import Error class Literal(Identifier): """ http://www.w3.org/TR/rdf-concepts/#section-Graph-Literal """ def __new__(cls, value, lang='', datatype=''): value = unicode(value) return Identifier.__new__(cls, value) def __init__(self, value, lang='', datatype=''): if normalize and value: if not isinstance(value, unicode): value = unicode(value) if value != normalize("NFC", value): raise Error("value must be in NFC normalized form.") if datatype: lang = '' self.language = lang self.datatype = datatype def __add__(self, val): s = super(Literal, self).__add__(val) return Literal(s, self.language, self.datatype) def n3(self): language = self.language datatype = self.datatype encoded = self.encode('unicode-escape') if language: if datatype: return '"%s"@%s^^<%s>' % (encoded, language, datatype) else: return '"%s"@%s' % (encoded, language) else: if datatype: return '"%s"^^<%s>' % (encoded, datatype) else: return '"%s"' % encoded def __eq__(self, other): if other==None: return 0 elif isinstance(other, Literal): result = self.__cmp__(other)==0 if result==1: if self.language==other.language: return 1 else: return 0 else: return result elif isinstance(other, Identifier): return 0 else: return unicode(self)==other python-feedvalidator-0~svn1022/rdflib/syntax/0000755000175000017500000000000011065534337017572 5ustar 
poxpoxpython-feedvalidator-0~svn1022/rdflib/syntax/xml_names.py0000755000175000017500000000534010224417257022131 0ustar poxpox# From: http://www.w3.org/TR/REC-xml#NT-CombiningChar # # * Name start characters must have one of the categories Ll, Lu, Lo, # Lt, Nl. # # * Name characters other than Name-start characters must have one of # the categories Mc, Me, Mn, Lm, or Nd. # # * Characters in the compatibility area (i.e. with character code # greater than #xF900 and less than #xFFFE) are not allowed in XML # names. # # * Characters which have a font or compatibility decomposition # (i.e. those with a "compatibility formatting tag" in field 5 of the # database -- marked by field 5 beginning with a "<") are not allowed. # # * The following characters are treated as name-start characters rather # than name characters, because the property file classifies them as # Alphabetic: [#x02BB-#x02C1], #x0559, #x06E5, #x06E6. # # * Characters #x20DD-#x20E0 are excluded (in accordance with Unicode # 2.0, section 5.14). # # * Character #x00B7 is classified as an extender, because the property # list so identifies it. # # * Character #x0387 is added as a name character, because #x00B7 is its # canonical equivalent. # # * Characters ':' and '_' are allowed as name-start characters. # # * Characters '-' and '.' are allowed as name characters. from unicodedata import category, decomposition NAME_START_CATEGORIES = ["Ll", "Lu", "Lo", "Lt", "Nl"] NAME_CATEGORIES = NAME_START_CATEGORIES + ["Mc", "Me", "Mn", "Lm", "Nd"] ALLOWED_NAME_CHARS = [u"\u00B7", u"\u0387", u"-", u".", u"_"] # http://www.w3.org/TR/REC-xml-names/#NT-NCName # [4] NCName ::= (Letter | '_') (NCNameChar)* /* An XML Name, minus # the ":" */ # [5] NCNameChar ::= Letter | Digit | '.' 
| '-' | '_' | CombiningChar # | Extender def is_ncname(name): first = name[0] if first=="_" or category(first) in NAME_START_CATEGORIES: for i in xrange(1, len(name)): c = name[i] if not category(c) in NAME_CATEGORIES: if c in ALLOWED_NAME_CHARS: continue return 0 #if in compatibility area #if decomposition(c)!='': # return 0 return 1 else: return 0 def split_uri(predicate): predicate = predicate length = len(predicate) for i in xrange(0, length): if not category(predicate[-i-1]) in NAME_CATEGORIES: for j in xrange(-1-i, length): if category(predicate[j]) in NAME_START_CATEGORIES: ns = predicate[:j] if not ns: break ln = predicate[j:] return (ns, ln) break raise Error("This graph cannot be serialized in RDF/XML. Could not split predicate: '%s'" % predicate) python-feedvalidator-0~svn1022/rdflib/syntax/parsers/0000755000175000017500000000000011065534337021251 5ustar poxpoxpython-feedvalidator-0~svn1022/rdflib/syntax/parsers/__init__.py0000755000175000017500000000005010175761534023362 0ustar poxpox__all__ = ["RDFXMLParser", "NTParser"] python-feedvalidator-0~svn1022/rdflib/syntax/parsers/RDFXMLHandler.py0000755000175000017500000004720610224417257024126 0ustar poxpox# Copyright (c) 2002, Daniel Krech, http://eikeon.com/ # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are # met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above # copyright notice, this list of conditions and the following # disclaimer in the documentation and/or other materials provided # with the distribution. # # * Neither the name of Daniel Krech nor the names of its # contributors may be used to endorse or promote products derived # from this software without specific prior written permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS # "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT # LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR # A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT # OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, # SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT # LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, # DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY # THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ """ from urlparse import urljoin, urldefrag from xml.sax.saxutils import handler, quoteattr, escape from urllib import quote from rdflib.URIRef import URIRef from rdflib.BNode import BNode from rdflib.Literal import Literal from rdflib.Namespace import Namespace from rdflib.exceptions import ParserError, Error from rdflib.constants import RDFNS from rdflib.constants import UNQUALIFIED, CORE_SYNTAX_TERMS, OLD_TERMS from rdflib.constants import RDF, DESCRIPTION, ID, ABOUT from rdflib.constants import PARSE_TYPE, RESOURCE, LI from rdflib.constants import NODE_ID, DATATYPE from rdflib.constants import SEQ, BAG, ALT from rdflib.constants import STATEMENT, PROPERTY, XMLLiteral, LIST from rdflib.constants import SUBJECT, PREDICATE, OBJECT from rdflib.constants import TYPE, VALUE, FIRST, REST from rdflib.constants import NIL from rdflib.syntax.xml_names import is_ncname NODE_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [LI,] + OLD_TERMS NODE_ELEMENT_ATTRIBUTES = [ID, NODE_ID, ABOUT] PROPERTY_ELEMENT_EXCEPTIONS = CORE_SYNTAX_TERMS + [DESCRIPTION,] + OLD_TERMS PROPERTY_ATTRIBUTE_EXCEPTIONS = CORE_SYNTAX_TERMS + [DESCRIPTION, LI] + OLD_TERMS PROPERTY_ELEMENT_ATTRIBUTES = [ID, RESOURCE, NODE_ID] XMLNS = 
Namespace("http://www.w3.org/XML/1998/namespace") BASE = (XMLNS, "base") LANG = (XMLNS, "lang") class BagID(URIRef): __slots__ = ['li'] def __init__(self, val): super(URIRef, self).__init__(val) self.li = 0 def next_li(self): self.li += 1 return URIRef(RDFNS + "_%s" % self.li) class ElementHandler(object): __slots__ = ['start', 'char', 'end', 'li', 'id', 'base', 'subject', 'predicate', 'object', 'list', 'language', 'datatype', 'declared'] def __init__(self): self.start = None self.char = None self.end = None self.li = 0 self.id = None self.base = None self.subject = None self.object = None self.list = None self.language = "" self.datatype = "" self.declared = None def next_li(self): self.li += 1 return URIRef(RDFNS + "_%s" % self.li) class RDFXMLHandler(handler.ContentHandler): def __init__(self, store): self.store = store self.reset() def reset(self): document_element = ElementHandler() document_element.start = self.document_element_start document_element.end = lambda name, qname: None self.stack = [None, document_element,] self.ids = {} # remember IDs we have already seen self.bnode = {} self._ns_contexts = [{}] # contains uri -> prefix dicts self._current_context = self._ns_contexts[-1] # ContentHandler methods def setDocumentLocator(self, locator): self.locator = locator def startDocument(self): pass def startPrefixMapping(self, prefix, uri): self._ns_contexts.append(self._current_context.copy()) self._current_context[uri] = prefix ns_prefix = self.store.ns_prefix_map prefix_ns = self.store.prefix_ns_map if prefix in prefix_ns: if ns_prefix.get(uri, None) != prefix: num = 1 while 1: new_prefix = "%s%s" % (prefix, num) if new_prefix not in prefix_ns: break num +=1 ns_prefix[uri] = new_prefix prefix_ns[new_prefix] = uri elif uri not in ns_prefix: # Only if we do not already have a # binding. 
So we do not clobber # things like rdf, rdfs ns_prefix[uri] = prefix prefix_ns[prefix] = uri def endPrefixMapping(self, prefix): self._current_context = self._ns_contexts[-1] del self._ns_contexts[-1] def startElementNS(self, name, qname, attrs): stack = self.stack stack.append(ElementHandler()) current = self.current parent = self.parent base = attrs.get(BASE, None) if base is not None: base, frag = urldefrag(base) else: if parent: base = parent.base if base is None: systemId = self.locator.getPublicId() or self.locator.getSystemId() if systemId: base, frag = urldefrag(systemId) current.base = base language = attrs.get(LANG, None) if language is None: if parent: language = parent.language else: language = '' current.language = language current.start(name, qname, attrs) def endElementNS(self, name, qname): self.current.end(name, qname) self.stack.pop() def characters(self, content): char = self.current.char if char: char(content) def ignorableWhitespace(self, content): pass def processingInstruction(self, target, data): pass def add_reified(self, sid, (s, p, o)): self.store.add((sid, TYPE, STATEMENT)) self.store.add((sid, SUBJECT, s)) self.store.add((sid, PREDICATE, p)) self.store.add((sid, OBJECT, o)) def error(self, message): locator = self.locator info = "%s:%s:%s: " % (locator.getSystemId(), locator.getLineNumber(), locator.getColumnNumber()) raise ParserError(info + message) def get_current(self): return self.stack[-2] # Create a read only property called current so that self.current # give the current element handler. current = property(get_current) def get_next(self): return self.stack[-1] # Create a read only property that gives the element handler to be # used for the next element. 
next = property(get_next) def get_parent(self): return self.stack[-3] # Create a read only property that gives the current parent # element handler parent = property(get_parent) def absolutize(self, uri): s = urljoin(self.current.base, uri, allow_fragments=1) if uri and uri[-1]=="#": return URIRef(''.join((s, "#"))) else: return URIRef(s) def convert(self, name, qname, attrs): if name[0] is None: name = name[1] else: name = "".join(name) atts = {} for (n, v) in attrs.items(): #attrs._attrs.iteritems(): # if n[0] is None: att = n[1] else: att = "".join(n) if att.startswith(XMLNS) or att[0:3].lower()=="xml": pass elif att in UNQUALIFIED: #if not RDFNS[att] in atts: atts[RDFNS[att]] = v else: atts[att] = v return name, atts def document_element_start(self, name, qname, attrs): if name[0] and "".join(name) == RDF: next = self.next next.start = self.node_element_start next.end = self.node_element_end else: self.node_element_start(name, qname, attrs) #self.current.end = self.node_element_end # TODO... 
set end to something that sets start such that # another element will cause error def node_element_start(self, name, qname, attrs): name, atts = self.convert(name, qname, attrs) current = self.current absolutize = self.absolutize next = self.next next.start = self.property_element_start next.end = self.property_element_end if name in NODE_ELEMENT_EXCEPTIONS: self.error("Invalid node element URI: %s" % name) if ID in atts: if ABOUT in atts or NODE_ID in atts: self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID") id = atts[ID] if not is_ncname(id): self.error("rdf:ID value is not a valid NCName: %s" % id) subject = absolutize("#%s" % id) if subject in self.ids: self.error("two elements cannot use the same ID: '%s'" % subject) self.ids[subject] = 1 # IDs can only appear once within a document elif NODE_ID in atts: if ID in atts or ABOUT in atts: self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID") nodeID = atts[NODE_ID] if not is_ncname(nodeID): self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID) if nodeID in self.bnode: subject = self.bnode[nodeID] else: subject = BNode() self.bnode[nodeID] = subject elif ABOUT in atts: if ID in atts or NODE_ID in atts: self.error("Can have at most one of rdf:ID, rdf:about, and rdf:nodeID") subject = absolutize(atts[ABOUT]) else: subject = BNode() if name!=DESCRIPTION: # S1 self.store.add((subject, TYPE, absolutize(name))) if TYPE in atts: # S2 self.store.add((subject, TYPE, absolutize(atts[TYPE]))) language = current.language for att in atts: if not att.startswith(RDFNS): predicate = absolutize(att) try: object = Literal(atts[att], language) except Error, e: self.error(e.msg) elif att==TYPE: #S2 predicate = TYPE object = absolutize(atts[TYPE]) elif att in NODE_ELEMENT_ATTRIBUTES: continue elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS: #S3 self.error("Invalid property attribute URI: %s" % att) continue # for when error does not throw an exception else: predicate = absolutize(att) try: 
object = Literal(atts[att], language) except Error, e: self.error(e.msg) self.store.add((subject, predicate, object)) current.subject = subject def node_element_end(self, name, qname): self.parent.object = self.current.subject def property_element_start(self, name, qname, attrs): name, atts = self.convert(name, qname, attrs) current = self.current absolutize = self.absolutize next = self.next object = None current.list = None if not name.startswith(RDFNS): current.predicate = absolutize(name) elif name==LI: current.predicate = current.next_li() elif name in PROPERTY_ELEMENT_EXCEPTIONS: self.error("Invalid property element URI: %s" % name) else: current.predicate = absolutize(name) id = atts.get(ID, None) if id is not None: if not is_ncname(id): self.error("rdf:ID value is not a value NCName: %s" % id) current.id = absolutize("#%s" % id) else: current.id = None resource = atts.get(RESOURCE, None) nodeID = atts.get(NODE_ID, None) parse_type = atts.get(PARSE_TYPE, None) if resource is not None and nodeID is not None: self.error("Property element cannot have both rdf:nodeID and rdf:resource") if resource is not None: object = absolutize(resource) next.start = self.node_element_start next.end = self.node_element_end elif nodeID is not None: if not is_ncname(nodeID): self.error("rdf:nodeID value is not a valid NCName: %s" % nodeID) if nodeID in self.bnode: object = self.bnode[nodeID] else: subject = BNode() self.bnode[nodeID] = subject object = subject next.start = self.node_element_start next.end = self.node_element_end else: if parse_type is not None: for att in atts: if att!=PARSE_TYPE and att!=ID: self.error("Property attr '%s' now allowed here" % att) if parse_type=="Resource": current.subject = object = BNode() current.char = self.property_element_char next.start = self.property_element_start next.end = self.property_element_end elif parse_type=="Collection": current.char = None next.start = self.node_element_start next.end = self.list_node_element_end else: #if 
parse_type=="Literal": # All other values are treated as Literal # See: http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt #object = Literal("", current.language, XMLLiteral) object = Literal("", "", XMLLiteral) current.char = self.literal_element_char current.declared = {} next.start = self.literal_element_start next.char = self.literal_element_char next.end = self.literal_element_end current.object = object return else: object = None current.char = self.property_element_char next.start = self.node_element_start next.end = self.node_element_end datatype = current.datatype = atts.get(DATATYPE, None) language = current.language if datatype is not None: # TODO: check that there are no atts other than datatype and id pass else: for att in atts: if not att.startswith(RDFNS): predicate = absolutize(att) elif att in PROPERTY_ELEMENT_ATTRIBUTES: continue elif att in PROPERTY_ATTRIBUTE_EXCEPTIONS: self.error("""Invalid property attribute URI: %s""" % att) else: predicate = absolutize(att) if att==TYPE: o = URIRef(atts[att]) else: o = Literal(atts[att], language, datatype) if object is None: object = BNode() self.store.add((object, predicate, o)) if object is None: object = Literal("", language, datatype) current.object = object def property_element_char(self, data): current = self.current if current.object is None: try: current.object = Literal(data, current.language, current.datatype) except Error, e: self.error(e.msg) else: if isinstance(current.object, Literal): try: current.object += data except Error, e: self.error(e.msg) def property_element_end(self, name, qname): current = self.current if self.next.end==self.list_node_element_end: self.store.add((current.list, REST, NIL)) if current.object is not None: self.store.add((self.parent.subject, current.predicate, current.object)) if current.id is not None: self.add_reified(current.id, (self.parent.subject, current.predicate, current.object)) current.subject = None def list_node_element_end(self, name, 
qname): current = self.current if not self.parent.list: list = BNode() # Removed between 20030123 and 20030905 #self.store.add((list, TYPE, LIST)) self.parent.list = list self.store.add((self.parent.list, FIRST, current.subject)) self.parent.object = list self.parent.char = None else: list = BNode() # Removed between 20030123 and 20030905 #self.store.add((list, TYPE, LIST)) self.store.add((self.parent.list, REST, list)) self.store.add((list, FIRST, current.subject)) self.parent.list = list def literal_element_start(self, name, qname, attrs): current = self.current self.next.start = self.literal_element_start self.next.char = self.literal_element_char self.next.end = self.literal_element_end current.declared = self.parent.declared.copy() if name[0]: prefix = self._current_context[name[0]] if prefix: current.object = "<%s:%s" % (prefix, name[1]) else: current.object = "<%s" % name[1] if not name[0] in current.declared: current.declared[name[0]] = prefix if prefix: current.object += (' xmlns:%s="%s"' % (prefix, name[0])) else: current.object += (' xmlns="%s"' % name[0]) else: current.object = "<%s" % name[1] for (name, value) in attrs.items(): if name[0]: if not name[0] in current.declared: current.declared[name[0]] = self._current_context[name[0]] name = current.declared[name[0]] + ":" + name[1] else: name = name[1] current.object += (' %s=%s' % (name, quoteattr(value))) current.object += ">" def literal_element_char(self, data): self.current.object += data def literal_element_end(self, name, qname): if name[0]: prefix = self._current_context[name[0]] if prefix: end = u"" % (prefix, name[1]) else: end = u"" % name[1] else: end = u"" % name[1] self.parent.object += self.current.object + end python-feedvalidator-0~svn1022/rdflib/syntax/__init__.py0000755000175000017500000000001610175761534021705 0ustar poxpox# RDF Library python-feedvalidator-0~svn1022/rdflib/Identifier.py0000755000175000017500000000021510175761534020703 0ustar poxpoxclass Identifier(unicode): """ See 
http://www.w3.org/2002/07/rdf-identifer-terminology/ regarding choice of terminology. """ python-feedvalidator-0~svn1022/rdflib/constants.py0000755000175000017500000000373310175761534020645 0ustar poxpoxfrom rdflib.Namespace import Namespace # The RDF Namespace # http://ilrt.org/discovery/2001/07/rdf-syntax-grammar/#section-Namespace RDFNS = Namespace("http://www.w3.org/1999/02/22-rdf-syntax-ns#") # Syntax names RDF = RDFNS["RDF"] DESCRIPTION = RDFNS["Description"] ID = RDFNS["ID"] ABOUT = RDFNS["about"] PARSE_TYPE = RDFNS["parseType"] RESOURCE = RDFNS["resource"] LI = RDFNS["li"] NODE_ID = RDFNS["nodeID"] DATATYPE = RDFNS["datatype"] # RDF Classes SEQ = RDFNS["Seq"] BAG = RDFNS["Bag"] ALT = RDFNS["Alt"] STATEMENT = RDFNS["Statement"] PROPERTY = RDFNS["Property"] XMLLiteral = RDFNS["XMLLiteral"] LIST = RDFNS["List"] # RDF Properties SUBJECT = RDFNS["subject"] PREDICATE = RDFNS["predicate"] OBJECT = RDFNS["object"] TYPE = RDFNS["type"] VALUE = RDFNS["value"] FIRST = RDFNS["first"] REST = RDFNS["rest"] # and _n where n is a non-negative integer # RDF Resources NIL = RDFNS["nil"] # http://www.w3.org/TR/rdf-syntax-grammar/#eventterm-attribute-URI # A mapping from unqualified terms to there qualified version. 
UNQUALIFIED = {"about" : ABOUT, "ID" : ID, "type" : TYPE, "resource": RESOURCE, "parseType": PARSE_TYPE} # http://www.w3.org/TR/rdf-syntax-grammar/#coreSyntaxTerms CORE_SYNTAX_TERMS = [RDF, ID, ABOUT, PARSE_TYPE, RESOURCE, NODE_ID, DATATYPE] # http://www.w3.org/TR/rdf-syntax-grammar/#syntaxTerms SYNTAX_TERMS = CORE_SYNTAX_TERMS + [DESCRIPTION, LI] # http://www.w3.org/TR/rdf-syntax-grammar/#oldTerms OLD_TERMS = [RDFNS["aboutEach"], RDFNS["aboutEachPrefix"], RDFNS["bagID"]] # SCHEMA RDFSNS = Namespace("http://www.w3.org/2000/01/rdf-schema#") RDFS_CLASS = RDFSNS["Class"] RDFS_RESOURCE = RDFSNS["Resource"] RDFS_SUBCLASSOF = RDFSNS["subClassOf"] RDFS_SUBPROPERTYOF = RDFSNS["subPropertyOf"] RDFS_ISDEFINEDBY = RDFSNS["isDefinedBy"] RDFS_LABEL = RDFSNS["label"] RDFS_COMMENT = RDFSNS["comment"] RDFS_RANGE = RDFSNS["range"] RDFS_DOMAIN = RDFSNS["domain"] RDFS_LITERAL = RDFSNS["Literal"] RDFS_CONTAINER = RDFSNS["Container"] RDFS_SEEALSO = RDFSNS["seeAlso"] python-feedvalidator-0~svn1022/rdflib/__init__.py0000755000175000017500000000005010224417257020350 0ustar poxpox# RDF Library __version__ = "2.0.6" python-feedvalidator-0~svn1022/missingWebPages.py0000755000175000017500000000211510031501505020424 0ustar poxpox#!/usr/bin/python """ $Id: missingWebPages.py 75 2004-03-28 07:48:21Z josephw $ Show any logging events without explanatory web pages """ from sys import path, argv, exit from os.path import isfile import inspect import os.path curdir = os.path.abspath(os.path.dirname(argv[0])) BASE = os.path.split(curdir)[0] path.insert(0, os.path.join(BASE, 'src')) import feedvalidator.logging # Logic from text_html.py def getRootClass(aClass): bl = aClass.__bases__ if not(bl): return None aClass = bl[0] bl = bl[0].__bases__ while bl: base = bl[0] if base == feedvalidator.logging.LoggedEvent: return aClass aClass = base bl = aClass.__bases__ return None show = argv[1:] or ['warning', 'error'] areMissing=False for n, o in inspect.getmembers(feedvalidator.logging, inspect.isclass): rc 
= getRootClass(o) if not(rc): continue rcname = rc.__name__.split('.')[-1].lower() if rcname in show: fn = os.path.join('docs', rcname, n + '.html') if not(isfile(os.path.join(BASE, fn))): print fn areMissing=True if areMissing: exit(5)