python-feedvalidator-0~svn1022/runtests.sh
#!/bin/sh
PYTHON=${1:-${PYTHON:-python}}
# Run all project tests
cd "`dirname "$0"`"
${PYTHON} validtest.py
# Make sure XML encoding detection works
${PYTHON} tests/genXmlTestcases.py && ${PYTHON} tests/testXmlEncoding.py
# Confirm that XML is decoded correctly
${PYTHON} tests/testXmlEncodingDecode.py
# Make sure media type checks are consistent
${PYTHON} tests/testMediaTypes.py
# Test URI equivalence
${PYTHON} tests/testUri.py
# Ensure check.cgi runs cleanly, at least for a GET
PYTHONPATH="`pwd`/tests:." REQUEST_METHOD=GET FEEDVALIDATOR_HOME="`pwd`/.." python - <../check.cgi >/dev/null || echo >&2 "check.cgi failed to run"
python-feedvalidator-0~svn1022/validtest.py
#!/usr/bin/python
"""$Id: validtest.py 1014 2008-05-21 20:43:22Z joe.walton.gglcd $"""
__author__ = "Sam Ruby and Mark Pilgrim "
__version__ = "$Revision: 1014 $"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
import feedvalidator
import unittest, new, os, sys, glob, re
from feedvalidator.logging import Message,SelfDoesntMatchLocation,MissingSelf
from feedvalidator import compatibility
from feedvalidator.formatter.application_test import Formatter
class TestCase(unittest.TestCase):
def failIfNoMessage(self, theList):
filterFunc = compatibility.AA
events = filterFunc(theList)
output = Formatter(events)
for e in events:
if not output.format(e):
        raise self.failureException, 'could not construct message for %s' % e
def failUnlessContainsInstanceOf(self, theClass, params, theList, msg=None):
"""Fail if there are no instances of theClass in theList with given params"""
self.failIfNoMessage(theList)
failure=(msg or 'no %s instances in %s' % (theClass.__name__, `theList`))
for item in theList:
if issubclass(item.__class__, theClass):
if not params: return
for k, v in params.items():
if str(item.params[k]) <> v:
failure=("%s.%s value was %s, expected %s" %
(theClass.__name__, k, item.params[k], v))
break
else:
return
raise self.failureException, failure
def failIfContainsInstanceOf(self, theClass, params, theList, msg=None):
"""Fail if there are instances of theClass in theList with given params"""
self.failIfNoMessage(theList)
for item in theList:
if theClass==Message and isinstance(item,SelfDoesntMatchLocation):
continue
if theClass==Message and isinstance(item,MissingSelf):
continue
if issubclass(item.__class__, theClass):
if not params:
raise self.failureException, \
(msg or 'unexpected %s' % (theClass.__name__))
allmatch = 1
for k, v in params.items():
if item.params[k] != v:
allmatch = 0
if allmatch:
raise self.failureException, \
"unexpected %s.%s with a value of %s" % \
(theClass.__name__, k, v)
desc_re = re.compile("")
validome_re = re.compile("", re.S)
def getDescription(xmlfile):
"""Extract description and exception from XML file
The deal here is that each test case is an XML file which contains
not only a possibly invalid RSS feed but also the description of the
test, i.e. the exception that we would expect the RSS validator to
raise (or not) when it validates the feed. The expected exception and
the human-readable description are placed into an XML comment like this:
"""
stream = open(xmlfile)
xmldoc = stream.read()
stream.close()
search_results = desc_re.search(xmldoc)
if search_results:
description, cond, excName, plist = list(search_results.groups())
else:
search_results = validome_re.search(xmldoc)
if search_results:
plist = ''
description, cond, excName = list(search_results.groups())
excName = excName.capitalize()
if excName=='Valid': cond,excName = '!', 'Message'
else:
raise RuntimeError, "can't parse %s" % xmlfile
if cond == "":
method = TestCase.failUnlessContainsInstanceOf
else:
method = TestCase.failIfContainsInstanceOf
params = {}
if plist:
for entry in plist.split(','):
name,value = entry.lstrip().split(':',1)
params[name] = value
exc = getattr(feedvalidator, excName)
description = xmlfile + ": " + description
return method, description, params, exc
def buildTestCase(xmlfile, xmlBase, description, method, exc, params):
"""factory to create functions which validate `xmlfile`
the returned function asserts that validating `xmlfile` (an XML file)
will return a list of exceptions that include an instance of
`exc` (an Exception class)
"""
func = lambda self, xmlfile=xmlfile, exc=exc, params=params: \
method(self, exc, params, feedvalidator.validateString(open(xmlfile).read(), fallback='US-ASCII', base=xmlBase)['loggedEvents'])
func.__doc__ = description
return func
def buildTestSuite():
curdir = os.path.dirname(os.path.abspath(__file__))
basedir = os.path.split(curdir)[0]
for xmlfile in sys.argv[1:] or (glob.glob(os.path.join(basedir, 'testcases', '**', '**', '*.xml')) + glob.glob(os.path.join(basedir, 'testcases', 'opml', '**', '*.opml'))):
method, description, params, exc = getDescription(xmlfile)
xmlBase = os.path.abspath(xmlfile).replace(basedir,"http://www.feedvalidator.org")
testName = 'test_' + xmlBase.replace(os.path.sep, "/")
testFunc = buildTestCase(xmlfile, xmlBase, description, method, exc, params)
instanceMethod = new.instancemethod(testFunc, None, TestCase)
setattr(TestCase, testName, instanceMethod)
return unittest.TestLoader().loadTestsFromTestCase(TestCase)
if __name__ == '__main__':
suite = buildTestSuite()
unittest.main(argv=sys.argv[:1])
python-feedvalidator-0~svn1022/tests/testMediaTypes.py
#!/usr/bin/python
"""$Id: testMediaTypes.py 988 2008-03-12 18:22:48Z sa3ruby $"""
__author__ = "Joseph Walton "
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2004 Joseph Walton"
import os, sys
curdir = os.path.abspath(os.path.dirname(sys.argv[0]))
srcdir = os.path.split(curdir)[0]
if srcdir not in sys.path:
sys.path.insert(0, srcdir)
basedir = os.path.split(srcdir)[0]
import unittest
from feedvalidator import mediaTypes
from feedvalidator.logging import TYPE_RSS1, TYPE_RSS2, TYPE_ATOM
def l(x):
if x:
return x.lower()
else:
return x
class MediaTypesTest(unittest.TestCase):
def testCheckValid(self):
el = []
(t, c) = mediaTypes.checkValid(self.contentType, el)
self.assertEqual(l(t), l(self.mediaType), 'Media type should be ' + self.mediaType)
self.assertEqual(l(c), l(self.charset), 'Charset should be ' + str(self.charset) + ' for ' + self.mediaType + ' was ' + str(c))
if (self.error):
self.assertEqual(len(el), 1, 'Expected errors to be logged')
else:
self.assertEqual(len(el), 0, 'Did not expect errors to be logged')
def testCheckAgainstFeedType(self):
FT=['Unknown', 'RSS 1.0', 'RSS 2.0', 'Atom', 'Atom 0.3']
el = []
r = mediaTypes.checkAgainstFeedType(self.mediaType, self.feedType, el)
if (self.error):
self.assertEqual(len(el), 1, 'Expected errors to be logged (' + self.mediaType + ',' + FT[self.feedType] + ')')
else:
self.assertEqual(len(el), 0, 'Did not expect errors to be logged (' + self.mediaType + ',' + FT[self.feedType] + ')')
# Content-Type, Media type, Charset, Error?
cvCases = [
['text/xml', 'text/xml', None, False],
['text/xml; charset=UTF-8', 'text/xml', 'utf-8', False],
['application/xml', 'application/xml', None, False],
['text/plain', 'text/plain', None, True],
['application/octet-stream', 'application/octet-stream', None, True]
]
# Media type, Feed type, Error?
caftCases = [
['text/xml', TYPE_RSS1, False],
['application/xml', TYPE_RSS1, False],
['application/rss+xml', TYPE_RSS1, False],
['application/rdf+xml', TYPE_RSS1, False],
['application/x.atom+xml', TYPE_RSS1, True],
['application/atom+xml', TYPE_RSS1, True],
['text/xml', TYPE_RSS2, False],
  ['application/xml', TYPE_RSS2, False],
['application/rss+xml', TYPE_RSS2, False],
['application/rdf+xml', TYPE_RSS2, True],
['application/x.atom+xml', TYPE_RSS2, True],
['application/atom+xml', TYPE_RSS2, True],
['text/xml', TYPE_ATOM, False],
['application/xml', TYPE_ATOM, False],
['application/rss+xml', TYPE_ATOM, True],
['application/rdf+xml', TYPE_ATOM, True],
['application/x.atom+xml', TYPE_ATOM, False],
['application/atom+xml', TYPE_ATOM, False],
]
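# A standalone sketch of the calls the tables above drive through unittest.
# The literals mirror rows of cvCases and caftCases; nothing invokes this
# function, it is illustrative only.
def _exampleDirectCalls():
  el = []
  (mt, cs) = mediaTypes.checkValid('text/xml; charset=UTF-8', el)
  # mt and cs compare (case-insensitively) equal to 'text/xml' and 'utf-8';
  # el is still empty at this point
  mediaTypes.checkAgainstFeedType('application/rss+xml', TYPE_ATOM, el)
  # an RSS media type on an Atom feed appends one logged event to el
  return mt, cs, el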
def buildTestSuite():
suite = unittest.TestSuite()
for (ct, mt, cs, e) in cvCases:
t = MediaTypesTest('testCheckValid')
t.contentType = ct;
t.mediaType = mt
t.charset = cs
t.error = e
suite.addTest(t)
for (mt, ft, e) in caftCases:
t = MediaTypesTest('testCheckAgainstFeedType')
t.mediaType = mt
t.feedType = ft
t.error = e
suite.addTest(t)
return suite
if __name__ == "__main__":
s = buildTestSuite()
unittest.TextTestRunner().run(s)
python-feedvalidator-0~svn1022/tests/testXmlEncodingDecode.py
#!/usr/bin/python
"""$Id: testXmlEncodingDecode.py 988 2008-03-12 18:22:48Z sa3ruby $"""
__author__ = "Joseph Walton "
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2004 Joseph Walton"
import os, sys
curdir = os.path.abspath(os.path.dirname(sys.argv[0]))
srcdir = os.path.split(curdir)[0]
if srcdir not in sys.path:
sys.path.insert(0, srcdir)
basedir = os.path.split(srcdir)[0]
import unittest
from feedvalidator import xmlEncoding
from feedvalidator.logging import *
ctAX='application/xml'
class TestDecode(unittest.TestCase):
def _assertEqualUnicode(self, a, b):
self.assertNotEqual(a, None, 'Decoded strings should not equal None')
self.assertEqual(type(a), unicode, 'Decoded strings should be Unicode (was ' + str(type(a)) + ')')
self.assertEqual(type(b), unicode, 'Test suite error: test strings must be Unicode')
self.assertEqual(a, b)
def testProvidedEncoding(self):
loggedEvents=[]
(encoding, decoded) = xmlEncoding.decode(ctAX, 'UTF-8', '', loggedEvents)
self.assertEquals('UTF-8', encoding)
self._assertEqualUnicode(decoded, u'')
self.assertEqual(loggedEvents, [])
loggedEvents=[]
(encoding, decoded) = xmlEncoding.decode(ctAX, 'UTF-8', '', loggedEvents)
self.assertEquals('UTF-8', encoding)
self._assertEqualUnicode(decoded, u'')
self.assertEquals(loggedEvents, [])
def testNoDeclarationOrBOM(self):
loggedEvents=[]
self.assertEquals(xmlEncoding.decode(ctAX, None, '', loggedEvents)[-1], None)
self.assertEquals(len(loggedEvents), 1)
self.assertEquals(loggedEvents[0].__class__, MissingEncoding, "Must warn if there's no clue as to encoding")
# This document is currently detected as UTF-8, rather than None.
#
# def testMissingEncodingDeclaration(self):
# loggedEvents=[]
# self._assertEqualUnicode(xmlEncoding.decode(ctAX, None, '', loggedEvents), u'')
# self.assertEquals(len(loggedEvents), 1)
# self.assertEquals(loggedEvents[0].__class__, MissingEncoding, "Must warn if there's no clue as to encoding")
def testJustDeclaration(self):
loggedEvents=[]
(encoding, decoded) = xmlEncoding.decode(ctAX, None, '', loggedEvents)
self.assertEquals(encoding, 'utf-8')
self._assertEqualUnicode(decoded, u'')
self.assertEquals(loggedEvents, [])
def testSupplyUnknownEncoding(self):
loggedEvents=[]
self.assertEquals(xmlEncoding.decode(ctAX, 'X-FAKE', '', loggedEvents)[-1], None)
self.assertEquals(len(loggedEvents), 1)
self.assertEquals(loggedEvents[0].__class__, UnknownEncoding, 'Must fail if an unknown encoding is used')
def testDeclareUnknownEncoding(self):
loggedEvents=[]
self.assertEquals(xmlEncoding.decode(ctAX, None, '', loggedEvents)[-1], None)
self.assert_(loggedEvents)
self.assertEquals(loggedEvents[-1].__class__, UnknownEncoding)
def testWarnMismatch(self):
loggedEvents=[]
self.assertEquals(xmlEncoding.decode(ctAX, 'US-ASCII', '', loggedEvents)[-1], u'')
self.assert_(loggedEvents)
self.assertEquals(loggedEvents[-1].__class__, EncodingMismatch)
def testDecodeUTF8(self):
loggedEvents=[]
self.assertEquals(xmlEncoding.decode(ctAX, 'utf-8', '\xc2\xa3', loggedEvents)[-1], u'\u00a3')
self.assertEquals(loggedEvents, [])
def testDecodeBadUTF8(self):
"""Ensure bad UTF-8 is flagged as such, but still decoded."""
loggedEvents=[]
self.assertEquals(xmlEncoding.decode(ctAX, 'utf-8', '\xa3', loggedEvents)[-1], u'\ufffd')
self.assert_(loggedEvents)
self.assertEquals(loggedEvents[-1].__class__, UnicodeError)
def testRemovedBOM(self):
"""Make sure the initial BOM signature is not in the decoded string."""
loggedEvents=[]
    self.assertEquals(xmlEncoding.decode(ctAX, 'UTF-16', '\xff\xfe\x3c\x00\x78\x00\x2f\x00\x3e\x00', loggedEvents)[-1], u'<x/>')
self.assertEquals(loggedEvents, [])
class TestRemoveDeclaration(unittest.TestCase):
def testRemoveSimple(self):
self.assertEqual(xmlEncoding.removeDeclaration(
''),
'')
self.assertEqual(xmlEncoding.removeDeclaration(
""),
"")
def testNotRemoved(self):
"""Make sure that invalid, or missing, declarations aren't affected."""
for x in [
'', # Missing version
'', # No declaration
' ' # Space before declaration
]:
self.assertEqual(xmlEncoding.removeDeclaration(x), x)
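# A standalone sketch of the decode() contract exercised by TestDecode above,
# using the same UTF-16 bytes as testRemovedBOM; nothing calls this, it is
# illustrative only.  (That the returned encoding echoes the supplied one
# follows testProvidedEncoding.)
def _exampleDecode():
  events = []
  (enc, text) = xmlEncoding.decode(ctAX, 'UTF-16',
      '\xff\xfe\x3c\x00\x78\x00\x2f\x00\x3e\x00', events)
  # text is u'<x/>' with the BOM stripped; events stays empty
  return enc, text, events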
def buildTestSuite():
suite = unittest.TestSuite()
loader = unittest.TestLoader()
suite.addTest(loader.loadTestsFromTestCase(TestDecode))
suite.addTest(loader.loadTestsFromTestCase(TestRemoveDeclaration))
return suite
if __name__ == "__main__":
unittest.main()
python-feedvalidator-0~svn1022/tests/testUri.py
#!/usr/bin/python
"""$Id: testUri.py 988 2008-03-12 18:22:48Z sa3ruby $"""
__author__ = "Joseph Walton "
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2004 Joseph Walton"
import os, sys
curdir = os.path.abspath(os.path.dirname(sys.argv[0]))
srcdir = os.path.split(curdir)[0]
if srcdir not in sys.path:
sys.path.insert(0, srcdir)
basedir = os.path.split(srcdir)[0]
import unittest
class UriTest(unittest.TestCase):
pass
testsEqual = [
['http://example.com/', 'http://example.com'],
['HTTP://example.com/', 'http://example.com/'],
['http://example.com/', 'http://example.com:/'],
['http://example.com/', 'http://example.com:80/'],
['http://example.com/', 'http://Example.com/'],
['http://example.com/~smith/', 'http://example.com/%7Esmith/'],
['http://example.com/~smith/', 'http://example.com/%7esmith/'],
['http://example.com/%7Esmith/', 'http://example.com/%7esmith/'],
['http://example.com/%C3%87', 'http://example.com/C%CC%A7'],
['tag:example.com,2004:Test', 'TAG:example.com,2004:Test'],
['ftp://example.com/', 'ftp://EXAMPLE.COM/'],
['ftp://example.com/', 'ftp://example.com:21/'],
['mailto:user@example.com', 'mailto:user@EXAMPLE.COM'],
['../%C3%87', '../C%CC%A7'],
]
testsDifferent = [
['http://example.com/', 'http://example.org/'],
['http://example.com/index.html', 'http://example.com'],
['FTP://example.com/', 'http://example.com/'],
['http://example.com/', 'http://example.com:8080/'],
['http://example.com:8080/', 'http://example.com:80/'],
['http://example.com/index.html', 'http://example.com/INDEX.HTML'],
['http://example.com/~smith/', 'http://example.com/%7Esmith'],
['http://example.com/~smith/', 'http://example.com/%2fsmith/'],
['http://user:password@example.com/', 'http://USER:PASSWORD@example.com/'],
# Not a valid HTTP URL
['http://example.com:x', 'http://example.com/'],
['tag:example.com,2004:Test', 'tag:EXAMPLE.COM,2004:Test'],
['tag:user@example.com,2004:Test', 'tag:user@EXAMPLE.COM,2004:Test'],
['tag:example.com,2004:test', 'Tag:example.com,2004:TEST'],
['tag:example.com,2004:Test', 'Tag:example.com,2004-01:Test'],
['tag:user@example.com,2004:Test', 'tag:USER@example.com,2004:Test'],
['ftp://example.com/', 'ftp://example.com/test'],
['mailto:user@example.com', 'mailto:USER@example.com'],
['mailto:user@example.com?subject=test', 'mailto:user@example.com?subject=TEST']
]
# Examples from PaceCanonicalIds
testsCanonical = [
['HTTP://example.com/', 'http://example.com/'],
['http://EXAMPLE.COM/', 'http://example.com/'],
['http://example.com/%7Ejane', 'http://example.com/~jane'],
['http://example.com/?q=1%2f2', 'http://example.com/?q=1%2F2'],
['http://example.com/?q=1/2'],
['http://example.com/a/./b', 'http://example.com/a/b'],
['http://example.com/a/../a/b', 'http://example.com/a/b'],
['http://user:password@example.com/', 'http://user:password@example.com/'],
['http://User:Password@Example.com/', 'http://User:Password@example.com/'],
['http://@example.com/', 'http://example.com/'],
['http://@Example.com/', 'http://example.com/'],
['http://:@example.com/', 'http://example.com/'],
['http://:@Example.com/', 'http://example.com/'],
['http://example.com', 'http://example.com/'],
['http://example.com:80/', 'http://example.com/'],
['http://www.w3.org/2000/01/rdf-schema#'],
['http://example.com/?q=C%CC%A7', 'http://example.com/?q=%C3%87'],
['http://example.com/?q=%E2%85%A0'],
['http://example.com/?'],
[u'http://example.com/%C3%87'],
# Other tests
['mailto:user@EXAMPLE.COM', 'mailto:user@example.com'],
['TAG:example.com,2004:Test', 'tag:example.com,2004:Test'],
['ftp://Example.Com:21/', 'ftp://example.com/'],
['http://example.com/?q=%E2%85%A0'],
['ldap://[2001:db8::7]/c=GB?objectClass?one'],
['mailto:John.Doe@example.com'],
['news:comp.infosystems.www.servers.unix'],
['tel:+1-816-555-1212'],
['telnet://192.0.2.16:80/'],
['urn:oasis:names:specification:docbook:dtd:xml:4.1.2'],
['http://example.com:081/', 'http://example.com:81/'],
['/test#test#test', '/test#test%23test'],
['http://com./'],
['http://example.com./', 'http://example.com/'],
['http://www.example.com//a//', 'http://www.example.com//a//'],
['http://www.example.com/./a//', 'http://www.example.com/a//'],
['http://www.example.com//a/./', 'http://www.example.com//a/'],
['http://example.com/%2F/'],
["aa1+-.:///?a1-._~!$&'()*+,;=:@/?#a1-._~!$&'()*+,;=:@/?"],
['http://example.com/?a+b'],
['http://a/b/c/../../../../g', 'http://a/g'],
['/.foo', '/.foo'],
['/foo/bar/.', '/foo/bar/'],
['/foo/bar/..', '/foo/'],
['http:test'],
['tag:'],
['file://', 'file:///'],
['telnet://example.com:23/', 'telnet://example.com/'],
['x://:@a/', 'x://a/'],
['tag:www.stanleysy.com,2005://1.119'],
['tag:timothy@hpl.hp.com,2001:web/externalHome'],
['http://xxx/read?id=abc%26x%3Dz&x=y'],
['tag:www.stanleysy.com,2005:%2F%2F1.119'],
# IPv6 literals should be accepted
['http://[fe80::290:4bff:fe1e:4374]/tests/atom/ipv6/'],
['http://[fe80::290:4bff:fe1e:4374]:80/tests/atom/ipv6/',
'http://[fe80::290:4bff:fe1e:4374]/tests/atom/ipv6/'],
['http://[fe80::290:4bff:fe1e:4374]:8080/tests/atom/ipv6/'],
['http://[fe80::290:4bff:fe1e:4374]:/tests/atom/ipv6/',
'http://[fe80::290:4bff:fe1e:4374]/tests/atom/ipv6/'],
]
# These are invalid URI references, but we can still sensibly
# normalise them
testNormalisableBadUris = [
['http://example.com/\\/', 'http://example.com/%5C/'],
['http://example.com/?a b', 'http://example.com/?a%20b'],
]
testsInvalid = [
# This URI is not in canonical form, and cannot be normalised
  'http://example.com/?q=%C7',
# Don't try to deal with relative URI references
'foo/../bar',
'./http://',
'./\\/',
# Bad IPv6 literals
'http://fe80::290:4bff:fe1e:4374]/tests/atom/ipv6/',
'http://[fe80::290:4bff:fe1e:4374/tests/atom/ipv6/',
]
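# Read concretely, the tables above say that feedvalidator.uri.canonicalForm()
# (exercised below) behaves like this; single-entry rows are already canonical:
#
#   canonicalForm('HTTP://example.com/')      -> 'http://example.com/'
#   canonicalForm('http://example.com/a/./b') -> 'http://example.com/a/b'
#   canonicalForm('./http://')                -> None   (a testsInvalid entry)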
import feedvalidator.uri
from feedvalidator.validators import rfc2396
def buildTestSuite():
i = 0
for t in testsEqual:
i+=1
def tstEqual(self, a, b):
self.assertEqual(feedvalidator.uri.Uri(a), feedvalidator.uri.Uri(b))
func = lambda self, a=t[0], b=t[1]: tstEqual(self, a, b)
func.__doc__ = 'Test ' + t[0] + " == " + t[1]
setattr(UriTest, 'test' + str(i), func)
for t in testsDifferent:
i+=1
def tstDifferent(self, a, b):
self.assertNotEqual(feedvalidator.uri.Uri(a), feedvalidator.uri.Uri(b))
func = lambda self, a=t[0], b=t[1]: tstDifferent(self, a, b)
func.__doc__ = 'Test ' + t[0] + " != " + t[1]
setattr(UriTest, 'test' + str(i), func)
for t in testsCanonical + testNormalisableBadUris:
i+=1
o = t[0]
if len(t) > 1:
c = t[1]
else:
c = o
def tstCanonicalForm(self, a, b):
cf = feedvalidator.uri.canonicalForm(a)
self.assertEqual(cf, b, 'Became: ' + str(cf))
func = lambda self, a=o, b=c: tstCanonicalForm(self, a, b)
func.__doc__ = 'Test ' + o + ' becomes ' + c
setattr(UriTest, 'test' + str(i), func)
for a in testsInvalid:
i+= 1
def tstCanFindCanonicalForm(self, a):
self.assertEquals(feedvalidator.uri.canonicalForm(a), None)
func = lambda self, a=a: tstCanFindCanonicalForm(self, a)
func.__doc__ = 'Test ' + a + ' cannot be canonicalised'
setattr(UriTest, 'test' + str(i), func)
# Test everything against the rfc2396 matcher
r2 = feedvalidator.validators.rfc2396()
for t in testsEqual + testsDifferent + testsCanonical:
i+=1
def tstMatchesRe(self, a):
self.assertTrue(r2.rfc2396_re.match(a))
func = lambda self, a=t[0]: tstMatchesRe(self, a)
func.__doc__ = 'Test ' + t[0] + ' is matched by the URI regular expression'
setattr(UriTest, 'test' + str(i), func)
return unittest.TestLoader().loadTestsFromTestCase(UriTest)
if __name__ == '__main__':
buildTestSuite()
unittest.main()
python-feedvalidator-0~svn1022/tests/config.py
from os import environ
# This is a test config, used by the runtests script, to ensure check.cgi
# runs without requiring a web server.
HOMEURL = 'http://localhost/check'
PYDIR = '/usr/lib/python/'
WEBDIR = environ['FEEDVALIDATOR_HOME']
SRCDIR = WEBDIR + '/src'
DOCSURL = 'docs'
CSSURL = 'css'
python-feedvalidator-0~svn1022/tests/testXmlEncoding.py
#!/usr/bin/python
"""$Id: testXmlEncoding.py 988 2008-03-12 18:22:48Z sa3ruby $
Test XML character decoding against a range of encodings, valid and not."""
__author__ = "Joseph Walton "
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2004, 2006 Joseph Walton"
import os, sys
import codecs
import re
curdir = os.path.abspath(os.path.dirname(__file__))
srcdir = os.path.split(curdir)[0]
if srcdir not in sys.path:
sys.path.insert(0, srcdir)
basedir = os.path.split(srcdir)[0]
skippedNames = []
import unittest, new, glob, re
from feedvalidator import xmlEncoding
class EncodingTestCase(unittest.TestCase):
def testEncodingMatches(self):
try:
enc = xmlEncoding.detect(self.bytes)
except UnicodeError,u:
self.fail("'" + self.filename + "' should not cause an exception (" + str(u) + ")")
self.assert_(enc, 'An encoding must be returned for all valid files ('
+ self.filename + ')')
self.assertEqual(enc, self.expectedEncoding, 'Encoding for '
+ self.filename + ' should be ' + self.expectedEncoding + ', but was ' + enc)
def testEncodingFails(self):
eventLog = []
try:
encoding = xmlEncoding.detect(self.bytes, eventLog)
except UnicodeError,u:
self.fail("'" + self.filename + "' should not cause an exception (" + str(u) + ")")
if encoding:
self.fail("'" + self.filename + "' should not parse successfully (as " + encoding + ")")
if not(eventLog):
self.fail("'" + self.filename + "' should give a reason for parse failure")
bom8='\xEF\xBB\xBF'
bom16BE='\xFE\xFF'
bom16LE='\xFF\xFE'
bom32BE='\x00\x00\xFE\xFF'
bom32LE='\xFF\xFE\x00\x00'
# Some fairly typical Unicode text. It should survive XML roundtripping.
docText=u'\u201c"This\uFEFF" is\na\r\u00A3t\u20Acst\u201D'
validDecl = re.compile('[A-Za-z][-A-Za-z0-9._]*')
def makeDecl(enc=None):
  if enc:
    assert validDecl.match(enc), "'" + enc + "' is not a valid encoding name"
    return "<?xml version='1.0' encoding='" + enc + "'?>"
  else:
    return "<?xml version='1.0'?>"
def encoded(enc, txt=docText):
return codecs.getencoder(enc)(txt, 'xmlcharrefreplace')[0]
def genValidXmlTestCases():
someFailed = False
# Required
yield('UTF-8', ['BOM', 'declaration'],
bom8 + makeDecl('UTF-8') + encoded('UTF-8'))
yield('UTF-8', [],
encoded('UTF-8'))
yield('UTF-8', ['noenc'],
makeDecl() + encoded('UTF-8'))
yield('UTF-8', ['declaration'],
makeDecl('UTF-8') + encoded('UTF-8'))
yield('UTF-8', ['BOM'],
bom8 + encoded('UTF-8'))
yield('UTF-8', ['BOM', 'noenc'],
bom8 + makeDecl('UTF-8') + encoded('UTF-8'))
yield('UTF-16', ['BOM', 'declaration', 'BE'],
bom16BE + encoded('UTF-16BE', makeDecl('UTF-16') + docText))
yield('UTF-16', ['BOM', 'declaration', 'LE'],
bom16LE + encoded('UTF-16LE', makeDecl('UTF-16') + docText))
yield('UTF-16', ['BOM', 'BE'],
bom16BE + encoded('UTF-16BE'))
yield('UTF-16', ['BOM', 'BE', 'noenc'],
bom16BE + encoded('UTF-16BE', makeDecl() + docText))
yield('UTF-16', ['BOM', 'LE'],
bom16LE + encoded('UTF-16LE'))
yield('UTF-16', ['BOM', 'LE', 'noenc'],
bom16LE + encoded('UTF-16LE', makeDecl() + docText))
yield('UTF-16', ['declaration', 'BE'],
encoded('UTF-16BE', makeDecl('UTF-16') + docText))
yield('UTF-16', ['declaration', 'LE'],
encoded('UTF-16LE', makeDecl('UTF-16') + docText))
# Standard wide encodings
try:
yield('ISO-10646-UCS-2', ['BOM', 'declaration', 'BE'],
bom16BE + encoded('UCS-2BE', makeDecl('ISO-10646-UCS-2') + docText))
yield('ISO-10646-UCS-2', ['BOM', 'declaration', 'LE'],
bom16LE + encoded('UCS-2LE', makeDecl('ISO-10646-UCS-2') + docText))
yield('UTF-32', ['BOM', 'declaration', 'BE'],
bom32BE + encoded('UTF-32BE', makeDecl('UTF-32') + docText))
yield('UTF-32', ['BOM', 'declaration', 'LE'],
bom32LE + encoded('UTF-32LE', makeDecl('UTF-32') + docText))
yield('UTF-32', ['declaration', 'BE'],
encoded('UTF-32BE', makeDecl('UTF-32') + docText))
yield('UTF-32', ['declaration', 'LE'],
encoded('UTF-32LE', makeDecl('UTF-32') + docText))
yield('ISO-10646-UCS-4', ['BOM', 'declaration', 'BE'],
bom32BE + encoded('UCS-4BE', makeDecl('ISO-10646-UCS-4') + docText))
yield('ISO-10646-UCS-4', ['BOM', 'declaration', 'LE'],
bom32LE + encoded('UCS-4LE', makeDecl('ISO-10646-UCS-4') + docText))
except LookupError, e:
print e
someFailed = True
# Encodings that don't have BOMs, and require declarations
withDeclarations = [
# Common ASCII-compatible encodings
'US-ASCII', 'ISO-8859-1', 'ISO-8859-15', 'WINDOWS-1252',
# EBCDIC
'IBM037', 'IBM038',
# Encodings with explicit endianness
'UTF-16BE', 'UTF-16LE',
'UTF-32BE', 'UTF-32LE',
# (UCS doesn't seem to define endian'd encodings)
]
for enc in withDeclarations:
try:
yield(enc, ['declaration'], encoded(enc, makeDecl(enc) + docText))
except LookupError, e:
print e
someFailed = True
# 10646-UCS encodings, with no BOM but with a declaration
try:
yield('ISO-10646-UCS-2', ['declaration', 'BE'],
encoded('UCS-2BE', makeDecl('ISO-10646-UCS-2') + docText))
yield('ISO-10646-UCS-2', ['declaration', 'LE'],
encoded('UCS-2LE', makeDecl('ISO-10646-UCS-2') + docText))
yield('ISO-10646-UCS-4', ['declaration', 'BE'],
encoded('UCS-4BE', makeDecl('ISO-10646-UCS-4') + docText))
yield('ISO-10646-UCS-4', ['declaration', 'LE'],
bom32LE + encoded('UCS-4LE', makeDecl('ISO-10646-UCS-4') + docText))
except LookupError, e:
print e
someFailed = True
# Files with aliases for declarations. The declared alias should be
# reported back, rather than the canonical form.
try:
yield('csUnicode', ['alias', 'BOM', 'BE'],
bom16BE + encoded('UCS-2BE', makeDecl('csUnicode') + docText))
yield('csUnicode', ['alias', 'LE'],
encoded('UCS-2LE', makeDecl('csUnicode') + docText))
yield('csucs4', ['alias', 'BE'],
encoded('csucs4', makeDecl('csucs4') + docText))
except LookupError, e:
print e
someFailed = True
if someFailed:
print "Unable to generate some tests; see README for details"
def genInvalidXmlTestCases():
# Invalid files
someFailed = False
# UTF-32 with a non-four-byte declaration
try:
yield('UTF-32', ['BOM', 'BE', 'declaration'],
encoded('UTF-32', makeDecl('US-ASCII') + docText))
except LookupError, e:
print e
someFailed = True
# UTF-16 with a non-two-byte declaration
yield('UTF-16', ['BOM', 'BE', 'declaration'],
encoded('UTF-16', makeDecl('UTF-8') + docText))
# UTF-16BE, with a BOM
yield('UTF-16BE', ['BOM', 'declaration'],
bom16BE + encoded('UTF-16BE', makeDecl('UTF-16BE') + docText))
# UTF-8, with a BOM, declaring US-ASCII
yield('UTF-8', ['BOM', 'declaration'],
bom8 + encoded('UTF-8', makeDecl('US-ASCII') + docText))
try:
# UTF-32, with a BOM, beginning without a declaration
yield('UTF-32', ['BOM', 'BE'],
bom32BE + encoded('UTF-32BE'))
# UTF-32, with a BOM, and a declaration with no encoding
yield('UTF-32', ['BOM', 'BE', 'noenc'],
bom32BE + encoded('UTF-32BE', makeDecl() + docText))
except LookupError, e:
print e
someFailed = True
# UTF-16, no BOM, no declaration
# yield('UTF-16', ['BE'], encoded('UTF-16BE'))
# This case falls through, and is identified as UTF-8; leave it out
# until we're doing decoding as well as detection.
if someFailed:
print "Unable to generate some tests; see README for details"
def genXmlTestCases():
for (enc, t, x) in genValidXmlTestCases():
yield (enc, t, x, True)
for (enc, t, x) in genInvalidXmlTestCases():
yield (enc, t, x, False)
def buildTestSuite():
import codecs
suite = unittest.TestSuite()
for (enc, t, x, valid) in genXmlTestCases():
t.sort()
if valid: pfx = 'valid_'
else: pfx = 'invalid_'
name = pfx + '_'.join([enc] + t) + '.xml'
# name, x is content
try:
alias = enc
if enc.startswith('ISO-10646-'):
alias = enc[10:]
c = codecs.lookup(alias)
if valid:
t = EncodingTestCase('testEncodingMatches')
t.expectedEncoding = enc
else:
t = EncodingTestCase('testEncodingFails')
t.filename = name
t.bytes = x
suite.addTest(t)
except LookupError,e:
print "Skipping " + name + ": " + str(e)
skippedNames.append(name)
return suite
if __name__ == "__main__":
s = buildTestSuite()
unittest.TextTestRunner().run(s)
if skippedNames:
print "Tests skipped:",len(skippedNames)
print "Please see README for details"
python-feedvalidator-0~svn1022/tests/testHowtoNs.py
#!/usr/bin/python
import os, sys, unittest
curdir = os.path.abspath(os.path.dirname(sys.argv[0]))
srcdir = os.path.split(curdir)[0]
if srcdir not in sys.path:
sys.path.insert(0, srcdir)
basedir = os.path.split(srcdir)[0]
from feedvalidator.base import namespaces
from os.path import dirname,join
class HowtoNsTest(unittest.TestCase):
def test_howto_declare_namespaces(self):
base=dirname(dirname(dirname(os.path.abspath(__file__))))
filename=join(join(join(base,'docs'),'howto'),'declare_namespaces.html')
handle=open(filename)
page=handle.read()
handle.close()
for uri,prefix in namespaces.items():
if prefix=='xml': continue
if prefix=='soap': continue
if uri.find('ModWiki')>0: continue
xmlns = 'xmlns:%s="%s"' % (prefix,uri)
self.assertTrue(page.find(xmlns)>=0,xmlns)
def buildTestSuite():
suite = unittest.TestSuite()
loader = unittest.TestLoader()
suite.addTest(loader.loadTestsFromTestCase(HowtoNsTest))
return suite
if __name__ == '__main__':
unittest.main()
python-feedvalidator-0~svn1022/ws-demo.py
#!/usr/bin/python
# This is a simple demo of validation through the web service.
WS_HOST = 'www.feedvalidator.org'
WS_URI = '/check.cgi'
import urllib, httplib
from xml.dom import minidom
from sys import exit
# Fetch the feed to validate
rawData = open('../testcases/rss/may/image_height_recommended.xml').read()
# Specify the content type, including the charset if known
hdrs = {'Content-Type': 'application/xml'}
# Simply POST the feed contents to the validator URL
connection=httplib.HTTPConnection(WS_HOST, 80)
connection.request('POST', WS_URI, rawData, hdrs)
response=connection.getresponse()
# The response is a SOAP message, as XML (otherwise there's a problem
# with the validator)
try:
document=minidom.parseString(response.read())
except:
print "Server error, unable to validate:",response.status,response.reason
print "(Unable to parse response as XML.)"
exit(20)
# If the status is OK, validation took place.
if response.status == 200:
errors = document.getElementsByTagName("text")
if not errors:
print "The feed is valid!"
exit(0)
else:
# Errors were found
for node in errors:
print "".join([child.data for child in node.childNodes])
exit(5)
# If there was a problem on the server, show details
elif response.status >= 500:
errors = document.getElementsByTagName("faultstring")
for node in errors:
print "".join([child.data for child in node.childNodes])
traceback = document.getElementsByTagNameNS("http://www.python.org/doc/current/lib/module-traceback.html", "traceback")
if traceback:
print "".join([child.data for child in traceback[0].childNodes])
exit(10)
# The unexpected happened...
else:
print "Unexpected server response:",response.status,response.reason
exit(20)
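# Exit codes used above: 0 = the feed validated cleanly, 5 = validator messages
# were printed, 10 = the server reported a fault, 20 = the response could not
# be parsed or had an unexpected status.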
python-feedvalidator-0~svn1022/index.py
import feedvalidator
import sys
def escapeURL(url):
import cgi, urllib, urlparse
parts = map(urllib.quote, map(urllib.unquote, urlparse.urlparse(url)))
return cgi.escape(urlparse.urlunparse(parts))
def sanitizeURL(url):
# Allow feed: URIs, as described by draft-obasanjo-feed-URI-scheme-02
if url.lower().startswith('feed:'):
url = url[5:]
if url.startswith('//'):
url = 'http:' + url
if not url.split(':')[0].lower() in ['http','https']:
url = 'http://%s' % url
url = url.strip()
# strip user and password
import re
url = re.sub(r'^(\w*://)[-+.\w]*(:[-+.\w]+)?@', r'\1' ,url)
return url
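# For example, the rules above give:
#   sanitizeURL('feed://example.com/atom')     -> 'http://example.com/atom'
#   sanitizeURL('example.com/rss')             -> 'http://example.com/rss'
#   sanitizeURL('http://user:pw@example.com/') -> 'http://example.com/'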
def index(req,url="",out="xml"):
if not url:
s = """
Feed Validator
Enter the URL to validate:
"""
return s
url = sanitizeURL(url)
events = feedvalidator.validateURL(url, firstOccurrenceOnly=1)['loggedEvents']
# (optional) arg 2 is compatibility level
# "A" is most basic level
# "AA" mimics online validator
# "AAA" is experimental; these rules WILL change or disappear in future versions
from feedvalidator import compatibility
filter = "AA"
filterFunc = getattr(compatibility, filter)
events = filterFunc(events)
if out == "html":
s = "Validating " + escapeURL(url) + "...
"
from feedvalidator.formatter.text_plain import Formatter
output = Formatter(events)
if output:
s += "\n".join(output)
else:
s += "No errors or warnings"
s += "
"
return s
else:
from feedvalidator.formatter.text_xml import Formatter
s = "\n".join(Formatter(events)) or ""
s = '\n\n' + s + ""
req.content_type = "application/xml"
return s
if __name__=="__main__":
import sys
for url in sys.argv[1:]:
print index(0,url=url,out="html")
python-feedvalidator-0~svn1022/feedvalidator/root.py
"""$Id: root.py 1013 2008-05-19 21:49:34Z joe.walton.gglcd $"""
__author__ = "Sam Ruby and Mark Pilgrim "
__version__ = "$Revision: 1013 $"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
from base import validatorBase
rss11_namespace='http://purl.org/net/rss1.1#'
purl1_namespace='http://purl.org/rss/1.0/'
soap_namespace='http://feeds.archive.org/validator/'
pie_namespace='http://purl.org/atom/ns#'
atom_namespace='http://www.w3.org/2005/Atom'
opensearch_namespace='http://a9.com/-/spec/opensearch/1.1/'
xrds_namespace='xri://$xrds'
kml20_namespace='http://earth.google.com/kml/2.0'
kml21_namespace='http://earth.google.com/kml/2.1'
kml22_namespace='http://earth.google.com/kml/2.2'
#
# Main document.
# Supports rss, rdf, pie, kml, and ffkar
#
class root(validatorBase):
def __init__(self, parent, base):
validatorBase.__init__(self)
self.parent = parent
self.dispatcher = parent
self.name = "root"
self.xmlBase = base
self.xmlLang = None
def startElementNS(self, name, qname, attrs):
if name=='rss':
if qname:
from logging import InvalidNamespace
self.log(InvalidNamespace({"parent":"root", "element":name, "namespace":qname}))
self.dispatcher.defaultNamespaces.append(qname)
if name=='feed' or name=='entry':
if self.namespace.has_key('atom'):
from logging import AvoidNamespacePrefix
self.log(AvoidNamespacePrefix({'prefix':'atom'}))
if self.namespace.has_key('xhtml'):
from logging import AvoidNamespacePrefix
self.log(AvoidNamespacePrefix({'prefix':'xhtml'}))
if qname==pie_namespace:
from logging import ObsoleteNamespace
self.log(ObsoleteNamespace({"element":"feed"}))
self.dispatcher.defaultNamespaces.append(pie_namespace)
from logging import TYPE_ATOM
self.setFeedType(TYPE_ATOM)
elif not qname:
from logging import MissingNamespace
self.log(MissingNamespace({"parent":"root", "element":name}))
else:
if name=='feed':
from logging import TYPE_ATOM
self.setFeedType(TYPE_ATOM)
else:
from logging import TYPE_ATOM_ENTRY
self.setFeedType(TYPE_ATOM_ENTRY)
self.dispatcher.defaultNamespaces.append(atom_namespace)
if qname<>atom_namespace:
from logging import InvalidNamespace
self.log(InvalidNamespace({"parent":"root", "element":name, "namespace":qname}))
self.dispatcher.defaultNamespaces.append(qname)
if name=='Channel':
if not qname:
from logging import MissingNamespace
self.log(MissingNamespace({"parent":"root", "element":name}))
elif qname != rss11_namespace :
from logging import InvalidNamespace
self.log(InvalidNamespace({"parent":"root", "element":name, "namespace":qname}))
else:
self.dispatcher.defaultNamespaces.append(qname)
from logging import TYPE_RSS1
self.setFeedType(TYPE_RSS1)
if name=='kml':
from logging import TYPE_KML20, TYPE_KML21, TYPE_KML22
self.dispatcher.defaultNamespaces.append(qname)
if not qname:
from logging import MissingNamespace
self.log(MissingNamespace({"parent":"root", "element":name}))
qname = kml20_namespace
feedType = TYPE_KML20
elif qname == kml20_namespace:
feedType = TYPE_KML20
elif qname == kml21_namespace:
feedType = TYPE_KML21
elif qname == kml22_namespace:
feedType = TYPE_KML22
elif qname != kml20_namespace and qname != kml21_namespace and qname != kml22_namespace:
from logging import InvalidNamespace
self.log(InvalidNamespace({"element":name, "namespace":qname}))
qname = kml22_namespace
feedType = TYPE_KML22
self.setFeedType(feedType)
if name=='OpenSearchDescription':
if not qname:
from logging import MissingNamespace
self.log(MissingNamespace({"parent":"root", "element":name}))
qname = opensearch_namespace
elif qname != opensearch_namespace:
from logging import InvalidNamespace
self.log(InvalidNamespace({"element":name, "namespace":qname}))
self.dispatcher.defaultNamespaces.append(qname)
qname = opensearch_namespace
if name=='XRDS':
from logging import TYPE_XRD
self.setFeedType(TYPE_XRD)
if not qname:
from logging import MissingNamespace
self.log(MissingNamespace({"parent":"root", "element":name}))
qname = xrds_namespace
elif qname != xrds_namespace:
from logging import InvalidNamespace
self.log(InvalidNamespace({"element":name, "namespace":qname}))
self.dispatcher.defaultNamespaces.append(qname)
qname = xrds_namespace
validatorBase.startElementNS(self, name, qname, attrs)
def unknown_starttag(self, name, qname, attrs):
from logging import ObsoleteNamespace,InvalidNamespace,UndefinedElement
if qname in ['http://example.com/newformat#','http://purl.org/atom/ns#']:
self.log(ObsoleteNamespace({"element":name, "namespace":qname}))
elif name=='feed':
self.log(InvalidNamespace({"element":name, "namespace":qname}))
else:
self.log(UndefinedElement({"parent":"root", "element":name}))
from validators import any
return any(self, name, qname, attrs)
def do_rss(self):
from rss import rss
return rss()
def do_feed(self):
from feed import feed
if pie_namespace in self.dispatcher.defaultNamespaces:
from validators import eater
return eater()
return feed()
def do_entry(self):
from entry import entry
return entry()
def do_app_categories(self):
from logging import TYPE_APP_CATEGORIES
self.setFeedType(TYPE_APP_CATEGORIES)
from categories import categories
return categories()
def do_app_service(self):
from logging import TYPE_APP_SERVICE
self.setFeedType(TYPE_APP_SERVICE)
from service import service
return service()
def do_kml(self):
from kml import kml
return kml()
def do_opml(self):
from opml import opml
return opml()
def do_outlineDocument(self):
from logging import ObsoleteVersion
self.log(ObsoleteVersion({"element":"outlineDocument"}))
from opml import opml
return opml()
def do_opensearch_OpenSearchDescription(self):
import opensearch
self.dispatcher.defaultNamespaces.append(opensearch_namespace)
from logging import TYPE_OPENSEARCH
self.setFeedType(TYPE_OPENSEARCH)
return opensearch.OpenSearchDescription()
def do_xrds_XRDS(self):
from xrd import xrds
return xrds()
def do_rdf_RDF(self):
from rdf import rdf
self.dispatcher.defaultNamespaces.append(purl1_namespace)
return rdf()
def do_Channel(self):
from channel import rss10Channel
return rss10Channel()
def do_soap_Envelope(self):
return root(self, self.xmlBase)
def do_soap_Body(self):
self.dispatcher.defaultNamespaces.append(soap_namespace)
return root(self, self.xmlBase)
def do_request(self):
return root(self, self.xmlBase)
def do_xhtml_html(self):
from logging import UndefinedElement
self.log(UndefinedElement({"parent":"root", "element":"xhtml:html"}))
from validators import eater
return eater()
python-feedvalidator-0~svn1022/feedvalidator/entry.py
"""$Id: entry.py 988 2008-03-12 18:22:48Z sa3ruby $"""
__author__ = "Sam Ruby and Mark Pilgrim "
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
from base import validatorBase
from validators import *
from logging import *
from itunes import itunes_item
from extension import extension_entry
#
# pie/echo entry element.
#
class entry(validatorBase, extension_entry, itunes_item):
def getExpectedAttrNames(self):
return [(u'http://www.w3.org/1999/02/22-rdf-syntax-ns#', u'parseType')]
def prevalidate(self):
self.links=[]
self.content=None
def validate(self):
if not 'title' in self.children:
self.log(MissingElement({"parent":self.name, "element":"title"}))
if not 'author' in self.children and not 'author' in self.parent.children:
self.log(MissingElement({"parent":self.name, "element":"author"}))
if not 'id' in self.children:
self.log(MissingElement({"parent":self.name, "element":"id"}))
if not 'updated' in self.children:
self.log(MissingElement({"parent":self.name, "element":"updated"}))
if self.content:
if not 'summary' in self.children:
if self.content.attrs.has_key((None,"src")):
self.log(MissingSummary({"parent":self.parent.name, "element":self.name}))
ctype = self.content.type
if ctype.find('/') > -1 and not (
ctype.endswith('+xml') or ctype.endswith('/xml') or
ctype.startswith('text/')):
self.log(MissingSummary({"parent":self.parent.name, "element":self.name}))
else:
if not 'summary' in self.children:
self.log(MissingTextualContent({"parent":self.parent.name, "element":self.name}))
for link in self.links:
if link.rel == 'alternate': break
else:
self.log(MissingContentOrAlternate({"parent":self.parent.name, "element":self.name}))
# can only have one alternate per type
types={}
for link in self.links:
if not link.rel=='alternate': continue
if not link.type in types: types[link.type]=[]
if link.hreflang in types[link.type]:
self.log(DuplicateAtomLink({"parent":self.name, "element":"link", "type":link.type, "hreflang":link.hreflang}))
else:
types[link.type] += [link.hreflang]
if self.itunes: itunes_item.validate(self)
def do_author(self):
from author import author
return author()
def do_category(self):
from category import category
return category()
def do_content(self):
from content import content
self.content=content()
return self.content, noduplicates()
def do_contributor(self):
from author import author
return author()
def do_id(self):
return canonicaluri(), nows(), noduplicates(), unique('id',self.parent,DuplicateEntries)
def do_link(self):
from link import link
self.links += [link()]
return self.links[-1]
def do_published(self):
return rfc3339(), nows(), noduplicates()
def do_source(self):
return source(), noduplicates()
def do_rights(self):
from content import textConstruct
return textConstruct(), noduplicates()
def do_summary(self):
from content import textConstruct
return textConstruct(), noduplicates()
def do_title(self):
from content import textConstruct
return textConstruct(), noduplicates()
def do_updated(self):
return rfc3339(), nows(), noduplicates(), unique('updated',self.parent,DuplicateUpdated)
def do_app_edited(self):
return rfc3339(), nows(), noduplicates()
def do_app_control(self):
return app_control(), noduplicates()
class app_control(validatorBase):
def do_app_draft(self):
return yesno(), noduplicates()
from feed import feed
class source(feed):
def missingElement(self, params):
self.log(MissingSourceElement(params))
def validate(self):
self.validate_metadata()
def do_author(self):
if not 'author' in self.parent.children:
self.parent.children.append('author')
return feed.do_author(self)
def do_entry(self):
self.log(UndefinedElement({"parent":self.name, "element":"entry"}))
return eater()
python-feedvalidator-0~svn1022/feedvalidator/rss.py
"""$Id: rss.py 988 2008-03-12 18:22:48Z sa3ruby $"""
__author__ = "Sam Ruby and Mark Pilgrim "
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
from base import validatorBase
from logging import *
from validators import noduplicates
#
# Rss element. The only valid child element is "channel"
#
class rss(validatorBase):
def do_channel(self):
from channel import rss20Channel
return rss20Channel(), noduplicates()
def do_access_restriction(self):
from extension import access_restriction
return access_restriction(), noduplicates()
def getExpectedAttrNames(self):
return [(None, u'version')]
def prevalidate(self):
self.setFeedType(TYPE_RSS2) # could be anything in the 0.9x family, don't really care
self.version = "2.0"
if (None,'version') not in self.attrs.getNames():
self.log(MissingAttribute({"parent":self.parent.name, "element":self.name, "attr":"version"}))
elif [e for e in self.dispatcher.loggedEvents if e.__class__==ValidDoctype]:
self.version = self.attrs[(None,'version')]
if self.attrs[(None,'version')]<>'0.91':
self.log(InvalidDoctype({"parent":self.parent.name, "element":self.name, "attr":"version"}))
else:
self.version = self.attrs[(None,'version')]
if self.version not in ['0.91', '0.92', '2.0']:
self.log(InvalidRSSVersion({"parent":self.parent.name, "element":self.name, "value":self.version}))
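  # In short: <rss version="2.0"> (or "0.91"/"0.92") is accepted; a missing
  # version attribute is logged as MissingAttribute, any version other than
  # 0.91 is logged as InvalidDoctype when a recognised doctype was seen, and
  # otherwise versions outside 0.91/0.92/2.0 are logged as InvalidRSSVersion.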
def validate(self):
if not "channel" in self.children:
self.log(MissingElement({"parent":self.name, "element":"channel"}))
python-feedvalidator-0~svn1022/feedvalidator/category.py
"""$Id: category.py 988 2008-03-12 18:22:48Z sa3ruby $"""
__author__ = "Sam Ruby and Mark Pilgrim "
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
from base import validatorBase
from validators import *
#
# category element.
#
class category(validatorBase):
def getExpectedAttrNames(self):
return [(None,u'term'),(None,u'scheme'),(None,u'label')]
def prevalidate(self):
self.children.append(True) # force warnings about "mixed" content
self.validate_required_attribute((None,'term'), nonblank)
self.validate_optional_attribute((None,'scheme'), rfc3987_full)
self.validate_optional_attribute((None,'label'), nonhtml)
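# A category element satisfying the checks above looks roughly like:
#   <category term="sports" scheme="http://example.com/categories/" label="Sports"/>
# term is required and non-blank, scheme must be an IRI, label plain text.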
python-feedvalidator-0~svn1022/feedvalidator/service.py
from base import validatorBase
from validators import *
from extension import extension_everywhere
class service(validatorBase, extension_everywhere):
def getExpectedAttrNames(self):
return [] # (None,u'scheme'),(None,u'fixed')]
def validate(self):
if not "app_workspace" in self.children:
self.log(MissingElement({"parent":self.name, "element":"app:workspace"}))
def do_app_workspace(self):
return workspace()
class workspace(validatorBase, extension_everywhere):
def validate(self):
if not "atom_title" in self.children:
self.log(MissingElement({"parent":self.name, "element":"atom:title"}))
def do_app_collection(self):
return collection()
def do_atom_title(self):
from content import textConstruct
return textConstruct(), noduplicates()
class collection(validatorBase, extension_everywhere):
def getExpectedAttrNames(self):
return [(None,u'href')]
def prevalidate(self):
self.validate_required_attribute((None,'href'), rfc3987)
def validate(self):
if not "atom_title" in self.children:
self.log(MissingElement({"parent":self.name, "element":"atom:title"}))
def do_atom_title(self):
from content import textConstruct
return textConstruct(), noduplicates()
def do_title(self):
from root import atom_namespace
assert(atom_namespace in self.dispatcher.defaultNamespaces)
self.child = 'atom_title'
return self.do_atom_title()
def do_app_categories(self):
from categories import categories
return categories()
def do_app_accept(self):
from categories import categories
return MediaRange()
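# Roughly the shape of document these classes accept (the app namespace URI
# below is the standard AtomPub one and is assumed, not taken from this file;
# the href is a placeholder):
#
#   <service xmlns="http://www.w3.org/2007/app"
#            xmlns:atom="http://www.w3.org/2005/Atom">
#     <workspace>
#       <atom:title>Main</atom:title>
#       <collection href="http://example.com/entries">
#         <atom:title>Entries</atom:title>
#       </collection>
#     </workspace>
#   </service>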
python-feedvalidator-0~svn1022/feedvalidator/skipHours.py
"""$Id: skipHours.py 988 2008-03-12 18:22:48Z sa3ruby $"""
__author__ = "Sam Ruby and Mark Pilgrim "
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
from base import validatorBase
from validators import text
from logging import *
#
# skipHours element
#
class skipHours(validatorBase):
def __init__(self):
self.hours = []
validatorBase.__init__(self)
def validate(self):
if "hour" not in self.children:
self.log(MissingElement({"parent":self.name, "element":"hour"}))
if len(self.children) > 24:
self.log(NotEnoughHoursInTheDay({}))
def do_hour(self):
return hour()
class hour(text):
def validate(self):
try:
h = int(self.value)
if h in self.parent.hours or (h in [0,24] and 24-h in self.parent.hours):
self.log(DuplicateValue({"parent":self.parent.name, "element":self.name, "value":self.value}))
if (h < 0) or (h > 23):
raise ValueError
else:
self.parent.hours.append(h)
self.log(ValidHour({"parent":self.parent.name, "element":self.name, "value":self.value}))
except ValueError:
if self.value == '24':
self.log(UseZeroForMidnight({"parent":self.parent.name, "element":self.name, "value":self.value}))
else:
self.log(InvalidHour({"parent":self.parent.name, "element":self.name, "value":self.value}))
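# The RSS fragment validated here looks like:
#   <skipHours><hour>0</hour><hour>23</hour></skipHours>
# Integer values 0-23 are accepted (ValidHour), "24" draws UseZeroForMidnight,
# repeats draw DuplicateValue (0 and 24 are treated as the same hour), and
# anything else InvalidHour.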
python-feedvalidator-0~svn1022/feedvalidator/skipDays.py
"""$Id: skipDays.py 988 2008-03-12 18:22:48Z sa3ruby $"""
__author__ = "Sam Ruby and Mark Pilgrim "
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
from base import validatorBase
from validators import text
from logging import *
#
# skipDays element
#
class skipDays(validatorBase):
def __init__(self):
self.days = []
validatorBase.__init__(self)
def validate(self):
if "day" not in self.children:
self.log(MissingElement({"parent":self.name, "element":"day"}))
if len(self.children) > 7:
self.log(EightDaysAWeek({}))
def do_day(self):
return day()
class day(text):
def validate(self):
if self.value not in ('Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'):
self.log(InvalidDay({"parent":self.parent.name, "element":self.name, "value":self.value}))
elif self.value in self.parent.days:
self.log(DuplicateValue({"parent":self.parent.name, "element":self.name, "value":self.value}))
else:
self.parent.days.append(self.value)
self.log(ValidDay({"parent":self.parent.name, "element":self.name, "value":self.value}))
python-feedvalidator-0~svn1022/feedvalidator/mediaTypes.py
"""
$Id: mediaTypes.py 988 2008-03-12 18:22:48Z sa3ruby $
This module deals with valid internet media types for feeds.
"""
__author__ = "Joseph Walton "
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2004 Joseph Walton"
from cgi import parse_header
from logging import *
FEED_TYPES = [
'text/xml', 'application/xml', 'application/rss+xml', 'application/rdf+xml',
'application/atom+xml', 'text/x-opml', 'application/xrds+xml',
'application/opensearchdescription+xml', 'application/vnd.google-earth.kml+xml', 'application/vnd.google-earth.kmz',
'application/atomsvc+xml', 'application/atomcat+xml',
]
# Is the Content-Type correct?
def checkValid(contentType, loggedEvents):
(mediaType, params) = parse_header(contentType)
if mediaType.lower() not in FEED_TYPES:
loggedEvents.append(UnexpectedContentType({"type": "Feeds", "contentType": mediaType}))
if 'charset' in params:
charset = params['charset']
else:
charset = None
return (mediaType, charset)
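# For example (mirroring tests/testMediaTypes.py), checkValid('text/xml; charset=UTF-8', el)
# returns ('text/xml', 'UTF-8') and leaves el untouched, whereas a type outside
# FEED_TYPES such as 'text/plain' also appends an UnexpectedContentType event to el.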
# Warn about mismatches between media type and feed version
def checkAgainstFeedType(mediaType, feedType, loggedEvents):
mtl = mediaType.lower()
if mtl in ['application/x.atom+xml', 'application/atom+xml']:
if feedType not in [TYPE_ATOM, TYPE_ATOM_ENTRY]:
loggedEvents.append(UnexpectedContentType({"type": 'Non-Atom 1.0 feeds', "contentType": mediaType}))
elif mtl == 'application/atomcat+xml':
if feedType != TYPE_APP_CATEGORIES:
loggedEvents.append(UnexpectedContentType({"type": 'Non-AtomPub Category document', "contentType": mediaType}))
elif mtl == 'application/atomsvc+xml':
if feedType != TYPE_APP_SERVICE:
loggedEvents.append(UnexpectedContentType({"type": 'Non-AtomPub Service document', "contentType": mediaType}))
elif mtl == 'application/rdf+xml':
if feedType != TYPE_RSS1:
loggedEvents.append(UnexpectedContentType({"type": 'Non-RSS 1.0 feeds', "contentType": mediaType}))
elif mtl == 'application/rss+xml':
if feedType not in [TYPE_RSS1, TYPE_RSS2]:
loggedEvents.append(UnexpectedContentType({"type": 'Non-RSS feeds', "contentType": mediaType}))
elif mtl == 'text/x-opml':
if feedType not in [TYPE_OPML]:
loggedEvents.append(UnexpectedContentType({"type": 'Non-OPML feeds', "contentType": mediaType}))
elif mtl == 'application/opensearchdescription+xml':
if feedType not in [TYPE_OPENSEARCH]:
loggedEvents.append(UnexpectedContentType({"type": 'Non-OpenSearchDescription documents', "contentType": mediaType}))
elif mtl == 'application/xrds+xml':
if feedType not in [TYPE_XRD]:
loggedEvents.append(UnexpectedContentType({"type": 'Non-Extensible Resource Descriptor documents', "contentType": mediaType}))
elif mtl == 'application/vnd.google-earth.kml+xml':
if feedType not in [TYPE_KML20, TYPE_KML21, TYPE_KML22]:
loggedEvents.append(UnexpectedContentType({"type": 'Non-KML documents', "contentType": mediaType}))
elif mtl == 'application/earthviewer':
loggedEvents.append(InvalidKmlMediaType({"type": 'Non-KML documents', "contentType": mediaType}))
# warn if a non-specific media type is used without a 'marker'
def contentSniffing(mediaType, rawdata, loggedEvents):
if mediaType not in FEED_TYPES: return
if mediaType == 'application/atom+xml': return
if mediaType == 'application/atomcat+xml': return
if mediaType == 'application/atomsvc+xml': return
if mediaType == 'application/rss+xml': return
if mediaType == 'text/x-opml': return
if mediaType == 'application/opensearchdescription+xml': return
if mediaType == 'application/xrds+xml': return
if mediaType == 'application/vnd.google-earth.kml+xml': return
block = rawdata[:512]
  # Root-element markers (the literals below are assumed reconstructions,
  # chosen to match the formats handled above):
  if block.find('<rss') >= 0: return
  if block.find('<feed') >= 0: return
  if block.find('<opml') >= 0: return
  if block.find('<rdf:RDF') >= 0: return
  if block.find('<kml') >= 0: return
  if (block.find('<rdf') >= 0 and
      block.find('http://www.w3.org/1999/02/22-rdf-syntax-ns#') >= 0 and
      block.find( 'http://purl.org/rss/1.0/')): return
from logging import NonSpecificMediaType
loggedEvents.append(NonSpecificMediaType({"contentType": mediaType}))
python-feedvalidator-0~svn1022/feedvalidator/compatibility.py
"""$Id: compatibility.py 988 2008-03-12 18:22:48Z sa3ruby $"""
__author__ = "Sam Ruby and Mark Pilgrim "
__version__ = "$Revision: 988 $"
__copyright__ = "Copyright (c) 2002 Sam Ruby and Mark Pilgrim"
from logging import *
def _must(event):
return isinstance(event, Error)
def _should(event):
return isinstance(event, Warning)
def _may(event):
return isinstance(event, Info)
def A(events):
return [event for event in events if _must(event)]
def AA(events):
return [event for event in events if _must(event) or _should(event)]
def AAA(events):
return [event for event in events if _must(event) or _should(event) or _may(event)]
def AAAA(events):
return events
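# These filters are applied to a validation's 'loggedEvents' by the drivers;
# index.py, for example, does
#
#   filterFunc = getattr(compatibility, "AA")
#   events = filterFunc(events)
#
# keeping only Error and Warning events, while AAA adds Info and AAAA passes
# everything through unfiltered.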
def analyze(events, rawdata):
block = rawdata[0:512].strip().upper()
if block.startswith('