pyfeed-0.7.4/0000755000175000017500000000000010415555336012665 5ustar stevehastevehapyfeed-0.7.4/feed/0000755000175000017500000000000010415552046013563 5ustar stevehastevehapyfeed-0.7.4/feed/date/0000755000175000017500000000000010415552156014502 5ustar stevehastevehapyfeed-0.7.4/feed/date/__init__.py0000644000175000017500000000000010407575434016606 0ustar stevehastevehapyfeed-0.7.4/feed/date/rfc822.py0000644000175000017500000003021510415531426016060 0ustar stevehasteveha# feed.date.rfc822 -- conversion functions for RFC 822 timestamps # This is the BSD license. For more information, see: # http://www.opensource.org/licenses/bsd-license.php # # Copyright (c) 2006, Steve R. Hastings # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # * Neither the name of Steve R. Hastings nor the names # of any contributors may be used to endorse or promote products # derived from this software without specific prior written # permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ Conversion functions to handle RFC 822 timestamp format. These functions actually handle the extended RFC 822 format used in RSS 2.0; four-digit years are permitted (and are the default). "tf" is short for "time float", a float being used as a time value (seconds since the epoch). Always store tf values as UTC values, not local time values. A TF of 0.0 means the epoch in UTC. Please send questions, comments, and bug reports to: pyfeed@langri.com """ import re import time from calendar import timegm from feed.date.tools import tf_utc from feed.date.tools import parse_time_offset module_name = "feed.date.rfc822" module_version = "0.7.4" module_banner = "%s version %s" % (module_name, module_version) # NOTES ON TIME CONVERSIONS # # Most of the time, the tf values will be UTC (aka GMT or Zulu time) # values. Timestamp strings come complete with time offset specifiers, # so when you convert a timestamp to a tf, the time offset will cause an # adjustment to the tf to make it a UTC value. # # Then, we use Python's time conversion functions that work on UTC # values, so we don't get any adjustments for local time. # # Finally, when actually formatting the timestamp string for output, we # calculate the adjustment for the offset value. If you print a # timestamp value with a "Z" offset value, you get no adjustment; if you # use "-0800", you get an 8 hour adjustment; and so on. 
def timestamp_from_tf(tf, time_offset=None):
    """
    Take a tf and return a timestamp string.

    Arguments:
        tf
            a floating-point UTC time value, seconds since the epoch.
        time_offset
            a string specifying an offset from UTC.  Examples:
                z or Z -- offset is 0 ("Zulu" time, UTC, aka GMT)
                PST    -- 8 hours earlier than UTC (Pacific Standard Time)
                -0800  -- 8 hours earlier than UTC
            If None, the module default (s_offset_default) is used.

    Returns:
        An extended RFC 822 timestamp with 4-digit year, followed by the
        offset string exactly as given.
        Example: "Tue, 10 Jun 2003 09:41:01 GMT"

        Returns "" if tf is None, or if the adjusted time value cannot
        be converted to a calendar date.
    """
    if tf is None:
        return ""

    if time_offset is None:
        time_offset = s_offset_default

    # converting from tf to timestamp so *add* time offset
    tf += parse_time_offset(time_offset)

    try:
        s = time.strftime(_s_format_rfc822, time.gmtime(tf))
    except (ValueError, OverflowError, OSError):
        # Out-of-range time value.  The previous code did `"" % tf` here,
        # which itself raised TypeError (no format specifier to consume
        # tf); return the same empty string used for a missing tf.
        return ""

    return "%s %s" % (s, time_offset)
def tf_from_timestamp(s_timestamp):
    """
    Take a RFC 822 timestamp string and return a time float value.

    timestamp example: "Tue, 10 Jun 2003 09:41:01 GMT"
    timestamp example: "10 Jun 2003 01:41:01 -0800"

    Note: according to RFC 822, weekday is optional.  This function
    ignores the weekday value if present.  The weekday can't change the
    date anyway.

    Returns the UTC time as a float (seconds since the epoch), or None
    if the string cannot be parsed.  Fractional seconds are accepted in
    the input but discarded.
    """
    # We want to be able to accept inputs that might be a little sloppy.
    #
    # strptime() has a rather fragile parser.  So, we will first clean
    # up and reformat the input string so that it is in exactly the
    # correct format to make strptime() happy.

    s_timestamp = s_timestamp.strip()

    try:
        # If the pattern does not match, m is None and the first
        # m.group() call raises AttributeError, handled below.
        m = _pat_rfc822.search(s_timestamp)

        s_mday = m.group(1)
        s_mon = m.group(2)
        s_year = m.group(3)
        s_hour = m.group(4)
        s_min = m.group(5)
        s_sec = m.group(6)
        # group(7) is the optional fractional seconds; it is ignored.
        s_zone_offset = m.group(8)

        # convert two-digit year to four digits
        if len(s_year) == 2:
            y = int(s_year)
            if 32 <= y <= 99:
                s_year = "19" + s_year
            else:
                s_year = "20" + s_year

        # build string in perfect format
        s_date = "%s %s %s %s:%s:%s" % \
                (s_mday, s_mon, s_year, s_hour, s_min, s_sec)
        tup = time.strptime(s_date, _s_date_parse_format)

        # calendar.timegm() is like time.mktime() but doesn't adjust
        # from local to UTC; it just converts to a tf.
        tf = timegm(tup)

        # Use time offset from timestamp to adjust from UTC to correct.
        # If s_zone_offset is "GMT", "UTC", or "Z", offset is 0.

        # converting from timestamp to tf so *subtract* time offset
        tf -= parse_time_offset(s_zone_offset)
    except (AttributeError, TypeError, ValueError, OverflowError):
        # Only parse failures mean "not a timestamp".  A bare except
        # here would also swallow KeyboardInterrupt and SystemExit.
        return None

    return float(tf)
""" if secs > 0: sign = "+" else: sign = "-" secs = abs(secs) offset_hour = secs // (60 * 60) offset_min = (secs // 60) % 60 return "%s%02d%02d" % (sign, offset_hour, offset_min) def local_time_offset(): """ Return a string with local offset from UTC in RFC 882 format. """ # If tf is set to local time in seconds since the epoch, then... # ...offset is the value you add to tf to get UTC. This is the # reverse of time.timezone or time.altzone. if time.daylight: secs_offset = -(time.altzone) else: secs_offset = -(time.timezone) return s_time_offset_from_secs(secs_offset) s_offset_local = local_time_offset() offset_default = 0 s_offset_default = "" def set_default_time_offset(s): global offset_default global s_offset_default offset_default = parse_time_offset(s) s_offset_default = s set_default_time_offset(s_offset_local) if __name__ == "__main__": failed_tests = 0 def self_test(message): """ Check to see if a test failed; if so, print warnings. message: string to print on test failure Implicit arguments: failed_tests -- count of failed tests; will be incremented correct -- the expected result of the test result -- the actual result of the test """ global failed_tests if result != correct: failed_tests += 1 print module_banner print "test failed:", message print " correct:", correct print " result: ", result print # The default is to make time stamps in local time with appropriate # offset; for the tests, we want a "GMT" offset default instead. 
set_default_time_offset("GMT") # Test: convert current time into a timestamp string and back tf_now = tf_utc() # timestamp format does not allow fractional seconds correct = float(int(tf_now)) # truncate any fractional seconds s = timestamp_from_tf(correct) result = tf_from_timestamp(s) self_test("convert tf to timestamp and back 0") # Test: convert a timestamp string to a time value and back correct = "Tue, 10 Jun 2003 04:00:00 GMT" tf = tf_from_timestamp(correct) result = timestamp_from_tf(tf) self_test("convert timestamp to tf and back 0") # Test: convert a timestamp string to a time value and back s_test = "Tue, 10 Jun 2003 00:00:00-0800" tf = tf_from_timestamp(s_test) result = timestamp_from_tf(tf) correct = "Tue, 10 Jun 2003 08:00:00 GMT" self_test("convert timestamp to tf and back 1") # Test: convert a timestamp string to a time value and back correct = "Wed, 08 Mar 2006 13:30:56 PST" tf = tf_from_timestamp(correct) result = timestamp_from_tf(tf, "PST") self_test("convert timestamp to tf and back 2") # Test: convert a timestamp string to a time value and back correct = "Wed, 14 Jun 2006 13:30:56 PDT" tf = tf_from_timestamp(correct) result = timestamp_from_tf(tf, "PDT") self_test("convert timestamp to tf and back 3") # Test: convert a timestamp string to a time value and back correct = "Wed, 07 Jun 2006 13:30:56 PDT" s_test = "Wed,7 Jun 06 13:30:56 PDT" tf = tf_from_timestamp(s_test) result = timestamp_from_tf(tf, "PDT") self_test("convert timestamp to tf and back 4") # Test: convert a timestamp string to a time value and back correct = "Fri, 08 Mar 1996 13:30:56 PDT" s_test = "8 Mar 96 13:30:56 PDT" tf = tf_from_timestamp(s_test) result = timestamp_from_tf(tf, "PDT") self_test("convert timestamp to tf and back 5") # Test: convert a timestamp string to a time value and back correct = "Fri, 08 Mar 1996 13:30:56 PDT" s_test = "8 Mar 96 13:30:56.00 PDT" tf = tf_from_timestamp(s_test) result = timestamp_from_tf(tf, "PDT") self_test("convert timestamp to tf and 
back 6: fractional seconds") # Test: convert a timestamp string to a time value and back correct = "Fri, 08 Mar 1996 13:30:56 PDT" s_test = "8 Mar\t96\v13:30:56.00PDT" tf = tf_from_timestamp(s_test) result = timestamp_from_tf(tf, "PDT") self_test("convert timestamp to tf and back 7: bizarre whitespace") # Test: convert a tf to a a timestamp string correct = "Fri, 07 Apr 2006 11:38:34 -0700" result = timestamp_from_tf(1144435114, "-0700") self_test("convert tf to timestamp 0") from sys import exit s_module = module_name + " " + module_version if failed_tests == 0: print s_module + ": self-test: all tests succeeded!" exit(0) elif failed_tests == 1: print s_module + " self-test: 1 test failed." exit(1) else: print s_module + " self-test: %d tests failed." % failed_tests exit(1) pyfeed-0.7.4/feed/date/rfc3339.py0000644000175000017500000002265710415533667016172 0ustar stevehasteveha# feed.date.rfc3339 -- conversion functions for RFC 3339 timestamps # This is the BSD license. For more information, see: # http://www.opensource.org/licenses/bsd-license.php # # Copyright (c) 2006, Steve R. Hastings # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # * Neither the name of Steve R. Hastings nor the names # of any contributors may be used to endorse or promote products # derived from this software without specific prior written # permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ Conversion functions to handle RFC 3339 timestamp format. RFC 3339 format is used in Atom syndication feeds. "tf" is short for "time float", a float being used as a time value (seconds since the epoch). Always store tf values as UTC values, not local time values. A TF of 0.0 means the epoch in UTC. Please send questions, comments, and bug reports to: pyfeed@langri.com """ import re import time from calendar import timegm from feed.date.tools import tf_utc from feed.date.tools import parse_time_offset module_name = "feed.date.rfc3339" module_version = "0.7.4" module_banner = "%s version %s" % (module_name, module_version) # NOTES ON TIME CONVERSIONS # # Most of the time, the tf values will be UTC (aka GMT or Zulu time) # values. Timestamp strings come complete with time offset specifiers, # so when you convert a timestamp to a tf, the time offset will cause an # adjustment to the tf to make it a UTC value. # # Then, we use Python's time conversion functions that work on UTC # values, so we don't get any adjustments for local time. # # Finally, when actually formatting the timestamp string for output, we # calculate the adjustment for the offset value. 
def timestamp_from_tf(tf, time_offset=None):
    """
    Format a time and offset into a string.

    Arguments:
        tf
            a floating-point time value, seconds since the epoch.
        time_offset
            a string specifying an offset from UTC.  Examples:
                z or Z -- offset is 0 ("Zulu" time, UTC, aka GMT)
                -08:00 -- 8 hours earlier than UTC (Pacific time zone)
            If None, the module default (s_offset_default) is used.

    Returns:
        The timestamp followed directly by the offset string as given.
        Example: 2003-12-13T18:30:02Z
        Example: 2003-12-13T18:30:02+02:00

        Returns "" if tf is None, or if the adjusted time value cannot
        be converted to a calendar date.
    """
    if tf is None:
        return ""

    if time_offset is None:
        time_offset = s_offset_default

    # converting from tf to timestamp so *add* time offset
    tf += parse_time_offset(time_offset)

    try:
        s = time.strftime(_format_RFC3339, time.gmtime(tf))
    except (ValueError, OverflowError, OSError):
        # Out-of-range time value.  The previous code did `"" % tf` here,
        # which itself raised TypeError (no format specifier to consume
        # tf); return the same empty string used for a missing tf.
        return ""

    return s + time_offset
def s_time_offset_from_secs(secs):
    """
    Return a string with offset from UTC in RFC3339 format, from secs.

    Arguments:
        secs
            offset from UTC in seconds; positive is east of UTC.

    Returns:
        A string of the form "(+|-)hh:mm".  A zero offset is returned
        as "+00:00".
    """
    # Zero must take the "+" sign: in RFC 3339 "-00:00" means the local
    # offset is unknown, which is not what a real zero offset means.
    if secs >= 0:
        sign = "+"
    else:
        sign = "-"

    secs = abs(secs)

    offset_hour = secs // (60 * 60)
    offset_min = (secs // 60) % 60
    return "%s%02d:%02d" % (sign, offset_hour, offset_min)
def set_default_time_offset(s):
    """
    Set the module-wide default time offset.

    s is an offset string.  It is parsed first, so an invalid string
    raises before either default is changed.  The parsed offset in
    seconds is stored in offset_default and the string itself, unchanged,
    in s_offset_default.
    """
    global offset_default, s_offset_default

    parsed_secs = parse_time_offset(s)
    offset_default, s_offset_default = parsed_secs, s
set_default_time_offset("Z") # Test: convert current time into a timestamp string and back tf_now = tf_utc() # timestamp format does not allow fractional seconds correct = float(int(tf_now)) # truncate any fractional seconds s = timestamp_from_tf(tf_now) result = tf_from_timestamp(s) self_test("convert tf to timestamp and back 0") # Test: convert a timestamp string to a time value and back correct = "2003-12-13T18:30:02-07:00" tf = tf_from_timestamp(correct) result = timestamp_from_tf(tf, "-07:00") self_test("convert timestamp to tf and back 0") # Test: convert a timestamp string to a time value and back s_test = "2003-06-10T00:00:00-08:00" tf = tf_from_timestamp(s_test) result = timestamp_from_tf(tf, "Z") correct = "2003-06-10T08:00:00Z" self_test("convert timestamp to tf and back 1") # Test: convert a tf to a a timestamp string correct = "2006-04-07T11:38:34-07:00" result = timestamp_from_tf(1144435114, "-07:00") self_test("convert tf to timestamp 0") from sys import exit s_module = module_name + " " + module_version if failed_tests == 0: print s_module + ": self-test: all tests succeeded!" exit(0) elif failed_tests == 1: print s_module + " self-test: 1 test failed." exit(1) else: print s_module + " self-test: %d tests failed." % failed_tests exit(1) pyfeed-0.7.4/feed/date/tools.py0000644000175000017500000001543510415370260016216 0ustar stevehasteveha# feed.date.tools -- miscellaneous useful date/time functions # This is the BSD license. For more information, see: # http://www.opensource.org/licenses/bsd-license.php # # Copyright (c) 2006, Steve R. Hastings # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. 
def local_from_utc(tf):
    """
    Convert a UTC time float into a local-time time float.

    The shift applied is time.timezone (the non-DST offset); daylight
    saving time is not taken into account.
    """
    non_dst_shift = time.timezone
    local_tf = tf - non_dst_shift
    return local_tf
""" return time.time() - time.timezone def tf_utc(): """ Return a time float with the current time in UTC time. """ return time.time() # _tz_offset_dict # Look up a time zone offset code and return an offset value. Offset # represents how many hours offset from UTC. _tz_offset_dict = { "ut": 0, "utc": 0, "gmt": 0, "z": 0, "et": -5, "est": -5, "edt": -4, "ct": -6, "cst": -6, "cdt": -5, "mt": -7, "mst": -7, "mdt": -6, "pt": -8, "pst": -8, "pdt": -7, "a": -1, "b": -2, "c": -3, "d": -4, "e": -5, "f": -6, "g": -7, "h": -8, "i": -9, "k": -10, "l": -11, "m": -12, "n": +1, "o": +2, "p": +3, "q": +4, "r": +5, "s": +6, "t": +7, "u": +8, "v": +9, "w": +10, "x": +11, "y": +12} _pat_time_offset = re.compile("([+-])(\d\d):?(\d\d?)?") def parse_time_offset(s): """ Given a time offset string, return the offset from UTC, in seconds. RSS allows any RFC822-compatible time offset, which includes many odd codes such as "EST", "PDT", "N", etc. This function understands them all, plus numeric ones like "-0800". """ # Python's time.strptime() function can understand numeric offset, # or text code, but not either one. if s is None: return 0 try: s = s.lstrip().rstrip().lower() except AttributeError: raise TypeError, "time offset must be a string" if s in _tz_offset_dict: return _tz_offset_dict[s] * 3600 m = _pat_time_offset.search(s) if not m: raise ValueError, "invalid time offset string" sign = m.group(1) offset_hour = int(m.group(2)) if m.group(3) is not None: offset_min = int(m.group(3)) else: offset_min = 0 offset = offset_hour * 3600 + offset_min * 60 if sign == "-": offset *= -1 return offset def tf_from_s(s): """ Return a time float from a date string. Try every format we know. 
""" from feed.date.rfc3339 import tf_from_timestamp as tf_from_rfc3339 from feed.date.rfc822 import tf_from_timestamp as tf_from_rfc822 tf = tf_from_rfc3339(s) if tf is not None: return tf tf = tf_from_rfc822(s) if tf is not None: return tf return None class TimeSeq(object): """ A class to generate a sequence of timestamps. Atom feed validators complain if multiple timestamps have the same value, so this provides a convenient way to set a bunch of timestamps all at least one second different from each other. """ def __init__(self, init_time=None): if init_time is None: self.tf = float(int(tf_utc())) else: self.tf = float(init_time) def next(self): tf = self.tf self.tf += 1.0 return tf if __name__ == "__main__": failed_tests = 0 def self_test(message): """ Check to see if a test failed; if so, print warnings. message: string to print on test failure Implicit arguments: failed_tests -- count of failed tests; will be incremented correct -- the expected result of the test result -- the actual result of the test """ global failed_tests if result != correct: failed_tests += 1 print module_banner print "test failed:", message print " correct:", correct print " result: ", result print correct = 1141607495.0 result = utc_from_local(local_from_utc(correct)) self_test("local/utc conversion test 0") correct = 1071340202.0 result = tf_from_s("2003-12-13T18:30:02Z") self_test("tf_from_s() test 0") correct = 1143183379.0 result = tf_from_s("2006-03-24 06:56:19.00Z") self_test("tf_from_s() test 1") correct = 1143142223.0 result = tf_from_s("Thu, 23 Mar 2006 11.30.23.00 PST") self_test("tf_from_s() test 2") from sys import exit s_module = module_name + " " + module_version if failed_tests == 0: print s_module + " self-test: all tests succeeded!" exit(0) elif failed_tests == 1: print s_module + " self-test: 1 test failed." exit(1) else: print s_module + " self-test: %d tests failed." 
% failed_tests exit(1) pyfeed-0.7.4/feed/__init__.py0000644000175000017500000000000010407575434015671 0ustar stevehastevehapyfeed-0.7.4/feed/opml1.py0000644000175000017500000002755410415370275015204 0ustar stevehasteveha# feed.opml1 -- OPML 1.0 XML creation library module # This is the BSD license. For more information, see: # http://www.opensource.org/licenses/bsd-license.php # # Copyright (c) 2006, Steve R. Hastings # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # * Neither the name of Steve R. Hastings nor the names # of any contributors may be used to endorse or promote products # derived from this software without specific prior written # permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ Module to make it really easy to work with OPML 1.0 XML data. 
class ExpansionState(CustomElement):
    """
    The <expansionState> element: a list of non-negative integers,
    written out as comma-separated text.
    """
    def __init__(self, exstate=None):
        # exstate: a list of integer values representing expansion state
        # `list` is the same object as Python 2's types.ListType.
        CustomElement.__init__(self, "expansionState", exstate, list)

    def check_value(self, value):
        """
        Return value unchanged if it is a list of non-negative integers.

        Raises TypeError if value is not a list or holds a non-integer,
        and ValueError if it holds a negative number.
        """
        # Exact type checks (not isinstance) are kept so that the set of
        # accepted values is the same as before.
        if type(value) is not list:
            raise TypeError("expansionState must be list of integers")
        for x in value:
            if type(x) is not int:
                raise TypeError("expansionState must be list of integers")
            # check for invalid values
            if x < 0:
                raise ValueError("negative numbers not allowed in list")
        return value

    def value_from_s(self, s):
        """
        Parse a string of integers separated by commas and/or whitespace.

        Returns the list of integers, or None if any item is not a valid
        non-negative integer.
        """
        s = s.replace(",", " ")  # replace each comma with a space
        lst = s.split()  # split on any whitespace
        exstate = []
        for s_item in lst:
            try:
                int_val = int(s_item)
            except ValueError:
                # it wasn't a valid integer so give up
                return None
            # check for invalid values
            if int_val < 0:
                # negative integer is not valid so give up
                return None
            exstate.append(int_val)
        return exstate

    def s_from_value(self):
        """Return the stored list as "1, 3, 4", or "" if there is none."""
        if self.value is None:
            return ""
        lst = [str(int_val) for int_val in self.value]
        return ", ".join(lst)
def new_xmldoc_opml():
    """
    Creates a new XMLDoc() with an OPML() in it.  Returns both as a tuple.

    The OPML() is installed as the root element of the XMLDoc().

    Return a tuple: (xmldoc, opml)
    """
    xmldoc = XMLDoc()
    opml = OPML()

    xmldoc.root_element = opml

    return (xmldoc, opml)
message: string to print on test failure Implicit arguments: failed_tests -- count of failed tests; will be incremented correct -- the expected result of the test result -- the actual result of the test """ global failed_tests if result != correct: failed_tests += 1 print module_banner print "test case failed, diff follows:" print diff(correct, result) print # Since this file is indented using spaces, let's indent our test # code using spaces too so it will compare right. set_indent_str(" ") # The default is to make time stamps using local time offset; # for the tests, we want a "GMT" offset default instead. set_default_time_offset("GMT") # Test: generate a trivial OPML doc # # http://www.atomenabled.org/developers/syndication/#sampleFeed correct = """\ Silly test of OPML Mon, 20 Mar 2006 22:40:08 GMT Tue, 21 Mar 2006 01:23:12 GMT J. Random Guy jrandom@example.com 1, 3, 4 1 61 304 562 842 """ xmldoc, opml = new_xmldoc_opml() opml.head.title = "Silly test of OPML" opml.head.date_created = "Mon, 20 Mar 2006 22:40:08 GMT" opml.head.date_modified = "Tue, 21 Mar 2006 01:23:12 GMT" opml.head.owner_name = "J. Random Guy" opml.head.owner_email = "jrandom@example.com" opml.head.expansion_state = "1, 3, 4" opml.head.expansion_state.value = [1, 3, 4] opml.head.vert_scroll_state = "1" opml.head.window_top = 61 opml.head.window_left = 304 opml.head.window_bottom = 562 opml.head.window_right = 842 outline = Outline("I. Intro") opml.body.append(outline) o = Outline("a. First") outline.append(o) o = Outline("b. Second") outline.append(o) o = Outline("c. Third") outline.append(o) o.append(Outline("0. Even more")) t = Outline() o.append(t) o[1] = "1. Even more still" result = str(xmldoc) self_test_diff("generate test document 0") # Test: verify that xmldoc.Validate() succeeds if not xmldoc.Validate(): failed_tests += 1 print "test case failed:" print "xmldoc.Validate() failed." 
from sys import exit s_module = module_name + " " + module_version if failed_tests == 0: print s_module + ": self-test: all tests succeeded!" exit(0) elif failed_tests == 1: print s_module + " self-test: 1 test failed." exit(1) else: print s_module + " self-test: %d tests failed." % failed_tests exit(1) pyfeed-0.7.4/feed/opml.py0000644000175000017500000004030010415370303015073 0ustar stevehasteveha# feed.opml -- OPML XML creation library module # This is the BSD license. For more information, see: # http://www.opensource.org/licenses/bsd-license.php # # Copyright (c) 2006, Steve R. Hastings # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # * Neither the name of Steve R. Hastings nor the names # of any contributors may be used to endorse or promote products # derived from this software without specific prior written # permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. 
class Timestamp(CustomTimestampElement):
    """
    Timestamp element used by the OPML head elements in this module
    (DateCreated and DateModified derive from it).

    Arguments:
        tag_name -- XML tag name for this element
        tf -- passed through to CustomTimestampElement (default None)
        time_offset -- passed through to CustomTimestampElement
            (default None)
    """
    def __init__(self, tag_name, tf=None, time_offset=None):
        # In addition to the caller's arguments, hand the base class the
        # RFC 822 pieces it works with: rfc822.s_offset_default,
        # rfc822.timestamp_from_tf and rfc822.cleanup_time_offset, plus
        # tf_from_s from feed.date.tools.
        # NOTE(review): the positional order must match the signature of
        # CustomTimestampElement.__init__() in xe -- not visible here.
        CustomTimestampElement.__init__(self, tag_name, tf, time_offset,
                rfc822.s_offset_default,
                rfc822.timestamp_from_tf,
                tf_from_s,
                rfc822.cleanup_time_offset)
class ExpansionState(CustomElement):
    """
    The "expansionState" head element.

    The value is a list of non-negative integers; as text it is written
    as the integers separated by ", ".
    """
    def __init__(self, exstate=None):
        # exstate: a list of integer values representing expansion state
        CustomElement.__init__(self, "expansionState", exstate,
                types.ListType)

    def check_value(self, value):
        """
        Return value unchanged if it is a list of non-negative integers.

        Raises TypeError if value is not a list or holds a non-integer;
        raises ValueError if it holds a negative integer.
        """
        if type(value) is not types.ListType:
            raise TypeError("expansionState must be list of integers")
        for item in value:
            if type(item) is not types.IntType:
                raise TypeError("expansionState must be list of integers")
            if item < 0:
                # negative entries are invalid
                raise ValueError("negative numbers not allowed in list")
        return value

    def value_from_s(self, s):
        """
        Parse a string of integers separated by commas and/or whitespace.

        Return the list of integers, or None if any piece is not an
        integer or is negative.
        """
        numbers = []
        # treat each comma as whitespace, then split on whitespace
        for token in s.replace(",", " ").split():
            try:
                number = int(token)
            except ValueError:
                # not a valid integer, so give up
                return None
            if number < 0:
                # a negative integer is not valid, so give up
                return None
            numbers.append(number)
        return numbers

    def s_from_value(self):
        """
        Return the value formatted as "1, 3, 4", or "" if there is none.
        """
        if self.value is None:
            return ""
        return ", ".join([str(number) for number in self.value])
class OutlineTimestamp(object):
    """
    Value object for the "created" attribute of an outline element.

    Attributes:
        tf -- the time as a time float
        time_offset -- offset used when formatting; initialized to
            rfc822.s_offset_default
    """
    def __init__(self, value=None):
        """
        Arguments:
            value -- a time float or a timestamp string; None means
                "use the current time" (rfc822.tf_utc())
        """
        if value is None:
            # default to current time created
            self.tf = rfc822.tf_utc()
        else:
            self.set(value)
        self.time_offset = rfc822.s_offset_default

    def __cmp__(self, o):
        return cmp(self.tf, o.tf)

    def __str__(self):
        # A false tf (which includes 0.0) is rendered as the empty string.
        if self.tf:
            return rfc822.timestamp_from_tf(self.tf, self.time_offset)
        return ""

    def update(self):
        """Set the time to the current time."""
        self.tf = rfc822.tf_utc()

    def set(self, value):
        """
        Set the time from a time float or from a timestamp string.

        Raises ValueError if a string is not accepted by
        rfc822.tf_from_timestamp(); raises TypeError for any other type.
        On failure self.tf is left unchanged.
        """
        if type(value) is types.FloatType:
            self.tf = value
        elif type(value) in types.StringTypes:
            # Parse first and store only on success, so that a bad string
            # can never leave its raw text behind in self.tf.
            tf = rfc822.tf_from_timestamp(value)
            if not tf:
                # NOTE(review): a string that parses to 0.0 (the epoch)
                # is also rejected by this truth test -- confirm that is
                # intended before changing it.
                raise ValueError("string must be valid timestamp")
            self.tf = tf
        else:
            raise TypeError("must be time float or valid timestamp string")


class OutlineAttrs(Attrs):
    """
    Attributes of an outline element.

    The "created" attribute is held as an OutlineTimestamp; assigning to
    it after construction updates that object in place instead of
    replacing it.
    """
    def __init__(self, text="", created=None):
        """
        Arguments:
            text -- value for the "text" attribute
            created -- time float or timestamp string for the "created"
                attribute; None means the current time
        """
        Attrs.__init__(self)
        self.__setitem__(s_text, text)
        self.__setitem__(s_created, OutlineTimestamp(created))
        attr_names = [s_text, s_type, s_is_comment, s_is_breakpoint,
                s_created, s_category, s_xml_url, s_html_url, s_url,
                s_title, s_description, s_language, s_version]
        self.set_names(attr_names)

    def __cmp__(self, o):
        return cmp(self._attrs_dict, o._attrs_dict)

    def __getitem__(self, k):
        if k == s_created:
            # hand back the OutlineTimestamp object itself
            return self._attrs_dict[s_created]
        # The result must be returned; without "return" every key other
        # than "created" evaluated to None.
        return Attrs.__getitem__(self, k)

    def __setitem__(self, k, value):
        if k == s_created:
            try:
                created = self._attrs_dict[s_created]
                created.set(value)
                return
            except KeyError:
                # no timestamp object stored yet (first assignment)
                pass
        # fall through to Attrs.__setitem__()
        Attrs.__setitem__(self, k, value)
def diff(s0, s1):
    """
    Return a line-by-line difflib.ndiff() report for two strings.

    Both strings are split on "\n"; the report lines are joined
    with "\n" into a single string.
    """
    import difflib
    old_lines, new_lines = s0.split("\n"), s1.split("\n")
    return "\n".join(difflib.ndiff(old_lines, new_lines))
Random Guy" opml.head.owner_email = "jrandom@example.com" opml.head.owner_id = "http://www.example.com/contact_me.html" opml.head.expansion_state = "1, 3, 4" opml.head.vert_scroll_state = "1" opml.head.window_top = 61 opml.head.window_left = 304 opml.head.window_bottom = 562 opml.head.window_right = 842 outline = Outline("I. Intro", created=0.0) opml.body.append(outline) o = Outline("a. First", created=0.0) outline.append(o) o = Outline("b. Second", created=0.0) outline.append(o) o = Outline("c. Third", created=0.0) outline.append(o) o.append(Outline("0. Even more", created=0.0)) t = Outline() o.append(t) o[1] = "1. Even more still" o[1].attrs["created"] = 0.0 o = Outline("Test", created=0.0) o.attrs["isComment"] = "false" o.attrs["isBreakpoint"] = "false" outline.append(o) o = Outline("The Mets are the best team in baseball.") o.attrs["created"] = "Mon, 31 Oct 2005 18:21:33 GMT" o.attrs["category"] = "/Philosophy/Baseball/Mets,/Tourism/New York" outline.append(o) o = Outline("CNET News.com", 0.0) o.attrs["description"] = "Tech news and stuff." o.attrs["htmlUrl"] = "http://news.com.com/" o.attrs["language"] = "unknown" o.attrs["title"] = "CNET News.com" o.attrs["type"] = "rss" o.attrs["version"] = "RSS2" o.attrs["xmlUrl"] = "http://news.com.com/2547-1_3-0-5.xml" outline.append(o) result = str(xmldoc) self_test_diff("generate test document 0") # Test: verify that xmldoc.Validate() succeeds if not xmldoc.Validate(): failed_tests += 1 print "test case failed:" print "xmldoc.Validate() failed." # Test: update a few timestamps opml.head.date_modified.update() outline.attrs["created"].update() from sys import exit s_module = module_name + " " + module_version if failed_tests == 0: print s_module + ": self-test: all tests succeeded!" exit(0) elif failed_tests == 1: print s_module + " self-test: 1 test failed." exit(1) else: print s_module + " self-test: %d tests failed." 
% failed_tests exit(1) pyfeed-0.7.4/feed/rss.py0000644000175000017500000005150110415370312014740 0ustar stevehasteveha# feed.rss -- RSS 2.0 feed creation library module # This is the BSD license. For more information, see: # http://www.opensource.org/licenses/bsd-license.php # # Copyright (c) 2006, Steve R. Hastings # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # * Neither the name of Steve R. Hastings nor the names # of any contributors may be used to endorse or promote products # derived from this software without specific prior written # permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ Module to make it really easy to work with RSS 2.0 syndication feeds. 
class Cloud(TextElement):
    """
    The RSS "cloud" element.  Its text is empty; its data is carried in
    five attributes (domain, port, path, registerProcedure, protocol)
    that are always set together.
    """
    def _set_attrs(self, domain, port, path, regproc, protocol):
        """
        Validate the five cloud arguments and store them as attributes.

        If all five are None, nothing is changed.  Otherwise all five
        are required; port is stored as str(port).

        Raises:
            ValueError -- an argument is None or an empty string, or
                protocol is not one of tup_protocols
            TypeError -- an argument is not a string (port may also be
                an integer)
        """
        args = (domain, port, path, regproc, protocol)
        for arg in args:
            if arg is not None:
                break
        else:
            # nothing was specified; leave the attributes alone
            return

        # okay, at least one arg was specified, so all five are required
        port_index = 1
        for i, arg in enumerate(args):
            if arg is None:
                # Test this before the type check; otherwise an omitted
                # argument is reported as "bad argument type: None".
                raise ValueError("must specify all five args")
            # Only port may be an integer.  Compare by position, not by
            # identity: "arg is port" is also true for any other argument
            # that happens to be the same int object as port.
            if i == port_index and type(arg) == types.IntType:
                continue
            if type(arg) not in types.StringTypes:
                raise TypeError("bad argument type: " + str(arg))
            if not arg:
                raise ValueError("must specify all five args")

        if protocol not in tup_protocols:
            raise ValueError(
                'protocol must be one of: "xml-rpc", "soap", or "http-post"')

        self.attrs[s_domain] = domain
        self.attrs[s_port] = str(port)
        self.attrs[s_path] = path
        self.attrs[s_regproc] = regproc
        self.attrs[s_protocol] = protocol

    def __init__(self, domain=None, port=None, path=None, regproc=None,
            protocol=None):
        """
        Set cloud attributes.  All five must be specified.

        Arguments:
            domain -- domain name or IP address of the cloud
            port -- TCP port upon which the cloud is running
            path -- location of the cloud's responder
            regproc -- name of the procedure to call to request notification
            protocol -- protocol is "xml-rpc", "soap", or "http-post"

        protocol is case-sensitive.
        """
        lst = [s_domain, s_port, s_path, s_regproc, s_protocol]
        TextElement.__init__(self, "cloud", "", attr_names=lst)
        self._set_attrs(domain, port, path, regproc, protocol)

    def set(self, domain, port, path, regproc, protocol):
        """
        Set cloud attributes.  All five must be specified.

        Arguments:
            domain -- domain name or IP address of the cloud
            port -- TCP port upon which the cloud is running
            path -- location of the cloud's responder
            regproc -- name of the procedure to call to request notification
            protocol -- protocol is "xml-rpc", "soap", or "http-post"

        protocol is case-sensitive.
        """
        self._set_attrs(domain, port, path, regproc, protocol)
class Day(CustomElement):
    """
    A "day" element whose value is a weekday index 0..6 and whose text
    form is the capitalized English day name.
    """
    # Monday==0, etc. to match the tm_wday values in the time module
    _days = {"monday":0, "tuesday":1, "wednesday":2,
            "thursday":3, "friday":4, "saturday":5, "sunday":6}
    _day_names = ["Monday", "Tuesday", "Wednesday",
            "Thursday", "Friday", "Saturday", "Sunday"]

    def __init__(self, value=None):
        CustomElement.__init__(self, "day", value, types.IntType)

    def check_value(self, value):
        """
        Return value converted with int(), if it lies in 0..6.

        Raises TypeError if int() rejects the value; raises ValueError
        if the resulting index is out of range.
        """
        try:
            index = int(value)
        except (TypeError, ValueError):
            raise TypeError("day index must be an integer")
        if index < 0 or index > 6:
            raise ValueError("day index must be in range 0..6")
        return index

    def value_from_s(self, s):
        """
        Return the index for a day name (matched case-insensitively).

        Raises ValueError if s is not a known day name, or has no
        lower() method.
        """
        try:
            return Day._days[s.lower()]
        except (AttributeError, KeyError):
            raise ValueError(
                'can only set to valid day name ("Monday", etc.)')

    def s_from_value(self):
        """Return the day name for the current value, or "" if unset."""
        if self.value is not None:
            return Day._day_names[self.value]
        return ""
# RSS 2.0 names this attribute "isPermaLink" (capital L).  XML attribute
# names are case-sensitive, so the earlier spelling "isPermalink" is not
# the attribute the specification defines.
s_is_permalink = "isPermaLink"

class Guid(TextElement):
    """
    Arguments:
        text -- the GUID as a text string.
        is_permalink -- "true", "false", or "" (the default)

    Attributes:
        isPermaLink -- "true" or "false"

    Raises ValueError if is_permalink is any other value.
    """
    def __init__(self, text="", is_permalink=""):
        TextElement.__init__(self, "guid", text)
        if is_permalink not in ("", "true", "false"):
            raise ValueError('is_permalink must be "true" or "false"')
        self.attrs[s_is_permalink] = is_permalink
message: string to print on test failure Implicit arguments: failed_tests -- count of failed tests; will be incremented correct -- the expected result of the test result -- the actual result of the test """ global failed_tests if result != correct: failed_tests += 1 print module_banner print "test failed:", message print " correct:", correct print " result: ", result print def diff(s0, s1): """ Compare two strings, line by line; return a report on any differences. """ from difflib import ndiff lst0 = s0.split("\n") lst1 = s1.split("\n") report = '\n'.join(ndiff(lst0, lst1)) return report def self_test_diff(message): """ Check to see if a test failed; if so, print a diff. message: string to print on test failure Implicit arguments: failed_tests -- count of failed tests; will be incremented correct -- the expected result of the test result -- the actual result of the test """ global failed_tests if result != correct: failed_tests += 1 print module_banner print "test case failed, diff follows:" print diff(correct, result) print # Since this file is indented using spaces, let's indent our test # code using spaces too so it will compare right. set_indent_str(" ") # The default is to make time stamps using local time offset; # for the tests, we want a "GMT" offset default instead. set_default_time_offset("GMT") # Test: generate an RSS doc correct = """\ Silly Test of RSS http://www.example.com/rss.xml Use every RSS element at least once. en-us Copyright 2006 by Steve R. Hastings. editor@example.com webmaster@example.com Tue, 21 Mar 2006 01:23:12 GMT Tue, 21 Mar 2006 01:20:03 GMT Examples PyFeed -- feed.rss module http://blogs.law.harvard.edu/tech/rss 60 http://example.com/image.jpg Example Image http://example.com 64 32 Silly Image Example (this is not a valid PICS rating string) My Text Input Box Silly input box Fred http://example.com/ 0 1 22 23 Friday Saturday Sunday Example Blog First Entry! 
http://www.example.com/blog/0 The official blog of example.com has begun.\ blogger@example.com Blog Entries blogger@example.com http://example.com/podcast/0.ogg 8115596 audio/ogg 0xDECAFBADDEADBEEFC0FFEE Tue, 21 Mar 2006 01:06:53 GMT Slashdot """ rss = RSS() channel = Channel() rss.channel = channel channel.title = "Silly Test of RSS" channel.link = "http://www.example.com/rss.xml" channel.description = "Use every RSS element at least once." channel.language = "en-us" channel.copyright = "Copyright 2006 by Steve R. Hastings." channel.managing_editor = "editor@example.com" channel.web_master = "webmaster@example.com" channel.pub_date = 1142904192.0 channel.last_build_date = "21 Mar 2006 01:20:03 GMT" channel.categories.append(Category("Examples")) channel.generator = "PyFeed -- feed.rss module" channel.cloud = Cloud("rpc.sys.com", 80, "/RPC2", "pingMe", "soap") channel.ttl = 60 channel.image = Image(url="http://example.com/image.jpg", title="Example Image", link="http://example.com", width=64, height=32, description="Silly Image Example") channel.rating = "(this is not a valid PICS rating string)" channel.skip_hours.append(Hour(0)) channel.skip_hours.append(Hour(23)) channel.skip_hours.append(Hour(22)) channel.skip_hours.append(Hour(1)) friday = Day().import_xml("Friday") channel.skip_days.append(friday) channel.skip_days.append(Day(6)) channel.skip_days.append(Day(6)) channel.skip_days.append(Day(6)) channel.skip_days.append(Day(6)) channel.skip_days.append(Day("Saturday")) channel.text_input = TextInput("My Text Input Box", "Silly input box", "Fred", "http://example.com/") item = Item() channel.items.append(item) item.title = "Example Blog First Entry!" item.link = "http://www.example.com/blog/0" item.description = "The official blog of example.com has begun." 
item.author = "blogger@example.com" item.categories.append(Category("Blog Entries")) item.comments = "blogger@example.com" item.enclosure = Enclosure("http://example.com/podcast/0.ogg", 8115596, "audio/ogg") item.guid = "0xDECAFBADDEADBEEFC0FFEE" item.guid.attrs["isPermalink"] = "false" item.pub_date = "21 Mar 2006 01:06:53 GMT" item.source = "Slashdot" item.source.attrs["url"] = "http://slashdot.org/" result = str(rss) self_test_diff("generate test feed 0") # Test: verify that rss.Validate() succeeds if not rss.Validate(): failed_tests += 1 print "test case failed:" print "rss.Validate() failed." from sys import exit s_module = module_name + " " + module_version if failed_tests == 0: print s_module + ": self-test: all tests succeeded!" exit(0) elif failed_tests == 1: print s_module + " self-test: 1 test failed." exit(1) else: print s_module + " self-test: %d tests failed." % failed_tests exit(1) pyfeed-0.7.4/feed/atom.py0000644000175000017500000002663510415370265015112 0ustar stevehasteveha# feed.atom -- Atom feed creation library module # This is the BSD license. For more information, see: # http://www.opensource.org/licenses/bsd-license.php # # Copyright (c) 2006, Steve R. Hastings # All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # * Neither the name of Steve R. Hastings nor the names # of any contributors may be used to endorse or promote products # derived from this software without specific prior written # permission. 
# # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ Module to make it really easy to work with Atom syndication feeds. You might want to start with the test cases at the end; see how they work, and then go back and look at the code in the module. Please send questions, comments, and bug reports to: pyfeed@langri.com """ from xe import * module_name = "feed.atom" module_version = "0.7.4" module_banner = "%s version %s" % (module_name, module_version) # string constants # These string values are used in more than one place. 
class AtomText(TextElement):
    """
    Base class for the Atom text elements defined in this module
    (Title, Subtitle, Content, Summary, Rights).

    Arguments:
        tag_name -- XML tag name for this element
        text -- initial text of the element (default "")

    "type" is the only attribute name passed to TextElement.
    """
    def __init__(self, tag_name, text=""):
        # legal values of type: "text", "html", "xhtml"
        # REVIEW: should add checker for values of "type"
        TextElement.__init__(self, tag_name, text, attr_names=[s_type])
NestElement.__init__(self, tag_name) self.name = Name(name) self.email = Email() self.uri = Uri() class Author(BasicAuthor): def __init__(self, name=""): BasicAuthor.__init__(self, "author", name) class Contributor(BasicAuthor): def __init__(self, name=""): BasicAuthor.__init__(self, "contributor", name) import feed.date.rfc3339 as rfc3339 from feed.date.rfc3339 import set_default_time_offset class Timestamp(CustomTimestampElement): def __init__(self, tag_name, tf=None, time_offset=None): CustomTimestampElement.__init__(self, tag_name, tf, time_offset, rfc3339.s_offset_default, rfc3339.timestamp_from_tf, rfc3339.tf_from_timestamp, rfc3339.cleanup_time_offset) class Updated(Timestamp): def __init__(self, tf=None): Timestamp.__init__(self, "updated", tf) class Published(Timestamp): def __init__(self, tf=None): Timestamp.__init__(self, "published", tf) class FeedElement(NestElement): def __init__(self, tag_name): NestElement.__init__(self, tag_name) self.title = Title("") self.id = Id("") self.updated = Updated() self.authors = Collection(Author) self.links = Collection(Link) self.subtitle = Subtitle("") self.categories = Collection(Category) self.contributors = Collection(Contributor) self.generator = Generator() self.icon = Icon() self.logo = Logo() self.rights = Rights("") class Feed(FeedElement): def __init__(self): FeedElement.__init__(self, "feed") self.attrs["xmlns"] = "http://www.w3.org/2005/Atom" self.title.text = "Title of Feed Goes Here" self.id.text = "ID of Feed Goes Here" self.entries = Collection(Entry) class Source(FeedElement): def __init__(self): FeedElement.__init__(self, "source") class Entry(NestElement): def __init__(self): NestElement.__init__(self, "entry") self.title = Title("Title of Entry Goes Here") self.id = Id("ID of Entry Goes Here") self.updated = Updated() self.authors = Collection(Author) self.links = Collection(Link) self.content = Content("") self.summary = Summary("") self.categories = Collection(Category) self.contributors = 
Collection(Contributor) self.published = Published() self.rights = Rights("") self.source = Source() def new_xmldoc_feed(): """ Creates a new XMLDoc() with a Feed() in it. Returns both as a tuple. Return a tuple: (xmldoc, feed) """ xmldoc = XMLDoc() feed = Feed() feed.generator = module_banner xmldoc.root_element = feed return (xmldoc, feed) if __name__ == "__main__": failed_tests = 0 def self_test(message): """ Check to see if a test failed; if so, print warnings. message: string to print on test failure Implicit arguments: failed_tests -- count of failed tests; will be incremented correct -- the expected result of the test result -- the actual result of the test """ global failed_tests if result != correct: failed_tests += 1 print module_banner print "test failed:", message print " correct:", correct print " result: ", result print def diff(s0, s1): """ Compare two strings, line by line; return a report on any differences. """ from difflib import ndiff lst0 = s0.split("\n") lst1 = s1.split("\n") report = '\n'.join(ndiff(lst0, lst1)) return report def self_test_diff(message): """ Check to see if a test failed; if so, print a diff. message: string to print on test failure Implicit arguments: failed_tests -- count of failed tests; will be incremented correct -- the expected result of the test result -- the actual result of the test """ global failed_tests if result != correct: failed_tests += 1 print module_banner print "test case failed, diff follows:" print diff(correct, result) print # Since this file is indented using spaces, let's indent our test # code using spaces too so it will compare right. set_indent_str(" ") # The default is to make time stamps using local time offset; # for the tests, we want a "GMT" offset default instead. set_default_time_offset("Z") # Test: generate the "Atom-Powered Robots Run Amok" example # # Note: the original had some of the XML declarations in # a different order than atomfeed puts them. 
I swapped around # the lines here so they would match the atomfeed order. Other # than that, this is the example from: # # http://www.atomenabled.org/developers/syndication/#sampleFeed correct = """\ Example Feed urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6 2003-12-13T18:30:02Z John Doe Atom-Powered Robots Run Amok urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a 2003-12-13T18:30:02Z Some text. """ xmldoc = XMLDoc() feed = Feed() xmldoc.root_element = feed feed.title = "Example Feed" feed.id = "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6" feed.updated = "2003-12-13T18:30:02Z" link = Link("http://example.org/") feed.links.append(link) author = Author("John Doe") feed.authors.append(author) entry = Entry() feed.entries.append(entry) entry.id = "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a" entry.title = "Atom-Powered Robots Run Amok" entry.updated = "2003-12-13T18:30:02Z" entry.summary = "Some text." link = Link("http://example.org/2003/12/13/atom03") entry.links.append(link) result = str(xmldoc) self_test_diff("generate test feed 0") # Test: verify that xmldoc.Validate() succeeds if not xmldoc.Validate(): failed_tests += 1 print "test case failed:" print "xmldoc.Validate() failed." from sys import exit s_module = module_name + " " + module_version if failed_tests == 0: print s_module + ": self-test: all tests succeeded!" exit(0) elif failed_tests == 1: print s_module + " self-test: 1 test failed." exit(1) else: print s_module + " self-test: %d tests failed." % failed_tests exit(1) pyfeed-0.7.4/feed/tools.py0000644000175000017500000001670410410732427015303 0ustar stevehasteveha# feed.tools -- library functions useful in making syndication feeds # This is the BSD license. For more information, see: # http://www.opensource.org/licenses/bsd-license.php # # Copyright (c) 2006, Steve R. Hastings # All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions # are met: # # * Redistributions of source code must retain the above copyright # notice, this list of conditions and the following disclaimer. # # * Redistributions in binary form must reproduce the above copyright # notice, this list of conditions and the following disclaimer in # the documentation and/or other materials provided with the # distribution. # # * Neither the name of Steve R. Hastings nor the names # of any contributors may be used to endorse or promote products # derived from this software without specific prior written # permission. # # THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS # IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED # TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A # PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER # OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, # EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, # PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR # PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF # LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING # NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS # SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """ Miscellaneous functions useful in making syndication feeds. Please send questions, comments, and bug reports to: pyfeed@langri.com """ import re import time module_name = "feed.tools" module_version = "0.7.1" module_banner = "%s version %s" % (module_name, module_version) _pat_nbsp = re.compile(r' ') def entities_to_ws(s): """ Return a copy of s with HTML whitespace entities replaced by a space. Currently just gets rid of HTML non-breaking spaces (" "). 
""" if not s: return s s = re.sub(_pat_nbsp, " ", s) return s def normalize_ws(s): """ Return a copy of string s with each run of whitespace replaced by one space. >>> s = "and now\n\n\nfor \t something\v completely\r\n different" >>> print normalize_ws(s) and now for something completely different >>> """ lst = s.split() s = " ".join(lst) return s def escape_html(s): """ Return a copy of string s with HTML codes escaped. This is useful when you want HTML tags printed literally, rather than interpreted. >>> print escape_html("") <head> >>> print escape_html(" ") &nbsp; """ s = s.replace("&", "&") s = s.replace("<", "<") s = s.replace(">", ">") return s def unescape_html(s): """ Return a copy of string s with HTML codes unescaped. Replaces HTML entities for ">", "<", and "&" with those characters. This is the reverse of escape_html(). >>> print unescape_html("<head>") >>> print unescape_html("&nbsp;")   """ s = s.replace(">", ">") s = s.replace("<", "<") s = s.replace("&", "&") return s s_copyright_multiyear = "Copyright %s %d-%d by %s." s_copyright_oneyear = "Copyright %s %d by %s." def s_copyright(s_owner, s_csym="(C)", end_year=None, start_year=None): """ Return a string with a copyright notice. s_owner string with copyright owner's name. s_csym string with copyright symbol. (An HTML entity might be good here.) end_year last year of the copyright. Default is the current year. start_year first year of the copyright. If only end_year is specified, only print one year; if both end_year and start_year are specified, print a range. 
To localize the entire copyright message into another language, change the global variables with the copyright template: s_copyright_multiyear: for a year range s_copyright_oneyear: for a single year """ if not end_year: end_year = time.localtime().tm_year if start_year: return s_copyright_multiyear % (s_csym, start_year, end_year, s_owner) return s_copyright_oneyear % (s_csym, end_year, s_owner) def create_guid(tf, domain_name, uri=""): """ Create globally unique ID using Mark Pilgrim's algorithm. Algorithm taken from here: http://diveintomark.org/archives/2004/05/28/howto-atom-id """ tup = time.localtime(tf) # ymd (year-month-day) example: 2003-12-13 ymd = time.strftime("%Y-%m-%d", tup) if uri == "": # mush (all mushed together) example: 20031213083000 mush = time.strftime("%Y%m%d%H%M%S", tup) uri = "/weblog/" + mush s = "tag:%s,%s:%s" % (domain_name, ymd, uri) s = s.replace("#", "/") return s if __name__ == "__main__": failed_tests = 0 def self_test(message): """ Check to see if a test failed; if so, print warnings. message: string to print on test failure Implicit arguments: failed_tests -- count of failed tests; will be incremented correct -- the expected result of the test result -- the actual result of the test """ global failed_tests if result != correct: failed_tests += 1 print module_banner print "test failed:", message print " correct:", correct print " result: ", result print result = entities_to_ws("nudge nudge say no more") correct = "nudge nudge say no more" self_test("entites_to_ws() test 0") s = "and now\n\n\nfor \t something\v completely\r\n different" result = normalize_ws(s) correct = "and now for something completely different" self_test("normalize_ws() test 0") result = s_copyright("J. Random Guy", "(C)", 1999, 1990) correct = "Copyright (C) 1990-1999 by J. Random Guy." 
self_test("s_copyright() test 0") s = """Cool example""" result = escape_html(s) correct = """<body><a href="http://www.example.com/">Cool&nbsp;example</a>""" self_test("escape_html() test 0") s = """<body><a href="http://www.example.com/">Cool&nbsp;example</a>""" correct = """Cool example""" result = unescape_html(s) self_test("unescape_html() test 0") correct = """Cool example""" result = unescape_html(escape_html(correct)) self_test("escape_html() test 1") tf = 1141607495 result = create_guid(tf, "www.example.com") correct = "tag:www.example.com,2006-03-05:/weblog/20060305171135" self_test("create_guid() test 0") from sys import exit s_module = module_name + " " + module_version if failed_tests == 0: print s_module + ": self-test: all tests succeeded!" exit(0) elif failed_tests == 1: print s_module + " self-test: 1 test failed." exit(1) else: print s_module + " self-test: %d tests failed." % failed_tests exit(1) pyfeed-0.7.4/setup.py0000644000175000017500000000063610415551421014373 0ustar stevehasteveha#!/usr/bin/env python from distutils.core import setup setup(name='PyFeed', version='0.7.4', description='Modules for working with syndication feeds', # long_description="""long description here""", license='BSD', author='Steve R. Hastings', author_email='steve@hastings.org', url='http://www.blarg.net/~steveha/pyfeed.html', packages=['feed', 'feed.date'], ) pyfeed-0.7.4/README.txt0000644000175000017500000001152510410624001014344 0ustar stevehastevehaTo install: python setup.py install For more information on Python distutils install: http://docs.python.org/inst/ This is the README.txt file that comes with the "PyFeed" family of Python library modules. These modules are useful for making syndication feeds. I have released these modules under the BSD license. Please see the comments at the beginning of each source file for the full text of the license. I would like to donate these modules to the Python Software Foundation. 
(Of course it's up to the PSF to decide whether they want them or not.) Here is a list of the modules, with notes on each. xe xe (short for "XML elements") is a Python library module that defines classes to work with XML elements in a Pythonic way. PyFeed depends heavily on xe; you must have xe installed to use PyFeed. xe is packaged separately and has its own installer. feed.date.tools This is a Python library module that defines some utility functions for working with Python time float values. feed.date.rfc3339 This is a Python library module with functions for converting timestamp strings in RFC 3339 format to Python time float values, and vice versa. RFC 3339 is the timestamp format used by the Atom feed syndication format. feed.date.rfc822 This is a Python library module with functions for converting timestamp strings in extended RFC 822 format to Python time float values, and vice versa. "Extended RFC 822" means the flavor of RFC 822 that is supported by RSS 2.0; the key extension is that years can be four digits (and this module defaults to writing four-digit years). feed.atom This is a Python library module designed to make it very easy to work with an Atom syndication feed. http://atomenabled.org/developers/syndication/ feed.atom is built on top of xe. It automatically manages the XML tree structure for you; you can just focus on the content you want to syndicate and let the module take care of the formatting. Take a look at the test cases at the end of the module, for example code showing how to set up an Atom feed with an entry. 
Also, here are a few short examples: To create an XML document with a feed in it, you do this: xmldoc = XMLDoc() feed = Feed() xmldoc.root_element = feed The above lines are so common I added a convenience function to make them into a one-liner: xmldoc, feed = new_xmldoc_feed() To assign an entry to a feed, you just do this: entry = Entry() # set up entry by assigning to its properties feed.entries.append(entry) This adds "entry" to the internal list that keeps track of entries. "entry" is now nested inside "feed", which is nested inside "xmldoc". When you want to save the XML in a file, you can just do this: f = open("file.xml", "w") s = str(xmldoc) f.write(s) f.write("\n") # write() doesn't add a newline on its own feed.rss This is a Python library module designed to make it very easy to work with an RSS 2.0 syndication feed. http://blogs.law.harvard.edu/tech/rss feed.rss is built on top of xe. It automatically manages the XML tree structure for you; you can just focus on the content you want to syndicate and let the module take care of the formatting. Take a look at the test cases at the end of the module, for example code showing how to set up an RSS feed with an item. Also, here are a few short examples: To create an XML document with a feed in it, you do this: xmldoc = XMLDoc() xmldoc.root_element = RSS() channel = Channel() xmldoc.root_element.channel = channel The above lines are so common I added a convenience function to make them into a one-liner: xmldoc, channel = new_xmldoc_channel() To assign an item to a channel, you just do this: item = Item() # set up item by assigning to its properties channel.items.append(item) This adds "item" to the internal list that keeps track of items. "item" is now nested inside "channel", which is nested inside "xmldoc". 
When you want to save the XML in a file, you can just do this: f = open("file.xml", "w") s = str(xmldoc) f.write(s) f.write("\n") # write() doesn't add a newline on its own feed.opml1 / feed.opml These are Python library modules designed to make it very easy to work with OPML data. opml1.py creates OPML 1.0 XML data; opml.py is intended to create the latest version of OPML (currently, 2.0). See the examples with feed.atom and feed.rss, above, to get an idea how to use these. As with those, there is a convenience function for the common case of: xmldoc, opml = new_xmldoc_opml() feed.tools This is a Python library module that defines some utility functions that are handy when you are generating a syndication feed. These functions are not specific to any particular syndication format. If you have any questions, comments, or bug reports about any of these modules, please contact me using this email address: pyfeed@langri.com I hope you will find these modules useful! Steve R. Hastings steve@hastings.org pyfeed-0.7.4/examples/0000755000175000017500000000000010415552300014467 5ustar stevehastevehapyfeed-0.7.4/examples/rss2sample.py0000644000175000017500000000601310414535776017155 0ustar stevehasteveha# rss2sample.py -- generate the rss2sample.xml file with rssfeed.py # # This will write a file called rss2sample.tmp, which should be # identical to the included rss2sample.xml file. # # This example is taken from here: # http://media-cyber.law.harvard.edu/blogs/gems/tech/rss2sample.xml # # The differences are only: # a) I changed the indenting from 3 spaces to 1 tab per indent. # b) I changed the order in which a few lines appear, to match what # rssfeed.py does by default. # # Steve R. 
Hastings # pyfeed@langri.com from feed.rss import * from feed.tools import escape_html set_default_time_offset("GMT") xmldoc, channel = new_xmldoc_channel() xmldoc.xml_decl.attrs["encoding"] = "" channel.title = "Liftoff News" channel.link = "http://liftoff.msfc.nasa.gov/" channel.description = "Liftoff to Space Exploration." channel.language = "en-us" channel.pub_date = "Tue, 10 Jun 2003 04:00:00 GMT" channel.last_build_date = "Tue, 10 Jun 2003 09:41:01 GMT" channel.generator = "Weblog Editor 2.0" channel.managing_editor = "editor@example.com" channel.web_master = "webmaster@example.com" item = Item() s = """Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a partial eclipse of the Sun on Saturday, May 31st.""" item.description = escape_html(s) item.pub_date = "Fri, 30 May 2003 11:06:42 GMT" item.guid = "http://liftoff.msfc.nasa.gov/2003/05/30.html#item572" channel.items.append(item) item = Item() item.title = "The Engine That Does More" item.link = "http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp" s = """Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. The proposed VASIMR engine would do that.""" item.description = escape_html(s) item.pub_date = "Tue, 27 May 2003 08:37:32 GMT" item.guid = "http://liftoff.msfc.nasa.gov/2003/05/27.html#item571" channel.items.append(item) item = Item() item.title = "Astronauts' Dirty Laundry" item.link = "http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp" s = """Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. 
Instead, astronauts have other options.""" item.description = escape_html(s) item.pub_date = "Tue, 20 May 2003 08:56:02 GMT" item.guid = "http://liftoff.msfc.nasa.gov/2003/05/20.html#item570" channel.items.append(item) item = Item() item.title = "Star City" item.link = "http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp" s = """How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's Star City.""" item.description = escape_html(s) item.pub_date = "Tue, 03 Jun 2003 09:39:21 GMT" item.guid = "http://liftoff.msfc.nasa.gov/2003/06/03.html#item573" channel.items.insert(0, item) f = open("rss2sample.tmp", "w") s = str(xmldoc) f.write(s) f.write("\n") f.close() pyfeed-0.7.4/examples/rss2sample.xml0000644000175000017500000000445110405750130017310 0ustar stevehasteveha Liftoff News http://liftoff.msfc.nasa.gov/ Liftoff to Space Exploration. en-us editor@example.com webmaster@example.com Tue, 10 Jun 2003 04:00:00 GMT Tue, 10 Jun 2003 09:41:01 GMT Weblog Editor 2.0 http://blogs.law.harvard.edu/tech/rss Star City http://liftoff.msfc.nasa.gov/news/2003/news-starcity.asp How do Americans get ready to work with Russians aboard the International Space Station? They take a crash course in culture, language and protocol at Russia's <a href="http://howe.iki.rssi.ru/GCTC/gctc_e.htm">Star City</a>. http://liftoff.msfc.nasa.gov/2003/06/03.html#item573 Tue, 03 Jun 2003 09:39:21 GMT Sky watchers in Europe, Asia, and parts of Alaska and Canada will experience a <a href="http://science.nasa.gov/headlines/y2003/30may_solareclipse.htm">partial eclipse of the Sun</a> on Saturday, May 31st. http://liftoff.msfc.nasa.gov/2003/05/30.html#item572 Fri, 30 May 2003 11:06:42 GMT The Engine That Does More http://liftoff.msfc.nasa.gov/news/2003/news-VASIMR.asp Before man travels to Mars, NASA hopes to design new engines that will let us fly through the Solar System more quickly. 
The proposed VASIMR engine would do that. http://liftoff.msfc.nasa.gov/2003/05/27.html#item571 Tue, 27 May 2003 08:37:32 GMT Astronauts' Dirty Laundry http://liftoff.msfc.nasa.gov/news/2003/news-laundry.asp Compared to earlier spacecraft, the International Space Station has many luxuries, but laundry facilities are not one of them. Instead, astronauts have other options. http://liftoff.msfc.nasa.gov/2003/05/20.html#item570 Tue, 20 May 2003 08:56:02 GMT pyfeed-0.7.4/examples/rss_parse_example.py0000644000175000017500000000135210410337622020561 0ustar stevehastevehafrom feed.rss import * xmldoc, channel = new_xmldoc_channel() set_indent_str(" ") lst_errors = [] channel.import_xml("rss2sample.xml", lst_errors) if len(lst_errors) == 0: print "Import result: No errors!\n" else: print "Import result: Errors:\n" + "\n".join(lst_errors) + "\n" print "And here is some info from the channel:" print " title:", channel.title.text print " description:", channel.description.text print " item 0 title:", channel.items[0].title.text print " item 3 title:", channel.items[3].title.text title = Title() title.import_xml("""This is the second item""") channel.items[1].title = title print " item 1 title:", channel.items[1].title.text print channel.items[1] pyfeed-0.7.4/PKG-INFO0000644000175000017500000000041110415551370013750 0ustar stevehastevehaMetadata-Version: 1.0 Name: PyFeed Version: 0.7.4 Summary: Modules for working with syndication feeds Home-page: http://www.blarg.net/~steveha/pyfeed.html Author: Steve R. Hastings Author-email: pyfeed@langri.com License: BSD Description: UNKNOWN Platform: UNKNOWN pyfeed-0.7.4/psf.txt0000644000175000017500000000047710410617235014216 0ustar stevehastevehaI wish to donate xe and the PyFeed modules to the Python Software Foundation. I have released these modules under the BSD license, but I would be happy to license the modules to the PSF under another license (such as the Academic Freedom License) if that would help in any way. --Steve R. 
Hastings steve@hastings.org pyfeed-0.7.4/TODO0000644000175000017500000000044010415555335013352 0ustar stevehasteveha* add a .fullurl() method to Link(), etc. that walks upward through nest levels, finds xml:base, resolves relative URL, and returns absolute URL * add XML namespace support: if a is the Atom space, recognize (I will add it to xe first, and then make the changes in PyFeed.)