PubTal-3.5/ 0000755 0001050 0001050 00000000000 11555341012 011325 5 ustar cms103 cms103 PubTal-3.5/LICENSE.txt 0000644 0001050 0001050 00000002732 11555340742 013165 0 ustar cms103 cms103 PubTal 3.5
--------------------------------------------------------------------
Copyright (c) 2011 Colin Stewart (http://www.owlfish.com/)
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
3. The name of the author may not be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
PubTal-3.5/README.txt 0000644 0001050 0001050 00000005273 11555340742 013043 0 ustar cms103 cms103 PubTal 3.5
----------
A template driven web site builder for small sites.
Installation
------------
Full installation instructions for Linux, MacOS X and Windows can be found in documentation/html/installation.html.
To install PubTal under Unix:
(Note that to perform the installation of PubTal you will probably
have to have the Python Development package installed.)
1 - Become root
2 - Run "python setup.py install"
Installing Plugins
------------------
PubTal supports the addition of new functionality through a plugin architecture.
Several plugins are installed by default with PubTal to provide support for
HTMLText, OpenOffice, Catalogue, Binary, and Raw content types.
Additional plugins that are not installed by default can be found in the
optional-plugins directory. Currently these include:
textile.py - provides Textile (http://www.textism.com/tools/textile/)
support. This requires pyTextile (http://diveintomark.org/projects/pytextile/)
and Python 2.2 to be installed.
abiwordContent - provides AbiWord content support. AbiWord currently has
some significant bugs, which is why this plugin is not installed by default.
CSVPlugin - Provides support for generating multiple web pages based on the
contents of a .CSV file. Documentation on how to use this plugin is included
in the main documentation.
To install these extra plugins (or any other PubTal 2.x plugin) simply copy
the plugin to the location of the PubTal plugin directory, beneath the Python
site-packages directory.
(Under Debian this can be found in:
/usr/lib/python2.2/site-packages/pubtal/plugins/)
Alternatively, add the following configuration option to your site configuration
file, replacing /usr/local/PubTal/plugins/ with the path to the plugin dir:
Made with PubTal %s
""" % pubtal.__version__ } self.messageBus.notifyEvent ("PagePublisher.InitComplete") def getUI (self): return self.ui def getConfig (self): return self.config def getContentPublisher (self, contentType): return self.supportedContent.get (contentType, None) def publish (self, page): contentType = page.getOption ('content-type') try: publisher = self.supportedContent [contentType] except: msg = "Unsupported content type: %s" % contentType self.log.warn (msg) self.ui.warn (msg) return 1 try: publisher.publish (page) return 1 except Exception, e: self.log.error ("Exception publishing page: %s" % repr (e)) self.ui.taskError ("Page Publication failed: %s " % str (e)) return 0 def expandTemplate (self, template, context, relativeOutputPath, macros): """ Expand the given Template object using the context, writing to the output path. Looks up the character-set for each template and macro. """ absTemplateName = template.getTemplatePath() templateCharset = template.getOption ('character-set', self.characterSet) suppressXMLDeclaration = template.getOption ('suppress-xmldecl') outputType = template.getOption ('output-type') if (outputType == 'HTML'): # For HTML output-type we guess as to the SimpleTAL template kind taltemplate = self.templateCache.getTemplate (absTemplateName, inputEncoding=templateCharset) else: # Assume it's XML taltemplate = self.templateCache.getXMLTemplate (absTemplateName) # Handle XHTML DOCTYPE xmlDoctype = template.getOption ('xml-doctype', None) self.ContextFunctions.setCurrentPage (relativeOutputPath, context) context.addGlobal ('ispage', self.ContextFunctions.isPage) context.addGlobal ('readFile', self.ContextFunctions.readFile) context.addGlobal ('pubtal', self.pubTalInfo) # Add macros to the context macroTemplates = {} for macroName in macros.keys(): macTemplate = self.templateConfig.getTemplate (macros [macroName]) macroCharSet = macTemplate.getOption ('character-set', self.characterSet) mTemp = self.templateCache.getTemplate (macros [macroName], 
inputEncoding=macroCharSet) macroTemplates [macroName] = mTemp.macros context.addGlobal ('macros', macroTemplates) if (self.log.isEnabledFor (logging.DEBUG)): self.log.debug (str (context)) dest = self.openOuputFile (relativeOutputPath) if (isinstance (taltemplate, simpleTAL.XMLTemplate)): if (xmlDoctype is not None): taltemplate.expand (context, dest, outputEncoding=templateCharset, docType=xmlDoctype, suppressXMLDeclaration=suppressXMLDeclaration) dest.close() return else: taltemplate.expand (context, dest, outputEncoding=templateCharset, suppressXMLDeclaration=suppressXMLDeclaration) dest.close() return taltemplate.expand (context, dest, outputEncoding=templateCharset) dest.close() def openOuputFile (self, relativeOutputPath): """ Creates and required directories and opens a file-like object to the destination path. This provides a common point for PubTal to note all directories it has created and files it has written. The file-like object will keep track of the MD5 of the file written. """ # Make directories if required. outputPath = os.path.join (self.destDir, relativeOutputPath) destDir = os.path.split (outputPath)[0] if (not os.path.exists (destDir)): os.makedirs (destDir) dest = MD5File (outputPath, relativeOutputPath, 'wb', self.localCache) return dest class MD5File: """ This presents a file object to the world, and calculates an MD5 checksum on the fly. When the file is closed it updates a dictionary with the resulting hex digest. This file type should only be used for writing! """ def __init__ (self, filePath, relativeOutputPath, mode, dictionary): self.dictionary = dictionary self.ourmd5 = hashlib.md5() self.ourFile = open (filePath, mode) # We need to transform the path name into ascii compatible strings for some anydbm implementations. 
utfencode = codecs.lookup ("utf8")[0] self.relativeOutputPath = utfencode (relativeOutputPath)[0] self.closed = 0 def close (self): self.ourFile.close() self.dictionary [self.relativeOutputPath] = self.ourmd5.hexdigest() self.closed = 1 def __del__ (self): if (not self.closed): self.close() def flush (self): return self.ourFile.flush() def fileno (self): return self.ourFile.fileno() def read (self, size=None): return self.ourFile.read(size) def readline (self, size=None): return self.ourFile.readline(size) def readlines (self, size=None): return self.ourFile.readlines (size) def xreadlines (self): return self.ourFile.xreadlines() def seek (self, offset, wence=0): return self.ourFile.seek(offset, wence) def tell (self): return self.ourFile.tell() def truncate (self, size=None): return self.ourFile.truncate (size) def write (self, str): self.ourFile.write (str) self.ourmd5.update (str) def writelines (self, aseq): for value in aseq: self.ourmd5.update (value) self.ourFile.write (value) def __itter__ (self): return self.ourFile.__itter__() class ContextFunctions: def __init__ (self, siteConfig): self.log = logging.getLogger ("PubTal.PagePublisher") self.currentTargetPath = None self.currentContext = None self.config = siteConfig self.contentDir = self.config.getContentDir() self.destinationDir = self.config.getDestinationDir() self.isPage = simpleTALES.PathFunctionVariable (self.isCurrentPage) self.readFile = simpleTALES.PathFunctionVariable (self.readExternalFile) def setCurrentPage (self, targetPath, context): self.currentTargetPath = targetPath.replace (os.sep, '/') self.currentContext = context def isCurrentPage (self, targetPath): if (self.currentTargetPath == targetPath.replace (os.sep, '/')): return 1 return 0 def readExternalFile (self, targetPath): # Start by evaluating the targetPath to resolve the filename targetFileName = self.currentContext.evaluate (targetPath) self.log.info ("Resolved path %s to filename %s" % (targetPath, str (targetFileName))) if 
(targetFileName): # Read the file (relative to the content directory) try: targetFile = open (os.path.join (self.contentDir, targetFileName)) targetData = targetFile.read() targetFile.close() return targetData except Exception, e: self.log.error ("Error reading file %s: %s" % (os.path.join (self.contentDir, targetFileName), str (e))) raise return None class ContentPublisher: def __init__ (self, pagePublisher): self.pagePublisher = pagePublisher self.config = self.pagePublisher.config self.contentConfig = self.config.getContentConfig() self.templateConfig = self.config.getTemplateConfig() self.characterSet = self.config.getDefaultCharacterSet() self.characterSetCodec = codecs.lookup (self.characterSet)[1] self.destDir = self.config.getDestinationDir() self.contentDir = self.config.getContentDir() def readHeadersAndContent (self, page, preserveCharacterSet = 0): """ This method reads the source file for this page, and then returns the headers defined in this file and the raw content of the body of the file. If preserveCharacterSet is false then Unicode is returned. """ sourceFile = open (page.getSource(), 'r') readingHeaders = 1 headers = {} if (not preserveCharacterSet): pageCharSet = page.getOption ('character-set', None) if (pageCharSet is not None): # This page has it's own character set pageCodec = codecs.lookup (pageCharSet)[1] else: # This page uses the default character set. pageCodec = self.characterSetCodec else: # We use a dummy function that doesn't alter the string if we are preserving the character set. pageCodec = lambda decodedString: (decodedString, 0) while (readingHeaders): line = pageCodec (sourceFile.readline())[0] offSet = line.find (':') if (offSet > 0): headers [line[0:offSet]] = line[offSet + 1:].strip() else: readingHeaders = 0 rawContent = pageCodec (sourceFile.read())[0] sourceFile.close() return (headers, rawContent) def getPageContext (self, page, template): """ Returns the default context which will apply to most pages of content. 
Template is the template that this context will eventually be used in, and is used to extract the type of output (HTML, XHTML, WML, etc) and the destination file extension. """ copyrightYear = timeformat.format ('%Y') destExtension = '.' + template.getTemplateExtension() relativeDestPath = os.path.splitext (page.getRelativePath())[0] + destExtension destPath = os.path.join (self.destDir, relativeDestPath) destFilename = os.path.basename (destPath) pageContext = {'lastModifiedDate': DateContext.Date (time.localtime (page.getModificationTime()), '%a[SHORT], %d %b[SHORT] %Y %H:%M:%S %Z') ,'copyrightYear': DateContext.Date (time.localtime(), '%Y') ,'sourcePath': page.getRelativePath() ,'absoluteSourcePath': page.getSource() ,'destinationPath': relativeDestPath ,'absoluteDestinationPath': destPath ,'destinationFilename': destFilename ,'depth': page.getDepthString() ,'headers': page.getHeaders() } siteURLPrefix = page.getOption ('url-prefix') if (siteURLPrefix is not None): pageContext ['absoluteDestinationURL'] = '%s/%s' % (siteURLPrefix, relativeDestPath) return pageContext class PublisherException (Exception): pass PubTal-3.5/lib/pubtal/SiteUtils.py 0000644 0001050 0001050 00000031103 11555340742 015670 0 ustar cms103 cms103 """ Utility classes to help automate the PubTal testing. Copyright (c) 2004 Colin Stewart (http://www.owlfish.com/) All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. If you make any bug fixes or feature enhancements please let me know! """ try: import logging except: import InfoLogging as logging import os, os.path, copy, hashlib, getpass import xml.sax, xml.sax.handler, StringIO class BlockFilter: def filter (self, msg): return 0 class UserInteraction: """ This class defines the interface that should be provided to interact with the core PubTal library. This implementation is for command line clients. It isn't neseccary to inherit from this class. """ def prompt (self, msg): return raw_input ('%s: ' % msg) def promptPassword (self, msg): return getpass.getpass ('%s: ' % msg) def taskProgress (self, msg, percentageDone): print "(%s %%) %s" % (str (int (percentageDone)), msg) def taskError (self, msg): print "ERROR: %s" % msg def taskDone (self): print "Finished." 
def warn (self, msg): print "Warning: %s" % msg def info (self, msg): print msg class SilentUI (UserInteraction): def prompt (self, msg): return "" def promptPassword (self, msg): return "" def taskProgress (self, msg, percentageDone): pass def taskError (self, msg): pass def taskDone (self): pass def warn (self, msg): pass def info (self, msg): pass class SiteBuilder: def __init__ (self, location=None): self.log = logging.getLogger ("PubTal.SiteCreation") if (location is None): self.siteDir = os.tempnam() else: self.siteDir = location if (os.access (self.siteDir, os.F_OK)): msg = "Directory %s already exists!" % self.siteDir self.log.error (msg) raise Exception (msg) def buildDirs (self, templateDir="template", destinationDir="dest", contentDir="content"): self.log.debug ("Building site directory %s" % self.siteDir) os.mkdir (self.siteDir) self.contentDir = os.path.join (self.siteDir, contentDir) self.log.debug ("Building content directory %s" % self.contentDir) os.mkdir (self.contentDir) self.destinationDir = os.path.join (self.siteDir, destinationDir) self.log.debug ("Building destination directory %s" % self.destinationDir) os.mkdir (self.destinationDir) self.templateDir = os.path.join (self.siteDir, templateDir) self.log.debug ("Building template directory %s" % self.templateDir) os.mkdir (self.templateDir) def createContent (self, filePath, content): self.log.debug ("Creating content file %s" % filePath) destPath = os.path.join (self.contentDir, filePath) self._createDirsAndFile_ (destPath, content) def createTemplate (self, filePath, template): self.log.debug ("Creating template file %s" % filePath) destPath = os.path.join (self.templateDir, filePath) self._createDirsAndFile_ (destPath, template) def createConfigFile (self, filePath, config): self.log.debug ("Creating configuration file %s" % filePath) destPath = os.path.join (self.siteDir, filePath) self._createDirsAndFile_ (destPath, config) def getSiteDir (self): return self.siteDir def getContentDir 
(self): return self.contentDir def getDestDir (self): return self.destinationDir def _createDirsAndFile_ (self, destPath, content): # Make directories if required. destDir = os.path.split (destPath)[0] if (not os.path.exists (destDir)): os.makedirs (destDir) dest = open (destPath, 'w') dest.write (content) dest.close() def destroySite (self): self.log.debug ("Destroying site directory and contents") pathCleaner = pathRemover () pathCleaner.walk (self.siteDir) class PageBuilder: """ A class for determining the pages to be generated.""" def __init__ (self, config, ui=SilentUI()): self.ui = ui self.config = config self.messageBus = self.config.getMessageBus() self.contentConfig = config.getContentConfig() self.currentContent = [] self.log = logging.getLogger ('PageBuilder') self.contentDir = config.getContentDir() self.destDir = config.getDestinationDir() self.ignoreFilters = config.getIgnoreFilters() def getPages (self, target, options={}): """ Returns a Page list target is either: None - Get all files List of files or dir paths. """ result = [] self.messageBus.notifyEvent ("PageBuilder.Start", options) if (target is None): self.log.info ("Building whole site.") self.ui.info ("Building whole site.") targetList = [self.contentDir] else: targetList = [] for t in target: tFile = os.path.normpath (os.path.abspath (t)) if (hasattr (os.path, "realpath")): # Under Unix and 2.2 we can remove symlinks. tFile = os.path.realpath (tFile) targetList.append (os.path.abspath (tFile)) self.log.debug ("Target path: %s" % str (targetList)) for targetPath in targetList: self.log.debug ("Checking target path: %s" % targetPath) # Are we doing just one file or a dir? 
if (os.path.isfile (targetPath)): # Just get this entry try: result.extend (self.contentConfig.getPages (targetPath, options)) except: self.ui.taskError ("Unable to build Page %s" % targetPath) self.messageBus.notifyEvent ("PageBuilder.Error") raise else: os.path.walk (targetPath, self.walkPaths, None) for content in self.currentContent: try: result.extend (self.contentConfig.getPages (content, options)) except: self.ui.taskError ("Unable to build Page %s" % content) self.messageBus.notifyEvent ("PageBuilder.Error") raise self.currentContent = [] self.messageBus.notifyEvent ("PageBuilder.End") return result def walkPaths (self, arg, dirname, names): for name in names: self.log.debug ("Checking path %s for content." % os.path.join (dirname, name)) realName = os.path.join (dirname, name) if (os.path.isfile (realName)): contentFile = 1 for filter in self.ignoreFilters: if (filter.match (realName)): contentFile = 0 if (contentFile): self.currentContent.append (realName) else: self.log.debug ("Ignoring path %s" % realName) class pathRemover: def __init__ (self): self.dirsToRemove = [] self.log = logging.getLogger ("PubTal.SiteCreation.pathRemover") def walk (self, path): self.dirsToRemove = [path] os.path.walk (path, self.walking, None) # Now remove all of the directories we saw, starting with the last one self.dirsToRemove.reverse() for dir in self.dirsToRemove: os.rmdir (dir) self.dirsToRemove = [] def walking (self, arg, dirname, names): for name in names: #self.log.debug ("Would delete file: %s" % os.path.join (dirname, name)) target = os.path.join (dirname, name) if (os.path.islink (target)): os.remove (target) elif (os.path.isfile (target)): os.remove (target) elif (os.path.isdir (target)): self.dirsToRemove.append (target) else: self.log.error ("Path %s is neither a directory or a file!" 
% target) class XMLChecksumHandler (xml.sax.handler.ContentHandler, xml.sax.handler.DTDHandler, xml.sax.handler.ErrorHandler): """ A class that parses an XML document and generates an MD5 checksum for the document. This allows two XML documents to be compared, ignoring differences in attribute ordering and other such differences. """ def __init__ (self, parser): xml.sax.handler.ContentHandler.__init__ (self) self.ourParser = parser def startDocument (self): self.digest = hashlib.md5() def startPrefixMapping (self, prefix, uri): self.digest.update (prefix) self.digest.update (uri) def endPrefixMapping (self, prefix): self.digest.update (prefix) def startElement (self, name, atts): self.digest.update (name) allAtts = atts.getNames() allAtts.sort() for att in allAtts: self.digest.update (att) self.digest.update (atts [att]) def endElement (self, name): self.digest.update (name) def characters (self, data): self.digest.update (data) def processingInstruction (self, target, data): self.digest.update (target) self.digest.update (data) def skippedEntity (self, name): self.digest.update (name) # DTD Handler def notationDecl(self, name, publicId, systemId): self.digest.update (name) self.digest.update (publicId) self.digest.update (systemId) def unparsedEntityDecl(name, publicId, systemId, ndata): self.digest.update (name) self.digest.update (publicId) self.digest.update (systemId) self.digest.update (ndata) def error (self, excpt): print "Error: %s" % str (excpt) def warning (self, excpt): print "Warning: %s" % str (excpt) def getDigest (self): return self.digest.hexdigest() class DirCompare: def __init__ (self): self.xmlParser = None def compare (self, path, expected, comparisonFunc = None): """ By default do a string comparison between all files in the given path, and all expected files. Use compare (path, expected, comparisonFun = dirCompare.compareXML) to do an XML comparison. 
""" self.expected = copy.copy (expected) self.path = path self.badFile = None if (comparisonFunc is None): comparisonFunc = self.compareStrings os.path.walk (path, self.walking, comparisonFunc) if (self.badFile is not None): return self.badFile if (len (self.expected) > 0): return "Missing files: " + str (self.expected.keys()) return None def compareStrings (self, target, relTarget): testFile = open (target, 'r') content = testFile.read() testFile.close() if (content != self.expected [relTarget]): self.badFile = "File %s had content:\n%s\nexpected:\n%s\n" % (relTarget, content, self.expected [relTarget]) return 0 return 1 def compareXML (self, target, relTarget): """ Compares XML documents, discounting ordering of attributes, etc. """ if (self.xmlParser is None): self.xmlParser = xml.sax.make_parser() self.xmlParser.setFeature (xml.sax.handler.feature_external_ges, 0) self.xmlParser.setFeature (xml.sax.handler.feature_namespaces, 1) self.checksumHandler = XMLChecksumHandler(self.xmlParser) self.xmlParser.setContentHandler (self.checksumHandler) self.xmlParser.setDTDHandler (self.checksumHandler) self.xmlParser.setErrorHandler (self.checksumHandler) # Get the XML checksum of the file we are testng. testFile = open (target, 'r') self.xmlParser.parse (testFile) realChecksum = self.checksumHandler.getDigest() testFile.close() # Get the XML checksu mof the expected result. 
testFile = StringIO.StringIO (self.expected [relTarget]) self.xmlParser.parse (testFile) expectedChecksum = self.checksumHandler.getDigest() testFile.close() if (realChecksum != expectedChecksum): testFile = open (target, 'r') content = testFile.read() testFile.close() self.badFile = "File %s had content:\n%s\nexpected:\n%s\n" % (relTarget, content, self.expected [relTarget]) return 0 return 1 def walking (self, arg, dirname, names): if (self.badFile is not None): return comparisonFunc = arg commonRoot = os.path.commonprefix ([self.path, dirname]) for name in names: target = os.path.join (dirname, name) relTarget = target[len (commonRoot)+1:] if (os.path.isfile (target)): if (not self.expected.has_key (relTarget)): self.badFile = "Found unexepected file %s" % relTarget return if (not comparisonFunc (target, relTarget)): return del self.expected [relTarget] PubTal-3.5/lib/pubtal/plugins/ 0000755 0001050 0001050 00000000000 11555341012 015043 5 ustar cms103 cms103 PubTal-3.5/lib/pubtal/plugins/openOfficeContent/ 0000755 0001050 0001050 00000000000 11555341012 020453 5 ustar cms103 cms103 PubTal-3.5/lib/pubtal/plugins/openOfficeContent/__init__.py 0000644 0001050 0001050 00000010344 11555340742 022577 0 ustar cms103 cms103 """ OpenOffice to HTML Plugin for PubTal Copyright (c) 2004 Colin Stewart (http://www.owlfish.com/) All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. If you make any bug fixes or feature enhancements please let me know! """ try: import logging except: from pubtal import InfoLogging as logging from pubtal import SitePublisher from simpletal import simpleTAL, simpleTALES import OpenOfficeToHTMLConverter def getPluginInfo (): builtInContent = [{'functionality': 'content', 'content-type': 'OpenOffice' ,'file-type': 'sxw','class': OpenOfficePagePublisher}] return builtInContent class OpenOfficePagePublisher (SitePublisher.ContentPublisher): def __init__ (self, pagePublisher): SitePublisher.ContentPublisher.__init__ (self, pagePublisher) self.log = logging.getLogger ("PubTal.OpenOfficePagePublisher") self.converter = OpenOfficeToHTMLConverter.OpenOfficeConverter() # Get the default character set for the site. 
config = pagePublisher.getConfig() self.defaultCharset = config.getDefaultCharacterSet() self.encodingCapabilities = config.getEncodingCapabilities() def publish (self, page): template = self.templateConfig.getTemplate (page.getOption ('template', 'template.html')) context = simpleTALES.Context(allowPythonPath=1) # Get the page context for this content map = self.getPageContext (page, template) context.addGlobal ('page', map) macros = page.getMacros() # Determine the destination for this page relativeDestPath = map ['destinationPath'] self.pagePublisher.expandTemplate (template, context, relativeDestPath, macros) # Publish any bundled pictures. for fileName, data in self.converter.getPictures(): destFile = self.pagePublisher.openOuputFile (fileName) destFile.write (data) destFile.close() def getPageContext (self, page, template): pageMap = SitePublisher.ContentPublisher.getPageContext (self, page, template) # Determine the character set that will be used on output templateCharset = template.getOption ('character-set', self.defaultCharset) # Now determine what capabilities this character set offers smartQuotes = not self.encodingCapabilities.getCapability (templateCharset, 'SmartQuotes') hyphens = not self.encodingCapabilities.getCapability (templateCharset, 'Hyphen') # Parse the page options = {'CleanSmartQuotes': smartQuotes, 'CleanHyphens': hyphens} options ['DestinationFile'] = pageMap ['destinationPath'] options ['output-type'] = template.getOption ('output-type', 'HTML') options ['preserveSpaces'] = page.getBooleanOption ('preserve-html-spaces', 1) self.converter.convert (page.getSource(), options) headers = self.converter.getMetaInfo() content = self.converter.getContent() footNotes = self.converter.getFootNotes() actualHeaders = pageMap ['headers'] actualHeaders.update (headers) pageMap ['headers'] = actualHeaders pageMap ['content'] = content pageMap ['footnotes'] = footNotes return pageMap PubTal-3.5/lib/pubtal/plugins/openOfficeContent/OOFilter.py 0000644 
# Archive residue: tail of the tar member header that precedes this module in the dump.
# It is kept verbatim (as a comment) so that no archive text is lost.
# 0001050 0001050 00000023445 11555340742 022531 0 ustar cms103 cms103
import copy
import xml.sax
import xml.sax.handler

try:
    import logging
except ImportError:
    from pubtal import InfoLogging as logging

# These are the tags that we explicitly handle. We also handle all field elements as well,
# but only by ignoring them.
#~ office:document-content
#~ meta:keyword
#~ style:style
#~ style:properties
#~ text:h
#~ text:p
#~ text:ordered-list
#~ text:unordered-list
#~ text:list-item
#~ text:span
#~ text:a
#~ text:footnote
#~ text:endnote
#~ text:footnote-body
#~ text:endnote-body
#~ text:bookmark-start
#~ text:bookmark
#~ text:line-break
#~ draw:image
#~ draw:a
#~ svg:desc
#~ table:table
#~ table:sub-table
#~ table:table-header-rows
#~ table:table-row
#~ table:table-cell
# ALL Dublin core elements.

# The TAG_MAP lists all tags we can handle: each key is a prefixed element name and its
# value is the list of child element names that are let through beneath it.
TAG_MAP = {
    # meta.xml has document-meta as root element.
    'office:document-meta': ['office:meta'],
    'office:meta': ['meta:keywords', 'meta:creation-date', 'dc:title', 'dc:description',
                    'dc:subject', 'dc:creator', 'dc:date', 'dc:language'],
    'meta:keywords': ['meta:keyword'],
    'meta:keyword': [],
    'meta:creation-date': [],
    'dc:title': [],
    'dc:description': [],
    'dc:subject': [],
    'dc:creator': [],
    'dc:date': [],
    'dc:language': [],
    # styles.xml has document-styles
    'office:document-styles': ['office:styles'],
    'office:styles': ['style:style'],
    # style:style is used in both style.xml and content.xml, and only contains style:properties.
    'style:style': ['style:properties'],
    'style:properties': [],
    # The content.xml starts with office:document-content. We only care about styles and the body.
    'office:document-content': ['office:automatic-styles', 'office:body'],
    'office:automatic-styles': ['style:style'],
    'office:body': ['text:h', 'text:p', 'text:ordered-list', 'text:unordered-list',
                    'table:table', 'draw:a', 'text:section'],
}

# This list is taken from the OO DTD (text.mod) from the %fields ENTITY
# The following elements have been removed, because our parser does not have
# any code to handle them:
# 'office:annotation',
FIELD_ELEMENTS = [
    'text:date', 'text:time', 'text:page-number', 'text:page-continuation',
    'text:sender-firstname', 'text:sender-lastname', 'text:sender-initials',
    'text:sender-title', 'text:sender-position', 'text:sender-email',
    'text:sender-phone-private', 'text:sender-fax',
    'text:sender-company', 'text:sender-phone-work', 'text:sender-street',
    'text:sender-city', 'text:sender-postal-code',
    'text:sender-country', 'text:sender-state-or-province', 'text:author-name',
    'text:author-initials', 'text:placeholder', 'text:variable-set',
    'text:variable-get', 'text:variable-input', 'text:user-field-get',
    'text:user-field-input', 'text:sequence', 'text:expression',
    'text:text-input', 'text:database-display', 'text:database-next',
    'text:database-select', 'text:database-row-number', 'text:database-name',
    'text:initial-creator', 'text:creation-date', 'text:creation-time',
    'text:description', 'text:user-defined', 'text:print-time', 'text:print-date',
    'text:printed-by', 'text:title', 'text:subject', 'text:keywords',
    'text:editing-cycles', 'text:editing-duration', 'text:modification-time',
    'text:modification-date', 'text:creator', 'text:conditional-text',
    'text:hidden-text', 'text:hidden-paragraph', 'text:chapter', 'text:file-name',
    'text:template-name', 'text:page-variable-set', 'text:page-variable-get',
    'text:execute-macro', 'text:dde-connection', 'text:reference-ref',
    'text:sequence-ref', 'text:bookmark-ref', 'text:footnote-ref', 'text:endnote-ref',
    'text:sheet-name', 'text:bibliography-mark', 'text:page-count',
    'text:paragraph-count', 'text:word-count', 'text:character-count',
    'text:table-count', 'text:image-count', 'text:object-count',
    'text:script', 'text:measure',
]

# FIELD_ELEMENTS need to be all empty for us to handle them
# These are the ones we can handle, despite not doing so explicitly
for elmn in FIELD_ELEMENTS:
    TAG_MAP[elmn] = []

INLINE_ELEMENTS = copy.copy(FIELD_ELEMENTS)
# This is based on the definition in the text.mod DTD.
# I've NOT listed those elements that are harmless but unimplemented (e.g. tab-stop)
# Excluded: text:tab-stop, text:bookmark-stop, text:reference-mark, text:reference-mark-start,
# text:reference-mark-end, %shape, text:toc-mark-start, text:toc-mark-end, text:toc-mark,
# text:user-index-mark-start, text:user-index-mark-end, text:user-index-mark,
# text:alphabetical-index-mark-start, text:alphabetical-index-mark-end,
# text:alphabetical-index-mark, %change-marks;, text:ruby
#
# We do list draw:text-box as implemented, otherwise we can not handle images with captions.
# NOTE(review): draw:text-box does not actually appear in the list below or in TAG_MAP, so
# the sentence above does not match the code - confirm which of the two is intended.
INLINE_ELEMENTS.extend(['text:span', 'text:line-break', 'text:footnote', 'text:endnote',
                        'text:a', 'text:s', 'text:bookmark', 'text:bookmark-start', 'draw:a',
                        'draw:image'])

# Now we need to add these extra elements to the TAG_MAP, otherwise we'll filter them out!
# (Several entries deliberately share the one INLINE_ELEMENTS list object.)
TAG_MAP['text:span'] = INLINE_ELEMENTS
TAG_MAP['text:line-break'] = []
TAG_MAP['text:footnote'] = ['text:footnote-body']
TAG_MAP['text:footnote-body'] = ['text:h', 'text:p', 'text:ordered-list', 'text:unordered-list']
TAG_MAP['text:endnote'] = ['text:endnote-body']
TAG_MAP['text:endnote-body'] = ['text:h', 'text:p', 'text:ordered-list', 'text:unordered-list']
TAG_MAP['text:a'] = INLINE_ELEMENTS
TAG_MAP['text:s'] = []
TAG_MAP['text:bookmark'] = []
TAG_MAP['text:bookmark-start'] = []
TAG_MAP['draw:a'] = ['draw:image']
TAG_MAP['draw:image'] = ['svg:desc']
TAG_MAP['svg:desc'] = []

# Used by %textSections
TEXT_SECTIONS_ELEMENTS = ['text:p', 'text:h', 'text:ordered-list', 'text:unordered-list',
                          'table:table', 'text:section']

# We have the following elements left over that need to be defined in the TAG_MAP:
# 'text:h', 'text:p', 'text:ordered-list', 'text:unordered-list', 'table:table', 'text:section'
TAG_MAP['text:h'] = INLINE_ELEMENTS
TAG_MAP['text:p'] = INLINE_ELEMENTS
TAG_MAP['text:unordered-list'] = ['text:list-item']
TAG_MAP['text:ordered-list'] = ['text:list-item']
TAG_MAP['text:list-item'] = ['text:p', 'text:h', 'text:ordered-list', 'text:unordered-list']
TAG_MAP['table:table'] = ['table:table-header-rows', 'table:table-row', 'table:table-cell']
TAG_MAP['table:table-header-rows'] = ['table:table-row']
TAG_MAP['table:table-row'] = ['table:table-cell']
TAG_MAP['table:table-cell'] = ['table:sub-table', 'text:h', 'text:p', 'text:ordered-list',
                               'text:unordered-list']
TAG_MAP['table:sub-table'] = ['table:table-header-rows', 'table:table-row', 'table:table-cell']
TAG_MAP['text:section'] = TEXT_SECTIONS_ELEMENTS

# Maps a namespace URI to the prefix used for that namespace in the TAG_MAP keys.
URLMAP = {
    'http://openoffice.org/2000/office': 'office',
    'http://openoffice.org/2000/text': 'text',
    'http://openoffice.org/2000/style': 'style',
    'http://openoffice.org/2000/table': 'table',
    'http://www.w3.org/1999/XSL/Format': 'fo',
    'http://purl.org/dc/elements/1.1/': 'dc',
    'http://openoffice.org/2000/meta': 'meta',
    'http://www.w3.org/1999/xlink': 'xlink',
    'http://www.w3.org/2000/svg': 'svg',
    'http://openoffice.org/2000/drawing': 'draw',
}


def validateTagMap():
    """ Return the child names used in TAG_MAP that have no TAG_MAP entry of their own.

        An empty list means the map is self-consistent. Each missing name is
        reported once, however many parents refer to it.
    """
    missing = {}
    for children in TAG_MAP.values():
        for child in children:
            if child not in TAG_MAP:
                missing[child] = 1
    return list(missing)


def _prefixedName(name):
    """ Turn a SAX (namespace URI, local name) pair into the 'prefix:local' form used as
        TAG_MAP keys. An unknown namespace gives an empty prefix.
    """
    return '%s:%s' % (URLMAP.get(name[0], ''), name[1])


class SAXFilter(xml.sax.handler.ContentHandler):
    """ The purpose of this class is to filter out calls that we don't handle.

        It also dispatches to other SAX handlers based on the namespaces that they
        register with. An element that is not allowed below its parent (according to
        TAG_MAP) is dropped together with everything it contains.
    """

    def __init__(self):
        xml.sax.handler.ContentHandler.__init__(self)
        self.log = logging.getLogger("PubTal.OOC.SAXFilter")
        self.debugOn = self.log.isEnabledFor(logging.DEBUG)
        # Namespace URI -> handler object that receives the events for that namespace.
        self.documentHandlers = {}
        # One entry per open, allowed element: its handler (or None if none is registered).
        self.handlerStack = []
        # One entry per open, allowed element: the list of children it may contain.
        self.allowedElementsStack = []
        # Greater than zero while inside a rejected element; counts the nesting depth.
        self.skipDepth = 0

    def setHandler(self, namespace, handler):
        """ Register handler for all allowed elements in the given namespace URI. """
        self.documentHandlers[namespace] = handler

    def startDocument(self):
        """ Forget any state left over from a previous parse.

            One filter instance is used for several documents. If an earlier parse
            was aborted by an exception the stacks would still hold entries, and
            the root element of the next document would be checked against them and
            silently skipped.
        """
        self.handlerStack = []
        self.allowedElementsStack = []
        self.skipDepth = 0

    def startElementNS(self, name, qname, atts):
        # Are we skipping elements?
        if self.skipDepth != 0:
            # Skipping, so just increment the depth
            self.skipDepth += 1
            if self.debugOn:
                self.log.debug("Skipping element %s - depth now %s"
                               % (_prefixedName(name), str(self.skipDepth)))
            return

        # Determine whether this tag is allowed or not.
        if not self.__checkAllowed__(_prefixedName(name)):
            self.skipDepth += 1
            return

        # This element is allowed, so find a handler and pass it through.
        # The handler is pushed even when it is None so that the stack stays in step
        # with the element nesting.
        handler = self.documentHandlers.get(name[0], None)
        self.handlerStack.append(handler)
        if handler is not None:
            handler.startElementNS(name, qname, atts)

    def endElementNS(self, name, qname):
        if self.skipDepth != 0:
            self.skipDepth -= 1
            if self.debugOn:
                self.log.debug("Skipping END element %s - depth now %s"
                               % (_prefixedName(name), str(self.skipDepth)))
            return

        handler = self.handlerStack.pop()
        self.allowedElementsStack.pop()
        if self.debugOn:
            self.log.debug("Allowed END element %s" % _prefixedName(name))
        if handler is not None:
            handler.endElementNS(name, qname)

    def characters(self, data):
        # Text inside a rejected element is dropped; so is text with no open element.
        if self.skipDepth != 0 or not self.handlerStack:
            return
        handler = self.handlerStack[-1]
        if handler is not None:
            handler.characters(data)

    def __checkAllowed__(self, tagName):
        """ Return 1 if tagName may appear at the current position, 0 otherwise.

            When the element is allowed, the list of children it may contain is pushed
            onto allowedElementsStack. A root element is always allowed.
        """
        if len(self.allowedElementsStack) == 0:
            # We are allowed, so let's record what we expect next.
            self.log.debug("Root element passed, adding allowed elements to stack.")
            self.allowedElementsStack.append(TAG_MAP.get(tagName, []))
            # We re-check for debug status when we see a root element, that way if logging
            # config is changed between runs we will pick it up.
            self.debugOn = self.log.isEnabledFor(logging.DEBUG)
            return 1

        if tagName in self.allowedElementsStack[-1]:
            # We are allowed, so let's record what we expect next.
            if self.debugOn:
                self.log.debug("Found element %s, allowing" % tagName)
            self.allowedElementsStack.append(TAG_MAP.get(tagName, []))
            return 1

        return 0


# Archive residue: the tar member header of the next file and the opening part of that
# file's module docstring share the last source line of this module in the dump. The text
# is kept verbatim below, as comments, so that this module stays valid Python.
# PubTal-3.5/lib/pubtal/plugins/openOfficeContent/OpenOfficeToHTMLConverter.py 0000644 0001050 0001050 00000066474 11555340742 025754 0 ustar cms103 cms103
# """ OpenOffice to HTML Converter for PubTal
# Copyright (c) 2004 Colin Stewart (http://www.owlfish.com/)
# All rights reserved.
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# 3. The name of the author may not be used to endorse or promote products
# derived from this software without specific prior written permission.
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
# IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
# OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# If you make any bug fixes or feature enhancements please let me know!
""" import xml.sax, zipfile, StringIO, cgi, re, os.path try: import logging except: from pubtal import InfoLogging as logging import OOFilter from pubtal import HTMLWriter OFFICE_URI='http://openoffice.org/2000/office' TEXT_URI='http://openoffice.org/2000/text' STYLE_URI='http://openoffice.org/2000/style' TABLE_URI='http://openoffice.org/2000/table' FORMAT_URI='http://www.w3.org/1999/XSL/Format' DUBLIN_URI='http://purl.org/dc/elements/1.1/' META_URI='http://openoffice.org/2000/meta' XLINK_URI='http://www.w3.org/1999/xlink' SVG_URI='http://www.w3.org/2000/svg' DRAW_URI='http://openoffice.org/2000/drawing' # These are the fo styles that will be treated as CSS styles. SUPPORTED_FO_STYLES = {'text-align':1, 'font-weight':1, 'font-style':1, 'margin-left':1} # These lists act as filters on which styles are applied to which kind of elements. HEADING_STYLE_FILTER = ['text-align', 'margin-left'] PARAGRAPH_STYLE_FILTER = ['text-align', 'underline', 'line-through', 'overline' ,'font-weight', 'font-style', 'vertical-align', 'margin-left'] SPAN_STYLE_FILTER = PARAGRAPH_STYLE_FILTER # These are the assumed defaults for paragraphs - OO setting these will be ignored. 
# Assumed defaults for paragraphs. TextHandler and DrawHandler start their effective-style
# stacks with this dictionary, so a style that only restates one of these values is not
# written out.
DEFAULT_PARAGRAPH_STYLES = {'text-align': 'start', 'font-weight': 'normal',
                            'font-style': 'normal', 'margin-left': '0cm'}


def _cleanCharacters(data, cleanSmartQuotes, cleanHyphens):
    """ Return data with curly double quotes (U+201C, U+201D) replaced by '"' and en dashes
        (U+2013) replaced by '-', each only when the matching flag is true.
    """
    if cleanSmartQuotes:
        data = data.replace(u'\u201c', '"')
        data = data.replace(u'\u201d', '"')
    if cleanHyphens:
        data = data.replace(u'\u2013', '-')
    return data


class OpenOfficeConverter:
    """ Convert OpenOffice format to HTML, XHTML or PlainText """

    def __init__(self):
        self.log = logging.getLogger("PubTal.OOC")
        self.contentParser = SXWContentPraser()

    def convert(self, fileName, config=None):
        """ Parse the OpenOffice archive fileName; the results are then available from the
            get* methods. config is a dictionary of options (an empty one if omitted).
        """
        if config is None:
            # A fresh dictionary per call rather than one shared default object.
            config = {}
        archive = zipfile.ZipFile(fileName, 'r')
        try:
            self.contentParser.parseContent(archive, config)
        finally:
            # Close the archive even when parsing raises.
            archive.close()

    def getMetaInfo(self):
        return self.contentParser.getMetaInfo()

    def getContent(self):
        return self.contentParser.getContent()

    def getFootNotes(self):
        return self.contentParser.getFootNotes()

    def getPictures(self):
        return self.contentParser.getPictures()


class SXWContentPraser(xml.sax.handler.DTDHandler):
    """ Convert OpenOffice format to HTML, XHTML or PlainText """

    def __init__(self):
        self.log = logging.getLogger("PubTal.OOC.SWXContentParser")
        self.saxFilter = OOFilter.SAXFilter()

    def parseContent(self, archive, config):
        """ Parse meta.xml, styles.xml and content.xml from archive (in that order) through
            the SAX filter, using a fresh set of handlers, then read the bundled pictures.
        """
        self.officeHandler = OfficeHandler(config)
        self.styleHandler = StyleHandler(config)
        self.textHandler = TextHandler(self.styleHandler, config)
        self.tableHandler = TableHandler(self.styleHandler, self.textHandler.result, config)
        self.drawHandler = DrawHandler(self.styleHandler, self.textHandler, config)

        self.saxFilter.setHandler(OFFICE_URI, self.officeHandler)
        self.saxFilter.setHandler(DUBLIN_URI, self.officeHandler)
        self.saxFilter.setHandler(META_URI, self.officeHandler)
        self.saxFilter.setHandler(STYLE_URI, self.styleHandler)
        self.saxFilter.setHandler(TEXT_URI, self.textHandler)
        self.saxFilter.setHandler(TABLE_URI, self.tableHandler)
        self.saxFilter.setHandler(DRAW_URI, self.drawHandler)
        self.saxFilter.setHandler(SVG_URI, self.drawHandler)

        self.ourParser = xml.sax.make_parser()
        self.log.debug("Setting features of parser")
        self.ourParser.setFeature(xml.sax.handler.feature_external_ges, 0)
        self.ourParser.setFeature(xml.sax.handler.feature_namespaces, 1)
        self.ourParser.setContentHandler(self.saxFilter)

        # Initialise our variables
        self.pictureList = []

        # The styles have to be parsed before the content that refers to them.
        for logMessage, memberName in (("Parsing meta data.", 'meta.xml'),
                                       ("Parsing styles.", 'styles.xml'),
                                       ("Parsing actual content.", 'content.xml')):
            self.log.debug(logMessage)
            sxwContent = archive.read(memberName)
            contentFile = StringIO.StringIO(sxwContent)
            self.ourParser.parse(contentFile)

        # Read pictures
        for pictureFilename, newFilename in self.drawHandler.getBundledPictures():
            self.pictureList.append((newFilename, archive.read(pictureFilename)))

    def getMetaInfo(self):
        return self.officeHandler.getMetaInfo()

    def getContent(self):
        return self.textHandler.getContent()

    def getFootNotes(self):
        return self.textHandler.getFootNotes()

    def getPictures(self):
        return self.pictureList


class OfficeHandler:
    """ Collects document meta data: the meta:keyword values, meta:creation-date and every
        Dublin Core element.
    """

    def __init__(self, config):
        self.log = logging.getLogger("PubTal.OOC.OfficeHandler")
        self.metaData = {}
        self.keywords = []
        self.charData = []
        self.cleanSmartQuotes = config.get('CleanSmartQuotes', 0)
        self.cleanHyphens = config.get('CleanHyphens', 0)

    def startElementNS(self, name, qname, atts):
        self.charData = []
        if name[1] == 'document-content':
            try:
                version = atts[(OFFICE_URI, 'version')]
                self.log.debug("Open Office format %s found." % version)
                if float(version) != 1.0:
                    self.log.warn("Only OpenOffice format 1.0 is supported, version %s detected." % version)
            except Exception as e:
                msg = "Error determining OO version. Error: " + str(e)
                self.log.error(msg)
                raise OpenOfficeFormatException(msg)

    def endElementNS(self, name, qname):
        data = u"".join(self.charData)
        self.charData = []
        if name[0] == META_URI:
            if name[1] == 'keyword':
                self.keywords.append(data)
            elif name[1] == 'creation-date':
                self.metaData[name[1]] = data
        if name[0] == DUBLIN_URI:
            self.metaData[name[1]] = data

    def characters(self, data):
        self.charData.append(_cleanCharacters(data, self.cleanSmartQuotes, self.cleanHyphens))

    def getMetaInfo(self):
        self.metaData['keywords'] = self.keywords
        return self.metaData


class StyleHandler:
    """ Records the 'paragraph' and 'text' family styles found in style:style elements and
        resolves a style name to its properties, including those inherited from parents.
    """

    def __init__(self, config):
        self.log = logging.getLogger("PubTal.OOC.StyleHandler")
        self.textStyleMap = {}
        self.paragraphStyleMap = {}
        self.currentStyleFamily = None
        self.currentStyle = None

    def startElementNS(self, name, qname, atts):
        realName = name[1]
        if realName == 'style':
            try:
                self.currentStyle = {}
                self.currentStyle['name'] = atts[(STYLE_URI, 'name')]
                self.currentStyleFamily = atts[(STYLE_URI, 'family')]
                self.currentStyle['parent-name'] = atts.get((STYLE_URI, 'parent-style-name'), None)
            except Exception as e:
                msg = "Error parsing style information. Error: " + str(e)
                self.log.error(msg)
                raise OpenOfficeFormatException(msg)

        if realName == 'properties' and self.currentStyle is not None:
            for uri, attName in atts.keys():
                if uri == FORMAT_URI:
                    if attName in SUPPORTED_FO_STYLES:
                        self.currentStyle[attName] = atts[(FORMAT_URI, attName)]
                if uri == STYLE_URI:
                    attValue = atts[(STYLE_URI, attName)]
                    if attValue != 'none':
                        if attName == 'text-underline':
                            self.currentStyle['underline'] = 'underline'
                        if attName == 'text-crossing-out':
                            self.currentStyle['line-through'] = 'line-through'
                        if attName == 'text-position':
                            # Only the first word is used. Splitting (instead of slicing
                            # up to find(' ')) keeps a value without a space intact
                            # rather than losing its last character.
                            self.currentStyle['vertical-align'] = attValue.split(' ', 1)[0]

    def endElementNS(self, name, qname):
        if name[1] == 'style':
            if self.currentStyle is not None:
                styleName = self.currentStyle['name']
                if self.currentStyleFamily == "paragraph":
                    self.log.debug("Recording paragraph style %s" % styleName)
                    self.paragraphStyleMap[styleName] = self.currentStyle
                elif self.currentStyleFamily == "text":
                    self.log.debug("Recording text style %s" % styleName)
                    self.textStyleMap[styleName] = self.currentStyle
                else:
                    self.log.debug("Unsupported style family %s" % self.currentStyleFamily)
                self.currentStyle = None
                self.currentStyleFamily = None

    def characters(self, data):
        pass

    def getTextStyle(self, name):
        return self.styleLookup(name, self.textStyleMap)

    def getParagraphStyle(self, name):
        return self.styleLookup(name, self.paragraphStyleMap)

    def styleLookup(self, name, map):
        """ Return the properties of style name merged with those of all its ancestors in
            map, the style's own values taking precedence. An unknown (or None) name gives
            an empty dictionary.
        """
        foundStyle = {}
        styleHierachy = []
        visited = set()
        lookupName = name
        while lookupName is not None:
            if lookupName in visited:
                # A parent chain that loops back on itself would never terminate.
                break
            visited.add(lookupName)
            lookupStyle = map.get(lookupName, None)
            if lookupStyle is not None:
                styleHierachy.append(lookupStyle)
                lookupName = lookupStyle['parent-name']
            else:
                self.log.debug("Style %s not found!" % lookupName)
                lookupName = None

        # Apply the most distant ancestor first so that nearer styles override it.
        styleHierachy.reverse()
        for style in styleHierachy:
            foundStyle.update(style)
        return foundStyle


class TextHandler:
    """ Turns the text: elements into HTML, XHTML or plain text output and collects the
        footnotes and endnotes separately.
    """

    def __init__(self, styleHandler, config):
        self.log = logging.getLogger("PubTal.OOC.TextHandler")
        self.styleHandler = styleHandler
        # Check for the kind of output we are generating
        outputType = config.get('output-type', 'HTML')
        self.outputPlainText = 0
        self.outputXHTML = 0
        if outputType == 'HTML':
            self.outputXHTML = 0
        elif outputType == 'XHTML':
            self.outputXHTML = 1
        elif outputType == 'PlainText':
            # Plain text trumps outputXHTML
            self.outputPlainText = 1
        else:
            # The value is now formatted into the message (it used to be appended after a
            # literal "%s").
            msg = "Attempt to configure for unsupported output-type %s." % outputType
            self.log.error(msg)
            raise OpenOfficeFormatException(msg)

        self.result = self._newWriter()

        # We use this stack to re-direct output into footnotes.
        self.resultStack = []
        # We treat footnotes and endnotes the same.
        self.footNoteID = None
        self.footnotes = []
        self.charData = []
        # The closeTagsStack holds one entry per open OO text tag.
        # Those that have corresponding HTML tags have text, everything else has None
        self.closeTagsStack = []
        # The effectiveStyleStack holds the effective style (e.g. paragraph) and is used to
        # filter out un-needed style changes.
        self.effectiveStyleStack = [DEFAULT_PARAGRAPH_STYLES]
        self.cleanSmartQuotes = config.get('CleanSmartQuotes', 0)
        self.cleanHyphens = config.get('CleanHyphens', 0)
        self.preserveSpaces = config.get('preserveSpaces', 1)

    def _newWriter(self):
        """ Create an empty output writer of the configured kind. """
        # The writers are always created with preserveSpaces = 0.
        if self.outputPlainText:
            return HTMLWriter.PlainTextWriter(outputStream=StringIO.StringIO(), outputXHTML=1, preserveSpaces=0)
        return HTMLWriter.HTMLWriter(outputStream=StringIO.StringIO(), outputXHTML=self.outputXHTML, preserveSpaces=0)

    def _writeFootnoteAnchor(self):
        """ If a footnote has just been opened, write its numbered back-link anchor. """
        if self.footNoteID is None:
            return
        self.result.startElement('a', ' name="%s" style="vertical-align: super" href="#src%s"' % (self.footNoteID, self.footNoteID))
        self.result.write(str(len(self.footnotes) + 1))
        self.result.endElement('a')
        self.footNoteID = None

    def startElementNS(self, name, qname, atts):
        # Every branch adds exactly one entry to closeTagsStack and one to
        # effectiveStyleStack (getCSSStyle does the latter as a side effect), because
        # endElementNS pops one of each.
        realName = name[1]
        styleName = atts.get((TEXT_URI, 'style-name'), None)

        if realName == 'h':
            self.charData = []
            # We have a heading - get the level and style.
            try:
                headingLevel = int(atts[(TEXT_URI, 'level')])
                applicableStyle = self.styleHandler.getParagraphStyle(styleName)
                if headingLevel > 6:
                    self.log.warn("Heading level of %s used, but HTML only supports up to level 6." % str(headingLevel))
                    headingLevel = 6
                self.result.startElement('h%s' % str(headingLevel), self.getCSSStyle(applicableStyle, HEADING_STYLE_FILTER))
                self.closeTagsStack.append('h%s' % str(headingLevel))
            except Exception as e:
                msg = "Error parsing heading. Error: " + str(e)
                self.log.error(msg)
                raise OpenOfficeFormatException(msg)
        elif realName == 'p':
            # We have a paragraph
            self.charData = []
            applicableStyle = self.styleHandler.getParagraphStyle(styleName)
            if styleName == "Preformatted Text":
                # We have PRE text
                self.result.startElement('pre', self.getCSSStyle(applicableStyle, PARAGRAPH_STYLE_FILTER))
                self.closeTagsStack.append('pre')
            elif styleName == "Quotations":
                # We have a block quote.
                self.result.startElement('blockquote')
                self.result.startElement('p', self.getCSSStyle(applicableStyle, PARAGRAPH_STYLE_FILTER))
                self.closeTagsStack.append(['p', 'blockquote'])
            else:
                self.result.startElement('p', self.getCSSStyle(applicableStyle, PARAGRAPH_STYLE_FILTER))
                self.closeTagsStack.append('p')
            # Footnotes can start with either paragraphs or lists.
            self._writeFootnoteAnchor()
        elif realName == 'ordered-list':
            self.charData = []
            applicableStyle = self.styleHandler.getParagraphStyle(styleName)
            self.result.startElement('ol', self.getCSSStyle(applicableStyle, PARAGRAPH_STYLE_FILTER))
            self.closeTagsStack.append('ol')
            # Footnotes can start with either paragraphs or lists.
            self._writeFootnoteAnchor()
        elif realName == 'unordered-list':
            self.charData = []
            applicableStyle = self.styleHandler.getParagraphStyle(styleName)
            self.result.startElement('ul', self.getCSSStyle(applicableStyle, PARAGRAPH_STYLE_FILTER))
            self.closeTagsStack.append('ul')
            # Footnotes can start with either paragraphs or lists.
            self._writeFootnoteAnchor()
        elif realName == 'list-item':
            applicableStyle = self.styleHandler.getTextStyle(styleName)
            self.result.startElement('li', self.getCSSStyle(applicableStyle, SPAN_STYLE_FILTER))
            self.closeTagsStack.append('li')
        elif realName == 'span':
            # We have some text formatting - write out any data already accumulated.
            self.writeData()
            applicableStyle = self.styleHandler.getTextStyle(styleName)
            if styleName == "Source Text":
                # We have source code text
                self.result.startElement('code', self.getCSSStyle(applicableStyle, SPAN_STYLE_FILTER))
                self.closeTagsStack.append('code')
            else:
                cssStyle = self.getCSSStyle(applicableStyle, SPAN_STYLE_FILTER)
                if len(cssStyle) > 0:
                    self.result.startElement('span', cssStyle)
                    self.closeTagsStack.append('span')
                else:
                    # No change in style, so the span is suppressed.
                    self.closeTagsStack.append(None)
        elif realName == 'a':
            self.writeData()
            linkDest = atts.get((XLINK_URI, 'href'), None)
            if linkDest is not None:
                self.result.startElement('a', ' href="%s"' % linkDest)
                self.closeTagsStack.append('a')
            else:
                self.closeTagsStack.append(None)
            # Links are underlined - we want this done by the style sheet, so ignore the
            # underline by treating it as already in effect inside the link.
            newEffectiveStyle = {}
            newEffectiveStyle.update(self.effectiveStyleStack[-1])
            newEffectiveStyle['underline'] = 'underline'
            self.effectiveStyleStack.append(newEffectiveStyle)
        elif realName == 'footnote' or realName == 'endnote':
            try:
                footnoteID = atts[(TEXT_URI, 'id')]
            except Exception as e:
                msg = "Error getting footnoteid. Error: " + str(e)
                self.log.error(msg)
                raise OpenOfficeFormatException(msg)
            # Write out any data we have currently stored.
            self.writeData()
            # Now write out the link to the footnote
            self.result.startElement('a', ' name="src%s" style="vertical-align: super" href="#%s"' % (footnoteID, footnoteID))
            self.result.write(str(len(self.footnotes) + 1))
            self.result.endElement('a')
            # Everything up to the end of the note goes into a writer of its own.
            self.resultStack.append(self.result)
            self.result = self._newWriter()
            self.closeTagsStack.append(None)
            # Re-set the style stack for the footnote
            self.effectiveStyleStack.append(DEFAULT_PARAGRAPH_STYLES)
            # Keep this footnote id around for the first paragraph.
            self.footNoteID = footnoteID
        elif realName == 'footnote-body' or realName == 'endnote-body':
            self.closeTagsStack.append(None)
            # Keep the effective style as-is
            self.effectiveStyleStack.append(self.effectiveStyleStack[-1])
        elif realName == 'bookmark-start' or realName == 'bookmark':
            try:
                bookmarkName = atts[(TEXT_URI, 'name')]
            except Exception as e:
                msg = "Error getting bookmark name. Error: " + str(e)
                self.log.error(msg)
                raise OpenOfficeFormatException(msg)
            self.writeData()
            self.result.startElement('a', ' name="%s"' % bookmarkName)
            self.closeTagsStack.append('a')
            # Keep the effective style as-is
            self.effectiveStyleStack.append(self.effectiveStyleStack[-1])
        elif realName == 'line-break':
            self.writeData()
            self.result.lineBreak()
            self.closeTagsStack.append(None)
            # Keep the effective style as-is
            self.effectiveStyleStack.append(self.effectiveStyleStack[-1])
        elif realName == 's':
            # An extra space or two.
            # If the pending text ends with a space, hold that space back so that the
            # non-breaking spaces are written first and the ordinary space follows them.
            removedSpace = 0
            if len(self.charData) > 0:
                # endswith is safe for an empty chunk, unlike indexing its last character.
                if self.charData[-1].endswith(u" "):
                    self.charData[-1] = self.charData[-1][:-1]
                    removedSpace = 1
            self.writeData()
            count = int(atts.get((TEXT_URI, 'c'), 1))
            if self.preserveSpaces:
                for spaces in range(count):
                    self.result.nonbreakingSpace()
            if removedSpace:
                # Add it back now
                self.charData.append(u" ")
            # Keep the effective style as-is, and ignore the close element
            self.effectiveStyleStack.append(self.effectiveStyleStack[-1])
            self.closeTagsStack.append(None)
        else:
            # We have no HTML output associated with this OO tag.
            self.closeTagsStack.append(None)
            # Keep the effective style as-is
            self.effectiveStyleStack.append(self.effectiveStyleStack[-1])

    def endElementNS(self, name, qname):
        if len(self.closeTagsStack) > 0:
            htmlTag = self.closeTagsStack.pop()
            if htmlTag is not None:
                self.writeData()
                if isinstance(htmlTag, list):
                    # Several HTML elements were opened for one OO element.
                    for tag in htmlTag:
                        self.result.endElement(tag)
                else:
                    self.result.endElement(htmlTag)
            # Remove this effective style.
            self.effectiveStyleStack.pop()

        if name[1] == 'footnote' or name[1] == 'endnote':
            # We have just closed a footnote or endnote - record the result, pop the stack.
            outputFile = self.result.getOutput()
            self.footnotes.append(outputFile.getvalue())
            outputFile.close()
            self.result = self.resultStack.pop()

    def characters(self, data):
        self.charData.append(_cleanCharacters(data, self.cleanSmartQuotes, self.cleanHyphens))

    def writeData(self):
        """ Write the accumulated character data, escaped, to the current writer. """
        data = u"".join(self.charData)
        self.result.write(cgi.escape(data))
        self.charData = []

    def getCSSStyle(self, applicableStyle, styleList):
        """ Return a ' style="..."' attribute string (or '') for those properties of
            applicableStyle that are named in styleList and differ from the style
            currently in effect.

            Side effect: the resulting effective style is pushed onto
            effectiveStyleStack.
        """
        textDecoration = []
        cssStyles = []
        # Take a look at the effective styles.
        effectiveStyles = self.effectiveStyleStack[-1]
        # Store the new effective style for future comparison
        newEffectiveStyle = {}
        newEffectiveStyle.update(effectiveStyles)

        for style in styleList:
            if style not in applicableStyle:
                continue
            if style in ("underline", "line-through", "overline"):
                if style not in effectiveStyles:
                    textDecoration.append(style)
                continue
            # We check to see whether the effective style already has this value
            # I.e. handle paragraph of font-style=normal and span of font-style=normal
            styleValue = applicableStyle[style]
            if style not in effectiveStyles or effectiveStyles[style] != styleValue:
                cssStyles.append(u"%s:%s" % (style, styleValue))
            # Note this new effective style
            newEffectiveStyle[style] = styleValue

        if len(textDecoration) > 0:
            # CSS separates multiple text-decoration values with spaces, not commas.
            cssStyles.append(u"text-decoration: %s" % u" ".join(textDecoration))

        self.effectiveStyleStack.append(newEffectiveStyle)
        cssStyleList = ";".join(cssStyles)
        if len(cssStyleList) > 0:
            return ' style="%s"' % cssStyleList
        return ''

    def getContent(self):
        return self.result.getOutput().getvalue()

    def getFootNotes(self):
        return self.footnotes


class DrawHandler:
    """ Writes draw:image elements as <img> (using an svg:desc child as the alt text) and
        draw:a elements as links, and notes which pictures are bundled in the archive.
    """

    def __init__(self, styleHandler, textHandler, config):
        self.log = logging.getLogger("PubTal.OOC.DrawHandler")
        self.styleHandler = styleHandler
        # Kept for compatibility. The writer in use is looked up through the text handler
        # (see _writer), because the text handler swaps writers while inside a footnote.
        self.result = textHandler.result
        self.textHandler = textHandler
        self.charData = []
        # The effectiveStyleStack holds the effective style (e.g. paragraph) and is used to
        # filter out un-needed style changes.
        self.effectiveStyleStack = [DEFAULT_PARAGRAPH_STYLES]
        self.closeTagsStack = []
        self.bundledPictureList = []
        self.currentImage = None
        self.cleanSmartQuotes = config.get('CleanSmartQuotes', 0)
        self.cleanHyphens = config.get('CleanHyphens', 0)
        self.picturePrefix = os.path.join('Pictures', config.get('DestinationFile', '').replace('.', '_'))
        self.log.debug("Determined picture prefix as %s" % self.picturePrefix)

    def _writer(self):
        """ Return the writer the text handler is currently sending output to. """
        return self.textHandler.result

    def getBundledPictures(self):
        return self.bundledPictureList

    def startElementNS(self, name, qname, atts):
        theURI = name[0]
        realName = name[1]
        # Exactly one entry is pushed for every element, as endElementNS pops one.
        closeTag = None

        if theURI == DRAW_URI:
            if realName == 'image':
                href = atts.get((XLINK_URI, 'href'), None)
                if href is None:
                    # The stack used to be replaced by None here, which made the next
                    # endElementNS call fail.
                    self.log.warn("No href attribute found for image!")
                    self.currentImage = None
                else:
                    # Deal with bundled pictures
                    if href.startswith('#Pictures/'):
                        self.log.debug("Found bundled picture %s" % href)
                        archivePicName = href[1:]
                        href = self.picturePrefix + archivePicName[9:]
                        self.bundledPictureList.append((archivePicName, href))
                    alt = atts.get((DRAW_URI, 'name'), None)
                    if alt is None:
                        # Avoid writing the text "None" as the alt attribute.
                        alt = ''
                    self.currentImage = {'href': href, 'alt': alt}
            elif realName == 'a':
                linkDest = atts.get((XLINK_URI, 'href'), None)
                if linkDest is not None:
                    self.textHandler.writeData()
                    self._writer().startElement('a', ' href="%s"' % linkDest)
                    closeTag = 'a'
        elif theURI == SVG_URI:
            if realName == 'desc':
                self.charData = []

        self.closeTagsStack.append(closeTag)

    def endElementNS(self, name, qname):
        if len(self.closeTagsStack) > 0:
            htmlTag = self.closeTagsStack.pop()
            if htmlTag is not None:
                self._writer().endElement(htmlTag)

        theURI = name[0]
        realName = name[1]
        if theURI == SVG_URI:
            if realName == 'desc':
                # We have an image description - note it!
                altText = cgi.escape(u"".join(self.charData))
                self.charData = []
                if self.currentImage is not None:
                    self.currentImage['alt'] = altText
        elif theURI == DRAW_URI:
            # An image without an href was never recorded, so there is nothing to write.
            if realName == 'image' and self.currentImage is not None:
                self.textHandler.writeData()
                self._writer().startElement('img', ' src="%s" alt="%s"' % (self.currentImage['href'], self.currentImage['alt']))
                self._writer().endElement('img')
                self.currentImage = None

    def characters(self, data):
        self.charData.append(_cleanCharacters(data, self.cleanSmartQuotes, self.cleanHyphens))


class TableHandler:
    """ Writes table:table and table:sub-table elements as HTML tables, with header rows
        in <thead> and the remaining rows in <tbody>.
    """

    def __init__(self, styleHandler, resultWriter, config):
        # The logger was named after TextHandler, which looks like a copy-and-paste slip.
        self.log = logging.getLogger("PubTal.OOC.TableHandler")
        self.styleHandler = styleHandler
        self.result = resultWriter
        self.closeTagsStack = []
        # One status dictionary per open table or sub-table.
        self.tableStatusStack = []

    def startElementNS(self, name, qname, atts):
        realName = name[1]
        if realName == 'table' or realName == 'sub-table':
            self.result.startElement('table')
            self.closeTagsStack.append('table')
            self.tableStatusStack.append({'inHeader': 0, 'firstRow': 1})
        elif realName == 'table-header-rows':
            status = self.tableStatusStack[-1]
            status['inHeader'] = 1
            self.result.startElement('thead')
            self.closeTagsStack.append('thead')
        elif realName == 'table-row':
            status = self.tableStatusStack[-1]
            if (not status['inHeader']) and status['firstRow']:
                # The first row outside the header opens the table body.
                status['firstRow'] = 0
                self.result.startElement('tbody')
            self.result.startElement('tr')
            self.closeTagsStack.append('tr')
        elif realName == 'table-cell':
            status = self.tableStatusStack[-1]
            colSpan = int(atts.get((TABLE_URI, 'number-columns-spanned'), 0))
            if colSpan != 0:
                colSpanTxt = ' colspan="%s"' % str(colSpan)
            else:
                colSpanTxt = ''
            if status['inHeader']:
                self.result.startElement('th', colSpanTxt)
                self.closeTagsStack.append('th')
            else:
                self.result.startElement('td', colSpanTxt)
                self.closeTagsStack.append('td')
        else:
            self.closeTagsStack.append(None)

    def endElementNS(self, name, qname):
        realName = name[1]
        isTable = (realName == 'table' or realName == 'sub-table')
        status = None
        # We check for table because we want to insert tbody close before table close.
        if len(self.tableStatusStack) > 0:
            status = self.tableStatusStack[-1]
            if isTable and not status['firstRow']:
                # The table actually had content.
                self.result.endElement('tbody')

        if len(self.closeTagsStack) > 0:
            htmlTag = self.closeTagsStack.pop()
            if htmlTag is not None:
                self.result.endElement(htmlTag)

        # We check for table header rows here.
        if realName == 'table-header-rows' and status is not None:
            status['inHeader'] = 0
        if isTable and status is not None:
            # Pop this table status off the stack. This is done for sub-tables as well,
            # since a status is pushed for them too; otherwise the enclosing table would
            # carry on with the sub-table's status.
            self.tableStatusStack.pop()

    def characters(self, data):
        pass


class OpenOfficeFormatException(Exception):
    """ Raised when a document does not have the expected structure, or when an
        unsupported output type is requested.
    """
    pass


# Archive residue: the tar member headers of the next two archive entries and the opening
# of the next file's module docstring share the last source line of this module in the
# dump. The text is kept verbatim below, as comments, so that this module stays valid
# Python; the remainder of that docstring follows in the archive.
# PubTal-3.5/lib/pubtal/plugins/weblog/ 0000755 0001050 0001050 00000000000 11555341012 016322 5 ustar cms103 cms103
# PubTal-3.5/lib/pubtal/plugins/weblog/__init__.py 0000644 0001050 0001050 00000037210 11555340742 020447 0 ustar cms103 cms103
# """ Weblog plugin for PubTal Copyright (c) 2004 Colin Stewart (http://www.owlfish.com/) All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. If you make any bug fixes or feature enhancements please let me know! 
# These two maps provide a fast lookup for month names
SHORT_MONTH_MAP = {}
LONG_MONTH_MAP = {}
for month in range (1,13):
    SHORT_MONTH_MAP[month] = time.strftime ('%b', (2004,month,1,1,1,1,0,1,0))
    LONG_MONTH_MAP[month] = time.strftime ('%B', (2004,month,1,1,1,1,0,1,0))

def getPluginInfo ():
    """Return the plugin registration list: 'post' files of content-type 'Weblog'."""
    builtInContent = [{'functionality': 'content', 'content-type': 'Weblog'
                      ,'file-type': 'post','class': WeblogPagePublisher}]
    return builtInContent

class WeblogPagePublisher (SitePublisher.ContentPublisher):
    """Publishes the day, index, monthly archive and syndication pages of a weblog.

    NOTE(review): this class was reconstructed from source whose line breaks
    and indentation had been flattened.  Where the nesting of a statement
    could not be derived from data flow it is marked NOTE(review) below and
    should be confirmed against the pristine file.
    """

    # Format used for every per-day heading, e.g. Monday, 11 November 2002
    _DAY_DATE_FORMAT = '%a[LONG], %d[NP] %b[LONG] %Y'

    def __init__ (self, pagePublisher):
        """Create the weblog manager and register its page builder for 'Weblog' content."""
        SitePublisher.ContentPublisher.__init__ (self, pagePublisher)
        self.log = logging.getLogger ("PubTal.WeblogPagePublisher")
        self.manager = WeblogContent.WeblogManager(pagePublisher)

        self.log.info ("Registering page builder with content config.")
        siteConfig = pagePublisher.getConfig()
        contentConfig = siteConfig.getContentConfig()
        contentConfig.registerPageBuilder ('Weblog', self.manager.pageBuilder)
        self.templateConfig = siteConfig.getTemplateConfig()
        self.contentConfig = contentConfig
        self.contentDir = siteConfig.getContentDir()

    def publish (self, page):
        """Expand the template(s) for one weblog page and note it as published.

        The page option 'weblogPageType' selects the behaviour: 'day',
        'index' and 'month' pages are expanded through a single template;
        'syndication' pages are expanded once for every template listed in
        the 'weblog-syndication-template' option.

        Raises SitePublisher.PublisherException when no syndication template
        is configured or when the page type is not one of the four above
        (the latter previously surfaced as an UnboundLocalError).
        """
        pageType = page.getOption ('weblogPageType')
        weblog = self.manager.getWeblog(page)

        if (pageType == 'syndication'):
            self.log.debug ("Getting templates for syndication pages.")
            weblogSyndicationTemplates = page.getListOption ('weblog-syndication-template')
            if (weblogSyndicationTemplates is None or len (weblogSyndicationTemplates) == 0):
                msg = "Syndication attempted, but no templates are defined!"
                self.log.error (msg)
                raise SitePublisher.PublisherException (msg)
            for templateName in weblogSyndicationTemplates:
                self._expandPage (page, self.templateConfig.getTemplate (templateName))
        else:
            if (pageType == 'day'):
                self.log.debug ("Getting template for day page.")
                template = self.templateConfig.getTemplate (page.getOption ('weblog-day-template', 'template.html'))
                self.log.debug ("Found weblog day template name of: " + str (template))
            elif (pageType == 'index'):
                self.log.debug ("Getting template for index page.")
                template = self.templateConfig.getTemplate (page.getOption ('weblog-index-template', 'template.html'))
                self.log.debug ("Found weblog index template name of: " + str (template))
            elif (pageType == 'month'):
                self.log.debug ("Getting template for monthly archive page.")
                template = self.templateConfig.getTemplate (page.getOption ('weblog-month-template', 'template.html'))
            else:
                msg = "Unknown weblog page type: %s" % str (pageType)
                self.log.error (msg)
                raise SitePublisher.PublisherException (msg)
            self.log.debug ("Building non-syndication page.")
            self._expandPage (page, template)

        # NOTE(review): for syndication pages this is assumed to run once after
        # all templates were expanded, not once per template - confirm.
        weblog.notePagePublished (page.getOption ('pageName'))

    def _expandPage (self, page, template):
        """Build the page context for one template and expand it to its destination."""
        context = simpleTALES.Context(allowPythonPath=1)
        self.log.debug ("Getting page context.")
        pageContext = self.getPageContext (page, template)
        self.log.debug ("Adding 'page' object to SimpleTALES.Context")
        context.addGlobal ('page', pageContext)
        macros = page.getMacros()
        # The destination of the page is decided by getPageContext.
        relativeDestPath = pageContext ['destinationPath']
        self.log.debug ("Expanding template.")
        self.pagePublisher.expandTemplate (template, context, relativeDestPath, macros)

    def getPageContext (self, page, template):
        """Return the template context map for a weblog page.

        Starts from the context built by SitePublisher.ContentPublisher and
        adds or replaces:
          days                    - list of {'date', 'posts'} maps, one per day
          monthlyArchive          - list of {'yearName', 'monthList'} maps (only
                                    when 'weblog-month-template' is set)
          yearName, monthNameLong,
          monthNameShort          - month pages only
          dayDate                 - day pages only
          depth, lastModifiedDate, weblog-name
          weblog-tag-prefix       - only when that option is set
          weblog-link,
          absoluteDestinationURL  - only when 'url-prefix' is set
          destinationPath, absoluteDestinationPath

        Pages go in the following locations:
          day         - yyyy/mm/ddmmyyyy.<ext>
          index       - index.<ext>
          syndication - the file name of the syndication template
          month       - yyyy/mm/archive.<ext>
        Links to posts are of the form ddmmyyyy.<ext>#HH:mi:ss
        """
        pageMap = SitePublisher.ContentPublisher.getPageContext (self, page, template)

        # Default depth is 0. i.e. posts appear in weblog/a.post and we want to generate the index
        # no directories higher, in weblog/
        self.log.debug ("Determining weblog home.")
        weblogDepth = int (page.getOption ('weblog-post-depth', '0')) + 1
        # The monthly template is used to determine whether to generate the monthlyArchive object.
        monthlyTemplate = page.getOption ('weblog-month-template', None)
        # The site's hostname is needed for creating absolute URLs
        siteURLPrefix = page.getOption ('url-prefix')
        # Used for the default value for header/weblog-name
        weblogName = page.getOption ('weblog-name', 'Weblog')

        outputType = template.getOption ('output-type')
        plainTextMaxSize = template.getOption ('plaintext-maxsize')
        if (plainTextMaxSize is not None):
            plainTextMaxSize = int (plainTextMaxSize)
        destExtension = '.' + template.getTemplateExtension()

        # We need the day's extension for permaLinks - so let's work that out.
        dailyTemplateName = page.getOption ('weblog-day-template', None)
        if (dailyTemplateName is not None):
            dayTemplate = self.templateConfig.getTemplate (dailyTemplateName)
            dayExtension = '.' + dayTemplate.getTemplateExtension()
        else:
            dayExtension = None

        # Takes weblog/2004/01/12-34.html and turns it into weblog
        weblogRelativeHomeDestDir = pageMap ['destinationPath']
        for depth in range (weblogDepth):
            weblogRelativeHomeDestDir = os.path.split (weblogRelativeHomeDestDir)[0]
        self.log.debug ("weblogRelativeHomeDestDir is %s" % weblogRelativeHomeDestDir)

        # Now get the depth of the weblog: the number of path components of its home directory.
        head, tail = os.path.split (weblogRelativeHomeDestDir)
        weblogDepth = 0
        while (tail != ''):
            weblogDepth += 1
            head, tail = os.path.split (head)

        # We need the data associated with this weblog
        self.log.debug ("Getting weblog data object.")
        weblog = self.manager.getWeblog(page)
        postData = weblog.getPostData()
        postTree = weblog.getPostTree()

        pageType = page.getOption ('weblogPageType')
        if (pageType == 'day'):
            # We need to generate the list of posts for this day.
            dayStr = page.getOption ("weblogPageDay")
            self.log.debug ("Determining all posts for day %s." % dayStr)
            postList = postTree.getDaysPosts (dayStr)
            relativeDestPath = os.path.join (weblogRelativeHomeDestDir, dayStr[0:4], dayStr[4:6]
                                             ,"%s%s%s%s" % (dayStr [6:8],dayStr [4:6], dayStr [0:4], destExtension))
        elif (pageType == 'index' or pageType == 'syndication'):
            # We need to get the index list of posts.
            indexSize = int (page.getOption ('weblog-index-size', '5'))
            self.log.debug ("Determining latest posts for index or syndication.")
            postList = postTree.getLatestPosts (indexSize)
            if (pageType == 'index'):
                relativeDestPath = os.path.join (weblogRelativeHomeDestDir, "index%s" % destExtension)
            else:
                self.log.debug ("Determining name of syndication file (template name is %s." % template.getTemplateName())
                relativeDestPath = os.path.join (weblogRelativeHomeDestDir, "%s" % os.path.split (template.getTemplateName())[1])
        elif (pageType == 'month'):
            archiveStr = page.getOption ("weblogArchiveYearMonth")
            self.log.debug ("Determining all posts for monthly archive %s." % archiveStr)
            postList = postTree.getMonthsPosts (archiveStr)
            relativeDestPath = os.path.join (weblogRelativeHomeDestDir, archiveStr[0:4], archiveStr[4:6], "archive%s" % destExtension)

        # Group the posts by day.  A post whose creation day differs from the
        # day of the post before it opens a new day entry.
        dayList = []
        curDay = "00000000"
        curRealDate = None
        dayPostList = None
        lastModifiedDate = None
        for post in postList:
            # Get the context for this post
            self.log.debug ("Getting context for post %s" % post)
            fullPathToPost = os.path.join (self.contentDir, post)
            self.log.debug ("Full path to the post is %s" % fullPathToPost)
            pageForPost = self.contentConfig.getPage (fullPathToPost)
            postLastModified = pageForPost.getModificationTime()
            if (lastModifiedDate is None or (lastModifiedDate < postLastModified)):
                lastModifiedDate = postLastModified
            postContext = postData.getPostContextMap (pageForPost, template)
            postCreationDate = postContext ['headers']['postCreationDate']

            if (curDay != postCreationDate [0:8]):
                # It's a brand new day!
                self.log.debug ("Found a new day %s." % postCreationDate)
                curRealDate = time.strptime (postCreationDate, WeblogContent.INTERNAL_DATE_FORMAT)
                curDay = postCreationDate [0:8]
                # The day entry keeps a reference to dayPostList, so posts
                # appended below show up in it.
                dayPostList = []
                dayList.append ({'date': DateContext.Date (curRealDate, self._DAY_DATE_FORMAT)
                                ,'posts': dayPostList})

            # We only do perma-links if daily archives are enabled.
            if (dayExtension is not None):
                self._addPermaLinks (postContext, pageType, dayExtension, siteURLPrefix, weblogRelativeHomeDestDir)

            # RSS requires truncating of output, so we need to check for that here.
            if (pageType == 'syndication' and outputType == 'PlainText' and plainTextMaxSize is not None):
                self.log.info ("Truncating syndication PlainText output to %s" % str (plainTextMaxSize))
                postBody = postContext.get ('content', None)
                if (postBody is None):
                    self.log.warn ("Post body not found!")
                elif (len (postBody) > plainTextMaxSize):
                    postContext ['content'] = postBody [:plainTextMaxSize] + "..."

            dayPostList.append (postContext)
        pageMap ['days'] = dayList

        # Now do the months object if applicable...
        if (monthlyTemplate is not None):
            self.log.debug ("Monthly template is defined, so creating monthlyArchive object.")
            # NOTE(review): assumed to be set even when there are no posts at all - confirm.
            pageMap ['monthlyArchive'] = self._buildMonthlyArchive (postTree, pageType, destExtension)

        if (pageType == 'month'):
            # We do special things for monthly archives.
            month = int (archiveStr[4:6])
            pageMap ['yearName'] = archiveStr[0:4]
            pageMap ['monthNameLong'] = LONG_MONTH_MAP [month]
            pageMap ['monthNameShort'] = SHORT_MONTH_MAP [month]
            # Month and day pages live two directories (yyyy/mm) below the weblog home.
            pageMap ['depth'] = "../"*(weblogDepth + 2)
        elif (pageType == 'day'):
            pageMap ['dayDate'] = DateContext.Date (curRealDate, self._DAY_DATE_FORMAT)
            pageMap ['depth'] = "../"*(weblogDepth + 2)
        else:
            pageMap ['depth'] = "../"*(weblogDepth)

        # The last modified date of this page is the latest modification date of its components.
        # With no posts lastModifiedDate is still None, for which time.localtime uses the current time.
        pageMap ['lastModifiedDate'] = DateContext.Date (time.localtime (lastModifiedDate), '%a[SHORT], %d %b[SHORT] %Y %H:%M:%S %Z')
        pageMap ['weblog-name'] = weblogName
        weblogTagPrefix = page.getOption ('weblog-tag-prefix')
        if (weblogTagPrefix is not None):
            pageMap ['weblog-tag-prefix'] = "tag:%s" % weblogTagPrefix

        if (siteURLPrefix is not None):
            if (len (weblogRelativeHomeDestDir) > 0):
                pageMap ['weblog-link'] = "%s/%s/" % (siteURLPrefix, weblogRelativeHomeDestDir)
            else:
                pageMap ['weblog-link'] = "%s/" % siteURLPrefix
            pageMap ['absoluteDestinationURL'] = '%s/%s' % (siteURLPrefix, relativeDestPath)
        pageMap ['destinationPath'] = relativeDestPath
        # NOTE(review): self.destDir is not assigned in this class; presumably set by the base class.
        pageMap ['absoluteDestinationPath'] = os.path.join (self.destDir, relativeDestPath)

        return pageMap

    def _addPermaLinks (self, postContext, pageType, dayExtension, siteURLPrefix, weblogRelativeHomeDestDir):
        """Add the perma-link entries for one post to its context map."""
        postCreationDate = postContext ['headers']['postCreationDate']
        timeAnchor = postCreationDate [8:16]
        if (pageType == 'day'):
            # On the day page itself only the anchor name is published.
            # NOTE(review): assumed to apply only when a day template is configured - confirm.
            postContext ['permaLinkName'] = timeAnchor
            return

        # Perma-links point at the anchor inside the day page ddmmyyyy.<ext>
        dayPageLink = "%s%s%s%s#%s" % (postCreationDate [6:8], postCreationDate [4:6], postCreationDate [0:4], dayExtension, timeAnchor)
        if (pageType == 'index' or pageType == 'syndication'):
            # Permalinks for posts have to index into the yyyy/mm/ddmmyyyy.html
            permaLink = os.path.join (postCreationDate [0:4], postCreationDate [4:6], dayPageLink)
        elif (pageType == 'month'):
            # The monthly archive lives in the same yyyy/mm directory as the day pages.
            permaLink = dayPageLink
        else:
            permaLink = "#%s" % timeAnchor
        postContext ['permaLink'] = permaLink
        # NOTE(review): the absolute link is assumed to be produced for non-day pages only - confirm.
        # NOTE(review): os.path.join is used to build URLs here, as in the original code.
        if (siteURLPrefix is not None):
            postContext ['absolutePermaLink'] = '%s/%s' % (siteURLPrefix, os.path.join (weblogRelativeHomeDestDir, permaLink))

    def _buildMonthlyArchive (self, postTree, pageType, destExtension):
        """Return the list of per-year maps, each listing the months that have an archive."""
        # Monthly archives and day pages have to link via ../../yyyy/mm/archive.html,
        # index and syndication pages link straight to yyyy/mm/archive.html
        if (pageType == 'month' or pageType == 'day'):
            parentDirs = ['..', '..']
        else:
            parentDirs = []

        monthlyArchiveList = []
        yearsMonthList = None
        currentYear = None
        for monthYearStr in postTree.getAllMonthlyNames():
            # monthYearStr is yyyymm
            self.log.debug ("Handling year/month %s" % monthYearStr)
            year = int (monthYearStr [0:4])
            month = int (monthYearStr [4:6])
            if (currentYear != year):
                self.log.debug ("A new year found.")
                # The year entry keeps a reference to yearsMonthList, so the
                # months appended below show up in it.
                yearsMonthList = []
                monthlyArchiveList.append ({'yearName': str (year), 'monthList': yearsMonthList})
                currentYear = year
            linkParts = parentDirs + [monthYearStr [0:4], monthYearStr [4:6], "archive%s" % destExtension]
            yearsMonthList.append ({'monthNameLong': LONG_MONTH_MAP [month]
                                   ,'monthNameShort': SHORT_MONTH_MAP [month]
                                   ,'monthNumber': str (month)
                                   ,'archiveLink': os.path.join (*linkParts)})
        return monthlyArchiveList
Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 3. The name of the author may not be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. If you make any bug fixes or feature enhancements please let me know! """ import anydbm, os, os.path, re, time, string, hashlib, codecs from pubtal import timeformat FIELDREGEX=re.compile ('(?