odfpy-0.9.6/0000775000076400007640000000000012106145466012640 5ustar rougroug00000000000000odfpy-0.9.6/examples/0000775000076400007640000000000012106145466014456 5ustar rougroug00000000000000odfpy-0.9.6/examples/passwd-as-odt.py0000664000076400007640000000445212106145307017515 0ustar rougroug00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # from odf.opendocument import OpenDocumentText from odf.style import Style, TextProperties, ParagraphProperties, TableColumnProperties from odf.text import P from odf.table import Table, TableColumn, TableRow, TableCell PWENC = "utf-8" textdoc = OpenDocumentText() # Create a style for the table content. One we can modify # later in the word processor. tablecontents = Style(name="Table Contents", family="paragraph") tablecontents.addElement(ParagraphProperties(numberlines="false", linenumber="0")) textdoc.styles.addElement(tablecontents) # Create automatic styles for the column widths. # We want two different widths, one in inches, the other one in metric. # ODF Standard section 15.9.1 widthshort = Style(name="Wshort", family="table-column") widthshort.addElement(TableColumnProperties(columnwidth="1.7cm")) textdoc.automaticstyles.addElement(widthshort) widthwide = Style(name="Wwide", family="table-column") widthwide.addElement(TableColumnProperties(columnwidth="1.5in")) textdoc.automaticstyles.addElement(widthwide) # Start the table, and describe the columns table = Table() table.addElement(TableColumn(numbercolumnsrepeated=4,stylename=widthshort)) table.addElement(TableColumn(numbercolumnsrepeated=3,stylename=widthwide)) f = open('/etc/passwd') for line in f: rec = line.strip().split(":") tr = TableRow() table.addElement(tr) for val in rec: tc = TableCell() tr.addElement(tc) p = P(stylename=tablecontents,text=unicode(val,PWENC)) tc.addElement(p) textdoc.text.addElement(table) textdoc.save("passwd.odt") odfpy-0.9.6/examples/barchart.py0000664000076400007640000001411312106145307016610 0ustar rougroug00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # # This is an example of an OpenDocument Chart. # # Opendocument charts are usually not found in the wild. They are intended to be # subobojects of e.g. spreadsheets. But the KDE application called kchart # (http://www.koffice.org/kchart/) is able to read and write ODC files. # # Kchart is able to understand a document without , whereas # OOo misinterprets the label rows and columns. So if you embed the # spectre-balance.odc file in an OOo Writer document, expect to see some # oddities. from odf.opendocument import OpenDocumentChart from odf import chart, style, table, text # import a support class from the examples directory from datatable import DataTable class BarChart(object): def __init__(self): self.charttype = 'chart:bar' self.subtype = 'normal' # 'percentage', 'stacked' or 'normal' self.threedimensional = "true" self.x_axis = "X" self.y_axis = "Y" self.values = (1,2,3) self.title = None self.subtitle = None def __call__(self, doc): chartstyle = style.Style(name="chartstyle", family="chart") chartstyle.addElement( style.GraphicProperties(stroke="none", fillcolor="#ffffff")) doc.automaticstyles.addElement(chartstyle) mychart = chart.Chart( width="576pt", height="504pt", stylename=chartstyle, attributes={'class':self.charttype}) doc.chart.addElement(mychart) # Title if self.title: titlestyle = style.Style(name="titlestyle", family="chart") titlestyle.addElement( style.GraphicProperties(stroke="none", fill="none")) titlestyle.addElement( style.TextProperties(fontfamily="'Nimbus Sans L'", fontfamilygeneric="swiss", fontpitch="variable", fontsize="13pt")) doc.automaticstyles.addElement(titlestyle) mytitle = chart.Title(x="385pt", y="27pt", stylename=titlestyle) mytitle.addElement( text.P(text=self.title)) mychart.addElement(mytitle) # Subtitle if self.subtitle: subtitlestyle = style.Style(name="subtitlestyle", family="chart") subtitlestyle.addElement( style.GraphicProperties(stroke="none", fill="none")) subtitlestyle.addElement( style.TextProperties(fontfamily="'Nimbus Sans L'", fontfamilygeneric="swiss", fontpitch="variable", fontsize="10pt")) doc.automaticstyles.addElement(subtitlestyle) subtitle = chart.Subtitle(x="0pt", y="123pt", stylename=subtitlestyle) subtitle.addElement( text.P(text= self.subtitle)) mychart.addElement(subtitle) # Legend legendstyle = style.Style(name="legendstyle", family="chart") legendstyle.addElement( style.GraphicProperties(fill="none")) legendstyle.addElement( style.TextProperties(fontfamily="'Nimbus Sans L'", fontfamilygeneric="swiss", fontpitch="variable", fontsize="6pt")) doc.automaticstyles.addElement(legendstyle) mylegend = chart.Legend(legendposition="end", legendalign="center", stylename=legendstyle) mychart.addElement(mylegend) # Plot area plotstyle = style.Style(name="plotstyle", family="chart") if self.subtype == "stacked": percentage="false"; stacked="true" elif self.subtype == "percentage": percentage="true"; stacked="false" else: percentage="false"; stacked="false" plotstyle.addElement( style.ChartProperties(seriessource="columns", percentage=percentage, stacked=stacked, threedimensional=self.threedimensional)) doc.automaticstyles.addElement(plotstyle) plotarea = chart.PlotArea(datasourcehaslabels=self.datasourcehaslabels, stylename=plotstyle) mychart.addElement(plotarea) # Style for the X,Y axes axisstyle = style.Style(name="axisstyle", family="chart") axisstyle.addElement( style.ChartProperties(displaylabel="true")) doc.automaticstyles.addElement(axisstyle) # Title for the X axis xaxis = chart.Axis(dimension="x", name="primary-x", stylename=axisstyle) plotarea.addElement(xaxis) xt = chart.Title() xaxis.addElement(xt) xt.addElement(text.P(text=self.x_axis)) # Title for the Y axis yaxis = chart.Axis(dimension="y", name="primary-y", stylename=axisstyle) plotarea.addElement(yaxis) yt = chart.Title() yaxis.addElement(yt) yt.addElement(text.P(text=self.y_axis)) # Data area datatable = DataTable( self.values ) datatable.datasourcehaslabels = self.datasourcehaslabels mychart.addElement(datatable()) if __name__ == "__main__": # Create the document doc = OpenDocumentChart() mychart = BarChart() mychart.title = "SPECTRE" mychart.subtitle = "SPecial Executive for Counter-intelligence, Terrorism, Revenge and Extortion" mychart.x_axis = u"Divisions" mychart.y_axis = u"€ (thousand)" # These represent the data. Six rows in three columns mychart.values = ( ('', 'Expense', 'Revenue'), ('Counterfeit', 1000, 1500), ('Murder', 1100, 1150), ('Prostitution', 3200, 2350), ('Blackmail', 1100, 1150), ('Larceny', 1000, 1750) ) mychart.datasourcehaslabels = "both" mychart(doc) doc.save("spectre-balance", True) odfpy-0.9.6/examples/datatable.py0000664000076400007640000000634012106145307016746 0ustar rougroug00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # from odf import table, text def valuetype(val): valuetype="string" if isinstance(val,str): valuetype="string" if isinstance(val,int): valuetype="float" if isinstance(val,float): valuetype="float" if isinstance(val,bool): valuetype="boolean" return valuetype class DataTable(object): def __init__(self, values=()): self.values = values self.datasourcehaslabels = "none" def _set_values(self, value): if isinstance(value, list) or isinstance(value, tuple): self.__dict__['values'] = value firstrow = value[0] if isinstance(firstrow, list) or isinstance(firstrow, tuple): self.numcols = len(firstrow) else: self.numcols = 1 else: raise ValueError, "Value must be list or tuple" def __setattr__(self, name, value): if name == 'values': self._set_values(value) else: self.__dict__[name] = value def __call__(self): datatable = table.Table(name="local-table") if self.datasourcehaslabels in ('row','both'): t = table.TableHeaderColumns() t.addElement(table.TableColumn()) datatable.addElement(t) t = table.TableColumns() if self.datasourcehaslabels in ('row','both'): t.addElement(table.TableColumn(numbercolumnsrepeated=str(self.numcols-1))) else: t.addElement(table.TableColumn(numbercolumnsrepeated=str(self.numcols))) datatable.addElement(t) if self.datasourcehaslabels in ('column','both'): t = table.TableHeaderRows() datatable.addElement(t) tr = table.TableRow() t.addElement(tr) content = self.values[0] for val in content: tc = table.TableCell(valuetype=valuetype(val)) tr.addElement(tc) tc.addElement(text.P(text=str(val))) t = table.TableRows() datatable.addElement(t) rownum = 0 for content in self.values: if rownum == 0 and self.datasourcehaslabels in ('column','both'): rownum += 1 continue tr = table.TableRow() t.addElement(tr) for val in content: tc = table.TableCell(valuetype=valuetype(val), value=val) tr.addElement(tc) tc.addElement(text.P(text=str(val))) rownum += 1 return datatable odfpy-0.9.6/examples/ods-currency.py0000664000076400007640000000705212106145307017443 0ustar rougroug00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2009 Brad Ralph, Sydney, Australia # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # Søren Roug # This example shows how to do a conditional currency style. We want negative # numbers to show as red and as Australian dollars. from odf.opendocument import OpenDocumentSpreadsheet from odf.style import Style, TextProperties, TableColumnProperties, Map from odf.number import NumberStyle, CurrencyStyle, CurrencySymbol, Number, Text from odf.text import P from odf.table import Table, TableColumn, TableRow, TableCell textdoc = OpenDocumentSpreadsheet() # Create a style for the table content. One we can modify # later in the spreadsheet. tablecontents = Style(name="Large number", family="table-cell") tablecontents.addElement(TextProperties(fontfamily="Arial", fontsize="15pt")) textdoc.styles.addElement(tablecontents) # Create automatic styles for the column widths. widewidth = Style(name="co1", family="table-column") widewidth.addElement(TableColumnProperties(columnwidth="2.8cm", breakbefore="auto")) textdoc.automaticstyles.addElement(widewidth) # Create the styles for $AUD format currency values ns1 = CurrencyStyle(name="positive-AUD", volatile="true") ns1.addElement(CurrencySymbol(language="en", country="AU", text=u"$")) ns1.addElement(Number(decimalplaces="2", minintegerdigits="1", grouping="true")) textdoc.styles.addElement(ns1) # Create the main style. ns2 = CurrencyStyle(name="main-AUD") ns2.addElement(TextProperties(color="#ff0000")) ns2.addElement(Text(text=u"-")) ns2.addElement(CurrencySymbol(language="en", country="AU", text=u"$")) ns2.addElement(Number(decimalplaces="2", minintegerdigits="1", grouping="true")) ns2.addElement(Map(condition="value()>=0", applystylename="positive-AUD")) textdoc.styles.addElement(ns2) # Create automatic style for the price cells. moneycontents = Style(name="ce1", family="table-cell", parentstylename=tablecontents, datastylename="main-AUD") textdoc.automaticstyles.addElement(moneycontents) # Start the table, and describe the columns table = Table(name="Currency colours") # Create a column (same as in HTML) Make all cells in column default to currency table.addElement(TableColumn(stylename=widewidth, defaultcellstylename="ce1")) # Create a row (same as in HTML) tr = TableRow() table.addElement(tr) # Create a cell with a negative value. It should show as red. cell = TableCell(valuetype="currency", currency="AUD", value="-125") cell.addElement(P(text=u"$-125.00")) # The current displayed value tr.addElement(cell) # Create a row (same as in HTML) tr = TableRow() table.addElement(tr) # Create another cell but with a positive value. It should show in black cell = TableCell(valuetype="currency", currency="AUD", value="123") cell.addElement(P(text=u"$123.00")) # The current displayed value tr.addElement(cell) textdoc.spreadsheet.addElement(table) textdoc.save("currency.ods") odfpy-0.9.6/examples/europeanflag-as-odt.py0000664000076400007640000001020212106145307020652 0ustar rougroug00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # # This is an example of an OpenDocument Text. # We are going to draw the European flag. I chose this motive because # it has just the right complexity for an example. It contains 12 five-edge # stars in a circle on a blue background. The production specifications can # be found at http://europa.eu/abc/symbols/emblem/index_en.htm # # The stars are drawn with a vector-oriented "turtle" the way Seymour Papert's # LOGO language does. import math from odf.opendocument import OpenDocumentText from odf.style import Style, GraphicProperties, DrawingPageProperties from odf.text import P from odf.draw import G, Polygon, Rect class VectorSet: """ A class to simulate LOGO's turtle. The turtle starts a 0,0 pointing right along the x-axis, since we use the mathematical coordinate system. """ orientation = 0 # Degrees x = 0.0 y = 0.0 polygon = [] def forward(self, length): orirad = math.radians(self.orientation) self.x = self.x + length * math.cos(orirad) self.y = self.y + length * math.sin(orirad) def right(self, turn): self.orientation = (self.orientation + turn) % 360 def left(self, turn): self.orientation = (self.orientation - turn) % 360 def mark(self): self.polygon.append((self.x,self.y)) def firstmark(self): self.polygon.append(self.polygon[0]) def getpoints(self): """ Return the polygon points """ strpairs = ["%.0f,%.0f" % item for item in self.polygon] return ' '.join(strpairs) def getviewbox(self): ''' The value of the viewBox attribute is a list of four numbers , , and ''' xvals = [ item[0] for item in self.polygon] maxx = int(reduce(max,xvals)) + 1 minx = int(reduce(min,xvals)) yvals = [ item[1] for item in self.polygon] maxy = int(reduce(max,yvals)) + 1 miny = int(reduce(min,yvals)) return minx, miny, maxx-minx, maxy-miny # Create the document doc = OpenDocumentText() # The blue background style of the flag backgroundstyle = Style(family="graphic", name="blueback") backgroundstyle.addElement(GraphicProperties(fill="solid", fillcolor="#003399", stroke="none")) doc.automaticstyles.addElement(backgroundstyle) # The style for the stars starstyle = Style(family="graphic", name="starstyle") starstyle.addElement(GraphicProperties(fill="solid", fillcolor="#ffcc00", stroke="none")) doc.automaticstyles.addElement(starstyle) # Create a paragraph to contain the drawing drawpage = P() doc.text.addElement(drawpage) group=G() drawpage.addElement(group) turtle = VectorSet() # Draw the edges turtle.mark() for edge in [ 0,1,2,3,5 ]: turtle.forward(100) turtle.mark() turtle.right(144) turtle.forward(100) turtle.mark() turtle.left(72) turtle.firstmark() # Draw a rectangle containing the blue background group.addElement(Rect(height="120mm", width="180mm", x="0mm", y="0mm", stylename=backgroundstyle)) viewbox = ' '.join(map(str,turtle.getviewbox())) points = turtle.getpoints() # Go around in a circle in twelve steps for deg in range(0,360,30): x = 83.3 + math.cos(math.radians(deg)) * 40 y = 53.3 + math.sin(math.radians(deg)) * 40 group.addElement(Polygon(points=points, stylename=starstyle, viewbox=viewbox, width="13.3mm", height="13.3mm", x="%0.2fmm" % x, y="%0.2fmm" % y)) # Save the work doc.save("europeanflag", True) odfpy-0.9.6/examples/easylists.py0000664000076400007640000000725012106145307017046 0ustar rougroug00000000000000# -*- coding: utf-8 -*- # # Show the easyliststyle.py module # Copyright (C) 2008 J. David Eisenberg # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. # from odf import easyliststyle from odf.opendocument import OpenDocumentText from odf.style import Style, TextProperties from odf.text import P, List, ListItem """ This program shows the easyliststyle.py module. It creates a file named "easylist_odfpy.odt" with a bulleted list, a numbered list, and a mixed list. """ bulletListSpec = '*,>,#,%' mixedListSpec = u'1.!\u273f!a)' numberListSpecArray = ('I', '1:', 'a') itemList = ( "Cats", ">Domestic Shorthair", ">Domestic Longhair", ">Purebred", ">>Russian Blue", ">>Siamese", ">>>Seal Point", ">>>Flame Point", "Dogs", ">Retrievers", ">>Golden Retriever", ">>Labrador Retriever", ">Poodles", ">>Toy Poodle", ">>Standard Poodle" ) def createList(itemList, indentDelim, styleName): listArray = [] listItem = ListItem() level = 0 lastLevel = 0 for levCount in range(0,10): listArray.append(None) listArray[0] = List() for item in itemList: level = 0; while (level < len(item) and item[level] == indentDelim): level +=1 item = item[level:] if (level > lastLevel): # open the sub-levels for levCount in range(lastLevel+1, level+1): listArray[levCount] = List() elif (level < lastLevel): # close off the intervening lists for levCount in range(lastLevel, level, -1): listArray[levCount-1].childNodes[-1].addElement(listArray[levCount]) # now that we are at the proper level, add the item. listArray[level].setAttribute( 'stylename', styleName ); listItem = ListItem() para = P(text=item); listItem.addElement(para); listArray[level].addElement(listItem); lastLevel = level; # close off any remaining open lists for levCount in range(lastLevel, 0, -1): listArray[levCount-1].childNodes[-1].addElement(listArray[levCount]) return listArray[0] textdoc = OpenDocumentText() s = textdoc.styles listStyle = easyliststyle.styleFromString('bullet1', bulletListSpec, ',', '0.6cm', easyliststyle.SHOW_ONE_LEVEL) s.addElement(listStyle) listElement = createList(itemList, '>', 'bullet1') textdoc.text.addElement(listElement) para = P(text="-----------------------"); textdoc.text.addElement(para) listStyle = easyliststyle.styleFromList('num1', numberListSpecArray, '0.25in', easyliststyle.SHOW_ALL_LEVELS) s.addElement(listStyle) listElement = createList(itemList, '>', 'num1') textdoc.text.addElement(listElement) para = P(text="-----------------------"); textdoc.text.addElement(para) listStyle = easyliststyle.styleFromString('mix1', mixedListSpec, '!', '0.8cm', easyliststyle.SHOW_ONE_LEVEL) s.addElement(listStyle) listElement = createList(itemList, '>', 'mix1') textdoc.text.addElement(listElement) textdoc.save("easylist_odfpy.odt") # vim: set expandtab sw=4 : odfpy-0.9.6/examples/manualpagebreak.py0000664000076400007640000000257312106145307020150 0ustar rougroug00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2012 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # # This example shows how to create a manual page break. from odf.opendocument import OpenDocumentText from odf.style import Style, TextProperties, ParagraphProperties from odf.text import P textdoc = OpenDocumentText() # Create a style for the paragraph with page-break withbreak = Style(name="WithBreak", parentstylename="Standard", family="paragraph") withbreak.addElement(ParagraphProperties(breakbefore="page")) textdoc.automaticstyles.addElement(withbreak) p = P(text=u'First paragraph') textdoc.text.addElement(p) p = P(stylename=withbreak,text=u'Second paragraph') textdoc.text.addElement(p) textdoc.save("pagebreak_odfpy.odt") odfpy-0.9.6/examples/photoalbum.py0000664000076400007640000001344712106145307017205 0ustar rougroug00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2007-2009 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # import os,sys,getopt,struct from cStringIO import StringIO from odf.opendocument import OpenDocumentPresentation from odf.style import Style, MasterPage, PageLayout, PageLayoutProperties, \ TextProperties, GraphicProperties, ParagraphProperties, DrawingPageProperties from odf.text import P from odf.draw import Page, Frame, TextBox, Image def getImageInfo(data): size = len(data) height = -1 width = -1 w = -1 h = -1 content_type = '' # handle GIFs if (size >= 10) and data[:6] in ('GIF87a', 'GIF89a'): # Check to see if content_type is correct content_type = 'image/gif' w, h = struct.unpack("= 24) and (data[:8] == '\211PNG\r\n\032\n') and (data[12:16] == 'IHDR')): content_type = 'image/png' w, h = struct.unpack(">LL", data[16:24]) width = int(w) height = int(h) # Maybe this is for an older PNG version. elif (size >= 16) and (data[:8] == '\211PNG\r\n\032\n'): # Check to see if we have the right content type content_type = 'image/png' w, h = struct.unpack(">LL", data[8:16]) width = int(w) height = int(h) # handle JPEGs elif (size >= 2) and (data[:2] == '\377\330'): content_type = 'image/jpeg' jpeg = StringIO(data) jpeg.read(2) b = jpeg.read(1) try: while (b and ord(b) != 0xDA): while (ord(b) != 0xFF): b = jpeg.read(1) while (ord(b) == 0xFF): b = jpeg.read(1) if (ord(b) >= 0xC0 and ord(b) <= 0xC3): jpeg.read(3) h, w = struct.unpack(">HH", jpeg.read(4)) break else: jpeg.read(int(struct.unpack(">H", jpeg.read(2))[0])-2) b = jpeg.read(1) width = int(w) height = int(h) except: pass return content_type, width, height def usage(): sys.stderr.write("Usage: %s [-o outputfile] [input-directory]\n" % sys.argv[0]) if __name__ == "__main__": try: opts, args = getopt.getopt(sys.argv[1:], "o:", ["output="]) except getopt.GetoptError: usage() sys.exit(2) outputfile = "photoalbum.odp" for o, a in opts: if o in ("-o", "--output"): outputfile = a if outputfile[-4:] != ".odp": outputfile += ".odp" doc = OpenDocumentPresentation() # We must describe the dimensions of the page pagelayout = PageLayout(name="MyLayout") doc.automaticstyles.addElement(pagelayout) pagelayout.addElement(PageLayoutProperties(margin="0pt", pagewidth="800pt", pageheight="600pt", printorientation="landscape")) # Style for the title frame of the page # We set a centered 34pt font with yellowish background titlestyle = Style(name="MyMaster-title", family="presentation") titlestyle.addElement(ParagraphProperties(textalign="center")) titlestyle.addElement(TextProperties(fontsize="34pt")) titlestyle.addElement(GraphicProperties(fillcolor="#ffff99")) doc.styles.addElement(titlestyle) # Style for the photo frame photostyle = Style(name="MyMaster-photo", family="presentation") doc.styles.addElement(photostyle) # Create automatic transition dpstyle = Style(name="dp1", family="drawing-page") dpstyle.addElement(DrawingPageProperties(transitiontype="automatic", transitionstyle="move-from-top", duration="PT5S")) doc.automaticstyles.addElement(dpstyle) # Every drawing page must have a master page assigned to it. masterpage = MasterPage(name="MyMaster", pagelayoutname=pagelayout) doc.masterstyles.addElement(masterpage) if len(args) == 0: pict_dir = "." else: pict_dir = args[0] # Slides for picture in os.listdir(pict_dir): try: pictdata = open(pict_dir + "/" + picture).read() except: continue ct,w,h = getImageInfo(pictdata) # Get dimensions in pixels if ct != 'image/jpeg': continue if w > 720: h = float(h) * 720.0 / float(w) w = 720.0 if h > 540.0: w = float(w) * 540.0 / float(h) h = 540.0 page = Page(stylename=dpstyle, masterpagename=masterpage) doc.presentation.addElement(page) titleframe = Frame(stylename=titlestyle, width="720pt", height="56pt", x="40pt", y="10pt") page.addElement(titleframe) textbox = TextBox() titleframe.addElement(textbox) textbox.addElement(P(text=picture)) offsetx = 400.0 - w/2.0 photoframe = Frame(stylename=photostyle, width="%fpt" % w, height="%fpt" % h, x="%fpt" % offsetx, y="56pt") page.addElement(photoframe) href = doc.addPicture(pict_dir + "/" + picture) photoframe.addElement(Image(href=href)) doc.save(outputfile) odfpy-0.9.6/examples/helloworld.py0000664000076400007640000000175412106145307017204 0ustar rougroug00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # Raymond Yee # # Probably the shortest script possible from odf.opendocument import OpenDocumentText from odf.text import P textdoc = OpenDocumentText() p = P(text="Hello World!") textdoc.text.addElement(p) textdoc.save("helloworld", True) odfpy-0.9.6/examples/subobject.py0000664000076400007640000001600112106145307017000 0ustar rougroug00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # # This is an example of an OpenDocument Text with an embedded Chart. # from odf.opendocument import OpenDocumentChart, OpenDocumentText from odf import chart, style, table, text, draw # import a support class from the examples directory from datatable import DataTable class BarChart(object): def __init__(self): self.charttype = 'chart:bar' self.subtype = 'normal' # 'percentage', 'stacked' or 'normal' self.threedimensional = "false" self.x_axis = "X" self.y_axis = "Y" self.values = (1,2,3) self.title = None self.subtitle = None def __call__(self, doc): chartstyle = style.Style(name="chartstyle", family="chart") chartstyle.addElement( style.GraphicProperties(stroke="none", fillcolor="#ffffff")) doc.automaticstyles.addElement(chartstyle) mychart = chart.Chart(width="476pt", height="404pt",stylename=chartstyle, attributes={'class':self.charttype}) doc.chart.addElement(mychart) # Title if self.title: titlestyle = style.Style(name="titlestyle", family="chart") titlestyle.addElement( style.GraphicProperties(stroke="none", fill="none")) titlestyle.addElement( style.TextProperties(fontfamily="'Nimbus Sans L'", fontfamilygeneric="swiss", fontpitch="variable", fontsize="13pt")) doc.automaticstyles.addElement(titlestyle) mytitle = chart.Title(x="185pt", y="27pt", stylename=titlestyle) mytitle.addElement( text.P(text=self.title)) mychart.addElement(mytitle) # Subtitle if self.subtitle: subtitlestyle = style.Style(name="subtitlestyle", family="chart") subtitlestyle.addElement( style.GraphicProperties(stroke="none", fill="none")) subtitlestyle.addElement( style.TextProperties(fontfamily="'Nimbus Sans L'", fontfamilygeneric="swiss", fontpitch="variable", fontsize="10pt")) doc.automaticstyles.addElement(subtitlestyle) subtitle = chart.Subtitle(x="50pt", y="50pt", stylename=subtitlestyle) subtitle.addElement( text.P(text= self.subtitle)) mychart.addElement(subtitle) # Legend legendstyle = style.Style(name="legendstyle", family="chart") legendstyle.addElement( style.GraphicProperties(fill="none")) legendstyle.addElement( style.TextProperties(fontfamily="'Nimbus Sans L'", fontfamilygeneric="swiss", fontpitch="variable", fontsize="8pt")) doc.automaticstyles.addElement(legendstyle) mylegend = chart.Legend(legendposition="end", legendalign="center", stylename=legendstyle) mychart.addElement(mylegend) # Plot area plotstyle = style.Style(name="plotstyle", family="chart") if self.subtype == "stacked": percentage="false"; stacked="true" elif self.subtype == "percentage": percentage="true"; stacked="false" else: percentage="false"; stacked="false" plotstyle.addElement( style.ChartProperties(seriessource="columns", percentage=percentage, stacked=stacked, threedimensional=self.threedimensional)) doc.automaticstyles.addElement(plotstyle) plotarea = chart.PlotArea(datasourcehaslabels=self.datasourcehaslabels, stylename=plotstyle) mychart.addElement(plotarea) # Style for the X,Y axes axisstyle = style.Style(name="axisstyle", family="chart") axisstyle.addElement( style.ChartProperties(displaylabel="true")) axisstyle.addElement( style.TextProperties(fontfamily="'Nimbus Sans L'", fontfamilygeneric="swiss", fontpitch="variable", fontsize="8pt")) doc.automaticstyles.addElement(axisstyle) # Title for the X axis xaxis = chart.Axis(dimension="x", name="primary-x", stylename=axisstyle) plotarea.addElement(xaxis) xt = chart.Title() xaxis.addElement(xt) xt.addElement(text.P(text=self.x_axis)) # Title for the Y axis yaxis = chart.Axis(dimension="y", name="primary-y", stylename=axisstyle) plotarea.addElement(yaxis) yt = chart.Title() yaxis.addElement(yt) yt.addElement(text.P(text=self.y_axis)) # Set up the data series. OOo doesn't show correctly without them. s = chart.Series(valuescellrangeaddress="local-table.B2:.B6", labelcelladdress="local-table.B1") s.addElement(chart.DataPoint(repeated=5)) plotarea.addElement(s) s = chart.Series(valuescellrangeaddress="local-table.C2:.C6", labelcelladdress="local-table.C1") s.addElement(chart.DataPoint(repeated=5)) plotarea.addElement(s) # The data are placed in a table inside the chart object - but could also be a # table in the main document datatable = DataTable(self.values) datatable.datasourcehaslabels = self.datasourcehaslabels mychart.addElement(datatable()) if __name__ == "__main__": # Create the subdocument chartdoc = OpenDocumentChart() mychart = BarChart() mychart.title = "SPECTRE" mychart.subtitle = "SPecial Executive for Counter-intelligence, Terrorism, Revenge and Extortion" mychart.x_axis = "Divisions" mychart.y_axis = u"€ (thousand)" # These represent the data. Six rows in three columns mychart.values = ( ('','Expense','Revenue'), ('Counterfeit',1000,1500), ('Murder',1100,1150), ('Prostitution',3200,2350), ('Blackmail',1100,1150), ('Larceny',1000,1750) ) mychart.datasourcehaslabels = "both" mychart(chartdoc) # Create the containg document textdoc = OpenDocumentText() # Create a paragraph to contain the frame. You can put the frame directly # as a child og textdoc.text, but both Kword and OOo has problems wiht # this approach. p = text.P() textdoc.text.addElement(p) # Create the frame. df = draw.Frame(width="476pt", height="404pt", anchortype="paragraph") p.addElement(df) # Here we add the subdocument to the main document. We get back a reference # to use in the href. objectloc = textdoc.addObject(chartdoc) do = draw.Object(href=objectloc) # Put the object inside the frame df.addElement(do) textdoc.save("spectre-balance", True) odfpy-0.9.6/examples/passwd-as-ods.py0000664000076400007640000000461212106145307017512 0ustar rougroug00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # from odf.opendocument import OpenDocumentSpreadsheet from odf.style import Style, TextProperties, ParagraphProperties, TableColumnProperties from odf.text import P from odf.table import Table, TableColumn, TableRow, TableCell PWENC = "utf-8" textdoc = OpenDocumentSpreadsheet() # Create a style for the table content. One we can modify # later in the word processor. tablecontents = Style(name="Table Contents", family="paragraph") tablecontents.addElement(ParagraphProperties(numberlines="false", linenumber="0")) tablecontents.addElement(TextProperties(fontweight="bold")) textdoc.styles.addElement(tablecontents) # Create automatic styles for the column widths. # We want two different widths, one in inches, the other one in metric. # ODF Standard section 15.9.1 widthshort = Style(name="Wshort", family="table-column") widthshort.addElement(TableColumnProperties(columnwidth="1.7cm")) textdoc.automaticstyles.addElement(widthshort) widthwide = Style(name="Wwide", family="table-column") widthwide.addElement(TableColumnProperties(columnwidth="1.5in")) textdoc.automaticstyles.addElement(widthwide) # Start the table, and describe the columns table = Table(name="Password") table.addElement(TableColumn(numbercolumnsrepeated=4,stylename=widthshort)) table.addElement(TableColumn(numbercolumnsrepeated=3,stylename=widthwide)) f = open('/etc/passwd') for line in f: rec = line.strip().split(":") tr = TableRow() table.addElement(tr) for val in rec: tc = TableCell() tr.addElement(tc) p = P(stylename=tablecontents,text=unicode(val,PWENC)) tc.addElement(p) textdoc.spreadsheet.addElement(table) textdoc.save("passwd.ods") odfpy-0.9.6/examples/europeanflag-as-odg.py0000664000076400007640000001146012106145307020644 0ustar rougroug00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # # This is an example of an OpenDocument Drawing. # We are going to draw the European flag. I chose this motive because # it has just the right complexity for an example. It contains 12 five-edge # stars in a circle on a blue background. The production specifications can # be found at http://europa.eu/abc/symbols/emblem/index_en.htm # # The stars are drawn with a vector-oriented "turtle" the way Seymour Papert's # LOGO language does. import math from odf.opendocument import OpenDocumentDrawing from odf.style import Style, MasterPage, PageLayout, PageLayoutProperties, \ GraphicProperties, DrawingPageProperties from odf.draw import Page, G, Polygon, Rect class VectorSet: """ A class to simulate LOGO's turtle. The turtle starts a 0,0 pointing right along the x-axis, since we use the mathematical coordinate system. """ orientation = 0 # Degrees x = 0.0 y = 0.0 polygon = [] def forward(self, length): orirad = math.radians(self.orientation) self.x = self.x + length * math.cos(orirad) self.y = self.y + length * math.sin(orirad) def right(self, turn): self.orientation = (self.orientation + turn) % 360 def left(self, turn): self.orientation = (self.orientation - turn) % 360 def mark(self): self.polygon.append((self.x,self.y)) def firstmark(self): self.polygon.append(self.polygon[0]) def getpoints(self): """ Return the polygon points """ strpairs = ["%.0f,%.0f" % item for item in self.polygon] return ' '.join(strpairs) def getviewbox(self): ''' The value of the viewBox attribute is a list of four numbers , , and ''' xvals = [ item[0] for item in self.polygon] maxx = int(reduce(max,xvals)) + 1 minx = int(reduce(min,xvals)) yvals = [ item[1] for item in self.polygon] maxy = int(reduce(max,yvals)) + 1 miny = int(reduce(min,yvals)) return minx, miny, maxx-minx, maxy-miny # Create the document doc = OpenDocumentDrawing() # Create the drawing page dpstyle = Style(family="drawing-page",name="DP1") dpstyle.addElement(DrawingPageProperties(backgroundsize="border", fill="none")) doc.automaticstyles.addElement(dpstyle) # The blue background style of the flag backgroundstyle = Style(family="graphic", name="blueback") backgroundstyle.addElement(GraphicProperties(fill="solid", fillcolor="#003399", stroke="none")) doc.automaticstyles.addElement(backgroundstyle) # The style for the stars starstyle = Style(family="graphic", name="starstyle") starstyle.addElement(GraphicProperties(fill="solid", fillcolor="#ffcc00", stroke="none")) doc.automaticstyles.addElement(starstyle) # Create page layout specifying dimensions plstyle = PageLayout(name="PM1") plstyle.addElement(PageLayoutProperties(margin="0cm", pageheight="120mm", pagewidth="180mm", printorientation="portrait")) doc.automaticstyles.addElement(plstyle) # Create a master page masterpage = MasterPage(stylename=dpstyle, name="Default", pagelayoutname=plstyle) doc.masterstyles.addElement(masterpage) # Create a page to contain the drawing drawpage = Page(masterpagename=masterpage, name="page1", stylename=dpstyle) doc.drawing.addElement(drawpage) group=G() drawpage.addElement(group) turtle = VectorSet() # Draw the edges turtle.mark() for edge in [ 0,1,2,3,5 ]: turtle.forward(100) turtle.mark() turtle.right(144) turtle.forward(100) turtle.mark() turtle.left(72) turtle.firstmark() # Draw a rectangle containing the blue background group.addElement(Rect(height="120mm", width="180mm", x="0mm", y="0mm", stylename=backgroundstyle)) viewbox = ' '.join(map(str,turtle.getviewbox())) points = turtle.getpoints() # Go around in a circle in twelve steps for deg in range(0,360,30): x = 83.3 + math.cos(math.radians(deg)) * 40 y = 53.3 + math.sin(math.radians(deg)) * 40 group.addElement(Polygon(points=points, stylename=starstyle, viewbox=viewbox, width="13.3mm", height="13.3mm", x="%0.2fmm" % x, y="%0.2fmm" % y)) # Save the work doc.save("europeanflag", True) odfpy-0.9.6/examples/list-as-odt.py0000664000076400007640000000361112106145307017163 0ustar rougroug00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # from odf.opendocument import OpenDocumentText from odf.style import Style, TextProperties, ParagraphProperties, ListLevelProperties from odf.text import P, List, ListItem, ListStyle, ListLevelStyleBullet textdoc = OpenDocumentText() symbolstyle = Style(name="Numbering Symbols", family="text") textdoc.styles.addElement(symbolstyle) liststyle = Style(name="List Content", family="paragraph") liststyle.addElement(ParagraphProperties(numberlines="false", linenumber="0")) textdoc.automaticstyles.addElement(liststyle) listhier = ListStyle(name="MyList") level = 1 for bullet in [u"–", u"•", u"–",u"•", u"✗", u"✗", u"✗", u"✗", u"✗", u"✗"]: b = ListLevelStyleBullet(level=str(level), stylename=symbolstyle, bulletchar=bullet) listhier.addElement(b) b.addElement(ListLevelProperties(minlabelwidth="%dcm" % level)) b.addElement(TextProperties(fontname="StarSymbol")) level = level + 1 textdoc.styles.addElement(listhier) l = List(stylename=listhier) textdoc.text.addElement(l) for x in [1,2,3,4]: elem = ListItem() elem.addElement(P(text="Listitem %d" % x)) l.addElement(elem) textdoc.save("list-example.odt") odfpy-0.9.6/examples/loadsave.py0000664000076400007640000000177512106145307016632 0ustar rougroug00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2009 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # # # This script simply loads a document into memory and saves it again. # It takes the filename as argument import sys from odf.opendocument import load infile = sys.argv[1] doc = load(infile) outfile = infile[:-4] + "-bak" + infile[-4:] doc.save(outfile) odfpy-0.9.6/examples/ods2odt.py0000664000076400007640000000451212106145307016402 0ustar rougroug00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2008 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # # This script converts a spreadsheet to a text file. I.e. it copies # the sheets and turns them into tables in the textfile # Note: Copy of images does not work? # import sys, getopt from odf.opendocument import OpenDocumentText, load from odf.table import Table from odf.text import P def usage(): sys.stderr.write("Usage: %s [-o outputfile] inputfile\n" % sys.argv[0]) if __name__ == "__main__": try: opts, args = getopt.getopt(sys.argv[1:], "o:", ["output="]) except getopt.GetoptError: usage() sys.exit(2) outputfile = None for o, a in opts: if o in ("-o", "--output"): outputfile = a if len(args) != 1: usage() sys.exit(2) inputfile = args[0] if outputfile is None: outputfile = inputfile[:inputfile.rfind('.')] + ".odt" spreadsheetdoc = load(inputfile) textdoc = OpenDocumentText() # Need to make a copy of the list because addElement unlinks from the original for meta in spreadsheetdoc.meta.childNodes[:]: textdoc.meta.addElement(meta) for font in spreadsheetdoc.fontfacedecls.childNodes[:]: textdoc.fontfacedecls.addElement(font) for style in spreadsheetdoc.styles.childNodes[:]: textdoc.styles.addElement(style) for autostyle in spreadsheetdoc.automaticstyles.childNodes[:]: textdoc.automaticstyles.addElement(autostyle) for sheet in spreadsheetdoc.getElementsByType(Table): textdoc.text.addElement(sheet) textdoc.text.addElement(P()) textdoc.Pictures = spreadsheetdoc.Pictures textdoc.save(outputfile) odfpy-0.9.6/examples/text-with-masterpage.py0000664000076400007640000000224312106145307021106 0ustar rougroug00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # from odf.opendocument import OpenDocumentText from odf.style import PageLayout, MasterPage, Header, Footer from odf.text import P textdoc = OpenDocumentText() pl = PageLayout(name="pagelayout") textdoc.automaticstyles.addElement(pl) mp = MasterPage(name="Standard", pagelayoutname=pl) textdoc.masterstyles.addElement(mp) h = Header() hp = P(text="header try") h.addElement(hp) mp.addElement(h) textdoc.save("headers.odt") odfpy-0.9.6/odfoutline/0000775000076400007640000000000012106145466015010 5ustar rougroug00000000000000odfpy-0.9.6/odfoutline/odfoutline.10000664000076400007640000001070512106145307017237 0ustar rougroug00000000000000.\" Title: odfoutline .\" Author: S\(/oren Roug .\" Generator: DocBook XSL Stylesheets v1.74.0 .\" Date: 03/15/2009 .\" Manual: User commands .\" Source: odfpy .\" Language: English .\" .TH "ODFOUTLINE" "1" "03/15/2009" "odfpy" "User commands" .\" ----------------------------------------------------------------- .\" * (re)Define some macros .\" ----------------------------------------------------------------- .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" toupper - uppercase a string (locale-aware) .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .de toupper .tr aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ \\$* .tr aabbccddeeffgghhiijjkkllmmnnooppqqrrssttuuvvwwxxyyzz .. .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" SH-xref - format a cross-reference to an SH section .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .de SH-xref .ie n \{\ .\} .toupper \\$* .el \{\ \\$* .\} .. .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" SH - level-one heading that works better for non-TTY output .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .de1 SH .\" put an extra blank line of space above the head in non-TTY output .if t \{\ .sp 1 .\} .sp \\n[PD]u .nr an-level 1 .set-an-margin .nr an-prevailing-indent \\n[IN] .fi .in \\n[an-margin]u .ti 0 .HTML-TAG ".NH \\n[an-level]" .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 \." make the size of the head bigger .ps +3 .ft B .ne (2v + 1u) .ie n \{\ .\" if n (TTY output), use uppercase .toupper \\$* .\} .el \{\ .nr an-break-flag 0 .\" if not n (not TTY), use normal case (not uppercase) \\$1 .in \\n[an-margin]u .ti 0 .\" if not n (not TTY), put a border/line under subheading .sp -.6 \l'\n(.lu' .\} .. .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" SS - level-two heading that works better for non-TTY output .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .de1 SS .sp \\n[PD]u .nr an-level 1 .set-an-margin .nr an-prevailing-indent \\n[IN] .fi .in \\n[IN]u .ti \\n[SN]u .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .ps \\n[PS-SS]u \." make the size of the head bigger .ps +2 .ft B .ne (2v + 1u) .if \\n[.$] \&\\$* .. .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" BB/BE - put background/screen (filled box) around block of text .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .de BB .if t \{\ .sp -.5 .br .in +2n .ll -2n .gcolor red .di BX .\} .. .de EB .if t \{\ .if "\\$2"adjust-for-leading-newline" \{\ .sp -1 .\} .br .di .in .ll .gcolor .nr BW \\n(.lu-\\n(.i .nr BH \\n(dn+.5v .ne \\n(BHu+.5v .ie "\\$2"adjust-for-leading-newline" \{\ \M[\\$1]\h'1n'\v'+.5v'\D'P \\n(BWu 0 0 \\n(BHu -\\n(BWu 0 0 -\\n(BHu'\M[] .\} .el \{\ \M[\\$1]\h'1n'\v'-.5v'\D'P \\n(BWu 0 0 \\n(BHu -\\n(BWu 0 0 -\\n(BHu'\M[] .\} .in 0 .sp -.5v .nf .BX .in .sp .5v .fi .\} .. .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" BM/EM - put colored marker in margin next to block of text .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .de BM .if t \{\ .br .ll -2n .gcolor red .di BX .\} .. .de EM .if t \{\ .br .di .ll .gcolor .nr BH \\n(dn .ne \\n(BHu \M[\\$1]\D'P -.75n 0 0 \\n(BHu -(\\n[.i]u - \\n(INu - .75n) 0 0 -\\n(BHu'\M[] .in 0 .nf .BX .in .fi .\} .. .\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .\" ----------------------------------------------------------------- .\" * MAIN CONTENT STARTS HERE * .\" ----------------------------------------------------------------- .SH "Name" odfoutline \- Show outline of OpenDocument .SH "Synopsis" .fam C .HP \w'\fBodfoutline\fR\ 'u \fBodfoutline\fR \fIpath\fR .fam .SH "Description" .PP odfoutline is a simple program that will show the headings in the file and the level the heading is\&. .PP \(lqPath\(rq is assumed to be an OpenDocument file of text, spreadsheet or presentation type\&. .SH "Example" .sp .if n \{\ .RS 4 .\} .fam C .ps -1 .nf .if t \{\ .sp -1 .\} .BB lightgray adjust-for-leading-newline .sp -1 odfoutline odf\-file .EB lightgray adjust-for-leading-newline .if t \{\ .sp 1 .\} .fi .fam .ps +1 .if n \{\ .RE .\} .SH "Author" .PP \fBS\(/oren Roug\fR .RS 4 Original author .RE odfpy-0.9.6/odfoutline/Makefile0000664000076400007640000000023012106145307016435 0ustar rougroug00000000000000all: odf odfoutline.1 txt: odfoutline.txt %.1: %.docbook xmlto man $< %.txt: %.docbook xmlto txt $< clean: rm -f *.txt odf odf: ln -s ../odf odfpy-0.9.6/odfoutline/odfoutline0000775000076400007640000001032512106145307017101 0ustar rougroug00000000000000#!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2006 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # import zipfile from xml.sax import make_parser,handler from xml.sax.xmlreader import InputSource import xml.sax.saxutils import sys from odf.namespaces import TEXTNS, TABLENS, DRAWNS try: from cStringIO import StringIO except ImportError: from StringIO import StringIO def getxmlpart(odffile, xmlfile): """ Get the content out of the ODT file""" z = zipfile.ZipFile(odffile) content = z.read(xmlfile) z.close() return content # # Extract headings from content.xml # class ODTHeadingHandler(handler.ContentHandler): """ Extract headings from content.xml of an ODT file """ def __init__(self, eater): self.r = eater self.data = [] self.level = 0 def characters(self, data): self.data.append(data) def startElementNS(self, tag, qname, attrs): if tag == (TEXTNS, 'h'): self.level = 0 for (att,value) in attrs.items(): if att == (TEXTNS, 'outline-level'): self.level = int(value) self.data = [] def endElementNS(self, tag, qname): if tag == (TEXTNS, 'h'): str = ''.join(self.data) self.data = [] self.r.append("%d%*s%s" % (self.level, self.level, '', str)) class ODTSheetHandler(handler.ContentHandler): """ Extract sheet names from content.xml of an ODS file """ def __init__(self, eater): self.r = eater def startElementNS(self, tag, qname, attrs): if tag == (TABLENS, 'table'): sheetname = attrs.get((TABLENS, 'name')) if sheetname: self.r.append(sheetname) class ODTSlideHandler(handler.ContentHandler): """ Extract headings from content.xml of an ODT file """ def __init__(self, eater): self.r = eater self.data = [] self.pagenum = 0 def characters(self, data): self.data.append(data) def startElementNS(self, tag, qname, attrs): if tag == (DRAWNS, 'page'): self.pagenum = self.pagenum + 1 self.r.append("SLIDE %d: %s" % ( self.pagenum, attrs.get((DRAWNS, 'name'),''))) if tag == (TEXTNS, 'p'): self.data = [] def endElementNS(self, tag, qname): if tag == (TEXTNS, 'p'): str = ''.join(self.data) self.data = [] if len(str) > 0: self.r.append(" " + str) def odtheadings(odtfile): mimetype = getxmlpart(odtfile,'mimetype') content = getxmlpart(odtfile,'content.xml') lines = [] parser = make_parser() parser.setFeature(handler.feature_namespaces, 1) if mimetype in ('application/vnd.oasis.opendocument.text', 'application/vnd.oasis.opendocument.text-template'): parser.setContentHandler(ODTHeadingHandler(lines)) elif mimetype in ('application/vnd.oasis.opendocument.spreadsheet', 'application/vnd.oasis.opendocument.spreadsheet-template'): parser.setContentHandler(ODTSheetHandler(lines)) elif mimetype in ('application/vnd.oasis.opendocument.presentation' 'application/vnd.oasis.opendocument.presentation-template'): parser.setContentHandler(ODTSlideHandler(lines)) else: print "Unsupported fileformat" sys.exit(2) parser.setErrorHandler(handler.ErrorHandler()) inpsrc = InputSource() inpsrc.setByteStream(StringIO(content)) parser.parse(inpsrc) return lines if __name__ == "__main__": filler = " " for heading in odtheadings(sys.argv[1]): print heading odfpy-0.9.6/odfoutline/odfoutline.docbook0000664000076400007640000000223012106145307020511 0ustar rougroug00000000000000 odfpy SørenRoug Original author odfoutline 1 User commands odfoutline Show outline of OpenDocument odfoutline path Description odfoutline is a simple program that will show the headings in the file and the level the heading is. Path is assumed to be an OpenDocument file of text, spreadsheet or presentation type. Example odfoutline odf-file odfpy-0.9.6/odf2mht/0000775000076400007640000000000012106145466014203 5ustar rougroug00000000000000odfpy-0.9.6/odf2mht/Makefile0000664000076400007640000000022212106145305015627 0ustar rougroug00000000000000all: odf odf2mht.1 txt: odf2mht.txt %.1: %.docbook xmlto man $< %.txt: %.docbook xmlto txt $< clean: rm -f *.txt odf odf: ln -s ../odf odfpy-0.9.6/odf2mht/odf2mht0000775000076400007640000000417012106145305015466 0ustar rougroug00000000000000#!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2006 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # from odf.odf2xhtml import ODF2XHTML import zipfile import sys #from time import gmtime, strftime from email.MIMEMultipart import MIMEMultipart from email.MIMENonMultipart import MIMENonMultipart from email.MIMEText import MIMEText from email import Encoders if len(sys.argv) != 2: sys.stderr.write("Usage: %s inputfile\n" % sys.argv[0]) sys.exit(1) suffices = { 'wmf':('image','x-wmf'), 'png':('image','png'), 'gif':('image','gif'), 'jpg':('image','jpeg'), 'jpeg':('image','jpeg') } msg = MIMEMultipart('related',type="text/html") # msg['Subject'] = 'Subject here' # msg['From'] = '' # msg['Date'] = strftime("%a, %d %b %Y %H:%M:%S +0000", gmtime()) msg.preamble = 'This is a multi-part message in MIME format.' msg.epilogue = '' odhandler = ODF2XHTML() result = odhandler.odf2xhtml(sys.argv[1]).encode('us-ascii','xmlcharrefreplace') htmlpart = MIMEText(result,'html','us-ascii') htmlpart['Content-Location'] = 'index.html' msg.attach(htmlpart) z = zipfile.ZipFile(sys.argv[1]) for file in z.namelist(): if file[0:9] == 'Pictures/': suffix = file[file.rfind(".")+1:] main,sub = suffices.get(suffix,('application','octet-stream')) img = MIMENonMultipart(main,sub) img.set_payload(z.read(file)) img['Content-Location'] = "" + file Encoders.encode_base64(img) msg.attach(img) z.close() print msg.as_string() odfpy-0.9.6/odf2mht/odf2mht.docbook0000664000076400007640000000373012106145305017103 0ustar rougroug00000000000000 odfpy SørenRoug Original author odf2mht 1 User commands odf2mht Convert ODF to HTML archive odf2mht path Description Odf2mht is a program that will create a MIME-encapsulated web archive (.mht) format where images are preserved. The file can be read by Internet Explorer, MS-Word and many email programs such as MS-Outlook. It will write the web archive to stdout. Path is assumed to be an OpenDocument file of text, spreadsheet or presentation type. References HTTRACK (http://www.httrack.com/) can create such archives with the -%M option. http://en.wikipedia.org/wiki/MHTML http://www.dsv.su.se/~jpalme/ietf/mhtml.html http://users.otenet.gr/~geosp/kmhtconvert/ http://www.faqs.org/rfcs/rfc2557.html Example odf2mht example.odt >example.mht Bugs IE6 seems to have problems with large MHT files. See Also odftools(1), odf2war(1), mailodf(1) odfpy-0.9.6/odf2mht/odf2mht.10000664000076400007640000001210412106145305015616 0ustar rougroug00000000000000.\" Title: odf2mht .\" Author: S\(/oren Roug .\" Generator: DocBook XSL Stylesheets v1.74.0 .\" Date: 03/15/2009 .\" Manual: User commands .\" Source: odfpy .\" Language: English .\" .TH "ODF2MHT" "1" "03/15/2009" "odfpy" "User commands" .\" ----------------------------------------------------------------- .\" * (re)Define some macros .\" ----------------------------------------------------------------- .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" toupper - uppercase a string (locale-aware) .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .de toupper .tr aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ \\$* .tr aabbccddeeffgghhiijjkkllmmnnooppqqrrssttuuvvwwxxyyzz .. .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" SH-xref - format a cross-reference to an SH section .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .de SH-xref .ie n \{\ .\} .toupper \\$* .el \{\ \\$* .\} .. .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" SH - level-one heading that works better for non-TTY output .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .de1 SH .\" put an extra blank line of space above the head in non-TTY output .if t \{\ .sp 1 .\} .sp \\n[PD]u .nr an-level 1 .set-an-margin .nr an-prevailing-indent \\n[IN] .fi .in \\n[an-margin]u .ti 0 .HTML-TAG ".NH \\n[an-level]" .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 \." make the size of the head bigger .ps +3 .ft B .ne (2v + 1u) .ie n \{\ .\" if n (TTY output), use uppercase .toupper \\$* .\} .el \{\ .nr an-break-flag 0 .\" if not n (not TTY), use normal case (not uppercase) \\$1 .in \\n[an-margin]u .ti 0 .\" if not n (not TTY), put a border/line under subheading .sp -.6 \l'\n(.lu' .\} .. .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" SS - level-two heading that works better for non-TTY output .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .de1 SS .sp \\n[PD]u .nr an-level 1 .set-an-margin .nr an-prevailing-indent \\n[IN] .fi .in \\n[IN]u .ti \\n[SN]u .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .ps \\n[PS-SS]u \." make the size of the head bigger .ps +2 .ft B .ne (2v + 1u) .if \\n[.$] \&\\$* .. .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" BB/BE - put background/screen (filled box) around block of text .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .de BB .if t \{\ .sp -.5 .br .in +2n .ll -2n .gcolor red .di BX .\} .. .de EB .if t \{\ .if "\\$2"adjust-for-leading-newline" \{\ .sp -1 .\} .br .di .in .ll .gcolor .nr BW \\n(.lu-\\n(.i .nr BH \\n(dn+.5v .ne \\n(BHu+.5v .ie "\\$2"adjust-for-leading-newline" \{\ \M[\\$1]\h'1n'\v'+.5v'\D'P \\n(BWu 0 0 \\n(BHu -\\n(BWu 0 0 -\\n(BHu'\M[] .\} .el \{\ \M[\\$1]\h'1n'\v'-.5v'\D'P \\n(BWu 0 0 \\n(BHu -\\n(BWu 0 0 -\\n(BHu'\M[] .\} .in 0 .sp -.5v .nf .BX .in .sp .5v .fi .\} .. .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" BM/EM - put colored marker in margin next to block of text .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .de BM .if t \{\ .br .ll -2n .gcolor red .di BX .\} .. .de EM .if t \{\ .br .di .ll .gcolor .nr BH \\n(dn .ne \\n(BHu \M[\\$1]\D'P -.75n 0 0 \\n(BHu -(\\n[.i]u - \\n(INu - .75n) 0 0 -\\n(BHu'\M[] .in 0 .nf .BX .in .fi .\} .. .\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .\" ----------------------------------------------------------------- .\" * MAIN CONTENT STARTS HERE * .\" ----------------------------------------------------------------- .SH "Name" odf2mht \- Convert ODF to HTML archive .SH "Synopsis" .fam C .HP \w'\fBodf2mht\fR\ 'u \fBodf2mht\fR \fIpath\fR .fam .SH "Description" .PP \fBOdf2mht\fR is a program that will create a MIME\-encapsulated web archive (\&.mht) format where images are preserved\&. The file can be read by Internet Explorer, MS\-Word and many email programs such as MS\-Outlook\&. It will write the web archive to stdout\&. .PP \(lqPath\(rq is assumed to be an OpenDocument file of text, spreadsheet or presentation type\&. .SH "References" .RS 4 HTTRACK (http://www\&.httrack\&.com/) can create such archives with the \-%M option\&. .RE .RS 4 http://en\&.wikipedia\&.org/wiki/MHTML .RE .RS 4 http://www\&.dsv\&.su\&.se/~jpalme/ietf/mhtml\&.html .RE .RS 4 http://users\&.otenet\&.gr/~geosp/kmhtconvert/ .RE .RS 4 http://www\&.faqs\&.org/rfcs/rfc2557\&.html .RE .SH "Example" .sp .if n \{\ .RS 4 .\} .fam C .ps -1 .nf .if t \{\ .sp -1 .\} .BB lightgray adjust-for-leading-newline .sp -1 odf2mht example\&.odt >example\&.mht .EB lightgray adjust-for-leading-newline .if t \{\ .sp 1 .\} .fi .fam .ps +1 .if n \{\ .RE .\} .SH "Bugs" .PP IE6 seems to have problems with large MHT files\&. .SH "See Also" .PP \fBodftools\fR(1), \fBodf2war\fR(1), \fBmailodf\fR(1) .SH "Author" .PP \fBS\(/oren Roug\fR .RS 4 Original author .RE odfpy-0.9.6/contrib/0000775000076400007640000000000012106145466014300 5ustar rougroug00000000000000odfpy-0.9.6/contrib/odf2epub/0000775000076400007640000000000012106145466016006 5ustar rougroug00000000000000odfpy-0.9.6/contrib/odf2epub/Makefile0000664000076400007640000000023012106145304017430 0ustar rougroug00000000000000all: odf odf2epub.1 txt: odf2epub.txt %.1: %.docbook xmlto man $< %.txt: %.docbook xmlto txt $< clean: rm -f *.txt odf odf: ln -s ../../odf odfpy-0.9.6/contrib/odf2epub/odf2epub.docbook0000664000076400007640000000563012106145304021051 0ustar rougroug00000000000000 odfpy SørenRoug Original author odf2epub 1 User commands odf2epub Convert ODF to an ePub ebook odf2epub -p -o outputfile -c cover_image path Description odf2epub is a program that will create an ebook (.epub) from the input file and will write the ebook to stdout or a file specified by -o. "Path" is assumed to be an OpenDocument file of text, spreadsheet or presentation type. If the document doesn't have a title in the properties, then the first heading of any level is used. Options -c, --cover The -c argument add a cover image to the EPUB file. Make sure the cover image itself is scaled to less than 1000px in width and height. Best practice is to use an image in JPG or PNG format at 600 pixels wide by 800 pixels in height. -p, --plain The -p flag will generate HTML without CSS. -o, --output Specify the output file with this flag. "-" implies standard out. Example odf2epub -o example.epub odf-file.odt Bugs The EPUB format has the following limitations when running on a mobile device. If these limits are not adhered to, EPUB files might not work on mobile devices. Image Size: 10MB uncompressed. Individual XHTML file sizes: 300k uncompressed/100k compressed. Odf2epub does not ensure that these limits are adhered to. See Also odf2xhtml(1) odfpy-0.9.6/contrib/odf2epub/odf2epub.10000664000076400007640000000435412106145304017573 0ustar rougroug00000000000000'\" t .\" Title: odf2epub .\" Author: S\(/oren Roug .\" Generator: DocBook XSL Stylesheets v1.75.2 .\" Date: 05/16/2010 .\" Manual: User commands .\" Source: odfpy .\" Language: English .\" .TH "ODF2EPUB" "1" "05/16/2010" "odfpy" "User commands" .\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .\" ----------------------------------------------------------------- .\" * MAIN CONTENT STARTS HERE * .\" ----------------------------------------------------------------- .SH "NAME" odf2epub \- Convert ODF to an ePub ebook .SH "SYNOPSIS" .HP \w'\fBodf2epub\fR\ 'u \fBodf2epub\fR [\-p] [\-o\ \fIoutputfile\fR] [\-c\ \fIcover_image\fR] \fIpath\fR .SH "DESCRIPTION" .PP \fBodf2epub\fR is a program that will create an ebook (\&.epub) from the input file and will write the ebook to stdout or a file specified by \-o\&. "Path" is assumed to be an OpenDocument file of text, spreadsheet or presentation type\&. .PP If the document doesn\'t have a title in the properties, then the first heading of any level is used\&. .SH "OPTIONS" .PP \-c, \-\-cover .RS 4 The \-c argument add a cover image to the EPUB file\&. Make sure the cover image itself is scaled to less than 1000px in width and height\&. Best practice is to use an image in JPG or PNG format at 600 pixels wide by 800 pixels in height\&. .RE .PP \-p, \-\-plain .RS 4 The \-p flag will generate HTML without CSS\&. .RE .PP \-o, \-\-output .RS 4 Specify the output file with this flag\&. "\-" implies standard out\&. .RE .SH "EXAMPLE" .sp .if n \{\ .RS 4 .\} .nf odf2epub \-o example\&.epub odf\-file\&.odt .fi .if n \{\ .RE .\} .SH "BUGS" .PP The EPUB format has the following limitations when running on a mobile device\&. If these limits are not adhered to, EPUB files \fImight\fR not work on mobile devices\&. Image Size: 10MB uncompressed\&. Individual XHTML file sizes: 300k uncompressed/100k compressed\&. Odf2epub does not ensure that these limits are adhered to\&. .SH "SEE ALSO" .PP \fBodf2xhtml\fR(1) .SH "AUTHOR" .PP \fBS\(/oren Roug\fR .RS 4 Original author .RE odfpy-0.9.6/contrib/gutenberg/0000775000076400007640000000000012106145466016262 5ustar rougroug00000000000000odfpy-0.9.6/contrib/gutenberg/Makefile0000664000076400007640000000023312106145304017707 0ustar rougroug00000000000000all: odf gbtext2odt.1 txt: gbtext2odt.txt %.1: %.docbook xmlto man $< %.txt: %.docbook xmlto txt $< clean: rm -f *.txt odf odf: ln -s ../../odf odfpy-0.9.6/contrib/gutenberg/gbtext2odt.docbook0000664000076400007640000001035412106145304021704 0ustar rougroug00000000000000 gbtext2odt 1 gbtext2odt Create OpenDocument from Project Gutenberg text gbtext2odt -e encoding -a author -c creation date -l language -n etext -p publisher -t title -T inputfile Description Project Gutenberg is the first and largest single collection of free electronic books, or eBooks. The project started in 1971, and the chosen format is "Plain Vanilla ASCII," and this makes the text frustrating to read. Therefore the gbtext2odt program will convert such a text to OpenDocument and add some light markup. The idea behind the program is to test the feasibility of using OpenDocument for archival of documents. "Inputfile" is assumed to be an eBook from Project Gutenberg in text form. Books work pretty well, whereas plays, such as Romeo and Juliet, will probably be messed up. Options -e encoding Enter the encoding of the source eBook. Common encodings are: iso-8859-1, cp1252 (default), ascii and utf-8 -a author The name of the author. Entered into the metadata. -c creation date The date of the creation. Entered into the metadata. This can be the date of conversion, or the date the author completed his document. The format must be in ISO 8601 format. I.e. YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS. -l language Language of the eBook. It consists of a two or three letter language code taken from the ISO 639 standard optionally followed by a hyphen and a two-letter country code. -n etext Adds the Gutenberg E-text number to the metadata. -p publisher The name of the publisher. Entered into the metadata. Defaults to Gutenberg Project -t title The title of the document. Entered into the metadata. -T Use the title as the output filename, rather than based on the input filename. Example Conversion of Herodotus’ Histories from around 430 BCE. Known from the movie The English Patient. The OpenDocument standard doesn't understand creation dates that are before common era, so we won't add the publication date to the meta data. wget http://www.gutenberg.org/dirs/etext01/1hofh10.txt gbtext2odt -e cp1252 -t "The history of Herodotus — Volume 1" -a Herodotus -l en -T 1hofh10.txt See Also http://www.gutenberg.org Issues OpenOffice doesn't handle creation dates before the year 1000. odfpy-0.9.6/contrib/gutenberg/gbtext2odt.10000664000076400007640000000510512106145304020422 0ustar rougroug00000000000000.\" Title: gbtext2odt .\" Author: .\" Generator: DocBook XSL Stylesheets v1.72.0 .\" Date: 09/01/2007 .\" Manual: .\" Source: .\" .TH "GBTEXT2ODT" "1" "09/01/2007" "" "" .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .SH "NAME" gbtext2odt \- Create OpenDocument from Project Gutenberg text .SH "SYNOPSIS" .HP 11 \fBgbtext2odt\fR [\-e\ \fIencoding\fR] [\-a\ \fIauthor\fR] [\-c\ \fIcreation\ date\fR] [\-l\ \fIlanguage\fR] [\-p\ \fIpublisher\fR] [\-t\ \fItitle\fR] [\-T] [\fIinputfile\fR] .SH "DESCRIPTION" .PP Project Gutenberg is the first and largest single collection of free electronic books, or eBooks. The project started in 1971, and the chosen format is "Plain Vanilla ASCII," and this makes the text frustrating to read. Therefore the gbtext2odt program will convert such a text to OpenDocument and add some light markup. The idea behind the program is to test the feasibility of using OpenDocument for archival of documents. .PP "Inputfile" is assumed to be an eBook from Project Gutenberg in text form. Books work pretty well, whereas plays, such as \fIRomeo and Juliet\fR, will probably be messed up. .SH "OPTIONS" .PP \-e \fIencoding\fR .RS 4 Enter the encoding of the source eBook. Common encodings are: iso\-8859\-1, cp1252 (default), ascii and utf\-8 .RE .PP \-a \fIauthor\fR .RS 4 The name of the author. Entered into the metadata. .RE .PP \-c \fIcreation date\fR .RS 4 The date of the creation. Entered into the metadata. This can be the date of conversion, or the date the author completed his document. The format must be in ISO 8601 format. I.e. YYYY\-MM\-DD or YYYY\-MM\-DDTHH:MM:SS. .RE .PP \-l \fIlanguage\fR .RS 4 Language of the eBook. It consists of a two or three letter language code taken from the ISO 639 standard optionally followed by a hyphen and a two\-letter country code. .RE .PP \-p \fIpublisher\fR .RS 4 The name of the publisher. Entered into the metadata. Defaults to Gutenberg Project .RE .PP \-t \fItitle\fR .RS 4 The title of the document. Entered into the metadata. .RE .PP \-T .RS 4 Use the title as the output filename, rather than based on the input filename. .RE .SH "EXAMPLE" .PP Conversion of Herodotus\(cq Histories from around 430 BC. Known from the movie [The English Patient]. .sp .RS 4 .nf wget http://www.gutenberg.org/dirs/etext01/1hofh10.txt gbtext2odt \-e cp1252 \-t "The history of Herodotus \(em Volume 1" \-a Herodotus \-l en \-T 1hofh10.txt .fi .RE .SH "SEE ALSO" .PP http://www.gutenberg.org .SH "ISSUES" .PP OpenOffice doesn't handle creation dates before the year 1000. odfpy-0.9.6/contrib/gutenberg/gbtext2odt.py0000775000076400007640000002276312106145304020726 0ustar rougroug00000000000000#!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This library is free software; you can redistribute it and/or # modify it under the terms of the GNU Lesser General Public # License as published by the Free Software Foundation; either # version 2.1 of the License, or (at your option) any later version. # # This library is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU # Lesser General Public License for more details. # # You should have received a copy of the GNU Lesser General Public # License along with this library; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # from odf.opendocument import OpenDocumentText from odf import style, text, dc, meta import sys, getopt, time def usage(): sys.stderr.write("""Usage: %s [-l language] [-e encoding] [-T] [-a author] \t[-c creation_date] [-d description] [-n etext] [-p publisher] [-t title] inputfile\n""" % sys.argv[0]) try: opts, args = getopt.getopt(sys.argv[1:], "a:n:c:d:e:l:p:t:T", ["author=", "date=", "created=", "description=", "number=", "title=", "language=", "publisher=", "encoding="]) except getopt.GetoptError: usage() sys.exit(2) language = None description = None encoding = 'cp1252' # Codepage 1252 is a superset of ASCII and ISO-8859-1 argencoding = 'utf-8' creator = "" creationdate = None title = "" ebooknum = None publisher = "Project Gutenberg" copyrights = "http://www.gutenberg.org/license" fn_is_title = False for o, a in opts: if o in ("-l", "--language"): if len(a) > 3 and a[2] != '-' and a[3] != '-' or len(a) > 6: sys.stderr.write("""Language must be a two or three letter language code optionally \tfollowed by a hyphen and a two-letter country code""") sys.exit(2) language = a elif o in ("-e", "--encoding"): encoding = a elif o in ("-a", "--author"): creator = unicode(a, argencoding) elif o in ("-d", "--description"): description = a elif o in ("-c", "--date", "--created"): if len(a) > 10 and a[10] != "T": sys.stderr.write("""Date must be in ISO8601 format (YYYY-MM-DDTHH:MM:SS)\n""") sys.exit(2) if len(a) < 10 or (len(a) == 10 and a[4] != "-" and a[7] != "-"): sys.stderr.write("""Date must be in ISO8601 format (YYYY-MM-DD)\n""") sys.exit(2) creationdate = a elif o in ("-p", "--publisher"): publisher = a elif o in ("-n", "--number"): ebooknum = unicode(a, argencoding) elif o in ("-t", "--title"): title = unicode(a, argencoding) elif o == "-T": fn_is_title = True if len(args) != 1: usage() sys.exit(2) doc=OpenDocumentText() textdoc = doc.text if creator != "": doc.meta.addElement(meta.InitialCreator(text=creator)) doc.meta.addElement(dc.Creator(text=creator)) if creationdate is not None: doc.meta.addElement(meta.CreationDate(text=creationdate)) doc.meta.addElement(dc.Date(text=creationdate)) if description is not None: doc.meta.addElement(dc.Description(text=description)) if title != "": doc.meta.addElement(dc.Title(text=title)) if language is not None: doc.meta.addElement(dc.Language(text=language)) if publisher is not None: # doc.meta.addElement(dc.Publisher(text=publisher)) doc.meta.addElement(meta.UserDefined(name="Publisher", text=publisher)) if copyrights is not None: # doc.meta.addElement(dc.Rights(text=copyrights)) doc.meta.addElement(meta.UserDefined(name="Rights", text=copyrights)) if ebooknum is not None: doc.meta.addElement(meta.UserDefined(name="EText", text=ebooknum)) arial = style.FontFace(name="Arial", fontfamily="Arial", fontfamilygeneric="swiss", fontpitch="variable") doc.fontfacedecls.addElement(arial) # Paragraph styles standardstyle = style.Style(name="Standard", family="paragraph") standardstyle.addElement(style.ParagraphProperties(marginbottom="0cm", margintop="0cm" )) doc.styles.addElement(standardstyle) h1style = style.Style(name="Heading 1", family="paragraph", defaultoutlinelevel="1") h1style.addElement(style.TextProperties(attributes={'fontsize':"20pt", 'fontweight':"bold"})) doc.styles.addElement(h1style) textbodystyle = style.Style(name="Text body", family="paragraph", parentstylename=standardstyle) textbodystyle.addElement(style.ParagraphProperties(attributes={'marginbottom':"0.212cm", 'margintop':"0cm", 'textalign':"justify", 'justifysingleword':"false"})) doc.styles.addElement(textbodystyle) subtitlestyle = style.Style(name="Subtitle", family="paragraph", nextstylename=textbodystyle) subtitlestyle.addElement(style.ParagraphProperties(textalign="center") ) subtitlestyle.addElement(style.TextProperties(fontsize="14pt", fontstyle="italic", fontname="Arial")) doc.styles.addElement(subtitlestyle) titlestyle = style.Style(name="Title", family="paragraph", nextstylename=subtitlestyle) titlestyle.addElement(style.ParagraphProperties(textalign="center") ) titlestyle.addElement(style.TextProperties(fontsize="18pt", fontweight="bold", fontname="Arial")) doc.styles.addElement(titlestyle) # Text styles emphasisstyle = style.Style(name="Emphasis",family="text") emphasisstyle.addElement(style.TextProperties(fontstyle="italic")) doc.styles.addElement(emphasisstyle) # Make the Gutenberg sections grey sectstyle = style.Style(name="Sect1", family="section") sectstyle.addElement(style.SectionProperties(backgroundcolor="#e6e6e6")) doc.automaticstyles.addElement(sectstyle) FULLLINE=55 paragraph=[] def addparagraph(section): """ Join the paragraph list and add it to the section """ global paragraph p = ' '.join(paragraph) textsegs = p.split('_') para = text.P(stylename=textbodystyle) section.addElement(para) if len(textsegs) > 1 and (len(textsegs) % 2) == 1: # We have found some kursive text segments for i in range(len(textsegs)): if len(textsegs[i]) > 0: if (i % 2) == 1: y = text.Span(stylename=emphasisstyle, text=textsegs[i]) para.addElement(y) else: para.addText(textsegs[i]) else: para.addText(p) def cleantext(s): if s[0] == '"' or s[-1] == '"': ls=list(s) if ls[0] == '"': ls[0] = u'“' if ls[-1] == '"': ls[-1] = u'”' s = ''.join(ls) s = s.replace('" ',u'” ') s = s.replace(' "',u' “') s = s.replace("'m",u"’m") # I'm s = s.replace("'s",u"’s") # genitive case s = s.replace("'t",u"’t") # don't, doesn't, haven't s = s.replace("'S",u"’S") # genitive case s = s.replace("'T",u"’T") # DON'T, etc s = s.replace("l'",u"l’") # French s = s.replace("d'",u"d’") # French if s.find('---') < 0: # Don't replace double dash for lines s = s.replace('--',u'—') return s def pretext(section, line, linelen): section.addElement(text.P(stylename=standardstyle, text=line)) def posttext(section, line, linelen): section.addElement(text.P(stylename=standardstyle, text=line)) def mainpart(section, line, linelen): global paragraph if linelen > 0 and len(paragraph) == 0 and \ line.upper() == line and line.upper() != line.lower(): # Headlines are always upper case style = h1style l = cleantext(line) section.addElement(text.H(outlinelevel=1, stylename=h1style, text=l)) elif linelen >= FULLLINE: # In the middle of a paragraph paragraph.append(cleantext(line)) elif linelen == 0: # End of paragraph if len(paragraph) > 0: addparagraph(section) paragraph=[] elif linelen < FULLLINE and len(paragraph) > 0: # Short tail of paragraph paragraph.append(cleantext(line)) else: if line == title or line == title + " by " + creator: section.addElement(text.P( stylename=titlestyle, text=cleantext(line))) return if line == "by" or line == creator: section.addElement(text.P( stylename=subtitlestyle, text=cleantext(line))) return if len(paragraph) > 0: addparagraph(section) paragraph=[] section.addElement(text.P(stylename=textbodystyle, text=cleantext(line))) PRETEXT = 1 MAINPART = 2 POSTTEXT = 3 textpart = PRETEXT # Start in the preamble section = text.Section(stylename=sectstyle, name="preamble") #, display="none") textdoc.addElement(section) filename = args[0] if fn_is_title and title is not None and title != "": outfn = title else: suffixi = filename.rfind(".") if suffixi > 1: outfn = filename[:suffixi] else: outfn = "interimname" f = open(filename) for rawline in f: line = unicode(rawline.strip(), encoding) linelen = len(line) if line.find("*** END OF TH") == 0: textpart = POSTTEXT section = text.Section(stylename=sectstyle, name="license") #, display="none") textdoc.addElement(section) if textpart == PRETEXT: pretext(section, line, linelen) if line.find("*** START OF TH") == 0 or \ line.find("*END THE SMALL PRINT!") == 0 or \ line.find("*END*THE SMALL PRINT!") == 0: textpart = MAINPART elif textpart == MAINPART: section = textdoc mainpart(section, line, linelen) else: posttext(section, line, linelen) # print d.contentxml() doc.save(outfn, True) odfpy-0.9.6/contrib/html2odt/0000775000076400007640000000000012106145466016035 5ustar rougroug00000000000000odfpy-0.9.6/contrib/html2odt/Makefile0000664000076400007640000000025612106145305017470 0ustar rougroug00000000000000all: fellowship.content fellowship.content: html2odt.py python html2odt.py http://opendocumentfellowship.org/ >fellowship.content clean: rm -f *.content *.meta *.styles odfpy-0.9.6/contrib/html2odt/emptycontent.py0000664000076400007640000000652612106145305021141 0ustar rougroug00000000000000# -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # chead = ['''\n''', '''''', '''''', '''''', '''''', '''''', '''''', '''''', '''''', '''''', '''''', '''''', '''''', '''''', '''''', '''''', '''''', ''''''] cmiddle = [ '''''',] cfoot = [ '''''', '''''', ''''''] def content(): return ''.join(chead + cmiddle + cfoot) odfpy-0.9.6/contrib/html2odt/shtml2odt.py0000664000076400007640000005453412106145305020332 0ustar rougroug00000000000000#!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2008-2009 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # # import string, sys, re, getopt import urllib2, htmlentitydefs, urlparse from urllib import quote_plus from HTMLParser import HTMLParser from cgi import escape,parse_header from types import StringType from odf.opendocument import OpenDocumentText, load from odf import dc, text, table import htmlstyles def converturl(url, document=None): """ grab and convert url """ url = string.strip(url) # if url.lower()[:5] != "http:": # raise IOError, "Only http is accepted" _proxies = {} proxy_support = urllib2.ProxyHandler(_proxies) opener = urllib2.build_opener(proxy_support, urllib2.HTTPHandler) urllib2.install_opener(opener) req = urllib2.Request(url) req.add_header("User-agent", "HTML2ODT: Convert HTML to OpenDocument") conn = urllib2.urlopen(req) if not conn: raise IOError, "Failure in open" data = conn.read() headers = conn.info() conn.close() encoding = 'iso8859-1' #Standard HTML if headers.has_key('content-type'): (ct, parms) = parse_header(headers['content-type']) if parms.has_key('charset'): encoding = parms['charset'] mhp = HTML2ODTParser(document, encoding, url) mhp.feed(data) return mhp entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') incomplete = re.compile('&[a-zA-Z#]') ampersand = re.compile('&') def listget(list, key, default=None): for l in list: if l[0] == key: default = l[1] return default class TagObject: def __init__(self, tag, attrs, output_loc): self.tag = tag self.attrs = attrs self.output_loc = output_loc class HTML2ODTParser(HTMLParser): def __init__(self, document, encoding, baseurl): HTMLParser.__init__(self) self.doc = document self.curr = self.doc.text if self.doc.getStyleByName("Standard") is None: style = Style(name="Standard", family="paragraph", attributes={'class':"text"}) self.doc.styles.addElement(style) if self.doc.getStyleByName("Text_20_body") is None: style = Style(name="Text_20_body", displayname="Text body", family="paragraph", parentstylename="Standard", attributes={'class':"text"}) p = ParagraphProperties(margintop="0cm", marginbottom="0.212cm") style.addElement(p) self.doc.styles.addElement(style) if self.doc.getStyleByName("Heading") is None: style = Style(name="Heading", family="paragraph", parentstylename="Standard", nextstylename="Text_20_body", attributes={'class':"text"}) p = ParagraphProperties(margintop="0.423cm", marginbottom="0.212cm", keepwithnext="always") style.addElement(p) p = TextProperties(fontname="Nimbus Sans L", fontsize="14pt", fontnameasian="DejaVu LGC Sans", fontsizeasian="14pt", fontnamecomplex="DejaVu LGC Sans", fontsizecomplex="14pt") style.addElement(p) self.doc.styles.addElement(style) self.encoding = encoding (scheme, host, path, params, fragment) = urlparse.urlsplit(baseurl) lastslash = path.rfind('/') if lastslash > -1: path = path[:lastslash] self.baseurl = urlparse.urlunsplit((scheme, host, path,'','')) self.basehost = urlparse.urlunsplit((scheme, host, '','','')) self.sectnum = 0 self.tagstack = [] self.pstack = [] self.processelem = True self.processcont = True self.__data = [] self.elements = { 'a': (self.s_html_a, self.close_tag), 'base': ( self.output_base, None), 'b': ( self.s_html_fontstyle, self.close_tag), 'big': ( self.s_html_fontstyle, self.close_tag), 'br': ( self.output_br, None), 'col': ( self.s_html_col, None), 'dd': ( self.s_html_dd, self.close_tag), 'dt': ( self.s_html_dt, None), 'div': ( self.s_html_section, self.e_html_section), 'em': ( self.s_html_emphasis, self.close_tag), 'h1': ( self.s_html_headline, self.close_tag), 'h2': ( self.s_html_headline, self.close_tag), 'h3': ( self.s_html_headline, self.close_tag), 'h4': ( self.s_html_headline, self.close_tag), 'h5': ( self.s_html_headline, self.close_tag), 'h6': ( self.s_html_headline, self.close_tag), 'head': ( self.s_ignorexml, None), 'i': ( self.s_html_fontstyle, self.close_tag), 'img': ( self.output_img, None), 'li': ( self.s_html_li, self.e_html_li), 'meta': ( self.meta_encoding, None), 'ol': ( self.output_ol, self.e_html_list), 'p': ( self.s_html_block, self.e_html_block), 's': ( self.s_html_fontstyle, self.close_tag), 'small':( self.s_html_fontstyle, self.close_tag), 'span': ( self.s_html_span, self.close_tag), 'strike':( self.s_html_fontstyle, self.close_tag), 'strong':( self.s_html_emphasis, self.close_tag), 'table':( self.s_html_table, self.e_html_table), 'td': ( self.s_html_td, self.close_tag), 'th': ( self.s_html_td, self.close_tag), 'title':( self.s_html_title, self.e_html_title), 'tr': ( self.s_html_tr, self.close_tag), 'tt': ( self.s_html_fontstyle, self.close_tag), 'u': ( self.s_html_fontstyle, self.close_tag), 'ul': ( self.output_ul, self.e_html_list), 'var': ( self.s_html_emphasis, self.close_tag), } def result(self): """ Return a string String must be in UNICODE """ str = string.join(self.__data,'') self.__data = [] return str def meta_name(self, attrs): """ Look in meta tag for textual info""" foundit = 0 # Is there a name attribute? for attr in attrs: if attr[0] == 'name' and string.lower(attr[1]) in ('description', 'keywords','title', 'dc.description','dc.keywords','dc.title' ): foundit = 1 if foundit == 0: return 0 # Is there a content attribute? content = self.find_attr(attrs,'content') if content: self.handle_data(u' ') self.handle_attr(content) self.handle_data(u' ') return 1 def meta_encoding(self, tag, attrs): """ Look in meta tag for page encoding (Content-Type)""" foundit = 0 # Is there a content-type attribute? for attr in attrs: if attr[0] == 'http-equiv' and string.lower(attr[1]) == 'content-type': foundit = 1 if foundit == 0: return 0 # Is there a content attribute? for attr in attrs: if attr[0] == 'content': (ct, parms) = parse_header(attr[1]) if parms.has_key('charset'): self.encoding = parms['charset'] return 1 def s_ignorexml(self, tag, attrs): self.processelem = False def output_base(self, tag, attrs): """ Change the document base if there is a base tag """ baseurl = listget(attrs, 'href', self.baseurl) (scheme, host, path, params, fragment) = urlparse.urlsplit(baseurl) lastslash = path.rfind('/') if lastslash > -1: path = path[:lastslash] self.baseurl = urlparse.urlunsplit((scheme, host, path,'','')) self.basehost = urlparse.urlunsplit((scheme, host, '','','')) def output_br(self, tag, attrs): self.curr.addElement(text.LineBreak()) def s_html_font(self, tag, attrs): """ 15.2.1 Font style elements: the TT, I, B, BIG, SMALL, STRIKE, S, and U elements """ tagdict = { } def s_html_emphasis(self, tag, attrs): """ 9.2.1 Phrase elements: EM, STRONG, DFN, CODE, SAMP, KBD, VAR, CITE, ABBR, and ACRONYM """ tagdict = { 'cite': ['Citation', {'fontstyle':"italic", 'fontstyleasian':"italic", 'fontstylecomplex':"italic" }], 'code': ['Source_20_Text', {'fontname':"Courier", 'fontnameasian':"Courier",'fontnamecomplex':"Courier" }], 'dfn': ['Definition',{ }], 'em': ['Emphasis', {'fontstyle':"italic", 'fontstyleasian':"italic", 'fontstylecomplex':"italic" }], 'strong': ['Strong_20_Emphasis': {'fontweight':"bold",'fontweightasian':"bold",'fontweightcomplex':"bold"}], 'var': ['Variable', {'fontstyle':"italic", 'fontstyleasian':"italic", 'fontstylecomplex':"italic" }], } stylename = tagdict.get(tag,'Emphasis') # Add the styles we need to the stylesheet if stylename == "Source_20_Text" and self.doc.getStyleByName(stylename) is None: style = Style(name="Source_20_Text", displayname="Source Text", family="text") p = TextProperties(fontname="Courier", fontnameasian="Courier", fontnamecomplex="Courier") style.addElement(p) self.doc.styles.addElement(style) e = text.Span(stylename=stylename) self.curr.addElement(e) self.curr = e def s_html_fontstyle(self, tag, attrs): """ 15.2.1 Font style elements: the TT, I, B, BIG, SMALL, STRIKE, S, and U elements ('tt' is not considered an automatic style by OOo) """ tagdict = { 'b': ['BoldX',{'fontweight':"bold", 'fontweightasian':"bold",'fontweightcomplex':"bold" }], 'big': ['BigX', {'fontsize':"120%"}], 'i': ['ItalicX', {'fontstyle':"italic", 'fontstyleasian':"italic", 'fontstylecomplex':"italic" }], 'tt': ['TeletypeX', {'fontname':"Courier", 'fontnameasian':"Courier", 'fontnamecomplex':"Courier" }], 's': ['StrikeX', {'textlinethroughstyle':"solid"}], 'small': ['SmallX', {'fontsize':"80%"}], 'strike': ['StrikeX', {'textlinethroughstyle':"solid"}], 'u': ['UnderlineX', {'textunderlinestyle':"solid", 'textunderlinewidth':"auto", 'textunderlinecolor':"fontcolor"}], } stylename,styledecl = tagdict.get(tag,[None,None]) if stylename and self.doc.getStyleByName(stylename) is None: style = Style(name=stylename, family="text") style.addElement(TextProperties(attributes=styledecl)) self.doc.automaticstyles.addElement(style) if stylename: e = text.Span(stylename=stylename) else: e = text.Span() self.curr.addElement(e) self.curr = e def s_html_span(self, tag, attrs): e = text.Span() self.curr.addElement(e) self.curr = e def s_html_title(self, tag, attrs): e = dc.Title() self.doc.meta.addElement(e) self.curr = e def e_html_title(self, tag): self.curr = self.curr.parentNode def output_img(self, tag, attrs): src = listget(attrs, 'src', "Illegal IMG tag!") alt = listget(attrs, 'alt', src) # Must remember name of image and download it. self.write_odt(u'' % '00000.png') def s_html_a(self, tag, attrs): href = None href = listget(attrs, 'href', None) if href: if href in ("", "#"): href == self.baseurl elif href.find("://") >= 0: pass elif href[0] == '/': href = self.basehost + href e = text.A(type="simple", href=href) else: e = text.A() # if self.curr.parentNode.qname != text.P().qname: # p = text.P() # self.curr.addElement(p) # self.curr = p self.curr.addElement(e) self.curr = e def close_tag(self, tag): self.curr = self.curr.parentNode def s_html_dd(self, tag, attrs): if self.doc.getStyleByName("List_20_Contents") is None: style = Style(name="List_20_Contents", displayname="List Contents", family="paragraph", parentstylename="Standard", attributes={'class':"html"}) p = ParagraphProperties(marginleft="1cm", marginright="0cm", textindent="0cm", autotextindent="false") style.addElement(p) self.doc.styles.addElement(style) e = text.P(stylename="List_20_Contents") self.curr.addElement(e) self.curr = e def s_html_dt(self, tag, attrs): if self.doc.getStyleByName("List_20_Heading") is None: style = Style(name="List_20_Heading", displayname="List Heading", family="paragraph", parentstylename="Standard", nextstylename="List_20_Contents", attributes={'class':"html"}) p = ParagraphProperties(marginleft="0cm", marginright="0cm", textindent="0cm", autotextindent="false") style.addElement(p) self.doc.styles.addElement(style) e = text.P(stylename="List_20_Heading") self.curr.addElement(e) self.curr = e def output_ul(self, tag, attrs): self.write_odt(u'') def output_ol(self, tag, attrs): self.write_odt(u'') def e_html_list(self, tag): self.write_odt(u'') def s_html_li(self, tag, attrs): self.write_odt(u'') def e_html_li(self, tag): self.write_odt(u'') def s_html_headline(self, tag, attrs): stylename = "Heading_20_%s" % tag[1] if stylename == "Heading_20_1" and self.doc.getStyleByName("Heading_20_1") is None: style = Style(name="Heading_20_1", displayname="Heading 1", family="paragraph", parentstylename="Heading", nextstylename="Text_20_body", attributes={'class':"text"}, defaultoutlinelevel=1) p = TextProperties(fontsize="115%", fontweight="bold", fontsizeasian="115%", fontweightasian="bold", fontsizecomplex="115%", fontweightcomplex="bold") style.addElement(p) self.doc.styles.addElement(style) if stylename == "Heading_20_2" and self.doc.getStyleByName("Heading_20_2") is None: style = Style(name="Heading_20_2", displayname="Heading 2", family="paragraph", parentstylename="Heading", nextstylename="Text_20_body", attributes={'class':"text"}, defaultoutlinelevel=2) p = TextProperties(fontsize="14pt", fontstyle="italic", fontweight="bold", fontsizeasian="14pt", fontstyleasian="italic", fontweightasian="bold", fontsizecomplex="14pt", fontstylecomplex="italic", fontweightcomplex="bold") style.addElement(p) self.doc.styles.addElement(style) if stylename == "Heading_20_3" and self.doc.getStyleByName("Heading_20_3") is None: style = Style(name="Heading_20_3", displayname="Heading 3", family="paragraph", parentstylename="Heading", nextstylename="Text_20_body", attributes={'class':"text"}, defaultoutlinelevel=3) p = TextProperties(fontsize="14pt", fontweight="bold", fontsizeasian="14pt", fontweightasian="bold", fontsizecomplex="14pt", fontweightcomplex="bold") style.addElement(p) self.doc.styles.addElement(style) e = text.H(stylename="Heading_20_%s" % tag[1], outlinelevel=tag[1]) self.curr.addElement(e) self.curr = e def s_html_table(self, tag, attrs): e = table.Table() self.curr.addElement(e) self.curr = e def e_html_table(self, tag): self.curr = self.curr.parentNode def s_html_td(self, tag, attrs): e = table.TableCell() self.curr.addElement(e) self.curr = e def s_html_tr(self, tag, attrs): e = table.TableRow() self.curr.addElement(e) self.curr = e def s_html_col(self, tag, attrs): e = table.TableColumn() self.curr.addElement(e) def s_html_section(self, tag, attrs): """ Outputs block tag such as

and

""" name = self.find_attr(attrs,'id') if name is None: self.sectnum = self.sectnum + 1 name = "Sect%d" % self.sectnum e = text.Section(name=name) self.curr.addElement(e) self.curr = e def e_html_section(self, tag): """ Outputs block tag such as

and

""" self.curr = self.curr.parentNode def s_html_block(self, tag, attrs): """ Outputs block tag such as

and

""" e = text.P(stylename="Text_20_body") self.curr.addElement(e) self.curr = e def e_html_block(self, tag): """ Outputs block tag such as

and

""" self.curr = self.curr.parentNode # # HANDLE STARTTAG # def handle_starttag(self, tag, attrs): self.pstack.append( (self.processelem, self.processcont) ) tagobj = TagObject(tag, attrs, self.last_data_pos()) self.tagstack.append(tagobj) method = self.elements.get(tag, (None, None))[0] if self.processelem and method: method(tag, attrs) # # HANDLE END # def handle_endtag(self, tag): """ """ tagobj = self.tagstack.pop() method = self.elements.get(tag, (None, None))[1] if self.processelem and method: method(tag) self.processelem, self.processcont = self.pstack.pop() # # Data operations # def handle_data(self, data): if data.strip() == '': return if self.processelem and self.processcont: self.curr.addText(data) def write_odt(self, data): """ Collect the data to show on the webpage """ if type(data) == StringType: data = unicode(data, self.encoding) self.__data.append(data) def last_data_pos(self): return len(self.__data) def find_attr(self, attrs, key): """ Run through the attibutes to find a specific one return None if not found """ for attr in attrs: if attr[0] == key: return attr[1] return None # # Tagstack operations # def find_tag(self, tag): """ Run down the stack to find the last entry with the same tag name Not Tested """ for tagitem in range(len(self.tagstack), 0, -1): if tagitem.tag == tag: return tagitem return None def handle_charref(self, name): """ Handle character reference for UNICODE """ if name[0] in ('x', 'X'): try: n = int(name[1:],16) except ValueError: return else: try: n = int(name) except ValueError: return if not 0 <= n <= 65535: return self.handle_data(unichr(n)) def handle_entityref(self, name): """Handle entity references. """ table = htmlentitydefs.name2codepoint if name in table: self.handle_data(unichr(table[name])) else: return def handle_attr(self, attrval): """ Scan attribute values for entities and resolve them Simply calls handle_data """ i = 0 n = len(attrval) while i < n: match = ampersand.search(attrval, i) # if match: j = match.start() else: j = n if i < j: self.handle_data(attrval[i:j]) i = j if i == n: break startswith = attrval.startswith if startswith('&#', i): match = charref.match(attrval, i) if match: name = match.group()[2:-1] self.handle_charref(name) k = match.end() if not startswith(';', k-1): k = k - 1 i = k continue else: break elif startswith('&', i): match = entityref.match(attrval, i) if match: name = match.group(1) self.handle_entityref(name) k = match.end() if not startswith(';', k-1): k = k - 1 i = k continue match = incomplete.match(attrval, i) if match: # match.group() will contain at least 2 chars if match.group() == attrval[i:]: self.error("EOF in middle of entity or char ref") # incomplete break elif (i + 1) < n: # not the end of the buffer, and can't be confused # with some other construct self.handle_data("&") i = i + 1 else: break else: assert 0, "interesting.search() lied" # end while if i < n: self.handle_data(attrval[i:n]) i = n def usage(): sys.stderr.write("Usage: %s [-a] inputurl outputfile\n" % sys.argv[0]) if __name__ == "__main__": try: opts, args = getopt.getopt(sys.argv[1:], "a", ["append"]) except getopt.GetoptError: usage() sys.exit(2) appendto = False for o, a in opts: if o in ("-a", "--append"): appendto = True if appendto: doc = load(args[1]) else: doc = OpenDocumentText() result = converturl(args[0], doc) print result.doc.xml() result.doc.save(args[1]) odfpy-0.9.6/contrib/html2odt/html2odt.py0000664000076400007640000003751612106145305020150 0ustar rougroug00000000000000#!/usr/bin/python # -*- coding: utf-8 -*- # Copyright (C) 2006 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # # import string, sys, re import urllib2, htmlentitydefs, urlparse from urllib import quote_plus from HTMLParser import HTMLParser from cgi import escape,parse_header from types import StringType import emptycontent def checkurl(url, http_proxy=None): """ grab and convert url """ url = string.strip(url) # if url.lower()[:5] != "http:": # raise IOError, "Only http is accepted" if http_proxy: _proxies = { 'http': http_proxy } else: _proxies = {} proxy_support = urllib2.ProxyHandler(_proxies) opener = urllib2.build_opener(proxy_support, urllib2.HTTPHandler) urllib2.install_opener(opener) req = urllib2.Request(url) req.add_header("User-agent", "HTML2ODT: Convert HTML to OpenDocument") conn = urllib2.urlopen(req) if not conn: raise IOError, "Failure in open" data = conn.read() headers = conn.info() conn.close() encoding = 'iso8859-1' #Standard HTML if headers.has_key('content-type'): (ct, parms) = parse_header(headers['content-type']) if parms.has_key('charset'): encoding = parms['charset'] mhp = HTML2ODTParser(encoding, url) failure = "" mhp.feed(data) text = mhp.result() # Flush the buffer return text entityref = re.compile('&([a-zA-Z][-.a-zA-Z0-9]*)[^a-zA-Z0-9]') charref = re.compile('&#(?:[0-9]+|[xX][0-9a-fA-F]+)[^0-9a-fA-F]') incomplete = re.compile('&[a-zA-Z#]') ampersand = re.compile('&') def listget(list, key, default=None): for l in list: if l[0] == key: default = l[1] return default class TagObject: def __init__(self, tag, attrs, output_loc): self.tag = tag self.attrs = attrs self.output_loc = output_loc class HTML2ODTParser(HTMLParser): def __init__(self, encoding, baseurl): HTMLParser.__init__(self) self.encoding = encoding (scheme, host, path, params, fragment) = urlparse.urlsplit(baseurl) lastslash = path.rfind('/') if lastslash > -1: path = path[:lastslash] self.baseurl = urlparse.urlunsplit((scheme, host, path,'','')) self.basehost = urlparse.urlunsplit((scheme, host, '','','')) self.sectnum = 0 self.tagstack = [] self.pstack = [] self.processelem = True self.processcont = True self.__data = [] self.elements = { 'a': (self.s_html_a, self.e_html_a), 'base': ( self.output_base, None), 'br': ( self.output_br, None), 'caption': ( self.output_caption, None), 'col': ( self.s_html_col, None), 'dd': ( self.s_html_dd, None), 'dt': ( self.s_html_dt, None), 'div': ( self.s_html_section, self.e_html_section), 'em': ( self.s_html_emphasis, self.e_html_emphasis), 'h1': ( self.s_html_headline, self.e_html_headline), 'h2': ( self.s_html_headline, self.e_html_headline), 'h3': ( self.s_html_headline, self.e_html_headline), 'h4': ( self.s_html_headline, self.e_html_headline), 'h5': ( self.s_html_headline, self.e_html_headline), 'h6': ( self.s_html_headline, self.e_html_headline), 'head': ( self.s_ignorexml, None), 'img': ( self.output_img, None), 'li': ( self.s_html_li, self.e_html_li), 'meta': ( self.meta_encoding, None), 'ol': ( self.output_ol, self.e_html_list), 'p': ( self.s_html_block, self.e_html_block), 'span': ( self.s_html_span, self.e_html_span), 'strong':( self.s_html_emphasis, self.e_html_emphasis), 'table':( self.s_html_table, self.e_html_table), 'td': ( self.s_html_td, self.e_html_td), 'th': ( self.s_html_td, self.e_html_td), 'title':( self.s_html_title, self.e_html_title), 'tr': ( self.s_html_tr, self.e_html_tr), 'ul': ( self.output_ul, self.e_html_list), 'input':( self.output_input, None), 'select':( self.output_select, None), 'textarea':( self.output_textarea, None), } def result(self): """ Return a string String must be in UNICODE """ str = string.join(self.__data,'') self.__data = [] return str def meta_name(self, attrs): """ Look in meta tag for textual info""" foundit = 0 # Is there a name attribute? for attr in attrs: if attr[0] == 'name' and string.lower(attr[1]) in ('description', 'keywords','title', 'dc.description','dc.keywords','dc.title' ): foundit = 1 if foundit == 0: return 0 # Is there a content attribute? content = self.find_attr(attrs,'content') if content: self.handle_data(u' ') self.handle_attr(content) self.handle_data(u' ') return 1 def meta_encoding(self, tag, attrs): """ Look in meta tag for page encoding (Content-Type)""" foundit = 0 # Is there a content-type attribute? for attr in attrs: if attr[0] == 'http-equiv' and string.lower(attr[1]) == 'content-type': foundit = 1 if foundit == 0: return 0 # Is there a content attribute? for attr in attrs: if attr[0] == 'content': (ct, parms) = parse_header(attr[1]) if parms.has_key('charset'): self.encoding = parms['charset'] return 1 def s_ignorexml(self, tag, attrs): self.processelem = False def output_base(self, tag, attrs): """ Change the document base if there is a base tag """ baseurl = listget(attrs, 'href', self.baseurl) (scheme, host, path, params, fragment) = urlparse.urlsplit(baseurl) lastslash = path.rfind('/') if lastslash > -1: path = path[:lastslash] self.baseurl = urlparse.urlunsplit((scheme, host, path,'','')) self.basehost = urlparse.urlunsplit((scheme, host, '','','')) def output_br(self, tag, attrs): self.write_odt(u'') def s_html_emphasis(self, tag, attrs): self.write_odt(u'<%s>' % tag) def e_html_emphasis(self, tag): self.write_odt(u'' % tag) def s_html_span(self, tag, attrs): self.write_odt(u'') def e_html_span(self, tag): self.write_odt(u'') def s_html_title(self, tag, attrs): # Put in meta.xml self.write_odt(u'') def e_html_title(self, tag): # Put in meta.xml self.write_odt(u'') def output_img(self, tag, attrs): src = listget(attrs, 'src', "Illegal IMG tag!") alt = listget(attrs, 'alt', src) # Must remember name of image and download it. self.write_odt(u'' % '00000.png') def s_html_a(self, tag, attrs): href = None href = listget(attrs, 'href', None) if href: if href in ("", "#"): href == self.baseurl elif href.find("://") >= 0: pass elif href[0] == '/': href = self.basehost + href self.write_odt(u' ' % escape(href)) else: self.write_odt(u' ') def e_html_a(self, tag): self.write_odt(u'') def s_html_dd(self, tag, attrs): self.write_odt(u'') def s_html_dt(self, tag, attrs): self.write_odt(u'') def output_ul(self, tag, attrs): self.write_odt(u'') def output_ol(self, tag, attrs): self.write_odt(u'') def e_html_list(self, tag): self.write_odt(u'') def s_html_li(self, tag, attrs): self.write_odt(u'') def e_html_li(self, tag): self.write_odt(u'') def output_select(self, tag, attrs): return self.write_odt(u'
Combo box:') def output_textarea(self, tag, attrs): return self.write_odt(u'') def output_input(self, tag, attrs): return type = listget(attrs, 'type', "text") value = listget(attrs, 'value', "") if type == "text": self.write_odt(u'
Edit:') elif type == "submit": self.write_odt(u' %s' % value) elif type == "checkbox": #FIXME - Only works in XHTML checked = listget(attrs, 'checked', "not checked") self.write_odt(u'
Checkbox:' % checked) elif type == "radio": checked = listget(attrs, 'checked', "not checked") self.write_odt(u'
Radio button:' % checked) elif type == "file": self.write_odt(u'File upload edit %s' % value) self.write_odt(u'
Browse button:') def s_html_headline(self, tag, attrs): self.write_odt(u'' % (tag[1],tag[1])) def e_html_headline(self, tag): self.write_odt(u'') def s_html_table(self, tag, attrs): self.write_odt(u'') def e_html_table(self, tag): self.write_odt(u'') def s_html_td(self, tag, attrs): self.write_odt(u'') def e_html_td(self, tag): self.write_odt(u'') def s_html_tr(self, tag, attrs): self.write_odt(u'') def e_html_tr(self, tag): self.write_odt(u'') def s_html_col(self, tag, attrs): self.write_odt(u'') def output_caption(self, tag, attrs): self.write_odt(u'Caption: ') def s_html_section(self, tag, attrs): """ Outputs block tag such as

and

""" name = self.find_attr(attrs,'id') if name is None: self.sectnum = self.sectnum + 1 name = "Sect%d" % self.sectnum self.write_odt(u'' % name) def e_html_section(self, tag): """ Outputs block tag such as

and

""" self.write_odt(u'') def s_html_block(self, tag, attrs): """ Outputs block tag such as

and

""" self.write_odt(u'') def e_html_block(self, tag): """ Outputs block tag such as

and

""" self.write_odt(u'') # # HANDLE STARTTAG # def handle_starttag(self, tag, attrs): self.pstack.append( (self.processelem, self.processcont) ) tagobj = TagObject(tag, attrs, self.last_data_pos()) self.tagstack.append(tagobj) method = self.elements.get(tag, (None, None))[0] if self.processelem and method: method(tag, attrs) # # HANDLE END # def handle_endtag(self, tag): """ """ tagobj = self.tagstack.pop() method = self.elements.get(tag, (None, None))[1] if self.processelem and method: method(tag) self.processelem, self.processcont = self.pstack.pop() # # Data operations # def handle_data(self, data): if self.processelem and self.processcont: self.write_odt(escape(data)) def write_odt(self, data): """ Collect the data to show on the webpage """ if type(data) == StringType: data = unicode(data, self.encoding) self.__data.append(data) def last_data_pos(self): return len(self.__data) def find_attr(self, attrs, key): """ Run through the attibutes to find a specific one return None if not found """ for attr in attrs: if attr[0] == key: return attr[1] return None # # Tagstack operations # def find_tag(self, tag): """ Run down the stack to find the last entry with the same tag name Not Tested """ for tagitem in range(len(self.tagstack), 0, -1): if tagitem.tag == tag: return tagitem return None def handle_charref(self, name): """ Handle character reference for UNICODE """ if name[0] in ('x', 'X'): try: n = int(name[1:],16) except ValueError: return else: try: n = int(name) except ValueError: return if not 0 <= n <= 65535: return self.handle_data(unichr(n)) def handle_entityref(self, name): """Handle entity references. """ table = htmlentitydefs.name2codepoint if name in table: self.handle_data(unichr(table[name])) else: return def handle_attr(self, attrval): """ Scan attribute values for entities and resolve them Simply calls handle_data """ i = 0 n = len(attrval) while i < n: match = ampersand.search(attrval, i) # if match: j = match.start() else: j = n if i < j: self.handle_data(attrval[i:j]) i = j if i == n: break startswith = attrval.startswith if startswith('&#', i): match = charref.match(attrval, i) if match: name = match.group()[2:-1] self.handle_charref(name) k = match.end() if not startswith(';', k-1): k = k - 1 i = k continue else: break elif startswith('&', i): match = entityref.match(attrval, i) if match: name = match.group(1) self.handle_entityref(name) k = match.end() if not startswith(';', k-1): k = k - 1 i = k continue match = incomplete.match(attrval, i) if match: # match.group() will contain at least 2 chars if match.group() == attrval[i:]: self.error("EOF in middle of entity or char ref") # incomplete break elif (i + 1) < n: # not the end of the buffer, and can't be confused # with some other construct self.handle_data("&") i = i + 1 else: break else: assert 0, "interesting.search() lied" # end while if i < n: self.handle_data(attrval[i:n]) i = n if __name__ == "__main__": import sys result = checkurl(sys.argv[1]) sys.stdout.write('\n'.join(emptycontent.chead)) sys.stdout.write(result.encode('utf-8')) sys.stdout.write('\n'.join(emptycontent.cfoot)) sys.stdout.write('\n') odfpy-0.9.6/contrib/html2odt/htmlstyles.py0000664000076400007640000001403012106145305020605 0ustar rougroug00000000000000# -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # from odf.style import Style, ParagraphProperties, TextProperties def addStandardStyles(doc): style = Style(name="Standard", family="paragraph", attributes={'class':"text"}) doc.styles.addElement(style) style = Style(name="Text_20_body", displayname="Text body", family="paragraph", parentstylename="Standard", attributes={'class':"text"}) p = ParagraphProperties(margintop="0cm", marginbottom="0.212cm") style.addElement(p) doc.styles.addElement(style) style = Style(name="List_20_Contents", displayname="List Contents", family="paragraph", parentstylename="Standard", attributes={'class':"html"}) p = ParagraphProperties(marginleft="1cm", marginright="0cm", textindent="0cm", autotextindent="false") style.addElement(p) doc.styles.addElement(style) style = Style(name="List_20_Heading", displayname="List Heading", family="paragraph", parentstylename="Standard", nextstylename="List_20_Contents", attributes={'class':"html"}) p = ParagraphProperties(marginleft="0cm", marginright="0cm", textindent="0cm", autotextindent="false") style.addElement(p) doc.styles.addElement(style) style = Style(name="Text_20_body_20_indent", displayname="Text body indent", family="paragraph", parentstylename="Text_20_body", attributes={'class':"text"}) p = ParagraphProperties(marginleft="0.499cm", marginright="0cm", textindent="0cm", autotextindent="false") style.addElement(p) doc.styles.addElement(style) style = Style(name="Heading", family="paragraph", parentstylename="Standard", nextstylename="Text_20_body", attributes={'class':"text"}) p = ParagraphProperties(margintop="0.423cm", marginbottom="0.212cm", keepwithnext="always") style.addElement(p) p = TextProperties(fontname="Nimbus Sans L", fontsize="14pt", fontnameasian="DejaVu LGC Sans", fontsizeasian="14pt", fontnamecomplex="DejaVu LGC Sans", fontsizecomplex="14pt") style.addElement(p) doc.styles.addElement(style) style = Style(name="Heading_20_1", displayname="Heading 1", family="paragraph", parentstylename="Heading", nextstylename="Text_20_body", attributes={'class':"text"}, defaultoutlinelevel=1) p = TextProperties(fontsize="115%", fontweight="bold", fontsizeasian="115%", fontweightasian="bold", fontsizecomplex="115%", fontweightcomplex="bold") style.addElement(p) doc.styles.addElement(style) style = Style(name="Heading_20_2", displayname="Heading 2", family="paragraph", parentstylename="Heading", nextstylename="Text_20_body", attributes={'class':"text"}, defaultoutlinelevel=2) p = TextProperties(fontsize="14pt", fontstyle="italic", fontweight="bold", fontsizeasian="14pt", fontstyleasian="italic", fontweightasian="bold", fontsizecomplex="14pt", fontstylecomplex="italic", fontweightcomplex="bold") style.addElement(p) doc.styles.addElement(style) style = Style(name="Heading_20_3", displayname="Heading 3", family="paragraph", parentstylename="Heading", nextstylename="Text_20_body", attributes={'class':"text"}, defaultoutlinelevel=3) p = TextProperties(fontsize="14pt", fontweight="bold", fontsizeasian="14pt", fontweightasian="bold", fontsizecomplex="14pt", fontweightcomplex="bold") style.addElement(p) doc.styles.addElement(style) style = Style(name="List", family="paragraph", parentstylename="Text_20_body", attributes={'class':"list"}) doc.styles.addElement(style) style = Style(name="Caption", family="paragraph", parentstylename="Standard", attributes={'class':"extra"}) p = ParagraphProperties(margintop="0.212cm", marginbottom="0.212cm", numberlines="false", linenumber="0") style.addElement(p) p = TextProperties(fontsize="12pt", fontstyle="italic", fontsizeasian="12pt", fontstyleasian="italic", fontsizecomplex="12pt", fontstylecomplex="italic") style.addElement(p) doc.styles.addElement(style) style = Style(name="Index", family="paragraph", parentstylename="Standard", attributes={'class':"index"}) p = ParagraphProperties(numberlines="false", linenumber=0) doc.styles.addElement(style) style = Style(name="Source_20_Text", displayname="Source Text", family="text") p = TextProperties(fontname="Courier", fontnameasian="Courier", fontnamecomplex="Courier") style.addElement(p) doc.styles.addElement(style) style = Style(name="Variable", family="text") p = TextProperties(fontstyle="italic", fontstyleasian="italic", fontstylecomplex="italic") style.addElement(p) doc.styles.addElement(style) style = Style(name="Emphasis", family="text") p = TextProperties(fontstyle="italic", fontstyleasian="italic", fontstylecomplex="italic") style.addElement(p) doc.styles.addElement(style) style = Style(name="Strong_20_Emphasis", displayname="Strong Emphasis", family="text") p = TextProperties(fontweight="bold", fontweightasian="bold", fontweightcomplex="bold") style.addElement(p) doc.styles.addElement(style) # Automatic styles style = Style(name="Bold", displayname="Bold", family="text") p = TextProperties(fontweight="bold", fontweightasian="bold", fontweightcomplex="bold") style.addElement(p) doc.automaticstyles.addElement(style) style = Style(name="Italic", family="text") p = TextProperties(fontstyle="italic", fontstyleasian="italic", fontstylecomplex="italic") style.addElement(p) doc.automaticstyles.addElement(style) odfpy-0.9.6/contrib/tracplugins/0000775000076400007640000000000012106145466016633 5ustar rougroug00000000000000odfpy-0.9.6/contrib/tracplugins/0.10/0000775000076400007640000000000012106145466017211 5ustar rougroug00000000000000odfpy-0.9.6/contrib/tracplugins/0.10/odfpreview/0000775000076400007640000000000012106145466021363 5ustar rougroug00000000000000odfpy-0.9.6/contrib/tracplugins/0.10/odfpreview/__init__.py0000664000076400007640000000141012106145304023457 0ustar rougroug00000000000000# -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # from odfpreview import OdfPreview odfpy-0.9.6/contrib/tracplugins/0.10/odfpreview/odfpreview.py0000664000076400007640000000572112106145304024103 0ustar rougroug00000000000000# -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # from trac.core import * from trac.mimeview.api import IHTMLPreviewRenderer import os from tempfile import mkstemp from odf.odf2xhtml import ODF2XHTML class ODF2XHTMLBody(ODF2XHTML): def __init__(self): ODF2XHTML.__init__(self, generate_css=False, embedable=True) def rewritelink(self, imghref): imghref = imghref.replace("Pictures/","index_html?pict=") return imghref class OdfPreview(Component): """Display OpenDocument as HTML.""" implements(IHTMLPreviewRenderer) def get_quality_ratio(self, mimetype): self.env.log.debug('Trac checking for %s' % mimetype) if mimetype in ('application/vnd.oasis.opendocument.text', 'application/vnd.oasis.opendocument.text-template', 'application/vnd.oasis.opendocument.spreadsheet', 'application/vnd.oasis.opendocument.presentation'): return 7 return 0 def render(self, req, input_type, content, filename=None, url=None): self.env.log.debug('HTML output for ODF') odhandler = ODF2XHTMLBody() hfile, hfilename = mkstemp('tracodf') try: if hasattr(content,'read'): os.write(hfile, content.read()) else: os.write(hfile, content) os.close(hfile) out = odhandler.odf2xhtml(hfilename).encode('us-ascii','xmlcharrefreplace') except: self.env.log.error("odf2xhtml failed") finally: os.unlink(hfilename) if out != '': return out return "

HTML preview failed

" # def render(self, req, input_type, content, filename=None, url=None): # self.env.log.debug('HTML output for ODF') # hfilename = None # odhandler = ODF2XHTML() # if filename is not None: # infile = filename # else: # hfile, hfilename = mkstemp('tracodf') # if hasattr(content,'read'): # os.write(hfile, content.read()) # else: # os.write(hfile, content) # os.close(hfile) # infile = hfilename # out = odhandler.odf2xhtml(infile).encode('us-ascii','xmlcharrefreplace') # if hfilename is not None: # os.unlink(hfilename) # return out odfpy-0.9.6/contrib/tracplugins/0.10/odftohtml/0000775000076400007640000000000012106145466021211 5ustar rougroug00000000000000odfpy-0.9.6/contrib/tracplugins/0.10/odftohtml/odftohtml.py0000664000076400007640000000340612106145304023555 0ustar rougroug00000000000000# -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # from trac.core import * from trac.mimeview.api import IContentConverter import os import re from odf.odf2xhtml import ODF2XHTML class OdfToHtmlConverter(Component): """Convert OpenDocument to HTML.""" implements(IContentConverter) # IContentConverter methods def get_supported_conversions(self): yield ('odt', 'OpenDocument Text', 'odt', 'application/vnd.oasis.opendocument.text', 'text/html', 7) yield ('ott', 'OpenDocument Text', 'ott', 'application/vnd.oasis.opendocument.text-template', 'text/html', 7) yield ('ods', 'OpenDocument Spreadsheet', 'ods', 'application/vnd.oasis.opendocument.spreadsheet', 'text/html', 7) yield ('odp', 'OpenDocument Presentation', 'odp', 'application/vnd.oasis.opendocument.presentation', 'text/html', 7) def convert_content(self, req, input_type, source, output_type): odhandler = ODF2XHTML() out = odhandler.odf2xhtml(source).encode('us-ascii','xmlcharrefreplace') self.env.log.debug('HTML output for ODF') return (out, 'text/html') odfpy-0.9.6/contrib/tracplugins/0.10/odftohtml/__init__.py0000664000076400007640000000137612106145304023320 0ustar rougroug00000000000000# -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # from odftohtml import * odfpy-0.9.6/contrib/tracplugins/0.10/setup.py0000664000076400007640000000232312106145304020712 0ustar rougroug00000000000000# -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # from setuptools import setup PACKAGE = 'OdfConversion' VERSION = '0.1' setup(name='OdfConversion', version='0.1', packages=['odfpreview','odftohtml'], author='Soren Roug', author_email='soren.roug@eea.europa.eu', description='A plugin for viewing ODF documents as HTML', url='http://trac-hacks.org/wiki/OdfConversion', entry_points={'trac.plugins': ['odfpreview.odfpreview=odfpreview.odfpreview', 'odftohtml.odftohtml=odftohtml.odftohtml']}) odfpy-0.9.6/contrib/syntaxhighlight/0000775000076400007640000000000012106145466017516 5ustar rougroug00000000000000odfpy-0.9.6/contrib/syntaxhighlight/syntaxhighlight.10000664000076400007640000001157412106145305023016 0ustar rougroug00000000000000.\" Title: syntaxhighlight .\" Author: S\(/oren Roug .\" Generator: DocBook XSL Stylesheets v1.74.0 .\" Date: 03/08/2009 .\" Manual: User commands .\" Source: odfpy .\" Language: English .\" .TH "SYNTAXHIGHLIGHT" "1" "03/08/2009" "odfpy" "User commands" .\" ----------------------------------------------------------------- .\" * (re)Define some macros .\" ----------------------------------------------------------------- .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" toupper - uppercase a string (locale-aware) .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .de toupper .tr aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ \\$* .tr aabbccddeeffgghhiijjkkllmmnnooppqqrrssttuuvvwwxxyyzz .. .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" SH-xref - format a cross-reference to an SH section .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .de SH-xref .ie n \{\ .\} .toupper \\$* .el \{\ \\$* .\} .. .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" SH - level-one heading that works better for non-TTY output .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .de1 SH .\" put an extra blank line of space above the head in non-TTY output .if t \{\ .sp 1 .\} .sp \\n[PD]u .nr an-level 1 .set-an-margin .nr an-prevailing-indent \\n[IN] .fi .in \\n[an-margin]u .ti 0 .HTML-TAG ".NH \\n[an-level]" .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 \." make the size of the head bigger .ps +3 .ft B .ne (2v + 1u) .ie n \{\ .\" if n (TTY output), use uppercase .toupper \\$* .\} .el \{\ .nr an-break-flag 0 .\" if not n (not TTY), use normal case (not uppercase) \\$1 .in \\n[an-margin]u .ti 0 .\" if not n (not TTY), put a border/line under subheading .sp -.6 \l'\n(.lu' .\} .. .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" SS - level-two heading that works better for non-TTY output .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .de1 SS .sp \\n[PD]u .nr an-level 1 .set-an-margin .nr an-prevailing-indent \\n[IN] .fi .in \\n[IN]u .ti \\n[SN]u .it 1 an-trap .nr an-no-space-flag 1 .nr an-break-flag 1 .ps \\n[PS-SS]u \." make the size of the head bigger .ps +2 .ft B .ne (2v + 1u) .if \\n[.$] \&\\$* .. .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" BB/BE - put background/screen (filled box) around block of text .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .de BB .if t \{\ .sp -.5 .br .in +2n .ll -2n .gcolor red .di BX .\} .. .de EB .if t \{\ .if "\\$2"adjust-for-leading-newline" \{\ .sp -1 .\} .br .di .in .ll .gcolor .nr BW \\n(.lu-\\n(.i .nr BH \\n(dn+.5v .ne \\n(BHu+.5v .ie "\\$2"adjust-for-leading-newline" \{\ \M[\\$1]\h'1n'\v'+.5v'\D'P \\n(BWu 0 0 \\n(BHu -\\n(BWu 0 0 -\\n(BHu'\M[] .\} .el \{\ \M[\\$1]\h'1n'\v'-.5v'\D'P \\n(BWu 0 0 \\n(BHu -\\n(BWu 0 0 -\\n(BHu'\M[] .\} .in 0 .sp -.5v .nf .BX .in .sp .5v .fi .\} .. .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .\" BM/EM - put colored marker in margin next to block of text .\" ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ .de BM .if t \{\ .br .ll -2n .gcolor red .di BX .\} .. .de EM .if t \{\ .br .di .ll .gcolor .nr BH \\n(dn .ne \\n(BHu \M[\\$1]\D'P -.75n 0 0 \\n(BHu -(\\n[.i]u - \\n(INu - .75n) 0 0 -\\n(BHu'\M[] .in 0 .nf .BX .in .fi .\} .. .\" ----------------------------------------------------------------- .\" * set default formatting .\" ----------------------------------------------------------------- .\" disable hyphenation .nh .\" disable justification (adjust text to left margin only) .ad l .\" ----------------------------------------------------------------- .\" * MAIN CONTENT STARTS HERE * .\" ----------------------------------------------------------------- .SH "Name" syntaxhighlight \- Create OpenDocument with syntax highlighted programming code .SH "Synopsis" .fam C .HP \w'\fBsyntaxhighlight\fR\ 'u \fBsyntaxhighlight\fR [\-e\ \fIencoding\fR] [\-l\ \fIlanguage\fR] [\fIinputfile\fR] [\fIoutputfile\fR] .fam .SH "Description" .PP The syntaxhighlight program will read a source code file, format it with syntax highlighting and write an OpenDocument text file\&. .SH "Options" .PP \-e \fIencoding\fR .RS 4 Enter the encoding of the source file\&. Common encodings are: iso\-8859\-1, cp1252, ascii and utf\-8 (default)\&. .RE .PP \-l \fIlanguage\fR .RS 4 Programming language of the input file\&. If not specified, it is guessed fron the file ending\&. Values can be: Python, HTML, C, C++ and PHP\&. .RE .SH "Example" .sp .if n \{\ .RS 4 .\} .fam C .ps -1 .nf .if t \{\ .sp -1 .\} .BB lightgray adjust-for-leading-newline .sp -1 syntaxhighlight \-e cp1252 \-l Python example\&.py example\&.odt .EB lightgray adjust-for-leading-newline .if t \{\ .sp 1 .\} .fi .fam .ps +1 .if n \{\ .RE .\} .SH "Author" .PP \fBS\(/oren Roug\fR .RS 4 Original author .RE odfpy-0.9.6/contrib/syntaxhighlight/Makefile0000664000076400007640000000024512106145305021147 0ustar rougroug00000000000000all: odf syntaxhighlight.1 txt: syntaxhighlight.txt %.1: %.docbook xmlto man $< %.txt: %.docbook xmlto txt $< clean: rm -f *.txt odf odf: ln -s ../../odf odfpy-0.9.6/contrib/syntaxhighlight/syntaxhighlight.docbook0000664000076400007640000000412412106145305024267 0ustar rougroug00000000000000 odfpy Søren Roug Original author syntaxhighlight 1 User commands syntaxhighlight Create OpenDocument with syntax highlighted programming code syntaxhighlight -e encoding -l language inputfile outputfile Description The syntaxhighlight program will read a source code file, format it with syntax highlighting and write an OpenDocument text file. Options -e encoding Enter the encoding of the source file. Common encodings are: iso-8859-1, cp1252, ascii and utf-8 (default). -l language Programming language of the input file. If not specified, it is guessed fron the file ending. Values can be: Python, HTML, C, C++ and PHP. Example syntaxhighlight -e cp1252 -l Python example.py example.odt odfpy-0.9.6/contrib/syntaxhighlight/syntaxhighlight.py0000664000076400007640000004544012106145305023305 0ustar rougroug00000000000000#!/usr/bin/env python # -*- coding: utf-8 -*- # Copyright (C) 2007 Søren Roug, European Environment Agency # # This is free software. You may redistribute it under the terms # of the Apache license and the GNU General Public License Version # 2 or at your option any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA # # Contributor(s): # # # Syntax Highlighting # Originally from Peter Clive Wilkinson (http://www.petersblog.org/node/763) # import os, sys, re, getopt from odf.opendocument import OpenDocumentText from odf.style import FontFace, Style, TextProperties, ParagraphProperties from odf.text import P, Span, S class Highlight: """ Do syntax highlighting. """ courierfont = FontFace(name="Courier", fontfamily="Courier", fontadornments="Normal", fontfamilygeneric="modern", fontpitch="fixed") #--- Paragraph style -- programliststyle = Style(name="Program Listing", family="paragraph") programliststyle.addElement(ParagraphProperties(border="0.002cm solid #000000", margin="0cm", padding="0.2cm")) programliststyle.addElement(TextProperties(fontname="Courier", fontsize="9pt", language="none", country="none")) #--- Text styles -- puncstyle = Style(name="Highlighted Punctuation", family="text") puncstyle.addElement(TextProperties(fontweight="bold")) # Bold numberstyle = Style(name="Highlighted Number", family="text") numberstyle.addElement(TextProperties(color="#ff0000")) # Red keywordstyle = Style(name="Highlighted Keyword", family="text") keywordstyle.addElement(TextProperties(color="#b218b2", fontweight="bold")) # Blue, bold variablestyle = Style(name="Highlighted Magic", family="text") variablestyle.addElement(TextProperties(color="#0000ff")) # Blue tagstyle = Style(name="Highlighted Tag", family="text") tagstyle.addElement(TextProperties(color="#800000")) # Darkred attrstyle = Style(name="Highlighted Tag", family="text") attrstyle.addElement(TextProperties(color="#008000", fontweight="bold")) # Dark green bold stringstyle = Style(name="Highlighted String", family="text") stringstyle.addElement(TextProperties(color="#800000")) # Red commentstyle = Style(name="Highlighted Comment", family="text") commentstyle.addElement(TextProperties(color="#0000ff", fontstyle="italic")) # Blue, Italic preprocstyle = Style(name="Highlighted Preprocessing", family="text") preprocstyle.addElement(TextProperties(color="#ff00ff", fontstyle="italic")) # Magenta, Italic def __init__(self, strMode): """ Initialise highlighter: strMode = language (PYTHON, C, CPP, PHP, HTML) """ self.textdoc = OpenDocumentText() self.textdoc.fontfacedecls.addElement(self.courierfont) self.textdoc.styles.addElement(self.programliststyle) self.textdoc.styles.addElement(self.puncstyle) self.textdoc.styles.addElement(self.numberstyle) self.textdoc.styles.addElement(self.keywordstyle) self.textdoc.styles.addElement(self.variablestyle) self.textdoc.styles.addElement(self.tagstyle) self.textdoc.styles.addElement(self.attrstyle) self.textdoc.styles.addElement(self.stringstyle) self.textdoc.styles.addElement(self.commentstyle) self.textdoc.styles.addElement(self.preprocstyle) self.strSpanStyle = None self.currPara = P(stylename=self.programliststyle) self.textdoc.text.addElement(self.currPara) self.currSpan = None if strMode == 'CPP': strMode = 'C' self.strSuppressTokens = [] elif strMode == 'C': self.strSuppressTokens = ['CPPKEYWORD'] else: self.strSuppressTokens = [] self.strMode = strMode def PythonHighlightToken(self, strTok, oMatch, strStyle): """ Callback for python specific highlighting. """ # # Input matches this type. # strValue = oMatch.group() if strTok == 'MULTILINESTRING': # # If not inside a multiline string then start one now. # self.ChangeStyle(strStyle) self.WriteContent(strValue) # # Remember you are in a string and remember how it was # started (""" vs ''') # self.strMultilineString = oMatch.group(1) return 'PythonMultilineString' elif strTok == 'ENDMULTILINESTRING': # # Multiline Token found within a multiline string # if oMatch.group(1) == self.strMultilineString: # # Token is end of multiline so stop here. # self.WriteMultiline(strValue) self.strMultilineString = '' return 'PYTHON' self.ChangeStyle(strStyle) self.WriteContent(strValue) def CHighlightToken(self, strTok, oMatch, strStyle): """ Callback for C specific highlighting. """ # # Input matches this type. # strValue = oMatch.group() # # Not in multiline mode so change display style as appropriate # and output the text. # self.ChangeStyle(strStyle) self.WriteContent(strValue) def PHPHighlightToken(self, strTok, oMatch, strStyle): """ Callback for PHP specific highlighting. """ # # Input matches this type. # strValue = oMatch.group() if strTok == 'MULTILINESTRING': # # If not inside a multiline string then start one now. # self.ChangeStyle(strStyle) self.WriteContent(strValue) # # Remember you are in a string and remember how it was # started (""" vs ''') # self.strMultilineString = oMatch.group(1) return 'PHPMultilineString' elif strTok == 'ENDMULTILINESTRING': # # Multiline Token found within a multiline string # if oMatch.group(1) == self.strMultilineString: # # Token is end of multiline so stop here. # self.WriteMultiline(strValue) self.strMultilineString = '' return 'PHP' self.ChangeStyle(strStyle) self.WriteContent(strValue) if strTok == 'GOTOHTML': # # Embedded HTML # return 'HTML' else: return None def HTMLHighlightToken(self, strTok, oMatch, strStyle): """ Callback for HTML specific highlighting. """ # # Input matches this type. # strValue = oMatch.group() self.ChangeStyle(strStyle) self.WriteContent(strValue) if strTok == 'TAG': # # Change to mode 1, 'within tag'. # return 'HTMLTag' elif strTok == 'ENDTAG': # # Change to mode 1, 'within tag'. # return 'HTML' elif strTok == 'GOTOPHP': # # Embedded PHP # return 'PHP' else: # # No state change. # return None oStyles = { 'PYTHON': ( PythonHighlightToken, ( ('PUNC', re.compile( r'[-+*!|&^~/%\=<>\[\]{}(),.:]'), puncstyle), ('NUMBER', re.compile( r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'), numberstyle), ('KEYWORD', re.compile( r'(def|class|break|continue|del|exec|finally|pass|' + r'print|raise|return|try|except|global|assert|lambda|' + r'yield|for|while|if|elif|else|and|in|is|not|or|import|' + r'from|True|False)(?![a-zA-Z0-9_])'), keywordstyle), ('MAGIC', re.compile( r'self|None'), variablestyle), ('MULTILINESTRING', re.compile( r'r?u?(\'\'\'|""")'), stringstyle), ('STRING', re.compile( r'r?u?\'(.*?)(?\[\]{}(),.:]'), puncstyle), ('NUMBER', re.compile( r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'), numberstyle), ('KEYWORD', re.compile( r'(sizeof|int|long|short|char|void|' + r'signed|unsigned|float|double|' + r'goto|break|return|continue|asm|' + r'case|default|if|else|switch|while|for|do|' + r'struct|union|enum|typedef|' + r'static|register|auto|volatile|extern|const)(?![a-zA-Z0-9_])'), keywordstyle), ( 'CPPKEYWORD', re.compile( r'(class|private|protected|public|template|new|delete|' + r'this|friend|using|inline|export|bool|throw|try|catch|' + r'operator|typeid|virtual)(?![a-zA-Z0-9_])'), keywordstyle), ('STRING', re.compile( r'r?u?\'(.*?)(?\[\]{}(),.:]'), puncstyle), ('NUMBER', re.compile( r'0x[0-9a-fA-F]+|[+-]?\d+(\.\d+)?([eE][+-]\d+)?|\d+'), numberstyle), ('KEYWORD', re.compile( r'(declare|else|enddeclare|endswitch|elseif|endif|if|switch|' + r'as|do|endfor|endforeach|endwhile|for|foreach|while|' + r'case|default|switch|function|return|break|continue|exit|' + r'var|const|boolean|bool|integer|int|real|double|float|string|' + r'array|object|NULL|extends|implements|instanceof|parent|self|' + r'include|require|include_once|require_once|new|true|false)(?![a-zA-Z0-9_])'), keywordstyle), ('STRING', re.compile( r'r?u?\'(.*?)(?'), stringstyle), ('UNKNOWN', re.compile( r'.'), None) )), 'PHPMultilineString': ( PHPHighlightToken, ( ('ENDMULTILINESTRING', re.compile( r'.*?\n([a-zA-Z0-9_]+)', re.DOTALL), stringstyle), ('UNKNOWN', re.compile( r'.*?(?!\n)'), 'Keep') )), 'HTML': ( HTMLHighlightToken, # Mode 0: just look for tags ( ('COMMENT', re.compile( r'|'), commentstyle), ('XMLCRAP', re.compile( r']*>'), preprocstyle), ('SCRIPT', re.compile( r'