pdfkit-0.6.1/0000755000076500000240000000000013034727717014516 5ustar stgolovanovstaff00000000000000pdfkit-0.6.1/HISTORY.rst0000644000076500000240000000172413034727300016401 0ustar stgolovanovstaff00000000000000Changelog --------- * `0.6.1` * Fix regression on python 3+ when trying to decode pdf output * `0.6.0` * Support repeatable options * Support multiple values for some options * Fix some corner cases when specific argument order is required * Some Python 3+ compatibility fixes * Update README * `0.5.0` * Allow passing multiple css files * Fix problems with external file encodings * Rise an error when X server is missing on \*nix systems * Fix tests that was broken with latest wkhtmltopdf release * Update README * `0.4.1` * More easier custom configuration setting * Update README * `0.4.0` * Allow passing file-like objects * Ability to return PDF as a string * Allow user specification of configuration * API calls now returns True on success * bugfixes * `0.3.0` * Python 3 support * `0.2.4` * Add History * Update setup.py * `0.2.3` * Fix installing with setup.py * Update README pdfkit-0.6.1/LICENSE0000644000076500000240000000203313025761570015514 0ustar stgolovanovstaff00000000000000Copyright (c) 2012 JazzCore Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.pdfkit-0.6.1/MANIFEST.in0000644000076500000240000000004613025761570016247 0ustar stgolovanovstaff00000000000000include README.rst LICENSE HISTORY.rstpdfkit-0.6.1/pdfkit/0000755000076500000240000000000013034727717015777 5ustar stgolovanovstaff00000000000000pdfkit-0.6.1/pdfkit/__init__.py0000644000076500000240000000044613034727362020110 0ustar stgolovanovstaff00000000000000# -*- coding: utf-8 -*- """ Wkhtmltopdf python wrapper to convert html to pdf using the webkit rendering engine and qt """ __author__ = 'Golovanov Stanislav' __version__ = '0.6.1' __license__ = 'MIT' from .pdfkit import PDFKit from .api import from_url, from_file, from_string, configuration pdfkit-0.6.1/pdfkit/api.py0000644000076500000240000000665213025761570017126 0ustar stgolovanovstaff00000000000000# -*- coding: utf-8 -*- from .pdfkit import PDFKit from .pdfkit import Configuration def from_url(url, output_path, options=None, toc=None, cover=None, configuration=None, cover_first=False): """ Convert file of files from URLs to PDF document :param url: URL or list of URLs to be saved :param output_path: path to output PDF file. False means file will be returned as string. :param options: (optional) dict with wkhtmltopdf global and page options, with or w/o '--' :param toc: (optional) dict with toc-specific wkhtmltopdf options, with or w/o '--' :param cover: (optional) string with url/filename with a cover html page :param configuration: (optional) instance of pdfkit.configuration.Configuration() :param configuration_first: (optional) if True, cover always precedes TOC Returns: True on success """ r = PDFKit(url, 'url', options=options, toc=toc, cover=cover, configuration=configuration, cover_first=cover_first) return r.to_pdf(output_path) def from_file(input, output_path, options=None, toc=None, cover=None, css=None, configuration=None, cover_first=False): """ Convert HTML file or files to PDF document :param input: path to HTML file or list with paths or file-like object :param output_path: path to output PDF file. False means file will be returned as string. :param options: (optional) dict with wkhtmltopdf options, with or w/o '--' :param toc: (optional) dict with toc-specific wkhtmltopdf options, with or w/o '--' :param cover: (optional) string with url/filename with a cover html page :param css: (optional) string with path to css file which will be added to a single input file :param configuration: (optional) instance of pdfkit.configuration.Configuration() :param configuration_first: (optional) if True, cover always precedes TOC Returns: True on success """ r = PDFKit(input, 'file', options=options, toc=toc, cover=cover, css=css, configuration=configuration, cover_first=cover_first) return r.to_pdf(output_path) def from_string(input, output_path, options=None, toc=None, cover=None, css=None, configuration=None, cover_first=False): """ Convert given string or strings to PDF document :param input: string with a desired text. Could be a raw text or a html file :param output_path: path to output PDF file. False means file will be returned as string. :param options: (optional) dict with wkhtmltopdf options, with or w/o '--' :param toc: (optional) dict with toc-specific wkhtmltopdf options, with or w/o '--' :param cover: (optional) string with url/filename with a cover html page :param css: (optional) string with path to css file which will be added to a input string :param configuration: (optional) instance of pdfkit.configuration.Configuration() :param configuration_first: (optional) if True, cover always precedes TOC Returns: True on success """ r = PDFKit(input, 'string', options=options, toc=toc, cover=cover, css=css, configuration=configuration, cover_first=cover_first) return r.to_pdf(output_path) def configuration(**kwargs): """ Constructs and returns a :class:`Configuration` with given options :param wkhtmltopdf: path to binary :param meta_tag_prefix: the prefix for ``pdfkit`` specific meta tags """ return Configuration(**kwargs) pdfkit-0.6.1/pdfkit/configuration.py0000644000076500000240000000206613025761570021217 0ustar stgolovanovstaff00000000000000# -*- coding: utf-8 -*- import subprocess import sys class Configuration(object): def __init__(self, wkhtmltopdf='', meta_tag_prefix='pdfkit-'): self.meta_tag_prefix = meta_tag_prefix self.wkhtmltopdf = wkhtmltopdf if not self.wkhtmltopdf: if sys.platform == 'win32': self.wkhtmltopdf = subprocess.Popen( ['where', 'wkhtmltopdf'], stdout=subprocess.PIPE).communicate()[0].strip() else: self.wkhtmltopdf = subprocess.Popen( ['which', 'wkhtmltopdf'], stdout=subprocess.PIPE).communicate()[0].strip() try: with open(self.wkhtmltopdf) as f: pass except IOError: raise IOError('No wkhtmltopdf executable found: "%s"\n' 'If this file exists please check that this process can ' 'read it. Otherwise please install wkhtmltopdf - ' 'https://github.com/JazzCore/python-pdfkit/wiki/Installing-wkhtmltopdf' % self.wkhtmltopdf) pdfkit-0.6.1/pdfkit/pdfkit.py0000644000076500000240000002231713034727166017635 0ustar stgolovanovstaff00000000000000# -*- coding: utf-8 -*- import re import subprocess import sys from .source import Source from .configuration import Configuration import io import codecs try: # Python 2.x and 3.x support for checking string types assert basestring except NameError: basestring = str class PDFKit(object): """ Main class that does all generation routine. :param url_or_file: str - either a URL, a path to a file or a string containing HTML to convert :param type_: str - either 'url', 'file' or 'string' :param options: dict (optional) with wkhtmltopdf options, with or w/o '--' :param toc: dict (optional) - toc-specific wkhtmltopdf options, with or w/o '--' :param cover: str (optional) - url/filename with a cover html page :param configuration: (optional) instance of pdfkit.configuration.Configuration() """ class ImproperSourceError(Exception): """Wrong source type for stylesheets""" def __init__(self, msg): self.msg = msg def __str__(self): return self.msg def __init__(self, url_or_file, type_, options=None, toc=None, cover=None, css=None, configuration=None, cover_first=False): self.source = Source(url_or_file, type_) self.configuration = (Configuration() if configuration is None else configuration) try: self.wkhtmltopdf = self.configuration.wkhtmltopdf.decode('utf-8') except AttributeError: self.wkhtmltopdf = self.configuration.wkhtmltopdf self.options = dict() if self.source.isString(): self.options.update(self._find_options_in_meta(url_or_file)) if options is not None: self.options.update(options) self.toc = {} if toc is None else toc self.cover = cover self.cover_first = cover_first self.css = css self.stylesheets = [] def _genargs(self, opts): """ Generator of args parts based on options specification. Note: Empty parts will be filtered out at _command generator """ for optkey, optval in self._normalize_options(opts): yield optkey if isinstance(optval, (list, tuple)): assert len(optval) == 2 and optval[0] and optval[1], 'Option value can only be either a string or a (tuple, list) of 2 items' yield optval[0] yield optval[1] else: yield optval def _command(self, path=None): """ Generator of all command parts """ if self.css: self._prepend_css(self.css) yield self.wkhtmltopdf for argpart in self._genargs(self.options): if argpart: yield argpart if self.cover and self.cover_first: yield 'cover' yield self.cover if self.toc: yield 'toc' for argpart in self._genargs(self.toc): if argpart: yield argpart if self.cover and not self.cover_first: yield 'cover' yield self.cover # If the source is a string then we will pipe it into wkhtmltopdf # If the source is file-like then we will read from it and pipe it in if self.source.isString() or self.source.isFileObj(): yield '-' else: if isinstance(self.source.source, basestring): yield self.source.to_s() else: for s in self.source.source: yield s # If output_path evaluates to False append '-' to end of args # and wkhtmltopdf will pass generated PDF to stdout if path: yield path else: yield '-' def command(self, path=None): return list(self._command(path)) def to_pdf(self, path=None): args = self.command(path) result = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE) # If the source is a string then we will pipe it into wkhtmltopdf. # If we want to add custom CSS to file then we read input file to # string and prepend css to it and then pass it to stdin. # This is a workaround for a bug in wkhtmltopdf (look closely in README) if self.source.isString() or (self.source.isFile() and self.css): input = self.source.to_s().encode('utf-8') elif self.source.isFileObj(): input = self.source.source.read().encode('utf-8') else: input = None stdout, stderr = result.communicate(input=input) stderr = stderr or stdout try: stderr = stderr.decode('utf-8') except UnicodeDecodeError: stderr = '' exit_code = result.returncode if 'cannot connect to X server' in stderr: raise IOError('%s\n' 'You will need to run wkhtmltopdf within a "virtual" X server.\n' 'Go to the link below for more information\n' 'https://github.com/JazzCore/python-pdfkit/wiki/Using-wkhtmltopdf-without-X-server' % stderr) if 'Error' in stderr: raise IOError('wkhtmltopdf reported an error:\n' + stderr) if exit_code != 0: raise IOError("wkhtmltopdf exited with non-zero code {0}. error:\n{1}".format(exit_code, stderr)) # Since wkhtmltopdf sends its output to stderr we will capture it # and properly send to stdout if '--quiet' not in args: sys.stdout.write(stderr) if not path: return stdout else: try: with codecs.open(path, encoding='utf-8') as f: # read 4 bytes to get PDF signature '%PDF' text = f.read(4) if text == '': raise IOError('Command failed: %s\n' 'Check whhtmltopdf output without \'quiet\' ' 'option' % ' '.join(args)) return True except IOError as e: raise IOError('Command failed: %s\n' 'Check whhtmltopdf output without \'quiet\' option\n' '%s ' %(' '.join(args)),e) def _normalize_options(self, options): """ Generator of 2-tuples (option-key, option-value). When options spec is a list, generate a 2-tuples per list item. :param options: dict {option name: value} returns: iterator (option-key, option-value) - option names lower cased and prepended with '--' if necessary. Non-empty values cast to str """ for key, value in list(options.items()): if not '--' in key: normalized_key = '--%s' % self._normalize_arg(key) else: normalized_key = self._normalize_arg(key) if isinstance(value, (list, tuple)): for optval in value: yield (normalized_key, optval) else: yield (normalized_key, str(value) if value else value) def _normalize_arg(self, arg): return arg.lower() def _style_tag_for(self, stylesheet): return "" % stylesheet def _prepend_css(self, path): if self.source.isUrl() or isinstance(self.source.source, list): raise self.ImproperSourceError('CSS files can be added only to a single ' 'file or string') if not isinstance(path, list): path = [path] css_data = [] for p in path: with codecs.open(p, encoding="UTF-8") as f: css_data.append(f.read()) css_data = "\n".join(css_data) if self.source.isFile(): with codecs.open(self.source.to_s(), encoding="UTF-8") as f: inp = f.read() self.source = Source( inp.replace('', self._style_tag_for(css_data) + ''), 'string') elif self.source.isString(): if '' in self.source.to_s(): self.source.source = self.source.to_s().replace( '', self._style_tag_for(css_data) + '') else: self.source.source = self._style_tag_for(css_data) + self.source.to_s() def _find_options_in_meta(self, content): """Reads 'content' and extracts options encoded in HTML meta tags :param content: str or file-like object - contains HTML to parse returns: dict: {config option: value} """ if (isinstance(content, io.IOBase) or content.__class__.__name__ == 'StreamReaderWriter'): content = content.read() found = {} for x in re.findall(']*>', content): if re.search('name=["\']%s' % self.configuration.meta_tag_prefix, x): name = re.findall('name=["\']%s([^"\']*)' % self.configuration.meta_tag_prefix, x)[0] found[name] = re.findall('content=["\']([^"\']*)', x)[0] return found pdfkit-0.6.1/pdfkit/source.py0000644000076500000240000000224713025761570017651 0ustar stgolovanovstaff00000000000000# -*- coding: utf-8 -*- import os import io class Source(object): def __init__(self, url_or_file, type_): self.source = url_or_file self.type = type_ if self.type is 'file': self.checkFiles() def isUrl(self): return 'url' in self.type def isFile(self, path=None): # dirty hack to check where file is opened with codecs module # (because it returns 'instance' type when encoding is specified if path: return isinstance(path, io.IOBase) or path.__class__.__name__ == 'StreamReaderWriter' else: return 'file' in self.type def checkFiles(self): if isinstance(self.source, list): for path in self.source: if not os.path.exists(path): raise IOError('No such file: %s' % path) else: if not hasattr(self.source, 'read') and not os.path.exists(self.source): raise IOError('No such file: %s' % self.source) def isString(self): return 'string' in self.type def isFileObj(self): return hasattr(self.source, 'read') def to_s(self): return self.source pdfkit-0.6.1/pdfkit.egg-info/0000755000076500000240000000000013034727717017471 5ustar stgolovanovstaff00000000000000pdfkit-0.6.1/pdfkit.egg-info/dependency_links.txt0000644000076500000240000000000113034727717023537 0ustar stgolovanovstaff00000000000000 pdfkit-0.6.1/pdfkit.egg-info/pbr.json0000644000076500000240000000005713034727717021151 0ustar stgolovanovstaff00000000000000{"is_release": false, "git_version": "c8c1030"}pdfkit-0.6.1/pdfkit.egg-info/PKG-INFO0000644000076500000240000002111113034727717020562 0ustar stgolovanovstaff00000000000000Metadata-Version: 1.1 Name: pdfkit Version: 0.6.1 Summary: Wkhtmltopdf python wrapper to convert html to pdf using the webkit rendering engine and qt Home-page: UNKNOWN Author: Golovanov Stanislav Author-email: stgolovanov@gmail.com License: MIT Download-URL: https://github.com/JazzCore/python-pdfkit Description: Python-PDFKit: HTML to PDF wrapper ================================== .. image:: https://travis-ci.org/JazzCore/python-pdfkit.png?branch=master :target: https://travis-ci.org/JazzCore/python-pdfkit .. image:: https://badge.fury.io/py/pdfkit.svg :target: http://badge.fury.io/py/pdfkit Python 2 and 3 wrapper for wkhtmltopdf utility to convert HTML to PDF using Webkit. This is adapted version of `ruby PDFKit `_ library, so big thanks to them! Installation ------------ 1. Install python-pdfkit:: $ pip install pdfkit 2. Install wkhtmltopdf: * Debian/Ubuntu:: $ sudo apt-get install wkhtmltopdf **Warning!** Version in debian/ubuntu repos have reduced functionality (because it compiled without the wkhtmltopdf QT patches), such as adding outlines, headers, footers, TOC etc. To use this options you should install static binary from `wkhtmltopdf `_ site or you can use `this script `_. * Windows and other options: check wkhtmltopdf `homepage `_ for binary installers Usage ----- For simple tasks:: import pdfkit pdfkit.from_url('http://google.com', 'out.pdf') pdfkit.from_file('test.html', 'out.pdf') pdfkit.from_string('Hello!', 'out.pdf') You can pass a list with multiple URLs or files:: pdfkit.from_url(['google.com', 'yandex.ru', 'engadget.com'], 'out.pdf') pdfkit.from_file(['file1.html', 'file2.html'], 'out.pdf') Also you can pass an opened file:: with open('file.html') as f: pdfkit.from_file(f, 'out.pdf') If you wish to further process generated PDF, you can read it to a variable:: # Use False instead of output path to save pdf to a variable pdf = pdfkit.from_url('http://google.com', False) You can specify all wkhtmltopdf `options `_. You can drop '--' in option name. If option without value, use *None, False* or *''* for dict value:. For repeatable options (incl. allow, cookie, custom-header, post, postfile, run-script, replace) you may use a list or a tuple. With option that need multiple values (e.g. --custom-header Authorization secret) we may use a 2-tuple (see example below). :: options = { 'page-size': 'Letter', 'margin-top': '0.75in', 'margin-right': '0.75in', 'margin-bottom': '0.75in', 'margin-left': '0.75in', 'encoding': "UTF-8", 'custom-header' : [ ('Accept-Encoding', 'gzip') ] 'cookie': [ ('cookie-name1', 'cookie-value1'), ('cookie-name2', 'cookie-value2'), ], 'no-outline': None } pdfkit.from_url('http://google.com', 'out.pdf', options=options) By default, PDFKit will show all ``wkhtmltopdf`` output. If you dont want it, you need to pass ``quiet`` option:: options = { 'quiet': '' } pdfkit.from_url('google.com', 'out.pdf', options=options) Due to wkhtmltopdf command syntax, **TOC** and **Cover** options must be specified separately. If you need cover before TOC, use ``cover_first`` option:: toc = { 'xsl-style-sheet': 'toc.xsl' } cover = 'cover.html' pdfkit.from_file('file.html', options=options, toc=toc, cover=cover) pdfkit.from_file('file.html', options=options, toc=toc, cover=cover, cover_first=True) You can specify external CSS files when converting files or strings using *css* option. **Warning** This is a workaround for `this bug `_ in wkhtmltopdf. You should try *--user-style-sheet* option first. :: # Single CSS file css = 'example.css' pdfkit.from_file('file.html', options=options, css=css) # Multiple CSS files css = ['example.css', 'example2.css'] pdfkit.from_file('file.html', options=options, css=css) You can also pass any options through meta tags in your HTML:: body = """ Hello World! """ pdfkit.from_string(body, 'out.pdf') #with --page-size=Legal and --orientation=Landscape Configuration ------------- Each API call takes an optional configuration paramater. This should be an instance of ``pdfkit.configuration()`` API call. It takes the configuration options as initial paramaters. The available options are: * ``wkhtmltopdf`` - the location of the ``wkhtmltopdf`` binary. By default ``pdfkit`` will attempt to locate this using ``which`` (on UNIX type systems) or ``where`` (on Windows). * ``meta_tag_prefix`` - the prefix for ``pdfkit`` specific meta tags - by default this is ``pdfkit-`` Example - for when ``wkhtmltopdf`` is not on ``$PATH``:: config = pdfkit.configuration(wkhtmltopdf='/opt/bin/wkhtmltopdf') pdfkit.from_string(html_string, output_file, configuration=config) Troubleshooting --------------- - ``IOError: 'No wkhtmltopdf executable found'``: Make sure that you have wkhtmltopdf in your `$PATH` or set via custom configuration (see preceding section). *where wkhtmltopdf* in Windows or *which wkhtmltopdf* on Linux should return actual path to binary. - ``IOError: 'Command Failed'`` This error means that PDFKit was unable to process an input. You can try to directly run a command from error message and see what error caused failure (on some wkhtmltopdf versions this can be cause by segmentation faults) Changelog --------- * `0.6.1` * Fix regression on python 3+ when trying to decode pdf output * `0.6.0` * Support repeatable options * Support multiple values for some options * Fix some corner cases when specific argument order is required * Some Python 3+ compatibility fixes * Update README * `0.5.0` * Allow passing multiple css files * Fix problems with external file encodings * Rise an error when X server is missing on \*nix systems * Fix tests that was broken with latest wkhtmltopdf release * Update README * `0.4.1` * More easier custom configuration setting * Update README * `0.4.0` * Allow passing file-like objects * Ability to return PDF as a string * Allow user specification of configuration * API calls now returns True on success * bugfixes * `0.3.0` * Python 3 support * `0.2.4` * Add History * Update setup.py * `0.2.3` * Fix installing with setup.py * Update README Platform: UNKNOWN Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3.2 Classifier: Programming Language :: Python :: 3.3 Classifier: Topic :: Text Processing Classifier: Topic :: Text Processing :: General Classifier: Topic :: Text Processing :: Markup Classifier: Topic :: Text Processing :: Markup :: HTML Classifier: Topic :: Text Processing :: Markup :: XML Classifier: Topic :: Utilities pdfkit-0.6.1/pdfkit.egg-info/SOURCES.txt0000644000076500000240000000043713034727717021361 0ustar stgolovanovstaff00000000000000HISTORY.rst LICENSE MANIFEST.in README.rst setup.py pdfkit/__init__.py pdfkit/api.py pdfkit/configuration.py pdfkit/pdfkit.py pdfkit/source.py pdfkit.egg-info/PKG-INFO pdfkit.egg-info/SOURCES.txt pdfkit.egg-info/dependency_links.txt pdfkit.egg-info/pbr.json pdfkit.egg-info/top_level.txtpdfkit-0.6.1/pdfkit.egg-info/top_level.txt0000644000076500000240000000000713034727717022220 0ustar stgolovanovstaff00000000000000pdfkit pdfkit-0.6.1/PKG-INFO0000644000076500000240000002111113034727717015607 0ustar stgolovanovstaff00000000000000Metadata-Version: 1.1 Name: pdfkit Version: 0.6.1 Summary: Wkhtmltopdf python wrapper to convert html to pdf using the webkit rendering engine and qt Home-page: UNKNOWN Author: Golovanov Stanislav Author-email: stgolovanov@gmail.com License: MIT Download-URL: https://github.com/JazzCore/python-pdfkit Description: Python-PDFKit: HTML to PDF wrapper ================================== .. image:: https://travis-ci.org/JazzCore/python-pdfkit.png?branch=master :target: https://travis-ci.org/JazzCore/python-pdfkit .. image:: https://badge.fury.io/py/pdfkit.svg :target: http://badge.fury.io/py/pdfkit Python 2 and 3 wrapper for wkhtmltopdf utility to convert HTML to PDF using Webkit. This is adapted version of `ruby PDFKit `_ library, so big thanks to them! Installation ------------ 1. Install python-pdfkit:: $ pip install pdfkit 2. Install wkhtmltopdf: * Debian/Ubuntu:: $ sudo apt-get install wkhtmltopdf **Warning!** Version in debian/ubuntu repos have reduced functionality (because it compiled without the wkhtmltopdf QT patches), such as adding outlines, headers, footers, TOC etc. To use this options you should install static binary from `wkhtmltopdf `_ site or you can use `this script `_. * Windows and other options: check wkhtmltopdf `homepage `_ for binary installers Usage ----- For simple tasks:: import pdfkit pdfkit.from_url('http://google.com', 'out.pdf') pdfkit.from_file('test.html', 'out.pdf') pdfkit.from_string('Hello!', 'out.pdf') You can pass a list with multiple URLs or files:: pdfkit.from_url(['google.com', 'yandex.ru', 'engadget.com'], 'out.pdf') pdfkit.from_file(['file1.html', 'file2.html'], 'out.pdf') Also you can pass an opened file:: with open('file.html') as f: pdfkit.from_file(f, 'out.pdf') If you wish to further process generated PDF, you can read it to a variable:: # Use False instead of output path to save pdf to a variable pdf = pdfkit.from_url('http://google.com', False) You can specify all wkhtmltopdf `options `_. You can drop '--' in option name. If option without value, use *None, False* or *''* for dict value:. For repeatable options (incl. allow, cookie, custom-header, post, postfile, run-script, replace) you may use a list or a tuple. With option that need multiple values (e.g. --custom-header Authorization secret) we may use a 2-tuple (see example below). :: options = { 'page-size': 'Letter', 'margin-top': '0.75in', 'margin-right': '0.75in', 'margin-bottom': '0.75in', 'margin-left': '0.75in', 'encoding': "UTF-8", 'custom-header' : [ ('Accept-Encoding', 'gzip') ] 'cookie': [ ('cookie-name1', 'cookie-value1'), ('cookie-name2', 'cookie-value2'), ], 'no-outline': None } pdfkit.from_url('http://google.com', 'out.pdf', options=options) By default, PDFKit will show all ``wkhtmltopdf`` output. If you dont want it, you need to pass ``quiet`` option:: options = { 'quiet': '' } pdfkit.from_url('google.com', 'out.pdf', options=options) Due to wkhtmltopdf command syntax, **TOC** and **Cover** options must be specified separately. If you need cover before TOC, use ``cover_first`` option:: toc = { 'xsl-style-sheet': 'toc.xsl' } cover = 'cover.html' pdfkit.from_file('file.html', options=options, toc=toc, cover=cover) pdfkit.from_file('file.html', options=options, toc=toc, cover=cover, cover_first=True) You can specify external CSS files when converting files or strings using *css* option. **Warning** This is a workaround for `this bug `_ in wkhtmltopdf. You should try *--user-style-sheet* option first. :: # Single CSS file css = 'example.css' pdfkit.from_file('file.html', options=options, css=css) # Multiple CSS files css = ['example.css', 'example2.css'] pdfkit.from_file('file.html', options=options, css=css) You can also pass any options through meta tags in your HTML:: body = """ Hello World! """ pdfkit.from_string(body, 'out.pdf') #with --page-size=Legal and --orientation=Landscape Configuration ------------- Each API call takes an optional configuration paramater. This should be an instance of ``pdfkit.configuration()`` API call. It takes the configuration options as initial paramaters. The available options are: * ``wkhtmltopdf`` - the location of the ``wkhtmltopdf`` binary. By default ``pdfkit`` will attempt to locate this using ``which`` (on UNIX type systems) or ``where`` (on Windows). * ``meta_tag_prefix`` - the prefix for ``pdfkit`` specific meta tags - by default this is ``pdfkit-`` Example - for when ``wkhtmltopdf`` is not on ``$PATH``:: config = pdfkit.configuration(wkhtmltopdf='/opt/bin/wkhtmltopdf') pdfkit.from_string(html_string, output_file, configuration=config) Troubleshooting --------------- - ``IOError: 'No wkhtmltopdf executable found'``: Make sure that you have wkhtmltopdf in your `$PATH` or set via custom configuration (see preceding section). *where wkhtmltopdf* in Windows or *which wkhtmltopdf* on Linux should return actual path to binary. - ``IOError: 'Command Failed'`` This error means that PDFKit was unable to process an input. You can try to directly run a command from error message and see what error caused failure (on some wkhtmltopdf versions this can be cause by segmentation faults) Changelog --------- * `0.6.1` * Fix regression on python 3+ when trying to decode pdf output * `0.6.0` * Support repeatable options * Support multiple values for some options * Fix some corner cases when specific argument order is required * Some Python 3+ compatibility fixes * Update README * `0.5.0` * Allow passing multiple css files * Fix problems with external file encodings * Rise an error when X server is missing on \*nix systems * Fix tests that was broken with latest wkhtmltopdf release * Update README * `0.4.1` * More easier custom configuration setting * Update README * `0.4.0` * Allow passing file-like objects * Ability to return PDF as a string * Allow user specification of configuration * API calls now returns True on success * bugfixes * `0.3.0` * Python 3 support * `0.2.4` * Add History * Update setup.py * `0.2.3` * Fix installing with setup.py * Update README Platform: UNKNOWN Classifier: Programming Language :: Python Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3.2 Classifier: Programming Language :: Python :: 3.3 Classifier: Topic :: Text Processing Classifier: Topic :: Text Processing :: General Classifier: Topic :: Text Processing :: Markup Classifier: Topic :: Text Processing :: Markup :: HTML Classifier: Topic :: Text Processing :: Markup :: XML Classifier: Topic :: Utilities pdfkit-0.6.1/README.rst0000644000076500000240000001321613025762247016205 0ustar stgolovanovstaff00000000000000Python-PDFKit: HTML to PDF wrapper ================================== .. image:: https://travis-ci.org/JazzCore/python-pdfkit.png?branch=master :target: https://travis-ci.org/JazzCore/python-pdfkit .. image:: https://badge.fury.io/py/pdfkit.svg :target: http://badge.fury.io/py/pdfkit Python 2 and 3 wrapper for wkhtmltopdf utility to convert HTML to PDF using Webkit. This is adapted version of `ruby PDFKit `_ library, so big thanks to them! Installation ------------ 1. Install python-pdfkit: .. code-block:: bash $ pip install pdfkit 2. Install wkhtmltopdf: * Debian/Ubuntu: .. code-block:: bash $ sudo apt-get install wkhtmltopdf **Warning!** Version in debian/ubuntu repos have reduced functionality (because it compiled without the wkhtmltopdf QT patches), such as adding outlines, headers, footers, TOC etc. To use this options you should install static binary from `wkhtmltopdf `_ site or you can use `this script `_. * Windows and other options: check wkhtmltopdf `homepage `_ for binary installers Usage ----- For simple tasks: .. code-block:: python import pdfkit pdfkit.from_url('http://google.com', 'out.pdf') pdfkit.from_file('test.html', 'out.pdf') pdfkit.from_string('Hello!', 'out.pdf') You can pass a list with multiple URLs or files: .. code-block:: python pdfkit.from_url(['google.com', 'yandex.ru', 'engadget.com'], 'out.pdf') pdfkit.from_file(['file1.html', 'file2.html'], 'out.pdf') Also you can pass an opened file: .. code-block:: python with open('file.html') as f: pdfkit.from_file(f, 'out.pdf') If you wish to further process generated PDF, you can read it to a variable: .. code-block:: python # Use False instead of output path to save pdf to a variable pdf = pdfkit.from_url('http://google.com', False) You can specify all wkhtmltopdf `options `_. You can drop '--' in option name. If option without value, use *None, False* or *''* for dict value:. For repeatable options (incl. allow, cookie, custom-header, post, postfile, run-script, replace) you may use a list or a tuple. With option that need multiple values (e.g. --custom-header Authorization secret) we may use a 2-tuple (see example below). .. code-block:: python options = { 'page-size': 'Letter', 'margin-top': '0.75in', 'margin-right': '0.75in', 'margin-bottom': '0.75in', 'margin-left': '0.75in', 'encoding': "UTF-8", 'custom-header' : [ ('Accept-Encoding', 'gzip') ] 'cookie': [ ('cookie-name1', 'cookie-value1'), ('cookie-name2', 'cookie-value2'), ], 'no-outline': None } pdfkit.from_url('http://google.com', 'out.pdf', options=options) By default, PDFKit will show all ``wkhtmltopdf`` output. If you dont want it, you need to pass ``quiet`` option: .. code-block:: python options = { 'quiet': '' } pdfkit.from_url('google.com', 'out.pdf', options=options) Due to wkhtmltopdf command syntax, **TOC** and **Cover** options must be specified separately. If you need cover before TOC, use ``cover_first`` option: .. code-block:: python toc = { 'xsl-style-sheet': 'toc.xsl' } cover = 'cover.html' pdfkit.from_file('file.html', options=options, toc=toc, cover=cover) pdfkit.from_file('file.html', options=options, toc=toc, cover=cover, cover_first=True) You can specify external CSS files when converting files or strings using *css* option. **Warning** This is a workaround for `this bug `_ in wkhtmltopdf. You should try *--user-style-sheet* option first. .. code-block:: python # Single CSS file css = 'example.css' pdfkit.from_file('file.html', options=options, css=css) # Multiple CSS files css = ['example.css', 'example2.css'] pdfkit.from_file('file.html', options=options, css=css) You can also pass any options through meta tags in your HTML: .. code-block:: python body = """ Hello World! """ pdfkit.from_string(body, 'out.pdf') #with --page-size=Legal and --orientation=Landscape Configuration ------------- Each API call takes an optional configuration paramater. This should be an instance of ``pdfkit.configuration()`` API call. It takes the configuration options as initial paramaters. The available options are: * ``wkhtmltopdf`` - the location of the ``wkhtmltopdf`` binary. By default ``pdfkit`` will attempt to locate this using ``which`` (on UNIX type systems) or ``where`` (on Windows). * ``meta_tag_prefix`` - the prefix for ``pdfkit`` specific meta tags - by default this is ``pdfkit-`` Example - for when ``wkhtmltopdf`` is not on ``$PATH``: .. code-block:: python config = pdfkit.configuration(wkhtmltopdf='/opt/bin/wkhtmltopdf') pdfkit.from_string(html_string, output_file, configuration=config) Troubleshooting --------------- - ``IOError: 'No wkhtmltopdf executable found'``: Make sure that you have wkhtmltopdf in your `$PATH` or set via custom configuration (see preceding section). *where wkhtmltopdf* in Windows or *which wkhtmltopdf* on Linux should return actual path to binary. - ``IOError: 'Command Failed'`` This error means that PDFKit was unable to process an input. You can try to directly run a command from error message and see what error caused failure (on some wkhtmltopdf versions this can be cause by segmentation faults) pdfkit-0.6.1/setup.cfg0000644000076500000240000000007313034727717016337 0ustar stgolovanovstaff00000000000000[egg_info] tag_build = tag_date = 0 tag_svn_revision = 0 pdfkit-0.6.1/setup.py0000644000076500000240000000327213025765130016223 0ustar stgolovanovstaff00000000000000import codecs from distutils.core import setup from setuptools.command.test import test as TestCommand import re import os import sys import pdfkit class PyTest(TestCommand): def finalize_options(self): TestCommand.finalize_options(self) self.test_args = ['pdfkit-tests.py'] self.test_suite = True def run_tests(self): #import here, cause outside the eggs aren't loaded import pytest os.chdir('tests/') errno = pytest.main(self.test_args) sys.exit(errno) def long_description(): """Pre-process the README so that PyPi can render it properly.""" with codecs.open('README.rst', encoding='utf8') as f: rst = f.read() code_block = '(:\n\n)?\.\. code-block::.*' rst = re.sub(code_block, '::', rst) return rst + '\n\n' + open('HISTORY.rst').read() setup( name='pdfkit', version=pdfkit.__version__, description=pdfkit.__doc__.strip(), long_description=long_description(), download_url='https://github.com/JazzCore/python-pdfkit', license=pdfkit.__license__, tests_require=['pytest'], cmdclass = {'test': PyTest}, packages=['pdfkit'], author=pdfkit.__author__, author_email='stgolovanov@gmail.com', classifiers=[ 'Programming Language :: Python', 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3.2', 'Programming Language :: Python :: 3.3', 'Topic :: Text Processing', 'Topic :: Text Processing :: General', 'Topic :: Text Processing :: Markup', 'Topic :: Text Processing :: Markup :: HTML', 'Topic :: Text Processing :: Markup :: XML', 'Topic :: Utilities' ] )