pdfkit-0.6.1/ 0000755 0000765 0000024 00000000000 13034727717 014516 5 ustar stgolovanov staff 0000000 0000000 pdfkit-0.6.1/HISTORY.rst 0000644 0000765 0000024 00000001724 13034727300 016401 0 ustar stgolovanov staff 0000000 0000000 Changelog
---------
* `0.6.1`
* Fix regression on python 3+ when trying to decode pdf output
* `0.6.0`
* Support repeatable options
* Support multiple values for some options
* Fix some corner cases when specific argument order is required
* Some Python 3+ compatibility fixes
* Update README
* `0.5.0`
* Allow passing multiple css files
* Fix problems with external file encodings
* Raise an error when X server is missing on \*nix systems
* Fix tests that were broken with latest wkhtmltopdf release
* Update README
* `0.4.1`
* Easier custom configuration setting
* Update README
* `0.4.0`
* Allow passing file-like objects
* Ability to return PDF as a string
* Allow user specification of configuration
* API calls now return True on success
* bugfixes
* `0.3.0`
* Python 3 support
* `0.2.4`
* Add History
* Update setup.py
* `0.2.3`
* Fix installing with setup.py
* Update README
pdfkit-0.6.1/LICENSE 0000644 0000765 0000024 00000002033 13025761570 015514 0 ustar stgolovanov staff 0000000 0000000 Copyright (c) 2012 JazzCore
Permission is hereby granted, free of charge, to any person obtaining
a copy of this software and associated documentation files (the
"Software"), to deal in the Software without restriction, including
without limitation the rights to use, copy, modify, merge, publish,
distribute, sublicense, and/or sell copies of the Software, and to
permit persons to whom the Software is furnished to do so, subject to
the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. pdfkit-0.6.1/MANIFEST.in 0000644 0000765 0000024 00000000046 13025761570 016247 0 ustar stgolovanov staff 0000000 0000000 include README.rst LICENSE HISTORY.rst pdfkit-0.6.1/pdfkit/ 0000755 0000765 0000024 00000000000 13034727717 015777 5 ustar stgolovanov staff 0000000 0000000 pdfkit-0.6.1/pdfkit/__init__.py 0000644 0000765 0000024 00000000446 13034727362 020110 0 ustar stgolovanov staff 0000000 0000000 # -*- coding: utf-8 -*-
"""
Wkhtmltopdf python wrapper to convert html to pdf using the webkit rendering engine and qt
"""
__author__ = 'Golovanov Stanislav'
__version__ = '0.6.1'
__license__ = 'MIT'
from .pdfkit import PDFKit
from .api import from_url, from_file, from_string, configuration
pdfkit-0.6.1/pdfkit/api.py 0000644 0000765 0000024 00000006652 13025761570 017126 0 ustar stgolovanov staff 0000000 0000000 # -*- coding: utf-8 -*-
from .pdfkit import PDFKit
from .pdfkit import Configuration
def from_url(url, output_path, options=None, toc=None, cover=None,
             configuration=None, cover_first=False):
    """
    Convert one or more URLs to a PDF document.

    :param url: URL or list of URLs to be saved
    :param output_path: path to output PDF file. A false value means the
                        generated PDF is returned instead of written to disk.
    :param options: (optional) dict with wkhtmltopdf global and page options, with or w/o '--'
    :param toc: (optional) dict with toc-specific wkhtmltopdf options, with or w/o '--'
    :param cover: (optional) string with url/filename with a cover html page
    :param configuration: (optional) instance of pdfkit.configuration.Configuration()
    :param cover_first: (optional) if True, cover always precedes TOC

    Returns: whatever ``PDFKit.to_pdf`` returns for ``output_path``
             (True on success when a path is given)
    """
    # Collect the pass-through settings once, then hand them to PDFKit.
    settings = {
        'options': options,
        'toc': toc,
        'cover': cover,
        'configuration': configuration,
        'cover_first': cover_first,
    }
    return PDFKit(url, 'url', **settings).to_pdf(output_path)
def from_file(input, output_path, options=None, toc=None, cover=None, css=None,
              configuration=None, cover_first=False):
    """
    Convert an HTML file (or several files) to a PDF document.

    :param input: path to HTML file or list with paths or file-like object
    :param output_path: path to output PDF file. A false value means the
                        generated PDF is returned instead of written to disk.
    :param options: (optional) dict with wkhtmltopdf options, with or w/o '--'
    :param toc: (optional) dict with toc-specific wkhtmltopdf options, with or w/o '--'
    :param cover: (optional) string with url/filename with a cover html page
    :param css: (optional) string with path to css file which will be added to a single input file
    :param configuration: (optional) instance of pdfkit.configuration.Configuration()
    :param cover_first: (optional) if True, cover always precedes TOC

    Returns: whatever ``PDFKit.to_pdf`` returns for ``output_path``
             (True on success when a path is given)
    """
    # Collect the pass-through settings once, then hand them to PDFKit.
    settings = {
        'options': options,
        'toc': toc,
        'cover': cover,
        'css': css,
        'configuration': configuration,
        'cover_first': cover_first,
    }
    return PDFKit(input, 'file', **settings).to_pdf(output_path)
def from_string(input, output_path, options=None, toc=None, cover=None, css=None,
                configuration=None, cover_first=False):
    """
    Convert the given string to a PDF document.

    :param input: string with a desired text. Could be a raw text or a html file
    :param output_path: path to output PDF file. A false value means the
                        generated PDF is returned instead of written to disk.
    :param options: (optional) dict with wkhtmltopdf options, with or w/o '--'
    :param toc: (optional) dict with toc-specific wkhtmltopdf options, with or w/o '--'
    :param cover: (optional) string with url/filename with a cover html page
    :param css: (optional) string with path to css file which will be added to the input string
    :param configuration: (optional) instance of pdfkit.configuration.Configuration()
    :param cover_first: (optional) if True, cover always precedes TOC

    Returns: whatever ``PDFKit.to_pdf`` returns for ``output_path``
             (True on success when a path is given)
    """
    # Collect the pass-through settings once, then hand them to PDFKit.
    settings = {
        'options': options,
        'toc': toc,
        'cover': cover,
        'css': css,
        'configuration': configuration,
        'cover_first': cover_first,
    }
    return PDFKit(input, 'string', **settings).to_pdf(output_path)
def configuration(**kwargs):
    """
    Build a :class:`Configuration` from the given keyword options and return it.

    :param wkhtmltopdf: path to binary
    :param meta_tag_prefix: the prefix for ``pdfkit`` specific meta tags
    """
    config = Configuration(**kwargs)
    return config
pdfkit-0.6.1/pdfkit/configuration.py 0000644 0000765 0000024 00000002066 13025761570 021217 0 ustar stgolovanov staff 0000000 0000000 # -*- coding: utf-8 -*-
import subprocess
import sys
class Configuration(object):
    """
    Holds the settings pdfkit needs to run wkhtmltopdf.

    :param wkhtmltopdf: path to the wkhtmltopdf binary. When empty, the path
                        is looked up with ``where`` (Windows) or ``which``
                        (other platforms).
    :param meta_tag_prefix: the prefix for ``pdfkit`` specific meta tags

    Raises IOError when the resulting path cannot be opened for reading.
    """

    def __init__(self, wkhtmltopdf='', meta_tag_prefix='pdfkit-'):
        self.meta_tag_prefix = meta_tag_prefix
        self.wkhtmltopdf = wkhtmltopdf

        if not self.wkhtmltopdf:
            locator = 'where' if sys.platform == 'win32' else 'which'
            output = subprocess.Popen(
                [locator, 'wkhtmltopdf'], stdout=subprocess.PIPE).communicate()[0]
            # The locator may print several matches (one per line); only a
            # single path can be opened, so keep the first one.
            self.wkhtmltopdf = self._first_path(output)

        try:
            # Opening the file is only a readability probe; nothing is read.
            with open(self.wkhtmltopdf):
                pass
        except IOError:
            raise IOError('No wkhtmltopdf executable found: "%s"\n'
                          'If this file exists please check that this process can '
                          'read it. Otherwise please install wkhtmltopdf - '
                          'https://github.com/JazzCore/python-pdfkit/wiki/Installing-wkhtmltopdf' % self.wkhtmltopdf)

    @staticmethod
    def _first_path(output):
        """Return the first non-empty line of locator output, stripped.

        Works on both bytes and str; empty output yields an empty value of
        the same type.
        """
        stripped = output.strip()
        lines = stripped.splitlines()
        return lines[0].strip() if lines else stripped
pdfkit-0.6.1/pdfkit/pdfkit.py 0000644 0000765 0000024 00000022317 13034727166 017635 0 ustar stgolovanov staff 0000000 0000000 # -*- coding: utf-8 -*-
import re
import subprocess
import sys
from .source import Source
from .configuration import Configuration
import io
import codecs
try:
# Python 2.x and 3.x support for checking string types
assert basestring
except NameError:
basestring = str
class PDFKit(object):
    """
    Main class that does all generation routine.

    :param url_or_file: str - either a URL, a path to a file or a string containing HTML
                       to convert
    :param type_: str - either 'url', 'file' or 'string'
    :param options: dict (optional) with wkhtmltopdf options, with or w/o '--'
    :param toc: dict (optional) - toc-specific wkhtmltopdf options, with or w/o '--'
    :param cover: str (optional) - url/filename with a cover html page
    :param css: str or list (optional) - path(s) to css file(s) to inline into
                a single file or string source
    :param configuration: (optional) instance of pdfkit.configuration.Configuration()
    :param cover_first: bool (optional) - if True, cover is emitted before TOC
    """

    class ImproperSourceError(Exception):
        """Wrong source type for stylesheets"""

        def __init__(self, msg):
            self.msg = msg

        def __str__(self):
            return self.msg

    def __init__(self, url_or_file, type_, options=None, toc=None, cover=None,
                 css=None, configuration=None, cover_first=False):

        self.source = Source(url_or_file, type_)
        self.configuration = (Configuration() if configuration is None
                              else configuration)
        # The configured path may be bytes (output of which/where) or str.
        try:
            self.wkhtmltopdf = self.configuration.wkhtmltopdf.decode('utf-8')
        except AttributeError:
            self.wkhtmltopdf = self.configuration.wkhtmltopdf

        # Options found in meta tags come first so that explicitly passed
        # options override them.
        self.options = dict()
        if self.source.isString():
            self.options.update(self._find_options_in_meta(url_or_file))

        if options is not None:
            self.options.update(options)

        self.toc = {} if toc is None else toc
        self.cover = cover
        self.cover_first = cover_first
        self.css = css
        self.stylesheets = []

    def _genargs(self, opts):
        """
        Generator of args parts based on options specification.

        Note: Empty parts will be filtered out at _command generator
        """
        for optkey, optval in self._normalize_options(opts):
            yield optkey

            if isinstance(optval, (list, tuple)):
                # Kept as an assert so callers still see AssertionError.
                assert len(optval) == 2 and optval[0] and optval[1], 'Option value can only be either a string or a (tuple, list) of 2 items'
                yield optval[0]
                yield optval[1]
            else:
                yield optval

    def _command(self, path=None):
        """
        Generator of all command parts
        """
        if self.css:
            self._prepend_css(self.css)

        yield self.wkhtmltopdf

        for argpart in self._genargs(self.options):
            if argpart:
                yield argpart

        if self.cover and self.cover_first:
            yield 'cover'
            yield self.cover

        if self.toc:
            yield 'toc'
            for argpart in self._genargs(self.toc):
                if argpart:
                    yield argpart

        if self.cover and not self.cover_first:
            yield 'cover'
            yield self.cover

        # If the source is a string then we will pipe it into wkhtmltopdf
        # If the source is file-like then we will read from it and pipe it in
        if self.source.isString() or self.source.isFileObj():
            yield '-'
        else:
            if isinstance(self.source.source, basestring):
                yield self.source.to_s()
            else:
                for s in self.source.source:
                    yield s

        # If output_path evaluates to False append '-' to end of args
        # and wkhtmltopdf will pass generated PDF to stdout
        if path:
            yield path
        else:
            yield '-'

    def command(self, path=None):
        """Return the full wkhtmltopdf command line as a list of arguments."""
        return list(self._command(path))

    def to_pdf(self, path=None):
        """
        Run wkhtmltopdf and produce the PDF.

        :param path: output file path; a false value makes wkhtmltopdf write
                     the PDF to stdout, which is then returned.

        Returns: the captured stdout when ``path`` is false, otherwise True.
        Raises: IOError when wkhtmltopdf reports an error, exits with a
                non-zero code, or the output file is missing or empty.
        """
        args = self.command(path)

        result = subprocess.Popen(args, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                  stderr=subprocess.PIPE)

        # If the source is a string then we will pipe it into wkhtmltopdf.
        # If we want to add custom CSS to file then we read input file to
        # string and prepend css to it and then pass it to stdin.
        # This is a workaround for a bug in wkhtmltopdf (look closely in README)
        if self.source.isString() or (self.source.isFile() and self.css):
            stdin_data = self.source.to_s().encode('utf-8')
        elif self.source.isFileObj():
            stdin_data = self.source.source.read()
            # A file opened in binary mode already yields bytes; encoding
            # them again would fail on Python 3.
            if not isinstance(stdin_data, bytes):
                stdin_data = stdin_data.encode('utf-8')
        else:
            stdin_data = None
        stdout, stderr = result.communicate(input=stdin_data)
        stderr = stderr or stdout
        try:
            stderr = stderr.decode('utf-8')
        except UnicodeDecodeError:
            # stderr was empty and stdout holds binary PDF data.
            stderr = ''
        exit_code = result.returncode

        if 'cannot connect to X server' in stderr:
            raise IOError('%s\n'
                          'You will need to run wkhtmltopdf within a "virtual" X server.\n'
                          'Go to the link below for more information\n'
                          'https://github.com/JazzCore/python-pdfkit/wiki/Using-wkhtmltopdf-without-X-server' % stderr)

        if 'Error' in stderr:
            raise IOError('wkhtmltopdf reported an error:\n' + stderr)

        if exit_code != 0:
            raise IOError("wkhtmltopdf exited with non-zero code {0}. error:\n{1}".format(exit_code, stderr))

        # Since wkhtmltopdf sends its output to stderr we will capture it
        # and properly send to stdout
        if '--quiet' not in args:
            sys.stdout.write(stderr)

        if not path:
            return stdout

        try:
            # A PDF is binary data, so read the signature as raw bytes
            # instead of going through a text decoder.
            with open(path, 'rb') as f:
                # read 4 bytes to get PDF signature '%PDF'
                signature = f.read(4)
        except IOError as e:
            raise IOError('Command failed: %s\n'
                          'Check wkhtmltopdf output without \'quiet\' option\n'
                          '%s ' % (' '.join(args), e))

        if not signature:
            raise IOError('Command failed: %s\n'
                          'Check wkhtmltopdf output without \'quiet\' '
                          'option' % ' '.join(args))
        return True

    def _normalize_options(self, options):
        """ Generator of 2-tuples (option-key, option-value).
        When options spec is a list, generate a 2-tuples per list item.

        :param options: dict {option name: value}

        returns:
          iterator (option-key, option-value)
          - option names lower cased and prepended with
          '--' if necessary. Non-empty values cast to str
        """
        for key, value in list(options.items()):
            if '--' not in key:
                normalized_key = '--%s' % self._normalize_arg(key)
            else:
                normalized_key = self._normalize_arg(key)

            if isinstance(value, (list, tuple)):
                for optval in value:
                    yield (normalized_key, optval)
            else:
                yield (normalized_key, str(value) if value else value)

    def _normalize_arg(self, arg):
        """Return the option name lower cased."""
        return arg.lower()

    def _style_tag_for(self, stylesheet):
        """Wrap raw CSS text in an HTML ``<style>`` element."""
        return "<style>%s</style>" % stylesheet

    def _prepend_css(self, path):
        """
        Inline the CSS file(s) at ``path`` into the source document.

        A file source is read and replaced by a string source. The style tag
        is inserted right before ``</head>``; a string source without
        ``</head>`` gets the style tag prepended.

        Raises: ImproperSourceError for URL sources and lists of sources.
        """
        if self.source.isUrl() or isinstance(self.source.source, list):
            raise self.ImproperSourceError('CSS files can be added only to a single '
                                           'file or string')

        if not isinstance(path, list):
            path = [path]

        css_data = []
        for p in path:
            with codecs.open(p, encoding="UTF-8") as f:
                css_data.append(f.read())
        css_data = "\n".join(css_data)

        if self.source.isFile():
            with codecs.open(self.source.to_s(), encoding="UTF-8") as f:
                inp = f.read()
            self.source = Source(
                inp.replace('</head>', self._style_tag_for(css_data) + '</head>'),
                'string')

        elif self.source.isString():
            if '</head>' in self.source.to_s():
                self.source.source = self.source.to_s().replace(
                    '</head>', self._style_tag_for(css_data) + '</head>')
            else:
                self.source.source = self._style_tag_for(css_data) + self.source.to_s()

    def _find_options_in_meta(self, content):
        """Reads 'content' and extracts options encoded in HTML meta tags

        :param content: str or file-like object - contains HTML to parse

        returns:
          dict: {config option: value}
        """
        if (isinstance(content, io.IOBase)
                or content.__class__.__name__ == 'StreamReaderWriter'):
            content = content.read()

        found = {}

        for x in re.findall('<meta [^>]*>', content):
            if re.search('name=["\']%s' % self.configuration.meta_tag_prefix, x):
                name = re.findall('name=["\']%s([^"\']*)' %
                                  self.configuration.meta_tag_prefix, x)[0]
                found[name] = re.findall('content=["\']([^"\']*)', x)[0]

        return found
pdfkit-0.6.1/pdfkit/source.py 0000644 0000765 0000024 00000002247 13025761570 017651 0 ustar stgolovanov staff 0000000 0000000 # -*- coding: utf-8 -*-
import os
import io
class Source(object):
    """
    Wraps the conversion input together with its declared kind.

    :param url_or_file: the input itself - a URL, a file path, a list of
                        those, a file-like object or an HTML string
    :param type_: str - 'url', 'file' or 'string'

    Raises IOError for a 'file' source whose path(s) do not exist.
    """

    def __init__(self, url_or_file, type_):
        self.source = url_or_file
        self.type = type_

        # Compare by value: identity ('is') against a literal only works
        # when the interpreter happens to share the string object.
        if self.type == 'file':
            self.checkFiles()

    def isUrl(self):
        """Return True when the declared type contains 'url'."""
        return 'url' in self.type

    def isFile(self, path=None):
        """
        Without ``path``: return True when the declared type contains 'file'.
        With ``path``: return True when ``path`` is an open file object.
        """
        # dirty hack to check where file is opened with codecs module
        # (because it returns 'instance' type when encoding is specified
        if path:
            return isinstance(path, io.IOBase) or path.__class__.__name__ == 'StreamReaderWriter'
        else:
            return 'file' in self.type

    def checkFiles(self):
        """Raise IOError if any path in the source does not exist.

        Objects exposing ``read`` are treated as open files and not checked.
        """
        if isinstance(self.source, list):
            for path in self.source:
                if not os.path.exists(path):
                    raise IOError('No such file: %s' % path)
        else:
            if not hasattr(self.source, 'read') and not os.path.exists(self.source):
                raise IOError('No such file: %s' % self.source)

    def isString(self):
        """Return True when the declared type contains 'string'."""
        return 'string' in self.type

    def isFileObj(self):
        """Return True when the wrapped source exposes a ``read`` attribute."""
        return hasattr(self.source, 'read')

    def to_s(self):
        """Return the wrapped source unchanged."""
        return self.source
pdfkit-0.6.1/pdfkit.egg-info/ 0000755 0000765 0000024 00000000000 13034727717 017471 5 ustar stgolovanov staff 0000000 0000000 pdfkit-0.6.1/pdfkit.egg-info/dependency_links.txt 0000644 0000765 0000024 00000000001 13034727717 023537 0 ustar stgolovanov staff 0000000 0000000
pdfkit-0.6.1/pdfkit.egg-info/pbr.json 0000644 0000765 0000024 00000000057 13034727717 021151 0 ustar stgolovanov staff 0000000 0000000 {"is_release": false, "git_version": "c8c1030"} pdfkit-0.6.1/pdfkit.egg-info/PKG-INFO 0000644 0000765 0000024 00000021111 13034727717 020562 0 ustar stgolovanov staff 0000000 0000000 Metadata-Version: 1.1
Name: pdfkit
Version: 0.6.1
Summary: Wkhtmltopdf python wrapper to convert html to pdf using the webkit rendering engine and qt
Home-page: UNKNOWN
Author: Golovanov Stanislav
Author-email: stgolovanov@gmail.com
License: MIT
Download-URL: https://github.com/JazzCore/python-pdfkit
Description: Python-PDFKit: HTML to PDF wrapper
==================================
.. image:: https://travis-ci.org/JazzCore/python-pdfkit.png?branch=master
:target: https://travis-ci.org/JazzCore/python-pdfkit
.. image:: https://badge.fury.io/py/pdfkit.svg
:target: http://badge.fury.io/py/pdfkit
Python 2 and 3 wrapper for wkhtmltopdf utility to convert HTML to PDF using Webkit.
This is adapted version of `ruby PDFKit `_ library, so big thanks to them!
Installation
------------
1. Install python-pdfkit::
$ pip install pdfkit
2. Install wkhtmltopdf:
* Debian/Ubuntu::
$ sudo apt-get install wkhtmltopdf
**Warning!** Version in debian/ubuntu repos have reduced functionality (because it compiled without the wkhtmltopdf QT patches), such as adding outlines, headers, footers, TOC etc. To use this options you should install static binary from `wkhtmltopdf `_ site or you can use `this script `_.
* Windows and other options: check wkhtmltopdf `homepage `_ for binary installers
Usage
-----
For simple tasks::
import pdfkit
pdfkit.from_url('http://google.com', 'out.pdf')
pdfkit.from_file('test.html', 'out.pdf')
pdfkit.from_string('Hello!', 'out.pdf')
You can pass a list with multiple URLs or files::
pdfkit.from_url(['google.com', 'yandex.ru', 'engadget.com'], 'out.pdf')
pdfkit.from_file(['file1.html', 'file2.html'], 'out.pdf')
Also you can pass an opened file::
with open('file.html') as f:
pdfkit.from_file(f, 'out.pdf')
If you wish to further process generated PDF, you can read it to a variable::
# Use False instead of output path to save pdf to a variable
pdf = pdfkit.from_url('http://google.com', False)
You can specify all wkhtmltopdf `options `_. You can drop '--' in option name. If option without value, use *None, False* or *''* for dict value:. For repeatable options (incl. allow, cookie, custom-header, post, postfile, run-script, replace) you may use a list or a tuple. With option that need multiple values (e.g. --custom-header Authorization secret) we may use a 2-tuple (see example below).
::
options = {
'page-size': 'Letter',
'margin-top': '0.75in',
'margin-right': '0.75in',
'margin-bottom': '0.75in',
'margin-left': '0.75in',
'encoding': "UTF-8",
'custom-header' : [
('Accept-Encoding', 'gzip')
]
'cookie': [
('cookie-name1', 'cookie-value1'),
('cookie-name2', 'cookie-value2'),
],
'no-outline': None
}
pdfkit.from_url('http://google.com', 'out.pdf', options=options)
By default, PDFKit will show all ``wkhtmltopdf`` output. If you dont want it, you need to pass ``quiet`` option::
options = {
'quiet': ''
}
pdfkit.from_url('google.com', 'out.pdf', options=options)
Due to wkhtmltopdf command syntax, **TOC** and **Cover** options must be specified separately. If you need cover before TOC, use ``cover_first`` option::
toc = {
'xsl-style-sheet': 'toc.xsl'
}
cover = 'cover.html'
pdfkit.from_file('file.html', options=options, toc=toc, cover=cover)
pdfkit.from_file('file.html', options=options, toc=toc, cover=cover, cover_first=True)
You can specify external CSS files when converting files or strings using *css* option.
**Warning** This is a workaround for `this bug `_ in wkhtmltopdf. You should try *--user-style-sheet* option first.
::
# Single CSS file
css = 'example.css'
pdfkit.from_file('file.html', options=options, css=css)
# Multiple CSS files
css = ['example.css', 'example2.css']
pdfkit.from_file('file.html', options=options, css=css)
You can also pass any options through meta tags in your HTML::
body = """
Hello World!
"""
pdfkit.from_string(body, 'out.pdf') #with --page-size=Legal and --orientation=Landscape
Configuration
-------------
Each API call takes an optional configuration parameter. This should be an instance of ``pdfkit.configuration()`` API call. It takes the configuration options as initial parameters. The available options are:
* ``wkhtmltopdf`` - the location of the ``wkhtmltopdf`` binary. By default ``pdfkit`` will attempt to locate this using ``which`` (on UNIX type systems) or ``where`` (on Windows).
* ``meta_tag_prefix`` - the prefix for ``pdfkit`` specific meta tags - by default this is ``pdfkit-``
Example - for when ``wkhtmltopdf`` is not on ``$PATH``::
config = pdfkit.configuration(wkhtmltopdf='/opt/bin/wkhtmltopdf')
pdfkit.from_string(html_string, output_file, configuration=config)
Troubleshooting
---------------
- ``IOError: 'No wkhtmltopdf executable found'``:
Make sure that you have wkhtmltopdf in your `$PATH` or set via custom configuration (see preceding section). *where wkhtmltopdf* in Windows or *which wkhtmltopdf* on Linux should return actual path to binary.
- ``IOError: 'Command Failed'``
This error means that PDFKit was unable to process an input. You can try to directly run a command from error message and see what error caused failure (on some wkhtmltopdf versions this can be caused by segmentation faults)
Changelog
---------
* `0.6.1`
* Fix regression on python 3+ when trying to decode pdf output
* `0.6.0`
* Support repeatable options
* Support multiple values for some options
* Fix some corner cases when specific argument order is required
* Some Python 3+ compatibility fixes
* Update README
* `0.5.0`
* Allow passing multiple css files
* Fix problems with external file encodings
* Rise an error when X server is missing on \*nix systems
* Fix tests that was broken with latest wkhtmltopdf release
* Update README
* `0.4.1`
* More easier custom configuration setting
* Update README
* `0.4.0`
* Allow passing file-like objects
* Ability to return PDF as a string
* Allow user specification of configuration
* API calls now returns True on success
* bugfixes
* `0.3.0`
* Python 3 support
* `0.2.4`
* Add History
* Update setup.py
* `0.2.3`
* Fix installing with setup.py
* Update README
Platform: UNKNOWN
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3.2
Classifier: Programming Language :: Python :: 3.3
Classifier: Topic :: Text Processing
Classifier: Topic :: Text Processing :: General
Classifier: Topic :: Text Processing :: Markup
Classifier: Topic :: Text Processing :: Markup :: HTML
Classifier: Topic :: Text Processing :: Markup :: XML
Classifier: Topic :: Utilities
pdfkit-0.6.1/pdfkit.egg-info/SOURCES.txt 0000644 0000765 0000024 00000000437 13034727717 021361 0 ustar stgolovanov staff 0000000 0000000 HISTORY.rst
LICENSE
MANIFEST.in
README.rst
setup.py
pdfkit/__init__.py
pdfkit/api.py
pdfkit/configuration.py
pdfkit/pdfkit.py
pdfkit/source.py
pdfkit.egg-info/PKG-INFO
pdfkit.egg-info/SOURCES.txt
pdfkit.egg-info/dependency_links.txt
pdfkit.egg-info/pbr.json
pdfkit.egg-info/top_level.txt pdfkit-0.6.1/pdfkit.egg-info/top_level.txt 0000644 0000765 0000024 00000000007 13034727717 022220 0 ustar stgolovanov staff 0000000 0000000 pdfkit
pdfkit-0.6.1/PKG-INFO 0000644 0000765 0000024 00000021111 13034727717 015607 0 ustar stgolovanov staff 0000000 0000000 Metadata-Version: 1.1
Name: pdfkit
Version: 0.6.1
Summary: Wkhtmltopdf python wrapper to convert html to pdf using the webkit rendering engine and qt
Home-page: UNKNOWN
Author: Golovanov Stanislav
Author-email: stgolovanov@gmail.com
License: MIT
Download-URL: https://github.com/JazzCore/python-pdfkit
Description: Python-PDFKit: HTML to PDF wrapper
==================================
.. image:: https://travis-ci.org/JazzCore/python-pdfkit.png?branch=master
:target: https://travis-ci.org/JazzCore/python-pdfkit
.. image:: https://badge.fury.io/py/pdfkit.svg
:target: http://badge.fury.io/py/pdfkit
Python 2 and 3 wrapper for wkhtmltopdf utility to convert HTML to PDF using Webkit.
This is adapted version of `ruby PDFKit `_ library, so big thanks to them!
Installation
------------
1. Install python-pdfkit::
$ pip install pdfkit
2. Install wkhtmltopdf:
* Debian/Ubuntu::
$ sudo apt-get install wkhtmltopdf
**Warning!** Version in debian/ubuntu repos have reduced functionality (because it compiled without the wkhtmltopdf QT patches), such as adding outlines, headers, footers, TOC etc. To use this options you should install static binary from `wkhtmltopdf `_ site or you can use `this script `_.
* Windows and other options: check wkhtmltopdf `homepage `_ for binary installers
Usage
-----
For simple tasks::
import pdfkit
pdfkit.from_url('http://google.com', 'out.pdf')
pdfkit.from_file('test.html', 'out.pdf')
pdfkit.from_string('Hello!', 'out.pdf')
You can pass a list with multiple URLs or files::
pdfkit.from_url(['google.com', 'yandex.ru', 'engadget.com'], 'out.pdf')
pdfkit.from_file(['file1.html', 'file2.html'], 'out.pdf')
Also you can pass an opened file::
with open('file.html') as f:
pdfkit.from_file(f, 'out.pdf')
If you wish to further process generated PDF, you can read it to a variable::
# Use False instead of output path to save pdf to a variable
pdf = pdfkit.from_url('http://google.com', False)
You can specify all wkhtmltopdf `options `_. You can drop '--' in option name. If option without value, use *None, False* or *''* for dict value:. For repeatable options (incl. allow, cookie, custom-header, post, postfile, run-script, replace) you may use a list or a tuple. With option that need multiple values (e.g. --custom-header Authorization secret) we may use a 2-tuple (see example below).
::
options = {
'page-size': 'Letter',
'margin-top': '0.75in',
'margin-right': '0.75in',
'margin-bottom': '0.75in',
'margin-left': '0.75in',
'encoding': "UTF-8",
'custom-header' : [
('Accept-Encoding', 'gzip')
]
'cookie': [
('cookie-name1', 'cookie-value1'),
('cookie-name2', 'cookie-value2'),
],
'no-outline': None
}
pdfkit.from_url('http://google.com', 'out.pdf', options=options)
By default, PDFKit will show all ``wkhtmltopdf`` output. If you dont want it, you need to pass ``quiet`` option::
options = {
'quiet': ''
}
pdfkit.from_url('google.com', 'out.pdf', options=options)
Due to wkhtmltopdf command syntax, **TOC** and **Cover** options must be specified separately. If you need cover before TOC, use ``cover_first`` option::
toc = {
'xsl-style-sheet': 'toc.xsl'
}
cover = 'cover.html'
pdfkit.from_file('file.html', options=options, toc=toc, cover=cover)
pdfkit.from_file('file.html', options=options, toc=toc, cover=cover, cover_first=True)
You can specify external CSS files when converting files or strings using *css* option.
**Warning** This is a workaround for `this bug `_ in wkhtmltopdf. You should try *--user-style-sheet* option first.
::
# Single CSS file
css = 'example.css'
pdfkit.from_file('file.html', options=options, css=css)
# Multiple CSS files
css = ['example.css', 'example2.css']
pdfkit.from_file('file.html', options=options, css=css)
You can also pass any options through meta tags in your HTML::
body = """
Hello World!
"""
pdfkit.from_string(body, 'out.pdf') #with --page-size=Legal and --orientation=Landscape
Configuration
-------------
Each API call takes an optional configuration parameter. This should be an instance of ``pdfkit.configuration()`` API call. It takes the configuration options as initial parameters. The available options are:
* ``wkhtmltopdf`` - the location of the ``wkhtmltopdf`` binary. By default ``pdfkit`` will attempt to locate this using ``which`` (on UNIX type systems) or ``where`` (on Windows).
* ``meta_tag_prefix`` - the prefix for ``pdfkit`` specific meta tags - by default this is ``pdfkit-``
Example - for when ``wkhtmltopdf`` is not on ``$PATH``::
config = pdfkit.configuration(wkhtmltopdf='/opt/bin/wkhtmltopdf')
pdfkit.from_string(html_string, output_file, configuration=config)
Troubleshooting
---------------
- ``IOError: 'No wkhtmltopdf executable found'``:
Make sure that you have wkhtmltopdf in your `$PATH` or set via custom configuration (see preceding section). *where wkhtmltopdf* in Windows or *which wkhtmltopdf* on Linux should return actual path to binary.
- ``IOError: 'Command Failed'``
This error means that PDFKit was unable to process an input. You can try to directly run a command from error message and see what error caused failure (on some wkhtmltopdf versions this can be caused by segmentation faults)
Changelog
---------
* `0.6.1`
* Fix regression on python 3+ when trying to decode pdf output
* `0.6.0`
* Support repeatable options
* Support multiple values for some options
* Fix some corner cases when specific argument order is required
* Some Python 3+ compatibility fixes
* Update README
* `0.5.0`
* Allow passing multiple css files
* Fix problems with external file encodings
* Rise an error when X server is missing on \*nix systems
* Fix tests that was broken with latest wkhtmltopdf release
* Update README
* `0.4.1`
* More easier custom configuration setting
* Update README
* `0.4.0`
* Allow passing file-like objects
* Ability to return PDF as a string
* Allow user specification of configuration
* API calls now returns True on success
* bugfixes
* `0.3.0`
* Python 3 support
* `0.2.4`
* Add History
* Update setup.py
* `0.2.3`
* Fix installing with setup.py
* Update README
Platform: UNKNOWN
Classifier: Programming Language :: Python
Classifier: Programming Language :: Python :: 2.7
Classifier: Programming Language :: Python :: 3.2
Classifier: Programming Language :: Python :: 3.3
Classifier: Topic :: Text Processing
Classifier: Topic :: Text Processing :: General
Classifier: Topic :: Text Processing :: Markup
Classifier: Topic :: Text Processing :: Markup :: HTML
Classifier: Topic :: Text Processing :: Markup :: XML
Classifier: Topic :: Utilities
pdfkit-0.6.1/README.rst 0000644 0000765 0000024 00000013216 13025762247 016205 0 ustar stgolovanov staff 0000000 0000000 Python-PDFKit: HTML to PDF wrapper
==================================
.. image:: https://travis-ci.org/JazzCore/python-pdfkit.png?branch=master
:target: https://travis-ci.org/JazzCore/python-pdfkit
.. image:: https://badge.fury.io/py/pdfkit.svg
:target: http://badge.fury.io/py/pdfkit
Python 2 and 3 wrapper for wkhtmltopdf utility to convert HTML to PDF using Webkit.
This is an adapted version of the `ruby PDFKit <https://github.com/pdfkit/pdfkit>`_ library, so big thanks to them!
Installation
------------
1. Install python-pdfkit:
.. code-block:: bash
$ pip install pdfkit
2. Install wkhtmltopdf:
* Debian/Ubuntu:
.. code-block:: bash
$ sudo apt-get install wkhtmltopdf
**Warning!** The version in the Debian/Ubuntu repos has reduced functionality (because it is compiled without the wkhtmltopdf QT patches): features such as adding outlines, headers, footers, TOC etc. are missing. To use these options you should install the static binary from the `wkhtmltopdf <http://wkhtmltopdf.org/>`_ site or you can use `this script <https://github.com/JazzCore/python-pdfkit/blob/master/travis/before-script.sh>`_.
* Windows and other options: check wkhtmltopdf `homepage <http://wkhtmltopdf.org/>`_ for binary installers
Usage
-----
For simple tasks:
.. code-block:: python
import pdfkit
pdfkit.from_url('http://google.com', 'out.pdf')
pdfkit.from_file('test.html', 'out.pdf')
pdfkit.from_string('Hello!', 'out.pdf')
You can pass a list with multiple URLs or files:
.. code-block:: python
pdfkit.from_url(['google.com', 'yandex.ru', 'engadget.com'], 'out.pdf')
pdfkit.from_file(['file1.html', 'file2.html'], 'out.pdf')
Also you can pass an opened file:
.. code-block:: python
with open('file.html') as f:
pdfkit.from_file(f, 'out.pdf')
If you wish to further process generated PDF, you can read it to a variable:
.. code-block:: python
# Use False instead of output path to save pdf to a variable
pdf = pdfkit.from_url('http://google.com', False)
You can specify all wkhtmltopdf `options <http://wkhtmltopdf.org/usage/wkhtmltopdf.txt>`_. You can drop '--' in the option name. If an option takes no value, use *None, False* or *''* as the dict value. For repeatable options (incl. allow, cookie, custom-header, post, postfile, run-script, replace) you may use a list or a tuple. For options that need multiple values (e.g. --custom-header Authorization secret) you may use a 2-tuple (see example below).
.. code-block:: python
options = {
'page-size': 'Letter',
'margin-top': '0.75in',
'margin-right': '0.75in',
'margin-bottom': '0.75in',
'margin-left': '0.75in',
'encoding': "UTF-8",
'custom-header' : [
('Accept-Encoding', 'gzip')
],
'cookie': [
('cookie-name1', 'cookie-value1'),
('cookie-name2', 'cookie-value2'),
],
'no-outline': None
}
pdfkit.from_url('http://google.com', 'out.pdf', options=options)
By default, PDFKit will show all ``wkhtmltopdf`` output. If you don't want it, you need to pass the ``quiet`` option:
.. code-block:: python
options = {
'quiet': ''
}
pdfkit.from_url('google.com', 'out.pdf', options=options)
Due to wkhtmltopdf command syntax, **TOC** and **Cover** options must be specified separately. If you need cover before TOC, use ``cover_first`` option:
.. code-block:: python
toc = {
'xsl-style-sheet': 'toc.xsl'
}
cover = 'cover.html'
pdfkit.from_file('file.html', 'out.pdf', options=options, toc=toc, cover=cover)
pdfkit.from_file('file.html', 'out.pdf', options=options, toc=toc, cover=cover, cover_first=True)
You can specify external CSS files when converting files or strings using *css* option.
**Warning** This is a workaround for `this bug `_ in wkhtmltopdf. You should try *--user-style-sheet* option first.
.. code-block:: python
# Single CSS file
css = 'example.css'
pdfkit.from_file('file.html', 'out.pdf', options=options, css=css)
# Multiple CSS files
css = ['example.css', 'example2.css']
pdfkit.from_file('file.html', 'out.pdf', options=options, css=css)
You can also pass any options through meta tags in your HTML:
.. code-block:: python
body = """
Hello World!
"""
pdfkit.from_string(body, 'out.pdf') #with --page-size=Legal and --orientation=Landscape
Configuration
-------------
Each API call takes an optional configuration parameter. This should be the object returned by the ``pdfkit.configuration()`` API call, which takes the configuration options as initial parameters. The available options are:
* ``wkhtmltopdf`` - the location of the ``wkhtmltopdf`` binary. By default ``pdfkit`` will attempt to locate this using ``which`` (on UNIX type systems) or ``where`` (on Windows).
* ``meta_tag_prefix`` - the prefix for ``pdfkit`` specific meta tags - by default this is ``pdfkit-``
Example - for when ``wkhtmltopdf`` is not on ``$PATH``:
.. code-block:: python
config = pdfkit.configuration(wkhtmltopdf='/opt/bin/wkhtmltopdf')
pdfkit.from_string(html_string, output_file, configuration=config)
Troubleshooting
---------------
- ``IOError: 'No wkhtmltopdf executable found'``:
Make sure that you have wkhtmltopdf in your `$PATH` or set via custom configuration (see preceding section). *where wkhtmltopdf* in Windows or *which wkhtmltopdf* on Linux should return actual path to binary.
- ``IOError: 'Command Failed'``
This error means that PDFKit was unable to process an input. You can try to run the command from the error message directly and see what error caused the failure (on some wkhtmltopdf versions this can be caused by segmentation faults).
pdfkit-0.6.1/setup.cfg 0000644 0000765 0000024 00000000073 13034727717 016337 0 ustar stgolovanov staff 0000000 0000000 [egg_info]
tag_build =
tag_date = 0
tag_svn_revision = 0
pdfkit-0.6.1/setup.py 0000644 0000765 0000024 00000003272 13025765130 016223 0 ustar stgolovanov staff 0000000 0000000 import codecs
from distutils.core import setup
from setuptools.command.test import test as TestCommand
import re
import os
import sys
import pdfkit
class PyTest(TestCommand):
    """``setup.py test`` command that runs the test file through pytest."""

    def finalize_options(self):
        """Finalize the base command options, then point it at the test file."""
        TestCommand.finalize_options(self)
        self.test_suite = True
        self.test_args = ['pdfkit-tests.py']

    def run_tests(self):
        """Run pytest from inside ``tests/`` and exit with its return value."""
        # Imported here rather than at module level: outside this method
        # the eggs aren't loaded yet.
        import pytest

        os.chdir('tests/')
        exit_status = pytest.main(self.test_args)
        sys.exit(exit_status)
def long_description():
    """Pre-process the README so that PyPi can render it properly.

    Reads ``README.rst`` from the current working directory, replaces
    every ``.. code-block:: <lang>`` directive (together with a directly
    preceding ``:`` and blank line, if present) with a plain ``::``
    literal-block marker, and appends the contents of ``HISTORY.rst``
    separated by a blank line.

    Returns:
        The combined text of the processed README and the changelog.
    """
    with codecs.open('README.rst', encoding='utf8') as readme:
        rst = readme.read()
    # Raw string: '\.' in a normal string literal is an invalid escape
    # sequence and triggers a warning on modern Python versions.
    code_block = r'(:\n\n)?\.\. code-block::.*'
    rst = re.sub(code_block, '::', rst)
    # Read the changelog with the same explicit encoding as the README and
    # close the handle deterministically instead of leaking it.
    with codecs.open('HISTORY.rst', encoding='utf8') as history:
        return rst + '\n\n' + history.read()
# Package metadata; version, summary, license and author are read from
# the imported ``pdfkit`` package itself.
setup(
    # Identity
    name='pdfkit',
    version=pdfkit.__version__,
    author=pdfkit.__author__,
    author_email='stgolovanov@gmail.com',
    license=pdfkit.__license__,
    download_url='https://github.com/JazzCore/python-pdfkit',

    # Descriptions shown on the package index
    description=pdfkit.__doc__.strip(),
    long_description=long_description(),

    # Contents and test integration (``python setup.py test``)
    packages=['pdfkit'],
    tests_require=['pytest'],
    cmdclass={'test': PyTest},

    classifiers=[
        'Programming Language :: Python',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3.2',
        'Programming Language :: Python :: 3.3',
        'Topic :: Text Processing',
        'Topic :: Text Processing :: General',
        'Topic :: Text Processing :: Markup',
        'Topic :: Text Processing :: Markup :: HTML',
        'Topic :: Text Processing :: Markup :: XML',
        'Topic :: Utilities',
    ],
)