pypandoc-1.4/0000755000175000017500000000000013076637561014053 5ustar travistravis00000000000000pypandoc-1.4/examples/0000755000175000017500000000000013076637561015671 5ustar travistravis00000000000000pypandoc-1.4/examples/services.py0000644000175000017500000000367313076637465020102 0ustar travistravis00000000000000# -*- coding: utf-8 -*- """ Example Services for using pypandoc """ from tempfile import NamedTemporaryFile import pypandoc class BasePandocService(object): """ Base class for converting provided HTML to a doc or docx """ file_object = None def __init__(self): self.service = self.get_service() def get_service(self): return pypandoc def generate(self, **kwargs): raise NotImplementedError class PandocPDFService(BasePandocService): """ Generate html to pdf format """ def generate(self, html, **kwargs): """ generate the pdf but needs to be set as tex so pandoc handles it correctly see docs: http://johnmacfarlane.net/pandoc/ #search pdf """ from_format = kwargs.get('from_format', 'html') to_format = kwargs.get('to_format', 'tex') # create temp file self.file_object = NamedTemporaryFile(suffix='.pdf') extra_args = ( '--smart', '--standalone', '-o', self.file_object.name ) # generate it using pandoc self.service.convert(html, to_format, format=from_format, extra_args=extra_args) # return the file which is now populated with the docx forms return self.file_object class PandocDocxService(BasePandocService): """ Generate html to docx format """ def generate(self, html, **kwargs): from_format = kwargs.get('from_format', 'html') to_format = kwargs.get('to_format', 'docx') # create temp file self.file_object = NamedTemporaryFile(suffix='.docx') extra_args = ( '--smart', '--standalone', '-o', self.file_object.name ) # generate it using pandoc self.service.convert(html, to_format, format=from_format, extra_args=extra_args) # return the file which is now populated with the docx forms return self.file_object 
pypandoc-1.4/pypandoc/0000755000175000017500000000000013076637561015670 5ustar travistravis00000000000000pypandoc-1.4/pypandoc/__init__.py0000644000175000017500000005072713076637465020017 0ustar travistravis00000000000000# -*- coding: utf-8 -*- from __future__ import with_statement, absolute_import, print_function import subprocess import sys import textwrap import os import re import warnings import tempfile from .py3compat import string_types, cast_bytes, cast_unicode, urlparse from pypandoc.pandoc_download import DEFAULT_TARGET_FOLDER, download_pandoc __author__ = u'Juho Vepsäläinen' __version__ = '1.4' __license__ = 'MIT' __all__ = ['convert', 'convert_file', 'convert_text', 'get_pandoc_formats', 'get_pandoc_version', 'get_pandoc_path', 'download_pandoc'] def convert(source, to, format=None, extra_args=(), encoding='utf-8', outputfile=None, filters=None): """Converts given `source` from `format` to `to` (deprecated). :param str source: Unicode string or bytes or a file path (see encoding) :param str to: format into which the input should be converted; can be one of `pypandoc.get_pandoc_formats()[1]` :param str format: the format of the inputs; will be inferred if input is a file with an known filename extension; can be one of `pypandoc.get_pandoc_formats()[1]` (Default value = None) :param list extra_args: extra arguments (list of strings) to be passed to pandoc (Default value = ()) :param str encoding: the encoding of the file or the input bytes (Default value = 'utf-8') :param str outputfile: output will be written to outfilename or the converted content returned if None (Default value = None) :param list filters: pandoc filters e.g. filters=['pandoc-citeproc'] :returns: converted string (unicode) or an empty string if an outputfile was given :rtype: unicode :raises RuntimeError: if any of the inputs are not valid of if pandoc fails with an error :raises OSError: if pandoc is not found; make sure it has been installed and is available at path. 
""" msg = ("Due to possible ambiguity, 'convert()' is deprecated. " "Use 'convert_file()' or 'convert_text()'.") warnings.warn(msg, DeprecationWarning, stacklevel=2) path = _identify_path(source) if path: format = _identify_format_from_path(source, format) input_type = 'path' else: source = _as_unicode(source, encoding) input_type = 'string' if not format: raise RuntimeError("Format missing, but need one (identified source as text as no " "file with that name was found).") return _convert_input(source, format, input_type, to, extra_args=extra_args, outputfile=outputfile, filters=filters) def convert_text(source, to, format, extra_args=(), encoding='utf-8', outputfile=None, filters=None): """Converts given `source` from `format` to `to`. :param str source: Unicode string or bytes (see encoding) :param str to: format into which the input should be converted; can be one of `pypandoc.get_pandoc_formats()[1]` :param str format: the format of the inputs; can be one of `pypandoc.get_pandoc_formats()[1]` :param list extra_args: extra arguments (list of strings) to be passed to pandoc (Default value = ()) :param str encoding: the encoding of the input bytes (Default value = 'utf-8') :param str outputfile: output will be written to outfilename or the converted content returned if None (Default value = None) :param list filters: pandoc filters e.g. filters=['pandoc-citeproc'] :returns: converted string (unicode) or an empty string if an outputfile was given :rtype: unicode :raises RuntimeError: if any of the inputs are not valid of if pandoc fails with an error :raises OSError: if pandoc is not found; make sure it has been installed and is available at path. """ source = _as_unicode(source, encoding) return _convert_input(source, format, 'string', to, extra_args=extra_args, outputfile=outputfile, filters=filters) def convert_file(source_file, to, format=None, extra_args=(), encoding='utf-8', outputfile=None, filters=None): """Converts given `source` from `format` to `to`. 
:param str source_file: file path (see encoding) :param str to: format into which the input should be converted; can be one of `pypandoc.get_pandoc_formats()[1]` :param str format: the format of the inputs; will be inferred from the source_file with an known filename extension; can be one of `pypandoc.get_pandoc_formats()[1]` (Default value = None) :param list extra_args: extra arguments (list of strings) to be passed to pandoc (Default value = ()) :param str encoding: the encoding of the file or the input bytes (Default value = 'utf-8') :param str outputfile: output will be written to outfilename or the converted content returned if None (Default value = None) :param list filters: pandoc filters e.g. filters=['pandoc-citeproc'] :returns: converted string (unicode) or an empty string if an outputfile was given :rtype: unicode :raises RuntimeError: if any of the inputs are not valid of if pandoc fails with an error :raises OSError: if pandoc is not found; make sure it has been installed and is available at path. 
""" if not _identify_path(source_file): raise RuntimeError("source_file is not a valid path") format = _identify_format_from_path(source_file, format) return _convert_input(source_file, format, 'path', to, extra_args=extra_args, outputfile=outputfile, filters=filters) def _identify_path(source): # guard against problems if source is None or not isinstance(source, string_types): return False path = False try: path = os.path.exists(source) except UnicodeEncodeError: source = source.encode('utf-8') path = os.path.exists(source) except: path # still false if not path: # check if it's an URL result = urlparse(source) if result.scheme in ["http", "https"]: path = True # unfortunately, pandoc currently doesn't support anything else currently # https://github.com/jgm/pandoc/issues/319 # elif result.scheme and result.netloc and result.path: # # complete uri including one with a network path # path = True # elif result.scheme == "file" and result.path: # path = path = os.path.exists(url2path(source)) return path def _identify_format_from_path(sourcefile, format): return format or os.path.splitext(sourcefile)[1].strip('.') def _as_unicode(source, encoding): if encoding != 'utf-8': # if a source and a different encoding is given, try to decode the the source into a # unicode string try: source = cast_unicode(source, encoding=encoding) except (UnicodeDecodeError, UnicodeEncodeError): pass return source def _identify_input_type(source, format, encoding='utf-8'): path = _identify_path(source) if path: format = _identify_format_from_path(source, format) input_type = 'path' else: source = _as_unicode(source, encoding) input_type = 'string' return source, format, input_type def _validate_formats(format, to, outputfile): def normalize_format(fmt): formats = { 'dbk': 'docbook', 'md': 'markdown', 'tex': 'latex', } fmt = formats.get(fmt, fmt) # rst format can have extensions if fmt[:4] == "rest": fmt = "rst"+fmt[4:] return fmt format = normalize_format(format) to = normalize_format(to) 
if not format: raise RuntimeError('Missing format!') from_formats, to_formats = get_pandoc_formats() if _get_base_format(format) not in from_formats: raise RuntimeError( 'Invalid input format! Got "%s" but expected one of these: %s' % ( _get_base_format(format), ', '.join(from_formats))) base_to_format = _get_base_format(to) file_extension = os.path.splitext(to)[1] if (base_to_format not in to_formats and base_to_format != "pdf" and # pdf is handled later # noqa: E127 file_extension != '.lua'): raise RuntimeError( 'Invalid output format! Got %s but expected one of these: %s' % ( base_to_format, ', '.join(to_formats))) # list from https://github.com/jgm/pandoc/blob/master/pandoc.hs # `[...] where binaries = ["odt","docx","epub","epub3"] [...]` # pdf has the same restriction if base_to_format in ["odt", "docx", "epub", "epub3", "pdf"] and not outputfile: raise RuntimeError( 'Output to %s only works by using a outputfile.' % base_to_format ) if base_to_format == "pdf": # pdf formats needs to actually have a to format of latex and a # filename with an ending pf .pdf if outputfile[-4:] != ".pdf": raise RuntimeError('PDF output needs an outputfile with ".pdf" as a fileending.') # to is not allowed to contain pdf, but must point to latex # it's also not allowed to contain extensions according to the docs if to != base_to_format: raise RuntimeError("PDF output can't contain any extensions: %s" % to) to = "latex" return format, to def _convert_input(source, format, input_type, to, extra_args=(), outputfile=None, filters=None): _ensure_pandoc_path() format, to = _validate_formats(format, to, outputfile) string_input = input_type == 'string' input_file = [source] if not string_input else [] args = [__pandoc_path, '--from=' + format] args.append('--to=' + to) args += input_file if outputfile: args.append("--output="+outputfile) args.extend(extra_args) # adds the proper filter syntax for each item in the filters list if filters is not None: if isinstance(filters, string_types): 
filters = filters.split() f = ['--filter=' + x for x in filters] args.extend(f) # To get access to pandoc-citeproc when we use a included copy of pandoc, # we need to add the pypandoc/files dir to the PATH new_env = os.environ.copy() files_path = os.path.join(os.path.dirname(os.path.realpath(__file__)), "files") new_env["PATH"] = new_env.get("PATH", "") + os.pathsep + files_path p = subprocess.Popen( args, stdin=subprocess.PIPE if string_input else None, stdout=subprocess.PIPE, stderr=subprocess.PIPE, env=new_env) # something else than 'None' indicates that the process already terminated if not (p.returncode is None): raise RuntimeError( 'Pandoc died with exitcode "%s" before receiving input: %s' % (p.returncode, p.stderr.read()) ) try: source = cast_bytes(source, encoding='utf-8') except (UnicodeDecodeError, UnicodeEncodeError): # assume that it is already a utf-8 encoded string pass try: stdout, stderr = p.communicate(source if string_input else None) except OSError: # this is happening only on Py2.6 when pandoc dies before reading all # the input. We treat that the same as when we exit with an error... raise RuntimeError('Pandoc died with exitcode "%s" during conversion.' % (p.returncode)) try: stdout = stdout.decode('utf-8') except UnicodeDecodeError: # this shouldn't happen: pandoc more or less garantees that the output is utf-8! raise RuntimeError('Pandoc output was not utf-8.') # check that pandoc returned successfully if p.returncode != 0: raise RuntimeError( 'Pandoc died with exitcode "%s" during conversion: %s' % (p.returncode, stderr) ) # if there is an outputfile, then stdout is likely empty! return stdout def _get_base_format(format): ''' According to http://johnmacfarlane.net/pandoc/README.html#general-options, syntax extensions for markdown can be individually enabled or disabled by appending +EXTENSION or -EXTENSION to the format name. Return the base format without any extensions. 
''' return re.split('\+|-', format)[0] def get_pandoc_formats(): ''' Dynamic preprocessor for Pandoc formats. Return 2 lists. "from_formats" and "to_formats". ''' _ensure_pandoc_path() p = subprocess.Popen( [__pandoc_path, '--list-output-formats'], stdin=subprocess.PIPE, stderr=subprocess.PIPE, stdout=subprocess.PIPE) comm = p.communicate() out = comm[0].decode().splitlines(False) if p.returncode != 0: # try the old version and see if that returns something return get_pandoc_formats_pre_1_18() p = subprocess.Popen( [__pandoc_path, '--list-input-formats'], stdin=subprocess.PIPE, stderr=subprocess.PIPE, stdout=subprocess.PIPE) comm = p.communicate() in_ = comm[0].decode().splitlines(False) return [f.strip() for f in in_], [f.strip() for f in out] def get_pandoc_formats_pre_1_18(): ''' Dynamic preprocessor for Pandoc formats for version < 1.18. Return 2 lists. "from_formats" and "to_formats". ''' _ensure_pandoc_path() p = subprocess.Popen( [__pandoc_path, '-h'], stdin=subprocess.PIPE, stdout=subprocess.PIPE) comm = p.communicate() help_text = comm[0].decode().splitlines(False) if p.returncode != 0 or 'Options:' not in help_text: raise RuntimeError("Couldn't call pandoc to get output formats. 
Output from pandoc:\n%s" % str(comm)) txt = ' '.join(help_text[1:help_text.index('Options:')]) aux = txt.split('Output formats: ') in_ = re.sub('Input\sformats:\s|\*|\[.*?\]', '', aux[0]).split(',') out = re.sub('\*|\[.*?\]', '', aux[1]).split(',') return [f.strip() for f in in_], [f.strip() for f in out] # copied and adapted from jupyter_nbconvert/utils/pandoc.py, Modified BSD License def _get_pandoc_version(pandoc_path): new_env = os.environ.copy() if 'HOME' not in os.environ: new_env['HOME'] = tempfile.gettempdir() p = subprocess.Popen( [pandoc_path, '--version'], stdin=subprocess.PIPE, stdout=subprocess.PIPE, env=new_env) comm = p.communicate() out_lines = comm[0].decode().splitlines(False) if p.returncode != 0 or len(out_lines) == 0: raise RuntimeError("Couldn't call pandoc to get version information. Output from " "pandoc:\n%s" % str(comm)) version_pattern = re.compile(r"^\d+(\.\d+){1,}$") for tok in out_lines[0].split(): if version_pattern.match(tok): version = tok break return version def get_pandoc_version(): """Gets the Pandoc version if Pandoc is installed. It will probe Pandoc for its version, cache it and return that value. If a cached version is found, it will return the cached version and stop probing Pandoc (unless :func:`clean_version_cache()` is called). :raises OSError: if pandoc is not found; make sure it has been installed and is available at path. """ global __version if __version is None: _ensure_pandoc_path() __version = _get_pandoc_version(__pandoc_path) return __version def get_pandoc_path(): """Gets the Pandoc path if Pandoc is installed. It will return a path to pandoc which is used by pypandoc. This might be a full path or, if pandoc is on PATH, simple `pandoc`. It's garanteed to be callable (i.e. we could get version information from `pandoc --version`). If `PYPANDOC_PANDOC` is set and valid, it will return that value. 
If the environment variable is not set, either the full path to the included pandoc or the pandoc in `PATH` or a pandoc in some of the more usual (platform specific) install locations (whatever is the higher version) will be returned. If a cached path is found, it will return the cached path and stop probing Pandoc (unless :func:`clean_pandocpath_cache()` is called). :raises OSError: if pandoc is not found """ _ensure_pandoc_path() return __pandoc_path def _ensure_pandoc_path(): global __pandoc_path if __pandoc_path is None: included_pandoc = os.path.join(os.path.dirname(os.path.realpath(__file__)), "files", "pandoc") search_paths = ["pandoc", included_pandoc] pf = "linux" if sys.platform.startswith("linux") else sys.platform try: search_paths.append(os.path.join(DEFAULT_TARGET_FOLDER[pf], "pandoc")) except: # not one of the know platforms... pass if pf == "linux": # Currently we install into ~/bin, but this is equally likely... search_paths.append("~/.bin/pandoc") # Also add the interpreter script path, as that's where pandoc could be # installed if it's an environment and the environment wasn't activated if pf == "win32": search_paths.append(os.path.join(sys.exec_prefix, "Scripts", "pandoc")) # bin can also be used on windows (conda at leats has it in path), so # include it unconditionally search_paths.append(os.path.join(sys.exec_prefix, "bin", "pandoc")) # If a user added the complete path to pandoc to an env, use that as the # only way to get pandoc so that a user can overwrite even a higher # version in some other places. if os.getenv('PYPANDOC_PANDOC', None): search_paths = [os.getenv('PYPANDOC_PANDOC')] for path in search_paths: # Needed for windows and subprocess which can't expand it on it's # own... path = os.path.expanduser(path) curr_version = [0, 0, 0] version_string = "0.0.0" # print("Trying: %s" % path) try: version_string = _get_pandoc_version(path) except Exception as e: # we can't use that path... 
if os.path.exists(path): # path exist but is not useable -> not executable? print("Found %s, but not using it because of an error:" % (path), file=sys.stderr) print(e, file=sys.stderr) continue version = [int(x) for x in version_string.split(".")] while len(version) < len(curr_version): version.append(0) # print("%s, %s" % (path, version)) for pos in range(len(curr_version)): # Only use the new version if it is any bigger... if version[pos] > curr_version[pos]: # print("Found: %s" % path) __pandoc_path = path curr_version = version break if __pandoc_path is None: if os.path.exists('/usr/local/bin/brew'): sys.stderr.write(textwrap.dedent("""\ Maybe try: brew install pandoc """)) elif os.path.exists('/usr/bin/apt-get'): sys.stderr.write(textwrap.dedent("""\ Maybe try: sudo apt-get install pandoc """)) elif os.path.exists('/usr/bin/yum'): sys.stderr.write(textwrap.dedent("""\ Maybe try: sudo yum install pandoc """)) sys.stderr.write(textwrap.dedent("""\ See http://johnmacfarlane.net/pandoc/installing.html for installation options """)) sys.stderr.write(textwrap.dedent("""\ --------------------------------------------------------------- """)) raise OSError("No pandoc was found: either install pandoc and add it\n" "to your PATH or or call pypandoc.download_pandoc(...) 
or\n" "install pypandoc wheels with included pandoc.") # ----------------------------------------------------------------------------- # Internal state management # ----------------------------------------------------------------------------- def clean_version_cache(): global __version __version = None def clean_pandocpath_cache(): global __pandoc_path __pandoc_path = None __version = None __pandoc_path = None pypandoc-1.4/pypandoc/pandoc_download.py0000644000175000017500000001614113076637465021403 0ustar travistravis00000000000000# -*- coding: utf-8 -*- import sys import os import shutil import tempfile import os.path import subprocess import platform import re try: from urllib.request import urlopen except ImportError: from urllib import urlopen DEFAULT_TARGET_FOLDER = { "win32": "~\\AppData\\Local\\Pandoc", "linux": "~/bin", "darwin": "~/Applications/pandoc" } def _get_pandoc_urls(version="latest"): """Get the urls of pandoc's binaries Uses sys.platform keys, but removes the 2 from linux2 Adding a new platform means implementing unpacking in "DownloadPandocCommand" and adding the URL here :param str version: pandoc version. Valid values are either a valid pandoc version e.g. "1.19.1", or "latest" Default: "latest". :return: str pandoc_urls: a dictionary with keys as system platform and values as the url pointing to respective binaries :return: str version: actual pandoc version. (e.g. 
"lastest" will be resolved to the actual one) """ # url to pandoc download page url = "https://github.com/jgm/pandoc/releases/" + \ ("tag/" if version != "latest" else "") + version # read the HTML content response = urlopen(url) content = response.read() # regex for the binaries regex = re.compile(r"/jgm/pandoc/releases/download/.*\.(?:msi|deb|pkg)") # a list of urls to the bainaries pandoc_urls_list = regex.findall(content.decode("utf-8")) # actual pandoc version version = pandoc_urls_list[0].split('/')[5] # dict that lookup the platform from binary extension ext2platform = { 'msi': 'win32', 'deb': 'linux', 'pkg': 'darwin' } # parse pandoc_urls from list to dict # py26 don't like dict comprehension. Use this one instead when py26 support is dropped # pandoc_urls = {ext2platform[url_frag[-3:]]: ("https://github.com" + url_frag) for url_frag in pandoc_urls_list} pandoc_urls = dict((ext2platform[ url_frag[-3:]], ("https://github.com" + url_frag)) for url_frag in pandoc_urls_list) return pandoc_urls, version def _make_executable(path): mode = os.stat(path).st_mode mode |= (mode & 0o444) >> 2 # copy R bits to X print("* Making %s executeable..." % (path)) os.chmod(path, mode) def _handle_linux(filename, targetfolder): print("* Unpacking %s to tempfolder..." % (filename)) tempfolder = tempfile.mkdtemp() cur_wd = os.getcwd() filename = os.path.abspath(filename) try: os.chdir(tempfolder) cmd = ["ar", "x", filename] # if only 3.5 is supported, should be `run(..., check=True)` subprocess.check_call(cmd) cmd = ["tar", "xzf", "data.tar.gz"] subprocess.check_call(cmd) # pandoc and pandoc-citeproc are in ./usr/bin subfolder for exe in ["pandoc", "pandoc-citeproc"]: src = os.path.join(tempfolder, "usr", "bin", exe) dst = os.path.join(targetfolder, exe) print("* Copying %s to %s ..." 
% (exe, targetfolder)) shutil.copyfile(src, dst) _make_executable(dst) src = os.path.join(tempfolder, "usr", "share", "doc", "pandoc", "copyright") dst = os.path.join(targetfolder, "copyright.pandoc") print("* Copying copyright to %s ..." % (targetfolder)) shutil.copyfile(src, dst) finally: os.chdir(cur_wd) shutil.rmtree(tempfolder) def _handle_darwin(filename, targetfolder): print("* Unpacking %s to tempfolder..." % (filename)) tempfolder = tempfile.mkdtemp() pkgutilfolder = os.path.join(tempfolder, 'tmp') cmd = ["pkgutil", "--expand", filename, pkgutilfolder] # if only 3.5 is supported, should be `run(..., check=True)` subprocess.check_call(cmd) # this will generate usr/local/bin below the dir cmd = ["tar", "xvf", os.path.join(pkgutilfolder, "pandoc.pkg", "Payload"), "-C", pkgutilfolder] subprocess.check_call(cmd) # pandoc and pandoc-citeproc are in the ./usr/local/bin subfolder for exe in ["pandoc", "pandoc-citeproc"]: src = os.path.join(pkgutilfolder, "usr", "local", "bin", exe) dst = os.path.join(targetfolder, exe) print("* Copying %s to %s ..." % (exe, targetfolder)) shutil.copyfile(src, dst) _make_executable(dst) # remove temporary dir shutil.rmtree(tempfolder) print("* Done.") def _handle_win32(filename, targetfolder): print("* Unpacking %s to tempfolder..." % (filename)) tempfolder = tempfile.mkdtemp() cmd = ["msiexec", "/a", filename, "/qb", "TARGETDIR=%s" % (tempfolder)] # if only 3.5 is supported, should be `run(..., check=True)` subprocess.check_call(cmd) # pandoc.exe, pandoc-citeproc.exe, and the COPYRIGHT are in the Pandoc subfolder for exe in ["pandoc.exe", "pandoc-citeproc.exe", "COPYRIGHT.txt"]: src = os.path.join(tempfolder, "Pandoc", exe) dst = os.path.join(targetfolder, exe) print("* Copying %s to %s ..." 
% (exe, targetfolder)) shutil.copyfile(src, dst) # remove temporary dir shutil.rmtree(tempfolder) print("* Done.") def download_pandoc(url=None, targetfolder=None, version="latest"): """Download and unpack pandoc Downloads prebuild binaries for pandoc from `url` and unpacks it into `targetfolder`. :param str url: URL for the to be downloaded pandoc binary distribution for the platform under which this python runs. If no `url` is give, uses the latest available release at the time pypandoc was released. :param str targetfolder: directory, where the binaries should be installed to. If no `targetfolder` is give, uses a platform specific user location: `~/bin` on Linux, `~/Applications/pandoc` on Mac OS X, and `~\\AppData\\Local\\Pandoc` on Windows. """ # get pandoc_urls pandoc_urls, _ = _get_pandoc_urls(version) pf = sys.platform # compatibility with py3 if pf.startswith("linux"): pf = "linux" if platform.architecture()[0] != "64bit": raise RuntimeError("Linux pandoc is only compiled for 64bit.") if pf not in pandoc_urls: raise RuntimeError("Can't handle your platform (only Linux, Mac OS X, Windows).") if url is None: url = pandoc_urls[pf] filename = url.split("/")[-1] if os.path.isfile(filename): print("* Using already downloaded file %s" % (filename)) else: print("* Downloading pandoc from %s ..." % url) # https://stackoverflow.com/questions/30627937/tracebaclk-attributeerroraddinfourl-instance-has-no-attribute-exit response = urlopen(url) with open(filename, 'wb') as out_file: shutil.copyfileobj(response, out_file) if targetfolder is None: targetfolder = DEFAULT_TARGET_FOLDER[pf] targetfolder = os.path.expanduser(targetfolder) # Make sure target folder exists... try: os.makedirs(targetfolder) except OSError: pass # dir already exists... unpack = globals().get("_handle_" + pf) assert unpack is not None, "Can't handle download, only Linux, Windows and OS X are supported." 
unpack(filename, targetfolder) pypandoc-1.4/pypandoc/py3compat.py0000644000175000017500000000347713076637465020177 0ustar travistravis00000000000000# -*- coding: utf-8 -*- from __future__ import with_statement import sys import locale # compat code from IPython py3compat.py and encoding.py, which is licensed under the terms of the # Modified BSD License (also known as New or Revised or 3-Clause BSD) _DEFAULT_ENCODING = None try: # There are reports of getpreferredencoding raising errors # in some cases, which may well be fixed, but let's be conservative here. _DEFAULT_ENCODING = locale.getpreferredencoding() except Exception: pass _DEFAULT_ENCODING = _DEFAULT_ENCODING or sys.getdefaultencoding() def _decode(s, encoding=None): encoding = encoding or _DEFAULT_ENCODING return s.decode(encoding) def _encode(u, encoding=None): encoding = encoding or _DEFAULT_ENCODING return u.encode(encoding) def cast_unicode(s, encoding=None): if isinstance(s, bytes): return _decode(s, encoding) return s def cast_bytes(s, encoding=None): # bytes == str on py2.7 -> always encode on py2 if not isinstance(s, bytes): return _encode(s, encoding) return s if sys.version_info[0] >= 3: PY3 = True string_types = (str,) unicode_type = str # from http://stackoverflow.com/questions/11687478/convert-a-filename-to-a-file-url from urllib.parse import urljoin, urlparse from urllib.request import pathname2url, url2pathname def path2url(path): return urljoin('file:', pathname2url(path)) def url2path(url): return url2pathname(urlparse(url).path) else: PY3 = False string_types = (str, unicode) # noqa: F821 unicode_type = unicode # noqa: F821 from urlparse import urljoin, urlparse import urllib def path2url(path): return urljoin('file:', urllib.pathname2url(path)) def url2path(url): return urllib.url2pathname(urlparse(url).path) pypandoc-1.4/LICENSE0000644000175000017500000000553413076637465015072 0ustar travistravis00000000000000Copyright (c) 2011 Juho Vepsäläinen Permission is hereby granted, free of 
charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. compat code from IPython py3compat.py and encoding.py, which is licensed the terms of the Modified BSD License (also known as New or Revised or 3-Clause BSD) - Copyright (c) 2008-2014, IPython Development Team - Copyright (c) 2001-2007, Fernando Perez - Copyright (c) 2001, Janko Hauser - Copyright (c) 2001, Nathaniel Gray All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. Neither the name of the IPython Development Team nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. pypandoc-1.4/MANIFEST.in0000644000175000017500000000013613076637465015614 0ustar travistravis00000000000000include README.md include LICENSE include tests.py include filter_test.md include examples/* pypandoc-1.4/README.md0000644000175000017500000002705713076637465015350 0ustar travistravis00000000000000# pypandoc [![Build Status](https://travis-ci.org/bebraw/pypandoc.svg?branch=master)](https://travis-ci.org/bebraw/pypandoc) [![Appveyor Build Status](https://ci.appveyor.com/api/projects/status/github/bebraw/pypandoc?svg=true)](https://ci.appveyor.com/project/bebraw/pypandoc) [![GitHub Releases](https://img.shields.io/github/tag/bebraw/pypandoc.svg?label=github+release)](https://github.com/bebraw/pypandoc/releases) [![PyPI version](https://badge.fury.io/py/pypandoc.svg)](https://pypi.python.org/pypi/pypandoc/) [![conda version](https://anaconda.org/conda-forge/pypandoc/badges/version.svg)](https://anaconda.org/conda-forge/pypandoc/) [![Development Status](https://img.shields.io/pypi/status/pypandoc.svg)](https://pypi.python.org/pypi/pypandoc/) [![Python version](https://img.shields.io/pypi/pyversions/pypandoc.svg)](https://pypi.python.org/pypi/pypandoc/) ![License](https://img.shields.io/pypi/l/pypandoc.svg) Pypandoc 
provides a thin wrapper for [pandoc](https://pandoc.org), a universal document converter. ## Installation Pypandoc uses pandoc, so it needs an available installation of pandoc. For some common cases (wheels, conda packages), pypandoc already includes pandoc (and pandoc-citeproc) in its prebuilt package. If pandoc is already installed (i.e. pandoc is in the `PATH`), pypandoc uses the version with the higher version number, and if both are the same, the already installed version. See [Specifying the location of pandoc binaries](#specifying-the-location-of-pandoc-binaries) for more. To use pandoc filters, you must have the relevant filters installed on your machine. ### Installing via pip Install via `pip install pypandoc`. Prebuilt [wheels for Windows and Mac OS X](https://pypi.python.org/pypi/pypandoc/) include pandoc. If there is no prebuilt binary available, you have to [install pandoc yourself](#installing-pandoc-manually). If you use Linux and have [your own wheelhouse](https://wheel.readthedocs.org/en/latest/#usage), you can build a wheel which includes pandoc with `python setup.py download_pandoc; python setup.py bdist_wheel`. Be aware that this works only on 64-bit Intel systems, as we only download it from the [official releases](https://github.com/jgm/pandoc/releases). ### Installing via conda Pypandoc is included in [conda-forge](https://conda-forge.github.io/). The conda packages will also install the pandoc package, so pandoc is available in the installation. Install via `conda install -c conda-forge pypandoc`. You can also add the channel to your conda config via `conda config --add channels conda-forge`. This makes it possible to use `conda install pypandoc` directly and also lets you update via `conda update pypandoc`. ### Installing pandoc If you don't get pandoc installed via a prebuilt wheel which includes pandoc or via the conda package dependencies, you need to install pandoc by yourself. 
#### Installing pandoc via pypandoc Installing via pypandoc is possible on Windows, Mac OS X or Linux (Intel-based, 64-bit): ```python # expects an installed pypandoc: pip install pypandoc from pypandoc.pandoc_download import download_pandoc # see the documentation how to customize the installation path # but be aware that you then need to include it in the `PATH` download_pandoc() ``` The default install location is included in the search path for pandoc, so you don't need to add it to the `PATH`. By default, the latest pandoc version is installed. If you want to specify your own version, say 1.19.1, use `download_pandoc(version='1.19.1')` instead. #### Installing pandoc manually Installing manually via the system mechanism is also possible. Such installation mechanisms make pandoc available on many more platforms: - Ubuntu/Debian: `sudo apt-get install pandoc` - Fedora/Red Hat: `sudo yum install pandoc` - Arch: `sudo pacman -S pandoc` - Mac OS X with Homebrew: `brew install pandoc pandoc-citeproc Caskroom/cask/mactex` - Machine with Haskell: `cabal-install pandoc` - Windows: There is an installer available [here](https://pandoc.org/installing.html) - [FreeBSD port](https://www.freshports.org/textproc/pandoc/) - Or see [Pandoc - Installing pandoc](https://pandoc.org/installing.html) Be aware that not all install mechanisms put pandoc in the `PATH`, so you either have to change the `PATH` yourself or set the full path to the pandoc binary in `PYPANDOC_PANDOC`. See the next section for more information. ### Specifying the location of pandoc binaries You can point to a specific pandoc version by setting the environment variable `PYPANDOC_PANDOC` to the full path to the pandoc binary (`PYPANDOC_PANDOC=/home/x/whatever/pandoc` or `PYPANDOC_PANDOC=c:\pandoc\pandoc.exe`). If this environment variable is set, this is the only place where pandoc is searched for. In certain cases, e.g. 
pandoc is installed but a web server with its own user cannot find the binaries, it is useful to specify the location at runtime: ```python import os os.environ.setdefault('PYPANDOC_PANDOC', '/home/x/whatever/pandoc') ``` ## Usage There are two basic ways to use pypandoc: with input files or with input strings. ```python import pypandoc # With an input file: it will infer the input format from the filename output = pypandoc.convert_file('somefile.md', 'rst') # ...but you can overwrite the format via the `format` argument: output = pypandoc.convert_file('somefile.txt', 'rst', format='md') # alternatively you could just pass some string. In this case you need to # define the input format: output = pypandoc.convert_text('#some title', 'rst', format='md') # output == 'some title\r\n==========\r\n\r\n' ``` `convert_text` expects this string to be unicode or utf-8 encoded bytes. `convert_*` will always return a unicode string. It's also possible to directly let pandoc write the output to a file. This is the only way to convert to some output formats (e.g. odt, docx, epub, epub3, pdf). In that case `convert_*()` will return an empty string. ```python import pypandoc output = pypandoc.convert_file('somefile.md', 'docx', outputfile="somefile.docx") assert output == "" ``` In addition to `format`, it is possible to pass `extra_args`. That makes it possible to access various pandoc options easily. ```python output = pypandoc.convert_text( '
<h1>Primary Heading</h1>
', 'md', format='html', extra_args=['--atx-headers']) # output == '# Primary Heading\r\n' output = pypandoc.convert( '# Primary Heading', 'html', format='md', extra_args=['--base-header-level=2']) # output == '
<h2>Primary Heading</h2>
\r\n' ``` pypandoc now supports easy addition of [pandoc filters](https://pandoc.org/scripting.html). ```python filters = ['pandoc-citeproc'] pdoc_args = ['--mathjax', '--smart'] output = pd.convert_file(source=filename, to='html5', format='md', extra_args=pdoc_args, filters=filters) ``` Please pass any filters in as a list and not as a string. Please refer to `pandoc -h` and the [official documentation](https://pandoc.org/MANUAL.html) for further details. > Note: the old way of using `convert(input, output)` is deprecated as in some cases it wasn't possible to determine whether the input should be used as a filename or as text. ## Dealing with Formatting Arguments Pandoc supports custom formatting through the `-V` parameter. In order to use it through pypandoc, use code such as this: ```python output = pypandoc.convert_file('demo.md', 'pdf', outputfile='demo.pdf', extra_args=['-V', 'geometry:margin=1.5cm']) ``` > Note: it's important to separate `-V` and its argument within a list like that or else it won't work. This gotcha has to do with the way [`subprocess.Popen`](https://docs.python.org/2/library/subprocess.html#subprocess.Popen) works. ## Getting Pandoc Version It can sometimes be useful to check which pandoc version is available on your system or which particular pandoc binary is used by pypandoc. For that, pypandoc provides the following utility functions. Example: ``` print(pypandoc.get_pandoc_version()) print(pypandoc.get_pandoc_path()) print(pypandoc.get_pandoc_formats()) ``` ## Related * [pydocverter](https://github.com/msabramo/pydocverter) is a client for a service called [Docverter](https://www.docverter.com), which offers pandoc as a service (plus some extra goodies). * See [pyandoc](https://pypi.python.org/pypi/pyandoc/) for an alternative implementation of a pandoc wrapper from Kenneth Reitz. This one hasn't been active in a while though. * See [panflute](https://github.com/sergiocorreia/panflute) which provides `convert_text` similar to pypandoc's. 
Its focus is on writing and running pandoc filters though. ## Contributing Contributions are welcome. When opening a PR, please keep the following guidelines in mind: 1. Before implementing, please open an issue for discussion. 2. Make sure you have tests for the new logic. 3. Make sure your code passes `flake8 pypandoc/*.py tests.py` 4. Add yourself to contributors at `README.md` unless you are already there. In that case tweak your contributions. Note that for citeproc tests to pass you'll need to have [pandoc-citeproc](https://github.com/jgm/pandoc-citeproc) installed. If you installed a prebuilt wheel or conda package, it is already included. ## Contributors * [Valentin Haenel](https://github.com/esc) - String conversion fix * [Daniel Sanchez](https://github.com/ErunamoJAZZ) - Automatic parsing of input/output formats * [Thomas G.](https://github.com/coldfix) - Python 3 support * [Ben Jao Ming](https://github.com/benjaoming) - Fail gracefully if pandoc is missing * [Ross Crawford-d'Heureuse](https://github.com/rosscdh) - Encode input in UTF-8 and add Django example * [Michael Chow](https://github.com/machow) - Decode output in UTF-8 * [Janusz Skonieczny](https://github.com/wooyek) - Support Windows newlines and allow encoding to be specified. * [gabeos](https://github.com/gabeos) - Fix help parsing * [Marc Abramowitz](https://github.com/msabramo) - Make `setup.py` fail hard if pandoc is missing, Travis, Dockerfile, PyPI badge, Tox, PEP-8, improved documentation * [Daniel L.](https://github.com/mcktrtl) - Add `extra_args` example to README * [Amy Guy](https://github.com/rhiaro) - Exception handling for unicode errors * [Florian Eßer](https://github.com/flesser) - Allow Markdown extensions in output format * [Philipp Wendler](https://github.com/PhilippWendler) - Allow Markdown extensions in input format * [Jan Schulz](https://github.com/JanSchulz) - Handling output to a file, Travis to work on newer version of pandoc, return code checking, get_pandoc_version. 
Helped to fix the Travis build, new `convert_*` API * [Aaron Gonzales](https://github.com/xysmas) - Added better filter handling * [David Lukes](https://github.com/dlukes) - Enabled input from non-plain-text files and made sure tests clean up template files correctly if they fail * [valholl](https://github.com/valholl) - Set up licensing information correctly and include examples to distribution version * [Cyrille Rossant](https://github.com/rossant) - Fixed bug by trimming out stars in the list of pandoc formats. Helped to fix the Travis build. * [Paul Osborne](https://github.com/posborne) - Don't require pandoc to install pypandoc. * [Felix Yan](https://github.com/felixonmars) - Added installation instructions for Arch Linux. * [Kolen Cheung](https://github.com/ickc) - Implement `_get_pandoc_urls` for installing arbitrary version as well as the latest version of pandoc. Minor: README, Travis, setup.py. ## License Pypandoc is available under MIT license. See LICENSE for more details. Pandoc itself is [available under the GPL2 license](https://github.com/jgm/pandoc/blob/master/COPYING.md). pypandoc-1.4/filter_test.md0000644000175000017500000000077713076637465016737 0ustar travistravis00000000000000--- ## This is an in-header citation in bibyaml format ## Example taken from pandoc website references: - id: fenner2012a title: One-click science marketing author: - family: Fenner given: Martin container-title: Nature Materials volume: 11 URL: 'http://dx.doi.org/10.1038/nmat3283' DOI: 10.1038/nmat3283 issue: 4 publisher: Nature Publishing Group page: 261-263 type: article-journal issued: year: 2012 month: 3 --- ## Section ## A reference [@fenner2012a]. 
# References
# (archive member header) pypandoc-1.4/setup.py0000755000175000017500000001046413076637465015600 0ustar travistravis00000000000000
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# setup.py: packaging script for pypandoc. Optionally bundles a pandoc binary
# into a platform specific wheel via the custom ``download_pandoc`` command.

import pypandoc
from setuptools import setup, Command

import sys
import os
import os.path

try:
    from urllib.request import urlopen
except ImportError:
    from urllib import urlopen


# Build the PyPI long description from the README; fall back to the raw
# markdown when pandoc itself is not installed.
try:
    long_description = pypandoc.convert('README.md', 'rst')
    long_description = long_description.replace("\r","")
except OSError:
    print("\n\n!!! pandoc not found, long_description is bad, don't upload this to PyPI !!!\n\n")
    import io
    # pandoc is not installed, fallback to using raw contents
    with io.open('README.md', encoding="utf-8") as f:
        long_description = f.read()


class DownloadPandocCommand(Command):

    """Download pandoc"""

    description = "downloads a pandoc release and adds it to the package"

    user_options = []

    def initialize_options(self):
        pass

    def finalize_options(self):
        pass

    def run(self):
        # imported lazily so that the download code is only needed for this command
        from pypandoc.pandoc_download import download_pandoc
        targetfolder = os.path.join(os.path.dirname(os.path.realpath(__file__)), "pypandoc", "files")
        download_pandoc(targetfolder=targetfolder)


cmd_classes = {'download_pandoc': DownloadPandocCommand}

# Make sure wheels end up platform specific, if they include a pandoc binary
has_pandoc = (os.path.isfile(os.path.join("pypandoc", "files", "pandoc")) or
              os.path.isfile(os.path.join("pypandoc", "files", "pandoc.exe")))
is_build_wheel = ("bdist_wheel" in sys.argv)
is_download_pandoc = ("download_pandoc" in sys.argv)

if is_build_wheel:
    if has_pandoc or is_download_pandoc:
        # we need to make sure that bdist_wheel is after is_download_pandoc,
        # otherwise we don't include pandoc in the wheel... :-(
        pos_bdist_wheel = sys.argv.index("bdist_wheel")
        if is_download_pandoc:
            pos_download_pandoc = sys.argv.index("download_pandoc")
            if pos_bdist_wheel < pos_download_pandoc:
                raise RuntimeError("'download_pandoc' needs to be before 'bdist_wheel'.")
        # we also need to make sure that this version of bdist_wheel supports
        # the --plat-name argument
        try:
            import wheel
            from distutils.version import StrictVersion
            if not StrictVersion(wheel.__version__) >= StrictVersion("0.27"):
                msg = "Including pandoc in wheel needs wheel >=0.27 but found %s.\nPlease update wheel!"
                raise RuntimeError(msg % wheel.__version__)
        except ImportError:
            # the real error will happen further down...
            print("No wheel installed, please install 'wheel'...")
        print("forcing platform specific wheel name...")
        from distutils.util import get_platform
        # inject the platform tag right after the bdist_wheel command
        sys.argv.insert(pos_bdist_wheel + 1, '--plat-name')
        sys.argv.insert(pos_bdist_wheel + 2, get_platform())
    else:
        print("no pandoc found, building platform unspecific wheel...")
        print("use 'python setup.py download_pandoc' to download pandoc.")

module = pypandoc

setup(
    name = 'pypandoc',
    version = module.__version__,
    url = 'https://github.com/bebraw/pypandoc',
    license = 'MIT',
    description = 'Thin wrapper for pandoc.',
    long_description = long_description,
    author = module.__author__,
    author_email = 'bebraw@gmail.com',
    packages = ['pypandoc'],
    package_data={'pypandoc': ['files/*']},
    install_requires = ['setuptools', 'pip>=8.1.0', 'wheel>=0.25.0'],
    classifiers=[
        'Development Status :: 4 - Beta',
        'Environment :: Console',
        'Intended Audience :: Developers',
        'Intended Audience :: System Administrators',
        'License :: OSI Approved :: MIT License',
        'Operating System :: POSIX',
        'Programming Language :: Python',
        'Topic :: Text Processing',
        'Topic :: Text Processing :: Filters',
        'Programming Language :: Python :: 2',
        'Programming Language :: Python :: 2.6',
        'Programming Language :: Python :: 2.7',
        'Programming Language :: Python :: 3',
        'Programming Language :: Python :: 3.3',
        'Programming Language :: Python :: 3.4',
        'Programming Language :: Python :: 3.5',
        'Programming Language :: Python :: Implementation :: CPython',
        'Programming Language :: Python :: Implementation :: PyPy'
    ],
    test_suite = 'tests',
    cmdclass=cmd_classes
)
# (archive member header) pypandoc-1.4/tests.py0000755000175000017500000004263113076637465015603 0ustar travistravis00000000000000
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# tests.py: test suite for pypandoc. NOTE: the suite is executed (and the
# interpreter exited) at import time, see the bottom of this file.

import unittest
import tempfile
import pypandoc
from pypandoc.py3compat import unicode_type, string_types, path2url
import os
import io
import sys
import warnings
import contextlib
import shutil
import subprocess


@contextlib.contextmanager
def closed_tempfile(suffix, text=None, dir_name=None):
    """Yield the name of a closed temporary file and remove it afterwards.

    :param str suffix: file name suffix (e.g. ``'.md'``)
    :param str text: optional content written to the file before it is closed
    :param str dir_name: if given, the file is created inside a fresh temporary
        directory whose name ends in `dir_name`; the whole directory is
        removed on exit
    :returns: context manager yielding the file name
    """
    if dir_name:
        dir_name = tempfile.mkdtemp(suffix=dir_name)
    file_name = None
    try:
        with tempfile.NamedTemporaryFile('w+t', suffix=suffix, delete=False, dir=dir_name) as test_file:
            file_name = test_file.name
            if text:
                test_file.write(text)
                test_file.flush()
        yield file_name
    finally:
        # clean up even if the body of the ``with`` block raised (e.g. a
        # failing assertion), otherwise temp files pile up on test failures
        if dir_name:
            shutil.rmtree(dir_name, ignore_errors=True)
        elif file_name is not None:
            os.remove(file_name)


# Stolen from pandas
def is_list_like(arg):
    """Return True if `arg` is iterable but not a string."""
    return (hasattr(arg, '__iter__') and
            not isinstance(arg, string_types))


@contextlib.contextmanager
def assert_produces_warning(expected_warning=Warning, filter_level="always",
                            clear=None, check_stacklevel=True):
    """
    Context manager for running code that expects to raise (or not raise)
    warnings.  Checks that code raises the expected warning and only the
    expected warning. Pass ``False`` or ``None`` to check that it does *not*
    raise a warning. Defaults to ``exception.Warning``, baseclass of all
    Warnings. (basically a wrapper around ``warnings.catch_warnings``).

    >>> import warnings
    >>> with assert_produces_warning():
    ...     warnings.warn(UserWarning())
    ...
    >>> with assert_produces_warning(False):
    ...     warnings.warn(RuntimeWarning())
    ...
    Traceback (most recent call last):
        ...
    AssertionError: Caused unexpected warning(s): ['RuntimeWarning'].
    >>> with assert_produces_warning(UserWarning):
    ...     warnings.warn(RuntimeWarning())
    Traceback (most recent call last):
        ...
    AssertionError: Did not see expected warning of class 'UserWarning'.

    ..warn:: This is *not* thread-safe.
    """
    with warnings.catch_warnings(record=True) as w:

        if clear is not None:
            # make sure that we are clearing these warnings
            # if they have happened before
            # to guarantee that we will catch them
            if not is_list_like(clear):
                clear = [clear]
            for m in clear:
                try:
                    m.__warningregistry__.clear()
                except Exception:
                    # best effort: not every module has a warning registry.
                    # (narrowed from a bare ``except`` so that
                    # KeyboardInterrupt/SystemExit are not swallowed)
                    pass

        saw_warning = False
        warnings.simplefilter(filter_level)
        yield w
        extra_warnings = []
        for actual_warning in w:
            if (expected_warning and issubclass(actual_warning.category,
                                                expected_warning)):
                saw_warning = True

                if check_stacklevel and issubclass(actual_warning.category,
                                                   (FutureWarning,
                                                    DeprecationWarning)):
                    from inspect import getframeinfo, stack
                    # frame of the code using this context manager
                    caller = getframeinfo(stack()[2][0])
                    msg = ("Warning not set with correct stacklevel. "
                           "File where warning is raised: {0} != {1}. "
                           "Warning message: {2}".format(
                               actual_warning.filename, caller.filename,
                               actual_warning.message))
                    assert actual_warning.filename == caller.filename, msg
            else:
                extra_warnings.append(actual_warning.category.__name__)
        if expected_warning:
            assert saw_warning, ("Did not see expected warning of class %r."
                                 % expected_warning.__name__)
        assert not extra_warnings, ("Caused unexpected warning(s): %r."
                                    % extra_warnings)


class TestPypandoc(unittest.TestCase):
    # NOTE: test methods are documented with comments rather than docstrings
    # on purpose: the verbosity=2 runner below prints the first docstring line
    # instead of the method name.
    #
    # NOTE(review): the HTML string literals in this class were reconstructed
    # (the tags had been stripped from this copy of the file) -- confirm
    # against the upstream pypandoc 1.4 tests.py.

    def setUp(self):
        if 'HOME' not in os.environ:
            # if this is used with older versions of pandoc-citeproc
            # https://github.com/jgm/pandoc-citeproc/issues/35
            if 'TRAVIS_BUILD_DIR' in os.environ:
                os.environ["HOME"] = os.environ["TRAVIS_BUILD_DIR"]
                print("Using TRAVIS_BUILD_DIR as HOME")
            else:
                os.environ["HOME"] = str(os.getcwd())
                print("Using current dir as HOME")
        print(os.environ["HOME"])

    def test_get_pandoc_formats(self):
        inputs, outputs = pypandoc.get_pandoc_formats()
        self.assertTrue("markdown" in inputs)
        self.assertTrue("json" in inputs)
        self.assertTrue("twiki" in inputs)
        self.assertTrue("markdown" in outputs)

    def test_get_pandoc_version(self):
        assert "HOME" in os.environ, "No HOME set, this will error..."
        version = pypandoc.get_pandoc_version()
        self.assertTrue(isinstance(version, pypandoc.string_types))
        major = int(version.split(".")[0])
        # according to http://pandoc.org/releases.html there were only two versions 0.x ...
        self.assertTrue(major in [0, 1])

    def test_converts_valid_format(self):
        self.assertEqualExceptForNewlineEnd(pypandoc.convert("ok", format='md', to='rest'), 'ok')

    def test_does_not_convert_to_invalid_format(self):
        def f():
            pypandoc.convert("ok", format='md', to='invalid')

        self.assertRaises(RuntimeError, f)

    def test_does_not_convert_from_invalid_format(self):
        def f():
            pypandoc.convert("ok", format='invalid', to='rest')

        self.assertRaises(RuntimeError, f)

    def test_basic_conversion_from_file(self):
        with closed_tempfile('.md', text='#some title\n') as file_name:
            expected = u'some title{0}=========={0}{0}'.format(os.linesep)
            received = pypandoc.convert(file_name, 'rst')
            self.assertEqualExceptForNewlineEnd(expected, received)

    def test_basic_conversion_from_file_url(self):
        # this currently doesn't work: https://github.com/jgm/pandoc/issues/3196
        # (the early return deliberately disables the rest of this test)
        return
        with closed_tempfile('.md', text='#some title\n') as file_name:
            expected = u'some title{0}=========={0}{0}'.format(os.linesep)
            # this keeps the : (which should be '|' on windows but pandoc
            # doesn't like it
            file_url = path2url(file_name)
            assert pypandoc._identify_path(file_url)

            received = pypandoc.convert(file_url, 'rst')
            self.assertEqualExceptForNewlineEnd(expected, received)

    def test_basic_conversion_from_http_url(self):
        url = 'https://raw.githubusercontent.com/bebraw/pypandoc/master/README.md'
        received = pypandoc.convert(url, 'html')
        assert "GPL2 license" in received

    def test_convert_with_custom_writer(self):
        lua_file_content = self.create_sample_lua()
        with closed_tempfile('.md', text='#title\n') as file_name:
            with closed_tempfile('.lua', text=lua_file_content, dir_name="foo-bar+baz") as lua_file_name:
                expected = u'<h1 id="title">title</h1>{0}'.format(os.linesep)
                received = pypandoc.convert_file(file_name, lua_file_name)
                self.assertEqualExceptForNewlineEnd(expected, received)

    def test_basic_conversion_from_file_with_format(self):
        with closed_tempfile('.md', text='#some title\n') as file_name:
            expected = u'some title{0}=========={0}{0}'.format(os.linesep)
            received = pypandoc.convert(file_name, 'rst', format='md')
            self.assertEqualExceptForNewlineEnd(expected, received)

            received = pypandoc.convert_file(file_name, 'rst', format='md')
            self.assertEqualExceptForNewlineEnd(expected, received)

    def test_basic_conversion_from_string(self):
        expected = u'some title{0}=========={0}{0}'.format(os.linesep)
        received = pypandoc.convert('#some title', 'rst', format='md')
        self.assertEqualExceptForNewlineEnd(expected, received)

        expected = u'some title{0}=========={0}{0}'.format(os.linesep)
        received = pypandoc.convert_text('#some title', 'rst', format='md')
        self.assertEqualExceptForNewlineEnd(expected, received)

    def test_conversion_with_markdown_extensions(self):
        input = '<s>strike</s>'
        expected_with_extension = u'~~strike~~'
        expected_without_extension = u'<s>strike</s>'
        received_with_extension = pypandoc.convert(input, 'markdown+strikeout',
                                                   format='html')
        received_without_extension = pypandoc.convert(input,
                                                      'markdown-strikeout',
                                                      format='html')
        self.assertEqualExceptForNewlineEnd(expected_with_extension, received_with_extension)
        self.assertEqualExceptForNewlineEnd(expected_without_extension, received_without_extension)

    def test_conversion_from_markdown_with_extensions(self):
        input = u'~~strike~~'
        expected_with_extension = u'<p><del>strike</del></p>'
        expected_without_extension = u'<p><sub><sub>strike</sub></sub></p>'
        received_with_extension = pypandoc.convert(input, 'html', format=u'markdown+strikeout')
        received_without_extension = pypandoc.convert(input, 'html', format=u'markdown-strikeout')
        self.assertEqualExceptForNewlineEnd(expected_with_extension, received_with_extension)
        self.assertEqualExceptForNewlineEnd(expected_without_extension, received_without_extension)

    def test_basic_conversion_to_file(self):
        with closed_tempfile('.rst',) as file_name:
            expected = u'some title{0}=========={0}{0}'.format(os.linesep)
            received = pypandoc.convert('#some title\n', to='rst', format='md', outputfile=file_name)
            self.assertEqualExceptForNewlineEnd("", received)
            with io.open(file_name) as f:
                written = f.read()
            self.assertEqualExceptForNewlineEnd(expected, written)

        # to odf does not work without a file
        def f():
            pypandoc.convert('#some title\n', to='odf', format='md',
                             outputfile=None)

        self.assertRaises(RuntimeError, f)

    def test_conversion_with_citeproc_filter(self):
        # we just want to get a temp file name, where we can write to
        filters = ['pandoc-citeproc']
        written = pypandoc.convert('./filter_test.md', to='html', format='md',
                                   outputfile=None, filters=filters)
        import re as re
        # only properly converted file will have this in it
        found = re.search(r'Fenner', written)
        self.assertTrue(found.group() == 'Fenner')
        # only properly converted file will have this in it
        found = re.search(r'10.1038', written)
        self.assertTrue(found.group() == '10.1038')

        # make sure that it splits the filter line
        for filters in ['pandoc-citeproc', u'pandoc-citeproc']:
            written = pypandoc.convert('./filter_test.md', to='html', format='md',
                                       outputfile=None, filters=filters)
            # only properly converted file will have this in it
            found = re.search(r'Fenner', written)
            self.assertTrue(found.group() == 'Fenner')
            # only properly converted file will have this in it
            found = re.search(r'10.1038', written)
            self.assertTrue(found.group() == '10.1038')

    def test_conversion_with_empty_filter(self):
        # we just want to get a temp file name, where we can write to
        filters = ''
        written = pypandoc.convert('./filter_test.md', to='html', format='md',
                                   outputfile=None, filters=filters)
        import re as re
        # This should not use the pandoc-citeproc module and will not find the
        # strings
        found = re.search(r'Fenner', written)
        self.assertTrue(found is None)
        found = re.search(r'10.1038', written)
        self.assertTrue(found is None)

    def test_conversion_error(self):
        # pandoc dies on wrong commandline arguments
        def f():
            pypandoc.convert('<h1>Primary Heading</h1>', 'md', format='html', extra_args=["--blah"])

        self.assertRaises(RuntimeError, f)

    def test_unicode_input(self):
        # make sure that pandoc always returns unicode and does not mishandle it
        expected = u'üäöîôû{0}======{0}{0}'.format(os.linesep)
        written = pypandoc.convert(u'<h1 id="üäöîôû">üäöîôû</h1>', 'md', format='html')
        self.assertTrue(isinstance(written, unicode_type))
        self.assertEqualExceptForNewlineEnd(expected, written)
        bytes = u'<h1 id="üäöîôû">üäöîôû</h1>'.encode("utf-8")
        written = pypandoc.convert(bytes, 'md', format='html')
        self.assertEqualExceptForNewlineEnd(expected, written)
        self.assertTrue(isinstance(written, unicode_type))

        # Only use german umlauts in th next test, as iso-8859-15 covers that
        expected = u'üäö€{0}===={0}{0}'.format(os.linesep)
        bytes = u'<h1 id="üäö€">üäö€</h1>'.encode("iso-8859-15")

        # Without encoding, this fails as we expect utf-8 per default
        def f():
            pypandoc.convert(bytes, 'md', format='html')

        self.assertRaises(RuntimeError, f)

        def f():
            # we have to use something which interprets '\xa4', so latin and -1 does not work :-/
            pypandoc.convert(bytes, 'md', format='html', encoding="utf-16")

        self.assertRaises(RuntimeError, f)
        # with the right encoding it should work...
        written = pypandoc.convert(bytes, 'md', format='html', encoding="iso-8859-15")
        self.assertEqualExceptForNewlineEnd(expected, written)
        self.assertTrue(isinstance(written, unicode_type))

    def test_conversion_from_non_plain_text_file(self):
        with closed_tempfile('.docx') as file_name:
            expected = u'some title{0}=========={0}{0}'.format(os.linesep)
            # let's just test conversion (to and) from docx, testing e.g. odt
            # as well would really be testing pandoc rather than pypandoc
            received = pypandoc.convert('#some title\n', to='docx', format='md', outputfile=file_name)
            self.assertEqualExceptForNewlineEnd("", received)
            received = pypandoc.convert(file_name, to='rst')
            self.assertEqualExceptForNewlineEnd(expected, received)

    def test_pdf_conversion(self):
        with closed_tempfile('.pdf') as file_name:
            ret = pypandoc.convert_text('#some title\n', to='pdf', format='md', outputfile=file_name)
            assert ret == ""
            with io.open(file_name, mode='rb') as f:
                written = f.read()
            assert written[:4] == b"%PDF"
            # TODO: find a test for the content?

        def f():
            # needs an outputfile
            pypandoc.convert_text('#some title\n', to='pdf', format='md')

        self.assertRaises(RuntimeError, f)

        # outputfile needs to end in pdf
        with closed_tempfile('.WRONG') as file_name:
            def f():
                pypandoc.convert_text('#some title\n', to='pdf', format='md', outputfile=file_name)

            self.assertRaises(RuntimeError, f)

        # no extensions allowed
        with closed_tempfile('.pdf') as file_name:
            def f():
                pypandoc.convert_text('#some title\n', to='pdf+somethign', format='md', outputfile=file_name)

            self.assertRaises(RuntimeError, f)

    def test_get_pandoc_path(self):
        result = pypandoc.get_pandoc_path()
        assert "pandoc" in result

    def test_call_with_nonexisting_file(self):
        # every entry must be rejected both as a path and (lacking a format)
        # as text. The comma after the file URL was missing before, which
        # silently concatenated it with '' and dropped the empty-string case.
        files = ['/file/does/not/exists.md',
                 'file:///file/does/not/exists.md',
                 '',
                 42,
                 None
                 ]

        def f(filepath):
            pypandoc.convert(filepath, 'rst')

        for filepath in files:
            self.assertRaises(RuntimeError, f, filepath)

        def f(filepath):
            pypandoc.convert_file(filepath, 'rst')

        for filepath in files:
            self.assertRaises(RuntimeError, f, filepath)

    def test_convert_text_with_existing_file(self):
        with closed_tempfile('.md', text='#some title\n') as file_name:
            received = pypandoc.convert_text(file_name, 'rst', format='md')
            self.assertTrue("title" not in received)

            # The following is a problematic case
            received = pypandoc.convert(file_name, 'rst', format='md')
            self.assertTrue("title" in received)

    def test_depreaction_warnings(self):
        # convert itself is deprecated...
        with assert_produces_warning(DeprecationWarning):
            pypandoc.convert('#some title\n', to='rst', format='md')

    def create_sample_lua(self):
        # ask pandoc itself for its bundled sample custom writer
        args = [pypandoc.get_pandoc_path(), '--print-default-data-file', 'sample.lua']
        p = subprocess.Popen(args, stdout=subprocess.PIPE)
        out, err = p.communicate()
        return out.decode('utf-8')

    def assertEqualExceptForNewlineEnd(self, expected, received):
        # output written to a file does not seem to have os.linesep
        # handle everything here by replacing the os linesep by a simple \n
        expected = expected.replace(os.linesep, "\n")
        received = received.replace(os.linesep, "\n")
        self.assertEqual(expected.rstrip('\n'), received.rstrip('\n'))


# Run the suite as soon as the module is loaded and report the result through
# the process exit code.
suite = unittest.TestLoader().loadTestsFromTestCase(TestPypandoc)
ret = unittest.TextTestRunner(verbosity=2).run(suite)
sys.exit(0 if ret.wasSuccessful() else 1)
# (archive member header) pypandoc-1.4/setup.cfg0000644000175000017500000000004613076637561015674 0ustar travistravis00000000000000
# [egg_info]
# tag_build =
# tag_date = 0