` # we actually replace the `
` in its entirety. # The `
` element may contain more than a single text content
# (`nl2br` can introduce a `
`). In this situation, `c.text` returns
# the very first content, ignore children contents or tail content.
# `len(c) == 0` is here to ensure there is only text in the `
`. if c.text and c.text.strip() == self.marker and len(c) == 0: for i in range(len(p)): if p[i] == c: p[i] = elem break def set_level(self, elem: etree.Element) -> None: """ Adjust header level according to base level. """ level = int(elem.tag[-1]) + self.base_level if level > 6: level = 6 elem.tag = 'h%d' % level def add_anchor(self, c: etree.Element, elem_id: str) -> None: anchor = etree.Element("a") anchor.text = c.text anchor.attrib["href"] = "#" + elem_id anchor.attrib["class"] = self.anchorlink_class c.text = "" for elem in c: anchor.append(elem) while len(c): c.remove(c[0]) c.append(anchor) def add_permalink(self, c: etree.Element, elem_id: str) -> None: permalink = etree.Element("a") permalink.text = ("%spara;" % AMP_SUBSTITUTE if self.use_permalinks is True else self.use_permalinks) permalink.attrib["href"] = "#" + elem_id permalink.attrib["class"] = self.permalink_class if self.permalink_title: permalink.attrib["title"] = self.permalink_title if self.permalink_leading: permalink.tail = c.text c.text = "" c.insert(0, permalink) else: c.append(permalink) def build_toc_div(self, toc_list: list) -> etree.Element: """ Return a string div given a toc list. 
""" div = etree.Element("div") div.attrib["class"] = self.toc_class # Add title to the div if self.title: header = etree.SubElement(div, "span") if self.title_class: header.attrib["class"] = self.title_class header.text = self.title def build_etree_ul(toc_list: list, parent: etree.Element) -> etree.Element: ul = etree.SubElement(parent, "ul") for item in toc_list: # List item link, to be inserted into the toc div li = etree.SubElement(ul, "li") link = etree.SubElement(li, "a") link.text = item.get('name', '') link.attrib["href"] = '#' + item.get('id', '') if item['children']: build_etree_ul(item['children'], li) return ul build_etree_ul(toc_list, div) if 'prettify' in self.md.treeprocessors: self.md.treeprocessors['prettify'].run(div) return div def run(self, doc: etree.Element) -> None: # Get a list of id attributes used_ids = set() for el in doc.iter(): if "id" in el.attrib: used_ids.add(el.attrib["id"]) toc_tokens = [] for el in doc.iter(): if isinstance(el.tag, str) and self.header_rgx.match(el.tag): self.set_level(el) innerhtml = render_inner_html(remove_fnrefs(el), self.md) name = strip_tags(innerhtml) # Do not override pre-existing ids if "id" not in el.attrib: el.attrib["id"] = unique(self.slugify(html.unescape(name), self.sep), used_ids) data_toc_label = '' if 'data-toc-label' in el.attrib: data_toc_label = run_postprocessors(unescape(el.attrib['data-toc-label']), self.md) # Overwrite name with sanitized value of `data-toc-label`. 
name = escape_cdata(strip_tags(data_toc_label)) # Remove the data-toc-label attribute as it is no longer needed del el.attrib['data-toc-label'] if int(el.tag[-1]) >= self.toc_top and int(el.tag[-1]) <= self.toc_bottom: toc_tokens.append({ 'level': int(el.tag[-1]), 'id': el.attrib["id"], 'name': name, 'html': innerhtml, 'data-toc-label': data_toc_label }) if self.use_anchors: self.add_anchor(el, el.attrib["id"]) if self.use_permalinks not in [False, None]: self.add_permalink(el, el.attrib["id"]) toc_tokens = nest_toc_tokens(toc_tokens) div = self.build_toc_div(toc_tokens) if self.marker: self.replace_marker(doc, div) # serialize and attach to markdown instance. toc = self.md.serializer(div) for pp in self.md.postprocessors: toc = pp.run(toc) self.md.toc_tokens = toc_tokens self.md.toc = toc class TocExtension(Extension): TreeProcessorClass = TocTreeprocessor def __init__(self, **kwargs): self.config = { 'marker': [ '[TOC]', 'Text to find and replace with Table of Contents. Set to an empty string to disable. ' 'Default: `[TOC]`.' ], 'title': [ '', 'Title to insert into TOC `
` element containing the escaped matching text. """
def __init__(self, pattern: str):
    """Initialise the processor with `pattern` and prepare its rendering settings."""
    InlineProcessor.__init__(self, pattern)
    self.tag = 'code'
    """ The tag of the rendered element. """
    # Placeholder used for an escaped backslash: the backslash's code point
    # wrapped between the `STX` and `ETX` marker characters.
    backslash_code = ord('\\')
    self.ESCAPED_BSLASH = '{}{}{}'.format(util.STX, backslash_code, util.ETX)
def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | str, int, int]:
    """
    Render a code span, or restore an escaped backslash run.

    When `group(3)` of the match is non-empty, return an
    [`Element`][xml.etree.ElementTree.Element] named after `self.tag` whose text is the stripped
    `group(3)`, escaped with [`code_escape`][markdown.util.code_escape] and wrapped in an
    [`AtomicString`][markdown.util.AtomicString].

    Otherwise return the text of `group(1)` with every double backslash replaced by
    `self.ESCAPED_BSLASH`.

    In both cases the second and third items are the start and end of the whole match.
    """
    code = m.group(3)
    if not code:
        # No code content: only backslash escaping applies to `group(1)`.
        escaped = m.group(1).replace('\\\\', self.ESCAPED_BSLASH)
        return escaped, m.start(0), m.end(0)

    el = etree.Element(self.tag)
    el.text = util.AtomicString(util.code_escape(code.strip()))
    return el, m.start(0), m.end(0)
class DoubleTagPattern(SimpleTagPattern):  # pragma: no cover
    """Return an ElementTree element nested in tag2 nested in tag1.

    `self.tag` is expected to hold the two tag names separated by a comma
    (outer tag first). Useful for strong emphasis etc.
    """

    def handleMatch(self, m: re.Match[str]) -> etree.Element:
        """
        Return an [`Element`][xml.etree.ElementTree.Element] in the following format:
        `<tag1><tag2>group(3)</tag2>group(4)</tag1>`.

        `group(4)` is only attached (as the tail of the inner element) when the
        match exposes exactly five groups.
        """
        outer_tag, inner_tag = self.tag.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.SubElement(outer, inner_tag)
        inner.text = m.group(3)
        group_count = len(m.groups())
        if group_count == 5:
            inner.tail = m.group(4)
        return outer
class DoubleTagInlineProcessor(SimpleTagInlineProcessor):
    """Return an ElementTree element nested in tag2 nested in tag1.

    `self.tag` is expected to hold the two tag names separated by a comma
    (outer tag first). Useful for strong emphasis etc.
    """

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]:  # pragma: no cover
        """
        Return an [`Element`][xml.etree.ElementTree.Element] in the following format:
        `<tag1><tag2>group(2)</tag2>group(3)</tag1>`, together with the start and
        end of the whole match.

        `group(3)` is only attached (as the tail of the inner element) when the
        match exposes exactly three groups.
        """
        outer_tag, inner_tag = self.tag.split(",")
        outer = etree.Element(outer_tag)
        inner = etree.SubElement(outer, inner_tag)
        inner.text = m.group(2)
        group_count = len(m.groups())
        if group_count == 3:
            inner.tail = m.group(3)
        return outer, m.start(0), m.end(0)
class HtmlInlineProcessor(InlineProcessor):
    """ Store raw inline html and return a placeholder. """

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[str, int, int]:
        """
        Stash the text of `group(1)` and return its placeholder.

        Inline placeholders and backslash escapes inside the matched text are
        resolved before the text is stored in `self.md.htmlStash`. The returned
        tuple is `(placeholder, match start, match end)`.
        """
        restored = self.unescape(m.group(1))
        rawhtml = self.backslash_unescape(restored)
        return self.md.htmlStash.store(rawhtml), m.start(0), m.end(0)

    def unescape(self, text: str) -> str:
        """ Return unescaped text given text with an inline placeholder. """
        try:
            stash = self.md.treeprocessors['inline'].stashed_nodes
        except KeyError:  # pragma: no cover
            # No inline treeprocessor registered: nothing can be looked up.
            return text

        def get_stash(m: re.Match[str]) -> str:
            # Replace one placeholder by the serialized form of its stashed node.
            node = stash.get(m.group(1))
            if node is not None:
                try:
                    return self.md.serializer(node)
                except Exception:
                    # Best effort: the stashed value could not be serialized,
                    # so fall back to its backslash-prefixed string form.
                    return r'\%s' % node
            # Unknown ids deliberately fall through with no explicit return value.

        return util.INLINE_PLACEHOLDER_RE.sub(get_stash, text)

    def backslash_unescape(self, text: str) -> str:
        """ Return text with backslash escapes undone (backslashes are restored). """
        try:
            escape_re = self.md.treeprocessors['unescape'].RE
        except KeyError:  # pragma: no cover
            # No unescape treeprocessor registered: leave the text untouched.
            return text

        # Each match carries a decimal code point in group 1; turn it back
        # into the character it stands for.
        return escape_re.sub(lambda found: chr(int(found.group(1))), text)
class AsteriskProcessor(InlineProcessor):
    """Emphasis processor for handling strong and em matches inside asterisks."""

    # Ordered list of patterns. Each item pairs a compiled regex with the name
    # of the builder to use ('double', 'double2' or 'single') and the tag name(s)
    # to create. The order matters: `handleMatch` uses the first pattern that
    # matches, and `parse_sub_patterns` only tries patterns that come *after*
    # the one used for the parent element.
    PATTERNS = [
        EmStrongItem(re.compile(EM_STRONG_RE, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
        EmStrongItem(re.compile(STRONG_EM_RE, re.DOTALL | re.UNICODE), 'double', 'em,strong'),
        EmStrongItem(re.compile(STRONG_EM3_RE, re.DOTALL | re.UNICODE), 'double2', 'strong,em'),
        EmStrongItem(re.compile(STRONG_RE, re.DOTALL | re.UNICODE), 'single', 'strong'),
        EmStrongItem(re.compile(EMPHASIS_RE, re.DOTALL | re.UNICODE), 'single', 'em')
    ]
    """ The various strong and emphasis patterns handled by this processor. """

    def build_single(self, m: re.Match[str], tag: str, idx: int) -> etree.Element:
        """Return single tag.

        Creates one `tag` element and fills it from `group(2)` of the match,
        letting `parse_sub_patterns` expand any nested emphasis found in it.
        `idx` is the index in `PATTERNS` of the pattern that produced `m`.
        """
        el1 = etree.Element(tag)
        text = m.group(2)
        self.parse_sub_patterns(text, el1, None, idx)
        return el1

    def build_double(self, m: re.Match[str], tags: str, idx: int) -> etree.Element:
        """Return double tag.

        `tags` is `"tag1,tag2"`. The result has the shape
        `<tag1><tag2>group(2)</tag2>group(3)</tag1>`; `group(3)` is only
        processed when the match exposes exactly three groups.
        """
        tag1, tag2 = tags.split(",")
        el1 = etree.Element(tag1)
        el2 = etree.Element(tag2)
        text = m.group(2)
        self.parse_sub_patterns(text, el2, None, idx)
        el1.append(el2)
        if len(m.groups()) == 3:
            text = m.group(3)
            # `el2` is passed as `last` so plain text lands in its tail,
            # i.e. after the inner element but still inside `el1`.
            self.parse_sub_patterns(text, el1, el2, idx)
        return el1

    def build_double2(self, m: re.Match[str], tags: str, idx: int) -> etree.Element:
        """Return double tags (variant 2): `<tag1>group(2)<tag2>group(3)</tag2></tag1>`.

        `tags` is `"tag1,tag2"`. The outer element receives `group(2)` first,
        then the inner element (filled from `group(3)`) is appended.
        """
        tag1, tag2 = tags.split(",")
        el1 = etree.Element(tag1)
        el2 = etree.Element(tag2)
        text = m.group(2)
        self.parse_sub_patterns(text, el1, None, idx)
        text = m.group(3)
        el1.append(el2)
        self.parse_sub_patterns(text, el2, None, idx)
        return el1

    def parse_sub_patterns(
        self, data: str, parent: etree.Element, last: etree.Element | None, idx: int
    ) -> None:
        """
        Parses sub patterns.

        `data`: text to evaluate.

        `parent`: Parent to attach text and sub elements to.

        `last`: Last appended child to parent. Can also be None if parent has no children.

        `idx`: Current pattern index that was used to evaluate the parent.

        The method mutates `parent` (and the tail of `last`) in place and returns nothing.
        """

        # `offset` marks the start of the plain text not yet attached to the tree;
        # `pos` is the scanning cursor.
        offset = 0
        pos = 0

        length = len(data)
        while pos < length:
            # Find the start of potential emphasis or strong tokens
            if self.compiled_re.match(data, pos):
                matched = False
                # See if we can match an emphasis/strong pattern
                for index, item in enumerate(self.PATTERNS):
                    # Only evaluate patterns that are after what was used on the parent
                    if index <= idx:
                        continue
                    m = item.pattern.match(data, pos)
                    if m:
                        # Append child nodes to parent
                        # Text nodes should be appended to the last
                        # child if present, and if not, it should
                        # be added as the parent's text node.
                        text = data[offset:m.start(0)]
                        if text:
                            if last is not None:
                                last.tail = text
                            else:
                                parent.text = text
                        el = self.build_element(m, item.builder, item.tags, index)
                        parent.append(el)
                        last = el
                        # Move our position past the matched hunk
                        offset = pos = m.end(0)
                        matched = True
                        # NOTE: there is no `break` here; the remaining patterns
                        # are still tried against the updated `pos`.
                if not matched:
                    # We matched nothing, move on to the next character
                    pos += 1
            else:
                # Increment position as no potential emphasis start was found.
                pos += 1

        # Append any leftover text as a text node.
        text = data[offset:]
        if text:
            if last is not None:
                last.tail = text
            else:
                parent.text = text

    def build_element(self, m: re.Match[str], builder: str, tags: str, index: int) -> etree.Element:
        """Element builder.

        Dispatches on the `builder` name: 'double2' and 'double' go to their
        dedicated builders, anything else is treated as 'single'.
        """

        if builder == 'double2':
            return self.build_double2(m, tags, index)
        elif builder == 'double':
            return self.build_double(m, tags, index)
        else:
            return self.build_single(m, tags, index)

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]:
        """Parse patterns.

        Tries each entry of `PATTERNS` in order at the start position of `m`
        and builds an element from the first one that matches. Returns
        `(element, start, end)` for that match, or `(None, None, None)` when
        no pattern matches.
        """

        el = None
        start = None
        end = None

        for index, item in enumerate(self.PATTERNS):
            # Re-match with the specific pattern; `m` only located the candidate position.
            m1 = item.pattern.match(data, m.start(0))
            if m1:
                start = m1.start(0)
                end = m1.end(0)
                el = self.build_element(m1, item.builder, item.tags, index)
                break
        return el, start, end
class UnderscoreProcessor(AsteriskProcessor):
    """Emphasis processor for handling strong and em matches inside underscores."""

    # Same layout as the parent's list (compiled regex, builder name, tag names),
    # built from the underscore regexes. All processing logic is inherited;
    # only this table differs.
    PATTERNS = [
        EmStrongItem(re.compile(EM_STRONG2_RE, re.DOTALL | re.UNICODE), 'double', 'strong,em'),
        EmStrongItem(re.compile(STRONG_EM2_RE, re.DOTALL | re.UNICODE), 'double', 'em,strong'),
        EmStrongItem(re.compile(SMART_STRONG_EM_RE, re.DOTALL | re.UNICODE), 'double2', 'strong,em'),
        EmStrongItem(re.compile(SMART_STRONG_RE, re.DOTALL | re.UNICODE), 'single', 'strong'),
        EmStrongItem(re.compile(SMART_EMPHASIS_RE, re.DOTALL | re.UNICODE), 'single', 'em')
    ]
    """ The various strong and emphasis patterns handled by this processor. """
class LinkInlineProcessor(InlineProcessor):
    """ Return a link element from the given match. """

    # Matches the opening `(` and, optionally, a complete `<url>` destination
    # with an optional quoted title and the closing `)`. When only the `(`
    # matches, `getLink` falls back to scanning character by character.
    RE_LINK = re.compile(r'''\(\s*(?:(<[^<>]*>)\s*(?:('[^']*'|"[^"]*")\s*)?\))?''', re.DOTALL | re.UNICODE)
    # Used to turn every whitespace character of a title into a plain space.
    RE_TITLE_CLEAN = re.compile(r'\s')

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]:
        """ Return an `a` [`Element`][xml.etree.ElementTree.Element] or `(None, None, None)`.

        The link text is read starting at the end of `m`, then the `(...)` part
        is read right after it. If either part cannot be parsed, the match is
        rejected with `(None, None, None)`. On success the tuple is
        `(element, start of m, index just past the link)`.
        """
        text, index, handled = self.getText(data, m.end(0))

        if not handled:
            return None, None, None

        href, title, index, handled = self.getLink(data, index)
        if not handled:
            return None, None, None

        el = etree.Element("a")
        el.text = text

        el.set("href", href)

        if title is not None:
            el.set("title", title)

        return el, m.start(0), index

    def getLink(self, data: str, index: int) -> tuple[str, str | None, int, bool]:
        """Parse data between `()` of `[Text]()` allowing recursive `()`.

        `data` is the full text and `index` the position where the `(` is
        expected. Returns `(href, title, index, handled)`: `title` is `None`
        when no title was found, `index` is the position after the parsed
        link, and `handled` tells whether a complete link was recognised.
        """

        href = ''
        title: str | None = None
        handled = False

        m = self.RE_LINK.match(data, pos=index)
        if m and m.group(1):
            # Matches [Text](<link> "title")
            # Strip the surrounding angle brackets (and quotes for the title).
            href = m.group(1)[1:-1].strip()
            if m.group(2):
                title = m.group(2)[1:-1]
            index = m.end(0)
            handled = True
        elif m:
            # Track bracket nesting and index in string
            bracket_count = 1
            backtrack_count = 1
            start_index = m.end()
            index = start_index
            last_bracket = -1

            # Primary (first found) quote tracking.
            quote: str | None = None
            start_quote = -1
            exit_quote = -1
            ignore_matches = False

            # Secondary (second found) quote tracking.
            alt_quote = None
            start_alt_quote = -1
            exit_alt_quote = -1

            # Track last character
            last = ''

            # `index` is advanced by hand at the end of each iteration, so while
            # a character is being examined `index == pos` and `index + 1` is
            # the position just after that character.
            for pos in range(index, len(data)):
                c = data[pos]
                if c == '(':
                    # Count nested (
                    # Don't increment the bracket count if we are sure we're in a title.
                    if not ignore_matches:
                        bracket_count += 1
                    elif backtrack_count > 0:
                        backtrack_count -= 1
                elif c == ')':
                    # Match nested ) to (
                    # Don't decrement if we are sure we are in a title that is unclosed.
                    if ((exit_quote != -1 and quote == last) or (exit_alt_quote != -1 and alt_quote == last)):
                        # A quote was closed right before this `)`: the link ends here.
                        bracket_count = 0
                    elif not ignore_matches:
                        bracket_count -= 1
                    elif backtrack_count > 0:
                        backtrack_count -= 1
                        # We've found our backup end location if the title doesn't resolve.
                        if backtrack_count == 0:
                            last_bracket = index + 1

                elif c in ("'", '"'):
                    # Quote has started
                    if not quote:
                        # We'll assume we are now in a title.
                        # Brackets are quoted, so no need to match them (except for the final one).
                        ignore_matches = True
                        backtrack_count = bracket_count
                        bracket_count = 1
                        start_quote = index + 1
                        quote = c
                    # Secondary quote (in case the first doesn't resolve): [text](link'"title")
                    elif c != quote and not alt_quote:
                        start_alt_quote = index + 1
                        alt_quote = c
                    # Update primary quote match
                    elif c == quote:
                        exit_quote = index + 1
                    # Update secondary quote match
                    elif alt_quote and c == alt_quote:
                        exit_alt_quote = index + 1

                index += 1

                # Link is closed, so let's break out of the loop
                if bracket_count == 0:
                    # Get the title if we closed a title string right before link closed
                    if exit_quote >= 0 and quote == last:
                        href = data[start_index:start_quote - 1]
                        title = ''.join(data[start_quote:exit_quote - 1])
                    elif exit_alt_quote >= 0 and alt_quote == last:
                        href = data[start_index:start_alt_quote - 1]
                        title = ''.join(data[start_alt_quote:exit_alt_quote - 1])
                    else:
                        href = data[start_index:index - 1]
                    break

                # Remember the last non-space character so a closing quote
                # followed by spaces is still recognised before `)`.
                if c != ' ':
                    last = c

            # We have a scenario: `[test](link"notitle)`
            # When we enter a string, we stop tracking bracket resolution in the main counter,
            # but we do keep a backup counter up until we discover where we might resolve all brackets
            # if the title string fails to resolve.
            if bracket_count != 0 and backtrack_count == 0:
                href = data[start_index:last_bracket - 1]
                index = last_bracket
                bracket_count = 0

            handled = bracket_count == 0

        if title is not None:
            title = self.RE_TITLE_CLEAN.sub(' ', dequote(self.unescape(title.strip())))

        href = self.unescape(href).strip()

        return href, title, index, handled

    def getText(self, data: str, index: int) -> tuple[str, int, bool]:
        """Parse the content between `[]` of the start of an image or link
        resolving nested square brackets.

        Scanning starts at `index`, which is expected to be just after the
        opening `[`. Returns `(text, index, closed)`: `text` excludes the final
        `]` but keeps nested brackets, `index` is the position after the last
        character consumed, and `closed` is `True` only when the matching `]`
        was found.
        """
        bracket_count = 1
        text = []
        for pos in range(index, len(data)):
            c = data[pos]
            if c == ']':
                bracket_count -= 1
            elif c == '[':
                bracket_count += 1
            index += 1
            if bracket_count == 0:
                # The closing bracket itself is consumed but not recorded.
                break
            text.append(c)
        return ''.join(text), index, bracket_count == 0
class ImageInlineProcessor(LinkInlineProcessor):
    """ Return an `img` element from the given match. """

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]:
        """ Return an `img` [`Element`][xml.etree.ElementTree.Element] or `(None, None, None)`.

        The bracketed text becomes the (unescaped) `alt` attribute and the
        parenthesised part supplies `src` and, when present, `title`.
        """
        alt_text, cursor, ok = self.getText(data, m.end(0))
        if ok:
            src, title, cursor, ok = self.getLink(data, cursor)
        if not ok:
            # Either the text or the link part could not be parsed.
            return None, None, None

        image = etree.Element("img")
        image.set("src", src)
        if title is not None:
            image.set("title", title)
        image.set('alt', self.unescape(alt_text))
        return image, m.start(0), cursor
class ReferenceInlineProcessor(LinkInlineProcessor):
    """ Match to a stored reference and return link element. """

    # Collapses any run of whitespace (including line breaks) inside an id.
    NEWLINE_CLEANUP_RE = re.compile(r'\s+', re.MULTILINE)

    # Matches the `[id]` part that follows the link text, with at most one
    # whitespace character in front of it.
    RE_LINK = re.compile(r'\s?\[([^\]]*)\]', re.DOTALL | re.UNICODE)

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element | None, int | None, int | None]:
        """
        Return [`Element`][xml.etree.ElementTree.Element] returned by `makeTag` method or `(None, None, None)`.

        When the text and id parse correctly but the id is not a known
        reference, `None` is returned together with the real start and end
        positions.
        """
        text, cursor, ok = self.getText(data, m.end(0))
        if not ok:
            return None, None, None

        ref_id, end, ok = self.evalId(data, cursor, text)
        if not ok:
            return None, None, None

        # Clean up line breaks in id
        ref_id = self.NEWLINE_CLEANUP_RE.sub(' ', ref_id)

        if ref_id in self.md.references:
            href, title = self.md.references[ref_id]
            return self.makeTag(href, title, text), m.start(0), end

        # ignore undefined refs
        return None, m.start(0), end

    def evalId(self, data: str, index: int, text: str) -> tuple[str | None, int, bool]:
        """
        Evaluate the id portion of `[ref][id]`.

        If `[ref][]` use `[ref]`.

        Returns `(id, end, handled)`; the id is lower-cased, and `handled` is
        `False` (with id `None`) when no `[id]` part follows.
        """
        found = self.RE_LINK.match(data, pos=index)
        if not found:
            return None, index, False

        # An empty explicit id falls back to the link text itself.
        ref_id = found.group(1).lower() or text.lower()
        return ref_id, found.end(0), True

    def makeTag(self, href: str, title: str, text: str) -> etree.Element:
        """ Return an `a` [`Element`][xml.etree.ElementTree.Element]. """
        link = etree.Element('a')
        link.set('href', href)
        if title:
            link.set('title', title)
        link.text = text
        return link
class ShortReferenceInlineProcessor(ReferenceInlineProcessor):
    """Short form of reference: `[google]`. """

    def evalId(self, data: str, index: int, text: str) -> tuple[str, int, bool]:
        """Evaluate the id of `[ref]`.

        The lower-cased link text is the id; nothing more is consumed, so
        `index` is returned unchanged and the result is always handled.
        """
        ref_id = text.lower()
        return ref_id, index, True
class ImageReferenceInlineProcessor(ReferenceInlineProcessor):
    """ Match to a stored reference and return `img` element. """

    def makeTag(self, href: str, title: str, text: str) -> etree.Element:
        """ Return an `img` [`Element`][xml.etree.ElementTree.Element].

        `href` becomes `src`, a non-empty `title` becomes `title`, and the
        unescaped `text` becomes `alt`.
        """
        image = etree.Element("img")
        image.set("src", href)
        if title:
            image.set("title", title)
        alt_text = self.unescape(text)
        image.set("alt", alt_text)
        return image
class ShortImageReferenceInlineProcessor(ImageReferenceInlineProcessor):
    """ Short form of image reference: `![ref]`. """

    def evalId(self, data: str, index: int, text: str) -> tuple[str, int, bool]:
        """Evaluate the id of `[ref]`.

        The lower-cased text is the id; nothing more is consumed, so `index`
        is returned unchanged and the result is always handled.
        """
        ref_id = text.lower()
        return ref_id, index, True
class AutolinkInlineProcessor(InlineProcessor):
    """ Return a link Element given an auto-link (a URL wrapped in angle brackets). """

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]:
        """ Return an `a` [`Element`][xml.etree.ElementTree.Element] of `group(1)`.

        The unescaped `group(1)` is the `href`; the raw `group(1)` is shown as
        an atomic (not further processed) link text.
        """
        target = m.group(1)
        link = etree.Element("a")
        link.set('href', self.unescape(target))
        link.text = util.AtomicString(target)
        return link, m.start(0), m.end(0)
class AutomailInlineProcessor(InlineProcessor):
    """
    Return a `mailto` link Element given an auto-mail link (an address wrapped in angle brackets).
    """

    def handleMatch(self, m: re.Match[str], data: str) -> tuple[etree.Element, int, int]:
        """ Return an [`Element`][xml.etree.ElementTree.Element] containing a `mailto` link of `group(1)`.

        Both the visible text and the `href` are written as character
        entities, using `util.AMP_SUBSTITUTE` in place of the ampersand.
        """
        address = self.unescape(m.group(1))
        scheme = "mailto:"
        if address.startswith(scheme):
            # Drop a scheme the author already typed; it is re-added below.
            address = address[len(scheme):]

        def codepoint2name(code: int) -> str:
            """Return entity definition by code, or the code if not defined."""
            entity = entities.codepoint2name.get(code)
            if not entity:
                return "%s#%d;" % (util.AMP_SUBSTITUTE, code)
            return "{}{};".format(util.AMP_SUBSTITUTE, entity)

        link = etree.Element('a')

        # Visible text: named entities where available, numeric otherwise.
        encoded_text = ''.join(codepoint2name(ord(char)) for char in address)
        link.text = util.AtomicString(encoded_text)

        # Target: every character as a numeric entity.
        target = "mailto:" + address
        encoded_target = "".join(
            util.AMP_SUBSTITUTE + '#%d;' % ord(char) for char in target
        )
        link.set('href', encoded_target)
        return link, m.start(0), m.end(0)
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1723823708.0
markdown-3.7/markdown/postprocessors.py 0000644 0001751 0000177 00000011307 14657673134 020140 0 ustar 00runner docker # Python Markdown
# A Python implementation of John Gruber's Markdown.
# Documentation: https://python-markdown.github.io/
# GitHub: https://github.com/Python-Markdown/markdown/
# PyPI: https://pypi.org/project/Markdown/
# Started by Manfred Stienstra (http://www.dwerg.net/).
# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
# Currently maintained by Waylan Limberg (https://github.com/waylan),
# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
# Copyright 2004 Manfred Stienstra (the original version)
# License: BSD (see LICENSE.md for details).
"""
Post-processors run on the text of the entire document after it has been serialized into a string.
Postprocessors should be used to work with the text just before output. Usually, they are used to add
back sections that were extracted in a preprocessor, fix up outgoing encodings, or wrap the whole
document.
"""
from __future__ import annotations
from collections import OrderedDict
from typing import TYPE_CHECKING, Any
from . import util
import re
if TYPE_CHECKING: # pragma: no cover
from markdown import Markdown
def build_postprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Postprocessor]:
    """ Build the default postprocessors for Markdown.

    Registers the raw-HTML restorer (priority 30) and the ampersand
    substitution (priority 20) in a fresh registry and returns it.
    """
    registry = util.Registry()
    defaults = (
        (RawHtmlPostprocessor, (md,), 'raw_html', 30),
        (AndSubstitutePostprocessor, (), 'amp_substitute', 20),
    )
    for processor_cls, args, name, priority in defaults:
        registry.register(processor_cls(*args), name, priority)
    return registry
class Postprocessor(util.Processor):
    """
    Postprocessors are run after the ElementTree is converted back into text.

    Each Postprocessor implements a `run` method that takes a pointer to a
    text string, modifies it as necessary and returns a text string.

    Postprocessors must extend `Postprocessor`.

    """

    def run(self, text: str) -> str:
        """
        Subclasses of `Postprocessor` should implement a `run` method, which
        takes the html document as a single text string and returns a
        (possibly modified) string.

        This base implementation does nothing and returns `None`.
        """
        pass  # pragma: no cover
class RawHtmlPostprocessor(Postprocessor):
    """ Restore raw html to the document. """

    # Captures the tag name (or the leading `!`, `?`, ... marker) at the very
    # start of a raw HTML snippet.
    BLOCK_LEVEL_REGEX = re.compile(r'^\<\/?([^ >]+)')

    def run(self, text: str) -> str:
        """ Iterate over html stash and restore html.

        Every placeholder found in `text` is replaced by its stashed raw HTML.
        For block-level HTML, a placeholder that sits alone inside `<p>...</p>`
        is replaced together with the paragraph wrapper, so the block is not
        left nested in a paragraph. The method calls itself again until a pass
        changes nothing, which resolves placeholders contained in restored HTML.
        """
        replacements = OrderedDict()
        for i in range(self.md.htmlStash.html_counter):
            html = self.stash_to_string(self.md.htmlStash.rawHtmlBlocks[i])
            if self.isblocklevel(html):
                # Block-level HTML also replaces its surrounding paragraph.
                replacements["<p>{}</p>".format(
                    self.md.htmlStash.get_placeholder(i))] = html
            replacements[self.md.htmlStash.get_placeholder(i)] = html

        def substitute_match(m: re.Match[str]) -> str:
            key = m.group(0)

            if key not in replacements:
                # `key` is a `<p>`-wrapped placeholder whose HTML is not
                # block-level: strip `<p>` (3 chars) and `</p>` (4 chars),
                # substitute the bare placeholder and keep the paragraph.
                if key[3:-4] in replacements:
                    return f'<p>{ replacements[key[3:-4]] }</p>'
                else:
                    # Unknown placeholder: leave it untouched.
                    return key

            return replacements[key]

        if replacements:
            base_placeholder = util.HTML_PLACEHOLDER % r'([0-9]+)'
            # Try the paragraph-wrapped form first so it wins over the bare form.
            pattern = re.compile(f'<p>{ base_placeholder }</p>|{ base_placeholder }')
            processed_text = pattern.sub(substitute_match, text)
        else:
            return text

        if processed_text == text:
            return processed_text
        else:
            # Something was substituted; restored HTML may hold more placeholders.
            return self.run(processed_text)

    def isblocklevel(self, html: str) -> bool:
        """ Check if a block of HTML is block-level.

        Snippets whose first token starts with `!`, `?`, `@` or `%` (comments,
        processing instructions, templating markers, ...) count as block-level;
        otherwise the decision is delegated to `self.md.is_block_level` for the
        tag name. Text that does not start with a tag is not block-level.
        """
        m = self.BLOCK_LEVEL_REGEX.match(html)
        if m:
            if m.group(1)[0] in ('!', '?', '@', '%'):
                # Comment, PHP etc...
                return True
            return self.md.is_block_level(m.group(1))
        return False

    def stash_to_string(self, text: str) -> str:
        """ Convert a stashed object to a string. """
        return str(text)
class AndSubstitutePostprocessor(Postprocessor):
    """ Restore valid entities """

    def run(self, text: str) -> str:
        """Return `text` with every `util.AMP_SUBSTITUTE` marker turned back into `&`."""
        return text.replace(util.AMP_SUBSTITUTE, "&")
@util.deprecated(
    "This class is deprecated and will be removed in the future; "
    "use [`UnescapeTreeprocessor`][markdown.treeprocessors.UnescapeTreeprocessor] instead."
)
class UnescapePostprocessor(Postprocessor):
    """ Restore escaped chars. """

    # A decimal code point enclosed between the `STX` and `ETX` markers.
    RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX))

    def unescape(self, m: re.Match[str]) -> str:
        """Return the character whose code point is captured in `group(1)`."""
        code_point = int(m.group(1))
        return chr(code_point)

    def run(self, text: str) -> str:
        """Return `text` with every escaped-character marker replaced by its character."""
        restored = self.RE.sub(self.unescape, text)
        return restored
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1723823708.0
markdown-3.7/markdown/preprocessors.py 0000644 0001751 0000177 00000006230 14657673134 017740 0 ustar 00runner docker # Python Markdown
# A Python implementation of John Gruber's Markdown.
# Documentation: https://python-markdown.github.io/
# GitHub: https://github.com/Python-Markdown/markdown/
# PyPI: https://pypi.org/project/Markdown/
# Started by Manfred Stienstra (http://www.dwerg.net/).
# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
# Currently maintained by Waylan Limberg (https://github.com/waylan),
# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
# Copyright 2004 Manfred Stienstra (the original version)
# License: BSD (see LICENSE.md for details).
"""
Preprocessors work on source text before it is broken down into its individual parts.
This is an excellent place to clean up bad characters or to extract portions for later
processing that the parser may otherwise choke on.
"""
from __future__ import annotations
from typing import TYPE_CHECKING, Any
from . import util
from .htmlparser import HTMLExtractor
import re
if TYPE_CHECKING: # pragma: no cover
from markdown import Markdown
def build_preprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Preprocessor]:
    """ Build and return the default set of preprocessors used by Markdown.

    Whitespace normalisation is registered with priority 30 and raw HTML block
    extraction with priority 20.
    """
    registry = util.Registry()
    defaults = (
        (NormalizeWhitespace, 'normalize_whitespace', 30),
        (HtmlBlockPreprocessor, 'html_block', 20),
    )
    for processor_cls, name, priority in defaults:
        registry.register(processor_cls(md), name, priority)
    return registry
class Preprocessor(util.Processor):
    """
    Preprocessors are run after the text is broken into lines.

    Each preprocessor implements a `run` method that takes a pointer to a
    list of lines of the document, modifies it as necessary and returns
    either the same pointer or a pointer to a new list.

    Preprocessors must extend `Preprocessor`.

    """
    def run(self, lines: list[str]) -> list[str]:
        """
        Each subclass of `Preprocessor` should override the `run` method, which
        takes the document as a list of strings split by newlines and returns
        the (possibly modified) list of lines.

        This base implementation does nothing and returns `None`.
        """
        pass  # pragma: no cover
class NormalizeWhitespace(Preprocessor):
    """ Normalize whitespace for consistent parsing. """

    def run(self, lines: list[str]) -> list[str]:
        """
        Return the document lines with whitespace normalised.

        The steps, in order: remove the `STX`/`ETX` marker characters, convert
        all line endings to `\\n` and append two trailing newlines, expand tabs
        to `self.md.tab_length` columns, and empty out lines that consist only
        of spaces.
        """
        source = '\n'.join(lines)
        # The marker characters are reserved for internal placeholders.
        for marker in (util.STX, util.ETX):
            source = source.replace(marker, "")
        # Order matters: handle `\r\n` before lone `\r`.
        source = source.replace("\r\n", "\n")
        source = source.replace("\r", "\n")
        source = source + "\n\n"
        source = source.expandtabs(self.md.tab_length)
        # Lines holding nothing but spaces become truly empty.
        source = re.sub(r'(?<=\n) +\n', '\n', source)
        return source.split('\n')
class HtmlBlockPreprocessor(Preprocessor):
    """
    Remove html blocks from the text and store them for later retrieval.

    The raw HTML is stored in the [`htmlStash`][markdown.util.HtmlStash] of the
    [`Markdown`][markdown.Markdown] instance.
    """

    def run(self, lines: list[str]) -> list[str]:
        """Feed the whole document through an `HTMLExtractor` and return the
        cleaned document (the extractor's `cleandoc`) split back into lines."""
        document = '\n'.join(lines)
        extractor = HTMLExtractor(self.md)
        extractor.feed(document)
        extractor.close()
        cleaned = ''.join(extractor.cleandoc)
        return cleaned.split('\n')
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1723823708.0
markdown-3.7/markdown/serializers.py 0000644 0001751 0000177 00000016006 14657673134 017365 0 ustar 00runner docker # Add x/html serialization to `Elementree`
# Taken from ElementTree 1.3 preview with slight modifications
#
# Copyright (c) 1999-2007 by Fredrik Lundh. All rights reserved.
#
# fredrik@pythonware.com
# https://www.pythonware.com/
#
# --------------------------------------------------------------------
# The ElementTree toolkit is
#
# Copyright (c) 1999-2007 by Fredrik Lundh
#
# By obtaining, using, and/or copying this software and/or its
# associated documentation, you agree that you have read, understood,
# and will comply with the following terms and conditions:
#
# Permission to use, copy, modify, and distribute this software and
# its associated documentation for any purpose and without fee is
# hereby granted, provided that the above copyright notice appears in
# all copies, and that both that copyright notice and this permission
# notice appear in supporting documentation, and that the name of
# Secret Labs AB or the author not be used in advertising or publicity
# pertaining to distribution of the software without specific, written
# prior permission.
#
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
# OF THIS SOFTWARE.
# --------------------------------------------------------------------
"""
Python-Markdown provides two serializers which render [`ElementTree.Element`][xml.etree.ElementTree.Element]
objects to a string of HTML. Both functions wrap the same underlying code with only a few minor
differences as outlined below:
1. Empty (self-closing) tags are rendered as `<tag>` for HTML and as `<tag />` for XHTML.
2. Boolean attributes are rendered as `attrname` for HTML and as `attrname="attrname"` for XHTML.
"""
from __future__ import annotations
from xml.etree.ElementTree import ProcessingInstruction
from xml.etree.ElementTree import Comment, ElementTree, Element, QName, HTML_EMPTY
import re
from typing import Callable, Literal, NoReturn
# Only the two serializer entry points are part of the public API.
__all__ = ['to_html_string', 'to_xhtml_string']

# Matches an ampersand that does NOT start a decimal (`&#123;`), hexadecimal
# (`&#x1f;`) or named (`&name;`) entity, i.e. one that still needs escaping.
RE_AMP = re.compile(r'&(?!(?:\#[0-9]+|\#x[0-9a-f]+|[0-9a-z]+);)', re.I)
def _raise_serialization_error(text: str) -> NoReturn:  # pragma: no cover
    """Raise a `TypeError` reporting that `text` (and its type) cannot be serialized."""
    type_name = type(text).__name__
    message = "cannot serialize {!r} (type {})".format(text, type_name)
    raise TypeError(message)
def _escape_cdata(text) -> str:
    """Escape character data for use as HTML text content.

    Ampersands that do not already start an entity become `&amp;`, and `<` and
    `>` become `&lt;` and `&gt;`. A value that is not string-like is reported
    through `_raise_serialization_error` (a `TypeError`).
    """
    # escape character data
    try:
        # it's worth avoiding do-nothing calls for strings that are
        # shorter than 500 character, or so.  assume that's, by far,
        # the most common case in most applications.
        if "&" in text:
            # Only replace & when not part of an entity
            text = RE_AMP.sub('&amp;', text)
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        return text
    except (TypeError, AttributeError):  # pragma: no cover
        _raise_serialization_error(text)
def _escape_attrib(text: str) -> str:
    """Escape an attribute value, including line breaks.

    Ampersands that do not already start an entity become `&amp;`; `<`, `>`
    and `"` become `&lt;`, `&gt;` and `&quot;`; a newline becomes the numeric
    entity `&#10;` so it survives inside the quoted attribute. A value that is
    not string-like is reported through `_raise_serialization_error`.
    """
    # escape attribute value
    try:
        if "&" in text:
            # Only replace & when not part of an entity
            text = RE_AMP.sub('&amp;', text)
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        if "\n" in text:
            text = text.replace("\n", "&#10;")
        return text
    except (TypeError, AttributeError):  # pragma: no cover
        _raise_serialization_error(text)
def _escape_attrib_html(text: str) -> str:
    """Escape an attribute value for HTML output.

    Ampersands that do not already start an entity become `&amp;`; `<`, `>`
    and `"` become `&lt;`, `&gt;` and `&quot;`. Unlike `_escape_attrib`,
    newlines are left untouched. A value that is not string-like is reported
    through `_raise_serialization_error`.
    """
    # escape attribute value
    try:
        if "&" in text:
            # Only replace & when not part of an entity
            text = RE_AMP.sub('&amp;', text)
        if "<" in text:
            text = text.replace("<", "&lt;")
        if ">" in text:
            text = text.replace(">", "&gt;")
        if "\"" in text:
            text = text.replace("\"", "&quot;")
        return text
    except (TypeError, AttributeError):  # pragma: no cover
        _raise_serialization_error(text)
def _serialize_html(write: Callable[[str], None], elem: Element, format: Literal["html", "xhtml"]) -> None:
tag = elem.tag
text = elem.text
if tag is Comment:
write("" % _escape_cdata(text))
elif tag is ProcessingInstruction:
write("%s?>" % _escape_cdata(text))
elif tag is None:
if text:
write(_escape_cdata(text))
for e in elem:
_serialize_html(write, e, format)
else:
namespace_uri = None
if isinstance(tag, QName):
# `QNAME` objects store their data as a string: `{uri}tag`
if tag.text[:1] == "{":
namespace_uri, tag = tag.text[1:].split("}", 1)
else:
raise ValueError('QName objects must define a tag.')
write("<" + tag)
items = elem.items()
if items:
items = sorted(items) # lexical order
for k, v in items:
if isinstance(k, QName):
# Assume a text only `QName`
k = k.text
if isinstance(v, QName):
# Assume a text only `QName`
v = v.text
else:
v = _escape_attrib_html(v)
if k == v and format == 'html':
# handle boolean attributes
write(" %s" % v)
else:
write(' {}="{}"'.format(k, v))
if namespace_uri:
write(' xmlns="%s"' % (_escape_attrib(namespace_uri)))
if format == "xhtml" and tag.lower() in HTML_EMPTY:
write(" />")
else:
write(">")
if text:
if tag.lower() in ["script", "style"]:
write(text)
else:
write(_escape_cdata(text))
for e in elem:
_serialize_html(write, e, format)
if tag.lower() not in HTML_EMPTY:
write("" + tag + ">")
if elem.tail:
write(_escape_cdata(elem.tail))
def _write_html(root: Element, format: Literal["html", "xhtml"] = "html") -> str:
    """ Serialize `root` in the given `format` and return the result as one string. """
    assert root is not None
    chunks: list[str] = []
    _serialize_html(chunks.append, root, format)
    return "".join(chunks)
# --------------------------------------------------------------------
# public functions
def to_html_string(element: Element) -> str:
    """ Serialize element and its children to a string of HTML5. """
    root = ElementTree(element).getroot()
    return _write_html(root, format="html")
def to_xhtml_string(element: Element) -> str:
    """ Serialize element and its children to a string of XHTML. """
    root = ElementTree(element).getroot()
    return _write_html(root, format="xhtml")
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1723823708.0
markdown-3.7/markdown/test_tools.py 0000644 0001751 0000177 00000020726 14657673134 017234 0 ustar 00runner docker # Python Markdown
# A Python implementation of John Gruber's Markdown.
# Documentation: https://python-markdown.github.io/
# GitHub: https://github.com/Python-Markdown/markdown/
# PyPI: https://pypi.org/project/Markdown/
# Started by Manfred Stienstra (http://www.dwerg.net/).
# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
# Currently maintained by Waylan Limberg (https://github.com/waylan),
# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
# Copyright 2004 Manfred Stienstra (the original version)
# License: BSD (see LICENSE.md for details).
""" A collection of tools for testing the Markdown code base and extensions. """
from __future__ import annotations
import os
import sys
import unittest
import textwrap
from typing import Any
from . import markdown, Markdown, util
try:
    import tidylib
except ImportError:
    # `tidylib` is optional. `None` marks it as unavailable so that callers
    # can test for it before normalizing output.
    tidylib = None
__all__ = ['TestCase', 'LegacyTestCase', 'Kwargs']
class TestCase(unittest.TestCase):
    """
    A [`unittest.TestCase`][] subclass with helpers for testing Markdown output.

    Set `default_kwargs` to a `dict` of keywords which should be passed to
    Markdown by every test of the class. Individual tests may override them.

    `assertMarkdownRenders` takes the source text, the expected output and any
    extra keywords for Markdown. The actual and expected output are compared with
    `TestCase.assertMultiLineEqual`, so a failure raises an `AssertionError`
    carrying a diff.

    `dedent` is available to dedent triple-quoted strings if necessary.

    In all other respects, behaves as `unittest.TestCase`.
    """

    default_kwargs: dict[str, Any] = {}
    """ Default options to pass to Markdown for each test. """

    def assertMarkdownRenders(self, source, expected, expected_attrs=None, **kwargs):
        """
        Test that source Markdown text renders to expected output with given keywords.

        `expected_attrs` accepts a `dict` mapping attribute names of the `Markdown`
        instance to the value each should have after `source` was converted. The
        attributes are checked with `TestCase.assertEqual` after the output itself
        has been compared.
        """
        # Per-call keywords win over the class level defaults.
        options = {**self.default_kwargs, **kwargs}
        md = Markdown(**options)
        rendered = md.convert(source)
        self.assertMultiLineEqual(rendered, expected)
        for attr_name, attr_value in (expected_attrs or {}).items():
            self.assertEqual(getattr(md, attr_name), attr_value)

    def dedent(self, text):
        """
        Dedent text.
        """
        # TODO: If/when actual output ends with a newline, then use:
        #     return textwrap.dedent(text.strip('\n'))
        dedented = textwrap.dedent(text)
        return dedented.strip()
class recursionlimit:
    """
    A context manager which temporarily modifies the Python recursion limit.

    The testing framework, coverage, etc. may add an arbitrary number of levels
    to the stack. To keep tests consistent, the stack depth at construction time
    is measured and added to the requested limit.

    Example usage:

    ``` python
    with recursionlimit(20):
        # test code here
    ```
    """

    def __init__(self, limit):
        current_depth = util._get_stack_depth()
        self.limit = current_depth + limit
        # Remembered so that `__exit__` can restore it.
        self.old_limit = sys.getrecursionlimit()

    def __enter__(self):
        sys.setrecursionlimit(self.limit)

    def __exit__(self, type, value, tb):
        sys.setrecursionlimit(self.old_limit)
#########################
# Legacy Test Framework #
#########################
class Kwargs(dict):
    """ A `dict` like class for holding keyword arguments. """
def _normalize_whitespace(text):
    """ Normalize whitespace for a string of HTML using `tidylib`. """
    tidy_options = {
        'drop_empty_paras': 0,
        'fix_backslash': 0,
        'fix_bad_comments': 0,
        'fix_uri': 0,
        'join_styles': 0,
        'lower_literals': 0,
        'merge_divs': 0,
        'output_xhtml': 1,
        'quote_ampersand': 0,
        'newline': 'LF'
    }
    # The second item of the result (the errors) is intentionally ignored.
    normalized, _errors = tidylib.tidy_fragment(text, options=tidy_options)
    return normalized
class LegacyTestMeta(type):
    """
    Metaclass which generates one `test_<name>` method per input file found in
    the `location` directory declared on the class being created.
    """

    def __new__(cls, name, bases, dct):

        def generate_test(infile, outfile, normalize, kwargs):
            """ Build a test method comparing rendered `infile` with `outfile`. """
            def test(self):
                with open(infile, encoding="utf-8") as source_file:
                    input = source_file.read()
                with open(outfile, encoding="utf-8") as expected_file:
                    # Normalize line endings
                    # (on Windows, git may have altered line endings).
                    expected = expected_file.read().replace("\r\n", "\n")
                output = markdown(input, **kwargs)
                if normalize:
                    if not tidylib:
                        self.skipTest('Tidylib not available.')
                    try:
                        expected = _normalize_whitespace(expected)
                        output = _normalize_whitespace(output)
                    except OSError:
                        self.skipTest("Tidylib's c library not available.")
                self.assertMultiLineEqual(output, expected)
            return test

        # Class level configuration, each with its default.
        location = dct.get('location', '')
        exclude = dct.get('exclude', [])
        normalize = dct.get('normalize', False)
        input_ext = dct.get('input_ext', '.txt')
        output_ext = dct.get('output_ext', '.html')
        kwargs = dct.get('default_kwargs', Kwargs())

        filenames = os.listdir(location) if os.path.isdir(location) else []
        for file in filenames:
            infile = os.path.join(location, file)
            if not os.path.isfile(infile):
                continue
            tname, ext = os.path.splitext(file)
            if ext != input_ext:
                continue
            outfile = os.path.join(location, tname + output_ext)
            # Turn the file name into a valid identifier suffix.
            tname = tname.replace(' ', '_').replace('-', '_')
            kws = kwargs.copy()
            if tname in dct:
                # Per-file keywords update the defaults.
                kws.update(dct[tname])
            test_name = 'test_%s' % tname
            if tname in exclude:
                dct[test_name] = unittest.skip('Excluded')(lambda: None)
            else:
                dct[test_name] = generate_test(infile, outfile, normalize, kws)

        return type.__new__(cls, name, bases, dct)
class LegacyTestCase(unittest.TestCase, metaclass=LegacyTestMeta):
    """
    A [`unittest.TestCase`][] subclass for running Markdown's legacy file-based tests.

    A subclass declares class attributes which point at a directory of text-based
    test files and configure how those files are tested.

    Attributes:
        location (str): A path to the directory of test files. An absolute path is preferred.
        exclude (list[str]): A list of tests to exclude. Each test name should comprise the filename
            without an extension.
        normalize (bool): A boolean value indicating if the HTML should be normalized. Default: `False`.
        input_ext (str): A string containing the file extension of input files. Default: `.txt`.
        output_ext (str): A string containing the file extension of expected output files. Default: `.html`.
        default_kwargs (Kwargs[str, Any]): The default set of keyword arguments for all test files in the directory.

    In addition, an attribute may be defined for each individual set of test files
    in the directory. It must be named after the file without its extension, with
    spaces and dashes replaced by underscores, and its value should be a `Kwargs`
    instance holding the keyword arguments to pass to `Markdown` for that file.
    Those keyword arguments "update" the `default_kwargs`.

    When the class is created, the directory is walked and a separate test named
    `test_filename` is generated for each pair of input and output files.
    """
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1723823708.0
markdown-3.7/markdown/treeprocessors.py 0000644 0001751 0000177 00000042363 14657673134 020120 0 ustar 00runner docker # Python Markdown
# A Python implementation of John Gruber's Markdown.
# Documentation: https://python-markdown.github.io/
# GitHub: https://github.com/Python-Markdown/markdown/
# PyPI: https://pypi.org/project/Markdown/
# Started by Manfred Stienstra (http://www.dwerg.net/).
# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
# Currently maintained by Waylan Limberg (https://github.com/waylan),
# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
# Copyright 2004 Manfred Stienstra (the original version)
# License: BSD (see LICENSE.md for details).
"""
Tree processors manipulate the tree created by block processors. They can even create an entirely
new `ElementTree` object. This is an excellent place for creating summaries, adding collected
references, or last minute adjustments.
"""
from __future__ import annotations
import re
import xml.etree.ElementTree as etree
from typing import TYPE_CHECKING, Any
from . import util
from . import inlinepatterns
if TYPE_CHECKING: # pragma: no cover
from markdown import Markdown
def build_treeprocessors(md: Markdown, **kwargs: Any) -> util.Registry[Treeprocessor]:
    """ Build the default `treeprocessors` for Markdown. """
    registry = util.Registry()
    defaults = (
        (InlineProcessor(md), 'inline', 20),
        (PrettifyTreeprocessor(md), 'prettify', 10),
        (UnescapeTreeprocessor(md), 'unescape', 0),
    )
    for processor, name, priority in defaults:
        registry.register(processor, name, priority)
    return registry
def isString(s: object) -> bool:
    """ Return `True` if object is a string but not an [`AtomicString`][markdown.util.AtomicString]. """
    return isinstance(s, str) and not isinstance(s, util.AtomicString)
class Treeprocessor(util.Processor):
    """
    Base class for processors which run on the `ElementTree` object before
    serialization.

    Each `Treeprocessor` implements a `run` method which receives an `Element`
    and modifies it as necessary.

    `Treeprocessors` must extend `markdown.Treeprocessor`.
    """

    def run(self, root: etree.Element) -> etree.Element | None:
        """
        Process the tree rooted at `root`.

        Subclasses should override this method. It may either return a new
        `Element`, which then replaces the existing root, or modify the tree in
        place and return `None`.
        """
        pass  # pragma: no cover
class InlineProcessor(Treeprocessor):
"""
A `Treeprocessor` that traverses a tree, applying inline patterns.
"""
def __init__(self, md: Markdown):
    """ Cache the placeholder settings and the inline patterns of `md`. """
    prefix = util.INLINE_PLACEHOLDER_PREFIX
    suffix = util.ETX
    self.__placeholder_prefix = prefix
    self.__placeholder_suffix = suffix
    # Prefix and suffix plus four more characters.
    self.__placeholder_length = 4 + len(prefix) + len(suffix)
    self.__placeholder_re = util.INLINE_PLACEHOLDER_RE
    self.md = md
    self.inlinePatterns = md.inlinePatterns
    self.ancestors: list[str] = []
def __makePlaceholder(self, type: str) -> tuple[str, str]:
    """ Return a `(placeholder, id)` pair for the next node to be stashed. """
    # The id is the zero-padded count of nodes stashed so far.
    node_id = "%04d" % len(self.stashed_nodes)
    return util.INLINE_PLACEHOLDER % node_id, node_id
def __findPlaceholder(self, data: str, index: int) -> tuple[str | None, int]:
    """
    Extract id from data string, start from index.

    Arguments:
        data: String.
        index: Index, from which we start search.

    Returns:
        Placeholder id and string index, after the found placeholder. When no
        placeholder is found the id is `None` and the index is `index + 1`.
    """
    found = self.__placeholder_re.search(data, index)
    if not found:
        return None, index + 1
    return found.group(1), found.end()
def __stashNode(self, node: etree.Element | str, type: str) -> str:
    """ Store `node` in the stash and return the placeholder which stands for it. """
    marker, node_id = self.__makePlaceholder(type)
    self.stashed_nodes[node_id] = node
    return marker
def __handleInline(self, data: str, patternIndex: int = 0) -> str:
    """
    Process string with inline patterns and replace it with placeholders.

    Arguments:
        data: A line of Markdown text.
        patternIndex: The index of the `inlinePattern` to start with.

    Returns:
        String with placeholders.
    """
    # Atomic strings are never processed.
    if isinstance(data, util.AtomicString):
        return data

    pattern_total = len(self.inlinePatterns)
    offset = 0
    while patternIndex < pattern_total:
        current = self.inlinePatterns[patternIndex]
        data, matched, offset = self.__applyPattern(current, data, patternIndex, offset)
        # Stay on the same pattern for as long as it keeps matching.
        if not matched:
            patternIndex += 1
    return data
def __processElementText(self, node: etree.Element, subnode: etree.Element, isText: bool = True) -> None:
    """
    Process placeholders in `Element.text` or `Element.tail`
    of Elements popped from `self.stashed_nodes`.

    Arguments:
        node: Parent node.
        subnode: Processing node.
        isText: Boolean variable, True - it's text, False - it's a tail.
    """
    # Detach the string from the element before expanding its placeholders.
    if isText:
        text, subnode.text = subnode.text, None
    else:
        text, subnode.tail = subnode.tail, None

    expanded = self.__processPlaceholders(text, subnode, isText)

    if isText or node is subnode:
        insert_at = 0
    else:
        # Elements produced from a tail go right after `subnode`.
        insert_at = list(node).index(subnode) + 1

    # Inserting in reverse at a fixed position keeps the original order.
    for new_child, _ancestors in reversed(expanded):
        node.insert(insert_at, new_child)
def __processPlaceholders(
    self,
    data: str | None,
    parent: etree.Element,
    isText: bool = True
) -> list[tuple[etree.Element, list[str]]]:
    """
    Process string with placeholders and generate `ElementTree` tree.

    Plain text between placeholders is attached to the tail of the previously
    produced element or, when there is none yet, to the text (`isText` true) or
    tail (`isText` false) of `parent`.

    Arguments:
        data: String with placeholders instead of `ElementTree` elements.
        parent: Element, which contains processing inline data.
        isText: Boolean variable, True - it's text, False - it's a tail.

    Returns:
        List of `(element, ancestors)` tuples for the stashed elements which
        were found, in order of appearance.
    """
    def linkText(text: str | None) -> None:
        # Append `text` to the correct place (see the docstring above).
        if text:
            if result:
                if result[-1][0].tail:
                    result[-1][0].tail += text
                else:
                    result[-1][0].tail = text
            elif not isText:
                if parent.tail:
                    parent.tail += text
                else:
                    parent.tail = text
            else:
                if parent.text:
                    parent.text += text
                else:
                    parent.text = text
    result = []
    strartIndex = 0
    while data:
        index = data.find(self.__placeholder_prefix, strartIndex)
        if index != -1:
            id, phEndIndex = self.__findPlaceholder(data, index)

            if id in self.stashed_nodes:
                node = self.stashed_nodes.get(id)

                if index > 0:
                    # Text which precedes the placeholder.
                    text = data[strartIndex:index]
                    linkText(text)

                if not isinstance(node, str):  # it's Element
                    # Expand placeholders nested in the stashed element and
                    # its direct children.
                    for child in [node] + list(node):
                        if child.tail:
                            if child.tail.strip():
                                self.__processElementText(
                                    node, child, False
                                )
                        if child.text:
                            if child.text.strip():
                                self.__processElementText(child, child)
                else:  # it's just a string
                    linkText(node)
                    strartIndex = phEndIndex
                    continue

                strartIndex = phEndIndex
                result.append((node, self.ancestors[:]))

            else:  # wrong placeholder
                # Keep the prefix as literal text and continue after it.
                end = index + len(self.__placeholder_prefix)
                linkText(data[strartIndex:end])
                strartIndex = end
        else:
            # No more placeholders: the remainder is plain text.
            text = data[strartIndex:]
            if isinstance(data, util.AtomicString):
                # We don't want to lose the `AtomicString`
                text = util.AtomicString(text)
            linkText(text)
            data = ""

    return result
def __applyPattern(
    self,
    pattern: inlinepatterns.Pattern,
    data: str,
    patternIndex: int,
    startIndex: int = 0
) -> tuple[str, bool, int]:
    """
    Check if the line fits the pattern, create the necessary
    elements, add it to `stashed_nodes`.

    Arguments:
        data: The text to be processed.
        pattern: The pattern to be checked.
        patternIndex: Index of current pattern.
        startIndex: String index, from which we start searching.

    Returns:
        A `(data, matched, startIndex)` tuple: the text (with a placeholder in
        place of the match when a node was stashed), whether the pattern
        matched, and the index at which the next search should start.
    """
    new_style = isinstance(pattern, inlinepatterns.InlineProcessor)

    # A pattern is skipped when one of its excluded tags is an ancestor.
    for exclude in pattern.ANCESTOR_EXCLUDES:
        if exclude.lower() in self.ancestors:
            return data, False, 0

    if new_style:
        match = None
        # Since `handleMatch` may reject our first match,
        # we iterate over the buffer looking for matches
        # until we can't find any more.
        for match in pattern.getCompiledRegExp().finditer(data, startIndex):
            node, start, end = pattern.handleMatch(match, data)
            if start is None or end is None:
                # Rejected match: reset and try the next one.
                startIndex += match.end(0)
                match = None
                continue
            break
    else:  # pragma: no cover
        match = pattern.getCompiledRegExp().match(data[startIndex:])
        leftData = data[:startIndex]

    if not match:
        return data, False, 0

    if not new_style:  # pragma: no cover
        node = pattern.handleMatch(match)
        start = match.start(0)
        end = match.end(0)

    if node is None:
        # Matched but produced nothing: resume searching after the match.
        return data, True, end

    if not isinstance(node, str):
        if not isinstance(node.text, util.AtomicString):
            # We need to process current node too
            for child in [node] + list(node):
                # NOTE(review): this tests `node`, not `child`; `node` is
                # already known not to be a `str` here - confirm intent.
                if not isString(node):
                    if child.text:
                        # Text is nested in `child`, so it counts as an ancestor.
                        self.ancestors.append(child.tag.lower())
                        child.text = self.__handleInline(
                            child.text, patternIndex + 1
                        )
                        self.ancestors.pop()
                    if child.tail:
                        child.tail = self.__handleInline(
                            child.tail, patternIndex
                        )

    placeholder = self.__stashNode(node, pattern.type())

    if new_style:
        return "{}{}{}".format(data[:start],
                               placeholder, data[end:]), True, 0
    else:  # pragma: no cover
        return "{}{}{}{}".format(leftData,
                                 match.group(1),
                                 placeholder, match.groups()[-1]), True, 0
def __build_ancestors(self, parent: etree.Element | None, parents: list[str]) -> None:
    """Build the ancestor list.

    Walks from `parent` up through `self.parent_map` and extends `parents`
    with the lower-cased tag names, outermost ancestor first.
    """
    chain = []
    node = parent
    while node is not None:
        chain.append(node.tag.lower())
        node = self.parent_map.get(node)
    parents.extend(reversed(chain))
def run(self, tree: etree.Element, ancestors: list[str] | None = None) -> etree.Element:
    """Apply inline patterns to a parsed Markdown tree.

    Iterate over `Element`, find elements with inline tag, apply inline
    patterns and append newly created Elements to tree. To avoid further
    processing of string with inline patterns, instead of normal string,
    use subclass [`AtomicString`][markdown.util.AtomicString]:

        node.text = markdown.util.AtomicString("This will not be processed.")

    Arguments:
        tree: `Element` object, representing Markdown tree.
        ancestors: List of parent tag names that precede the tree node (if needed).

    Returns:
        An element tree object with applied inline patterns.
    """
    self.stashed_nodes: dict[str, etree.Element | str] = {}

    # Ensure a valid parent list, but copy passed in lists
    # to ensure we don't have the user accidentally change it on us.
    tree_parents = [] if ancestors is None else ancestors[:]

    # Maps every child element of the tree to its parent.
    self.parent_map = {c: p for p in tree.iter() for c in p}
    # Work list of `(element, ancestors)` pairs still to be processed.
    stack = [(tree, tree_parents)]

    while stack:
        currElement, parents = stack.pop()

        self.ancestors = parents
        self.__build_ancestors(currElement, self.ancestors)

        # Elements created from a child's text, inserted after the loop.
        insertQueue = []
        for child in currElement:
            if child.text and not isinstance(
                child.text, util.AtomicString
            ):
                self.ancestors.append(child.tag.lower())
                text = child.text
                child.text = None
                lst = self.__processPlaceholders(
                    self.__handleInline(text), child
                )
                for item in lst:
                    self.parent_map[item[0]] = child
                # The new elements need to be processed as well.
                stack += lst
                insertQueue.append((child, lst))
                self.ancestors.pop()
            if child.tail:
                tail = self.__handleInline(child.tail)
                # Throwaway element which collects the leading plain text.
                dumby = etree.Element('d')
                child.tail = None
                tailResult = self.__processPlaceholders(tail, dumby, False)
                if dumby.tail:
                    child.tail = dumby.tail
                pos = list(currElement).index(child) + 1
                # Inserting in reverse at a fixed position keeps the order.
                tailResult.reverse()
                for newChild in tailResult:
                    self.parent_map[newChild[0]] = currElement
                    currElement.insert(pos, newChild[0])
            if len(child):
                self.parent_map[child] = currElement
                stack.append((child, self.ancestors[:]))

        for element, lst in insertQueue:
            for i, obj in enumerate(lst):
                newChild = obj[0]
                element.insert(i, newChild)
    return tree
class PrettifyTreeprocessor(Treeprocessor):
    """ Add line breaks to the html document. """

    def _prettifyETree(self, elem: etree.Element) -> None:
        """ Recursively add line breaks to `ElementTree` children. """

        i = "\n"
        if self.md.is_block_level(elem.tag) and elem.tag not in ['code', 'pre']:
            # Only start the content on a new line when there is no real text
            # and the first child is itself block level.
            if (not elem.text or not elem.text.strip()) \
                    and len(elem) and self.md.is_block_level(elem[0].tag):
                elem.text = i
            for e in elem:
                if self.md.is_block_level(e.tag):
                    self._prettifyETree(e)
        if not elem.tail or not elem.tail.strip():
            elem.tail = i

    def run(self, root: etree.Element) -> None:
        """ Add line breaks to `Element` object and its children. """

        self._prettifyETree(root)
        # Do `<br />`'s separately as they are often in the middle of
        # inline content and missed by `_prettifyETree`.
        brs = root.iter('br')
        for br in brs:
            if not br.tail or not br.tail.strip():
                br.tail = '\n'
            else:
                br.tail = '\n%s' % br.tail
        # Clean up extra empty lines at end of code blocks.
        pres = root.iter('pre')
        for pre in pres:
            if len(pre) and pre[0].tag == 'code':
                code = pre[0]
                # Only prettify code containing text only
                if not len(code) and code.text is not None:
                    code.text = util.AtomicString(code.text.rstrip() + '\n')
class UnescapeTreeprocessor(Treeprocessor):
    """ Restore escaped chars """

    # A decimal code point wrapped in the `STX`/`ETX` markers.
    RE = re.compile(r'{}(\d+){}'.format(util.STX, util.ETX))

    def _unescape(self, m: re.Match[str]) -> str:
        """ Return the character for the code point captured by `m`. """
        codepoint = int(m.group(1))
        return chr(codepoint)

    def unescape(self, text: str) -> str:
        """ Replace every escape sequence in `text` with its character. """
        return self.RE.sub(self._unescape, text)

    def run(self, root: etree.Element) -> None:
        """ Loop over all elements and unescape all text. """
        for elem in root.iter():
            # Text content, except inside `code` elements.
            if elem.text and elem.tag != 'code':
                elem.text = self.unescape(elem.text)
            # Tail content
            if elem.tail:
                elem.tail = self.unescape(elem.tail)
            # Attribute values
            for key, value in elem.items():
                elem.set(key, self.unescape(value))
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1723823708.0
markdown-3.7/markdown/util.py 0000644 0001751 0000177 00000033151 14657673134 016006 0 ustar 00runner docker # Python Markdown
# A Python implementation of John Gruber's Markdown.
# Documentation: https://python-markdown.github.io/
# GitHub: https://github.com/Python-Markdown/markdown/
# PyPI: https://pypi.org/project/Markdown/
# Started by Manfred Stienstra (http://www.dwerg.net/).
# Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
# Currently maintained by Waylan Limberg (https://github.com/waylan),
# Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
# Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
# Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
# Copyright 2004 Manfred Stienstra (the original version)
# License: BSD (see LICENSE.md for details).
"""
This module contains various constants, classes and functions which get referenced and used
throughout the code base.
"""
from __future__ import annotations
import re
import sys
import warnings
from functools import wraps, lru_cache
from itertools import count
from typing import TYPE_CHECKING, Generic, Iterator, NamedTuple, TypeVar, TypedDict, overload
if TYPE_CHECKING: # pragma: no cover
from markdown import Markdown
import xml.etree.ElementTree as etree
_T = TypeVar('_T')
"""
Constants you might want to modify
-----------------------------------------------------------------------------
"""
BLOCK_LEVEL_ELEMENTS: list[str] = [
    # Elements which are invalid to wrap in a `<p>` tag.
    # See https://w3c.github.io/html/grouping-content.html#the-p-element
    'address', 'article', 'aside', 'blockquote', 'details', 'div', 'dl',
    'fieldset', 'figcaption', 'figure', 'footer', 'form', 'h1', 'h2', 'h3',
    'h4', 'h5', 'h6', 'header', 'hgroup', 'hr', 'main', 'menu', 'nav', 'ol',
    'p', 'pre', 'section', 'table', 'ul',
    # Other elements which Markdown should not be mucking up the contents of.
    'canvas', 'colgroup', 'dd', 'body', 'dt', 'group', 'html', 'iframe', 'li', 'legend',
    'math', 'map', 'noscript', 'output', 'object', 'option', 'progress', 'script',
    'style', 'summary', 'tbody', 'td', 'textarea', 'tfoot', 'th', 'thead', 'tr', 'video'
]
"""
List of HTML tags which get treated as block-level elements. Same as the `block_level_elements`
attribute of the [`Markdown`][markdown.Markdown] class. Generally one should use the
attribute on the class. This remains for compatibility with older extensions.
"""
# Placeholders
# Every template below wraps a key in the `STX` and `ETX` control characters.
STX = '\u0002'
""" "Start of Text" marker for placeholder templates. """
ETX = '\u0003'
""" "End of Text" marker for placeholder templates. """
INLINE_PLACEHOLDER_PREFIX = STX+"klzzwxh:"
""" Prefix for inline placeholder template. """
INLINE_PLACEHOLDER = INLINE_PLACEHOLDER_PREFIX + "%s" + ETX
""" Placeholder template for stashed inline text. """
# The single capture group is the numeric id of the placeholder.
INLINE_PLACEHOLDER_RE = re.compile(INLINE_PLACEHOLDER % r'([0-9]+)')
""" Regular Expression which matches inline placeholders. """
AMP_SUBSTITUTE = STX+"amp"+ETX
""" Placeholder template for HTML entities. """
HTML_PLACEHOLDER = STX + "wzxhzdk:%s" + ETX
""" Placeholder template for raw HTML. """
# The single capture group is the numeric id of the placeholder.
HTML_PLACEHOLDER_RE = re.compile(HTML_PLACEHOLDER % r'([0-9]+)')
""" Regular expression which matches HTML placeholders. """
TAG_PLACEHOLDER = STX + "hzzhzkh:%s" + ETX
""" Placeholder template for tags. """
# Constants you probably do not need to change
# -----------------------------------------------------------------------------
# Each entry is a `(first, last)` pair of characters delimiting a range.
RTL_BIDI_RANGES = (
    ('\u0590', '\u07FF'),
    # Hebrew (0590-05FF), Arabic (0600-06FF),
    # Syriac (0700-074F), Arabic supplement (0750-077F),
    # Thaana (0780-07BF), Nko (07C0-07FF).
    ('\u2D30', '\u2D7F')  # Tifinagh
)
# AUXILIARY GLOBAL FUNCTIONS
# =============================================================================
@lru_cache(maxsize=None)
def get_installed_extensions():
    """ Return all entry_points in the `markdown.extensions` group. """
    if sys.version_info >= (3, 10):
        from importlib import metadata
    else:  # `<PY310` use backport
        # NOTE(review): the original branch body was lost; the backport
        # package name is assumed - confirm against the project dependencies.
        import importlib_metadata as metadata
    # Cached by `lru_cache`, so the entry points are only loaded once.
    return metadata.entry_points(group='markdown.extensions')


# NOTE(review): the text between the function above and the signature below
# was lost. The signature is rebuilt from the docstring and body; any other
# definition which lived in the gap is not restored here.
def parseBoolValue(value: str | None, fail_on_errors: bool = True, preserve_none: bool = False) -> bool | None:
    """Parses a string representing a boolean value. If parsing was successful,
       returns `True` or `False`. If `preserve_none=True`, returns `True`, `False`,
       or `None`. If parsing was not successful, raises `ValueError`, or, if
       `fail_on_errors=False`, returns `None`."""
    if not isinstance(value, str):
        if preserve_none and value is None:
            return value
        return bool(value)
    elif preserve_none and value.lower() == 'none':
        return None
    elif value.lower() in ('true', 'yes', 'y', 'on', '1'):
        return True
    elif value.lower() in ('false', 'no', 'n', 'off', '0', 'none'):
        return False
    elif fail_on_errors:
        raise ValueError('Cannot parse bool value: %r' % value)
def code_escape(text: str) -> str:
    """HTML escape a string of code."""
    # `&` must be handled first so that the entities added below are not
    # escaped a second time.
    if "&" in text:
        text = text.replace("&", "&amp;")
    if "<" in text:
        text = text.replace("<", "&lt;")
    if ">" in text:
        text = text.replace(">", "&gt;")
    return text
def _get_stack_depth(size: int = 2) -> int:
"""Get current stack depth, performantly.
"""
frame = sys._getframe(size)
for size in count(size):
frame = frame.f_back
if not frame:
return size
def nearing_recursion_limit() -> bool:
    """Return true if current stack depth is within 100 of maximum limit."""
    remaining = sys.getrecursionlimit() - _get_stack_depth()
    return remaining < 100
# MISC AUXILIARY CLASSES
# =============================================================================
class AtomicString(str):
    """A string which should not be further processed."""
class Processor:
    """ The base class for all processors.

    Attributes:
        Processor.md: The `Markdown` instance passed in an initialization.

    Arguments:
        md: The `Markdown` instance this processor is a part of.

    """

    def __init__(self, md: Markdown | None = None):
        """ Store the given `Markdown` instance (or `None`) as `self.md`. """
        self.md = md
if TYPE_CHECKING:  # pragma: no cover
    # Shape of the records which `HtmlStash.store_tag` appends to `tag_data`.
    # Only defined for type checkers.
    class TagData(TypedDict):
        tag: str
        attrs: dict[str, str]
        left_index: int
        right_index: int
class HtmlStash:
    """
    This class is used for stashing HTML objects that we extract
    in the beginning and replace with place-holders.
    """

    def __init__(self):
        """ Create an `HtmlStash`. """
        self.html_counter = 0  # for counting inline html segments
        self.rawHtmlBlocks: list[str | etree.Element] = []
        self.tag_counter = 0
        self.tag_data: list[TagData] = []  # list of dictionaries in the order tags appear

    def store(self, html: str | etree.Element) -> str:
        """
        Saves an HTML segment for later reinsertion. Returns a
        placeholder string that needs to be inserted into the
        document.

        Keyword arguments:
            html: An html segment.

        Returns:
            A placeholder string.

        """
        self.rawHtmlBlocks.append(html)
        # The key is the position of the segment in `rawHtmlBlocks`.
        key = self.html_counter
        marker = self.get_placeholder(key)
        self.html_counter = key + 1
        return marker

    def reset(self) -> None:
        """ Clear the stashed HTML segments and their counter. """
        self.html_counter = 0
        self.rawHtmlBlocks = []

    def get_placeholder(self, key: int) -> str:
        """ Return the HTML placeholder string for `key`. """
        return HTML_PLACEHOLDER % key

    def store_tag(self, tag: str, attrs: dict[str, str], left_index: int, right_index: int) -> str:
        """Store tag data and return a placeholder."""
        record = {
            'tag': tag,
            'attrs': attrs,
            'left_index': left_index,
            'right_index': right_index,
        }
        self.tag_data.append(record)
        # The counter equals the tag's index in `self.tag_data`.
        index = self.tag_counter
        marker = TAG_PLACEHOLDER % str(index)
        self.tag_counter = index + 1
        return marker
# Used internally by `Registry` for each item in its sorted list.
# Provides an easier to read API when editing the code later.
# For example, `item.name` is more clear than `item[0]`.
class _PriorityItem(NamedTuple):
    # The name under which the item was registered.
    name: str
    # The priority the item was registered with.
    priority: float
class Registry(Generic[_T]):
"""
A priority sorted registry.
A `Registry` instance provides two public methods to alter the data of the
registry: `register` and `deregister`. Use `register` to add items and
`deregister` to remove items. See each method for specifics.
When registering an item, a "name" and a "priority" must be provided. All
items are automatically sorted by "priority" from highest to lowest. The
"name" is used to remove ("deregister") and get items.
A `Registry` instance is like a list (which maintains order) when reading
data. You may iterate over the items, get an item and get a count (length)
of all items. You may also check that the registry contains an item.
When getting an item you may use either the index of the item or the
string-based "name". For example:
registry = Registry()
registry.register(SomeItem(), 'itemname', 20)
# Get the item by index
item = registry[0]
# Get the item by name
item = registry['itemname']
When checking that the registry contains an item, you may use either the
string-based "name", or a reference to the actual item. For example:
someitem = SomeItem()
registry.register(someitem, 'itemname', 20)
# Contains the name
assert 'itemname' in registry
# Contains the item instance
assert someitem in registry
The method `get_index_for_name` is also available to obtain the index of
an item using that item's assigned "name".
"""
def __init__(self):
self._data: dict[str, _T] = {}
self._priority: list[_PriorityItem] = []
self._is_sorted = False
def __contains__(self, item: str | _T) -> bool:
if isinstance(item, str):
# Check if an item exists by this name.
return item in self._data.keys()
# Check if this instance exists.
return item in self._data.values()
def __iter__(self) -> Iterator[_T]:
self._sort()
return iter([self._data[k] for k, p in self._priority])
@overload
def __getitem__(self, key: str | int) -> _T: # pragma: no cover
...
@overload
def __getitem__(self, key: slice) -> Registry[_T]: # pragma: no cover
...
def __getitem__(self, key: str | int | slice) -> _T | Registry[_T]:
self._sort()
if isinstance(key, slice):
data: Registry[_T] = Registry()
for k, p in self._priority[key]:
data.register(self._data[k], k, p)
return data
if isinstance(key, int):
return self._data[self._priority[key].name]
return self._data[key]
def __len__(self) -> int:
return len(self._priority)
def __repr__(self):
return '<{}({})>'.format(self.__class__.__name__, list(self))
def get_index_for_name(self, name: str) -> int:
"""
Return the index of the given name.
"""
if name in self:
self._sort()
return self._priority.index(
[x for x in self._priority if x.name == name][0]
)
raise ValueError('No item named "{}" exists.'.format(name))
def register(self, item: _T, name: str, priority: float) -> None:
"""
Add an item to the registry with the given name and priority.
Arguments:
item: The item being registered.
name: A string used to reference the item.
priority: An integer or float used to sort against all items.
If an item is registered with a "name" which already exists, the
existing item is replaced with the new item. Treat carefully as the
old item is lost with no way to recover it. The new item will be
sorted according to its priority and will **not** retain the position
of the old item.
"""
if name in self:
# Remove existing item of same name first
self.deregister(name)
self._is_sorted = False
self._data[name] = item
self._priority.append(_PriorityItem(name, priority))
def deregister(self, name: str, strict: bool = True) -> None:
"""
Remove an item from the registry.
Set `strict=False` to fail silently. Otherwise a [`ValueError`][] is raised for an unknown `name`.
"""
try:
index = self.get_index_for_name(name)
del self._priority[index]
del self._data[name]
except ValueError:
if strict:
raise
def _sort(self) -> None:
"""
Sort the registry by priority from highest to lowest.
This method is called internally and should never be explicitly called.
"""
if not self._is_sorted:
self._priority.sort(key=lambda item: item.priority, reverse=True)
self._is_sorted = True
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1723823708.0
markdown-3.7/mkdocs.yml 0000644 0001751 0000177 00000006735 14657673134 014650 0 ustar 00runner docker site_name: Python-Markdown
site_url: https://Python-Markdown.github.io/
repo_url: https://github.com/Python-Markdown/markdown
site_author: "The Python-Markdown Project"
copyright: "Copyright © 2010-2023"
use_directory_urls: true
watch: [markdown, scripts]
theme:
name: nature
icon: py.png
release: !!python/name:markdown.__version__
issue_tracker: https://github.com/Python-Markdown/markdown/issues
extra_css:
- custom.css
- mkdocstrings.css
nav:
- Python-Markdown: index.md
- Installation: install.md
- Library Reference: reference.md
- Command Line: cli.md
- Extensions: extensions/index.md
- Officially Supported Extensions:
- Abbreviations: extensions/abbreviations.md
- Admonition: extensions/admonition.md
- Attribute Lists: extensions/attr_list.md
- CodeHilite: extensions/code_hilite.md
- Definition Lists: extensions/definition_lists.md
- Extra: extensions/extra.md
- Fenced Code Blocks: extensions/fenced_code_blocks.md
- Footnotes: extensions/footnotes.md
- Legacy Attributes: extensions/legacy_attrs.md
- Legacy Emphasis: extensions/legacy_em.md
- Meta-Data: extensions/meta_data.md
- New Line to Break: extensions/nl2br.md
- Markdown in HTML: extensions/md_in_html.md
- Sane Lists: extensions/sane_lists.md
- SmartyPants: extensions/smarty.md
- Table of Contents: extensions/toc.md
- Tables: extensions/tables.md
- WikiLinks: extensions/wikilinks.md
- Extension API: extensions/api.md
- Test Tools: test_tools.md
- API Reference: reference/
- Contributing to Python-Markdown: contributing.md
- Changelog: changelog.md
- Authors: authors.md
not_in_nav: |
change_log/
markdown_extensions:
- extra
- admonition
- smarty
- codehilite
- toc:
permalink: true
- mdx_gh_links:
user: Python-Markdown
repo: markdown
plugins:
- search
- gen-files:
scripts:
- scripts/gen_ref_nav.py
- literate-nav:
nav_file: SUMMARY.md
- section-index
- mkdocstrings:
custom_templates: docs/templates
handlers:
python:
import:
- https://docs.python.org/3/objects.inv
options:
annotations_path: brief
docstring_options:
ignore_init_summary: true
docstring_style: google
docstring_section_style: list
extensions:
- scripts/griffe_extensions.py:DeprecatedExtension
- scripts/griffe_extensions.py:PriorityTableExtension:
paths:
- markdown.preprocessors.build_preprocessors
- markdown.blockprocessors.build_block_parser
- markdown.treeprocessors.build_treeprocessors
- markdown.inlinepatterns.build_inlinepatterns
- markdown.postprocessors.build_postprocessors
filters: ["!^_"]
group_by_category: false
heading_level: 1
inherited_members: true
members_order: source
merge_init_into_class: true
separate_signature: false
show_root_heading: true
show_object_full_path: true
show_signature_annotations: true
show_source: false
show_symbol_type_heading: true
show_symbol_type_toc: false
signature_crossrefs: false
summary: true
source:
repo: https://github.com/Python-Markdown/markdown
tag: !!python/name:markdown.__version__
title: "View source code on GitHub."
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1723823708.0
markdown-3.7/pyproject.toml 0000644 0001751 0000177 00000007300 14657673134 015546 0 ustar 00runner docker [build-system]
# Minimum requirements for the build system to execute.
requires = ["setuptools>=61.2"]
build-backend = "setuptools.build_meta"
[project]
name = 'Markdown'
dynamic = ['version']
description = "Python implementation of John Gruber's Markdown."
readme = {file = 'README.md', content-type='text/markdown'}
authors = [
{name = 'Manfred Stienstra'},
{name = 'Yuri Takhteyev'},
{name = 'Waylan Limberg', email = 'python.markdown@gmail.com'}
]
maintainers = [
{name = 'Waylan Limberg', email = 'python.markdown@gmail.com'},
{name = 'Isaac Muse'}
]
license = {file = 'LICENSE.md'}
requires-python = '>=3.8'
dependencies = [
"importlib-metadata>=4.4;python_version<'3.10'"
]
keywords = ['markdown', 'markdown-parser', 'python-markdown', 'markdown-to-html']
classifiers = [
'Development Status :: 5 - Production/Stable',
'License :: OSI Approved :: BSD License',
'Operating System :: OS Independent',
'Programming Language :: Python',
'Programming Language :: Python :: 3',
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Programming Language :: Python :: 3.10',
'Programming Language :: Python :: 3.11',
'Programming Language :: Python :: 3.12',
'Programming Language :: Python :: 3 :: Only',
'Programming Language :: Python :: Implementation :: CPython',
'Programming Language :: Python :: Implementation :: PyPy',
'Topic :: Communications :: Email :: Filters',
'Topic :: Internet :: WWW/HTTP :: Dynamic Content :: CGI Tools/Libraries',
'Topic :: Internet :: WWW/HTTP :: Site Management',
'Topic :: Software Development :: Documentation',
'Topic :: Software Development :: Libraries :: Python Modules',
'Topic :: Text Processing :: Filters',
'Topic :: Text Processing :: Markup :: HTML',
'Topic :: Text Processing :: Markup :: Markdown'
]
[project.optional-dependencies]
testing = [
'coverage',
'pyyaml'
]
docs = [
'mkdocs>=1.5',
'mkdocs-nature>=0.6',
'mdx_gh_links>=0.2',
"mkdocstrings[python]",
"mkdocs-gen-files",
"mkdocs-section-index",
"mkdocs-literate-nav",
]
[project.urls]
'Homepage' = 'https://Python-Markdown.github.io/'
'Documentation' = 'https://Python-Markdown.github.io/'
'Repository' = 'https://github.com/Python-Markdown/markdown'
'Issue Tracker' = 'https://github.com/Python-Markdown/markdown/issues'
'Changelog' = 'https://python-markdown.github.io/changelog/'
[project.scripts]
markdown_py = 'markdown.__main__:run'
[project.entry-points.'markdown.extensions']
abbr = 'markdown.extensions.abbr:AbbrExtension'
admonition = 'markdown.extensions.admonition:AdmonitionExtension'
attr_list = 'markdown.extensions.attr_list:AttrListExtension'
codehilite = 'markdown.extensions.codehilite:CodeHiliteExtension'
def_list = 'markdown.extensions.def_list:DefListExtension'
extra = 'markdown.extensions.extra:ExtraExtension'
fenced_code = 'markdown.extensions.fenced_code:FencedCodeExtension'
footnotes = 'markdown.extensions.footnotes:FootnoteExtension'
md_in_html = 'markdown.extensions.md_in_html:MarkdownInHtmlExtension'
meta = 'markdown.extensions.meta:MetaExtension'
nl2br = 'markdown.extensions.nl2br:Nl2BrExtension'
sane_lists = 'markdown.extensions.sane_lists:SaneListExtension'
smarty = 'markdown.extensions.smarty:SmartyExtension'
tables = 'markdown.extensions.tables:TableExtension'
toc = 'markdown.extensions.toc:TocExtension'
wikilinks = 'markdown.extensions.wikilinks:WikiLinkExtension'
legacy_attrs = 'markdown.extensions.legacy_attrs:LegacyAttrExtension'
legacy_em = 'markdown.extensions.legacy_em:LegacyEmExtension'
[tool.setuptools]
packages = ['markdown', 'markdown.extensions']
[tool.setuptools.dynamic]
version = {attr = 'markdown.__meta__.__version__'}
././@PaxHeader 0000000 0000000 0000000 00000000034 00000000000 010212 x ustar 00 28 mtime=1723823713.0597374
markdown-3.7/scripts/ 0000755 0001751 0000177 00000000000 14657673141 014317 5 ustar 00runner docker ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1723823708.0
markdown-3.7/scripts/gen_ref_nav.py 0000755 0001751 0000177 00000004347 14657673134 017157 0 ustar 00runner docker """Generate the code reference pages and navigation."""
import textwrap
import yaml
from pathlib import Path
import mkdocs_gen_files
# Navigation tree that is written out as `reference/SUMMARY.md` at the end.
nav = mkdocs_gen_files.Nav()

# Extra mkdocstrings options, keyed by dotted module identifier.
per_module_options = {
    "markdown": {"summary": {"attributes": True, "functions": True, "classes": True}}
}

# Repository root: the parent of the directory holding this script.
base_path = Path(__file__).resolve().parent.parent

# Modules to document, in the order their pages are added to the navigation.
modules = [
    base_path.joinpath("markdown", "__init__.py"),
    base_path.joinpath("markdown", "preprocessors.py"),
    base_path.joinpath("markdown", "blockparser.py"),
    base_path.joinpath("markdown", "blockprocessors.py"),
    base_path.joinpath("markdown", "treeprocessors.py"),
    base_path.joinpath("markdown", "inlinepatterns.py"),
    base_path.joinpath("markdown", "postprocessors.py"),
    base_path.joinpath("markdown", "serializers.py"),
    base_path.joinpath("markdown", "util.py"),
    base_path.joinpath("markdown", "htmlparser.py"),
    base_path.joinpath("markdown", "test_tools.py"),
    *sorted(base_path.joinpath("markdown", "extensions").rglob("*.py")),
]

for src_path in modules:
    path = src_path.relative_to(base_path)
    module_path = path.with_suffix("")
    doc_path = path.with_suffix(".md")
    full_doc_path = Path("reference", doc_path)

    parts = tuple(module_path.parts)

    if parts[-1] == "__init__":
        # A package is documented on the `index.md` page of its directory.
        parts = parts[:-1]
        doc_path = doc_path.with_name("index.md")
        full_doc_path = full_doc_path.with_name("index.md")
    elif parts[-1].startswith("_"):
        # Skip other underscore-prefixed modules.
        continue

    # NOTE(review): this literal was garbled in the copy under review; the
    # `<code>` wrapper is a reconstruction -- confirm against the project history.
    nav_parts = [f"<code>{part}</code>" for part in parts]
    nav[nav_parts] = doc_path.as_posix()

    with mkdocs_gen_files.open(full_doc_path, "w") as fd:
        ident = ".".join(parts)
        fd.write(f"::: {ident}")

        if ident in per_module_options:
            options = per_module_options[ident]
        elif ident.startswith("markdown.extensions."):
            options = {"inherited_members": False}
        else:
            options = None

        if options is not None:
            yaml_options = yaml.dump({"options": options})
            # mkdocstrings reads the options block only when it is indented
            # four spaces beneath the `:::` line.
            fd.write(f"\n{textwrap.indent(yaml_options, prefix='    ')}")

    mkdocs_gen_files.set_edit_path(full_doc_path, ".." / path)

with mkdocs_gen_files.open("reference/SUMMARY.md", "w") as nav_file:
    nav_file.writelines(nav.build_literate_nav())
././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1723823708.0
markdown-3.7/scripts/griffe_extensions.py 0000644 0001751 0000177 00000010032 14657673134 020410 0 ustar 00runner docker """Griffe extensions."""
from __future__ import annotations
import ast
from typing import TYPE_CHECKING, Any
import textwrap
from griffe import Docstring, Extension, DocstringSectionAdmonition, DocstringSectionText, Visitor, Inspector
if TYPE_CHECKING:
from griffe import Class, Function, ObjectNode
def _deprecated(obj: Class | Function) -> str | None:
    """
    Return the first argument of `obj`'s `markdown.util.deprecated` decorator.

    The argument's source text is evaluated as a Python literal. `None` is
    returned when no decorator on `obj` has that callable path.
    """
    target = "markdown.util.deprecated"
    match = next((dec for dec in obj.decorators if dec.callable_path == target), None)
    if match is None:
        return None
    return ast.literal_eval(str(match.value.arguments[0]))
class DeprecatedExtension(Extension):
    """Griffe extension for `@markdown.util.deprecated` decorator support."""

    def _insert_message(self, obj: Function | Class, message: str) -> None:
        """Put a "Deprecated" warning admonition containing `message` first in `obj`'s docstring sections."""
        warning = DocstringSectionAdmonition(kind="warning", text=message, title="Deprecated")
        if not obj.docstring:
            # Objects without a docstring get an empty one to hold the warning.
            obj.docstring = Docstring("", parent=obj)
        obj.docstring.parsed.insert(0, warning)

    def on_class_instance(self, node: ast.AST | ObjectNode, cls: Class, agent: Visitor | Inspector, **kwargs: Any) -> None:  # noqa: ARG002
        """Add section to docstrings of deprecated classes."""
        message = _deprecated(cls)
        if not message:
            return
        self._insert_message(cls, message)
        cls.labels.add("deprecated")

    def on_function_instance(self, node: ast.AST | ObjectNode, func: Function, agent: Visitor | Inspector, **kwargs: Any) -> None:  # noqa: ARG002
        """Add section to docstrings of deprecated functions."""
        message = _deprecated(func)
        if not message:
            return
        self._insert_message(func, message)
        func.labels.add("deprecated")
class PriorityTableExtension(Extension):
    """ Griffe extension to insert a table of processor priority in specified functions. """

    def __init__(self, paths: list[str] | None = None) -> None:
        """Store `paths`, the function paths to process (a falsy value selects every function)."""
        super().__init__()
        self.paths = paths

    def linked_obj(self, value: str, path: str) -> str:
        """ Wrap object name in reference link. """
        return f'[`{value}`][{path}.{value}]'

    def on_function_instance(self, node: ast.AST | ObjectNode, func: Function, agent: Visitor | Inspector, **kwargs: Any) -> None:  # noqa: ARG002
        """Add table to specified function docstrings."""
        if self.paths and func.path not in self.paths:
            return  # skip objects that were not selected

        # Table header
        data = [
            'Class Instance | Name | Priority',
            '-------------- | ---- | :------:'
        ]

        # The function's path without its last dotted component; used as the
        # link target prefix for names found in the function body.
        parent_path = func.path.rsplit('.', 1)[0]

        # Extract table body from source code of function.
        for obj in node.body:
            if not (isinstance(obj, ast.Expr) and isinstance(obj.value, ast.Call)):
                continue
            call = obj.value
            # Only attribute-style calls (`something.register(...)`) are of
            # interest. A call to a bare name has no `attr`, so check the node
            # type first instead of raising `AttributeError`.
            if not (isinstance(call.func, ast.Attribute) and call.func.attr == 'register'):
                continue

            # Extract the arguments passed to `util.Registry.register`.
            _args = call.args
            cls = self.linked_obj(_args[0].func.id, parent_path)
            name = _args[1].value
            priority = str(_args[2].value)
            if func.name == 'build_inlinepatterns':
                # Include Pattern: first arg passed to class
                pattern = _args[0].args[0]
                if isinstance(pattern, ast.Constant):
                    # Pattern is a string
                    value = f'`"{pattern.value}"`'
                else:
                    # Pattern is a variable
                    value = self.linked_obj(pattern.id, parent_path)
                cls = f'{cls}({value})'
            data.append(f'{cls} | `{name}` | `{priority}`')

        table = '\n'.join(data)
        body = (
            f"Return a [`{func.returns.canonical_name}`][{func.returns.canonical_path}] instance which contains "
            "the following collection of classes with their assigned names and priorities.\n\n"
            f"{table}"
        )

        # Add to docstring.
        if not func.docstring:
            func.docstring = Docstring("", parent=func)
        sections = func.docstring.parsed
        sections.append(DocstringSectionText(body, title="Priority Table"))
././@PaxHeader 0000000 0000000 0000000 00000000034 00000000000 010212 x ustar 00 28 mtime=1723823713.1077383
markdown-3.7/setup.cfg 0000644 0001751 0000177 00000000046 14657673141 014451 0 ustar 00runner docker [egg_info]
tag_build =
tag_date = 0
././@PaxHeader 0000000 0000000 0000000 00000000034 00000000000 010212 x ustar 00 28 mtime=1723823713.0597374
markdown-3.7/tests/ 0000755 0001751 0000177 00000000000 14657673141 013772 5 ustar 00runner docker ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1723823708.0
markdown-3.7/tests/__init__.py 0000644 0001751 0000177 00000001341 14657673134 016104 0 ustar 00runner docker """
Python Markdown
A Python implementation of John Gruber's Markdown.
Documentation: https://python-markdown.github.io/
GitHub: https://github.com/Python-Markdown/markdown/
PyPI: https://pypi.org/project/Markdown/
Started by Manfred Stienstra (http://www.dwerg.net/).
Maintained for a few years by Yuri Takhteyev (http://www.freewisdom.org).
Currently maintained by Waylan Limberg (https://github.com/waylan),
Dmitry Shachnev (https://github.com/mitya57) and Isaac Muse (https://github.com/facelessuser).
Copyright 2007-2023 The Python Markdown Project (v. 1.7 and later)
Copyright 2004, 2005, 2006 Yuri Takhteyev (v. 0.2-1.6b)
Copyright 2004 Manfred Stienstra (the original version)
License: BSD (see LICENSE.md for details).
"""
././@PaxHeader 0000000 0000000 0000000 00000000034 00000000000 010212 x ustar 00 28 mtime=1723823713.0637374
markdown-3.7/tests/basic/ 0000755 0001751 0000177 00000000000 14657673141 015053 5 ustar 00runner docker ././@PaxHeader 0000000 0000000 0000000 00000000026 00000000000 010213 x ustar 00 22 mtime=1723823708.0
markdown-3.7/tests/basic/amps-and-angle-encoding.html 0000644 0001751 0000177 00000000763 14657673134 022321 0 ustar 00runner docker AT&T has an ampersand in their name.
AT&T is another way to write it.
This & that.
4 < 5.
6 > 5.
Here's a link with an ampersand in the URL.
Here's a link with an amersand in the link text: AT&T.
Here's an inline link.
Here's an inline link.