Eliminado venv y www del repositorio, agregué un requirements igual

This commit is contained in:
2020-11-22 21:14:46 -03:00
parent 18cf2d335a
commit 199a1e2a61
820 changed files with 15495 additions and 22017 deletions

View File

@@ -75,15 +75,18 @@ class Extension:
md = args[0]
try:
self.extendMarkdown(md)
except TypeError:
# Must be a 2.x extension. Pass in a dumby md_globals.
self.extendMarkdown(md, {})
warnings.warn(
"The 'md_globals' parameter of '{}.{}.extendMarkdown' is "
"deprecated.".format(self.__class__.__module__, self.__class__.__name__),
category=DeprecationWarning,
stacklevel=2
)
except TypeError as e:
if "missing 1 required positional argument" in str(e):
# Must be a 2.x extension. Pass in a dumby md_globals.
self.extendMarkdown(md, {})
warnings.warn(
"The 'md_globals' parameter of '{}.{}.extendMarkdown' is "
"deprecated.".format(self.__class__.__module__, self.__class__.__name__),
category=DeprecationWarning,
stacklevel=2
)
else:
raise
def extendMarkdown(self, md):
"""

View File

@@ -17,48 +17,53 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php)
'''
from . import Extension
from ..preprocessors import Preprocessor
from ..blockprocessors import BlockProcessor
from ..inlinepatterns import InlineProcessor
from ..util import AtomicString
import re
import xml.etree.ElementTree as etree
# Global Vars
ABBR_REF_RE = re.compile(r'[*]\[(?P<abbr>[^\]]*)\][ ]?:\s*(?P<title>.*)')
class AbbrExtension(Extension):
""" Abbreviation Extension for Python-Markdown. """
def extendMarkdown(self, md):
""" Insert AbbrPreprocessor before ReferencePreprocessor. """
md.preprocessors.register(AbbrPreprocessor(md), 'abbr', 12)
md.parser.blockprocessors.register(AbbrPreprocessor(md.parser), 'abbr', 16)
class AbbrPreprocessor(Preprocessor):
class AbbrPreprocessor(BlockProcessor):
""" Abbreviation Preprocessor - parse text for abbr references. """
def run(self, lines):
RE = re.compile(r'^[*]\[(?P<abbr>[^\]]*)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)
def test(self, parent, block):
return True
def run(self, parent, blocks):
'''
Find and remove all Abbreviation references from the text.
Each reference is set as a new AbbrPattern in the markdown instance.
'''
new_text = []
for line in lines:
m = ABBR_REF_RE.match(line)
if m:
abbr = m.group('abbr').strip()
title = m.group('title').strip()
self.md.inlinePatterns.register(
AbbrInlineProcessor(self._generate_pattern(abbr), title), 'abbr-%s' % abbr, 2
)
# Preserve the line to prevent raw HTML indexing issue.
# https://github.com/Python-Markdown/markdown/issues/584
new_text.append('')
else:
new_text.append(line)
return new_text
block = blocks.pop(0)
m = self.RE.search(block)
if m:
abbr = m.group('abbr').strip()
title = m.group('title').strip()
self.parser.md.inlinePatterns.register(
AbbrInlineProcessor(self._generate_pattern(abbr), title), 'abbr-%s' % abbr, 2
)
if block[m.end():].strip():
# Add any content after match back to blocks as separate block
blocks.insert(0, block[m.end():].lstrip('\n'))
if block[:m.start()].strip():
# Add any content before match back to blocks as separate block
blocks.insert(0, block[:m.start()].rstrip('\n'))
return True
# No match. Restore block.
blocks.insert(0, block)
return False
def _generate_pattern(self, text):
'''

View File

@@ -40,19 +40,82 @@ class AdmonitionProcessor(BlockProcessor):
RE = re.compile(r'(?:^|\n)!!! ?([\w\-]+(?: +[\w\-]+)*)(?: +"(.*?)")? *(?:\n|$)')
RE_SPACES = re.compile(' +')
def test(self, parent, block):
def __init__(self, parser):
    """Set up the state that `get_sibling` caches between calls.

    Arguments:
        parser: forwarded unchanged to the base class initializer.
    """
    super().__init__(parser)
    # Sibling element found by `get_sibling`, kept so the next call can
    # return it without repeating the lookup.
    self.current_sibling = None
    # Number of leading characters `get_sibling` strips from the block when
    # it returns the cached sibling. The attribute must be named
    # `content_indent`: that is the name `get_sibling` reads and writes
    # (the former `content_indention` spelling was never referenced).
    self.content_indent = 0
def get_sibling(self, parent, block):
"""Get sibling admonition.
Retrieve the appropriate sibling element. This can get tricky when
dealing with lists.
Returns a `(sibling, block)` tuple. `sibling` is `None` when no admonition
sibling applies; `block` has leading characters removed for each list
level that was descended into.
"""
# A previous call already resolved the sibling and cached it; return the
# cached result (stripping the recorded indent) and clear the cache.
if self.current_sibling is not None:
sibling = self.current_sibling
block = block[self.content_indent:]
self.current_sibling = None
self.content_indent = 0
return sibling, block
sibling = self.lastChild(parent)
return self.RE.search(block) or \
(block.startswith(' ' * self.tab_length) and sibling is not None and
sibling.get('class', '').find(self.CLASSNAME) != -1)
if sibling is None or sibling.get('class', '').find(self.CLASSNAME) == -1:
sibling = None
else:
# If the last child is a list and the content is indented sufficiently
# to be under it, then the content's sibling is in the list.
last_child = self.lastChild(sibling)
indent = 0
# NOTE(review): truth-testing an Element is False when it has no
# children, not only when it is None - confirm this is intended here.
while last_child:
if (
sibling and block.startswith(' ' * self.tab_length * 2) and
last_child and last_child.tag in ('ul', 'ol', 'dl')
):
# The expectation is that we'll find an <li> or <dt>.
# We should get its last child as well.
sibling = self.lastChild(last_child)
last_child = self.lastChild(sibling) if sibling else None
# Context has been lost at this point, so we must adjust the
# text's indentation level so it will be evaluated correctly
# under the list.
block = block[self.tab_length:]
indent += self.tab_length
else:
last_child = None
if not block.startswith(' ' * self.tab_length):
sibling = None
if sibling is not None:
# Cache the result so the next call can reuse it.
self.current_sibling = sibling
self.content_indent = indent
return sibling, block
def test(self, parent, block):
if self.RE.search(block):
return True
else:
return self.get_sibling(parent, block)[0] is not None
def run(self, parent, blocks):
sibling = self.lastChild(parent)
block = blocks.pop(0)
m = self.RE.search(block)
if m:
block = block[m.end():] # removes the first line
else:
sibling, block = self.get_sibling(parent, block)
block, theRest = self.detab(block)
@@ -65,6 +128,13 @@ class AdmonitionProcessor(BlockProcessor):
p.text = title
p.set('class', self.CLASSNAME_TITLE)
else:
# Sibling is a list item, but we need to wrap it's content should be wrapped in <p>
if sibling.tag in ('li', 'dd') and sibling.text:
text = sibling.text
sibling.text = ''
p = etree.SubElement(sibling, 'p')
p.text = text
div = sibling
self.parser.parseChunk(div, block)

View File

@@ -64,10 +64,10 @@ def isheader(elem):
class AttrListTreeprocessor(Treeprocessor):
BASE_RE = r'\{\:?([^\}\n]*)\}'
HEADER_RE = re.compile(r'[ ]+%s[ ]*$' % BASE_RE)
BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE)
INLINE_RE = re.compile(r'^%s' % BASE_RE)
BASE_RE = r'\{\:?[ ]*([^\}\n ][^\}\n]*)[ ]*\}'
HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
@@ -79,8 +79,8 @@ class AttrListTreeprocessor(Treeprocessor):
if self.md.is_block_level(elem.tag):
# Block level: check for attrs on last line of text
RE = self.BLOCK_RE
if isheader(elem) or elem.tag == 'dt':
# header or def-term: check for attrs at end of line
if isheader(elem) or elem.tag in ['dt', 'td', 'th']:
# header, def-term, or table cell: check for attrs at end of element
RE = self.HEADER_RE
if len(elem) and elem.tag == 'li':
# special case list items. children may include a ul or ol.
@@ -120,8 +120,6 @@ class AttrListTreeprocessor(Treeprocessor):
elif elem.text:
# no children. Get from text.
m = RE.search(elem.text)
if not m and elem.tag == 'td':
m = re.search(self.BASE_RE, elem.text)
if m:
self.assign_attrs(elem, m.group(1))
elem.text = elem.text[:m.start()]
@@ -161,6 +159,7 @@ class AttrListTreeprocessor(Treeprocessor):
class AttrListExtension(Extension):
def extendMarkdown(self, md):
md.treeprocessors.register(AttrListTreeprocessor(md), 'attr_list', 8)
md.registerExtension(self)
def makeExtension(**kwargs): # pragma: no cover

View File

@@ -17,13 +17,14 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php)
from . import Extension
from ..treeprocessors import Treeprocessor
from ..util import parseBoolValue
try:
try: # pragma: no cover
from pygments import highlight
from pygments.lexers import get_lexer_by_name, guess_lexer
from pygments.formatters import get_formatter_by_name
pygments = True
except ImportError:
except ImportError: # pragma: no cover
pygments = False
@@ -38,52 +39,78 @@ def parse_hl_lines(expr):
try:
return list(map(int, expr.split()))
except ValueError:
except ValueError: # pragma: no cover
return []
# ------------------ The Main CodeHilite Class ----------------------
class CodeHilite:
"""
Determine language of source code, and pass it into pygments hilighter.
Determine language of source code, and pass it on to the Pygments highlighter.
Basic Usage:
>>> code = CodeHilite(src = 'some text')
>>> html = code.hilite()
Usage:
code = CodeHilite(src=some_code, lang='python')
html = code.hilite()
Arguments:
* src: Source string or any object with a .readline attribute.
* linenums: (Boolean) Set line numbering to 'on' (True),
'off' (False) or 'auto'(None). Set to 'auto' by default.
* lang: String name of Pygments lexer to use for highlighting. Default: `None`.
* guess_lang: (Boolean) Turn language auto-detection
'on' or 'off' (on by default).
* guess_lang: Auto-detect which lexer to use. Ignored if `lang` is set to a valid
value. Default: `True`.
* css_class: Set class name of wrapper div ('codehilite' by default).
* use_pygments: Pass code to pygments for code highlighting. If `False`, the code is
instead wrapped for highlighting by a JavaScript library. Default: `True`.
* hl_lines: (List of integers) Lines to emphasize, 1-indexed.
* linenums: An alias to Pygments `linenos` formatter option. Default: `None`.
Low Level Usage:
>>> code = CodeHilite()
>>> code.src = 'some text' # String or anything with a .readline attr.
>>> code.linenos = True # Turns line numbering on or of.
>>> html = code.hilite()
* css_class: An alias to Pygments `cssclass` formatter option. Default: 'codehilite'.
* lang_prefix: Prefix prepended to the language when `use_pygments` is `False`.
Default: "language-".
Other Options:
Any other options are accepted and passed on to the lexer and formatter. Therefore,
valid options include any options which are accepted by the `html` formatter or
whichever lexer the code's language uses. Note that most lexers do not have any
options. However, a few have very useful options, such as PHP's `startinline` option.
Any invalid options are ignored without error.
Formatter options: https://pygments.org/docs/formatters/#HtmlFormatter
Lexer Options: https://pygments.org/docs/lexers/
Advanced Usage:
code = CodeHilite(
src = some_code,
lang = 'php',
startinline = True, # Lexer option. Snippet does not start with `<?php`.
linenostart = 42, # Formatter option. Snippet starts on line 42.
hl_lines = [45, 49, 50], # Formatter option. Highlight lines 45, 49, and 50.
linenos = 'inline' # Formatter option. Avoid alignment problems.
)
html = code.hilite()
"""
def __init__(self, src=None, linenums=None, guess_lang=True,
css_class="codehilite", lang=None, style='default',
noclasses=False, tab_length=4, hl_lines=None, use_pygments=True):
def __init__(self, src, **options):
self.src = src
self.lang = lang
self.linenums = linenums
self.guess_lang = guess_lang
self.css_class = css_class
self.style = style
self.noclasses = noclasses
self.tab_length = tab_length
self.hl_lines = hl_lines or []
self.use_pygments = use_pygments
self.lang = options.pop('lang', None)
self.guess_lang = options.pop('guess_lang', True)
self.use_pygments = options.pop('use_pygments', True)
self.lang_prefix = options.pop('lang_prefix', 'language-')
if 'linenos' not in options:
options['linenos'] = options.pop('linenums', None)
if 'cssclass' not in options:
options['cssclass'] = options.pop('css_class', 'codehilite')
if 'wrapcode' not in options:
# Override pygments default
options['wrapcode'] = True
# Disallow use of `full` option
options['full'] = False
self.options = options
def hilite(self):
"""
@@ -103,22 +130,16 @@ class CodeHilite:
if pygments and self.use_pygments:
try:
lexer = get_lexer_by_name(self.lang)
lexer = get_lexer_by_name(self.lang, **self.options)
except ValueError:
try:
if self.guess_lang:
lexer = guess_lexer(self.src)
lexer = guess_lexer(self.src, **self.options)
else:
lexer = get_lexer_by_name('text')
except ValueError:
lexer = get_lexer_by_name('text')
formatter = get_formatter_by_name('html',
linenos=self.linenums,
cssclass=self.css_class,
style=self.style,
noclasses=self.noclasses,
hl_lines=self.hl_lines,
wrapcode=True)
lexer = get_lexer_by_name('text', **self.options)
except ValueError: # pragma: no cover
lexer = get_lexer_by_name('text', **self.options)
formatter = get_formatter_by_name('html', **self.options)
return highlight(self.src, lexer, formatter)
else:
# just escape and build markup usable by JS highlighting libs
@@ -128,27 +149,30 @@ class CodeHilite:
txt = txt.replace('"', '&quot;')
classes = []
if self.lang:
classes.append('language-%s' % self.lang)
if self.linenums:
classes.append('{}{}'.format(self.lang_prefix, self.lang))
if self.options['linenos']:
classes.append('linenums')
class_str = ''
if classes:
class_str = ' class="%s"' % ' '.join(classes)
return '<pre class="%s"><code%s>%s</code></pre>\n' % \
(self.css_class, class_str, txt)
class_str = ' class="{}"'.format(' '.join(classes))
return '<pre class="{}"><code{}>{}\n</code></pre>\n'.format(
self.options['cssclass'],
class_str,
txt
)
def _parseHeader(self):
"""
Determines language of a code block from shebang line and whether said
line should be removed or left in place. If the sheband line contains a
path (even a single /) then it is assumed to be a real shebang line and
left alone. However, if no path is given (e.i.: #!python or :::python)
then it is assumed to be a mock shebang for language identifitation of
a code fragment and removed from the code block prior to processing for
code highlighting. When a mock shebang (e.i: #!python) is found, line
numbering is turned on. When colons are found in place of a shebang
(e.i.: :::python), line numbering is left in the current state - off
by default.
Determines language of a code block from shebang line and whether the
said line should be removed or left in place. If the sheband line
contains a path (even a single /) then it is assumed to be a real
shebang line and left alone. However, if no path is given
(e.i.: #!python or :::python) then it is assumed to be a mock shebang
for language identification of a code fragment and removed from the
code block prior to processing for code highlighting. When a mock
shebang (e.i: #!python) is found, line numbering is turned on. When
colons are found in place of a shebang (e.i.: :::python), line
numbering is left in the current state - off by default.
Also parses optional list of highlight lines, like:
@@ -176,16 +200,16 @@ class CodeHilite:
# we have a match
try:
self.lang = m.group('lang').lower()
except IndexError:
except IndexError: # pragma: no cover
self.lang = None
if m.group('path'):
# path exists - restore first line
lines.insert(0, fl)
if self.linenums is None and m.group('shebang'):
if self.options['linenos'] is None and m.group('shebang'):
# Overridable and Shebang exists - use line numbers
self.linenums = True
self.options['linenos'] = True
self.hl_lines = parse_hl_lines(m.group('hl_lines'))
self.options['hl_lines'] = parse_hl_lines(m.group('hl_lines'))
else:
# No match
lines.insert(0, fl)
@@ -201,9 +225,11 @@ class HiliteTreeprocessor(Treeprocessor):
def code_unescape(self, text):
"""Unescape code."""
text = text.replace("&amp;", "&")
text = text.replace("&lt;", "<")
text = text.replace("&gt;", ">")
# Escaped '&' should be replaced at the end to avoid
# conflicting with < and >.
text = text.replace("&amp;", "&")
return text
def run(self, root):
@@ -213,13 +239,9 @@ class HiliteTreeprocessor(Treeprocessor):
if len(block) == 1 and block[0].tag == 'code':
code = CodeHilite(
self.code_unescape(block[0].text),
linenums=self.config['linenums'],
guess_lang=self.config['guess_lang'],
css_class=self.config['css_class'],
style=self.config['pygments_style'],
noclasses=self.config['noclasses'],
tab_length=self.md.tab_length,
use_pygments=self.config['use_pygments']
style=self.config.pop('pygments_style', 'default'),
**self.config
)
placeholder = self.md.htmlStash.store(code.hilite())
# Clear codeblock in etree instance
@@ -237,7 +259,7 @@ class CodeHiliteExtension(Extension):
# define default configs
self.config = {
'linenums': [None,
"Use lines numbers. True=yes, False=no, None=auto"],
"Use lines numbers. True|table|inline=yes, False=no, None=auto"],
'guess_lang': [True,
"Automatic language detection - Default: True"],
'css_class': ["codehilite",
@@ -252,10 +274,25 @@ class CodeHiliteExtension(Extension):
'use_pygments': [True,
'Use Pygments to Highlight code blocks. '
'Disable if using a JavaScript library. '
'Default: True']
'Default: True'],
'lang_prefix': [
'language-',
'Prefix prepended to the language when use_pygments is false. Default: "language-"'
]
}
super().__init__(**kwargs)
for key, value in kwargs.items():
if key in self.config:
self.setConfig(key, value)
else:
# manually set unknown keywords.
if isinstance(value, str):
try:
# Attempt to parse str as a bool value
value = parseBoolValue(value, preserve_none=True)
except ValueError:
pass # Assume it's not a bool value. Use as-is.
self.config[key] = [value, '']
def extendMarkdown(self, md):
""" Add HilitePostprocessor to Markdown instance. """

View File

@@ -34,8 +34,8 @@ class DefListProcessor(BlockProcessor):
raw_block = blocks.pop(0)
m = self.RE.search(raw_block)
terms = [l.strip() for l in
raw_block[:m.start()].split('\n') if l.strip()]
terms = [term.strip() for term in
raw_block[:m.start()].split('\n') if term.strip()]
block = raw_block[m.end():]
no_indent = self.NO_INDENT_RE.match(block)
if no_indent:
@@ -87,11 +87,13 @@ class DefListProcessor(BlockProcessor):
class DefListIndentProcessor(ListIndentProcessor):
""" Process indented children of definition list items. """
ITEM_TYPES = ['dd']
LIST_TYPES = ['dl']
# Defintion lists need to be aware of all list types
ITEM_TYPES = ['dd', 'li']
LIST_TYPES = ['dl', 'ol', 'ul']
def create_item(self, parent, block):
""" Create a new dd and parse the block with it as the parent. """
""" Create a new dd or li (depending on parent) and parse the block with it as the parent. """
dd = etree.SubElement(parent, 'dd')
self.parser.parseBlocks(dd, [block])

View File

@@ -15,78 +15,130 @@ All changes Copyright 2008-2014 The Python Markdown Project
License: [BSD](https://opensource.org/licenses/bsd-license.php)
"""
from textwrap import dedent
from . import Extension
from ..preprocessors import Preprocessor
from .codehilite import CodeHilite, CodeHiliteExtension, parse_hl_lines
from .attr_list import get_attrs, AttrListExtension
from ..util import parseBoolValue
import re
class FencedCodeExtension(Extension):
def __init__(self, **kwargs):
self.config = {
'lang_prefix': ['language-', 'Prefix prepended to the language. Default: "language-"']
}
super().__init__(**kwargs)
def extendMarkdown(self, md):
""" Add FencedBlockPreprocessor to the Markdown instance. """
md.registerExtension(self)
md.preprocessors.register(FencedBlockPreprocessor(md), 'fenced_code_block', 25)
md.preprocessors.register(FencedBlockPreprocessor(md, self.getConfigs()), 'fenced_code_block', 25)
class FencedBlockPreprocessor(Preprocessor):
FENCED_BLOCK_RE = re.compile(r'''
(?P<fence>^(?:~{3,}|`{3,}))[ ]* # Opening ``` or ~~~
(\{?\.?(?P<lang>[\w#.+-]*))?[ ]* # Optional {, and lang
# Optional highlight lines, single- or double-quote-delimited
(hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?[ ]*
}?[ ]*\n # Optional closing }
(?P<code>.*?)(?<=\n)
(?P=fence)[ ]*$''', re.MULTILINE | re.DOTALL | re.VERBOSE)
CODE_WRAP = '<pre><code%s>%s</code></pre>'
LANG_TAG = ' class="%s"'
FENCED_BLOCK_RE = re.compile(
dedent(r'''
(?P<fence>^(?:~{3,}|`{3,}))[ ]* # opening fence
((\{(?P<attrs>[^\}\n]*)\})?| # (optional {attrs} or
(\.?(?P<lang>[\w#.+-]*))?[ ]* # optional (.)lang
(hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?) # optional hl_lines)
[ ]*\n # newline (end of opening fence)
(?P<code>.*?)(?<=\n) # the code block
(?P=fence)[ ]*$ # closing fence
'''),
re.MULTILINE | re.DOTALL | re.VERBOSE
)
def __init__(self, md):
def __init__(self, md, config):
super().__init__(md)
self.checked_for_codehilite = False
self.config = config
self.checked_for_deps = False
self.codehilite_conf = {}
self.use_attr_list = False
# List of options to convert to bool values
self.bool_options = [
'linenums',
'guess_lang',
'noclasses',
'use_pygments'
]
def run(self, lines):
""" Match and store Fenced Code Blocks in the HtmlStash. """
# Check for code hilite extension
if not self.checked_for_codehilite:
# Check for dependent extensions
if not self.checked_for_deps:
for ext in self.md.registeredExtensions:
if isinstance(ext, CodeHiliteExtension):
self.codehilite_conf = ext.config
break
self.codehilite_conf = ext.getConfigs()
if isinstance(ext, AttrListExtension):
self.use_attr_list = True
self.checked_for_codehilite = True
self.checked_for_deps = True
text = "\n".join(lines)
while 1:
m = self.FENCED_BLOCK_RE.search(text)
if m:
lang = ''
if m.group('lang'):
lang = self.LANG_TAG % m.group('lang')
lang, id, classes, config = None, '', [], {}
if m.group('attrs'):
id, classes, config = self.handle_attrs(get_attrs(m.group('attrs')))
if len(classes):
lang = classes.pop(0)
else:
if m.group('lang'):
lang = m.group('lang')
if m.group('hl_lines'):
# Support hl_lines outside of attrs for backward-compatibility
config['hl_lines'] = parse_hl_lines(m.group('hl_lines'))
# If config is not empty, then the codehighlite extension
# is enabled, so we call it to highlight the code
if self.codehilite_conf:
if self.codehilite_conf and self.codehilite_conf['use_pygments'] and config.get('use_pygments', True):
local_config = self.codehilite_conf.copy()
local_config.update(config)
# Combine classes with cssclass. Ensure cssclass is at end
# as pygments appends a suffix under certain circumstances.
# Ignore ID as Pygments does not offer an option to set it.
if classes:
local_config['css_class'] = '{} {}'.format(
' '.join(classes),
local_config['css_class']
)
highliter = CodeHilite(
m.group('code'),
linenums=self.codehilite_conf['linenums'][0],
guess_lang=self.codehilite_conf['guess_lang'][0],
css_class=self.codehilite_conf['css_class'][0],
style=self.codehilite_conf['pygments_style'][0],
use_pygments=self.codehilite_conf['use_pygments'][0],
lang=(m.group('lang') or None),
noclasses=self.codehilite_conf['noclasses'][0],
hl_lines=parse_hl_lines(m.group('hl_lines'))
lang=lang,
style=local_config.pop('pygments_style', 'default'),
**local_config
)
code = highliter.hilite()
else:
code = self.CODE_WRAP % (lang,
self._escape(m.group('code')))
id_attr = lang_attr = class_attr = kv_pairs = ''
if lang:
lang_attr = ' class="{}{}"'.format(self.config.get('lang_prefix', 'language-'), lang)
if classes:
class_attr = ' class="{}"'.format(' '.join(classes))
if id:
id_attr = ' id="{}"'.format(id)
if self.use_attr_list and config and not config.get('use_pygments', False):
# Only assign key/value pairs to code element if attr_list ext is enabled, key/value pairs
# were defined on the code block, and the `use_pygments` key was not set to True. The
# `use_pygments` key could be either set to False or not defined. It is omitted from output.
kv_pairs = ' ' + ' '.join(
'{k}="{v}"'.format(k=k, v=v) for k, v in config.items() if k != 'use_pygments'
)
code = '<pre{id}{cls}><code{lang}{kv}>{code}</code></pre>'.format(
id=id_attr,
cls=class_attr,
lang=lang_attr,
kv=kv_pairs,
code=self._escape(m.group('code'))
)
placeholder = self.md.htmlStash.store(code)
text = '{}\n{}\n{}'.format(text[:m.start()],
@@ -96,6 +148,24 @@ class FencedBlockPreprocessor(Preprocessor):
break
return text.split("\n")
def handle_attrs(self, attrs):
    """Sort attribute pairs into an id, a class list and config options.

    `attrs` is an iterable of `(key, value)` pairs. The key `'id'` sets the
    id (the last one wins), the key `'.'` appends a class, and every other
    key becomes a config entry. `hl_lines` values go through
    `parse_hl_lines`, and keys listed in `self.bool_options` go through
    `parseBoolValue`.

    Returns a tuple: (id, [list, of, classes], {configs}).
    """
    element_id = ''
    class_names = []
    options = {}
    for key, value in attrs:
        if key == 'id':
            element_id = value
            continue
        if key == '.':
            class_names.append(value)
            continue
        # Everything else is a config option; convert the value where needed.
        if key == 'hl_lines':
            value = parse_hl_lines(value)
        elif key in self.bool_options:
            value = parseBoolValue(value, fail_on_errors=False, preserve_none=True)
        options[key] = value
    return element_id, class_names, options
def _escape(self, txt):
""" basic html escaping """
txt = txt.replace('&', '&amp;')

View File

@@ -14,7 +14,7 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php)
"""
from . import Extension
from ..preprocessors import Preprocessor
from ..blockprocessors import BlockProcessor
from ..inlinepatterns import InlineProcessor
from ..treeprocessors import Treeprocessor
from ..postprocessors import Postprocessor
@@ -26,8 +26,6 @@ import xml.etree.ElementTree as etree
FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX
NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX
DEF_RE = re.compile(r'[ ]{0,3}\[\^([^\]]*)\]:\s*(.*)')
TABBED_RE = re.compile(r'((\t)|( ))(.*)')
RE_REF_ID = re.compile(r'(fnref)(\d+)')
@@ -72,8 +70,8 @@ class FootnoteExtension(Extension):
md.registerExtension(self)
self.parser = md.parser
self.md = md
# Insert a preprocessor before ReferencePreprocessor
md.preprocessors.register(FootnotePreprocessor(self), 'footnote', 15)
# Insert a blockprocessor before ReferencePreprocessor
md.parser.blockprocessors.register(FootnoteBlockProcessor(self), 'footnote', 17)
# Insert an inline pattern before ImageReferencePattern
FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah
@@ -202,106 +200,92 @@ class FootnoteExtension(Extension):
return div
class FootnotePreprocessor(Preprocessor):
class FootnoteBlockProcessor(BlockProcessor):
""" Find all footnote references and store for later use. """
RE = re.compile(r'^[ ]{0,3}\[\^([^\]]*)\]:[ ]*(.*)$', re.MULTILINE)
def __init__(self, footnotes):
super().__init__(footnotes.parser)
self.footnotes = footnotes
def run(self, lines):
"""
Loop through lines and find, set, and remove footnote definitions.
def test(self, parent, block):
return True
Keywords:
def run(self, parent, blocks):
""" Find, set, and remove footnote definitions. """
block = blocks.pop(0)
m = self.RE.search(block)
if m:
id = m.group(1)
fn_blocks = [m.group(2)]
* lines: A list of lines of text
Return: A list of lines of text with footnote definitions removed.
"""
newlines = []
i = 0
while True:
m = DEF_RE.match(lines[i])
if m:
fn, _i = self.detectTabbed(lines[i+1:])
fn.insert(0, m.group(2))
i += _i-1 # skip past footnote
footnote = "\n".join(fn)
self.footnotes.setFootnote(m.group(1), footnote.rstrip())
# Preserve a line for each block to prevent raw HTML indexing issue.
# https://github.com/Python-Markdown/markdown/issues/584
num_blocks = (len(footnote.split('\n\n')) * 2)
newlines.extend([''] * (num_blocks))
# Handle rest of block
therest = block[m.end():].lstrip('\n')
m2 = self.RE.search(therest)
if m2:
# Another footnote exists in the rest of this block.
# Any content before match is continuation of this footnote, which may be lazily indented.
before = therest[:m2.start()].rstrip('\n')
fn_blocks[0] = '\n'.join([fn_blocks[0], self.detab(before)]).lstrip('\n')
# Add back to blocks everything from begining of match forward for next iteration.
blocks.insert(0, therest[m2.start():])
else:
newlines.append(lines[i])
if len(lines) > i+1:
i += 1
else:
break
return newlines
# All remaining lines of block are continuation of this footnote, which may be lazily indented.
fn_blocks[0] = '\n'.join([fn_blocks[0], self.detab(therest)]).strip('\n')
def detectTabbed(self, lines):
# Check for child elements in remaining blocks.
fn_blocks.extend(self.detectTabbed(blocks))
footnote = "\n\n".join(fn_blocks)
self.footnotes.setFootnote(id, footnote.rstrip())
if block[:m.start()].strip():
# Add any content before match back to blocks as separate block
blocks.insert(0, block[:m.start()].rstrip('\n'))
return True
# No match. Restore block.
blocks.insert(0, block)
return False
def detectTabbed(self, blocks):
""" Find indented text and remove indent before further processing.
Keyword arguments:
* lines: an array of strings
Returns: a list of post processed items and the index of last line.
Returns: a list of blocks with indentation removed.
"""
items = []
blank_line = False # have we encountered a blank line yet?
i = 0 # to keep track of where we are
def detab(line):
match = TABBED_RE.match(line)
if match:
return match.group(4)
for line in lines:
if line.strip(): # Non-blank line
detabbed_line = detab(line)
if detabbed_line:
items.append(detabbed_line)
i += 1
continue
elif not blank_line and not DEF_RE.match(line):
# not tabbed but still part of first paragraph.
items.append(line)
i += 1
continue
fn_blocks = []
while blocks:
if blocks[0].startswith(' '*4):
block = blocks.pop(0)
# Check for new footnotes within this block and split at new footnote.
m = self.RE.search(block)
if m:
# Another footnote exists in this block.
# Any content before match is continuation of this footnote, which may be lazily indented.
before = block[:m.start()].rstrip('\n')
fn_blocks.append(self.detab(before))
# Add back to blocks everything from beginning of match forward for next iteration.
blocks.insert(0, block[m.start():])
# End of this footnote.
break
else:
return items, i+1
# Entire block is part of this footnote.
fn_blocks.append(self.detab(block))
else:
# End of this footnote.
break
return fn_blocks
else: # Blank line: _maybe_ we are done.
blank_line = True
i += 1 # advance
def detab(self, block):
""" Remove one level of indent from a block.
# Find the next non-blank line
for j in range(i, len(lines)):
if lines[j].strip():
next_line = lines[j]
break
else:
# Include extreaneous padding to prevent raw HTML
# parsing issue: https://github.com/Python-Markdown/markdown/issues/584
items.append("")
i += 1
else:
break # There is no more text; we are done.
# Check if the next non-blank line is tabbed
if detab(next_line): # Yes, more work to do.
items.append("")
continue
else:
break # No, we are done.
else:
i += 1
return items, i
Preserve lazily indented blocks by only removing indent from indented lines.
"""
lines = block.split('\n')
for i, line in enumerate(lines):
if line.startswith(' '*4):
lines[i] = line[4:]
return '\n'.join(lines)
class FootnoteInlineProcessor(InlineProcessor):
@@ -347,8 +331,8 @@ class FootnotePostTreeprocessor(Treeprocessor):
self.offset += 1
# Add all the new duplicate links.
el = list(li)[-1]
for l in links:
el.append(l)
for link in links:
el.append(link)
break
def get_num_duplicates(self, li):

View File

@@ -21,7 +21,7 @@ EMPHASIS_RE = r'(_)([^_]+)\1'
STRONG_RE = r'(_{2})(.+?)\1'
# __strong_em___
STRONG_EM_RE = r'(_)\1(?!\1)(.+?)\1(?!\1)(.+?)\1{3}'
STRONG_EM_RE = r'(_)\1(?!\1)([^_]+?)\1(?!\1)(.+?)\1{3}'
class LegacyUnderscoreProcessor(UnderscoreProcessor):

View File

@@ -16,68 +16,313 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php)
from . import Extension
from ..blockprocessors import BlockProcessor
from ..preprocessors import Preprocessor
from ..postprocessors import RawHtmlPostprocessor
from .. import util
import re
from ..htmlparser import HTMLExtractor
import xml.etree.ElementTree as etree
class MarkdownInHtmlProcessor(BlockProcessor):
"""Process Markdown Inside HTML Blocks."""
def test(self, parent, block):
return block == util.TAG_PLACEHOLDER % \
str(self.parser.blockprocessors.tag_counter + 1)
class HTMLExtractorExtra(HTMLExtractor):
"""
Override HTMLExtractor and create etree Elements for any elements which should have content parsed as Markdown.
"""
def _process_nests(self, element, block):
"""Process the element's child elements in self.run."""
# Build list of indexes of each nest within the parent element.
nest_index = [] # a list of tuples: (left index, right index)
i = self.parser.blockprocessors.tag_counter + 1
while len(self._tag_data) > i and self._tag_data[i]['left_index']:
left_child_index = self._tag_data[i]['left_index']
right_child_index = self._tag_data[i]['right_index']
nest_index.append((left_child_index - 1, right_child_index))
i += 1
def __init__(self, md, *args, **kwargs):
# All block-level tags.
self.block_level_tags = set(md.block_level_elements.copy())
# Block-level tags in which the content only gets span level parsing
self.span_tags = set(
['address', 'dd', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'legend', 'li', 'p', 'td', 'th']
)
# Block-level tags which never get their content parsed.
self.raw_tags = set(['canvas', 'math', 'option', 'pre', 'script', 'style', 'textarea'])
# Block-level tags in which the content gets parsed as blocks
super().__init__(md, *args, **kwargs)
# Create each nest subelement.
for i, (left_index, right_index) in enumerate(nest_index[:-1]):
self.run(element, block[left_index:right_index],
block[right_index:nest_index[i + 1][0]], True)
self.run(element, block[nest_index[-1][0]:nest_index[-1][1]], # last
block[nest_index[-1][1]:], True) # nest
self.block_tags = set(self.block_level_tags) - (self.span_tags | self.raw_tags | self.empty_tags)
self.span_and_blocks_tags = self.block_tags | self.span_tags
def run(self, parent, blocks, tail=None, nest=False):
self._tag_data = self.parser.md.htmlStash.tag_data
def reset(self):
    """Reset this instance, discarding all data that has not been processed yet."""
    # Parse state of each open element: one of 'block', 'span', 'off', or None.
    self.mdstate = []
    # Tags opened while Markdown parsing is enabled (e.g. `markdown=1`).
    self.mdstack = []
    # Start from an empty tree; elements are built here until they are stashed.
    self.treebuilder = etree.TreeBuilder()
    super().reset()
self.parser.blockprocessors.tag_counter += 1
tag = self._tag_data[self.parser.blockprocessors.tag_counter]
def close(self):
    """Flush any buffered data and close elements that were left open."""
    super().close()
    if not self.mdstack:
        return
    # Ending the outermost open tag is sufficient: handle_endtag also closes
    # every unclosed child that sits above it on the stack.
    outermost = self.mdstack[0]
    self.handle_endtag(outermost)
# Create Element
markdown_value = tag['attrs'].pop('markdown')
element = etree.SubElement(parent, tag['tag'], tag['attrs'])
def get_element(self):
    """ Return the element held by the treebuilder and install a fresh treebuilder for later use. """
    # The right-hand side is evaluated left to right, so the old builder is
    # closed before its replacement is created.
    finished, self.treebuilder = self.treebuilder.close(), etree.TreeBuilder()
    return finished
# Slice Off Block
if nest:
self.parser.parseBlocks(parent, tail) # Process Tail
block = blocks[1:]
else: # includes nests since a third level of nesting isn't supported
block = blocks[tag['left_index'] + 1: tag['right_index']]
del blocks[:tag['right_index']]
def get_state(self, tag, attrs):
    """ Return the parse state for `tag` given its `markdown` attr: 'block', 'span', 'off', or None. """
    requested = attrs.get('markdown', '0')
    if requested == 'markdown':
        # A valueless `<tag markdown>` means the same as `<tag markdown='1'>`.
        requested = '1'

    # The innermost open element, if any, can restrict what the child may request.
    inherited = self.mdstate[-1] if self.mdstate else None
    if inherited == 'off' or (inherited == 'span' and requested != '0'):
        # The parent state wins only when it is more restrictive than the attribute.
        requested = inherited

    wants_block = (
        (requested == '1' and tag in self.block_tags)
        or (requested == 'block' and tag in self.span_and_blocks_tags)
    )
    if wants_block:
        return 'block'

    wants_span = (
        (requested == '1' and tag in self.span_tags)
        or (requested == 'span' and tag in self.span_and_blocks_tags)
    )
    if wants_span:
        return 'span'

    if tag in self.block_level_tags:
        return 'off'
    return None  # pragma: no cover
# Process Text
if (self.parser.blockprocessors.contain_span_tags.match( # Span Mode
tag['tag']) and markdown_value != 'block') or \
markdown_value == 'span':
element.text = '\n'.join(block)
else: # Block Mode
i = self.parser.blockprocessors.tag_counter + 1
if len(self._tag_data) > i and self._tag_data[i]['left_index']:
first_subelement_index = self._tag_data[i]['left_index'] - 1
self.parser.parseBlocks(
element, block[:first_subelement_index])
if not nest:
block = self._process_nests(element, block)
def at_line_start(self):
    """Report whether the parser is positioned at the start of a line."""
    at_start = super().at_line_start()
    if at_start:
        return at_start
    # Also count as a line start when the last piece of collected output
    # ends with a newline.
    ends_with_newline = bool(self.cleandoc) and self.cleandoc[-1].endswith('\n')
    return True if ends_with_newline else at_start
def handle_starttag(self, tag, attrs):
    """
    Handle an opening tag.

    Empty tags are forwarded to `handle_empty_tag`. Block-level tags either fall back to the parent class
    behavior or are opened on the treebuilder so their content can be parsed as Markdown later. Any other
    tag is treated as span level and passed on as text.

    `attrs` is the list of `(name, value)` pairs supplied by the HTML parser.
    """
    # Handle tags that should always be empty and do not specify a closing tag
    if tag in self.empty_tags:
        attrs = {key: value if value is not None else key for key, value in attrs}
        if "markdown" in attrs:
            # Re-serialize the tag so the `markdown` attribute does not reach the output.
            attrs.pop('markdown')
            element = etree.Element(tag, attrs)
            data = etree.tostring(element, encoding='unicode', method='html')
        else:
            data = self.get_starttag_text()
        self.handle_empty_tag(data, True)
        return

    if tag in self.block_level_tags:
        # Valueless attr (ex: `<tag checked>`) results in `[('checked', None)]`.
        # Convert to `{'checked': 'checked'}`.
        attrs = {key: value if value is not None else key for key, value in attrs}
        state = self.get_state(tag, attrs)

        if self.inraw or (state in [None, 'off'] and not self.mdstack) or not self.at_line_start():
            # fall back to default behavior
            attrs.pop('markdown', None)
            super().handle_starttag(tag, attrs)
        else:
            if 'p' in self.mdstack and tag in self.block_level_tags:
                # Close unclosed 'p' tag
                self.handle_endtag('p')
            self.mdstate.append(state)
            self.mdstack.append(tag)
            # Record the resolved state on the element itself.
            attrs['markdown'] = state
            self.treebuilder.start(tag, attrs)
    else:
        # Span level tag
        if self.inraw:
            super().handle_starttag(tag, attrs)
        else:
            text = self.get_starttag_text()
            if self.mdstate and self.mdstate[-1] == "off":
                # Markdown parsing is off here: stash the raw tag and emit its placeholder.
                self.handle_data(self.md.htmlStash.store(text))
            else:
                self.handle_data(text)
def handle_endtag(self, tag):
    """
    Handle a closing tag.

    Inside raw content the tag is delegated to the parent class. A block-level tag that is currently open
    closes itself and any unclosed children; once the stack is empty the finished element is stashed.
    Every other closing tag (span level, or an orphan block-level tag) is passed on as text.
    """
    if self.inraw:
        super().handle_endtag(tag)
        return

    if tag in self.block_level_tags and tag in self.mdstack:
        # Close element and any unclosed children. `tag` is on the stack,
        # so the loop stops when it is reached.
        closed = None
        while closed != tag:
            closed = self.mdstack.pop()
            self.mdstate.pop()
            self.treebuilder.end(closed)

        if self.mdstack:
            return

        # Last item in stack is closed. Stash it
        element = self.get_element()
        # Look at the last entry to see if it ends in newlines.
        previous = self.cleandoc[-1] if self.cleandoc else ''
        # If we only have one newline before block element, add another
        if previous.endswith('\n') and not previous.endswith('\n\n'):
            self.cleandoc.append('\n')
        placeholder = self.md.htmlStash.store(element)
        self.cleandoc.extend([placeholder, '\n\n'])
        self.state = []
        return

    # Span level tag, or an orphan block-level closing tag treated as span level.
    text = self.get_endtag_text(tag)
    if self.mdstate and self.mdstate[-1] == "off":
        self.handle_data(self.md.htmlStash.store(text))
    else:
        self.handle_data(text)
def handle_startendtag(self, tag, attrs):
    """Handle a self-closing tag (`<tag />`) by forwarding its text to `handle_empty_tag`."""
    data = None
    if tag in self.empty_tags:
        # Valueless attributes arrive as `(name, None)`; map them to `name="name"`.
        attrs = {name: (name if value is None else value) for name, value in attrs}
        if "markdown" in attrs:
            # Re-serialize the tag without its `markdown` attribute.
            del attrs['markdown']
            data = etree.tostring(etree.Element(tag, attrs), encoding='unicode', method='html')
    if data is None:
        # Nothing to strip: keep the tag text exactly as it was written.
        data = self.get_starttag_text()
    self.handle_empty_tag(data, is_block=self.md.is_block_level(tag))
def handle_data(self, data):
    """Send text to the open element being built, or to the parent class when none is open."""
    if self.mdstack and not self.inraw:
        self.treebuilder.data(data)
        return
    super().handle_data(data)
def handle_empty_tag(self, data, is_block):
    """Handle the text of an empty tag, stashing it when it must not be parsed as Markdown."""
    if self.inraw or not self.mdstack:
        super().handle_empty_tag(data, is_block)
        return

    if self.at_line_start() and is_block:
        # A block-level tag on its own line: stash it and surround the
        # placeholder with newlines.
        placeholder = self.md.htmlStash.store(data)
        self.handle_data('\n' + placeholder + '\n\n')
    elif self.mdstate and self.mdstate[-1] == "off":
        # Markdown parsing is off for the enclosing element: emit a placeholder.
        self.handle_data(self.md.htmlStash.store(data))
    else:
        self.handle_data(data)
class HtmlBlockPreprocessor(Preprocessor):
    """Remove html blocks from the text and store them for later retrieval."""

    def run(self, lines):
        """Feed the joined `lines` through `HTMLExtractorExtra` and return the cleaned document as lines."""
        document = '\n'.join(lines)
        extractor = HTMLExtractorExtra(self.md)
        extractor.feed(document)
        extractor.close()
        cleaned = ''.join(extractor.cleandoc)
        return cleaned.split('\n')
class MarkdownInHtmlProcessor(BlockProcessor):
    """Process Markdown inside HTML blocks which have been stored in the HtmlStash."""

    def test(self, parent, block):
        # Always return True. `run` will return `False` if it is not a valid match.
        return True

    def _parse_blocks(self, text):
        """Parse `text` as block-level Markdown and return the resulting elements in document order."""
        # Parse into a throwaway container so the caller decides where the
        # new elements are inserted.
        holder = etree.Element('div')
        self.parser.parseBlocks(holder, text.rstrip('\n').split('\n\n'))
        return list(holder)

    def parse_element_content(self, element):
        """
        Recursively parse the text content of an etree Element as Markdown.

        Any block level elements generated from the Markdown will be inserted as children of the element in place
        of the text content. All `markdown` attributes are removed. For any elements in which Markdown parsing has
        been disabled, the text content of it and its children are wrapped in an `AtomicString`.
        """
        mode = element.attrib.pop('markdown', 'off')

        if mode == 'span':
            # Span level parsing will be handled by inlineprocessors.
            # Walk children here to remove any `markdown` attributes.
            for child in list(element):
                self.parse_element_content(child)
            return

        if mode != 'block':
            # Disable inline parsing for everything else.
            element.text = util.AtomicString('' if element.text is None else element.text)
            for child in list(element):
                self.parse_element_content(child)
                if child.tail:
                    child.tail = util.AtomicString(child.tail)
            return

        # Parse content as block level.
        # The order of operations (children, then tails, then text) matters: elements created from parsing
        # Markdown text must not be processed again as raw HTML, and the order of elements has to be preserved.

        # Recursively parse existing children from raw HTML.
        for child in list(element):
            self.parse_element_content(child)

        # Parse the Markdown text in the tail of each child, remembering where the results belong.
        pending = []
        for pos, child in enumerate(element):
            if child.tail:
                tail_text = child.tail
                child.tail = ''
                pending.append((pos + 1, self._parse_blocks(tail_text)))

        # Insert from the last position backwards so earlier positions stay valid; inserting each group in
        # reverse at a fixed index keeps its elements in their original order.
        for pos, new_children in reversed(pending):
            for item in reversed(new_children):
                element.insert(pos, item)

        # Parse the element's own text last; its output goes in front of all existing children.
        if element.text:
            leading_text = element.text
            element.text = ''
            for item in reversed(self._parse_blocks(leading_text)):
                element.insert(0, item)

    def run(self, parent, blocks):
        """Replace a leading stash placeholder with its parsed element; return whether a match was handled."""
        match = util.HTML_PLACEHOLDER_RE.match(blocks[0])
        if not match:
            # No match found.
            return False

        index = int(match.group(1))
        stash = self.parser.md.htmlStash
        element = stash.rawHtmlBlocks[index]
        if not isinstance(element, etree.Element):
            # The placeholder does not refer to an etree element. No match found.
            return False

        # We have a matched element. Process it.
        blocks.pop(0)
        self.parse_element_content(element)
        parent.append(element)
        # Cleanup stash. Replace element with empty string to avoid confusing postprocessor.
        stash.rawHtmlBlocks.pop(index)
        stash.rawHtmlBlocks.insert(index, '')
        # Confirm the match to the blockparser.
        return True
class MarkdownInHTMLPostprocessor(RawHtmlPostprocessor):
    def stash_to_string(self, text):
        """ Override default to serialize any etree elements still in the stash. """
        is_element = isinstance(text, etree.Element)
        return self.md.serializer(text) if is_element else str(text)
class MarkdownInHtmlExtension(Extension):
@@ -86,14 +331,14 @@ class MarkdownInHtmlExtension(Extension):
def extendMarkdown(self, md):
""" Register extension instances. """
# Turn on processing of markdown text within raw html
md.preprocessors['html_block'].markdown_in_raw = True
# Replace raw HTML preprocessor
md.preprocessors.register(HtmlBlockPreprocessor(md), 'html_block', 20)
# Add blockprocessor which handles the placeholders for etree elements
md.parser.blockprocessors.register(
MarkdownInHtmlProcessor(md.parser), 'markdown_block', 105
)
md.parser.blockprocessors.tag_counter = -1
md.parser.blockprocessors.contain_span_tags = re.compile(
r'^(p|h[1-6]|li|dd|dt|td|th|legend|address)$', re.IGNORECASE)
# Replace raw HTML postprocessor
md.postprocessors.register(MarkdownInHTMLPostprocessor(md), 'raw_html', 30)
def makeExtension(**kwargs): # pragma: no cover

View File

@@ -15,18 +15,24 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php)
from . import Extension
from ..treeprocessors import Treeprocessor
from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE
from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE, AtomicString
from ..postprocessors import UnescapePostprocessor
import re
import html
import unicodedata
import xml.etree.ElementTree as etree
def slugify(value, separator, encoding='ascii'):
    """
    Slugify a string, to make it URL friendly.

    `value` is NFKD-normalized and round-tripped through `encoding`, dropping characters that cannot be
    encoded. Characters other than word characters, whitespace and hyphens are then removed, the result is
    stripped and lowercased, and every run of whitespace and/or `separator` characters is collapsed into a
    single `separator`.

    `separator` is always treated as literal text, so values such as `^`, `--` or a backslash are safe.
    """
    # NOTE(review): with a non-ASCII `encoding` the combining marks produced by NFKD survive the
    # encode/decode step and are then removed by the `[^\w\s-]` filter, so accented letters appear to
    # lose their accents rather than being preserved - confirm whether that is intended.
    value = unicodedata.normalize('NFKD', value).encode(encoding, 'ignore')
    value = re.sub(r'[^\w\s-]', '', value.decode(encoding)).strip().lower()
    # Escape the separator inside the character class and return it from a function so that neither the
    # pattern nor the replacement template can misinterpret special characters.
    return re.sub(r'[{}\s]+'.format(re.escape(separator)), lambda _match: separator, value)
def slugify_unicode(value, separator):
    """ Slugify a string using the UTF-8 encoding, so non-ASCII characters are not dropped by the encode step. """
    return slugify(value, separator, encoding='utf-8')
IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$')
@@ -44,6 +50,18 @@ def unique(id, ids):
return id
def get_name(el):
    """Get title name: the element's text content, with `AtomicString` pieces HTML-unescaped."""
    pieces = (
        html.unescape(chunk) if isinstance(chunk, AtomicString) else chunk
        for chunk in el.itertext()
    )
    return ''.join(pieces).strip()
def stashedHTML2text(text, md, strip_entities=True):
""" Extract raw HTML from stash, reduce to plain text and swap with placeholder. """
def _html_sub(m):
@@ -253,7 +271,7 @@ class TocTreeprocessor(Treeprocessor):
self.set_level(el)
if int(el.tag[-1]) < self.toc_top or int(el.tag[-1]) > self.toc_bottom:
continue
text = ''.join(el.itertext()).strip()
text = get_name(el)
# Do not override pre-existing ids
if "id" not in el.attrib: