Eliminado venv y www del repositorio, agrege un requirements igual
This commit is contained in:
@@ -75,15 +75,18 @@ class Extension:
|
||||
md = args[0]
|
||||
try:
|
||||
self.extendMarkdown(md)
|
||||
except TypeError:
|
||||
# Must be a 2.x extension. Pass in a dumby md_globals.
|
||||
self.extendMarkdown(md, {})
|
||||
warnings.warn(
|
||||
"The 'md_globals' parameter of '{}.{}.extendMarkdown' is "
|
||||
"deprecated.".format(self.__class__.__module__, self.__class__.__name__),
|
||||
category=DeprecationWarning,
|
||||
stacklevel=2
|
||||
)
|
||||
except TypeError as e:
|
||||
if "missing 1 required positional argument" in str(e):
|
||||
# Must be a 2.x extension. Pass in a dummy md_globals.
|
||||
self.extendMarkdown(md, {})
|
||||
warnings.warn(
|
||||
"The 'md_globals' parameter of '{}.{}.extendMarkdown' is "
|
||||
"deprecated.".format(self.__class__.__module__, self.__class__.__name__),
|
||||
category=DeprecationWarning,
|
||||
stacklevel=2
|
||||
)
|
||||
else:
|
||||
raise
|
||||
|
||||
def extendMarkdown(self, md):
|
||||
"""
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -17,48 +17,53 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php)
|
||||
'''
|
||||
|
||||
from . import Extension
|
||||
from ..preprocessors import Preprocessor
|
||||
from ..blockprocessors import BlockProcessor
|
||||
from ..inlinepatterns import InlineProcessor
|
||||
from ..util import AtomicString
|
||||
import re
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
# Global Vars
|
||||
ABBR_REF_RE = re.compile(r'[*]\[(?P<abbr>[^\]]*)\][ ]?:\s*(?P<title>.*)')
|
||||
|
||||
|
||||
class AbbrExtension(Extension):
|
||||
""" Abbreviation Extension for Python-Markdown. """
|
||||
|
||||
def extendMarkdown(self, md):
|
||||
""" Insert AbbrPreprocessor before ReferencePreprocessor. """
|
||||
md.preprocessors.register(AbbrPreprocessor(md), 'abbr', 12)
|
||||
md.parser.blockprocessors.register(AbbrPreprocessor(md.parser), 'abbr', 16)
|
||||
|
||||
|
||||
class AbbrPreprocessor(Preprocessor):
|
||||
class AbbrPreprocessor(BlockProcessor):
|
||||
""" Abbreviation Preprocessor - parse text for abbr references. """
|
||||
|
||||
def run(self, lines):
|
||||
RE = re.compile(r'^[*]\[(?P<abbr>[^\]]*)\][ ]?:[ ]*\n?[ ]*(?P<title>.*)$', re.MULTILINE)
|
||||
|
||||
def test(self, parent, block):
|
||||
return True
|
||||
|
||||
def run(self, parent, blocks):
|
||||
'''
|
||||
Find and remove all Abbreviation references from the text.
|
||||
Each reference is set as a new AbbrPattern in the markdown instance.
|
||||
|
||||
'''
|
||||
new_text = []
|
||||
for line in lines:
|
||||
m = ABBR_REF_RE.match(line)
|
||||
if m:
|
||||
abbr = m.group('abbr').strip()
|
||||
title = m.group('title').strip()
|
||||
self.md.inlinePatterns.register(
|
||||
AbbrInlineProcessor(self._generate_pattern(abbr), title), 'abbr-%s' % abbr, 2
|
||||
)
|
||||
# Preserve the line to prevent raw HTML indexing issue.
|
||||
# https://github.com/Python-Markdown/markdown/issues/584
|
||||
new_text.append('')
|
||||
else:
|
||||
new_text.append(line)
|
||||
return new_text
|
||||
block = blocks.pop(0)
|
||||
m = self.RE.search(block)
|
||||
if m:
|
||||
abbr = m.group('abbr').strip()
|
||||
title = m.group('title').strip()
|
||||
self.parser.md.inlinePatterns.register(
|
||||
AbbrInlineProcessor(self._generate_pattern(abbr), title), 'abbr-%s' % abbr, 2
|
||||
)
|
||||
if block[m.end():].strip():
|
||||
# Add any content after match back to blocks as separate block
|
||||
blocks.insert(0, block[m.end():].lstrip('\n'))
|
||||
if block[:m.start()].strip():
|
||||
# Add any content before match back to blocks as separate block
|
||||
blocks.insert(0, block[:m.start()].rstrip('\n'))
|
||||
return True
|
||||
# No match. Restore block.
|
||||
blocks.insert(0, block)
|
||||
return False
|
||||
|
||||
def _generate_pattern(self, text):
|
||||
'''
|
||||
|
||||
@@ -40,19 +40,82 @@ class AdmonitionProcessor(BlockProcessor):
|
||||
RE = re.compile(r'(?:^|\n)!!! ?([\w\-]+(?: +[\w\-]+)*)(?: +"(.*?)")? *(?:\n|$)')
|
||||
RE_SPACES = re.compile(' +')
|
||||
|
||||
def test(self, parent, block):
|
||||
def __init__(self, parser):
    """Initialize the processor and its sibling-tracking state.

    Args:
        parser: The block parser; forwarded unchanged to the base-class
            initializer.
    """
    super().__init__(parser)

    # Sibling element cached by `get_sibling` so that a later call can
    # reuse it without walking the tree again; None when nothing is cached.
    self.current_sibling = None
    # Number of leading characters `get_sibling` strips from the block when
    # it hands back the cached sibling. The attribute must be named
    # `content_indent`, because that is the name `get_sibling` reads and
    # writes.
    self.content_indent = 0
|
||||
|
||||
def get_sibling(self, parent, block):
    """Get the sibling admonition element for ``block``.

    Retrieve the appropriate sibling element. This can get tricky when
    dealing with lists nested inside an admonition.

    Args:
        parent: Element whose last child is inspected.
        block: Text of the block being considered.

    Returns:
        A ``(sibling, block)`` tuple. ``sibling`` is the element the block
        belongs under, or ``None`` when there is none. ``block`` is the text
        with one ``tab_length`` of indentation removed for every list level
        that was descended into.
    """
    # A previous call already located the sibling: hand it back once,
    # strip the recorded indentation, and reset the cache.
    if self.current_sibling is not None:
        sibling = self.current_sibling
        block = block[self.content_indent:]
        self.current_sibling = None
        self.content_indent = 0
        return sibling, block

    sibling = self.lastChild(parent)

    if sibling is None or sibling.get('class', '').find(self.CLASSNAME) == -1:
        sibling = None
    else:
        # If the last child is a list and the content is indented
        # sufficiently to be under it, then the content's sibling is in
        # the list.
        last_child = self.lastChild(sibling)
        indent = 0
        # Compare against None explicitly: truth-testing an ElementTree
        # element is deprecated and reports whether it has children.
        while last_child is not None:
            if (
                sibling is not None
                and block.startswith(' ' * self.tab_length * 2)
                and last_child.tag in ('ul', 'ol', 'dl')
            ):
                # The expectation is that we'll find an <li> or <dt>.
                # We should get its last child as well.
                sibling = self.lastChild(last_child)
                last_child = self.lastChild(sibling) if sibling is not None else None

                # Context has been lost at this point, so we must adjust
                # the text's indentation level so it will be evaluated
                # correctly under the list.
                block = block[self.tab_length:]
                indent += self.tab_length
            else:
                last_child = None

        if not block.startswith(' ' * self.tab_length):
            sibling = None

        if sibling is not None:
            # Cache the result so the next call can return it directly.
            self.current_sibling = sibling
            self.content_indent = indent

    return sibling, block
|
||||
|
||||
def test(self, parent, block):
    """Report whether this processor should handle ``block``.

    The block qualifies when it matches ``self.RE``, or otherwise when
    ``get_sibling`` finds a sibling element for it.
    """
    if not self.RE.search(block):
        # No match in this block: it only qualifies when a sibling is found.
        found, _ = self.get_sibling(parent, block)
        return found is not None
    return True
|
||||
|
||||
def run(self, parent, blocks):
|
||||
sibling = self.lastChild(parent)
|
||||
block = blocks.pop(0)
|
||||
m = self.RE.search(block)
|
||||
|
||||
if m:
|
||||
block = block[m.end():] # removes the first line
|
||||
else:
|
||||
sibling, block = self.get_sibling(parent, block)
|
||||
|
||||
block, theRest = self.detab(block)
|
||||
|
||||
@@ -65,6 +128,13 @@ class AdmonitionProcessor(BlockProcessor):
|
||||
p.text = title
|
||||
p.set('class', self.CLASSNAME_TITLE)
|
||||
else:
|
||||
# Sibling is a list item, but its content should be wrapped in <p>
|
||||
if sibling.tag in ('li', 'dd') and sibling.text:
|
||||
text = sibling.text
|
||||
sibling.text = ''
|
||||
p = etree.SubElement(sibling, 'p')
|
||||
p.text = text
|
||||
|
||||
div = sibling
|
||||
|
||||
self.parser.parseChunk(div, block)
|
||||
|
||||
@@ -64,10 +64,10 @@ def isheader(elem):
|
||||
|
||||
class AttrListTreeprocessor(Treeprocessor):
|
||||
|
||||
BASE_RE = r'\{\:?([^\}\n]*)\}'
|
||||
HEADER_RE = re.compile(r'[ ]+%s[ ]*$' % BASE_RE)
|
||||
BLOCK_RE = re.compile(r'\n[ ]*%s[ ]*$' % BASE_RE)
|
||||
INLINE_RE = re.compile(r'^%s' % BASE_RE)
|
||||
BASE_RE = r'\{\:?[ ]*([^\}\n ][^\}\n]*)[ ]*\}'
|
||||
HEADER_RE = re.compile(r'[ ]+{}[ ]*$'.format(BASE_RE))
|
||||
BLOCK_RE = re.compile(r'\n[ ]*{}[ ]*$'.format(BASE_RE))
|
||||
INLINE_RE = re.compile(r'^{}'.format(BASE_RE))
|
||||
NAME_RE = re.compile(r'[^A-Z_a-z\u00c0-\u00d6\u00d8-\u00f6\u00f8-\u02ff'
|
||||
r'\u0370-\u037d\u037f-\u1fff\u200c-\u200d'
|
||||
r'\u2070-\u218f\u2c00-\u2fef\u3001-\ud7ff'
|
||||
@@ -79,8 +79,8 @@ class AttrListTreeprocessor(Treeprocessor):
|
||||
if self.md.is_block_level(elem.tag):
|
||||
# Block level: check for attrs on last line of text
|
||||
RE = self.BLOCK_RE
|
||||
if isheader(elem) or elem.tag == 'dt':
|
||||
# header or def-term: check for attrs at end of line
|
||||
if isheader(elem) or elem.tag in ['dt', 'td', 'th']:
|
||||
# header, def-term, or table cell: check for attrs at end of element
|
||||
RE = self.HEADER_RE
|
||||
if len(elem) and elem.tag == 'li':
|
||||
# special case list items. children may include a ul or ol.
|
||||
@@ -120,8 +120,6 @@ class AttrListTreeprocessor(Treeprocessor):
|
||||
elif elem.text:
|
||||
# no children. Get from text.
|
||||
m = RE.search(elem.text)
|
||||
if not m and elem.tag == 'td':
|
||||
m = re.search(self.BASE_RE, elem.text)
|
||||
if m:
|
||||
self.assign_attrs(elem, m.group(1))
|
||||
elem.text = elem.text[:m.start()]
|
||||
@@ -161,6 +159,7 @@ class AttrListTreeprocessor(Treeprocessor):
|
||||
class AttrListExtension(Extension):
    """Markdown extension that installs `AttrListTreeprocessor`."""

    def extendMarkdown(self, md):
        """Register the tree processor and this extension on ``md``."""
        processor = AttrListTreeprocessor(md)
        md.treeprocessors.register(processor, 'attr_list', 8)
        # Registering the extension lets other extensions detect it through
        # `md.registeredExtensions`.
        md.registerExtension(self)
|
||||
|
||||
|
||||
def makeExtension(**kwargs): # pragma: no cover
|
||||
|
||||
@@ -17,13 +17,14 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php)
|
||||
|
||||
from . import Extension
|
||||
from ..treeprocessors import Treeprocessor
|
||||
from ..util import parseBoolValue
|
||||
|
||||
try:
|
||||
try: # pragma: no cover
|
||||
from pygments import highlight
|
||||
from pygments.lexers import get_lexer_by_name, guess_lexer
|
||||
from pygments.formatters import get_formatter_by_name
|
||||
pygments = True
|
||||
except ImportError:
|
||||
except ImportError: # pragma: no cover
|
||||
pygments = False
|
||||
|
||||
|
||||
@@ -38,52 +39,78 @@ def parse_hl_lines(expr):
|
||||
|
||||
try:
|
||||
return list(map(int, expr.split()))
|
||||
except ValueError:
|
||||
except ValueError: # pragma: no cover
|
||||
return []
|
||||
|
||||
|
||||
# ------------------ The Main CodeHilite Class ----------------------
|
||||
class CodeHilite:
|
||||
"""
|
||||
Determine language of source code, and pass it into pygments hilighter.
|
||||
Determine language of source code, and pass it on to the Pygments highlighter.
|
||||
|
||||
Basic Usage:
|
||||
>>> code = CodeHilite(src = 'some text')
|
||||
>>> html = code.hilite()
|
||||
Usage:
|
||||
code = CodeHilite(src=some_code, lang='python')
|
||||
html = code.hilite()
|
||||
|
||||
Arguments:
|
||||
* src: Source string or any object with a .readline attribute.
|
||||
|
||||
* linenums: (Boolean) Set line numbering to 'on' (True),
|
||||
'off' (False) or 'auto'(None). Set to 'auto' by default.
|
||||
* lang: String name of Pygments lexer to use for highlighting. Default: `None`.
|
||||
|
||||
* guess_lang: (Boolean) Turn language auto-detection
|
||||
'on' or 'off' (on by default).
|
||||
* guess_lang: Auto-detect which lexer to use. Ignored if `lang` is set to a valid
|
||||
value. Default: `True`.
|
||||
|
||||
* css_class: Set class name of wrapper div ('codehilite' by default).
|
||||
* use_pygments: Pass code to pygments for code highlighting. If `False`, the code is
|
||||
instead wrapped for highlighting by a JavaScript library. Default: `True`.
|
||||
|
||||
* hl_lines: (List of integers) Lines to emphasize, 1-indexed.
|
||||
* linenums: An alias to Pygments `linenos` formatter option. Default: `None`.
|
||||
|
||||
Low Level Usage:
|
||||
>>> code = CodeHilite()
|
||||
>>> code.src = 'some text' # String or anything with a .readline attr.
|
||||
>>> code.linenos = True # Turns line numbering on or of.
|
||||
>>> html = code.hilite()
|
||||
* css_class: An alias to Pygments `cssclass` formatter option. Default: 'codehilite'.
|
||||
|
||||
* lang_prefix: Prefix prepended to the language when `use_pygments` is `False`.
|
||||
Default: "language-".
|
||||
|
||||
Other Options:
|
||||
Any other options are accepted and passed on to the lexer and formatter. Therefore,
|
||||
valid options include any options which are accepted by the `html` formatter or
|
||||
whichever lexer the code's language uses. Note that most lexers do not have any
|
||||
options. However, a few have very useful options, such as PHP's `startinline` option.
|
||||
Any invalid options are ignored without error.
|
||||
|
||||
Formatter options: https://pygments.org/docs/formatters/#HtmlFormatter
|
||||
Lexer Options: https://pygments.org/docs/lexers/
|
||||
|
||||
Advanced Usage:
|
||||
code = CodeHilite(
|
||||
src = some_code,
|
||||
lang = 'php',
|
||||
startinline = True, # Lexer option. Snippet does not start with `<?php`.
|
||||
linenostart = 42, # Formatter option. Snippet starts on line 42.
|
||||
hl_lines = [45, 49, 50], # Formatter option. Highlight lines 45, 49, and 50.
|
||||
linenos = 'inline' # Formatter option. Avoid alignment problems.
|
||||
)
|
||||
html = code.hilite()
|
||||
|
||||
"""
|
||||
|
||||
def __init__(self, src=None, linenums=None, guess_lang=True,
|
||||
css_class="codehilite", lang=None, style='default',
|
||||
noclasses=False, tab_length=4, hl_lines=None, use_pygments=True):
|
||||
def __init__(self, src, **options):
|
||||
self.src = src
|
||||
self.lang = lang
|
||||
self.linenums = linenums
|
||||
self.guess_lang = guess_lang
|
||||
self.css_class = css_class
|
||||
self.style = style
|
||||
self.noclasses = noclasses
|
||||
self.tab_length = tab_length
|
||||
self.hl_lines = hl_lines or []
|
||||
self.use_pygments = use_pygments
|
||||
self.lang = options.pop('lang', None)
|
||||
self.guess_lang = options.pop('guess_lang', True)
|
||||
self.use_pygments = options.pop('use_pygments', True)
|
||||
self.lang_prefix = options.pop('lang_prefix', 'language-')
|
||||
|
||||
if 'linenos' not in options:
|
||||
options['linenos'] = options.pop('linenums', None)
|
||||
if 'cssclass' not in options:
|
||||
options['cssclass'] = options.pop('css_class', 'codehilite')
|
||||
if 'wrapcode' not in options:
|
||||
# Override pygments default
|
||||
options['wrapcode'] = True
|
||||
# Disallow use of `full` option
|
||||
options['full'] = False
|
||||
|
||||
self.options = options
|
||||
|
||||
def hilite(self):
|
||||
"""
|
||||
@@ -103,22 +130,16 @@ class CodeHilite:
|
||||
|
||||
if pygments and self.use_pygments:
|
||||
try:
|
||||
lexer = get_lexer_by_name(self.lang)
|
||||
lexer = get_lexer_by_name(self.lang, **self.options)
|
||||
except ValueError:
|
||||
try:
|
||||
if self.guess_lang:
|
||||
lexer = guess_lexer(self.src)
|
||||
lexer = guess_lexer(self.src, **self.options)
|
||||
else:
|
||||
lexer = get_lexer_by_name('text')
|
||||
except ValueError:
|
||||
lexer = get_lexer_by_name('text')
|
||||
formatter = get_formatter_by_name('html',
|
||||
linenos=self.linenums,
|
||||
cssclass=self.css_class,
|
||||
style=self.style,
|
||||
noclasses=self.noclasses,
|
||||
hl_lines=self.hl_lines,
|
||||
wrapcode=True)
|
||||
lexer = get_lexer_by_name('text', **self.options)
|
||||
except ValueError: # pragma: no cover
|
||||
lexer = get_lexer_by_name('text', **self.options)
|
||||
formatter = get_formatter_by_name('html', **self.options)
|
||||
return highlight(self.src, lexer, formatter)
|
||||
else:
|
||||
# just escape and build markup usable by JS highlighting libs
|
||||
@@ -128,27 +149,30 @@ class CodeHilite:
|
||||
txt = txt.replace('"', '"')
|
||||
classes = []
|
||||
if self.lang:
|
||||
classes.append('language-%s' % self.lang)
|
||||
if self.linenums:
|
||||
classes.append('{}{}'.format(self.lang_prefix, self.lang))
|
||||
if self.options['linenos']:
|
||||
classes.append('linenums')
|
||||
class_str = ''
|
||||
if classes:
|
||||
class_str = ' class="%s"' % ' '.join(classes)
|
||||
return '<pre class="%s"><code%s>%s</code></pre>\n' % \
|
||||
(self.css_class, class_str, txt)
|
||||
class_str = ' class="{}"'.format(' '.join(classes))
|
||||
return '<pre class="{}"><code{}>{}\n</code></pre>\n'.format(
|
||||
self.options['cssclass'],
|
||||
class_str,
|
||||
txt
|
||||
)
|
||||
|
||||
def _parseHeader(self):
|
||||
"""
|
||||
Determines language of a code block from shebang line and whether said
|
||||
line should be removed or left in place. If the sheband line contains a
|
||||
path (even a single /) then it is assumed to be a real shebang line and
|
||||
left alone. However, if no path is given (e.i.: #!python or :::python)
|
||||
then it is assumed to be a mock shebang for language identifitation of
|
||||
a code fragment and removed from the code block prior to processing for
|
||||
code highlighting. When a mock shebang (e.i: #!python) is found, line
|
||||
numbering is turned on. When colons are found in place of a shebang
|
||||
(e.i.: :::python), line numbering is left in the current state - off
|
||||
by default.
|
||||
Determines language of a code block from shebang line and whether the
|
||||
said line should be removed or left in place. If the shebang line
|
||||
contains a path (even a single /) then it is assumed to be a real
|
||||
shebang line and left alone. However, if no path is given
|
||||
(e.g.: #!python or :::python) then it is assumed to be a mock shebang
|
||||
for language identification of a code fragment and removed from the
|
||||
code block prior to processing for code highlighting. When a mock
|
||||
shebang (e.g.: #!python) is found, line numbering is turned on. When
|
||||
colons are found in place of a shebang (e.g.: :::python), line
|
||||
numbering is left in the current state - off by default.
|
||||
|
||||
Also parses optional list of highlight lines, like:
|
||||
|
||||
@@ -176,16 +200,16 @@ class CodeHilite:
|
||||
# we have a match
|
||||
try:
|
||||
self.lang = m.group('lang').lower()
|
||||
except IndexError:
|
||||
except IndexError: # pragma: no cover
|
||||
self.lang = None
|
||||
if m.group('path'):
|
||||
# path exists - restore first line
|
||||
lines.insert(0, fl)
|
||||
if self.linenums is None and m.group('shebang'):
|
||||
if self.options['linenos'] is None and m.group('shebang'):
|
||||
# Overridable and Shebang exists - use line numbers
|
||||
self.linenums = True
|
||||
self.options['linenos'] = True
|
||||
|
||||
self.hl_lines = parse_hl_lines(m.group('hl_lines'))
|
||||
self.options['hl_lines'] = parse_hl_lines(m.group('hl_lines'))
|
||||
else:
|
||||
# No match
|
||||
lines.insert(0, fl)
|
||||
@@ -201,9 +225,11 @@ class HiliteTreeprocessor(Treeprocessor):
|
||||
|
||||
def code_unescape(self, text):
    """Unescape the HTML entities ``&lt;``, ``&gt;`` and ``&amp;`` in code.

    Args:
        text: Escaped code text.

    Returns:
        The text with the three entities replaced by ``<``, ``>`` and ``&``.
    """
    text = text.replace("&lt;", "<")
    text = text.replace("&gt;", ">")
    # Escaped '&' should be replaced at the end to avoid
    # conflicting with &lt; and &gt; (otherwise "&amp;lt;" would first
    # become "&lt;" and then be unescaped a second time into "<").
    text = text.replace("&amp;", "&")
    return text
|
||||
|
||||
def run(self, root):
|
||||
@@ -213,13 +239,9 @@ class HiliteTreeprocessor(Treeprocessor):
|
||||
if len(block) == 1 and block[0].tag == 'code':
|
||||
code = CodeHilite(
|
||||
self.code_unescape(block[0].text),
|
||||
linenums=self.config['linenums'],
|
||||
guess_lang=self.config['guess_lang'],
|
||||
css_class=self.config['css_class'],
|
||||
style=self.config['pygments_style'],
|
||||
noclasses=self.config['noclasses'],
|
||||
tab_length=self.md.tab_length,
|
||||
use_pygments=self.config['use_pygments']
|
||||
style=self.config.pop('pygments_style', 'default'),
|
||||
**self.config
|
||||
)
|
||||
placeholder = self.md.htmlStash.store(code.hilite())
|
||||
# Clear codeblock in etree instance
|
||||
@@ -237,7 +259,7 @@ class CodeHiliteExtension(Extension):
|
||||
# define default configs
|
||||
self.config = {
|
||||
'linenums': [None,
|
||||
"Use lines numbers. True=yes, False=no, None=auto"],
|
||||
"Use lines numbers. True|table|inline=yes, False=no, None=auto"],
|
||||
'guess_lang': [True,
|
||||
"Automatic language detection - Default: True"],
|
||||
'css_class': ["codehilite",
|
||||
@@ -252,10 +274,25 @@ class CodeHiliteExtension(Extension):
|
||||
'use_pygments': [True,
|
||||
'Use Pygments to Highlight code blocks. '
|
||||
'Disable if using a JavaScript library. '
|
||||
'Default: True']
|
||||
'Default: True'],
|
||||
'lang_prefix': [
|
||||
'language-',
|
||||
'Prefix prepended to the language when use_pygments is false. Default: "language-"'
|
||||
]
|
||||
}
|
||||
|
||||
super().__init__(**kwargs)
|
||||
for key, value in kwargs.items():
|
||||
if key in self.config:
|
||||
self.setConfig(key, value)
|
||||
else:
|
||||
# manually set unknown keywords.
|
||||
if isinstance(value, str):
|
||||
try:
|
||||
# Attempt to parse str as a bool value
|
||||
value = parseBoolValue(value, preserve_none=True)
|
||||
except ValueError:
|
||||
pass # Assume it's not a bool value. Use as-is.
|
||||
self.config[key] = [value, '']
|
||||
|
||||
def extendMarkdown(self, md):
|
||||
""" Add HilitePostprocessor to Markdown instance. """
|
||||
|
||||
@@ -34,8 +34,8 @@ class DefListProcessor(BlockProcessor):
|
||||
|
||||
raw_block = blocks.pop(0)
|
||||
m = self.RE.search(raw_block)
|
||||
terms = [l.strip() for l in
|
||||
raw_block[:m.start()].split('\n') if l.strip()]
|
||||
terms = [term.strip() for term in
|
||||
raw_block[:m.start()].split('\n') if term.strip()]
|
||||
block = raw_block[m.end():]
|
||||
no_indent = self.NO_INDENT_RE.match(block)
|
||||
if no_indent:
|
||||
@@ -87,11 +87,13 @@ class DefListProcessor(BlockProcessor):
|
||||
class DefListIndentProcessor(ListIndentProcessor):
|
||||
""" Process indented children of definition list items. """
|
||||
|
||||
ITEM_TYPES = ['dd']
|
||||
LIST_TYPES = ['dl']
|
||||
# Definition lists need to be aware of all list types
|
||||
ITEM_TYPES = ['dd', 'li']
|
||||
LIST_TYPES = ['dl', 'ol', 'ul']
|
||||
|
||||
def create_item(self, parent, block):
|
||||
""" Create a new dd and parse the block with it as the parent. """
|
||||
""" Create a new dd or li (depending on parent) and parse the block with it as the parent. """
|
||||
|
||||
dd = etree.SubElement(parent, 'dd')
|
||||
self.parser.parseBlocks(dd, [block])
|
||||
|
||||
|
||||
@@ -15,78 +15,130 @@ All changes Copyright 2008-2014 The Python Markdown Project
|
||||
License: [BSD](https://opensource.org/licenses/bsd-license.php)
|
||||
"""
|
||||
|
||||
|
||||
from textwrap import dedent
|
||||
from . import Extension
|
||||
from ..preprocessors import Preprocessor
|
||||
from .codehilite import CodeHilite, CodeHiliteExtension, parse_hl_lines
|
||||
from .attr_list import get_attrs, AttrListExtension
|
||||
from ..util import parseBoolValue
|
||||
import re
|
||||
|
||||
|
||||
class FencedCodeExtension(Extension):
    """Markdown extension that installs `FencedBlockPreprocessor`."""

    def __init__(self, **kwargs):
        """Declare the default configuration, then apply ``kwargs``.

        Args:
            **kwargs: Configuration overrides forwarded to the base class.
        """
        self.config = {
            'lang_prefix': ['language-', 'Prefix prepended to the language. Default: "language-"']
        }
        super().__init__(**kwargs)

    def extendMarkdown(self, md):
        """ Add FencedBlockPreprocessor to the Markdown instance. """
        md.registerExtension(self)

        # The preprocessor takes the extension's configuration as its second
        # argument, so it is registered exactly once, with that configuration.
        md.preprocessors.register(FencedBlockPreprocessor(md, self.getConfigs()), 'fenced_code_block', 25)
|
||||
|
||||
|
||||
class FencedBlockPreprocessor(Preprocessor):
|
||||
FENCED_BLOCK_RE = re.compile(r'''
|
||||
(?P<fence>^(?:~{3,}|`{3,}))[ ]* # Opening ``` or ~~~
|
||||
(\{?\.?(?P<lang>[\w#.+-]*))?[ ]* # Optional {, and lang
|
||||
# Optional highlight lines, single- or double-quote-delimited
|
||||
(hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?[ ]*
|
||||
}?[ ]*\n # Optional closing }
|
||||
(?P<code>.*?)(?<=\n)
|
||||
(?P=fence)[ ]*$''', re.MULTILINE | re.DOTALL | re.VERBOSE)
|
||||
CODE_WRAP = '<pre><code%s>%s</code></pre>'
|
||||
LANG_TAG = ' class="%s"'
|
||||
FENCED_BLOCK_RE = re.compile(
|
||||
dedent(r'''
|
||||
(?P<fence>^(?:~{3,}|`{3,}))[ ]* # opening fence
|
||||
((\{(?P<attrs>[^\}\n]*)\})?| # (optional {attrs} or
|
||||
(\.?(?P<lang>[\w#.+-]*))?[ ]* # optional (.)lang
|
||||
(hl_lines=(?P<quot>"|')(?P<hl_lines>.*?)(?P=quot))?) # optional hl_lines)
|
||||
[ ]*\n # newline (end of opening fence)
|
||||
(?P<code>.*?)(?<=\n) # the code block
|
||||
(?P=fence)[ ]*$ # closing fence
|
||||
'''),
|
||||
re.MULTILINE | re.DOTALL | re.VERBOSE
|
||||
)
|
||||
|
||||
def __init__(self, md):
|
||||
def __init__(self, md, config):
|
||||
super().__init__(md)
|
||||
|
||||
self.checked_for_codehilite = False
|
||||
self.config = config
|
||||
self.checked_for_deps = False
|
||||
self.codehilite_conf = {}
|
||||
self.use_attr_list = False
|
||||
# List of options to convert to bool values
|
||||
self.bool_options = [
|
||||
'linenums',
|
||||
'guess_lang',
|
||||
'noclasses',
|
||||
'use_pygments'
|
||||
]
|
||||
|
||||
def run(self, lines):
|
||||
""" Match and store Fenced Code Blocks in the HtmlStash. """
|
||||
|
||||
# Check for code hilite extension
|
||||
if not self.checked_for_codehilite:
|
||||
# Check for dependent extensions
|
||||
if not self.checked_for_deps:
|
||||
for ext in self.md.registeredExtensions:
|
||||
if isinstance(ext, CodeHiliteExtension):
|
||||
self.codehilite_conf = ext.config
|
||||
break
|
||||
self.codehilite_conf = ext.getConfigs()
|
||||
if isinstance(ext, AttrListExtension):
|
||||
self.use_attr_list = True
|
||||
|
||||
self.checked_for_codehilite = True
|
||||
self.checked_for_deps = True
|
||||
|
||||
text = "\n".join(lines)
|
||||
while 1:
|
||||
m = self.FENCED_BLOCK_RE.search(text)
|
||||
if m:
|
||||
lang = ''
|
||||
if m.group('lang'):
|
||||
lang = self.LANG_TAG % m.group('lang')
|
||||
lang, id, classes, config = None, '', [], {}
|
||||
if m.group('attrs'):
|
||||
id, classes, config = self.handle_attrs(get_attrs(m.group('attrs')))
|
||||
if len(classes):
|
||||
lang = classes.pop(0)
|
||||
else:
|
||||
if m.group('lang'):
|
||||
lang = m.group('lang')
|
||||
if m.group('hl_lines'):
|
||||
# Support hl_lines outside of attrs for backward-compatibility
|
||||
config['hl_lines'] = parse_hl_lines(m.group('hl_lines'))
|
||||
|
||||
# If config is not empty, then the codehilite extension
|
||||
# is enabled, so we call it to highlight the code
|
||||
if self.codehilite_conf:
|
||||
if self.codehilite_conf and self.codehilite_conf['use_pygments'] and config.get('use_pygments', True):
|
||||
local_config = self.codehilite_conf.copy()
|
||||
local_config.update(config)
|
||||
# Combine classes with cssclass. Ensure cssclass is at end
|
||||
# as pygments appends a suffix under certain circumstances.
|
||||
# Ignore ID as Pygments does not offer an option to set it.
|
||||
if classes:
|
||||
local_config['css_class'] = '{} {}'.format(
|
||||
' '.join(classes),
|
||||
local_config['css_class']
|
||||
)
|
||||
highliter = CodeHilite(
|
||||
m.group('code'),
|
||||
linenums=self.codehilite_conf['linenums'][0],
|
||||
guess_lang=self.codehilite_conf['guess_lang'][0],
|
||||
css_class=self.codehilite_conf['css_class'][0],
|
||||
style=self.codehilite_conf['pygments_style'][0],
|
||||
use_pygments=self.codehilite_conf['use_pygments'][0],
|
||||
lang=(m.group('lang') or None),
|
||||
noclasses=self.codehilite_conf['noclasses'][0],
|
||||
hl_lines=parse_hl_lines(m.group('hl_lines'))
|
||||
lang=lang,
|
||||
style=local_config.pop('pygments_style', 'default'),
|
||||
**local_config
|
||||
)
|
||||
|
||||
code = highliter.hilite()
|
||||
else:
|
||||
code = self.CODE_WRAP % (lang,
|
||||
self._escape(m.group('code')))
|
||||
id_attr = lang_attr = class_attr = kv_pairs = ''
|
||||
if lang:
|
||||
lang_attr = ' class="{}{}"'.format(self.config.get('lang_prefix', 'language-'), lang)
|
||||
if classes:
|
||||
class_attr = ' class="{}"'.format(' '.join(classes))
|
||||
if id:
|
||||
id_attr = ' id="{}"'.format(id)
|
||||
if self.use_attr_list and config and not config.get('use_pygments', False):
|
||||
# Only assign key/value pairs to code element if attr_list ext is enabled, key/value pairs
|
||||
# were defined on the code block, and the `use_pygments` key was not set to True. The
|
||||
# `use_pygments` key could be either set to False or not defined. It is omitted from output.
|
||||
kv_pairs = ' ' + ' '.join(
|
||||
'{k}="{v}"'.format(k=k, v=v) for k, v in config.items() if k != 'use_pygments'
|
||||
)
|
||||
code = '<pre{id}{cls}><code{lang}{kv}>{code}</code></pre>'.format(
|
||||
id=id_attr,
|
||||
cls=class_attr,
|
||||
lang=lang_attr,
|
||||
kv=kv_pairs,
|
||||
code=self._escape(m.group('code'))
|
||||
)
|
||||
|
||||
placeholder = self.md.htmlStash.store(code)
|
||||
text = '{}\n{}\n{}'.format(text[:m.start()],
|
||||
@@ -96,6 +148,24 @@ class FencedBlockPreprocessor(Preprocessor):
|
||||
break
|
||||
return text.split("\n")
|
||||
|
||||
def handle_attrs(self, attrs):
    """Sort parsed attribute pairs into an id, a class list and configs.

    Args:
        attrs: Iterable of ``(key, value)`` pairs. The key ``'id'`` sets the
            id, the key ``'.'`` adds a class, and every other key becomes a
            config entry.

    Returns:
        Tuple ``(id, [list, of, classes], {configs})``. The id is ``''``
        when none was given; if ``'id'`` appears more than once the last
        value wins. ``hl_lines`` values are passed through
        ``parse_hl_lines`` and keys listed in ``self.bool_options`` through
        ``parseBoolValue``; all other values are stored unchanged.
    """
    elem_id = ''  # named to avoid shadowing the builtin `id`
    classes = []
    configs = {}
    for key, value in attrs:
        if key == 'id':
            elem_id = value
        elif key == '.':
            classes.append(value)
        elif key == 'hl_lines':
            configs[key] = parse_hl_lines(value)
        elif key in self.bool_options:
            configs[key] = parseBoolValue(value, fail_on_errors=False, preserve_none=True)
        else:
            configs[key] = value
    return elem_id, classes, configs
|
||||
|
||||
def _escape(self, txt):
|
||||
""" basic html escaping """
|
||||
txt = txt.replace('&', '&')
|
||||
|
||||
@@ -14,7 +14,7 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php)
|
||||
"""
|
||||
|
||||
from . import Extension
|
||||
from ..preprocessors import Preprocessor
|
||||
from ..blockprocessors import BlockProcessor
|
||||
from ..inlinepatterns import InlineProcessor
|
||||
from ..treeprocessors import Treeprocessor
|
||||
from ..postprocessors import Postprocessor
|
||||
@@ -26,8 +26,6 @@ import xml.etree.ElementTree as etree
|
||||
|
||||
FN_BACKLINK_TEXT = util.STX + "zz1337820767766393qq" + util.ETX
|
||||
NBSP_PLACEHOLDER = util.STX + "qq3936677670287331zz" + util.ETX
|
||||
DEF_RE = re.compile(r'[ ]{0,3}\[\^([^\]]*)\]:\s*(.*)')
|
||||
TABBED_RE = re.compile(r'((\t)|( ))(.*)')
|
||||
RE_REF_ID = re.compile(r'(fnref)(\d+)')
|
||||
|
||||
|
||||
@@ -72,8 +70,8 @@ class FootnoteExtension(Extension):
|
||||
md.registerExtension(self)
|
||||
self.parser = md.parser
|
||||
self.md = md
|
||||
# Insert a preprocessor before ReferencePreprocessor
|
||||
md.preprocessors.register(FootnotePreprocessor(self), 'footnote', 15)
|
||||
# Insert a blockprocessor before ReferencePreprocessor
|
||||
md.parser.blockprocessors.register(FootnoteBlockProcessor(self), 'footnote', 17)
|
||||
|
||||
# Insert an inline pattern before ImageReferencePattern
|
||||
FOOTNOTE_RE = r'\[\^([^\]]*)\]' # blah blah [^1] blah
|
||||
@@ -202,106 +200,92 @@ class FootnoteExtension(Extension):
|
||||
return div
|
||||
|
||||
|
||||
class FootnotePreprocessor(Preprocessor):
|
||||
class FootnoteBlockProcessor(BlockProcessor):
|
||||
""" Find all footnote references and store for later use. """
|
||||
|
||||
RE = re.compile(r'^[ ]{0,3}\[\^([^\]]*)\]:[ ]*(.*)$', re.MULTILINE)
|
||||
|
||||
def __init__(self, footnotes):
|
||||
super().__init__(footnotes.parser)
|
||||
self.footnotes = footnotes
|
||||
|
||||
def run(self, lines):
|
||||
"""
|
||||
Loop through lines and find, set, and remove footnote definitions.
|
||||
def test(self, parent, block):
|
||||
return True
|
||||
|
||||
Keywords:
|
||||
def run(self, parent, blocks):
|
||||
""" Find, set, and remove footnote definitions. """
|
||||
block = blocks.pop(0)
|
||||
m = self.RE.search(block)
|
||||
if m:
|
||||
id = m.group(1)
|
||||
fn_blocks = [m.group(2)]
|
||||
|
||||
* lines: A list of lines of text
|
||||
|
||||
Return: A list of lines of text with footnote definitions removed.
|
||||
|
||||
"""
|
||||
newlines = []
|
||||
i = 0
|
||||
while True:
|
||||
m = DEF_RE.match(lines[i])
|
||||
if m:
|
||||
fn, _i = self.detectTabbed(lines[i+1:])
|
||||
fn.insert(0, m.group(2))
|
||||
i += _i-1 # skip past footnote
|
||||
footnote = "\n".join(fn)
|
||||
self.footnotes.setFootnote(m.group(1), footnote.rstrip())
|
||||
# Preserve a line for each block to prevent raw HTML indexing issue.
|
||||
# https://github.com/Python-Markdown/markdown/issues/584
|
||||
num_blocks = (len(footnote.split('\n\n')) * 2)
|
||||
newlines.extend([''] * (num_blocks))
|
||||
# Handle rest of block
|
||||
therest = block[m.end():].lstrip('\n')
|
||||
m2 = self.RE.search(therest)
|
||||
if m2:
|
||||
# Another footnote exists in the rest of this block.
|
||||
# Any content before match is continuation of this footnote, which may be lazily indented.
|
||||
before = therest[:m2.start()].rstrip('\n')
|
||||
fn_blocks[0] = '\n'.join([fn_blocks[0], self.detab(before)]).lstrip('\n')
|
||||
# Add back to blocks everything from beginning of match forward for next iteration.
|
||||
blocks.insert(0, therest[m2.start():])
|
||||
else:
|
||||
newlines.append(lines[i])
|
||||
if len(lines) > i+1:
|
||||
i += 1
|
||||
else:
|
||||
break
|
||||
return newlines
|
||||
# All remaining lines of block are continuation of this footnote, which may be lazily indented.
|
||||
fn_blocks[0] = '\n'.join([fn_blocks[0], self.detab(therest)]).strip('\n')
|
||||
|
||||
def detectTabbed(self, lines):
|
||||
# Check for child elements in remaining blocks.
|
||||
fn_blocks.extend(self.detectTabbed(blocks))
|
||||
|
||||
footnote = "\n\n".join(fn_blocks)
|
||||
self.footnotes.setFootnote(id, footnote.rstrip())
|
||||
|
||||
if block[:m.start()].strip():
|
||||
# Add any content before match back to blocks as separate block
|
||||
blocks.insert(0, block[:m.start()].rstrip('\n'))
|
||||
return True
|
||||
# No match. Restore block.
|
||||
blocks.insert(0, block)
|
||||
return False
|
||||
|
||||
def detectTabbed(self, blocks):
|
||||
""" Find indented text and remove indent before further processing.
|
||||
|
||||
Keyword arguments:
|
||||
|
||||
* lines: an array of strings
|
||||
|
||||
Returns: a list of post processed items and the index of last line.
|
||||
|
||||
Returns: a list of blocks with indentation removed.
|
||||
"""
|
||||
items = []
|
||||
blank_line = False # have we encountered a blank line yet?
|
||||
i = 0 # to keep track of where we are
|
||||
|
||||
def detab(line):
|
||||
match = TABBED_RE.match(line)
|
||||
if match:
|
||||
return match.group(4)
|
||||
|
||||
for line in lines:
|
||||
if line.strip(): # Non-blank line
|
||||
detabbed_line = detab(line)
|
||||
if detabbed_line:
|
||||
items.append(detabbed_line)
|
||||
i += 1
|
||||
continue
|
||||
elif not blank_line and not DEF_RE.match(line):
|
||||
# not tabbed but still part of first par.
|
||||
items.append(line)
|
||||
i += 1
|
||||
continue
|
||||
fn_blocks = []
|
||||
while blocks:
|
||||
if blocks[0].startswith(' '*4):
|
||||
block = blocks.pop(0)
|
||||
# Check for new footnotes within this block and split at new footnote.
|
||||
m = self.RE.search(block)
|
||||
if m:
|
||||
# Another footnote exists in this block.
|
||||
# Any content before match is continuation of this footnote, which may be lazily indented.
|
||||
before = block[:m.start()].rstrip('\n')
|
||||
fn_blocks.append(self.detab(before))
|
||||
# Add back to blocks everything from beginning of match forward for next iteration.
|
||||
blocks.insert(0, block[m.start():])
|
||||
# End of this footnote.
|
||||
break
|
||||
else:
|
||||
return items, i+1
|
||||
# Entire block is part of this footnote.
|
||||
fn_blocks.append(self.detab(block))
|
||||
else:
|
||||
# End of this footnote.
|
||||
break
|
||||
return fn_blocks
|
||||
|
||||
else: # Blank line: _maybe_ we are done.
|
||||
blank_line = True
|
||||
i += 1 # advance
|
||||
def detab(self, block):
|
||||
""" Remove one level of indent from a block.
|
||||
|
||||
# Find the next non-blank line
|
||||
for j in range(i, len(lines)):
|
||||
if lines[j].strip():
|
||||
next_line = lines[j]
|
||||
break
|
||||
else:
|
||||
# Include extraneous padding to prevent raw HTML
|
||||
# parsing issue: https://github.com/Python-Markdown/markdown/issues/584
|
||||
items.append("")
|
||||
i += 1
|
||||
else:
|
||||
break # There is no more text; we are done.
|
||||
|
||||
# Check if the next non-blank line is tabbed
|
||||
if detab(next_line): # Yes, more work to do.
|
||||
items.append("")
|
||||
continue
|
||||
else:
|
||||
break # No, we are done.
|
||||
else:
|
||||
i += 1
|
||||
|
||||
return items, i
|
||||
Preserve lazily indented blocks by only removing indent from indented lines.
|
||||
"""
|
||||
lines = block.split('\n')
|
||||
for i, line in enumerate(lines):
|
||||
if line.startswith(' '*4):
|
||||
lines[i] = line[4:]
|
||||
return '\n'.join(lines)
|
||||
|
||||
|
||||
class FootnoteInlineProcessor(InlineProcessor):
|
||||
@@ -347,8 +331,8 @@ class FootnotePostTreeprocessor(Treeprocessor):
|
||||
self.offset += 1
|
||||
# Add all the new duplicate links.
|
||||
el = list(li)[-1]
|
||||
for l in links:
|
||||
el.append(l)
|
||||
for link in links:
|
||||
el.append(link)
|
||||
break
|
||||
|
||||
def get_num_duplicates(self, li):
|
||||
|
||||
@@ -21,7 +21,7 @@ EMPHASIS_RE = r'(_)([^_]+)\1'
|
||||
STRONG_RE = r'(_{2})(.+?)\1'
|
||||
|
||||
# __strong_em___
|
||||
STRONG_EM_RE = r'(_)\1(?!\1)(.+?)\1(?!\1)(.+?)\1{3}'
|
||||
STRONG_EM_RE = r'(_)\1(?!\1)([^_]+?)\1(?!\1)(.+?)\1{3}'
|
||||
|
||||
|
||||
class LegacyUnderscoreProcessor(UnderscoreProcessor):
|
||||
|
||||
@@ -16,68 +16,313 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php)
|
||||
|
||||
from . import Extension
|
||||
from ..blockprocessors import BlockProcessor
|
||||
from ..preprocessors import Preprocessor
|
||||
from ..postprocessors import RawHtmlPostprocessor
|
||||
from .. import util
|
||||
import re
|
||||
from ..htmlparser import HTMLExtractor
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
|
||||
class MarkdownInHtmlProcessor(BlockProcessor):
|
||||
"""Process Markdown Inside HTML Blocks."""
|
||||
def test(self, parent, block):
|
||||
return block == util.TAG_PLACEHOLDER % \
|
||||
str(self.parser.blockprocessors.tag_counter + 1)
|
||||
class HTMLExtractorExtra(HTMLExtractor):
|
||||
"""
|
||||
Override HTMLExtractor and create etree Elements for any elements which should have content parsed as Markdown.
|
||||
"""
|
||||
|
||||
def _process_nests(self, element, block):
|
||||
"""Process the element's child elements in self.run."""
|
||||
# Build list of indexes of each nest within the parent element.
|
||||
nest_index = [] # a list of tuples: (left index, right index)
|
||||
i = self.parser.blockprocessors.tag_counter + 1
|
||||
while len(self._tag_data) > i and self._tag_data[i]['left_index']:
|
||||
left_child_index = self._tag_data[i]['left_index']
|
||||
right_child_index = self._tag_data[i]['right_index']
|
||||
nest_index.append((left_child_index - 1, right_child_index))
|
||||
i += 1
|
||||
def __init__(self, md, *args, **kwargs):
|
||||
# All block-level tags.
|
||||
self.block_level_tags = set(md.block_level_elements.copy())
|
||||
# Block-level tags in which the content only gets span level parsing
|
||||
self.span_tags = set(
|
||||
['address', 'dd', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'legend', 'li', 'p', 'td', 'th']
|
||||
)
|
||||
# Block-level tags which never get their content parsed.
|
||||
self.raw_tags = set(['canvas', 'math', 'option', 'pre', 'script', 'style', 'textarea'])
|
||||
# Block-level tags in which the content gets parsed as blocks
|
||||
super().__init__(md, *args, **kwargs)
|
||||
|
||||
# Create each nest subelement.
|
||||
for i, (left_index, right_index) in enumerate(nest_index[:-1]):
|
||||
self.run(element, block[left_index:right_index],
|
||||
block[right_index:nest_index[i + 1][0]], True)
|
||||
self.run(element, block[nest_index[-1][0]:nest_index[-1][1]], # last
|
||||
block[nest_index[-1][1]:], True) # nest
|
||||
self.block_tags = set(self.block_level_tags) - (self.span_tags | self.raw_tags | self.empty_tags)
|
||||
self.span_and_blocks_tags = self.block_tags | self.span_tags
|
||||
|
||||
def run(self, parent, blocks, tail=None, nest=False):
|
||||
self._tag_data = self.parser.md.htmlStash.tag_data
|
||||
def reset(self):
|
||||
"""Reset this instance. Loses all unprocessed data."""
|
||||
self.mdstack = [] # When markdown=1, stack contains a list of tags
|
||||
self.treebuilder = etree.TreeBuilder()
|
||||
self.mdstate = [] # one of 'block', 'span', 'off', or None
|
||||
super().reset()
|
||||
|
||||
self.parser.blockprocessors.tag_counter += 1
|
||||
tag = self._tag_data[self.parser.blockprocessors.tag_counter]
|
||||
def close(self):
|
||||
"""Handle any buffered data."""
|
||||
super().close()
|
||||
# Handle any unclosed tags.
|
||||
if self.mdstack:
|
||||
# Close the outermost parent. handle_endtag will close all unclosed children.
|
||||
self.handle_endtag(self.mdstack[0])
|
||||
|
||||
# Create Element
|
||||
markdown_value = tag['attrs'].pop('markdown')
|
||||
element = etree.SubElement(parent, tag['tag'], tag['attrs'])
|
||||
def get_element(self):
|
||||
""" Return element from treebuilder and reset treebuilder for later use. """
|
||||
element = self.treebuilder.close()
|
||||
self.treebuilder = etree.TreeBuilder()
|
||||
return element
|
||||
|
||||
# Slice Off Block
|
||||
if nest:
|
||||
self.parser.parseBlocks(parent, tail) # Process Tail
|
||||
block = blocks[1:]
|
||||
else: # includes nests since a third level of nesting isn't supported
|
||||
block = blocks[tag['left_index'] + 1: tag['right_index']]
|
||||
del blocks[:tag['right_index']]
|
||||
def get_state(self, tag, attrs):
|
||||
""" Return state from tag and `markdown` attr. One of 'block', 'span', or 'off'. """
|
||||
md_attr = attrs.get('markdown', '0')
|
||||
if md_attr == 'markdown':
|
||||
# `<tag markdown>` is the same as `<tag markdown='1'>`.
|
||||
md_attr = '1'
|
||||
parent_state = self.mdstate[-1] if self.mdstate else None
|
||||
if parent_state == 'off' or (parent_state == 'span' and md_attr != '0'):
|
||||
# Only use the parent state if it is more restrictive than the markdown attribute.
|
||||
md_attr = parent_state
|
||||
if ((md_attr == '1' and tag in self.block_tags) or
|
||||
(md_attr == 'block' and tag in self.span_and_blocks_tags)):
|
||||
return 'block'
|
||||
elif ((md_attr == '1' and tag in self.span_tags) or
|
||||
(md_attr == 'span' and tag in self.span_and_blocks_tags)):
|
||||
return 'span'
|
||||
elif tag in self.block_level_tags:
|
||||
return 'off'
|
||||
else: # pragma: no cover
|
||||
return None
|
||||
|
||||
# Process Text
|
||||
if (self.parser.blockprocessors.contain_span_tags.match( # Span Mode
|
||||
tag['tag']) and markdown_value != 'block') or \
|
||||
markdown_value == 'span':
|
||||
element.text = '\n'.join(block)
|
||||
else: # Block Mode
|
||||
i = self.parser.blockprocessors.tag_counter + 1
|
||||
if len(self._tag_data) > i and self._tag_data[i]['left_index']:
|
||||
first_subelement_index = self._tag_data[i]['left_index'] - 1
|
||||
self.parser.parseBlocks(
|
||||
element, block[:first_subelement_index])
|
||||
if not nest:
|
||||
block = self._process_nests(element, block)
|
||||
def at_line_start(self):
|
||||
"""At line start."""
|
||||
|
||||
value = super().at_line_start()
|
||||
if not value and self.cleandoc and self.cleandoc[-1].endswith('\n'):
|
||||
value = True
|
||||
return value
|
||||
|
||||
def handle_starttag(self, tag, attrs):
|
||||
# Handle tags that should always be empty and do not specify a closing tag
|
||||
if tag in self.empty_tags:
|
||||
attrs = {key: value if value is not None else key for key, value in attrs}
|
||||
if "markdown" in attrs:
|
||||
attrs.pop('markdown')
|
||||
element = etree.Element(tag, attrs)
|
||||
data = etree.tostring(element, encoding='unicode', method='html')
|
||||
else:
|
||||
self.parser.parseBlocks(element, block)
|
||||
data = self.get_starttag_text()
|
||||
self.handle_empty_tag(data, True)
|
||||
return
|
||||
|
||||
if tag in self.block_level_tags:
|
||||
# Valueless attr (ex: `<tag checked>`) results in `[('checked', None)]`.
|
||||
# Convert to `{'checked': 'checked'}`.
|
||||
attrs = {key: value if value is not None else key for key, value in attrs}
|
||||
state = self.get_state(tag, attrs)
|
||||
|
||||
if self.inraw or (state in [None, 'off'] and not self.mdstack) or not self.at_line_start():
|
||||
# fall back to default behavior
|
||||
attrs.pop('markdown', None)
|
||||
super().handle_starttag(tag, attrs)
|
||||
else:
|
||||
if 'p' in self.mdstack and tag in self.block_level_tags:
|
||||
# Close unclosed 'p' tag
|
||||
self.handle_endtag('p')
|
||||
self.mdstate.append(state)
|
||||
self.mdstack.append(tag)
|
||||
attrs['markdown'] = state
|
||||
self.treebuilder.start(tag, attrs)
|
||||
else:
|
||||
# Span level tag
|
||||
if self.inraw:
|
||||
super().handle_starttag(tag, attrs)
|
||||
else:
|
||||
text = self.get_starttag_text()
|
||||
if self.mdstate and self.mdstate[-1] == "off":
|
||||
self.handle_data(self.md.htmlStash.store(text))
|
||||
else:
|
||||
self.handle_data(text)
|
||||
|
||||
def handle_endtag(self, tag):
|
||||
if tag in self.block_level_tags:
|
||||
if self.inraw:
|
||||
super().handle_endtag(tag)
|
||||
elif tag in self.mdstack:
|
||||
# Close element and any unclosed children
|
||||
while self.mdstack:
|
||||
item = self.mdstack.pop()
|
||||
self.mdstate.pop()
|
||||
self.treebuilder.end(item)
|
||||
if item == tag:
|
||||
break
|
||||
if not self.mdstack:
|
||||
# Last item in stack is closed. Stash it
|
||||
element = self.get_element()
|
||||
# Get last entry to see if it ends in newlines
|
||||
# If it is an element, assume there are no newlines
|
||||
item = self.cleandoc[-1] if self.cleandoc else ''
|
||||
# If we only have one newline before block element, add another
|
||||
if not item.endswith('\n\n') and item.endswith('\n'):
|
||||
self.cleandoc.append('\n')
|
||||
self.cleandoc.append(self.md.htmlStash.store(element))
|
||||
self.cleandoc.append('\n\n')
|
||||
self.state = []
|
||||
else:
|
||||
# Treat orphan closing tag as a span level tag.
|
||||
text = self.get_endtag_text(tag)
|
||||
if self.mdstate and self.mdstate[-1] == "off":
|
||||
self.handle_data(self.md.htmlStash.store(text))
|
||||
else:
|
||||
self.handle_data(text)
|
||||
else:
|
||||
# Span level tag
|
||||
if self.inraw:
|
||||
super().handle_endtag(tag)
|
||||
else:
|
||||
text = self.get_endtag_text(tag)
|
||||
if self.mdstate and self.mdstate[-1] == "off":
|
||||
self.handle_data(self.md.htmlStash.store(text))
|
||||
else:
|
||||
self.handle_data(text)
|
||||
|
||||
def handle_startendtag(self, tag, attrs):
|
||||
if tag in self.empty_tags:
|
||||
attrs = {key: value if value is not None else key for key, value in attrs}
|
||||
if "markdown" in attrs:
|
||||
attrs.pop('markdown')
|
||||
element = etree.Element(tag, attrs)
|
||||
data = etree.tostring(element, encoding='unicode', method='html')
|
||||
else:
|
||||
data = self.get_starttag_text()
|
||||
else:
|
||||
data = self.get_starttag_text()
|
||||
self.handle_empty_tag(data, is_block=self.md.is_block_level(tag))
|
||||
|
||||
def handle_data(self, data):
|
||||
if self.inraw or not self.mdstack:
|
||||
super().handle_data(data)
|
||||
else:
|
||||
self.treebuilder.data(data)
|
||||
|
||||
def handle_empty_tag(self, data, is_block):
|
||||
if self.inraw or not self.mdstack:
|
||||
super().handle_empty_tag(data, is_block)
|
||||
else:
|
||||
if self.at_line_start() and is_block:
|
||||
self.handle_data('\n' + self.md.htmlStash.store(data) + '\n\n')
|
||||
else:
|
||||
if self.mdstate and self.mdstate[-1] == "off":
|
||||
self.handle_data(self.md.htmlStash.store(data))
|
||||
else:
|
||||
self.handle_data(data)
|
||||
|
||||
|
||||
class HtmlBlockPreprocessor(Preprocessor):
|
||||
"""Remove html blocks from the text and store them for later retrieval."""
|
||||
|
||||
def run(self, lines):
|
||||
source = '\n'.join(lines)
|
||||
parser = HTMLExtractorExtra(self.md)
|
||||
parser.feed(source)
|
||||
parser.close()
|
||||
return ''.join(parser.cleandoc).split('\n')
|
||||
|
||||
|
||||
class MarkdownInHtmlProcessor(BlockProcessor):
|
||||
"""Process Markdown Inside HTML Blocks which have been stored in the HtmlStash."""
|
||||
|
||||
def test(self, parent, block):
|
||||
# Always return True. `run` will return `False` if not a valid match.
|
||||
return True
|
||||
|
||||
def parse_element_content(self, element):
|
||||
"""
|
||||
Recursively parse the text content of an etree Element as Markdown.
|
||||
|
||||
Any block level elements generated from the Markdown will be inserted as children of the element in place
|
||||
of the text content. All `markdown` attributes are removed. For any elements in which Markdown parsing has
|
||||
been disabled, the text content of it and its children are wrapped in an `AtomicString`.
|
||||
"""
|
||||
|
||||
md_attr = element.attrib.pop('markdown', 'off')
|
||||
|
||||
if md_attr == 'block':
|
||||
# Parse content as block level
|
||||
# The order in which the different parts are parsed (text, children, tails) is important here as the
|
||||
# order of elements needs to be preserved. We can't be inserting items at a later point in the current
|
||||
# iteration as we don't want to do raw processing on elements created from parsing Markdown text (for
|
||||
# example). Therefore, the order of operations is children, tails, text.
|
||||
|
||||
# Recursively parse existing children from raw HTML
|
||||
for child in list(element):
|
||||
self.parse_element_content(child)
|
||||
|
||||
# Parse Markdown text in tail of children. Do this separately to avoid raw HTML parsing.
|
||||
# Save the position of each item to be inserted later in reverse.
|
||||
tails = []
|
||||
for pos, child in enumerate(element):
|
||||
if child.tail:
|
||||
block = child.tail.rstrip('\n')
|
||||
child.tail = ''
|
||||
# Use a dummy placeholder element.
|
||||
dummy = etree.Element('div')
|
||||
self.parser.parseBlocks(dummy, block.split('\n\n'))
|
||||
children = list(dummy)
|
||||
children.reverse()
|
||||
tails.append((pos + 1, children))
|
||||
|
||||
# Insert the elements created from the tails in reverse.
|
||||
tails.reverse()
|
||||
for pos, tail in tails:
|
||||
for item in tail:
|
||||
element.insert(pos, item)
|
||||
|
||||
# Parse Markdown text content. Do this last to avoid raw HTML parsing.
|
||||
if element.text:
|
||||
block = element.text.rstrip('\n')
|
||||
element.text = ''
|
||||
# Use a dummy placeholder element as the content needs to get inserted before existing children.
|
||||
dummy = etree.Element('div')
|
||||
self.parser.parseBlocks(dummy, block.split('\n\n'))
|
||||
children = list(dummy)
|
||||
children.reverse()
|
||||
for child in children:
|
||||
element.insert(0, child)
|
||||
|
||||
elif md_attr == 'span':
|
||||
# Span level parsing will be handled by inlineprocessors.
|
||||
# Walk children here to remove any `markdown` attributes.
|
||||
for child in list(element):
|
||||
self.parse_element_content(child)
|
||||
|
||||
else:
|
||||
# Disable inline parsing for everything else
|
||||
if element.text is None:
|
||||
element.text = ''
|
||||
element.text = util.AtomicString(element.text)
|
||||
for child in list(element):
|
||||
self.parse_element_content(child)
|
||||
if child.tail:
|
||||
child.tail = util.AtomicString(child.tail)
|
||||
|
||||
def run(self, parent, blocks):
|
||||
m = util.HTML_PLACEHOLDER_RE.match(blocks[0])
|
||||
if m:
|
||||
index = int(m.group(1))
|
||||
element = self.parser.md.htmlStash.rawHtmlBlocks[index]
|
||||
if isinstance(element, etree.Element):
|
||||
# We have a matched element. Process it.
|
||||
blocks.pop(0)
|
||||
self.parse_element_content(element)
|
||||
parent.append(element)
|
||||
# Cleanup stash. Replace element with empty string to avoid confusing postprocessor.
|
||||
self.parser.md.htmlStash.rawHtmlBlocks.pop(index)
|
||||
self.parser.md.htmlStash.rawHtmlBlocks.insert(index, '')
|
||||
# Confirm the match to the blockparser.
|
||||
return True
|
||||
# No match found.
|
||||
return False
|
||||
|
||||
|
||||
class MarkdownInHTMLPostprocessor(RawHtmlPostprocessor):
|
||||
def stash_to_string(self, text):
|
||||
""" Override default to handle any etree elements still in the stash. """
|
||||
if isinstance(text, etree.Element):
|
||||
return self.md.serializer(text)
|
||||
else:
|
||||
return str(text)
|
||||
|
||||
|
||||
class MarkdownInHtmlExtension(Extension):
|
||||
@@ -86,14 +331,14 @@ class MarkdownInHtmlExtension(Extension):
|
||||
def extendMarkdown(self, md):
|
||||
""" Register extension instances. """
|
||||
|
||||
# Turn on processing of markdown text within raw html
|
||||
md.preprocessors['html_block'].markdown_in_raw = True
|
||||
# Replace raw HTML preprocessor
|
||||
md.preprocessors.register(HtmlBlockPreprocessor(md), 'html_block', 20)
|
||||
# Add blockprocessor which handles the placeholders for etree elements
|
||||
md.parser.blockprocessors.register(
|
||||
MarkdownInHtmlProcessor(md.parser), 'markdown_block', 105
|
||||
)
|
||||
md.parser.blockprocessors.tag_counter = -1
|
||||
md.parser.blockprocessors.contain_span_tags = re.compile(
|
||||
r'^(p|h[1-6]|li|dd|dt|td|th|legend|address)$', re.IGNORECASE)
|
||||
# Replace raw HTML postprocessor
|
||||
md.postprocessors.register(MarkdownInHTMLPostprocessor(md), 'raw_html', 30)
|
||||
|
||||
|
||||
def makeExtension(**kwargs): # pragma: no cover
|
||||
|
||||
@@ -15,18 +15,24 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php)
|
||||
|
||||
from . import Extension
|
||||
from ..treeprocessors import Treeprocessor
|
||||
from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE
|
||||
from ..util import code_escape, parseBoolValue, AMP_SUBSTITUTE, HTML_PLACEHOLDER_RE, AtomicString
|
||||
from ..postprocessors import UnescapePostprocessor
|
||||
import re
|
||||
import html
|
||||
import unicodedata
|
||||
import xml.etree.ElementTree as etree
|
||||
|
||||
|
||||
def slugify(value, separator):
|
||||
def slugify(value, separator, encoding='ascii'):
|
||||
""" Slugify a string, to make it URL friendly. """
|
||||
value = unicodedata.normalize('NFKD', value).encode('ascii', 'ignore')
|
||||
value = re.sub(r'[^\w\s-]', '', value.decode('ascii')).strip().lower()
|
||||
return re.sub(r'[%s\s]+' % separator, separator, value)
|
||||
value = unicodedata.normalize('NFKD', value).encode(encoding, 'ignore')
|
||||
value = re.sub(r'[^\w\s-]', '', value.decode(encoding)).strip().lower()
|
||||
return re.sub(r'[{}\s]+'.format(separator), separator, value)
|
||||
|
||||
|
||||
def slugify_unicode(value, separator):
|
||||
""" Slugify a string, to make it URL friendly while preserving Unicode characters. """
|
||||
return slugify(value, separator, 'utf-8')
|
||||
|
||||
|
||||
IDCOUNT_RE = re.compile(r'^(.*)_([0-9]+)$')
|
||||
@@ -44,6 +50,18 @@ def unique(id, ids):
|
||||
return id
|
||||
|
||||
|
||||
def get_name(el):
|
||||
"""Get title name."""
|
||||
|
||||
text = []
|
||||
for c in el.itertext():
|
||||
if isinstance(c, AtomicString):
|
||||
text.append(html.unescape(c))
|
||||
else:
|
||||
text.append(c)
|
||||
return ''.join(text).strip()
|
||||
|
||||
|
||||
def stashedHTML2text(text, md, strip_entities=True):
|
||||
""" Extract raw HTML from stash, reduce to plain text and swap with placeholder. """
|
||||
def _html_sub(m):
|
||||
@@ -253,7 +271,7 @@ class TocTreeprocessor(Treeprocessor):
|
||||
self.set_level(el)
|
||||
if int(el.tag[-1]) < self.toc_top or int(el.tag[-1]) > self.toc_bottom:
|
||||
continue
|
||||
text = ''.join(el.itertext()).strip()
|
||||
text = get_name(el)
|
||||
|
||||
# Do not override pre-existing ids
|
||||
if "id" not in el.attrib:
|
||||
|
||||
Reference in New Issue
Block a user