Eliminado venv y www del repositorio, agregué un requirements igual

2020-11-22 21:14:46 -03:00
parent 18cf2d335a
commit 199a1e2a61
820 changed files with 15495 additions and 22017 deletions
--- a/venv/lib/python3.8/site-packages/markdown/extensions/md_in_html.py
+++ b/venv/lib/python3.8/site-packages/markdown/extensions/md_in_html.py
@@ -16,68 +16,313 @@ License: [BSD](https://opensource.org/licenses/bsd-license.php)

 from . import Extension
 from ..blockprocessors import BlockProcessor
+from ..preprocessors import Preprocessor
+from ..postprocessors import RawHtmlPostprocessor
 from .. import util
-import re
+from ..htmlparser import HTMLExtractor
 import xml.etree.ElementTree as etree


-class MarkdownInHtmlProcessor(BlockProcessor):
-    """Process Markdown Inside HTML Blocks."""
-    def test(self, parent, block):
-        return block == util.TAG_PLACEHOLDER % \
-            str(self.parser.blockprocessors.tag_counter + 1)
+class HTMLExtractorExtra(HTMLExtractor):
+    """
+    Override HTMLExtractor and create etree Elements for any elements which should have content parsed as Markdown.
+    """

-    def _process_nests(self, element, block):
-        """Process the element's child elements in self.run."""
-        # Build list of indexes of each nest within the parent element.
-        nest_index = []  # a list of tuples: (left index, right index)
-        i = self.parser.blockprocessors.tag_counter + 1
-        while len(self._tag_data) > i and self._tag_data[i]['left_index']:
-            left_child_index = self._tag_data[i]['left_index']
-            right_child_index = self._tag_data[i]['right_index']
-            nest_index.append((left_child_index - 1, right_child_index))
-            i += 1
+    def __init__(self, md, *args, **kwargs):
+        # All block-level tags.
+        self.block_level_tags = set(md.block_level_elements.copy())
+        # Block-level tags in which the content only gets span level parsing
+        self.span_tags = set(
+            ['address', 'dd', 'dt', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'legend', 'li', 'p', 'td', 'th']
+        )
+        # Block-level tags which never get their content parsed.
+        self.raw_tags = set(['canvas', 'math', 'option', 'pre', 'script', 'style', 'textarea'])
+        # Block-level tags in which the content gets parsed as blocks
+        super().__init__(md, *args, **kwargs)

-        # Create each nest subelement.
-        for i, (left_index, right_index) in enumerate(nest_index[:-1]):
-            self.run(element, block[left_index:right_index],
-                     block[right_index:nest_index[i + 1][0]], True)
-        self.run(element, block[nest_index[-1][0]:nest_index[-1][1]],  # last
-                 block[nest_index[-1][1]:], True)                      # nest
+        self.block_tags = set(self.block_level_tags) - (self.span_tags | self.raw_tags | self.empty_tags)
+        self.span_and_blocks_tags = self.block_tags | self.span_tags

-    def run(self, parent, blocks, tail=None, nest=False):
-        self._tag_data = self.parser.md.htmlStash.tag_data
+    def reset(self):
+        """Reset this instance.  Loses all unprocessed data."""
+        self.mdstack = []  # When markdown=1, stack contains a list of tags
+        self.treebuilder = etree.TreeBuilder()
+        self.mdstate = []  # one of 'block', 'span', 'off', or None
+        super().reset()

-        self.parser.blockprocessors.tag_counter += 1
-        tag = self._tag_data[self.parser.blockprocessors.tag_counter]
+    def close(self):
+        """Handle any buffered data."""
+        super().close()
+        # Handle any unclosed tags.
+        if self.mdstack:
+            # Close the outermost parent. handle_endtag will close all unclosed children.
+            self.handle_endtag(self.mdstack[0])

-        # Create Element
-        markdown_value = tag['attrs'].pop('markdown')
-        element = etree.SubElement(parent, tag['tag'], tag['attrs'])
+    def get_element(self):
+        """ Return element from treebuilder and reset treebuilder for later use. """
+        element = self.treebuilder.close()
+        self.treebuilder = etree.TreeBuilder()
+        return element

-        # Slice Off Block
-        if nest:
-            self.parser.parseBlocks(parent, tail)  # Process Tail
-            block = blocks[1:]
-        else:  # includes nests since a third level of nesting isn't supported
-            block = blocks[tag['left_index'] + 1: tag['right_index']]
-            del blocks[:tag['right_index']]
+    def get_state(self, tag, attrs):
+        """ Return state from tag and `markdown` attr. One of 'block', 'span', 'off', or None. """
+        md_attr = attrs.get('markdown', '0')
+        if md_attr == 'markdown':
+            # `<tag markdown>` is the same as `<tag markdown='1'>`.
+            md_attr = '1'
+        parent_state = self.mdstate[-1] if self.mdstate else None
+        if parent_state == 'off' or (parent_state == 'span' and md_attr != '0'):
+            # Only use the parent state if it is more restrictive than the markdown attribute.
+            md_attr = parent_state
+        if ((md_attr == '1' and tag in self.block_tags) or
+                (md_attr == 'block' and tag in self.span_and_blocks_tags)):
+            return 'block'
+        elif ((md_attr == '1' and tag in self.span_tags) or
+              (md_attr == 'span' and tag in self.span_and_blocks_tags)):
+            return 'span'
+        elif tag in self.block_level_tags:
+            return 'off'
+        else:  # pragma: no cover
+            return None

-        # Process Text
-        if (self.parser.blockprocessors.contain_span_tags.match(  # Span Mode
-                tag['tag']) and markdown_value != 'block') or \
-                markdown_value == 'span':
-            element.text = '\n'.join(block)
-        else:                                                     # Block Mode
-            i = self.parser.blockprocessors.tag_counter + 1
-            if len(self._tag_data) > i and self._tag_data[i]['left_index']:
-                first_subelement_index = self._tag_data[i]['left_index'] - 1
-                self.parser.parseBlocks(
-                    element, block[:first_subelement_index])
-                if not nest:
-                    block = self._process_nests(element, block)
+    def at_line_start(self):
+        """At line start."""
+
+        value = super().at_line_start()
+        if not value and self.cleandoc and self.cleandoc[-1].endswith('\n'):
+            value = True
+        return value
+
+    def handle_starttag(self, tag, attrs):
+        # Handle tags that should always be empty and do not specify a closing tag
+        if tag in self.empty_tags:
+            attrs = {key: value if value is not None else key for key, value in attrs}
+            if "markdown" in attrs:
+                attrs.pop('markdown')
+                element = etree.Element(tag, attrs)
+                data = etree.tostring(element, encoding='unicode', method='html')
            else:
-                self.parser.parseBlocks(element, block)
+                data = self.get_starttag_text()
+            self.handle_empty_tag(data, True)
+            return
+
+        if tag in self.block_level_tags:
+            # Valueless attr (ex: `<tag checked>`) results in `[('checked', None)]`.
+            # Convert to `{'checked': 'checked'}`.
+            attrs = {key: value if value is not None else key for key, value in attrs}
+            state = self.get_state(tag, attrs)
+
+            if self.inraw or (state in [None, 'off'] and not self.mdstack) or not self.at_line_start():
+                # fall back to default behavior
+                attrs.pop('markdown', None)
+                super().handle_starttag(tag, attrs)
+            else:
+                if 'p' in self.mdstack and tag in self.block_level_tags:
+                    # Close unclosed 'p' tag
+                    self.handle_endtag('p')
+                self.mdstate.append(state)
+                self.mdstack.append(tag)
+                attrs['markdown'] = state
+                self.treebuilder.start(tag, attrs)
+        else:
+            # Span level tag
+            if self.inraw:
+                super().handle_starttag(tag, attrs)
+            else:
+                text = self.get_starttag_text()
+                if self.mdstate and self.mdstate[-1] == "off":
+                    self.handle_data(self.md.htmlStash.store(text))
+                else:
+                    self.handle_data(text)
+
+    def handle_endtag(self, tag):
+        if tag in self.block_level_tags:
+            if self.inraw:
+                super().handle_endtag(tag)
+            elif tag in self.mdstack:
+                # Close element and any unclosed children
+                while self.mdstack:
+                    item = self.mdstack.pop()
+                    self.mdstate.pop()
+                    self.treebuilder.end(item)
+                    if item == tag:
+                        break
+                if not self.mdstack:
+                    # Last item in stack is closed. Stash it
+                    element = self.get_element()
+                    # Get last entry to see if it ends in newlines
+                    # If it is an element, assume there are no newlines
+                    item = self.cleandoc[-1] if self.cleandoc else ''
+                    # If we only have one newline before block element, add another
+                    if not item.endswith('\n\n') and item.endswith('\n'):
+                        self.cleandoc.append('\n')
+                    self.cleandoc.append(self.md.htmlStash.store(element))
+                    self.cleandoc.append('\n\n')
+                    self.state = []
+            else:
+                # Treat orphan closing tag as a span level tag.
+                text = self.get_endtag_text(tag)
+                if self.mdstate and self.mdstate[-1] == "off":
+                    self.handle_data(self.md.htmlStash.store(text))
+                else:
+                    self.handle_data(text)
+        else:
+            # Span level tag
+            if self.inraw:
+                super().handle_endtag(tag)
+            else:
+                text = self.get_endtag_text(tag)
+                if self.mdstate and self.mdstate[-1] == "off":
+                    self.handle_data(self.md.htmlStash.store(text))
+                else:
+                    self.handle_data(text)
+
+    def handle_startendtag(self, tag, attrs):
+        if tag in self.empty_tags:
+            attrs = {key: value if value is not None else key for key, value in attrs}
+            if "markdown" in attrs:
+                attrs.pop('markdown')
+                element = etree.Element(tag, attrs)
+                data = etree.tostring(element, encoding='unicode', method='html')
+            else:
+                data = self.get_starttag_text()
+        else:
+            data = self.get_starttag_text()
+        self.handle_empty_tag(data, is_block=self.md.is_block_level(tag))
+
+    def handle_data(self, data):
+        if self.inraw or not self.mdstack:
+            super().handle_data(data)
+        else:
+            self.treebuilder.data(data)
+
+    def handle_empty_tag(self, data, is_block):
+        if self.inraw or not self.mdstack:
+            super().handle_empty_tag(data, is_block)
+        else:
+            if self.at_line_start() and is_block:
+                self.handle_data('\n' + self.md.htmlStash.store(data) + '\n\n')
+            else:
+                if self.mdstate and self.mdstate[-1] == "off":
+                    self.handle_data(self.md.htmlStash.store(data))
+                else:
+                    self.handle_data(data)
+
+
+class HtmlBlockPreprocessor(Preprocessor):
+    """Remove html blocks from the text and store them for later retrieval."""
+
+    def run(self, lines):
+        source = '\n'.join(lines)
+        parser = HTMLExtractorExtra(self.md)
+        parser.feed(source)
+        parser.close()
+        return ''.join(parser.cleandoc).split('\n')
+
+
+class MarkdownInHtmlProcessor(BlockProcessor):
+    """Process Markdown Inside HTML Blocks which have been stored in the HtmlStash."""
+
+    def test(self, parent, block):
+        # Always return True. `run` will return `False` if not a valid match.
+        return True
+
+    def parse_element_content(self, element):
+        """
+        Recursively parse the text content of an etree Element as Markdown.
+
+        Any block level elements generated from the Markdown will be inserted as children of the element in place
+        of the text content. All `markdown` attributes are removed. For any elements in which Markdown parsing has
+        been disabled, the text content of it and its children are wrapped in an `AtomicString`.
+        """
+
+        md_attr = element.attrib.pop('markdown', 'off')
+
+        if md_attr == 'block':
+            # Parse content as block level
+            # The order in which the different parts are parsed (text, children, tails) is important here as the
+            # order of elements needs to be preserved. We can't be inserting items at a later point in the current
+            # iteration as we don't want to do raw processing on elements created from parsing Markdown text (for
+            # example). Therefore, the order of operations is children, tails, text.
+
+            # Recursively parse existing children from raw HTML
+            for child in list(element):
+                self.parse_element_content(child)
+
+            # Parse Markdown text in tail of children. Do this separately to avoid raw HTML parsing.
+            # Save the position of each item to be inserted later in reverse.
+            tails = []
+            for pos, child in enumerate(element):
+                if child.tail:
+                    block = child.tail.rstrip('\n')
+                    child.tail = ''
+                    # Use a dummy placeholder element.
+                    dummy = etree.Element('div')
+                    self.parser.parseBlocks(dummy, block.split('\n\n'))
+                    children = list(dummy)
+                    children.reverse()
+                    tails.append((pos + 1, children))
+
+            # Insert the elements created from the tails in reverse.
+            tails.reverse()
+            for pos, tail in tails:
+                for item in tail:
+                    element.insert(pos, item)
+
+            # Parse Markdown text content. Do this last to avoid raw HTML parsing.
+            if element.text:
+                block = element.text.rstrip('\n')
+                element.text = ''
+                # Use a dummy placeholder element as the content needs to get inserted before existing children.
+                dummy = etree.Element('div')
+                self.parser.parseBlocks(dummy, block.split('\n\n'))
+                children = list(dummy)
+                children.reverse()
+                for child in children:
+                    element.insert(0, child)
+
+        elif md_attr == 'span':
+            # Span level parsing will be handled by inlineprocessors.
+            # Walk children here to remove any `markdown` attributes.
+            for child in list(element):
+                self.parse_element_content(child)
+
+        else:
+            # Disable inline parsing for everything else
+            if element.text is None:
+                element.text = ''
+            element.text = util.AtomicString(element.text)
+            for child in list(element):
+                self.parse_element_content(child)
+                if child.tail:
+                    child.tail = util.AtomicString(child.tail)
+
+    def run(self, parent, blocks):
+        m = util.HTML_PLACEHOLDER_RE.match(blocks[0])
+        if m:
+            index = int(m.group(1))
+            element = self.parser.md.htmlStash.rawHtmlBlocks[index]
+            if isinstance(element, etree.Element):
+                # We have a matched element. Process it.
+                blocks.pop(0)
+                self.parse_element_content(element)
+                parent.append(element)
+                # Cleanup stash. Replace element with empty string to avoid confusing postprocessor.
+                self.parser.md.htmlStash.rawHtmlBlocks.pop(index)
+                self.parser.md.htmlStash.rawHtmlBlocks.insert(index, '')
+                # Confirm the match to the blockparser.
+                return True
+        # No match found.
+        return False
+
+
+class MarkdownInHTMLPostprocessor(RawHtmlPostprocessor):
+    def stash_to_string(self, text):
+        """ Override default to handle any etree elements still in the stash. """
+        if isinstance(text, etree.Element):
+            return self.md.serializer(text)
+        else:
+            return str(text)


 class MarkdownInHtmlExtension(Extension):
@@ -86,14 +331,14 @@ class MarkdownInHtmlExtension(Extension):
    def extendMarkdown(self, md):
        """ Register extension instances. """

-        # Turn on processing of markdown text within raw html
-        md.preprocessors['html_block'].markdown_in_raw = True
+        # Replace raw HTML preprocessor
+        md.preprocessors.register(HtmlBlockPreprocessor(md), 'html_block', 20)
+        # Add blockprocessor which handles the placeholders for etree elements
        md.parser.blockprocessors.register(
            MarkdownInHtmlProcessor(md.parser), 'markdown_block', 105
        )
-        md.parser.blockprocessors.tag_counter = -1
-        md.parser.blockprocessors.contain_span_tags = re.compile(
-            r'^(p|h[1-6]|li|dd|dt|td|th|legend|address)$', re.IGNORECASE)
+        # Replace raw HTML postprocessor
+        md.postprocessors.register(MarkdownInHTMLPostprocessor(md), 'raw_html', 30)


 def makeExtension(**kwargs):  # pragma: no cover