Eliminado venv y www del repositorio, agrege un requirements igual
This commit is contained in:
@@ -32,4 +32,4 @@ __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
|
||||
|
||||
# this has to be at the top level, see how setup.py parses this
|
||||
#: Distribution version number.
|
||||
__version__ = "1.0.1"
|
||||
__version__ = "1.1"
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -136,6 +136,7 @@ def normaliseCharList(charList):
|
||||
i += j
|
||||
return rv
|
||||
|
||||
|
||||
# We don't really support characters above the BMP :(
|
||||
max_unicode = int("FFFF", 16)
|
||||
|
||||
@@ -254,7 +255,7 @@ class InfosetFilter(object):
|
||||
nameRest = name[1:]
|
||||
m = nonXmlNameFirstBMPRegexp.match(nameFirst)
|
||||
if m:
|
||||
warnings.warn("Coercing non-XML name", DataLossWarning)
|
||||
warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
|
||||
nameFirstOutput = self.getReplacementCharacter(nameFirst)
|
||||
else:
|
||||
nameFirstOutput = nameFirst
|
||||
@@ -262,7 +263,7 @@ class InfosetFilter(object):
|
||||
nameRestOutput = nameRest
|
||||
replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
|
||||
for char in replaceChars:
|
||||
warnings.warn("Coercing non-XML name", DataLossWarning)
|
||||
warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
|
||||
replacement = self.getReplacementCharacter(char)
|
||||
nameRestOutput = nameRestOutput.replace(char, replacement)
|
||||
return nameFirstOutput + nameRestOutput
|
||||
|
||||
@@ -1,10 +1,11 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from pip._vendor.six import text_type, binary_type
|
||||
from pip._vendor.six import text_type
|
||||
from pip._vendor.six.moves import http_client, urllib
|
||||
|
||||
import codecs
|
||||
import re
|
||||
from io import BytesIO, StringIO
|
||||
|
||||
from pip._vendor import webencodings
|
||||
|
||||
@@ -12,13 +13,6 @@ from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
|
||||
from .constants import _ReparseException
|
||||
from . import _utils
|
||||
|
||||
from io import StringIO
|
||||
|
||||
try:
|
||||
from io import BytesIO
|
||||
except ImportError:
|
||||
BytesIO = StringIO
|
||||
|
||||
# Non-unicode versions of constants for use in the pre-parser
|
||||
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
|
||||
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
|
||||
@@ -40,13 +34,13 @@ if _utils.supports_lone_surrogates:
|
||||
else:
|
||||
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
|
||||
|
||||
non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
|
||||
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
|
||||
0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
|
||||
0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
|
||||
0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
|
||||
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
|
||||
0x10FFFE, 0x10FFFF])
|
||||
non_bmp_invalid_codepoints = {0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
|
||||
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
|
||||
0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
|
||||
0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
|
||||
0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
|
||||
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
|
||||
0x10FFFE, 0x10FFFF}
|
||||
|
||||
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")
|
||||
|
||||
@@ -367,7 +361,7 @@ class HTMLUnicodeInputStream(object):
|
||||
def unget(self, char):
|
||||
# Only one character is allowed to be ungotten at once - it must
|
||||
# be consumed again before any further call to unget
|
||||
if char is not None:
|
||||
if char is not EOF:
|
||||
if self.chunkOffset == 0:
|
||||
# unget is called quite rarely, so it's a good idea to do
|
||||
# more work here if it saves a bit of work in the frequently
|
||||
@@ -449,7 +443,7 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
|
||||
try:
|
||||
stream.seek(stream.tell())
|
||||
except: # pylint:disable=bare-except
|
||||
except Exception:
|
||||
stream = BufferedStream(stream)
|
||||
|
||||
return stream
|
||||
@@ -461,7 +455,7 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
|
||||
# If we've been overriden, we've been overriden
|
||||
# If we've been overridden, we've been overridden
|
||||
charEncoding = lookupEncoding(self.override_encoding), "certain"
|
||||
if charEncoding[0] is not None:
|
||||
return charEncoding
|
||||
@@ -664,9 +658,7 @@ class EncodingBytes(bytes):
|
||||
"""Look for a sequence of bytes at the start of a string. If the bytes
|
||||
are found return True and advance the position to the byte after the
|
||||
match. Otherwise return False and leave the position alone"""
|
||||
p = self.position
|
||||
data = self[p:p + len(bytes)]
|
||||
rv = data.startswith(bytes)
|
||||
rv = self.startswith(bytes, self.position)
|
||||
if rv:
|
||||
self.position += len(bytes)
|
||||
return rv
|
||||
@@ -674,15 +666,11 @@ class EncodingBytes(bytes):
|
||||
def jumpTo(self, bytes):
|
||||
"""Look for the next sequence of bytes matching a given sequence. If
|
||||
a match is found advance the position to the last byte of the match"""
|
||||
newPosition = self[self.position:].find(bytes)
|
||||
if newPosition > -1:
|
||||
# XXX: This is ugly, but I can't see a nicer way to fix this.
|
||||
if self._position == -1:
|
||||
self._position = 0
|
||||
self._position += (newPosition + len(bytes) - 1)
|
||||
return True
|
||||
else:
|
||||
try:
|
||||
self._position = self.index(bytes, self.position) + len(bytes) - 1
|
||||
except ValueError:
|
||||
raise StopIteration
|
||||
return True
|
||||
|
||||
|
||||
class EncodingParser(object):
|
||||
@@ -694,6 +682,9 @@ class EncodingParser(object):
|
||||
self.encoding = None
|
||||
|
||||
def getEncoding(self):
|
||||
if b"<meta" not in self.data:
|
||||
return None
|
||||
|
||||
methodDispatch = (
|
||||
(b"<!--", self.handleComment),
|
||||
(b"<meta", self.handleMeta),
|
||||
@@ -703,6 +694,10 @@ class EncodingParser(object):
|
||||
(b"<", self.handlePossibleStartTag))
|
||||
for _ in self.data:
|
||||
keepParsing = True
|
||||
try:
|
||||
self.data.jumpTo(b"<")
|
||||
except StopIteration:
|
||||
break
|
||||
for key, method in methodDispatch:
|
||||
if self.data.matchBytes(key):
|
||||
try:
|
||||
@@ -908,7 +903,7 @@ class ContentAttrParser(object):
|
||||
def lookupEncoding(encoding):
|
||||
"""Return the python codec name corresponding to an encoding or None if the
|
||||
string doesn't correspond to a valid encoding."""
|
||||
if isinstance(encoding, binary_type):
|
||||
if isinstance(encoding, bytes):
|
||||
try:
|
||||
encoding = encoding.decode("ascii")
|
||||
except UnicodeDecodeError:
|
||||
|
||||
@@ -2,7 +2,8 @@ from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from pip._vendor.six import unichr as chr
|
||||
|
||||
from collections import deque
|
||||
from collections import deque, OrderedDict
|
||||
from sys import version_info
|
||||
|
||||
from .constants import spaceCharacters
|
||||
from .constants import entities
|
||||
@@ -17,6 +18,11 @@ from ._trie import Trie
|
||||
|
||||
entitiesTrie = Trie(entities)
|
||||
|
||||
if version_info >= (3, 7):
|
||||
attributeMap = dict
|
||||
else:
|
||||
attributeMap = OrderedDict
|
||||
|
||||
|
||||
class HTMLTokenizer(object):
|
||||
""" This class takes care of tokenizing HTML.
|
||||
@@ -228,6 +234,14 @@ class HTMLTokenizer(object):
|
||||
# Add token to the queue to be yielded
|
||||
if (token["type"] in tagTokenTypes):
|
||||
token["name"] = token["name"].translate(asciiUpper2Lower)
|
||||
if token["type"] == tokenTypes["StartTag"]:
|
||||
raw = token["data"]
|
||||
data = attributeMap(raw)
|
||||
if len(raw) > len(data):
|
||||
# we had some duplicated attribute, fix so first wins
|
||||
data.update(raw[::-1])
|
||||
token["data"] = data
|
||||
|
||||
if token["type"] == tokenTypes["EndTag"]:
|
||||
if token["data"]:
|
||||
self.tokenQueue.append({"type": tokenTypes["ParseError"],
|
||||
|
||||
@@ -1,14 +1,5 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from .py import Trie as PyTrie
|
||||
from .py import Trie
|
||||
|
||||
Trie = PyTrie
|
||||
|
||||
# pylint:disable=wrong-import-position
|
||||
try:
|
||||
from .datrie import Trie as DATrie
|
||||
except ImportError:
|
||||
pass
|
||||
else:
|
||||
Trie = DATrie
|
||||
# pylint:enable=wrong-import-position
|
||||
__all__ = ["Trie"]
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,44 +0,0 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from datrie import Trie as DATrie
|
||||
from pip._vendor.six import text_type
|
||||
|
||||
from ._base import Trie as ABCTrie
|
||||
|
||||
|
||||
class Trie(ABCTrie):
|
||||
def __init__(self, data):
|
||||
chars = set()
|
||||
for key in data.keys():
|
||||
if not isinstance(key, text_type):
|
||||
raise TypeError("All keys must be strings")
|
||||
for char in key:
|
||||
chars.add(char)
|
||||
|
||||
self._data = DATrie("".join(chars))
|
||||
for key, value in data.items():
|
||||
self._data[key] = value
|
||||
|
||||
def __contains__(self, key):
|
||||
return key in self._data
|
||||
|
||||
def __len__(self):
|
||||
return len(self._data)
|
||||
|
||||
def __iter__(self):
|
||||
raise NotImplementedError()
|
||||
|
||||
def __getitem__(self, key):
|
||||
return self._data[key]
|
||||
|
||||
def keys(self, prefix=None):
|
||||
return self._data.keys(prefix)
|
||||
|
||||
def has_keys_with_prefix(self, prefix):
|
||||
return self._data.has_keys_with_prefix(prefix)
|
||||
|
||||
def longest_prefix(self, prefix):
|
||||
return self._data.longest_prefix(prefix)
|
||||
|
||||
def longest_prefix_item(self, prefix):
|
||||
return self._data.longest_prefix_item(prefix)
|
||||
@@ -2,12 +2,20 @@ from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
from types import ModuleType
|
||||
|
||||
from pip._vendor.six import text_type
|
||||
|
||||
try:
|
||||
import xml.etree.cElementTree as default_etree
|
||||
from collections.abc import Mapping
|
||||
except ImportError:
|
||||
from collections import Mapping
|
||||
|
||||
from pip._vendor.six import text_type, PY3
|
||||
|
||||
if PY3:
|
||||
import xml.etree.ElementTree as default_etree
|
||||
else:
|
||||
try:
|
||||
import xml.etree.cElementTree as default_etree
|
||||
except ImportError:
|
||||
import xml.etree.ElementTree as default_etree
|
||||
|
||||
|
||||
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
|
||||
@@ -27,7 +35,7 @@ try:
|
||||
# We need this with u"" because of http://bugs.jython.org/issue2039
|
||||
_x = eval('u"\\uD800"') # pylint:disable=eval-used
|
||||
assert isinstance(_x, text_type)
|
||||
except: # pylint:disable=bare-except
|
||||
except Exception:
|
||||
supports_lone_surrogates = False
|
||||
else:
|
||||
supports_lone_surrogates = True
|
||||
@@ -47,9 +55,6 @@ class MethodDispatcher(dict):
|
||||
"""
|
||||
|
||||
def __init__(self, items=()):
|
||||
# Using _dictEntries instead of directly assigning to self is about
|
||||
# twice as fast. Please do careful performance testing before changing
|
||||
# anything here.
|
||||
_dictEntries = []
|
||||
for name, value in items:
|
||||
if isinstance(name, (list, tuple, frozenset, set)):
|
||||
@@ -64,6 +69,36 @@ class MethodDispatcher(dict):
|
||||
def __getitem__(self, key):
|
||||
return dict.get(self, key, self.default)
|
||||
|
||||
def __get__(self, instance, owner=None):
|
||||
return BoundMethodDispatcher(instance, self)
|
||||
|
||||
|
||||
class BoundMethodDispatcher(Mapping):
|
||||
"""Wraps a MethodDispatcher, binding its return values to `instance`"""
|
||||
def __init__(self, instance, dispatcher):
|
||||
self.instance = instance
|
||||
self.dispatcher = dispatcher
|
||||
|
||||
def __getitem__(self, key):
|
||||
# see https://docs.python.org/3/reference/datamodel.html#object.__get__
|
||||
# on a function, __get__ is used to bind a function to an instance as a bound method
|
||||
return self.dispatcher[key].__get__(self.instance)
|
||||
|
||||
def get(self, key, default):
|
||||
if key in self.dispatcher:
|
||||
return self[key]
|
||||
else:
|
||||
return default
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self.dispatcher)
|
||||
|
||||
def __len__(self):
|
||||
return len(self.dispatcher)
|
||||
|
||||
def __contains__(self, key):
|
||||
return key in self.dispatcher
|
||||
|
||||
|
||||
# Some utility functions to deal with weirdness around UCS2 vs UCS4
|
||||
# python builds
|
||||
|
||||
@@ -519,8 +519,8 @@ adjustForeignAttributes = {
|
||||
"xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
|
||||
}
|
||||
|
||||
unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in
|
||||
adjustForeignAttributes.items()])
|
||||
unadjustForeignAttributes = {(ns, local): qname for qname, (prefix, local, ns) in
|
||||
adjustForeignAttributes.items()}
|
||||
|
||||
spaceCharacters = frozenset([
|
||||
"\t",
|
||||
@@ -544,8 +544,7 @@ asciiLetters = frozenset(string.ascii_letters)
|
||||
digits = frozenset(string.digits)
|
||||
hexDigits = frozenset(string.hexdigits)
|
||||
|
||||
asciiUpper2Lower = dict([(ord(c), ord(c.lower()))
|
||||
for c in string.ascii_uppercase])
|
||||
asciiUpper2Lower = {ord(c): ord(c.lower()) for c in string.ascii_uppercase}
|
||||
|
||||
# Heading elements need to be ordered
|
||||
headingElements = (
|
||||
@@ -2934,7 +2933,7 @@ tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"],
|
||||
tokenTypes["EmptyTag"]])
|
||||
|
||||
|
||||
prefixes = dict([(v, k) for k, v in namespaces.items()])
|
||||
prefixes = {v: k for k, v in namespaces.items()}
|
||||
prefixes["http://www.w3.org/1998/Math/MathML"] = "math"
|
||||
|
||||
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -1,6 +1,15 @@
|
||||
"""Deprecated from html5lib 1.1.
|
||||
|
||||
See `here <https://github.com/html5lib/html5lib-python/issues/443>`_ for
|
||||
information about its deprecation; `Bleach <https://github.com/mozilla/bleach>`_
|
||||
is recommended as a replacement. Please let us know in the aforementioned issue
|
||||
if Bleach is unsuitable for your needs.
|
||||
|
||||
"""
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
import re
|
||||
import warnings
|
||||
from xml.sax.saxutils import escape, unescape
|
||||
|
||||
from pip._vendor.six.moves import urllib_parse as urlparse
|
||||
@@ -11,6 +20,14 @@ from ..constants import namespaces, prefixes
|
||||
__all__ = ["Filter"]
|
||||
|
||||
|
||||
_deprecation_msg = (
|
||||
"html5lib's sanitizer is deprecated; see " +
|
||||
"https://github.com/html5lib/html5lib-python/issues/443 and please let " +
|
||||
"us know if Bleach is unsuitable for your needs"
|
||||
)
|
||||
|
||||
warnings.warn(_deprecation_msg, DeprecationWarning)
|
||||
|
||||
allowed_elements = frozenset((
|
||||
(namespaces['html'], 'a'),
|
||||
(namespaces['html'], 'abbr'),
|
||||
@@ -750,6 +767,9 @@ class Filter(base.Filter):
|
||||
|
||||
"""
|
||||
super(Filter, self).__init__(source)
|
||||
|
||||
warnings.warn(_deprecation_msg, DeprecationWarning)
|
||||
|
||||
self.allowed_elements = allowed_elements
|
||||
self.allowed_attributes = allowed_attributes
|
||||
self.allowed_css_properties = allowed_css_properties
|
||||
|
||||
@@ -2,7 +2,6 @@ from __future__ import absolute_import, division, unicode_literals
|
||||
from pip._vendor.six import with_metaclass, viewkeys
|
||||
|
||||
import types
|
||||
from collections import OrderedDict
|
||||
|
||||
from . import _inputstream
|
||||
from . import _tokenizer
|
||||
@@ -119,8 +118,8 @@ class HTMLParser(object):
|
||||
self.tree = tree(namespaceHTMLElements)
|
||||
self.errors = []
|
||||
|
||||
self.phases = dict([(name, cls(self, self.tree)) for name, cls in
|
||||
getPhases(debug).items()])
|
||||
self.phases = {name: cls(self, self.tree) for name, cls in
|
||||
getPhases(debug).items()}
|
||||
|
||||
def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
|
||||
|
||||
@@ -202,7 +201,7 @@ class HTMLParser(object):
|
||||
DoctypeToken = tokenTypes["Doctype"]
|
||||
ParseErrorToken = tokenTypes["ParseError"]
|
||||
|
||||
for token in self.normalizedTokens():
|
||||
for token in self.tokenizer:
|
||||
prev_token = None
|
||||
new_token = token
|
||||
while new_token is not None:
|
||||
@@ -260,10 +259,6 @@ class HTMLParser(object):
|
||||
if reprocess:
|
||||
assert self.phase not in phases
|
||||
|
||||
def normalizedTokens(self):
|
||||
for token in self.tokenizer:
|
||||
yield self.normalizeToken(token)
|
||||
|
||||
def parse(self, stream, *args, **kwargs):
|
||||
"""Parse a HTML document into a well-formed tree
|
||||
|
||||
@@ -325,17 +320,6 @@ class HTMLParser(object):
|
||||
if self.strict:
|
||||
raise ParseError(E[errorcode] % datavars)
|
||||
|
||||
def normalizeToken(self, token):
|
||||
# HTML5 specific normalizations to the token stream
|
||||
if token["type"] == tokenTypes["StartTag"]:
|
||||
raw = token["data"]
|
||||
token["data"] = OrderedDict(raw)
|
||||
if len(raw) > len(token["data"]):
|
||||
# we had some duplicated attribute, fix so first wins
|
||||
token["data"].update(raw[::-1])
|
||||
|
||||
return token
|
||||
|
||||
def adjustMathMLAttributes(self, token):
|
||||
adjust_attributes(token, adjustMathMLAttributes)
|
||||
|
||||
@@ -413,16 +397,12 @@ class HTMLParser(object):
|
||||
def getPhases(debug):
|
||||
def log(function):
|
||||
"""Logger that records which phase processes each token"""
|
||||
type_names = dict((value, key) for key, value in
|
||||
tokenTypes.items())
|
||||
type_names = {value: key for key, value in tokenTypes.items()}
|
||||
|
||||
def wrapped(self, *args, **kwargs):
|
||||
if function.__name__.startswith("process") and len(args) > 0:
|
||||
token = args[0]
|
||||
try:
|
||||
info = {"type": type_names[token['type']]}
|
||||
except:
|
||||
raise
|
||||
info = {"type": type_names[token['type']]}
|
||||
if token['type'] in tagTokenTypes:
|
||||
info["name"] = token['name']
|
||||
|
||||
@@ -446,10 +426,13 @@ def getPhases(debug):
|
||||
class Phase(with_metaclass(getMetaclass(debug, log))):
|
||||
"""Base class for helper object that implements each phase of processing
|
||||
"""
|
||||
__slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")
|
||||
|
||||
def __init__(self, parser, tree):
|
||||
self.parser = parser
|
||||
self.tree = tree
|
||||
self.__startTagCache = {}
|
||||
self.__endTagCache = {}
|
||||
|
||||
def processEOF(self):
|
||||
raise NotImplementedError
|
||||
@@ -469,7 +452,21 @@ def getPhases(debug):
|
||||
self.tree.insertText(token["data"])
|
||||
|
||||
def processStartTag(self, token):
|
||||
return self.startTagHandler[token["name"]](token)
|
||||
# Note the caching is done here rather than BoundMethodDispatcher as doing it there
|
||||
# requires a circular reference to the Phase, and this ends up with a significant
|
||||
# (CPython 2.7, 3.8) GC cost when parsing many short inputs
|
||||
name = token["name"]
|
||||
# In Py2, using `in` is quicker in general than try/except KeyError
|
||||
# In Py3, `in` is quicker when there are few cache hits (typically short inputs)
|
||||
if name in self.__startTagCache:
|
||||
func = self.__startTagCache[name]
|
||||
else:
|
||||
func = self.__startTagCache[name] = self.startTagHandler[name]
|
||||
# bound the cache size in case we get loads of unknown tags
|
||||
while len(self.__startTagCache) > len(self.startTagHandler) * 1.1:
|
||||
# this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
|
||||
self.__startTagCache.pop(next(iter(self.__startTagCache)))
|
||||
return func(token)
|
||||
|
||||
def startTagHtml(self, token):
|
||||
if not self.parser.firstStartTag and token["name"] == "html":
|
||||
@@ -482,9 +479,25 @@ def getPhases(debug):
|
||||
self.parser.firstStartTag = False
|
||||
|
||||
def processEndTag(self, token):
|
||||
return self.endTagHandler[token["name"]](token)
|
||||
# Note the caching is done here rather than BoundMethodDispatcher as doing it there
|
||||
# requires a circular reference to the Phase, and this ends up with a significant
|
||||
# (CPython 2.7, 3.8) GC cost when parsing many short inputs
|
||||
name = token["name"]
|
||||
# In Py2, using `in` is quicker in general than try/except KeyError
|
||||
# In Py3, `in` is quicker when there are few cache hits (typically short inputs)
|
||||
if name in self.__endTagCache:
|
||||
func = self.__endTagCache[name]
|
||||
else:
|
||||
func = self.__endTagCache[name] = self.endTagHandler[name]
|
||||
# bound the cache size in case we get loads of unknown tags
|
||||
while len(self.__endTagCache) > len(self.endTagHandler) * 1.1:
|
||||
# this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
|
||||
self.__endTagCache.pop(next(iter(self.__endTagCache)))
|
||||
return func(token)
|
||||
|
||||
class InitialPhase(Phase):
|
||||
__slots__ = tuple()
|
||||
|
||||
def processSpaceCharacters(self, token):
|
||||
pass
|
||||
|
||||
@@ -613,6 +626,8 @@ def getPhases(debug):
|
||||
return True
|
||||
|
||||
class BeforeHtmlPhase(Phase):
|
||||
__slots__ = tuple()
|
||||
|
||||
# helper methods
|
||||
def insertHtmlElement(self):
|
||||
self.tree.insertRoot(impliedTagToken("html", "StartTag"))
|
||||
@@ -648,19 +663,7 @@ def getPhases(debug):
|
||||
return token
|
||||
|
||||
class BeforeHeadPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("head", self.startTagHead)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
(("head", "body", "html", "br"), self.endTagImplyHead)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
def processEOF(self):
|
||||
self.startTagHead(impliedTagToken("head", "StartTag"))
|
||||
@@ -693,28 +696,19 @@ def getPhases(debug):
|
||||
self.parser.parseError("end-tag-after-implied-root",
|
||||
{"name": token["name"]})
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", startTagHtml),
|
||||
("head", startTagHead)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
(("head", "body", "html", "br"), endTagImplyHead)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InHeadPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("title", self.startTagTitle),
|
||||
(("noframes", "style"), self.startTagNoFramesStyle),
|
||||
("noscript", self.startTagNoscript),
|
||||
("script", self.startTagScript),
|
||||
(("base", "basefont", "bgsound", "command", "link"),
|
||||
self.startTagBaseLinkCommand),
|
||||
("meta", self.startTagMeta),
|
||||
("head", self.startTagHead)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("head", self.endTagHead),
|
||||
(("br", "html", "body"), self.endTagHtmlBodyBr)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
# the real thing
|
||||
def processEOF(self):
|
||||
@@ -796,22 +790,27 @@ def getPhases(debug):
|
||||
def anythingElse(self):
|
||||
self.endTagHead(impliedTagToken("head"))
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", startTagHtml),
|
||||
("title", startTagTitle),
|
||||
(("noframes", "style"), startTagNoFramesStyle),
|
||||
("noscript", startTagNoscript),
|
||||
("script", startTagScript),
|
||||
(("base", "basefont", "bgsound", "command", "link"),
|
||||
startTagBaseLinkCommand),
|
||||
("meta", startTagMeta),
|
||||
("head", startTagHead)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("head", endTagHead),
|
||||
(("br", "html", "body"), endTagHtmlBodyBr)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InHeadNoscriptPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
(("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand),
|
||||
(("head", "noscript"), self.startTagHeadNoscript),
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("noscript", self.endTagNoscript),
|
||||
("br", self.endTagBr),
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
def processEOF(self):
|
||||
self.parser.parseError("eof-in-head-noscript")
|
||||
@@ -860,23 +859,21 @@ def getPhases(debug):
|
||||
# Caller must raise parse error first!
|
||||
self.endTagNoscript(impliedTagToken("noscript"))
|
||||
|
||||
class AfterHeadPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", startTagHtml),
|
||||
(("basefont", "bgsound", "link", "meta", "noframes", "style"), startTagBaseLinkCommand),
|
||||
(("head", "noscript"), startTagHeadNoscript),
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("body", self.startTagBody),
|
||||
("frameset", self.startTagFrameset),
|
||||
(("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
|
||||
"style", "title"),
|
||||
self.startTagFromHead),
|
||||
("head", self.startTagHead)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
self.endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
|
||||
self.endTagHtmlBodyBr)])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("noscript", endTagNoscript),
|
||||
("br", endTagBr),
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class AfterHeadPhase(Phase):
|
||||
__slots__ = tuple()
|
||||
|
||||
def processEOF(self):
|
||||
self.anythingElse()
|
||||
@@ -927,80 +924,30 @@ def getPhases(debug):
|
||||
self.parser.phase = self.parser.phases["inBody"]
|
||||
self.parser.framesetOK = True
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", startTagHtml),
|
||||
("body", startTagBody),
|
||||
("frameset", startTagFrameset),
|
||||
(("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
|
||||
"style", "title"),
|
||||
startTagFromHead),
|
||||
("head", startTagHead)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
|
||||
endTagHtmlBodyBr)])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InBodyPhase(Phase):
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
|
||||
# the really-really-really-very crazy mode
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
__slots__ = ("processSpaceCharacters",)
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(InBodyPhase, self).__init__(*args, **kwargs)
|
||||
# Set this to the default handler
|
||||
self.processSpaceCharacters = self.processSpaceCharactersNonPre
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
(("base", "basefont", "bgsound", "command", "link", "meta",
|
||||
"script", "style", "title"),
|
||||
self.startTagProcessInHead),
|
||||
("body", self.startTagBody),
|
||||
("frameset", self.startTagFrameset),
|
||||
(("address", "article", "aside", "blockquote", "center", "details",
|
||||
"dir", "div", "dl", "fieldset", "figcaption", "figure",
|
||||
"footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
|
||||
"section", "summary", "ul"),
|
||||
self.startTagCloseP),
|
||||
(headingElements, self.startTagHeading),
|
||||
(("pre", "listing"), self.startTagPreListing),
|
||||
("form", self.startTagForm),
|
||||
(("li", "dd", "dt"), self.startTagListItem),
|
||||
("plaintext", self.startTagPlaintext),
|
||||
("a", self.startTagA),
|
||||
(("b", "big", "code", "em", "font", "i", "s", "small", "strike",
|
||||
"strong", "tt", "u"), self.startTagFormatting),
|
||||
("nobr", self.startTagNobr),
|
||||
("button", self.startTagButton),
|
||||
(("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
|
||||
("xmp", self.startTagXmp),
|
||||
("table", self.startTagTable),
|
||||
(("area", "br", "embed", "img", "keygen", "wbr"),
|
||||
self.startTagVoidFormatting),
|
||||
(("param", "source", "track"), self.startTagParamSource),
|
||||
("input", self.startTagInput),
|
||||
("hr", self.startTagHr),
|
||||
("image", self.startTagImage),
|
||||
("isindex", self.startTagIsIndex),
|
||||
("textarea", self.startTagTextarea),
|
||||
("iframe", self.startTagIFrame),
|
||||
("noscript", self.startTagNoscript),
|
||||
(("noembed", "noframes"), self.startTagRawtext),
|
||||
("select", self.startTagSelect),
|
||||
(("rp", "rt"), self.startTagRpRt),
|
||||
(("option", "optgroup"), self.startTagOpt),
|
||||
(("math"), self.startTagMath),
|
||||
(("svg"), self.startTagSvg),
|
||||
(("caption", "col", "colgroup", "frame", "head",
|
||||
"tbody", "td", "tfoot", "th", "thead",
|
||||
"tr"), self.startTagMisplaced)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("body", self.endTagBody),
|
||||
("html", self.endTagHtml),
|
||||
(("address", "article", "aside", "blockquote", "button", "center",
|
||||
"details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
|
||||
"footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
|
||||
"section", "summary", "ul"), self.endTagBlock),
|
||||
("form", self.endTagForm),
|
||||
("p", self.endTagP),
|
||||
(("dd", "dt", "li"), self.endTagListItem),
|
||||
(headingElements, self.endTagHeading),
|
||||
(("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
|
||||
"strike", "strong", "tt", "u"), self.endTagFormatting),
|
||||
(("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
|
||||
("br", self.endTagBr),
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
|
||||
def isMatchingFormattingElement(self, node1, node2):
|
||||
return (node1.name == node2.name and
|
||||
node1.namespace == node2.namespace and
|
||||
@@ -1650,14 +1597,73 @@ def getPhases(debug):
|
||||
self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
|
||||
break
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
(("base", "basefont", "bgsound", "command", "link", "meta",
|
||||
"script", "style", "title"),
|
||||
startTagProcessInHead),
|
||||
("body", startTagBody),
|
||||
("frameset", startTagFrameset),
|
||||
(("address", "article", "aside", "blockquote", "center", "details",
|
||||
"dir", "div", "dl", "fieldset", "figcaption", "figure",
|
||||
"footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
|
||||
"section", "summary", "ul"),
|
||||
startTagCloseP),
|
||||
(headingElements, startTagHeading),
|
||||
(("pre", "listing"), startTagPreListing),
|
||||
("form", startTagForm),
|
||||
(("li", "dd", "dt"), startTagListItem),
|
||||
("plaintext", startTagPlaintext),
|
||||
("a", startTagA),
|
||||
(("b", "big", "code", "em", "font", "i", "s", "small", "strike",
|
||||
"strong", "tt", "u"), startTagFormatting),
|
||||
("nobr", startTagNobr),
|
||||
("button", startTagButton),
|
||||
(("applet", "marquee", "object"), startTagAppletMarqueeObject),
|
||||
("xmp", startTagXmp),
|
||||
("table", startTagTable),
|
||||
(("area", "br", "embed", "img", "keygen", "wbr"),
|
||||
startTagVoidFormatting),
|
||||
(("param", "source", "track"), startTagParamSource),
|
||||
("input", startTagInput),
|
||||
("hr", startTagHr),
|
||||
("image", startTagImage),
|
||||
("isindex", startTagIsIndex),
|
||||
("textarea", startTagTextarea),
|
||||
("iframe", startTagIFrame),
|
||||
("noscript", startTagNoscript),
|
||||
(("noembed", "noframes"), startTagRawtext),
|
||||
("select", startTagSelect),
|
||||
(("rp", "rt"), startTagRpRt),
|
||||
(("option", "optgroup"), startTagOpt),
|
||||
(("math"), startTagMath),
|
||||
(("svg"), startTagSvg),
|
||||
(("caption", "col", "colgroup", "frame", "head",
|
||||
"tbody", "td", "tfoot", "th", "thead",
|
||||
"tr"), startTagMisplaced)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("body", endTagBody),
|
||||
("html", endTagHtml),
|
||||
(("address", "article", "aside", "blockquote", "button", "center",
|
||||
"details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
|
||||
"footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
|
||||
"section", "summary", "ul"), endTagBlock),
|
||||
("form", endTagForm),
|
||||
("p", endTagP),
|
||||
(("dd", "dt", "li"), endTagListItem),
|
||||
(headingElements, endTagHeading),
|
||||
(("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
|
||||
"strike", "strong", "tt", "u"), endTagFormatting),
|
||||
(("applet", "marquee", "object"), endTagAppletMarqueeObject),
|
||||
("br", endTagBr),
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class TextPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
self.startTagHandler = _utils.MethodDispatcher([])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("script", self.endTagScript)])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
def processCharacters(self, token):
|
||||
self.tree.insertText(token["data"])
|
||||
@@ -1683,30 +1689,15 @@ def getPhases(debug):
|
||||
self.tree.openElements.pop()
|
||||
self.parser.phase = self.parser.originalPhase
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([])
|
||||
startTagHandler.default = startTagOther
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("script", endTagScript)])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InTablePhase(Phase):
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-table
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("caption", self.startTagCaption),
|
||||
("colgroup", self.startTagColgroup),
|
||||
("col", self.startTagCol),
|
||||
(("tbody", "tfoot", "thead"), self.startTagRowGroup),
|
||||
(("td", "th", "tr"), self.startTagImplyTbody),
|
||||
("table", self.startTagTable),
|
||||
(("style", "script"), self.startTagStyleScript),
|
||||
("input", self.startTagInput),
|
||||
("form", self.startTagForm)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("table", self.endTagTable),
|
||||
(("body", "caption", "col", "colgroup", "html", "tbody", "td",
|
||||
"tfoot", "th", "thead", "tr"), self.endTagIgnore)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
# helper methods
|
||||
def clearStackToTableContext(self):
|
||||
@@ -1828,9 +1819,32 @@ def getPhases(debug):
|
||||
self.parser.phases["inBody"].processEndTag(token)
|
||||
self.tree.insertFromTable = False
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
("caption", startTagCaption),
|
||||
("colgroup", startTagColgroup),
|
||||
("col", startTagCol),
|
||||
(("tbody", "tfoot", "thead"), startTagRowGroup),
|
||||
(("td", "th", "tr"), startTagImplyTbody),
|
||||
("table", startTagTable),
|
||||
(("style", "script"), startTagStyleScript),
|
||||
("input", startTagInput),
|
||||
("form", startTagForm)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("table", endTagTable),
|
||||
(("body", "caption", "col", "colgroup", "html", "tbody", "td",
|
||||
"tfoot", "th", "thead", "tr"), endTagIgnore)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InTableTextPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
__slots__ = ("originalPhase", "characterTokens")
|
||||
|
||||
def __init__(self, *args, **kwargs):
|
||||
super(InTableTextPhase, self).__init__(*args, **kwargs)
|
||||
self.originalPhase = None
|
||||
self.characterTokens = []
|
||||
|
||||
@@ -1875,23 +1889,7 @@ def getPhases(debug):
|
||||
|
||||
class InCaptionPhase(Phase):
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-caption
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
|
||||
"thead", "tr"), self.startTagTableElement)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("caption", self.endTagCaption),
|
||||
("table", self.endTagTable),
|
||||
(("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
|
||||
"thead", "tr"), self.endTagIgnore)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
def ignoreEndTagCaption(self):
|
||||
return not self.tree.elementInScope("caption", variant="table")
|
||||
@@ -1944,23 +1942,24 @@ def getPhases(debug):
|
||||
def endTagOther(self, token):
|
||||
return self.parser.phases["inBody"].processEndTag(token)
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
|
||||
"thead", "tr"), startTagTableElement)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("caption", endTagCaption),
|
||||
("table", endTagTable),
|
||||
(("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
|
||||
"thead", "tr"), endTagIgnore)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InColumnGroupPhase(Phase):
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-column
|
||||
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("col", self.startTagCol)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("colgroup", self.endTagColgroup),
|
||||
("col", self.endTagCol)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
def ignoreEndTagColgroup(self):
|
||||
return self.tree.openElements[-1].name == "html"
|
||||
@@ -2010,26 +2009,21 @@ def getPhases(debug):
|
||||
if not ignoreEndTag:
|
||||
return token
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
("col", startTagCol)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("colgroup", endTagColgroup),
|
||||
("col", endTagCol)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InTableBodyPhase(Phase):
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-table0
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("tr", self.startTagTr),
|
||||
(("td", "th"), self.startTagTableCell),
|
||||
(("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
|
||||
self.startTagTableOther)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
(("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
|
||||
("table", self.endTagTable),
|
||||
(("body", "caption", "col", "colgroup", "html", "td", "th",
|
||||
"tr"), self.endTagIgnore)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
# helper methods
|
||||
def clearStackToTableBodyContext(self):
|
||||
@@ -2108,26 +2102,26 @@ def getPhases(debug):
|
||||
def endTagOther(self, token):
|
||||
return self.parser.phases["inTable"].processEndTag(token)
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
("tr", startTagTr),
|
||||
(("td", "th"), startTagTableCell),
|
||||
(("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
|
||||
startTagTableOther)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
(("tbody", "tfoot", "thead"), endTagTableRowGroup),
|
||||
("table", endTagTable),
|
||||
(("body", "caption", "col", "colgroup", "html", "td", "th",
|
||||
"tr"), endTagIgnore)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InRowPhase(Phase):
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-row
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
(("td", "th"), self.startTagTableCell),
|
||||
(("caption", "col", "colgroup", "tbody", "tfoot", "thead",
|
||||
"tr"), self.startTagTableOther)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("tr", self.endTagTr),
|
||||
("table", self.endTagTable),
|
||||
(("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
|
||||
(("body", "caption", "col", "colgroup", "html", "td", "th"),
|
||||
self.endTagIgnore)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
# helper methods (XXX unify this with other table helper methods)
|
||||
def clearStackToTableRowContext(self):
|
||||
@@ -2197,23 +2191,26 @@ def getPhases(debug):
|
||||
def endTagOther(self, token):
|
||||
return self.parser.phases["inTable"].processEndTag(token)
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
(("td", "th"), startTagTableCell),
|
||||
(("caption", "col", "colgroup", "tbody", "tfoot", "thead",
|
||||
"tr"), startTagTableOther)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("tr", endTagTr),
|
||||
("table", endTagTable),
|
||||
(("tbody", "tfoot", "thead"), endTagTableRowGroup),
|
||||
(("body", "caption", "col", "colgroup", "html", "td", "th"),
|
||||
endTagIgnore)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InCellPhase(Phase):
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-cell
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
|
||||
"thead", "tr"), self.startTagTableOther)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
(("td", "th"), self.endTagTableCell),
|
||||
(("body", "caption", "col", "colgroup", "html"), self.endTagIgnore),
|
||||
(("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
# helper
|
||||
def closeCell(self):
|
||||
@@ -2273,26 +2270,22 @@ def getPhases(debug):
|
||||
def endTagOther(self, token):
|
||||
return self.parser.phases["inBody"].processEndTag(token)
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
|
||||
"thead", "tr"), startTagTableOther)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
(("td", "th"), endTagTableCell),
|
||||
(("body", "caption", "col", "colgroup", "html"), endTagIgnore),
|
||||
(("table", "tbody", "tfoot", "thead", "tr"), endTagImply)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InSelectPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("option", self.startTagOption),
|
||||
("optgroup", self.startTagOptgroup),
|
||||
("select", self.startTagSelect),
|
||||
(("input", "keygen", "textarea"), self.startTagInput),
|
||||
("script", self.startTagScript)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("option", self.endTagOption),
|
||||
("optgroup", self.endTagOptgroup),
|
||||
("select", self.endTagSelect)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-select
|
||||
def processEOF(self):
|
||||
@@ -2373,21 +2366,25 @@ def getPhases(debug):
|
||||
self.parser.parseError("unexpected-end-tag-in-select",
|
||||
{"name": token["name"]})
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
("option", startTagOption),
|
||||
("optgroup", startTagOptgroup),
|
||||
("select", startTagSelect),
|
||||
(("input", "keygen", "textarea"), startTagInput),
|
||||
("script", startTagScript)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("option", endTagOption),
|
||||
("optgroup", endTagOptgroup),
|
||||
("select", endTagSelect)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InSelectInTablePhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
|
||||
self.startTagTable)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
|
||||
self.endTagTable)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
def processEOF(self):
|
||||
self.parser.phases["inSelect"].processEOF()
|
||||
@@ -2412,7 +2409,21 @@ def getPhases(debug):
|
||||
def endTagOther(self, token):
|
||||
return self.parser.phases["inSelect"].processEndTag(token)
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
|
||||
startTagTable)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
|
||||
endTagTable)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InForeignContentPhase(Phase):
|
||||
__slots__ = tuple()
|
||||
|
||||
breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
|
||||
"center", "code", "dd", "div", "dl", "dt",
|
||||
"em", "embed", "h1", "h2", "h3",
|
||||
@@ -2422,9 +2433,6 @@ def getPhases(debug):
|
||||
"span", "strong", "strike", "sub", "sup",
|
||||
"table", "tt", "u", "ul", "var"])
|
||||
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
def adjustSVGTagNames(self, token):
|
||||
replacements = {"altglyph": "altGlyph",
|
||||
"altglyphdef": "altGlyphDef",
|
||||
@@ -2478,7 +2486,7 @@ def getPhases(debug):
|
||||
currentNode = self.tree.openElements[-1]
|
||||
if (token["name"] in self.breakoutElements or
|
||||
(token["name"] == "font" and
|
||||
set(token["data"].keys()) & set(["color", "face", "size"]))):
|
||||
set(token["data"].keys()) & {"color", "face", "size"})):
|
||||
self.parser.parseError("unexpected-html-element-in-foreign-content",
|
||||
{"name": token["name"]})
|
||||
while (self.tree.openElements[-1].namespace !=
|
||||
@@ -2528,16 +2536,7 @@ def getPhases(debug):
|
||||
return new_token
|
||||
|
||||
class AfterBodyPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([("html", self.endTagHtml)])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
def processEOF(self):
|
||||
# Stop parsing
|
||||
@@ -2574,23 +2573,17 @@ def getPhases(debug):
|
||||
self.parser.phase = self.parser.phases["inBody"]
|
||||
return token
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", startTagHtml)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([("html", endTagHtml)])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class InFramesetPhase(Phase):
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("frameset", self.startTagFrameset),
|
||||
("frame", self.startTagFrame),
|
||||
("noframes", self.startTagNoframes)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("frameset", self.endTagFrameset)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
def processEOF(self):
|
||||
if self.tree.openElements[-1].name != "html":
|
||||
@@ -2631,21 +2624,22 @@ def getPhases(debug):
|
||||
self.parser.parseError("unexpected-end-tag-in-frameset",
|
||||
{"name": token["name"]})
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
("frameset", startTagFrameset),
|
||||
("frame", startTagFrame),
|
||||
("noframes", startTagNoframes)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("frameset", endTagFrameset)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class AfterFramesetPhase(Phase):
|
||||
# http://www.whatwg.org/specs/web-apps/current-work/#after3
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("noframes", self.startTagNoframes)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
|
||||
self.endTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.endTagHtml)
|
||||
])
|
||||
self.endTagHandler.default = self.endTagOther
|
||||
__slots__ = tuple()
|
||||
|
||||
def processEOF(self):
|
||||
# Stop parsing
|
||||
@@ -2668,14 +2662,19 @@ def getPhases(debug):
|
||||
self.parser.parseError("unexpected-end-tag-after-frameset",
|
||||
{"name": token["name"]})
|
||||
|
||||
class AfterAfterBodyPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", Phase.startTagHtml),
|
||||
("noframes", startTagNoframes)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
endTagHandler = _utils.MethodDispatcher([
|
||||
("html", endTagHtml)
|
||||
])
|
||||
endTagHandler.default = endTagOther
|
||||
|
||||
class AfterAfterBodyPhase(Phase):
|
||||
__slots__ = tuple()
|
||||
|
||||
def processEOF(self):
|
||||
pass
|
||||
@@ -2706,15 +2705,13 @@ def getPhases(debug):
|
||||
self.parser.phase = self.parser.phases["inBody"]
|
||||
return token
|
||||
|
||||
class AfterAfterFramesetPhase(Phase):
|
||||
def __init__(self, parser, tree):
|
||||
Phase.__init__(self, parser, tree)
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", startTagHtml)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
self.startTagHandler = _utils.MethodDispatcher([
|
||||
("html", self.startTagHtml),
|
||||
("noframes", self.startTagNoFrames)
|
||||
])
|
||||
self.startTagHandler.default = self.startTagOther
|
||||
class AfterAfterFramesetPhase(Phase):
|
||||
__slots__ = tuple()
|
||||
|
||||
def processEOF(self):
|
||||
pass
|
||||
@@ -2741,6 +2738,13 @@ def getPhases(debug):
|
||||
def processEndTag(self, token):
|
||||
self.parser.parseError("expected-eof-but-got-end-tag",
|
||||
{"name": token["name"]})
|
||||
|
||||
startTagHandler = _utils.MethodDispatcher([
|
||||
("html", startTagHtml),
|
||||
("noframes", startTagNoFrames)
|
||||
])
|
||||
startTagHandler.default = startTagOther
|
||||
|
||||
# pylint:enable=unused-argument
|
||||
|
||||
return {
|
||||
@@ -2774,8 +2778,8 @@ def getPhases(debug):
|
||||
def adjust_attributes(token, replacements):
|
||||
needs_adjustment = viewkeys(token['data']) & viewkeys(replacements)
|
||||
if needs_adjustment:
|
||||
token['data'] = OrderedDict((replacements.get(k, k), v)
|
||||
for k, v in token['data'].items())
|
||||
token['data'] = type(token['data'])((replacements.get(k, k), v)
|
||||
for k, v in token['data'].items())
|
||||
|
||||
|
||||
def impliedTagToken(name, type="EndTag", attributes=None,
|
||||
|
||||
@@ -274,7 +274,7 @@ class HTMLSerializer(object):
|
||||
if token["systemId"]:
|
||||
if token["systemId"].find('"') >= 0:
|
||||
if token["systemId"].find("'") >= 0:
|
||||
self.serializeError("System identifer contains both single and double quote characters")
|
||||
self.serializeError("System identifier contains both single and double quote characters")
|
||||
quote_char = "'"
|
||||
else:
|
||||
quote_char = '"'
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -10,9 +10,9 @@ Marker = None
|
||||
|
||||
listElementsMap = {
|
||||
None: (frozenset(scopingElements), False),
|
||||
"button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False),
|
||||
"list": (frozenset(scopingElements | set([(namespaces["html"], "ol"),
|
||||
(namespaces["html"], "ul")])), False),
|
||||
"button": (frozenset(scopingElements | {(namespaces["html"], "button")}), False),
|
||||
"list": (frozenset(scopingElements | {(namespaces["html"], "ol"),
|
||||
(namespaces["html"], "ul")}), False),
|
||||
"table": (frozenset([(namespaces["html"], "html"),
|
||||
(namespaces["html"], "table")]), False),
|
||||
"select": (frozenset([(namespaces["html"], "optgroup"),
|
||||
@@ -28,7 +28,7 @@ class Node(object):
|
||||
:arg name: The tag name associated with the node
|
||||
|
||||
"""
|
||||
# The tag name assocaited with the node
|
||||
# The tag name associated with the node
|
||||
self.name = name
|
||||
# The parent of the current node (or None for the document node)
|
||||
self.parent = None
|
||||
|
||||
@@ -5,6 +5,8 @@ from pip._vendor.six import text_type
|
||||
|
||||
import re
|
||||
|
||||
from copy import copy
|
||||
|
||||
from . import base
|
||||
from .. import _ihatexml
|
||||
from .. import constants
|
||||
@@ -61,16 +63,17 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
|
||||
return self._element.attrib
|
||||
|
||||
def _setAttributes(self, attributes):
|
||||
# Delete existing attributes first
|
||||
# XXX - there may be a better way to do this...
|
||||
for key in list(self._element.attrib.keys()):
|
||||
del self._element.attrib[key]
|
||||
for key, value in attributes.items():
|
||||
if isinstance(key, tuple):
|
||||
name = "{%s}%s" % (key[2], key[1])
|
||||
else:
|
||||
name = key
|
||||
self._element.set(name, value)
|
||||
el_attrib = self._element.attrib
|
||||
el_attrib.clear()
|
||||
if attributes:
|
||||
# calling .items _always_ allocates, and the above truthy check is cheaper than the
|
||||
# allocation on average
|
||||
for key, value in attributes.items():
|
||||
if isinstance(key, tuple):
|
||||
name = "{%s}%s" % (key[2], key[1])
|
||||
else:
|
||||
name = key
|
||||
el_attrib[name] = value
|
||||
|
||||
attributes = property(_getAttributes, _setAttributes)
|
||||
|
||||
@@ -129,8 +132,8 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
|
||||
|
||||
def cloneNode(self):
|
||||
element = type(self)(self.name, self.namespace)
|
||||
for name, value in self.attributes.items():
|
||||
element.attributes[name] = value
|
||||
if self._element.attrib:
|
||||
element._element.attrib = copy(self._element.attrib)
|
||||
return element
|
||||
|
||||
def reparentChildren(self, newParent):
|
||||
|
||||
@@ -16,6 +16,11 @@ import warnings
|
||||
import re
|
||||
import sys
|
||||
|
||||
try:
|
||||
from collections.abc import MutableMapping
|
||||
except ImportError:
|
||||
from collections import MutableMapping
|
||||
|
||||
from . import base
|
||||
from ..constants import DataLossWarning
|
||||
from .. import constants
|
||||
@@ -23,6 +28,7 @@ from . import etree as etree_builders
|
||||
from .. import _ihatexml
|
||||
|
||||
import lxml.etree as etree
|
||||
from pip._vendor.six import PY3, binary_type
|
||||
|
||||
|
||||
fullTree = True
|
||||
@@ -44,7 +50,11 @@ class Document(object):
|
||||
self._childNodes = []
|
||||
|
||||
def appendChild(self, element):
|
||||
self._elementTree.getroot().addnext(element._element)
|
||||
last = self._elementTree.getroot()
|
||||
for last in self._elementTree.getroot().itersiblings():
|
||||
pass
|
||||
|
||||
last.addnext(element._element)
|
||||
|
||||
def _getChildNodes(self):
|
||||
return self._childNodes
|
||||
@@ -185,26 +195,37 @@ class TreeBuilder(base.TreeBuilder):
|
||||
infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
|
||||
self.namespaceHTMLElements = namespaceHTMLElements
|
||||
|
||||
class Attributes(dict):
|
||||
def __init__(self, element, value=None):
|
||||
if value is None:
|
||||
value = {}
|
||||
class Attributes(MutableMapping):
|
||||
def __init__(self, element):
|
||||
self._element = element
|
||||
dict.__init__(self, value) # pylint:disable=non-parent-init-called
|
||||
for key, value in self.items():
|
||||
if isinstance(key, tuple):
|
||||
name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
|
||||
else:
|
||||
name = infosetFilter.coerceAttribute(key)
|
||||
self._element._element.attrib[name] = value
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
dict.__setitem__(self, key, value)
|
||||
def _coerceKey(self, key):
|
||||
if isinstance(key, tuple):
|
||||
name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
|
||||
else:
|
||||
name = infosetFilter.coerceAttribute(key)
|
||||
self._element._element.attrib[name] = value
|
||||
return name
|
||||
|
||||
def __getitem__(self, key):
|
||||
value = self._element._element.attrib[self._coerceKey(key)]
|
||||
if not PY3 and isinstance(value, binary_type):
|
||||
value = value.decode("ascii")
|
||||
return value
|
||||
|
||||
def __setitem__(self, key, value):
|
||||
self._element._element.attrib[self._coerceKey(key)] = value
|
||||
|
||||
def __delitem__(self, key):
|
||||
del self._element._element.attrib[self._coerceKey(key)]
|
||||
|
||||
def __iter__(self):
|
||||
return iter(self._element._element.attrib)
|
||||
|
||||
def __len__(self):
|
||||
return len(self._element._element.attrib)
|
||||
|
||||
def clear(self):
|
||||
return self._element._element.attrib.clear()
|
||||
|
||||
class Element(builder.Element):
|
||||
def __init__(self, name, namespace):
|
||||
@@ -225,8 +246,10 @@ class TreeBuilder(base.TreeBuilder):
|
||||
def _getAttributes(self):
|
||||
return self._attributes
|
||||
|
||||
def _setAttributes(self, attributes):
|
||||
self._attributes = Attributes(self, attributes)
|
||||
def _setAttributes(self, value):
|
||||
attributes = self.attributes
|
||||
attributes.clear()
|
||||
attributes.update(value)
|
||||
|
||||
attributes = property(_getAttributes, _setAttributes)
|
||||
|
||||
@@ -234,8 +257,11 @@ class TreeBuilder(base.TreeBuilder):
|
||||
data = infosetFilter.coerceCharacters(data)
|
||||
builder.Element.insertText(self, data, insertBefore)
|
||||
|
||||
def appendChild(self, child):
|
||||
builder.Element.appendChild(self, child)
|
||||
def cloneNode(self):
|
||||
element = type(self)(self.name, self.namespace)
|
||||
if self._element.attrib:
|
||||
element._element.attrib.update(self._element.attrib)
|
||||
return element
|
||||
|
||||
class Comment(builder.Comment):
|
||||
def __init__(self, data):
|
||||
|
||||
@@ -2,10 +2,10 @@
|
||||
tree, generating tokens identical to those produced by the tokenizer
|
||||
module.
|
||||
|
||||
To create a tree walker for a new type of tree, you need to do
|
||||
To create a tree walker for a new type of tree, you need to
|
||||
implement a tree walker object (called TreeWalker by convention) that
|
||||
implements a 'serialize' method taking a tree as sole argument and
|
||||
returning an iterator generating tokens.
|
||||
implements a 'serialize' method which takes a tree as sole argument and
|
||||
returns an iterator which generates tokens.
|
||||
"""
|
||||
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -127,4 +127,5 @@ def getETreeBuilder(ElementTreeImplementation):
|
||||
|
||||
return locals()
|
||||
|
||||
|
||||
getETreeModule = moduleFactoryFactory(getETreeBuilder)
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
from __future__ import absolute_import, division, unicode_literals
|
||||
from pip._vendor.six import text_type
|
||||
|
||||
from collections import OrderedDict
|
||||
|
||||
from lxml import etree
|
||||
from ..treebuilders.etree import tag_regexp
|
||||
|
||||
@@ -163,7 +165,7 @@ class TreeWalker(base.NonRecursiveTreeWalker):
|
||||
else:
|
||||
namespace = None
|
||||
tag = ensure_str(node.tag)
|
||||
attrs = {}
|
||||
attrs = OrderedDict()
|
||||
for name, value in list(node.attrib.items()):
|
||||
name = ensure_str(name)
|
||||
value = ensure_str(value)
|
||||
|
||||
Reference in New Issue
Block a user