Eliminado venv y www del repositorio, agrege un requirements igual

This commit is contained in:
2020-11-22 21:14:46 -03:00
parent 18cf2d335a
commit 199a1e2a61
820 changed files with 15495 additions and 22017 deletions

View File

@@ -32,4 +32,4 @@ __all__ = ["HTMLParser", "parse", "parseFragment", "getTreeBuilder",
# this has to be at the top level, see how setup.py parses this
#: Distribution version number.
__version__ = "1.0.1"
__version__ = "1.1"

View File

@@ -136,6 +136,7 @@ def normaliseCharList(charList):
i += j
return rv
# We don't really support characters above the BMP :(
max_unicode = int("FFFF", 16)
@@ -254,7 +255,7 @@ class InfosetFilter(object):
nameRest = name[1:]
m = nonXmlNameFirstBMPRegexp.match(nameFirst)
if m:
warnings.warn("Coercing non-XML name", DataLossWarning)
warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
nameFirstOutput = self.getReplacementCharacter(nameFirst)
else:
nameFirstOutput = nameFirst
@@ -262,7 +263,7 @@ class InfosetFilter(object):
nameRestOutput = nameRest
replaceChars = set(nonXmlNameBMPRegexp.findall(nameRest))
for char in replaceChars:
warnings.warn("Coercing non-XML name", DataLossWarning)
warnings.warn("Coercing non-XML name: %s" % name, DataLossWarning)
replacement = self.getReplacementCharacter(char)
nameRestOutput = nameRestOutput.replace(char, replacement)
return nameFirstOutput + nameRestOutput

View File

@@ -1,10 +1,11 @@
from __future__ import absolute_import, division, unicode_literals
from pip._vendor.six import text_type, binary_type
from pip._vendor.six import text_type
from pip._vendor.six.moves import http_client, urllib
import codecs
import re
from io import BytesIO, StringIO
from pip._vendor import webencodings
@@ -12,13 +13,6 @@ from .constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
from .constants import _ReparseException
from . import _utils
from io import StringIO
try:
from io import BytesIO
except ImportError:
BytesIO = StringIO
# Non-unicode versions of constants for use in the pre-parser
spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
@@ -40,13 +34,13 @@ if _utils.supports_lone_surrogates:
else:
invalid_unicode_re = re.compile(invalid_unicode_no_surrogate)
non_bmp_invalid_codepoints = set([0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
0x10FFFE, 0x10FFFF])
non_bmp_invalid_codepoints = {0x1FFFE, 0x1FFFF, 0x2FFFE, 0x2FFFF, 0x3FFFE,
0x3FFFF, 0x4FFFE, 0x4FFFF, 0x5FFFE, 0x5FFFF,
0x6FFFE, 0x6FFFF, 0x7FFFE, 0x7FFFF, 0x8FFFE,
0x8FFFF, 0x9FFFE, 0x9FFFF, 0xAFFFE, 0xAFFFF,
0xBFFFE, 0xBFFFF, 0xCFFFE, 0xCFFFF, 0xDFFFE,
0xDFFFF, 0xEFFFE, 0xEFFFF, 0xFFFFE, 0xFFFFF,
0x10FFFE, 0x10FFFF}
ascii_punctuation_re = re.compile("[\u0009-\u000D\u0020-\u002F\u003A-\u0040\u005C\u005B-\u0060\u007B-\u007E]")
@@ -367,7 +361,7 @@ class HTMLUnicodeInputStream(object):
def unget(self, char):
# Only one character is allowed to be ungotten at once - it must
# be consumed again before any further call to unget
if char is not None:
if char is not EOF:
if self.chunkOffset == 0:
# unget is called quite rarely, so it's a good idea to do
# more work here if it saves a bit of work in the frequently
@@ -449,7 +443,7 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
try:
stream.seek(stream.tell())
except: # pylint:disable=bare-except
except Exception:
stream = BufferedStream(stream)
return stream
@@ -461,7 +455,7 @@ class HTMLBinaryInputStream(HTMLUnicodeInputStream):
if charEncoding[0] is not None:
return charEncoding
# If we've been overriden, we've been overriden
# If we've been overridden, we've been overridden
charEncoding = lookupEncoding(self.override_encoding), "certain"
if charEncoding[0] is not None:
return charEncoding
@@ -664,9 +658,7 @@ class EncodingBytes(bytes):
"""Look for a sequence of bytes at the start of a string. If the bytes
are found return True and advance the position to the byte after the
match. Otherwise return False and leave the position alone"""
p = self.position
data = self[p:p + len(bytes)]
rv = data.startswith(bytes)
rv = self.startswith(bytes, self.position)
if rv:
self.position += len(bytes)
return rv
@@ -674,15 +666,11 @@ class EncodingBytes(bytes):
def jumpTo(self, bytes):
"""Look for the next sequence of bytes matching a given sequence. If
a match is found advance the position to the last byte of the match"""
newPosition = self[self.position:].find(bytes)
if newPosition > -1:
# XXX: This is ugly, but I can't see a nicer way to fix this.
if self._position == -1:
self._position = 0
self._position += (newPosition + len(bytes) - 1)
return True
else:
try:
self._position = self.index(bytes, self.position) + len(bytes) - 1
except ValueError:
raise StopIteration
return True
class EncodingParser(object):
@@ -694,6 +682,9 @@ class EncodingParser(object):
self.encoding = None
def getEncoding(self):
if b"<meta" not in self.data:
return None
methodDispatch = (
(b"<!--", self.handleComment),
(b"<meta", self.handleMeta),
@@ -703,6 +694,10 @@ class EncodingParser(object):
(b"<", self.handlePossibleStartTag))
for _ in self.data:
keepParsing = True
try:
self.data.jumpTo(b"<")
except StopIteration:
break
for key, method in methodDispatch:
if self.data.matchBytes(key):
try:
@@ -908,7 +903,7 @@ class ContentAttrParser(object):
def lookupEncoding(encoding):
"""Return the python codec name corresponding to an encoding or None if the
string doesn't correspond to a valid encoding."""
if isinstance(encoding, binary_type):
if isinstance(encoding, bytes):
try:
encoding = encoding.decode("ascii")
except UnicodeDecodeError:

View File

@@ -2,7 +2,8 @@ from __future__ import absolute_import, division, unicode_literals
from pip._vendor.six import unichr as chr
from collections import deque
from collections import deque, OrderedDict
from sys import version_info
from .constants import spaceCharacters
from .constants import entities
@@ -17,6 +18,11 @@ from ._trie import Trie
entitiesTrie = Trie(entities)
if version_info >= (3, 7):
attributeMap = dict
else:
attributeMap = OrderedDict
class HTMLTokenizer(object):
""" This class takes care of tokenizing HTML.
@@ -228,6 +234,14 @@ class HTMLTokenizer(object):
# Add token to the queue to be yielded
if (token["type"] in tagTokenTypes):
token["name"] = token["name"].translate(asciiUpper2Lower)
if token["type"] == tokenTypes["StartTag"]:
raw = token["data"]
data = attributeMap(raw)
if len(raw) > len(data):
# we had some duplicated attribute, fix so first wins
data.update(raw[::-1])
token["data"] = data
if token["type"] == tokenTypes["EndTag"]:
if token["data"]:
self.tokenQueue.append({"type": tokenTypes["ParseError"],

View File

@@ -1,14 +1,5 @@
from __future__ import absolute_import, division, unicode_literals
from .py import Trie as PyTrie
from .py import Trie
Trie = PyTrie
# pylint:disable=wrong-import-position
try:
from .datrie import Trie as DATrie
except ImportError:
pass
else:
Trie = DATrie
# pylint:enable=wrong-import-position
__all__ = ["Trie"]

View File

@@ -1,44 +0,0 @@
from __future__ import absolute_import, division, unicode_literals
from datrie import Trie as DATrie
from pip._vendor.six import text_type
from ._base import Trie as ABCTrie
class Trie(ABCTrie):
def __init__(self, data):
chars = set()
for key in data.keys():
if not isinstance(key, text_type):
raise TypeError("All keys must be strings")
for char in key:
chars.add(char)
self._data = DATrie("".join(chars))
for key, value in data.items():
self._data[key] = value
def __contains__(self, key):
return key in self._data
def __len__(self):
return len(self._data)
def __iter__(self):
raise NotImplementedError()
def __getitem__(self, key):
return self._data[key]
def keys(self, prefix=None):
return self._data.keys(prefix)
def has_keys_with_prefix(self, prefix):
return self._data.has_keys_with_prefix(prefix)
def longest_prefix(self, prefix):
return self._data.longest_prefix(prefix)
def longest_prefix_item(self, prefix):
return self._data.longest_prefix_item(prefix)

View File

@@ -2,12 +2,20 @@ from __future__ import absolute_import, division, unicode_literals
from types import ModuleType
from pip._vendor.six import text_type
try:
import xml.etree.cElementTree as default_etree
from collections.abc import Mapping
except ImportError:
from collections import Mapping
from pip._vendor.six import text_type, PY3
if PY3:
import xml.etree.ElementTree as default_etree
else:
try:
import xml.etree.cElementTree as default_etree
except ImportError:
import xml.etree.ElementTree as default_etree
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
@@ -27,7 +35,7 @@ try:
# We need this with u"" because of http://bugs.jython.org/issue2039
_x = eval('u"\\uD800"') # pylint:disable=eval-used
assert isinstance(_x, text_type)
except: # pylint:disable=bare-except
except Exception:
supports_lone_surrogates = False
else:
supports_lone_surrogates = True
@@ -47,9 +55,6 @@ class MethodDispatcher(dict):
"""
def __init__(self, items=()):
# Using _dictEntries instead of directly assigning to self is about
# twice as fast. Please do careful performance testing before changing
# anything here.
_dictEntries = []
for name, value in items:
if isinstance(name, (list, tuple, frozenset, set)):
@@ -64,6 +69,36 @@ class MethodDispatcher(dict):
def __getitem__(self, key):
return dict.get(self, key, self.default)
def __get__(self, instance, owner=None):
return BoundMethodDispatcher(instance, self)
class BoundMethodDispatcher(Mapping):
"""Wraps a MethodDispatcher, binding its return values to `instance`"""
def __init__(self, instance, dispatcher):
self.instance = instance
self.dispatcher = dispatcher
def __getitem__(self, key):
# see https://docs.python.org/3/reference/datamodel.html#object.__get__
# on a function, __get__ is used to bind a function to an instance as a bound method
return self.dispatcher[key].__get__(self.instance)
def get(self, key, default):
if key in self.dispatcher:
return self[key]
else:
return default
def __iter__(self):
return iter(self.dispatcher)
def __len__(self):
return len(self.dispatcher)
def __contains__(self, key):
return key in self.dispatcher
# Some utility functions to deal with weirdness around UCS2 vs UCS4
# python builds

View File

@@ -519,8 +519,8 @@ adjustForeignAttributes = {
"xmlns:xlink": ("xmlns", "xlink", namespaces["xmlns"])
}
unadjustForeignAttributes = dict([((ns, local), qname) for qname, (prefix, local, ns) in
adjustForeignAttributes.items()])
unadjustForeignAttributes = {(ns, local): qname for qname, (prefix, local, ns) in
adjustForeignAttributes.items()}
spaceCharacters = frozenset([
"\t",
@@ -544,8 +544,7 @@ asciiLetters = frozenset(string.ascii_letters)
digits = frozenset(string.digits)
hexDigits = frozenset(string.hexdigits)
asciiUpper2Lower = dict([(ord(c), ord(c.lower()))
for c in string.ascii_uppercase])
asciiUpper2Lower = {ord(c): ord(c.lower()) for c in string.ascii_uppercase}
# Heading elements need to be ordered
headingElements = (
@@ -2934,7 +2933,7 @@ tagTokenTypes = frozenset([tokenTypes["StartTag"], tokenTypes["EndTag"],
tokenTypes["EmptyTag"]])
prefixes = dict([(v, k) for k, v in namespaces.items()])
prefixes = {v: k for k, v in namespaces.items()}
prefixes["http://www.w3.org/1998/Math/MathML"] = "math"

View File

@@ -1,6 +1,15 @@
"""Deprecated from html5lib 1.1.
See `here <https://github.com/html5lib/html5lib-python/issues/443>`_ for
information about its deprecation; `Bleach <https://github.com/mozilla/bleach>`_
is recommended as a replacement. Please let us know in the aforementioned issue
if Bleach is unsuitable for your needs.
"""
from __future__ import absolute_import, division, unicode_literals
import re
import warnings
from xml.sax.saxutils import escape, unescape
from pip._vendor.six.moves import urllib_parse as urlparse
@@ -11,6 +20,14 @@ from ..constants import namespaces, prefixes
__all__ = ["Filter"]
_deprecation_msg = (
"html5lib's sanitizer is deprecated; see " +
"https://github.com/html5lib/html5lib-python/issues/443 and please let " +
"us know if Bleach is unsuitable for your needs"
)
warnings.warn(_deprecation_msg, DeprecationWarning)
allowed_elements = frozenset((
(namespaces['html'], 'a'),
(namespaces['html'], 'abbr'),
@@ -750,6 +767,9 @@ class Filter(base.Filter):
"""
super(Filter, self).__init__(source)
warnings.warn(_deprecation_msg, DeprecationWarning)
self.allowed_elements = allowed_elements
self.allowed_attributes = allowed_attributes
self.allowed_css_properties = allowed_css_properties

View File

@@ -2,7 +2,6 @@ from __future__ import absolute_import, division, unicode_literals
from pip._vendor.six import with_metaclass, viewkeys
import types
from collections import OrderedDict
from . import _inputstream
from . import _tokenizer
@@ -119,8 +118,8 @@ class HTMLParser(object):
self.tree = tree(namespaceHTMLElements)
self.errors = []
self.phases = dict([(name, cls(self, self.tree)) for name, cls in
getPhases(debug).items()])
self.phases = {name: cls(self, self.tree) for name, cls in
getPhases(debug).items()}
def _parse(self, stream, innerHTML=False, container="div", scripting=False, **kwargs):
@@ -202,7 +201,7 @@ class HTMLParser(object):
DoctypeToken = tokenTypes["Doctype"]
ParseErrorToken = tokenTypes["ParseError"]
for token in self.normalizedTokens():
for token in self.tokenizer:
prev_token = None
new_token = token
while new_token is not None:
@@ -260,10 +259,6 @@ class HTMLParser(object):
if reprocess:
assert self.phase not in phases
def normalizedTokens(self):
for token in self.tokenizer:
yield self.normalizeToken(token)
def parse(self, stream, *args, **kwargs):
"""Parse a HTML document into a well-formed tree
@@ -325,17 +320,6 @@ class HTMLParser(object):
if self.strict:
raise ParseError(E[errorcode] % datavars)
def normalizeToken(self, token):
# HTML5 specific normalizations to the token stream
if token["type"] == tokenTypes["StartTag"]:
raw = token["data"]
token["data"] = OrderedDict(raw)
if len(raw) > len(token["data"]):
# we had some duplicated attribute, fix so first wins
token["data"].update(raw[::-1])
return token
def adjustMathMLAttributes(self, token):
adjust_attributes(token, adjustMathMLAttributes)
@@ -413,16 +397,12 @@ class HTMLParser(object):
def getPhases(debug):
def log(function):
"""Logger that records which phase processes each token"""
type_names = dict((value, key) for key, value in
tokenTypes.items())
type_names = {value: key for key, value in tokenTypes.items()}
def wrapped(self, *args, **kwargs):
if function.__name__.startswith("process") and len(args) > 0:
token = args[0]
try:
info = {"type": type_names[token['type']]}
except:
raise
info = {"type": type_names[token['type']]}
if token['type'] in tagTokenTypes:
info["name"] = token['name']
@@ -446,10 +426,13 @@ def getPhases(debug):
class Phase(with_metaclass(getMetaclass(debug, log))):
"""Base class for helper object that implements each phase of processing
"""
__slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")
def __init__(self, parser, tree):
self.parser = parser
self.tree = tree
self.__startTagCache = {}
self.__endTagCache = {}
def processEOF(self):
raise NotImplementedError
@@ -469,7 +452,21 @@ def getPhases(debug):
self.tree.insertText(token["data"])
def processStartTag(self, token):
return self.startTagHandler[token["name"]](token)
# Note the caching is done here rather than BoundMethodDispatcher as doing it there
# requires a circular reference to the Phase, and this ends up with a significant
# (CPython 2.7, 3.8) GC cost when parsing many short inputs
name = token["name"]
# In Py2, using `in` is quicker in general than try/except KeyError
# In Py3, `in` is quicker when there are few cache hits (typically short inputs)
if name in self.__startTagCache:
func = self.__startTagCache[name]
else:
func = self.__startTagCache[name] = self.startTagHandler[name]
# bound the cache size in case we get loads of unknown tags
while len(self.__startTagCache) > len(self.startTagHandler) * 1.1:
# this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
self.__startTagCache.pop(next(iter(self.__startTagCache)))
return func(token)
def startTagHtml(self, token):
if not self.parser.firstStartTag and token["name"] == "html":
@@ -482,9 +479,25 @@ def getPhases(debug):
self.parser.firstStartTag = False
def processEndTag(self, token):
return self.endTagHandler[token["name"]](token)
# Note the caching is done here rather than BoundMethodDispatcher as doing it there
# requires a circular reference to the Phase, and this ends up with a significant
# (CPython 2.7, 3.8) GC cost when parsing many short inputs
name = token["name"]
# In Py2, using `in` is quicker in general than try/except KeyError
# In Py3, `in` is quicker when there are few cache hits (typically short inputs)
if name in self.__endTagCache:
func = self.__endTagCache[name]
else:
func = self.__endTagCache[name] = self.endTagHandler[name]
# bound the cache size in case we get loads of unknown tags
while len(self.__endTagCache) > len(self.endTagHandler) * 1.1:
# this makes the eviction policy random on Py < 3.7 and FIFO >= 3.7
self.__endTagCache.pop(next(iter(self.__endTagCache)))
return func(token)
class InitialPhase(Phase):
__slots__ = tuple()
def processSpaceCharacters(self, token):
pass
@@ -613,6 +626,8 @@ def getPhases(debug):
return True
class BeforeHtmlPhase(Phase):
__slots__ = tuple()
# helper methods
def insertHtmlElement(self):
self.tree.insertRoot(impliedTagToken("html", "StartTag"))
@@ -648,19 +663,7 @@ def getPhases(debug):
return token
class BeforeHeadPhase(Phase):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("head", self.startTagHead)
])
self.startTagHandler.default = self.startTagOther
self.endTagHandler = _utils.MethodDispatcher([
(("head", "body", "html", "br"), self.endTagImplyHead)
])
self.endTagHandler.default = self.endTagOther
__slots__ = tuple()
def processEOF(self):
self.startTagHead(impliedTagToken("head", "StartTag"))
@@ -693,28 +696,19 @@ def getPhases(debug):
self.parser.parseError("end-tag-after-implied-root",
{"name": token["name"]})
startTagHandler = _utils.MethodDispatcher([
("html", startTagHtml),
("head", startTagHead)
])
startTagHandler.default = startTagOther
endTagHandler = _utils.MethodDispatcher([
(("head", "body", "html", "br"), endTagImplyHead)
])
endTagHandler.default = endTagOther
class InHeadPhase(Phase):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("title", self.startTagTitle),
(("noframes", "style"), self.startTagNoFramesStyle),
("noscript", self.startTagNoscript),
("script", self.startTagScript),
(("base", "basefont", "bgsound", "command", "link"),
self.startTagBaseLinkCommand),
("meta", self.startTagMeta),
("head", self.startTagHead)
])
self.startTagHandler.default = self.startTagOther
self.endTagHandler = _utils.MethodDispatcher([
("head", self.endTagHead),
(("br", "html", "body"), self.endTagHtmlBodyBr)
])
self.endTagHandler.default = self.endTagOther
__slots__ = tuple()
# the real thing
def processEOF(self):
@@ -796,22 +790,27 @@ def getPhases(debug):
def anythingElse(self):
self.endTagHead(impliedTagToken("head"))
startTagHandler = _utils.MethodDispatcher([
("html", startTagHtml),
("title", startTagTitle),
(("noframes", "style"), startTagNoFramesStyle),
("noscript", startTagNoscript),
("script", startTagScript),
(("base", "basefont", "bgsound", "command", "link"),
startTagBaseLinkCommand),
("meta", startTagMeta),
("head", startTagHead)
])
startTagHandler.default = startTagOther
endTagHandler = _utils.MethodDispatcher([
("head", endTagHead),
(("br", "html", "body"), endTagHtmlBodyBr)
])
endTagHandler.default = endTagOther
class InHeadNoscriptPhase(Phase):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
(("basefont", "bgsound", "link", "meta", "noframes", "style"), self.startTagBaseLinkCommand),
(("head", "noscript"), self.startTagHeadNoscript),
])
self.startTagHandler.default = self.startTagOther
self.endTagHandler = _utils.MethodDispatcher([
("noscript", self.endTagNoscript),
("br", self.endTagBr),
])
self.endTagHandler.default = self.endTagOther
__slots__ = tuple()
def processEOF(self):
self.parser.parseError("eof-in-head-noscript")
@@ -860,23 +859,21 @@ def getPhases(debug):
# Caller must raise parse error first!
self.endTagNoscript(impliedTagToken("noscript"))
class AfterHeadPhase(Phase):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
startTagHandler = _utils.MethodDispatcher([
("html", startTagHtml),
(("basefont", "bgsound", "link", "meta", "noframes", "style"), startTagBaseLinkCommand),
(("head", "noscript"), startTagHeadNoscript),
])
startTagHandler.default = startTagOther
self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("body", self.startTagBody),
("frameset", self.startTagFrameset),
(("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
"style", "title"),
self.startTagFromHead),
("head", self.startTagHead)
])
self.startTagHandler.default = self.startTagOther
self.endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
self.endTagHtmlBodyBr)])
self.endTagHandler.default = self.endTagOther
endTagHandler = _utils.MethodDispatcher([
("noscript", endTagNoscript),
("br", endTagBr),
])
endTagHandler.default = endTagOther
class AfterHeadPhase(Phase):
__slots__ = tuple()
def processEOF(self):
self.anythingElse()
@@ -927,80 +924,30 @@ def getPhases(debug):
self.parser.phase = self.parser.phases["inBody"]
self.parser.framesetOK = True
startTagHandler = _utils.MethodDispatcher([
("html", startTagHtml),
("body", startTagBody),
("frameset", startTagFrameset),
(("base", "basefont", "bgsound", "link", "meta", "noframes", "script",
"style", "title"),
startTagFromHead),
("head", startTagHead)
])
startTagHandler.default = startTagOther
endTagHandler = _utils.MethodDispatcher([(("body", "html", "br"),
endTagHtmlBodyBr)])
endTagHandler.default = endTagOther
class InBodyPhase(Phase):
# http://www.whatwg.org/specs/web-apps/current-work/#parsing-main-inbody
# the really-really-really-very crazy mode
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
__slots__ = ("processSpaceCharacters",)
def __init__(self, *args, **kwargs):
super(InBodyPhase, self).__init__(*args, **kwargs)
# Set this to the default handler
self.processSpaceCharacters = self.processSpaceCharactersNonPre
self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
(("base", "basefont", "bgsound", "command", "link", "meta",
"script", "style", "title"),
self.startTagProcessInHead),
("body", self.startTagBody),
("frameset", self.startTagFrameset),
(("address", "article", "aside", "blockquote", "center", "details",
"dir", "div", "dl", "fieldset", "figcaption", "figure",
"footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
"section", "summary", "ul"),
self.startTagCloseP),
(headingElements, self.startTagHeading),
(("pre", "listing"), self.startTagPreListing),
("form", self.startTagForm),
(("li", "dd", "dt"), self.startTagListItem),
("plaintext", self.startTagPlaintext),
("a", self.startTagA),
(("b", "big", "code", "em", "font", "i", "s", "small", "strike",
"strong", "tt", "u"), self.startTagFormatting),
("nobr", self.startTagNobr),
("button", self.startTagButton),
(("applet", "marquee", "object"), self.startTagAppletMarqueeObject),
("xmp", self.startTagXmp),
("table", self.startTagTable),
(("area", "br", "embed", "img", "keygen", "wbr"),
self.startTagVoidFormatting),
(("param", "source", "track"), self.startTagParamSource),
("input", self.startTagInput),
("hr", self.startTagHr),
("image", self.startTagImage),
("isindex", self.startTagIsIndex),
("textarea", self.startTagTextarea),
("iframe", self.startTagIFrame),
("noscript", self.startTagNoscript),
(("noembed", "noframes"), self.startTagRawtext),
("select", self.startTagSelect),
(("rp", "rt"), self.startTagRpRt),
(("option", "optgroup"), self.startTagOpt),
(("math"), self.startTagMath),
(("svg"), self.startTagSvg),
(("caption", "col", "colgroup", "frame", "head",
"tbody", "td", "tfoot", "th", "thead",
"tr"), self.startTagMisplaced)
])
self.startTagHandler.default = self.startTagOther
self.endTagHandler = _utils.MethodDispatcher([
("body", self.endTagBody),
("html", self.endTagHtml),
(("address", "article", "aside", "blockquote", "button", "center",
"details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
"footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
"section", "summary", "ul"), self.endTagBlock),
("form", self.endTagForm),
("p", self.endTagP),
(("dd", "dt", "li"), self.endTagListItem),
(headingElements, self.endTagHeading),
(("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
"strike", "strong", "tt", "u"), self.endTagFormatting),
(("applet", "marquee", "object"), self.endTagAppletMarqueeObject),
("br", self.endTagBr),
])
self.endTagHandler.default = self.endTagOther
def isMatchingFormattingElement(self, node1, node2):
return (node1.name == node2.name and
node1.namespace == node2.namespace and
@@ -1650,14 +1597,73 @@ def getPhases(debug):
self.parser.parseError("unexpected-end-tag", {"name": token["name"]})
break
startTagHandler = _utils.MethodDispatcher([
("html", Phase.startTagHtml),
(("base", "basefont", "bgsound", "command", "link", "meta",
"script", "style", "title"),
startTagProcessInHead),
("body", startTagBody),
("frameset", startTagFrameset),
(("address", "article", "aside", "blockquote", "center", "details",
"dir", "div", "dl", "fieldset", "figcaption", "figure",
"footer", "header", "hgroup", "main", "menu", "nav", "ol", "p",
"section", "summary", "ul"),
startTagCloseP),
(headingElements, startTagHeading),
(("pre", "listing"), startTagPreListing),
("form", startTagForm),
(("li", "dd", "dt"), startTagListItem),
("plaintext", startTagPlaintext),
("a", startTagA),
(("b", "big", "code", "em", "font", "i", "s", "small", "strike",
"strong", "tt", "u"), startTagFormatting),
("nobr", startTagNobr),
("button", startTagButton),
(("applet", "marquee", "object"), startTagAppletMarqueeObject),
("xmp", startTagXmp),
("table", startTagTable),
(("area", "br", "embed", "img", "keygen", "wbr"),
startTagVoidFormatting),
(("param", "source", "track"), startTagParamSource),
("input", startTagInput),
("hr", startTagHr),
("image", startTagImage),
("isindex", startTagIsIndex),
("textarea", startTagTextarea),
("iframe", startTagIFrame),
("noscript", startTagNoscript),
(("noembed", "noframes"), startTagRawtext),
("select", startTagSelect),
(("rp", "rt"), startTagRpRt),
(("option", "optgroup"), startTagOpt),
(("math"), startTagMath),
(("svg"), startTagSvg),
(("caption", "col", "colgroup", "frame", "head",
"tbody", "td", "tfoot", "th", "thead",
"tr"), startTagMisplaced)
])
startTagHandler.default = startTagOther
endTagHandler = _utils.MethodDispatcher([
("body", endTagBody),
("html", endTagHtml),
(("address", "article", "aside", "blockquote", "button", "center",
"details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure",
"footer", "header", "hgroup", "listing", "main", "menu", "nav", "ol", "pre",
"section", "summary", "ul"), endTagBlock),
("form", endTagForm),
("p", endTagP),
(("dd", "dt", "li"), endTagListItem),
(headingElements, endTagHeading),
(("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small",
"strike", "strong", "tt", "u"), endTagFormatting),
(("applet", "marquee", "object"), endTagAppletMarqueeObject),
("br", endTagBr),
])
endTagHandler.default = endTagOther
class TextPhase(Phase):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
self.startTagHandler = _utils.MethodDispatcher([])
self.startTagHandler.default = self.startTagOther
self.endTagHandler = _utils.MethodDispatcher([
("script", self.endTagScript)])
self.endTagHandler.default = self.endTagOther
__slots__ = tuple()
def processCharacters(self, token):
self.tree.insertText(token["data"])
@@ -1683,30 +1689,15 @@ def getPhases(debug):
self.tree.openElements.pop()
self.parser.phase = self.parser.originalPhase
startTagHandler = _utils.MethodDispatcher([])
startTagHandler.default = startTagOther
endTagHandler = _utils.MethodDispatcher([
("script", endTagScript)])
endTagHandler.default = endTagOther
class InTablePhase(Phase):
# http://www.whatwg.org/specs/web-apps/current-work/#in-table
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("caption", self.startTagCaption),
("colgroup", self.startTagColgroup),
("col", self.startTagCol),
(("tbody", "tfoot", "thead"), self.startTagRowGroup),
(("td", "th", "tr"), self.startTagImplyTbody),
("table", self.startTagTable),
(("style", "script"), self.startTagStyleScript),
("input", self.startTagInput),
("form", self.startTagForm)
])
self.startTagHandler.default = self.startTagOther
self.endTagHandler = _utils.MethodDispatcher([
("table", self.endTagTable),
(("body", "caption", "col", "colgroup", "html", "tbody", "td",
"tfoot", "th", "thead", "tr"), self.endTagIgnore)
])
self.endTagHandler.default = self.endTagOther
__slots__ = tuple()
# helper methods
def clearStackToTableContext(self):
@@ -1828,9 +1819,32 @@ def getPhases(debug):
self.parser.phases["inBody"].processEndTag(token)
self.tree.insertFromTable = False
startTagHandler = _utils.MethodDispatcher([
("html", Phase.startTagHtml),
("caption", startTagCaption),
("colgroup", startTagColgroup),
("col", startTagCol),
(("tbody", "tfoot", "thead"), startTagRowGroup),
(("td", "th", "tr"), startTagImplyTbody),
("table", startTagTable),
(("style", "script"), startTagStyleScript),
("input", startTagInput),
("form", startTagForm)
])
startTagHandler.default = startTagOther
endTagHandler = _utils.MethodDispatcher([
("table", endTagTable),
(("body", "caption", "col", "colgroup", "html", "tbody", "td",
"tfoot", "th", "thead", "tr"), endTagIgnore)
])
endTagHandler.default = endTagOther
class InTableTextPhase(Phase):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
__slots__ = ("originalPhase", "characterTokens")
def __init__(self, *args, **kwargs):
super(InTableTextPhase, self).__init__(*args, **kwargs)
self.originalPhase = None
self.characterTokens = []
@@ -1875,23 +1889,7 @@ def getPhases(debug):
class InCaptionPhase(Phase):
# http://www.whatwg.org/specs/web-apps/current-work/#in-caption
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
"thead", "tr"), self.startTagTableElement)
])
self.startTagHandler.default = self.startTagOther
self.endTagHandler = _utils.MethodDispatcher([
("caption", self.endTagCaption),
("table", self.endTagTable),
(("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
"thead", "tr"), self.endTagIgnore)
])
self.endTagHandler.default = self.endTagOther
__slots__ = tuple()
def ignoreEndTagCaption(self):
return not self.tree.elementInScope("caption", variant="table")
@@ -1944,23 +1942,24 @@ def getPhases(debug):
def endTagOther(self, token):
return self.parser.phases["inBody"].processEndTag(token)
startTagHandler = _utils.MethodDispatcher([
("html", Phase.startTagHtml),
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
"thead", "tr"), startTagTableElement)
])
startTagHandler.default = startTagOther
endTagHandler = _utils.MethodDispatcher([
("caption", endTagCaption),
("table", endTagTable),
(("body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th",
"thead", "tr"), endTagIgnore)
])
endTagHandler.default = endTagOther
class InColumnGroupPhase(Phase):
# http://www.whatwg.org/specs/web-apps/current-work/#in-column
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("col", self.startTagCol)
])
self.startTagHandler.default = self.startTagOther
self.endTagHandler = _utils.MethodDispatcher([
("colgroup", self.endTagColgroup),
("col", self.endTagCol)
])
self.endTagHandler.default = self.endTagOther
__slots__ = tuple()
def ignoreEndTagColgroup(self):
return self.tree.openElements[-1].name == "html"
@@ -2010,26 +2009,21 @@ def getPhases(debug):
if not ignoreEndTag:
return token
startTagHandler = _utils.MethodDispatcher([
("html", Phase.startTagHtml),
("col", startTagCol)
])
startTagHandler.default = startTagOther
endTagHandler = _utils.MethodDispatcher([
("colgroup", endTagColgroup),
("col", endTagCol)
])
endTagHandler.default = endTagOther
class InTableBodyPhase(Phase):
# http://www.whatwg.org/specs/web-apps/current-work/#in-table0
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("tr", self.startTagTr),
(("td", "th"), self.startTagTableCell),
(("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
self.startTagTableOther)
])
self.startTagHandler.default = self.startTagOther
self.endTagHandler = _utils.MethodDispatcher([
(("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
("table", self.endTagTable),
(("body", "caption", "col", "colgroup", "html", "td", "th",
"tr"), self.endTagIgnore)
])
self.endTagHandler.default = self.endTagOther
__slots__ = tuple()
# helper methods
def clearStackToTableBodyContext(self):
@@ -2108,26 +2102,26 @@ def getPhases(debug):
def endTagOther(self, token):
return self.parser.phases["inTable"].processEndTag(token)
startTagHandler = _utils.MethodDispatcher([
("html", Phase.startTagHtml),
("tr", startTagTr),
(("td", "th"), startTagTableCell),
(("caption", "col", "colgroup", "tbody", "tfoot", "thead"),
startTagTableOther)
])
startTagHandler.default = startTagOther
endTagHandler = _utils.MethodDispatcher([
(("tbody", "tfoot", "thead"), endTagTableRowGroup),
("table", endTagTable),
(("body", "caption", "col", "colgroup", "html", "td", "th",
"tr"), endTagIgnore)
])
endTagHandler.default = endTagOther
class InRowPhase(Phase):
# http://www.whatwg.org/specs/web-apps/current-work/#in-row
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
(("td", "th"), self.startTagTableCell),
(("caption", "col", "colgroup", "tbody", "tfoot", "thead",
"tr"), self.startTagTableOther)
])
self.startTagHandler.default = self.startTagOther
self.endTagHandler = _utils.MethodDispatcher([
("tr", self.endTagTr),
("table", self.endTagTable),
(("tbody", "tfoot", "thead"), self.endTagTableRowGroup),
(("body", "caption", "col", "colgroup", "html", "td", "th"),
self.endTagIgnore)
])
self.endTagHandler.default = self.endTagOther
__slots__ = tuple()
# helper methods (XXX unify this with other table helper methods)
def clearStackToTableRowContext(self):
@@ -2197,23 +2191,26 @@ def getPhases(debug):
def endTagOther(self, token):
return self.parser.phases["inTable"].processEndTag(token)
startTagHandler = _utils.MethodDispatcher([
("html", Phase.startTagHtml),
(("td", "th"), startTagTableCell),
(("caption", "col", "colgroup", "tbody", "tfoot", "thead",
"tr"), startTagTableOther)
])
startTagHandler.default = startTagOther
endTagHandler = _utils.MethodDispatcher([
("tr", endTagTr),
("table", endTagTable),
(("tbody", "tfoot", "thead"), endTagTableRowGroup),
(("body", "caption", "col", "colgroup", "html", "td", "th"),
endTagIgnore)
])
endTagHandler.default = endTagOther
class InCellPhase(Phase):
# http://www.whatwg.org/specs/web-apps/current-work/#in-cell
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
"thead", "tr"), self.startTagTableOther)
])
self.startTagHandler.default = self.startTagOther
self.endTagHandler = _utils.MethodDispatcher([
(("td", "th"), self.endTagTableCell),
(("body", "caption", "col", "colgroup", "html"), self.endTagIgnore),
(("table", "tbody", "tfoot", "thead", "tr"), self.endTagImply)
])
self.endTagHandler.default = self.endTagOther
__slots__ = tuple()
# helper
def closeCell(self):
@@ -2273,26 +2270,22 @@ def getPhases(debug):
def endTagOther(self, token):
return self.parser.phases["inBody"].processEndTag(token)
startTagHandler = _utils.MethodDispatcher([
("html", Phase.startTagHtml),
(("caption", "col", "colgroup", "tbody", "td", "tfoot", "th",
"thead", "tr"), startTagTableOther)
])
startTagHandler.default = startTagOther
endTagHandler = _utils.MethodDispatcher([
(("td", "th"), endTagTableCell),
(("body", "caption", "col", "colgroup", "html"), endTagIgnore),
(("table", "tbody", "tfoot", "thead", "tr"), endTagImply)
])
endTagHandler.default = endTagOther
class InSelectPhase(Phase):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("option", self.startTagOption),
("optgroup", self.startTagOptgroup),
("select", self.startTagSelect),
(("input", "keygen", "textarea"), self.startTagInput),
("script", self.startTagScript)
])
self.startTagHandler.default = self.startTagOther
self.endTagHandler = _utils.MethodDispatcher([
("option", self.endTagOption),
("optgroup", self.endTagOptgroup),
("select", self.endTagSelect)
])
self.endTagHandler.default = self.endTagOther
__slots__ = tuple()
# http://www.whatwg.org/specs/web-apps/current-work/#in-select
def processEOF(self):
@@ -2373,21 +2366,25 @@ def getPhases(debug):
self.parser.parseError("unexpected-end-tag-in-select",
{"name": token["name"]})
startTagHandler = _utils.MethodDispatcher([
("html", Phase.startTagHtml),
("option", startTagOption),
("optgroup", startTagOptgroup),
("select", startTagSelect),
(("input", "keygen", "textarea"), startTagInput),
("script", startTagScript)
])
startTagHandler.default = startTagOther
endTagHandler = _utils.MethodDispatcher([
("option", endTagOption),
("optgroup", endTagOptgroup),
("select", endTagSelect)
])
endTagHandler.default = endTagOther
class InSelectInTablePhase(Phase):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
self.startTagHandler = _utils.MethodDispatcher([
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
self.startTagTable)
])
self.startTagHandler.default = self.startTagOther
self.endTagHandler = _utils.MethodDispatcher([
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
self.endTagTable)
])
self.endTagHandler.default = self.endTagOther
__slots__ = tuple()
def processEOF(self):
self.parser.phases["inSelect"].processEOF()
@@ -2412,7 +2409,21 @@ def getPhases(debug):
def endTagOther(self, token):
return self.parser.phases["inSelect"].processEndTag(token)
startTagHandler = _utils.MethodDispatcher([
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
startTagTable)
])
startTagHandler.default = startTagOther
endTagHandler = _utils.MethodDispatcher([
(("caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"),
endTagTable)
])
endTagHandler.default = endTagOther
class InForeignContentPhase(Phase):
__slots__ = tuple()
breakoutElements = frozenset(["b", "big", "blockquote", "body", "br",
"center", "code", "dd", "div", "dl", "dt",
"em", "embed", "h1", "h2", "h3",
@@ -2422,9 +2433,6 @@ def getPhases(debug):
"span", "strong", "strike", "sub", "sup",
"table", "tt", "u", "ul", "var"])
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
def adjustSVGTagNames(self, token):
replacements = {"altglyph": "altGlyph",
"altglyphdef": "altGlyphDef",
@@ -2478,7 +2486,7 @@ def getPhases(debug):
currentNode = self.tree.openElements[-1]
if (token["name"] in self.breakoutElements or
(token["name"] == "font" and
set(token["data"].keys()) & set(["color", "face", "size"]))):
set(token["data"].keys()) & {"color", "face", "size"})):
self.parser.parseError("unexpected-html-element-in-foreign-content",
{"name": token["name"]})
while (self.tree.openElements[-1].namespace !=
@@ -2528,16 +2536,7 @@ def getPhases(debug):
return new_token
class AfterBodyPhase(Phase):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml)
])
self.startTagHandler.default = self.startTagOther
self.endTagHandler = _utils.MethodDispatcher([("html", self.endTagHtml)])
self.endTagHandler.default = self.endTagOther
__slots__ = tuple()
def processEOF(self):
# Stop parsing
@@ -2574,23 +2573,17 @@ def getPhases(debug):
self.parser.phase = self.parser.phases["inBody"]
return token
startTagHandler = _utils.MethodDispatcher([
("html", startTagHtml)
])
startTagHandler.default = startTagOther
endTagHandler = _utils.MethodDispatcher([("html", endTagHtml)])
endTagHandler.default = endTagOther
class InFramesetPhase(Phase):
# http://www.whatwg.org/specs/web-apps/current-work/#in-frameset
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("frameset", self.startTagFrameset),
("frame", self.startTagFrame),
("noframes", self.startTagNoframes)
])
self.startTagHandler.default = self.startTagOther
self.endTagHandler = _utils.MethodDispatcher([
("frameset", self.endTagFrameset)
])
self.endTagHandler.default = self.endTagOther
__slots__ = tuple()
def processEOF(self):
if self.tree.openElements[-1].name != "html":
@@ -2631,21 +2624,22 @@ def getPhases(debug):
self.parser.parseError("unexpected-end-tag-in-frameset",
{"name": token["name"]})
startTagHandler = _utils.MethodDispatcher([
("html", Phase.startTagHtml),
("frameset", startTagFrameset),
("frame", startTagFrame),
("noframes", startTagNoframes)
])
startTagHandler.default = startTagOther
endTagHandler = _utils.MethodDispatcher([
("frameset", endTagFrameset)
])
endTagHandler.default = endTagOther
class AfterFramesetPhase(Phase):
# http://www.whatwg.org/specs/web-apps/current-work/#after3
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("noframes", self.startTagNoframes)
])
self.startTagHandler.default = self.startTagOther
self.endTagHandler = _utils.MethodDispatcher([
("html", self.endTagHtml)
])
self.endTagHandler.default = self.endTagOther
__slots__ = tuple()
def processEOF(self):
# Stop parsing
@@ -2668,14 +2662,19 @@ def getPhases(debug):
self.parser.parseError("unexpected-end-tag-after-frameset",
{"name": token["name"]})
class AfterAfterBodyPhase(Phase):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
startTagHandler = _utils.MethodDispatcher([
("html", Phase.startTagHtml),
("noframes", startTagNoframes)
])
startTagHandler.default = startTagOther
self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml)
])
self.startTagHandler.default = self.startTagOther
endTagHandler = _utils.MethodDispatcher([
("html", endTagHtml)
])
endTagHandler.default = endTagOther
class AfterAfterBodyPhase(Phase):
__slots__ = tuple()
def processEOF(self):
pass
@@ -2706,15 +2705,13 @@ def getPhases(debug):
self.parser.phase = self.parser.phases["inBody"]
return token
class AfterAfterFramesetPhase(Phase):
def __init__(self, parser, tree):
Phase.__init__(self, parser, tree)
startTagHandler = _utils.MethodDispatcher([
("html", startTagHtml)
])
startTagHandler.default = startTagOther
self.startTagHandler = _utils.MethodDispatcher([
("html", self.startTagHtml),
("noframes", self.startTagNoFrames)
])
self.startTagHandler.default = self.startTagOther
class AfterAfterFramesetPhase(Phase):
__slots__ = tuple()
def processEOF(self):
pass
@@ -2741,6 +2738,13 @@ def getPhases(debug):
def processEndTag(self, token):
self.parser.parseError("expected-eof-but-got-end-tag",
{"name": token["name"]})
startTagHandler = _utils.MethodDispatcher([
("html", startTagHtml),
("noframes", startTagNoFrames)
])
startTagHandler.default = startTagOther
# pylint:enable=unused-argument
return {
@@ -2774,8 +2778,8 @@ def getPhases(debug):
def adjust_attributes(token, replacements):
needs_adjustment = viewkeys(token['data']) & viewkeys(replacements)
if needs_adjustment:
token['data'] = OrderedDict((replacements.get(k, k), v)
for k, v in token['data'].items())
token['data'] = type(token['data'])((replacements.get(k, k), v)
for k, v in token['data'].items())
def impliedTagToken(name, type="EndTag", attributes=None,

View File

@@ -274,7 +274,7 @@ class HTMLSerializer(object):
if token["systemId"]:
if token["systemId"].find('"') >= 0:
if token["systemId"].find("'") >= 0:
self.serializeError("System identifer contains both single and double quote characters")
self.serializeError("System identifier contains both single and double quote characters")
quote_char = "'"
else:
quote_char = '"'

View File

@@ -10,9 +10,9 @@ Marker = None
listElementsMap = {
None: (frozenset(scopingElements), False),
"button": (frozenset(scopingElements | set([(namespaces["html"], "button")])), False),
"list": (frozenset(scopingElements | set([(namespaces["html"], "ol"),
(namespaces["html"], "ul")])), False),
"button": (frozenset(scopingElements | {(namespaces["html"], "button")}), False),
"list": (frozenset(scopingElements | {(namespaces["html"], "ol"),
(namespaces["html"], "ul")}), False),
"table": (frozenset([(namespaces["html"], "html"),
(namespaces["html"], "table")]), False),
"select": (frozenset([(namespaces["html"], "optgroup"),
@@ -28,7 +28,7 @@ class Node(object):
:arg name: The tag name associated with the node
"""
# The tag name assocaited with the node
# The tag name associated with the node
self.name = name
# The parent of the current node (or None for the document node)
self.parent = None

View File

@@ -5,6 +5,8 @@ from pip._vendor.six import text_type
import re
from copy import copy
from . import base
from .. import _ihatexml
from .. import constants
@@ -61,16 +63,17 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
return self._element.attrib
def _setAttributes(self, attributes):
# Delete existing attributes first
# XXX - there may be a better way to do this...
for key in list(self._element.attrib.keys()):
del self._element.attrib[key]
for key, value in attributes.items():
if isinstance(key, tuple):
name = "{%s}%s" % (key[2], key[1])
else:
name = key
self._element.set(name, value)
el_attrib = self._element.attrib
el_attrib.clear()
if attributes:
# calling .items _always_ allocates, and the above truthy check is cheaper than the
# allocation on average
for key, value in attributes.items():
if isinstance(key, tuple):
name = "{%s}%s" % (key[2], key[1])
else:
name = key
el_attrib[name] = value
attributes = property(_getAttributes, _setAttributes)
@@ -129,8 +132,8 @@ def getETreeBuilder(ElementTreeImplementation, fullTree=False):
def cloneNode(self):
element = type(self)(self.name, self.namespace)
for name, value in self.attributes.items():
element.attributes[name] = value
if self._element.attrib:
element._element.attrib = copy(self._element.attrib)
return element
def reparentChildren(self, newParent):

View File

@@ -16,6 +16,11 @@ import warnings
import re
import sys
try:
from collections.abc import MutableMapping
except ImportError:
from collections import MutableMapping
from . import base
from ..constants import DataLossWarning
from .. import constants
@@ -23,6 +28,7 @@ from . import etree as etree_builders
from .. import _ihatexml
import lxml.etree as etree
from pip._vendor.six import PY3, binary_type
fullTree = True
@@ -44,7 +50,11 @@ class Document(object):
self._childNodes = []
def appendChild(self, element):
self._elementTree.getroot().addnext(element._element)
last = self._elementTree.getroot()
for last in self._elementTree.getroot().itersiblings():
pass
last.addnext(element._element)
def _getChildNodes(self):
return self._childNodes
@@ -185,26 +195,37 @@ class TreeBuilder(base.TreeBuilder):
infosetFilter = self.infosetFilter = _ihatexml.InfosetFilter(preventDoubleDashComments=True)
self.namespaceHTMLElements = namespaceHTMLElements
class Attributes(dict):
def __init__(self, element, value=None):
if value is None:
value = {}
class Attributes(MutableMapping):
def __init__(self, element):
self._element = element
dict.__init__(self, value) # pylint:disable=non-parent-init-called
for key, value in self.items():
if isinstance(key, tuple):
name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
else:
name = infosetFilter.coerceAttribute(key)
self._element._element.attrib[name] = value
def __setitem__(self, key, value):
dict.__setitem__(self, key, value)
def _coerceKey(self, key):
if isinstance(key, tuple):
name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
else:
name = infosetFilter.coerceAttribute(key)
self._element._element.attrib[name] = value
return name
def __getitem__(self, key):
value = self._element._element.attrib[self._coerceKey(key)]
if not PY3 and isinstance(value, binary_type):
value = value.decode("ascii")
return value
def __setitem__(self, key, value):
self._element._element.attrib[self._coerceKey(key)] = value
def __delitem__(self, key):
del self._element._element.attrib[self._coerceKey(key)]
def __iter__(self):
return iter(self._element._element.attrib)
def __len__(self):
return len(self._element._element.attrib)
def clear(self):
return self._element._element.attrib.clear()
class Element(builder.Element):
def __init__(self, name, namespace):
@@ -225,8 +246,10 @@ class TreeBuilder(base.TreeBuilder):
def _getAttributes(self):
return self._attributes
def _setAttributes(self, attributes):
self._attributes = Attributes(self, attributes)
def _setAttributes(self, value):
attributes = self.attributes
attributes.clear()
attributes.update(value)
attributes = property(_getAttributes, _setAttributes)
@@ -234,8 +257,11 @@ class TreeBuilder(base.TreeBuilder):
data = infosetFilter.coerceCharacters(data)
builder.Element.insertText(self, data, insertBefore)
def appendChild(self, child):
builder.Element.appendChild(self, child)
def cloneNode(self):
element = type(self)(self.name, self.namespace)
if self._element.attrib:
element._element.attrib.update(self._element.attrib)
return element
class Comment(builder.Comment):
def __init__(self, data):

View File

@@ -2,10 +2,10 @@
tree, generating tokens identical to those produced by the tokenizer
module.
To create a tree walker for a new type of tree, you need to do
To create a tree walker for a new type of tree, you need to
implement a tree walker object (called TreeWalker by convention) that
implements a 'serialize' method taking a tree as sole argument and
returning an iterator generating tokens.
implements a 'serialize' method which takes a tree as sole argument and
returns an iterator which generates tokens.
"""
from __future__ import absolute_import, division, unicode_literals

View File

@@ -127,4 +127,5 @@ def getETreeBuilder(ElementTreeImplementation):
return locals()
getETreeModule = moduleFactoryFactory(getETreeBuilder)

View File

@@ -1,6 +1,8 @@
from __future__ import absolute_import, division, unicode_literals
from pip._vendor.six import text_type
from collections import OrderedDict
from lxml import etree
from ..treebuilders.etree import tag_regexp
@@ -163,7 +165,7 @@ class TreeWalker(base.NonRecursiveTreeWalker):
else:
namespace = None
tag = ensure_str(node.tag)
attrs = {}
attrs = OrderedDict()
for name, value in list(node.attrib.items()):
name = ensure_str(name)
value = ensure_str(value)