=encoding utf8
=head1 NAME
html/treebuilder - HTML tree construction framework.
=head1 SYNOPSIS
from html/treebuilder import HTMLTreeBuilder;
let result := new HTMLTreeBuilder(
_input: "<!doctype html><title>Example</title>",
).parse();
=head1 NOTE
This module is not normally useful to end users. Instead use C<html/parser>.
=head1 DESCRIPTION
This module implements the tree-builder layer for C<html/parser>. It
connects the tokenizer to the C<html/dom> classes and covers the
initial, before html, before head, in head, text, after head, in body,
table, select, template, frameset, after body, after after body, and
fragment insertion-mode setup. It also routes SVG and MathML foreign
content through namespace-aware insertion, adjusted SVG/MathML names,
foreign XLink/XML/XMLNS attributes, HTML/MathML integration points, and
foreign CDATA sections.
It deliberately does not implement script execution, file load/dump
helpers, or the html5lib C<.dat> harness.
=head1 EXPORTS
=head2 Classes
=over
=item C<HTMLTreeBuilder>
Tree-construction engine. Most applications should use C<HTML.parse> or
C<HTMLParser>; this class is exported for tests, diagnostics, and tools
which need direct access to the tree-building layer.
Construct with C<_input> to provide source text. C<parse()> returns an
C<HTMLTreeConstructionResult> for a full document. C<parseFragment>
parses a context-sensitive fragment and returns an
C<HTMLTreeConstructionResult> with both a staging document and a
fragment.
Useful public accessors are C<tokenizer>, C<document>, C<fragment>,
C<errors>, C<parseErrors>, C<insertionMode>, and C<currentNode>.
C<errors()> returns tokenizer and tree-construction parse errors
collected during the latest parse.
Lower-level stack, scope, insertion, and mode methods are exposed by the
class because the implementation is Pure ZuzuScript, but they are not
part of the stable application API. Prefer the parser facade unless a
test or tool needs exact tree-builder state.
=item C<HTMLTreeConstructionResult>
Result object returned by C<HTMLTreeBuilder.parse> and
C<HTMLTreeBuilder.parseFragment>. C<document()> returns the parsed or
staging C<HTMLDocument>. C<fragment()> returns the
C<HTMLDocumentFragment> for fragment parses and C<null> for full
documents. C<errors()> returns a copy of parse errors, and
C<parseErrors()> is an alias for C<errors()>.
=item C<HTMLTreeTestSerializer>
Serializer for html5lib tree-construction tests. The static
C<serialize(node)> method returns the tree-test representation used by
C<tests/html/tree-construction.zzs>. It serializes document and fragment
children, element namespaces, sorted attributes, comments, doctypes,
text nodes, and template content in the shape expected by the vendored
fixtures.
=back
=head1 LIMITATIONS
This module implements the tree-construction behaviour claimed by the
distribution tests, not every edge case in the WHATWG algorithm. Known
html5lib expected failures are tracked in
C<tests/html/tree-construction-xfails.zzm> and summarized in the
distribution README.
Script execution during parsing is not implemented. The C<scripting>
flag affects C<noscript> parsing decisions but does not run scripts or
allow parser-time script DOM mutation.
=head1 COPYRIGHT AND LICENCE
B<< html/treebuilder >> is copyright Toby Inkster.
It is free software; you may redistribute it and/or modify it under
the terms of either the Artistic License 1.0 or the GNU General Public
License version 2.
=cut
from html/dom import
HTMLDocument,
HTMLDocumentFragment,
HTML_NAMESPACE_URI,
HTMLElement,
MATHML_NAMESPACE_URI,
SVG_NAMESPACE_URI,
XLINK_NAMESPACE_URI,
XML_NAMESPACE_URI,
XMLNS_NAMESPACE_URI;
from html/tokenizer import HTMLParseError, HTMLToken, HTMLTokenizer;
from std/string import join, replace, substr;
=for comment
Coerces any value to a string. A null input yields the empty string;
every other value is concatenated onto "" with the "_" operator.

=cut
function _html_tb_string ( value ) {
	if ( value ≡ null ) {
		return "";
	}
	return "" _ value;
}
=for comment
Tests whether data consists solely of space, tab, line feed and form
feed characters (the empty string also matches). Carriage return is
not in the character class.

=cut
function _html_tb_is_ws ( String data ) {
	let only_whitespace := data ~ /^[ \t\n\f]*$/;
	return only_whitespace;
}
=for comment
Returns true when name, compared case-insensitively, is one of the
void element names listed below.

=cut
function _html_tb_void_element ( String name ) {
	let void_names := [
		"area", "base", "br", "col", "embed", "hr", "img",
		"input", "link", "meta", "source", "track", "wbr",
	];
	return void_names.contains(lc(name));
}
=for comment
Returns true when name, compared case-insensitively, is one of the
block-level element names listed below.

=cut
function _html_tb_block_element ( String name ) {
	let block_names := [
		"address", "article", "aside", "blockquote", "center",
		"details", "dialog", "dir", "div", "dl", "fieldset",
		"figcaption", "figure", "footer", "header", "main", "menu",
		"nav", "ol", "p", "section", "summary", "ul",
	];
	return block_names.contains(lc(name));
}
=for comment
Returns true when name, compared case-insensitively, is one of the
formatting element names listed below.

=cut
function _html_tb_formatting_element ( String name ) {
	let formatting_names := [
		"a", "b", "big", "code", "em", "font", "i", "nobr",
		"s", "small", "strike", "strong", "tt", "u",
	];
	return formatting_names.contains(lc(name));
}
=for comment
Returns true when name, compared case-insensitively, is h1 through h6.

=cut
function _html_tb_heading_element ( String name ) {
	let heading_names := [ "h1", "h2", "h3", "h4", "h5", "h6" ];
	return heading_names.contains(lc(name));
}
=for comment
Returns true when name, compared case-insensitively, is li, dd or dt.

=cut
function _html_tb_list_item_element ( String name ) {
	let item_names := [ "li", "dd", "dt" ];
	return item_names.contains(lc(name));
}
=for comment
Returns true when name, compared case-insensitively, is table, tbody,
tfoot, thead or tr.

=cut
function _html_tb_table_context_element ( String name ) {
	let context_names := [ "table", "tbody", "tfoot", "thead", "tr" ];
	return context_names.contains(lc(name));
}
=for comment
Returns true when name, compared case-insensitively, is tbody, tfoot
or thead.

=cut
function _html_tb_table_section_element ( String name ) {
	let section_names := [ "tbody", "tfoot", "thead" ];
	return section_names.contains(lc(name));
}
=for comment
Returns true when name, compared case-insensitively, is td or th.

=cut
function _html_tb_table_cell_element ( String name ) {
	let cell_names := [ "td", "th" ];
	return cell_names.contains(lc(name));
}
=for comment
Returns true when name, compared case-insensitively, is one of the
table-structure element names listed below.

=cut
function _html_tb_table_structural_element ( String name ) {
	let structural_names := [
		"caption", "col", "colgroup", "tbody", "td",
		"tfoot", "th", "thead", "tr",
	];
	return structural_names.contains(lc(name));
}
=for comment
Returns true when name, compared case-insensitively, is one of the
head-content element names listed below.

=cut
function _html_tb_headish_element ( String name ) {
	let head_names := [
		"base", "link", "meta", "noscript", "script", "style", "title",
	];
	return head_names.contains(lc(name));
}
=for comment
Maps a lower-cased SVG tag name to its mixed-case spelling. Names not
in the table are returned unchanged (in the caller's original case).
The table now also includes altglyphdef and altglyphitem, which the
WHATWG SVG tag-name adjustment table lists and which were missing.

=cut
function _html_tb_svg_tag_name ( String name ) {
	let adjusted := {
		altglyph: "altGlyph",
		altglyphdef: "altGlyphDef",
		altglyphitem: "altGlyphItem",
		animatecolor: "animateColor",
		animatemotion: "animateMotion",
		animatetransform: "animateTransform",
		clippath: "clipPath",
		feblend: "feBlend",
		fecolormatrix: "feColorMatrix",
		fecomponenttransfer: "feComponentTransfer",
		fecomposite: "feComposite",
		feconvolvematrix: "feConvolveMatrix",
		fediffuselighting: "feDiffuseLighting",
		fedisplacementmap: "feDisplacementMap",
		fedistantlight: "feDistantLight",
		fedropshadow: "feDropShadow",
		feflood: "feFlood",
		fefunca: "feFuncA",
		fefuncb: "feFuncB",
		fefuncg: "feFuncG",
		fefuncr: "feFuncR",
		fegaussianblur: "feGaussianBlur",
		feimage: "feImage",
		femerge: "feMerge",
		femergenode: "feMergeNode",
		femorphology: "feMorphology",
		feoffset: "feOffset",
		fepointlight: "fePointLight",
		fespecularlighting: "feSpecularLighting",
		fespotlight: "feSpotLight",
		fetile: "feTile",
		feturbulence: "feTurbulence",
		foreignobject: "foreignObject",
		glyphref: "glyphRef",
		lineargradient: "linearGradient",
		radialgradient: "radialGradient",
		textpath: "textPath",
	};
	let key := lc(name);
	return adjusted.exists(key) ? adjusted{(key)} : name;
}
=for comment
Maps a lower-cased SVG attribute name to its mixed-case spelling.
Names not in the table are returned unchanged. The table previously
held only seven entries; it now carries the full list from the WHATWG
"adjust SVG attributes" step.

=cut
function _html_tb_svg_attr_name ( String name ) {
	let adjusted := {
		attributename: "attributeName",
		attributetype: "attributeType",
		basefrequency: "baseFrequency",
		baseprofile: "baseProfile",
		calcmode: "calcMode",
		clippathunits: "clipPathUnits",
		diffuseconstant: "diffuseConstant",
		edgemode: "edgeMode",
		filterunits: "filterUnits",
		glyphref: "glyphRef",
		gradienttransform: "gradientTransform",
		gradientunits: "gradientUnits",
		kernelmatrix: "kernelMatrix",
		kernelunitlength: "kernelUnitLength",
		keypoints: "keyPoints",
		keysplines: "keySplines",
		keytimes: "keyTimes",
		lengthadjust: "lengthAdjust",
		limitingconeangle: "limitingConeAngle",
		markerheight: "markerHeight",
		markerunits: "markerUnits",
		markerwidth: "markerWidth",
		maskcontentunits: "maskContentUnits",
		maskunits: "maskUnits",
		numoctaves: "numOctaves",
		pathlength: "pathLength",
		patterncontentunits: "patternContentUnits",
		patterntransform: "patternTransform",
		patternunits: "patternUnits",
		pointsatx: "pointsAtX",
		pointsaty: "pointsAtY",
		pointsatz: "pointsAtZ",
		preservealpha: "preserveAlpha",
		preserveaspectratio: "preserveAspectRatio",
		primitiveunits: "primitiveUnits",
		refx: "refX",
		refy: "refY",
		repeatcount: "repeatCount",
		repeatdur: "repeatDur",
		requiredextensions: "requiredExtensions",
		requiredfeatures: "requiredFeatures",
		specularconstant: "specularConstant",
		specularexponent: "specularExponent",
		spreadmethod: "spreadMethod",
		startoffset: "startOffset",
		stddeviation: "stdDeviation",
		stitchtiles: "stitchTiles",
		surfacescale: "surfaceScale",
		systemlanguage: "systemLanguage",
		tablevalues: "tableValues",
		targetx: "targetX",
		targety: "targetY",
		textlength: "textLength",
		viewbox: "viewBox",
		viewtarget: "viewTarget",
		xchannelselector: "xChannelSelector",
		ychannelselector: "yChannelSelector",
		zoomandpan: "zoomAndPan",
	};
	let key := lc(name);
	return adjusted.exists(key) ? adjusted{(key)} : name;
}
=for comment
Returns "definitionURL" when name, compared case-insensitively, is
"definitionurl"; any other name is returned unchanged.

=cut
function _html_tb_mathml_attr_name ( String name ) {
	if ( lc(name) eq "definitionurl" ) {
		return "definitionURL";
	}
	return name;
}
=for comment
Returns true when the token's tag name is in the list below, or when
it is "font" and the token carries a color, face or size attribute.
Returns false otherwise. The tag name is compared as given, without
lower-casing.

=cut
function _html_tb_breakout_start_tag ( HTMLToken token ) {
	let tag := token.tagName();
	let breakout_names := [
		"b", "big", "blockquote", "body", "br", "center", "code",
		"dd", "div", "dl", "dt", "em", "embed",
		"h1", "h2", "h3", "h4", "h5", "h6", "head", "hr",
		"i", "img", "li", "listing", "menu", "meta", "nobr",
		"ol", "p", "pre", "ruby", "s", "small", "span", "strong",
		"strike", "sub", "sup", "table", "tt", "u", "ul", "var",
	];
	if ( breakout_names.contains(tag) ) {
		return true;
	}
	if ( tag eq "font" ) {
		return true if (
			token.hasAttribute("color")
			or token.hasAttribute("face")
			or token.hasAttribute("size")
		);
	}
	return false;
}
=for comment
Returns true when name, compared case-insensitively, is one of the
element names listed below, i.e. an element that may be closed by an
implied end tag.

=cut
function _html_tb_implied_end_element ( String name ) {
	let implied_names := [
		"dd", "dt", "li", "optgroup", "option",
		"p", "rb", "rp", "rt", "rtc",
	];
	return implied_names.contains(lc(name));
}
=for comment
Returns true when name (case-insensitive) is a block element, a
heading, or one of the extra names listed below. The list now also
includes hgroup, search and xmp, which the WHATWG "special" category
names and which were missing. The check is by name only; the
element's namespace is not considered here.

=cut
function _html_tb_special_element ( String name ) {
	return _html_tb_block_element(name)
		or _html_tb_heading_element(name)
		or [
			"applet", "area", "base", "basefont", "bgsound", "body",
			"br", "button", "caption", "col", "colgroup", "dd", "dt",
			"embed", "form", "frame", "frameset", "head", "hgroup",
			"hr", "html", "iframe", "img", "input", "keygen", "li",
			"link", "listing", "marquee", "meta", "noembed",
			"noframes", "noscript", "object", "param", "plaintext",
			"pre", "script", "search", "select", "source", "style",
			"table", "tbody", "td", "template", "textarea", "tfoot",
			"th", "thead", "title", "tr", "track", "wbr", "xmp",
		].contains(lc(name));
}
=for comment
Returns true when a and b are both element nodes with equal namespace
URI, equal tag name, the same number of attributes, and an equal
value in b for every attribute name found in a. Returns false if
either argument is null.

=cut
function _html_tb_same_element_shape ( a, b ) {
	if ( a ≡ null or b ≡ null ) {
		return false;
	}
	if ( a.nodeKind() ne "element" or b.nodeKind() ne "element" ) {
		return false;
	}
	if ( a.namespaceURI() ne b.namespaceURI() ) {
		return false;
	}
	if ( a.tagName() ne b.tagName() ) {
		return false;
	}
	let left := a.attributes();
	let right := b.attributes();
	let left_names := left.keys();
	return false unless left_names.length() == right.keys().length();
	for ( let key in left_names ) {
		if ( not right.exists(key) ) {
			return false;
		}
		if ( left{(key)} ne right{(key)} ) {
			return false;
		}
	}
	return true;
}
=for comment
Escapes text for quoted output: every double quote becomes a
backslash followed by a double quote, then every newline becomes a
backslash followed by "n". Backslashes already present are left as is.

=cut
function _html_tb_quote_text ( String text ) {
	let quotes_escaped := replace( text, "\"", "\\\"", "g" );
	return replace( quotes_escaped, "\n", "\\n", "g" );
}
=for comment
Immutable-style result holder: document() and fragment() return the
stored values as is, errors() returns a fresh array holding the same
error objects, and parseErrors() forwards to errors(). A null _errors
is replaced by an empty array at build time.

=cut
class HTMLTreeConstructionResult {
	let _document := null;
	let _fragment := null;
	let Array _errors := [];
	method __build__ () {
		if ( _errors ≡ null ) {
			_errors := [];
		}
	}
	method document () {
		return _document;
	}
	method fragment () {
		return _fragment;
	}
	method errors () {
		let copy := [];
		let i := 0;
		while ( i < _errors.length() ) {
			copy.push(_errors[i]);
			i++;
		}
		return copy;
	}
	method parseErrors () {
		return self.errors();
	}
}
class HTMLTreeBuilder {
let String _input := "";
let _tokenizer := null;
let _document := null;
let Array _open_elements := [];
let Array _active_formatting_elements := [];
let Array _pending_table_character_tokens := [];
let Boolean _foster_parenting := false;
let Array _template_insertion_modes := [];
let _form_element := null;
let _head_element := null;
let _body_element := null;
let String _insertion_mode := "initial";
let String _original_insertion_mode := "initial";
let Boolean _frameset_ok := true;
let Boolean _scripting := false;
let Boolean _finished := false;
let Array _errors := [];
let Number _tokenizer_error_index := 0;
let Boolean _fragment_parsing := false;
let _fragment_context_element := null;
let _fragment := null;
let _synthetic_html_element := null;
=for comment
Build hook: initialises all parser state by calling reset() with the
_input value supplied at construction.

=cut
method __build__ () {
	let source := _input;
	self.reset(source);
}
=for comment
Discards all parse state and prepares for a new parse of input:
creates a fresh tokenizer and document, empties the stacks and error
list, clears element pointers and fragment state, and returns to the
"initial" insertion mode. The _scripting flag is not touched here.
Returns self.

=cut
method reset ( String input := "" ) {
	_input := _html_tb_string(input);
	_tokenizer := new HTMLTokenizer( _input: _input );
	_document := new HTMLDocument();

	_open_elements := [];
	_active_formatting_elements := [];
	_pending_table_character_tokens := [];
	_template_insertion_modes := [];
	_errors := [];
	_tokenizer_error_index := 0;

	_insertion_mode := "initial";
	_original_insertion_mode := "initial";
	_foster_parenting := false;
	_frameset_ok := true;
	_finished := false;

	_form_element := null;
	_head_element := null;
	_body_element := null;

	_fragment_parsing := false;
	_fragment_context_element := null;
	_fragment := null;
	_synthetic_html_element := null;
	return self;
}
=for comment
Accessor: returns the tokenizer created by the latest reset().

=cut
method tokenizer () {
	let current_tokenizer := _tokenizer;
	return current_tokenizer;
}
=for comment
Accessor: returns the document created by the latest reset().

=cut
method document () {
	let current_document := _document;
	return current_document;
}
=for comment
Accessor: returns the document fragment set up by parseFragment(), or
null when no fragment parse has been started since the last reset().

=cut
method fragment () {
	let current_fragment := _fragment;
	return current_fragment;
}
=for comment
Returns a new array containing the collected parse errors, so callers
cannot alter the builder's own list by modifying the result.

=cut
method errors () {
	let copy := [];
	let i := 0;
	while ( i < _errors.length() ) {
		copy.push(_errors[i]);
		i++;
	}
	return copy;
}
=for comment
Alias for errors().

=cut
method parseErrors () {
	let collected := self.errors();
	return collected;
}
=for comment
Accessor: returns the name of the current insertion mode.

=cut
method insertionMode () {
	let mode := _insertion_mode;
	return mode;
}
=for comment
Returns the last entry on the stack of open elements, or the document
itself when the stack is empty.

=cut
method currentNode () {
	let depth := _open_elements.length();
	if ( depth == 0 ) {
		return _document;
	}
	return _open_elements[ depth - 1 ];
}
=for comment
Returns the fragment context element when fragment parsing is active,
exactly one element is open and a context element exists; otherwise
returns currentNode().

=cut
method _adjusted_current_node () {
	let use_context := (
		_fragment_parsing
		and _open_elements.length() == 1
		and _fragment_context_element ≢ null
	);
	if ( use_context ) {
		return _fragment_context_element;
	}
	return self.currentNode();
}
=for comment
True when node is a non-null element node whose namespace URI equals
HTML_NAMESPACE_URI.

=cut
method _is_html_ns ( node ) {
	return false if node ≡ null;
	return false unless node.nodeKind() eq "element";
	return node.namespaceURI() eq HTML_NAMESPACE_URI;
}
=for comment
True when node is a non-null element node whose namespace URI equals
SVG_NAMESPACE_URI.

=cut
method _is_svg_ns ( node ) {
	return false if node ≡ null;
	return false unless node.nodeKind() eq "element";
	return node.namespaceURI() eq SVG_NAMESPACE_URI;
}
=for comment
True when node is a non-null element node whose namespace URI equals
MATHML_NAMESPACE_URI.

=cut
method _is_mathml_ns ( node ) {
	return false if node ≡ null;
	return false unless node.nodeKind() eq "element";
	return node.namespaceURI() eq MATHML_NAMESPACE_URI;
}
=for comment
True when the adjusted current node is an element whose namespace URI
differs from HTML_NAMESPACE_URI. A non-element node (such as the
document) yields false.

=cut
method _is_foreign_context () {
	let current := self._adjusted_current_node();
	if ( current.nodeKind() ne "element" ) {
		return false;
	}
	let namespace := current.namespaceURI();
	return namespace ne HTML_NAMESPACE_URI;
}
=for comment
True when a fragment is being parsed, exactly one element is open,
and the fragment context is an element outside the HTML namespace.

=cut
method _foreign_fragment_context_only () {
	return false unless _fragment_parsing;
	return false unless _open_elements.length() == 1;
	let context := _fragment_context_element;
	return false if context ≡ null;
	return false unless context.nodeKind() eq "element";
	return context.namespaceURI() ne HTML_NAMESPACE_URI;
}
=for comment
True when a fragment is being parsed, its context is an HTML td or th
element, and neither td nor th is currently in table scope on the
stack of open elements.

=cut
method _fragment_context_is_table_cell () {
	return false unless _fragment_parsing;
	let context := _fragment_context_element;
	return false if context ≡ null;
	return false unless context.nodeKind() eq "element";
	return false unless context.namespaceURI() eq HTML_NAMESPACE_URI;
	let cell_names := [ "td", "th" ];
	return false unless cell_names.contains(lc(context.localName()));
	return false if self.hasElementInTableScope("td");
	return not self.hasElementInTableScope("th");
}
=for comment
True when node is a MathML element whose local name is mi, mo, mn, ms
or mtext.

=cut
method _is_mathml_text_integration_point ( node ) {
	if ( not self._is_mathml_ns(node) ) {
		return false;
	}
	let text_names := [ "mi", "mo", "mn", "ms", "mtext" ];
	return text_names.contains(node.localName());
}
=for comment
True for an SVG foreignObject, desc or title element, and for a
MathML annotation-xml element whose encoding attribute, lower-cased,
is "text/html" or "application/xhtml+xml". An SVG element with any
other local name returns false without the MathML test being tried.

=cut
method _is_html_integration_point ( node ) {
	return false if node ≡ null;
	return false unless node.nodeKind() eq "element";
	if ( self._is_svg_ns(node) ) {
		let svg_points := [ "foreignObject", "desc", "title" ];
		return svg_points.contains(node.localName());
	}
	if ( self._is_mathml_ns(node) ) {
		if ( node.localName() eq "annotation-xml" ) {
			let encoding := lc(node.getAttribute("encoding") ?: "");
			return true if encoding eq "text/html";
			return encoding eq "application/xhtml+xml";
		}
	}
	return false;
}
=for comment
Decides whether token is handled by the HTML insertion modes (true)
or by the foreign-content rules (false), based on the adjusted current
node. EOF, non-element nodes and HTML elements always use HTML rules.
MathML text integration points use HTML rules for characters and for
start tags other than mglyph/malignmark. A MathML annotation-xml
element uses HTML rules for an "svg" start tag; this case is required
by the WHATWG tree-construction dispatcher and was previously missing.
HTML integration points use HTML rules for characters and start tags.

=cut
method _should_process_using_html_rules ( HTMLToken token ) {
	return true if token.type() eq "EOF";
	let node := self._adjusted_current_node();
	return true if node.nodeKind() ne "element";
	return true if node.namespaceURI() eq HTML_NAMESPACE_URI;
	if ( self._is_mathml_text_integration_point(node) ) {
		return true if token.type() eq "Character";
		return true if token.type() eq "StartTag"
			and not [ "mglyph", "malignmark" ].contains(token.tagName());
	}
	if (
		self._is_mathml_ns(node) and
		node.localName() eq "annotation-xml"
	) {
		return true if token.type() eq "StartTag"
			and token.tagName() eq "svg";
	}
	if ( self._is_html_integration_point(node) ) {
		return true if token.type() eq "Character" or token.type() eq "StartTag";
	}
	return false;
}
=for comment
Pushes element onto the stack of open elements and returns that same
element.

=cut
method pushElement ( element ) {
	let pushed := element;
	_open_elements.push(pushed);
	return pushed;
}
=for comment
Removes and returns the last entry on the stack of open elements, or
returns null when the stack is already empty.

=cut
method popElement () {
	if ( _open_elements.length() == 0 ) {
		return null;
	}
	return _open_elements.pop();
}
=for comment
Sets the insertion mode to mode and returns self. The mode name is
not validated here.

=cut
method switchTo ( String mode ) {
	let next_mode := mode;
	_insertion_mode := next_mode;
	return self;
}
=for comment
True when an element named name is on the stack of open elements and
is reached, searching from the top, before any of the default scope
boundary elements listed below.

=cut
method hasElementInScope ( String name ) {
	let boundaries := [
		"applet", "caption", "html", "table", "td", "th",
		"marquee", "object", "template",
	];
	return self._has_element_in_scope( name, boundaries );
}
=for comment
Like hasElementInScope(), but "button" is an additional boundary.

=cut
method hasElementInButtonScope ( String name ) {
	let boundaries := [
		"applet", "button", "caption", "html", "table", "td", "th",
		"marquee", "object", "template",
	];
	return self._has_element_in_scope( name, boundaries );
}
=for comment
Like hasElementInScope(), but "ol" and "ul" are additional boundaries.

=cut
method hasElementInListItemScope ( String name ) {
	let boundaries := [
		"applet", "caption", "html", "ol", "ul", "table", "td", "th",
		"marquee", "object", "template",
	];
	return self._has_element_in_scope( name, boundaries );
}
=for comment
Scope test whose only boundaries are html, table and template.

=cut
method hasElementInTableScope ( String name ) {
	let boundaries := [ "html", "table", "template" ];
	return self._has_element_in_scope( name, boundaries );
}
=for comment
Private alias for hasElementInTableScope().

=cut
method _has_element_in_table_scope ( String name ) {
	let found := self.hasElementInTableScope(name);
	return found;
}
=for comment
True when any element on the stack of open elements has the tag name
"template".

=cut
method _has_template_on_stack () {
	let i := 0;
	while ( i < _open_elements.length() ) {
		let candidate := _open_elements[i];
		if ( candidate.nodeKind() eq "element" ) {
			return true if candidate.tagName() eq "template";
		}
		i++;
	}
	return false;
}
=for comment
Walks the stack of open elements from the top down. Returns true at
the first element whose tag name equals the lower-cased name, false
at the first element whose tag name is in boundaries, and false when
the stack is exhausted. Matching is by tag name only; namespaces are
not consulted.

=cut
method _has_element_in_scope ( String name, Array boundaries ) {
	let target := lc(name);
	let depth := _open_elements.length();
	while ( depth > 0 ) {
		depth--;
		let candidate := _open_elements[depth];
		if ( candidate.nodeKind() eq "element" ) {
			let tag := candidate.tagName();
			if ( tag eq target ) {
				return true;
			}
			if ( boundaries.contains(tag) ) {
				return false;
			}
		}
	}
	return false;
}
=for comment
Returns the lowest index at which node (compared by identity) appears
on the stack of open elements, or -1 when it is absent.

=cut
method _stack_index ( node ) {
	let total := _open_elements.length();
	let position := 0;
	while ( position < total ) {
		if ( _open_elements[position] ≡ node ) {
			return position;
		}
		position++;
	}
	return -1;
}
=for comment
Returns the index of the topmost open element whose tag name equals
the lower-cased name, or -1 when none matches.

=cut
method _stack_index_for_name ( String name ) {
	let target := lc(name);
	let position := _open_elements.length();
	while ( position > 0 ) {
		position--;
		let candidate := _open_elements[position];
		if ( candidate.nodeKind() eq "element" ) {
			return position if candidate.tagName() eq target;
		}
	}
	return -1;
}
=for comment
True when the current node is an element whose tag name equals the
lower-cased name.

=cut
method _current_node_is ( String name ) {
	let top := self.currentNode();
	if ( top.nodeKind() ne "element" ) {
		return false;
	}
	return top.tagName() eq lc(name);
}
=for comment
Pops open elements until the current node is an html, table or
template element (or the stack is empty). Returns self.

=cut
method _clear_stack_back_to_table_context () {
	let stop_at := [ "html", "table", "template" ];
	while ( _open_elements.length() > 0 ) {
		let top := self.currentNode();
		last if top.nodeKind() ne "element";
		last if stop_at.contains(top.tagName());
		self.popElement();
	}
	return self;
}
=for comment
Pops open elements until the current node is a tbody, tfoot, thead,
html or template element (or the stack is empty). Returns self.

=cut
method _clear_stack_back_to_table_body_context () {
	let stop_at := [ "tbody", "tfoot", "thead", "html", "template" ];
	while ( _open_elements.length() > 0 ) {
		let top := self.currentNode();
		last if top.nodeKind() ne "element";
		last if stop_at.contains(top.tagName());
		self.popElement();
	}
	return self;
}
=for comment
Pops open elements until the current node is a tr, html or template
element (or the stack is empty). Returns self.

=cut
method _clear_stack_back_to_table_row_context () {
	let stop_at := [ "tr", "html", "template" ];
	while ( _open_elements.length() > 0 ) {
		let top := self.currentNode();
		last if top.nodeKind() ne "element";
		last if stop_at.contains(top.tagName());
		self.popElement();
	}
	return self;
}
=for comment
Pops open elements until one whose tag name equals the lower-cased
name has been popped, or until the stack is empty. Returns self.

=cut
method _pop_until_name ( String name ) {
	let target := lc(name);
	while ( _open_elements.length() > 0 ) {
		let popped := self.popElement();
		if ( popped.nodeKind() eq "element" and popped.tagName() eq target ) {
			last;
		}
	}
	return self;
}
=for comment
Rebuilds the stack of open elements without any entry identical to
node, keeping the remaining entries in order. Returns self.

=cut
method _remove_from_open_elements ( node ) {
	let survivors := [];
	let i := 0;
	while ( i < _open_elements.length() ) {
		let candidate := _open_elements[i];
		if ( candidate ≢ node ) {
			survivors.push(candidate);
		}
		i++;
	}
	_open_elements := survivors;
	return self;
}
=for comment
Inserts node into the stack of open elements so that it ends up at
position index. An index of zero or less puts it first; an index at
or beyond the current length appends it. Returns self.

=cut
method _insert_open_element_at ( node, Number index ) {
	let rebuilt := [];
	let total := _open_elements.length();
	let i := 0;
	while ( i < total and i < index ) {
		rebuilt.push(_open_elements[i]);
		i++;
	}
	rebuilt.push(node);
	while ( i < total ) {
		rebuilt.push(_open_elements[i]);
		i++;
	}
	_open_elements := rebuilt;
	return self;
}
=for comment
Rebuilds the stack of open elements with every entry identical to
oldNode swapped for newNode. Returns self.

=cut
method _replace_open_element ( oldNode, newNode ) {
	let rebuilt := [];
	for ( let candidate in _open_elements ) {
		if ( candidate ≡ oldNode ) {
			rebuilt.push(newNode);
		}
		else {
			rebuilt.push(candidate);
		}
	}
	_open_elements := rebuilt;
	return self;
}
=for comment
Pops the current node for as long as it is an element accepted by
_html_tb_implied_end_element(). When except is given, popping stops at
an element whose tag name equals the lower-cased except value.
Returns self.

=cut
method _generate_implied_end_tags ( except := null ) {
	while ( true ) {
		let top := self.currentNode();
		last if top.nodeKind() ne "element";
		let tag := top.tagName();
		if ( not _html_tb_implied_end_element(tag) ) {
			last;
		}
		if ( except ≢ null ) {
			last if tag eq lc("" _ except);
		}
		self.popElement();
	}
	return self;
}
=for comment
Creates a new element in the builder's document with the same
namespace URI and tag name as element, then copies every attribute
record across: records without a namespace via setAttribute(), the
rest via setAttributeNS(). Children are not copied.

=cut
method _clone_for_insertion ( element ) {
	let namespace := element.namespaceURI();
	let tag := element.tagName();
	let copy := _document.createElementNS( namespace, tag );
	for ( let attr in element.attributeRecords() ) {
		if ( attr{namespaceURI} ≢ null ) {
			copy.setAttributeNS(
				attr{namespaceURI},
				attr{qualifiedName},
				attr{value},
			);
		}
		else {
			copy.setAttribute( attr{qualifiedName}, attr{value} );
		}
	}
	return copy;
}
=for comment
Returns the lowest index at which node (compared by identity) appears
in the list of active formatting elements, or -1 when it is absent.

=cut
method _active_index ( node ) {
	let total := _active_formatting_elements.length();
	let position := 0;
	while ( position < total ) {
		if ( _active_formatting_elements[position] ≡ node ) {
			return position;
		}
		position++;
	}
	return -1;
}
=for comment
Searches the list of active formatting elements from the end for an
element whose tag name equals the lower-cased name. The search stops
at the first marker (a null entry) it meets: the WHATWG algorithms
only consider entries between the end of the list and the last
marker. Previously markers were skipped and older entries could match.
Returns the element, or null when none is found.

=cut
method _find_active_formatting_element ( String name ) {
	let wanted := lc(name);
	let i := _active_formatting_elements.length() - 1;
	while ( i >= 0 ) {
		let entry := _active_formatting_elements[i];
		return null if entry ≡ null;
		return entry if entry.nodeKind() eq "element"
			and entry.tagName() eq wanted;
		i--;
	}
	return null;
}
=for comment
Appends element to the list of active formatting elements, applying
the "Noah's Ark" limit first: if three entries after the last marker
already have the same shape (see _html_tb_same_element_shape), the
earliest of them is removed. The count restarts at every marker (null
entry); previously entries before the last marker were counted too.
Returns element.

=cut
method _push_active_formatting_element ( element ) {
	let matching := [];
	let i := 0;
	while ( i < _active_formatting_elements.length() ) {
		let entry := _active_formatting_elements[i];
		if ( entry ≡ null ) {
			matching := [];
		}
		else if ( _html_tb_same_element_shape( entry, element ) ) {
			matching.push(i);
		}
		i++;
	}
	if ( matching.length() >= 3 ) {
		let remove_at := matching[0];
		let kept := [];
		i := 0;
		while ( i < _active_formatting_elements.length() ) {
			kept.push(_active_formatting_elements[i]) unless i == remove_at;
			i++;
		}
		_active_formatting_elements := kept;
	}
	_active_formatting_elements.push(element);
	return element;
}
=for comment
Appends a marker, represented by null, to the list of active
formatting elements. Returns self.

=cut
method _push_active_formatting_marker () {
	let marker := null;
	_active_formatting_elements.push(marker);
	return self;
}
=for comment
Pops entries off the list of active formatting elements until a
marker (null) has been popped or the list is empty. Returns self.

=cut
method _clear_active_formatting_to_last_marker () {
	while ( _active_formatting_elements.length() > 0 ) {
		let removed := _active_formatting_elements.pop();
		if ( removed ≡ null ) {
			last;
		}
	}
	return self;
}
=for comment
Rebuilds the list of active formatting elements without any entry
identical to element. Returns self.

=cut
method _remove_from_active_formatting_elements ( element ) {
	let survivors := [];
	let i := 0;
	while ( i < _active_formatting_elements.length() ) {
		let candidate := _active_formatting_elements[i];
		if ( candidate ≢ element ) {
			survivors.push(candidate);
		}
		i++;
	}
	_active_formatting_elements := survivors;
	return self;
}
=for comment
Inserts element into the list of active formatting elements so that
it ends up at position index. An index of zero or less puts it first;
an index at or beyond the current length appends it. Returns self.

=cut
method _insert_active_formatting_element_at ( element, Number index ) {
	let rebuilt := [];
	let total := _active_formatting_elements.length();
	let i := 0;
	while ( i < total and i < index ) {
		rebuilt.push(_active_formatting_elements[i]);
		i++;
	}
	rebuilt.push(element);
	while ( i < total ) {
		rebuilt.push(_active_formatting_elements[i]);
		i++;
	}
	_active_formatting_elements := rebuilt;
	return self;
}
=for comment
Rebuilds the list of active formatting elements with every entry
identical to oldNode swapped for newNode. Returns self.

=cut
method _replace_active_formatting_element ( oldNode, newNode ) {
	let rebuilt := [];
	for ( let candidate in _active_formatting_elements ) {
		if ( candidate ≡ oldNode ) {
			rebuilt.push(newNode);
		}
		else {
			rebuilt.push(candidate);
		}
	}
	_active_formatting_elements := rebuilt;
	return self;
}
=for comment
Re-opens formatting elements that are in the active list but no
longer on the stack. Does nothing when the list is empty or its last
entry is a marker or still open. Otherwise rewinds to the entry after
the nearest marker or still-open entry, then for each entry from
there to the end: clones it, appends the clone to the current node,
pushes the clone and replaces the list entry with it. Clones are
appended directly to the current node. Returns self.

=cut
method _reconstruct_active_formatting_elements () {
	let total := _active_formatting_elements.length();
	if ( total == 0 ) {
		return self;
	}
	let start := total - 1;
	let newest := _active_formatting_elements[start];
	if ( newest ≡ null ) {
		return self;
	}
	if ( self._stack_index(newest) >= 0 ) {
		return self;
	}
	while ( start > 0 ) {
		let earlier := _active_formatting_elements[start - 1];
		last if earlier ≡ null;
		last if self._stack_index(earlier) >= 0;
		start--;
	}
	let position := start;
	while ( position < _active_formatting_elements.length() ) {
		let stale := _active_formatting_elements[position];
		let fresh := self._clone_for_insertion(stale);
		self.currentNode().appendChild(fresh);
		self.pushElement(fresh);
		self._replace_active_formatting_element( stale, fresh );
		position++;
	}
	return self;
}
=for comment
Handles an end tag for formatting element name using the adoption
agency approach: up to eight outer passes, each locating the
formatting element and the first special element below it on the
stack (the "furthest block"), cloning the active formatting elements
in between, and re-parenting the furthest block's children under a
clone of the formatting element. Two fixes relative to the earlier
version. First, the inner loop keeps a running stack index instead of
looking the node up again; after a node had been removed from the
stack the lookup returned -1 and the loop ended early, skipping the
elements above it. Second, the bookmark is tracked as "insert after
this entry" and resolved to an index only after the formatting element
has been removed from the active list, so the replacement is no longer
placed one slot too late. Returns self.

=cut
method _adoption_agency_end_tag ( String name, token := null ) {
	if (
		self.currentNode().nodeKind() eq "element" and
		self.currentNode().tagName() eq lc(name) and
		self._active_index(self.currentNode()) < 0
	) {
		self.popElement();
		return self;
	}
	let outer := 0;
	while ( outer < 8 ) {
		outer++;
		let formatting := self._find_active_formatting_element(name);
		if ( formatting ≡ null ) {
			self.parseError(
				"adoption-agency-missing-formatting-element",
				"No active formatting element " _ name,
				token,
			);
			self._generic_end_tag(name, token);
			return self;
		}
		let formatting_stack_index := self._stack_index(formatting);
		if ( formatting_stack_index < 0 ) {
			self.parseError(
				"adoption-agency-formatting-element-not-open",
				"Formatting element " _ name _ " is not open",
				token,
			);
			self._remove_from_active_formatting_elements(formatting);
			return self;
		}
		if ( not self.hasElementInScope(name) ) {
			self.parseError(
				"adoption-agency-formatting-element-not-in-scope",
				"Formatting element " _ name _ " is not in scope",
				token,
			);
			return self;
		}
		if ( self.currentNode() ≢ formatting ) {
			self.parseError(
				"adoption-agency-misnested-formatting-element",
				"Misnested formatting element " _ name,
				token,
			);
		}
		let furthest_block := null;
		let i := formatting_stack_index + 1;
		while ( i < _open_elements.length() ) {
			let candidate := _open_elements[i];
			if (
				candidate.nodeKind() eq "element" and
				_html_tb_special_element(candidate.tagName())
			) {
				furthest_block := candidate;
				last;
			}
			i++;
		}
		if ( furthest_block ≡ null ) {
			while ( _open_elements.length() > 0 ) {
				let element := self.popElement();
				last if element ≡ formatting;
			}
			self._remove_from_active_formatting_elements(formatting);
			return self;
		}
		let common_ancestor := _open_elements[formatting_stack_index - 1];
		let bookmark_after := null;
		let node := furthest_block;
		let last_node := furthest_block;
		let node_index := self._stack_index(furthest_block);
		let inner := 0;
		while ( true ) {
			inner++;
			node_index--;
			last if node_index < 0;
			node := _open_elements[node_index];
			last if node ≡ formatting;
			if ( inner > 3 and self._active_index(node) >= 0 ) {
				self._remove_from_active_formatting_elements(node);
			}
			if ( self._active_index(node) < 0 ) {
				self._remove_from_open_elements(node);
				next;
			}
			let clone := self._clone_for_insertion(node);
			self._replace_active_formatting_element( node, clone );
			self._replace_open_element( node, clone );
			node := clone;
			bookmark_after := node if last_node ≡ furthest_block;
			node.appendChild(last_node);
			last_node := node;
		}
		common_ancestor.appendChild(last_node);
		let replacement := self._clone_for_insertion(formatting);
		for ( let child in furthest_block.childNodes() ) {
			replacement.appendChild(child);
		}
		furthest_block.appendChild(replacement);
		let bookmark := self._active_index(formatting);
		self._remove_from_active_formatting_elements(formatting);
		if (
			bookmark_after ≢ null and
			self._active_index(bookmark_after) >= 0
		) {
			bookmark := self._active_index(bookmark_after) + 1;
		}
		self._insert_active_formatting_element_at( replacement, bookmark );
		self._remove_from_open_elements(formatting);
		self._insert_open_element_at(
			replacement,
			self._stack_index(furthest_block) + 1,
		);
	}
	return self;
}
=for comment
Copies onto target each token attribute whose name target does not
already have; attributes already present keep their values. A null
target is ignored. Returns self.

=cut
method _merge_missing_attributes ( target, HTMLToken token ) {
	if ( target ≡ null ) {
		return self;
	}
	for ( let attr in token.attributes() ) {
		if ( not target.hasAttribute(attr{name}) ) {
			target.setAttribute( attr{name}, attr{value} );
		}
	}
	return self;
}
=for comment
Inserts an element for token via insertElementForToken(), then pops
the stack of open elements once. Returns self.

=cut
method _insert_from_token_then_pop ( HTMLToken token ) {
	let start_tag := token;
	self.insertElementForToken(start_tag);
	self.popElement();
	return self;
}
=for comment
Delegates base, link, meta, style, title, script and (when scripting
is enabled) noscript start tags to the "in head" handler and returns
its result. Returns null for any other tag, and for noscript when
scripting is disabled.

=cut
method _start_headish_in_body ( HTMLToken token ) {
	if ( token.tagName() eq "noscript" and not _scripting ) {
		return null;
	}
	let delegated := [
		"base", "link", "meta", "style", "title", "script", "noscript",
	];
	if ( delegated.contains(token.tagName()) ) {
		return self._mode_in_head(token);
	}
	return null;
}
=for comment
Closes a p element. When no p is in button scope, records a parse
error and inserts an empty p first. Then generates implied end tags
(except p), records a parse error if the current node is not p, and
pops up to and including the p. Returns self.

=cut
method _close_p ( token := null ) {
	let in_scope := self.hasElementInButtonScope("p");
	if ( not in_scope ) {
		self.parseError(
			"unexpected-end-tag-p",
			"p element is not in button scope",
			token,
		);
		self.insertHtmlElement("p");
	}
	self._generate_implied_end_tags("p");
	if ( not self._current_node_is("p") ) {
		self.parseError( "misnested-end-tag", "Misnested p end tag", token );
	}
	self._pop_until_name("p");
	return self;
}
=for comment
Closes a list item named name. "li" is looked up in list-item scope,
any other name in the default scope. When not in scope, records a
parse error and does nothing else. Otherwise generates implied end
tags (except name), records a parse error if the current node is not
name, and pops up to and including it. Returns self.

=cut
method _close_list_item ( String name, token := null ) {
	let in_scope := false;
	if ( name eq "li" ) {
		in_scope := self.hasElementInListItemScope(name);
	}
	else {
		in_scope := self.hasElementInScope(name);
	}
	if ( not in_scope ) {
		self.parseError(
			"unexpected-end-tag",
			name _ " element is not in scope",
			token,
		);
		return self;
	}
	self._generate_implied_end_tags(name);
	if ( not self._current_node_is(name) ) {
		self.parseError(
			"misnested-end-tag",
			"Misnested " _ name _ " end tag",
			token,
		);
	}
	self._pop_until_name(name);
	return self;
}
=for comment
Closes the nearest open heading (h1 to h6). The heading must be in
scope as defined by hasElementInScope(); previously any heading
anywhere on the stack was accepted, so an end tag inside a table cell
could pop through the cell. When no heading is in scope, records a
parse error and does nothing else. Otherwise generates implied end
tags, records a parse error if the current node is not a heading, and
pops until a heading has been popped. Returns self.
NOTE(review): the misnesting check accepts any heading as current
node rather than comparing with the end tag's own name; confirm
whether that is intended.

=cut
method _close_heading ( token := null ) {
	let in_scope := false;
	for ( let heading in [ "h1", "h2", "h3", "h4", "h5", "h6" ] ) {
		in_scope := true if self.hasElementInScope(heading);
	}
	if ( not in_scope ) {
		self.parseError(
			"unexpected-heading-end-tag",
			"Heading element is not in scope",
			token,
		);
		return self;
	}
	self._generate_implied_end_tags();
	self.parseError(
		"misnested-heading-end-tag",
		"Misnested heading end tag",
		token,
	) unless _html_tb_heading_element(self.currentNode().tagName());
	while ( _open_elements.length() > 0 ) {
		let popped := self.popElement();
		last if popped.nodeKind() eq "element"
			and _html_tb_heading_element(popped.tagName());
	}
	return self;
}
=for comment
Closes a button element. When no button is in scope, records a parse
error and does nothing else. Otherwise generates implied end tags,
records a parse error if the current node is not button, and pops up
to and including the button. Returns self.

=cut
method _close_button ( token := null ) {
	let in_scope := self.hasElementInScope("button");
	if ( not in_scope ) {
		self.parseError(
			"unexpected-button-end-tag",
			"button element is not in scope",
			token,
		);
		return self;
	}
	self._generate_implied_end_tags();
	if ( not self._current_node_is("button") ) {
		self.parseError(
			"misnested-button-end-tag",
			"Misnested button end tag",
			token,
		);
	}
	self._pop_until_name("button");
	return self;
}
=for comment
Main loop. Until the builder is finished or the tokenizer returns
null: tells the tokenizer whether CDATA sections are allowed, fetches
the next token, and feeds it to _process_token() repeatedly for as
long as that call asks for reprocessing. Tokenizer errors are copied
into the builder's list before and after each token and once more at
the end. CDATA is allowed only in foreign context outside HTML and
MathML-text integration points.
NOTE(review): the WHATWG tokenizer allows CDATA whenever the adjusted
current node is not an HTML element; confirm the extra integration
point exclusions are intentional.
Returns a new HTMLTreeConstructionResult.

=cut
method parse () {
	while ( not _finished ) {
		let allow_cdata := (
			self._is_foreign_context()
			and not self._is_html_integration_point(self._adjusted_current_node())
			and not self._is_mathml_text_integration_point(
				self._adjusted_current_node(),
			)
		);
		_tokenizer.setAllowCDATA(allow_cdata);
		let token := _tokenizer.nextToken();
		if ( token ≡ null ) {
			last;
		}
		self._copy_tokenizer_errors();
		let again := true;
		while ( again ) {
			again := self._process_token(token);
		}
		self._copy_tokenizer_errors();
	}
	self._copy_tokenizer_errors();
	let collected := self.errors();
	return new HTMLTreeConstructionResult(
		_document: _document,
		_fragment: _fragment,
		_errors: collected,
	);
}
=for comment
Parses input as a fragment inside context (an element, or a tag name
defaulting to "div"). Resets the builder, records the scripting flag,
creates the fragment and a synthetic html root that is appended to the
document and pushed on the stack, sets the form pointer and tokenizer
state from the context, resets the insertion mode, runs parse(), then
moves the root's children into the fragment. Returns a new
HTMLTreeConstructionResult carrying the document, the fragment and
the errors from the parse.

=cut
method parseFragment (
	String input := "",
	context := "div",
	Boolean scripting := false,
) {
	self.reset(input);
	if ( scripting ) {
		_scripting := true;
	}
	else {
		_scripting := false;
	}
	_fragment_parsing := true;
	_fragment := new HTMLDocumentFragment( _owner_document: _document );
	_fragment_context_element := self._normalise_fragment_context(context);

	let root := _document.createElement("html");
	_synthetic_html_element := root;
	_document.appendChild(root);
	self.pushElement(root);

	self._setup_fragment_form_pointer(_fragment_context_element);
	self._set_fragment_tokenizer_state(_fragment_context_element);
	self._reset_insertion_mode_appropriately();

	let outcome := self.parse();
	self._finish_fragment();
	let collected := outcome.errors();
	return new HTMLTreeConstructionResult(
		_document: _document,
		_fragment: _fragment,
		_errors: collected,
	);
}
=for comment
Turns a fragment context into an element. An HTMLElement instance is
returned as is. Anything else is stringified and lower-cased, with
the empty string treated as "div"; "svg" and "math" become elements
in the SVG and MathML namespaces, every other name an element created
with createElement().

=cut
method _normalise_fragment_context ( context ) {
	if ( context instanceof HTMLElement ) {
		return context;
	}
	let tag := lc(_html_tb_string(context));
	if ( tag eq "" ) {
		tag := "div";
	}
	let foreign_roots := {
		svg: SVG_NAMESPACE_URI,
		math: MATHML_NAMESPACE_URI,
	};
	if ( foreign_roots.exists(tag) ) {
		return _document.createElementNS( foreign_roots{(tag)}, tag );
	}
	return _document.createElement(tag);
}
=for comment
Walks from context up through parentNode() and stores the first HTML
form element found as the form element pointer. Leaves the pointer
untouched when no form ancestor exists. Returns self.

=cut
method _setup_fragment_form_pointer ( context ) {
	let cursor := context;
	while ( cursor ≢ null ) {
		let is_form := (
			cursor.nodeKind() eq "element"
			and cursor.namespaceURI() eq HTML_NAMESPACE_URI
			and cursor.tagName() eq "form"
		);
		if ( is_form ) {
			_form_element := cursor;
			last;
		}
		cursor := cursor.parentNode();
	}
	return self;
}
=for comment
Chooses the tokenizer's initial state for a fragment parse from the
context element: title/textarea give rcdata; style, xmp, iframe,
noembed and noframes give rawtext; script gives script_data; noscript
gives rawtext when scripting is on; plaintext gives plaintext;
everything else data. The special states now apply only when the
context is in the HTML namespace, so a foreign element that merely
shares a local name (for example an SVG title) keeps the data state.
The last start tag name is set to the context name for non-data
states and to null otherwise. Does nothing for a null or non-element
context. Returns self.

=cut
method _set_fragment_tokenizer_state ( context ) {
	return self if context ≡ null or context.nodeKind() ne "element";
	let name := context.namespaceURI() eq HTML_NAMESPACE_URI
		? lc(context.localName())
		: "";
	let state := "data";
	if ( name eq "title" or name eq "textarea" ) {
		state := "rcdata";
	}
	else if (
		[ "style", "xmp", "iframe", "noembed", "noframes" ].contains(name)
	) {
		state := "rawtext";
	}
	else if ( name eq "script" ) {
		state := "script_data";
	}
	else if ( name eq "noscript" and _scripting ) {
		state := "rawtext";
	}
	else if ( name eq "plaintext" ) {
		state := "plaintext";
	}
	_tokenizer.setState(state);
	_tokenizer.setLastStartTagName( state eq "data" ? null : name );
	return self;
}
=for comment
Moves every child of the synthetic html root into the fragment, one
first child at a time. Does nothing when either the fragment or the
root is missing. Returns self.

=cut
method _finish_fragment () {
	if ( _fragment ≡ null or _synthetic_html_element ≡ null ) {
		return self;
	}
	let root := _synthetic_html_element;
	while ( root.childNodes().length() > 0 ) {
		let first := root.firstChild();
		_fragment.appendChild(first);
	}
	return self;
}
=for comment
Appends to the builder's error list every tokenizer error not yet
copied, using _tokenizer_error_index as the position of the next
uncopied error. Returns self.

=cut
method _copy_tokenizer_errors () {
	let pending := _tokenizer.errors();
	let total := pending.length();
	while ( _tokenizer_error_index < total ) {
		let error := pending[_tokenizer_error_index];
		_errors.push(error);
		_tokenizer_error_index++;
	}
	return self;
}
// Record a tree-construction parse error and return self.
//
// code and message are stored as given; the current insertion mode is
// stored as the error's state. Line, column and offset are always recorded
// as 0. The token argument is accepted but not used by this method.
method parseError ( String code, String message, token := null ) {
    let error := new HTMLParseError(
        _code: code,
        _message: message,
        _line: 0,
        _column: 0,
        _offset: 0,
        _state: _insertion_mode,
    );
    _errors.push(error);
    return self;
}
// Route a token to either the foreign-content rules or the HTML
// insertion-mode rules and return that handler's result.
// Foreign rules apply only when the builder is in a foreign context and
// the token is not one that must be processed using HTML rules.
method _process_token ( HTMLToken token ) {
    if ( not self._is_foreign_context() ) {
        return self._process_html_token(token);
    }
    if ( self._should_process_using_html_rules(token) ) {
        return self._process_html_token(token);
    }
    return self._process_foreign_content(token);
}
// Dispatch a token to the handler for the current insertion mode and
// return the handler's result.
// An unrecognised mode records an "unsupported-insertion-mode" parse
// error, marks the builder as finished and returns false.
method _process_html_token ( HTMLToken token ) {
    let mode := _insertion_mode;
    return self._mode_initial(token) if mode eq "initial";
    return self._mode_before_html(token) if mode eq "before html";
    return self._mode_before_head(token) if mode eq "before head";
    return self._mode_in_head(token) if mode eq "in head";
    return self._mode_text(token) if mode eq "text";
    return self._mode_after_head(token) if mode eq "after head";
    return self._mode_in_body(token) if mode eq "in body";
    return self._mode_in_table(token) if mode eq "in table";
    return self._mode_in_table_text(token) if mode eq "in table text";
    return self._mode_in_caption(token) if mode eq "in caption";
    return self._mode_in_column_group(token) if mode eq "in column group";
    return self._mode_in_table_body(token) if mode eq "in table body";
    return self._mode_in_row(token) if mode eq "in row";
    return self._mode_in_cell(token) if mode eq "in cell";
    return self._mode_in_select(token) if mode eq "in select";
    return self._mode_in_select_in_table(token)
        if mode eq "in select in table";
    return self._mode_in_template(token) if mode eq "in template";
    return self._mode_in_frameset(token) if mode eq "in frameset";
    return self._mode_after_frameset(token) if mode eq "after frameset";
    return self._mode_after_body(token) if mode eq "after body";
    return self._mode_after_after_body(token) if mode eq "after after body";
    return self._mode_after_after_frameset(token)
        if mode eq "after after frameset";

    self.parseError(
        "unsupported-insertion-mode",
        "Unsupported insertion mode " _ _insertion_mode,
        token,
    );
    _finished := true;
    return false;
}
// Pop open elements until the current node is an HTML-namespace element,
// an HTML integration point or a MathML text integration point. Popping
// also stops when the stack is empty or the current node is not an
// element. Always returns self.
method _pop_until_html_or_integration_point () {
    while ( _open_elements.length() > 0 ) {
        let top := self.currentNode();
        if (
            top.nodeKind() ne "element"
            or top.namespaceURI() eq HTML_NAMESPACE_URI
            or self._is_html_integration_point(top)
            or self._is_mathml_text_integration_point(top)
        ) {
            last;
        }
        self.popElement();
    }
    return self;
}
// Apply the foreign-content (SVG / MathML) rules to a token.
//
// Returns true when the caller must reprocess the same token, false when
// the token has been consumed.
//
// Fix over the previous version: an end tag that does not match any open
// foreign element used to be dropped as soon as the stack walk reached an
// HTML element (so "<div><svg></div>" never closed the div). Such a token
// is now handed to the current insertion mode's HTML rules.
method _process_foreign_content ( HTMLToken token ) {
    let kind := token.type();

    if ( kind eq "Character" ) {
        self.insertCharacter(token.data());
        _frameset_ok := false unless _html_tb_is_ws(token.data());
        return false;
    }
    if ( kind eq "Comment" ) {
        self.insertComment(token);
        return false;
    }
    if ( kind eq "DOCTYPE" ) {
        self.parseError(
            "unexpected-doctype-in-foreign-content",
            "Unexpected doctype in foreign content",
            token,
        );
        return false;
    }

    if ( kind eq "StartTag" ) {
        if ( _html_tb_breakout_start_tag(token) ) {
            self.parseError(
                "html-start-tag-in-foreign-content",
                "HTML start tag exits foreign content",
                token,
            );
            // When only the fragment context is foreign there is nothing
            // to pop, so the token goes straight to the HTML rules.
            return self._process_html_token(token)
                if self._foreign_fragment_context_only();
            self._pop_until_html_or_integration_point();
            return true;
        }
        let ns := self._adjusted_current_node().namespaceURI();
        ns := SVG_NAMESPACE_URI if token.tagName() eq "svg";
        ns := MATHML_NAMESPACE_URI if token.tagName() eq "math";
        self.insertForeignElementForToken( token, ns );
        self.popElement() if token.selfClosing();
        return false;
    }

    if ( kind eq "EndTag" ) {
        let name := token.tagName();
        if ( name eq "br" or name eq "p" ) {
            self.parseError(
                "html-end-tag-in-foreign-content",
                "HTML end tag exits foreign content",
                token,
            );
            return self._process_html_token(token)
                if self._foreign_fragment_context_only();
            self._pop_until_html_or_integration_point();
            return true;
        }

        // Walk the stack from the current node downwards looking for a
        // foreign element whose lower-cased local name matches the tag.
        let top := _open_elements.length() - 1;
        let i := top;
        while ( i >= 0 ) {
            let node := _open_elements[i];
            if ( node.nodeKind() eq "element" ) {
                if ( node.namespaceURI() eq HTML_NAMESPACE_URI ) {
                    // Reached an HTML element below at least one foreign
                    // element: the HTML rules decide what the end tag does.
                    if ( i < top ) {
                        return self._process_html_token(token);
                    }
                    // The current node itself is HTML (only the fragment
                    // context is foreign): the end tag is ignored below.
                    last;
                }
                if ( lc(node.localName()) eq name ) {
                    // Pop up to and including the matching element.
                    while ( _open_elements.length() > i ) {
                        self.popElement();
                    }
                    return false;
                }
            }
            i--;
        }
        self.parseError(
            "unexpected-foreign-end-tag",
            "Foreign end tag ignored",
            token,
        );
        return false;
    }

    return false;
}
// "initial" insertion mode.
// Whitespace is dropped, comments are attached to the document and a
// DOCTYPE is appended before moving to "before html". Any other token
// records a "missing-doctype" parse error, moves to "before html" and asks
// for the token to be reprocessed (returns true).
method _mode_initial ( HTMLToken token ) {
    let kind := token.type();

    if ( kind eq "Character" and _html_tb_is_ws(token.data()) ) {
        return false;
    }
    if ( kind eq "Comment" ) {
        self.insertComment( token, _document );
        return false;
    }
    if ( kind eq "DOCTYPE" ) {
        self.appendDoctype(token);
        self.switchTo("before html");
        return false;
    }

    self.parseError( "missing-doctype", "Document has no doctype", token );
    self.switchTo("before html");
    return true;
}
// "before html" insertion mode.
//
// Returns true when the token must be reprocessed in the next mode.
//
// Changes over the previous version:
//  * a DOCTYPE is reported and ignored instead of creating the html
//    element early;
//  * end tags other than head, body, html and br are reported and ignored
//    instead of creating the html element;
//  * the EOF branch, which was identical to the default branch, is merged
//    into it.
method _mode_before_html ( HTMLToken token ) {
    let kind := token.type();

    if ( kind eq "DOCTYPE" ) {
        self.parseError(
            "unexpected-doctype-before-html",
            "Unexpected doctype before html",
            token,
        );
        return false;
    }
    if ( kind eq "Character" and _html_tb_is_ws(token.data()) ) {
        return false;
    }
    if ( kind eq "Comment" ) {
        self.insertComment( token, _document );
        return false;
    }
    if ( kind eq "StartTag" and token.tagName() eq "html" ) {
        self.insertElementForToken(token);
        self.switchTo("before head");
        return false;
    }
    if (
        kind eq "EndTag"
        and not [ "head", "body", "html", "br" ].contains(token.tagName())
    ) {
        self.parseError(
            "unexpected-end-tag-before-html",
            "Unexpected end tag before html",
            token,
        );
        return false;
    }

    // Anything else (including EOF): synthesise the html element and let
    // "before head" see the token again.
    self.insertHtmlElement("html");
    self.switchTo("before head");
    return true;
}
// "before head" insertion mode.
//
// Returns true when the token must be reprocessed in "in head".
//
// Changes over the previous version:
//  * an unexpected html start tag still reports the same parse error but
//    now also merges its missing attributes into the document element, as
//    the "in body" handling of that tag does;
//  * a DOCTYPE is reported and ignored;
//  * end tags other than head, body, html and br are reported and ignored
//    instead of creating the head element.
method _mode_before_head ( HTMLToken token ) {
    let kind := token.type();

    if ( kind eq "Character" and _html_tb_is_ws(token.data()) ) {
        return false;
    }
    if ( kind eq "Comment" ) {
        self.insertComment(token);
        return false;
    }
    if ( kind eq "DOCTYPE" ) {
        self.parseError(
            "unexpected-doctype-before-head",
            "Unexpected doctype before head",
            token,
        );
        return false;
    }
    if ( kind eq "StartTag" and token.tagName() eq "head" ) {
        self.insertElementForToken(token);
        self.switchTo("in head");
        return false;
    }
    if ( kind eq "StartTag" and token.tagName() eq "html" ) {
        self.parseError(
            "unexpected-html-start-tag-before-head",
            "Unexpected html start tag before head",
            token,
        );
        self._merge_missing_attributes( _document.documentElement(), token );
        return false;
    }
    if (
        kind eq "EndTag"
        and not [ "head", "body", "html", "br" ].contains(token.tagName())
    ) {
        self.parseError(
            "unexpected-end-tag-before-head",
            "Unexpected end tag before head",
            token,
        );
        return false;
    }

    // Anything else: synthesise the head element and reprocess.
    self.insertHtmlElement("head");
    self.switchTo("in head");
    return true;
}
// "in head" insertion mode.
//
// Returns true when the token must be reprocessed, false when consumed.
//
// Changes over the previous version:
//  * a DOCTYPE is reported and ignored;
//  * an html start tag is handled by the "in body" rules instead of
//    closing the head;
//  * a second head start tag is reported and ignored;
//  * end tags other than body, html and br are reported and ignored
//    instead of closing the head;
//  * when the current node is an open noscript element, an otherwise
//    unhandled token pops that noscript and is reprocessed, instead of
//    popping noscript and leaving head on the stack while switching to
//    "after head".
method _mode_in_head ( HTMLToken token ) {
    let kind := token.type();

    if ( kind eq "Character" and _html_tb_is_ws(token.data()) ) {
        self.insertCharacter(token.data());
        return false;
    }
    if ( kind eq "Comment" ) {
        self.insertComment(token);
        return false;
    }
    if ( kind eq "DOCTYPE" ) {
        self.parseError(
            "unexpected-doctype-in-head",
            "Unexpected doctype in head",
            token,
        );
        return false;
    }

    if ( kind eq "StartTag" ) {
        let name := token.tagName();
        if ( name eq "html" ) {
            return self._mode_in_body(token);
        }
        if ( name eq "title" ) {
            self.insertElementForToken(token);
            self._enter_text_mode( "rcdata", name );
            return false;
        }
        if ( name eq "style" or name eq "noframes" ) {
            self.insertElementForToken(token);
            self._enter_text_mode( "rawtext", name );
            return false;
        }
        if ( name eq "script" ) {
            self.insertElementForToken(token);
            self._enter_text_mode( "script_data", name );
            return false;
        }
        if ( name eq "noscript" ) {
            self.insertElementForToken(token);
            // With scripting enabled the content is raw text; otherwise
            // the element stays open and its content is parsed as markup.
            if ( _scripting ) {
                self._enter_text_mode( "rawtext", name );
            }
            return false;
        }
        if ( name eq "template" ) {
            self.insertElementForToken(token);
            self._push_active_formatting_marker();
            _template_insertion_modes.push("in template");
            self.switchTo("in template");
            return false;
        }
        if ( [ "base", "link", "meta" ].contains(name) ) {
            self.insertElementForToken(token);
            self.popElement();
            return false;
        }
        if ( name eq "head" ) {
            self.parseError(
                "unexpected-head-start-tag-in-head",
                "Unexpected head start tag in head",
                token,
            );
            return false;
        }
    }

    if ( kind eq "EndTag" ) {
        let name := token.tagName();
        if ( name eq "noscript" ) {
            self.popElement() if self._current_node_is("noscript");
            return false;
        }
        if ( name eq "template" ) {
            return self._end_template(token);
        }
        if ( name eq "head" ) {
            self.popElement();
            self.switchTo("after head");
            return false;
        }
        if ( not [ "body", "html", "br" ].contains(name) ) {
            self.parseError(
                "unexpected-end-tag-in-head",
                "Unexpected end tag in head",
                token,
            );
            return false;
        }
    }

    // Anything else. Leave an open noscript first so that the pop below
    // always removes the head element.
    if ( self._current_node_is("noscript") ) {
        self.parseError(
            "unexpected-token-in-head-noscript",
            "Unexpected token closes noscript in head",
            token,
        );
        self.popElement();
        return true;
    }
    if ( kind eq "EOF" ) {
        self.parseError( "eof-in-head", "EOF in head", token );
    }
    self.popElement();
    self.switchTo("after head");
    return true;
}
// "text" insertion mode (RCDATA / RAWTEXT / script data content).
// Characters are inserted. EOF and a matching end tag both pop the
// current element, restore the original insertion mode and put the
// tokenizer back into the "data" state; EOF additionally records a parse
// error and is reprocessed (returns true). Any other token records
// "unexpected-token-in-text-mode" and is dropped.
method _mode_text ( HTMLToken token ) {
    let kind := token.type();

    if ( kind eq "Character" ) {
        self.insertCharacter(token.data());
        return false;
    }

    let at_eof := false;
    let closes_current := false;
    if ( kind eq "EOF" ) {
        at_eof := true;
    }
    else if ( kind eq "EndTag" ) {
        let current := self.currentNode();
        if (
            current.nodeKind() eq "element"
            and current.tagName() eq token.tagName()
        ) {
            closes_current := true;
        }
    }

    if ( at_eof or closes_current ) {
        if ( at_eof ) {
            self.parseError(
                "eof-in-text-mode",
                "EOF in text insertion mode",
                token,
            );
        }
        self.popElement();
        self.switchTo(_original_insertion_mode);
        _tokenizer.setState("data");
        _tokenizer.setLastStartTagName(null);
        return at_eof;
    }

    self.parseError(
        "unexpected-token-in-text-mode",
        "Unexpected token in text insertion mode",
        token,
    );
    return false;
}
// "after head" insertion mode.
//
// Returns true when the token must be reprocessed in "in body".
//
// Changes over the previous version:
//  * whitespace is inserted at the current position instead of creating
//    the body element, so whitespace between head and body (or frameset)
//    is kept outside the body;
//  * an html start tag is handled by the "in body" rules while staying in
//    this mode. Previously the mode switched to "in body" without a body
//    element ever being inserted.
method _mode_after_head ( HTMLToken token ) {
    let kind := token.type();

    if ( kind eq "Character" and _html_tb_is_ws(token.data()) ) {
        self.insertCharacter(token.data());
        return false;
    }
    if ( kind eq "Comment" ) {
        self.insertComment(token);
        return false;
    }
    if ( kind eq "StartTag" ) {
        let name := token.tagName();
        if ( name eq "html" ) {
            return self._mode_in_body(token);
        }
        if ( name eq "body" ) {
            self.insertElementForToken(token);
            self.switchTo("in body");
            return false;
        }
        if ( name eq "frameset" ) {
            self.insertElementForToken(token);
            self.switchTo("in frameset");
            return false;
        }
    }

    // Anything else: synthesise the body element and reprocess.
    self.insertHtmlElement("body");
    self.switchTo("in body");
    return true;
}
// "in body" insertion mode: top-level dispatch on the token type.
// Start and end tags are delegated to _start_tag_in_body and
// _end_tag_in_body. At EOF the builder either hands over to
// "in template" (when a template is still open, returning true) or marks
// itself finished.
method _mode_in_body ( HTMLToken token ) {
    let kind := token.type();

    if ( kind eq "StartTag" ) {
        return self._start_tag_in_body(token);
    }
    if ( kind eq "EndTag" ) {
        return self._end_tag_in_body(token);
    }

    if ( kind eq "Character" ) {
        self._reconstruct_active_formatting_elements();
        self.insertCharacter(token.data());
        // Only non-whitespace text rules out a later frameset.
        _frameset_ok := false unless _html_tb_is_ws(token.data());
    }
    else if ( kind eq "Comment" ) {
        self.insertComment(token);
    }
    else if ( kind eq "DOCTYPE" ) {
        self.parseError(
            "unexpected-doctype-in-body",
            "Unexpected doctype in body",
            token,
        );
    }
    else if ( kind eq "EOF" ) {
        if ( self._has_template_on_stack() ) {
            self.switchTo("in template");
            return true;
        }
        _finished := true;
    }
    return false;
}
// Handle a start tag using the "in body" rules. Always returns false
// (the token is consumed here).
//
// Changes over the previous version:
//  * stray caption, col, colgroup, frame, head, tbody, td, tfoot, th,
//    thead and tr start tags are reported and ignored instead of being
//    inserted as ordinary elements;
//  * a table start tag no longer reconstructs the active formatting
//    elements, so formatting elements are not re-opened around the table;
//  * in the final catch-all branch a self-closing flag no longer pops the
//    element: "<span/>x" keeps "x" inside the span. Void elements are
//    still popped immediately.
method _start_tag_in_body ( HTMLToken token ) {
    let name := token.tagName();

    if ( name eq "html" ) {
        self.parseError(
            "unexpected-html-start-tag",
            "Unexpected html start tag in body",
            token,
        );
        self._merge_missing_attributes( _document.documentElement(), token );
        return false;
    }
    if ( name eq "body" ) {
        self.parseError(
            "unexpected-body-start-tag",
            "Unexpected body start tag in body",
            token,
        );
        self._merge_missing_attributes( _body_element, token );
        return false;
    }
    if ( self._start_headish_in_body(token) ≢ null ) {
        return false;
    }
    if (
        [
            "caption", "col", "colgroup", "frame", "head", "tbody",
            "td", "tfoot", "th", "thead", "tr",
        ].contains(name)
    ) {
        // These tags only have meaning inside table, frameset or document
        // structure; in body content they are dropped.
        self.parseError(
            "ignored-start-tag-in-body",
            "Ignored start tag in body",
            token,
        );
        return false;
    }
    if ( name eq "table" ) {
        self._close_p(token) if self.hasElementInButtonScope("p");
        self.insertElementForToken(token);
        _frameset_ok := false;
        self.switchTo("in table");
        return false;
    }
    if ( name eq "select" ) {
        self._reconstruct_active_formatting_elements();
        self.insertElementForToken(token);
        _frameset_ok := false;
        // This method is also reached from the table modes, in which case
        // _insertion_mode still names the calling table mode.
        if (
            [ "in table", "in caption", "in table body", "in row", "in cell" ]
                .contains(_insertion_mode)
        ) {
            self.switchTo("in select in table");
        }
        else {
            self.switchTo("in select");
        }
        return false;
    }
    if ( name eq "math" ) {
        self._reconstruct_active_formatting_elements();
        self.insertForeignElementForToken( token, MATHML_NAMESPACE_URI );
        self.popElement() if token.selfClosing();
        return false;
    }
    if ( name eq "svg" ) {
        self._reconstruct_active_formatting_elements();
        self.insertForeignElementForToken( token, SVG_NAMESPACE_URI );
        self.popElement() if token.selfClosing();
        return false;
    }
    if ( name eq "frameset" ) {
        if ( not _frameset_ok or _body_element ≡ null ) {
            self.parseError(
                "unexpected-frameset-start-tag",
                "frameset start tag ignored",
                token,
            );
            return false;
        }
        // Replace the body with the frameset: unwind the stack down to
        // and including the body, then detach the body from the tree.
        while (
            _open_elements.length() > 0 and
            self.currentNode() ≢ _body_element
        ) {
            self.popElement();
        }
        self.popElement() if self.currentNode() ≡ _body_element;
        _body_element.remove();
        _body_element := null;
        self.insertElementForToken(token);
        self.switchTo("in frameset");
        return false;
    }
    if ( _html_tb_block_element(name) ) {
        self._close_p(token) if self.hasElementInButtonScope("p");
        self.insertElementForToken(token);
        return false;
    }
    if ( _html_tb_heading_element(name) ) {
        self._close_p(token) if self.hasElementInButtonScope("p");
        if (
            self.currentNode().nodeKind() eq "element" and
            _html_tb_heading_element(self.currentNode().tagName())
        ) {
            self.parseError(
                "nested-heading",
                "Nested heading start tag",
                token,
            );
            self.popElement();
        }
        self.insertElementForToken(token);
        return false;
    }
    if ( name eq "pre" or name eq "listing" ) {
        self._close_p(token) if self.hasElementInButtonScope("p");
        self.insertElementForToken(token);
        _frameset_ok := false;
        return false;
    }
    if ( name eq "form" ) {
        if ( _form_element ≢ null ) {
            self.parseError(
                "nested-form",
                "Nested form start tag ignored",
                token,
            );
            return false;
        }
        self._close_p(token) if self.hasElementInButtonScope("p");
        _form_element := self.insertElementForToken(token);
        return false;
    }
    if ( name eq "li" ) {
        self._close_list_item( "li", token )
            if self.hasElementInListItemScope("li");
        self._close_p(token) if self.hasElementInButtonScope("p");
        self.insertElementForToken(token);
        return false;
    }
    if ( name eq "dd" or name eq "dt" ) {
        self._close_list_item( "dd", token ) if self.hasElementInScope("dd");
        self._close_list_item( "dt", token ) if self.hasElementInScope("dt");
        self._close_p(token) if self.hasElementInButtonScope("p");
        self.insertElementForToken(token);
        return false;
    }
    if ( name eq "button" ) {
        if ( self.hasElementInScope("button") ) {
            self.parseError(
                "nested-button",
                "Nested button start tag",
                token,
            );
            self._close_button(token);
        }
        self._reconstruct_active_formatting_elements();
        self.insertElementForToken(token);
        _frameset_ok := false;
        return false;
    }
    if ( name eq "a" ) {
        let active_a := self._find_active_formatting_element("a");
        if ( active_a ≢ null ) {
            self.parseError(
                "nested-anchor",
                "Nested a start tag",
                token,
            );
            // Close the earlier anchor first, then make sure it is gone
            // from both lists.
            self._adoption_agency_end_tag( "a", token );
            self._remove_from_open_elements(active_a);
            self._remove_from_active_formatting_elements(active_a);
        }
        self._reconstruct_active_formatting_elements();
        self._push_active_formatting_element(self.insertElementForToken(token));
        return false;
    }
    if ( _html_tb_formatting_element(name) ) {
        self._reconstruct_active_formatting_elements();
        self._push_active_formatting_element(self.insertElementForToken(token));
        return false;
    }
    if ( [ "br", "area", "embed", "img", "input", "wbr" ].contains(name) ) {
        self._reconstruct_active_formatting_elements();
        self._insert_from_token_then_pop(token);
        // A hidden input is the only one of these that still allows a
        // frameset afterwards.
        _frameset_ok := false unless name eq "input"
            and lc(token.getAttribute("type") ≡ null ? "" : token.getAttribute("type"))
                eq "hidden";
        return false;
    }
    if ( name eq "hr" ) {
        self._close_p(token) if self.hasElementInButtonScope("p");
        self._insert_from_token_then_pop(token);
        _frameset_ok := false;
        return false;
    }
    if ( name eq "plaintext" ) {
        self._close_p(token) if self.hasElementInButtonScope("p");
        self.insertElementForToken(token);
        _tokenizer.setState("plaintext");
        _frameset_ok := false;
        return false;
    }
    if ( name eq "template" ) {
        self.insertElementForToken(token);
        self._push_active_formatting_marker();
        _template_insertion_modes.push("in template");
        self.switchTo("in template");
        return false;
    }

    // Any other start tag: an ordinary element. A trailing "/" on a
    // non-void HTML element is ignored, so only void elements are popped.
    self._reconstruct_active_formatting_elements();
    self.insertElementForToken(token);
    self.popElement() if _html_tb_void_element(name);
    return false;
}
// Handle an end tag using the "in body" rules. Returns the reprocess flag
// from _close_body / _end_template for html and template end tags, and
// false otherwise.
//
// Change over the previous version: "</br>" is treated like a "<br>"
// start tag. The element is now inserted through insertHtmlElement() and
// popped, instead of being appended directly to the current node, and
// _frameset_ok is cleared as it is for a real br start tag.
// NOTE(review): this assumes insertHtmlElement() accepts any HTML tag
// name and pushes the new element, as its other call sites suggest.
method _end_tag_in_body ( HTMLToken token ) {
    let name := token.tagName();

    // Fragment case: the end tag of the context element while the
    // tokenizer is still in a special content state just returns the
    // tokenizer to the data state.
    if (
        _fragment_parsing
        and _fragment_context_element ≢ null
        and _fragment_context_element.nodeKind() eq "element"
        and lc(_fragment_context_element.localName()) eq name
        and _tokenizer.state() ne "data"
        and _tokenizer.state() ne "plaintext"
    ) {
        _tokenizer.setState("data");
        _tokenizer.setLastStartTagName(null);
        return false;
    }
    if ( name eq "body" ) {
        self._close_body(token);
        return false;
    }
    if ( name eq "html" ) {
        return self._close_body(token);
    }
    if ( name eq "p" ) {
        self._close_p(token);
        return false;
    }
    if ( _html_tb_block_element(name) ) {
        if ( not self.hasElementInScope(name) ) {
            self.parseError(
                "unexpected-end-tag",
                name _ " element is not in scope",
                token,
            );
            return false;
        }
        self._generate_implied_end_tags();
        self.parseError(
            "misnested-end-tag",
            "Misnested " _ name _ " end tag",
            token,
        ) unless self._current_node_is(name);
        self._pop_until_name(name);
        return false;
    }
    if ( _html_tb_list_item_element(name) ) {
        self._close_list_item( name, token );
        return false;
    }
    if ( _html_tb_heading_element(name) ) {
        self._close_heading(token);
        return false;
    }
    if ( name eq "form" ) {
        // The form pointer is cleared even when the end tag turns out to
        // be unusable.
        let form := _form_element;
        _form_element := null;
        if ( form ≡ null or self._stack_index(form) < 0 ) {
            self.parseError(
                "unexpected-form-end-tag",
                "form element is not in scope",
                token,
            );
            return false;
        }
        self._generate_implied_end_tags();
        self.parseError(
            "misnested-form-end-tag",
            "Misnested form end tag",
            token,
        ) unless self.currentNode() ≡ form;
        self._remove_from_open_elements(form);
        return false;
    }
    if ( name eq "template" ) {
        return self._end_template(token);
    }
    if ( name eq "button" ) {
        self._close_button(token);
        return false;
    }
    if ( _html_tb_formatting_element(name) ) {
        self._adoption_agency_end_tag( name, token );
        return false;
    }
    if ( name eq "br" ) {
        self.parseError(
            "unexpected-br-end-tag",
            "br end tag treated as start tag",
            token,
        );
        self._reconstruct_active_formatting_elements();
        self.insertHtmlElement("br");
        self.popElement();
        _frameset_ok := false;
        return false;
    }
    self._generic_end_tag( name, token );
    return false;
}
// "in table" insertion mode.
// Returns true when the token must be reprocessed (normally after the
// insertion mode has changed), false when it has been consumed.
// Tokens with no table-specific handling record
// "unexpected-token-in-table" and are processed by the "in body" rules
// with foster parenting switched on for the duration of that call.
method _mode_in_table ( HTMLToken token ) {
    let kind := token.type();

    if ( kind eq "Character" ) {
        // Collect text in "in table text"; the current mode is remembered
        // so that it can be restored afterwards.
        _pending_table_character_tokens := [ token.data() ];
        _original_insertion_mode := _insertion_mode;
        self.switchTo("in table text");
        return false;
    }
    if ( kind eq "Comment" ) {
        self.insertComment(token);
        return false;
    }
    if ( kind eq "DOCTYPE" ) {
        self.parseError(
            "unexpected-doctype-in-table",
            "Unexpected doctype in table",
            token,
        );
        return false;
    }
    if ( kind eq "EOF" ) {
        return self._mode_in_body(token);
    }

    if ( kind eq "StartTag" ) {
        let tag := token.tagName();

        if ( tag eq "caption" ) {
            self._clear_stack_back_to_table_context();
            self._push_active_formatting_marker();
            self.insertElementForToken(token);
            self.switchTo("in caption");
            return false;
        }
        if ( tag eq "colgroup" ) {
            self._clear_stack_back_to_table_context();
            self.insertElementForToken(token);
            self.switchTo("in column group");
            return false;
        }
        if ( tag eq "col" ) {
            // Imply the missing colgroup, then let "in column group"
            // handle the col itself.
            self._clear_stack_back_to_table_context();
            self.insertHtmlElement("colgroup");
            self.switchTo("in column group");
            return true;
        }
        if ( _html_tb_table_section_element(tag) ) {
            self._clear_stack_back_to_table_context();
            self.insertElementForToken(token);
            self.switchTo("in table body");
            return false;
        }
        if ( tag eq "tr" or _html_tb_table_cell_element(tag) ) {
            // Imply the missing tbody and reprocess there.
            self._clear_stack_back_to_table_context();
            self.insertHtmlElement("tbody");
            self.switchTo("in table body");
            return true;
        }
        if ( tag eq "table" ) {
            self.parseError(
                "nested-table",
                "Nested table start tag closes current table",
                token,
            );
            if ( not self.hasElementInTableScope("table") ) {
                return false;
            }
            self._pop_until_name("table");
            self._reset_insertion_mode_appropriately();
            return true;
        }
        if ( tag eq "input" ) {
            let type_attr := token.getAttribute("type");
            if ( lc(type_attr ≡ null ? "" : type_attr) eq "hidden" ) {
                self._insert_from_token_then_pop(token);
                return false;
            }
        }
        if ( tag eq "form" ) {
            if ( _form_element ≢ null or self._has_template_on_stack() ) {
                self.parseError(
                    "form-in-table-ignored",
                    "form in table ignored",
                    token,
                );
                return false;
            }
            _form_element := self.insertElementForToken(token);
            self.popElement();
            return false;
        }
        if ( tag eq "template" ) {
            self.insertElementForToken(token);
            self._push_active_formatting_marker();
            _template_insertion_modes.push("in template");
            self.switchTo("in template");
            return false;
        }
        if ( _html_tb_headish_element(tag) ) {
            return self._process_using_in_head(token);
        }
    }
    else if ( kind eq "EndTag" ) {
        let tag := token.tagName();

        if ( tag eq "table" ) {
            if ( not self.hasElementInTableScope("table") ) {
                self.parseError(
                    "table-not-in-table-scope",
                    "table end tag not in table scope",
                    token,
                );
                return false;
            }
            self._pop_until_name("table");
            if ( self._current_node_is("template") ) {
                // Back directly inside a template: its current insertion
                // mode entry is replaced by "in template".
                if ( _template_insertion_modes.length() > 0 ) {
                    _template_insertion_modes.pop();
                }
                _template_insertion_modes.push("in template");
                self.switchTo("in template");
            }
            else {
                self._reset_insertion_mode_appropriately();
            }
            return false;
        }
        if ( tag eq "template" ) {
            return self._end_template(token);
        }
        if ( tag eq "html" ) {
            return false;
        }
    }

    self.parseError(
        "unexpected-token-in-table",
        "Unexpected token in table",
        token,
    );
    _foster_parenting := true;
    let again := self._mode_in_body(token);
    _foster_parenting := false;
    return again;
}
// "in table text" insertion mode.
//
// Character tokens are buffered. The first non-character token flushes
// the buffer, restores the original insertion mode and is reprocessed
// there (returns true).
//
// Changes over the previous version: when the buffered text contains
// anything other than whitespace a parse error is recorded and
// _frameset_ok is cleared, as for non-whitespace text in body content.
// The two identical insertion loops are merged.
method _mode_in_table_text ( HTMLToken token ) {
    if ( token.type() eq "Character" ) {
        _pending_table_character_tokens.push(token.data());
        return false;
    }

    let all_ws := true;
    for ( let data in _pending_table_character_tokens ) {
        all_ws := false unless _html_tb_is_ws(data);
    }

    if ( not all_ws ) {
        self.parseError(
            "unexpected-text-in-table",
            "Non-whitespace text in table",
            token,
        );
        _frameset_ok := false;
    }

    // Whitespace-only text is inserted normally; anything else is
    // inserted with foster parenting switched on.
    _foster_parenting := true unless all_ws;
    for ( let data in _pending_table_character_tokens ) {
        self.insertCharacter(data);
    }
    _foster_parenting := false unless all_ws;

    _pending_table_character_tokens := [];
    self.switchTo(_original_insertion_mode);
    return true;
}
// "in caption" insertion mode.
// A caption end tag closes the caption. Table-structure start tags and a
// table end tag close it too and are then reprocessed, provided
// _close_caption() reports success. A fixed list of other end tags is
// reported and ignored; everything else uses the "in body" rules.
method _mode_in_caption ( HTMLToken token ) {
    let kind := token.type();

    if ( kind eq "StartTag" ) {
        if ( _html_tb_table_structural_element(token.tagName()) ) {
            if ( self._close_caption(token) ) {
                return true;
            }
            return false;
        }
    }
    else if ( kind eq "EndTag" ) {
        let tag := token.tagName();
        if ( tag eq "caption" ) {
            self._close_caption(token);
            return false;
        }
        if ( tag eq "table" ) {
            if ( self._close_caption(token) ) {
                return true;
            }
            return false;
        }
        if (
            [ "body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr" ]
                .contains(tag)
        ) {
            self.parseError(
                "ignored-end-tag-in-caption",
                "Ignored end tag in caption",
                token,
            );
            return false;
        }
    }

    return self._mode_in_body(token);
}
// "in column group" insertion mode.
// Whitespace and comments are inserted, col elements are inserted and
// immediately popped, and EOF is handled by the "in body" rules. A
// colgroup end tag, or any otherwise unhandled token, closes the colgroup
// only when it is the current node; unhandled tokens are then
// reprocessed in "in table" (returns true), otherwise they are dropped.
method _mode_in_column_group ( HTMLToken token ) {
    let kind := token.type();

    if ( kind eq "Character" and _html_tb_is_ws(token.data()) ) {
        self.insertCharacter(token.data());
        return false;
    }
    if ( kind eq "Comment" ) {
        self.insertComment(token);
        return false;
    }
    if ( kind eq "EOF" ) {
        return self._mode_in_body(token);
    }
    if ( kind eq "StartTag" and token.tagName() eq "col" ) {
        self._insert_from_token_then_pop(token);
        return false;
    }
    if ( kind eq "EndTag" and token.tagName() eq "template" ) {
        return self._end_template(token);
    }

    // Both remaining cases need the colgroup to be the current node.
    let explicit_close := false;
    if ( kind eq "EndTag" and token.tagName() eq "colgroup" ) {
        explicit_close := true;
    }
    if ( not self._current_node_is("colgroup") ) {
        return false;
    }
    self.popElement();
    self.switchTo("in table");
    if ( explicit_close ) {
        return false;
    }
    return true;
}
// "in table body" insertion mode.
// Returns true when the token must be reprocessed after a mode switch.
// Tokens not handled here fall through to the "in table" rules.
method _mode_in_table_body ( HTMLToken token ) {
    let kind := token.type();
    // Set when the token has to close the current table section and then
    // be reprocessed in "in table".
    let leaves_section := false;

    if ( kind eq "StartTag" ) {
        let tag := token.tagName();
        if ( tag eq "tr" ) {
            self._clear_stack_back_to_table_body_context();
            self.insertElementForToken(token);
            self.switchTo("in row");
            return false;
        }
        if ( _html_tb_table_cell_element(tag) ) {
            // Imply the missing tr and reprocess the cell there.
            self._clear_stack_back_to_table_body_context();
            self.insertHtmlElement("tr");
            self.switchTo("in row");
            return true;
        }
        if (
            [ "caption", "col", "colgroup", "tbody", "tfoot", "thead" ]
                .contains(tag)
        ) {
            leaves_section := true;
        }
    }
    else if ( kind eq "EndTag" ) {
        let tag := token.tagName();
        if ( _html_tb_table_section_element(tag) ) {
            if ( not self.hasElementInTableScope(tag) ) {
                self.parseError(
                    "table-section-not-in-scope",
                    "Table section end tag not in scope",
                    token,
                );
                return false;
            }
            self._clear_stack_back_to_table_body_context();
            self.popElement();
            self.switchTo("in table");
            return false;
        }
        if ( tag eq "table" ) {
            leaves_section := true;
        }
    }

    if ( leaves_section ) {
        let current := self.currentNode().tagName();
        if ( not _html_tb_table_section_element(current) ) {
            self.parseError(
                "table-body-context-missing",
                "No table body context to close",
                token,
            );
            return false;
        }
        self._clear_stack_back_to_table_body_context();
        self.popElement();
        self.switchTo("in table");
        return true;
    }

    return self._mode_in_table(token);
}
// "in row" insertion mode.
//
// Returns true when the token must be reprocessed after the row has been
// closed. Tokens not handled here fall through to the "in table" rules.
//
// Change over the previous version: a tr start tag used to pop the
// current node without checking that a tr is in table scope. When parsing
// a fragment whose context is a tr (no tr on the stack) that removed the
// wrong element. It now goes through the same scope-checked path as the
// other row-closing tokens and is ignored with a parse error when there
// is no tr to close.
method _mode_in_row ( HTMLToken token ) {
    let kind := token.type();
    // Set when the token closes the current row and is then reprocessed
    // in "in table body".
    let closes_row := false;

    if ( kind eq "StartTag" ) {
        let tag := token.tagName();
        if ( _html_tb_table_cell_element(tag) ) {
            self._clear_stack_back_to_table_row_context();
            self.insertElementForToken(token);
            self._push_active_formatting_marker();
            self.switchTo("in cell");
            return false;
        }
        if (
            [ "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr" ]
                .contains(tag)
        ) {
            closes_row := true;
        }
    }
    else if ( kind eq "EndTag" ) {
        let tag := token.tagName();
        if ( tag eq "tr" ) {
            if ( not self.hasElementInTableScope("tr") ) {
                self.parseError(
                    "tr-not-in-table-scope",
                    "tr end tag not in table scope",
                    token,
                );
                return false;
            }
            self._clear_stack_back_to_table_row_context();
            self.popElement();
            self.switchTo("in table body");
            return false;
        }
        if ( [ "table", "tbody", "tfoot", "thead" ].contains(tag) ) {
            closes_row := true;
        }
    }

    if ( closes_row ) {
        if ( not self.hasElementInTableScope("tr") ) {
            self.parseError(
                "tr-not-in-table-scope",
                "No tr to close",
                token,
            );
            return false;
        }
        self._clear_stack_back_to_table_row_context();
        self.popElement();
        self.switchTo("in table body");
        return true;
    }

    return self._mode_in_table(token);
}
// "in cell" insertion mode.
// A cell end tag closes the cell when that cell is in table scope. Cell
// start tags and table-structure tokens close the cell and are
// reprocessed (returns true), except when the fragment context is itself
// a table cell, in which case they are reported and ignored. A fixed list
// of end tags is reported and ignored; everything else uses the
// "in body" rules.
method _mode_in_cell ( HTMLToken token ) {
    let kind := token.type();

    if ( kind eq "StartTag" ) {
        let tag := token.tagName();
        if ( _html_tb_table_cell_element(tag) ) {
            if ( self._fragment_context_is_table_cell() ) {
                self.parseError(
                    "ignored-cell-start-tag-in-cell-fragment",
                    "Ignored cell start tag in cell fragment",
                    token,
                );
                return false;
            }
            self._close_cell(token);
            return true;
        }
        if (
            [ "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr" ]
                .contains(tag)
        ) {
            if ( self._fragment_context_is_table_cell() ) {
                self.parseError(
                    "ignored-table-token-in-cell-fragment",
                    "Ignored table token in cell fragment",
                    token,
                );
                return false;
            }
            self._close_cell(token);
            return true;
        }
    }
    else if ( kind eq "EndTag" ) {
        let tag := token.tagName();
        if ( _html_tb_table_cell_element(tag) ) {
            if ( not self.hasElementInTableScope(tag) ) {
                self.parseError(
                    "cell-not-in-table-scope",
                    "Cell end tag not in table scope",
                    token,
                );
                return false;
            }
            self._close_cell(token);
            return false;
        }
        if ( [ "table", "tbody", "tfoot", "thead", "tr" ].contains(tag) ) {
            if ( self._fragment_context_is_table_cell() ) {
                self.parseError(
                    "ignored-table-token-in-cell-fragment",
                    "Ignored table token in cell fragment",
                    token,
                );
                return false;
            }
            self._close_cell(token);
            return true;
        }
        if ( [ "body", "caption", "col", "colgroup", "html" ].contains(tag) ) {
            self.parseError(
                "ignored-end-tag-in-cell",
                "Ignored end tag in cell",
                token,
            );
            return false;
        }
    }

    return self._mode_in_body(token);
}
// "in select" insertion mode.
//
// Returns true when the token must be reprocessed, false when consumed.
//
// Changes over the previous version:
//  * table-structure tokens are handed to "in select in table" only when
//    a table is in table scope. Previously they were always delegated,
//    which closed selects that are not inside a table and, when no select
//    was in scope (a fragment with a select context), asked for the token
//    to be reprocessed again and again. Outside a table they are now
//    ignored like any other unexpected tag;
//  * input, keygen and textarea start tags close the select and are
//    reprocessed; they are ignored when no select is in scope.
method _mode_in_select ( HTMLToken token ) {
    let kind := token.type();

    if ( kind eq "Character" ) {
        self.insertCharacter(token.data());
        return false;
    }
    if ( kind eq "Comment" ) {
        self.insertComment(token);
        return false;
    }
    if ( kind eq "DOCTYPE" ) {
        self.parseError(
            "unexpected-doctype-in-select",
            "Unexpected doctype in select",
            token,
        );
        return false;
    }

    if ( kind eq "StartTag" ) {
        let name := token.tagName();
        if ( name eq "html" ) {
            return self._mode_in_body(token);
        }
        if ( name eq "option" ) {
            self.popElement() if self._current_node_is("option");
            self.insertElementForToken(token);
            return false;
        }
        if ( name eq "optgroup" ) {
            self.popElement() if self._current_node_is("option");
            self.popElement() if self._current_node_is("optgroup");
            self.insertElementForToken(token);
            return false;
        }
        if ( name eq "select" ) {
            self.parseError(
                "nested-select",
                "Nested select start tag closes select",
                token,
            );
            return self._select_end(token);
        }
        if ( [ "input", "keygen", "textarea" ].contains(name) ) {
            self.parseError(
                "form-control-in-select",
                "Form control start tag closes select",
                token,
            );
            // Without a select to close the tag is simply dropped, which
            // also guarantees that reprocessing makes progress.
            return false unless self.hasElementInScope("select");
            self._pop_until_name("select");
            self._reset_insertion_mode_appropriately();
            return true;
        }
        if (
            ( _html_tb_table_structural_element(name) or name eq "table" )
            and self.hasElementInTableScope("table")
        ) {
            return self._mode_in_select_in_table(token);
        }
        self.parseError(
            "ignored-start-tag-in-select",
            "Ignored start tag in select",
            token,
        );
        return false;
    }

    if ( kind eq "EndTag" ) {
        let name := token.tagName();
        if ( name eq "option" ) {
            self.popElement() if self._current_node_is("option");
            return false;
        }
        if ( name eq "optgroup" ) {
            self.popElement() if self._current_node_is("option");
            self.popElement() if self._current_node_is("optgroup");
            return false;
        }
        if ( name eq "select" ) {
            return self._select_end(token);
        }
        if (
            ( _html_tb_table_structural_element(name) or name eq "table" )
            and self.hasElementInTableScope("table")
        ) {
            return self._mode_in_select_in_table(token);
        }
    }

    if ( kind eq "EOF" ) {
        return self._mode_in_body(token);
    }
    return false;
}
// Close the open select element: pop the stack through the select and
// reset the insertion mode. When no select is in scope a
// "select-not-in-scope" parse error is recorded instead.
// Always returns false.
method _select_end ( HTMLToken token ) {
    if ( self.hasElementInScope("select") ) {
        self._pop_until_name("select");
        self._reset_insertion_mode_appropriately();
    }
    else {
        self.parseError(
            "select-not-in-scope",
            "select element is not in scope",
            token,
        );
    }
    return false;
}
// "in select in table" insertion mode.
//
// Table-structure start and end tags close the select and are then
// reprocessed (returns true); every other token uses the "in select"
// rules.
//
// Changes over the previous version:
//  * an end tag whose element is not in table scope is reported and
//    ignored instead of closing the select;
//  * when no select is in scope nothing can be closed, so the token is
//    ignored. Previously the method still returned true, which made the
//    caller reprocess the same token without any state having changed.
method _mode_in_select_in_table ( HTMLToken token ) {
    let kind := token.type();

    if ( kind eq "StartTag" or kind eq "EndTag" ) {
        let name := token.tagName();
        if ( _html_tb_table_structural_element(name) or name eq "table" ) {
            self.parseError(
                "table-token-in-select",
                "Table token in select closes select",
                token,
            );
            if ( kind eq "EndTag" and not self.hasElementInTableScope(name) ) {
                return false;
            }
            if ( not self.hasElementInScope("select") ) {
                return false;
            }
            self._pop_until_name("select");
            self._reset_insertion_mode_appropriately();
            return true;
        }
    }

    return self._mode_in_select(token);
}
// "in template" insertion mode.
//
// Returns true when the token must be reprocessed in the mode that was
// just selected, false when the token has been consumed.
//
// Changes over the previous version:
//  * character, comment and DOCTYPE tokens are processed by the "in body"
//    rules without leaving this mode. Previously leading whitespace
//    switched the template to "in body" for good, so a following tr or td
//    was no longer treated as table content;
//  * caption, colgroup and table-section start tags select "in table",
//    which inserts them. The modes chosen before ("in column group" for
//    colgroup, "in table body" for sections) ignore those tags when the
//    current node is the template;
//  * end tags other than template are reported and ignored;
//  * the EOF check uses _has_template_on_stack(), matching the check that
//    the "in body" EOF handling makes before switching to this mode.
method _mode_in_template ( HTMLToken token ) {
    let kind := token.type();

    if ( kind eq "Character" or kind eq "Comment" or kind eq "DOCTYPE" ) {
        return self._mode_in_body(token);
    }

    if ( kind eq "EndTag" ) {
        if ( token.tagName() eq "template" ) {
            return self._end_template(token);
        }
        self.parseError(
            "unexpected-end-tag-in-template",
            "Unexpected end tag in template",
            token,
        );
        return false;
    }

    if ( kind eq "EOF" ) {
        if ( not self._has_template_on_stack() ) {
            _finished := true;
            return false;
        }
        self.parseError(
            "eof-in-template",
            "EOF in template",
            token,
        );
        self._pop_until_name("template");
        self._clear_active_formatting_to_last_marker();
        _template_insertion_modes.pop() if _template_insertion_modes.length() > 0;
        self._reset_insertion_mode_appropriately();
        return true;
    }

    // The mode that replaces the current template insertion mode.
    let next_mode := "in body";

    if ( kind eq "StartTag" ) {
        let name := token.tagName();
        if ( _html_tb_headish_element(name) or name eq "template" ) {
            return self._process_using_in_head(token);
        }
        if ( name eq "table" ) {
            // The table element is inserted here, so the token is
            // consumed rather than reprocessed.
            _template_insertion_modes.pop() if _template_insertion_modes.length() > 0;
            _template_insertion_modes.push("in table");
            self.insertElementForToken(token);
            self.switchTo("in table");
            return false;
        }
        if (
            name eq "caption" or name eq "colgroup"
            or _html_tb_table_section_element(name)
        ) {
            next_mode := "in table";
        }
        else if ( name eq "col" ) {
            next_mode := "in column group";
        }
        else if ( name eq "tr" ) {
            next_mode := "in table body";
        }
        else if ( _html_tb_table_cell_element(name) ) {
            next_mode := "in row";
        }
    }

    _template_insertion_modes.pop() if _template_insertion_modes.length() > 0;
    _template_insertion_modes.push(next_mode);
    self.switchTo(next_mode);
    return true;
}
method _mode_in_frameset ( HTMLToken token ) {
    # Handler for the "in frameset" insertion mode.
    # Returns true only when the delegated handler asks for the token
    # to be reprocessed; tokens handled here return false.
    let kind := token.type();
    if ( kind eq "Character" and _html_tb_is_ws(token.data()) ) {
        self.insertCharacter(token.data());
        return false;
    }
    if ( kind eq "Comment" ) {
        self.insertComment(token);
        return false;
    }
    if ( kind eq "StartTag" ) {
        let name := token.tagName();
        return self._mode_in_body(token) if name eq "html";
        return self._process_using_in_head(token) if name eq "noframes";
        if ( name eq "frameset" ) {
            self.insertElementForToken(token);
            return false;
        }
        if ( name eq "frame" ) {
            self._insert_from_token_then_pop(token);
            return false;
        }
    }
    if ( kind eq "EndTag" and token.tagName() eq "frameset" ) {
        if ( self._current_node_is("html") ) {
            self.parseError(
                "unexpected-frameset-end-tag",
                "Unexpected frameset end tag",
                token,
            );
            return false;
        }
        self.popElement();
        # The HTML Standard only leaves "in frameset" when the parser
        # was NOT created for fragment parsing; previously the switch
        # happened for fragments as well.
        self.switchTo("after frameset")
            unless _fragment_parsing or self._current_node_is("frameset");
        return false;
    }
    if ( kind eq "EOF" ) {
        _finished := true;
        return false;
    }
    # Anything else is reported and ignored.
    self.parseError(
        "unexpected-token-in-frameset",
        "Unexpected token in frameset",
        token,
    );
    return false;
}
method _mode_after_frameset ( HTMLToken token ) {
    # Handler for the "after frameset" insertion mode.
    # Whitespace and comments are inserted, <html> and <noframes> start
    # tags are delegated, </html> moves to "after after frameset", EOF
    # finishes the parse, and everything else is reported and ignored.
    let kind := token.type();
    if ( kind eq "Character" and _html_tb_is_ws(token.data()) ) {
        self.insertCharacter(token.data());
        return false;
    }
    if ( kind eq "Comment" ) {
        self.insertComment(token);
        return false;
    }
    if ( kind eq "StartTag" ) {
        let name := token.tagName();
        return self._mode_in_body(token) if name eq "html";
        return self._process_using_in_head(token) if name eq "noframes";
    }
    if ( kind eq "EndTag" and token.tagName() eq "html" ) {
        self.switchTo("after after frameset");
        return false;
    }
    if ( kind eq "EOF" ) {
        _finished := true;
    }
    else {
        self.parseError(
            "unexpected-token-after-frameset",
            "Unexpected token after frameset",
            token,
        );
    }
    return false;
}
method _mode_after_body ( HTMLToken token ) {
    # Handler for the "after body" insertion mode.
    # Returns true when the token must be reprocessed.
    #
    # Aligned with the HTML Standard: whitespace and <html> start tags
    # are processed with the "in body" rules while staying in this mode.
    # Previously whitespace was silently discarded and an <html> start
    # tag permanently switched the parser back to "in body".
    let kind := token.type();
    if ( kind eq "Character" and _html_tb_is_ws(token.data()) ) {
        return self._process_using_in_body(token);
    }
    if ( kind eq "Comment" ) {
        # Comments here belong to the root element when there is one,
        # otherwise to the document itself.
        let html := _document.documentElement();
        self.insertComment( token, html ≢ null ? html : _document );
        return false;
    }
    if ( kind eq "StartTag" and token.tagName() eq "html" ) {
        return self._process_using_in_body(token);
    }
    if ( kind eq "EndTag" and token.tagName() eq "html" ) {
        self.switchTo("after after body");
        return false;
    }
    if ( kind eq "EOF" ) {
        _finished := true;
        return false;
    }
    # Anything else: report, fall back to "in body" and reprocess.
    self.parseError(
        "unexpected-token-after-body",
        "Unexpected token after body",
        token,
    );
    self.switchTo("in body");
    return true;
}
method _mode_after_after_body ( HTMLToken token ) {
    # Handler for the "after after body" insertion mode.
    # Returns true when the token must be reprocessed.
    #
    # Aligned with the HTML Standard: whitespace and <html> start tags
    # are processed with the "in body" rules WITHOUT leaving this mode,
    # so that a later comment is still appended to the document.
    # Previously whitespace switched the parser to "in body" for good.
    let kind := token.type();
    if ( kind eq "Comment" ) {
        self.insertComment( token, _document );
        return false;
    }
    if ( kind eq "Character" and _html_tb_is_ws(token.data()) ) {
        return self._process_using_in_body(token);
    }
    if ( kind eq "StartTag" and token.tagName() eq "html" ) {
        return self._process_using_in_body(token);
    }
    if ( kind eq "EOF" ) {
        _finished := true;
        return false;
    }
    # Anything else: report, fall back to "in body" and reprocess.
    self.parseError(
        "unexpected-token-after-html",
        "Unexpected token after html",
        token,
    );
    self.switchTo("in body");
    return true;
}
method _mode_after_after_frameset ( HTMLToken token ) {
    # Handler for the "after after frameset" insertion mode.
    # Returns true only if a delegated handler asks for reprocessing.
    #
    # Aligned with the HTML Standard: whitespace and <html> start tags
    # are processed with the "in body" rules while staying in this mode.
    # Previously whitespace switched a frameset document to "in body"
    # permanently and <html> start tags were reported and dropped.
    let kind := token.type();
    if ( kind eq "Comment" ) {
        self.insertComment( token, _document );
        return false;
    }
    if ( kind eq "Character" and _html_tb_is_ws(token.data()) ) {
        return self._process_using_in_body(token);
    }
    if ( kind eq "StartTag" ) {
        let name := token.tagName();
        return self._process_using_in_body(token) if name eq "html";
        return self._process_using_in_head(token) if name eq "noframes";
    }
    if ( kind eq "EOF" ) {
        _finished := true;
        return false;
    }
    # Anything else is reported and ignored.
    self.parseError(
        "unexpected-token-after-frameset-html",
        "Unexpected token after frameset html",
        token,
    );
    return false;
}
method _enter_text_mode ( String state, String tag_name ) {
    # Puts the tokenizer into `state`, tells it which start tag opened
    # the text run, remembers the current insertion mode in
    # _original_insertion_mode and switches to "text". Returns self.
    _tokenizer.setState(state);
    _tokenizer.setLastStartTagName(tag_name);
    # Must be captured before switchTo() replaces the mode.
    _original_insertion_mode := _insertion_mode;
    self.switchTo("text");
    return self;
}
method _close_body ( HTMLToken token ) {
    # Moves the parser to "after body".
    # Returns false (with a parse error) when no body element is in
    # scope, true otherwise. A single parse error is also recorded if
    # any open element other than html/body is still on the stack.
    if ( not self.hasElementInScope("body") ) {
        self.parseError( "body-not-in-scope", "Body not in scope", token );
        return false;
    }
    let stray := false;
    let idx := 0;
    # Stop scanning as soon as one unexpected open element is found.
    while ( idx < _open_elements.length() and not stray ) {
        let element := _open_elements[idx];
        if ( element.nodeKind() eq "element" ) {
            stray := true unless [ "html", "body" ].contains(element.tagName());
        }
        idx++;
    }
    self.parseError(
        "unclosed-element-before-body-end",
        "Open element before body end tag",
        token,
    ) if stray;
    self.switchTo("after body");
    return true;
}
method _close_named_element ( HTMLToken token ) {
    # Closes the element named by an end-tag token.
    # If no open element has that tag name, an "unexpected-end-tag"
    # error is recorded and nothing is popped. Otherwise the stack is
    # popped up to and including the matching element, with a
    # "misnested-end-tag" error when it was not the current node.
    # Returns self.
    let wanted := token.tagName();
    let is_open := false;
    let idx := _open_elements.length() - 1;
    while ( idx >= 0 and not is_open ) {
        let candidate := _open_elements[idx];
        if ( candidate.nodeKind() eq "element" and candidate.tagName() eq wanted ) {
            is_open := true;
        }
        idx--;
    }
    if ( not is_open ) {
        self.parseError(
            "unexpected-end-tag",
            "Unexpected end tag " _ wanted,
            token,
        );
        return self;
    }
    let top := self.currentNode();
    self.parseError(
        "misnested-end-tag",
        "Misnested end tag " _ wanted,
        token,
    ) unless ( top.nodeKind() eq "element" and top.tagName() eq wanted );
    while ( _open_elements.length() > 0 ) {
        let popped := self.popElement();
        last if popped.nodeKind() eq "element" and popped.tagName() eq wanted;
    }
    return self;
}
method _generic_end_tag ( String name, token := null ) {
    # Generic end-tag handling: walk the stack of open elements from
    # the top. On the first element named `name`, generate implied end
    # tags (except for `name`), report misnesting if it is not the
    # current node, and pop through it. If a special element is met
    # first, or the stack is exhausted, the end tag is reported as
    # unexpected and ignored. Returns self.
    let depth := _open_elements.length() - 1;
    while ( depth >= 0 ) {
        let node := _open_elements[depth];
        if ( node.nodeKind() eq "element" ) {
            if ( node.tagName() eq name ) {
                self._generate_implied_end_tags(name);
                if ( not self._current_node_is(name) ) {
                    self.parseError(
                        "misnested-end-tag",
                        "Misnested end tag " _ name,
                        token,
                    );
                }
                self._pop_until_name(name);
                return self;
            }
            # A special element blocks the search; fall out to the
            # shared "unexpected end tag" report below.
            last if _html_tb_special_element(node.tagName());
        }
        depth--;
    }
    self.parseError(
        "unexpected-end-tag",
        "Unexpected end tag " _ name,
        token,
    );
    return self;
}
method _reset_insertion_mode_appropriately () {
    # Recomputes the insertion mode from the stack of open elements,
    # scanning from the current node towards the root. During fragment
    # parsing the bottom-most stack entry is replaced by the fragment
    # context element. Falls back to "in body". Returns self.
    #
    # Aligned with the HTML Standard's "reset the insertion mode
    # appropriately" steps:
    #  * td/th select "in cell" and head selects "in head" only when
    #    the node is NOT the last (bottom-most) one, i.e. not when it
    #    is the fragment context;
    #  * an html node selects "before head" or "after head" depending
    #    on whether a head element has been recorded.
    let i := _open_elements.length() - 1;
    while ( i >= 0 ) {
        let node := _open_elements[i];
        let is_last := ( i == 0 );
        node := _fragment_context_element
            if _fragment_parsing
            and is_last
            and _fragment_context_element ≢ null;
        if ( node.nodeKind() eq "element" ) {
            let name := node.tagName();
            if ( name eq "select" ) {
                # Look at the ancestors of the select: a template stops
                # the search, a table-ish ancestor selects the
                # "in select in table" variant.
                let j := i - 1;
                while ( j >= 0 ) {
                    if (
                        _open_elements[j].nodeKind() eq "element" and
                        _open_elements[j].tagName() eq "template"
                    ) {
                        self.switchTo("in select");
                        return self;
                    }
                    if (
                        _open_elements[j].nodeKind() eq "element" and
                        [ "table", "caption", "tbody", "tfoot", "thead", "tr", "td", "th" ]
                            .contains(_open_elements[j].tagName())
                    ) {
                        self.switchTo("in select in table");
                        return self;
                    }
                    j--;
                }
                self.switchTo("in select");
                return self;
            }
            if ( _html_tb_table_cell_element(name) and not is_last ) {
                self.switchTo("in cell");
                return self;
            }
            if ( name eq "tr" ) {
                self.switchTo("in row");
                return self;
            }
            if ( _html_tb_table_section_element(name) ) {
                self.switchTo("in table body");
                return self;
            }
            if ( name eq "caption" ) {
                self.switchTo("in caption");
                return self;
            }
            if ( name eq "colgroup" ) {
                self.switchTo("in column group");
                return self;
            }
            if ( name eq "table" ) {
                self.switchTo("in table");
                return self;
            }
            if ( name eq "template" ) {
                # Use the current template insertion mode when one is
                # recorded, otherwise plain "in template".
                self.switchTo(
                    _template_insertion_modes.length() == 0
                        ? "in template"
                        : _template_insertion_modes[
                            _template_insertion_modes.length() - 1
                        ],
                );
                return self;
            }
            if ( name eq "head" and not is_last ) {
                self.switchTo("in head");
                return self;
            }
            if ( name eq "body" ) {
                self.switchTo("in body");
                return self;
            }
            if ( name eq "frameset" ) {
                self.switchTo("in frameset");
                return self;
            }
            if ( name eq "html" ) {
                self.switchTo(
                    _head_element ≡ null ? "before head" : "after head",
                );
                return self;
            }
        }
        i--;
    }
    self.switchTo("in body");
    return self;
}
method _close_caption ( token := null ) {
    # Closes the open caption element and returns to "in table".
    # Returns true on success; returns false, after recording a parse
    # error, when no caption is in table scope.
    if ( self.hasElementInTableScope("caption") ) {
        self._generate_implied_end_tags();
        if ( not self._current_node_is("caption") ) {
            self.parseError(
                "misnested-caption-end-tag",
                "Misnested caption end tag",
                token,
            );
        }
        self._pop_until_name("caption");
        self._clear_active_formatting_to_last_marker();
        self.switchTo("in table");
        return true;
    }
    self.parseError(
        "caption-not-in-table-scope",
        "caption element is not in table scope",
        token,
    );
    return false;
}
method _close_cell ( token := null ) {
    # Closes the open table cell (td preferred, otherwise th) and
    # switches to "in row". When neither is in table scope a parse
    # error is recorded and nothing else happens. Returns self.
    let cell := "th";
    cell := "td" if self.hasElementInTableScope("td");
    if ( self.hasElementInTableScope(cell) ) {
        self._generate_implied_end_tags();
        if ( not self._current_node_is(cell) ) {
            self.parseError(
                "misnested-cell-end-tag",
                "Misnested cell end tag",
                token,
            );
        }
        self._pop_until_name(cell);
        self._clear_active_formatting_to_last_marker();
        self.switchTo("in row");
    }
    else {
        self.parseError(
            "cell-not-in-table-scope",
            "No table cell is in table scope",
            token,
        );
    }
    return self;
}
method _end_template ( HTMLToken token ) {
    # Handles a </template> end tag. Always returns false (the token is
    # never reprocessed).
    #
    # The HTML Standard tests whether a template element is anywhere ON
    # the stack of open elements, not whether one is "in scope". A
    # scope test that stops at scope-marker elements (for example a
    # table opened inside the template) would wrongly ignore the end
    # tag, so the stack is scanned directly here.
    let template_open := false;
    for ( let element in _open_elements ) {
        template_open := true
            if element.nodeKind() eq "element"
            and element.tagName() eq "template";
    }
    if ( not template_open ) {
        self.parseError(
            "template-not-in-scope",
            "template end tag without open template",
            token,
        );
        return false;
    }
    self._generate_implied_end_tags();
    # Elements still open above the template are a parse error, in line
    # with how the caption and cell closers report misnesting.
    self.parseError(
        "misnested-template-end-tag",
        "Misnested template end tag",
        token,
    ) unless self._current_node_is("template");
    self._pop_until_name("template");
    self._clear_active_formatting_to_last_marker();
    _template_insertion_modes.pop() if _template_insertion_modes.length() > 0;
    self._reset_insertion_mode_appropriately();
    return false;
}
method _process_using_in_body ( HTMLToken token ) {
    # Runs the "in body" handler on `token` on behalf of another mode.
    # The previous mode is restored afterwards unless the handler asked
    # for reprocessing or itself moved away from "in body".
    # Returns the handler's reprocess flag.
    let saved_mode := _insertion_mode;
    _insertion_mode := "in body";
    let again := self._mode_in_body(token);
    if ( not again and _insertion_mode eq "in body" ) {
        _insertion_mode := saved_mode;
    }
    return again;
}
method _process_using_in_head ( HTMLToken token ) {
    # Runs the "in head" handler on `token` on behalf of another mode.
    # If the handler entered "text" mode, the caller's mode becomes the
    # mode to return to after the text. Otherwise the caller's mode is
    # restored unless the handler asked for reprocessing or itself
    # moved away from "in head". Returns the handler's reprocess flag.
    let saved_mode := _insertion_mode;
    _insertion_mode := "in head";
    let again := self._mode_in_head(token);
    if ( _insertion_mode eq "text" ) {
        _original_insertion_mode := saved_mode;
    }
    else if ( not again and _insertion_mode eq "in head" ) {
        _insertion_mode := saved_mode;
    }
    return again;
}
method _adjusted_insertion_location () {
    # Works out where the next node should be inserted.
    # Returns a dict with `parent` (the node to insert into) and
    # `before` (the sibling to insert in front of, or null to append).
    #
    # By default the parent is the current node, or its content()
    # when the current node is a template. With foster parenting on,
    # the nearest open template or table (searching from the top of
    # the stack) decides the location instead.
    let target := self.currentNode();
    let parent := target;
    let before := null;
    if ( target.nodeKind() eq "element" and target.tagName() eq "template" ) {
        parent := target.content();
    }
    if ( _foster_parenting ) {
        let idx := _open_elements.length() - 1;
        let anchor := null;
        while ( idx >= 0 and anchor ≡ null ) {
            let candidate := _open_elements[idx];
            if (
                candidate.nodeKind() eq "element" and
                ( candidate.tagName() eq "template" or candidate.tagName() eq "table" )
            ) {
                anchor := candidate;
            }
            else {
                idx--;
            }
        }
        if ( anchor ≢ null ) {
            if ( anchor.tagName() eq "template" ) {
                # A template nearer the top than any table wins.
                parent := anchor.content();
                before := null;
            }
            else if ( anchor.parentNode() ≢ null ) {
                # Insert immediately in front of the table.
                parent := anchor.parentNode();
                before := anchor;
            }
            else if ( idx > 0 ) {
                # Detached table: append to the element just below it
                # on the stack.
                parent := _open_elements[idx - 1];
                before := null;
            }
        }
    }
    return { parent: parent, before: before };
}
method _insert_node_at_adjusted_location ( node ) {
    # Inserts `node` at the adjusted insertion location: appended to
    # the parent, or placed in front of the `before` sibling when one
    # is given. Returns `node`.
    let where := self._adjusted_insertion_location();
    let sibling := where{before};
    if ( sibling ≡ null ) {
        where{parent}.appendChild(node);
    }
    else {
        where{parent}.insertBefore( node, sibling );
    }
    return node;
}
method insertElementForToken ( HTMLToken token ) {
    # Creates an HTML-namespace element for a start-tag token, inserts
    # it at the adjusted insertion location and pushes it on the stack
    # of open elements. head and body elements are also remembered in
    # _head_element / _body_element. Returns the new element.
    let created := self._create_element_for_token( token, HTML_NAMESPACE_URI );
    self._insert_node_at_adjusted_location(created);
    self.pushElement(created);
    if ( created.tagName() eq "head" ) {
        _head_element := created;
    }
    if ( created.tagName() eq "body" ) {
        _body_element := created;
    }
    return created;
}
method _foreign_tag_name ( HTMLToken token, String namespaceURI ) {
    # Tag name to use for a foreign element: SVG names go through
    # _html_tb_svg_tag_name, every other namespace keeps the token's
    # tag name unchanged.
    if ( namespaceURI eq SVG_NAMESPACE_URI ) {
        return _html_tb_svg_tag_name(token.tagName());
    }
    return token.tagName();
}
method _adjust_attr_name_for_ns ( String name, String namespaceURI ) {
    # Attribute name to use on an element in `namespaceURI`: SVG and
    # MathML names go through their adjustment helpers, any other
    # namespace keeps the name unchanged.
    let adjusted := name;
    if ( namespaceURI eq SVG_NAMESPACE_URI ) {
        adjusted := _html_tb_svg_attr_name(name);
    }
    else if ( namespaceURI eq MATHML_NAMESPACE_URI ) {
        adjusted := _html_tb_mathml_attr_name(name);
    }
    return adjusted;
}
method _copy_token_attributes ( element, HTMLToken token, String namespaceURI ) {
    # Copies every attribute of a start-tag token onto `element`.
    #
    # For FOREIGN elements (anything outside the HTML namespace),
    # xlink:*, xml:*, xmlns and xmlns:* attributes are stored in the
    # XLink / XML / XMLNS namespaces, and other names are adjusted for
    # SVG or MathML. For HTML elements the HTML Standard applies no
    # namespace adjustment, so such attributes are kept as plain
    # attributes whose name simply contains a colon (previously they
    # were namespaced on HTML elements too). Returns `element`.
    let foreign := ( namespaceURI ne HTML_NAMESPACE_URI );
    for ( let attr in token.attributes() ) {
        let name := attr{name};
        if ( foreign and ( name ~ /^xlink:/ ) ) {
            element.setAttributeNS(
                XLINK_NAMESPACE_URI,
                "xlink:" _ substr( name, 6 ),
                attr{value},
            );
        }
        else if ( foreign and ( name ~ /^xml:/ ) ) {
            element.setAttributeNS(
                XML_NAMESPACE_URI,
                "xml:" _ substr( name, 4 ),
                attr{value},
            );
        }
        else if ( foreign and name eq "xmlns" ) {
            element.setAttributeNS( XMLNS_NAMESPACE_URI, "xmlns", attr{value} );
        }
        else if ( foreign and ( name ~ /^xmlns:/ ) ) {
            element.setAttributeNS(
                XMLNS_NAMESPACE_URI,
                "xmlns:" _ substr( name, 6 ),
                attr{value},
            );
        }
        else {
            # Plain attribute; the helper leaves HTML names untouched.
            element.setAttribute(
                self._adjust_attr_name_for_ns( name, namespaceURI ),
                attr{value},
            );
        }
    }
    return element;
}
method _create_element_for_token ( HTMLToken token, String namespaceURI ) {
    # Builds (but does not insert) an element for a start-tag token in
    # the given namespace and copies the token's attributes onto it.
    # HTML elements use the token's tag name as-is; other namespaces go
    # through _foreign_tag_name. Returns the new element.
    let tag := token.tagName();
    if ( namespaceURI ne HTML_NAMESPACE_URI ) {
        tag := self._foreign_tag_name( token, namespaceURI );
    }
    let created := _document.createElementNS( namespaceURI, tag );
    self._copy_token_attributes( created, token, namespaceURI );
    return created;
}
method insertForeignElementForToken ( HTMLToken token, String namespaceURI ) {
    # Creates an element in `namespaceURI` for a start-tag token,
    # inserts it at the adjusted insertion location and pushes it on
    # the stack of open elements. Returns the new element.
    let created := self._insert_node_at_adjusted_location(
        self._create_element_for_token( token, namespaceURI ),
    );
    self.pushElement(created);
    return created;
}
method insertHtmlElement ( String name ) {
    # Creates an attribute-less HTML-namespace element called `name`,
    # inserts it at the adjusted insertion location and pushes it on
    # the stack of open elements. head and body elements are also
    # remembered in _head_element / _body_element.
    # Returns the new element.
    let created := _document.createElementNS( HTML_NAMESPACE_URI, name );
    self._insert_node_at_adjusted_location(created);
    self.pushElement(created);
    if ( created.tagName() eq "head" ) {
        _head_element := created;
    }
    if ( created.tagName() eq "body" ) {
        _body_element := created;
    }
    return created;
}
method insertCharacter ( String data ) {
    # Inserts character data at the adjusted insertion location.
    # Empty data is a no-op. If the node just before the insertion
    # point is a text node the data is appended to it; otherwise a new
    # text node is created. Returns self.
    if ( data eq "" ) {
        return self;
    }
    let where := self._adjusted_insertion_location();
    let sibling := where{before};
    let neighbour := null;
    if ( sibling ≡ null ) {
        neighbour := where{parent}.lastChild();
    }
    else {
        neighbour := sibling.previousSibling();
    }
    if ( neighbour ≢ null and neighbour.nodeKind() eq "text" ) {
        # Merge with the adjacent text node.
        neighbour.setData(neighbour.data() _ data);
        return self;
    }
    let text := _document.createTextNode(data);
    if ( sibling ≡ null ) {
        where{parent}.appendChild(text);
    }
    else {
        where{parent}.insertBefore( text, sibling );
    }
    return self;
}
method insertComment ( HTMLToken token, parent := null ) {
    # Inserts a comment node built from the token's data.
    # With an explicit `parent` the comment is appended to it;
    # otherwise it goes to the adjusted insertion location.
    # Returns self.
    let comment := _document.createComment(token.data());
    if ( parent ≡ null ) {
        self._insert_node_at_adjusted_location(comment);
    }
    else {
        parent.appendChild(comment);
    }
    return self;
}
method appendDoctype ( HTMLToken token ) {
    # Appends a doctype node for the token to the document. A null
    # public or system identifier is stored as the empty string.
    # Returns self.
    let public_id := token.publicId();
    public_id := "" if public_id ≡ null;
    let system_id := token.systemId();
    system_id := "" if system_id ≡ null;
    _document.appendChild(
        _document.createDoctype( token.tagName(), public_id, system_id ),
    );
    return self;
}
}
class HTMLTreeTestSerializer {
    # Renders a DOM subtree as text, one line per node, each line
    # prefixed with "| " and indented by nesting depth.
    # NOTE(review): this looks like the html5lib tree-construction
    # expected-output format - confirm against the test harness.

    # Serializes `node` and everything below it; lines are joined
    # with "\n".
    static method serialize ( node ) {
        let lines := [];
        self._serialize_node( node, 0, lines );
        return join( "\n", lines );
    }

    # Line prefix for a given nesting depth.
    static method _indent ( Number depth ) {
        let prefix := "| ";
        let level := 0;
        while ( level < depth ) {
            prefix _= " ";
            level++;
        }
        return prefix;
    }

    # Element name as printed: SVG and MathML elements are shown as
    # "svg <local name>" / "math <local name>", others by tag name.
    static method _element_label ( node ) {
        let ns := node.namespaceURI();
        if ( ns eq SVG_NAMESPACE_URI ) {
            return "svg " _ node.localName();
        }
        if ( ns eq MATHML_NAMESPACE_URI ) {
            return "math " _ node.localName();
        }
        return node.tagName();
    }

    # Attribute name as printed: XLink / XML / XMLNS attributes are
    # shown as "<prefix> <local name>", others by qualified name.
    static method _attribute_label ( Dict record ) {
        let ns := record{namespaceURI};
        if ( ns eq XLINK_NAMESPACE_URI ) {
            return "xlink " _ record{localName};
        }
        if ( ns eq XML_NAMESPACE_URI ) {
            return "xml " _ record{localName};
        }
        if ( ns eq XMLNS_NAMESPACE_URI ) {
            if ( record{prefix} ≡ null ) {
                return "xmlns";
            }
            return "xmlns " _ record{localName};
        }
        return record{qualifiedName};
    }

    # Attribute records of `node`, ordered by printed label and then
    # by value.
    static method _sorted_attribute_records ( node ) {
        let records := [];
        for ( let record in node.attributeRecords() ) {
            records.push(record);
        }
        return records.sort( function ( a, b ) {
            let left := HTMLTreeTestSerializer._attribute_label(a);
            let right := HTMLTreeTestSerializer._attribute_label(b);
            if ( left eq right ) {
                return a{value} cmp b{value};
            }
            return left cmp right;
        } );
    }

    # Doctype line; the two quoted identifiers are only printed when
    # at least one of them is non-empty.
    static method _doctype_label ( node ) {
        let ids := "";
        if ( node.publicId() ne "" or node.systemId() ne "" ) {
            ids _= " \"" _ _html_tb_quote_text(node.publicId()) _ "\"";
            ids _= " \"" _ _html_tb_quote_text(node.systemId()) _ "\"";
        }
        return "<!DOCTYPE " _ node.name() _ ids _ ">";
    }

    # Appends the lines for `node` (and its descendants) to `lines`.
    # Documents and fragments contribute only their children, at the
    # same depth. Unknown node kinds are skipped.
    static method _serialize_node ( node, Number depth, Array lines ) {
        let kind := node.nodeKind();
        let pad := self._indent(depth);
        if ( kind eq "document" or kind eq "fragment" ) {
            for ( let child in node.childNodes() ) {
                self._serialize_node( child, depth, lines );
            }
        }
        else if ( kind eq "doctype" ) {
            lines.push(pad _ self._doctype_label(node));
        }
        else if ( kind eq "element" ) {
            let inner := self._indent(depth + 1);
            lines.push(pad _ "<" _ self._element_label(node) _ ">");
            for ( let record in self._sorted_attribute_records(node) ) {
                lines.push(
                    inner _ self._attribute_label(record) _ "=\""
                        _ _html_tb_quote_text(record{value}) _ "\"",
                );
            }
            if (
                node.namespaceURI() eq HTML_NAMESPACE_URI
                and node.tagName() eq "template"
            ) {
                # Template children live in content(), printed under
                # a "content" marker line one level deeper.
                lines.push(inner _ "content");
                for ( let child in node.content().childNodes() ) {
                    self._serialize_node( child, depth + 2, lines );
                }
            }
            else {
                for ( let child in node.childNodes() ) {
                    self._serialize_node( child, depth + 1, lines );
                }
            }
        }
        else if ( kind eq "text" ) {
            lines.push(
                pad _ "\"" _ _html_tb_quote_text(node.data()) _ "\"",
            );
        }
        else if ( kind eq "comment" ) {
            lines.push(pad _ "<!-- " _ node.data() _ " -->");
        }
        return self;
    }
}
modules/html/treebuilder.zzm
html-0.0.1 source code
Package
- Name
- html
- Version
- 0.0.1
- Uploaded
- 2026-06-10 01:22:42
- Repository
- https://github.com/tobyink/zuzu-html
- Dependencies
-
-
std/io>= 0 -
std/string>= 0
-
- Metadata
- zuzu-distribution.json
- Archive
- Download .tar.gz