summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: John Mark Bell <jmb@netsurf-browser.org> 2008-04-07 02:04:05 +0000
committer: John Mark Bell <jmb@netsurf-browser.org> 2008-04-07 02:04:05 +0000
commit: 427ce60a0cf055347b2fd7ac4a37bec59d65c3ac (patch)
tree: ff3c9fc5b4d0366f5b2066209bbfb0ec824a5f6d
parent: 2ef742b2bbe323e50001bece2116734ec2b01ee0 (diff)
downloadlibhubbub-427ce60a0cf055347b2fd7ac4a37bec59d65c3ac.tar.gz
libhubbub-427ce60a0cf055347b2fd7ac4a37bec59d65c3ac.tar.bz2
Implement "in body" insertion mode.
Modify treebuilder test driver to bring it in line with API changes. A few minimal bits of testdata for various bits of in body. Proper testing will come once we're actually building a tree. svn path=/trunk/hubbub/; revision=4076
-rw-r--r--include/hubbub/functypes.h35
-rw-r--r--include/hubbub/tree.h6
-rw-r--r--src/treebuilder/Makefile2
-rw-r--r--src/treebuilder/in_body.c1898
-rw-r--r--src/treebuilder/in_body.h18
-rw-r--r--src/treebuilder/internal.h190
-rw-r--r--src/treebuilder/treebuilder.c801
-rw-r--r--test/data/html/INDEX2
-rw-r--r--test/data/html/isindex.html8
-rw-r--r--test/data/html/misnested.html11
-rw-r--r--test/tree.c84
11 files changed, 2597 insertions, 458 deletions
diff --git a/include/hubbub/functypes.h b/include/hubbub/functypes.h
index ddc307a..80c8388 100644
--- a/include/hubbub/functypes.h
+++ b/include/hubbub/functypes.h
@@ -9,6 +9,7 @@
#define hubbub_functypes_h_
#include <stdbool.h>
+#include <stdint.h>
#include <stdlib.h>
#include <hubbub/types.h>
@@ -53,12 +54,6 @@ typedef int (*hubbub_tree_create_element)(void *ctx, const hubbub_tag *tag,
void **result);
/**
- * Type of tree element node creation function (verbatim name)
- */
-typedef int (*hubbub_tree_create_element_verbatim)(void *ctx,
- const uint8_t *name, size_t name_len, void **result);
-
-/**
* Type of tree text node creation function
*/
typedef int (*hubbub_tree_create_text)(void *ctx, const hubbub_string *data,
@@ -99,6 +94,34 @@ typedef int (*hubbub_tree_clone_node)(void *ctx, void *node, bool deep,
void **result);
/**
+ * Type of child reparenting function
+ */
+typedef int (*hubbub_tree_reparent_children)(void *ctx, void *node,
+ void *new_parent);
+
+/**
+ * Type of parent node acquisition function
+ */
+typedef int (*hubbub_tree_get_parent)(void *ctx, void *node, bool element_only,
+ void **result);
+
+/**
+ * Type of child presence query function
+ */
+typedef int (*hubbub_tree_has_children)(void *ctx, void *node, bool *result);
+
+/**
+ * Type of form association function
+ */
+typedef int (*hubbub_tree_form_associate)(void *ctx, void *form, void *node);
+
+/**
+ * Type of attribute addition function
+ */
+typedef int (*hubbub_tree_add_attributes)(void *ctx, void *node,
+ const hubbub_attribute *attributes, uint32_t n_attributes);
+
+/**
* Type of tree quirks mode notification function
*/
typedef int (*hubbub_tree_set_quirks_mode)(void *ctx, hubbub_quirks_mode mode);
diff --git a/include/hubbub/tree.h b/include/hubbub/tree.h
index cc66acf..7e2e11f 100644
--- a/include/hubbub/tree.h
+++ b/include/hubbub/tree.h
@@ -17,7 +17,6 @@ typedef struct hubbub_tree_handler {
hubbub_tree_create_comment create_comment;
hubbub_tree_create_doctype create_doctype;
hubbub_tree_create_element create_element;
- hubbub_tree_create_element_verbatim create_element_verbatim;
hubbub_tree_create_text create_text;
hubbub_tree_ref_node ref_node;
hubbub_tree_unref_node unref_node;
@@ -25,6 +24,11 @@ typedef struct hubbub_tree_handler {
hubbub_tree_insert_before insert_before;
hubbub_tree_remove_child remove_child;
hubbub_tree_clone_node clone_node;
+ hubbub_tree_reparent_children reparent_children;
+ hubbub_tree_get_parent get_parent;
+ hubbub_tree_has_children has_children;
+ hubbub_tree_form_associate form_associate;
+ hubbub_tree_add_attributes add_attributes;
hubbub_tree_set_quirks_mode set_quirks_mode;
void *ctx;
} hubbub_tree_handler;
diff --git a/src/treebuilder/Makefile b/src/treebuilder/Makefile
index d63a7a3..3353a26 100644
--- a/src/treebuilder/Makefile
+++ b/src/treebuilder/Makefile
@@ -22,7 +22,7 @@
CFLAGS += -I$(CURDIR)
# Objects
-OBJS = treebuilder
+OBJS = in_body treebuilder
.PHONY: clean debug distclean export release setup test
diff --git a/src/treebuilder/in_body.c b/src/treebuilder/in_body.c
new file mode 100644
index 0000000..7fefdfd
--- /dev/null
+++ b/src/treebuilder/in_body.c
@@ -0,0 +1,1898 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include "treebuilder/in_body.h"
+#include "utils/utils.h"
+
+#undef DEBUG_IN_BODY
+
+typedef struct bookmark { /* Pair of formatting list entries; passed to aa_find_bookmark_location_reparenting_misnested() */
+ formatting_list_entry *prev; /* NOTE(review): presumably the entry preceding the bookmarked position - confirm */
+ formatting_list_entry *next; /* NOTE(review): presumably the entry following the bookmarked position - confirm */
+} bookmark;
+
+static void process_character(hubbub_treebuilder *treebuilder, /* Per-token-type handlers used by handle_in_body() */
+ const hubbub_token *token);
+static bool process_start_tag(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+static bool process_end_tag(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+
+static void process_html_in_body(hubbub_treebuilder *treebuilder, /* Start tag handlers, all dispatched from process_start_tag() */
+ const hubbub_token *token);
+static void process_body_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+static void process_container_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+static void process_form_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+static void process_dd_dt_li_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token, element_type type);
+static void process_plaintext_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+static void process_a_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+static void process_presentational_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token, element_type type);
+static void process_nobr_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+static void process_button_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+static void process_applet_marquee_object_in_body(
+ hubbub_treebuilder *treebuilder, const hubbub_token *token,
+ element_type type);
+static void process_hr_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+static void process_image_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+static void process_input_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+static void process_isindex_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+static void process_textarea_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+static void process_select_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+static void process_phrasing_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+
+static bool process_0body_in_body(hubbub_treebuilder *treebuilder); /* End tag handlers ("0" prefix), all dispatched from process_end_tag() */
+static void process_0container_in_body(hubbub_treebuilder *treebuilder,
+ element_type type);
+static void process_0p_in_body(hubbub_treebuilder *treebuilder);
+static void process_0dd_dt_li_in_body(hubbub_treebuilder *treebuilder,
+ element_type type);
+static void process_0h_in_body(hubbub_treebuilder *treebuilder,
+ element_type type);
+static void process_0presentational_in_body(hubbub_treebuilder *treebuilder,
+ element_type type);
+static void process_0applet_button_marquee_object_in_body(
+ hubbub_treebuilder *treebuilder, element_type type);
+static void process_0br_in_body(hubbub_treebuilder *treebuilder);
+static void process_0generic_in_body(hubbub_treebuilder *treebuilder,
+ element_type type);
+
+static bool aa_find_and_validate_formatting_element( /* "aa_" helpers; NOTE(review): presumably the adoption agency algorithm - definitions not visible here, confirm */
+ hubbub_treebuilder *treebuilder, element_type type,
+ formatting_list_entry **element);
+static formatting_list_entry *aa_find_formatting_element(
+ hubbub_treebuilder *treebuilder, element_type type);
+static bool aa_find_furthest_block(hubbub_treebuilder *treebuilder,
+ formatting_list_entry *formatting_element,
+ uint32_t *furthest_block);
+static void aa_remove_from_parent(hubbub_treebuilder *treebuilder, void *node);
+static void aa_reparent_node(hubbub_treebuilder *treebuilder, void *node,
+ void *new_parent);
+static void aa_find_bookmark_location_reparenting_misnested(
+ hubbub_treebuilder *treebuilder,
+ uint32_t formatting_element, uint32_t furthest_block,
+ bookmark *bookmark, uint32_t *last_node);
+static void aa_remove_element_stack_item(hubbub_treebuilder *treebuilder,
+ uint32_t index, uint32_t limit);
+static void aa_clone_and_replace_entries(hubbub_treebuilder *treebuilder,
+ formatting_list_entry *element);
+static void aa_insert_into_foster_parent(hubbub_treebuilder *treebuilder,
+ void *node);
+
+
+/**
+ * Entry point for the "in body" insertion mode: dispatch a single token
+ *
+ * Any token other than a character token cancels a pending request to
+ * strip a leading newline before the token is dispatched.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ * \return True if the caller should reprocess the token, false otherwise
+ */
+bool handle_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	bool again = false;
+
+#if !defined(NDEBUG) && defined(DEBUG_IN_BODY)
+	fprintf(stdout, "Processing token %d\n", token->type);
+	element_stack_dump(treebuilder, stdout);
+	formatting_list_dump(treebuilder, stdout);
+#endif
+
+	/* Only a character token may consume the LR stripping request */
+	if (token->type != HUBBUB_TOKEN_CHARACTER)
+		treebuilder->context.strip_leading_lr = false;
+
+	switch (token->type) {
+	case HUBBUB_TOKEN_START_TAG:
+		again = process_start_tag(treebuilder, token);
+		break;
+	case HUBBUB_TOKEN_END_TAG:
+		again = process_end_tag(treebuilder, token);
+		break;
+	case HUBBUB_TOKEN_CHARACTER:
+		process_character(treebuilder, token);
+		break;
+	case HUBBUB_TOKEN_COMMENT:
+	{
+		/* Comments are appended to the current node */
+		element_context *stack = treebuilder->context.element_stack;
+		uint32_t top = treebuilder->context.current_node;
+
+		process_comment_append(treebuilder, token, stack[top].node);
+		break;
+	}
+	case HUBBUB_TOKEN_DOCTYPE:
+		/** \todo parse error */
+		break;
+	case HUBBUB_TOKEN_EOF:
+	{
+		/* Walk down the stack (excluding the root entry) until an
+		 * element that may not be left open at EOF is found */
+		element_context *stack = treebuilder->context.element_stack;
+		uint32_t depth = treebuilder->context.current_node;
+
+		while (depth > 0) {
+			element_type open = stack[depth].type;
+			bool may_stay_open = (open == DD || open == DT ||
+					open == LI || open == P ||
+					open == TBODY || open == TD ||
+					open == TFOOT || open == TH ||
+					open == THEAD || open == TR ||
+					open == BODY);
+
+			if (!may_stay_open) {
+				/** \todo parse error */
+				break;
+			}
+
+			depth--;
+		}
+		break;
+	}
+	}
+
+#if !defined(NDEBUG) && defined(DEBUG_IN_BODY)
+	fprintf(stdout, "Processed\n");
+	element_stack_dump(treebuilder, stdout);
+	formatting_list_dump(treebuilder, stdout);
+#endif
+
+	return again;
+}
+
+/**
+ * Process a character token
+ *
+ * Reconstructs the active formatting list, optionally strips a single
+ * leading newline (when the strip_leading_lr flag is set) and appends
+ * whatever text remains to the tree.
+ *
+ * The token's string may either be an offset into the treebuilder's input
+ * buffer or a direct pointer (HUBBUB_STRING_PTR, as used for synthesised
+ * tokens); both representations are handled when stripping.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ */
+void process_character(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	/* Work on a copy: the token itself must not be modified */
+	hubbub_string dummy = token->data.character;
+
+	reconstruct_active_formatting_list(treebuilder);
+
+	if (treebuilder->context.strip_leading_lr) {
+		/* Never inspect (or shorten) an empty string: doing so
+		 * would read past the data and wrap the length around */
+		if (dummy.len > 0) {
+			const bool is_ptr = (dummy.type == HUBBUB_STRING_PTR);
+			const uint8_t *str = is_ptr ? dummy.data.ptr :
+					treebuilder->input_buffer +
+					dummy.data.off;
+
+			/** \todo UTF-16 */
+			if (*str == '\n') {
+				if (is_ptr)
+					dummy.data.ptr++;
+				else
+					dummy.data.off++;
+				dummy.len--;
+			}
+		}
+
+		treebuilder->context.strip_leading_lr = false;
+	}
+
+	/* Nothing left (e.g. the token was just the stripped newline):
+	 * don't create an empty text run */
+	if (dummy.len > 0)
+		append_text(treebuilder, &dummy);
+}
+
+/**
+ * Process a start or end tag using the "in body" rules
+ *
+ * Only tag tokens may be passed in; any other token type trips an
+ * assertion (and yields false when assertions are compiled out).
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The tag token to process
+ * \return True to reprocess the token
+ */
+bool process_tag_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	switch (token->type) {
+	case HUBBUB_TOKEN_START_TAG:
+		return process_start_tag(treebuilder, token);
+	case HUBBUB_TOKEN_END_TAG:
+		return process_end_tag(treebuilder, token);
+	case HUBBUB_TOKEN_CHARACTER:
+	case HUBBUB_TOKEN_COMMENT:
+	case HUBBUB_TOKEN_DOCTYPE:
+	case HUBBUB_TOKEN_EOF:
+		/* Callers must only route tag tokens through here */
+		assert(0);
+		break;
+	}
+
+	return false;
+}
+
+/**
+ * Process a start tag
+ *
+ * The tag name is classified with element_type_from_name() and the token is
+ * handed to the matching handler; any type not listed explicitly falls
+ * through to process_phrasing_in_body(). For <table> and <select> the
+ * insertion mode is switched after the element has been processed, but only
+ * from the modes tested below.
+ *
+ * Note that nothing in this function sets the local reprocess flag, so as
+ * written it always returns false.
+ *
+ * \param treebuilder The treebuilder instance
+ * \param token The token to process
+ * \return True to reprocess the token
+ */
+bool process_start_tag(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token)
+{
+ bool reprocess = false;
+ element_type type = element_type_from_name(treebuilder,
+ &token->data.tag.name);
+
+ if (type == HTML) {
+ process_html_in_body(treebuilder, token);
+ } else if (type == BASE || type == LINK || type == META) {
+ process_base_link_meta_in_head(treebuilder,
+ token, type);
+ } else if (type == SCRIPT) {
+ process_script_in_head(treebuilder, token);
+ } else if (type == STYLE) {
+ parse_generic_rcdata(treebuilder, token, false);
+ } else if (type == TITLE) {
+ parse_generic_rcdata(treebuilder, token, true);
+ } else if (type == BODY) {
+ process_body_in_body(treebuilder, token);
+ } else if (type == ADDRESS || type == BLOCKQUOTE ||
+ type == CENTER || type == DIR ||
+ type == DIV || type == DL ||
+ type == FIELDSET || type == H1 || type == H2 ||
+ type == H3 || type == H4 || type == H5 ||
+ type == H6 || type == MENU || type == OL ||
+ type == P || type == UL) {
+ process_container_in_body(treebuilder, token);
+ } else if (type == PRE || type == LISTING) {
+ process_container_in_body(treebuilder, token);
+
+ treebuilder->context.strip_leading_lr = true; /* process_character() drops one leading '\n' while this is set */
+ } else if (type == FORM) {
+ process_form_in_body(treebuilder, token);
+ } else if (type == DD || type == DT || type == LI) {
+ process_dd_dt_li_in_body(treebuilder, token, type);
+ } else if (type == PLAINTEXT) {
+ process_plaintext_in_body(treebuilder, token);
+ } else if (type == A) {
+ process_a_in_body(treebuilder, token);
+ } else if (type == B || type == BIG || type == EM ||
+ type == FONT || type == I || type == S ||
+ type == SMALL || type == STRIKE ||
+ type == STRONG || type == TT || type == U) {
+ process_presentational_in_body(treebuilder,
+ token, type);
+ } else if (type == NOBR) {
+ process_nobr_in_body(treebuilder, token);
+ } else if (type == BUTTON) {
+ process_button_in_body(treebuilder, token);
+ } else if (type == APPLET || type == MARQUEE ||
+ type == OBJECT) {
+ process_applet_marquee_object_in_body(treebuilder,
+ token, type);
+ } else if (type == XMP) {
+ reconstruct_active_formatting_list(treebuilder);
+ parse_generic_rcdata(treebuilder, token, false);
+ } else if (type == TABLE) {
+ process_container_in_body(treebuilder, token);
+
+ if (treebuilder->context.mode == IN_BODY) {
+ treebuilder->context.mode = IN_TABLE;
+ }
+ } else if (type == AREA || type == BASEFONT ||
+ type == BGSOUND || type == BR ||
+ type == EMBED || type == IMG || type == PARAM ||
+ type == SPACER || type == WBR) {
+ reconstruct_active_formatting_list(treebuilder);
+ insert_element_no_push(treebuilder, &token->data.tag);
+ } else if (type == HR) {
+ process_hr_in_body(treebuilder, token);
+ } else if (type == IMAGE) {
+ process_image_in_body(treebuilder, token);
+ } else if (type == INPUT) {
+ process_input_in_body(treebuilder, token);
+ } else if (type == ISINDEX) {
+ process_isindex_in_body(treebuilder, token);
+ } else if (type == TEXTAREA) {
+ process_textarea_in_body(treebuilder, token);
+ } else if (type == IFRAME || type == NOEMBED ||
+ type == NOFRAMES ||
+ (false /* scripting */ && type == NOSCRIPT)) { /* the constant false means NOSCRIPT never matches here and reaches the final else */
+ parse_generic_rcdata(treebuilder, token, false);
+ } else if (type == SELECT) {
+ process_select_in_body(treebuilder, token);
+
+ if (treebuilder->context.mode == IN_BODY) {
+ treebuilder->context.mode = IN_SELECT;
+ } else if (treebuilder->context.mode == IN_TABLE ||
+ treebuilder->context.mode == IN_CAPTION ||
+ treebuilder->context.mode == IN_COLUMN_GROUP ||
+ treebuilder->context.mode == IN_TABLE_BODY ||
+ treebuilder->context.mode == IN_ROW ||
+ treebuilder->context.mode == IN_CELL) {
+ treebuilder->context.mode = IN_SELECT_IN_TABLE;
+ }
+ } else if (type == CAPTION || type == COL || type == COLGROUP ||
+ type == FRAME || type == FRAMESET ||
+ type == HEAD || type == OPTION ||
+ type == OPTGROUP || type == TBODY ||
+ type == TD || type == TFOOT || type == TH ||
+ type == THEAD || type == TR) {
+ /** \todo parse error */
+/* } else if (type == EVENT_SOURCE || type == SECTION ||
+ type == NAV || type == ARTICLE ||
+ type == ASIDE || type == HEADER ||
+ type == FOOTER || type == DATAGRID ||
+ type == COMMAND) {
+*/ } else {
+ process_phrasing_in_body(treebuilder, token);
+ }
+
+ return reprocess;
+}
+
+/**
+ * Process an end tag
+ *
+ * The tag name is classified with element_type_from_name() and the matching
+ * end tag handler is invoked; any type not listed explicitly is handled by
+ * process_0generic_in_body().
+ *
+ * </html> is treated as an implied </body>. The token is only handed back
+ * for reprocessing if that implied </body> was acted upon: requesting
+ * reprocessing after it was ignored would leave the insertion mode
+ * unchanged and present the same token to the same handler again.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ * \return True to reprocess the token
+ */
+bool process_end_tag(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	bool reprocess = false;
+	element_type type = element_type_from_name(treebuilder,
+			&token->data.tag.name);
+
+	if (type == BODY) {
+		if (process_0body_in_body(treebuilder) &&
+				treebuilder->context.mode == IN_BODY) {
+			treebuilder->context.mode = AFTER_BODY;
+		}
+	} else if (type == HTML) {
+		/* Act as if </body> has been seen then, if
+		 * that wasn't ignored, reprocess this token.
+		 * NOTE(review): relies on process_0body_in_body()
+		 * returning false when the end tag is ignored */
+		if (process_0body_in_body(treebuilder)) {
+			if (treebuilder->context.mode == IN_BODY) {
+				treebuilder->context.mode = AFTER_BODY;
+			}
+
+			reprocess = true;
+		}
+	} else if (type == ADDRESS || type == BLOCKQUOTE ||
+			type == CENTER || type == DIR || type == DIV ||
+			type == DL || type == FIELDSET ||
+			type == LISTING || type == MENU ||
+			type == OL || type == PRE || type == UL ||
+			type == FORM) {
+		process_0container_in_body(treebuilder, type);
+	} else if (type == P) {
+		process_0p_in_body(treebuilder);
+	} else if (type == DD || type == DT || type == LI) {
+		process_0dd_dt_li_in_body(treebuilder, type);
+	} else if (type == H1 || type == H2 || type == H3 ||
+			type == H4 || type == H5 || type == H6) {
+		process_0h_in_body(treebuilder, type);
+	} else if (type == A || type == B || type == BIG ||
+			type == EM || type == FONT || type == I ||
+			type == NOBR || type == S || type == SMALL ||
+			type == STRIKE || type == STRONG ||
+			type == TT || type == U) {
+		process_0presentational_in_body(treebuilder, type);
+	} else if (type == APPLET || type == BUTTON ||
+			type == MARQUEE || type == OBJECT) {
+		process_0applet_button_marquee_object_in_body(
+				treebuilder, type);
+	} else if (type == BR) {
+		process_0br_in_body(treebuilder);
+	} else if (type == AREA || type == BASEFONT ||
+			type == BGSOUND || type == EMBED ||
+			type == HR || type == IFRAME ||
+			type == IMAGE || type == IMG ||
+			type == INPUT || type == ISINDEX ||
+			type == NOEMBED || type == NOFRAMES ||
+			type == PARAM || type == SELECT ||
+			type == SPACER || type == TABLE ||
+			type == TEXTAREA || type == WBR ||
+			(false /* scripting enabled */ &&
+			type == NOSCRIPT)) {
+		/* These end tags are ignored in this mode */
+		/** \todo parse error */
+	} else {
+		/** \todo Dedicated handling for EVENT_SOURCE, SECTION, NAV,
+		 * ARTICLE, ASIDE, HEADER, FOOTER, DATAGRID and COMMAND;
+		 * for now they are treated like any other end tag */
+		process_0generic_in_body(treebuilder, type);
+	}
+
+	return reprocess;
+}
+
+/**
+ * Handle an <html> start tag seen while "in body"
+ *
+ * The token's attributes are passed to the tree handler's add_attributes
+ * callback for the node at the bottom of the open element stack.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ */
+void process_html_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	void *root = treebuilder->context.element_stack[0].node;
+
+	/** \todo parse error */
+
+	treebuilder->tree_handler->add_attributes(
+			treebuilder->tree_handler->ctx, root,
+			token->data.tag.attributes,
+			token->data.tag.n_attributes);
+}
+
+/**
+ * Handle a <body> start tag seen while "in body"
+ *
+ * If the second entry on the open element stack is a BODY element, the
+ * token's attributes are passed to the tree handler's add_attributes
+ * callback for that node; otherwise the token has no effect.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ */
+void process_body_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	element_context *stack = treebuilder->context.element_stack;
+
+	/** \todo parse error */
+
+	if (treebuilder->context.current_node >= 1 &&
+			stack[1].type == BODY) {
+		treebuilder->tree_handler->add_attributes(
+				treebuilder->tree_handler->ctx,
+				stack[1].node,
+				token->data.tag.attributes,
+				token->data.tag.n_attributes);
+	}
+}
+
+/**
+ * Handle the start tag of a generic container element while "in body"
+ *
+ * An open <p> in scope is closed first (as if </p> had been seen), then
+ * the new element is inserted.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ */
+void process_container_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	const bool p_is_open = element_in_scope(treebuilder, P, false);
+
+	if (p_is_open)
+		process_0p_in_body(treebuilder);
+
+	insert_element(treebuilder, &token->data.tag);
+}
+
+/**
+ * Handle a <form> start tag while "in body"
+ *
+ * The tag is ignored if a form element is already recorded in the
+ * treebuilder context. Otherwise an open <p> in scope is closed, the
+ * element is inserted, and the new node is ref'd and remembered as the
+ * current form element.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ */
+void process_form_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	void *form;
+
+	if (treebuilder->context.form_element != NULL) {
+		/** \todo parse error */
+		return;
+	}
+
+	if (element_in_scope(treebuilder, P, false))
+		process_0p_in_body(treebuilder);
+
+	insert_element(treebuilder, &token->data.tag);
+
+	/* The freshly inserted element is now the current node */
+	form = treebuilder->context.element_stack[
+			treebuilder->context.current_node].node;
+
+	/* The context holds its own reference on the form element */
+	treebuilder->tree_handler->ref_node(
+			treebuilder->tree_handler->ctx, form);
+
+	treebuilder->context.form_element = form;
+}
+
+/**
+ * Handle a <dd>, <dt> or <li> start tag while "in body"
+ *
+ * After closing any <p> in scope, the open element stack is searched from
+ * the current node downwards for an element of the same family (LI for
+ * <li>; DD or DT for <dd>/<dt>). The search stops early at the first
+ * element that is neither formatting, phrasing, ADDRESS nor DIV. If a
+ * matching element was found, it and everything above it are popped and
+ * unref'd. Finally the new element is inserted.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ * \param type         The element type
+ */
+void process_dd_dt_li_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token, element_type type)
+{
+	element_context *stack = treebuilder->context.element_stack;
+	const bool want_li = (type == LI);
+	const bool want_dd_dt = (type == DD || type == DT);
+	uint32_t idx;
+
+	if (element_in_scope(treebuilder, P, false))
+		process_0p_in_body(treebuilder);
+
+	/* Locate the nearest open LI/(DD,DT), if any */
+	idx = treebuilder->context.current_node;
+	while (idx > 0) {
+		element_type open = stack[idx].type;
+
+		if (want_li && open == LI)
+			break;
+
+		if (want_dd_dt && (open == DD || open == DT))
+			break;
+
+		/* Anything other than these element kinds ends the search */
+		if (!is_formatting_element(open) &&
+				!is_phrasing_element(open) &&
+				open != ADDRESS &&
+				open != DIV)
+			break;
+
+		idx--;
+	}
+
+	/* Pop everything down to and including the element found */
+	if (stack[idx].type == LI || stack[idx].type == DD ||
+			stack[idx].type == DT) {
+		if (treebuilder->context.current_node > idx) {
+			/* More than one element will be popped */
+			/** \todo parse error */
+		}
+
+		do {
+			element_type popped_type;
+			void *popped_node;
+
+			if (!element_stack_pop(treebuilder, &popped_type,
+					&popped_node)) {
+				/** \todo errors */
+			}
+
+			treebuilder->tree_handler->unref_node(
+					treebuilder->tree_handler->ctx,
+					popped_node);
+		} while (treebuilder->context.current_node >= idx);
+	}
+
+	insert_element(treebuilder, &token->data.tag);
+}
+
+/**
+ * Handle a <plaintext> start tag while "in body"
+ *
+ * Closes any <p> in scope, inserts the element and then switches the
+ * tokeniser's content model to PLAINTEXT.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ */
+void process_plaintext_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	hubbub_tokeniser_optparams opts;
+	const bool p_is_open = element_in_scope(treebuilder, P, false);
+
+	if (p_is_open)
+		process_0p_in_body(treebuilder);
+
+	insert_element(treebuilder, &token->data.tag);
+
+	/* Tell the tokeniser to use the PLAINTEXT content model from now on */
+	opts.content_model.model = HUBBUB_CONTENT_MODEL_PLAINTEXT;
+	hubbub_tokeniser_setopt(treebuilder->tokeniser,
+			HUBBUB_TOKENISER_CONTENT_MODEL, &opts);
+}
+
+/**
+ * Process a <a> start tag as if in "in body"
+ *
+ * If aa_find_formatting_element() reports an existing A entry, this first
+ * behaves as if </a> had been seen. Afterwards, if the same entry (with the
+ * same node) is still reported, it is removed from the formatting list and
+ * its node unref'd; and if the original stack slot still holds that node,
+ * the slot is removed from the stack of open elements.
+ *
+ * In all cases the active formatting list is then reconstructed, the new
+ * element is inserted, ref'd, and appended to the formatting list together
+ * with its stack index.
+ *
+ * \param treebuilder The treebuilder instance
+ * \param token The token to process
+ */
+void process_a_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token)
+{
+ formatting_list_entry *entry =
+ aa_find_formatting_element(treebuilder, A);
+
+ if (entry != NULL) {
+ uint32_t index = entry->stack_index; /* captured before </a> handling so both can be compared afterwards */
+ void *node = entry->details.node;
+ formatting_list_entry *entry2;
+
+ /** \todo parse error */
+
+ /* Act as if </a> were seen */
+ process_0presentational_in_body(treebuilder, A);
+
+ entry2 = aa_find_formatting_element(treebuilder, A);
+
+ /* Remove from formatting list, if it's still there */
+ if (entry2 == entry && entry2->details.node == node) {
+ element_type otype;
+ void *onode;
+ uint32_t oindex;
+
+ formatting_list_remove(treebuilder, entry,
+ &otype, &onode, &oindex);
+
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx, onode);
+
+ }
+
+ /* Remove from the stack of open elements, if still there */
+ if (index <= treebuilder->context.current_node &&
+ treebuilder->context.element_stack[index].node
+ == node) {
+ aa_remove_element_stack_item(treebuilder, index,
+ treebuilder->context.current_node);
+ treebuilder->context.current_node--; /* the stack is one entry shorter after the removal */
+ }
+ }
+
+ reconstruct_active_formatting_list(treebuilder);
+
+ insert_element(treebuilder, &token->data.tag);
+
+ treebuilder->tree_handler->ref_node(treebuilder->tree_handler->ctx,
+ treebuilder->context.element_stack[
+ treebuilder->context.current_node].node);
+
+ formatting_list_append(treebuilder, A,
+ treebuilder->context.element_stack[
+ treebuilder->context.current_node].node,
+ treebuilder->context.current_node);
+}
+
+/**
+ * Handle the start tag of a presentational element while "in body"
+ * (<b>, <big>, <em>, <font>, <i>, <s>, <small>, <strike>, <strong>,
+ * <tt> or <u>)
+ *
+ * The element is inserted, then ref'd and recorded in the formatting list
+ * along with its index on the open element stack.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ * \param type         The element type
+ */
+void process_presentational_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token, element_type type)
+{
+	uint32_t top;
+	void *element;
+
+	reconstruct_active_formatting_list(treebuilder);
+
+	insert_element(treebuilder, &token->data.tag);
+
+	/* The element just inserted is the current node */
+	top = treebuilder->context.current_node;
+	element = treebuilder->context.element_stack[top].node;
+
+	/* The formatting list entry keeps its own reference */
+	treebuilder->tree_handler->ref_node(
+			treebuilder->tree_handler->ctx, element);
+
+	formatting_list_append(treebuilder, type, element, top);
+}
+
+/**
+ * Handle a <nobr> start tag while "in body"
+ *
+ * If a NOBR element is already in scope it is closed first (as if </nobr>
+ * had been seen) and the active formatting list is reconstructed a second
+ * time. The new element is then inserted, ref'd and added to the
+ * formatting list.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ */
+void process_nobr_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	uint32_t top;
+	void *element;
+
+	reconstruct_active_formatting_list(treebuilder);
+
+	if (element_in_scope(treebuilder, NOBR, false)) {
+		/** \todo parse error */
+
+		/* Behave as though </nobr> had been encountered */
+		process_0presentational_in_body(treebuilder, NOBR);
+
+		/* The list needs reconstructing once more afterwards */
+		reconstruct_active_formatting_list(treebuilder);
+	}
+
+	insert_element(treebuilder, &token->data.tag);
+
+	top = treebuilder->context.current_node;
+	element = treebuilder->context.element_stack[top].node;
+
+	treebuilder->tree_handler->ref_node(
+			treebuilder->tree_handler->ctx, element);
+
+	formatting_list_append(treebuilder, NOBR, element, top);
+}
+
+/**
+ * Handle a <button> start tag while "in body"
+ *
+ * A BUTTON already in scope is closed first (as if </button> had been
+ * seen). The new element is inserted, associated with the current form
+ * element when there is one, then ref'd and added to the formatting list.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ */
+void process_button_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	uint32_t top;
+	void *button;
+
+	if (element_in_scope(treebuilder, BUTTON, false)) {
+		/** \todo parse error */
+
+		/* Behave as though </button> had been encountered */
+		process_0applet_button_marquee_object_in_body(treebuilder,
+				BUTTON);
+	}
+
+	reconstruct_active_formatting_list(treebuilder);
+
+	insert_element(treebuilder, &token->data.tag);
+
+	top = treebuilder->context.current_node;
+	button = treebuilder->context.element_stack[top].node;
+
+	if (treebuilder->context.form_element != NULL) {
+		treebuilder->tree_handler->form_associate(
+				treebuilder->tree_handler->ctx,
+				treebuilder->context.form_element,
+				button);
+	}
+
+	treebuilder->tree_handler->ref_node(
+			treebuilder->tree_handler->ctx, button);
+
+	formatting_list_append(treebuilder, BUTTON, button, top);
+}
+
+/**
+ * Handle an <applet>, <marquee> or <object> start tag while "in body"
+ *
+ * The element is inserted, then ref'd and recorded in the formatting list
+ * under the given type, along with its index on the open element stack.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ * \param type         The element type
+ */
+void process_applet_marquee_object_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token, element_type type)
+{
+	uint32_t top;
+	void *element;
+
+	reconstruct_active_formatting_list(treebuilder);
+
+	insert_element(treebuilder, &token->data.tag);
+
+	top = treebuilder->context.current_node;
+	element = treebuilder->context.element_stack[top].node;
+
+	treebuilder->tree_handler->ref_node(
+			treebuilder->tree_handler->ctx, element);
+
+	formatting_list_append(treebuilder, type, element, top);
+}
+
+/**
+ * Handle an <hr> start tag while "in body"
+ *
+ * Closes any <p> in scope, then inserts the element without pushing it
+ * onto the open element stack.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ */
+void process_hr_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	const bool p_is_open = element_in_scope(treebuilder, P, false);
+
+	if (p_is_open)
+		process_0p_in_body(treebuilder);
+
+	insert_element_no_push(treebuilder, &token->data.tag);
+}
+
+/**
+ * Handle an <image> start tag while "in body"
+ *
+ * The tag is treated as <img>: a replacement tag named "img" carrying the
+ * original token's attributes is inserted without being pushed onto the
+ * open element stack.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ */
+void process_image_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	hubbub_tag img;
+
+	/* Carry the original attributes over unchanged */
+	img.attributes = token->data.tag.attributes;
+	img.n_attributes = token->data.tag.n_attributes;
+
+	/* Substitute the element name */
+	/** \todo UTF-16 */
+	img.name.type = HUBBUB_STRING_PTR;
+	img.name.data.ptr = (const uint8_t *) "img";
+	img.name.len = SLEN("img");
+
+	reconstruct_active_formatting_list(treebuilder);
+
+	insert_element_no_push(treebuilder, &img);
+}
+
+/**
+ * Handle an <input> start tag while "in body"
+ *
+ * The element is inserted, associated with the current form element when
+ * there is one, and then immediately popped off the open element stack
+ * and unref'd.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ */
+void process_input_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	element_type popped_type;
+	void *popped_node;
+
+	reconstruct_active_formatting_list(treebuilder);
+
+	insert_element(treebuilder, &token->data.tag);
+
+	if (treebuilder->context.form_element != NULL) {
+		void *input = treebuilder->context.element_stack[
+				treebuilder->context.current_node].node;
+
+		treebuilder->tree_handler->form_associate(
+				treebuilder->tree_handler->ctx,
+				treebuilder->context.form_element,
+				input);
+	}
+
+	/* The element does not stay on the stack: pop it straight away */
+	if (!element_stack_pop(treebuilder, &popped_type, &popped_node)) {
+		/** \todo errors */
+	}
+
+	treebuilder->tree_handler->unref_node(
+			treebuilder->tree_handler->ctx, popped_node);
+}
+
+/**
+ * Process an <isindex> start tag as if in "in body"
+ *
+ * The tag is ignored if a form element is already open. Otherwise it is
+ * expanded into the synthetic token sequence
+ *   <form> <hr> <p> <label> text <input> </label> </p> <hr> </form>
+ * with each token dispatched to the matching "in body" handler. An
+ * "action" attribute on the token is given to the <form>, a "prompt"
+ * attribute supplies the label text, and every other attribute is
+ * passed on to the <input>.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ */
+void process_isindex_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	hubbub_token dummy;
+	hubbub_attribute *action = NULL;
+	hubbub_attribute *prompt = NULL;
+	hubbub_attribute *attrs = NULL;
+	size_t n_attrs = 0;
+
+	/** \todo parse error */
+
+	if (treebuilder->context.form_element != NULL)
+		return;
+
+	/* First up, clone the token's attributes */
+	if (token->data.tag.n_attributes > 0) {
+		attrs = treebuilder->alloc(NULL,
+			token->data.tag.n_attributes * sizeof(hubbub_attribute),
+			treebuilder->alloc_pw);
+		if (attrs == NULL) {
+			/** \todo error handling */
+			return;
+		}
+
+		for (uint32_t i = 0; i < token->data.tag.n_attributes; i++) {
+			hubbub_attribute *attr = &token->data.tag.attributes[i];
+			const uint8_t *name = treebuilder->input_buffer +
+					attr->name.data.off;
+
+			/* Attribute names carry an explicit length, so the
+			 * length must match the keyword exactly. Comparing
+			 * only attr->name.len bytes would accept any prefix
+			 * of the keyword (e.g. "act") or an empty name. */
+			if (attr->name.len == SLEN("action") &&
+					strncmp((const char *) name, "action",
+					SLEN("action")) == 0) {
+				action = attr;
+			} else if (attr->name.len == SLEN("prompt") &&
+					strncmp((const char *) name, "prompt",
+					SLEN("prompt")) == 0) {
+				prompt = attr;
+			} else {
+				attrs[n_attrs++] = *attr;
+			}
+		}
+	}
+
+	/* isindex algorithm */
+
+	/* Set up dummy as a start tag token */
+	dummy.type = HUBBUB_TOKEN_START_TAG;
+	dummy.data.tag.name.type = HUBBUB_STRING_PTR;
+
+	/* Act as if <form> were seen */
+	dummy.data.tag.name.data.ptr = (const uint8_t *) "form";
+	dummy.data.tag.name.len = SLEN("form");
+
+	dummy.data.tag.n_attributes = action != NULL ? 1 : 0;
+	dummy.data.tag.attributes = action;
+
+	process_form_in_body(treebuilder, &dummy);
+
+	/* Act as if <hr> were seen */
+	dummy.data.tag.name.data.ptr = (const uint8_t *) "hr";
+	dummy.data.tag.name.len = SLEN("hr");
+	dummy.data.tag.n_attributes = 0;
+	dummy.data.tag.attributes = NULL;
+
+	process_hr_in_body(treebuilder, &dummy);
+
+	/* Act as if <p> were seen */
+	dummy.data.tag.name.data.ptr = (const uint8_t *) "p";
+	dummy.data.tag.name.len = SLEN("p");
+	dummy.data.tag.n_attributes = 0;
+	dummy.data.tag.attributes = NULL;
+
+	process_container_in_body(treebuilder, &dummy);
+
+	/* Act as if <label> were seen */
+	dummy.data.tag.name.data.ptr = (const uint8_t *) "label";
+	dummy.data.tag.name.len = SLEN("label");
+	dummy.data.tag.n_attributes = 0;
+	dummy.data.tag.attributes = NULL;
+
+	process_phrasing_in_body(treebuilder, &dummy);
+
+	/* Act as if a stream of characters were seen */
+	dummy.type = HUBBUB_TOKEN_CHARACTER;
+	if (prompt != NULL) {
+		dummy.data.character = prompt->value;
+	} else {
+		/** \todo Localisation */
+#define PROMPT "This is a searchable index. Insert your search keywords here:"
+		dummy.data.character.type = HUBBUB_STRING_PTR;
+		dummy.data.character.data.ptr = (const uint8_t *) PROMPT;
+		dummy.data.character.len = SLEN(PROMPT);
+#undef PROMPT
+	}
+
+	process_character(treebuilder, &dummy);
+
+	/* Act as if <input> was seen */
+	dummy.type = HUBBUB_TOKEN_START_TAG;
+	dummy.data.tag.name.type = HUBBUB_STRING_PTR;
+	dummy.data.tag.name.data.ptr = (const uint8_t *) "input";
+	dummy.data.tag.name.len = SLEN("input");
+
+	dummy.data.tag.n_attributes = n_attrs;
+	dummy.data.tag.attributes = attrs;
+
+	process_input_in_body(treebuilder, &dummy);
+
+	/* Act as if </label> was seen */
+	process_0generic_in_body(treebuilder, LABEL);
+
+	/* Act as if </p> was seen */
+	process_0p_in_body(treebuilder);
+
+	/* Act as if <hr> was seen */
+	dummy.data.tag.name.data.ptr = (const uint8_t *) "hr";
+	dummy.data.tag.name.len = SLEN("hr");
+	dummy.data.tag.n_attributes = 0;
+	dummy.data.tag.attributes = NULL;
+
+	process_hr_in_body(treebuilder, &dummy);
+
+	/* Act as if </form> was seen */
+	process_0container_in_body(treebuilder, FORM);
+
+	/* Clean up: a zero-size request releases the cloned attribute array */
+	treebuilder->alloc(attrs, 0, treebuilder->alloc_pw);
+}
+
+/**
+ * Process a <textarea> start tag as if in "in body"
+ *
+ * Sets the context's strip_leading_lr flag, then hands the token to the
+ * generic RCDATA parser.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ */
+void process_textarea_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	hubbub_treebuilder_context *ctx = &treebuilder->context;
+
+	/* The flag must be in place before character data starts arriving */
+	ctx->strip_leading_lr = true;
+
+	parse_generic_rcdata(treebuilder, token, true);
+}
+
+/**
+ * Process a <select> start tag as if in "in body"
+ *
+ * Reconstructs the active formatting list, inserts the element, and
+ * associates it with the open form element, if there is one.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ */
+void process_select_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	hubbub_treebuilder_context *ctx = &treebuilder->context;
+
+	reconstruct_active_formatting_list(treebuilder);
+	insert_element(treebuilder, &token->data.tag);
+
+	/* Nothing to associate with when no form is open */
+	if (ctx->form_element == NULL)
+		return;
+
+	/* The newly inserted element is the current node */
+	treebuilder->tree_handler->form_associate(
+			treebuilder->tree_handler->ctx,
+			ctx->form_element,
+			ctx->element_stack[ctx->current_node].node);
+}
+
+/**
+ * Process a phrasing start tag as if in "in body"
+ *
+ * Reconstructs the active formatting list and then inserts an element
+ * for the token's tag.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ */
+void process_phrasing_in_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	const hubbub_tag *tag = &token->data.tag;
+
+	reconstruct_active_formatting_list(treebuilder);
+	insert_element(treebuilder, tag);
+}
+
+/**
+ * Process a </body> end tag as if in "in body"
+ *
+ * If there is no body element in scope the token is ignored and false
+ * is returned, so callers can tell an ignored token from a processed
+ * one. Otherwise the stack of open elements is scanned for elements
+ * that should not still be open (parse error reporting is still to be
+ * implemented).
+ *
+ * \param treebuilder  The treebuilder instance
+ * \return True if processed, false otherwise
+ */
+bool process_0body_in_body(hubbub_treebuilder *treebuilder)
+{
+	element_context *stack = treebuilder->context.element_stack;
+	uint32_t node;
+
+	if (!element_in_scope(treebuilder, BODY, false)) {
+		/** \todo parse error */
+
+		/* Token ignored: report it as not processed */
+		return false;
+	}
+
+	/* Entry 0 of the stack is never examined */
+	for (node = treebuilder->context.current_node; node > 0; node--) {
+		element_type ntype = stack[node].type;
+
+		if (ntype != DD && ntype != DT && ntype != LI &&
+				ntype != P && ntype != TBODY &&
+				ntype != TD && ntype != TFOOT &&
+				ntype != TH && ntype != THEAD &&
+				ntype != TR && ntype != BODY) {
+			/** \todo parse error */
+		}
+	}
+
+	return true;
+}
+
+/**
+ * Process a container end tag as if in "in body"
+ *
+ * For </form>, the treebuilder's reference to the open form element
+ * (if any) is released and the form pointer is cleared. Then, if an
+ * element of the given type is in scope, implied end tags are closed
+ * and the stack is popped up to and including that element.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param type         The element type
+ */
+void process_0container_in_body(hubbub_treebuilder *treebuilder,
+		element_type type)
+{
+	if (type == FORM) {
+		/* A stray </form> can arrive with no form open; that is a
+		 * property of the input document, not a programming error,
+		 * so it must not trip an assertion or unref a NULL node. */
+		if (treebuilder->context.form_element != NULL) {
+			treebuilder->tree_handler->unref_node(
+					treebuilder->tree_handler->ctx,
+					treebuilder->context.form_element);
+		}
+		treebuilder->context.form_element = NULL;
+	}
+
+	if (!element_in_scope(treebuilder, type, false)) {
+		/** \todo parse error */
+	} else {
+		uint32_t popped = 0;
+		element_type otype;
+
+		close_implied_end_tags(treebuilder, UNKNOWN);
+
+		/* Pop up to and including the element being closed */
+		do {
+			void *node;
+
+			if (!element_stack_pop(treebuilder, &otype, &node)) {
+				/** \todo errors */
+			}
+
+			treebuilder->tree_handler->unref_node(
+					treebuilder->tree_handler->ctx,
+					node);
+
+			popped++;
+		} while (otype != type);
+
+		/* More than one pop means the element being closed
+		 * was not the current node */
+		if (popped > 1) {
+			/** \todo parse error */
+		}
+	}
+}
+
+/**
+ * Process a </p> end tag as if in "in body"
+ *
+ * Pops the stack of open elements for as long as a P element remains in
+ * scope. If nothing was popped, a <p> start tag is synthesised and
+ * processed, after which the end tag is processed again.
+ *
+ * \param treebuilder  The treebuilder instance
+ */
+void process_0p_in_body(hubbub_treebuilder *treebuilder)
+{
+	hubbub_treebuilder_context *ctx = &treebuilder->context;
+	uint32_t n_popped;
+	hubbub_token p_start;
+
+	if (ctx->element_stack[ctx->current_node].type != P) {
+		/** \todo parse error */
+	}
+
+	for (n_popped = 0; element_in_scope(treebuilder, P, false);
+			n_popped++) {
+		element_type popped_type;
+		void *popped_node;
+
+		if (!element_stack_pop(treebuilder,
+				&popped_type, &popped_node)) {
+			/** \todo errors */
+		}
+
+		treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx, popped_node);
+	}
+
+	/* At least one element was closed: we are done */
+	if (n_popped != 0)
+		return;
+
+	/* No P was in scope: act as if <p> had been seen ... */
+	p_start.type = HUBBUB_TOKEN_START_TAG;
+	p_start.data.tag.name.type = HUBBUB_STRING_PTR;
+	/** \todo UTF-16 */
+	p_start.data.tag.name.data.ptr = (const uint8_t *) "p";
+	p_start.data.tag.name.len = SLEN("p");
+	p_start.data.tag.n_attributes = 0;
+	p_start.data.tag.attributes = NULL;
+
+	process_container_in_body(treebuilder, &p_start);
+
+	/* ... then reprocess the end tag. This is safe as we've just
+	 * inserted a <p> into the current scope */
+	process_0p_in_body(treebuilder);
+}
+
+/**
+ * Process a </dd>, </dt>, or </li> end tag as if in "in body"
+ *
+ * Does nothing when no element of the given type is in scope. Otherwise
+ * implied end tags (except for the given type) are closed and the stack
+ * is popped up to and including the element of that type.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param type         The element type
+ */
+void process_0dd_dt_li_in_body(hubbub_treebuilder *treebuilder,
+		element_type type)
+{
+	uint32_t n_popped = 0;
+	element_type popped_type;
+
+	if (!element_in_scope(treebuilder, type, false)) {
+		/** \todo parse error */
+		return;
+	}
+
+	close_implied_end_tags(treebuilder, type);
+
+	do {
+		void *popped_node;
+
+		if (!element_stack_pop(treebuilder,
+				&popped_type, &popped_node)) {
+			/** \todo errors */
+		}
+
+		treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx,
+				popped_node);
+
+		n_popped++;
+	} while (popped_type != type);
+
+	if (n_popped > 1) {
+		/** \todo parse error */
+	}
+}
+
+/**
+ * Process a </h1>, </h2>, </h3>, </h4>,
+ * </h5>, or </h6> end tag as if in "in body"
+ *
+ * If any heading element is in scope, implied end tags are closed and
+ * the stack is popped until a heading element (of any level) has been
+ * removed. The level of the end tag itself is not consulted.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param type         The element type (unused)
+ */
+void process_0h_in_body(hubbub_treebuilder *treebuilder,
+		element_type type)
+{
+	bool heading_in_scope;
+	bool is_heading;
+	uint32_t n_popped = 0;
+
+	UNUSED(type);
+
+	/** \todo optimise this */
+	heading_in_scope = element_in_scope(treebuilder, H1, false) ||
+			element_in_scope(treebuilder, H2, false) ||
+			element_in_scope(treebuilder, H3, false) ||
+			element_in_scope(treebuilder, H4, false) ||
+			element_in_scope(treebuilder, H5, false) ||
+			element_in_scope(treebuilder, H6, false);
+
+	if (!heading_in_scope) {
+		/** \todo parse error */
+		return;
+	}
+
+	close_implied_end_tags(treebuilder, UNKNOWN);
+
+	do {
+		element_type popped_type;
+		void *popped_node;
+
+		if (!element_stack_pop(treebuilder,
+				&popped_type, &popped_node)) {
+			/** \todo errors */
+		}
+
+		treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx,
+				popped_node);
+
+		n_popped++;
+
+		is_heading = (popped_type == H1 || popped_type == H2 ||
+				popped_type == H3 || popped_type == H4 ||
+				popped_type == H5 || popped_type == H6);
+	} while (!is_heading);
+
+	if (n_popped > 1) {
+		/** \todo parse error */
+	}
+}
+
+/**
+ * Process a presentational end tag as if in "in body"
+ *
+ * Repeatedly runs the numbered steps below until either
+ * aa_find_and_validate_formatting_element() or aa_find_furthest_block()
+ * reports that processing should stop. The step numbers presumably follow
+ * the HTML5 "adoption agency" algorithm; steps 12 and 13 are deliberately
+ * performed in reverse order (see the comment before step 13).
+ *
+ * NOTE(review): furthest_block is passed by value to
+ * aa_find_bookmark_location_reparenting_misnested(), which decrements its
+ * own copy whenever it removes an entry from the element stack. The index
+ * held here is not adjusted to match before it is used in steps 10, 11 and
+ * 13 -- confirm whether that is intended.
+ *
+ * \param treebuilder The treebuilder instance
+ * \param type The element type
+ */
+void process_0presentational_in_body(hubbub_treebuilder *treebuilder,
+ element_type type)
+{
+ /* Welcome to the adoption agency */
+
+ while (true) {
+ element_context *stack = treebuilder->context.element_stack;
+
+ /* 1 */
+ formatting_list_entry *entry;
+ uint32_t formatting_element;
+
+ if (!aa_find_and_validate_formatting_element(treebuilder,
+ type, &entry))
+ return;
+
+ /* Take a copy of the stack index for use
+ * during stack manipulation */
+ formatting_element = entry->stack_index;
+
+ /* 2 & 3 */
+ uint32_t furthest_block;
+
+ if (!aa_find_furthest_block(treebuilder,
+ entry, &furthest_block))
+ return;
+
+ /* 4 */
+ /* The common ancestor is the stack entry immediately
+ * before the formatting element */
+ uint32_t common_ancestor = formatting_element - 1;
+
+ /* 5 */
+ aa_remove_from_parent(treebuilder, stack[furthest_block].node);
+
+ /* 6 */
+ /* Bookmark starts at the formatting element's own position
+ * in the formatting list */
+ bookmark bookmark;
+
+ bookmark.prev = entry->prev;
+ bookmark.next = entry->next;
+
+ /* 7 */
+ uint32_t last_node;
+
+ aa_find_bookmark_location_reparenting_misnested(treebuilder,
+ formatting_element, furthest_block,
+ &bookmark, &last_node);
+
+ /* 8 */
+ /* Table-related common ancestors receive last node via
+ * the foster parent instead of directly */
+ if (stack[common_ancestor].type == TABLE ||
+ stack[common_ancestor].type == TBODY ||
+ stack[common_ancestor].type == TFOOT ||
+ stack[common_ancestor].type == THEAD ||
+ stack[common_ancestor].type == TR) {
+ aa_insert_into_foster_parent(treebuilder,
+ stack[last_node].node);
+ } else {
+ aa_reparent_node(treebuilder, stack[last_node].node,
+ stack[common_ancestor].node);
+ }
+
+ /* 9 */
+ /* Shallow clone of the formatting element */
+ void *fe_clone = NULL;
+
+ treebuilder->tree_handler->clone_node(
+ treebuilder->tree_handler->ctx,
+ entry->details.node, false, &fe_clone);
+
+ /* 10 */
+ /* Move the furthest block's children into the clone */
+ treebuilder->tree_handler->reparent_children(
+ treebuilder->tree_handler->ctx,
+ stack[furthest_block].node, fe_clone);
+
+ /* 11 */
+ /* Append the clone to the furthest block */
+ void *clone_appended = NULL;
+
+ treebuilder->tree_handler->append_child(
+ treebuilder->tree_handler->ctx,
+ stack[furthest_block].node, fe_clone,
+ &clone_appended);
+
+ /* 12 and 13 are reversed here so that we know the correct
+ * stack index to use when inserting into the formatting list */
+
+ /* 13 */
+ aa_remove_element_stack_item(treebuilder, formatting_element,
+ furthest_block);
+
+ /* Fix up furthest block index */
+ furthest_block--;
+
+ /* Now, in the gap after furthest block,
+ * we insert an entry for clone */
+ stack[furthest_block + 1].type = entry->details.type;
+ stack[furthest_block + 1].node = clone_appended;
+
+ /* 12 */
+ /* Replace the formatting element's list entry with one for
+ * the clone, positioned at the bookmark */
+ element_type otype;
+ void *onode;
+ uint32_t oindex;
+
+ formatting_list_remove(treebuilder, entry,
+ &otype, &onode, &oindex);
+
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx, onode);
+
+ formatting_list_insert(treebuilder,
+ bookmark.prev, bookmark.next,
+ otype, fe_clone, furthest_block + 1);
+
+ /* 14 */
+ /* Loop back to step 1 */
+ }
+}
+
+/**
+ * Adoption agency: find and validate the formatting element
+ *
+ * Looks up the most recent formatting list entry of the given type and
+ * checks that it can be used:
+ *  - no entry found: stop;
+ *  - entry has stack index 0 (not in the element stack): remove it from
+ *    the formatting list and stop;
+ *  - entry's element is not the one reported in scope: stop.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param type         Element type to search for
+ * \param element      Pointer to location to receive list entry
+ *                     (only written when true is returned)
+ * \return True to continue processing, false to stop
+ */
+bool aa_find_and_validate_formatting_element(hubbub_treebuilder *treebuilder,
+		element_type type, formatting_list_entry **element)
+{
+	formatting_list_entry *found =
+			aa_find_formatting_element(treebuilder, type);
+
+	if (found == NULL) {
+		/** \todo parse error */
+		return false;
+	}
+
+	if (found->stack_index == 0) {
+		/* Not in element stack => remove from formatting list */
+		element_type removed_type;
+		void *removed_node;
+		uint32_t removed_index;
+
+		/** \todo parse error */
+
+		if (!formatting_list_remove(treebuilder, found,
+				&removed_type, &removed_node,
+				&removed_index)) {
+			/** \todo errors */
+		}
+
+		treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx, removed_node);
+
+		return false;
+	}
+
+	/* In the stack, but the element in scope is a different one */
+	if (element_in_scope(treebuilder, found->details.type, false) !=
+			found->stack_index) {
+		/** \todo parse error */
+		return false;
+	}
+
+	if (found->stack_index != treebuilder->context.current_node) {
+		/** \todo parse error */
+	}
+
+	*element = found;
+
+	return true;
+}
+
+/**
+ * Adoption agency: find formatting element
+ *
+ * Walks the formatting list backwards from its end. The search stops at
+ * the first entry whose type is a scoping element (a marker), in which
+ * case nothing is found, or at the first entry of the requested type.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param type         Type of element to search for
+ * \return Pointer to formatting element, or NULL if none found
+ */
+formatting_list_entry *aa_find_formatting_element(
+		hubbub_treebuilder *treebuilder, element_type type)
+{
+	formatting_list_entry *cur = treebuilder->context.formatting_list_end;
+
+	while (cur != NULL) {
+		/* Assumption: HTML and TABLE elements are not in the list */
+		if (is_scoping_element(cur->details.type)) {
+			/* Hit a marker before any matching element */
+			return NULL;
+		}
+
+		if (cur->details.type == type)
+			return cur;
+
+		cur = cur->prev;
+	}
+
+	return NULL;
+}
+
+/**
+ * Adoption agency: find furthest block
+ *
+ * Scans the element stack upwards from the entry after the formatting
+ * element for the first element that is neither phrasing nor formatting.
+ * If there is none, every element down to and including the formatting
+ * element is popped, the formatting element is removed from the
+ * formatting list, and processing stops.
+ *
+ * \param treebuilder         The treebuilder instance
+ * \param formatting_element  The formatting element
+ * \param furthest_block      Pointer to location to receive furthest block
+ * \return True to continue processing (::furthest_block filled in).
+ */
+bool aa_find_furthest_block(hubbub_treebuilder *treebuilder,
+		formatting_list_entry *formatting_element,
+		uint32_t *furthest_block)
+{
+	hubbub_treebuilder_context *ctx = &treebuilder->context;
+	const uint32_t fe_index = formatting_element->stack_index;
+	uint32_t candidate = fe_index + 1;
+	element_type removed_type;
+	void *removed_node;
+	uint32_t removed_index;
+
+	while (candidate <= ctx->current_node) {
+		element_type t = ctx->element_stack[candidate].type;
+
+		if (!is_phrasing_element(t) && !is_formatting_element(t))
+			break;
+
+		candidate++;
+	}
+
+	if (candidate <= ctx->current_node) {
+		/* Found one */
+		*furthest_block = candidate;
+		return true;
+	}
+
+	/* Pop all elements off the stack up to,
+	 * and including, the formatting element */
+	do {
+		if (!element_stack_pop(treebuilder,
+				&removed_type, &removed_node)) {
+			/** \todo errors */
+		}
+
+		treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx,
+				removed_node);
+	} while (ctx->current_node >= fe_index);
+
+	/* Remove the formatting element from the list */
+	if (!formatting_list_remove(treebuilder, formatting_element,
+			&removed_type, &removed_node, &removed_index)) {
+		/* \todo errors */
+	}
+
+	treebuilder->tree_handler->unref_node(
+			treebuilder->tree_handler->ctx, removed_node);
+
+	return false;
+}
+
+/**
+ * Adoption agency: remove a node from its parent
+ *
+ * Asks the tree handler for the node's parent and, if it has one,
+ * removes the node from it. The references obtained on the removed
+ * child and on the parent are released again before returning.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param node         Node to remove
+ */
+void aa_remove_from_parent(hubbub_treebuilder *treebuilder, void *node)
+{
+	void *old_parent = NULL;
+	void *detached;
+
+	treebuilder->tree_handler->get_parent(treebuilder->tree_handler->ctx,
+			node, false, &old_parent);
+
+	/* Parentless nodes need no work */
+	if (old_parent == NULL)
+		return;
+
+	treebuilder->tree_handler->remove_child(
+			treebuilder->tree_handler->ctx,
+			old_parent, node, &detached);
+
+	treebuilder->tree_handler->unref_node(
+			treebuilder->tree_handler->ctx, detached);
+
+	treebuilder->tree_handler->unref_node(
+			treebuilder->tree_handler->ctx, old_parent);
+}
+
+/**
+ * Adoption agency: reparent a node
+ *
+ * Detaches the node from its current parent (if any), appends it to the
+ * new parent, and releases the reference returned by the append.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param node         The node to reparent
+ * \param new_parent   The new parent
+ */
+void aa_reparent_node(hubbub_treebuilder *treebuilder, void *node,
+		void *new_parent)
+{
+	void *attached;
+
+	/* Detach first ... */
+	aa_remove_from_parent(treebuilder, node);
+
+	/* ... then attach under the new parent */
+	treebuilder->tree_handler->append_child(
+			treebuilder->tree_handler->ctx,
+			new_parent, node, &attached);
+
+	treebuilder->tree_handler->unref_node(
+			treebuilder->tree_handler->ctx, attached);
+}
+
+/**
+ * Adoption agency: this is step 7
+ *
+ * Walks the element stack downwards from the furthest block towards the
+ * formatting element. Stack entries that have no formatting list entry
+ * are removed from the stack. Every other entry between the two (clones
+ * replace those that already have children) becomes the new parent of
+ * the previously visited node. The bookmark is moved to the position of
+ * the first such entry encountered.
+ *
+ * NOTE(review): furthest_block is received by value, so the decrements
+ * made here when stack entries are removed are not visible to the caller.
+ *
+ * \param treebuilder         The treebuilder instance
+ * \param formatting_element  The stack index of the formatting element
+ * \param furthest_block      Index of furthest block in element stack
+ * \param bookmark            Pointer to bookmark (pre-initialised)
+ * \param last_node           Pointer to location to receive index of last node
+ */
+void aa_find_bookmark_location_reparenting_misnested(
+		hubbub_treebuilder *treebuilder,
+		uint32_t formatting_element, uint32_t furthest_block,
+		bookmark *bookmark, uint32_t *last_node)
+{
+	element_context *stack = treebuilder->context.element_stack;
+	uint32_t node, last;
+	formatting_list_entry *node_entry;
+
+	node = last = furthest_block;
+
+	while (true) {
+		/* i */
+		node--;
+
+		/* ii */
+		/* Search the whole formatting list for node. The walk starts
+		 * at the end of the list, so it must follow the prev links;
+		 * following next from the final entry would examine only
+		 * that one entry. */
+		for (node_entry = treebuilder->context.formatting_list_end;
+				node_entry != NULL;
+				node_entry = node_entry->prev) {
+			if (node_entry->stack_index == node)
+				break;
+		}
+
+		/* Node is not in list of active formatting elements */
+		if (node_entry == NULL) {
+			aa_remove_element_stack_item(treebuilder,
+				node, treebuilder->context.current_node);
+
+			/* Update furthest block index and the last node index,
+			 * as these are always below node in the stack */
+			furthest_block--;
+			last--;
+
+			/* Fixup the current_node index */
+			treebuilder->context.current_node--;
+
+			/* Back to i */
+			continue;
+		}
+
+		/* iii */
+		if (node == formatting_element)
+			break;
+
+		/* iv */
+		/* Only the first formatting entry met (while last node is
+		 * still the furthest block) moves the bookmark */
+		if (last == furthest_block) {
+			bookmark->prev = node_entry->prev;
+			bookmark->next = node_entry->next;
+		}
+
+		/* v */
+		bool children = false;
+
+		treebuilder->tree_handler->has_children(
+				treebuilder->tree_handler->ctx,
+				node_entry->details.node, &children);
+
+		if (children) {
+			aa_clone_and_replace_entries(treebuilder, node_entry);
+		}
+
+		/* vi */
+		aa_reparent_node(treebuilder,
+				stack[last].node, stack[node].node);
+
+		/* vii */
+		last = node;
+
+		/* viii */
+	}
+
+	*last_node = last;
+}
+
+/**
+ * Adoption agency: remove an entry from the stack at the given index
+ *
+ * Formatting list entries that refer to stack slots index+1 .. limit
+ * have their stack index reduced by one, the removed node's reference
+ * is released, and the stack entries above it are shifted down by one.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param index        The index of the item to remove
+ * \param limit        The index of the last item to move
+ *
+ * Preconditions: index < limit, limit <= current_node
+ * Postcondition: stack[limit] is empty
+ */
+void aa_remove_element_stack_item(hubbub_treebuilder *treebuilder,
+		uint32_t index, uint32_t limit)
+{
+	element_context *stack = treebuilder->context.element_stack;
+	uint32_t slot;
+
+	assert(index < limit);
+	assert(limit <= treebuilder->context.current_node);
+
+	/* Renumber the formatting list entries of every element that is
+	 * about to move down one slot */
+	for (slot = index + 1; slot <= limit; slot++) {
+		const element_type t = stack[slot].type;
+		formatting_list_entry *fle;
+		bool may_be_listed;
+
+		/* Only formatting elements, and scoping elements other
+		 * than HTML and TABLE, are looked up in the list */
+		may_be_listed = is_formatting_element(t) ||
+				(is_scoping_element(t) &&
+				t != HTML && t != TABLE);
+		if (!may_be_listed)
+			continue;
+
+		fle = treebuilder->context.formatting_list_end;
+		while (fle != NULL) {
+			if (fle->stack_index == slot)
+				fle->stack_index--;
+
+			fle = fle->prev;
+		}
+	}
+
+	/* Reduce node's reference count */
+	treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx,
+			stack[index].node);
+
+	/* Now, shuffle the stack up one, removing node in the process */
+	memmove(&stack[index], &stack[index + 1],
+			(limit - index) * sizeof(element_context));
+}
+
+/**
+ * Adoption agency: shallow clone a node and replace its formatting list
+ * and element stack entries
+ *
+ * The clone takes the place of the original node both in the given
+ * formatting list entry and in the element stack slot that entry refers
+ * to. The original node is unreferenced twice (once after each
+ * replacement) and the clone gains one extra reference.
+ *
+ * \param treebuilder The treebuilder instance
+ * \param element The item in the formatting list containing the node
+ */
+void aa_clone_and_replace_entries(hubbub_treebuilder *treebuilder,
+ formatting_list_entry *element)
+{
+ element_type otype;
+ uint32_t oindex;
+ void *clone, *onode;
+
+ /* Shallow clone of node */
+ treebuilder->tree_handler->clone_node(treebuilder->tree_handler->ctx,
+ element->details.node, false, &clone);
+
+ /* Replace formatting list entry for node with clone */
+ formatting_list_replace(treebuilder, element,
+ element->details.type, clone, element->stack_index,
+ &otype, &onode, &oindex);
+
+ /* Release the original node after its formatting list entry
+ * has been replaced */
+ treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx,
+ onode);
+
+ /* Take an extra reference on the clone before it is also stored
+ * in the element stack */
+ treebuilder->tree_handler->ref_node(treebuilder->tree_handler->ctx,
+ clone);
+
+ /* Replace node's stack entry with clone */
+ treebuilder->context.element_stack[element->stack_index].node = clone;
+
+ /* Release the original node again now that the stack slot
+ * holds the clone */
+ treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx,
+ onode);
+}
+
+/**
+ * Adoption agency: locate foster parent and insert node into it
+ *
+ * The foster parent is chosen as follows:
+ * - current_table is 0: the node in stack entry 0;
+ * - the current table has a parent: that parent, and the node is
+ * inserted before the table rather than appended;
+ * - otherwise: the stack entry immediately before the current table.
+ *
+ * \param treebuilder The treebuilder instance
+ * \param node The node to insert
+ */
+void aa_insert_into_foster_parent(hubbub_treebuilder *treebuilder, void *node)
+{
+ element_context *stack = treebuilder->context.element_stack;
+ void *foster_parent = NULL;
+ bool insert = false;
+ void *inserted;
+
+ if (treebuilder->context.current_table == 0) {
+ /* Take a reference to balance the unref at the end */
+ treebuilder->tree_handler->ref_node(
+ treebuilder->tree_handler->ctx,
+ stack[0].node);
+
+ foster_parent = stack[0].node;
+ } else {
+ void *t_parent = NULL;
+
+ /* NOTE(review): the third argument is true here but false in
+ * aa_remove_from_parent; its meaning is not visible in this file */
+ treebuilder->tree_handler->get_parent(
+ treebuilder->tree_handler->ctx,
+ stack[treebuilder->context.current_table].node,
+ true, &t_parent);
+
+ if (t_parent != NULL) {
+ foster_parent = t_parent;
+ insert = true;
+ } else {
+ /* Take a reference to balance the unref at the end */
+ treebuilder->tree_handler->ref_node(
+ treebuilder->tree_handler->ctx,
+ stack[treebuilder->context.
+ current_table - 1].node);
+ foster_parent = stack[treebuilder->context.
+ current_table - 1].node;
+ }
+ }
+
+ if (insert) {
+ /* Insert immediately before the current table */
+ treebuilder->tree_handler->insert_before(
+ treebuilder->tree_handler->ctx,
+ foster_parent, node,
+ stack[treebuilder->context.current_table].node,
+ &inserted);
+ } else {
+ treebuilder->tree_handler->append_child(
+ treebuilder->tree_handler->ctx,
+ foster_parent, node,
+ &inserted);
+ }
+
+ /* Release the references to the inserted node and the foster parent */
+ treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx,
+ inserted);
+
+ treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx,
+ foster_parent);
+}
+
+
+/**
+ * Process an </applet>, </button>, </marquee>,
+ * or </object> end tag as if in "in body"
+ *
+ * Does nothing when no element of the given type is in scope. Otherwise
+ * implied end tags are closed, the stack is popped up to and including
+ * the element of that type, and the active formatting list is cleared
+ * back to the last marker.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param type         The element type
+ */
+void process_0applet_button_marquee_object_in_body(
+		hubbub_treebuilder *treebuilder, element_type type)
+{
+	uint32_t n_popped = 0;
+	element_type popped_type;
+
+	if (!element_in_scope(treebuilder, type, false)) {
+		/** \todo parse error */
+		return;
+	}
+
+	close_implied_end_tags(treebuilder, UNKNOWN);
+
+	do {
+		void *popped_node;
+
+		if (!element_stack_pop(treebuilder,
+				&popped_type, &popped_node)) {
+			/** \todo errors */
+		}
+
+		treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx,
+				popped_node);
+
+		n_popped++;
+	} while (popped_type != type);
+
+	if (n_popped > 1) {
+		/** \todo parse error */
+	}
+
+	clear_active_formatting_list_to_marker(treebuilder);
+}
+
+/**
+ * Process a </br> end tag as if in "in body"
+ *
+ * The end tag is treated as a <br> start tag with no attributes: the
+ * active formatting list is reconstructed and the element is inserted
+ * without being pushed onto the stack of open elements.
+ *
+ * \param treebuilder  The treebuilder instance
+ */
+void process_0br_in_body(hubbub_treebuilder *treebuilder)
+{
+	hubbub_tag br;
+
+	/** \todo parse error */
+
+	/* Act as if <br> has been seen. */
+	reconstruct_active_formatting_list(treebuilder);
+
+	/** \todo UTF-16 */
+	br.name.type = HUBBUB_STRING_PTR;
+	br.name.data.ptr = (const uint8_t *) "br";
+	br.name.len = SLEN("br");
+
+	br.attributes = NULL;
+	br.n_attributes = 0;
+
+	insert_element_no_push(treebuilder, &br);
+}
+
+/**
+ * Process a generic end tag as if in "in body"
+ *
+ * Searches the element stack from the current node downwards (stack
+ * entry 0 is only examined when it is the current node). The search
+ * gives up at the first element that is neither formatting nor
+ * phrasing. If an element of the given type is found first, implied end
+ * tags are closed and the stack is popped up to and including it.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param type         The element type
+ */
+void process_0generic_in_body(hubbub_treebuilder *treebuilder,
+		element_type type)
+{
+	element_context *stack = treebuilder->context.element_stack;
+	uint32_t idx = treebuilder->context.current_node;
+	bool matched = false;
+	uint32_t n_popped = 0;
+	element_type popped_type;
+
+	/* Locate a matching open element, if one is reachable */
+	do {
+		const element_type cur = stack[idx].type;
+
+		if (cur == type) {
+			matched = true;
+			break;
+		}
+
+		if (!is_formatting_element(cur) &&
+				!is_phrasing_element(cur)) {
+			/** \todo parse error */
+			break;
+		}
+	} while (--idx > 0);
+
+	if (!matched)
+		return;
+
+	close_implied_end_tags(treebuilder, UNKNOWN);
+
+	/* Pop up to and including the matched element */
+	do {
+		void *popped_node;
+
+		if (!element_stack_pop(treebuilder,
+				&popped_type, &popped_node)) {
+			/** \todo errors */
+		}
+
+		treebuilder->tree_handler->unref_node(
+				treebuilder->tree_handler->ctx,
+				popped_node);
+
+		n_popped++;
+	} while (popped_type != type);
+
+	if (n_popped > 1) {
+		/** \todo parse error */
+	}
+}
+
diff --git a/src/treebuilder/in_body.h b/src/treebuilder/in_body.h
new file mode 100644
index 0000000..7d1154e
--- /dev/null
+++ b/src/treebuilder/in_body.h
@@ -0,0 +1,18 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_treebuilder_in_body_h_
+#define hubbub_treebuilder_in_body_h_
+
+#include "treebuilder/internal.h"
+
+/* Entry points of the "in body" insertion mode handler.
+ * NOTE(review): the definitions and the meaning of the boolean results
+ * are not visible from this header -- confirm against in_body.c. */
+bool handle_in_body(hubbub_treebuilder *treebuilder, const hubbub_token *token);
+bool process_tag_in_body(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+
+#endif
+
diff --git a/src/treebuilder/internal.h b/src/treebuilder/internal.h
new file mode 100644
index 0000000..e5410f5
--- /dev/null
+++ b/src/treebuilder/internal.h
@@ -0,0 +1,190 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_treebuilder_internal_h_
+#define hubbub_treebuilder_internal_h_
+
+#include "treebuilder/treebuilder.h"
+
+/**
+ * Insertion modes of the treebuilder.
+ *
+ * The current mode is held in hubbub_treebuilder_context::mode.
+ */
+typedef enum
+{
+ INITIAL,
+ BEFORE_HTML,
+ BEFORE_HEAD,
+ IN_HEAD,
+ IN_HEAD_NOSCRIPT,
+ AFTER_HEAD,
+ IN_BODY,
+ IN_TABLE,
+ IN_CAPTION,
+ IN_COLUMN_GROUP,
+ IN_TABLE_BODY,
+ IN_ROW,
+ IN_CELL,
+ IN_SELECT,
+ IN_SELECT_IN_TABLE,
+ AFTER_BODY,
+ IN_FRAMESET,
+ AFTER_FRAMESET,
+ AFTER_AFTER_BODY,
+ AFTER_AFTER_FRAMESET,
+ GENERIC_RCDATA,
+ SCRIPT_COLLECT_CHARACTERS,
+} insertion_mode;
+
+/**
+ * Element types known to the treebuilder, grouped by category.
+ *
+ * NOTE(review): the is_*_element() predicates presumably rely on this
+ * grouping/ordering -- confirm before reordering or inserting values.
+ */
+typedef enum
+{
+/* Special */
+ ADDRESS, AREA, BASE, BASEFONT, BGSOUND, BLOCKQUOTE, BODY, BR, CENTER,
+ COL, COLGROUP, DD, DIR, DIV, DL, DT, EMBED, FIELDSET, FORM, FRAME,
+ FRAMESET, H1, H2, H3, H4, H5, H6, HEAD, HR, IFRAME, IMAGE, IMG, INPUT,
+ ISINDEX, LI, LINK, LISTING, MENU, META, NOEMBED, NOFRAMES, NOSCRIPT,
+ OL, OPTGROUP, OPTION, P, PARAM, PLAINTEXT, PRE, SCRIPT, SELECT, SPACER,
+ STYLE, TBODY, TEXTAREA, TFOOT, THEAD, TITLE, TR, UL, WBR,
+/* Scoping */
+ APPLET, BUTTON, CAPTION, HTML, MARQUEE, OBJECT, TABLE, TD, TH,
+/* Formatting */
+ A, B, BIG, EM, FONT, I, NOBR, S, SMALL, STRIKE, STRONG, TT, U,
+/* Phrasing */
+ /** \todo Enumerate phrasing elements */
+ XMP, LABEL,
+ /* Last value; passed to close_implied_end_tags() by callers that
+ * have no element type to exempt */
+ UNKNOWN,
+} element_type;
+
+/**
+ * An element's type paired with its tree node.
+ *
+ * Used for entries in the stack of open elements and embedded in
+ * formatting list entries.
+ */
+typedef struct element_context
+{
+ element_type type; /* Type of the element */
+ void *node; /* Opaque node handle, as passed to the tree handler */
+} element_context;
+
+/**
+ * Entry in the doubly-linked list of active formatting elements.
+ *
+ * A stack_index of 0 is treated by the adoption agency code as
+ * "not in the element stack".
+ */
+typedef struct formatting_list_entry
+{
+ element_context details; /**< Entry details */
+
+ uint32_t stack_index; /**< Index into element stack */
+
+ struct formatting_list_entry *prev; /**< Previous in list */
+ struct formatting_list_entry *next; /**< Next in list */
+} formatting_list_entry;
+
+/**
+ * Mutable parsing state of a treebuilder: insertion mode, stack of open
+ * elements, list of active formatting elements, and assorted element
+ * pointers.
+ */
+typedef struct hubbub_treebuilder_context
+{
+ insertion_mode mode; /**< The current insertion mode */
+
+/* NOTE(review): presumably the granularity by which element_stack is grown */
+#define ELEMENT_STACK_CHUNK 128
+ element_context *element_stack; /**< Stack of open elements */
+ uint32_t stack_alloc; /**< Number of stack slots allocated */
+ uint32_t current_node; /**< Index of current node in stack */
+ uint32_t current_table; /**< Index of current table in stack */
+
+ formatting_list_entry *formatting_list; /**< List of active formatting
+ * elements */
+ formatting_list_entry *formatting_list_end; /**< End of active
+ * formatting list */
+
+ void *head_element; /**< Pointer to HEAD element */
+
+ void *form_element; /**< Pointer to most recently
+ * opened FORM element */
+
+ void *document; /**< Pointer to the document node */
+
+ struct {
+ insertion_mode mode; /**< Insertion mode to return to */
+ void *node; /**< Node to attach Text child to */
+ element_type type; /**< Type of node */
+ hubbub_string string; /**< Text data */
+ } collect; /**< Context for character collecting */
+
+ bool strip_leading_lr; /**< Whether to strip a LR from the
+ * start of the next character sequence
+ * received */
+} hubbub_treebuilder_context;
+
+/**
+ * Treebuilder instance: the tokeniser it reads from, its parsing
+ * context, and the client-supplied handlers and allocator.
+ */
+struct hubbub_treebuilder
+{
+ hubbub_tokeniser *tokeniser; /**< Underlying tokeniser */
+
+ const uint8_t *input_buffer; /**< Start of tokeniser's buffer */
+ size_t input_buffer_len; /**< Length of input buffer */
+
+ hubbub_treebuilder_context context; /* Current parsing state */
+
+ /* Tree construction callbacks; each is invoked with
+ * tree_handler->ctx as its first argument */
+ hubbub_tree_handler *tree_handler;
+
+ /* NOTE(review): presumably notified of tokeniser buffer changes */
+ hubbub_buffer_handler buffer_handler;
+ void *buffer_pw;
+
+ /* NOTE(review): presumably the parse error reporting callback */
+ hubbub_error_handler error_handler;
+ void *error_pw;
+
+ hubbub_alloc alloc; /**< Memory (de)allocation function */
+ void *alloc_pw; /**< Client private data */
+};
+
+/* Token-processing helpers shared by the insertion mode handlers */
+bool process_characters_expect_whitespace(
+ hubbub_treebuilder *treebuilder, const hubbub_token *token,
+ bool insert_into_current_node);
+void process_comment_append(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token, void *parent);
+void parse_generic_rcdata(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token, bool rcdata);
+void process_base_link_meta_in_head(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token, element_type type);
+void process_script_in_head(hubbub_treebuilder *treebuilder,
+ const hubbub_token *token);
+
+/* Element stack / formatting list algorithms.
+ * NOTE(review): callers in in_body.c use element_in_scope()'s result both
+ * as a truth value and as a stack index to compare against, so it
+ * presumably returns the index of the matching element or 0 if none --
+ * confirm against the definition. */
+uint32_t element_in_scope(hubbub_treebuilder *treebuilder,
+ element_type type, bool in_table);
+void reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder);
+void clear_active_formatting_list_to_marker(
+ hubbub_treebuilder *treebuilder);
+void insert_element(hubbub_treebuilder *treebuilder,
+ const hubbub_tag *tag_name);
+void insert_element_no_push(hubbub_treebuilder *treebuilder,
+ const hubbub_tag *tag_name);
+void close_implied_end_tags(hubbub_treebuilder *treebuilder,
+ element_type except);
+void reset_insertion_mode(hubbub_treebuilder *treebuilder);
+void append_text(hubbub_treebuilder *treebuilder,
+ const hubbub_string *string);
+
+/* Map a tag name to its element_type */
+element_type element_type_from_name(hubbub_treebuilder *treebuilder,
+ const hubbub_string *tag_name);
+
+/* Element category predicates (categories as grouped in element_type) */
+bool is_special_element(element_type type);
+bool is_scoping_element(element_type type);
+bool is_formatting_element(element_type type);
+bool is_phrasing_element(element_type type);
+
+/* Stack of open elements; pop hands back the removed entry's type and node */
+bool element_stack_push(hubbub_treebuilder *treebuilder,
+ element_type type, void *node);
+bool element_stack_pop(hubbub_treebuilder *treebuilder,
+ element_type *type, void **node);
+
+/* List of active formatting elements; remove/replace hand back the old
+ * entry's details through their output parameters */
+bool formatting_list_append(hubbub_treebuilder *treebuilder,
+ element_type type, void *node, uint32_t stack_index);
+bool formatting_list_insert(hubbub_treebuilder *treebuilder,
+ formatting_list_entry *prev, formatting_list_entry *next,
+ element_type type, void *node, uint32_t stack_index);
+bool formatting_list_remove(hubbub_treebuilder *treebuilder,
+ formatting_list_entry *entry,
+ element_type *type, void **node, uint32_t *stack_index);
+bool formatting_list_replace(hubbub_treebuilder *treebuilder,
+ formatting_list_entry *entry,
+ element_type type, void *node, uint32_t stack_index,
+ element_type *otype, void **onode, uint32_t *ostack_index);
+
+/* Debugging aids, only declared when NDEBUG is not defined */
+#ifndef NDEBUG
+#include <stdio.h>
+
+void element_stack_dump(hubbub_treebuilder *treebuilder, FILE *fp);
+void formatting_list_dump(hubbub_treebuilder *treebuilder, FILE *fp);
+#endif
+
+#endif
+
diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c
index 01e31e4..90cca11 100644
--- a/src/treebuilder/treebuilder.c
+++ b/src/treebuilder/treebuilder.c
@@ -8,119 +8,61 @@
#include <assert.h>
#include <string.h>
+#include "treebuilder/in_body.h"
+#include "treebuilder/internal.h"
#include "treebuilder/treebuilder.h"
#include "utils/utils.h"
-typedef enum
-{
- INITIAL,
- BEFORE_HTML,
- BEFORE_HEAD,
- IN_HEAD,
- IN_HEAD_NOSCRIPT,
- AFTER_HEAD,
- IN_BODY,
- IN_TABLE,
- IN_CAPTION,
- IN_COLUMN_GROUP,
- IN_TABLE_BODY,
- IN_ROW,
- IN_CELL,
- IN_SELECT,
- IN_SELECT_IN_TABLE,
- AFTER_BODY,
- IN_FRAMESET,
- AFTER_FRAMESET,
- AFTER_AFTER_BODY,
- AFTER_AFTER_FRAMESET,
- GENERIC_RCDATA,
- SCRIPT_COLLECT_CHARACTERS,
-} insertion_mode;
-
-typedef enum
-{
-/* Special */
- ADDRESS, AREA, BASE, BASEFONT, BGSOUND, BLOCKQUOTE, BODY, BR, CENTER,
- COL, COLGROUP, DD, DIR, DIV, DL, DT, EMBED, FIELDSET, FORM, FRAME,
- FRAMESET, H1, H2, H3, H4, H5, H6, HEAD, HR, IFRAME, IMAGE, IMG, INPUT,
- ISINDEX, LI, LINK, LISTING, MENU, META, NOEMBED, NOFRAMES, NOSCRIPT,
- OL, OPTGROUP, OPTION, P, PARAM, PLAINTEXT, PRE, SCRIPT, SELECT, SPACER,
- STYLE, TBODY, TEXTAREA, TFOOT, THEAD, TITLE, TR, UL, WBR,
-/* Scoping */
- APPLET, BUTTON, CAPTION, HTML, MARQUEE, OBJECT, TABLE, TD, TH,
-/* Formatting */
- A, B, BIG, EM, FONT, I, NOBR, S, SMALL, STRIKE, STRONG, TT, U,
-/* Phrasing */
- /**< \todo Enumerate phrasing elements */
-} element_type;
-
-typedef struct element_context
-{
+static const struct {
+ const char *name;
element_type type;
- void *node;
-} element_context;
-
-typedef struct formatting_list_entry
-{
- element_context details; /**< Entry details */
-
- uint32_t stack_index; /**< Index into element stack */
-
- struct formatting_list_entry *prev; /**< Previous in list */
- struct formatting_list_entry *next; /**< Next in list */
-} formatting_list_entry;
-
-typedef struct hubbub_treebuilder_context
-{
- insertion_mode mode; /**< The current insertion mode */
-
-#define ELEMENT_STACK_CHUNK 128
- element_context *element_stack; /**< Stack of open elements */
- uint32_t stack_alloc; /**< Number of stack slots allocated */
- uint32_t current_node; /**< Index of current node in stack */
- uint32_t current_table; /**< Index of current table in stack */
-
- formatting_list_entry *formatting_list; /**< List of active formatting
- * elements */
- formatting_list_entry *formatting_list_end; /**< End of active
- * formatting list */
-
- void *head_element; /**< Pointer to HEAD element */
-
- void *form_element; /**< Pointer to most recently
- * opened FORM element */
-
- void *document; /**< Pointer to the document node */
-
- struct {
- insertion_mode mode; /**< Insertion mode to return to */
- void *node; /**< Node to attach Text child to */
- element_type type; /**< Type of node */
- hubbub_string string; /**< Text data */
- } collect; /**< Context for character collecting */
-} hubbub_treebuilder_context;
-
-struct hubbub_treebuilder
-{
- hubbub_tokeniser *tokeniser; /**< Underlying tokeniser */
-
- const uint8_t *input_buffer; /**< Start of tokeniser's buffer */
- size_t input_buffer_len; /**< Length of input buffer */
-
- hubbub_treebuilder_context context;
-
- hubbub_tree_handler *tree_handler;
-
- hubbub_buffer_handler buffer_handler;
- void *buffer_pw;
-
- hubbub_error_handler error_handler;
- void *error_pw;
-
- hubbub_alloc alloc; /**< Memory (de)allocation function */
- void *alloc_pw; /**< Client private data */
+} name_type_map[] = {
+ { "ADDRESS", ADDRESS }, { "AREA", AREA },
+ { "BASE", BASE }, { "BASEFONT", BASEFONT },
+ { "BGSOUND", BGSOUND }, { "BLOCKQUOTE", BLOCKQUOTE },
+ { "BODY", BODY }, { "BR", BR },
+ { "CENTER", CENTER }, { "COL", COL },
+ { "COLGROUP", COLGROUP }, { "DD", DD },
+ { "DIR", DIR }, { "DIV", DIV },
+ { "DL", DL }, { "DT", DT },
+ { "EMBED", EMBED }, { "FIELDSET", FIELDSET },
+ { "FORM", FORM }, { "FRAME", FRAME },
+ { "FRAMESET", FRAMESET }, { "H1", H1 },
+ { "H2", H2 }, { "H3", H3 },
+ { "H4", H4 }, { "H5", H5 },
+ { "H6", H6 }, { "HEAD", HEAD },
+ { "HR", HR }, { "IFRAME", IFRAME },
+ { "IMAGE", IMAGE }, { "IMG", IMG },
+ { "INPUT", INPUT }, { "ISINDEX", ISINDEX },
+ { "LI", LI }, { "LINK", LINK },
+ { "LISTING", LISTING }, { "MENU", MENU },
+ { "META", META }, { "NOEMBED", NOEMBED },
+ { "NOFRAMES", NOFRAMES }, { "NOSCRIPT", NOSCRIPT },
+ { "OL", OL }, { "OPTGROUP", OPTGROUP },
+ { "OPTION", OPTION }, { "P", P },
+ { "PARAM", PARAM }, { "PLAINTEXT", PLAINTEXT },
+ { "PRE", PRE }, { "SCRIPT", SCRIPT },
+ { "SELECT", SELECT }, { "SPACER", SPACER },
+ { "STYLE", STYLE }, { "TBODY", TBODY },
+ { "TEXTAREA", TEXTAREA }, { "TFOOT", TFOOT },
+ { "THEAD", THEAD }, { "TITLE", TITLE },
+ { "TR", TR }, { "UL", UL },
+ { "WBR", WBR },
+ { "APPLET", APPLET }, { "BUTTON", BUTTON },
+ { "CAPTION", CAPTION }, { "HTML", HTML },
+ { "MARQUEE", MARQUEE }, { "OBJECT", OBJECT },
+ { "TABLE", TABLE }, { "TD", TD },
+ { "TH", TH },
+ { "A", A }, { "B", B },
+ { "BIG", BIG }, { "EM", EM },
+ { "FONT", FONT }, { "I", I },
+ { "NOBR", NOBR }, { "S", S },
+ { "SMALL", SMALL }, { "STRIKE", STRIKE },
+ { "STRONG", STRONG }, { "TT", TT },
+ { "U", U },
};
+
static void hubbub_treebuilder_buffer_handler(const uint8_t *data,
size_t len, void *pw);
static void hubbub_treebuilder_token_handler(const hubbub_token *token,
@@ -143,59 +85,6 @@ static bool handle_generic_rcdata(hubbub_treebuilder *treebuilder,
static bool handle_script_collect_characters(hubbub_treebuilder *treebuilder,
const hubbub_token *token);
-static bool process_characters_expect_whitespace(
- hubbub_treebuilder *treebuilder, const hubbub_token *token,
- bool insert_into_current_node);
-static void process_comment_append(hubbub_treebuilder *treebuilder,
- const hubbub_token *token, void *parent);
-static void parse_generic_rcdata(hubbub_treebuilder *treebuilder,
- const hubbub_token *token, bool rcdata);
-static void process_base_link_meta_in_head(hubbub_treebuilder *treebuilder,
- const hubbub_token *token, element_type type);
-static void process_script_in_head(hubbub_treebuilder *treebuilder,
- const hubbub_token *token);
-
-/** \todo Uncomment the static keyword here once these functions are actually used */
-
-/*static*/ bool element_in_scope(hubbub_treebuilder *treebuilder,
- element_type type, bool in_table);
-/*static*/ void reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder);
-/*static*/ void clear_active_formatting_list_to_marker(
- hubbub_treebuilder *treebuilder);
-static void insert_element(hubbub_treebuilder *treebuilder,
- const hubbub_tag *tag_name);
-static void insert_element_verbatim(hubbub_treebuilder *treebuilder,
- const uint8_t *name, size_t len);
-static void insert_element_no_push(hubbub_treebuilder *treebuilder,
- const hubbub_tag *tag_name);
-/*static*/ void close_implied_end_tags(hubbub_treebuilder *treebuilder,
- element_type except);
-/*static*/ void reset_insertion_mode(hubbub_treebuilder *treebuilder);
-
-static element_type element_type_from_name(hubbub_treebuilder *treebuilder,
- const hubbub_string *tag_name);
-static element_type element_type_from_verbatim_name(const uint8_t *name,
- size_t len);
-
-static inline bool is_special_element(element_type type);
-static inline bool is_scoping_element(element_type type);
-static inline bool is_formatting_element(element_type type);
-static inline bool is_phrasing_element(element_type type);
-
-static bool element_stack_push(hubbub_treebuilder *treebuilder,
- element_type type, void *node);
-static bool element_stack_pop(hubbub_treebuilder *treebuilder,
- element_type *type, void **node);
-
-/*static*/ bool formatting_list_insert(hubbub_treebuilder *treebuilder,
- element_type type, void *node, uint32_t stack_index);
-static bool formatting_list_remove(hubbub_treebuilder *treebuilder,
- formatting_list_entry *entry,
- element_type *type, void **node, uint32_t *stack_index);
-static bool formatting_list_replace(hubbub_treebuilder *treebuilder,
- formatting_list_entry *entry,
- element_type type, void *node, uint32_t stack_index,
- element_type *otype, void **onode, uint32_t *ostack_index);
/**
* Create a hubbub treebuilder
@@ -243,6 +132,8 @@ hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser,
tb->context.collect.string.type = HUBBUB_STRING_OFF;
+ tb->context.strip_leading_lr = false;
+
tb->buffer_handler = NULL;
tb->buffer_pw = NULL;
@@ -430,7 +321,7 @@ void hubbub_treebuilder_token_handler(const hubbub_token *token,
treebuilder->tree_handler == NULL)
return;
- while (reprocess == true) {
+ while (reprocess) {
switch (treebuilder->context.mode) {
case INITIAL:
reprocess = handle_initial(treebuilder, token);
@@ -451,6 +342,8 @@ void hubbub_treebuilder_token_handler(const hubbub_token *token,
reprocess = handle_after_head(treebuilder, token);
break;
case IN_BODY:
+ reprocess = handle_in_body(treebuilder, token);
+ break;
case IN_TABLE:
case IN_CAPTION:
case IN_COLUMN_GROUP:
@@ -491,7 +384,7 @@ bool handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token *token)
switch (token->type) {
case HUBBUB_TOKEN_CHARACTER:
if (process_characters_expect_whitespace(treebuilder, token,
- false) == true) {
+ false)) {
/** \todo parse error */
treebuilder->tree_handler->set_quirks_mode(
@@ -552,7 +445,7 @@ bool handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token *token)
break;
}
- if (reprocess == true) {
+ if (reprocess) {
treebuilder->context.mode = BEFORE_HTML;
}
@@ -570,6 +463,7 @@ bool handle_before_html(hubbub_treebuilder *treebuilder,
const hubbub_token *token)
{
bool reprocess = false;
+ bool handled = false;
switch (token->type) {
case HUBBUB_TOKEN_DOCTYPE:
@@ -589,48 +483,7 @@ bool handle_before_html(hubbub_treebuilder *treebuilder,
&token->data.tag.name);
if (type == HTML) {
- int success;
- void *html, *appended;
-
- /* We can't use insert_element() here, as it assumes
- * that we're inserting into current_node. There is
- * no current_node to insert into at this point so
- * we get to do it manually. */
-
- success = treebuilder->tree_handler->create_element(
- treebuilder->tree_handler->ctx,
- &token->data.tag, &html);
- if (success != 0) {
- /** \todo errors */
- }
-
- success = treebuilder->tree_handler->append_child(
- treebuilder->tree_handler->ctx,
- treebuilder->context.document,
- html, &appended);
- if (success != 0) {
- /** \todo errors */
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- html);
- }
-
- /* We can't use element_stack_push() here, as it
- * assumes that current_node is pointing at the index
- * before the one to insert at. For the first entry in
- * the stack, this does not hold so we must insert
- * manually. */
- treebuilder->context.element_stack[0].type = HTML;
- treebuilder->context.element_stack[0].node = html;
- treebuilder->context.current_node = 0;
-
- /** \todo cache selection algorithm */
-
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- appended);
-
- treebuilder->context.mode = BEFORE_HEAD;
+ handled = true;
} else {
reprocess = true;
}
@@ -642,15 +495,37 @@ bool handle_before_html(hubbub_treebuilder *treebuilder,
break;
}
- if (reprocess == true) {
- /* Need to manufacture html element */
+
+ if (handled || reprocess) {
int success;
void *html, *appended;
- /** \todo UTF-16 */
- success = treebuilder->tree_handler->create_element_verbatim(
- treebuilder->tree_handler->ctx,
- (const uint8_t *) "html", SLEN("html"), &html);
+ /* We can't use insert_element() here, as it assumes
+ * that we're inserting into current_node. There is
+ * no current_node to insert into at this point so
+ * we get to do it manually. */
+
+ if (reprocess) {
+ /* Need to manufacture html element */
+ hubbub_tag tag;
+
+ /** \todo UTF-16 */
+ tag.name.type = HUBBUB_STRING_PTR;
+ tag.name.data.ptr = (const uint8_t *) "html";
+ tag.name.len = SLEN("html");
+
+ tag.n_attributes = 0;
+ tag.attributes = NULL;
+
+ success = treebuilder->tree_handler->create_element(
+ treebuilder->tree_handler->ctx,
+ &tag, &html);
+ } else {
+ success = treebuilder->tree_handler->create_element(
+ treebuilder->tree_handler->ctx,
+ &token->data.tag, &html);
+ }
+
if (success != 0) {
/** \todo errors */
}
@@ -698,6 +573,7 @@ bool handle_before_head(hubbub_treebuilder *treebuilder,
const hubbub_token *token)
{
bool reprocess = false;
+ bool handled = false;
switch (token->type) {
case HUBBUB_TOKEN_CHARACTER:
@@ -718,20 +594,10 @@ bool handle_before_head(hubbub_treebuilder *treebuilder,
&token->data.tag.name);
if (type == HTML) {
- /** \todo Process as if "in body" */
+ /* Process as if "in body" */
+ process_tag_in_body(treebuilder, token);
} else if (type == HEAD) {
- insert_element(treebuilder, &token->data.tag);
-
- treebuilder->tree_handler->ref_node(
- treebuilder->tree_handler->ctx,
- treebuilder->context.element_stack[
- treebuilder->context.current_node].node);
-
- treebuilder->context.head_element =
- treebuilder->context.element_stack[
- treebuilder->context.current_node].node;
-
- treebuilder->context.mode = IN_HEAD;
+ handled = true;
} else {
reprocess = true;
}
@@ -755,9 +621,31 @@ bool handle_before_head(hubbub_treebuilder *treebuilder,
break;
}
- if (reprocess == true) {
- insert_element_verbatim(treebuilder,
- (const uint8_t *) "head", SLEN("head"));
+ if (handled || reprocess) {
+ hubbub_tag tag;
+
+ if (reprocess) {
+ /* Manufacture head tag */
+ tag.name.type = HUBBUB_STRING_PTR;
+ tag.name.data.ptr = (const uint8_t *) "head";
+ tag.name.len = SLEN("head");
+
+ tag.n_attributes = 0;
+ tag.attributes = NULL;
+ } else {
+ tag = token->data.tag;
+ }
+
+ insert_element(treebuilder, &tag);
+
+ treebuilder->tree_handler->ref_node(
+ treebuilder->tree_handler->ctx,
+ treebuilder->context.element_stack[
+ treebuilder->context.current_node].node);
+
+ treebuilder->context.head_element =
+ treebuilder->context.element_stack[
+ treebuilder->context.current_node].node;
treebuilder->context.mode = IN_HEAD;
}
@@ -776,6 +664,7 @@ bool handle_in_head(hubbub_treebuilder *treebuilder,
const hubbub_token *token)
{
bool reprocess = false;
+ bool handled = false;
switch (token->type) {
case HUBBUB_TOKEN_CHARACTER:
@@ -796,7 +685,8 @@ bool handle_in_head(hubbub_treebuilder *treebuilder,
&token->data.tag.name);
if (type == HTML) {
- /** \todo Process as if "in body" */
+ /* Process as if "in body" */
+ process_tag_in_body(treebuilder, token);
} else if (type == BASE || type == LINK || type == META) {
process_base_link_meta_in_head(treebuilder,
token, type);
@@ -816,6 +706,8 @@ bool handle_in_head(hubbub_treebuilder *treebuilder,
process_script_in_head(treebuilder, token);
} else if (type == HEAD) {
/** \todo parse error */
+ } else {
+ reprocess = true;
}
}
break;
@@ -825,19 +717,7 @@ bool handle_in_head(hubbub_treebuilder *treebuilder,
&token->data.tag.name);
if (type == HEAD) {
- element_type otype;
- void *node;
-
- if (element_stack_pop(treebuilder,
- &otype, &node) == false) {
- /** \todo errors */
- }
-
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- node);
-
- treebuilder->context.mode = AFTER_HEAD;
+ handled = true;
} else if (type == BODY || type == HTML ||
type == P || type == BR) {
reprocess = true;
@@ -849,12 +729,11 @@ bool handle_in_head(hubbub_treebuilder *treebuilder,
break;
}
- if (reprocess == true) {
+ if (handled || reprocess) {
element_type otype;
void *node;
- if (element_stack_pop(treebuilder,
- &otype, &node) == false) {
+ if (!element_stack_pop(treebuilder, &otype, &node)) {
/** \todo errors */
}
@@ -879,6 +758,7 @@ bool handle_in_head_noscript(hubbub_treebuilder *treebuilder,
const hubbub_token *token)
{
bool reprocess = false;
+ bool handled = false;
switch (token->type) {
case HUBBUB_TOKEN_CHARACTER:
@@ -899,7 +779,8 @@ bool handle_in_head_noscript(hubbub_treebuilder *treebuilder,
&token->data.tag.name);
if (type == HTML) {
- /** \todo Process as "in body" */
+ /* Process as "in body" */
+ process_tag_in_body(treebuilder, token);
} else if (type == LINK || type == META) {
process_base_link_meta_in_head(treebuilder,
token, type);
@@ -919,19 +800,7 @@ bool handle_in_head_noscript(hubbub_treebuilder *treebuilder,
&token->data.tag.name);
if (type == NOSCRIPT) {
- element_type otype;
- void *node;
-
- if (element_stack_pop(treebuilder,
- &otype, &node) == false) {
- /** \todo errors */
- }
-
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- node);
-
- treebuilder->context.mode = IN_HEAD;
+ handled = true;
} else if (type == P || type == BR) {
/** \todo parse error */
reprocess = true;
@@ -946,11 +815,11 @@ bool handle_in_head_noscript(hubbub_treebuilder *treebuilder,
break;
}
- if (reprocess == true) {
+ if (handled || reprocess) {
element_type otype;
void *node;
- if (element_stack_pop(treebuilder, &otype, &node) == false) {
+ if (!element_stack_pop(treebuilder, &otype, &node)) {
/** \todo errors */
}
@@ -975,6 +844,7 @@ bool handle_after_head(hubbub_treebuilder *treebuilder,
const hubbub_token *token)
{
bool reprocess = false;
+ bool handled = false;
switch (token->type) {
case HUBBUB_TOKEN_CHARACTER:
@@ -995,10 +865,10 @@ bool handle_after_head(hubbub_treebuilder *treebuilder,
&token->data.tag.name);
if (type == HTML) {
- /** \todo Process as if "in body" */
+ /* Process as if "in body" */
+ process_tag_in_body(treebuilder, token);
} else if (type == BODY) {
- insert_element(treebuilder, &token->data.tag);
- treebuilder->context.mode = IN_BODY;
+ handled = true;
} else if (type == FRAMESET) {
insert_element(treebuilder, &token->data.tag);
treebuilder->context.mode = IN_FRAMESET;
@@ -1010,10 +880,9 @@ bool handle_after_head(hubbub_treebuilder *treebuilder,
/** \todo parse error */
- if (element_stack_push(treebuilder,
+ if (!element_stack_push(treebuilder,
HEAD,
- treebuilder->context.head_element) ==
- false) {
+ treebuilder->context.head_element)) {
/** \todo errors */
}
@@ -1028,8 +897,7 @@ bool handle_after_head(hubbub_treebuilder *treebuilder,
parse_generic_rcdata(treebuilder, token, true);
}
- if (element_stack_pop(treebuilder, &otype, &node) ==
- false) {
+ if (!element_stack_pop(treebuilder, &otype, &node)) {
/** \todo errors */
}
@@ -1046,9 +914,22 @@ bool handle_after_head(hubbub_treebuilder *treebuilder,
break;
}
- if (reprocess == true) {
- insert_element_verbatim(treebuilder,
- (const uint8_t *) "body", SLEN("body"));
+ if (handled || reprocess) {
+ hubbub_tag tag;
+
+ if (reprocess) {
+ /* Manufacture body */
+ tag.name.type = HUBBUB_STRING_PTR;
+ tag.name.data.ptr = (const uint8_t *) "body";
+ tag.name.len = SLEN("body");
+
+ tag.n_attributes = 0;
+ tag.attributes = NULL;
+ } else {
+ tag = token->data.tag;
+ }
+
+ insert_element(treebuilder, &tag);
treebuilder->context.mode = IN_BODY;
}
@@ -1069,6 +950,12 @@ bool handle_generic_rcdata(hubbub_treebuilder *treebuilder,
bool reprocess = false;
bool done = false;
+ if (treebuilder->context.strip_leading_lr &&
+ token->type != HUBBUB_TOKEN_CHARACTER) {
+ /* Reset the LR stripping flag */
+ treebuilder->context.strip_leading_lr = false;
+ }
+
switch (token->type) {
case HUBBUB_TOKEN_CHARACTER:
if (treebuilder->context.collect.string.len == 0) {
@@ -1077,6 +964,19 @@ bool handle_generic_rcdata(hubbub_treebuilder *treebuilder,
}
treebuilder->context.collect.string.len +=
token->data.character.len;
+
+ if (treebuilder->context.strip_leading_lr) {
+ const uint8_t *str = treebuilder->input_buffer +
+ treebuilder->context.collect.string.data.off;
+
+ /** \todo UTF-16 */
+ if (*str == '\n') {
+ treebuilder->context.collect.string.data.off++;
+ treebuilder->context.collect.string.len--;
+ }
+
+ treebuilder->context.strip_leading_lr = false;
+ }
break;
case HUBBUB_TOKEN_END_TAG:
{
@@ -1084,7 +984,7 @@ bool handle_generic_rcdata(hubbub_treebuilder *treebuilder,
&token->data.tag.name);
if (type != treebuilder->context.collect.type) {
- assert(0);
+ /** \todo parse error */
}
done = true;
@@ -1102,7 +1002,7 @@ bool handle_generic_rcdata(hubbub_treebuilder *treebuilder,
break;
}
- if (done == true) {
+ if (done) {
int success;
void *text, *appended;
@@ -1189,7 +1089,7 @@ bool handle_script_collect_characters(hubbub_treebuilder *treebuilder,
break;
}
- if (done == true) {
+ if (done) {
int success;
void *text, *appended;
@@ -1281,42 +1181,14 @@ bool process_characters_expect_whitespace(hubbub_treebuilder *treebuilder,
}
/* Non-whitespace characters in token, so reprocess */
if (c != len) {
- if (c > 0 && insert_into_current_node == true) {
+ if (c > 0 && insert_into_current_node) {
hubbub_string temp;
- int success;
- void *text, *appended;
+ temp.type = HUBBUB_STRING_OFF;
temp.data.off = token->data.character.data.off;
temp.len = len - c;
- /** \todo Append to pre-existing text child, iff
- * one exists and it's the last in the child list */
-
- success = treebuilder->tree_handler->create_text(
- treebuilder->tree_handler->ctx,
- &temp, &text);
- if (success != 0) {
- /** \todo errors */
- }
-
- success = treebuilder->tree_handler->append_child(
- treebuilder->tree_handler->ctx,
- treebuilder->context.element_stack[
- treebuilder->context.current_node].node,
- text, &appended);
- if (success != 0) {
- /** \todo errors */
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- text);
- }
-
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- appended);
- treebuilder->tree_handler->unref_node(
- treebuilder->tree_handler->ctx,
- text);
+ append_text(treebuilder, &temp);
}
/* Update token data to strip leading whitespace */
@@ -1391,6 +1263,13 @@ void parse_generic_rcdata(hubbub_treebuilder *treebuilder,
/** \todo errors */
}
+ /* It's a bit nasty having this code deal with textarea->form
+ * association, but it avoids having to duplicate the entire rest
+ * of this function for textarea processing */
+ if (type == TEXTAREA && treebuilder->context.form_element != NULL) {
+ /** \todo associate textarea with form */
+ }
+
success = treebuilder->tree_handler->append_child(
treebuilder->tree_handler->ctx,
treebuilder->context.element_stack[
@@ -1486,9 +1365,9 @@ void process_script_in_head(hubbub_treebuilder *treebuilder,
* \param treebuilder Treebuilder to look in
* \param type Element type to find
* \param in_table Whether we're looking in table scope
- * \return True iff element is in scope, false otherwise
+ * \return Element stack index, or 0 if not in scope
*/
-bool element_in_scope(hubbub_treebuilder *treebuilder,
+uint32_t element_in_scope(hubbub_treebuilder *treebuilder,
element_type type, bool in_table)
{
uint32_t node;
@@ -1496,12 +1375,12 @@ bool element_in_scope(hubbub_treebuilder *treebuilder,
if (treebuilder->context.element_stack == NULL)
return false;
- for (node = treebuilder->context.current_node; node > 0; node --) {
+ for (node = treebuilder->context.current_node; node > 0; node--) {
element_type node_type =
treebuilder->context.element_stack[node].type;
if (node_type == type)
- return true;
+ return node;
if (node_type == TABLE)
break;
@@ -1515,7 +1394,7 @@ bool element_in_scope(hubbub_treebuilder *treebuilder,
break;
}
- return false;
+ return 0;
}
/**
@@ -1546,7 +1425,7 @@ void reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder)
}
}
- while (1) {
+ while (entry != NULL) {
int success;
void *clone, *appended;
element_type prev_type;
@@ -1577,9 +1456,8 @@ void reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder)
return;
}
- if (element_stack_push(treebuilder,
- entry->details.type,
- appended) == false) {
+ if (!element_stack_push(treebuilder,
+ entry->details.type, appended)) {
/** \todo handle memory exhaustion */
treebuilder->tree_handler->unref_node(
treebuilder->tree_handler->ctx,
@@ -1589,11 +1467,11 @@ void reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder)
clone);
}
- if (formatting_list_replace(treebuilder, entry,
+ if (!formatting_list_replace(treebuilder, entry,
entry->details.type, clone,
treebuilder->context.current_node,
&prev_type, &prev_node,
- &prev_stack_index) == false) {
+ &prev_stack_index)) {
/** \todo handle errors */
treebuilder->tree_handler->unref_node(
treebuilder->tree_handler->ctx,
@@ -1604,8 +1482,7 @@ void reconstruct_active_formatting_list(hubbub_treebuilder *treebuilder)
treebuilder->tree_handler->ctx,
prev_node);
- if (entry->next != NULL)
- entry = entry->next;
+ entry = entry->next;
}
}
@@ -1627,8 +1504,8 @@ void clear_active_formatting_list_to_marker(hubbub_treebuilder *treebuilder)
if (is_scoping_element(entry->details.type))
done = true;
- if (formatting_list_remove(treebuilder, entry,
- &type, &node, &stack_index) == false) {
+ if (!formatting_list_remove(treebuilder, entry,
+ &type, &node, &stack_index)) {
/** \todo handle errors */
}
@@ -1636,7 +1513,7 @@ void clear_active_formatting_list_to_marker(hubbub_treebuilder *treebuilder)
treebuilder->tree_handler->ctx,
node);
- if (done == true)
+ if (done)
break;
}
}
@@ -1670,47 +1547,9 @@ void insert_element(hubbub_treebuilder *treebuilder, const hubbub_tag *tag)
treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx,
appended);
- if (element_stack_push(treebuilder,
+ if (!element_stack_push(treebuilder,
element_type_from_name(treebuilder, &tag->name),
- node) == false) {
- /** \todo errors */
- }
-}
-
-/**
- * Create element and insert it into the DOM, pushing it on the stack
- *
- * \param treebuilder The treebuilder instance
- * \param name Name of element to insert
- * \param len Length, in bytes, of ::name
- */
-void insert_element_verbatim(hubbub_treebuilder *treebuilder,
- const uint8_t *name, size_t len)
-{
- int success;
- void *node, *appended;
-
- success = treebuilder->tree_handler->create_element_verbatim(
- treebuilder->tree_handler->ctx, name, len, &node);
- if (success != 0) {
- /** \todo errors */
- }
-
- success = treebuilder->tree_handler->append_child(
- treebuilder->tree_handler->ctx,
- treebuilder->context.element_stack[
- treebuilder->context.current_node].node,
- node, &appended);
- if (success != 0) {
- /** \todo errors */
- }
-
- treebuilder->tree_handler->unref_node(treebuilder->tree_handler->ctx,
- appended);
-
- if (element_stack_push(treebuilder,
- element_type_from_verbatim_name(name, len),
- node) == false) {
+ node)) {
/** \todo errors */
}
}
@@ -1752,7 +1591,8 @@ void insert_element_no_push(hubbub_treebuilder *treebuilder,
* Close implied end tags
*
* \param treebuilder The treebuilder instance
- * \param except Tag type to exclude from processing [DD,DT,LI,P]
+ * \param except Tag type to exclude from processing [DD,DT,LI,P],
+ * or UNKNOWN to exclude nothing
*/
void close_implied_end_tags(hubbub_treebuilder *treebuilder,
element_type except)
@@ -1766,10 +1606,10 @@ void close_implied_end_tags(hubbub_treebuilder *treebuilder,
element_type otype;
void *node;
- if (type == except)
+ if (except != UNKNOWN && type == except)
break;
- if (element_stack_pop(treebuilder, &otype, &node) == false) {
+ if (!element_stack_pop(treebuilder, &otype, &node)) {
/** \todo errors */
}
@@ -1839,77 +1679,67 @@ void reset_insertion_mode(hubbub_treebuilder *treebuilder)
}
/**
- * Convert an element name into an element type
+ * Append text to the current node, inserting into the last child of the
+ * current node, iff it's a Text node.
*
* \param treebuilder The treebuilder instance
- * \param tag_name The tag name to consider
- * \return The corresponding element type
+ * \param string The string to append
*/
-element_type element_type_from_name(hubbub_treebuilder *treebuilder,
- const hubbub_string *tag_name)
+void append_text(hubbub_treebuilder *treebuilder,
+ const hubbub_string *string)
{
- const uint8_t *name = treebuilder->input_buffer + tag_name->data.off;
+ int success;
+ void *text, *appended;
+
+ /** \todo Append to pre-existing text child, iff
+ * one exists and it's the last in the child list */
+
+ success = treebuilder->tree_handler->create_text(
+ treebuilder->tree_handler->ctx, string, &text);
+ if (success != 0) {
+ /** \todo errors */
+ }
+
+ success = treebuilder->tree_handler->append_child(
+ treebuilder->tree_handler->ctx,
+ treebuilder->context.element_stack[
+ treebuilder->context.current_node].node,
+ text, &appended);
+ if (success != 0) {
+ /** \todo errors */
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx,
+ text);
+ }
- return element_type_from_verbatim_name(name, tag_name->len);
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx, appended);
+ treebuilder->tree_handler->unref_node(
+ treebuilder->tree_handler->ctx, text);
}
/**
- * Convert a verbatim element name into an element type
+ * Convert an element name into an element type
*
- * \param name The tag name
- * \param len Length, in bytes, of ::name
+ * \param treebuilder The treebuilder instance
+ * \param tag_name The tag name to consider
* \return The corresponding element type
*/
-element_type element_type_from_verbatim_name(const uint8_t *name, size_t len)
+element_type element_type_from_name(hubbub_treebuilder *treebuilder,
+ const hubbub_string *tag_name)
{
- static const struct {
- const char *name;
- element_type type;
- } name_type_map[] = {
- { "ADDRESS", ADDRESS }, { "AREA", AREA },
- { "BASE", BASE }, { "BASEFONT", BASEFONT },
- { "BGSOUND", BGSOUND }, { "BLOCKQUOTE", BLOCKQUOTE },
- { "BODY", BODY }, { "BR", BR },
- { "CENTER", CENTER }, { "COL", COL },
- { "COLGROUP", COLGROUP }, { "DD", DD },
- { "DIR", DIR }, { "DIV", DIV },
- { "DL", DL }, { "DT", DT },
- { "EMBED", EMBED }, { "FIELDSET", FIELDSET },
- { "FORM", FORM }, { "FRAME", FRAME },
- { "FRAMESET", FRAMESET }, { "H1", H1 },
- { "H2", H2 }, { "H3", H3 },
- { "H4", H4 }, { "H5", H5 },
- { "H6", H6 }, { "HEAD", HEAD },
- { "HR", HR }, { "IFRAME", IFRAME },
- { "IMAGE", IMAGE }, { "IMG", IMG },
- { "INPUT", INPUT }, { "ISINDEX", ISINDEX },
- { "LI", LI }, { "LINK", LINK },
- { "LISTING", LISTING }, { "MENU", MENU },
- { "META", META }, { "NOEMBED", NOEMBED },
- { "NOFRAMES", NOFRAMES }, { "NOSCRIPT", NOSCRIPT },
- { "OL", OL }, { "OPTGROUP", OPTGROUP },
- { "OPTION", OPTION }, { "P", P },
- { "PARAM", PARAM }, { "PLAINTEXT", PLAINTEXT },
- { "PRE", PRE }, { "SCRIPT", SCRIPT },
- { "SELECT", SELECT }, { "SPACER", SPACER },
- { "STYLE", STYLE }, { "TBODY", TBODY },
- { "TEXTAREA", TEXTAREA }, { "TFOOT", TFOOT },
- { "THEAD", THEAD }, { "TITLE", TITLE },
- { "TR", TR }, { "UL", UL },
- { "WBR", WBR },
- { "APPLET", APPLET }, { "BUTTON", BUTTON },
- { "CAPTION", CAPTION }, { "HTML", HTML },
- { "MARQUEE", MARQUEE }, { "OBJECT", OBJECT },
- { "TABLE", TABLE }, { "TD", TD },
- { "TH", TH },
- { "A", A }, { "B", B },
- { "BIG", BIG }, { "EM", EM },
- { "FONT", FONT }, { "I", I },
- { "NOBR", NOBR }, { "S", S },
- { "SMALL", SMALL }, { "STRIKE", STRIKE },
- { "STRONG", STRONG }, { "TT", TT },
- { "U", U },
- };
+ const uint8_t *name = NULL;
+ size_t len = tag_name->len;
+
+ switch (tag_name->type) {
+ case HUBBUB_STRING_OFF:
+ name = treebuilder->input_buffer + tag_name->data.off;
+ break;
+ case HUBBUB_STRING_PTR:
+ name = tag_name->data.ptr;
+ break;
+ }
+
/** \todo UTF-16 support */
/** \todo optimise this */
@@ -1925,8 +1755,7 @@ element_type element_type_from_verbatim_name(const uint8_t *name, size_t len)
return name_type_map[i].type;
}
- /** \todo produce type values for unknown tags */
- return U + 1;
+ return UNKNOWN;
}
/**
@@ -1935,7 +1764,7 @@ element_type element_type_from_verbatim_name(const uint8_t *name, size_t len)
* \param type Node type to consider
* \return True iff node is a special element
*/
-inline bool is_special_element(element_type type)
+bool is_special_element(element_type type)
{
return (type <= WBR);
}
@@ -1946,7 +1775,7 @@ inline bool is_special_element(element_type type)
* \param type Node type to consider
* \return True iff node is a scoping element
*/
-inline bool is_scoping_element(element_type type)
+bool is_scoping_element(element_type type)
{
return (type >= APPLET && type <= TH);
}
@@ -1957,7 +1786,7 @@ inline bool is_scoping_element(element_type type)
* \param type Node type to consider
* \return True iff node is a formatting element
*/
-inline bool is_formatting_element(element_type type)
+bool is_formatting_element(element_type type)
{
return (type >= A && type <= U);
}
@@ -1968,7 +1797,7 @@ inline bool is_formatting_element(element_type type)
* \param type Node type to consider
* \return True iff node is a phrasing element
*/
-inline bool is_phrasing_element(element_type type)
+bool is_phrasing_element(element_type type)
{
return (type > U);
}
@@ -2066,7 +1895,7 @@ bool element_stack_pop(hubbub_treebuilder *treebuilder,
}
/**
- * Insert an element into the list of active formatting elements
+ * Append an element to the end of the list of active formatting elements
*
* \param treebuilder Treebuilder instance containing list
* \param type Type of node being inserted
@@ -2074,7 +1903,7 @@ bool element_stack_pop(hubbub_treebuilder *treebuilder,
* \param stack_index Index into stack of open elements
* \return True on success, false on memory exhaustion
*/
-bool formatting_list_insert(hubbub_treebuilder *treebuilder,
+bool formatting_list_append(hubbub_treebuilder *treebuilder,
element_type type, void *node, uint32_t stack_index)
{
formatting_list_entry *entry;
@@ -2102,6 +1931,57 @@ bool formatting_list_insert(hubbub_treebuilder *treebuilder,
}
/**
+ * Insert an element into the list of active formatting elements
+ *
+ * \param treebuilder Treebuilder instance containing list
+ * \param prev Previous entry
+ * \param next Next entry
+ * \param type Type of node being inserted
+ * \param node Node being inserted
+ * \param stack_index Index into stack of open elements
+ * \return True on success, false on memory exhaustion
+ */
+bool formatting_list_insert(hubbub_treebuilder *treebuilder,
+ formatting_list_entry *prev, formatting_list_entry *next,
+ element_type type, void *node, uint32_t stack_index)
+{
+ formatting_list_entry *entry;
+
+ if (prev != NULL) {
+ assert(prev->next == next);
+ }
+
+ if (next != NULL) {
+ assert(next->prev == prev);
+ }
+
+ entry = treebuilder->alloc(NULL, sizeof(formatting_list_entry),
+ treebuilder->alloc_pw);
+ if (entry == NULL)
+ return false;
+
+ entry->details.type = type;
+ entry->details.node = node;
+ entry->stack_index = stack_index;
+
+ entry->prev = prev;
+ entry->next = next;
+
+ if (entry->prev != NULL)
+ entry->prev->next = entry;
+ else
+ treebuilder->context.formatting_list = entry;
+
+ if (entry->next != NULL)
+ entry->next->prev = entry;
+ else
+ treebuilder->context.formatting_list_end = entry;
+
+ return true;
+}
+
+
+/**
* Remove an element from the list of active formatting elements
*
* \param treebuilder Treebuilder instance containing list
@@ -2165,3 +2045,62 @@ bool formatting_list_replace(hubbub_treebuilder *treebuilder,
return true;
}
+#ifndef NDEBUG
+static const char *element_type_to_name(element_type type);
+
+/**
+ * Dump an element stack to the given file pointer
+ *
+ * \param treebuilder The treebuilder instance
+ * \param fp The file to dump to
+ */
+void element_stack_dump(hubbub_treebuilder *treebuilder, FILE *fp)
+{
+ element_context *stack = treebuilder->context.element_stack;
+ uint32_t i;
+
+ for (i = 0; i <= treebuilder->context.current_node; i++) {
+ fprintf(fp, "%u: %s %p\n",
+ i,
+ element_type_to_name(stack[i].type),
+ stack[i].node);
+ }
+}
+
+/**
+ * Dump a formatting list to the given file pointer
+ *
+ * \param treebuilder The treebuilder instance
+ * \param fp The file to dump to
+ */
+void formatting_list_dump(hubbub_treebuilder *treebuilder, FILE *fp)
+{
+ formatting_list_entry *entry;
+
+ for (entry = treebuilder->context.formatting_list; entry != NULL;
+ entry = entry->next) {
+ fprintf(fp, "%s %p %u\n",
+ element_type_to_name(entry->details.type),
+ entry->details.node, entry->stack_index);
+ }
+}
+
+/**
+ * Convert an element type to a name
+ *
+ * \param type The element type
+ * \return Pointer to name
+ */
+const char *element_type_to_name(element_type type)
+{
+ for (uint32_t i = 0;
+ i < sizeof(name_type_map) / sizeof(name_type_map[0]);
+ i++) {
+ if (name_type_map[i].type == type)
+ return name_type_map[i].name;
+ }
+
+ return "UNKNOWN";
+}
+#endif
+
diff --git a/test/data/html/INDEX b/test/data/html/INDEX
index cd97b8e..25483db 100644
--- a/test/data/html/INDEX
+++ b/test/data/html/INDEX
@@ -6,3 +6,5 @@ section-tree-construction.html HTML5 tree construction algorithm
#web-apps.html HTML5 specification
initial-close-tag.html Page with initial </html>
#phonecalls.html HTML document that breaks libxml's HTML parser
+misnested.html Misnested tags
+isindex.html Test of <isindex> parsing
diff --git a/test/data/html/isindex.html b/test/data/html/isindex.html
new file mode 100644
index 0000000..f454069
--- /dev/null
+++ b/test/data/html/isindex.html
@@ -0,0 +1,8 @@
+<!DOCTYPE html>
+<html>
+<body>
+<isindex action="foo" name="bar">
+<isindex action="foo" name="bar" prompt="baz">
+</body>
+</html>
+
diff --git a/test/data/html/misnested.html b/test/data/html/misnested.html
new file mode 100644
index 0000000..1116840
--- /dev/null
+++ b/test/data/html/misnested.html
@@ -0,0 +1,11 @@
+<!DOCTYPE html>
+<html>
+<head>
+<title>Misnested tags</title>
+</head>
+<body>
+<p>Hello <b>this <i>is a test </b>of badly </i>nested tags</p>
+<p>Hello <b>this <i>is a <button>test </b>of badly </i>nested tags</p>
+<p>Hello <a>this is <p><a>test </p></p>
+</body>
+</html>
diff --git a/test/tree.c b/test/tree.c
index f4e6c3c..4370139 100644
--- a/test/tree.c
+++ b/test/tree.c
@@ -37,8 +37,6 @@ static int create_doctype(void *ctx, const hubbub_string *qname,
const hubbub_string *public_id, const hubbub_string *system_id,
void **result);
static int create_element(void *ctx, const hubbub_tag *tag, void **result);
-static int create_element_verbatim(void *ctx, const uint8_t *name, size_t len,
- void **result);
static int create_text(void *ctx, const hubbub_string *data, void **result);
static int ref_node(void *ctx, void *node);
static int unref_node(void *ctx, void *node);
@@ -47,13 +45,18 @@ static int insert_before(void *ctx, void *parent, void *child, void *ref_child,
void **result);
static int remove_child(void *ctx, void *parent, void *child, void **result);
static int clone_node(void *ctx, void *node, bool deep, void **result);
+static int reparent_children(void *ctx, void *node, void *new_parent);
+static int get_parent(void *ctx, void *node, bool element_only, void **result);
+static int has_children(void *ctx, void *node, bool *result);
+static int form_associate(void *ctx, void *form, void *node);
+static int add_attributes(void *ctx, void *node,
+ const hubbub_attribute *attributes, uint32_t n_attributes);
static int set_quirks_mode(void *ctx, hubbub_quirks_mode mode);
static hubbub_tree_handler tree_handler = {
create_comment,
create_doctype,
create_element,
- create_element_verbatim,
create_text,
ref_node,
unref_node,
@@ -61,6 +64,11 @@ static hubbub_tree_handler tree_handler = {
insert_before,
remove_child,
clone_node,
+ reparent_children,
+ get_parent,
+ has_children,
+ form_associate,
+ add_attributes,
set_quirks_mode,
NULL
};
@@ -251,22 +259,6 @@ int create_element(void *ctx, const hubbub_tag *tag, void **result)
return 0;
}
-int create_element_verbatim(void *ctx, const uint8_t *name, size_t len,
- void **result)
-{
- printf("Creating (%u) [element verbatim '%.*s']\n",
- ++node_counter, len, name);
-
- GROW_REF
- node_ref[node_counter] = 0;
-
- ref_node(ctx, (void *) node_counter);
-
- *result = (void *) node_counter;
-
- return 0;
-}
-
int create_text(void *ctx, const hubbub_string *data, void **result)
{
printf("Creating (%u) [text '%.*s']\n", ++node_counter,
@@ -349,6 +341,60 @@ int clone_node(void *ctx, void *node, bool deep, void **result)
return 0;
}
+int reparent_children(void *ctx, void *node, void *new_parent)
+{
+ UNUSED(ctx);
+
+ printf("Reparenting children of %u to %u\n",
+ (uintptr_t) node, (uintptr_t) new_parent);
+
+ return 0;
+}
+
+int get_parent(void *ctx, void *node, bool element_only, void **result)
+{
+ printf("Retrieving parent of %u (%s)\n", (uintptr_t) node,
+ element_only ? "element only" : "");
+
+ ref_node(ctx, (void *) 1);
+ *result = (void *) 1;
+
+ return 0;
+}
+
+int has_children(void *ctx, void *node, bool *result)
+{
+ UNUSED(ctx);
+
+ printf("Want children for %u\n", (uintptr_t) node);
+
+ *result = false;
+
+ return 0;
+}
+
+int form_associate(void *ctx, void *form, void *node)
+{
+ UNUSED(ctx);
+
+ printf("Associating %u with form %u\n",
+ (uintptr_t) node, (uintptr_t) form);
+
+ return 0;
+}
+
+int add_attributes(void *ctx, void *node,
+ const hubbub_attribute *attributes, uint32_t n_attributes)
+{
+ UNUSED(ctx);
+ UNUSED(attributes);
+ UNUSED(n_attributes);
+
+ printf("Adding attributes to %u\n", (uintptr_t) node);
+
+ return 0;
+}
+
int set_quirks_mode(void *ctx, hubbub_quirks_mode mode)
{
UNUSED(ctx);