From 6261a9cf2faada630dc1924fcf58305594a8028a Mon Sep 17 00:00:00 2001 From: Andrew Sidwell Date: Mon, 23 Jun 2008 20:22:25 +0000 Subject: Put each insertion mode into its own C file, so that treebuilder.c doesn't get extremely long. svn path=/trunk/hubbub/; revision=4429 --- src/treebuilder/Makefile | 5 +- src/treebuilder/after_head.c | 127 ++++++ src/treebuilder/before_head.c | 106 +++++ src/treebuilder/before_html.c | 126 ++++++ src/treebuilder/generic_rcdata.c | 123 ++++++ src/treebuilder/in_head.c | 119 ++++++ src/treebuilder/in_head_noscript.c | 121 ++++++ src/treebuilder/initial.c | 101 +++++ src/treebuilder/internal.h | 27 -- src/treebuilder/modes.h | 66 +++ src/treebuilder/script_collect.c | 123 ++++++ src/treebuilder/treebuilder.c | 838 +------------------------------------ 12 files changed, 1018 insertions(+), 864 deletions(-) create mode 100644 src/treebuilder/after_head.c create mode 100644 src/treebuilder/before_head.c create mode 100644 src/treebuilder/before_html.c create mode 100644 src/treebuilder/generic_rcdata.c create mode 100644 src/treebuilder/in_head.c create mode 100644 src/treebuilder/in_head_noscript.c create mode 100644 src/treebuilder/initial.c create mode 100644 src/treebuilder/modes.h create mode 100644 src/treebuilder/script_collect.c diff --git a/src/treebuilder/Makefile b/src/treebuilder/Makefile index 3c6355c..f73f774 100644 --- a/src/treebuilder/Makefile +++ b/src/treebuilder/Makefile @@ -32,7 +32,10 @@ dirstack_$(sp) := $(d) d := $(DIR) # Sources -SRCS_$(d) := in_body.c treebuilder.c +SRCS_$(d) := treebuilder.c \ + initial.c before_html.c before_head.c in_head.c \ + in_head_noscript.c after_head.c in_body.c \ + generic_rcdata.c script_collect.c # Append to sources for component SOURCES += $(addprefix $(d), $(SRCS_$(d))) diff --git a/src/treebuilder/after_head.c b/src/treebuilder/after_head.c new file mode 100644 index 0000000..b460115 --- /dev/null +++ b/src/treebuilder/after_head.c @@ -0,0 +1,127 @@ +/* + * This file is part of Hubbub. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2008 John-Mark Bell + */ + +#include +#include + +#include "treebuilder/modes.h" +#include "treebuilder/internal.h" +#include "treebuilder/treebuilder.h" +#include "utils/utils.h" + + +/** + * Handle tokens in "after head" insertion mode + * + * \param treebuilder The treebuilder instance + * \param token The token to process + * \return True to reprocess the token, false otherwise + */ +bool handle_after_head(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + bool reprocess = false; + bool handled = false; + + switch (token->type) { + case HUBBUB_TOKEN_CHARACTER: + append_text(treebuilder, &token->data.character); + break; + case HUBBUB_TOKEN_COMMENT: + process_comment_append(treebuilder, token, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node); + break; + case HUBBUB_TOKEN_DOCTYPE: + /** \todo parse error */ + break; + case HUBBUB_TOKEN_START_TAG: + { + element_type type = element_type_from_name(treebuilder, + &token->data.tag.name); + + if (type == HTML) { + /* Process as if "in body" */ + process_tag_in_body(treebuilder, token); + } else if (type == BODY) { + handled = true; + } else if (type == FRAMESET) { + insert_element(treebuilder, &token->data.tag); + treebuilder->context.mode = IN_FRAMESET; + } else if (type == BASE || type == LINK || type == META || + type == NOFRAMES || type == SCRIPT || + type == STYLE || type == TITLE) { + element_type otype; + void *node; + + /** \todo parse error */ + + if (!element_stack_push(treebuilder, + HEAD, + treebuilder->context.head_element)) { + /** \todo errors */ + } + + + /* This should be identical to handling "in head" */ + if (type == BASE || type == LINK || type == META) { + /** \todo ack sc flag */ + + process_base_link_meta_in_head(treebuilder, + token, type); + } else if (type == SCRIPT) { + process_script_in_head(treebuilder, token); + } else if (type == STYLE || type == NOFRAMES) { + parse_generic_rcdata(treebuilder, token, false); + } else if (type == TITLE) { + parse_generic_rcdata(treebuilder, token, true); + } + + if (!element_stack_pop(treebuilder, &otype, &node)) { + /** \todo errors */ + } + + /* No need to unref node as we never increased + * its reference count when pushing it on the stack */ + } else if (type == HEAD) { + /** \todo parse error */ + } else { + reprocess = true; + } + } + break; + case HUBBUB_TOKEN_END_TAG: + /** \parse error */ + break; + case HUBBUB_TOKEN_EOF: + reprocess = true; + break; + } + + if (handled || reprocess) { + hubbub_tag tag; + + if (reprocess) { + /* Manufacture body */ + tag.name.type = HUBBUB_STRING_PTR; + tag.name.data.ptr = (const uint8_t *) "body"; + tag.name.len = SLEN("body"); + + tag.n_attributes = 0; + tag.attributes = NULL; + } else { + tag = token->data.tag; + } + + insert_element(treebuilder, &tag); + + treebuilder->context.mode = IN_BODY; + } + + return reprocess; +} + diff --git a/src/treebuilder/before_head.c b/src/treebuilder/before_head.c new file mode 100644 index 0000000..1534ff8 --- /dev/null +++ b/src/treebuilder/before_head.c @@ -0,0 +1,106 @@ +/* + * This file is part of Hubbub. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2008 John-Mark Bell + */ + +#include +#include + +#include "treebuilder/modes.h" +#include "treebuilder/internal.h" +#include "treebuilder/treebuilder.h" +#include "utils/utils.h" + + +/** + * Handle token in "before head" insertion mode + * + * \param treebuilder The treebuilder instance + * \param token The token to handle + * \return True to reprocess token, false otherwise + */ +bool handle_before_head(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + bool reprocess = false; + bool handled = false; + + switch (token->type) { + case HUBBUB_TOKEN_CHARACTER: + reprocess = process_characters_expect_whitespace(treebuilder, + token, false); + break; + case HUBBUB_TOKEN_COMMENT: + process_comment_append(treebuilder, token, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node); + break; + case HUBBUB_TOKEN_DOCTYPE: + /** \todo parse error */ + break; + case HUBBUB_TOKEN_START_TAG: + { + element_type type = element_type_from_name(treebuilder, + &token->data.tag.name); + + if (type == HTML) { + /* Process as if "in body" */ + process_tag_in_body(treebuilder, token); + } else if (type == HEAD) { + handled = true; + } else { + reprocess = true; + } + } + break; + case HUBBUB_TOKEN_END_TAG: + { + element_type type = element_type_from_name(treebuilder, + &token->data.tag.name); + + if (type == HEAD || type == BR) { + reprocess = true; + } else { + /** \todo parse error */ + } + } + break; + case HUBBUB_TOKEN_EOF: + reprocess = true; + break; + } + + if (handled || reprocess) { + hubbub_tag tag; + + if (reprocess) { + /* Manufacture head tag */ + tag.name.type = HUBBUB_STRING_PTR; + tag.name.data.ptr = (const uint8_t *) "head"; + tag.name.len = SLEN("head"); + + tag.n_attributes = 0; + tag.attributes = NULL; + } else { + tag = token->data.tag; + } + + insert_element(treebuilder, &tag); + + treebuilder->tree_handler->ref_node( + treebuilder->tree_handler->ctx, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node); + + treebuilder->context.head_element = + treebuilder->context.element_stack[ + treebuilder->context.current_node].node; + + treebuilder->context.mode = IN_HEAD; + } + + return reprocess; +} + diff --git a/src/treebuilder/before_html.c b/src/treebuilder/before_html.c new file mode 100644 index 0000000..f8b3231 --- /dev/null +++ b/src/treebuilder/before_html.c @@ -0,0 +1,126 @@ +/* + * This file is part of Hubbub. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2008 John-Mark Bell + */ + +#include +#include + +#include "treebuilder/modes.h" +#include "treebuilder/internal.h" +#include "treebuilder/treebuilder.h" +#include "utils/utils.h" + + +/** + * Handle token in "before html" insertion mode + * + * \param treebuilder The treebuilder instance + * \param token The token to handle + * \return True to reprocess token, false otherwise + */ +bool handle_before_html(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + bool reprocess = false; + bool handled = false; + + switch (token->type) { + case HUBBUB_TOKEN_DOCTYPE: + /** \todo parse error */ + break; + case HUBBUB_TOKEN_COMMENT: + process_comment_append(treebuilder, token, + treebuilder->context.document); + break; + case HUBBUB_TOKEN_CHARACTER: + reprocess = process_characters_expect_whitespace(treebuilder, + token, false); + break; + case HUBBUB_TOKEN_START_TAG: + { + element_type type = element_type_from_name(treebuilder, + &token->data.tag.name); + + if (type == HTML) { + handled = true; + } else { + reprocess = true; + } + } + break; + case HUBBUB_TOKEN_END_TAG: + case HUBBUB_TOKEN_EOF: + reprocess = true; + break; + } + + + if (handled || reprocess) { + int success; + void *html, *appended; + + /* We can't use insert_element() here, as it assumes + * that we're inserting into current_node. There is + * no current_node to insert into at this point so + * we get to do it manually. */ + + if (reprocess) { + /* Need to manufacture html element */ + hubbub_tag tag; + + /** \todo UTF-16 */ + tag.name.type = HUBBUB_STRING_PTR; + tag.name.data.ptr = (const uint8_t *) "html"; + tag.name.len = SLEN("html"); + + tag.n_attributes = 0; + tag.attributes = NULL; + + success = treebuilder->tree_handler->create_element( + treebuilder->tree_handler->ctx, + &tag, &html); + } else { + success = treebuilder->tree_handler->create_element( + treebuilder->tree_handler->ctx, + &token->data.tag, &html); + } + + if (success != 0) { + /** \todo errors */ + } + + success = treebuilder->tree_handler->append_child( + treebuilder->tree_handler->ctx, + treebuilder->context.document, + html, &appended); + if (success != 0) { + /** \todo errors */ + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + html); + } + + /* We can't use element_stack_push() here, as it + * assumes that current_node is pointing at the index + * before the one to insert at. For the first entry in + * the stack, this does not hold so we must insert + * manually. */ + treebuilder->context.element_stack[0].type = HTML; + treebuilder->context.element_stack[0].node = html; + treebuilder->context.current_node = 0; + + /** \todo cache selection algorithm */ + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + appended); + + treebuilder->context.mode = BEFORE_HEAD; + } + + return reprocess; +} + diff --git a/src/treebuilder/generic_rcdata.c b/src/treebuilder/generic_rcdata.c new file mode 100644 index 0000000..07173cf --- /dev/null +++ b/src/treebuilder/generic_rcdata.c @@ -0,0 +1,123 @@ +/* + * This file is part of Hubbub. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2008 John-Mark Bell + */ + +#include +#include + +#include "treebuilder/modes.h" +#include "treebuilder/internal.h" +#include "treebuilder/treebuilder.h" +#include "utils/utils.h" + + +/** + * Handle tokens in "generic rcdata" insertion mode + * + * \param treebuilder The treebuilder instance + * \param token The token to process + * \return True to reprocess the token, false otherwise + */ +bool handle_generic_rcdata(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + bool reprocess = false; + bool done = false; + + if (treebuilder->context.strip_leading_lr && + token->type != HUBBUB_TOKEN_CHARACTER) { + /* Reset the LR stripping flag */ + treebuilder->context.strip_leading_lr = false; + } + + switch (token->type) { + case HUBBUB_TOKEN_CHARACTER: + if (treebuilder->context.collect.string.len == 0) { + treebuilder->context.collect.string.data.off = + token->data.character.data.off; + } + treebuilder->context.collect.string.len += + token->data.character.len; + + if (treebuilder->context.strip_leading_lr) { + const uint8_t *str = treebuilder->input_buffer + + treebuilder->context.collect.string.data.off; + + /** \todo UTF-16 */ + if (*str == '\n') { + treebuilder->context.collect.string.data.off++; + treebuilder->context.collect.string.len--; + } + + treebuilder->context.strip_leading_lr = false; + } + break; + case HUBBUB_TOKEN_END_TAG: + { + element_type type = element_type_from_name(treebuilder, + &token->data.tag.name); + + if (type != treebuilder->context.collect.type) { + /** \todo parse error */ + } + + done = true; + } + break; + case HUBBUB_TOKEN_EOF: + /** \todo parse error */ + done = reprocess = true; + break; + case HUBBUB_TOKEN_COMMENT: + case HUBBUB_TOKEN_DOCTYPE: + case HUBBUB_TOKEN_START_TAG: + /* Should never happen */ + assert(0); + break; + } + + if (done) { + int success; + void *text, *appended; + + success = treebuilder->tree_handler->create_text( + treebuilder->tree_handler->ctx, + &treebuilder->context.collect.string, + &text); + if (success != 0) { + /** \todo errors */ + } + + success = treebuilder->tree_handler->append_child( + treebuilder->tree_handler->ctx, + treebuilder->context.collect.node, + text, &appended); + if (success != 0) { + /** \todo errors */ + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + text); + } + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, appended); + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, text); + + /* Clean up context */ + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + treebuilder->context.collect.node); + treebuilder->context.collect.node = NULL; + + /* Return to previous insertion mode */ + treebuilder->context.mode = + treebuilder->context.collect.mode; + } + + return reprocess; +} + diff --git a/src/treebuilder/in_head.c b/src/treebuilder/in_head.c new file mode 100644 index 0000000..96ff87d --- /dev/null +++ b/src/treebuilder/in_head.c @@ -0,0 +1,119 @@ +/* + * This file is part of Hubbub. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2008 John-Mark Bell + */ + +#include +#include + +#include "treebuilder/modes.h" +#include "treebuilder/internal.h" +#include "treebuilder/treebuilder.h" +#include "utils/utils.h" + + +/** + * Handle token in "in head" insertion mode + * + * \param treebuilder The treebuilder instance + * \param token The token to handle + * \return True to reprocess token, false otherwise + */ +bool handle_in_head(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + bool reprocess = false; + bool handled = false; + + switch (token->type) { + case HUBBUB_TOKEN_CHARACTER: + reprocess = process_characters_expect_whitespace(treebuilder, + token, true); + break; + case HUBBUB_TOKEN_COMMENT: + process_comment_append(treebuilder, token, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node); + break; + case HUBBUB_TOKEN_DOCTYPE: + /** \todo parse error */ + break; + case HUBBUB_TOKEN_START_TAG: + { + element_type type = element_type_from_name(treebuilder, + &token->data.tag.name); + + if (type == HTML) { + /* Process as if "in body" */ + process_tag_in_body(treebuilder, token); + } else if (type == BASE || type == COMMAND || + type == EVENT_SOURCE || type == LINK) { + process_base_link_meta_in_head(treebuilder, + token, type); + + /** \todo ack sc flag */ + } else if (type == META) { + process_base_link_meta_in_head(treebuilder, + token, type); + + /** \todo ack sc flag */ + + /** \todo detect charset */ + } else if (type == TITLE) { + parse_generic_rcdata(treebuilder, token, true); + } else if (type == NOFRAMES || type == STYLE) { + parse_generic_rcdata(treebuilder, token, false); + } else if (type == NOSCRIPT) { + /** \todo determine if scripting is enabled */ + if (false /*scripting_is_enabled*/) { + parse_generic_rcdata(treebuilder, token, false); + } else { + insert_element(treebuilder, &token->data.tag); + treebuilder->context.mode = IN_HEAD_NOSCRIPT; + } + } else if (type == SCRIPT) { + process_script_in_head(treebuilder, token); + } else if (type == HEAD) { + /** \todo parse error */ + } else { + reprocess = true; + } + } + break; + case HUBBUB_TOKEN_END_TAG: + { + element_type type = element_type_from_name(treebuilder, + &token->data.tag.name); + + if (type == HEAD) { + handled = true; + } else if (type == BR) { + reprocess = true; + } /** \todo parse error */ + } + break; + case HUBBUB_TOKEN_EOF: + reprocess = true; + break; + } + + if (handled || reprocess) { + element_type otype; + void *node; + + if (!element_stack_pop(treebuilder, &otype, &node)) { + /** \todo errors */ + } + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + node); + + treebuilder->context.mode = AFTER_HEAD; + } + + return reprocess; +} + diff --git a/src/treebuilder/in_head_noscript.c b/src/treebuilder/in_head_noscript.c new file mode 100644 index 0000000..ca01681 --- /dev/null +++ b/src/treebuilder/in_head_noscript.c @@ -0,0 +1,121 @@ +/* + * This file is part of Hubbub. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2008 John-Mark Bell + */ + +#include +#include + +#include "treebuilder/modes.h" +#include "treebuilder/internal.h" +#include "treebuilder/treebuilder.h" +#include "utils/utils.h" + + +/** + * Handle tokens in "in head noscript" insertion mode + * + * \param treebuilder The treebuilder instance + * \param token The token to process + * \return True to reprocess the token, false otherwise + */ +bool handle_in_head_noscript(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + bool reprocess = false; + bool handled = false; + + switch (token->type) { + case HUBBUB_TOKEN_CHARACTER: + /* This should be equivalent to "in head" processing */ + reprocess = process_characters_expect_whitespace(treebuilder, + token, true); + break; + case HUBBUB_TOKEN_COMMENT: + /* This should be equivalent to "in head" processing */ + process_comment_append(treebuilder, token, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node); + break; + case HUBBUB_TOKEN_DOCTYPE: + /** \todo parse error */ + break; + case HUBBUB_TOKEN_START_TAG: + { + element_type type = element_type_from_name(treebuilder, + &token->data.tag.name); + + if (type == HTML) { + /* Process as "in body" */ + process_tag_in_body(treebuilder, token); + } else if (type == NOSCRIPT) { + handled = true; + } else if (type == LINK) { + /* This should be equivalent to "in head" processing */ + process_base_link_meta_in_head(treebuilder, + token, type); + + /** \todo ack sc flag */ + } else if (type == META) { + /* This should be equivalent to "in head" processing */ + process_base_link_meta_in_head(treebuilder, + token, type); + + /** \todo ack sc flag */ + + /** \todo detect charset */ + } else if (type == NOFRAMES) { + /* This should be equivalent to "in head" processing */ + parse_generic_rcdata(treebuilder, token, true); + } else if (type == STYLE) { + /* This should be equivalent to "in head" processing */ + parse_generic_rcdata(treebuilder, token, false); + } else if (type == HEAD || type == NOSCRIPT) { + /** \todo parse error */ + } else { + /** \todo parse error */ + reprocess = true; + } + } + break; + case HUBBUB_TOKEN_END_TAG: + { + element_type type = element_type_from_name(treebuilder, + &token->data.tag.name); + + if (type == NOSCRIPT) { + handled = true; + } else if (type == BR) { + /** \todo parse error */ + reprocess = true; + } else { + /** \todo parse error */ + } + } + break; + case HUBBUB_TOKEN_EOF: + /** \todo parse error */ + reprocess = true; + break; + } + + if (handled || reprocess) { + element_type otype; + void *node; + + if (!element_stack_pop(treebuilder, &otype, &node)) { + /** \todo errors */ + } + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + node); + + treebuilder->context.mode = IN_HEAD; + } + + return reprocess; +} + diff --git a/src/treebuilder/initial.c b/src/treebuilder/initial.c new file mode 100644 index 0000000..30a380b --- /dev/null +++ b/src/treebuilder/initial.c @@ -0,0 +1,101 @@ +/* + * This file is part of Hubbub. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2008 John-Mark Bell + */ + +#include +#include + +#include "treebuilder/modes.h" +#include "treebuilder/internal.h" +#include "treebuilder/treebuilder.h" +#include "utils/utils.h" + + +/** + * Handle token in initial insertion mode + * + * \param treebuilder The treebuilder instance + * \param token The token to handle + * \return True to reprocess token, false otherwise + */ +bool handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token *token) +{ + bool reprocess = false; + + switch (token->type) { + case HUBBUB_TOKEN_CHARACTER: + if (process_characters_expect_whitespace(treebuilder, token, + false)) { + /** \todo parse error */ + + treebuilder->tree_handler->set_quirks_mode( + treebuilder->tree_handler->ctx, + HUBBUB_QUIRKS_MODE_FULL); + treebuilder->context.mode = BEFORE_HTML; + reprocess = true; + } + break; + case HUBBUB_TOKEN_COMMENT: + process_comment_append(treebuilder, token, + treebuilder->context.document); + break; + case HUBBUB_TOKEN_DOCTYPE: + { + int success; + void *doctype, *appended; + + /** \todo parse error */ + + /** \todo need public and system ids from tokeniser */ + success = treebuilder->tree_handler->create_doctype( + treebuilder->tree_handler->ctx, + &token->data.doctype.name, + &token->data.doctype.public_id, + &token->data.doctype.system_id, &doctype); + if (success != 0) { + /** \todo errors */ + } + + /* Append to Document node */ + success = treebuilder->tree_handler->append_child( + treebuilder->tree_handler->ctx, + treebuilder->context.document, + doctype, &appended); + if (success != 0) { + /** \todo errors */ + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + doctype); + } + + /* \todo look up the doctype in a catalog */ + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, appended); + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, doctype); + + treebuilder->context.mode = BEFORE_HTML; + } + break; + case HUBBUB_TOKEN_START_TAG: + case HUBBUB_TOKEN_END_TAG: + case HUBBUB_TOKEN_EOF: + /** \todo parse error */ + treebuilder->tree_handler->set_quirks_mode( + treebuilder->tree_handler->ctx, + HUBBUB_QUIRKS_MODE_FULL); + reprocess = true; + break; + } + + if (reprocess) { + treebuilder->context.mode = BEFORE_HTML; + } + + return reprocess; +} + diff --git a/src/treebuilder/internal.h b/src/treebuilder/internal.h index 392f606..a4eed84 100644 --- a/src/treebuilder/internal.h +++ b/src/treebuilder/internal.h @@ -10,33 +10,6 @@ #include "treebuilder/treebuilder.h" -typedef enum -{ - INITIAL, - BEFORE_HTML, - BEFORE_HEAD, - IN_HEAD, - IN_HEAD_NOSCRIPT, - AFTER_HEAD, - IN_BODY, - IN_TABLE, - IN_CAPTION, - IN_COLUMN_GROUP, - IN_TABLE_BODY, - IN_ROW, - IN_CELL, - IN_SELECT, - IN_SELECT_IN_TABLE, - IN_FOREIGN_CONTENT, - AFTER_BODY, - IN_FRAMESET, - AFTER_FRAMESET, - AFTER_AFTER_BODY, - AFTER_AFTER_FRAMESET, - GENERIC_RCDATA, - SCRIPT_COLLECT_CHARACTERS, -} insertion_mode; - typedef enum { /* Special */ diff --git a/src/treebuilder/modes.h b/src/treebuilder/modes.h new file mode 100644 index 0000000..ab9a229 --- /dev/null +++ b/src/treebuilder/modes.h @@ -0,0 +1,66 @@ +/* + * This file is part of Hubbub. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2008 John-Mark Bell + */ + +#ifndef hubbub_treebuilder_modes_h_ +#define hubbub_treebuilder_modes_h_ + +#include "treebuilder/treebuilder.h" + +/** The various treebuilder insertion modes */ +typedef enum +{ + INITIAL, + BEFORE_HTML, + BEFORE_HEAD, + IN_HEAD, + IN_HEAD_NOSCRIPT, + AFTER_HEAD, + IN_BODY, + IN_TABLE, + IN_CAPTION, + IN_COLUMN_GROUP, + IN_TABLE_BODY, + IN_ROW, + IN_CELL, + IN_SELECT, + IN_SELECT_IN_TABLE, + IN_FOREIGN_CONTENT, + AFTER_BODY, + IN_FRAMESET, + AFTER_FRAMESET, + AFTER_AFTER_BODY, + + AFTER_AFTER_FRAMESET, + GENERIC_RCDATA, + SCRIPT_COLLECT_CHARACTERS, +} insertion_mode; + + + +bool handle_initial(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +bool handle_before_html(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +bool handle_before_head(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +bool handle_in_head(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +bool handle_in_head_noscript(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +bool handle_after_head(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +bool handle_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +bool handle_generic_rcdata(hubbub_treebuilder *treebuilder, + const hubbub_token *token); +bool handle_script_collect_characters(hubbub_treebuilder *treebuilder, + const hubbub_token *token); + +bool process_tag_in_body(hubbub_treebuilder *treebuilder, + const hubbub_token *token); + +#endif diff --git a/src/treebuilder/script_collect.c b/src/treebuilder/script_collect.c new file mode 100644 index 0000000..7f61f95 --- /dev/null +++ b/src/treebuilder/script_collect.c @@ -0,0 +1,123 @@ +/* + * This file is part of Hubbub. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2008 John-Mark Bell + */ + +#include +#include + +#include "treebuilder/modes.h" +#include "treebuilder/internal.h" +#include "treebuilder/treebuilder.h" +#include "utils/utils.h" + + +/** + * Handle tokens in "script collect characters" insertion mode + * + * \param treebuilder The treebuilder instance + * \param token The token to process + * \return True to reprocess the token, false otherwise + */ +bool handle_script_collect_characters(hubbub_treebuilder *treebuilder, + const hubbub_token *token) +{ + bool reprocess = false; + bool done = false; + + switch (token->type) { + case HUBBUB_TOKEN_CHARACTER: + if (treebuilder->context.collect.string.len == 0) { + treebuilder->context.collect.string.data.off = + token->data.character.data.off; + } + treebuilder->context.collect.string.len += + token->data.character.len; + break; + case HUBBUB_TOKEN_END_TAG: + { + element_type type = element_type_from_name(treebuilder, + &token->data.tag.name); + + if (type != treebuilder->context.collect.type) { + /** \todo parse error */ + /** \todo Mark script as "already executed" */ + } + + done = true; + } + break; + case HUBBUB_TOKEN_EOF: + case HUBBUB_TOKEN_COMMENT: + case HUBBUB_TOKEN_DOCTYPE: + case HUBBUB_TOKEN_START_TAG: + /** \todo parse error */ + /** \todo Mark script as "already executed" */ + done = reprocess = true; + break; + } + + if (done) { + int success; + void *text, *appended; + + success = treebuilder->tree_handler->create_text( + treebuilder->tree_handler->ctx, + &treebuilder->context.collect.string, + &text); + if (success != 0) { + /** \todo errors */ + } + + /** \todo fragment case -- skip this lot entirely */ + + success = treebuilder->tree_handler->append_child( + treebuilder->tree_handler->ctx, + treebuilder->context.collect.node, + text, &appended); + if (success != 0) { + /** \todo errors */ + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + text); + } + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, appended); + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, text); + + /** \todo insertion point manipulation */ + + /* Append script node to current node */ + success = treebuilder->tree_handler->append_child( + treebuilder->tree_handler->ctx, + treebuilder->context.element_stack[ + treebuilder->context.current_node].node, + treebuilder->context.collect.node, &appended); + if (success != 0) { + /** \todo errors */ + } + + /** \todo restore insertion point */ + + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + appended); + treebuilder->tree_handler->unref_node( + treebuilder->tree_handler->ctx, + treebuilder->context.collect.node); + treebuilder->context.collect.node = NULL; + + /** \todo process any pending script */ + + /* Return to previous insertion mode */ + treebuilder->context.mode = + treebuilder->context.collect.mode; + } + + return reprocess; +} + diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c index 68f82d8..2b256b4 100644 --- a/src/treebuilder/treebuilder.c +++ b/src/treebuilder/treebuilder.c @@ -8,11 +8,12 @@ #include #include -#include "treebuilder/in_body.h" +#include "treebuilder/modes.h" #include "treebuilder/internal.h" #include "treebuilder/treebuilder.h" #include "utils/utils.h" + static const struct { const char *name; element_type type; @@ -68,23 +69,6 @@ static void hubbub_treebuilder_buffer_handler(const uint8_t *data, static void hubbub_treebuilder_token_handler(const hubbub_token *token, void *pw); -static bool handle_initial(hubbub_treebuilder *treebuilder, - const hubbub_token *token); -static bool handle_before_html(hubbub_treebuilder *treebuilder, - const hubbub_token *token); -static bool handle_before_head(hubbub_treebuilder *treebuilder, - const hubbub_token *token); -static bool handle_in_head(hubbub_treebuilder *treebuilder, - const hubbub_token *token); -static bool handle_in_head_noscript(hubbub_treebuilder *treebuilder, - const hubbub_token *token); -static bool handle_after_head(hubbub_treebuilder *treebuilder, - const hubbub_token *token); -static bool handle_generic_rcdata(hubbub_treebuilder *treebuilder, - const hubbub_token *token); -static bool handle_script_collect_characters(hubbub_treebuilder *treebuilder, - const hubbub_token *token); - /** * Create a hubbub treebuilder @@ -371,824 +355,6 @@ void hubbub_treebuilder_token_handler(const hubbub_token *token, } } -/** - * Handle token in initial insertion mode - * - * \param treebuilder The treebuilder instance - * \param token The token to handle - * \return True to reprocess token, false otherwise - */ -bool handle_initial(hubbub_treebuilder *treebuilder, const hubbub_token *token) -{ - bool reprocess = false; - - switch (token->type) { - case HUBBUB_TOKEN_CHARACTER: - if (process_characters_expect_whitespace(treebuilder, token, - false)) { - /** \todo parse error */ - - treebuilder->tree_handler->set_quirks_mode( - treebuilder->tree_handler->ctx, - HUBBUB_QUIRKS_MODE_FULL); - treebuilder->context.mode = BEFORE_HTML; - reprocess = true; - } - break; - case HUBBUB_TOKEN_COMMENT: - process_comment_append(treebuilder, token, - treebuilder->context.document); - break; - case HUBBUB_TOKEN_DOCTYPE: - { - int success; - void *doctype, *appended; - - /** \todo parse error */ - - /** \todo need public and system ids from tokeniser */ - success = treebuilder->tree_handler->create_doctype( - treebuilder->tree_handler->ctx, - &token->data.doctype.name, - &token->data.doctype.public_id, - &token->data.doctype.system_id, &doctype); - if (success != 0) { - /** \todo errors */ - } - - /* Append to Document node */ - success = treebuilder->tree_handler->append_child( - treebuilder->tree_handler->ctx, - treebuilder->context.document, - doctype, &appended); - if (success != 0) { - /** \todo errors */ - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - doctype); - } - - /* \todo look up the doctype in a catalog */ - - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, appended); - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, doctype); - - treebuilder->context.mode = BEFORE_HTML; - } - break; - case HUBBUB_TOKEN_START_TAG: - case HUBBUB_TOKEN_END_TAG: - case HUBBUB_TOKEN_EOF: - /** \todo parse error */ - treebuilder->tree_handler->set_quirks_mode( - treebuilder->tree_handler->ctx, - HUBBUB_QUIRKS_MODE_FULL); - reprocess = true; - break; - } - - if (reprocess) { - treebuilder->context.mode = BEFORE_HTML; - } - - return reprocess; -} - -/** - * Handle token in "before html" insertion mode - * - * \param treebuilder The treebuilder instance - * \param token The token to handle - * \return True to reprocess token, false otherwise - */ -bool handle_before_html(hubbub_treebuilder *treebuilder, - const hubbub_token *token) -{ - bool reprocess = false; - bool handled = false; - - switch (token->type) { - case HUBBUB_TOKEN_DOCTYPE: - /** \todo parse error */ - break; - case HUBBUB_TOKEN_COMMENT: - process_comment_append(treebuilder, token, - treebuilder->context.document); - break; - case HUBBUB_TOKEN_CHARACTER: - reprocess = process_characters_expect_whitespace(treebuilder, - token, false); - break; - case HUBBUB_TOKEN_START_TAG: - { - element_type type = element_type_from_name(treebuilder, - &token->data.tag.name); - - if (type == HTML) { - handled = true; - } else { - reprocess = true; - } - } - break; - case HUBBUB_TOKEN_END_TAG: - case HUBBUB_TOKEN_EOF: - reprocess = true; - break; - } - - - if (handled || reprocess) { - int success; - void *html, *appended; - - /* We can't use insert_element() here, as it assumes - * that we're inserting into current_node. There is - * no current_node to insert into at this point so - * we get to do it manually. */ - - if (reprocess) { - /* Need to manufacture html element */ - hubbub_tag tag; - - /** \todo UTF-16 */ - tag.name.type = HUBBUB_STRING_PTR; - tag.name.data.ptr = (const uint8_t *) "html"; - tag.name.len = SLEN("html"); - - tag.n_attributes = 0; - tag.attributes = NULL; - - success = treebuilder->tree_handler->create_element( - treebuilder->tree_handler->ctx, - &tag, &html); - } else { - success = treebuilder->tree_handler->create_element( - treebuilder->tree_handler->ctx, - &token->data.tag, &html); - } - - if (success != 0) { - /** \todo errors */ - } - - success = treebuilder->tree_handler->append_child( - treebuilder->tree_handler->ctx, - treebuilder->context.document, - html, &appended); - if (success != 0) { - /** \todo errors */ - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - html); - } - - /* We can't use element_stack_push() here, as it - * assumes that current_node is pointing at the index - * before the one to insert at. For the first entry in - * the stack, this does not hold so we must insert - * manually. */ - treebuilder->context.element_stack[0].type = HTML; - treebuilder->context.element_stack[0].node = html; - treebuilder->context.current_node = 0; - - /** \todo cache selection algorithm */ - - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - appended); - - treebuilder->context.mode = BEFORE_HEAD; - } - - return reprocess; -} - -/** - * Handle token in "before head" insertion mode - * - * \param treebuilder The treebuilder instance - * \param token The token to handle - * \return True to reprocess token, false otherwise - */ -bool handle_before_head(hubbub_treebuilder *treebuilder, - const hubbub_token *token) -{ - bool reprocess = false; - bool handled = false; - - switch (token->type) { - case HUBBUB_TOKEN_CHARACTER: - reprocess = process_characters_expect_whitespace(treebuilder, - token, false); - break; - case HUBBUB_TOKEN_COMMENT: - process_comment_append(treebuilder, token, - treebuilder->context.element_stack[ - treebuilder->context.current_node].node); - break; - case HUBBUB_TOKEN_DOCTYPE: - /** \todo parse error */ - break; - case HUBBUB_TOKEN_START_TAG: - { - element_type type = element_type_from_name(treebuilder, - &token->data.tag.name); - - if (type == HTML) { - /* Process as if "in body" */ - process_tag_in_body(treebuilder, token); - } else if (type == HEAD) { - handled = true; - } else { - reprocess = true; - } - } - break; - case HUBBUB_TOKEN_END_TAG: - { - element_type type = element_type_from_name(treebuilder, - &token->data.tag.name); - - if (type == HEAD || type == BR) { - reprocess = true; - } else { - /** \todo parse error */ - } - } - break; - case HUBBUB_TOKEN_EOF: - reprocess = true; - break; - } - - if (handled || reprocess) { - hubbub_tag tag; - - if (reprocess) { - /* Manufacture head tag */ - tag.name.type = HUBBUB_STRING_PTR; - tag.name.data.ptr = (const uint8_t *) "head"; - tag.name.len = SLEN("head"); - - tag.n_attributes = 0; - tag.attributes = NULL; - } else { - tag = token->data.tag; - } - - insert_element(treebuilder, &tag); - - treebuilder->tree_handler->ref_node( - treebuilder->tree_handler->ctx, - treebuilder->context.element_stack[ - treebuilder->context.current_node].node); - - treebuilder->context.head_element = - treebuilder->context.element_stack[ - treebuilder->context.current_node].node; - - treebuilder->context.mode = IN_HEAD; - } - - return reprocess; -} - -/** - * Handle token in "in head" insertion mode - * - * \param treebuilder The treebuilder instance - * \param token The token to handle - * \return True to reprocess token, false otherwise - */ -bool handle_in_head(hubbub_treebuilder *treebuilder, - const hubbub_token *token) -{ - bool reprocess = false; - bool handled = false; - - switch (token->type) { - case HUBBUB_TOKEN_CHARACTER: - reprocess = process_characters_expect_whitespace(treebuilder, - token, true); - break; - case HUBBUB_TOKEN_COMMENT: - process_comment_append(treebuilder, token, - treebuilder->context.element_stack[ - treebuilder->context.current_node].node); - break; - case HUBBUB_TOKEN_DOCTYPE: - /** \todo parse error */ - break; - case HUBBUB_TOKEN_START_TAG: - { - element_type type = element_type_from_name(treebuilder, - &token->data.tag.name); - - if (type == HTML) { - /* Process as if "in body" */ - process_tag_in_body(treebuilder, token); - } else if (type == BASE || type == COMMAND || - type == EVENT_SOURCE || type == LINK) { - process_base_link_meta_in_head(treebuilder, - token, type); - - /** \todo ack sc flag */ - } else if (type == META) { - process_base_link_meta_in_head(treebuilder, - token, type); - - /** \todo ack sc flag */ - - /** \todo detect charset */ - } else if (type == TITLE) { - parse_generic_rcdata(treebuilder, token, true); - } else if (type == NOFRAMES || type == STYLE) { - parse_generic_rcdata(treebuilder, token, false); - } else if (type == NOSCRIPT) { - /** \todo determine if scripting is enabled */ - if (false /*scripting_is_enabled*/) { - parse_generic_rcdata(treebuilder, token, false); - } else { - insert_element(treebuilder, &token->data.tag); - treebuilder->context.mode = IN_HEAD_NOSCRIPT; - } - } else if (type == SCRIPT) { - process_script_in_head(treebuilder, token); - } else if (type == HEAD) { - /** \todo parse error */ - } else { - reprocess = true; - } - } - break; - case HUBBUB_TOKEN_END_TAG: - { - element_type type = element_type_from_name(treebuilder, - &token->data.tag.name); - - if (type == HEAD) { - handled = true; - } else if (type == BR) { - reprocess = true; - } /** \todo parse error */ - } - break; - case HUBBUB_TOKEN_EOF: - reprocess = true; - break; - } - - if (handled || reprocess) { - element_type otype; - void *node; - - if (!element_stack_pop(treebuilder, &otype, &node)) { - /** \todo errors */ - } - - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - node); - - treebuilder->context.mode = AFTER_HEAD; - } - - return reprocess; -} - -/** - * Handle tokens in "in head noscript" insertion mode - * - * \param treebuilder The treebuilder instance - * \param token The token to process - * \return True to reprocess the token, false otherwise - */ -bool handle_in_head_noscript(hubbub_treebuilder *treebuilder, - const hubbub_token *token) -{ - bool reprocess = false; - bool handled = false; - - switch (token->type) { - case HUBBUB_TOKEN_CHARACTER: - /* This should be equivalent to "in head" processing */ - reprocess = process_characters_expect_whitespace(treebuilder, - token, true); - break; - case HUBBUB_TOKEN_COMMENT: - /* This should be equivalent to "in head" processing */ - process_comment_append(treebuilder, token, - treebuilder->context.element_stack[ - treebuilder->context.current_node].node); - break; - case HUBBUB_TOKEN_DOCTYPE: - /** \todo parse error */ - break; - case HUBBUB_TOKEN_START_TAG: - { - element_type type = element_type_from_name(treebuilder, - &token->data.tag.name); - - if (type == HTML) { - /* Process as "in body" */ - process_tag_in_body(treebuilder, token); - } else if (type == NOSCRIPT) { - handled = true; - } else if (type == LINK) { - /* This should be equivalent to "in head" processing */ - process_base_link_meta_in_head(treebuilder, - token, type); - - /** \todo ack sc flag */ - } else if (type == META) { - /* This should be equivalent to "in head" processing */ - process_base_link_meta_in_head(treebuilder, - token, type); - - /** \todo ack sc flag */ - - /** \todo detect charset */ - } else if (type == NOFRAMES) { - /* This should be equivalent to "in head" processing */ - parse_generic_rcdata(treebuilder, token, true); - } else if (type == STYLE) { - /* This should be equivalent to "in head" processing */ - parse_generic_rcdata(treebuilder, token, false); - } else if (type == HEAD || type == NOSCRIPT) { - /** \todo parse error */ - } else { - /** \todo parse error */ - reprocess = true; - } - } - break; - case HUBBUB_TOKEN_END_TAG: - { - element_type type = element_type_from_name(treebuilder, - &token->data.tag.name); - - if (type == NOSCRIPT) { - handled = true; - } else if (type == BR) { - /** \todo parse error */ - reprocess = true; - } else { - /** \todo parse error */ - } - } - break; - case HUBBUB_TOKEN_EOF: - /** \todo parse error */ - reprocess = true; - break; - } - - if (handled || reprocess) { - element_type otype; - void *node; - - if (!element_stack_pop(treebuilder, &otype, &node)) { - /** \todo errors */ - } - - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - node); - - treebuilder->context.mode = IN_HEAD; - } - - return reprocess; -} - -/** - * Handle tokens in "after head" insertion mode - * - * \param treebuilder The treebuilder instance - * \param token The token to process - * \return True to reprocess the token, false otherwise - */ -bool handle_after_head(hubbub_treebuilder *treebuilder, - const hubbub_token *token) -{ - bool reprocess = false; - bool handled = false; - - switch (token->type) { - case HUBBUB_TOKEN_CHARACTER: - append_text(treebuilder, &token->data.character); - break; - case HUBBUB_TOKEN_COMMENT: - process_comment_append(treebuilder, token, - treebuilder->context.element_stack[ - treebuilder->context.current_node].node); - break; - case HUBBUB_TOKEN_DOCTYPE: - /** \todo parse error */ - break; - case HUBBUB_TOKEN_START_TAG: - { - element_type type = element_type_from_name(treebuilder, - &token->data.tag.name); - - if (type == HTML) { - /* Process as if "in body" */ - process_tag_in_body(treebuilder, token); - } else if (type == BODY) { - handled = true; - } else if (type == FRAMESET) { - insert_element(treebuilder, &token->data.tag); - treebuilder->context.mode = IN_FRAMESET; - } else if (type == BASE || type == LINK || type == META || - type == NOFRAMES || type == SCRIPT || - type == STYLE || type == TITLE) { - element_type otype; - void *node; - - /** \todo parse error */ - - if (!element_stack_push(treebuilder, - HEAD, - treebuilder->context.head_element)) { - /** \todo errors */ - } - - - /* This should be identical to handling "in head" */ - if (type == BASE || type == LINK || type == META) { - /** \todo ack sc flag */ - - process_base_link_meta_in_head(treebuilder, - token, type); - } else if (type == SCRIPT) { - process_script_in_head(treebuilder, token); - } else if (type == STYLE || type == NOFRAMES) { - parse_generic_rcdata(treebuilder, token, false); - } else if (type == TITLE) { - parse_generic_rcdata(treebuilder, token, true); - } - - if (!element_stack_pop(treebuilder, &otype, &node)) { - /** \todo errors */ - } - - /* No need to unref node as we never increased - * its reference count when pushing it on the stack */ - } else if (type == HEAD) { - /** \todo parse error */ - } else { - reprocess = true; - } - } - break; - case HUBBUB_TOKEN_END_TAG: - /** \parse error */ - break; - case HUBBUB_TOKEN_EOF: - reprocess = true; - break; - } - - if (handled || reprocess) { - hubbub_tag tag; - - if (reprocess) { - /* Manufacture body */ - tag.name.type = HUBBUB_STRING_PTR; - tag.name.data.ptr = (const uint8_t *) "body"; - tag.name.len = SLEN("body"); - - tag.n_attributes = 0; - tag.attributes = NULL; - } else { - tag = token->data.tag; - } - - insert_element(treebuilder, &tag); - - treebuilder->context.mode = IN_BODY; - } - - return reprocess; -} - -/** - * Handle tokens in "generic rcdata" insertion mode - * - * \param treebuilder The treebuilder instance - * \param token The token to process - * \return True to reprocess the token, false otherwise - */ -bool handle_generic_rcdata(hubbub_treebuilder *treebuilder, - const hubbub_token *token) -{ - bool reprocess = false; - bool done = false; - - if (treebuilder->context.strip_leading_lr && - token->type != HUBBUB_TOKEN_CHARACTER) { - /* Reset the LR stripping flag */ - treebuilder->context.strip_leading_lr = false; - } - - switch (token->type) { - case HUBBUB_TOKEN_CHARACTER: - if (treebuilder->context.collect.string.len == 0) { - treebuilder->context.collect.string.data.off = - token->data.character.data.off; - } - treebuilder->context.collect.string.len += - token->data.character.len; - - if (treebuilder->context.strip_leading_lr) { - const uint8_t *str = treebuilder->input_buffer + - treebuilder->context.collect.string.data.off; - - /** \todo UTF-16 */ - if (*str == '\n') { - treebuilder->context.collect.string.data.off++; - treebuilder->context.collect.string.len--; - } - - treebuilder->context.strip_leading_lr = false; - } - break; - case HUBBUB_TOKEN_END_TAG: - { - element_type type = element_type_from_name(treebuilder, - &token->data.tag.name); - - if (type != treebuilder->context.collect.type) { - /** \todo parse error */ - } - - done = true; - } - break; - case HUBBUB_TOKEN_EOF: - /** \todo parse error */ - done = reprocess = true; - break; - case HUBBUB_TOKEN_COMMENT: - case HUBBUB_TOKEN_DOCTYPE: - case HUBBUB_TOKEN_START_TAG: - /* Should never happen */ - assert(0); - break; - } - - if (done) { - int success; - void *text, *appended; - - success = treebuilder->tree_handler->create_text( - treebuilder->tree_handler->ctx, - &treebuilder->context.collect.string, - &text); - if (success != 0) { - /** \todo errors */ - } - - success = treebuilder->tree_handler->append_child( - treebuilder->tree_handler->ctx, - treebuilder->context.collect.node, - text, &appended); - if (success != 0) { - /** \todo errors */ - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - text); - } - - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, appended); - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, text); - - /* Clean up context */ - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - treebuilder->context.collect.node); - treebuilder->context.collect.node = NULL; - - /* Return to previous insertion mode */ - treebuilder->context.mode = - treebuilder->context.collect.mode; - } - - return reprocess; -} - -/** - * Handle tokens in "script collect characters" insertion mode - * - * \param treebuilder The treebuilder instance - * \param token The token to process - * \return True to reprocess the token, false otherwise - */ -bool handle_script_collect_characters(hubbub_treebuilder *treebuilder, - const hubbub_token *token) -{ - bool reprocess = false; - bool done = false; - - switch (token->type) { - case HUBBUB_TOKEN_CHARACTER: - if (treebuilder->context.collect.string.len == 0) { - treebuilder->context.collect.string.data.off = - token->data.character.data.off; - } - treebuilder->context.collect.string.len += - token->data.character.len; - break; - case HUBBUB_TOKEN_END_TAG: - { - element_type type = element_type_from_name(treebuilder, - &token->data.tag.name); - - if (type != treebuilder->context.collect.type) { - /** \todo parse error */ - /** \todo Mark script as "already executed" */ - } - - done = true; - } - break; - case HUBBUB_TOKEN_EOF: - case HUBBUB_TOKEN_COMMENT: - case HUBBUB_TOKEN_DOCTYPE: - case HUBBUB_TOKEN_START_TAG: - /** \todo parse error */ - /** \todo Mark script as "already executed" */ - done = reprocess = true; - break; - } - - if (done) { - int success; - void *text, *appended; - - success = treebuilder->tree_handler->create_text( - treebuilder->tree_handler->ctx, - &treebuilder->context.collect.string, - &text); - if (success != 0) { - /** \todo errors */ - } - - /** \todo fragment case -- skip this lot entirely */ - - success = treebuilder->tree_handler->append_child( - treebuilder->tree_handler->ctx, - treebuilder->context.collect.node, - text, &appended); - if (success != 0) { - /** \todo errors */ - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - text); - } - - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, appended); - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, text); - - /** \todo insertion point manipulation */ - - /* Append script node to current node */ - success = treebuilder->tree_handler->append_child( - treebuilder->tree_handler->ctx, - treebuilder->context.element_stack[ - treebuilder->context.current_node].node, - treebuilder->context.collect.node, &appended); - if (success != 0) { - /** \todo errors */ - } - - /** \todo restore insertion point */ - - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - appended); - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - treebuilder->context.collect.node); - treebuilder->context.collect.node = NULL; - - /** \todo process any pending script */ - - /* Return to previous insertion mode */ - treebuilder->context.mode = - treebuilder->context.collect.mode; - } - - return reprocess; -} - /** * Process a character token in cases where we expect only whitespace -- cgit v1.2.3