From 833ee4b1f01b5da2327ab79777219b88528162c6 Mon Sep 17 00:00:00 2001
From: Andrew Sidwell
Date: Mon, 30 Jun 2008 10:45:26 +0000
Subject: Add "in foreign content" handling. Not convinced this is the best way.

svn path=/trunk/hubbub/; revision=4475
---
 src/treebuilder/Makefile             |   1 +
 src/treebuilder/in_foreign_content.c | 154 +++++++++++++++++++++++++++++++++++
 src/treebuilder/internal.h           |   7 +-
 src/treebuilder/modes.h              |   4 +
 src/treebuilder/treebuilder.c        |   6 +-
 5 files changed, 167 insertions(+), 5 deletions(-)
 create mode 100644 src/treebuilder/in_foreign_content.c

diff --git a/src/treebuilder/Makefile b/src/treebuilder/Makefile
index f2e8e3a..3b736b2 100644
--- a/src/treebuilder/Makefile
+++ b/src/treebuilder/Makefile
@@ -37,6 +37,7 @@ SRCS_$(d) := treebuilder.c \
 	in_head_noscript.c after_head.c in_body.c \
 	in_caption.c in_column_group.c in_table_body.c in_row.c \
 	in_cell.c in_select.c in_select_in_table.c \
+	in_foreign_content.c \
 	generic_rcdata.c script_collect.c
 
 # Append to sources for component
diff --git a/src/treebuilder/in_foreign_content.c b/src/treebuilder/in_foreign_content.c
new file mode 100644
index 0000000..d1338e7
--- /dev/null
+++ b/src/treebuilder/in_foreign_content.c
@@ -0,0 +1,154 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ *                http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 Andrew Sidwell
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include "treebuilder/modes.h"
+#include "treebuilder/internal.h"
+#include "treebuilder/treebuilder.h"
+#include "utils/utils.h"
+
+
+
+/**
+ * Determine whether the element stack holds an element outside the
+ * HTML namespace
+ *
+ * The entry at index zero of the stack is not examined.
+ *
+ * \param treebuilder  The treebuilder instance
+ * \return True if such an element was found, false otherwise
+ */
+static bool element_in_scope_in_non_html_ns(hubbub_treebuilder *treebuilder)
+{
+	uint32_t node;
+
+	if (treebuilder->context.element_stack == NULL)
+		return false;
+
+	for (node = treebuilder->context.current_node; node > 0; node--) {
+		hubbub_ns node_ns =
+				treebuilder->context.element_stack[node].ns;
+
+		if (node_ns != HUBBUB_NS_HTML)
+			return true;
+	}
+
+	return false;
+}
+
+
+
+/**
+ * Handle tokens in "in foreign content" insertion mode
+ *
+ * \param treebuilder  The treebuilder instance
+ * \param token        The token to process
+ * \return True to reprocess the token, false otherwise
+ */
+bool handle_in_foreign_content(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token)
+{
+	bool reprocess = false;
+
+	switch (token->type) {
+	case HUBBUB_TOKEN_CHARACTER:
+		append_text(treebuilder, &token->data.character);
+		break;
+	case HUBBUB_TOKEN_COMMENT:
+		process_comment_append(treebuilder, token,
+				treebuilder->context.element_stack[
+				treebuilder->context.current_node].node);
+		break;
+	case HUBBUB_TOKEN_DOCTYPE:
+		/** \todo parse error */
+		break;
+	case HUBBUB_TOKEN_START_TAG:
+	{
+		element_type type = element_type_from_name(treebuilder,
+				&token->data.tag.name);
+
+		element_type cur_node = current_node(treebuilder);
+		hubbub_ns cur_node_ns = current_node_ns(treebuilder);
+
+		if (cur_node_ns == HUBBUB_NS_HTML ||
+				(cur_node_ns == HUBBUB_NS_MATHML &&
+				(type != MGLYPH && type != MALIGNMARK) &&
+				(cur_node == MI || cur_node == MO ||
+				cur_node == MN || cur_node == MS ||
+				cur_node == MTEXT))) {
+			/* Hand the token to the secondary insertion mode */
+			treebuilder->context.mode =
+					treebuilder->context.second_mode;
+			hubbub_treebuilder_token_handler(token, treebuilder);
+
+			/* Drop back to the secondary mode once no
+			 * non-HTML element is left on the stack */
+			if (treebuilder->context.mode == IN_FOREIGN_CONTENT &&
+					!element_in_scope_in_non_html_ns(treebuilder)) {
+				treebuilder->context.mode =
+						treebuilder->context.second_mode;
+			}
+		} else if (type == B || type == BIG || type == BLOCKQUOTE ||
+				type == BODY || type == BR || type == CENTER ||
+				type == CODE || type == DD || type == DIV ||
+				type == DL || type == DT || type == EM ||
+				type == EMBED || type == FONT || type == H1 ||
+				type == H2 || type == H3 || type == H4 ||
+				type == H5 || type == H6 || type == HEAD ||
+				type == HR || type == I || type == IMG ||
+				type == LI || type == LISTING ||
+				type == MENU || type == META || type == NOBR ||
+				type == OL || type == P || type == PRE ||
+				type == RUBY || type == S || type == SMALL ||
+				type == SPAN || type == STRONG ||
+				type == STRIKE || type == SUB || type == SUP ||
+				type == TABLE || type == TT || type == U ||
+				type == UL || type == VAR) {
+			/** \todo parse error */
+
+			/* Pop until current node is in the HTML namespace */
+			while (cur_node_ns != HUBBUB_NS_HTML) {
+				void *node;
+				element_stack_pop(treebuilder, &cur_node_ns,
+						&cur_node, &node);
+				cur_node_ns = current_node_ns(treebuilder);
+			}
+
+			treebuilder->context.mode =
+					treebuilder->context.second_mode;
+		} else {
+			hubbub_tag tag = token->data.tag;
+
+			adjust_foreign_attributes(treebuilder, &tag);
+
+			/* Set to the right namespace and insert */
+			tag.ns = cur_node_ns;
+
+			if (token->data.tag.self_closing) {
+				insert_element_no_push(treebuilder, &tag);
+				/** \todo ack sc flag */
+			} else {
+				insert_element(treebuilder, &tag);
+			}
+		}
+	}
+		break;
+	case HUBBUB_TOKEN_END_TAG:
+		/** \todo parse error */
+		break;
+	case HUBBUB_TOKEN_EOF:
+		/** \todo nothing changes state before reprocessing;
+		 * confirm the caller cannot spin on EOF in this mode */
+		reprocess = true;
+		break;
+	}
+
+	return reprocess;
+}
+
diff --git a/src/treebuilder/internal.h b/src/treebuilder/internal.h
index 6f7278c..f9fd09e 100644
--- a/src/treebuilder/internal.h
+++ b/src/treebuilder/internal.h
@@ -27,7 +27,9 @@ typedef enum
 	A, B, BIG, EM, FONT, I, NOBR, S, SMALL, STRIKE, STRONG, TT, U,
 /* Phrasing */
 	/**< \todo Enumerate phrasing elements */
-	LABEL, MATH, RP, RT, XMP,
+	CODE, LABEL, RP, RT, RUBY, SPAN, SUB, SUP, VAR, XMP,
+/* MathML */
+	MATH, MGLYPH, MALIGNMARK, MI, MO, MN, MS, MTEXT,
 	UNKNOWN,
 } element_type;
 
@@ -104,6 +106,8 @@ struct hubbub_treebuilder
 	void *alloc_pw;			/**< Client private data */
 };
 
+void hubbub_treebuilder_token_handler(const hubbub_token *token, void *pw);
+
 bool process_characters_expect_whitespace(
 		hubbub_treebuilder *treebuilder, const hubbub_token *token,
 		bool insert_into_current_node);
@@ -142,6 +146,7 @@ bool element_stack_pop(hubbub_treebuilder *treebuilder,
 bool element_stack_pop_until(hubbub_treebuilder *treebuilder,
 		element_type type);
 element_type current_node(hubbub_treebuilder *treebuilder);
+hubbub_ns current_node_ns(hubbub_treebuilder *treebuilder);
 element_type prev_node(hubbub_treebuilder *treebuilder);
 
 bool formatting_list_append(hubbub_treebuilder *treebuilder,
diff --git a/src/treebuilder/modes.h b/src/treebuilder/modes.h
index c97e9de..8172ddd 100644
--- a/src/treebuilder/modes.h
+++ b/src/treebuilder/modes.h
@@ -59,6 +59,8 @@ bool handle_in_caption(hubbub_treebuilder *treebuilder,
 		const hubbub_token *token);
 bool handle_in_column_group(hubbub_treebuilder *treebuilder,
 		const hubbub_token *token);
+bool handle_in_table_body(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token);
 bool handle_in_row(hubbub_treebuilder *treebuilder,
 		const hubbub_token *token);
 bool handle_in_cell(hubbub_treebuilder *treebuilder,
@@ -71,6 +73,8 @@ bool handle_generic_rcdata(hubbub_treebuilder *treebuilder,
 		const hubbub_token *token);
 bool handle_script_collect_characters(hubbub_treebuilder *treebuilder,
 		const hubbub_token *token);
+bool handle_in_foreign_content(hubbub_treebuilder *treebuilder,
+		const hubbub_token *token);
 
 bool process_in_head(hubbub_treebuilder *treebuilder,
 		const hubbub_token *token);
diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c
index cef73d3..3c25540 100644
--- a/src/treebuilder/treebuilder.c
+++ b/src/treebuilder/treebuilder.c
@@ -66,12 +66,10 @@ static const struct {
 
 static void hubbub_treebuilder_buffer_handler(const uint8_t *data,
 		size_t len, void *pw);
-static void hubbub_treebuilder_token_handler(const hubbub_token *token,
-		void *pw);
 
 
 /**
- * Create a hubbub treebuilder 
+ * Create a hubbub treebuilder
  *
  * \param tokeniser    Underlying tokeniser instance
  * \param alloc        Memory (de)allocation function
@@ -109,7 +107,7 @@ hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser,
 		return NULL;
 	}
 	tb->context.stack_alloc = ELEMENT_STACK_CHUNK;
-	/* We rely on HTML not being equal to zero to determine 
+	/* We rely on HTML not being equal to zero to determine
 	 * if the first item in the stack is in use. Assert this here. */
 	assert(HTML != 0);
 	tb->context.element_stack[0].type = 0;
-- 
cgit v1.2.3