From 4cc5b88048f984ffb8a403a23f0c78c236a5b533 Mon Sep 17 00:00:00 2001 From: Rupinder Singh Khokhar Date: Fri, 11 Jul 2014 11:30:43 +0530 Subject: Now, since we have the support for retrieving attributes from context, the foreign breakout function is updated to incorporate checking of attributes --- src/treebuilder/in_foreign_content.c | 96 +++++++++++++++++++----------------- src/treebuilder/internal.h | 5 ++ src/treebuilder/treebuilder.c | 68 +++++++++++++++++++++++++ test/data/tree-construction/INDEX | 2 +- 4 files changed, 125 insertions(+), 46 deletions(-) diff --git a/src/treebuilder/in_foreign_content.c b/src/treebuilder/in_foreign_content.c index e6d22ac..73a061d 100644 --- a/src/treebuilder/in_foreign_content.c +++ b/src/treebuilder/in_foreign_content.c @@ -288,8 +288,8 @@ void adjust_foreign_attributes(hubbub_treebuilder *treebuilder, } } -#undef S +#undef S /*** Foreign content insertion mode ***/ @@ -368,29 +368,18 @@ static void foreign_break_out(hubbub_treebuilder *treebuilder) /** \todo parse error */ /** todo fragment case */ - element_stack_pop(treebuilder, &ns, &type, &node); - treebuilder->tree_handler->unref_node( - treebuilder->tree_handler->ctx, - node); - - while (stack[treebuilder->context.current_node].ns != - HUBBUB_NS_HTML && - !(stack[treebuilder->context.current_node].ns == - HUBBUB_NS_MATHML && ( - stack[treebuilder->context.current_node].type == - MI || stack[treebuilder->context.current_node].type == - MO || stack[treebuilder->context.current_node].type == - MN || stack[treebuilder->context.current_node].type == - MS || stack[treebuilder->context.current_node].type == - MTEXT)) && - !(stack[treebuilder->context.current_node].ns == - HUBBUB_NS_SVG && ( - stack[treebuilder->context.current_node].type == - FOREIGNOBJECT || - stack[treebuilder->context.current_node].type == - DESC || - stack[treebuilder->context.current_node].type == - TITLE)) + + ns = stack[treebuilder->context.current_node].ns; + type = current_node(treebuilder); +
size_t n_attrs = stack[ + treebuilder->context.current_node].n_attributes; + hubbub_attribute *attrs = + stack[treebuilder->context.current_node]. + attributes; + while (ns != HUBBUB_NS_HTML && + !is_mathml_text_integration(type, ns) && + !is_html_integration(type, ns, attrs, + n_attrs) ) { element_stack_pop(treebuilder, &ns, &type, &node); @@ -398,15 +387,17 @@ static void foreign_break_out(hubbub_treebuilder *treebuilder) treebuilder->tree_handler->ctx, node); - if(ns == HUBBUB_NS_MATHML && - type == ANNOTATION_XML) { - /*todo check for attributes */ - } + type = current_node(treebuilder); + ns = stack[treebuilder->context.current_node].ns; + n_attrs = stack[ + treebuilder->context.current_node].n_attributes; + attrs = stack[treebuilder->context.current_node]. + attributes; } - treebuilder->context.mode = treebuilder->context.second_mode; } + /** * Handle tokens in "in foreign content" insertion mode * @@ -418,37 +409,41 @@ hubbub_error handle_in_foreign_content(hubbub_treebuilder *treebuilder, const hubbub_token *token) { hubbub_ns cur_node_ns = treebuilder->context.element_stack[ - treebuilder->context.current_node].ns; + treebuilder->context.current_node].ns; element_type cur_node = current_node(treebuilder); - element_type type = element_type_from_name(treebuilder, - &token->data.tag.name); + hubbub_attribute *cur_node_attrs; + size_t cur_node_n_attrs; + + cur_node_attrs = treebuilder->context.element_stack[ + treebuilder->context.current_node].attributes; + + cur_node_n_attrs = treebuilder->context.element_stack[ + treebuilder->context.current_node].n_attributes; if (treebuilder->context.current_node == 0 || - cur_node_ns == HUBBUB_NS_HTML || - (cur_node_ns == HUBBUB_NS_MATHML && - (type != MGLYPH && type != MALIGNMARK) && - (cur_node == MI || cur_node == MO || - cur_node == MN || cur_node == MS || - cur_node == MTEXT)) || - (type == SVG && (cur_node_ns == HUBBUB_NS_MATHML && - cur_node == ANNOTATION_XML)) || - (cur_node_ns == HUBBUB_NS_SVG && - (cur_node == 
FOREIGNOBJECT || - cur_node == DESC || - cur_node == TITLE))) { + cur_node_ns == HUBBUB_NS_HTML) { return process_as_in_secondary(treebuilder, token); } + hubbub_error err = HUBBUB_OK; const uint8_t *c; switch (token->type) { case HUBBUB_TOKEN_CHARACTER: + { + if(is_html_integration(cur_node, cur_node_ns, + cur_node_attrs, cur_node_n_attrs) || + is_mathml_text_integration(cur_node, cur_node_ns)) { + return process_as_in_secondary(treebuilder, token); + } + c = (token->data.character.ptr); if(*c != '\t' && *c != '\r' && *c != ' ' && *c != '\n' && *c != '\f') { treebuilder->context.frameset_ok = false; } err = append_text(treebuilder, &token->data.character); + } break; case HUBBUB_TOKEN_COMMENT: err = process_comment_append(treebuilder, token, @@ -461,7 +456,17 @@ hubbub_error handle_in_foreign_content(hubbub_treebuilder *treebuilder, case HUBBUB_TOKEN_START_TAG: { bool handled = false; - if (type == B || type == BIG || type == BLOCKQUOTE || + element_type type = element_type_from_name(treebuilder, + &token->data.tag.name); + + if ((is_mathml_text_integration(cur_node, cur_node_ns) && + type != MGLYPH && type != MALIGNMARK) || + (type == SVG && cur_node_ns == HUBBUB_NS_MATHML && + cur_node == ANNOTATION_XML) || + is_html_integration(cur_node, cur_node_ns, + cur_node_attrs, cur_node_n_attrs)) { + return process_as_in_secondary(treebuilder, token); + } else if (type == B || type == BIG || type == BLOCKQUOTE || type == BODY || type == BR || type == CENTER || type == CODE || type == DD || type == DIV || type == DL || type == DT || type == EM || @@ -540,6 +545,7 @@ hubbub_error handle_in_foreign_content(hubbub_treebuilder *treebuilder, &token->data.tag.name); uint32_t node; element_context *stack = treebuilder->context.element_stack; + for (node = treebuilder->context.current_node; node > 1; node--) { if(stack[node].type == type) { hubbub_ns ns; diff --git a/src/treebuilder/internal.h b/src/treebuilder/internal.h index f1314bb..a5f0b22 100644 --- 
a/src/treebuilder/internal.h +++ b/src/treebuilder/internal.h @@ -207,6 +207,11 @@ hubbub_error formatting_list_replace(hubbub_treebuilder *treebuilder, uint32_t *ostack_index); void copy_attribute(hubbub_attribute *source, hubbub_attribute *sink); +bool is_mathml_text_integration (element_type type, + hubbub_ns ns); +bool is_html_integration (element_type type, + hubbub_ns ns, hubbub_attribute *attrs, + size_t n_attrs); /* in_foreign_content.c */ void adjust_mathml_attributes(hubbub_treebuilder *treebuilder, hubbub_tag *tag); diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c index 6b89355..d741b5c 100644 --- a/src/treebuilder/treebuilder.c +++ b/src/treebuilder/treebuilder.c @@ -98,6 +98,8 @@ static const struct { { S("foreignobject"), FOREIGNOBJECT } }; +#undef S + static bool is_form_associated(element_type type); /** @@ -1621,6 +1623,72 @@ void copy_attribute(hubbub_attribute *source, return; } +#define S(s) (uint8_t *) s, SLEN(s) + +/** + * Checks if the node with the given properties is + * a mathml text integration point + * \param type The element type of the node + * \param ns The namespace of the node + */ + +bool is_mathml_text_integration (element_type type, + hubbub_ns ns) { + + if(ns == HUBBUB_NS_MATHML && ( + type == MI || type == MO || + type == MN || type == MS || + type == MTEXT)) { + return true; + } + return false; +} + +/** + * Checks if the node with the given properties is + * a html integration point + * \param type The element type of the node + * \param ns The namespace of the node + * \param attrs Pointer to the list of attributes + * \param n_attrs Number of attributes on the node + */ +bool is_html_integration (element_type type, + hubbub_ns ns, hubbub_attribute *attrs, + size_t n_attrs) { + + if(ns == HUBBUB_NS_MATHML && + type == ANNOTATION_XML) { + + size_t i; + bool found = false; + + for(i = 0;i < n_attrs; i++) { + /*search for the given attributes in O(n_attrs) time complexity*/ + 
if(hubbub_string_match_ci(attrs[i].name.ptr, + attrs[i].name.len, S("encoding")) && + (hubbub_string_match_ci(attrs[i].value.ptr, + attrs[i].value.len, + S("text/html")) || + hubbub_string_match_ci(attrs[i].value.ptr, + attrs[i].value.len, + S("application/xhtml+xml"))) + ) { + found = true; + break; + } + } + + if(found) { + return true; + } + } else if(ns == HUBBUB_NS_SVG &&( + type == FOREIGNOBJECT || + type == DESC || + type == TITLE)) { + return true; + } + return false; +} #ifndef NDEBUG diff --git a/test/data/tree-construction/INDEX b/test/data/tree-construction/INDEX index f280fba..1c1b78f 100644 --- a/test/data/tree-construction/INDEX +++ b/test/data/tree-construction/INDEX @@ -37,7 +37,7 @@ tests14.dat html5lib treebuilder tests #tests18.dat NA tests19.dat html5lib treebuilder tests tests1.dat html5lib treebuilder tests -#tests20.dat NA +tests20.dat html5lib treebuilder tests #tests21.dat NA #tests22.dat NA #tests23.dat NA -- cgit v1.2.3