From f2b1200a40f6d7f79ae2702119598bbc304edbd4 Mon Sep 17 00:00:00 2001 From: Rupinder Singh Khokhar Date: Tue, 8 Jul 2014 06:32:49 +0530 Subject: added provision to get quirks mode of document from treebuilder, assuming treebuilder has one document associated with it. Also fixed table endtag handler in in_table insertion mode. Also fixed table starttag handler in in_body insertion mode --- src/treebuilder/in_body.c | 8 ++- src/treebuilder/in_table.c | 3 +- src/treebuilder/initial.c | 119 ++++++++++++++++++++++++++++++++++++++++-- src/treebuilder/internal.h | 1 + src/treebuilder/treebuilder.c | 1 + 5 files changed, 126 insertions(+), 6 deletions(-) (limited to 'src/treebuilder') diff --git a/src/treebuilder/in_body.c b/src/treebuilder/in_body.c index 04eb8c2..43fc678 100644 --- a/src/treebuilder/in_body.c +++ b/src/treebuilder/in_body.c @@ -321,7 +321,13 @@ hubbub_error process_start_tag(hubbub_treebuilder *treebuilder, err = parse_generic_rcdata(treebuilder, token, false); } else if (type == TABLE) { - err = process_container_in_body(treebuilder, token); + if(treebuilder->quirks_mode != HUBBUB_QUIRKS_MODE_FULL && + element_in_scope(treebuilder, P, + BUTTON_SCOPE)) { + err = close_p_in_body(treebuilder); + } + insert_element(treebuilder, &token->data.tag, + true); if (err == HUBBUB_OK) { treebuilder->context.frameset_ok = false; diff --git a/src/treebuilder/in_table.c b/src/treebuilder/in_table.c index 908d5ad..d3d32e8 100644 --- a/src/treebuilder/in_table.c +++ b/src/treebuilder/in_table.c @@ -225,7 +225,8 @@ hubbub_error handle_in_table(hubbub_treebuilder *treebuilder, /** \todo fragment case */ if(!element_in_scope(treebuilder, TABLE, TABLE_SCOPE)) { /* todo parse error */ - break; + handled = true; + break; } element_stack_pop_until(treebuilder, TABLE); diff --git a/src/treebuilder/initial.c b/src/treebuilder/initial.c index 9c2a6aa..2676757 100644 --- a/src/treebuilder/initial.c +++ b/src/treebuilder/initial.c @@ -125,7 +125,7 @@ static bool 
lookup_full_quirks(hubbub_treebuilder *treebuilder, #define S(s) (uint8_t *) s, sizeof s - 1 /* Check the name is "HTML" (case-insensitively) */ - if (!hubbub_string_match_ci(name, name_len, S("HTML"))) + if (!hubbub_string_match(name, name_len, S("html"))) return true; /* No public id means not-quirks */ @@ -148,7 +148,116 @@ static bool lookup_full_quirks(hubbub_treebuilder *treebuilder, hubbub_string_match_ci(public_id, public_id_len, S("HTML")) || hubbub_string_match_ci(system_id, system_id_len, - S("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"))) { + S("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd")) || + starts_with(public_id, public_id_len, + S("-//AS//DTD HTML 3.0 asWedit + extensions//")) || + starts_with(public_id, public_id_len, + S("-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML 2.0 Level 1//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML 2.0 Level 2//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML 2.0 Strict Level 1//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML 2.0 Strict Level 2//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML 2.0 Strict//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML 2.0//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML 2.1E//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML 3.0//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML 3.2 Final//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML 3.2//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML 3//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML Level 0//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML Level 1//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML Level 2//")) || + starts_with(public_id, 
public_id_len, + S("-//IETF//DTD HTML Level 3//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML Strict Level 0//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML Strict Level 1//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML Strict Level 2//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML Strict Level 3//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML Strict//")) || + starts_with(public_id, public_id_len, + S("-//IETF//DTD HTML//")) || + starts_with(public_id, public_id_len, + S("-//Metrius//DTD Metrius Presentational//")) || + starts_with(public_id, public_id_len, + S("-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//")) || + starts_with(public_id, public_id_len, + S("-//Microsoft//DTD Internet Explorer 2.0 HTML//")) || + starts_with(public_id, public_id_len, + S("-//Microsoft//DTD Internet Explorer 2.0 Tables//")) || + starts_with(public_id, public_id_len, + S("-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//")) || + starts_with(public_id, public_id_len, + S("-//Microsoft//DTD Internet Explorer 3.0 HTML//")) || + starts_with(public_id, public_id_len, + S("-//Microsoft//DTD Internet Explorer 3.0 Tables//")) || + starts_with(public_id, public_id_len, + S("-//Netscape Comm. Corp.//DTD HTML//")) || + starts_with(public_id, public_id_len, + S("-//Netscape Comm. 
Corp.//DTD Strict HTML//")) || + starts_with(public_id, public_id_len, + S("-//O'Reilly and Associates//DTD HTML 2.0//")) || + starts_with(public_id, public_id_len, + S("-//O'Reilly and Associates//DTD HTML Extended 1.0//")) || + starts_with(public_id, public_id_len, + S("-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//")) || + starts_with(public_id, public_id_len, + S("-//SQ//DTD HTML 2.0 HoTMetaL + extensions//")) || + starts_with(public_id, public_id_len, + S("-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//")) || + starts_with(public_id, public_id_len, + S("-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//")) || + starts_with(public_id, public_id_len, + S("-//Spyglass//DTD HTML 2.0 Extended//")) || + starts_with(public_id, public_id_len, + S("-//Sun Microsystems Corp.//DTD HotJava HTML//")) || + starts_with(public_id, public_id_len, + S("-//Sun Microsystems Corp.//DTD HotJava Strict HTML//")) || + starts_with(public_id, public_id_len, + S("-//W3C//DTD HTML 3 1995-03-24//")) || + starts_with(public_id, public_id_len, + S("-//W3C//DTD HTML 3.2 Draft//")) || + starts_with(public_id, public_id_len, + S("-//W3C//DTD HTML 3.2 Final//")) || + starts_with(public_id, public_id_len, + S("-//W3C//DTD HTML 3.2//")) || + starts_with(public_id, public_id_len, + S("-//W3C//DTD HTML 3.2S Draft//")) || + starts_with(public_id, public_id_len, + S("-//W3C//DTD HTML 4.0 Frameset//")) || + starts_with(public_id, public_id_len, + S("-//W3C//DTD HTML 4.0 Transitional//")) || + starts_with(public_id, public_id_len, + S("-//W3C//DTD HTML Experimental 19960712//")) || + starts_with(public_id, public_id_len, + S("-//W3C//DTD HTML Experimental 970421//")) || + starts_with(public_id, public_id_len, + S("-//W3C//DTD W3 HTML//")) || + starts_with(public_id, public_id_len, + S("-//W3O//DTD W3 HTML 3.0//")) || + starts_with(public_id, public_id_len, + S("-//WebTechs//DTD Mozilla HTML 2.0//")) || + starts_with(public_id, 
public_id_len, + S("-//WebTechs//DTD Mozilla HTML//")) + ) { return true; } @@ -178,9 +287,7 @@ static bool lookup_limited_quirks(hubbub_treebuilder *treebuilder, { const uint8_t *public_id = cdoc->public_id.ptr; size_t public_id_len = cdoc->public_id.len; - UNUSED(treebuilder); - #define S(s) (uint8_t *) s, sizeof s - 1 if (starts_with(public_id, public_id_len, @@ -226,6 +333,7 @@ hubbub_error handle_initial(hubbub_treebuilder *treebuilder, treebuilder->tree_handler->set_quirks_mode( treebuilder->tree_handler->ctx, HUBBUB_QUIRKS_MODE_FULL); + treebuilder->quirks_mode = HUBBUB_QUIRKS_MODE_FULL; treebuilder->context.mode = BEFORE_HTML; } break; @@ -271,10 +379,12 @@ hubbub_error handle_initial(hubbub_treebuilder *treebuilder, treebuilder->tree_handler->set_quirks_mode( treebuilder->tree_handler->ctx, HUBBUB_QUIRKS_MODE_FULL); + treebuilder->quirks_mode = HUBBUB_QUIRKS_MODE_FULL; } else if (lookup_limited_quirks(treebuilder, cdoc)) { treebuilder->tree_handler->set_quirks_mode( treebuilder->tree_handler->ctx, HUBBUB_QUIRKS_MODE_LIMITED); + treebuilder->quirks_mode = HUBBUB_QUIRKS_MODE_LIMITED; } treebuilder->context.mode = BEFORE_HTML; @@ -287,6 +397,7 @@ hubbub_error handle_initial(hubbub_treebuilder *treebuilder, treebuilder->tree_handler->set_quirks_mode( treebuilder->tree_handler->ctx, HUBBUB_QUIRKS_MODE_FULL); + treebuilder->quirks_mode = HUBBUB_QUIRKS_MODE_FULL; err = HUBBUB_REPROCESS; break; } diff --git a/src/treebuilder/internal.h b/src/treebuilder/internal.h index 2cd5e4a..5eca574 100644 --- a/src/treebuilder/internal.h +++ b/src/treebuilder/internal.h @@ -128,6 +128,7 @@ struct hubbub_treebuilder hubbub_error_handler error_handler; /**< Error handler */ void *error_pw; /**< Error handler data */ + hubbub_quirks_mode quirks_mode; /**< The quirks mode the document is set to */ }; hubbub_error hubbub_treebuilder_token_handler( diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c index 5d84ff0..a1bc802 100644 --- 
a/src/treebuilder/treebuilder.c +++ b/src/treebuilder/treebuilder.c @@ -142,6 +142,7 @@ hubbub_error hubbub_treebuilder_create(hubbub_tokeniser *tokeniser, tb->error_handler = NULL; tb->error_pw = NULL; + tb->quirks_mode = HUBBUB_QUIRKS_MODE_NONE; tokparams.token_handler.handler = hubbub_treebuilder_token_handler; tokparams.token_handler.pw = tb; -- cgit v1.2.3