/* * This file is part of Hubbub. * Licensed under the MIT License, * http://www.opensource.org/licenses/mit-license.php * Copyright 2008 John-Mark Bell */ #include #include #include #include "treebuilder/modes.h" #include "treebuilder/internal.h" #include "treebuilder/treebuilder.h" #include "charset/detect.h" #include "utils/utils.h" #include "utils/string.h" /** * Process a meta tag as if "in head". * * \param treebuilder The treebuilder instance * \param token The token to process */ static hubbub_error process_meta_in_head(hubbub_treebuilder *treebuilder, const hubbub_token *token) { static uint16_t utf16, utf16be, utf16le; uint16_t charset_enc = 0; uint16_t content_type_enc = 0; size_t i; hubbub_error err = HUBBUB_OK; err = insert_element(treebuilder, &token->data.tag, false); if (err != HUBBUB_OK) return err; /** \todo ack sc flag */ if (treebuilder->tree_handler->encoding_change == NULL) return err; /* Grab UTF-16 MIBenums */ if (utf16 == 0) { utf16 = parserutils_charset_mibenum_from_name( "utf-16", SLEN("utf-16")); utf16be = parserutils_charset_mibenum_from_name( "utf-16be", SLEN("utf-16be")); utf16le = parserutils_charset_mibenum_from_name( "utf-16le", SLEN("utf-16le")); assert(utf16 != 0 && utf16be != 0 && utf16le != 0); } for (i = 0; i < token->data.tag.n_attributes; i++) { hubbub_attribute *attr = &token->data.tag.attributes[i]; if (hubbub_string_match(attr->name.ptr, attr->name.len, (const uint8_t *) "charset", SLEN("charset")) == true) { /* Extract charset */ charset_enc = parserutils_charset_mibenum_from_name( (const char *) attr->value.ptr, attr->value.len); } else if (hubbub_string_match(attr->name.ptr, attr->name.len, (const uint8_t *) "content", SLEN("content")) == true) { /* Extract charset from Content-Type */ content_type_enc = hubbub_charset_parse_content( attr->value.ptr, attr->value.len); } } /* Fall back, if necessary */ if (charset_enc == 0 && content_type_enc != 0) charset_enc = content_type_enc; if (charset_enc != 0) { const char *name; hubbub_charset_fix_charset(&charset_enc); /* Change UTF-16 to UTF-8 */ if (charset_enc == utf16le || charset_enc == utf16be || charset_enc == utf16) { charset_enc = parserutils_charset_mibenum_from_name( "UTF-8", SLEN("UTF-8")); } name = parserutils_charset_mibenum_to_name(charset_enc); err = treebuilder->tree_handler->encoding_change( treebuilder->tree_handler->ctx, name); } return err; } /** * Handle token in "in head" insertion mode * * \param treebuilder The treebuilder instance * \param token The token to handle * \return True to reprocess token, false otherwise */ hubbub_error handle_in_head(hubbub_treebuilder *treebuilder, const hubbub_token *token) { hubbub_error err = HUBBUB_OK; bool handled = false; switch (token->type) { case HUBBUB_TOKEN_CHARACTER: err = process_characters_expect_whitespace(treebuilder, token, true); break; case HUBBUB_TOKEN_COMMENT: err = process_comment_append(treebuilder, token, treebuilder->context.element_stack[ treebuilder->context.current_node].node); break; case HUBBUB_TOKEN_DOCTYPE: /** \todo parse error */ break; case HUBBUB_TOKEN_START_TAG: { element_type type = element_type_from_name(treebuilder, &token->data.tag.name); if (type == HTML) { /* Process as if "in body" */ err = handle_in_body(treebuilder, token); } else if (type == BASE || type == BASEFONT || type == BGSOUND || type == LINK) { err = insert_element(treebuilder, &token->data.tag, false); /** \todo ack sc flag */ } else if (type == META) { err = process_meta_in_head(treebuilder, token); } else if (type == TITLE) { err = parse_generic_rcdata(treebuilder, token, true); } else if (type == NOFRAMES || type == STYLE) { err = parse_generic_rcdata(treebuilder, token, false); } else if (type == NOSCRIPT) { if (treebuilder->context.enable_scripting) { err = parse_generic_rcdata(treebuilder, token, false); } else { err = insert_element(treebuilder, &token->data.tag, true); if (err != HUBBUB_OK) return err; treebuilder->context.mode = IN_HEAD_NOSCRIPT; } } else if (type == SCRIPT) { /** \todo need to ensure that the client callback * sets the parser-inserted/already-executed script * flags. */ err = parse_generic_rcdata(treebuilder, token, false); } else if (type == HEAD) { /** \todo parse error */ } else { err = HUBBUB_REPROCESS; } } break; case HUBBUB_TOKEN_END_TAG: { element_type type = element_type_from_name(treebuilder, &token->data.tag.name); if (type == HEAD) { handled = true; } else if (type == HTML || type == BODY || type == BR) { err = HUBBUB_REPROCESS; } /** \todo parse error */ } break; case HUBBUB_TOKEN_EOF: err = HUBBUB_REPROCESS; break; } if (handled || err == HUBBUB_REPROCESS) { hubbub_ns ns; element_type otype; void *node; element_stack_pop(treebuilder, &ns, &otype, &node); treebuilder->tree_handler->unref_node( treebuilder->tree_handler->ctx, node); treebuilder->context.mode = AFTER_HEAD; } return err; }