/* * Copyright 2007 James Bursa * * This file is part of NetSurf, http://www.netsurf-browser.org/ * * NetSurf is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2 of the License. * * NetSurf is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /** \file * Content for text/html (implementation). */ #define _GNU_SOURCE /* for strndup() */ #include #include #include #include #include #include #ifdef WITH_HUBBUB #include #include #include #endif #include #include #include #include "utils/config.h" #include "content/content.h" #include "content/fetch.h" #include "content/fetchcache.h" #include "desktop/browser.h" #include "desktop/gui.h" #include "desktop/options.h" #include "render/box.h" #include "render/font.h" #include "render/html.h" #include "render/imagemap.h" #include "render/layout.h" #include "utils/log.h" #include "utils/messages.h" #include "utils/talloc.h" #include "utils/url.h" #include "utils/utils.h" #define CHUNK 4096 #ifndef WITH_HUBBUB static bool html_set_parser_encoding(struct content *c, const char *encoding); static const char *html_detect_encoding(const char **data, unsigned int *size); #endif static void html_convert_css_callback(content_msg msg, struct content *css, intptr_t p1, intptr_t p2, union content_msg_data data); static bool html_meta_refresh(struct content *c, xmlNode *head); static bool html_head(struct content *c, xmlNode *head); static bool html_find_stylesheets(struct content *c, xmlNode *html, xmlNode *head); static bool html_find_inline_stylesheets(struct content *c, xmlNode *html); static bool html_process_style_element(struct 
content *c, xmlNode *style); static void html_object_callback(content_msg msg, struct content *object, intptr_t p1, intptr_t p2, union content_msg_data data); static void html_object_done(struct box *box, struct content *object, bool background); static void html_object_failed(struct box *box, struct content *content, bool background); static bool html_object_type_permitted(const content_type type, const content_type *permitted_types); static void html_object_refresh(void *p); static void html_destroy_frameset(struct content_html_frames *frameset); static void html_destroy_iframe(struct content_html_iframe *iframe); static void html_set_status(struct content *c, const char *extra); static void html_dump_frameset(struct content_html_frames *frame, unsigned int depth); static const char empty_document[] = "" "" "" "Empty document" "" "" "

Empty document

" "

The document sent by the server is empty.

" "" ""; #ifdef WITH_HUBBUB const char const *ns_prefixes[NUM_NAMESPACES] = { NULL, NULL, "math", "svg", "xlink", "xml", "xmlns" }; const char const *ns_urls[NUM_NAMESPACES] = { NULL, "http://www.w3.org/1999/xhtml", "http://www.w3.org/1998/Math/MathML", "http://www.w3.org/2000/svg", "http://www.w3.org/1999/xlink", "http://www.w3.org/XML/1998/namespace", "http://www.w3.org/2000/xmlns/" }; static int create_comment(void *ctx, const hubbub_string *data, void **result); static int create_doctype(void *ctx, const hubbub_doctype *doctype, void **result); static int create_element(void *ctx, const hubbub_tag *tag, void **result); static int create_text(void *ctx, const hubbub_string *data, void **result); static int ref_node(void *ctx, void *node); static int unref_node(void *ctx, void *node); static int append_child(void *ctx, void *parent, void *child, void **result); static int insert_before(void *ctx, void *parent, void *child, void *ref_child, void **result); static int remove_child(void *ctx, void *parent, void *child, void **result); static int clone_node(void *ctx, void *node, bool deep, void **result); static int reparent_children(void *ctx, void *node, void *new_parent); static int get_parent(void *ctx, void *node, bool element_only, void **result); static int has_children(void *ctx, void *node, bool *result); static int form_associate(void *ctx, void *form, void *node); static int add_attributes(void *ctx, void *node, const hubbub_attribute *attributes, uint32_t n_attributes); static int set_quirks_mode(void *ctx, hubbub_quirks_mode mode); static int change_encoding(void *ctx, const char *mibenum); static hubbub_tree_handler tree_handler = { create_comment, create_doctype, create_element, create_text, ref_node, unref_node, append_child, insert_before, remove_child, clone_node, reparent_children, get_parent, has_children, form_associate, add_attributes, set_quirks_mode, change_encoding, NULL }; /*** Tree construction functions ***/ int create_comment(void *ctx, 
const hubbub_string *data, void **result) { xmlNode *node = xmlNewComment(NULL); node->content = xmlStrndup(data->ptr, data->len); node->_private = (void *)1; *result = node; return 0; } int create_doctype(void *ctx, const hubbub_doctype *doctype, void **result) { /* Make a node that doesn't really exist, then don't append it * later. */ xmlNode *node = xmlNewComment(NULL); node->_private = (void *)1; *result = node; return 0; } int create_element(void *ctx, const hubbub_tag *tag, void **result) { struct content *c = ctx; struct content_html_data *html = &c->data.html; char *name = strndup((const char *) tag->name.ptr, tag->name.len); xmlNode *node = xmlNewNode(NULL, BAD_CAST name); node->_private = (void *)1; *result = node; if (html->has_ns == false) { for (size_t i = 1; i < NUM_NAMESPACES; i++) { html->ns[i] = xmlNewNs(node, BAD_CAST ns_urls[i], BAD_CAST ns_prefixes[i]); } html->has_ns = true; } xmlSetNs(node, html->ns[tag->ns]); free(name); for (size_t i = 0; i < tag->n_attributes; i++) { hubbub_attribute *attr = &tag->attributes[i]; char *name = strndup((const char *) attr->name.ptr, attr->name.len); char *value = strndup((const char *) attr->value.ptr, attr->value.len); if (attr->ns == HUBBUB_NS_NULL) { xmlNewProp(node, BAD_CAST name, BAD_CAST value); } else { xmlNewNsProp(node, html->ns[attr->ns], BAD_CAST name, BAD_CAST value); } free(name); free(value); } return 0; } int create_text(void *ctx, const hubbub_string *data, void **result) { xmlNode *node = xmlNewTextLen(BAD_CAST data->ptr, data->len); node->_private = (void *)1; *result = node; return 0; } int ref_node(void *ctx, void *node) { xmlNode *n = node; n->_private = (void *)((uintptr_t)n->_private + 1); return 0; } int unref_node(void *ctx, void *node) { xmlNode *n = node; n->_private = (void *)((uintptr_t)n->_private - 1); if (n->_private == (void *)0 && n->parent == NULL) { xmlFreeNode(n); } return 0; } int append_child(void *ctx, void *parent, void *child, void **result) { xmlNode *nparent = 
parent; xmlNode *nchild = child; if (nchild->type == XML_TEXT_NODE && nparent->last != NULL && nparent->last->type == XML_TEXT_NODE) { xmlNode *clone; clone_node(ctx, nchild, false, (void **) &clone); *result = xmlAddChild(parent, clone); /* node referenced by clone_node */ } else { *result = xmlAddChild(parent, child); ref_node(ctx, *result); } return 0; } /* insert 'child' before 'ref_child', under 'parent' */ int insert_before(void *ctx, void *parent, void *child, void *ref_child, void **result) { *result = xmlAddPrevSibling(ref_child, child); ref_node(ctx, *result); return 0; } int remove_child(void *ctx, void *parent, void *child, void **result) { xmlUnlinkNode(child); *result = child; ref_node(ctx, *result); return 0; } int clone_node(void *ctx, void *node, bool deep, void **result) { xmlNode *n = xmlCopyNode(node, deep ? 1 : 2); n->_private = (void *)1; *result = n; return 0; } /* Take all of the child nodes of "node" and append them to "new_parent" */ int reparent_children(void *ctx, void *node, void *new_parent) { xmlNode *n = (xmlNode *) node; xmlNode *p = (xmlNode *) new_parent; for (xmlNode *child = n->children; child != NULL; ) { xmlNode *next = child->next; xmlUnlinkNode(child); if (xmlAddChild(p, child) == NULL) return 1; child = next; } return 0; } int get_parent(void *ctx, void *node, bool element_only, void **result) { *result = ((xmlNode *)node)->parent; if (*result != NULL && element_only && ((xmlNode *) *result)->type != XML_ELEMENT_NODE) *result = NULL; if (*result != NULL) ref_node(ctx, *result); return 0; } int has_children(void *ctx, void *node, bool *result) { *result = ((xmlNode *)node)->children ? 
true : false; return 0; } int form_associate(void *ctx, void *form, void *node) { return 0; } int add_attributes(void *ctx, void *node, const hubbub_attribute *attributes, uint32_t n_attributes) { struct content *c = ctx; struct content_html_data *html = &c->data.html; for (size_t i = 0; i < n_attributes; i++) { const hubbub_attribute *attr = &attributes[i]; char *name = strndup((const char *) attr->name.ptr, attr->name.len); char *value = strndup((const char *) attr->value.ptr, attr->value.len); if (attr->ns == HUBBUB_NS_NULL) { xmlNewProp(node, BAD_CAST name, BAD_CAST value); } else { xmlNewNsProp(node, html->ns[attr->ns], BAD_CAST name, BAD_CAST value); } free(name); free(value); } return 0; } int set_quirks_mode(void *ctx, hubbub_quirks_mode mode) { return 0; } int change_encoding(void *ctx, const char *name) { struct content *c = ctx; struct content_html_data *html = &c->data.html; /* If we have an encoding here, it means we are *certain* */ if (html->encoding) { return 0; } /* Find the confidence otherwise (can only be from a BOM) */ uint32_t source; const char *charset = hubbub_parser_read_charset(html->parser, &source); if (source == HUBBUB_CHARSET_CONFIDENT) { html->encoding_source = ENCODING_SOURCE_DETECTED; html->encoding = (char *) charset; return 0; } /* So here we have something of confidence tentative... */ /* http://www.whatwg.org/specs/web-apps/current-work/#change */ /* 2. "If the new encoding is identical or equivalent to the encoding * that is already being used to interpret the input stream, then set * the confidence to confident and abort these steps." */ /* Whatever happens, the encoding should be set here; either for * reprocessing with a different charset, or for confirming that the * charset is in fact correct */ html->encoding = (char *) name; html->encoding_source = ENCODING_SOURCE_META; /* Equal encodings will have the same string pointers */ return (charset == name) ? 0 : 1; } /** * Talloc'd-up allocation hook for Hubbub. 
*/ static void *html_hubbub_realloc(void *ptr, size_t len, void *pw) { return talloc_realloc_size(pw, ptr, len); } /** * Create, set up, and whatnot, a Hubbub parser instance, along with the * relevant libxml2 bits. */ static int html_create_parser(struct content *c) { struct content_html_data *html = &c->data.html; hubbub_parser_optparams param; html->parser = hubbub_parser_create(html->encoding, html_hubbub_realloc, c); if (!html->parser) return 1; html->document = xmlNewDoc(BAD_CAST "1.0"); if (!html->document) return 1; html->tree_handler = tree_handler; html->tree_handler.ctx = c; param.tree_handler = &html->tree_handler; hubbub_parser_setopt(html->parser, HUBBUB_PARSER_TREE_HANDLER, ¶m); param.document_node = html->document; hubbub_parser_setopt(html->parser, HUBBUB_PARSER_DOCUMENT_NODE, ¶m); return 0; } #endif /** * Create a CONTENT_HTML. * * The content_html_data structure is initialized and the HTML parser is * created. */ bool html_create(struct content *c, const char *params[]) { unsigned int i; struct content_html_data *html = &c->data.html; union content_msg_data msg_data; html->parser = 0; #ifdef WITH_HUBBUB html->document = 0; html->has_ns = false; memset(html->ns, 0, sizeof(html->ns)); #endif html->encoding_handler = 0; html->encoding = 0; html->getenc = true; html->base_url = c->url; html->base_target = NULL; html->layout = 0; html->background_colour = TRANSPARENT; html->stylesheet_count = 0; html->stylesheet_content = 0; html->style = 0; html->working_stylesheet = 0; html->object_count = 0; html->object = 0; html->forms = 0; html->imagemaps = 0; html->bw = 0; html->frameset = 0; html->iframe = 0; html->page = 0; html->index = 0; html->box = 0; html->font_func = &nsfont; for (i = 0; params[i]; i += 2) { if (strcasecmp(params[i], "charset") == 0) { html->encoding = talloc_strdup(c, params[i + 1]); if (!html->encoding) goto no_memory; html->encoding_source = ENCODING_SOURCE_HEADER; html->getenc = false; break; } } #ifndef WITH_HUBBUB html->parser = 
htmlCreatePushParserCtxt(0, 0, "", 0, 0, XML_CHAR_ENCODING_NONE); if (!html->parser) goto no_memory; #else /* Set up the parser, libxml2 document, and that */ if (html_create_parser(c) != 0) goto no_memory; #endif #ifndef WITH_HUBBUB if (html->encoding) { /* an encoding was specified in the Content-Type header */ if (!html_set_parser_encoding(c, html->encoding)) return false; } #endif return true; no_memory: msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } /** * Process data for CONTENT_HTML. * * The data is parsed in chunks of size CHUNK, multitasking in between. */ bool html_process_data(struct content *c, char *data, unsigned int size) { unsigned long x; #ifndef WITH_HUBBUB if (c->data.html.getenc) { /* No encoding was specified in the Content-Type header. * Attempt to detect if the encoding is not 8-bit. If the * encoding is 8-bit, leave the parser unchanged, so that it * searches for a . */ const char *encoding; encoding = html_detect_encoding((const char **) &data, &size); if (encoding) { if (!html_set_parser_encoding(c, encoding)) return false; c->data.html.encoding = talloc_strdup(c, encoding); if (!c->data.html.encoding) return false; c->data.html.encoding_source = ENCODING_SOURCE_DETECTED; } c->data.html.getenc = false; /* The data we received may have solely consisted of a BOM. * If so, it will have been stripped by html_detect_encoding. * Therefore, we'll have nothing to do in that case. 
*/ if (size == 0) return true; } #endif #ifdef WITH_HUBBUB hubbub_error err; #endif for (x = 0; x + CHUNK <= size; x += CHUNK) { #ifdef WITH_HUBBUB LOG(("Parsing %d bytes", CHUNK)); err = hubbub_parser_parse_chunk( c->data.html.parser, (uint8_t *) data + x, CHUNK); if (err == HUBBUB_ENCODINGCHANGE) { goto encoding_change; } #else htmlParseChunk(c->data.html.parser, data + x, CHUNK, 0); #endif gui_multitask(); } #ifdef WITH_HUBBUB LOG(("Parsing %d bytes", (size - x))); err = hubbub_parser_parse_chunk( c->data.html.parser, (uint8_t *) data + x, (size - x)); if (err == HUBBUB_ENCODINGCHANGE) { goto encoding_change; } #else htmlParseChunk(c->data.html.parser, data + x, (int) (size - x), 0); #endif #ifndef WITH_HUBBUB if (!c->data.html.encoding && c->data.html.parser->input->encoding) { /* The encoding was not in headers or detected, * and the parser found a . */ /* However, if that encoding is non-ASCII-compatible, * ignore it, as it can't possibly be correct */ if (strncasecmp((const char *) c->data.html.parser-> input->encoding, "UTF-16", 6) == 0 || /* UTF-16(LE|BE)? */ strncasecmp((const char *) c->data.html.parser-> input->encoding, "UTF-32", 6) == 0) { /* UTF-32(LE|BE)? */ c->data.html.encoding = talloc_strdup(c, "ISO-8859-1"); c->data.html.encoding_source = ENCODING_SOURCE_DETECTED; } else { c->data.html.encoding = talloc_strdup(c, (const char *) c->data.html.parser-> input->encoding); c->data.html.encoding_source = ENCODING_SOURCE_META; } if (!c->data.html.encoding) { union content_msg_data msg_data; msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } /* have the encoding; don't attempt to detect it */ c->data.html.getenc = false; /* now, we must reset the parser such that it reparses * using the correct charset, and then reparse any document * source we've got. 
we achieve this by recreating the * parser in its entirety as this is simpler than resetting * the existing one and ensuring it's still set up correctly. */ if (c->data.html.parser->myDoc) xmlFreeDoc(c->data.html.parser->myDoc); htmlFreeParserCtxt(c->data.html.parser); c->data.html.parser = htmlCreatePushParserCtxt(0, 0, "", 0, 0, XML_CHAR_ENCODING_NONE); if (!c->data.html.parser) { union content_msg_data msg_data; msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } if (!html_set_parser_encoding(c, c->data.html.encoding)) return false; /* and reparse received document source - the recursion * is safe as we've just set c->data.html.encoding so * we'll never get back in here. */ if (!html_process_data(c, c->source_data, c->source_size)) return false; } #endif return true; #ifdef WITH_HUBBUB encoding_change: LOG(("Changing encoding")); /* Free up hubbub, libxml2 etc */ hubbub_parser_destroy(c->data.html.parser); if (c->data.html.document) { xmlFreeDoc(c->data.html.document); } c->data.html.has_ns = false; memset(c->data.html.ns, 0, sizeof(c->data.html.ns)); /* Set up the parser, libxml2 document, and that */ if (html_create_parser(c) != 0) { union content_msg_data msg_data; msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } /* Recurse to reprocess all that data. This is safe because * the encoding is now specified at parser-start which means * it cannot be changed again. */ return html_process_data(c, c->source_data, c->source_size); #endif } #ifndef WITH_HUBBUB /** * Set the HTML parser character encoding. 
* * \param c content of type CONTENT_HTML * \param encoding name of encoding * \return true on success, false on error and error reported */ bool html_set_parser_encoding(struct content *c, const char *encoding) { struct content_html_data *html = &c->data.html; xmlError *error; char error_message[500]; union content_msg_data msg_data; html->encoding_handler = xmlFindCharEncodingHandler(encoding); if (!html->encoding_handler) { /* either out of memory, or no handler available */ /* assume no handler available, which is not a fatal error */ LOG(("no encoding handler for \"%s\"", encoding)); /* \todo warn user and ask them to install iconv? */ return true; } xmlCtxtResetLastError(html->parser); if (xmlSwitchToEncoding(html->parser, html->encoding_handler)) { error = xmlCtxtGetLastError(html->parser); snprintf(error_message, sizeof error_message, "%s xmlSwitchToEncoding(): %s", messages_get("MiscError"), error ? error->message : "failed"); msg_data.error = error_message; content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } /* Dirty hack to get around libxml oddness: * 1) When creating a push parser context, the input flow's encoding * string is not set (whether an encoding is specified or not) * 2) When switching encoding (as above), the input flow's encoding * string is never changed * 3) When handling a meta charset, the input flow's encoding string * is checked to determine if an encoding has already been set. * If it has been set, then the meta charset is ignored. * * The upshot of this is that, if we don't explicitly set the input * flow's encoding string here, any meta charset in the document * will override our setting, which is incorrect behaviour. * * Ideally, this would be fixed in libxml, but that requires rather * more knowledge than I currently have of what libxml is doing. 
*/ if (!html->parser->input->encoding) html->parser->input->encoding = xmlStrdup((const xmlChar *) encoding); /* Ensure noone else attempts to reset the encoding */ html->getenc = false; return true; } /** * Attempt to detect the encoding of some HTML data. * * \param data Pointer to HTML source data * \param size Pointer to length of data * \return a constant string giving the encoding, or 0 if the encoding * appears to be some 8-bit encoding * * If a BOM is encountered, *data and *size will be modified to skip over it */ const char *html_detect_encoding(const char **data, unsigned int *size) { const unsigned char *d = (const unsigned char *) *data; /* this detection assumes that the first two characters are <= 0xff */ if (*size < 4) return 0; if (d[0] == 0x00 && d[1] == 0x00 && d[2] == 0xfe && d[3] == 0xff) { /* BOM 00 00 fe ff */ *data += 4; *size -= 4; return "UTF-32BE"; } else if (d[0] == 0xff && d[1] == 0xfe && d[2] == 0x00 && d[3] == 0x00) { /* BOM ff fe 00 00 */ *data += 4; *size -= 4; return "UTF-32LE"; } else if (d[0] == 0x00 && d[1] != 0x00 && d[2] == 0x00 && d[3] != 0x00) /* 00 xx 00 xx */ return "UTF-16BE"; else if (d[0] != 0x00 && d[1] == 0x00 && d[2] != 0x00 && d[3] == 0x00) /* xx 00 xx 00 */ return "UTF-16LE"; else if (d[0] == 0x00 && d[1] == 0x00 && d[2] == 0x00 && d[3] != 0x00) /* 00 00 00 xx */ return "ISO-10646-UCS-4"; else if (d[0] != 0x00 && d[1] == 0x00 && d[2] == 0x00 && d[3] == 0x00) /* xx 00 00 00 */ return "ISO-10646-UCS-4"; else if (d[0] == 0xfe && d[1] == 0xff) { /* BOM fe ff */ *data += 2; *size -= 2; return "UTF-16BE"; } else if (d[0] == 0xff && d[1] == 0xfe) { /* BOM ff fe */ *data += 2; *size -= 2; return "UTF-16LE"; } else if (d[0] == 0xef && d[1] == 0xbb && d[2] == 0xbf) { /* BOM ef bb bf */ *data += 3; *size -= 3; return "UTF-8"; } return 0; } #endif /** * Convert a CONTENT_HTML for display. 
* * The following steps are carried out in order: * * - parsing to an XML tree is completed * - stylesheets are fetched * - the XML tree is converted to a box tree and object fetches are started * - the box tree is laid out * * On exit, the content status will be either CONTENT_STATUS_DONE if the * document is completely loaded or CONTENT_STATUS_READY if objects are still * being fetched. */ bool html_convert(struct content *c, int width, int height) { xmlDoc *document; xmlNode *html, *head; union content_msg_data msg_data; unsigned int time_before, time_taken; /* finish parsing */ if (c->source_size == 0) #ifndef WITH_HUBBUB htmlParseChunk(c->data.html.parser, empty_document, sizeof empty_document, 0); #else hubbub_parser_parse_chunk(c->data.html.parser, (uint8_t *) empty_document, sizeof empty_document); #endif #ifndef WITH_HUBBUB htmlParseChunk(c->data.html.parser, "", 0, 1); document = c->data.html.parser->myDoc; /*xmlDebugDumpDocument(stderr, c->data.html.parser->myDoc);*/ htmlFreeParserCtxt(c->data.html.parser); c->data.html.parser = 0; #else hubbub_parser_completed(c->data.html.parser); hubbub_parser_destroy(c->data.html.parser); c->data.html.parser = 0; document = c->data.html.document; /*xmlDebugDumpDocument(stderr, document);*/ #endif if (!document) { LOG(("Parsing failed")); msg_data.error = messages_get("ParsingFail"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } /* locate html and head elements */ html = xmlDocGetRootElement(document); if (html == 0 || strcmp((const char *) html->name, "html") != 0) { LOG(("html element not found")); xmlFreeDoc(document); msg_data.error = messages_get("ParsingFail"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } for (head = html->children; head != 0 && head->type != XML_ELEMENT_NODE; head = head->next) ; if (head && strcmp((const char *) head->name, "head") != 0) { head = 0; LOG(("head element not found")); } if (head) { if (!html_head(c, head)) { msg_data.error = 
messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } /* handle meta refresh */ if (!html_meta_refresh(c, head)) return false; } /* get stylesheets */ if (!html_find_stylesheets(c, html, head)) return false; /* convert xml tree to box tree */ LOG(("XML to box")); content_set_status(c, messages_get("Processing")); content_broadcast(c, CONTENT_MSG_STATUS, msg_data); if (!xml_to_box(html, c)) { msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } /*box_dump(c->data.html.layout->children, 0);*/ /*if (c->data.html.frameset) html_dump_frameset(c->data.html.frameset, 0);*/ /* extract image maps - can't do this sensibly in xml_to_box */ if (!imagemap_extract(html, c)) { LOG(("imagemap extraction failed")); msg_data.error = messages_get("NoMemory"); content_broadcast(c, CONTENT_MSG_ERROR, msg_data); return false; } /*imagemap_dump(c);*/ /* XML tree not required past this point */ xmlFreeDoc(document); /* layout the box tree */ html_set_status(c, messages_get("Formatting")); content_broadcast(c, CONTENT_MSG_STATUS, msg_data); LOG(("Layout document")); time_before = wallclock(); html_reformat(c, width, height); time_taken = wallclock() - time_before; LOG(("Layout took %dcs", time_taken)); c->reformat_time = wallclock() + ((time_taken < option_min_reflow_period ? option_min_reflow_period : time_taken * 1.25)); LOG(("Scheduling relayout no sooner than %dcs", c->reformat_time - wallclock())); /*box_dump(c->data.html.layout->children, 0);*/ if (c->active == 0) c->status = CONTENT_STATUS_DONE; else c->status = CONTENT_STATUS_READY; html_set_status(c, ""); return true; } /** * Process elements in . * * \param c content structure * \param head xml node of head element * \return true on success, false on memory exhaustion * * The title and base href are extracted if present. 
*/
bool html_head(struct content *c, xmlNode *head)
{
	xmlNode *node;
	xmlChar *s;

	c->title = 0;

	for (node = head->children; node != 0; node = node->next) {
		if (node->type != XML_ELEMENT_NODE)
			continue;

		LOG(("Node: %s", node->name));
		if (!c->title && strcmp((const char *) node->name,
				"title") == 0) {
			/* only the first title element is used */
			xmlChar *title = xmlNodeGetContent(node);
			char *title2;

			if (!title)
				return false;
			title2 = squash_whitespace((const char *) title);
			xmlFree(title);
			if (!title2)
				return false;
			c->title = talloc_strdup(c, title2);
			free(title2);
			if (!c->title)
				return false;

		} else if (strcmp((const char *) node->name, "base") == 0) {
			char *href = (char *) xmlGetProp(node,
					(const xmlChar *) "href");

			if (href) {
				char *url;
				url_func_result res;

				res = url_normalize(href, &url);
				if (res == URL_FUNC_OK) {
					/* keep the old base URL unless the
					 * copy actually succeeded */
					char *base = talloc_strdup(c, url);

					free(url);
					if (!base) {
						xmlFree(href);
						return false;
					}
					c->data.html.base_url = base;
				} else if (res == URL_FUNC_NOMEM) {
					xmlFree(href);
					return false;
				}
				xmlFree(href);
			}

			/* don't use the central values to ease freeing
			 * later on */
			if ((s = xmlGetProp(node,
					(const xmlChar *) "target"))) {
				if ((!strcasecmp((const char *) s, "_blank")) ||
				    (!strcasecmp((const char *) s, "_top")) ||
				    (!strcasecmp((const char *) s, "_parent")) ||
				    (!strcasecmp((const char *) s, "_self")) ||
				    ('a' <= s[0] && s[0] <= 'z') ||
				    ('A' <= s[0] && s[0] <= 'Z')) { /* [6.16] */
					c->data.html.base_target =
						talloc_strdup(c,
							(const char *) s);
					if (!c->data.html.base_target) {
						xmlFree(s);
						return false;
					}
				}
				xmlFree(s);
			}
		}
	}

	return true;
}


/**
 * Advance past linear whitespace.
 *
 * \param  p    current position
 * \param  end  end of the string being scanned
 * \return  first position at or after \a p that is not whitespace,
 *	    or \a end
 */
static char *html_skip_lws(char *p, const char *end)
{
	/* isspace() requires a value representable as unsigned char */
	while (p < end && isspace((unsigned char) *p))
		p++;

	return p;
}


/**
 * Search for meta refresh
 *
 * http://wp.netscape.com/assist/net_sites/pushpull.html
 *
 * \param  c     content structure
 * \param  head  xml node of head element
 * \return  true on success, false otherwise (error reported)
 */
bool html_meta_refresh(struct content *c, xmlNode *head)
{
	xmlNode *n;
	xmlChar *equiv, *content;
	union content_msg_data msg_data;
	char *url, *end, *refresh = NULL, quote;
	url_func_result res;

	for (n = head == 0 ? 0 : head->children; n; n = n->next) {
		if (n->type != XML_ELEMENT_NODE)
			continue;

		/* Recurse into noscript elements */
		if (strcmp((const char *) n->name, "noscript") == 0) {
			if (!html_meta_refresh(c, n)) {
				/* Some error occurred */
				return false;
			} else if (c->refresh) {
				/* Meta refresh found - stop */
				return true;
			}
		}

		if (strcmp((const char *) n->name, "meta")) {
			continue;
		}

		equiv = xmlGetProp(n, (const xmlChar *) "http-equiv");
		if (!equiv)
			continue;

		if (strcasecmp((const char *) equiv, "refresh")) {
			xmlFree(equiv);
			continue;
		}

		xmlFree(equiv);

		content = xmlGetProp(n, (const xmlChar *) "content");
		if (!content)
			continue;

		end = (char *) content + strlen((const char *) content);

		/* each element is parsed afresh; don't inherit the quote
		 * character of a previous one */
		quote = 0;

		/* content  := *LWS 1*DIGIT *LWS [';' *LWS *1url *LWS]
		 * url      := "url" *LWS '=' *LWS (url-nq | url-sq | url-dq)
		 * url-nq   := *urlchar
		 * url-sq   := "'" *(urlchar | '"') "'"
		 * url-dq   := '"' *(urlchar | "'") '"'
		 * urlchar  := [#x9#x21#x23-#x26#x28-#x7E] | nonascii
		 * nonascii := [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
		 */

		/* *LWS 1*DIGIT */
		msg_data.delay = (int)strtol((char *) content, &url, 10);
		/* a very small delay and self-referencing URL can cause a loop
		 * that grinds machines to a halt. To prevent this we set a
		 * minimum refresh delay of 1s. */
		if (msg_data.delay < 1)
			msg_data.delay = 1;

		/* *LWS */
		url = html_skip_lws(url, end);

		/* ';' */
		if (url < end && *url == ';')
			url++;

		/* *LWS */
		url = html_skip_lws(url, end);

		if (url == end) {
			/* Just delay specified, so refresh current page */
			xmlFree(content);

			c->refresh = talloc_strdup(c, c->url);
			if (!c->refresh) {
				msg_data.error = messages_get("NoMemory");
				content_broadcast(c,
					CONTENT_MSG_ERROR, msg_data);
				return false;
			}

			content_broadcast(c, CONTENT_MSG_REFRESH, msg_data);
			break;
		}

		/* "url" */
		if (end - url < 3 || strncasecmp(url, "url", 3) != 0) {
			/* Insufficient or unexpected input, ignore this
			 * header */
			xmlFree(content);
			continue;
		}
		url += 3;

		/* *LWS */
		url = html_skip_lws(url, end);

		/* '=' */
		if (url == end || *url != '=') {
			/* Insufficient or unexpected input, ignore this
			 * header */
			xmlFree(content);
			continue;
		}
		url++;

		/* *LWS */
		url = html_skip_lws(url, end);

		/* '"' or "'" */
		if (url < end && (*url == '"' || *url == '\'')) {
			quote = *url;
			url++;
		}

		/* Start of URL */
		refresh = url;

		if (quote != 0) {
			/* url-sq | url-dq */
			while (url < end && *url != quote)
				url++;
		} else {
			/* url-nq */
			while (url < end && !isspace((unsigned char) *url))
				url++;
		}

		/* '"' or "'" or *LWS (we don't care) */
		if (url < end)
			*url = '\0';

		/* refresh points into content until url_join replaces it,
		 * so content must stay alive across the call */
		res = url_join(refresh, c->data.html.base_url, &refresh);

		xmlFree(content);

		if (res == URL_FUNC_NOMEM) {
			msg_data.error = messages_get("NoMemory");
			content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
			return false;
		} else if (res == URL_FUNC_FAILED) {
			/* This isn't fatal so carry on looking */
			continue;
		}

		c->refresh = talloc_strdup(c, refresh);

		free(refresh);

		if (!c->refresh) {
			msg_data.error = messages_get("NoMemory");
			content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
			return false;
		}

		content_broadcast(c, CONTENT_MSG_REFRESH, msg_data);
	}

	return true;
}


/**
 * Process inline stylesheets and fetch linked stylesheets.
* * \param c content structure * \param head xml node of html element * \param head xml node of head element, or 0 if none * \return true on success, false if an error occurred */ bool html_find_stylesheets(struct content *c, xmlNode *html, xmlNode *head) { xmlNode *node; char *rel, *type, *media, *href, *url; unsigned int i = STYLESHEET_START; unsigned int last_active = 0; union content_msg_data msg_data; url_func_result res; struct content **stylesheet_content; /* stylesheet 0 is the base style sheet, * stylesheet 1 is the adblocking stylesheet, * stylesheet 2 is any