/* * Copyright 2008 Andrew Sidwell * Copyright 2008 John-Mark Bell * * This file is part of NetSurf, http://www.netsurf-browser.org/ * * NetSurf is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2 of the License. * * NetSurf is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #ifdef WITH_HUBBUB #define _GNU_SOURCE /* for strndup */ #include #include #include #include #include #include #include #include "render/parser_binding.h" #include "utils/config.h" #include "utils/log.h" #include "utils/talloc.h" typedef struct hubbub_ctx { hubbub_parser *parser; htmlDocPtr document; bool owns_doc; const char *encoding; binding_encoding_source encoding_source; #define NUM_NAMESPACES (6) xmlNsPtr namespaces[NUM_NAMESPACES]; #undef NUM_NAMESPACES hubbub_tree_handler tree_handler; } hubbub_ctx; static struct { const char *prefix; const char *url; } namespaces[] = { { NULL, NULL }, { NULL, "http://www.w3.org/1999/xhtml" }, { "math", "http://www.w3.org/1998/Math/MathML" }, { "svg", "http://www.w3.org/2000/svg" }, { "xlink", "http://www.w3.org/1999/xlink" }, /** \todo Oh dear. LibXML2 refuses to create any namespace with a * prefix of "xml". That sucks, royally. */ { "xml", "http://www.w3.org/XML/1998/namespace" }, { "xmlns", "http://www.w3.org/2000/xmlns/" } }; static inline char *c_string_from_hubbub_string(hubbub_ctx *ctx, const hubbub_string *str); static void create_namespaces(hubbub_ctx *ctx, xmlNode *root); static int create_comment(void *ctx, const hubbub_string *data, void **result); static int create_doctype(void *ctx, const hubbub_doctype *doctype, void **result); static int create_element(void *ctx, const hubbub_tag *tag, void **result); static int create_text(void *ctx, const hubbub_string *data, void **result); static int ref_node(void *ctx, void *node); static int unref_node(void *ctx, void *node); static int append_child(void *ctx, void *parent, void *child, void **result); static int insert_before(void *ctx, void *parent, void *child, void *ref_child, void **result); static int remove_child(void *ctx, void *parent, void *child, void **result); static int clone_node(void *ctx, void *node, bool deep, void **result); static int reparent_children(void *ctx, void *node, void *new_parent); static int get_parent(void *ctx, void *node, bool element_only, void **result); static int has_children(void *ctx, void *node, bool *result); static int form_associate(void *ctx, void *form, void *node); static int add_attributes(void *ctx, void *node, const hubbub_attribute *attributes, uint32_t n_attributes); static int set_quirks_mode(void *ctx, hubbub_quirks_mode mode); static int change_encoding(void *ctx, const char *charset); static hubbub_tree_handler tree_handler = { create_comment, create_doctype, create_element, create_text, ref_node, unref_node, append_child, insert_before, remove_child, clone_node, reparent_children, get_parent, has_children, form_associate, add_attributes, set_quirks_mode, change_encoding, NULL }; static void *myrealloc(void *ptr, size_t len, void *pw) { return talloc_realloc_size(pw, ptr, len); } binding_error binding_create_tree(void *arena, const char *charset, void **ctx) { hubbub_ctx *c; hubbub_parser_optparams params; hubbub_error error; c = malloc(sizeof(hubbub_ctx)); if (c == NULL) return BINDING_NOMEM; c->parser = NULL; c->encoding = charset; c->encoding_source = ENCODING_SOURCE_HEADER; c->document = NULL; c->owns_doc = true; error = hubbub_parser_create(charset, true, myrealloc, arena, &c->parser); if (error != HUBBUB_OK) { free(c); if (error == HUBBUB_BADENCODING) return BINDING_BADENCODING; else return BINDING_NOMEM; /* Assume OOM */ } c->document = htmlNewDocNoDtD(NULL, NULL); if (c->document == NULL) { hubbub_parser_destroy(c->parser); free(c); return BINDING_NOMEM; } c->document->_private = (void *) 0; for (uint32_t i = 0; i < sizeof(c->namespaces) / sizeof(c->namespaces[0]); i++) { c->namespaces[i] = NULL; } c->tree_handler = tree_handler; c->tree_handler.ctx = (void *) c; params.tree_handler = &c->tree_handler; hubbub_parser_setopt(c->parser, HUBBUB_PARSER_TREE_HANDLER, ¶ms); ref_node(c, c->document); params.document_node = c->document; hubbub_parser_setopt(c->parser, HUBBUB_PARSER_DOCUMENT_NODE, ¶ms); *ctx = (void *) c; return BINDING_OK; } binding_error binding_destroy_tree(void *ctx) { hubbub_ctx *c = (hubbub_ctx *) ctx; if (ctx == NULL) return BINDING_OK; if (c->parser != NULL) hubbub_parser_destroy(c->parser); if (c->owns_doc) xmlFreeDoc(c->document); c->parser = NULL; c->encoding = NULL; c->document = NULL; free(c); return BINDING_OK; } binding_error binding_parse_chunk(void *ctx, const uint8_t *data, size_t len) { hubbub_ctx *c = (hubbub_ctx *) ctx; hubbub_error err; err = hubbub_parser_parse_chunk(c->parser, (uint8_t *) data, len); if (err == HUBBUB_ENCODINGCHANGE) return BINDING_ENCODINGCHANGE; return BINDING_OK; } binding_error binding_parse_completed(void *ctx) { hubbub_ctx *c = (hubbub_ctx *) ctx; hubbub_error error; error = hubbub_parser_completed(c->parser); /** \todo error handling */ return BINDING_OK; } const char *binding_get_encoding(void *ctx, binding_encoding_source *source) { hubbub_ctx *c = (hubbub_ctx *) ctx; *source = c->encoding_source; return c->encoding; } xmlDocPtr binding_get_document(void *ctx) { hubbub_ctx *c = (hubbub_ctx *) ctx; xmlDocPtr doc = c->document; c->owns_doc = false; return doc; } /*****************************************************************************/ char *c_string_from_hubbub_string(hubbub_ctx *ctx, const hubbub_string *str) { return strndup((const char *) str->ptr, (int) str->len); } void create_namespaces(hubbub_ctx *ctx, xmlNode *root) { for (uint32_t i = 1; i < sizeof(namespaces) / sizeof(namespaces[0]); i++) { ctx->namespaces[i - 1] = xmlNewNs(root, BAD_CAST namespaces[i].url, BAD_CAST namespaces[i].prefix); if (ctx->namespaces[i - 1] == NULL) { LOG(("Failed creating namespace %s\n", namespaces[i].prefix)); } } } int create_comment(void *ctx, const hubbub_string *data, void **result) { hubbub_ctx *c = (hubbub_ctx *) ctx; char *content; xmlNodePtr n; content = c_string_from_hubbub_string(c, data); if (content == NULL) return 1; n = xmlNewDocComment(c->document, BAD_CAST content); if (n == NULL) { free(content); return 1; } n->_private = (void *) (uintptr_t) 1; free(content); *result = (void *) n; return 0; } int create_doctype(void *ctx, const hubbub_doctype *doctype, void **result) { hubbub_ctx *c = (hubbub_ctx *) ctx; char *name, *public = NULL, *system = NULL; xmlDtdPtr n; name = c_string_from_hubbub_string(c, &doctype->name); if (name == NULL) return 1; if (!doctype->public_missing) { public = c_string_from_hubbub_string(c, &doctype->public_id); if (public == NULL) { free(name); return 1; } } if (!doctype->system_missing) { system = c_string_from_hubbub_string(c, &doctype->system_id); if (system == NULL) { free(public); free(name); return 1; } } n = xmlNewDtd(c->document, BAD_CAST name, BAD_CAST (public ? public : ""), BAD_CAST (system ? system : "")); if (n == NULL) { free(system); free(public); free(name); return 1; } n->_private = (void *) (uintptr_t) 1; *result = (void *) n; free(system); free(public); free(name); return 0; } int create_element(void *ctx, const hubbub_tag *tag, void **result) { hubbub_ctx *c = (hubbub_ctx *) ctx; char *name; xmlNodePtr n; name = c_string_from_hubbub_string(c, &tag->name); if (name == NULL) return 1; if (c->namespaces[0] != NULL) { n = xmlNewDocNode(c->document, c->namespaces[tag->ns - 1], BAD_CAST name, NULL); } else { n = xmlNewDocNode(c->document, NULL, BAD_CAST name, NULL); /* We're creating the root node of the document. Therefore, * create the namespaces and set this node's namespace */ if (n != NULL && c->namespaces[0] == NULL) { create_namespaces(c, (void *) n); xmlSetNs(n, c->namespaces[tag->ns - 1]); } } if (n == NULL) { free(name); return 1; } n->_private = (void *) (uintptr_t) 1; if (tag->n_attributes > 0 && add_attributes(ctx, (void *) n, tag->attributes, tag->n_attributes) != 0) { xmlFreeNode(n); free(name); return 1; } *result = (void *) n; free(name); return 0; } int create_text(void *ctx, const hubbub_string *data, void **result) { hubbub_ctx *c = (hubbub_ctx *) ctx; xmlNodePtr n; n = xmlNewDocTextLen(c->document, BAD_CAST data->ptr, (int) data->len); if (n == NULL) { return 1; } n->_private = (void *) (uintptr_t) 1; *result = (void *) n; return 0; } int ref_node(void *ctx, void *node) { hubbub_ctx *c = (hubbub_ctx *) ctx; if (node == c->document) { xmlDoc *n = (xmlDoc *) node; uintptr_t count = (uintptr_t) n->_private; n->_private = (void *) ++count; } else { xmlNode *n = (xmlNode *) node; uintptr_t count = (uintptr_t) n->_private; n->_private = (void *) ++count; } return 0; } int unref_node(void *ctx, void *node) { hubbub_ctx *c = (hubbub_ctx *) ctx; if (node == c->document) { xmlDoc *n = (xmlDoc *) node; uintptr_t count = (uintptr_t) n->_private; assert(count != 0 && "Node has refcount of zero"); n->_private = (void *) --count; } else { xmlNode *n = (xmlNode *) node; uintptr_t count = (uintptr_t) n->_private; assert(count != 0 && "Node has refcount of zero"); n->_private = (void *) --count; if (count == 0 && n->parent == NULL) { xmlFreeNode(n); } } return 0; } int append_child(void *ctx, void *parent, void *child, void **result) { xmlNode *chld = (xmlNode *) child; xmlNode *p = (xmlNode *) parent; if (chld->type == XML_TEXT_NODE && p->last != NULL && p->last->type == XML_TEXT_NODE) { /* Need to clone the child, as libxml will free it if it * merges the content with a pre-existing text node. */ chld = xmlCopyNode(chld, 0); if (chld == NULL) return 1; *result = xmlAddChild(p, chld); assert(*result != (void *) chld); } else { *result = xmlAddChild(p, chld); } if (*result == NULL) return 1; ref_node(ctx, *result); return 0; } int insert_before(void *ctx, void *parent, void *child, void *ref_child, void **result) { xmlNode *chld = (xmlNode *) child; xmlNode *ref = (xmlNode *) ref_child; if (chld->type == XML_TEXT_NODE && ref->prev != NULL && ref->prev->type == XML_TEXT_NODE) { /* Clone text node, as it'll be freed by libxml */ chld = xmlCopyNode(chld, 0); if (chld == NULL) return 1; *result = xmlAddNextSibling(ref->prev, chld); assert(*result != (void *) chld); } else { *result = xmlAddPrevSibling(ref, chld); } if (*result == NULL) return 1; ref_node(ctx, *result); return 0; } int remove_child(void *ctx, void *parent, void *child, void **result) { xmlNode *chld = (xmlNode *) child; xmlUnlinkNode(chld); *result = child; ref_node(ctx, *result); return 0; } int clone_node(void *ctx, void *node, bool deep, void **result) { xmlNode *n = (xmlNode *) node; *result = xmlCopyNode(n, deep ? 1 : 2); if (*result == NULL) return 1; ((xmlNode *)(*result))->_private = (void *) (uintptr_t) 1; return 0; } int reparent_children(void *ctx, void *node, void *new_parent) { xmlNode *n = (xmlNode *) node; xmlNode *p = (xmlNode *) new_parent; for (xmlNode *child = n->children; child != NULL; ) { xmlNode *next = child->next; xmlUnlinkNode(child); if (xmlAddChild(p, child) == NULL) return 1; child = next; } return 0; } int get_parent(void *ctx, void *node, bool element_only, void **result) { xmlNode *n = (xmlNode *) node; *result = (void *) n->parent; if (*result != NULL && element_only && ((xmlNode *) *result)->type != XML_ELEMENT_NODE) { *result = NULL; } if (*result != NULL) ref_node(ctx, *result); return 0; } int has_children(void *ctx, void *node, bool *result) { xmlNode *n = (xmlNode *) node; *result = n->children != NULL; return 0; } int form_associate(void *ctx, void *form, void *node) { return 0; } int add_attributes(void *ctx, void *node, const hubbub_attribute *attributes, uint32_t n_attributes) { hubbub_ctx *c = (hubbub_ctx *) ctx; xmlNode *n = (xmlNode *) node; for (uint32_t attr = 0; attr < n_attributes; attr++) { xmlAttr *prop; char *name, *value; name = c_string_from_hubbub_string(c, &attributes[attr].name); if (name == NULL) return 1; value = c_string_from_hubbub_string(c, &attributes[attr].value); if (value == NULL) { free(name); return 1; } if (attributes[attr].ns != HUBBUB_NS_NULL && c->namespaces[0] != NULL) { prop = xmlNewNsProp(n, c->namespaces[attributes[attr].ns - 1], BAD_CAST name, BAD_CAST value); } else { prop = xmlNewProp(n, BAD_CAST name, BAD_CAST value); } if (prop == NULL) { free(value); free(name); return 1; } free(value); free(name); } return 0; } int set_quirks_mode(void *ctx, hubbub_quirks_mode mode) { return 0; } int change_encoding(void *ctx, const char *charset) { hubbub_ctx *c = (hubbub_ctx *) ctx; /* If we have an encoding here, it means we are *certain* */ if (c->encoding != NULL) { return 0; } /* Find the confidence otherwise (can only be from a BOM) */ uint32_t source; const char *name = hubbub_parser_read_charset(c->parser, &source); if (source == HUBBUB_CHARSET_CONFIDENT) { c->encoding_source = ENCODING_SOURCE_DETECTED; c->encoding = (char *) charset; return 0; } /* So here we have something of confidence tentative... */ /* http://www.whatwg.org/specs/web-apps/current-work/#change */ /* 2. "If the new encoding is identical or equivalent to the encoding * that is already being used to interpret the input stream, then set * the confidence to confident and abort these steps." */ /* Whatever happens, the encoding should be set here; either for * reprocessing with a different charset, or for confirming that the * charset is in fact correct */ c->encoding = charset; c->encoding_source = ENCODING_SOURCE_META; /* Equal encodings will have the same string pointers */ return (charset == name) ? 0 : 1; } #endif