summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John Mark Bell <jmb@netsurf-browser.org> 2009-03-05 11:00:00 +0000
committer John Mark Bell <jmb@netsurf-browser.org> 2009-03-05 11:00:00 +0000
commit 2a8f109871049a5b2376c95e91eeaed4172f89c2 (patch)
tree e8264a3440a4eb921b2621a21ff325db6ba121bb
parent 44e5b337b4c26216e2e85fca0e9ccabed58d8564 (diff)
download libdom-2a8f109871049a5b2376c95e91eeaed4172f89c2.tar.gz
libdom-2a8f109871049a5b2376c95e91eeaed4172f89c2.tar.bz2
Make hubbub parser binding build trees correctly (credit: Bo Yang)
svn path=/trunk/dom/; revision=6711
-rw-r--r-- bindings/hubbub/parser.c 723
-rw-r--r-- bindings/hubbub/parser.h 33
-rw-r--r-- include/dom/dom.h 14
-rw-r--r-- src/core/string.c 8
-rw-r--r-- src/utils/namespace.c 91
-rw-r--r-- test/lib/testobject.c 2
6 files changed, 731 insertions, 140 deletions
diff --git a/bindings/hubbub/parser.c b/bindings/hubbub/parser.c
index 7b5e6ab..0ad4176 100644
--- a/bindings/hubbub/parser.c
+++ b/bindings/hubbub/parser.c
@@ -3,6 +3,7 @@
* Licensed under the MIT License,
* http://www.opensource.org/licenses/mit-license.php
* Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ * Copyright 2009 Bo Yang <struggleyb.nku@gmail.com>
*/
#include <stdio.h>
@@ -16,13 +17,19 @@
#include "utils.h"
/**
- * libdom Hubbub parser object
+ * libdom Hubbub parser context
*/
struct dom_hubbub_parser {
hubbub_parser *parser; /**< Hubbub parser instance */
+ hubbub_tree_handler tree_handler;
+ /**< Hubbub parser tree handler */
struct dom_document *doc; /**< DOM Document we're building */
+ dom_hubbub_encoding_source encoding_source;
+ /**< The document's encoding source */
+ const char *encoding; /**< The document's encoding */
+
bool complete; /**< Indicate stream completion */
struct dom_implementation *impl;/**< DOM implementation */
@@ -34,39 +41,83 @@ struct dom_hubbub_parser {
void *mctx; /**< Pointer to client data */
};
-static hubbub_error __dom_hubbub_token_handler(const hubbub_token *token,
- void *pw);
-static bool __initialised;
+/* The callbacks declarations */
+static int create_comment(void *parser, const hubbub_string *data,
+ void **result);
+static int create_doctype(void *parser, const hubbub_doctype *doctype,
+ void **result);
+static int create_element(void *parser, const hubbub_tag *tag, void **result);
+static int create_text(void *parser, const hubbub_string *data,
+ void **result);
+static int ref_node(void *parser, void *node);
+static int unref_node(void *parser, void *node);
+static int append_child(void *parser, void *parent, void *child,
+ void **result);
+static int insert_before(void *parser, void *parent, void *child,
+ void *ref_child, void **result);
+static int remove_child(void *parser, void *parent, void *child,
+ void **result);
+static int clone_node(void *parser, void *node, bool deep, void **result);
+static int reparent_children(void *parser, void *node, void *new_parent);
+static int get_parent(void *parser, void *node, bool element_only,
+ void **result);
+static int has_children(void *parser, void *node, bool *result);
+static int form_associate(void *parser, void *form, void *node);
+static int add_attributes(void *parser, void *node,
+ const hubbub_attribute *attributes, uint32_t n_attributes);
+static int set_quirks_mode(void *parser, hubbub_quirks_mode mode);
+static int change_encoding(void *parser, const char *charset);
+
+static hubbub_tree_handler tree_handler = {
+ create_comment,
+ create_doctype,
+ create_element,
+ create_text,
+ ref_node,
+ unref_node,
+ append_child,
+ insert_before,
+ remove_child,
+ clone_node,
+ reparent_children,
+ get_parent,
+ has_children,
+ form_associate,
+ add_attributes,
+ set_quirks_mode,
+ change_encoding,
+ NULL
+};
+
+static bool __initialised = false;
/**
* Create a Hubbub parser instance
*
* \param aliases Path to encoding alias mapping file
* \param enc Source charset, or NULL
- * \param int_enc Desired charset of document buffer (UTF-8 or UTF-16)
+ * \param fix_enc Whether to fix up the encoding (passed through to hubbub)
* \param alloc Memory (de)allocation function
* \param pw Pointer to client-specific private data
* \param msg Informational message function
* \param mctx Pointer to client-specific private data
* \return Pointer to instance, or NULL on memory exhaustion
*/
-dom_hubbub_parser *dom_hubbub_parser_create(const char *aliases,
- const char *enc, const char *int_enc,
+dom_hubbub_parser *dom_hubbub_parser_create(const char *aliases,
+ const char *enc, bool fix_enc,
dom_alloc alloc, void *pw, dom_msg msg, void *mctx)
{
dom_hubbub_parser *parser;
hubbub_parser_optparams params;
- struct dom_string *features;
+ hubbub_error error;
dom_exception err;
- hubbub_error e;
-
- UNUSED(int_enc);
+ struct dom_string *features;
if (__initialised == false) {
- e = hubbub_initialise(aliases, (hubbub_alloc) alloc, pw);
- if (e != HUBBUB_OK) {
- msg(DOM_MSG_ERROR, mctx,
+ error = hubbub_initialise(aliases, (hubbub_alloc) alloc, pw);
+ if (error != HUBBUB_OK) {
+ msg(DOM_MSG_ERROR, mctx,
"Failed initialising hubbub");
return NULL;
}
@@ -74,36 +125,33 @@ dom_hubbub_parser *dom_hubbub_parser_create(const char *aliases,
__initialised = true;
}
+
parser = alloc(NULL, sizeof(dom_hubbub_parser), pw);
if (parser == NULL) {
- msg(DOM_MSG_CRITICAL, mctx, "No memory for parser");
+ msg(DOM_MSG_CRITICAL, mctx, "No memory for parsing context");
return NULL;
}
- e = hubbub_parser_create(enc, true, (hubbub_alloc) alloc, pw,
- &parser->parser);
- if (e != HUBBUB_OK) {
- alloc(parser, 0, pw);
- msg(DOM_MSG_CRITICAL, mctx, "Failed to create hubbub parser");
- return NULL;
- }
+ parser->parser = NULL;
+ parser->doc = NULL;
+ parser->encoding = enc;
+ parser->encoding_source = enc != NULL ? ENCODING_SOURCE_HEADER
+ : ENCODING_SOURCE_DETECTED;
+ parser->complete = false;
+ parser->impl = NULL;
- params.token_handler.handler = __dom_hubbub_token_handler;
- params.token_handler.pw = parser;
- e = hubbub_parser_setopt(parser->parser, HUBBUB_PARSER_TOKEN_HANDLER,
- &params);
- if (e != HUBBUB_OK) {
- hubbub_parser_destroy(parser->parser);
- alloc(parser, 0, pw);
- msg(DOM_MSG_CRITICAL, mctx,
- "Failed registering hubbub token handler");
+ parser->alloc = alloc;
+ parser->pw = pw;
+ parser->msg = msg;
+ parser->mctx = mctx;
+
+ error = hubbub_parser_create(enc, fix_enc, alloc, pw, &parser->parser);
+ if (error != HUBBUB_OK) {
+ parser->alloc(parser, 0, parser->pw);
+ msg(DOM_MSG_CRITICAL, mctx, "Can't create parser");
return NULL;
}
- parser->doc = NULL;
-
- parser->complete = false;
-
/* Get DOM implementation */
/* Create string representation of the features we want */
err = dom_string_create(alloc, pw,
@@ -115,7 +163,7 @@ dom_hubbub_parser *dom_hubbub_parser_create(const char *aliases,
return NULL;
}
- /* Now, try to get an appropriate implementation from the registry */
+ /* Now, try to get an appropriate implementation from the registry */
err = dom_implregistry_get_dom_implementation(features,
&parser->impl, alloc, pw);
if (err != DOM_NO_ERR) {
@@ -129,11 +177,26 @@ dom_hubbub_parser *dom_hubbub_parser_create(const char *aliases,
/* no longer need the features string */
dom_string_unref(features);
- parser->alloc = alloc;
- parser->pw = pw;
+ err = dom_implementation_create_document(parser->impl, NULL, NULL, NULL,
+ &parser->doc, alloc, pw);
+ if (err != DOM_NO_ERR) {
+ hubbub_parser_destroy(parser->parser);
+ alloc(parser, 0, pw);
+ msg(DOM_MSG_ERROR, mctx, "Can't create DOM document");
+ return NULL;
+ }
- parser->msg = msg;
- parser->mctx = mctx;
+ parser->tree_handler = tree_handler;
+ parser->tree_handler.ctx = (void *) parser;
+
+ params.tree_handler = &parser->tree_handler;
+ hubbub_parser_setopt(parser->parser, HUBBUB_PARSER_TREE_HANDLER,
+ &params);
+
+ dom_node_ref((struct dom_node *) parser->doc);
+ params.document_node = parser->doc;
+ hubbub_parser_setopt(parser->parser, HUBBUB_PARSER_DOCUMENT_NODE,
+ &params);
return parser;
}
@@ -142,31 +205,29 @@ dom_hubbub_parser *dom_hubbub_parser_create(const char *aliases,
void dom_hubbub_parser_destroy(dom_hubbub_parser *parser)
{
dom_implementation_unref(parser->impl);
-
hubbub_parser_destroy(parser->parser);
+ parser->parser = NULL;
- /** \todo do we want to clean up the document here too? */
+ if (parser->doc != NULL) {
+ dom_node_unref((struct dom_node *) parser->doc);
+ parser->doc = NULL;
+ }
parser->alloc(parser, 0, parser->pw);
}
-/* Parse a chunk of data */
dom_hubbub_error dom_hubbub_parser_parse_chunk(dom_hubbub_parser *parser,
uint8_t *data, size_t len)
{
hubbub_error err;
err = hubbub_parser_parse_chunk(parser->parser, data, len);
- if (err != HUBBUB_OK) {
- parser->msg(DOM_MSG_ERROR, parser->mctx,
- "hubbub_parser_parse_chunk failed: %d", err);
+ if (err != HUBBUB_OK)
return DOM_HUBBUB_HUBBUB_ERR | err;
- }
return DOM_HUBBUB_OK;
}
-/* Notify parser that datastream is empty */
dom_hubbub_error dom_hubbub_parser_completed(dom_hubbub_parser *parser)
{
hubbub_error err;
@@ -183,73 +244,523 @@ dom_hubbub_error dom_hubbub_parser_completed(dom_hubbub_parser *parser)
return DOM_HUBBUB_OK;
}
-/* Retrieve the created DOM Document */
struct dom_document *dom_hubbub_parser_get_document(dom_hubbub_parser *parser)
{
- return (parser->complete ? parser->doc : NULL);
+ struct dom_document *doc = NULL;
+
+ if (parser->complete) {
+ doc = parser->doc;
+ parser->doc = NULL;
+ }
+
+ return doc;
}
-hubbub_error __dom_hubbub_token_handler(const hubbub_token *token, void *pw)
+const char *dom_hubbub_parser_get_encoding(dom_hubbub_parser *parser,
+ dom_hubbub_encoding_source *source)
{
- dom_hubbub_parser *parser = (dom_hubbub_parser *) pw;
- static const char *token_names[] = {
- "DOCTYPE", "START TAG", "END TAG",
- "COMMENT", "CHARACTERS", "EOF"
- };
- size_t i;
+ *source = parser->encoding_source;
+
+ return parser->encoding != NULL ? parser->encoding
+ : "Windows-1252";
+}
+
+/* The callbacks definitions */
+/**
+ * Hubbub tree handler callback: create a DOM comment node.
+ *
+ * \param parser  The dom_hubbub_parser context, passed as an opaque pointer
+ * \param data    Comment text; data->ptr / data->len give the node content
+ * \param result  Receives the new comment node; left as NULL on failure
+ * \return 0 on success, 1 on failure (a message is sent to the client's
+ *         message handler)
+ */
+static int create_comment(void *parser, const hubbub_string *data,
+		void **result)
+{
+	dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+	dom_exception err;
+	struct dom_string *str;
+	struct dom_comment *comment;
+
+	*result = NULL;
+
+	/* Wrap the raw comment bytes in a DOM string */
+	err = dom_string_create(dom_parser->alloc, dom_parser->pw, data->ptr,
+			data->len, &str);
+	if (err != DOM_NO_ERR) {
+		dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+				"Can't create comment node text");
+		return 1;
+	}
+
+	err = dom_document_create_comment(dom_parser->doc, str, &comment);
+	if (err != DOM_NO_ERR) {
+		dom_string_unref(str);
+		/* The precision consumed by "%.*s" must be an int, so the
+		 * length is converted explicitly rather than passed as-is */
+		dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+				"Can't create comment node with text '%.*s'",
+				(int) data->len, data->ptr);
+		return 1;
+	}
+
+	*result = comment;
+
+	/* Release our own reference to the string */
+	dom_string_unref(str);
+
+	return 0;
+}
+
+static int create_doctype(void *parser, const hubbub_doctype *doctype,
+ void **result)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+ struct dom_string *qname, *public_id = NULL, *system_id = NULL;
+ struct dom_document_type *dtype;
+
+ *result = NULL;
+
+ err = dom_string_create(dom_parser->alloc, dom_parser->pw,
+ doctype->name.ptr, doctype->name.len, &qname);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create doctype name");
+ goto fail;
+ }
+
+ if (doctype->public_missing == false) {
+ err = dom_string_create(dom_parser->alloc, dom_parser->pw,
+ doctype->public_id.ptr,
+ doctype->public_id.len, &public_id);
+ } else {
+ err = dom_string_create(dom_parser->alloc, dom_parser->pw,
+ NULL, 0, &public_id);
+ }
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create doctype public id");
+ goto clean1;
+ }
+
+ if (doctype->system_missing == false) {
+ err = dom_string_create(dom_parser->alloc, dom_parser->pw,
+ doctype->system_id.ptr,
+ doctype->system_id.len, &system_id);
+ } else {
+ err = dom_string_create(dom_parser->alloc, dom_parser->pw,
+ NULL, 0, &system_id);
+ }
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create doctype system id");
+ goto clean2;
+ }
+
+ err = dom_implementation_create_document_type(dom_parser->impl, qname,
+ public_id, system_id, &dtype, dom_parser->alloc,
+ dom_parser->pw);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create the document type");
+ goto clean3;
+ }
+
+ *result = dtype;
+
+clean3:
+ dom_string_unref(system_id);
+
+clean2:
+ dom_string_unref(public_id);
+
+clean1:
+ dom_string_unref(qname);
+
+fail:
+ if (*result == NULL)
+ return 1;
+ else
+ return 0;
+}
+
+static int create_element(void *parser, const hubbub_tag *tag, void **result)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+ struct dom_string *name;
+ struct dom_element *element = NULL;
+
+ *result = NULL;
+
+ err = dom_string_create(dom_parser->alloc, dom_parser->pw,
+ tag->name.ptr, tag->name.len, &name);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create element name");
+ goto fail;
+ }
+
+ if (tag->ns == HUBBUB_NS_NULL) {
+ err = dom_document_create_element(dom_parser->doc, name,
+ &element);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create the DOM element");
+ goto clean1;
+ }
+ } else {
+ err = dom_document_create_element_ns(dom_parser->doc,
+ dom_namespaces[tag->ns], name, &element);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create the DOM element");
+ goto clean1;
+ }
+ }
+
+ *result = element;
+clean1:
+ dom_string_unref(name);
+
+fail:
+ if (*result == NULL)
+ return 1;
+ else
+ return 0;
+}
+
+/**
+ * Hubbub tree handler callback: create a DOM text node.
+ *
+ * \param parser  The dom_hubbub_parser context, passed as an opaque pointer
+ * \param data    Character data; data->ptr / data->len give the node content
+ * \param result  Receives the new text node; left as NULL on failure
+ * \return 0 on success, 1 on failure (a message is sent to the client's
+ *         message handler)
+ */
+static int create_text(void *parser, const hubbub_string *data, void **result)
+{
+	dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+	dom_exception err;
+	struct dom_string *str;
+	struct dom_text *text = NULL;
+
+	*result = NULL;
+
+	/* Wrap the raw character data in a DOM string */
+	err = dom_string_create(dom_parser->alloc, dom_parser->pw, data->ptr,
+			data->len, &str);
+	if (err != DOM_NO_ERR) {
+		/* The precision consumed by "%.*s" must be an int, so the
+		 * length is converted explicitly rather than passed as-is */
+		dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+				"Can't create text '%.*s'", (int) data->len,
+				data->ptr);
+		goto fail;
+	}
+
+	err = dom_document_create_text_node(dom_parser->doc, str, &text);
+	if (err != DOM_NO_ERR) {
+		dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+				"Can't create the DOM text node");
+		goto clean1;
+	}
+
+	*result = text;
+
+clean1:
+	/* Release our own reference to the string on both paths */
+	dom_string_unref(str);
+
+fail:
+	/* *result is only non-NULL when the node was created */
+	if (*result == NULL)
+		return 1;
+	else
+		return 0;
+}
+
+static int ref_node(void *parser, void *node)
+{
+ struct dom_node *dnode = (struct dom_node *) node;
+
+ UNUSED(parser);
+
+ dom_node_ref(dnode);
+
+ return 0;
+}
+
+static int unref_node(void *parser, void *node)
+{
+ struct dom_node *dnode = (struct dom_node *) node;
+
+ UNUSED(parser);
+
+ dom_node_unref(dnode);
+
+ return 0;
+}
+
+static int append_child(void *parser, void *parent, void *child, void **result)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+
+ err = dom_node_append_child((struct dom_node *) parent,
+ (struct dom_node *) child,
+ (struct dom_node **) result);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't append child '%p' for parent '%p'",
+ child, parent);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int insert_before(void *parser, void *parent, void *child,
+ void *ref_child, void **result)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+
+ err = dom_node_insert_before((struct dom_node *) parent,
+ (struct dom_node *) child,
+ (struct dom_node *) ref_child,
+ (struct dom_node **) result);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't insert node '%p' before node '%p'",
+ child, ref_child);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int remove_child(void *parser, void *parent, void *child, void **result)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+
+ err = dom_node_remove_child((struct dom_node *) parent,
+ (struct dom_node *) child,
+ (struct dom_node **) result);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't remove child '%p'", child);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int clone_node(void *parser, void *node, bool deep, void **result)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+
+ err = dom_node_clone_node((struct dom_node *) node, deep,
+ (struct dom_node **) result);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't clone node '%p'", node);
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * Hubbub tree handler callback: move every child of one node to another.
+ *
+ * Children are detached from \a node one at a time, first child first, and
+ * appended to \a new_parent, so their relative order is kept.
+ *
+ * \param parser      The dom_hubbub_parser context, passed as an opaque pointer
+ * \param node        Node whose children are to be moved
+ * \param new_parent  Node that receives the children
+ * \return 0 on success, 1 on failure (a message is sent to the client's
+ *         message handler)
+ */
+static int reparent_children(void *parser, void *node, void *new_parent)
+{
+	dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+	dom_exception err;
+	struct dom_node *child, *result;
+
+	while (true) {
+		err = dom_node_get_first_child((struct dom_node *) node,
+				&child);
+		if (err != DOM_NO_ERR) {
+			dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+					"Error in dom_node_get_first_child");
+			return 1;
+		}
+		/* No first child left: everything has been moved */
+		if (child == NULL)
+			break;
+
+		err = dom_node_remove_child((struct dom_node *) node,
+				(struct dom_node *) child, &result);
+		if (err != DOM_NO_ERR) {
+			dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+					"Error in dom_node_remove_child");
+			goto fail;
+		}
+		/* Drop the reference handed back through result */
+		dom_node_unref(result);
+
+		err = dom_node_append_child((struct dom_node *) new_parent,
+				(struct dom_node *) child, &result);
+		if (err != DOM_NO_ERR) {
+			dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+					"Error in dom_node_append_child");
+			goto fail;
+		}
+		dom_node_unref(result);
+		/* Drop the reference obtained from get_first_child */
+		dom_node_unref(child);
+	}
+	return 0;
+
+fail:
+	/* Release the child fetched in the failing iteration */
+	dom_node_unref(child);
+	return 1;
+}
+
+/**
+ * Hubbub tree handler callback: retrieve the parent of a node.
+ *
+ * \param parser        The dom_hubbub_parser context, passed as an opaque
+ *                      pointer
+ * \param node          Node whose parent is wanted
+ * \param element_only  If true, only report the parent when it is an
+ *                      element node
+ * \param result        Receives the parent, or NULL when there is no parent
+ *                      or it is filtered out by \a element_only
+ * \return 0 on success, 1 on failure (a message is sent to the client's
+ *         message handler)
+ *
+ * The reference obtained from dom_node_get_parent_node() is handed to the
+ * caller through \a result; it is released here only when the parent is
+ * not returned.
+ */
+static int get_parent(void *parser, void *node, bool element_only,
+		void **result)
+{
+	dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+	dom_exception err;
+	struct dom_node *parent;
+	dom_node_type type = DOM_NODE_TYPE_COUNT;
+
+	err = dom_node_get_parent_node((struct dom_node *) node,
+			&parent);
+	if (err != DOM_NO_ERR) {
+		dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+				"Error in dom_node_get_parent");
+		return 1;
+	}
+
+	/* A node without a parent has nothing to type-check: report NULL
+	 * directly instead of querying the node type of a NULL pointer */
+	if (parent == NULL || element_only == false) {
+		*result = parent;
+		return 0;
+	}
+
+	err = dom_node_get_node_type(parent, &type);
+	if (err != DOM_NO_ERR) {
+		dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+				"Error in dom_node_get_type");
+		goto fail;
+	}
+
+	if (type == DOM_ELEMENT_NODE) {
+		*result = parent;
+		return 0;
+	}
+
+	/* Parent exists but is not an element: hide it from the caller */
+	*result = NULL;
+	dom_node_unref(parent);
+	return 0;
+
+fail:
+	dom_node_unref(parent);
+	return 1;
+}
+
+static int has_children(void *parser, void *node, bool *result)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+
+ UNUSED(parser);
+
+ err = dom_node_has_child_nodes((struct dom_node *) node, result);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Error in dom_node_has_child_nodes");
+ return 1;
+ }
+ return 0;
+}
+
+static int form_associate(void *parser, void *form, void *node)
+{
UNUSED(parser);
+ UNUSED(form);
+ UNUSED(node);
- printf("%s: ", token_names[token->type]);
-
- switch (token->type) {
- case HUBBUB_TOKEN_DOCTYPE:
- printf("'%.*s' (%svalid)\n",
- (int) token->data.doctype.name.len,
- token->data.doctype.name.ptr,
- token->data.doctype.force_quirks ? "in" : "");
- break;
- case HUBBUB_TOKEN_START_TAG:
- printf("'%.*s' %s\n",
- (int) token->data.tag.name.len,
- token->data.tag.name.ptr,
- (token->data.tag.n_attributes > 0) ?
- "attributes:" : "");
- for (i = 0; i < token->data.tag.n_attributes; i++) {
- printf("\t'%.*s' = '%.*s'\n",
- (int) token->data.tag.attributes[i].name.len,
- token->data.tag.attributes[i].name.ptr,
- (int) token->data.tag.attributes[i].value.len,
- token->data.tag.attributes[i].value.ptr);
+ return 0;
+}
+
+static int add_attributes(void *parser, void *node,
+ const hubbub_attribute *attributes, uint32_t n_attributes)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ dom_exception err;
+ uint32_t i;
+
+ for (i = 0; i < n_attributes; i++) {
+ struct dom_string *name, *value;
+ err = dom_string_create(dom_parser->alloc, dom_parser->pw,
+ attributes[i].name.ptr,
+ attributes[i].name.len, &name);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create attribute name");
+ goto fail;
}
- break;
- case HUBBUB_TOKEN_END_TAG:
- printf("'%.*s' %s\n",
- (int) token->data.tag.name.len,
- token->data.tag.name.ptr,
- (token->data.tag.n_attributes > 0) ?
- "attributes:" : "");
- for (i = 0; i < token->data.tag.n_attributes; i++) {
- printf("\t'%.*s' = '%.*s'\n",
- (int) token->data.tag.attributes[i].name.len,
- token->data.tag.attributes[i].name.ptr,
- (int) token->data.tag.attributes[i].value.len,
- token->data.tag.attributes[i].value.ptr);
+
+ err = dom_string_create(dom_parser->alloc, dom_parser->pw,
+ attributes[i].value.ptr,
+ attributes[i].value.len, &value);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL, dom_parser->mctx,
+ "Can't create attribute value");
+ dom_string_unref(name);
+ goto fail;
+ }
+
+ if (attributes[i].ns == HUBBUB_NS_NULL) {
+ err = dom_element_set_attribute(
+ (struct dom_element *) node, name,
+ value);
+ dom_string_unref(name);
+ dom_string_unref(value);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL,
+ dom_parser->mctx,
+ "Can't add attribute");
+ goto fail;
+ }
+ } else {
+ err = dom_element_set_attribute_ns(
+ (struct dom_element *) node,
+ dom_namespaces[attributes[i].ns], name,
+ value);
+ dom_string_unref(name);
+ dom_string_unref(value);
+ if (err != DOM_NO_ERR) {
+ dom_parser->msg(DOM_MSG_CRITICAL,
+ dom_parser->mctx,
+ "Can't add attribute ns");
+ goto fail;
+ }
}
- break;
- case HUBBUB_TOKEN_COMMENT:
- printf("'%.*s'\n", (int) token->data.comment.len,
- token->data.comment.ptr);
- break;
- case HUBBUB_TOKEN_CHARACTER:
- printf("'%.*s'\n", (int) token->data.character.len,
- token->data.character.ptr);
- break;
- case HUBBUB_TOKEN_EOF:
- printf("\n");
- break;
- }
-
- return HUBBUB_OK;
+ }
+
+ return 0;
+
+fail:
+ return 1;
+}
+
+static int set_quirks_mode(void *parser, hubbub_quirks_mode mode)
+{
+ UNUSED(parser);
+ UNUSED(mode);
+
+ return 0;
+}
+
+static int change_encoding(void *parser, const char *charset)
+{
+ dom_hubbub_parser *dom_parser = (dom_hubbub_parser *) parser;
+ uint32_t source;
+ const char *name;
+
+ /* If we have an encoding here, it means we are *certain* */
+ if (dom_parser->encoding != NULL) {
+ return 0;
+ }
+
+ /* Find the confidence otherwise (can only be from a BOM) */
+ name = hubbub_parser_read_charset(dom_parser->parser, &source);
+
+ if (source == HUBBUB_CHARSET_CONFIDENT) {
+ dom_parser->encoding_source = ENCODING_SOURCE_DETECTED;
+ dom_parser->encoding = (char *) charset;
+ return 0;
+ }
+
+ /* So here we have something of confidence tentative... */
+ /* http://www.whatwg.org/specs/web-apps/current-work/#change */
+
+ /* 2. "If the new encoding is identical or equivalent to the encoding
+ * that is already being used to interpret the input stream, then set
+ * the confidence to confident and abort these steps." */
+
+ /* Whatever happens, the encoding should be set here; either for
+ * reprocessing with a different charset, or for confirming that the
+ * charset is in fact correct */
+ dom_parser->encoding = charset;
+ dom_parser->encoding_source = ENCODING_SOURCE_META;
+
+ /* Equal encodings will have the same string pointers */
+ return (charset == name) ? 0 : 1;
}
diff --git a/bindings/hubbub/parser.h b/bindings/hubbub/parser.h
index f4c2ac4..7e28916 100644
--- a/bindings/hubbub/parser.h
+++ b/bindings/hubbub/parser.h
@@ -19,9 +19,37 @@ struct dom_document;
typedef struct dom_hubbub_parser dom_hubbub_parser;
+/* The encoding source of the document */
+typedef enum dom_hubub_encoding_source {
+ ENCODING_SOURCE_HEADER,
+ ENCODING_SOURCE_DETECTED,
+ ENCODING_SOURCE_META
+} dom_hubbub_encoding_source;
+
+/* The recommended way to use the parser is:
+ *
+ * dom_hubbub_parser_create(...);
+ * dom_hubbub_parser_parse_chunk(...);
+ * call _parse_chunk for all chunks of data
+ *
+ * After you have parsed the data,
+ *
+ * dom_hubbub_parser_completed(...);
+ * dom_hubbub_parser_get_document(...);
+ * dom_hubbub_parser_destroy(...);
+ *
+ * Clients must ensure that the last 3 function calls above are called in
+ * the order shown. dom_hubbub_parser_get_document() will pass the ownership
+ * of the document to the client. After that, the parser should be destroyed.
+ * The client must not call any method of this parser after destruction.
+ *
+ * The client must call dom_hubbub_parser_completed() before calling
+ * dom_hubbub_parser_get_document().
+ */
+
/* Create a Hubbub parser instance */
dom_hubbub_parser *dom_hubbub_parser_create(const char *aliases,
- const char *enc, const char *int_enc,
+ const char *enc, bool fix_enc,
dom_alloc alloc, void *pw, dom_msg msg, void *mctx);
/* Destroy a Hubbub parser instance */
@@ -37,4 +65,7 @@ dom_hubbub_error dom_hubbub_parser_completed(dom_hubbub_parser *parser);
/* Retrieve the created DOM Document */
struct dom_document *dom_hubbub_parser_get_document(dom_hubbub_parser *parser);
+/* Retrieve the document's encoding */
+const char *dom_hubbub_parser_get_encoding(dom_hubbub_parser *parser,
+ dom_hubbub_encoding_source *source);
#endif
diff --git a/include/dom/dom.h b/include/dom/dom.h
index 4d17a3f..fd8b9d8 100644
--- a/include/dom/dom.h
+++ b/include/dom/dom.h
@@ -35,4 +35,18 @@
#include <dom/core/string.h>
#include <dom/core/text.h>
+typedef enum dom_namespace {
+ DOM_NAMESPACE_NULL = 0,
+ DOM_NAMESPACE_HTML = 1,
+ DOM_NAMESPACE_MATHML = 2,
+ DOM_NAMESPACE_SVG = 3,
+ DOM_NAMESPACE_XLINK = 4,
+ DOM_NAMESPACE_XML = 5,
+ DOM_NAMESPACE_XMLNS = 6,
+
+ DOM_NAMESPACE_COUNT = 7
+} dom_namespace;
+
+extern struct dom_string *dom_namespaces[DOM_NAMESPACE_COUNT];
+
#endif
diff --git a/src/core/string.c b/src/core/string.c
index 2540e26..3d30e3f 100644
--- a/src/core/string.c
+++ b/src/core/string.c
@@ -89,6 +89,14 @@ dom_exception dom_string_create(dom_alloc alloc, void *pw,
{
struct dom_string *ret;
+ if (ptr == NULL && len == 0) {
+ dom_string_ref(&empty_string);
+
+ *str = &empty_string;
+
+ return DOM_NO_ERR;
+ }
+
ret = alloc(NULL, sizeof(struct dom_string), pw);
if (ret == NULL)
return DOM_NO_MEM_ERR;
diff --git a/src/utils/namespace.c b/src/utils/namespace.c
index 8002b8e..ca5b01d 100644
--- a/src/utils/namespace.c
+++ b/src/utils/namespace.c
@@ -7,19 +7,31 @@
#include <string.h>
-#include <dom/core/string.h>
+#include <dom/dom.h>
#include "utils/namespace.h"
#include "utils/utils.h"
+
/** XML prefix */
static struct dom_string *xml;
-/** XML namespace URI */
-static struct dom_string *xml_ns;
/** XMLNS prefix */
static struct dom_string *xmlns;
-/** XMLNS namespace URI */
-static struct dom_string *xmlns_ns;
+
+/** The namespace strings */
+static const char *namespaces[DOM_NAMESPACE_COUNT] = {
+ NULL,
+ "http://www.w3.org/1999/xhtml",
+ "http://www.w3.org/1998/Math/MathML",
+ "http://www.w3.org/2000/svg",
+ "http://www.w3.org/1999/xlink",
+ "http://www.w3.org/XML/1998/namespace",
+ "http://www.w3.org/2000/xmlns/"
+};
+
+struct dom_string *dom_namespaces[DOM_NAMESPACE_COUNT] = {
+ NULL,
+};
/**
* Initialise the namespace component
@@ -30,6 +42,7 @@ static struct dom_string *xmlns_ns;
*/
dom_exception _dom_namespace_initialise(dom_alloc alloc, void *pw)
{
+ int i;
dom_exception err;
err = dom_string_create(alloc, pw,
@@ -39,31 +52,27 @@ dom_exception _dom_namespace_initialise(dom_alloc alloc, void *pw)
}
err = dom_string_create(alloc, pw,
- (const uint8_t *) "http://www.w3.org/XML/1998/namespace",
- SLEN("http://www.w3.org/XML/1998/namespace"),
- &xml_ns);
- if (err != DOM_NO_ERR) {
- dom_string_unref(xml);
- return err;
- }
-
- err = dom_string_create(alloc, pw,
(const uint8_t *) "xmlns", SLEN("xmlns"), &xmlns);
if (err != DOM_NO_ERR) {
- dom_string_unref(xml_ns);
dom_string_unref(xml);
+ xml = NULL;
+
return err;
}
- err = dom_string_create(alloc, pw,
- (const uint8_t *) "http://www.w3.org/2000/xmlns",
- SLEN("http://www.w3.org/2000/xmlns"),
- &xmlns_ns);
- if (err != DOM_NO_ERR) {
- dom_string_unref(xmlns);
- dom_string_unref(xml_ns);
- dom_string_unref(xml);
- return err;
+ for (i = 1; i < DOM_NAMESPACE_COUNT; i++) {
+ err = dom_string_create(
+ alloc, pw, (const uint8_t *) namespaces[i],
+ strlen(namespaces[i]), &dom_namespaces[i]);
+ if (err != DOM_NO_ERR) {
+ dom_string_unref(xmlns);
+ xmlns = NULL;
+
+ dom_string_unref(xml);
+ xml = NULL;
+
+ return err;
+ }
}
return DOM_NO_ERR;
@@ -76,10 +85,24 @@ dom_exception _dom_namespace_initialise(dom_alloc alloc, void *pw)
*/
dom_exception _dom_namespace_finalise(void)
{
- dom_string_unref(xmlns_ns);
- dom_string_unref(xmlns);
- dom_string_unref(xml_ns);
- dom_string_unref(xml);
+ int i;
+
+ if (xmlns != NULL) {
+ dom_string_unref(xmlns);
+ xmlns = NULL;
+ }
+
+ if (xml != NULL) {
+ dom_string_unref(xml);
+ xml = NULL;
+ }
+
+ for (i = 1; i < DOM_NAMESPACE_COUNT; i++) {
+ if (dom_namespaces[i] != NULL) {
+ dom_string_unref(dom_namespaces[i]);
+ dom_namespaces[i] = NULL;
+ }
+ }
return DOM_NO_ERR;
}
@@ -119,7 +142,8 @@ dom_exception _dom_namespace_validate_qname(struct dom_string *qname,
/* No prefix */
/* If namespace URI is for xmlns, ensure qname == "xmlns" */
if (namespace != NULL &&
- dom_string_cmp(namespace, xmlns_ns) == 0 &&
+ dom_string_cmp(namespace,
+ dom_namespaces[DOM_NAMESPACE_XMLNS]) == 0 &&
dom_string_cmp(qname, xmlns) != 0) {
return DOM_NAMESPACE_ERR;
}
@@ -140,20 +164,23 @@ dom_exception _dom_namespace_validate_qname(struct dom_string *qname,
/* Test for invalid XML namespace */
if (dom_string_cmp(prefix, xml) == 0 &&
- dom_string_cmp(namespace, xml_ns) != 0) {
+ dom_string_cmp(namespace,
+ dom_namespaces[DOM_NAMESPACE_XML]) != 0) {
dom_string_unref(prefix);
return DOM_NAMESPACE_ERR;
}
/* Test for invalid xmlns namespace */
if (dom_string_cmp(prefix, xmlns) == 0 &&
- dom_string_cmp(namespace, xmlns_ns) != 0) {
+ dom_string_cmp(namespace,
+ dom_namespaces[DOM_NAMESPACE_XMLNS]) != 0) {
dom_string_unref(prefix);
return DOM_NAMESPACE_ERR;
}
/* Test for presence of xmlns namespace with non xmlns prefix */
- if (dom_string_cmp(namespace, xmlns_ns) == 0 &&
+ if (dom_string_cmp(namespace,
+ dom_namespaces[DOM_NAMESPACE_XMLNS]) == 0 &&
dom_string_cmp(prefix, xmlns) != 0) {
dom_string_unref(prefix);
return DOM_NAMESPACE_ERR;
diff --git a/test/lib/testobject.c b/test/lib/testobject.c
index a0ab835..4c27e0b 100644
--- a/test/lib/testobject.c
+++ b/test/lib/testobject.c
@@ -114,7 +114,7 @@ TestObject *test_object_create(int argc, char **argv,
snprintf(abuf, sizeof abuf, "%s/Aliases", argv[1]);
ret->parser.html = dom_hubbub_parser_create(abuf,
- NULL, "UTF-8", myrealloc, NULL, mymsg, NULL);
+ NULL, true, myrealloc, NULL, mymsg, NULL);
if (ret->parser.html == NULL) {
free(ret);
return NULL;