From c34dc685f70193728ead525f59d56cdbf116f574 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Sun, 4 Nov 2007 03:40:09 +0000 Subject: Make TestObject support both HTML and XML documents and auto-detect which parser to use. Make binding testcase attempt to load an HTML document. Hubbub parser binding constructor takes Aliases file path as a parameter. Hubbub parser binding's token handler now spews debug at stdout. svn path=/trunk/dom/; revision=3648 --- bindings/hubbub/parser.c | 78 ++++++++++++++++++++++++++++++++++++++++++------ bindings/hubbub/parser.h | 6 ++-- 2 files changed, 72 insertions(+), 12 deletions(-) (limited to 'bindings') diff --git a/bindings/hubbub/parser.c b/bindings/hubbub/parser.c index 8931fdf..9473438 100644 --- a/bindings/hubbub/parser.c +++ b/bindings/hubbub/parser.c @@ -5,6 +5,8 @@ * Copyright 2007 John-Mark Bell */ +#include <stdio.h> + #include #include @@ -18,6 +20,7 @@ */ struct dom_hubbub_parser { hubbub_parser *parser; /**< Hubbub parser instance */ + const uint8_t *buffer; /**< Parser buffer pointer */ struct dom_document *doc; /**< DOM Document we're building */ @@ -41,6 +44,7 @@ static bool __initialised; /** * Create a Hubbub parser instance * + * \param aliases Path to encoding alias mapping file * \param enc Source charset, or NULL * \param int_enc Desired charset of document buffer (UTF-8 or UTF-16) * \param alloc Memory (de)allocation function @@ -49,9 +53,9 @@ static bool __initialised; * \param mctx Pointer to client-specific private data * \return Pointer to instance, or NULL on memory exhaustion */ -dom_hubbub_parser *dom_hubbub_parser_create(const char *enc, - const char *int_enc, dom_alloc alloc, void *pw, - dom_msg msg, void *mctx) +dom_hubbub_parser *dom_hubbub_parser_create(const char *aliases, + const char *enc, const char *int_enc, + dom_alloc alloc, void *pw, dom_msg msg, void *mctx) { dom_hubbub_parser *parser; hubbub_parser_optparams params; @@ -60,8 +64,7 @@ dom_hubbub_parser *dom_hubbub_parser_create(const char 
*enc, hubbub_error e; if (__initialised == false) { - /** \todo Need path of encoding aliases file */ - e = hubbub_initialise("", (hubbub_alloc) alloc, pw); + e = hubbub_initialise(aliases, (hubbub_alloc) alloc, pw); if (e != HUBBUB_OK) { msg(DOM_MSG_ERROR, mctx, "Failed initialising hubbub"); @@ -202,14 +205,71 @@ struct dom_document *dom_hubbub_parser_get_document(dom_hubbub_parser *parser) void __dom_hubbub_buffer_handler(const uint8_t *buffer, size_t len, void *pw) { - UNUSED(buffer); + dom_hubbub_parser *parser = (dom_hubbub_parser *) pw; + UNUSED(len); - UNUSED(pw); + + parser->buffer = buffer; } void __dom_hubbub_token_handler(const hubbub_token *token, void *pw) { - UNUSED(token); - UNUSED(pw); + dom_hubbub_parser *parser = (dom_hubbub_parser *) pw; + static const char *token_names[] = { + "DOCTYPE", "START TAG", "END TAG", + "COMMENT", "CHARACTERS", "EOF" + }; + size_t i; + + printf("%s: ", token_names[token->type]); + + switch (token->type) { + case HUBBUB_TOKEN_DOCTYPE: + printf("'%.*s' (%svalid)\n", + (int) token->data.doctype.name.len, + parser->buffer + + token->data.doctype.name.data_off, + token->data.doctype.correct ? "" : "in"); + break; + case HUBBUB_TOKEN_START_TAG: + printf("'%.*s' %s\n", + (int) token->data.tag.name.len, + parser->buffer + token->data.tag.name.data_off, + (token->data.tag.n_attributes > 0) ? + "attributes:" : ""); + for (i = 0; i < token->data.tag.n_attributes; i++) { + printf("\t'%.*s' = '%.*s'\n", + (int) token->data.tag.attributes[i].name.len, + parser->buffer + token->data.tag.attributes[i].name.data_off, + (int) token->data.tag.attributes[i].value.len, + parser->buffer + token->data.tag.attributes[i].value.data_off); + } + break; + case HUBBUB_TOKEN_END_TAG: + printf("'%.*s' %s\n", + (int) token->data.tag.name.len, + parser->buffer + token->data.tag.name.data_off, + (token->data.tag.n_attributes > 0) ? 
+ "attributes:" : ""); + for (i = 0; i < token->data.tag.n_attributes; i++) { + printf("\t'%.*s' = '%.*s'\n", + (int) token->data.tag.attributes[i].name.len, + parser->buffer + token->data.tag.attributes[i].name.data_off, + (int) token->data.tag.attributes[i].value.len, + parser->buffer + token->data.tag.attributes[i].value.data_off); + } + break; + case HUBBUB_TOKEN_COMMENT: + printf("'%.*s'\n", (int) token->data.comment.len, + parser->buffer + token->data.comment.data_off); + break; + case HUBBUB_TOKEN_CHARACTER: + printf("'%.*s'\n", (int) token->data.character.len, + parser->buffer + token->data.character.data_off); + break; + case HUBBUB_TOKEN_EOF: + printf("\n"); + break; + } } diff --git a/bindings/hubbub/parser.h b/bindings/hubbub/parser.h index 65da1ea..f4c2ac4 100644 --- a/bindings/hubbub/parser.h +++ b/bindings/hubbub/parser.h @@ -20,9 +20,9 @@ struct dom_document; typedef struct dom_hubbub_parser dom_hubbub_parser; /* Create a Hubbub parser instance */ -dom_hubbub_parser *dom_hubbub_parser_create(const char *enc, - const char *int_enc, dom_alloc alloc, void *pw, - dom_msg msg, void *mctx); +dom_hubbub_parser *dom_hubbub_parser_create(const char *aliases, + const char *enc, const char *int_enc, + dom_alloc alloc, void *pw, dom_msg msg, void *mctx); /* Destroy a Hubbub parser instance */ void dom_hubbub_parser_destroy(dom_hubbub_parser *parser); -- cgit v1.2.3