From 975de383a09510aae8d5a32f5fc97181fe2846a5 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Sun, 4 Nov 2007 02:08:05 +0000 Subject: Beginnings of a hubbub binding to libdom Currently comprises a stubbed-out parser wrapper Todo: 1) Complete parser wrapper, such that it actually creates a DOM tree 2) Provide a hubbub-backed DOMImplementationSource svn path=/trunk/dom/; revision=3646 --- bindings/hubbub/Makefile | 70 +++++++++++++++ bindings/hubbub/README | 29 +++++++ bindings/hubbub/errors.h | 19 +++++ bindings/hubbub/parser.c | 215 +++++++++++++++++++++++++++++++++++++++++++++++ bindings/hubbub/parser.h | 40 +++++++++ bindings/hubbub/utils.h | 28 ++++++ 6 files changed, 401 insertions(+) create mode 100644 bindings/hubbub/Makefile create mode 100644 bindings/hubbub/README create mode 100644 bindings/hubbub/errors.h create mode 100644 bindings/hubbub/parser.c create mode 100644 bindings/hubbub/parser.h create mode 100644 bindings/hubbub/utils.h (limited to 'bindings/hubbub') diff --git a/bindings/hubbub/Makefile b/bindings/hubbub/Makefile new file mode 100644 index 0000000..dd430c8 --- /dev/null +++ b/bindings/hubbub/Makefile @@ -0,0 +1,70 @@ +# Makefile for libdom +# +# Toolchain is exported by top-level makefile +# +# Top-level makefile also exports the following variables: +# +# COMPONENT Name of component +# EXPORT Absolute path of export directory +# TOP Absolute path of source tree root +# +# The top-level makefile requires the following targets to exist: +# +# clean Clean source tree +# debug Create a debug binary +# distclean Fully clean source tree, back to pristine condition +# export Export distributable components to ${EXPORT} +# release Create a release binary +# setup Perform any setup required prior to compilation +# test Execute any test cases + +# Manipulate include paths +# TODO: fix hubbub include path finding -- needs hubbub to use pkgconfig +CFLAGS += -I$(CURDIR) \ + -I../../../hubbub/include + +# Release output +RELEASE = 
${TOP}/${COMPONENT}-libhubbub.a + +# Debug output +DEBUG = ${TOP}/${COMPONENT}-libhubbub-debug.a + +# Objects +OBJS = parser + +.PHONY: clean debug distclean export release setup test + +# Targets +release: $(addprefix Release/, $(addsuffix .o, $(OBJS))) + @${AR} ${ARFLAGS} $(RELEASE) Release/* + +debug: $(addprefix Debug/, $(addsuffix .o, $(OBJS))) + @${AR} ${ARFLAGS} $(DEBUG) Debug/* + +clean: + -@${RM} ${RMFLAGS} $(addprefix Release/, $(addsuffix .o, ${OBJS})) + -@${RM} ${RMFLAGS} $(addprefix Debug/, $(addsuffix .o, ${OBJS})) + -@${RM} ${RMFLAGS} $(RELEASE) $(DEBUG) + +distclean: + -@${RM} ${RMFLAGS} -r Release + -@${RM} ${RMFLAGS} -r Debug + +setup: + @${MKDIR} ${MKDIRFLAGS} Release + @${MKDIR} ${MKDIRFLAGS} Debug + +export: + @${CP} ${CPFLAGS} $(RELEASE) ${EXPORT}/lib/ + +test: + +# Pattern rules +Release/%.o: %.c + @${ECHO} ${ECHOFLAGS} "==> $<" + @${CC} -c ${CFLAGS} -DNDEBUG -o $@ $< + +Debug/%.o: %.c + @${ECHO} ${ECHOFLAGS} "==> $<" + @${CC} -c -g ${CFLAGS} -o $@ $< + diff --git a/bindings/hubbub/README b/bindings/hubbub/README new file mode 100644 index 0000000..43b7be7 --- /dev/null +++ b/bindings/hubbub/README @@ -0,0 +1,29 @@ +Hubbub binding for libdom +========================= + +Overview +-------- + + This is an example binding of hubbub to libdom. It consists of two, + orthogonal, parts: + + 1) hubbub parser wrapper + 2) hubbub-specific DOMImplementationSource for libdom + +Push parser wrapper +------------------- + + This is a wrapper around hubbub's parser API, to facilitate + construction of a libdom DOM tree. The basic premise is that the wrapper + intercepts the SAX-like events emitted by hubbub's tokeniser then builds + a libdom DOM tree from them. + +DOMImplementationSource +----------------------- + + The DOMImplementationSource exposes the APIs needed to create a new + document based upon the hubbub binding. It also provides the utility + functions that libdom uses when performing some operations (such as + document normalization). 
This is needed as libdom is document language + agnostic; therefore, it requires support from the binding to perform + some operations. diff --git a/bindings/hubbub/errors.h b/bindings/hubbub/errors.h new file mode 100644 index 0000000..4409fb9 --- /dev/null +++ b/bindings/hubbub/errors.h @@ -0,0 +1,19 @@ +/* + * This file is part of libdom. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2007 John-Mark Bell + */ + +#ifndef dom_hubbub_errors_h_ +#define dom_hubbub_errors_h_ + +typedef enum { + DOM_HUBBUB_OK = 0, + + DOM_HUBBUB_NOMEM = 1, + + DOM_HUBBUB_HUBBUB_ERR = (1<<16), +} dom_hubbub_error; + +#endif diff --git a/bindings/hubbub/parser.c b/bindings/hubbub/parser.c new file mode 100644 index 0000000..8931fdf --- /dev/null +++ b/bindings/hubbub/parser.c @@ -0,0 +1,215 @@ +/* + * This file is part of libdom. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2007 John-Mark Bell + */ + +#include +#include + +#include + +#include "parser.h" +#include "utils.h" + +/** + * libdom Hubbub parser object + */ +struct dom_hubbub_parser { + hubbub_parser *parser; /**< Hubbub parser instance */ + + struct dom_document *doc; /**< DOM Document we're building */ + + bool complete; /**< Indicate stream completion */ + + struct dom_implementation *impl;/**< DOM implementation */ + + dom_alloc alloc; /**< Memory (de)allocation function */ + void *pw; /**< Pointer to client data */ + + dom_msg msg; /**< Informational messaging function */ + void *mctx; /**< Pointer to client data */ +}; + +static void __dom_hubbub_buffer_handler(const uint8_t *buffer, size_t len, + void *pw); +static void __dom_hubbub_token_handler(const hubbub_token *token, void *pw); + +static bool __initialised; + +/** + * Create a Hubbub parser instance + * + * \param enc Source charset, or NULL + * \param int_enc Desired charset of document buffer (UTF-8 or UTF-16) + * \param alloc Memory 
(de)allocation function + * \param pw Pointer to client-specific private data + * \param msg Informational message function + * \param mctx Pointer to client-specific private data + * \return Pointer to instance, or NULL on failure (reported through msg) + */ +dom_hubbub_parser *dom_hubbub_parser_create(const char *enc, + const char *int_enc, dom_alloc alloc, void *pw, + dom_msg msg, void *mctx) +{ + dom_hubbub_parser *parser; + hubbub_parser_optparams params; + struct dom_string *features; + dom_exception err; + hubbub_error e; + + if (__initialised == false) { + /** \todo Need path of encoding aliases file */ + e = hubbub_initialise("", (hubbub_alloc) alloc, pw); + if (e != HUBBUB_OK) { + msg(DOM_MSG_ERROR, mctx, + "Failed initialising hubbub"); + return NULL; + } + + __initialised = true; + } + + parser = alloc(NULL, sizeof(dom_hubbub_parser), pw); + if (parser == NULL) { + msg(DOM_MSG_CRITICAL, mctx, "No memory for parser"); + return NULL; + } + + parser->parser = hubbub_parser_create(enc, int_enc, + (hubbub_alloc) alloc, pw); + if (parser->parser == NULL) { + alloc(parser, 0, pw); + msg(DOM_MSG_CRITICAL, mctx, "Failed to create hubbub parser"); + return NULL; + } + + params.buffer_handler.handler = __dom_hubbub_buffer_handler; + params.buffer_handler.pw = parser; + e = hubbub_parser_setopt(parser->parser, HUBBUB_PARSER_BUFFER_HANDLER, + &params); + if (e != HUBBUB_OK) { + hubbub_parser_destroy(parser->parser); + alloc(parser, 0, pw); + msg(DOM_MSG_CRITICAL, mctx, + "Failed registering hubbub buffer handler"); + return NULL; + } + + params.token_handler.handler = __dom_hubbub_token_handler; + params.token_handler.pw = parser; + e = hubbub_parser_setopt(parser->parser, HUBBUB_PARSER_TOKEN_HANDLER, + &params); + if (e != HUBBUB_OK) { + hubbub_parser_destroy(parser->parser); + alloc(parser, 0, pw); + msg(DOM_MSG_CRITICAL, mctx, + "Failed registering hubbub token handler"); + return NULL; + } + + parser->doc = NULL; + + parser->complete = false; + + /* Get DOM implementation */ + /* 
Create string representation of the features we want */ + err = dom_string_create_from_ptr_no_doc(alloc, pw, + DOM_STRING_UTF8, + (const uint8_t *) "HTML", SLEN("HTML"), &features); + if (err != DOM_NO_ERR) { + hubbub_parser_destroy(parser->parser); + alloc(parser, 0, pw); + msg(DOM_MSG_CRITICAL, mctx, "No memory for feature string"); + return NULL; + } + + /* Now, try to get an appropriate implementation from the registry */ + err = dom_implregistry_get_dom_implementation(features, + &parser->impl, alloc, pw); + if (err != DOM_NO_ERR) { + dom_string_unref(features); + hubbub_parser_destroy(parser->parser); + alloc(parser, 0, pw); + msg(DOM_MSG_ERROR, mctx, "No suitable DOMImplementation"); + return NULL; + } + + /* no longer need the features string */ + dom_string_unref(features); + + parser->alloc = alloc; + parser->pw = pw; + + parser->msg = msg; + parser->mctx = mctx; + + return parser; +} + +/* Destroy a Hubbub parser instance */ +void dom_hubbub_parser_destroy(dom_hubbub_parser *parser) +{ + dom_implementation_unref(parser->impl); + + hubbub_parser_destroy(parser->parser); + + /** \todo do we want to clean up the document here too? 
*/ + + parser->alloc(parser, 0, parser->pw); +} + +/* Parse a chunk of data */ +dom_hubbub_error dom_hubbub_parser_parse_chunk(dom_hubbub_parser *parser, + uint8_t *data, size_t len) +{ + hubbub_error err; + + err = hubbub_parser_parse_chunk(parser->parser, data, len); + if (err != HUBBUB_OK) { + parser->msg(DOM_MSG_ERROR, parser->mctx, + "hubbub_parser_parse_chunk failed: %d", err); + return DOM_HUBBUB_HUBBUB_ERR | err; + } + + return DOM_HUBBUB_OK; +} + +/* Notify parser that datastream is empty */ +dom_hubbub_error dom_hubbub_parser_completed(dom_hubbub_parser *parser) +{ + hubbub_error err; + + err = hubbub_parser_completed(parser->parser); + if (err != DOM_HUBBUB_OK) { + parser->msg(DOM_MSG_ERROR, parser->mctx, + "hubbub_parser_completed failed: %d", err); + return DOM_HUBBUB_HUBBUB_ERR | err; + } + + parser->complete = true; + + return DOM_HUBBUB_OK; +} + +/* Retrieve the created DOM Document */ +struct dom_document *dom_hubbub_parser_get_document(dom_hubbub_parser *parser) +{ + return (parser->complete ? parser->doc : NULL); +} + +void __dom_hubbub_buffer_handler(const uint8_t *buffer, size_t len, + void *pw) +{ + UNUSED(buffer); + UNUSED(len); + UNUSED(pw); +} + +void __dom_hubbub_token_handler(const hubbub_token *token, void *pw) +{ + UNUSED(token); + UNUSED(pw); +} + diff --git a/bindings/hubbub/parser.h b/bindings/hubbub/parser.h new file mode 100644 index 0000000..65da1ea --- /dev/null +++ b/bindings/hubbub/parser.h @@ -0,0 +1,40 @@ +/* + * This file is part of libdom. 
+ * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2007 John-Mark Bell + */ + +#ifndef dom_hubbub_parser_h_ +#define dom_hubbub_parser_h_ + +#include +#include + +#include + +#include "errors.h" + +struct dom_document; + +typedef struct dom_hubbub_parser dom_hubbub_parser; + +/* Create a Hubbub parser instance */ +dom_hubbub_parser *dom_hubbub_parser_create(const char *enc, + const char *int_enc, dom_alloc alloc, void *pw, + dom_msg msg, void *mctx); + +/* Destroy a Hubbub parser instance */ +void dom_hubbub_parser_destroy(dom_hubbub_parser *parser); + +/* Parse a chunk of data */ +dom_hubbub_error dom_hubbub_parser_parse_chunk(dom_hubbub_parser *parser, + uint8_t *data, size_t len); + +/* Notify parser that datastream is empty */ +dom_hubbub_error dom_hubbub_parser_completed(dom_hubbub_parser *parser); + +/* Retrieve the created DOM Document */ +struct dom_document *dom_hubbub_parser_get_document(dom_hubbub_parser *parser); + +#endif diff --git a/bindings/hubbub/utils.h b/bindings/hubbub/utils.h new file mode 100644 index 0000000..2e2aaf2 --- /dev/null +++ b/bindings/hubbub/utils.h @@ -0,0 +1,28 @@ +/* + * This file is part of libdom. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2007 John-Mark Bell + */ + +#ifndef dom_hubbub_utils_h_ +#define dom_hubbub_utils_h_ + +#ifndef max +#define max(a,b) ((a)>(b)?(a):(b)) +#endif + +#ifndef min +#define min(a,b) ((a)<(b)?(a):(b)) +#endif + +#ifndef SLEN +/* Calculate length of a string constant */ +#define SLEN(s) (sizeof((s)) - 1) /* -1 for '\0' */ +#endif + +#ifndef UNUSED +#define UNUSED(x) ((void) (x)) /* evaluate and discard; also valid for const objects */ +#endif + +#endif -- cgit v1.2.3