summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- include/hubbub/functypes.h 61
-rw-r--r-- include/hubbub/parser.h 6
-rw-r--r-- include/hubbub/tree.h 31
-rw-r--r-- src/Makefile 3
-rw-r--r-- src/parser.c 67
-rw-r--r-- src/treebuilder/Makefile 53
-rw-r--r-- src/treebuilder/treebuilder.c 196
-rw-r--r-- src/treebuilder/treebuilder.h 62
8 files changed, 468 insertions, 11 deletions
diff --git a/include/hubbub/functypes.h b/include/hubbub/functypes.h
index aa3e649..8d7f199 100644
--- a/include/hubbub/functypes.h
+++ b/include/hubbub/functypes.h
@@ -2,12 +2,13 @@
* This file is part of Hubbub.
* Licensed under the MIT License,
* http://www.opensource.org/licenses/mit-license.php
- * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ * Copyright 2007-8 John-Mark Bell <jmb@netsurf-browser.org>
*/
#ifndef hubbub_functypes_h_
#define hubbub_functypes_h_
+#include <stdbool.h>
#include <stdlib.h>
#include <hubbub/types.h>
@@ -32,6 +33,64 @@ typedef void (*hubbub_buffer_handler)(const uint8_t *data,
typedef void (*hubbub_error_handler)(uint32_t line, uint32_t col,
const char *message, void *pw);
+/*
+ * Tree construction callback types.
+ *
+ * Every callback type below takes an opaque client pointer (ctx) as its
+ * first argument and returns an int. Nodes are passed around as untyped
+ * void pointers; the callbacks that hand a node back do so through a
+ * trailing result out-parameter.
+ *
+ * NOTE(review): this header defines neither the meaning of the int return
+ * value nor who owns the nodes involved -- confirm against the treebuilder.
+ * The per-parameter notes below are inferred from the parameter names and
+ * should be treated as assumptions until the treebuilder uses the callbacks.
+ */
+
+/**
+ * Type of tree comment node creation function
+ *
+ * \param ctx     Opaque client pointer
+ * \param data    String for the comment (presumably its text)
+ * \param result  Out-parameter (presumably receives the created node)
+ */
+typedef int (*hubbub_tree_create_comment)(void *ctx, const hubbub_string *data,
+ void **result);
+
+/**
+ * Type of tree doctype node creation function
+ *
+ * \param ctx        Opaque client pointer
+ * \param qname      Doctype name string (presumably the qualified name)
+ * \param public_id  Public identifier string
+ * \param system_id  System identifier string
+ * \param result     Out-parameter (presumably receives the created node)
+ */
+typedef int (*hubbub_tree_create_doctype)(void *ctx, const hubbub_string *qname,
+ const hubbub_string *public_id, const hubbub_string *system_id,
+ void **result);
+
+/**
+ * Type of tree element node creation function
+ *
+ * \param ctx       Opaque client pointer
+ * \param tag_name  Tag name string for the element
+ * \param result    Out-parameter (presumably receives the created node)
+ */
+typedef int (*hubbub_tree_create_element)(void *ctx,
+ const hubbub_string *tag_name, void **result);
+
+/**
+ * Type of tree text node creation function
+ *
+ * \param ctx     Opaque client pointer
+ * \param data    String for the text node (presumably its content)
+ * \param result  Out-parameter (presumably receives the created node)
+ */
+typedef int (*hubbub_tree_create_text)(void *ctx, const hubbub_string *data,
+ void **result);
+
+/**
+ * Type of tree node destruction function
+ *
+ * \param ctx   Opaque client pointer
+ * \param node  Node to destroy
+ */
+typedef int (*hubbub_tree_free_node)(void *ctx, void *node);
+
+/**
+ * Type of tree node appending function
+ *
+ * \param ctx     Opaque client pointer
+ * \param parent  Node to append to
+ * \param child   Node to append
+ * \param result  Out-parameter for a node (which node is not specified here)
+ */
+typedef int (*hubbub_tree_append_child)(void *ctx, void *parent, void *child,
+ void **result);
+
+/**
+ * Type of tree node insertion function
+ *
+ * \param ctx        Opaque client pointer
+ * \param parent     Node to insert into
+ * \param child      Node to insert
+ * \param ref_child  Reference node (presumably child is placed before it)
+ * \param result     Out-parameter for a node (which node is not specified here)
+ */
+typedef int (*hubbub_tree_insert_before)(void *ctx, void *parent, void *child,
+ void *ref_child, void **result);
+
+/**
+ * Type of tree node removal function
+ *
+ * \param ctx     Opaque client pointer
+ * \param parent  Node to remove from
+ * \param child   Node to remove
+ * \param result  Out-parameter for a node (which node is not specified here)
+ */
+typedef int (*hubbub_tree_remove_child)(void *ctx, void *parent, void *child,
+ void **result);
+
+/**
+ * Type of tree node cloning function
+ *
+ * \param ctx     Opaque client pointer
+ * \param node    Node to clone
+ * \param deep    Clone depth flag (presumably true also clones descendants)
+ * \param result  Out-parameter (presumably receives the clone)
+ */
+typedef int (*hubbub_tree_clone_node)(void *ctx, void *node, bool deep,
+ void **result);
+
+/**
+ * Type of tree quirks mode notification function
+ *
+ * \param ctx     Opaque client pointer
+ * \param quirky  Quirks mode flag being reported
+ */
+typedef int (*hubbub_tree_set_quirks_mode)(void *ctx, bool quirky);
#endif
diff --git a/include/hubbub/parser.h b/include/hubbub/parser.h
index cdf8664..134f4b7 100644
--- a/include/hubbub/parser.h
+++ b/include/hubbub/parser.h
@@ -2,7 +2,7 @@
* This file is part of Hubbub.
* Licensed under the MIT License,
* http://www.opensource.org/licenses/mit-license.php
- * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ * Copyright 2007-8 John-Mark Bell <jmb@netsurf-browser.org>
*/
#ifndef hubbub_parser_h_
@@ -12,6 +12,7 @@
#include <hubbub/errors.h>
#include <hubbub/functypes.h>
+#include <hubbub/tree.h>
#include <hubbub/types.h>
typedef struct hubbub_parser hubbub_parser;
@@ -24,6 +25,7 @@ typedef enum hubbub_parser_opttype {
HUBBUB_PARSER_BUFFER_HANDLER,
HUBBUB_PARSER_ERROR_HANDLER,
HUBBUB_PARSER_CONTENT_MODEL,
+ HUBBUB_PARSER_TREE_HANDLER,
} hubbub_parser_opttype;
/**
@@ -48,6 +50,8 @@ typedef union hubbub_parser_optparams {
struct {
hubbub_content_model model;
} content_model;
+
+ hubbub_tree_handler tree_handler;
} hubbub_parser_optparams;
/* Create a hubbub parser */
diff --git a/include/hubbub/tree.h b/include/hubbub/tree.h
new file mode 100644
index 0000000..a883f1a
--- /dev/null
+++ b/include/hubbub/tree.h
@@ -0,0 +1,31 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_tree_h_
+#define hubbub_tree_h_
+
+#include <hubbub/functypes.h>
+
+/**
+ * Hubbub tree handler
+ *
+ * A table of client-supplied tree callbacks (see the callback types in
+ * hubbub/functypes.h) together with a client pointer. It is handed to the
+ * parser through the HUBBUB_PARSER_TREE_HANDLER option.
+ */
+typedef struct hubbub_tree_handler {
+ hubbub_tree_create_comment create_comment; /**< Comment node creation */
+ hubbub_tree_create_doctype create_doctype; /**< Doctype node creation */
+ hubbub_tree_create_element create_element; /**< Element node creation */
+ hubbub_tree_create_text create_text; /**< Text node creation */
+ hubbub_tree_free_node free_node; /**< Node destruction */
+ hubbub_tree_append_child append_child; /**< Node appending */
+ hubbub_tree_insert_before insert_before; /**< Node insertion */
+ hubbub_tree_remove_child remove_child; /**< Node removal */
+ hubbub_tree_clone_node clone_node; /**< Node cloning */
+ hubbub_tree_set_quirks_mode set_quirks_mode; /**< Quirks mode notification */
+ void *ctx; /**< Client pointer (presumably passed as each callback's ctx argument -- confirm) */
+} hubbub_tree_handler;
+
+#endif
+
diff --git a/src/Makefile b/src/Makefile
index b72a9e0..7af11a4 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -37,6 +37,7 @@ release: $(addprefix Release/, $(addsuffix .o, $(OBJS)))
@${MAKE} -C charset release
@${MAKE} -C input release
@${MAKE} -C tokeniser release
+ @${MAKE} -C treebuilder release
@${MAKE} -C utils release
@${AR} ${ARFLAGS} $(RELEASE) Release/*
@@ -44,6 +45,7 @@ debug: $(addprefix Debug/, $(addsuffix .o, $(OBJS)))
@${MAKE} -C charset debug
@${MAKE} -C input debug
@${MAKE} -C tokeniser debug
+ @${MAKE} -C treebuilder debug
@${MAKE} -C utils debug
@${AR} ${ARFLAGS} $(DEBUG) Debug/*
@@ -51,6 +53,7 @@ clean:
@${MAKE} -C charset clean
@${MAKE} -C input clean
@${MAKE} -C tokeniser clean
+ @${MAKE} -C treebuilder clean
@${MAKE} -C utils clean
-@${RM} ${RMFLAGS} $(addprefix Release/, $(addsuffix .o, ${OBJS}))
-@${RM} ${RMFLAGS} $(addprefix Debug/, $(addsuffix .o, ${OBJS}))
diff --git a/src/parser.c b/src/parser.c
index 258067f..23702e1 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -2,13 +2,14 @@
* This file is part of Hubbub.
* Licensed under the MIT License,
* http://www.opensource.org/licenses/mit-license.php
- * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ * Copyright 2007-8 John-Mark Bell <jmb@netsurf-browser.org>
*/
#include <hubbub/parser.h>
#include "input/inputstream.h"
#include "tokeniser/tokeniser.h"
+#include "treebuilder/treebuilder.h"
/**
* Hubbub parser object
@@ -16,6 +17,7 @@
struct hubbub_parser {
hubbub_inputstream *stream; /**< Input stream instance */
hubbub_tokeniser *tok; /**< Tokeniser instance */
+ hubbub_treebuilder *tb; /**< Treebuilder instance */
hubbub_alloc alloc; /**< Memory (de)allocation function */
void *pw; /**< Client data */
@@ -55,6 +57,14 @@ hubbub_parser *hubbub_parser_create(const char *enc, const char *int_enc,
return NULL;
}
+ parser->tb = hubbub_treebuilder_create(parser->tok, alloc, pw);
+ if (parser->tb == NULL) {
+ hubbub_tokeniser_destroy(parser->tok);
+ hubbub_inputstream_destroy(parser->stream);
+ alloc(parser, 0, pw);
+ return NULL;
+ }
+
parser->alloc = alloc;
parser->pw = pw;
@@ -71,6 +81,8 @@ void hubbub_parser_destroy(hubbub_parser *parser)
if (parser == NULL)
return;
+ hubbub_treebuilder_destroy(parser->tb);
+
hubbub_tokeniser_destroy(parser->tok);
hubbub_inputstream_destroy(parser->stream);
@@ -90,30 +102,67 @@ hubbub_error hubbub_parser_setopt(hubbub_parser *parser,
hubbub_parser_opttype type,
hubbub_parser_optparams *params)
{
- hubbub_tokeniser_opttype toktype;
+ hubbub_error result = HUBBUB_OK;;
if (parser == NULL || params == NULL)
return HUBBUB_BADPARM;
switch (type) {
case HUBBUB_PARSER_TOKEN_HANDLER:
- toktype = HUBBUB_TOKENISER_TOKEN_HANDLER;
+ if (parser->tb != NULL) {
+ /* Client is defining their own token handler,
+ * so we must destroy the default treebuilder */
+ hubbub_treebuilder_destroy(parser->tb);
+ parser->tb = NULL;
+ }
+ result = hubbub_tokeniser_setopt(parser->tok,
+ HUBBUB_TOKENISER_TOKEN_HANDLER,
+ (hubbub_tokeniser_optparams *) params);
break;
case HUBBUB_PARSER_BUFFER_HANDLER:
- toktype = HUBBUB_TOKENISER_BUFFER_HANDLER;
+ /* The buffer handler cascades, so if there's a treebuilder,
+ * simply inform that. Otherwise, tell the tokeniser. */
+ if (parser->tb != NULL) {
+ result = hubbub_treebuilder_setopt(parser->tb,
+ HUBBUB_TREEBUILDER_BUFFER_HANDLER,
+ (hubbub_treebuilder_optparams *) params);
+ } else {
+ result = hubbub_tokeniser_setopt(parser->tok,
+ HUBBUB_TOKENISER_BUFFER_HANDLER,
+ (hubbub_tokeniser_optparams *) params);
+ }
break;
case HUBBUB_PARSER_ERROR_HANDLER:
- toktype = HUBBUB_TOKENISER_BUFFER_HANDLER;
+ /* The error handler does not cascade, so tell both the
+ * treebuilder (if extant) and the tokeniser. */
+ if (parser->tb != NULL) {
+ result = hubbub_treebuilder_setopt(parser->tb,
+ HUBBUB_TREEBUILDER_ERROR_HANDLER,
+ (hubbub_treebuilder_optparams *) params);
+ }
+ if (result == HUBBUB_OK) {
+ result = hubbub_tokeniser_setopt(parser->tok,
+ HUBBUB_TOKENISER_ERROR_HANDLER,
+ (hubbub_tokeniser_optparams *) params);
+ }
break;
case HUBBUB_PARSER_CONTENT_MODEL:
- toktype = HUBBUB_TOKENISER_CONTENT_MODEL;
+ result = hubbub_tokeniser_setopt(parser->tok,
+ HUBBUB_TOKENISER_CONTENT_MODEL,
+ (hubbub_tokeniser_optparams *) params);
+ break;
+ case HUBBUB_PARSER_TREE_HANDLER:
+ if (parser->tb != NULL) {
+ result = hubbub_treebuilder_setopt(parser->tb,
+ HUBBUB_TREEBUILDER_TREE_HANDLER,
+ (hubbub_treebuilder_optparams *) params);
+ }
break;
default:
- return HUBBUB_INVALID;
+ result = HUBBUB_INVALID;
}
- return hubbub_tokeniser_setopt(parser->tok, toktype,
- (hubbub_tokeniser_optparams *) params);
+ return result;
}
/**
diff --git a/src/treebuilder/Makefile b/src/treebuilder/Makefile
new file mode 100644
index 0000000..d63a7a3
--- /dev/null
+++ b/src/treebuilder/Makefile
@@ -0,0 +1,53 @@
+# Makefile for libhubbub
+#
+# Toolchain is exported by top-level makefile
+#
+# Top-level makefile also exports the following variables:
+#
+# COMPONENT Name of component
+# EXPORT Absolute path of export directory
+# TOP Absolute path of source tree root
+#
+# The top-level makefile requires the following targets to exist:
+#
+# clean Clean source tree
+# debug Create a debug binary
+# distclean Fully clean source tree, back to pristine condition
+# export Export distributable components to ${EXPORT}
+# release Create a release binary
+# setup Perform any setup required prior to compilation
+# test Execute any test cases
+
+# Manipulate include paths
+CFLAGS += -I$(CURDIR)
+
+# Objects (basenames only; the .o suffix and output directory are added below)
+OBJS = treebuilder
+
+.PHONY: clean debug distclean export release setup test
+
+# Targets
+# release/debug build each object into ../Release or ../Debug respectively
+release: $(addprefix ../Release/, $(addsuffix .o, $(OBJS)))
+
+debug: $(addprefix ../Debug/, $(addsuffix .o, $(OBJS)))
+
+# Remove this directory's objects from both output directories
+clean:
+ -@${RM} ${RMFLAGS} $(addprefix ../Release/, $(addsuffix .o, ${OBJS}))
+ -@${RM} ${RMFLAGS} $(addprefix ../Debug/, $(addsuffix .o, ${OBJS}))
+
+# The remaining required targets have nothing to do in this directory
+distclean:
+
+setup:
+
+export:
+
+test:
+
+# Pattern rules
+# Release objects are compiled with -DNDEBUG
+../Release/%.o: %.c
+ @${ECHO} ${ECHOFLAGS} "==> $<"
+ @${CC} -c ${CFLAGS} -DNDEBUG -o $@ $<
+
+# Debug objects are compiled with -g
+../Debug/%.o: %.c
+ @${ECHO} ${ECHOFLAGS} "==> $<"
+ @${CC} -c -g ${CFLAGS} -o $@ $<
diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c
new file mode 100644
index 0000000..529cd08
--- /dev/null
+++ b/src/treebuilder/treebuilder.c
@@ -0,0 +1,196 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#include <string.h>
+
+#include "treebuilder/treebuilder.h"
+#include "utils/utils.h"
+
+/**
+ * Treebuilder object
+ *
+ * Sits between the tokeniser and the client: it registers its own token and
+ * buffer handlers with the tokeniser and relays buffer moves to the client.
+ */
+struct hubbub_treebuilder
+{
+ hubbub_tokeniser *tokeniser; /**< Underlying tokeniser */
+
+ const uint8_t *input_buffer; /**< Start of tokeniser's buffer */
+ size_t input_buffer_len; /**< Length of input buffer */
+
+ hubbub_tree_handler tree_handler; /**< Client tree callbacks (zeroed until set) */
+
+ hubbub_buffer_handler buffer_handler; /**< Client buffer handler, or NULL */
+ void *buffer_pw; /**< Client data for buffer handler */
+
+ hubbub_error_handler error_handler; /**< Client error handler, or NULL */
+ void *error_pw; /**< Client data for error handler */
+
+ hubbub_alloc alloc; /**< Memory (de)allocation function */
+ void *alloc_pw; /**< Client private data */
+};
+
+/* Callbacks that hubbub_treebuilder_create() registers with the tokeniser;
+ * in both, pw is the treebuilder instance. */
+static void hubbub_treebuilder_buffer_handler(const uint8_t *data,
+ size_t len, void *pw);
+static void hubbub_treebuilder_token_handler(const hubbub_token *token,
+ void *pw);
+
+/**
+ * Create a hubbub treebuilder
+ *
+ * The new treebuilder registers itself with \a tokeniser as both the token
+ * handler and the buffer handler. If the buffer handler cannot be
+ * registered, the token handler is detached again before the treebuilder
+ * is freed, so the tokeniser never keeps a pointer to freed memory.
+ *
+ * \param tokeniser  Underlying tokeniser instance
+ * \param alloc      Memory (de)allocation function; called as
+ *                   alloc(NULL, size, pw) to allocate and
+ *                   alloc(ptr, 0, pw) to free
+ * \param pw         Pointer to client-specific private data
+ * \return Pointer to treebuilder instance, or NULL on error.
+ */
+hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser,
+		hubbub_alloc alloc, void *pw)
+{
+	hubbub_treebuilder *tb;
+	hubbub_tokeniser_optparams tokparams;
+
+	if (tokeniser == NULL || alloc == NULL)
+		return NULL;
+
+	tb = alloc(NULL, sizeof(hubbub_treebuilder), pw);
+	if (tb == NULL)
+		return NULL;
+
+	tb->tokeniser = tokeniser;
+
+	/* Buffer location is unknown until the tokeniser reports it */
+	tb->input_buffer = NULL;
+	tb->input_buffer_len = 0;
+
+	/* No client callbacks are installed yet */
+	memset(&tb->tree_handler, 0, sizeof(hubbub_tree_handler));
+
+	tb->buffer_handler = NULL;
+	tb->buffer_pw = NULL;
+
+	tb->error_handler = NULL;
+	tb->error_pw = NULL;
+
+	tb->alloc = alloc;
+	tb->alloc_pw = pw;
+
+	tokparams.token_handler.handler = hubbub_treebuilder_token_handler;
+	tokparams.token_handler.pw = tb;
+
+	if (hubbub_tokeniser_setopt(tokeniser, HUBBUB_TOKENISER_TOKEN_HANDLER,
+			&tokparams) != HUBBUB_OK) {
+		alloc(tb, 0, pw);
+		return NULL;
+	}
+
+	tokparams.buffer_handler.handler = hubbub_treebuilder_buffer_handler;
+	tokparams.buffer_handler.pw = tb;
+
+	if (hubbub_tokeniser_setopt(tokeniser, HUBBUB_TOKENISER_BUFFER_HANDLER,
+			&tokparams) != HUBBUB_OK) {
+		/* The token handler registered above still refers to tb;
+		 * detach it so the tokeniser is not left holding a dangling
+		 * pointer once tb has been freed. */
+		tokparams.token_handler.handler = NULL;
+		tokparams.token_handler.pw = NULL;
+		hubbub_tokeniser_setopt(tokeniser,
+				HUBBUB_TOKENISER_TOKEN_HANDLER, &tokparams);
+
+		alloc(tb, 0, pw);
+		return NULL;
+	}
+
+	return tb;
+}
+
+/**
+ * Tear down a hubbub treebuilder and detach it from its tokeniser
+ *
+ * The client's buffer handler (possibly NULL) is handed to the tokeniser
+ * in place of the treebuilder's own, the tokeniser's token handler is
+ * cleared, and the instance is released through its own allocator.
+ * Passing NULL is a no-op.
+ *
+ * \param treebuilder  The treebuilder instance to destroy
+ */
+void hubbub_treebuilder_destroy(hubbub_treebuilder *treebuilder)
+{
+	hubbub_tokeniser_optparams opts;
+	hubbub_tokeniser *tok;
+
+	if (treebuilder == NULL)
+		return;
+
+	tok = treebuilder->tokeniser;
+
+	/* Route buffer notifications straight to the client again */
+	opts.buffer_handler.pw = treebuilder->buffer_pw;
+	opts.buffer_handler.handler = treebuilder->buffer_handler;
+	hubbub_tokeniser_setopt(tok, HUBBUB_TOKENISER_BUFFER_HANDLER, &opts);
+
+	/* No further tokens may be delivered to this instance */
+	opts.token_handler.pw = NULL;
+	opts.token_handler.handler = NULL;
+	hubbub_tokeniser_setopt(tok, HUBBUB_TOKENISER_TOKEN_HANDLER, &opts);
+
+	treebuilder->alloc(treebuilder, 0, treebuilder->alloc_pw);
+}
+
+/**
+ * Configure a hubbub treebuilder
+ *
+ * HUBBUB_TREEBUILDER_BUFFER_HANDLER stores the client's buffer handler and,
+ * if it is non-NULL, immediately informs it of the current input buffer.
+ * A NULL handler simply unregisters the previous one.
+ * HUBBUB_TREEBUILDER_ERROR_HANDLER and HUBBUB_TREEBUILDER_TREE_HANDLER only
+ * store the supplied values.
+ *
+ * \param treebuilder  The treebuilder instance to configure
+ * \param type         The option type to configure
+ * \param params       Pointer to option-specific parameters
+ * \return HUBBUB_OK on success, HUBBUB_BADPARM if \a treebuilder or
+ *         \a params is NULL.
+ */
+hubbub_error hubbub_treebuilder_setopt(hubbub_treebuilder *treebuilder,
+		hubbub_treebuilder_opttype type,
+		hubbub_treebuilder_optparams *params)
+{
+	if (treebuilder == NULL || params == NULL)
+		return HUBBUB_BADPARM;
+
+	switch (type) {
+	case HUBBUB_TREEBUILDER_BUFFER_HANDLER:
+		treebuilder->buffer_handler = params->buffer_handler.handler;
+		treebuilder->buffer_pw = params->buffer_handler.pw;
+		/* Bring the new handler up to date with the buffer's current
+		 * location. The handler may legitimately be NULL (it is NULL
+		 * after creation and the buffer callback guards against it),
+		 * so it must not be called unconditionally. */
+		if (treebuilder->buffer_handler != NULL) {
+			treebuilder->buffer_handler(treebuilder->input_buffer,
+					treebuilder->input_buffer_len,
+					treebuilder->buffer_pw);
+		}
+		break;
+	case HUBBUB_TREEBUILDER_ERROR_HANDLER:
+		treebuilder->error_handler = params->error_handler.handler;
+		treebuilder->error_pw = params->error_handler.pw;
+		break;
+	case HUBBUB_TREEBUILDER_TREE_HANDLER:
+		treebuilder->tree_handler = params->tree_handler;
+		break;
+	}
+
+	return HUBBUB_OK;
+}
+
+/**
+ * Tokeniser callback: the input buffer has moved
+ *
+ * Records the buffer's new location and length, then relays the same
+ * information to the client's buffer handler when one is registered.
+ *
+ * \param data  New location of buffer
+ * \param len   Length of buffer in bytes
+ * \param pw    Pointer to treebuilder instance
+ */
+void hubbub_treebuilder_buffer_handler(const uint8_t *data,
+		size_t len, void *pw)
+{
+	hubbub_treebuilder *tb = pw;
+
+	tb->input_buffer = data;
+	tb->input_buffer_len = len;
+
+	/* Nobody else to tell */
+	if (tb->buffer_handler == NULL)
+		return;
+
+	tb->buffer_handler(data, len, tb->buffer_pw);
+}
+
+/**
+ * Tokeniser callback: a token has been emitted
+ *
+ * Currently a stub; both arguments are ignored.
+ *
+ * \param token  The emitted token
+ * \param pw     Pointer to treebuilder instance
+ */
+void hubbub_treebuilder_token_handler(const hubbub_token *token,
+		void *pw)
+{
+	hubbub_treebuilder *tb = pw;
+
+	/** \todo implement this */
+
+	UNUSED(token);
+	UNUSED(tb);
+}
+
diff --git a/src/treebuilder/treebuilder.h b/src/treebuilder/treebuilder.h
new file mode 100644
index 0000000..0792e99
--- /dev/null
+++ b/src/treebuilder/treebuilder.h
@@ -0,0 +1,62 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_treebuilder_treebuilder_h_
+#define hubbub_treebuilder_treebuilder_h_
+
+#include <stdbool.h>
+#include <inttypes.h>
+
+#include <hubbub/errors.h>
+#include <hubbub/functypes.h>
+#include <hubbub/tree.h>
+#include <hubbub/types.h>
+
+#include "tokeniser/tokeniser.h"
+
+typedef struct hubbub_treebuilder hubbub_treebuilder;
+
+/**
+ * Hubbub treebuilder option types
+ *
+ * Selects which member of hubbub_treebuilder_optparams is read by
+ * hubbub_treebuilder_setopt().
+ */
+typedef enum hubbub_treebuilder_opttype {
+ HUBBUB_TREEBUILDER_BUFFER_HANDLER, /**< Set the client buffer handler */
+ HUBBUB_TREEBUILDER_ERROR_HANDLER, /**< Set the client error handler */
+ HUBBUB_TREEBUILDER_TREE_HANDLER, /**< Set the client tree handler */
+} hubbub_treebuilder_opttype;
+
+/**
+ * Hubbub treebuilder option parameters
+ *
+ * Only the member matching the option type passed to
+ * hubbub_treebuilder_setopt() is read.
+ */
+typedef union hubbub_treebuilder_optparams {
+ /** Parameters for HUBBUB_TREEBUILDER_BUFFER_HANDLER */
+ struct {
+ hubbub_buffer_handler handler; /**< Buffer handler function */
+ void *pw; /**< Client data passed to the handler */
+ } buffer_handler;
+
+ /** Parameters for HUBBUB_TREEBUILDER_ERROR_HANDLER */
+ struct {
+ hubbub_error_handler handler; /**< Error handler function */
+ void *pw; /**< Client data stored with the handler */
+ } error_handler;
+
+ /** Parameters for HUBBUB_TREEBUILDER_TREE_HANDLER (copied by value) */
+ hubbub_tree_handler tree_handler;
+} hubbub_treebuilder_optparams;
+
+/* Create a hubbub treebuilder on top of an existing tokeniser;
+ * returns NULL on error */
+hubbub_treebuilder *hubbub_treebuilder_create(hubbub_tokeniser *tokeniser,
+ hubbub_alloc alloc, void *pw);
+
+/* Destroy a hubbub treebuilder (NULL is accepted and ignored) */
+void hubbub_treebuilder_destroy(hubbub_treebuilder *treebuilder);
+
+/* Configure a hubbub treebuilder; returns HUBBUB_BADPARM if treebuilder
+ * or params is NULL */
+hubbub_error hubbub_treebuilder_setopt(hubbub_treebuilder *treebuilder,
+ hubbub_treebuilder_opttype type,
+ hubbub_treebuilder_optparams *params);
+
+#endif
+