summaryrefslogtreecommitdiff
path: root/bindings/hubbub/parser.c
blob: 8931fdf6020e687edac4d9977d896167ef419430 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
/*
 * This file is part of libdom.
 * Licensed under the MIT License,
 *                http://www.opensource.org/licenses/mit-license.php
 * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
 */

#include <hubbub/hubbub.h>
#include <hubbub/parser.h>

#include <dom/dom.h>

#include "parser.h"
#include "utils.h"

/**
 * libdom Hubbub parser object
 *
 * Binding state that ties one Hubbub parser instance to the DOM document
 * built from its output, together with the client-supplied allocator and
 * message callbacks.
 */
struct dom_hubbub_parser {
	hubbub_parser *parser;		/**< Hubbub parser instance */

	struct dom_document *doc;	/**< DOM Document we're building */

	bool complete;			/**< Set once the input stream has been
					 *   reported complete; the document is
					 *   only handed out after that */

	struct dom_implementation *impl;/**< DOM implementation */

	dom_alloc alloc;		/**< Memory (de)allocation function */
	void *pw;			/**< Client data passed to alloc */

	dom_msg msg;			/**< Informational messaging function */
	void *mctx;			/**< Client data passed to msg */
};

/* Hubbub callbacks, registered on the parser by dom_hubbub_parser_create() */
static void __dom_hubbub_buffer_handler(const uint8_t *buffer, size_t len, 
		void *pw);
static void __dom_hubbub_token_handler(const hubbub_token *token, void *pw);

/** Set once hubbub_initialise() has succeeded, so it is only called once */
static bool __initialised;

/**
 * Create a Hubbub parser instance
 *
 * The first call that gets this far also initialises the Hubbub library.
 * Every failure is reported through \a msg before NULL is returned, and
 * anything acquired up to that point is released again.
 *
 * \param enc      Source charset, or NULL
 * \param int_enc  Desired charset of document buffer (UTF-8 or UTF-16)
 * \param alloc    Memory (de)allocation function
 * \param pw       Pointer to client-specific private data
 * \param msg      Informational message function
 * \param mctx     Pointer to client-specific private data
 * \return Pointer to instance, or NULL on failure (Hubbub initialisation
 *         or setup failure, memory exhaustion, or no suitable
 *         DOM implementation)
 */
dom_hubbub_parser *dom_hubbub_parser_create(const char *enc, 
		const char *int_enc, dom_alloc alloc, void *pw, 
		dom_msg msg, void *mctx)
{
	dom_hubbub_parser *binding;
	hubbub_parser_optparams opts;
	struct dom_string *wanted;
	dom_exception dom_err;
	hubbub_error hub_err;

	/* One-off initialisation of the Hubbub library */
	if (!__initialised) {
		/** \todo Need path of encoding aliases file */
		hub_err = hubbub_initialise("", (hubbub_alloc) alloc, pw);
		if (hub_err != HUBBUB_OK) {
			msg(DOM_MSG_ERROR, mctx, 
					"Failed initialising hubbub");
			return NULL;
		}

		__initialised = true;
	}

	/* Allocate the binding object itself */
	binding = alloc(NULL, sizeof(*binding), pw);
	if (binding == NULL) {
		msg(DOM_MSG_CRITICAL, mctx, "No memory for parser");
		return NULL;
	}

	/* Fill in everything that does not depend on later steps */
	binding->doc = NULL;
	binding->complete = false;
	binding->alloc = alloc;
	binding->pw = pw;
	binding->msg = msg;
	binding->mctx = mctx;

	/* Create the underlying Hubbub parser */
	binding->parser = hubbub_parser_create(enc, int_enc, 
			(hubbub_alloc) alloc, pw);
	if (binding->parser == NULL) {
		alloc(binding, 0, pw);
		msg(DOM_MSG_CRITICAL, mctx, "Failed to create hubbub parser");
		return NULL;
	}

	/* Hook up the buffer callback */
	opts.buffer_handler.handler = __dom_hubbub_buffer_handler;
	opts.buffer_handler.pw = binding;
	hub_err = hubbub_parser_setopt(binding->parser, 
			HUBBUB_PARSER_BUFFER_HANDLER, &opts);
	if (hub_err != HUBBUB_OK) {
		hubbub_parser_destroy(binding->parser);
		alloc(binding, 0, pw);
		msg(DOM_MSG_CRITICAL, mctx, 
				"Failed registering hubbub buffer handler");
		return NULL;
	}

	/* Hook up the token callback */
	opts.token_handler.handler = __dom_hubbub_token_handler;
	opts.token_handler.pw = binding;
	hub_err = hubbub_parser_setopt(binding->parser, 
			HUBBUB_PARSER_TOKEN_HANDLER, &opts);
	if (hub_err != HUBBUB_OK) {
		hubbub_parser_destroy(binding->parser);
		alloc(binding, 0, pw);
		msg(DOM_MSG_CRITICAL, mctx,
				"Failed registering hubbub token handler");
		return NULL;
	}

	/* Build the feature string used to look up a DOM implementation */
	dom_err = dom_string_create_from_ptr_no_doc(alloc, pw,
			DOM_STRING_UTF8,
			(const uint8_t *) "HTML", SLEN("HTML"), &wanted);
	if (dom_err != DOM_NO_ERR) {
		hubbub_parser_destroy(binding->parser);
		alloc(binding, 0, pw);
		msg(DOM_MSG_CRITICAL, mctx, "No memory for feature string");
		return NULL;
	}

	/* Ask the registry for a matching implementation */
	dom_err = dom_implregistry_get_dom_implementation(wanted,
			&binding->impl, alloc, pw);

	/* The feature string is finished with, whatever the outcome */
	dom_string_unref(wanted);

	if (dom_err != DOM_NO_ERR) {
		hubbub_parser_destroy(binding->parser);
		alloc(binding, 0, pw);
		msg(DOM_MSG_ERROR, mctx, "No suitable DOMImplementation");
		return NULL;
	}

	return binding;
}

/**
 * Destroy a Hubbub parser instance
 *
 * Drops the reference held on the DOM implementation, destroys the
 * underlying Hubbub parser and frees the parser object using the allocator
 * stored in it. The document is left untouched.
 *
 * \param parser  Parser instance to destroy; NULL is accepted and ignored
 */
void dom_hubbub_parser_destroy(dom_hubbub_parser *parser)
{
	/* Tolerate NULL so callers can destroy unconditionally on cleanup */
	if (parser == NULL)
		return;

	dom_implementation_unref(parser->impl);

	hubbub_parser_destroy(parser->parser);

	/** \todo do we want to clean up the document here too? */

	parser->alloc(parser, 0, parser->pw);
}

/**
 * Parse a chunk of data
 *
 * \param parser  Parser instance to feed
 * \param data    Pointer to the chunk
 * \param len     Length of the chunk, in bytes
 * \return DOM_HUBBUB_OK on success, otherwise DOM_HUBBUB_HUBBUB_ERR
 *         combined (bitwise OR) with the Hubbub error code
 */
dom_hubbub_error dom_hubbub_parser_parse_chunk(dom_hubbub_parser *parser,
		uint8_t *data, size_t len)
{
	hubbub_error status = 
			hubbub_parser_parse_chunk(parser->parser, data, len);

	if (status == HUBBUB_OK)
		return DOM_HUBBUB_OK;

	/* Report the failure to the client before passing the code back */
	parser->msg(DOM_MSG_ERROR, parser->mctx,
			"hubbub_parser_parse_chunk failed: %d", status);

	return DOM_HUBBUB_HUBBUB_ERR | status;
}

/**
 * Notify parser that datastream is empty
 *
 * On success the parser is marked complete, which is what allows
 * dom_hubbub_parser_get_document() to hand out the document.
 *
 * \param parser  Parser instance to notify
 * \return DOM_HUBBUB_OK on success, otherwise DOM_HUBBUB_HUBBUB_ERR
 *         combined (bitwise OR) with the Hubbub error code
 */
dom_hubbub_error dom_hubbub_parser_completed(dom_hubbub_parser *parser)
{
	hubbub_error err;

	err = hubbub_parser_completed(parser->parser);
	/* err is a hubbub_error, so test it against Hubbub's own success
	 * code rather than the binding's DOM_HUBBUB_OK */
	if (err != HUBBUB_OK) {
		parser->msg(DOM_MSG_ERROR, parser->mctx,
				"hubbub_parser_completed failed: %d", err);
		return DOM_HUBBUB_HUBBUB_ERR | err;
	}

	parser->complete = true;

	return DOM_HUBBUB_OK;
}

/**
 * Retrieve the created DOM Document
 *
 * \param parser  Parser instance to query
 * \return The parser's document pointer once the parser has been marked
 *         complete, NULL before that
 */
struct dom_document *dom_hubbub_parser_get_document(dom_hubbub_parser *parser)
{
	if (!parser->complete)
		return NULL;

	return parser->doc;
}

/**
 * Hubbub buffer callback
 *
 * Currently a stub: every argument is ignored.
 *
 * \param buffer  Buffer supplied by Hubbub
 * \param len     Length supplied by Hubbub
 * \param pw      Client data registered alongside the handler
 */
static void __dom_hubbub_buffer_handler(const uint8_t *buffer, size_t len,
		void *pw)
{
	UNUSED(pw);
	UNUSED(len);
	UNUSED(buffer);
}

/**
 * Hubbub token callback
 *
 * Currently a stub: every argument is ignored.
 *
 * \param token  Token supplied by Hubbub
 * \param pw     Client data registered alongside the handler
 */
static void __dom_hubbub_token_handler(const hubbub_token *token, void *pw)
{
	UNUSED(pw);
	UNUSED(token);
}