From 7b30a5520cfb56e651f0eb4da85a3e07747da7dc Mon Sep 17 00:00:00 2001
From: John Mark Bell
Date: Sat, 23 Jun 2007 22:40:25 +0000
Subject: Import hubbub -- an HTML parsing library. Plenty of work still to do (like tree generation ;)

svn path=/trunk/hubbub/; revision=3359
---
NOTE(review): the header names on the four #include lines were stripped from
this copy of the patch (along with the author e-mail addresses). The include
targets below are reconstructed to the names the declarations appear to need;
confirm them against the repository before applying.

 include/hubbub/parser.h | 84 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 84 insertions(+)
 create mode 100644 include/hubbub/parser.h

(limited to 'include/hubbub/parser.h')

diff --git a/include/hubbub/parser.h b/include/hubbub/parser.h
new file mode 100644
index 0000000..cdf8664
--- /dev/null
+++ b/include/hubbub/parser.h
@@ -0,0 +1,84 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ *                http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell
+ */
+
+#ifndef hubbub_parser_h_
+#define hubbub_parser_h_
+
+#include <inttypes.h>
+
+#include <hubbub/errors.h>
+#include <hubbub/functypes.h>
+#include <hubbub/types.h>
+
+typedef struct hubbub_parser hubbub_parser;
+
+/**
+ * Hubbub parser option types
+ */
+typedef enum hubbub_parser_opttype {
+	HUBBUB_PARSER_TOKEN_HANDLER,
+	HUBBUB_PARSER_BUFFER_HANDLER,
+	HUBBUB_PARSER_ERROR_HANDLER,
+	HUBBUB_PARSER_CONTENT_MODEL,
+} hubbub_parser_opttype;
+
+/**
+ * Hubbub parser option parameters
+ */
+typedef union hubbub_parser_optparams {
+	struct {
+		hubbub_token_handler handler;
+		void *pw;
+	} token_handler;
+
+	struct {
+		hubbub_buffer_handler handler;
+		void *pw;
+	} buffer_handler;
+
+	struct {
+		hubbub_error_handler handler;
+		void *pw;
+	} error_handler;
+
+	struct {
+		hubbub_content_model model;
+	} content_model;
+} hubbub_parser_optparams;
+
+/* Create a hubbub parser */
+hubbub_parser *hubbub_parser_create(const char *enc, const char *int_enc,
+		hubbub_alloc alloc, void *pw);
+/* Destroy a hubbub parser */
+void hubbub_parser_destroy(hubbub_parser *parser);
+
+/* Configure a hubbub parser */
+hubbub_error hubbub_parser_setopt(hubbub_parser *parser,
+		hubbub_parser_opttype type,
+		hubbub_parser_optparams *params);
+
+/* Pass a chunk of data to a hubbub parser for parsing */
+/* This data is encoded in the input charset */
+hubbub_error hubbub_parser_parse_chunk(hubbub_parser *parser,
+		uint8_t *data, size_t len);
+/* Pass a chunk of extraneous data to a hubbub parser for parsing */
+/* This data is UTF-8 encoded */
+hubbub_error hubbub_parser_parse_extraneous_chunk(hubbub_parser *parser,
+		uint8_t *data, size_t len);
+/* Inform the parser that the last chunk of data has been parsed */
+hubbub_error hubbub_parser_completed(hubbub_parser *parser);
+
+/* Read the document charset */
+const char *hubbub_parser_read_charset(hubbub_parser *parser,
+		hubbub_charset_source *source);
+
+/* Claim ownership of the document buffer */
+hubbub_error hubbub_parser_claim_buffer(hubbub_parser *parser,
+		uint8_t **buffer, size_t *len);
+
+#endif
+
-- 
cgit v1.2.3