From ff835de85e45a77c810d0f8dc7916ff0be677545 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Sun, 24 Jun 2007 10:31:23 +0000 Subject: Add UTF-16 input stream. This appears to work correctly -- could probably do with more testing. svn path=/trunk/hubbub/; revision=3362 --- test/parser-utf16.c | 175 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 test/parser-utf16.c (limited to 'test/parser-utf16.c') diff --git a/test/parser-utf16.c b/test/parser-utf16.c new file mode 100644 index 0000000..9056dd1 --- /dev/null +++ b/test/parser-utf16.c @@ -0,0 +1,175 @@ +#include +#include +#include + +#include + +#include + +#include "utils/utils.h" + +#include "testutils.h" + +static const uint8_t *pbuffer; + +static void buffer_handler(const uint8_t *buffer, size_t len, void *pw); +static void token_handler(const hubbub_token *token, void *pw); + +static void *myrealloc(void *ptr, size_t len, void *pw) +{ + UNUSED(pw); + + return realloc(ptr, len); +} + +int main(int argc, char **argv) +{ + hubbub_parser *parser; + hubbub_parser_optparams params; + FILE *fp; + size_t len, origlen; +#define CHUNK_SIZE (4096) + uint8_t buf[CHUNK_SIZE]; + const char *charset; + hubbub_charset_source cssource; + uint8_t *buffer; + + if (argc != 3) { + printf("Usage: %s \n", argv[0]); + return 1; + } + + /* Initialise library */ + assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK); + + parser = hubbub_parser_create("UTF-8", "UTF-16", myrealloc, NULL); + assert(parser != NULL); + + params.buffer_handler.handler = buffer_handler; + params.buffer_handler.pw = NULL; + assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_BUFFER_HANDLER, + ¶ms) == HUBBUB_OK); + + params.token_handler.handler = token_handler; + params.token_handler.pw = NULL; + assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TOKEN_HANDLER, + ¶ms) == HUBBUB_OK); + + fp = fopen(argv[2], "rb"); + if (fp == NULL) { + printf("Failed opening %s\n", argv[2]); + return 1; + } + + fseek(fp, 0, SEEK_END); + origlen = len = ftell(fp); + fseek(fp, 0, SEEK_SET); + + while (len >= CHUNK_SIZE) { + fread(buf, 1, CHUNK_SIZE, fp); + + assert(hubbub_parser_parse_chunk(parser, + buf, CHUNK_SIZE) == HUBBUB_OK); + + len -= CHUNK_SIZE; + } + + if (len > 0) { + fread(buf, 1, len, fp); + + assert(hubbub_parser_parse_chunk(parser, + buf, len) == HUBBUB_OK); + + len = 0; + + assert(hubbub_parser_completed(parser) == HUBBUB_OK); + } + + fclose(fp); + + charset = hubbub_parser_read_charset(parser, &cssource); + + printf("Charset: %s (from %d)\n", charset, cssource); + + assert(hubbub_parser_claim_buffer(parser, &buffer, &len) == + HUBBUB_OK); + + free(buffer); + + hubbub_parser_destroy(parser); + + assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK); + + printf("PASS\n"); + + return 0; +} + +void buffer_handler(const uint8_t *buffer, size_t len, void *pw) +{ + UNUSED(len); + UNUSED(pw); + + pbuffer = buffer; +} + +void token_handler(const hubbub_token *token, void *pw) +{ + static const char *token_names[] = { + "DOCTYPE", "START TAG", "END TAG", + "COMMENT", "CHARACTERS", "EOF" + }; + size_t i; + + UNUSED(pw); + + printf("%s: ", token_names[token->type]); + + switch (token->type) { + case HUBBUB_TOKEN_DOCTYPE: + printf("'%.*s' (%svalid)\n", + (int) token->data.doctype.name.len, + pbuffer + token->data.doctype.name.data_off, + token->data.doctype.correct ? "" : "in"); + break; + case HUBBUB_TOKEN_START_TAG: + printf("'%.*s' %s\n", + (int) token->data.tag.name.len, + pbuffer + token->data.tag.name.data_off, + (token->data.tag.n_attributes > 0) ? + "attributes:" : ""); + for (i = 0; i < token->data.tag.n_attributes; i++) { + printf("\t'%.*s' = '%.*s'\n", + (int) token->data.tag.attributes[i].name.len, + pbuffer + token->data.tag.attributes[i].name.data_off, + (int) token->data.tag.attributes[i].value.len, + pbuffer + token->data.tag.attributes[i].value.data_off); + } + break; + case HUBBUB_TOKEN_END_TAG: + printf("'%.*s' %s\n", + (int) token->data.tag.name.len, + pbuffer + token->data.tag.name.data_off, + (token->data.tag.n_attributes > 0) ? + "attributes:" : ""); + for (i = 0; i < token->data.tag.n_attributes; i++) { + printf("\t'%.*s' = '%.*s'\n", + (int) token->data.tag.attributes[i].name.len, + pbuffer + token->data.tag.attributes[i].name.data_off, + (int) token->data.tag.attributes[i].value.len, + pbuffer + token->data.tag.attributes[i].value.data_off); + } + break; + case HUBBUB_TOKEN_COMMENT: + printf("'%.*s'\n", (int) token->data.comment.len, + pbuffer + token->data.comment.data_off); + break; + case HUBBUB_TOKEN_CHARACTER: + printf("'%.*s'\n", (int) token->data.character.len, + pbuffer + token->data.character.data_off); + break; + case HUBBUB_TOKEN_EOF: + printf("\n"); + break; + } +} -- cgit v1.2.3