summaryrefslogtreecommitdiff
path: root/test
diff options
context:
space:
mode:
Diffstat (limited to 'test')
-rw-r--r--test/INDEX12
-rw-r--r--test/Makefile9
-rw-r--r--test/aliases.c61
-rw-r--r--test/cscodec.c247
-rw-r--r--test/csdetect.c18
-rw-r--r--test/data/cscodec/INDEX5
-rw-r--r--test/data/cscodec/simple.datbin1193 -> 0 bytes
-rw-r--r--test/data/csdetect/INDEX1
-rw-r--r--test/data/csdetect/regression.dat5
-rw-r--r--test/data/tree-construction/INDEX1
-rw-r--r--test/data/tree-construction/regression.dat31
-rw-r--r--test/filter.c355
-rw-r--r--test/inputstream.c126
-rw-r--r--test/parser-utf16.c195
-rw-r--r--test/parser.c44
-rw-r--r--test/regression/cscodec-segv.c37
-rw-r--r--test/regression/filter-segv.c38
-rw-r--r--test/regression/stream-nomem.c88
-rw-r--r--test/tokeniser.c54
-rw-r--r--test/tokeniser2.c107
-rw-r--r--test/tokeniser3.c108
-rw-r--r--test/tree.c49
-rw-r--r--test/tree2.c77
23 files changed, 217 insertions, 1451 deletions
diff --git a/test/INDEX b/test/INDEX
index e3522bc..50a15de 100644
--- a/test/INDEX
+++ b/test/INDEX
@@ -2,23 +2,13 @@
#
# Test Description DataDir
-aliases Encoding alias handling
-cscodec Charset codec implementation cscodec
+tokeniser2 HTML tokeniser (again) tokeniser2
csdetect Charset detection csdetect
dict Generic string dictionary
entities Named entity dictionary
-filter Input stream filtering
hubbub Library initialisation/finalisation
-inputstream Buffered input stream html
parser Public parser API html
-parser-utf16 Public parser API (utf-16 internally) html
tokeniser HTML tokeniser html
-tokeniser2 HTML tokeniser (again) tokeniser2
tokeniser3 HTML tokeniser (byte-by-byte) tokeniser2
tree Treebuilding API html
tree2 Treebuilding API tree-construction
-
-# Regression tests
-regression/cscodec-segv Segfault in charset codecs
-regression/filter-segv Segfault in input filtering
-regression/stream-nomem Inputstream buffer expansion
diff --git a/test/Makefile b/test/Makefile
index 20aa6ce..74df014 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -39,11 +39,8 @@ CFLAGS := $(CFLAGS) -I$(TOP)/src/ -I$(d) \
LDFLAGS := $(LDFLAGS) `$(PKGCONFIG) $(PKGCONFIGFLAGS) --libs json` -liconv
# Tests
-TESTS_$(d) := aliases cscodec csdetect dict entities filter hubbub \
- inputstream parser parser-utf16 tokeniser tokeniser2 tokeniser3 \
- tree tree2
-TESTS_$(d) := $(TESTS_$(d)) regression/cscodec-segv regression/filter-segv \
- regression/stream-nomem
+TESTS_$(d) := csdetect dict entities hubbub parser \
+ tokeniser tokeniser2 tokeniser3 tree tree2
# Items for top-level makefile to use
ITEMS_CLEAN := $(ITEMS_CLEAN) \
@@ -80,7 +77,7 @@ define compile_test
$(2): $(3) $(TOP)/$(COMPONENT)-debug.a
@$$(ECHO) $$(ECHOFLAGS) "==> $(1)"
@$$(CC) -c -g $$(DEBUGCFLAGS) -o $$@.o $(1)
- @$$(LD) -g -o $$@ $$@.o $$(LDFLAGS) -lhubbub-debug -lgcov
+ @$$(LD) -g -o $$@ $$@.o -lhubbub-debug $$(LDFLAGS) -lgcov
@$$(RM) $$(RMFLAGS) $$@.o
endef
diff --git a/test/aliases.c b/test/aliases.c
deleted file mode 100644
index 1cbf2a4..0000000
--- a/test/aliases.c
+++ /dev/null
@@ -1,61 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-
-#include "charset/aliases.h"
-
-#include "testutils.h"
-
-extern void hubbub_aliases_dump(void);
-
-static void *myrealloc(void *ptr, size_t len, void *pw)
-{
- UNUSED(pw);
-
- return realloc(ptr, len);
-}
-
-int main (int argc, char **argv)
-{
- hubbub_aliases_canon *c;
-
- if (argc != 2) {
- printf("Usage: %s <filename>\n", argv[0]);
- return 1;
- }
-
- hubbub_aliases_create(argv[1], myrealloc, NULL);
-
- hubbub_aliases_dump();
-
- c = hubbub_alias_canonicalise("moose", 5);
- if (c) {
- printf("FAIL - found invalid encoding 'moose'\n");
- return 1;
- }
-
- c = hubbub_alias_canonicalise("csinvariant", 11);
- if (c) {
- printf("%s %d\n", c->name, c->mib_enum);
- } else {
- printf("FAIL - failed finding encoding 'csinvariant'\n");
- return 1;
- }
-
- c = hubbub_alias_canonicalise("nats-sefi-add", 13);
- if (c) {
- printf("%s %d\n", c->name, c->mib_enum);
- } else {
- printf("FAIL - failed finding encoding 'nats-sefi-add'\n");
- return 1;
- }
-
- printf("%d\n", hubbub_mibenum_from_name(c->name, strlen(c->name)));
-
- printf("%s\n", hubbub_mibenum_to_name(c->mib_enum));
-
- hubbub_aliases_destroy(myrealloc, NULL);
-
- printf("PASS\n");
-
- return 0;
-}
diff --git a/test/cscodec.c b/test/cscodec.c
deleted file mode 100644
index 525b275..0000000
--- a/test/cscodec.c
+++ /dev/null
@@ -1,247 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-
-#include <hubbub/hubbub.h>
-
-#include "charset/codec.h"
-#include "utils/utils.h"
-
-#include "testutils.h"
-
-typedef struct line_ctx {
- hubbub_charsetcodec *codec;
-
- size_t buflen;
- size_t bufused;
- uint8_t *buf;
- size_t explen;
- size_t expused;
- uint8_t *exp;
-
- bool indata;
- bool inexp;
-
- hubbub_error exp_ret;
-
- enum { ENCODE, DECODE } dir;
-} line_ctx;
-
-static bool handle_line(const char *data, size_t datalen, void *pw);
-static void run_test(line_ctx *ctx);
-static hubbub_error filter(uint32_t c, uint32_t **output,
- size_t *outputlen, void *pw);
-
-
-static void *myrealloc(void *ptr, size_t len, void *pw)
-{
- UNUSED(pw);
-
- return realloc(ptr, len);
-}
-
-int main(int argc, char **argv)
-{
- line_ctx ctx;
-
- if (argc != 3) {
- printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
- return 1;
- }
-
- assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
-
- assert(hubbub_charsetcodec_create("NATS-SEFI-ADD",
- myrealloc, NULL) == NULL);
-
- ctx.codec = hubbub_charsetcodec_create("UTF-8", myrealloc, NULL);
- assert(ctx.codec != NULL);
-
- ctx.buflen = parse_filesize(argv[2]);
- if (ctx.buflen == 0)
- return 1;
-
- ctx.buf = malloc(2 * ctx.buflen);
- if (ctx.buf == NULL) {
- printf("Failed allocating %u bytes\n",
- (unsigned int) ctx.buflen);
- return 1;
- }
-
- ctx.exp = ctx.buf + ctx.buflen;
- ctx.explen = ctx.buflen;
-
- ctx.buf[0] = '\0';
- ctx.exp[0] = '\0';
- ctx.bufused = 0;
- ctx.expused = 0;
- ctx.indata = false;
- ctx.inexp = false;
- ctx.exp_ret = HUBBUB_OK;
-
- assert(parse_testfile(argv[2], handle_line, &ctx) == true);
-
- /* and run final test */
- if (ctx.bufused > 0 && ctx.buf[ctx.bufused - 1] == '\n')
- ctx.bufused -= 1;
-
- if (ctx.expused > 0 && ctx.exp[ctx.expused - 1] == '\n')
- ctx.expused -= 1;
-
- run_test(&ctx);
-
- free(ctx.buf);
-
- hubbub_charsetcodec_destroy(ctx.codec);
-
- assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
-
- printf("PASS\n");
-
- return 0;
-}
-
-bool handle_line(const char *data, size_t datalen, void *pw)
-{
- line_ctx *ctx = (line_ctx *) pw;
-
- if (data[0] == '#') {
- if (ctx->inexp) {
- /* This marks end of testcase, so run it */
-
- if (ctx->buf[ctx->bufused - 1] == '\n')
- ctx->bufused -= 1;
-
- if (ctx->exp[ctx->expused - 1] == '\n')
- ctx->expused -= 1;
-
- run_test(ctx);
-
- ctx->buf[0] = '\0';
- ctx->exp[0] = '\0';
- ctx->bufused = 0;
- ctx->expused = 0;
- ctx->exp_ret = HUBBUB_OK;
- }
-
- if (strncasecmp(data+1, "data", 4) == 0) {
- hubbub_charsetcodec_optparams params;
- const char *ptr = data + 6;
-
- ctx->indata = true;
- ctx->inexp = false;
-
- if (strncasecmp(ptr, "decode", 6) == 0)
- ctx->dir = DECODE;
- else
- ctx->dir = ENCODE;
-
- ptr += 7;
-
- if (strncasecmp(ptr, "LOOSE", 5) == 0) {
- params.error_mode.mode =
- HUBBUB_CHARSETCODEC_ERROR_LOOSE;
- ptr += 6;
- } else if (strncasecmp(ptr, "STRICT", 6) == 0) {
- params.error_mode.mode =
- HUBBUB_CHARSETCODEC_ERROR_STRICT;
- ptr += 7;
- } else {
- params.error_mode.mode =
- HUBBUB_CHARSETCODEC_ERROR_TRANSLIT;
- ptr += 9;
- }
-
- assert(hubbub_charsetcodec_setopt(ctx->codec,
- HUBBUB_CHARSETCODEC_ERROR_MODE,
- (hubbub_charsetcodec_optparams *) &params)
- == HUBBUB_OK);
-
- if (strncasecmp(ptr, "filter", 6) == 0) {
- params.filter_func.filter = filter;
- params.filter_func.pw = ctx;
-
- assert(hubbub_charsetcodec_setopt(ctx->codec,
- HUBBUB_CHARSETCODEC_FILTER_FUNC,
- (hubbub_charsetcodec_optparams *)
- &params) == HUBBUB_OK);
- }
- } else if (strncasecmp(data+1, "expected", 8) == 0) {
- ctx->indata = false;
- ctx->inexp = true;
-
- ctx->exp_ret = hubbub_error_from_string(data + 10,
- datalen - 10 - 1 /* \n */);
- } else if (strncasecmp(data+1, "reset", 5) == 0) {
- ctx->indata = false;
- ctx->inexp = false;
-
- hubbub_charsetcodec_reset(ctx->codec);
- }
- } else {
- if (ctx->indata) {
- memcpy(ctx->buf + ctx->bufused, data, datalen);
- ctx->bufused += datalen;
- }
- if (ctx->inexp) {
- memcpy(ctx->exp + ctx->expused, data, datalen);
- ctx->expused += datalen;
- }
- }
-
- return true;
-}
-
-void run_test(line_ctx *ctx)
-{
- static int testnum;
- size_t destlen = ctx->bufused * 4;
- uint8_t dest[destlen];
- uint8_t *pdest = dest;
- const uint8_t *psrc = ctx->buf;
- size_t srclen = ctx->bufused;
- size_t i;
-
- if (ctx->dir == DECODE) {
- assert(hubbub_charsetcodec_decode(ctx->codec,
- &psrc, &srclen,
- &pdest, &destlen) == ctx->exp_ret);
- } else {
- assert(hubbub_charsetcodec_encode(ctx->codec,
- &psrc, &srclen,
- &pdest, &destlen) == ctx->exp_ret);
- }
-
- printf("%d: Read '", ++testnum);
- for (i = 0; i < ctx->expused; i++) {
- printf("%c%c ", "0123456789abcdef"[(dest[i] >> 4) & 0xf],
- "0123456789abcdef"[dest[i] & 0xf]);
- }
- printf("' Expected '");
- for (i = 0; i < ctx->expused; i++) {
- printf("%c%c ", "0123456789abcdef"[(ctx->exp[i] >> 4) & 0xf],
- "0123456789abcdef"[ctx->exp[i] & 0xf]);
- }
- printf("'\n");
-
- assert(memcmp(dest, ctx->exp, ctx->expused) == 0);
-}
-
-hubbub_error filter(uint32_t c, uint32_t **output,
- size_t *outputlen, void *pw)
-{
- static uint32_t outbuf;
-
- UNUSED(pw);
-
- if (c == HUBBUB_CHARSETCODEC_NULL) {
- outbuf = 0;
- return HUBBUB_OK;
- }
-
- outbuf = c;
-
- *output = &outbuf;
- *outputlen = 1;
-
- return HUBBUB_OK;
-}
diff --git a/test/csdetect.c b/test/csdetect.c
index 3b39972..d02efcb 100644
--- a/test/csdetect.c
+++ b/test/csdetect.c
@@ -4,9 +4,10 @@
#include <stdlib.h>
#include <string.h>
+#include <parserutils/charset/mibenum.h>
+
#include <hubbub/hubbub.h>
-#include "charset/aliases.h"
#include "charset/detect.h"
#include "utils/utils.h"
@@ -113,20 +114,21 @@ bool handle_line(const char *data, size_t datalen, void *pw)
void run_test(const uint8_t *data, size_t len, char *expected)
{
- uint16_t mibenum;
- hubbub_charset_source source;
+ uint16_t mibenum = 0;
+ hubbub_charset_source source = HUBBUB_CHARSET_UNKNOWN;
static int testnum;
- assert(hubbub_charset_extract(&data, &len,
+ assert(hubbub_charset_extract(data, len,
&mibenum, &source) == HUBBUB_OK);
assert(mibenum != 0);
printf("%d: Detected charset %s (%d) Source %d Expected %s (%d)\n",
- ++testnum, hubbub_mibenum_to_name(mibenum),
+ ++testnum, parserutils_charset_mibenum_to_name(mibenum),
mibenum, source, expected,
- hubbub_mibenum_from_name(expected, strlen(expected)));
+ parserutils_charset_mibenum_from_name(
+ expected, strlen(expected)));
- assert(mibenum ==
- hubbub_mibenum_from_name(expected, strlen(expected)));
+ assert(mibenum == parserutils_charset_mibenum_from_name(
+ expected, strlen(expected)));
}
diff --git a/test/data/cscodec/INDEX b/test/data/cscodec/INDEX
deleted file mode 100644
index 326cff5..0000000
--- a/test/data/cscodec/INDEX
+++ /dev/null
@@ -1,5 +0,0 @@
-# Index file for charset codec tests
-#
-# Test Description
-
-simple.dat Simple tests, designed to validate testdriver \ No newline at end of file
diff --git a/test/data/cscodec/simple.dat b/test/data/cscodec/simple.dat
deleted file mode 100644
index 6a3cad1..0000000
--- a/test/data/cscodec/simple.dat
+++ /dev/null
Binary files differ
diff --git a/test/data/csdetect/INDEX b/test/data/csdetect/INDEX
index e292063..315ce6a 100644
--- a/test/data/csdetect/INDEX
+++ b/test/data/csdetect/INDEX
@@ -7,3 +7,4 @@ non-ascii-meta.dat Tests for meta charsets claiming to be non-ASCII
test-yahoo-jp.dat Yahoo! Japan, from html5lib testcases
tests1.dat Assorted tests, including edge cases, from html5lib
tests2.dat Further tests from html5lib
+regression.dat Regression tests
diff --git a/test/data/csdetect/regression.dat b/test/data/csdetect/regression.dat
new file mode 100644
index 0000000..75e5f14
--- /dev/null
+++ b/test/data/csdetect/regression.dat
@@ -0,0 +1,5 @@
+#data
+<table nowrap>
+#encoding
+windows-1252
+
diff --git a/test/data/tree-construction/INDEX b/test/data/tree-construction/INDEX
index ea258b0..c994b5a 100644
--- a/test/data/tree-construction/INDEX
+++ b/test/data/tree-construction/INDEX
@@ -17,3 +17,4 @@ tests12.dat html5lib tests
after-after-body.dat Tests "after after body" mode
after-after-frameset.dat Tests "after after frameset" mode
after-body.dat Tests "after body" mode
+regression.dat Regression tests
diff --git a/test/data/tree-construction/regression.dat b/test/data/tree-construction/regression.dat
new file mode 100644
index 0000000..0d4d77a
--- /dev/null
+++ b/test/data/tree-construction/regression.dat
@@ -0,0 +1,31 @@
+#data
+<html>
+ <body>
+ <table>
+ <tr>
+ <td>
+ <div>
+ <b>
+ </div>
+ <table></table>
+ </td>
+ </tr>
+ </table>
+ <table></table>
+ <script type="text/javascript"></script>
+ </body>
+</html>
+#errors
+#document
+| <html>
+| <head>
+| <body>
+| <table>
+| <tr>
+| <td>
+| <div>
+| <b>
+| <table>
+| <table>
+| <script>
+
diff --git a/test/filter.c b/test/filter.c
deleted file mode 100644
index 83cce20..0000000
--- a/test/filter.c
+++ /dev/null
@@ -1,355 +0,0 @@
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-
-#include <hubbub/hubbub.h>
-
-#include "utils/utils.h"
-
-#include "input/filter.h"
-
-#include "testutils.h"
-
-static void *myrealloc(void *ptr, size_t len, void *pw)
-{
- UNUSED(pw);
-
- return realloc(ptr, len);
-}
-
-int main(int argc, char **argv)
-{
- hubbub_filter_optparams params;
- hubbub_filter *input;
- uint8_t inbuf[64], outbuf[64];
- size_t inlen, outlen;
- const uint8_t *in = inbuf;
- uint8_t *out = outbuf;
-
- if (argc != 2) {
- printf("Usage: %s <filename>\n", argv[0]);
- return 1;
- }
-
- /* Initialise library */
- assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
-
- /* Create input filter */
- input = hubbub_filter_create("UTF-8", myrealloc, NULL);
- assert(input);
-
- /* Convert filter to UTF-8 encoding */
- params.encoding.name = "UTF-8";
- assert(hubbub_filter_setopt(input, HUBBUB_FILTER_SET_ENCODING,
- (hubbub_filter_optparams *) &params) == HUBBUB_OK);
-
-
- /* Simple case - valid input & output buffer large enough */
- in = inbuf;
- out = outbuf;
- strcpy((char *) inbuf, "hell\xc2\xa0o!");
- inlen = strlen((const char *) inbuf);
- outbuf[0] = '\0';
- outlen = 64;
-
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_OK);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- assert(hubbub_filter_reset(input) == HUBBUB_OK);
-
- assert(memcmp(outbuf, "hell\xc2\xa0o!",
- SLEN("hell\xc2\xa0o!")) == 0);
-
-
- /* Too small an output buffer; no encoding edge cases */
- in = inbuf;
- out = outbuf;
- strcpy((char *) inbuf, "hello!");
- inlen = strlen((const char *) inbuf);
- outbuf[0] = '\0';
- outlen = 5;
-
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_NOMEM);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- outlen = 64 - 5 + outlen;
-
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_OK);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- assert(hubbub_filter_reset(input) == HUBBUB_OK);
-
- assert(memcmp(outbuf, "hello!",
- SLEN("hello!")) == 0);
-
-
- /* Illegal input sequence; output buffer large enough */
- in = inbuf;
- out = outbuf;
- strcpy((char *) inbuf, "hell\x96o!");
- inlen = strlen((const char *) inbuf);
- outbuf[0] = '\0';
- outlen = 64;
-
- /* Input does loose decoding, converting to U+FFFD if illegal
- * input is encountered */
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_OK);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- assert(hubbub_filter_reset(input) == HUBBUB_OK);
-
- assert(memcmp(outbuf, "hell\xef\xbf\xbdo!",
- SLEN("hell\xef\xbf\xbdo!")) == 0);
-
-
- /* Input ends mid-sequence */
- in = inbuf;
- out = outbuf;
- strcpy((char *) inbuf, "hell\xc2\xa0o!");
- inlen = strlen((const char *) inbuf) - 3;
- outbuf[0] = '\0';
- outlen = 64;
-
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_OK);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- inlen = 3;
-
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_OK);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- assert(hubbub_filter_reset(input) == HUBBUB_OK);
-
- assert(memcmp(outbuf, "hell\xc2\xa0o!",
- SLEN("hell\xc2\xa0o!")) == 0);
-
-
- /* Input ends mid-sequence, but second attempt has too small a
- * buffer, but large enough to write out the incomplete character. */
- in = inbuf;
- out = outbuf;
- strcpy((char *) inbuf, "hell\xc2\xa0o!");
- inlen = strlen((const char *) inbuf) - 3;
- outbuf[0] = '\0';
- outlen = 64;
-
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_OK);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- inlen = 3;
- outlen = 3;
-
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_NOMEM);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- outlen = 64 - 7;
-
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_OK);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- assert(hubbub_filter_reset(input) == HUBBUB_OK);
-
- assert(memcmp(outbuf, "hell\xc2\xa0o!",
- SLEN("hell\xc2\xa0o!")) == 0);
-
-
- /* Input ends mid-sequence, but second attempt has too small a
- * buffer, not large enough to write out the incomplete character. */
- in = inbuf;
- out = outbuf;
- strcpy((char *) inbuf, "hell\xc2\xa0o!");
- inlen = strlen((const char *) inbuf) - 3;
- outbuf[0] = '\0';
- outlen = 64;
-
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_OK);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- inlen = 3;
- outlen = 1;
-
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_NOMEM);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- outlen = 60;
-
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_OK);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- assert(hubbub_filter_reset(input) == HUBBUB_OK);
-
- assert(memcmp(outbuf, "hell\xc2\xa0o!",
- SLEN("hell\xc2\xa0o!")) == 0);
-
-
- /* Input ends mid-sequence, but second attempt contains
- * invalid character */
- in = inbuf;
- out = outbuf;
- strcpy((char *) inbuf, "hell\xc2\xc2o!");
- inlen = strlen((const char *) inbuf) - 3;
- outbuf[0] = '\0';
- outlen = 64;
-
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_OK);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- inlen = 3;
-
- /* Input does loose decoding, converting to U+FFFD if illegal
- * input is encountered */
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_OK);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- assert(hubbub_filter_reset(input) == HUBBUB_OK);
-
- assert(memcmp(outbuf, "hell\xef\xbf\xbdo!",
- SLEN("hell\xef\xbf\xbdo!")) == 0);
-
-
- /* Input ends mid-sequence, but second attempt contains another
- * incomplete character */
- in = inbuf;
- out = outbuf;
- strcpy((char *) inbuf, "hell\xc2\xa0\xc2\xa1o!");
- inlen = strlen((const char *) inbuf) - 5;
- outbuf[0] = '\0';
- outlen = 64;
-
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_OK);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- inlen = 2;
-
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_OK);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- inlen = 3;
-
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_OK);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- assert(hubbub_filter_reset(input) == HUBBUB_OK);
-
- assert(memcmp(outbuf, "hell\xc2\xa0\xc2\xa1o!",
- SLEN("hell\xc2\xa0\xc2\xa1o!")) == 0);
-
-
- /* Input ends mid-sequence, but second attempt contains insufficient
- * data to complete the incomplete character */
- in = inbuf;
- out = outbuf;
- strcpy((char *) inbuf, "hell\xe2\x80\xa2o!");
- inlen = strlen((const char *) inbuf) - 4;
- outbuf[0] = '\0';
- outlen = 64;
-
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_OK);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- inlen = 1;
-
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_OK);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- inlen = 3;
-
- assert(hubbub_filter_process_chunk(input, &in, &inlen,
- &out, &outlen) == HUBBUB_OK);
-
- printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen,
- (int) (out - ((uint8_t *) outbuf)),
- outbuf, (int) outlen);
-
- assert(hubbub_filter_reset(input) == HUBBUB_OK);
-
- assert(memcmp(outbuf, "hell\xe2\x80\xa2o!",
- SLEN("hell\xe2\x80\xa2o!")) == 0);
-
-
- /* Clean up */
- hubbub_filter_destroy(input);
-
- assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
-
- printf("PASS\n");
-
- return 0;
-}
diff --git a/test/inputstream.c b/test/inputstream.c
deleted file mode 100644
index 3a83419..0000000
--- a/test/inputstream.c
+++ /dev/null
@@ -1,126 +0,0 @@
-#include <inttypes.h>
-#include <stdio.h>
-
-#include <hubbub/hubbub.h>
-
-#include "utils/utils.h"
-
-#include "input/inputstream.h"
-
-#include "testutils.h"
-
-static void buffer_moved_handler(const uint8_t *buffer, size_t len,
- void *pw);
-
-static void *myrealloc(void *ptr, size_t len, void *pw)
-{
- UNUSED(pw);
-
- return realloc(ptr, len);
-}
-
-int main(int argc, char **argv)
-{
- hubbub_inputstream *stream;
- FILE *fp;
- size_t len, origlen;
-#define CHUNK_SIZE (4096)
- uint8_t buf[CHUNK_SIZE];
- uint8_t *isb;
- size_t isblen;
- uint32_t c;
-
- if (argc != 3) {
- printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
- return 1;
- }
-
- /* Initialise library */
- assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
-
- stream = hubbub_inputstream_create("UTF-8", "UTF-8", myrealloc, NULL);
- assert(stream != NULL);
-
- assert(hubbub_inputstream_register_movehandler(stream,
- buffer_moved_handler, NULL) == HUBBUB_OK);
-
- fp = fopen(argv[2], "rb");
- if (fp == NULL) {
- printf("Failed opening %s\n", argv[2]);
- return 1;
- }
-
- fseek(fp, 0, SEEK_END);
- origlen = len = ftell(fp);
- fseek(fp, 0, SEEK_SET);
-
- while (len >= CHUNK_SIZE) {
- fread(buf, 1, CHUNK_SIZE, fp);
-
- assert(hubbub_inputstream_append(stream,
- buf, CHUNK_SIZE) == HUBBUB_OK);
-
- len -= CHUNK_SIZE;
-
- while ((c = hubbub_inputstream_peek(stream)) !=
- HUBBUB_INPUTSTREAM_OOD) {
- size_t len;
- hubbub_inputstream_cur_pos(stream, &len);
- hubbub_inputstream_advance(stream);
- assert(hubbub_inputstream_push_back(stream, c) ==
- HUBBUB_OK);
- hubbub_inputstream_advance(stream);
- }
- }
-
- if (len > 0) {
- fread(buf, 1, len, fp);
-
- assert(hubbub_inputstream_append(stream,
- buf, len) == HUBBUB_OK);
-
- len = 0;
- }
-
- fclose(fp);
-
- assert(hubbub_inputstream_insert(stream,
- (const uint8_t *) "hello!!!",
- SLEN("hello!!!")) == HUBBUB_OK);
-
- assert(hubbub_inputstream_append(stream, NULL, 0) == HUBBUB_OK);
-
- while (hubbub_inputstream_peek(stream) !=
- HUBBUB_INPUTSTREAM_EOF) {
- size_t len;
- hubbub_inputstream_cur_pos(stream, &len);
- hubbub_inputstream_advance(stream);
- }
-
- assert(hubbub_inputstream_claim_buffer(stream, &isb, &isblen) ==
- HUBBUB_OK);
-
- printf("Input size: %zu, Output size: %zu\n", origlen, isblen);
- printf("Buffer at %p\n", isb);
-
- free(isb);
-
- assert(hubbub_inputstream_deregister_movehandler(stream,
- buffer_moved_handler, NULL) == HUBBUB_OK);
-
- hubbub_inputstream_destroy(stream);
-
- assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
-
- printf("PASS\n");
-
- return 0;
-}
-
-void buffer_moved_handler(const uint8_t *buffer, size_t len,
- void *pw)
-{
- UNUSED(pw);
-
- printf("Buffer moved to: %p (%zu)\n", buffer, len);
-}
diff --git a/test/parser-utf16.c b/test/parser-utf16.c
deleted file mode 100644
index 326de78..0000000
--- a/test/parser-utf16.c
+++ /dev/null
@@ -1,195 +0,0 @@
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-
-#include <hubbub/hubbub.h>
-
-#include <hubbub/parser.h>
-
-#include "utils/utils.h"
-
-#include "testutils.h"
-
-static const uint8_t *pbuffer;
-
-static void buffer_handler(const uint8_t *buffer, size_t len, void *pw);
-static void token_handler(const hubbub_token *token, void *pw);
-
-static void *myrealloc(void *ptr, size_t len, void *pw)
-{
- UNUSED(pw);
-
- return realloc(ptr, len);
-}
-
-int main(int argc, char **argv)
-{
- hubbub_parser *parser;
- hubbub_parser_optparams params;
- FILE *fp;
- size_t len, origlen;
-#define CHUNK_SIZE (4096)
- uint8_t buf[CHUNK_SIZE];
- const char *charset;
- hubbub_charset_source cssource;
- uint8_t *buffer;
-
- if (argc != 3) {
- printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
- return 1;
- }
-
- /* Initialise library */
- assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
-
- parser = hubbub_parser_create("UTF-8", "UTF-16", myrealloc, NULL);
- assert(parser != NULL);
-
- params.buffer_handler.handler = buffer_handler;
- params.buffer_handler.pw = NULL;
- assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_BUFFER_HANDLER,
- &params) == HUBBUB_OK);
-
- params.token_handler.handler = token_handler;
- params.token_handler.pw = NULL;
- assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TOKEN_HANDLER,
- &params) == HUBBUB_OK);
-
- fp = fopen(argv[2], "rb");
- if (fp == NULL) {
- printf("Failed opening %s\n", argv[2]);
- return 1;
- }
-
- fseek(fp, 0, SEEK_END);
- origlen = len = ftell(fp);
- fseek(fp, 0, SEEK_SET);
-
- while (len >= CHUNK_SIZE) {
- fread(buf, 1, CHUNK_SIZE, fp);
-
- assert(hubbub_parser_parse_chunk(parser,
- buf, CHUNK_SIZE) == HUBBUB_OK);
-
- len -= CHUNK_SIZE;
- }
-
- if (len > 0) {
- fread(buf, 1, len, fp);
-
- assert(hubbub_parser_parse_chunk(parser,
- buf, len) == HUBBUB_OK);
-
- len = 0;
-
- assert(hubbub_parser_completed(parser) == HUBBUB_OK);
- }
-
- fclose(fp);
-
- charset = hubbub_parser_read_charset(parser, &cssource);
-
- printf("Charset: %s (from %d)\n", charset, cssource);
-
- assert(hubbub_parser_claim_buffer(parser, &buffer, &len) ==
- HUBBUB_OK);
-
- free(buffer);
-
- hubbub_parser_destroy(parser);
-
- assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
-
- printf("PASS\n");
-
- return 0;
-}
-
-void buffer_handler(const uint8_t *buffer, size_t len, void *pw)
-{
- UNUSED(len);
- UNUSED(pw);
-
- pbuffer = buffer;
-}
-
-void token_handler(const hubbub_token *token, void *pw)
-{
- static const char *token_names[] = {
- "DOCTYPE", "START TAG", "END TAG",
- "COMMENT", "CHARACTERS", "EOF"
- };
- size_t i;
-
- UNUSED(pw);
-
- printf("%s: ", token_names[token->type]);
-
- switch (token->type) {
- case HUBBUB_TOKEN_DOCTYPE:
- printf("'%.*s' %sids:\n",
- (int) token->data.doctype.name.len,
- pbuffer + token->data.doctype.name.data.off,
- token->data.doctype.force_quirks ?
- "(force-quirks) " : "");
-
- if (token->data.doctype.public_missing)
- printf("\tpublic: missing\n");
- else
- printf("\tpublic: '%.*s'\n",
- (int) token->data.doctype.public_id.len,
- pbuffer + token->data.doctype.public_id.data.off);
-
- if (token->data.doctype.system_missing)
- printf("\tsystem: missing\n");
- else
- printf("\tsystem: '%.*s'\n",
- (int) token->data.doctype.system_id.len,
- pbuffer + token->data.doctype.system_id.data.off);
-
- break;
- case HUBBUB_TOKEN_START_TAG:
- printf("'%.*s' %s%s\n",
- (int) token->data.tag.name.len,
- pbuffer + token->data.tag.name.data.off,
- (token->data.tag.self_closing) ?
- "(self-closing) " : "",
- (token->data.tag.n_attributes > 0) ?
- "attributes:" : "");
- for (i = 0; i < token->data.tag.n_attributes; i++) {
- printf("\t'%.*s' = '%.*s'\n",
- (int) token->data.tag.attributes[i].name.len,
- pbuffer + token->data.tag.attributes[i].name.data.off,
- (int) token->data.tag.attributes[i].value.len,
- pbuffer + token->data.tag.attributes[i].value.data.off);
- }
- break;
- case HUBBUB_TOKEN_END_TAG:
- printf("'%.*s' %s%s\n",
- (int) token->data.tag.name.len,
- pbuffer + token->data.tag.name.data.off,
- (token->data.tag.self_closing) ?
- "(self-closing) " : "",
- (token->data.tag.n_attributes > 0) ?
- "attributes:" : "");
- for (i = 0; i < token->data.tag.n_attributes; i++) {
- printf("\t'%.*s' = '%.*s'\n",
- (int) token->data.tag.attributes[i].name.len,
- pbuffer + token->data.tag.attributes[i].name.data.off,
- (int) token->data.tag.attributes[i].value.len,
- pbuffer + token->data.tag.attributes[i].value.data.off);
- }
- break;
- case HUBBUB_TOKEN_COMMENT:
- printf("'%.*s'\n", (int) token->data.comment.len,
- pbuffer + token->data.comment.data.off);
- break;
- case HUBBUB_TOKEN_CHARACTER:
- printf("'%.*s'\n", (int) token->data.character.len,
- pbuffer + token->data.character.data.off);
- break;
- case HUBBUB_TOKEN_EOF:
- printf("\n");
- break;
- }
-}
diff --git a/test/parser.c b/test/parser.c
index 0bc9246..ba8dd31 100644
--- a/test/parser.c
+++ b/test/parser.c
@@ -10,9 +10,6 @@
#include "testutils.h"
-static const uint8_t *pbuffer;
-
-static void buffer_handler(const uint8_t *buffer, size_t len, void *pw);
static void token_handler(const hubbub_token *token, void *pw);
static void *myrealloc(void *ptr, size_t len, void *pw)
@@ -32,7 +29,6 @@ int main(int argc, char **argv)
uint8_t buf[CHUNK_SIZE];
const char *charset;
hubbub_charset_source cssource;
- uint8_t *buffer;
if (argc != 3) {
printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
@@ -45,11 +41,6 @@ int main(int argc, char **argv)
parser = hubbub_parser_create("UTF-8", "UTF-8", myrealloc, NULL);
assert(parser != NULL);
- params.buffer_handler.handler = buffer_handler;
- params.buffer_handler.pw = NULL;
- assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_BUFFER_HANDLER,
- &params) == HUBBUB_OK);
-
params.token_handler.handler = token_handler;
params.token_handler.pw = NULL;
assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TOKEN_HANDLER,
@@ -91,11 +82,6 @@ int main(int argc, char **argv)
printf("Charset: %s (from %d)\n", charset, cssource);
- assert(hubbub_parser_claim_buffer(parser, &buffer, &len) ==
- HUBBUB_OK);
-
- free(buffer);
-
hubbub_parser_destroy(parser);
assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
@@ -105,14 +91,6 @@ int main(int argc, char **argv)
return 0;
}
-void buffer_handler(const uint8_t *buffer, size_t len, void *pw)
-{
- UNUSED(len);
- UNUSED(pw);
-
- pbuffer = buffer;
-}
-
void token_handler(const hubbub_token *token, void *pw)
{
static const char *token_names[] = {
@@ -129,7 +107,7 @@ void token_handler(const hubbub_token *token, void *pw)
case HUBBUB_TOKEN_DOCTYPE:
printf("'%.*s' %sids:\n",
(int) token->data.doctype.name.len,
- pbuffer + token->data.doctype.name.data.off,
+ token->data.doctype.name.ptr,
token->data.doctype.force_quirks ?
"(force-quirks) " : "");
@@ -138,20 +116,20 @@ void token_handler(const hubbub_token *token, void *pw)
else
printf("\tpublic: '%.*s'\n",
(int) token->data.doctype.public_id.len,
- pbuffer + token->data.doctype.public_id.data.off);
+ token->data.doctype.public_id.ptr);
if (token->data.doctype.system_missing)
printf("\tsystem: missing\n");
else
printf("\tsystem: '%.*s'\n",
(int) token->data.doctype.system_id.len,
- pbuffer + token->data.doctype.system_id.data.off);
+ token->data.doctype.system_id.ptr);
break;
case HUBBUB_TOKEN_START_TAG:
printf("'%.*s' %s%s\n",
(int) token->data.tag.name.len,
- pbuffer + token->data.tag.name.data.off,
+ token->data.tag.name.ptr,
(token->data.tag.self_closing) ?
"(self-closing) " : "",
(token->data.tag.n_attributes > 0) ?
@@ -159,15 +137,15 @@ void token_handler(const hubbub_token *token, void *pw)
for (i = 0; i < token->data.tag.n_attributes; i++) {
printf("\t'%.*s' = '%.*s'\n",
(int) token->data.tag.attributes[i].name.len,
- pbuffer + token->data.tag.attributes[i].name.data.off,
+ token->data.tag.attributes[i].name.ptr,
(int) token->data.tag.attributes[i].value.len,
- pbuffer + token->data.tag.attributes[i].value.data.off);
+ token->data.tag.attributes[i].value.ptr);
}
break;
case HUBBUB_TOKEN_END_TAG:
printf("'%.*s' %s%s\n",
(int) token->data.tag.name.len,
- pbuffer + token->data.tag.name.data.off,
+ token->data.tag.name.ptr,
(token->data.tag.self_closing) ?
"(self-closing) " : "",
(token->data.tag.n_attributes > 0) ?
@@ -175,18 +153,18 @@ void token_handler(const hubbub_token *token, void *pw)
for (i = 0; i < token->data.tag.n_attributes; i++) {
printf("\t'%.*s' = '%.*s'\n",
(int) token->data.tag.attributes[i].name.len,
- pbuffer + token->data.tag.attributes[i].name.data.off,
+ token->data.tag.attributes[i].name.ptr,
(int) token->data.tag.attributes[i].value.len,
- pbuffer + token->data.tag.attributes[i].value.data.off);
+ token->data.tag.attributes[i].value.ptr);
}
break;
case HUBBUB_TOKEN_COMMENT:
printf("'%.*s'\n", (int) token->data.comment.len,
- pbuffer + token->data.comment.data.off);
+ token->data.comment.ptr);
break;
case HUBBUB_TOKEN_CHARACTER:
printf("'%.*s'\n", (int) token->data.character.len,
- pbuffer + token->data.character.data.off);
+ token->data.character.ptr);
break;
case HUBBUB_TOKEN_EOF:
printf("\n");
diff --git a/test/regression/cscodec-segv.c b/test/regression/cscodec-segv.c
deleted file mode 100644
index ad9894a..0000000
--- a/test/regression/cscodec-segv.c
+++ /dev/null
@@ -1,37 +0,0 @@
-#include <stdio.h>
-
-#include <hubbub/hubbub.h>
-
-#include "charset/codec.h"
-
-#include "testutils.h"
-
-static void *myrealloc(void *ptr, size_t len, void *pw)
-{
- UNUSED(pw);
-
- return realloc(ptr, len);
-}
-
-int main(int argc, char **argv)
-{
- hubbub_charsetcodec *codec;
-
- if (argc != 2) {
- printf("Usage: %s <aliases_file>\n", argv[0]);
- return 1;
- }
-
- assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
-
- codec = hubbub_charsetcodec_create("ISO-8859-1", myrealloc, NULL);
- assert(codec != NULL);
-
- hubbub_charsetcodec_destroy(codec);
-
- assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
-
- printf("PASS\n");
-
- return 0;
-}
diff --git a/test/regression/filter-segv.c b/test/regression/filter-segv.c
deleted file mode 100644
index 950df61..0000000
--- a/test/regression/filter-segv.c
+++ /dev/null
@@ -1,38 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-
-#include <hubbub/hubbub.h>
-
-#include "input/filter.h"
-
-#include "testutils.h"
-
-static void *myrealloc(void *ptr, size_t len, void *pw)
-{
- UNUSED(pw);
-
- return realloc(ptr, len);
-}
-
-int main(int argc, char **argv)
-{
- hubbub_filter *input;
-
- if (argc != 2) {
- printf("Usage: %s <filename>\n", argv[0]);
- return 1;
- }
-
- assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
-
- input = hubbub_filter_create("UTF-8", myrealloc, NULL);
- assert(input);
-
- hubbub_filter_destroy(input);
-
- assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
-
- printf("PASS\n");
-
- return 0;
-}
diff --git a/test/regression/stream-nomem.c b/test/regression/stream-nomem.c
deleted file mode 100644
index 7233ac7..0000000
--- a/test/regression/stream-nomem.c
+++ /dev/null
@@ -1,88 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-
-#include <hubbub/hubbub.h>
-
-#include "utils/utils.h"
-
-#include "input/inputstream.h"
-
-#include "testutils.h"
-
-static void *myrealloc(void *ptr, size_t len, void *pw)
-{
- UNUSED(pw);
-
- return realloc(ptr, len);
-}
-
-int main(int argc, char **argv)
-{
- hubbub_inputstream *stream;
-
- /* This is specially calculated so that the inputstream is forced to
- * reallocate (it assumes that the inputstream's buffer chunk size
- * is 4k) */
-#define BUFFER_SIZE (4096 + 4)
- uint8_t input_buffer[BUFFER_SIZE];
- uint8_t *buffer;
- size_t buflen;
- uint32_t c;
-
- if (argc != 2) {
- printf("Usage: %s <aliases_file>\n", argv[0]);
- return 1;
- }
-
- /* Populate the buffer with something sane */
- memset(input_buffer, 'a', BUFFER_SIZE);
- /* Now, set up our test data */
- input_buffer[BUFFER_SIZE - 1] = '5';
- input_buffer[BUFFER_SIZE - 2] = '4';
- input_buffer[BUFFER_SIZE - 3] = '\xbd';
- input_buffer[BUFFER_SIZE - 4] = '\xbf';
- /* This byte will occupy the 4095th byte in the buffer and
- * thus cause the entirety of U+FFFD to be buffered until after
- * the buffer has been enlarged */
- input_buffer[BUFFER_SIZE - 5] = '\xef';
- input_buffer[BUFFER_SIZE - 6] = '3';
- input_buffer[BUFFER_SIZE - 7] = '2';
- input_buffer[BUFFER_SIZE - 8] = '1';
-
- assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
-
- stream = hubbub_inputstream_create("UTF-8", "UTF-8", myrealloc, NULL);
- assert(stream != NULL);
-
- assert(hubbub_inputstream_append(stream, input_buffer, BUFFER_SIZE) ==
- HUBBUB_OK);
-
- assert(hubbub_inputstream_append(stream, NULL, 0) == HUBBUB_OK);
-
- while ((c = hubbub_inputstream_peek(stream)) != HUBBUB_INPUTSTREAM_EOF)
- hubbub_inputstream_advance(stream);
-
- assert(hubbub_inputstream_claim_buffer(stream, &buffer, &buflen) ==
- HUBBUB_OK);
-
- assert(buflen == BUFFER_SIZE);
-
- printf("Buffer: '%.*s'\n", 8, buffer + (BUFFER_SIZE - 8));
-
- assert( buffer[BUFFER_SIZE - 6] == '3' &&
- buffer[BUFFER_SIZE - 5] == (uint8_t) '\xef' &&
- buffer[BUFFER_SIZE - 4] == (uint8_t) '\xbf' &&
- buffer[BUFFER_SIZE - 3] == (uint8_t) '\xbd' &&
- buffer[BUFFER_SIZE - 2] == '4');
-
- free(buffer);
-
- hubbub_inputstream_destroy(stream);
-
- assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
-
- printf("PASS\n");
-
- return 0;
-}
-
diff --git a/test/tokeniser.c b/test/tokeniser.c
index 2d9577b..0ccf264 100644
--- a/test/tokeniser.c
+++ b/test/tokeniser.c
@@ -1,18 +1,16 @@
#include <inttypes.h>
#include <stdio.h>
+#include <parserutils/input/inputstream.h>
+
#include <hubbub/hubbub.h>
#include "utils/utils.h"
-#include "input/inputstream.h"
#include "tokeniser/tokeniser.h"
#include "testutils.h"
-static const uint8_t *pbuffer;
-
-static void buffer_handler(const uint8_t *buffer, size_t len, void *pw);
static void token_handler(const hubbub_token *token, void *pw);
static void *myrealloc(void *ptr, size_t len, void *pw)
@@ -24,7 +22,7 @@ static void *myrealloc(void *ptr, size_t len, void *pw)
int main(int argc, char **argv)
{
- hubbub_inputstream *stream;
+ parserutils_inputstream *stream;
hubbub_tokeniser *tok;
hubbub_tokeniser_optparams params;
FILE *fp;
@@ -40,17 +38,13 @@ int main(int argc, char **argv)
/* Initialise library */
assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK);
- stream = hubbub_inputstream_create("UTF-8", "UTF-8", myrealloc, NULL);
+ stream = parserutils_inputstream_create("UTF-8", 0, NULL,
+ myrealloc, NULL);
assert(stream != NULL);
tok = hubbub_tokeniser_create(stream, myrealloc, NULL);
assert(tok != NULL);
- params.buffer_handler.handler = buffer_handler;
- params.buffer_handler.pw = NULL;
- assert(hubbub_tokeniser_setopt(tok, HUBBUB_TOKENISER_BUFFER_HANDLER,
- &params) == HUBBUB_OK);
-
params.token_handler.handler = token_handler;
params.token_handler.pw = NULL;
assert(hubbub_tokeniser_setopt(tok, HUBBUB_TOKENISER_TOKEN_HANDLER,
@@ -69,7 +63,7 @@ int main(int argc, char **argv)
while (len >= CHUNK_SIZE) {
fread(buf, 1, CHUNK_SIZE, fp);
- assert(hubbub_inputstream_append(stream,
+ assert(parserutils_inputstream_append(stream,
buf, CHUNK_SIZE) == HUBBUB_OK);
len -= CHUNK_SIZE;
@@ -80,12 +74,12 @@ int main(int argc, char **argv)
if (len > 0) {
fread(buf, 1, len, fp);
- assert(hubbub_inputstream_append(stream,
+ assert(parserutils_inputstream_append(stream,
buf, len) == HUBBUB_OK);
len = 0;
- assert(hubbub_inputstream_append(stream, NULL, 0) ==
+ assert(parserutils_inputstream_append(stream, NULL, 0) ==
HUBBUB_OK);
assert(hubbub_tokeniser_run(tok) == HUBBUB_OK);
@@ -95,7 +89,7 @@ int main(int argc, char **argv)
hubbub_tokeniser_destroy(tok);
- hubbub_inputstream_destroy(stream);
+ parserutils_inputstream_destroy(stream);
assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
@@ -104,14 +98,6 @@ int main(int argc, char **argv)
return 0;
}
-void buffer_handler(const uint8_t *buffer, size_t len, void *pw)
-{
- UNUSED(len);
- UNUSED(pw);
-
- pbuffer = buffer;
-}
-
void token_handler(const hubbub_token *token, void *pw)
{
static const char *token_names[] = {
@@ -128,7 +114,7 @@ void token_handler(const hubbub_token *token, void *pw)
case HUBBUB_TOKEN_DOCTYPE:
printf("'%.*s' %sids:\n",
(int) token->data.doctype.name.len,
- pbuffer + token->data.doctype.name.data.off,
+ token->data.doctype.name.ptr,
token->data.doctype.force_quirks ?
"(force-quirks) " : "");
@@ -137,20 +123,20 @@ void token_handler(const hubbub_token *token, void *pw)
else
printf("\tpublic: '%.*s'\n",
(int) token->data.doctype.public_id.len,
- pbuffer + token->data.doctype.public_id.data.off);
+ token->data.doctype.public_id.ptr);
if (token->data.doctype.system_missing)
printf("\tsystem: missing\n");
else
printf("\tsystem: '%.*s'\n",
(int) token->data.doctype.system_id.len,
- pbuffer + token->data.doctype.system_id.data.off);
+ token->data.doctype.system_id.ptr);
break;
case HUBBUB_TOKEN_START_TAG:
printf("'%.*s' %s%s\n",
(int) token->data.tag.name.len,
- pbuffer + token->data.tag.name.data.off,
+ token->data.tag.name.ptr,
(token->data.tag.self_closing) ?
"(self-closing) " : "",
(token->data.tag.n_attributes > 0) ?
@@ -158,15 +144,15 @@ void token_handler(const hubbub_token *token, void *pw)
for (i = 0; i < token->data.tag.n_attributes; i++) {
printf("\t'%.*s' = '%.*s'\n",
(int) token->data.tag.attributes[i].name.len,
- pbuffer + token->data.tag.attributes[i].name.data.off,
+ token->data.tag.attributes[i].name.ptr,
(int) token->data.tag.attributes[i].value.len,
- pbuffer + token->data.tag.attributes[i].value.data.off);
+ token->data.tag.attributes[i].value.ptr);
}
break;
case HUBBUB_TOKEN_END_TAG:
printf("'%.*s' %s%s\n",
(int) token->data.tag.name.len,
- pbuffer + token->data.tag.name.data.off,
+ token->data.tag.name.ptr,
(token->data.tag.self_closing) ?
"(self-closing) " : "",
(token->data.tag.n_attributes > 0) ?
@@ -174,18 +160,18 @@ void token_handler(const hubbub_token *token, void *pw)
for (i = 0; i < token->data.tag.n_attributes; i++) {
printf("\t'%.*s' = '%.*s'\n",
(int) token->data.tag.attributes[i].name.len,
- pbuffer + token->data.tag.attributes[i].name.data.off,
+ token->data.tag.attributes[i].name.ptr,
(int) token->data.tag.attributes[i].value.len,
- pbuffer + token->data.tag.attributes[i].value.data.off);
+ token->data.tag.attributes[i].value.ptr);
}
break;
case HUBBUB_TOKEN_COMMENT:
printf("'%.*s'\n", (int) token->data.comment.len,
- pbuffer + token->data.comment.data.off);
+ token->data.comment.ptr);
break;
case HUBBUB_TOKEN_CHARACTER:
printf("'%.*s'\n", (int) token->data.character.len,
- pbuffer + token->data.character.data.off);
+ token->data.character.ptr);
break;
case HUBBUB_TOKEN_EOF:
printf("\n");
diff --git a/test/tokeniser2.c b/test/tokeniser2.c
index 14ac71d..eb369b4 100644
--- a/test/tokeniser2.c
+++ b/test/tokeniser2.c
@@ -4,11 +4,12 @@
#include <json.h>
+#include <parserutils/input/inputstream.h>
+
#include <hubbub/hubbub.h>
#include "utils/utils.h"
-#include "input/inputstream.h"
#include "tokeniser/tokeniser.h"
#include "testutils.h"
@@ -29,7 +30,6 @@ typedef struct context {
} context;
static void run_test(context *ctx);
-static void buffer_handler(const uint8_t *buffer, size_t len, void *pw);
static void token_handler(const hubbub_token *token, void *pw);
static void *myrealloc(void *ptr, size_t len, void *pw)
@@ -74,6 +74,7 @@ int main(int argc, char **argv)
ctx.last_start_tag = NULL;
ctx.content_model = NULL;
+ ctx.process_cdata = false;
/* Extract settings */
for (entry = json_object_get_object(test)->head; entry;
@@ -119,7 +120,7 @@ int main(int argc, char **argv)
void run_test(context *ctx)
{
- hubbub_inputstream *stream;
+ parserutils_inputstream *stream;
hubbub_tokeniser *tok;
hubbub_tokeniser_optparams params;
int i, max_i;
@@ -138,7 +139,7 @@ void run_test(context *ctx)
ctx->output_index = 0;
ctx->char_off = 0;
- stream = hubbub_inputstream_create("UTF-8", "UTF-8",
+ stream = parserutils_inputstream_create("UTF-8", 0, NULL,
myrealloc, NULL);
assert(stream != NULL);
@@ -152,7 +153,7 @@ void run_test(context *ctx)
snprintf((char *) buf, sizeof buf, "<%s>",
ctx->last_start_tag);
- assert(hubbub_inputstream_append(stream,
+ assert(parserutils_inputstream_append(stream,
buf, strlen(ctx->last_start_tag) + 2) ==
HUBBUB_OK);
@@ -166,12 +167,6 @@ void run_test(context *ctx)
&params) == HUBBUB_OK);
}
- params.buffer_handler.handler = buffer_handler;
- params.buffer_handler.pw = ctx;
- assert(hubbub_tokeniser_setopt(tok,
- HUBBUB_TOKENISER_BUFFER_HANDLER,
- &params) == HUBBUB_OK);
-
params.token_handler.handler = token_handler;
params.token_handler.pw = ctx;
assert(hubbub_tokeniser_setopt(tok,
@@ -204,10 +199,10 @@ void run_test(context *ctx)
HUBBUB_TOKENISER_CONTENT_MODEL,
&params) == HUBBUB_OK);
- assert(hubbub_inputstream_append(stream,
+ assert(parserutils_inputstream_append(stream,
ctx->input, ctx->input_len) == HUBBUB_OK);
- assert(hubbub_inputstream_append(stream, NULL, 0) ==
+ assert(parserutils_inputstream_append(stream, NULL, 0) ==
HUBBUB_OK);
printf("Input: '%.*s' (%d)\n", (int) ctx->input_len,
@@ -218,19 +213,10 @@ void run_test(context *ctx)
hubbub_tokeniser_destroy(tok);
- hubbub_inputstream_destroy(stream);
+ parserutils_inputstream_destroy(stream);
}
}
-void buffer_handler(const uint8_t *buffer, size_t len, void *pw)
-{
- context *ctx = (context *) pw;
-
- UNUSED(len);
-
- ctx->pbuffer = buffer;
-}
-
void token_handler(const hubbub_token *token, void *pw)
{
static const char *token_names[] = {
@@ -277,7 +263,7 @@ void token_handler(const hubbub_token *token, void *pw)
items = json_object_get_array(obj);
- printf("%s: %s\n", token_names[token->type],
+ printf("got %s: expected %s\n", token_names[token->type],
json_object_get_string((struct json_object *)
array_list_get_idx(items, 0)));
@@ -297,9 +283,8 @@ void token_handler(const hubbub_token *token, void *pw)
array_list_get_idx(items, 3));
bool expquirks = !json_object_get_boolean(
array_list_get_idx(items, 4));
- char *gotname = (char *) (ctx->pbuffer +
- token->data.doctype.name.data.off);
- char *gotpub, *gotsys;
+ const char *gotname = (const char *)token->data.doctype.name.ptr;
+ const char *gotpub, *gotsys;
printf("'%.*s' %sids:\n",
(int) token->data.doctype.name.len,
@@ -311,34 +296,36 @@ void token_handler(const hubbub_token *token, void *pw)
gotpub = NULL;
printf("\tpublic: missing\n");
} else {
- gotpub = (char *) (ctx->pbuffer +
- token->data.doctype.public_id.data.off);
- printf("\tpublic: '%.*s'\n",
+ gotpub = (const char *) token->data.doctype.public_id.ptr;
+ printf("\tpublic: '%.*s' (%d)\n",
(int) token->data.doctype.public_id.len,
- gotpub);
+ gotpub,
+ (int) token->data.doctype.public_id.len);
}
if (token->data.doctype.system_missing) {
gotsys = NULL;
printf("\tsystem: missing\n");
} else {
- gotsys = (char *) (ctx->pbuffer +
- token->data.doctype.system_id.data.off);
- printf("\tsystem: '%.*s'\n",
+ gotsys = (const char *) token->data.doctype.system_id.ptr;
+ printf("\tsystem: '%.*s' (%d)\n",
(int) token->data.doctype.system_id.len,
- gotsys);
+ gotsys,
+ token->data.doctype.system_id.len);
}
assert(token->data.doctype.name.len == strlen(expname));
assert(strncmp(gotname, expname, strlen(expname)) == 0);
- assert((exppub == NULL) == (gotpub == NULL));
+ assert((exppub == NULL) ==
+ (token->data.doctype.public_missing == true));
if (exppub) {
assert(token->data.doctype.public_id.len == strlen(exppub));
assert(strncmp(gotpub, exppub, strlen(exppub)) == 0);
}
- assert((expsys == NULL) == (gotsys == NULL));
+ assert((expsys == NULL) ==
+ (token->data.doctype.system_missing == true));
if (gotsys) {
assert(token->data.doctype.system_id.len == strlen(expsys));
assert(strncmp(gotsys, expsys, strlen(expsys)) == 0);
@@ -356,16 +343,22 @@ void token_handler(const hubbub_token *token, void *pw)
bool self_closing = json_object_get_boolean(
array_list_get_idx(items, 3));
- char *tagname = (char *) (ctx->pbuffer +
- token->data.tag.name.data.off);
+ const char *tagname = (const char *)
+ token->data.tag.name.ptr;
- printf("'%.*s' %s%s\n",
+ printf("expected: '%s' %s\n",
+ expname,
+ (self_closing) ? "(self-closing) " : "");
+
+ printf(" got: '%.*s' %s\n",
(int) token->data.tag.name.len,
tagname,
(token->data.tag.self_closing) ?
- "(self-closing) " : "",
- (token->data.tag.n_attributes > 0) ?
- "attributes:" : "");
+ "(self-closing) " : "");
+
+ if (token->data.tag.n_attributes > 0) {
+ printf("attributes:\n");
+ }
assert(token->data.tag.name.len == strlen(expname));
assert(strncmp(tagname, expname, strlen(expname)) == 0);
@@ -379,12 +372,12 @@ void token_handler(const hubbub_token *token, void *pw)
char *expname = (char *) expattrs->k;
char *expval = json_object_get_string(
(struct json_object *) expattrs->v);
- char *gotname = (char *) (ctx->pbuffer +
- token->data.tag.attributes[i].name.data.off);
+ const char *gotname = (const char *)
+ token->data.tag.attributes[i].name.ptr;
size_t namelen =
token->data.tag.attributes[i].name.len;
- char *gotval = (char *) (ctx->pbuffer +
- token->data.tag.attributes[i].value.data.off);
+ const char *gotval = (const char *)
+ token->data.tag.attributes[i].value.ptr;
size_t vallen =
token->data.tag.attributes[i].value.len;
@@ -408,8 +401,8 @@ void token_handler(const hubbub_token *token, void *pw)
{
char *expname = json_object_get_string(
array_list_get_idx(items, 1));
- char *tagname = (char *) (ctx->pbuffer +
- token->data.tag.name.data.off);
+ const char *tagname = (const char *)
+ token->data.tag.name.ptr;
printf("'%.*s' %s\n",
(int) token->data.tag.name.len,
@@ -425,10 +418,12 @@ void token_handler(const hubbub_token *token, void *pw)
{
char *expstr = json_object_get_string(
array_list_get_idx(items, 1));
- char *gotstr = (char *) (ctx->pbuffer +
- token->data.comment.data.off);
+ const char *gotstr = (const char *)
+ token->data.comment.ptr;
- printf("'%.*s'\n", (int) token->data.comment.len, gotstr);
+ printf("expected: '%s'\n", expstr);
+ printf(" got: '%.*s'\n",
+ (int) token->data.comment.len, gotstr);
assert(token->data.comment.len == strlen(expstr));
assert(strncmp(gotstr, expstr, strlen(expstr)) == 0);
@@ -439,9 +434,9 @@ void token_handler(const hubbub_token *token, void *pw)
int expstrlen;
char *expstr = json_object_get_string_len(
array_list_get_idx(items, 1), &expstrlen);
- char *gotstr = (char *) (ctx->pbuffer +
- token->data.character.data.off);
- size_t len = min(token->data.character.len,
+ const char *gotstr = (const char *)
+ token->data.character.ptr;
+ size_t len = min(token->data.character.len,
expstrlen - ctx->char_off);
printf("expected: '%.*s'\n", (int) len, expstr + ctx->char_off);
@@ -457,7 +452,7 @@ void token_handler(const hubbub_token *token, void *pw)
hubbub_token t;
t.type = HUBBUB_TOKEN_CHARACTER;
- t.data.character.data.off += len;
+ t.data.character.ptr += len;
t.data.character.len -= len;
ctx->char_off = 0;
diff --git a/test/tokeniser3.c b/test/tokeniser3.c
index 76b1d07..05f57b4 100644
--- a/test/tokeniser3.c
+++ b/test/tokeniser3.c
@@ -4,18 +4,17 @@
#include <json.h>
+#include <parserutils/input/inputstream.h>
+
#include <hubbub/hubbub.h>
#include "utils/utils.h"
-#include "input/inputstream.h"
#include "tokeniser/tokeniser.h"
#include "testutils.h"
typedef struct context {
- const uint8_t *pbuffer;
-
const uint8_t *input;
size_t input_len;
@@ -29,7 +28,6 @@ typedef struct context {
} context;
static void run_test(context *ctx);
-static void buffer_handler(const uint8_t *buffer, size_t len, void *pw);
static void token_handler(const hubbub_token *token, void *pw);
static void *myrealloc(void *ptr, size_t len, void *pw)
@@ -74,6 +72,7 @@ int main(int argc, char **argv)
ctx.last_start_tag = NULL;
ctx.content_model = NULL;
+ ctx.process_cdata = false;
/* Extract settings */
for (entry = json_object_get_object(test)->head; entry;
@@ -117,7 +116,7 @@ int main(int argc, char **argv)
void run_test(context *ctx)
{
- hubbub_inputstream *stream;
+ parserutils_inputstream *stream;
hubbub_tokeniser *tok;
hubbub_tokeniser_optparams params;
int i, max_i;
@@ -137,7 +136,7 @@ void run_test(context *ctx)
ctx->output_index = 0;
ctx->char_off = 0;
- stream = hubbub_inputstream_create("UTF-8", "UTF-8",
+ stream = parserutils_inputstream_create("UTF-8", 0, NULL,
myrealloc, NULL);
assert(stream != NULL);
@@ -151,7 +150,7 @@ void run_test(context *ctx)
snprintf((char *) buf, sizeof buf, "<%s>",
ctx->last_start_tag);
- assert(hubbub_inputstream_append(stream,
+ assert(parserutils_inputstream_append(stream,
buf, strlen(ctx->last_start_tag) + 2) ==
HUBBUB_OK);
@@ -165,12 +164,6 @@ void run_test(context *ctx)
&params) == HUBBUB_OK);
}
- params.buffer_handler.handler = buffer_handler;
- params.buffer_handler.pw = ctx;
- assert(hubbub_tokeniser_setopt(tok,
- HUBBUB_TOKENISER_BUFFER_HANDLER,
- &params) == HUBBUB_OK);
-
params.token_handler.handler = token_handler;
params.token_handler.pw = ctx;
assert(hubbub_tokeniser_setopt(tok,
@@ -208,33 +201,24 @@ void run_test(context *ctx)
(int) ctx->input_len);
for (j = 0; j < ctx->input_len; j++) {
- assert(hubbub_inputstream_append(stream,
+ assert(parserutils_inputstream_append(stream,
ctx->input + j, 1) ==
HUBBUB_OK);
assert(hubbub_tokeniser_run(tok) == HUBBUB_OK);
}
- assert(hubbub_inputstream_append(stream, NULL, 0) ==
+ assert(parserutils_inputstream_append(stream, NULL, 0) ==
HUBBUB_OK);
assert(hubbub_tokeniser_run(tok) == HUBBUB_OK);
hubbub_tokeniser_destroy(tok);
- hubbub_inputstream_destroy(stream);
+ parserutils_inputstream_destroy(stream);
}
}
-void buffer_handler(const uint8_t *buffer, size_t len, void *pw)
-{
- context *ctx = (context *) pw;
-
- UNUSED(len);
-
- ctx->pbuffer = buffer;
-}
-
void token_handler(const hubbub_token *token, void *pw)
{
static const char *token_names[] = {
@@ -301,9 +285,9 @@ void token_handler(const hubbub_token *token, void *pw)
array_list_get_idx(items, 3));
bool expquirks = !json_object_get_boolean(
array_list_get_idx(items, 4));
- char *gotname = (char *) (ctx->pbuffer +
- token->data.doctype.name.data.off);
- char *gotpub, *gotsys;
+ const char *gotname = (const char *)
+ token->data.doctype.name.ptr;
+ const char *gotpub, *gotsys;
printf("'%.*s' %sids:\n",
(int) token->data.doctype.name.len,
@@ -315,34 +299,38 @@ void token_handler(const hubbub_token *token, void *pw)
gotpub = NULL;
printf("\tpublic: missing\n");
} else {
- gotpub = (char *) (ctx->pbuffer +
- token->data.doctype.public_id.data.off);
- printf("\tpublic: '%.*s'\n",
+ gotpub = (const char *)
+ token->data.doctype.public_id.ptr;
+ printf("\tpublic: '%.*s' (%d)\n",
(int) token->data.doctype.public_id.len,
- gotpub);
+ gotpub,
+ (int) token->data.doctype.public_id.len);
}
if (token->data.doctype.system_missing) {
gotsys = NULL;
printf("\tsystem: missing\n");
} else {
- gotsys = (char *) (ctx->pbuffer +
- token->data.doctype.system_id.data.off);
- printf("\tsystem: '%.*s'\n",
+ gotsys = (const char *)
+ token->data.doctype.system_id.ptr;
+ printf("\tsystem: '%.*s' (%d)\n",
(int) token->data.doctype.system_id.len,
- gotsys);
+ gotsys,
+ (int) token->data.doctype.system_id.len);
}
assert(token->data.doctype.name.len == strlen(expname));
assert(strncmp(gotname, expname, strlen(expname)) == 0);
- assert((exppub == NULL) == (gotpub == NULL));
+ assert((exppub == NULL) ==
+ (token->data.doctype.public_missing == true));
if (exppub) {
assert(token->data.doctype.public_id.len == strlen(exppub));
assert(strncmp(gotpub, exppub, strlen(exppub)) == 0);
}
- assert((expsys == NULL) == (gotsys == NULL));
+ assert((expsys == NULL) ==
+ (token->data.doctype.system_missing == true));
if (gotsys) {
assert(token->data.doctype.system_id.len == strlen(expsys));
assert(strncmp(gotsys, expsys, strlen(expsys)) == 0);
@@ -360,16 +348,22 @@ void token_handler(const hubbub_token *token, void *pw)
bool self_closing = json_object_get_boolean(
array_list_get_idx(items, 3));
- char *tagname = (char *) (ctx->pbuffer +
- token->data.tag.name.data.off);
+ const char *tagname = (const char *)
+ token->data.tag.name.ptr;
- printf("'%.*s' %s%s\n",
+ printf("expected: '%s' %s\n",
+ expname,
+ (self_closing) ? "(self-closing) " : "");
+
+ printf(" got: '%.*s' %s\n",
(int) token->data.tag.name.len,
tagname,
(token->data.tag.self_closing) ?
- "(self-closing) " : "",
- (token->data.tag.n_attributes > 0) ?
- "attributes:" : "");
+ "(self-closing) " : "");
+
+ if (token->data.tag.n_attributes > 0) {
+ printf("attributes:\n");
+ }
assert(token->data.tag.name.len == strlen(expname));
assert(strncmp(tagname, expname, strlen(expname)) == 0);
@@ -383,12 +377,12 @@ void token_handler(const hubbub_token *token, void *pw)
char *expname = (char *) expattrs->k;
char *expval = json_object_get_string(
(struct json_object *) expattrs->v);
- char *gotname = (char *) (ctx->pbuffer +
- token->data.tag.attributes[i].name.data.off);
+ const char *gotname = (const char *)
+ token->data.tag.attributes[i].name.ptr;
size_t namelen =
token->data.tag.attributes[i].name.len;
- char *gotval = (char *) (ctx->pbuffer +
- token->data.tag.attributes[i].value.data.off);
+ const char *gotval = (const char *)
+ token->data.tag.attributes[i].value.ptr;
size_t vallen =
token->data.tag.attributes[i].value.len;
@@ -412,8 +406,8 @@ void token_handler(const hubbub_token *token, void *pw)
{
char *expname = json_object_get_string(
array_list_get_idx(items, 1));
- char *tagname = (char *) (ctx->pbuffer +
- token->data.tag.name.data.off);
+ const char *tagname = (const char *)
+ token->data.tag.name.ptr;
printf("'%.*s' %s\n",
(int) token->data.tag.name.len,
@@ -429,10 +423,12 @@ void token_handler(const hubbub_token *token, void *pw)
{
char *expstr = json_object_get_string(
array_list_get_idx(items, 1));
- char *gotstr = (char *) (ctx->pbuffer +
- token->data.comment.data.off);
+ const char *gotstr = (const char *)
+ token->data.comment.ptr;
- printf("'%.*s'\n", (int) token->data.comment.len, gotstr);
+ printf("expected: '%s'\n", expstr);
+ printf(" got: '%.*s'\n",
+ (int) token->data.comment.len, gotstr);
assert(token->data.comment.len == strlen(expstr));
assert(strncmp(gotstr, expstr, strlen(expstr)) == 0);
@@ -443,8 +439,8 @@ void token_handler(const hubbub_token *token, void *pw)
int expstrlen;
char *expstr = json_object_get_string_len(
array_list_get_idx(items, 1), &expstrlen);
- char *gotstr = (char *) (ctx->pbuffer +
- token->data.character.data.off);
+ const char *gotstr = (const char *)
+ token->data.character.ptr;
size_t len = min(token->data.character.len,
expstrlen - ctx->char_off);
@@ -462,7 +458,7 @@ void token_handler(const hubbub_token *token, void *pw)
hubbub_token t;
t.type = HUBBUB_TOKEN_CHARACTER;
- t.data.character.data.off += len;
+ t.data.character.ptr += len;
t.data.character.len -= len;
ctx->char_off = 0;
diff --git a/test/tree.c b/test/tree.c
index a854491..ae947a0 100644
--- a/test/tree.c
+++ b/test/tree.c
@@ -29,9 +29,6 @@ static uintptr_t node_counter;
node_ref_alloc += NODE_REF_CHUNK; \
}
-static const uint8_t *pbuffer;
-
-static void buffer_handler(const uint8_t *buffer, size_t len, void *pw);
static int create_comment(void *ctx, const hubbub_string *data, void **result);
static int create_doctype(void *ctx, const hubbub_doctype *doctype,
void **result);
@@ -79,22 +76,6 @@ static void *myrealloc(void *ptr, size_t len, void *pw)
return realloc(ptr, len);
}
-static const uint8_t *ptr_from_hubbub_string(const hubbub_string *string)
-{
- const uint8_t *data;
-
- switch (string->type) {
- case HUBBUB_STRING_OFF:
- data = pbuffer + string->data.off;
- break;
- case HUBBUB_STRING_PTR:
- data = string->data.ptr;
- break;
- }
-
- return data;
-}
-
int main(int argc, char **argv)
{
hubbub_parser *parser;
@@ -105,7 +86,6 @@ int main(int argc, char **argv)
uint8_t buf[CHUNK_SIZE];
const char *charset;
hubbub_charset_source cssource;
- uint8_t *buffer;
bool passed = true;
if (argc != 3) {
@@ -126,11 +106,6 @@ int main(int argc, char **argv)
parser = hubbub_parser_create("UTF-8", "UTF-8", myrealloc, NULL);
assert(parser != NULL);
- params.buffer_handler.handler = buffer_handler;
- params.buffer_handler.pw = NULL;
- assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_BUFFER_HANDLER,
- &params) == HUBBUB_OK);
-
params.tree_handler = &tree_handler;
assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TREE_HANDLER,
&params) == HUBBUB_OK);
@@ -176,11 +151,6 @@ int main(int argc, char **argv)
printf("Charset: %s (from %d)\n", charset, cssource);
- assert(hubbub_parser_claim_buffer(parser, &buffer, &len) ==
- HUBBUB_OK);
-
- free(buffer);
-
hubbub_parser_destroy(parser);
assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK);
@@ -200,18 +170,10 @@ int main(int argc, char **argv)
return 0;
}
-void buffer_handler(const uint8_t *buffer, size_t len, void *pw)
-{
- UNUSED(len);
- UNUSED(pw);
-
- pbuffer = buffer;
-}
-
int create_comment(void *ctx, const hubbub_string *data, void **result)
{
printf("Creating (%" PRIuPTR ") [comment '%.*s']\n", ++node_counter,
- (int) data->len, ptr_from_hubbub_string(data));
+ (int) data->len, data->ptr);
GROW_REF
node_ref[node_counter] = 0;
@@ -226,8 +188,7 @@ int create_comment(void *ctx, const hubbub_string *data, void **result)
int create_doctype(void *ctx, const hubbub_doctype *doctype, void **result)
{
printf("Creating (%" PRIuPTR ") [doctype '%.*s']\n", ++node_counter,
- (int) doctype->name.len,
- ptr_from_hubbub_string(&doctype->name));
+ (int) doctype->name.len, doctype->name.ptr);
GROW_REF
node_ref[node_counter] = 0;
@@ -242,7 +203,7 @@ int create_doctype(void *ctx, const hubbub_doctype *doctype, void **result)
int create_element(void *ctx, const hubbub_tag *tag, void **result)
{
printf("Creating (%" PRIuPTR ") [element '%.*s']\n", ++node_counter,
- (int) tag->name.len, ptr_from_hubbub_string(&tag->name));
+ (int) tag->name.len, tag->name.ptr);
GROW_REF
node_ref[node_counter] = 0;
@@ -257,11 +218,11 @@ int create_element(void *ctx, const hubbub_tag *tag, void **result)
int create_text(void *ctx, const hubbub_string *data, void **result)
{
printf("Creating (%" PRIuPTR ") [text '%.*s']\n", ++node_counter,
- (int) data->len, ptr_from_hubbub_string(data));
+ (int) data->len, data->ptr);
GROW_REF
node_ref[node_counter] = 0;
-
+
ref_node(ctx, (void *) node_counter);
*result = (void *) node_counter;
diff --git a/test/tree2.c b/test/tree2.c
index 53876fb..0f181ef 100644
--- a/test/tree2.c
+++ b/test/tree2.c
@@ -74,10 +74,6 @@ node_t *Document;
static void node_print(buf_t *buf, node_t *node, unsigned depth);
-
-static const uint8_t *pbuffer;
-
-static void buffer_handler(const uint8_t *buffer, size_t len, void *pw);
static int create_comment(void *ctx, const hubbub_string *data, void **result);
static int create_doctype(void *ctx, const hubbub_doctype *doctype,
void **result);
@@ -123,29 +119,23 @@ static hubbub_tree_handler tree_handler = {
static void *myrealloc(void *ptr, size_t len, void *pw)
{
- UNUSED(pw);
-
- return realloc(ptr, len);
-}
+ void *ret;
-static const uint8_t *ptr_from_hubbub_string(const hubbub_string *string)
-{
- const uint8_t *data;
+ UNUSED(pw);
- switch (string->type) {
- case HUBBUB_STRING_OFF:
- data = pbuffer + string->data.off;
- break;
- case HUBBUB_STRING_PTR:
- data = string->data.ptr;
- break;
+ /* A half-arsed attempt at filling freshly allocated space with junk. */
+ if (ptr == NULL) {
+ ret = malloc(len);
+ if (ret != NULL)
+ memset(ret, 0xdf, len);
+ } else {
+ ret = realloc(ptr, len);
}
- return data;
+ return ret;
}
-
/*
* Create, initialise, and return, a parser instance.
*/
@@ -157,11 +147,6 @@ static hubbub_parser *setup_parser(void)
parser = hubbub_parser_create("UTF-8", "UTF-8", myrealloc, NULL);
assert(parser != NULL);
- params.buffer_handler.handler = buffer_handler;
- params.buffer_handler.pw = NULL;
- assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_BUFFER_HANDLER,
- &params) == HUBBUB_OK);
-
params.tree_handler = &tree_handler;
assert(hubbub_parser_setopt(parser, HUBBUB_PARSER_TREE_HANDLER,
&params) == HUBBUB_OK);
@@ -174,14 +159,6 @@ static hubbub_parser *setup_parser(void)
}
-void buffer_handler(const uint8_t *buffer, size_t len, void *pw)
-{
- UNUSED(len);
- UNUSED(pw);
-
- pbuffer = buffer;
-}
-
/*** Buffer handling bits ***/
static void buf_clear(buf_t *buf)
@@ -371,8 +348,8 @@ int create_comment(void *ctx, const hubbub_string *data, void **result)
node_t *node = calloc(1, sizeof *node);
node->type = COMMENT;
- node->data.content = strndup((char *)ptr_from_hubbub_string(data),
- data->len);
+ node->data.content = strndup((const char *) data->ptr, data->len);
+ node->refcnt = 1;
node->refcnt = 1;
*result = node;
@@ -386,20 +363,18 @@ int create_doctype(void *ctx, const hubbub_doctype *doctype, void **result)
node->type = DOCTYPE;
node->data.doctype.name = strndup(
- (char *)ptr_from_hubbub_string(&doctype->name),
+ (const char *) doctype->name.ptr,
doctype->name.len);
if (!doctype->public_missing) {
node->data.doctype.public_id = strndup(
- (char *)ptr_from_hubbub_string(
- &doctype->public_id),
+ (const char *) doctype->public_id.ptr,
doctype->public_id.len);
}
if (!doctype->system_missing) {
node->data.doctype.system_id = strndup(
- (char *)ptr_from_hubbub_string(
- &doctype->system_id),
+ (const char *) doctype->system_id.ptr,
doctype->system_id.len);
}
node->refcnt = 1;
@@ -418,7 +393,7 @@ int create_element(void *ctx, const hubbub_tag *tag, void **result)
node->type = ELEMENT;
node->data.element.ns = tag->ns;
node->data.element.name = strndup(
- (char *)ptr_from_hubbub_string(&tag->name),
+ (const char *) tag->name.ptr,
tag->name.len);
node->data.element.n_attrs = tag->n_attributes;
@@ -432,12 +407,12 @@ int create_element(void *ctx, const hubbub_tag *tag, void **result)
attr->ns = tag->attributes[i].ns;
- attr->name = strndup((char *)ptr_from_hubbub_string(
- &tag->attributes[i].name),
+ attr->name = strndup(
+ (const char *) tag->attributes[i].name.ptr,
tag->attributes[i].name.len);
- attr->value = strndup((char *)ptr_from_hubbub_string(
- &tag->attributes[i].value),
+ attr->value = strndup(
+ (const char *) tag->attributes[i].value.ptr,
tag->attributes[i].value.len);
}
node->refcnt = 1;
@@ -452,8 +427,8 @@ int create_text(void *ctx, const hubbub_string *data, void **result)
node_t *node = calloc(1, sizeof *node);
node->type = CHARACTER;
- node->data.content = strndup((char *)ptr_from_hubbub_string(data),
- data->len);
+ node->data.content = strndup((const char *) data->ptr, data->len);
+ node->refcnt = 1;
node->refcnt = 1;
*result = node;
@@ -778,12 +753,12 @@ int add_attributes(void *ctx, void *vnode,
attr->ns = attributes[i].ns;
- attr->name = strndup((char *)ptr_from_hubbub_string(
- &attributes[i].name),
+ attr->name = strndup(
+ (const char *) attributes[i].name.ptr,
attributes[i].name.len);
- attr->value = strndup((char *)ptr_from_hubbub_string(
- &attributes[i].value),
+ attr->value = strndup(
+ (const char *) attributes[i].value.ptr,
attributes[i].value.len);
}