summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/tokeniser/tokeniser.c 10
-rw-r--r-- test/data/tokeniser2/INDEX 2
-rw-r--r-- test/testutils.h 12
-rw-r--r-- test/tokeniser2.c 32
-rw-r--r-- test/tokeniser3.c 32
5 files changed, 53 insertions, 35 deletions
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 7355f80..d108490 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -44,6 +44,12 @@ static const hubbub_string u_fffd_str = { u_fffd, sizeof(u_fffd) };
static const uint8_t lf = '\n';
static const hubbub_string lf_str = { &lf, 1 };
+/**
+ * UTF-8 encoding of U+0000 NULL CHARACTER
+ */
+static const uint8_t u_null[1] = { '\x00'};
+static const hubbub_string u_null_str = { u_null, sizeof(u_null) };
+
/**
* Tokeniser states
@@ -751,8 +757,8 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
emit_current_chars(tokeniser);
}
- /* Emit a replacement character */
- emit_character_token(tokeniser, &u_fffd_str);
+ /* Emit a null character */
+ emit_character_token(tokeniser, &u_null_str);
/* Advance past NUL */
parserutils_inputstream_advance(tokeniser->input, 1);
diff --git a/test/data/tokeniser2/INDEX b/test/data/tokeniser2/INDEX
index 57c0cad..1f6ea93 100644
--- a/test/data/tokeniser2/INDEX
+++ b/test/data/tokeniser2/INDEX
@@ -3,7 +3,7 @@
# Test Description
test1.test html5lib tests (part 1)
-#test2.test html5lib tests (part 2)
+test2.test html5lib tests (part 2)
#test3.test html5lib tests (part 3)
#test4.test html5lib tests (part 4)
#contentModelFlags.test html5lib content model tests
diff --git a/test/testutils.h b/test/testutils.h
index 45870f9..7a8eda5 100644
--- a/test/testutils.h
+++ b/test/testutils.h
@@ -10,6 +10,18 @@
#define UNUSED(x) ((x) = (x))
#endif
+#ifndef LEN
+uint32_t string_length(const char *str); /* prototype immediately ahead of the definition below */
+uint32_t string_length(const char *str) /* NULL-tolerant strlen(): returns 0 for a NULL str */
+{
+ if(str == NULL)
+ return 0; /* a NULL pointer is treated as a zero-length string */
+ return strlen(str); /* strlen()'s size_t result is converted to the uint32_t return type */
+}
+#define LEN(x) string_length(x) /* LEN(x): length of x via string_length(), so x may be NULL */
+#endif
+
+
/* Redefine assert, so we can simply use the standard assert mechanism
* within testcases and exit with the right output for the testrunner
* to do the right thing. */
diff --git a/test/tokeniser2.c b/test/tokeniser2.c
index c8ab9c0..07f355a 100644
--- a/test/tokeniser2.c
+++ b/test/tokeniser2.c
@@ -134,7 +134,7 @@ void run_test(context *ctx)
if (ctx->last_start_tag != NULL) {
/* Fake up a start tag, in PCDATA state */
- size_t len = strlen(ctx->last_start_tag) + 3;
+ size_t len = LEN(ctx->last_start_tag) + 3;
uint8_t *buf = malloc(len);
snprintf((char *) buf, len, "<%s>",
@@ -302,21 +302,21 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
(int) token->data.doctype.system_id.len);
}
- assert(token->data.doctype.name.len == strlen(expname));
- assert(strncmp(gotname, expname, strlen(expname)) == 0);
+ assert(token->data.doctype.name.len == LEN(expname));
+ assert(strncmp(gotname, expname, LEN(expname)) == 0);
assert((exppub == NULL) ==
(token->data.doctype.public_missing == true));
if (exppub) {
- assert(token->data.doctype.public_id.len == strlen(exppub));
- assert(strncmp(gotpub, exppub, strlen(exppub)) == 0);
+ assert(token->data.doctype.public_id.len == LEN(exppub));
+ assert(strncmp(gotpub, exppub, LEN(exppub)) == 0);
}
assert((expsys == NULL) ==
(token->data.doctype.system_missing == true));
if (gotsys) {
- assert(token->data.doctype.system_id.len == strlen(expsys));
- assert(strncmp(gotsys, expsys, strlen(expsys)) == 0);
+ assert(token->data.doctype.system_id.len == LEN(expsys));
+ assert(strncmp(gotsys, expsys, LEN(expsys)) == 0);
}
assert(expquirks == token->data.doctype.force_quirks);
@@ -348,8 +348,8 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
printf("attributes:\n");
}
- assert(token->data.tag.name.len == strlen(expname));
- assert(strncmp(tagname, expname, strlen(expname)) == 0);
+ assert(token->data.tag.name.len == LEN(expname));
+ assert(strncmp(tagname, expname, LEN(expname)) == 0);
assert((token->data.tag.n_attributes == 0) ==
(expattrs == NULL));
@@ -373,11 +373,11 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
(int) namelen, gotname,
(int) vallen, gotval);
- assert(namelen == strlen(expname));
+ assert(namelen == LEN(expname));
assert(strncmp(gotname, expname,
strlen(expname)) == 0);
- assert(vallen == strlen(expval));
- assert(strncmp(gotval, expval, strlen(expval)) == 0);
+ assert(vallen == LEN(expval));
+ assert(strncmp(gotval, expval, LEN(expval)) == 0);
expattrs = expattrs->next;
}
@@ -398,8 +398,8 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
(token->data.tag.n_attributes > 0) ?
"attributes:" : "");
- assert(token->data.tag.name.len == strlen(expname));
- assert(strncmp(tagname, expname, strlen(expname)) == 0);
+ assert(token->data.tag.name.len == LEN(expname));
+ assert(strncmp(tagname, expname, LEN(expname)) == 0);
}
break;
case HUBBUB_TOKEN_COMMENT:
@@ -413,8 +413,8 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
printf(" got: '%.*s'\n",
(int) token->data.comment.len, gotstr);
- assert(token->data.comment.len == strlen(expstr));
- assert(strncmp(gotstr, expstr, strlen(expstr)) == 0);
+ assert(token->data.comment.len == LEN(expstr));
+ assert(strncmp(gotstr, expstr, LEN(expstr)) == 0);
}
break;
case HUBBUB_TOKEN_CHARACTER:
diff --git a/test/tokeniser3.c b/test/tokeniser3.c
index 949ddd0..e68a230 100644
--- a/test/tokeniser3.c
+++ b/test/tokeniser3.c
@@ -132,7 +132,7 @@ void run_test(context *ctx)
if (ctx->last_start_tag != NULL) {
/* Fake up a start tag, in PCDATA state */
- size_t len = strlen(ctx->last_start_tag) + 3;
+ size_t len = LEN(ctx->last_start_tag) + 3;
uint8_t *buf = malloc(len);
snprintf((char *) buf, len, "<%s>",
@@ -308,21 +308,21 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
(int) token->data.doctype.system_id.len);
}
- assert(token->data.doctype.name.len == strlen(expname));
- assert(strncmp(gotname, expname, strlen(expname)) == 0);
+ assert(token->data.doctype.name.len == LEN(expname));
+ assert(strncmp(gotname, expname, LEN(expname)) == 0);
assert((exppub == NULL) ==
(token->data.doctype.public_missing == true));
if (exppub) {
- assert(token->data.doctype.public_id.len == strlen(exppub));
- assert(strncmp(gotpub, exppub, strlen(exppub)) == 0);
+ assert(token->data.doctype.public_id.len == LEN(exppub));
+ assert(strncmp(gotpub, exppub, LEN(exppub)) == 0);
}
assert((expsys == NULL) ==
(token->data.doctype.system_missing == true));
if (gotsys) {
- assert(token->data.doctype.system_id.len == strlen(expsys));
- assert(strncmp(gotsys, expsys, strlen(expsys)) == 0);
+ assert(token->data.doctype.system_id.len == LEN(expsys));
+ assert(strncmp(gotsys, expsys, LEN(expsys)) == 0);
}
assert(expquirks == token->data.doctype.force_quirks);
@@ -354,8 +354,8 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
printf("attributes:\n");
}
- assert(token->data.tag.name.len == strlen(expname));
- assert(strncmp(tagname, expname, strlen(expname)) == 0);
+ assert(token->data.tag.name.len == LEN(expname));
+ assert(strncmp(tagname, expname, LEN(expname)) == 0);
assert((token->data.tag.n_attributes == 0) ==
(expattrs == NULL));
@@ -379,11 +379,11 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
(int) namelen, gotname,
(int) vallen, gotval);
- assert(namelen == strlen(expname));
+ assert(namelen == LEN(expname));
assert(strncmp(gotname, expname,
strlen(expname)) == 0);
- assert(vallen == strlen(expval));
- assert(strncmp(gotval, expval, strlen(expval)) == 0);
+ assert(vallen == LEN(expval));
+ assert(strncmp(gotval, expval, LEN(expval)) == 0);
expattrs = expattrs->next;
}
@@ -404,8 +404,8 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
(token->data.tag.n_attributes > 0) ?
"attributes:" : "");
- assert(token->data.tag.name.len == strlen(expname));
- assert(strncmp(tagname, expname, strlen(expname)) == 0);
+ assert(token->data.tag.name.len == LEN(expname));
+ assert(strncmp(tagname, expname, LEN(expname)) == 0);
}
break;
case HUBBUB_TOKEN_COMMENT:
@@ -419,8 +419,8 @@ hubbub_error token_handler(const hubbub_token *token, void *pw)
printf(" got: '%.*s'\n",
(int) token->data.comment.len, gotstr);
- assert(token->data.comment.len == strlen(expstr));
- assert(strncmp(gotstr, expstr, strlen(expstr)) == 0);
+ assert(token->data.comment.len == LEN(expstr));
+ assert(strncmp(gotstr, expstr, LEN(expstr)) == 0);
}
break;
case HUBBUB_TOKEN_CHARACTER: