summaryrefslogtreecommitdiff
path: root/src/tokeniser
diff options
context:
space:
mode:
author: Rupinder Singh Khokhar <rsk1coder99@gmail.com> 2014-06-13 05:00:11 +0530
committer: Rupinder Singh Khokhar <rsk1coder99@gmail.com> 2014-07-09 10:04:20 +0530
commit: 02f5504de388eb69ded0bc9e05361d8db82c2137 (patch)
tree: 702495f5dce9b1c41b2191de259d3db58a430a29 /src/tokeniser
parent: a501b83d9be45e80b59fc8eca8e1816f467b4662 (diff)
download: libhubbub-02f5504de388eb69ded0bc9e05361d8db82c2137.tar.gz
libhubbub-02f5504de388eb69ded0bc9e05361d8db82c2137.tar.bz2
[Fix] The tokeniser wrongly emitted a replacement character instead of a UTF-8 NULL. Also, the tester used strlen to calculate string lengths, which segfaults if a null is passed; this is also fixed.
Diffstat (limited to 'src/tokeniser')
-rw-r--r--src/tokeniser/tokeniser.c10
1 files changed, 8 insertions, 2 deletions
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 7355f80..d108490 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -44,6 +44,12 @@ static const hubbub_string u_fffd_str = { u_fffd, sizeof(u_fffd) };
static const uint8_t lf = '\n';
static const hubbub_string lf_str = { &lf, 1 };
+/**
+ * UTF-8 encoding of U+0000 NULL CHARACTER
+ */
+static const uint8_t u_null[1] = { '\x00'};
+static const hubbub_string u_null_str = { u_null, sizeof(u_null) };
+
/**
* Tokeniser states
@@ -751,8 +757,8 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
emit_current_chars(tokeniser);
}
- /* Emit a replacement character */
- emit_character_token(tokeniser, &u_fffd_str);
+ /* Emit a null character */
+ emit_character_token(tokeniser, &u_null_str);
/* Advance past NUL */
parserutils_inputstream_advance(tokeniser->input, 1);