From 02f5504de388eb69ded0bc9e05361d8db82c2137 Mon Sep 17 00:00:00 2001
From: Rupinder Singh Khokhar <rsk1coder99@gmail.com>
Date: Fri, 13 Jun 2014 05:00:11 +0530
Subject: [Fix] The tokeniser wrongly emitted a replacement character instead
 of a UTF-8 NUL. Also, the tester used strlen to calculate string lengths,
 which segfaults if a null is passed; this is also fixed.

---
 src/tokeniser/tokeniser.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

(limited to 'src/tokeniser')

diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 7355f80..d108490 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -44,6 +44,12 @@ static const hubbub_string u_fffd_str = { u_fffd, sizeof(u_fffd) };
 static const uint8_t lf = '\n';
 static const hubbub_string lf_str = { &lf, 1 };
 
+/**
+ * UTF-8 encoding of U+0000 NULL CHARACTER (a single zero byte)
+ */
+static const uint8_t u_null[1] = { '\x00'};
+static const hubbub_string u_null_str = { u_null, sizeof(u_null) };
+
 
 /**
  * Tokeniser states
@@ -751,8 +757,8 @@ hubbub_error hubbub_tokeniser_handle_data(hubbub_tokeniser *tokeniser)
 				emit_current_chars(tokeniser);
 			}
 
-			/* Emit a replacement character */
-			emit_character_token(tokeniser, &u_fffd_str);
+			/* Emit a null character */
+			emit_character_token(tokeniser, &u_null_str);
 
 			/* Advance past NUL */
 			parserutils_inputstream_advance(tokeniser->input, 1);
-- 
cgit v1.2.3