summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Sidwell <andy@entai.co.uk>2008-06-18 23:59:26 +0000
committerAndrew Sidwell <andy@entai.co.uk>2008-06-18 23:59:26 +0000
commitc9e50d84e6471c7c8251e7839cf4ea01e550dbd6 (patch)
tree71815674b19fd2b7226f2a5e57e63d1dde891c7b
parent10bfe64ca643b88884887259fc662c950624e2a6 (diff)
downloadlibhubbub-c9e50d84e6471c7c8251e7839cf4ea01e550dbd6.tar.gz
libhubbub-c9e50d84e6471c7c8251e7839cf4ea01e550dbd6.tar.bz2
Fix entity tokenisation when data is given one byte at a time.
svn path=/trunk/hubbub/; revision=4404
-rw-r--r--src/tokeniser/tokeniser.c21
1 files changed, 14 insertions, 7 deletions
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index 6c564a8..51ab885 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -116,6 +116,9 @@ typedef struct hubbub_tokeniser_context {
* matching completed */
bool done_setup; /**< Flag that match setup
* has completed */
+ bool overflow; /**< Whether this entity
+ * has overflowed the maximum
+ * numeric entity value */
void *context; /**< Context for named
* entity search */
size_t prev_len; /**< Previous byte length
@@ -2881,6 +2884,7 @@ bool hubbub_tokeniser_consume_character_reference(hubbub_tokeniser *tokeniser)
tokeniser->context.match_entity.return_state = tokeniser->state;
tokeniser->context.match_entity.complete = false;
tokeniser->context.match_entity.done_setup = true;
+ tokeniser->context.match_entity.overflow = false;
tokeniser->context.match_entity.context = NULL;
tokeniser->context.match_entity.prev_len = len;
@@ -2888,8 +2892,11 @@ bool hubbub_tokeniser_consume_character_reference(hubbub_tokeniser *tokeniser)
c = hubbub_inputstream_peek(tokeniser->input);
- if (c == HUBBUB_INPUTSTREAM_OOD)
+ if (c == HUBBUB_INPUTSTREAM_OOD) {
+ /* rewind because we need more data */
+ hubbub_inputstream_rewind(tokeniser->input, 1);
return false;
+ }
/* Reset allowed character for future calls */
tokeniser->context.allowed_char = '\0';
@@ -2922,7 +2929,6 @@ bool hubbub_tokeniser_handle_numbered_entity(hubbub_tokeniser *tokeniser)
uint32_t c = hubbub_inputstream_peek(tokeniser->input);
uint32_t pos;
size_t len;
- bool overflow = false;
hubbub_error error;
if (c == HUBBUB_INPUTSTREAM_OOD)
@@ -2978,7 +2984,7 @@ bool hubbub_tokeniser_handle_numbered_entity(hubbub_tokeniser *tokeniser)
}
if (ctx->match_entity.codepoint >= 0x10FFFF) {
- overflow = true;
+ ctx->match_entity.overflow = true;
}
hubbub_inputstream_advance(tokeniser->input);
@@ -3007,7 +3013,7 @@ bool hubbub_tokeniser_handle_numbered_entity(hubbub_tokeniser *tokeniser)
cp = cp1252Table[cp - 0x80];
} else if (cp == 0x0D) {
cp = 0x000A;
- } else if (overflow || cp <= 0x0008 ||
+ } else if (ctx->match_entity.overflow || cp <= 0x0008 ||
(0x000E <= cp && cp <= 0x001F) ||
(0x007F <= cp && cp <= 0x009F) ||
(0xD800 <= cp && cp <= 0xDFFF) ||
@@ -3086,13 +3092,14 @@ bool hubbub_tokeniser_handle_named_entity(hubbub_tokeniser *tokeniser)
hubbub_inputstream_advance(tokeniser->input);
}
+ if (c == HUBBUB_INPUTSTREAM_OOD) {
+ return false;
+ }
+
/* Rewind back possible matches, if any */
hubbub_inputstream_rewind(tokeniser->input,
ctx->match_entity.poss_len);
- if (c == HUBBUB_INPUTSTREAM_OOD)
- return false;
-
c = hubbub_inputstream_peek(tokeniser->input);
if ((tokeniser->context.match_entity.return_state ==