summaryrefslogtreecommitdiff
path: root/src/tokeniser
diff options
context:
space:
mode:
author: Andrew Sidwell <andy@entai.co.uk> 2008-06-18 17:33:24 +0000
committer: Andrew Sidwell <andy@entai.co.uk> 2008-06-18 17:33:24 +0000
commit: 48ba3bdbd561645a78ef5e5cb99ead7ef3a10661 (patch)
tree: ff11bfb8ff547502d5dd2691da9ce3f34c382223 /src/tokeniser
parent: fcc857c2b72ecc43388a0ee34f0a8ddfed8d13d8 (diff)
download: libhubbub-48ba3bdbd561645a78ef5e5cb99ead7ef3a10661.tar.gz
download: libhubbub-48ba3bdbd561645a78ef5e5cb99ead7ef3a10661.tar.bz2
Fix remaining issues around passing the testsuite, and make sure all the tokeniser is tested.
svn path=/trunk/hubbub/; revision=4387
Diffstat (limited to 'src/tokeniser')
-rw-r--r--src/tokeniser/tokeniser.c144
1 file changed, 2 insertions, 142 deletions
diff --git a/src/tokeniser/tokeniser.c b/src/tokeniser/tokeniser.c
index f3c902d..6c564a8 100644
--- a/src/tokeniser/tokeniser.c
+++ b/src/tokeniser/tokeniser.c
@@ -4,7 +4,6 @@
* http://www.opensource.org/licenses/mit-license.php
* Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
*/
-#include <stdio.h>
#include <stdbool.h>
#include <string.h>
@@ -31,7 +30,6 @@ typedef enum hubbub_tokeniser_state {
HUBBUB_TOKENISER_STATE_CHARACTER_REFERENCE_DATA,
HUBBUB_TOKENISER_STATE_TAG_OPEN,
HUBBUB_TOKENISER_STATE_CLOSE_TAG_OPEN,
- HUBBUB_TOKENISER_STATE_CLOSE_TAG_MATCH,
HUBBUB_TOKENISER_STATE_TAG_NAME,
HUBBUB_TOKENISER_STATE_BEFORE_ATTRIBUTE_NAME,
HUBBUB_TOKENISER_STATE_ATTRIBUTE_NAME,
@@ -92,7 +90,6 @@ typedef struct hubbub_tokeniser_context {
hubbub_string last_start_tag_name; /**< Name of the last start tag
* emitted */
struct {
- hubbub_string tag; /**< Pending close tag */
uint32_t count;
} close_tag_match;
@@ -170,8 +167,6 @@ static bool hubbub_tokeniser_handle_character_reference_data(
static bool hubbub_tokeniser_handle_tag_open(hubbub_tokeniser *tokeniser);
static bool hubbub_tokeniser_handle_close_tag_open(
hubbub_tokeniser *tokeniser);
-static bool hubbub_tokeniser_handle_close_tag_match(
- hubbub_tokeniser *tokeniser);
static bool hubbub_tokeniser_handle_tag_name(hubbub_tokeniser *tokeniser);
static bool hubbub_tokeniser_handle_before_attribute_name(
hubbub_tokeniser *tokeniser);
@@ -301,7 +296,6 @@ hubbub_tokeniser *hubbub_tokeniser_create(hubbub_inputstream *input,
tok->context.current_tag.name.type = HUBBUB_STRING_OFF;
tok->context.current_comment.type = HUBBUB_STRING_OFF;
tok->context.current_chars.type = HUBBUB_STRING_OFF;
- tok->context.close_tag_match.tag.type = HUBBUB_STRING_OFF;
tok->context.match_entity.str.type = HUBBUB_STRING_OFF;
return tok;
@@ -396,10 +390,6 @@ hubbub_error hubbub_tokeniser_run(hubbub_tokeniser *tokeniser)
cont = hubbub_tokeniser_handle_close_tag_open(
tokeniser);
break;
- case HUBBUB_TOKENISER_STATE_CLOSE_TAG_MATCH:
- cont = hubbub_tokeniser_handle_close_tag_match(
- tokeniser);
- break;
case HUBBUB_TOKENISER_STATE_TAG_NAME:
cont = hubbub_tokeniser_handle_tag_name(tokeniser);
break;
@@ -1016,132 +1006,6 @@ bool hubbub_tokeniser_handle_close_tag_open(hubbub_tokeniser *tokeniser)
return true;
}
-bool hubbub_tokeniser_handle_close_tag_match(hubbub_tokeniser *tokeniser)
-{
- hubbub_tokeniser_context *ctx = &tokeniser->context;
- hubbub_tag *ctag = &tokeniser->context.current_tag;
- uint32_t c = 0;
-
- while (ctx->close_tag_match.tag.len < ctag->name.len &&
- (c = hubbub_inputstream_peek(tokeniser->input)) !=
- HUBBUB_INPUTSTREAM_EOF &&
- c != HUBBUB_INPUTSTREAM_OOD) {
- /* Match last open tag */
- uint32_t off;
- size_t len;
-
- off = hubbub_inputstream_cur_pos(tokeniser->input, &len);
-
- if (ctx->close_tag_match.tag.len == 0) {
- ctx->close_tag_match.tag.data.off = off;
- ctx->close_tag_match.tag.len = len;
- } else {
- ctx->close_tag_match.tag.len += len;
- }
-
- hubbub_inputstream_advance(tokeniser->input);
-
- if (ctx->close_tag_match.tag.len > ctag->name.len ||
- (ctx->close_tag_match.tag.len == ctag->name.len &&
- hubbub_inputstream_compare_range_ci(
- tokeniser->input,
- ctag->name.data.off,
- ctx->close_tag_match.tag.data.off,
- ctag->name.len) != 0)) {
- hubbub_token token;
-
- /* Rewind input stream to start of tag name */
- if (hubbub_inputstream_rewind(tokeniser->input,
- ctx->close_tag_match.tag.len) !=
- HUBBUB_OK)
- abort();
-
- /* Emit "</" */
- token.type = HUBBUB_TOKEN_CHARACTER;
- token.data.character =
- tokeniser->context.current_chars;
-
- hubbub_tokeniser_emit_token(tokeniser, &token);
-
- tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
- hubbub_inputstream_advance(tokeniser->input);
-
- return true;
- } else if (ctx->close_tag_match.tag.len == ctag->name.len &&
- hubbub_inputstream_compare_range_ci(
- tokeniser->input,
- ctag->name.data.off,
- ctx->close_tag_match.tag.data.off,
- ctag->name.len) == 0) {
- /* Matched => stop searching */
- break;
- }
- }
-
- if (c == HUBBUB_INPUTSTREAM_OOD) {
- /* Need more data */
- return false;
- }
-
- if (c == HUBBUB_INPUTSTREAM_EOF) {
- /* Ran out of data - parse error */
- hubbub_token token;
-
- /* Rewind input stream to start of tag name */
- if (hubbub_inputstream_rewind(tokeniser->input,
- ctx->close_tag_match.tag.len) != HUBBUB_OK)
- abort();
-
- /* Emit "</" */
- token.type = HUBBUB_TOKEN_CHARACTER;
- token.data.character = tokeniser->context.current_chars;
-
- hubbub_tokeniser_emit_token(tokeniser, &token);
-
- tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
-
- return true;
- }
-
- /* Match following char */
- c = hubbub_inputstream_peek(tokeniser->input);
-
- if (c == HUBBUB_INPUTSTREAM_OOD) {
- /* Need more data */
- return false;
- }
-
- /* Rewind input stream to start of tag name */
- if (hubbub_inputstream_rewind(tokeniser->input,
- ctx->close_tag_match.tag.len) != HUBBUB_OK)
- abort();
-
- /* Check that following char was valid */
- if (c != '\t' && c != '\n' && c != '\f' && c != ' ' && c != '>' &&
- c != '/' && c != HUBBUB_INPUTSTREAM_EOF) {
- hubbub_token token;
-
- /* Emit "</" */
- token.type = HUBBUB_TOKEN_CHARACTER;
- token.data.character = tokeniser->context.current_chars;
-
- hubbub_tokeniser_emit_token(tokeniser, &token);
-
- tokeniser->state = HUBBUB_TOKENISER_STATE_DATA;
- hubbub_inputstream_advance(tokeniser->input);
-
- return true;
- }
-
- /* Switch the content model back to PCDATA */
- tokeniser->content_model = HUBBUB_CONTENT_MODEL_PCDATA;
-
- /* Finally, transition back to close tag open state */
- tokeniser->state = HUBBUB_TOKENISER_STATE_CLOSE_TAG_OPEN;
-
- return true;
-}
-
bool hubbub_tokeniser_handle_tag_name(hubbub_tokeniser *tokeniser)
{
hubbub_tag *ctag = &tokeniser->context.current_tag;
@@ -1673,12 +1537,8 @@ bool hubbub_tokeniser_handle_attribute_value_uq(hubbub_tokeniser *tokeniser)
size_t len;
pos = hubbub_inputstream_cur_pos(tokeniser->input, &len);
-
- if (ctag->attributes[ctag->n_attributes - 1].value.len == 0) {
- ctag->attributes[ctag->n_attributes - 1].value.data.off =
- pos;
- }
-
+ /* don't worry about setting the offset -- this is
+ * always done before this state is reached */
ctag->attributes[ctag->n_attributes - 1].value.len += len;
hubbub_inputstream_advance(tokeniser->input);