From 6b31f0eaae57409c267977a7d658f8900c510dfd Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Mon, 4 Aug 2008 23:04:59 +0000 Subject: For tokens where there's a possibility of case differences requiring case insensitive matching, intern lower cased versions of strings alongside the originals. svn path=/trunk/libcss/; revision=4902 --- src/lex/lex.c | 2 + src/lex/lex.h | 2 + src/parse/parse.c | 225 +++++++++++++++++++++++++++++++++--------------------- 3 files changed, 140 insertions(+), 89 deletions(-) (limited to 'src') diff --git a/src/lex/lex.c b/src/lex/lex.c index 5fdcff4..8f85d25 100644 --- a/src/lex/lex.c +++ b/src/lex/lex.c @@ -1141,6 +1141,8 @@ start: t->type = CSS_TOKEN_EOF; t->data.ptr = NULL; t->data.len = 0; + t->lower.ptr = NULL; + t->lower.len = 0; t->col = lexer->currentCol; t->line = lexer->currentLine; lexer->escapeSeen = false; diff --git a/src/lex/lex.h b/src/lex/lex.h index 97a4a17..6bfba6d 100644 --- a/src/lex/lex.h +++ b/src/lex/lex.h @@ -51,6 +51,8 @@ typedef struct css_token { css_string data; + css_string lower; + uint32_t col; uint32_t line; } css_token; diff --git a/src/parse/parse.c b/src/parse/parse.c index c603d6b..533d822 100644 --- a/src/parse/parse.c +++ b/src/parse/parse.c @@ -6,6 +6,8 @@ */ #include +#include +#include #include #include @@ -539,9 +541,54 @@ css_error getToken(css_parser *parser, const css_token **token) if (t->data.ptr != NULL && t->data.len > 0) { /* Insert token text into the dictionary */ const parserutils_dict_entry *interned; + uint8_t temp[t->data.len]; + bool lower = false; + + switch (t->type) { + case CSS_TOKEN_IDENT: + case CSS_TOKEN_ATKEYWORD: + case CSS_TOKEN_HASH: + case CSS_TOKEN_FUNCTION: + for (size_t i = 0; i < t->data.len; i++) { + temp[i] = tolower(t->data.ptr[i]); + if (temp[i] != t->data.ptr[i]) + lower = true; + } + break; + default: + break; + } + + if (lower == true) { + /* We get to insert it twice - once for the raw + * data, and once for a lowercased version that + * we need internally. */ + perror = parserutils_dict_insert( + parser->dictionary, + temp, t->data.len, + &interned); + if (perror != PARSERUTILS_OK) { + return css_error_from_parserutils_error( + perror); + } - perror = parserutils_dict_insert(parser->dictionary, - t->data.ptr, t->data.len, &interned); + t->lower.ptr = interned->data; + t->lower.len = interned->len; + + perror = parserutils_dict_insert( + parser->dictionary, + t->data.ptr, t->data.len, + &interned); + } else { + /* Otherwise, we're not interested in case */ + perror = parserutils_dict_insert( + parser->dictionary, + t->data.ptr, t->data.len, + &interned); + + t->lower.ptr = interned->data; + t->lower.len = interned->len; + } if (perror != PARSERUTILS_OK) return css_error_from_parserutils_error(perror); @@ -778,8 +825,8 @@ css_error parseRuleset(css_parser *parser) /* The grammar's ambiguous here -- selectors may start with a * brace. We're going to assume that that won't happen, * however. */ - if (token->type == CSS_TOKEN_CHAR && token->data.len == 1 && - token->data.ptr[0] == '{') { + if (token->type == CSS_TOKEN_CHAR && token->lower.len == 1 && + token->lower.ptr[0] == '{') { state->substate = WS; goto ws; } else { @@ -813,8 +860,8 @@ css_error parseRuleset(css_parser *parser) if (error != CSS_OK) return error; - if (token->type != CSS_TOKEN_CHAR || token->data.len != 1 || - token->data.ptr[0] != '{') { + if (token->type != CSS_TOKEN_CHAR || token->lower.len != 1 || + token->lower.ptr[0] != '{') { /* This should never happen, as FOLLOW(selector) * contains only '{' */ assert(0 && "Expected {"); @@ -878,8 +925,8 @@ css_error parseRulesetEnd(css_parser *parser) if (error != CSS_OK) return error; - if (token->type != CSS_TOKEN_CHAR || token->data.len != 1 || - token->data.ptr[0] != '}') { + if (token->type != CSS_TOKEN_CHAR || token->lower.len != 1 || + token->lower.ptr[0] != '}') { /* This should never happen, as FOLLOW(decl-list) * contains only '}' */ assert(0 && "Expected }"); @@ -953,9 +1000,9 @@ css_error parseAtRule(css_parser *parser) /* Grammar ambiguity: any0 can be followed by '{',';',')',']'. * at-rule can only be followed by '{' and ';'. */ - if (token->type == CSS_TOKEN_CHAR && token->data.len == 1) { - if (token->data.ptr[0] == ')' || - token->data.ptr[0] == ']') { + if (token->type == CSS_TOKEN_CHAR && token->lower.len == 1) { + if (token->lower.ptr[0] == ')' || + token->lower.ptr[0] == ']') { parser_state to = { sAny0, Initial }; parser_state subsequent = { sAtRule, AfterAny }; @@ -1006,12 +1053,12 @@ css_error parseAtRuleEnd(css_parser *parser) if (error != CSS_OK) return error; - if (token->type != CSS_TOKEN_CHAR || token->data.len != 1) { + if (token->type != CSS_TOKEN_CHAR || token->lower.len != 1) { /* Should never happen FOLLOW(at-rule) == '{', ';'*/ assert(0 && "Expected { or ;"); } - if (token->data.ptr[0] == '{') { + if (token->lower.ptr[0] == '{') { parser_state to = { sBlock, Initial }; parser_state subsequent = { sAtRuleEnd, AfterBlock }; @@ -1020,7 +1067,7 @@ css_error parseAtRuleEnd(css_parser *parser) return error; return transition(parser, to, subsequent); - } else if (token->data.ptr[0] != ';') { + } else if (token->lower.ptr[0] != ';') { /* Again, should never happen */ assert(0 && "Expected ;"); } @@ -1072,8 +1119,8 @@ css_error parseBlock(css_parser *parser) parserutils_vector_clear(parser->tokens); - if (token->type != CSS_TOKEN_CHAR || token->data.len != 1 || - token->data.ptr[0] != '{') { + if (token->type != CSS_TOKEN_CHAR || token->lower.len != 1 || + token->lower.ptr[0] != '{') { /* This should never happen, as FIRST(block) == '{' */ assert(0 && "Expected {"); } @@ -1099,8 +1146,8 @@ css_error parseBlock(css_parser *parser) if (error != CSS_OK) return error; - if (token->type != CSS_TOKEN_CHAR || token->data.len != 1 || - token->data.ptr[0] != '}') { + if (token->type != CSS_TOKEN_CHAR || token->lower.len != 1 || + token->lower.ptr[0] != '}') { /* This should never happen, as * FOLLOW(block-content) == '}' */ assert(0 && "Expected }"); @@ -1152,8 +1199,8 @@ css_error parseBlockContent(css_parser *parser) if (token->type == CSS_TOKEN_ATKEYWORD) { state->substate = WS; } else if (token->type == CSS_TOKEN_CHAR) { - if (token->data.len == 1 && - token->data.ptr[0] == '{') { + if (token->lower.len == 1 && + token->lower.ptr[0] == '{') { /* Grammar ambiguity. Assume block */ parser_state to = { sBlock, Initial }; parser_state subsequent = @@ -1176,12 +1223,12 @@ css_error parseBlockContent(css_parser *parser) return transition(parser, to, subsequent); - } else if (token->data.len == 1 && - token->data.ptr[0] == ';') { + } else if (token->lower.len == 1 && + token->lower.ptr[0] == ';') { /* Grammar ambiguity. Assume semi */ state->substate = WS; - } else if (token->data.len == 1 && - token->data.ptr[0] == '}') { + } else if (token->lower.len == 1 && + token->lower.ptr[0] == '}') { /* Grammar ambiguity. Assume end */ error = pushBack(parser, token); if (error != CSS_OK) @@ -1305,8 +1352,8 @@ css_error parseDeclaration(css_parser *parser) if (error != CSS_OK) return error; - if (token->type != CSS_TOKEN_CHAR || token->data.len != 1 || - token->data.ptr[0] != ':') { + if (token->type != CSS_TOKEN_CHAR || token->lower.len != 1 || + token->lower.ptr[0] != ':') { /* parse error -- expected : */ parser_state to = { sMalformedDecl, Initial }; @@ -1369,18 +1416,18 @@ css_error parseDeclList(css_parser *parser) if (error != CSS_OK) return error; - if (token->type != CSS_TOKEN_CHAR || token->data.len != 1) { + if (token->type != CSS_TOKEN_CHAR || token->lower.len != 1) { /* Should never happen */ assert(0 && "Expected ; or }"); } - if (token->data.ptr[0] == '}') { + if (token->lower.ptr[0] == '}') { error = pushBack(parser, token); if (error != CSS_OK) return error; return done(parser); - } else if (token->data.ptr[0] == ';') { + } else if (token->lower.ptr[0] == ';') { state->substate = WS; } else { /* Should never happen */ @@ -1419,9 +1466,9 @@ css_error parseDeclListEnd(css_parser *parser) return error; if (token->type != CSS_TOKEN_CHAR || - token->data.len != 1 || - (token->data.ptr[0] != ';' && - token->data.ptr[0] != '}')) { + token->lower.len != 1 || + (token->lower.ptr[0] != ';' && + token->lower.ptr[0] != '}')) { parser_state to = { sDeclaration, Initial }; parser_state subsequent = { sDeclListEnd, AfterDeclaration }; @@ -1508,9 +1555,9 @@ css_error parseValue1(css_parser *parser) /* Grammar ambiguity -- assume ';' or '}' mark end */ if (token->type == CSS_TOKEN_CHAR && - token->data.len == 1 && - (token->data.ptr[0] == ';' || - token->data.ptr[0] == '}')) { + token->lower.len == 1 && + (token->lower.ptr[0] == ';' || + token->lower.ptr[0] == '}')) { /* Parse error */ parser->parseError = true; @@ -1556,9 +1603,9 @@ css_error parseValue0(css_parser *parser) /* Grammar ambiguity -- assume ';' or '}' mark end */ if (token->type == CSS_TOKEN_CHAR && - token->data.len == 1 && - (token->data.ptr[0] == ';' || - token->data.ptr[0] == '}')) { + token->lower.len == 1 && + (token->lower.ptr[0] == ';' || + token->lower.ptr[0] == '}')) { return done(parser); } @@ -1595,8 +1642,8 @@ css_error parseValue(css_parser *parser) if (token->type == CSS_TOKEN_ATKEYWORD) { state->substate = WS; } else if (token->type == CSS_TOKEN_CHAR && - token->data.len == 1 && - token->data.ptr[0] == '{') { + token->lower.len == 1 && + token->lower.ptr[0] == '{') { /* Grammar ambiguity. Assume block. */ parser_state to = { sBlock, Initial }; @@ -1656,11 +1703,11 @@ css_error parseAny0(css_parser *parser) /* Grammar ambiguity: * assume '{', ';', ')', ']' mark end */ if (token->type == CSS_TOKEN_CHAR && - token->data.len == 1 && - (token->data.ptr[0] == '{' || - token->data.ptr[0] == ';' || - token->data.ptr[0] == ')' || - token->data.ptr[0] == ']')) { + token->lower.len == 1 && + (token->lower.ptr[0] == '{' || + token->lower.ptr[0] == ';' || + token->lower.ptr[0] == ')' || + token->lower.ptr[0] == ']')) { return done(parser); } @@ -1720,15 +1767,15 @@ css_error parseAny1(css_parser *parser) /* Grammar ambiguity: any0 can be followed by * '{', ';', ')', ']'. any1 can only be followed by '{'. */ - if (token->type == CSS_TOKEN_CHAR && token->data.len == 1) { - if (token->data.ptr[0] == ';' || - token->data.ptr[0] == ')' || - token->data.ptr[0] == ']') { + if (token->type == CSS_TOKEN_CHAR && token->lower.len == 1) { + if (token->lower.ptr[0] == ';' || + token->lower.ptr[0] == ')' || + token->lower.ptr[0] == ']') { parser_state to = { sAny, Initial }; parser_state subsequent = { sAny1, AfterAny }; return transition(parser, to, subsequent); - } else if (token->data.ptr[0] != '{') { + } else if (token->lower.ptr[0] != '{') { /* parse error */ parser->parseError = true; } @@ -1798,11 +1845,11 @@ css_error parseAny(css_parser *parser) parser->match_char = ')'; state->substate = WS; } else if (token->type == CSS_TOKEN_CHAR && - token->data.len == 1 && - (token->data.ptr[0] == '(' || - token->data.ptr[0] == '[')) { + token->lower.len == 1 && + (token->lower.ptr[0] == '(' || + token->lower.ptr[0] == '[')) { parser->match_char = - token->data.ptr[0] == '(' ? ')' : ']'; + token->lower.ptr[0] == '(' ? ')' : ']'; state->substate = WS; } @@ -1834,8 +1881,8 @@ css_error parseAny(css_parser *parser) return error; /* Match correct close bracket (grammar ambiguity) */ - if (token->type == CSS_TOKEN_CHAR && token->data.len == 1 && - token->data.ptr[0] == parser->match_char) { + if (token->type == CSS_TOKEN_CHAR && token->lower.len == 1 && + token->lower.ptr[0] == parser->match_char) { state->substate = WS2; goto ws2; } @@ -1878,14 +1925,14 @@ css_error parseMalformedDeclaration(css_parser *parser) break; if (token->type != CSS_TOKEN_CHAR || - token->data.len != 1 || - (token->data.ptr[0] != '{' && - token->data.ptr[0] != '}' && - token->data.ptr[0] != '[' && - token->data.ptr[0] != ']' && - token->data.ptr[0] != '(' && - token->data.ptr[0] != ')' && - token->data.ptr[0] != ';')) + token->lower.len != 1 || + (token->lower.ptr[0] != '{' && + token->lower.ptr[0] != '}' && + token->lower.ptr[0] != '[' && + token->lower.ptr[0] != ']' && + token->lower.ptr[0] != '(' && + token->lower.ptr[0] != ')' && + token->lower.ptr[0] != ';')) continue; char want; @@ -1895,17 +1942,17 @@ css_error parseMalformedDeclaration(css_parser *parser) /* If the stack is empty, then we're done if we've got * either a ';' or '}' */ if (match == NULL) { - if (token->data.ptr[0] == ';' || - token->data.ptr[0] == '}') + if (token->lower.ptr[0] == ';' || + token->lower.ptr[0] == '}') break; } /* Regardless, if we've got a semicolon, ignore it */ - if (token->data.ptr[0] == ';') + if (token->lower.ptr[0] == ';') continue; /* Get corresponding start tokens for end tokens */ - switch (token->data.ptr[0]) { + switch (token->lower.ptr[0]) { case '}': want = '{'; break; @@ -1927,7 +1974,7 @@ css_error parseMalformedDeclaration(css_parser *parser) parser->open_items, NULL); } else if (want == 0) { parserutils_stack_push(parser->open_items, - &token->data.ptr[0]); + &token->lower.ptr[0]); } } } @@ -1972,13 +2019,13 @@ css_error parseMalformedSelector(css_parser *parser) break; if (token->type != CSS_TOKEN_CHAR || - token->data.len != 1 || - (token->data.ptr[0] != '{' && - token->data.ptr[0] != '}' && - token->data.ptr[0] != '[' && - token->data.ptr[0] != ']' && - token->data.ptr[0] != '(' && - token->data.ptr[0] != ')')) + token->lower.len != 1 || + (token->lower.ptr[0] != '{' && + token->lower.ptr[0] != '}' && + token->lower.ptr[0] != '[' && + token->lower.ptr[0] != ']' && + token->lower.ptr[0] != '(' && + token->lower.ptr[0] != ')')) continue; char want; @@ -1986,7 +2033,7 @@ css_error parseMalformedSelector(css_parser *parser) parser->open_items); /* Get corresponding start tokens for end tokens */ - switch (token->data.ptr[0]) { + switch (token->lower.ptr[0]) { case '}': want = '{'; break; @@ -2008,7 +2055,7 @@ css_error parseMalformedSelector(css_parser *parser) parser->open_items, NULL); } else if (want == 0) { parserutils_stack_push(parser->open_items, - &token->data.ptr[0]); + &token->lower.ptr[0]); } /* If we encountered a '}', there was data on the stack @@ -2065,14 +2112,14 @@ css_error parseMalformedAtRule(css_parser *parser) break; if (token->type != CSS_TOKEN_CHAR || - token->data.len != 1 || - (token->data.ptr[0] != '{' && - token->data.ptr[0] != '}' && - token->data.ptr[0] != '[' && - token->data.ptr[0] != ']' && - token->data.ptr[0] != '(' && - token->data.ptr[0] != ')' && - token->data.ptr[0] != ';')) + token->lower.len != 1 || + (token->lower.ptr[0] != '{' && + token->lower.ptr[0] != '}' && + token->lower.ptr[0] != '[' && + token->lower.ptr[0] != ']' && + token->lower.ptr[0] != '(' && + token->lower.ptr[0] != ')' && + token->lower.ptr[0] != ';')) continue; char want; @@ -2081,7 +2128,7 @@ css_error parseMalformedAtRule(css_parser *parser) /* If we have a semicolon, then we're either done or * need to ignore it */ - if (token->data.ptr[0] == ';') { + if (token->lower.ptr[0] == ';') { if (match == NULL) break; else @@ -2089,7 +2136,7 @@ css_error parseMalformedAtRule(css_parser *parser) } /* Get corresponding start tokens for end tokens */ - switch (token->data.ptr[0]) { + switch (token->lower.ptr[0]) { case '}': want = '{'; break; @@ -2111,7 +2158,7 @@ css_error parseMalformedAtRule(css_parser *parser) parser->open_items, NULL); } else if (want == 0) { parserutils_stack_push(parser->open_items, - &token->data.ptr[0]); + &token->lower.ptr[0]); } /* If we encountered a '}', there was data on the stack -- cgit v1.2.3