summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJohn Mark Bell <jmb@netsurf-browser.org>2008-08-04 23:04:59 +0000
committerJohn Mark Bell <jmb@netsurf-browser.org>2008-08-04 23:04:59 +0000
commit6b31f0eaae57409c267977a7d658f8900c510dfd (patch)
tree8f160729d145807461c0b11d2a7623e5a9ddbb34 /src
parent5f24982f201c86331c20848ade943f8687203292 (diff)
downloadlibcss-6b31f0eaae57409c267977a7d658f8900c510dfd.tar.gz
libcss-6b31f0eaae57409c267977a7d658f8900c510dfd.tar.bz2
For tokens where there's a possibility of case differences requiring case insensitive matching, intern lower cased versions of strings alongside the originals.
svn path=/trunk/libcss/; revision=4902
Diffstat (limited to 'src')
-rw-r--r--src/lex/lex.c2
-rw-r--r--src/lex/lex.h2
-rw-r--r--src/parse/parse.c225
3 files changed, 140 insertions, 89 deletions
diff --git a/src/lex/lex.c b/src/lex/lex.c
index 5fdcff4..8f85d25 100644
--- a/src/lex/lex.c
+++ b/src/lex/lex.c
@@ -1141,6 +1141,8 @@ start:
t->type = CSS_TOKEN_EOF;
t->data.ptr = NULL;
t->data.len = 0;
+ t->lower.ptr = NULL;
+ t->lower.len = 0;
t->col = lexer->currentCol;
t->line = lexer->currentLine;
lexer->escapeSeen = false;
diff --git a/src/lex/lex.h b/src/lex/lex.h
index 97a4a17..6bfba6d 100644
--- a/src/lex/lex.h
+++ b/src/lex/lex.h
@@ -51,6 +51,8 @@ typedef struct css_token {
css_string data;
+ css_string lower;
+
uint32_t col;
uint32_t line;
} css_token;
diff --git a/src/parse/parse.c b/src/parse/parse.c
index c603d6b..533d822 100644
--- a/src/parse/parse.c
+++ b/src/parse/parse.c
@@ -6,6 +6,8 @@
*/
#include <assert.h>
+#include <ctype.h>
+#include <stdbool.h>
#include <parserutils/input/inputstream.h>
#include <parserutils/utils/dict.h>
@@ -539,9 +541,54 @@ css_error getToken(css_parser *parser, const css_token **token)
if (t->data.ptr != NULL && t->data.len > 0) {
/* Insert token text into the dictionary */
const parserutils_dict_entry *interned;
+ uint8_t temp[t->data.len];
+ bool lower = false;
+
+ switch (t->type) {
+ case CSS_TOKEN_IDENT:
+ case CSS_TOKEN_ATKEYWORD:
+ case CSS_TOKEN_HASH:
+ case CSS_TOKEN_FUNCTION:
+ for (size_t i = 0; i < t->data.len; i++) {
+ temp[i] = tolower(t->data.ptr[i]);
+ if (temp[i] != t->data.ptr[i])
+ lower = true;
+ }
+ break;
+ default:
+ break;
+ }
+
+ if (lower == true) {
+ /* We get to insert it twice - once for the raw
+ * data, and once for a lowercased version that
+ * we need internally. */
+ perror = parserutils_dict_insert(
+ parser->dictionary,
+ temp, t->data.len,
+ &interned);
+ if (perror != PARSERUTILS_OK) {
+ return css_error_from_parserutils_error(
+ perror);
+ }
- perror = parserutils_dict_insert(parser->dictionary,
- t->data.ptr, t->data.len, &interned);
+ t->lower.ptr = interned->data;
+ t->lower.len = interned->len;
+
+ perror = parserutils_dict_insert(
+ parser->dictionary,
+ t->data.ptr, t->data.len,
+ &interned);
+ } else {
+ /* Otherwise, we're not interested in case */
+ perror = parserutils_dict_insert(
+ parser->dictionary,
+ t->data.ptr, t->data.len,
+ &interned);
+
+ t->lower.ptr = interned->data;
+ t->lower.len = interned->len;
+ }
if (perror != PARSERUTILS_OK)
return css_error_from_parserutils_error(perror);
@@ -778,8 +825,8 @@ css_error parseRuleset(css_parser *parser)
/* The grammar's ambiguous here -- selectors may start with a
* brace. We're going to assume that that won't happen,
* however. */
- if (token->type == CSS_TOKEN_CHAR && token->data.len == 1 &&
- token->data.ptr[0] == '{') {
+ if (token->type == CSS_TOKEN_CHAR && token->lower.len == 1 &&
+ token->lower.ptr[0] == '{') {
state->substate = WS;
goto ws;
} else {
@@ -813,8 +860,8 @@ css_error parseRuleset(css_parser *parser)
if (error != CSS_OK)
return error;
- if (token->type != CSS_TOKEN_CHAR || token->data.len != 1 ||
- token->data.ptr[0] != '{') {
+ if (token->type != CSS_TOKEN_CHAR || token->lower.len != 1 ||
+ token->lower.ptr[0] != '{') {
/* This should never happen, as FOLLOW(selector)
* contains only '{' */
assert(0 && "Expected {");
@@ -878,8 +925,8 @@ css_error parseRulesetEnd(css_parser *parser)
if (error != CSS_OK)
return error;
- if (token->type != CSS_TOKEN_CHAR || token->data.len != 1 ||
- token->data.ptr[0] != '}') {
+ if (token->type != CSS_TOKEN_CHAR || token->lower.len != 1 ||
+ token->lower.ptr[0] != '}') {
/* This should never happen, as FOLLOW(decl-list)
* contains only '}' */
assert(0 && "Expected }");
@@ -953,9 +1000,9 @@ css_error parseAtRule(css_parser *parser)
/* Grammar ambiguity: any0 can be followed by '{',';',')',']'.
* at-rule can only be followed by '{' and ';'. */
- if (token->type == CSS_TOKEN_CHAR && token->data.len == 1) {
- if (token->data.ptr[0] == ')' ||
- token->data.ptr[0] == ']') {
+ if (token->type == CSS_TOKEN_CHAR && token->lower.len == 1) {
+ if (token->lower.ptr[0] == ')' ||
+ token->lower.ptr[0] == ']') {
parser_state to = { sAny0, Initial };
parser_state subsequent = { sAtRule, AfterAny };
@@ -1006,12 +1053,12 @@ css_error parseAtRuleEnd(css_parser *parser)
if (error != CSS_OK)
return error;
- if (token->type != CSS_TOKEN_CHAR || token->data.len != 1) {
+ if (token->type != CSS_TOKEN_CHAR || token->lower.len != 1) {
/* Should never happen FOLLOW(at-rule) == '{', ';'*/
assert(0 && "Expected { or ;");
}
- if (token->data.ptr[0] == '{') {
+ if (token->lower.ptr[0] == '{') {
parser_state to = { sBlock, Initial };
parser_state subsequent = { sAtRuleEnd, AfterBlock };
@@ -1020,7 +1067,7 @@ css_error parseAtRuleEnd(css_parser *parser)
return error;
return transition(parser, to, subsequent);
- } else if (token->data.ptr[0] != ';') {
+ } else if (token->lower.ptr[0] != ';') {
/* Again, should never happen */
assert(0 && "Expected ;");
}
@@ -1072,8 +1119,8 @@ css_error parseBlock(css_parser *parser)
parserutils_vector_clear(parser->tokens);
- if (token->type != CSS_TOKEN_CHAR || token->data.len != 1 ||
- token->data.ptr[0] != '{') {
+ if (token->type != CSS_TOKEN_CHAR || token->lower.len != 1 ||
+ token->lower.ptr[0] != '{') {
/* This should never happen, as FIRST(block) == '{' */
assert(0 && "Expected {");
}
@@ -1099,8 +1146,8 @@ css_error parseBlock(css_parser *parser)
if (error != CSS_OK)
return error;
- if (token->type != CSS_TOKEN_CHAR || token->data.len != 1 ||
- token->data.ptr[0] != '}') {
+ if (token->type != CSS_TOKEN_CHAR || token->lower.len != 1 ||
+ token->lower.ptr[0] != '}') {
/* This should never happen, as
* FOLLOW(block-content) == '}' */
assert(0 && "Expected }");
@@ -1152,8 +1199,8 @@ css_error parseBlockContent(css_parser *parser)
if (token->type == CSS_TOKEN_ATKEYWORD) {
state->substate = WS;
} else if (token->type == CSS_TOKEN_CHAR) {
- if (token->data.len == 1 &&
- token->data.ptr[0] == '{') {
+ if (token->lower.len == 1 &&
+ token->lower.ptr[0] == '{') {
/* Grammar ambiguity. Assume block */
parser_state to = { sBlock, Initial };
parser_state subsequent =
@@ -1176,12 +1223,12 @@ css_error parseBlockContent(css_parser *parser)
return transition(parser, to,
subsequent);
- } else if (token->data.len == 1 &&
- token->data.ptr[0] == ';') {
+ } else if (token->lower.len == 1 &&
+ token->lower.ptr[0] == ';') {
/* Grammar ambiguity. Assume semi */
state->substate = WS;
- } else if (token->data.len == 1 &&
- token->data.ptr[0] == '}') {
+ } else if (token->lower.len == 1 &&
+ token->lower.ptr[0] == '}') {
/* Grammar ambiguity. Assume end */
error = pushBack(parser, token);
if (error != CSS_OK)
@@ -1305,8 +1352,8 @@ css_error parseDeclaration(css_parser *parser)
if (error != CSS_OK)
return error;
- if (token->type != CSS_TOKEN_CHAR || token->data.len != 1 ||
- token->data.ptr[0] != ':') {
+ if (token->type != CSS_TOKEN_CHAR || token->lower.len != 1 ||
+ token->lower.ptr[0] != ':') {
/* parse error -- expected : */
parser_state to = { sMalformedDecl, Initial };
@@ -1369,18 +1416,18 @@ css_error parseDeclList(css_parser *parser)
if (error != CSS_OK)
return error;
- if (token->type != CSS_TOKEN_CHAR || token->data.len != 1) {
+ if (token->type != CSS_TOKEN_CHAR || token->lower.len != 1) {
/* Should never happen */
assert(0 && "Expected ; or }");
}
- if (token->data.ptr[0] == '}') {
+ if (token->lower.ptr[0] == '}') {
error = pushBack(parser, token);
if (error != CSS_OK)
return error;
return done(parser);
- } else if (token->data.ptr[0] == ';') {
+ } else if (token->lower.ptr[0] == ';') {
state->substate = WS;
} else {
/* Should never happen */
@@ -1419,9 +1466,9 @@ css_error parseDeclListEnd(css_parser *parser)
return error;
if (token->type != CSS_TOKEN_CHAR ||
- token->data.len != 1 ||
- (token->data.ptr[0] != ';' &&
- token->data.ptr[0] != '}')) {
+ token->lower.len != 1 ||
+ (token->lower.ptr[0] != ';' &&
+ token->lower.ptr[0] != '}')) {
parser_state to = { sDeclaration, Initial };
parser_state subsequent =
{ sDeclListEnd, AfterDeclaration };
@@ -1508,9 +1555,9 @@ css_error parseValue1(css_parser *parser)
/* Grammar ambiguity -- assume ';' or '}' mark end */
if (token->type == CSS_TOKEN_CHAR &&
- token->data.len == 1 &&
- (token->data.ptr[0] == ';' ||
- token->data.ptr[0] == '}')) {
+ token->lower.len == 1 &&
+ (token->lower.ptr[0] == ';' ||
+ token->lower.ptr[0] == '}')) {
/* Parse error */
parser->parseError = true;
@@ -1556,9 +1603,9 @@ css_error parseValue0(css_parser *parser)
/* Grammar ambiguity -- assume ';' or '}' mark end */
if (token->type == CSS_TOKEN_CHAR &&
- token->data.len == 1 &&
- (token->data.ptr[0] == ';' ||
- token->data.ptr[0] == '}')) {
+ token->lower.len == 1 &&
+ (token->lower.ptr[0] == ';' ||
+ token->lower.ptr[0] == '}')) {
return done(parser);
}
@@ -1595,8 +1642,8 @@ css_error parseValue(css_parser *parser)
if (token->type == CSS_TOKEN_ATKEYWORD) {
state->substate = WS;
} else if (token->type == CSS_TOKEN_CHAR &&
- token->data.len == 1 &&
- token->data.ptr[0] == '{') {
+ token->lower.len == 1 &&
+ token->lower.ptr[0] == '{') {
/* Grammar ambiguity. Assume block. */
parser_state to = { sBlock, Initial };
@@ -1656,11 +1703,11 @@ css_error parseAny0(css_parser *parser)
/* Grammar ambiguity:
* assume '{', ';', ')', ']' mark end */
if (token->type == CSS_TOKEN_CHAR &&
- token->data.len == 1 &&
- (token->data.ptr[0] == '{' ||
- token->data.ptr[0] == ';' ||
- token->data.ptr[0] == ')' ||
- token->data.ptr[0] == ']')) {
+ token->lower.len == 1 &&
+ (token->lower.ptr[0] == '{' ||
+ token->lower.ptr[0] == ';' ||
+ token->lower.ptr[0] == ')' ||
+ token->lower.ptr[0] == ']')) {
return done(parser);
}
@@ -1720,15 +1767,15 @@ css_error parseAny1(css_parser *parser)
/* Grammar ambiguity: any0 can be followed by
* '{', ';', ')', ']'. any1 can only be followed by '{'. */
- if (token->type == CSS_TOKEN_CHAR && token->data.len == 1) {
- if (token->data.ptr[0] == ';' ||
- token->data.ptr[0] == ')' ||
- token->data.ptr[0] == ']') {
+ if (token->type == CSS_TOKEN_CHAR && token->lower.len == 1) {
+ if (token->lower.ptr[0] == ';' ||
+ token->lower.ptr[0] == ')' ||
+ token->lower.ptr[0] == ']') {
parser_state to = { sAny, Initial };
parser_state subsequent = { sAny1, AfterAny };
return transition(parser, to, subsequent);
- } else if (token->data.ptr[0] != '{') {
+ } else if (token->lower.ptr[0] != '{') {
/* parse error */
parser->parseError = true;
}
@@ -1798,11 +1845,11 @@ css_error parseAny(css_parser *parser)
parser->match_char = ')';
state->substate = WS;
} else if (token->type == CSS_TOKEN_CHAR &&
- token->data.len == 1 &&
- (token->data.ptr[0] == '(' ||
- token->data.ptr[0] == '[')) {
+ token->lower.len == 1 &&
+ (token->lower.ptr[0] == '(' ||
+ token->lower.ptr[0] == '[')) {
parser->match_char =
- token->data.ptr[0] == '(' ? ')' : ']';
+ token->lower.ptr[0] == '(' ? ')' : ']';
state->substate = WS;
}
@@ -1834,8 +1881,8 @@ css_error parseAny(css_parser *parser)
return error;
/* Match correct close bracket (grammar ambiguity) */
- if (token->type == CSS_TOKEN_CHAR && token->data.len == 1 &&
- token->data.ptr[0] == parser->match_char) {
+ if (token->type == CSS_TOKEN_CHAR && token->lower.len == 1 &&
+ token->lower.ptr[0] == parser->match_char) {
state->substate = WS2;
goto ws2;
}
@@ -1878,14 +1925,14 @@ css_error parseMalformedDeclaration(css_parser *parser)
break;
if (token->type != CSS_TOKEN_CHAR ||
- token->data.len != 1 ||
- (token->data.ptr[0] != '{' &&
- token->data.ptr[0] != '}' &&
- token->data.ptr[0] != '[' &&
- token->data.ptr[0] != ']' &&
- token->data.ptr[0] != '(' &&
- token->data.ptr[0] != ')' &&
- token->data.ptr[0] != ';'))
+ token->lower.len != 1 ||
+ (token->lower.ptr[0] != '{' &&
+ token->lower.ptr[0] != '}' &&
+ token->lower.ptr[0] != '[' &&
+ token->lower.ptr[0] != ']' &&
+ token->lower.ptr[0] != '(' &&
+ token->lower.ptr[0] != ')' &&
+ token->lower.ptr[0] != ';'))
continue;
char want;
@@ -1895,17 +1942,17 @@ css_error parseMalformedDeclaration(css_parser *parser)
/* If the stack is empty, then we're done if we've got
* either a ';' or '}' */
if (match == NULL) {
- if (token->data.ptr[0] == ';' ||
- token->data.ptr[0] == '}')
+ if (token->lower.ptr[0] == ';' ||
+ token->lower.ptr[0] == '}')
break;
}
/* Regardless, if we've got a semicolon, ignore it */
- if (token->data.ptr[0] == ';')
+ if (token->lower.ptr[0] == ';')
continue;
/* Get corresponding start tokens for end tokens */
- switch (token->data.ptr[0]) {
+ switch (token->lower.ptr[0]) {
case '}':
want = '{';
break;
@@ -1927,7 +1974,7 @@ css_error parseMalformedDeclaration(css_parser *parser)
parser->open_items, NULL);
} else if (want == 0) {
parserutils_stack_push(parser->open_items,
- &token->data.ptr[0]);
+ &token->lower.ptr[0]);
}
}
}
@@ -1972,13 +2019,13 @@ css_error parseMalformedSelector(css_parser *parser)
break;
if (token->type != CSS_TOKEN_CHAR ||
- token->data.len != 1 ||
- (token->data.ptr[0] != '{' &&
- token->data.ptr[0] != '}' &&
- token->data.ptr[0] != '[' &&
- token->data.ptr[0] != ']' &&
- token->data.ptr[0] != '(' &&
- token->data.ptr[0] != ')'))
+ token->lower.len != 1 ||
+ (token->lower.ptr[0] != '{' &&
+ token->lower.ptr[0] != '}' &&
+ token->lower.ptr[0] != '[' &&
+ token->lower.ptr[0] != ']' &&
+ token->lower.ptr[0] != '(' &&
+ token->lower.ptr[0] != ')'))
continue;
char want;
@@ -1986,7 +2033,7 @@ css_error parseMalformedSelector(css_parser *parser)
parser->open_items);
/* Get corresponding start tokens for end tokens */
- switch (token->data.ptr[0]) {
+ switch (token->lower.ptr[0]) {
case '}':
want = '{';
break;
@@ -2008,7 +2055,7 @@ css_error parseMalformedSelector(css_parser *parser)
parser->open_items, NULL);
} else if (want == 0) {
parserutils_stack_push(parser->open_items,
- &token->data.ptr[0]);
+ &token->lower.ptr[0]);
}
/* If we encountered a '}', there was data on the stack
@@ -2065,14 +2112,14 @@ css_error parseMalformedAtRule(css_parser *parser)
break;
if (token->type != CSS_TOKEN_CHAR ||
- token->data.len != 1 ||
- (token->data.ptr[0] != '{' &&
- token->data.ptr[0] != '}' &&
- token->data.ptr[0] != '[' &&
- token->data.ptr[0] != ']' &&
- token->data.ptr[0] != '(' &&
- token->data.ptr[0] != ')' &&
- token->data.ptr[0] != ';'))
+ token->lower.len != 1 ||
+ (token->lower.ptr[0] != '{' &&
+ token->lower.ptr[0] != '}' &&
+ token->lower.ptr[0] != '[' &&
+ token->lower.ptr[0] != ']' &&
+ token->lower.ptr[0] != '(' &&
+ token->lower.ptr[0] != ')' &&
+ token->lower.ptr[0] != ';'))
continue;
char want;
@@ -2081,7 +2128,7 @@ css_error parseMalformedAtRule(css_parser *parser)
/* If we have a semicolon, then we're either done or
* need to ignore it */
- if (token->data.ptr[0] == ';') {
+ if (token->lower.ptr[0] == ';') {
if (match == NULL)
break;
else
@@ -2089,7 +2136,7 @@ css_error parseMalformedAtRule(css_parser *parser)
}
/* Get corresponding start tokens for end tokens */
- switch (token->data.ptr[0]) {
+ switch (token->lower.ptr[0]) {
case '}':
want = '{';
break;
@@ -2111,7 +2158,7 @@ css_error parseMalformedAtRule(css_parser *parser)
parser->open_items, NULL);
} else if (want == 0) {
parserutils_stack_push(parser->open_items,
- &token->data.ptr[0]);
+ &token->lower.ptr[0]);
}
/* If we encountered a '}', there was data on the stack