summary | refs | log | tree | commit | diff
path: root/src/lex
diff options
context:
space:
mode:
author: John Mark Bell <jmb@netsurf-browser.org> 2008-06-26 12:05:14 +0000
committer: John Mark Bell <jmb@netsurf-browser.org> 2008-06-26 12:05:14 +0000
commit: 6d1f8a2e90f1b125fcd8839a0f9f5811c97fb8c9 (patch)
tree: 74d77677d8115ff6071b0d5621702dd8ab0fc3d1 /src/lex
parent: 76b814183691e731a6d6e564fcc0e98ddac4adf3 (diff)
download: libcss-6d1f8a2e90f1b125fcd8839a0f9f5811c97fb8c9.tar.gz
          libcss-6d1f8a2e90f1b125fcd8839a0f9f5811c97fb8c9.tar.bz2
Correctly process unterminated strings.
svn path=/trunk/libcss/; revision=4455
Diffstat (limited to 'src/lex')
-rw-r--r-- src/lex/lex.c 49
-rw-r--r-- src/lex/lex.h 14
2 files changed, 37 insertions, 26 deletions
diff --git a/src/lex/lex.c b/src/lex/lex.c
index f184a7c..d487a27 100644
--- a/src/lex/lex.c
+++ b/src/lex/lex.c
@@ -34,6 +34,7 @@
#include "lex/lex.h"
#include "utils/parserutilserror.h"
+#include "utils/utils.h"
/** \todo Optimisation -- we're currently revisiting a bunch of input
* characters (Currently, we're calling parserutils_inputstream_peek
@@ -379,7 +380,15 @@ css_error emitToken(css_lexer *lexer, css_token_type type,
t->data.ptr += 1;
t->data.len -= 1;
- /* Strip the trailing quote */
+ /* Strip the trailing quote, iff it exists (may have hit EOF) */
+ if (t->data.ptr[t->data.len - 1] == '"' ||
+ t->data.ptr[t->data.len - 1] == '\'') {
+ t->data.len -= 1;
+ }
+ break;
+ case CSS_TOKEN_INVALID_STRING:
+ /* Strip the leading quote */
+ t->data.ptr += 1;
t->data.len -= 1;
break;
case CSS_TOKEN_HASH:
@@ -396,8 +405,8 @@ css_error emitToken(css_lexer *lexer, css_token_type type,
break;
case CSS_TOKEN_URI:
/* Strip the "url(" from the start */
- t->data.ptr += sizeof("url(") - 1;
- t->data.len -= sizeof("url(") - 1;
+ t->data.ptr += SLEN("url(");
+ t->data.len -= SLEN("url(");
/* Strip any leading whitespace */
while (isSpace(t->data.ptr[0])) {
@@ -427,16 +436,16 @@ css_error emitToken(css_lexer *lexer, css_token_type type,
break;
case CSS_TOKEN_UNICODE_RANGE:
/* Remove "U+" from the start */
- t->data.ptr += sizeof("U+") - 1;
- t->data.len -= sizeof("U+") - 1;
+ t->data.ptr += SLEN("U+");
+ t->data.len -= SLEN("U+");
break;
case CSS_TOKEN_COMMENT:
/* Strip the leading '/' and '*' */
- t->data.ptr += sizeof("/*") - 1;
- t->data.len -= sizeof("/*") - 1;
+ t->data.ptr += SLEN("/*");
+ t->data.len -= SLEN("/*");
/* Strip the trailing '*' and '/' */
- t->data.len -= sizeof("*/") - 1;
+ t->data.len -= SLEN("*/");
break;
case CSS_TOKEN_FUNCTION:
/* Strip the trailing '(' */
@@ -1239,11 +1248,13 @@ css_error String(css_lexer *lexer, const css_token **token)
*/
error = consumeString(lexer);
- if (error != CSS_OK && error != CSS_EOF)
+ if (error != CSS_OK && error != CSS_EOF && error != CSS_INVALID)
return error;
+ /* EOF will be reprocessed in Start() */
return emitToken(lexer,
- error == CSS_EOF ? CSS_TOKEN_EOF : CSS_TOKEN_STRING,
+ error == CSS_INVALID ? CSS_TOKEN_INVALID_STRING
+ : CSS_TOKEN_STRING,
token);
}
@@ -1450,8 +1461,14 @@ css_error URI(css_lexer *lexer, const css_token **token)
lexer->substate = String;
error = consumeString(lexer);
- if (error != CSS_OK && error != CSS_EOF)
+ if (error == CSS_INVALID) {
+ /* Rewind to "url(" */
+ lexer->bytesReadForToken = lexer->context.bytesForURL;
+ lexer->token.data.len = lexer->context.dataLenForURL;
+ return emitToken(lexer, CSS_TOKEN_FUNCTION, token);
+ } else if (error != CSS_OK && error != CSS_EOF) {
return error;
+ }
/* EOF gets handled in RParen */
@@ -1794,12 +1811,6 @@ css_error consumeString(css_lexer *lexer)
* The open quote has been consumed.
*/
- /** \todo Handle unexpected end of string correctly - CSS 2.1 $4.2
- * Need to flag the string as being in error (within token, so the
- * parser can discard the construct in which the string was found).
- * This does not apply in the EOF case. In that case, we must act
- * as described in "Unexpected end of style sheet" and simply close
- * the string */
do {
cptr = parserutils_inputstream_peek(lexer->input,
lexer->bytesReadForToken, &clen);
@@ -1818,8 +1829,8 @@ css_error consumeString(css_lexer *lexer)
if (error != CSS_OK)
return error;
} else if (c != quote) {
- /* Invalid character in string -- skip */
- lexer->bytesReadForToken += clen;
+ /* Invalid character in string */
+ return CSS_INVALID;
}
} while(c != quote);
diff --git a/src/lex/lex.h b/src/lex/lex.h
index bfd85b9..b40aff3 100644
--- a/src/lex/lex.h
+++ b/src/lex/lex.h
@@ -33,13 +33,13 @@ typedef union css_lexer_optparams {
* Token type
*/
typedef enum css_token_type {
- CSS_TOKEN_IDENT, CSS_TOKEN_ATKEYWORD, CSS_TOKEN_STRING,
- CSS_TOKEN_HASH, CSS_TOKEN_NUMBER, CSS_TOKEN_PERCENTAGE,
- CSS_TOKEN_DIMENSION, CSS_TOKEN_URI, CSS_TOKEN_UNICODE_RANGE,
- CSS_TOKEN_CDO, CSS_TOKEN_CDC, CSS_TOKEN_S, CSS_TOKEN_COMMENT,
- CSS_TOKEN_FUNCTION, CSS_TOKEN_INCLUDES, CSS_TOKEN_DASHMATCH,
- CSS_TOKEN_PREFIXMATCH, CSS_TOKEN_SUFFIXMATCH, CSS_TOKEN_SUBSTRINGMATCH,
- CSS_TOKEN_CHAR, CSS_TOKEN_EOF
+ CSS_TOKEN_IDENT, CSS_TOKEN_ATKEYWORD, CSS_TOKEN_STRING,
+ CSS_TOKEN_INVALID_STRING, CSS_TOKEN_HASH, CSS_TOKEN_NUMBER,
+ CSS_TOKEN_PERCENTAGE, CSS_TOKEN_DIMENSION, CSS_TOKEN_URI,
+ CSS_TOKEN_UNICODE_RANGE, CSS_TOKEN_CDO, CSS_TOKEN_CDC, CSS_TOKEN_S,
+ CSS_TOKEN_COMMENT, CSS_TOKEN_FUNCTION, CSS_TOKEN_INCLUDES,
+ CSS_TOKEN_DASHMATCH, CSS_TOKEN_PREFIXMATCH, CSS_TOKEN_SUFFIXMATCH,
+ CSS_TOKEN_SUBSTRINGMATCH, CSS_TOKEN_CHAR, CSS_TOKEN_EOF
} css_token_type;
/**