Correctly process unterminated strings.

svn path=/trunk/libcss/; revision=4455
author: John Mark Bell <jmb@netsurf-browser.org> 2008-06-26 12:05:14 +0000
committer: John Mark Bell <jmb@netsurf-browser.org> 2008-06-26 12:05:14 +0000
commit: 6d1f8a2e90f1b125fcd8839a0f9f5811c97fb8c9 (patch)
tree: 74d77677d8115ff6071b0d5621702dd8ab0fc3d1 /src/lex
parent: 76b814183691e731a6d6e564fcc0e98ddac4adf3 (diff)
download: libcss-6d1f8a2e90f1b125fcd8839a0f9f5811c97fb8c9.tar.gz libcss-6d1f8a2e90f1b125fcd8839a0f9f5811c97fb8c9.tar.bz2
2 files changed, 37 insertions, 26 deletions
diff --git a/src/lex/lex.c b/src/lex/lex.c
index f184a7c..d487a27 100644
--- a/src/lex/lex.c
+++ b/src/lex/lex.c
@@ -34,6 +34,7 @@
 
 #include "lex/lex.h"
 #include "utils/parserutilserror.h"
+#include "utils/utils.h"
 
 /** \todo Optimisation -- we're currently revisiting a bunch of input 
  *	  characters (Currently, we're calling parserutils_inputstream_peek 
@@ -379,7 +380,15 @@ css_error emitToken(css_lexer *lexer, css_token_type type,
 		t->data.ptr += 1;
 		t->data.len -= 1;
 
-		/* Strip the trailing quote */
+		/* Strip the trailing quote, iff it exists (may have hit EOF) */
+		if (t->data.ptr[t->data.len - 1] == '"' ||
+				t->data.ptr[t->data.len - 1] == '\'') {
+			t->data.len -= 1;
+		}
+		break;
+	case CSS_TOKEN_INVALID_STRING:
+		/* Strip the leading quote */
+		t->data.ptr += 1;
 		t->data.len -= 1;
 		break;
 	case CSS_TOKEN_HASH:
@@ -396,8 +405,8 @@ css_error emitToken(css_lexer *lexer, css_token_type type,
 		break;
 	case CSS_TOKEN_URI:
 		/* Strip the "url(" from the start */
-		t->data.ptr += sizeof("url(") - 1;
-		t->data.len -= sizeof("url(") - 1;
+		t->data.ptr += SLEN("url(");
+		t->data.len -= SLEN("url(");
 
 		/* Strip any leading whitespace */
 		while (isSpace(t->data.ptr[0])) {
@@ -427,16 +436,16 @@ css_error emitToken(css_lexer *lexer, css_token_type type,
 		break;
 	case CSS_TOKEN_UNICODE_RANGE:
 		/* Remove "U+" from the start */
-		t->data.ptr += sizeof("U+") - 1;
-		t->data.len -= sizeof("U+") - 1;
+		t->data.ptr += SLEN("U+");
+		t->data.len -= SLEN("U+");
 		break;
 	case CSS_TOKEN_COMMENT:
 		/* Strip the leading '/' and '*' */
-		t->data.ptr += sizeof("/*") - 1;
-		t->data.len -= sizeof("/*") - 1;
+		t->data.ptr += SLEN("/*");
+		t->data.len -= SLEN("/*");
 
 		/* Strip the trailing '*' and '/' */
-		t->data.len -= sizeof("*/") - 1;
+		t->data.len -= SLEN("*/");
 		break;
 	case CSS_TOKEN_FUNCTION:
 		/* Strip the trailing '(' */
@@ -1239,11 +1248,13 @@ css_error String(css_lexer *lexer, const css_token **token)
 	 */
 
 	error = consumeString(lexer);
-	if (error != CSS_OK && error != CSS_EOF)
+	if (error != CSS_OK && error != CSS_EOF && error != CSS_INVALID)
 		return error;
 
+	/* EOF will be reprocessed in Start() */
 	return emitToken(lexer, 
-			error == CSS_EOF ? CSS_TOKEN_EOF : CSS_TOKEN_STRING, 
+			error == CSS_INVALID ? CSS_TOKEN_INVALID_STRING 
+					     : CSS_TOKEN_STRING, 
 			token);
 }
 
@@ -1450,8 +1461,14 @@ css_error URI(css_lexer *lexer, const css_token **token)
 		lexer->substate = String;
 
 		error = consumeString(lexer);
-		if (error != CSS_OK && error != CSS_EOF)
+		if (error == CSS_INVALID) {
+			/* Rewind to "url(" */
+			lexer->bytesReadForToken = lexer->context.bytesForURL;
+			lexer->token.data.len = lexer->context.dataLenForURL;
+			return emitToken(lexer, CSS_TOKEN_FUNCTION, token);
+		} else if (error != CSS_OK && error != CSS_EOF) {
 			return error;
+		}
 
 		/* EOF gets handled in RParen */
 
@@ -1794,12 +1811,6 @@ css_error consumeString(css_lexer *lexer)
 	 * The open quote has been consumed.
 	 */
 
-	/** \todo Handle unexpected end of string correctly - CSS 2.1 $4.2 
-	 * Need to flag the string as being in error (within token, so the
-	 * parser can discard the construct in which the string was found).
-	 * This does not apply in the EOF case. In that case, we must act
-	 * as described in "Unexpected end of style sheet" and simply close
-	 * the string */
 	do {
 		cptr = parserutils_inputstream_peek(lexer->input, 
 				lexer->bytesReadForToken, &clen);
@@ -1818,8 +1829,8 @@ css_error consumeString(css_lexer *lexer)
 			if (error != CSS_OK)
 				return error;
 		} else if (c != quote) {
-			/* Invalid character in string -- skip */
-			lexer->bytesReadForToken += clen;
+			/* Invalid character in string */
+			return CSS_INVALID;
 		}
 	} while(c != quote);
 
diff --git a/src/lex/lex.h b/src/lex/lex.h
index bfd85b9..b40aff3 100644
--- a/src/lex/lex.h
+++ b/src/lex/lex.h
@@ -33,13 +33,13 @@ typedef union css_lexer_optparams {
  * Token type
  */
 typedef enum css_token_type { 
-	CSS_TOKEN_IDENT, CSS_TOKEN_ATKEYWORD, CSS_TOKEN_STRING,
-	CSS_TOKEN_HASH, CSS_TOKEN_NUMBER, CSS_TOKEN_PERCENTAGE, 
-	CSS_TOKEN_DIMENSION, CSS_TOKEN_URI, CSS_TOKEN_UNICODE_RANGE, 
-	CSS_TOKEN_CDO, CSS_TOKEN_CDC, CSS_TOKEN_S, CSS_TOKEN_COMMENT,
-	CSS_TOKEN_FUNCTION, CSS_TOKEN_INCLUDES, CSS_TOKEN_DASHMATCH,
-	CSS_TOKEN_PREFIXMATCH, CSS_TOKEN_SUFFIXMATCH, CSS_TOKEN_SUBSTRINGMATCH,
-	CSS_TOKEN_CHAR, CSS_TOKEN_EOF 
+	CSS_TOKEN_IDENT, CSS_TOKEN_ATKEYWORD, CSS_TOKEN_STRING, 
+	CSS_TOKEN_INVALID_STRING, CSS_TOKEN_HASH, CSS_TOKEN_NUMBER, 
+	CSS_TOKEN_PERCENTAGE, CSS_TOKEN_DIMENSION, CSS_TOKEN_URI, 
+	CSS_TOKEN_UNICODE_RANGE, CSS_TOKEN_CDO, CSS_TOKEN_CDC, CSS_TOKEN_S, 
+	CSS_TOKEN_COMMENT, CSS_TOKEN_FUNCTION, CSS_TOKEN_INCLUDES, 
+	CSS_TOKEN_DASHMATCH, CSS_TOKEN_PREFIXMATCH, CSS_TOKEN_SUFFIXMATCH, 
+	CSS_TOKEN_SUBSTRINGMATCH, CSS_TOKEN_CHAR, CSS_TOKEN_EOF 
 } css_token_type;
 
 /**
author	John Mark Bell <jmb@netsurf-browser.org>	2008-06-26 12:05:14 +0000
committer	John Mark Bell <jmb@netsurf-browser.org>	2008-06-26 12:05:14 +0000
commit	6d1f8a2e90f1b125fcd8839a0f9f5811c97fb8c9 (patch)
tree	74d77677d8115ff6071b0d5621702dd8ab0fc3d1 /src/lex
parent	76b814183691e731a6d6e564fcc0e98ddac4adf3 (diff)
download	libcss-6d1f8a2e90f1b125fcd8839a0f9f5811c97fb8c9.tar.gz libcss-6d1f8a2e90f1b125fcd8839a0f9f5811c97fb8c9.tar.bz2