Rewrite parsing of <meta http-equiv="Refresh" ...> content values. The previous code was full of nasty edge cases. As an added bonus, there's some BNF documenting what we expect to support here.

svn path=/trunk/netsurf/; revision=4814
author: John Mark Bell <jmb@netsurf-browser.org> 2008-07-30 00:51:26 +0000
committer: John Mark Bell <jmb@netsurf-browser.org> 2008-07-30 00:51:26 +0000
commit: 307ab6675f9a7129d645bb5c25b919d25e3ca8bd (patch)
tree: ae51e48d616bb648bb1c5ded7303dfc7cfa291bf
parent: 3eabd20dcf0f030cebac90b0081b8c8f0fb4f1be (diff)
download: netsurf-307ab6675f9a7129d645bb5c25b919d25e3ca8bd.tar.gz
netsurf-307ab6675f9a7129d645bb5c25b919d25e3ca8bd.tar.bz2
1 file changed, 92 insertions, 41 deletions
diff --git a/render/html.c b/render/html.c
index 64d3e1386..8685b6967 100644
--- a/render/html.c
+++ b/render/html.c
@@ -603,7 +603,7 @@ bool html_meta_refresh(struct content *c, xmlNode *head)
 	xmlNode *n;
 	xmlChar *equiv, *content;
 	union content_msg_data msg_data;
-	char *url, *end, *refresh;
+	char *url, *end, *refresh = NULL, quote = 0;
 	url_func_result res;
 
 	for (n = head == 0 ? 0 : head->children; n; n = n->next) {
@@ -642,6 +642,16 @@ bool html_meta_refresh(struct content *c, xmlNode *head)
 
 		end = (char *) content + strlen((const char *) content);
 
+		/* content  := *LWS 1*DIGIT *LWS [';' *LWS *1url *LWS]
+		 * url      := "url" *LWS '=' *LWS (url-nq | url-sq | url-dq)
+		 * url-nq   := *urlchar
+		 * url-sq   := "'" *(urlchar | '"') "'"
+		 * url-dq   := '"' *(urlchar | "'") '"'
+		 * urlchar  := [#x9#x21#x23-#x26#x28-#x7E] | nonascii
+		 * nonascii := [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
+		 */
+
+		/* *LWS 1*DIGIT */
 		msg_data.delay = (int)strtol((char *) content, &url, 10);
 		/* a very small delay and self-referencing URL can cause a loop
 		 * that grinds machines to a halt. To prevent this we set a
@@ -649,6 +659,20 @@ bool html_meta_refresh(struct content *c, xmlNode *head)
 		if (msg_data.delay < 1)
 			msg_data.delay = 1;
 
+		/* *LWS */
+		while (url < end && isspace(*url)) {
+			url++;
+		}
+
+		/* ';' */
+		if (url < end && *url == ';')
+			url++;
+
+		/* *LWS */
+		while (url < end && isspace(*url)) {
+			url++;
+		}
+
 		if (url == end) {
 			/* Just delay specified, so refresh current page */
 			xmlFree(content);
@@ -665,62 +689,89 @@ bool html_meta_refresh(struct content *c, xmlNode *head)
 			break;
 		}
 
-		for ( ; url <= end - 4; url++) {
-			if (!strncasecmp(url, "url=", 4)) {
-				url += 4;
-				break;
+		/* "url" */
+		if (url <= end - 3) {
+			if (strncasecmp(url, "url", 3) == 0) {
+				url += 3;
+			} else {
+				/* Unexpected input, ignore this header */
+				continue;
 			}
+		} else {
+			/* Insufficient input, ignore this header */
+			continue;
 		}
 
-		/* various sites contain junk meta refresh URL components,
-		 * so attempt to deal with this by stripping likely garbage
-		 * from the beginning and end of URLs */
-		while (url < end) {
-			if (isspace(*url) || *url == '\'' || *url == '"')
+		/* *LWS */
+		while (url < end && isspace(*url)) {
+			url++;
+		}
+
+		/* '=' */
+		if (url < end) {
+			if (*url == '=') {
 				url++;
-			else
-				break;
+			} else {
+				/* Unexpected input, ignore this header */
+				continue;
+			}
+		} else {
+			/* Insufficient input, ignore this header */
+			continue;
 		}
 
-		while (end > url) {
-			if (isspace(end[-1]) || end[-1] == '\'' ||
-					end[-1] == '"')
-				*--end = '\0';
-			else
-				break;
+		/* *LWS */
+		while (url < end && isspace(*url)) {
+			url++;
 		}
 
-		if (url < end) {
-			res = url_join(url, c->data.html.base_url, &refresh);
+		/* '"' or "'" */
+		if (url < end && (*url == '"' || *url == '\'')) {
+			quote = *url;
+			url++;
+		}
 
-			xmlFree(content);
+		/* Start of URL */
+		refresh = url;
 
-			if (res == URL_FUNC_NOMEM) {
-				msg_data.error = messages_get("NoMemory");
-				content_broadcast(c,
-					CONTENT_MSG_ERROR, msg_data);
-				return false;
-			} else if (res == URL_FUNC_FAILED) {
-				/* This isn't fatal so carry on looking */
-				continue;
-			}
+		if (quote != 0) {
+			/* url-sq | url-dq */
+			while (url < end && *url != quote)
+				url++;
+		} else {
+			/* url-nq */
+			while (url < end && !isspace(*url))
+				url++;
+		}
 
-			c->refresh = talloc_strdup(c, refresh);
+		/* '"' or "'" or *LWS (we don't care) */
+		if (url < end)
+			*url = '\0';
 
-			free(refresh);
+		res = url_join(refresh, c->data.html.base_url, &refresh);
 
-			if (!c->refresh) {
-				msg_data.error = messages_get("NoMemory");
-				content_broadcast(c,
-					CONTENT_MSG_ERROR, msg_data);
-				return false;
-			}
+		xmlFree(content);
 
-			content_broadcast(c, CONTENT_MSG_REFRESH, msg_data);
-			break;
+		if (res == URL_FUNC_NOMEM) {
+			msg_data.error = messages_get("NoMemory");
+			content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+			return false;
+		} else if (res == URL_FUNC_FAILED) {
+			/* This isn't fatal so carry on looking */
+			continue;
 		}
 
-		xmlFree(content);
+		c->refresh = talloc_strdup(c, refresh);
+
+		free(refresh);
+
+		if (!c->refresh) {
+			msg_data.error = messages_get("NoMemory");
+			content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+			return false;
+		}
+
+		content_broadcast(c, CONTENT_MSG_REFRESH, msg_data);
 	}
 
 	return true;
author	John Mark Bell <jmb@netsurf-browser.org>	2008-07-30 00:51:26 +0000
committer	John Mark Bell <jmb@netsurf-browser.org>	2008-07-30 00:51:26 +0000
commit	307ab6675f9a7129d645bb5c25b919d25e3ca8bd (patch)
tree	ae51e48d616bb648bb1c5ded7303dfc7cfa291bf
parent	3eabd20dcf0f030cebac90b0081b8c8f0fb4f1be (diff)
download	netsurf-307ab6675f9a7129d645bb5c25b919d25e3ca8bd.tar.gz netsurf-307ab6675f9a7129d645bb5c25b919d25e3ca8bd.tar.bz2