summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John Mark Bell <jmb@netsurf-browser.org> 2008-07-30 00:51:26 +0000
committer John Mark Bell <jmb@netsurf-browser.org> 2008-07-30 00:51:26 +0000
commit 307ab6675f9a7129d645bb5c25b919d25e3ca8bd (patch)
tree ae51e48d616bb648bb1c5ded7303dfc7cfa291bf
parent 3eabd20dcf0f030cebac90b0081b8c8f0fb4f1be (diff)
download netsurf-307ab6675f9a7129d645bb5c25b919d25e3ca8bd.tar.gz
netsurf-307ab6675f9a7129d645bb5c25b919d25e3ca8bd.tar.bz2
Rewrite parsing of <meta http-equiv="Refresh" ...> content values. The previous code was full of nasty edge cases. As an added bonus, there's some BNF documenting what we expect to support here.
svn path=/trunk/netsurf/; revision=4814
-rw-r--r-- render/html.c 133
1 files changed, 92 insertions, 41 deletions
diff --git a/render/html.c b/render/html.c
index 64d3e1386..8685b6967 100644
--- a/render/html.c
+++ b/render/html.c
@@ -603,7 +603,7 @@ bool html_meta_refresh(struct content *c, xmlNode *head)
xmlNode *n;
xmlChar *equiv, *content;
union content_msg_data msg_data;
- char *url, *end, *refresh;
+ char *url, *end, *refresh = NULL, quote = 0;
url_func_result res;
for (n = head == 0 ? 0 : head->children; n; n = n->next) {
@@ -642,6 +642,16 @@ bool html_meta_refresh(struct content *c, xmlNode *head)
end = (char *) content + strlen((const char *) content);
+ /* content := *LWS 1*DIGIT *LWS [';' *LWS *1url *LWS]
+ * url := "url" *LWS '=' *LWS (url-nq | url-sq | url-dq)
+ * url-nq := *urlchar
+ * url-sq := "'" *(urlchar | '"') "'"
+ * url-dq := '"' *(urlchar | "'") '"'
+ * urlchar := [#x9#x21#x23-#x26#x28-#x7E] | nonascii
+ * nonascii := [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
+ */
+
+ /* *LWS 1*DIGIT */
msg_data.delay = (int)strtol((char *) content, &url, 10);
/* a very small delay and self-referencing URL can cause a loop
* that grinds machines to a halt. To prevent this we set a
@@ -649,6 +659,20 @@ bool html_meta_refresh(struct content *c, xmlNode *head)
if (msg_data.delay < 1)
msg_data.delay = 1;
+ /* *LWS */
+ while (url < end && isspace(*url)) {
+ url++;
+ }
+
+ /* ';' */
+ if (url < end && *url == ';')
+ url++;
+
+ /* *LWS */
+ while (url < end && isspace(*url)) {
+ url++;
+ }
+
if (url == end) {
/* Just delay specified, so refresh current page */
xmlFree(content);
@@ -665,62 +689,89 @@ bool html_meta_refresh(struct content *c, xmlNode *head)
break;
}
- for ( ; url <= end - 4; url++) {
- if (!strncasecmp(url, "url=", 4)) {
- url += 4;
- break;
+ /* "url" */
+ if (url <= end - 3) {
+ if (strncasecmp(url, "url", 3) == 0) {
+ url += 3;
+ } else {
+ /* Unexpected input, ignore this header */
+ continue;
}
+ } else {
+ /* Insufficient input, ignore this header */
+ continue;
}
- /* various sites contain junk meta refresh URL components,
- * so attempt to deal with this by stripping likely garbage
- * from the beginning and end of URLs */
- while (url < end) {
- if (isspace(*url) || *url == '\'' || *url == '"')
+ /* *LWS */
+ while (url < end && isspace(*url)) {
+ url++;
+ }
+
+ /* '=' */
+ if (url < end) {
+ if (*url == '=') {
url++;
- else
- break;
+ } else {
+ /* Unexpected input, ignore this header */
+ continue;
+ }
+ } else {
+ /* Insufficient input, ignore this header */
+ continue;
}
- while (end > url) {
- if (isspace(end[-1]) || end[-1] == '\'' ||
- end[-1] == '"')
- *--end = '\0';
- else
- break;
+ /* *LWS */
+ while (url < end && isspace(*url)) {
+ url++;
}
- if (url < end) {
- res = url_join(url, c->data.html.base_url, &refresh);
+ /* '"' or "'" */
+ if (url < end && (*url == '"' || *url == '\'')) {
+ quote = *url;
+ url++;
+ }
- xmlFree(content);
+ /* Start of URL */
+ refresh = url;
- if (res == URL_FUNC_NOMEM) {
- msg_data.error = messages_get("NoMemory");
- content_broadcast(c,
- CONTENT_MSG_ERROR, msg_data);
- return false;
- } else if (res == URL_FUNC_FAILED) {
- /* This isn't fatal so carry on looking */
- continue;
- }
+ if (quote != 0) {
+ /* url-sq | url-dq */
+ while (url < end && *url != quote)
+ url++;
+ } else {
+ /* url-nq */
+ while (url < end && !isspace(*url))
+ url++;
+ }
- c->refresh = talloc_strdup(c, refresh);
+ /* '"' or "'" or *LWS (we don't care) */
+ if (url < end)
+ *url = '\0';
- free(refresh);
+ res = url_join(refresh, c->data.html.base_url, &refresh);
- if (!c->refresh) {
- msg_data.error = messages_get("NoMemory");
- content_broadcast(c,
- CONTENT_MSG_ERROR, msg_data);
- return false;
- }
+ xmlFree(content);
- content_broadcast(c, CONTENT_MSG_REFRESH, msg_data);
- break;
+ if (res == URL_FUNC_NOMEM) {
+ msg_data.error = messages_get("NoMemory");
+ content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+ return false;
+ } else if (res == URL_FUNC_FAILED) {
+ /* This isn't fatal so carry on looking */
+ continue;
}
- xmlFree(content);
+ c->refresh = talloc_strdup(c, refresh);
+
+ free(refresh);
+
+ if (!c->refresh) {
+ msg_data.error = messages_get("NoMemory");
+ content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+ return false;
+ }
+
+ content_broadcast(c, CONTENT_MSG_REFRESH, msg_data);
}
return true;