From e7fe3678f0da2548455134956631d0fb4d2a8dcd Mon Sep 17 00:00:00 2001
From: John Mark Bell
Date: Sun, 1 Apr 2007 16:41:22 +0000
Subject: Deal with more ways in which meta refresh URLs can be invalid.

svn path=/trunk/netsurf/; revision=3229
---
 render/html.c | 37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/render/html.c b/render/html.c
index b22ea0d8f..2a77b0446 100644
--- a/render/html.c
+++ b/render/html.c
@@ -61,7 +61,7 @@ static void html_dump_frameset(struct content_html_frames *frame,
 
 static const char empty_document[] =
 	"<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\""
-	"\"http://www.w3.org/TR/html4/strict.dtd\">"
+	" \"http://www.w3.org/TR/html4/strict.dtd\">"
 	"<html>"
 	"<head>"
 	"<title>Empty document</title>"
@@ -575,22 +575,32 @@ bool html_meta_refresh(struct content *c, xmlNode *head)
 		}
 
 		for ( ; url <= end - 4; url++) {
-			if (!strncasecmp(url, "url=", 4))
+			if (!strncasecmp(url, "url=", 4)) {
+				url += 4;
 				break;
+			}
 		}
 
-		/* mail.google.com sends out the broken format ", url='<url>'", so
-		 * special case this */
-		if (url <= end - 4) {
-			if ((url[4] == '\'') && (end[-1] == '\'')) {
-				*--end = '\0';
-				url++;
-			}
+		/* various sites contain junk meta refresh URL components,
+		 * so attempt to deal with this by stripping likely garbage
+		 * from the beginning and end of URLs */
+		while (url < end) {
+			if (isspace(*url) || *url == '\'' || *url == '"')
+				url++;
+			else
+				break;
 		}
 
-		if (url <= end - 4) {
-			res = url_join(url + 4, c->data.html.base_url,
-					&refresh);
+		while (end > url) {
+			if (isspace(end[-1]) || end[-1] == '\'' ||
+					end[-1] == '"')
+				*--end = '\0';
+			else
+				break;
+		}
+
+		if (url < end) {
+			res = url_join(url, c->data.html.base_url, &refresh);
 
 			xmlFree(content);
 
@@ -599,8 +609,7 @@ bool html_meta_refresh(struct content *c, xmlNode *head)
 				content_broadcast(c, CONTENT_MSG_ERROR,
 						msg_data);
 				return false;
-			}
-			else if (res == URL_FUNC_FAILED) {
+			} else if (res == URL_FUNC_FAILED) {
 				/* This isn't fatal so carry on looking */
 				continue;
 			}
-- 
cgit v1.2.3