From 73bfb6201e3aecf1b530d55d18bdf6610facf6f1 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Sat, 24 Mar 2012 22:08:13 +0000 Subject: Fix up meta refresh parser svn path=/trunk/netsurf/; revision=13631 --- render/html.c | 377 +++++++++++++++++++++++++++++++++++----------------------- 1 file changed, 226 insertions(+), 151 deletions(-) (limited to 'render/html.c') diff --git a/render/html.c b/render/html.c index 28a32aafd..ffe41ff2d 100644 --- a/render/html.c +++ b/render/html.c @@ -109,6 +109,8 @@ static dom_string *html_dom_string__parent; static dom_string *html_dom_string__self; static dom_string *html_dom_string__blank; static dom_string *html_dom_string__top; +static dom_string *html_dom_string_http_equiv; +static dom_string *html_dom_string_content; static nserror html_create_html_data(html_content *c, const http_parameter *params) @@ -558,192 +560,178 @@ static bool html_head(html_content *c, dom_node *head) return true; } -/** - * Search for meta refresh - * - * http://wp.netscape.com/assist/net_sites/pushpull.html - * - * \param c content structure - * \param head xml node of head element - * \return true on success, false otherwise (error reported) - */ - -static bool html_meta_refresh(html_content *c, dom_node *head) +static bool html_meta_refresh_process_element(html_content *c, dom_node *n) { -#ifdef FIXME - dom_node *n; - xmlChar *equiv, *content; union content_msg_data msg_data; - char *url, *end, *refresh = NULL, quote = 0; + const char *url, *end, *refresh = NULL; + char *new_url; + char quote = '\0'; + dom_string *equiv, *content; + dom_exception exc; nsurl *nsurl; nserror error; - for (n = head == NULL ? NULL : head->children; n; n = n->next) { - if (n->type != XML_ELEMENT_NODE) - continue; + exc = dom_element_get_attribute(n, html_dom_string_http_equiv, &equiv); + if (exc != DOM_NO_ERR) + return false; - /* Recurse into noscript elements */ - if (strcmp((const char *) n->name, "noscript") == 0) { - if (html_meta_refresh(c, n) == false) { - /* Some error occurred */ - return false; - } else if (c->base.refresh) { - /* Meta refresh found - stop */ - return true; - } - } + if (equiv == NULL) + return true; - if (strcmp((const char *) n->name, "meta") != 0) { - continue; - } + if (strcasecmp(dom_string_data(equiv), "refresh") != 0) { + dom_string_unref(equiv); + return true; + } - equiv = xmlGetProp(n, (const xmlChar *) "http-equiv"); - if (equiv == NULL) - continue; + dom_string_unref(equiv); - if (strcasecmp((const char *) equiv, "refresh") != 0) { - xmlFree(equiv); - continue; - } + exc = dom_element_get_attribute(n, html_dom_string_content, &content); + if (exc != DOM_NO_ERR) + return false; - xmlFree(equiv); + if (content == NULL) + return true; - content = xmlGetProp(n, (const xmlChar *) "content"); - if (content == NULL) - continue; + end = dom_string_data(content) + dom_string_byte_length(content); - end = (char *) content + strlen((const char *) content); - - /* content := *LWS intpart fracpart? *LWS [';' *LWS *1url *LWS] - * intpart := 1*DIGIT - * fracpart := 1*('.' | DIGIT) - * url := "url" *LWS '=' *LWS (url-nq | url-sq | url-dq) - * url-nq := *urlchar - * url-sq := "'" *(urlchar | '"') "'" - * url-dq := '"' *(urlchar | "'") '"' - * urlchar := [#x9#x21#x23-#x26#x28-#x7E] | nonascii - * nonascii := [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF] - */ + /* content := *LWS intpart fracpart? *LWS [';' *LWS *1url *LWS] + * intpart := 1*DIGIT + * fracpart := 1*('.' | DIGIT) + * url := "url" *LWS '=' *LWS (url-nq | url-sq | url-dq) + * url-nq := *urlchar + * url-sq := "'" *(urlchar | '"') "'" + * url-dq := '"' *(urlchar | "'") '"' + * urlchar := [#x9#x21#x23-#x26#x28-#x7E] | nonascii + * nonascii := [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF] + */ - url = (char *) content; + url = dom_string_data(content); - /* *LWS */ - while (url < end && isspace(*url)) { - url++; - } + /* *LWS */ + while (url < end && isspace(*url)) { + url++; + } - /* intpart */ - if (url == end || (*url < '0' || '9' < *url)) { - /* Empty content, or invalid timeval */ - xmlFree(content); - continue; - } + /* intpart */ + if (url == end || (*url < '0' || '9' < *url)) { + /* Empty content, or invalid timeval */ + dom_string_unref(content); + return true; + } - msg_data.delay = (int) strtol(url, &url, 10); - /* a very small delay and self-referencing URL can cause a loop - * that grinds machines to a halt. To prevent this we set a - * minimum refresh delay of 1s. */ - if (msg_data.delay < 1) - msg_data.delay = 1; + msg_data.delay = (int) strtol(url, &new_url, 10); + /* a very small delay and self-referencing URL can cause a loop + * that grinds machines to a halt. To prevent this we set a + * minimum refresh delay of 1s. */ + if (msg_data.delay < 1) + msg_data.delay = 1; - /* fracpart? (ignored, as delay is integer only) */ - while (url < end && (('0' <= *url && *url <= '9') || - *url == '.')) { - url++; - } + url = new_url; - /* *LWS */ - while (url < end && isspace(*url)) { - url++; - } + /* fracpart? (ignored, as delay is integer only) */ + while (url < end && (('0' <= *url && *url <= '9') || + *url == '.')) { + url++; + } - /* ';' */ - if (url < end && *url == ';') - url++; + /* *LWS */ + while (url < end && isspace(*url)) { + url++; + } - /* *LWS */ - while (url < end && isspace(*url)) { - url++; - } + /* ';' */ + if (url < end && *url == ';') + url++; - if (url == end) { - /* Just delay specified, so refresh current page */ - xmlFree(content); + /* *LWS */ + while (url < end && isspace(*url)) { + url++; + } - c->base.refresh = nsurl_ref( - content_get_url(&c->base)); + if (url == end) { + /* Just delay specified, so refresh current page */ + dom_string_unref(content); - content_broadcast(&c->base, CONTENT_MSG_REFRESH, - msg_data); - break; - } + c->base.refresh = nsurl_ref( + content_get_url(&c->base)); - /* "url" */ - if (url <= end - 3) { - if (strncasecmp(url, "url", 3) == 0) { - url += 3; - } else { - /* Unexpected input, ignore this header */ - xmlFree(content); - continue; - } + content_broadcast(&c->base, CONTENT_MSG_REFRESH, + msg_data); + return true; + } + + /* "url" */ + if (url <= end - 3) { + if (strncasecmp(url, "url", 3) == 0) { + url += 3; } else { - /* Insufficient input, ignore this header */ - xmlFree(content); - continue; + /* Unexpected input, ignore this header */ + dom_string_unref(content); + return true; } + } else { + /* Insufficient input, ignore this header */ + dom_string_unref(content); + return true; + } - /* *LWS */ - while (url < end && isspace(*url)) { - url++; - } + /* *LWS */ + while (url < end && isspace(*url)) { + url++; + } - /* '=' */ - if (url < end) { - if (*url == '=') { - url++; - } else { - /* Unexpected input, ignore this header */ - xmlFree(content); - continue; - } + /* '=' */ + if (url < end) { + if (*url == '=') { + url++; } else { - /* Insufficient input, ignore this header */ - xmlFree(content); - continue; + /* Unexpected input, ignore this header */ + dom_string_unref(content); + return true; } + } else { + /* Insufficient input, ignore this header */ + dom_string_unref(content); + return true; + } - /* *LWS */ - while (url < end && isspace(*url)) { - url++; - } + /* *LWS */ + while (url < end && isspace(*url)) { + url++; + } - /* '"' or "'" */ - if (url < end && (*url == '"' || *url == '\'')) { - quote = *url; - url++; - } + /* '"' or "'" */ + if (url < end && (*url == '"' || *url == '\'')) { + quote = *url; + url++; + } - /* Start of URL */ - refresh = url; + /* Start of URL */ + refresh = url; - if (quote != 0) { - /* url-sq | url-dq */ - while (url < end && *url != quote) - url++; - } else { - /* url-nq */ - while (url < end && !isspace(*url)) - url++; - } + if (quote != 0) { + /* url-sq | url-dq */ + while (url < end && *url != quote) + url++; + } else { + /* url-nq */ + while (url < end && !isspace(*url)) + url++; + } - /* '"' or "'" or *LWS (we don't care) */ - if (url < end) - *url = '\0'; + /* '"' or "'" or *LWS (we don't care) */ + if (url < end) { + new_url = strndup(refresh, url - refresh); + if (new_url == NULL) { + dom_string_unref(content); + return false; + } - error = nsurl_join(c->base_url, refresh, &nsurl); + error = nsurl_join(c->base_url, new_url, &nsurl); if (error != NSERROR_OK) { - xmlFree(content); + free(new_url); + + dom_string_unref(content); msg_data.error = messages_get("NoMemory"); content_broadcast(&c->base, CONTENT_MSG_ERROR, @@ -752,13 +740,93 @@ static bool html_meta_refresh(html_content *c, dom_node *head) return false; } - xmlFree(content); + free(new_url); c->base.refresh = nsurl; content_broadcast(&c->base, CONTENT_MSG_REFRESH, msg_data); } -#endif + + dom_string_unref(content); + + return true; +} + +/** + * Search for meta refresh + * + * http://wp.netscape.com/assist/net_sites/pushpull.html + * + * \param c content structure + * \param head xml node of head element + * \return true on success, false otherwise (error reported) + */ + +static bool html_meta_refresh(html_content *c, dom_node *head) +{ + dom_node *n, *next; + dom_exception exc; + + if (head == NULL) + return true; + + exc = dom_node_get_first_child(head, &n); + if (exc != DOM_NO_ERR) + return false; + + while (n != NULL) { + dom_node_type type; + + exc = dom_node_get_node_type(n, &type); + if (exc != DOM_NO_ERR) { + dom_node_unref(n); + return false; + } + + if (type == XML_ELEMENT_NODE) { + dom_string *name; + + exc = dom_node_get_node_name(n, &name); + if (exc != DOM_NO_ERR) { + dom_node_unref(n); + return false; + } + + /* Recurse into noscript elements */ + if (strcmp(dom_string_data(name), "noscript") == 0) { + if (html_meta_refresh(c, n) == false) { + /* Some error occurred */ + dom_node_unref(n); + return false; + } else if (c->base.refresh) { + /* Meta refresh found - stop */ + dom_node_unref(n); + return true; + } + } else if (strcmp(dom_string_data(name), "meta") == 0) { + if (html_meta_refresh_process_element(c, + n) == false) { + /* Some error occurred */ + dom_node_unref(n); + return false; + } else if (c->base.refresh != NULL) { + /* Meta refresh found - stop */ + dom_node_unref(n); + return true; + } + } + } + + exc = dom_node_get_next_sibling(n, &next); + if (exc != DOM_NO_ERR) { + dom_node_unref(n); + return false; + } + + dom_node_unref(n); + n = next; + } + return true; } @@ -3066,9 +3134,16 @@ nserror html_init(void) HTML_DOM_STRING_INTERN(_self); HTML_DOM_STRING_INTERN(_parent); HTML_DOM_STRING_INTERN(_top); + HTML_DOM_STRING_INTERN(content); #undef HTML_DOM_STRING_INTERN + exc = dom_string_create_interned((const uint8_t *) "http-equiv", + SLEN("http-equiv"), + &html_dom_string_http_equiv); + if ((exc != DOM_NO_ERR) || (html_dom_string_http_equiv == NULL)) + goto error; + for (i = 0; i < NOF_ELEMENTS(html_types); i++) { error = content_factory_register_handler(html_types[i], &html_content_handler); -- cgit v1.2.3