summaryrefslogtreecommitdiff
path: root/render/html.c
diff options
context:
space:
mode:
author John Mark Bell <jmb@netsurf-browser.org> 2012-03-24 22:08:13 +0000
committer John Mark Bell <jmb@netsurf-browser.org> 2012-03-24 22:08:13 +0000
commit 73bfb6201e3aecf1b530d55d18bdf6610facf6f1 (patch)
tree dd36337e0c0d01fa86b1ce15ddba80ad457fef34 /render/html.c
parent 6a62b3a561fa50020883ed0a29d0ecfb517b1bb2 (diff)
download netsurf-73bfb6201e3aecf1b530d55d18bdf6610facf6f1.tar.gz
netsurf-73bfb6201e3aecf1b530d55d18bdf6610facf6f1.tar.bz2
Fix up meta refresh parser
svn path=/trunk/netsurf/; revision=13631
Diffstat (limited to 'render/html.c')
-rw-r--r-- render/html.c 377
1 files changed, 226 insertions, 151 deletions
diff --git a/render/html.c b/render/html.c
index 28a32aafd..ffe41ff2d 100644
--- a/render/html.c
+++ b/render/html.c
@@ -109,6 +109,8 @@ static dom_string *html_dom_string__parent;
static dom_string *html_dom_string__self;
static dom_string *html_dom_string__blank;
static dom_string *html_dom_string__top;
+static dom_string *html_dom_string_http_equiv;
+static dom_string *html_dom_string_content;
static nserror
html_create_html_data(html_content *c, const http_parameter *params)
@@ -558,192 +560,178 @@ static bool html_head(html_content *c, dom_node *head)
return true;
}
-/**
- * Search for meta refresh
- *
- * http://wp.netscape.com/assist/net_sites/pushpull.html
- *
- * \param c content structure
- * \param head xml node of head element
- * \return true on success, false otherwise (error reported)
- */
-
-static bool html_meta_refresh(html_content *c, dom_node *head)
+static bool html_meta_refresh_process_element(html_content *c, dom_node *n)
{
-#ifdef FIXME
- dom_node *n;
- xmlChar *equiv, *content;
union content_msg_data msg_data;
- char *url, *end, *refresh = NULL, quote = 0;
+ const char *url, *end, *refresh = NULL;
+ char *new_url;
+ char quote = '\0';
+ dom_string *equiv, *content;
+ dom_exception exc;
nsurl *nsurl;
nserror error;
- for (n = head == NULL ? NULL : head->children; n; n = n->next) {
- if (n->type != XML_ELEMENT_NODE)
- continue;
+ exc = dom_element_get_attribute(n, html_dom_string_http_equiv, &equiv);
+ if (exc != DOM_NO_ERR)
+ return false;
- /* Recurse into noscript elements */
- if (strcmp((const char *) n->name, "noscript") == 0) {
- if (html_meta_refresh(c, n) == false) {
- /* Some error occurred */
- return false;
- } else if (c->base.refresh) {
- /* Meta refresh found - stop */
- return true;
- }
- }
+ if (equiv == NULL)
+ return true;
- if (strcmp((const char *) n->name, "meta") != 0) {
- continue;
- }
+ if (strcasecmp(dom_string_data(equiv), "refresh") != 0) {
+ dom_string_unref(equiv);
+ return true;
+ }
- equiv = xmlGetProp(n, (const xmlChar *) "http-equiv");
- if (equiv == NULL)
- continue;
+ dom_string_unref(equiv);
- if (strcasecmp((const char *) equiv, "refresh") != 0) {
- xmlFree(equiv);
- continue;
- }
+ exc = dom_element_get_attribute(n, html_dom_string_content, &content);
+ if (exc != DOM_NO_ERR)
+ return false;
- xmlFree(equiv);
+ if (content == NULL)
+ return true;
- content = xmlGetProp(n, (const xmlChar *) "content");
- if (content == NULL)
- continue;
+ end = dom_string_data(content) + dom_string_byte_length(content);
- end = (char *) content + strlen((const char *) content);
-
- /* content := *LWS intpart fracpart? *LWS [';' *LWS *1url *LWS]
- * intpart := 1*DIGIT
- * fracpart := 1*('.' | DIGIT)
- * url := "url" *LWS '=' *LWS (url-nq | url-sq | url-dq)
- * url-nq := *urlchar
- * url-sq := "'" *(urlchar | '"') "'"
- * url-dq := '"' *(urlchar | "'") '"'
- * urlchar := [#x9#x21#x23-#x26#x28-#x7E] | nonascii
- * nonascii := [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
- */
+ /* content := *LWS intpart fracpart? *LWS [';' *LWS *1url *LWS]
+ * intpart := 1*DIGIT
+ * fracpart := 1*('.' | DIGIT)
+ * url := "url" *LWS '=' *LWS (url-nq | url-sq | url-dq)
+ * url-nq := *urlchar
+ * url-sq := "'" *(urlchar | '"') "'"
+ * url-dq := '"' *(urlchar | "'") '"'
+ * urlchar := [#x9#x21#x23-#x26#x28-#x7E] | nonascii
+ * nonascii := [#x80-#xD7FF#xE000-#xFFFD#x10000-#x10FFFF]
+ */
- url = (char *) content;
+ url = dom_string_data(content);
- /* *LWS */
- while (url < end && isspace(*url)) {
- url++;
- }
+ /* *LWS */
+ while (url < end && isspace(*url)) {
+ url++;
+ }
- /* intpart */
- if (url == end || (*url < '0' || '9' < *url)) {
- /* Empty content, or invalid timeval */
- xmlFree(content);
- continue;
- }
+ /* intpart */
+ if (url == end || (*url < '0' || '9' < *url)) {
+ /* Empty content, or invalid timeval */
+ dom_string_unref(content);
+ return true;
+ }
- msg_data.delay = (int) strtol(url, &url, 10);
- /* a very small delay and self-referencing URL can cause a loop
- * that grinds machines to a halt. To prevent this we set a
- * minimum refresh delay of 1s. */
- if (msg_data.delay < 1)
- msg_data.delay = 1;
+ msg_data.delay = (int) strtol(url, &new_url, 10);
+ /* a very small delay and self-referencing URL can cause a loop
+ * that grinds machines to a halt. To prevent this we set a
+ * minimum refresh delay of 1s. */
+ if (msg_data.delay < 1)
+ msg_data.delay = 1;
- /* fracpart? (ignored, as delay is integer only) */
- while (url < end && (('0' <= *url && *url <= '9') ||
- *url == '.')) {
- url++;
- }
+ url = new_url;
- /* *LWS */
- while (url < end && isspace(*url)) {
- url++;
- }
+ /* fracpart? (ignored, as delay is integer only) */
+ while (url < end && (('0' <= *url && *url <= '9') ||
+ *url == '.')) {
+ url++;
+ }
- /* ';' */
- if (url < end && *url == ';')
- url++;
+ /* *LWS */
+ while (url < end && isspace(*url)) {
+ url++;
+ }
- /* *LWS */
- while (url < end && isspace(*url)) {
- url++;
- }
+ /* ';' */
+ if (url < end && *url == ';')
+ url++;
- if (url == end) {
- /* Just delay specified, so refresh current page */
- xmlFree(content);
+ /* *LWS */
+ while (url < end && isspace(*url)) {
+ url++;
+ }
- c->base.refresh = nsurl_ref(
- content_get_url(&c->base));
+ if (url == end) {
+ /* Just delay specified, so refresh current page */
+ dom_string_unref(content);
- content_broadcast(&c->base, CONTENT_MSG_REFRESH,
- msg_data);
- break;
- }
+ c->base.refresh = nsurl_ref(
+ content_get_url(&c->base));
- /* "url" */
- if (url <= end - 3) {
- if (strncasecmp(url, "url", 3) == 0) {
- url += 3;
- } else {
- /* Unexpected input, ignore this header */
- xmlFree(content);
- continue;
- }
+ content_broadcast(&c->base, CONTENT_MSG_REFRESH,
+ msg_data);
+ return true;
+ }
+
+ /* "url" */
+ if (url <= end - 3) {
+ if (strncasecmp(url, "url", 3) == 0) {
+ url += 3;
} else {
- /* Insufficient input, ignore this header */
- xmlFree(content);
- continue;
+ /* Unexpected input, ignore this header */
+ dom_string_unref(content);
+ return true;
}
+ } else {
+ /* Insufficient input, ignore this header */
+ dom_string_unref(content);
+ return true;
+ }
- /* *LWS */
- while (url < end && isspace(*url)) {
- url++;
- }
+ /* *LWS */
+ while (url < end && isspace(*url)) {
+ url++;
+ }
- /* '=' */
- if (url < end) {
- if (*url == '=') {
- url++;
- } else {
- /* Unexpected input, ignore this header */
- xmlFree(content);
- continue;
- }
+ /* '=' */
+ if (url < end) {
+ if (*url == '=') {
+ url++;
} else {
- /* Insufficient input, ignore this header */
- xmlFree(content);
- continue;
+ /* Unexpected input, ignore this header */
+ dom_string_unref(content);
+ return true;
}
+ } else {
+ /* Insufficient input, ignore this header */
+ dom_string_unref(content);
+ return true;
+ }
- /* *LWS */
- while (url < end && isspace(*url)) {
- url++;
- }
+ /* *LWS */
+ while (url < end && isspace(*url)) {
+ url++;
+ }
- /* '"' or "'" */
- if (url < end && (*url == '"' || *url == '\'')) {
- quote = *url;
- url++;
- }
+ /* '"' or "'" */
+ if (url < end && (*url == '"' || *url == '\'')) {
+ quote = *url;
+ url++;
+ }
- /* Start of URL */
- refresh = url;
+ /* Start of URL */
+ refresh = url;
- if (quote != 0) {
- /* url-sq | url-dq */
- while (url < end && *url != quote)
- url++;
- } else {
- /* url-nq */
- while (url < end && !isspace(*url))
- url++;
- }
+ if (quote != 0) {
+ /* url-sq | url-dq */
+ while (url < end && *url != quote)
+ url++;
+ } else {
+ /* url-nq */
+ while (url < end && !isspace(*url))
+ url++;
+ }
- /* '"' or "'" or *LWS (we don't care) */
- if (url < end)
- *url = '\0';
+ /* '"' or "'" or *LWS (we don't care) */
+ if (url < end) {
+ new_url = strndup(refresh, url - refresh);
+ if (new_url == NULL) {
+ dom_string_unref(content);
+ return false;
+ }
- error = nsurl_join(c->base_url, refresh, &nsurl);
+ error = nsurl_join(c->base_url, new_url, &nsurl);
if (error != NSERROR_OK) {
- xmlFree(content);
+ free(new_url);
+
+ dom_string_unref(content);
msg_data.error = messages_get("NoMemory");
content_broadcast(&c->base, CONTENT_MSG_ERROR,
@@ -752,13 +740,93 @@ static bool html_meta_refresh(html_content *c, dom_node *head)
return false;
}
- xmlFree(content);
+ free(new_url);
c->base.refresh = nsurl;
content_broadcast(&c->base, CONTENT_MSG_REFRESH, msg_data);
}
-#endif
+
+ dom_string_unref(content);
+
+ return true;
+}
+
+/**
+ * Search for meta refresh
+ *
+ * http://wp.netscape.com/assist/net_sites/pushpull.html
+ *
+ * Walks the direct children of \a head in document order.  Each meta
+ * element is handed to html_meta_refresh_process_element() and each
+ * noscript element is searched recursively.  The walk stops as soon as
+ * one of those calls fails, or leaves c->base.refresh set.
+ *
+ * \param c     content structure
+ * \param head  DOM node of head (or noscript) element, may be NULL
+ * \return true on success, false otherwise (error reported)
+ */
+
+static bool html_meta_refresh(html_content *c, dom_node *head)
+{
+	dom_node *n, *next;
+	dom_exception exc;
+
+	if (head == NULL)
+		return true;
+
+	exc = dom_node_get_first_child(head, &n);
+	if (exc != DOM_NO_ERR)
+		return false;
+
+	while (n != NULL) {
+		dom_node_type type;
+
+		exc = dom_node_get_node_type(n, &type);
+		if (exc != DOM_NO_ERR) {
+			dom_node_unref(n);
+			return false;
+		}
+
+		/* Use libdom's own node type constant, not libxml2's */
+		if (type == DOM_ELEMENT_NODE) {
+			dom_string *name = NULL;
+			bool examined = false;
+			bool ok = true;
+
+			exc = dom_node_get_node_name(n, &name);
+			if (exc != DOM_NO_ERR) {
+				dom_node_unref(n);
+				return false;
+			}
+
+			if (name != NULL) {
+				const char *tag = dom_string_data(name);
+
+				if (strcmp(tag, "noscript") == 0) {
+					/* Recurse into noscript elements */
+					examined = true;
+					ok = html_meta_refresh(c, n);
+				} else if (strcmp(tag, "meta") == 0) {
+					examined = true;
+					ok = html_meta_refresh_process_element(
+							c, n);
+				}
+
+				/* We own a reference to the name string;
+				 * release it on every path so it is not
+				 * leaked once per element. */
+				dom_string_unref(name);
+			}
+
+			if (examined) {
+				if (ok == false) {
+					/* Some error occurred */
+					dom_node_unref(n);
+					return false;
+				} else if (c->base.refresh != NULL) {
+					/* Meta refresh found - stop */
+					dom_node_unref(n);
+					return true;
+				}
+			}
+		}
+
+		exc = dom_node_get_next_sibling(n, &next);
+		if (exc != DOM_NO_ERR) {
+			dom_node_unref(n);
+			return false;
+		}
+
+		/* Done with this child; move on to its sibling */
+		dom_node_unref(n);
+		n = next;
+	}
+
return true;
}
@@ -3066,9 +3134,16 @@ nserror html_init(void)
HTML_DOM_STRING_INTERN(_self);
HTML_DOM_STRING_INTERN(_parent);
HTML_DOM_STRING_INTERN(_top);
+ HTML_DOM_STRING_INTERN(content);
#undef HTML_DOM_STRING_INTERN
+ exc = dom_string_create_interned((const uint8_t *) "http-equiv",
+ SLEN("http-equiv"),
+ &html_dom_string_http_equiv);
+ if ((exc != DOM_NO_ERR) || (html_dom_string_http_equiv == NULL))
+ goto error;
+
for (i = 0; i < NOF_ELEMENTS(html_types); i++) {
error = content_factory_register_handler(html_types[i],
&html_content_handler);