Reparse entire document if meta charset resulting in changed document

encoding is encountered (fixes 1389126) svn path=/trunk/netsurf/; revision=3176
author: John Mark Bell <jmb@netsurf-browser.org> 2007-02-10 21:34:22 +0000
committer: John Mark Bell <jmb@netsurf-browser.org> 2007-02-10 21:34:22 +0000
commit: 3b40e0f5fc824bdcea46ee4fce609c2511c6b1fd (patch)
tree: 17aca5977986b50496d23cc4e5b06521a0dc3a12 /render/html.c
parent: a9b34bd990e5b9073b8069796052364618a5a6ad (diff)
download: netsurf-3b40e0f5fc824bdcea46ee4fce609c2511c6b1fd.tar.gz
netsurf-3b40e0f5fc824bdcea46ee4fce609c2511c6b1fd.tar.bz2
1 files changed, 48 insertions, 14 deletions
diff --git a/render/html.c b/render/html.c
index b0d0aef7f..46d98b81d 100644
--- a/render/html.c
+++ b/render/html.c
@@ -177,6 +177,53 @@ bool html_process_data(struct content *c, char *data, unsigned int size)
 	}
 	htmlParseChunk(c->data.html.parser, data + x, (int) (size - x), 0);
 
+	if (!c->data.html.encoding && c->data.html.parser->input->encoding) {
+		/* The encoding was not in headers or detected,
+		 * and the parser found a <meta http-equiv="content-type"
+		 * content="text/html; charset=...">. */
+		c->data.html.encoding = talloc_strdup(c,
+				c->data.html.parser->input->encoding);
+		if (!c->data.html.encoding) {
+			union content_msg_data msg_data;
+
+			msg_data.error = messages_get("NoMemory");
+			content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+			return false;
+		}
+		c->data.html.encoding_source = ENCODING_SOURCE_META;
+
+		/* have the encoding; don't attempt to detect it */
+		c->data.html.getenc = false;
+
+		/* now, we must reset the parser such that it reparses
+		 * using the correct charset, and then reparse any document
+		 * source we've got. we achieve this by recreating the
+		 * parser in its entirety as this is simpler than resetting
+		 * the existing one and ensuring it's still set up correctly.
+		 */
+		if (c->data.html.parser->myDoc)
+			xmlFreeDoc(c->data.html.parser->myDoc);
+		htmlFreeParserCtxt(c->data.html.parser);
+
+		c->data.html.parser = htmlCreatePushParserCtxt(0, 0, "", 0,
+				0, XML_CHAR_ENCODING_NONE);
+		if (!c->data.html.parser) {
+			union content_msg_data msg_data;
+
+			msg_data.error = messages_get("NoMemory");
+			content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+			return false;
+		}
+		if (!html_set_parser_encoding(c, c->data.html.encoding))
+			return false;
+
+		/* and reparse received document source - the recursion
+		 * is safe as we've just set c->data.html.encoding so
+		 * we'll never get back in here. */
+		if (!html_process_data(c, c->source_data, c->source_size))
+			return false;
+	}
+
 	return true;
 }
 
@@ -235,7 +282,7 @@ bool html_set_parser_encoding(struct content *c, const char *encoding)
 	 */
 	if (!html->parser->input->encoding)
 		html->parser->input->encoding =
-				xmlStrdup((xmlChar *) encoding);
+				xmlStrdup((const xmlChar *) encoding);
 
 	/* Ensure noone else attempts to reset the encoding */
 	html->getenc = false;
@@ -316,19 +363,6 @@ bool html_convert(struct content *c, int width, int height)
 		return false;
 	}
 
-	if (!c->data.html.encoding && document->encoding) {
-		/* The encoding was not in headers or detected, and the parser
-		 * found a <meta http-equiv="content-type"
-		 * content="text/html; charset=...">. */
-		c->data.html.encoding = talloc_strdup(c, document->encoding);
-		if (!c->data.html.encoding) {
-			msg_data.error = messages_get("NoMemory");
-			content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
-			return false;
-		}
-		c->data.html.encoding_source = ENCODING_SOURCE_META;
-	}
-
 	/* locate html and head elements */
 	for (html = document->children;
 			html != 0 && html->type != XML_ELEMENT_NODE;
author	John Mark Bell <jmb@netsurf-browser.org>	2007-02-10 21:34:22 +0000
committer	John Mark Bell <jmb@netsurf-browser.org>	2007-02-10 21:34:22 +0000
commit	3b40e0f5fc824bdcea46ee4fce609c2511c6b1fd (patch)
tree	17aca5977986b50496d23cc4e5b06521a0dc3a12 /render/html.c
parent	a9b34bd990e5b9073b8069796052364618a5a6ad (diff)
download	netsurf-3b40e0f5fc824bdcea46ee4fce609c2511c6b1fd.tar.gz netsurf-3b40e0f5fc824bdcea46ee4fce609c2511c6b1fd.tar.bz2