summaryrefslogtreecommitdiff
path: root/render/html.c
diff options
context:
space:
mode:
authorJohn Mark Bell <jmb@netsurf-browser.org>2007-02-10 21:34:22 +0000
committerJohn Mark Bell <jmb@netsurf-browser.org>2007-02-10 21:34:22 +0000
commit3b40e0f5fc824bdcea46ee4fce609c2511c6b1fd (patch)
tree17aca5977986b50496d23cc4e5b06521a0dc3a12 /render/html.c
parenta9b34bd990e5b9073b8069796052364618a5a6ad (diff)
downloadnetsurf-3b40e0f5fc824bdcea46ee4fce609c2511c6b1fd.tar.gz
netsurf-3b40e0f5fc824bdcea46ee4fce609c2511c6b1fd.tar.bz2
Reparse entire document if meta charset resulting in changed document
encoding is encountered (fixes 1389126) svn path=/trunk/netsurf/; revision=3176
Diffstat (limited to 'render/html.c')
-rw-r--r--render/html.c62
1 files changed, 48 insertions, 14 deletions
diff --git a/render/html.c b/render/html.c
index b0d0aef7f..46d98b81d 100644
--- a/render/html.c
+++ b/render/html.c
@@ -177,6 +177,53 @@ bool html_process_data(struct content *c, char *data, unsigned int size)
}
htmlParseChunk(c->data.html.parser, data + x, (int) (size - x), 0);
+ if (!c->data.html.encoding && c->data.html.parser->input->encoding) {
+ /* The encoding was not in headers or detected,
+ * and the parser found a <meta http-equiv="content-type"
+ * content="text/html; charset=...">. */
+ c->data.html.encoding = talloc_strdup(c,
+ c->data.html.parser->input->encoding);
+ if (!c->data.html.encoding) {
+ union content_msg_data msg_data;
+
+ msg_data.error = messages_get("NoMemory");
+ content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+ return false;
+ }
+ c->data.html.encoding_source = ENCODING_SOURCE_META;
+
+ /* have the encoding; don't attempt to detect it */
+ c->data.html.getenc = false;
+
+ /* now, we must reset the parser such that it reparses
+ * using the correct charset, and then reparse any document
+ * source we've got. we achieve this by recreating the
+ * parser in its entirety as this is simpler than resetting
+ * the existing one and ensuring it's still set up correctly.
+ */
+ if (c->data.html.parser->myDoc)
+ xmlFreeDoc(c->data.html.parser->myDoc);
+ htmlFreeParserCtxt(c->data.html.parser);
+
+ c->data.html.parser = htmlCreatePushParserCtxt(0, 0, "", 0,
+ 0, XML_CHAR_ENCODING_NONE);
+ if (!c->data.html.parser) {
+ union content_msg_data msg_data;
+
+ msg_data.error = messages_get("NoMemory");
+ content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+ return false;
+ }
+ if (!html_set_parser_encoding(c, c->data.html.encoding))
+ return false;
+
+ /* and reparse received document source - the recursion
+ * is safe as we've just set c->data.html.encoding so
+ * we'll never get back in here. */
+ if (!html_process_data(c, c->source_data, c->source_size))
+ return false;
+ }
+
return true;
}
@@ -235,7 +282,7 @@ bool html_set_parser_encoding(struct content *c, const char *encoding)
*/
if (!html->parser->input->encoding)
html->parser->input->encoding =
- xmlStrdup((xmlChar *) encoding);
+ xmlStrdup((const xmlChar *) encoding);
/* Ensure noone else attempts to reset the encoding */
html->getenc = false;
@@ -316,19 +363,6 @@ bool html_convert(struct content *c, int width, int height)
return false;
}
- if (!c->data.html.encoding && document->encoding) {
- /* The encoding was not in headers or detected, and the parser
- * found a <meta http-equiv="content-type"
- * content="text/html; charset=...">. */
- c->data.html.encoding = talloc_strdup(c, document->encoding);
- if (!c->data.html.encoding) {
- msg_data.error = messages_get("NoMemory");
- content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
- return false;
- }
- c->data.html.encoding_source = ENCODING_SOURCE_META;
- }
-
/* locate html and head elements */
for (html = document->children;
html != 0 && html->type != XML_ELEMENT_NODE;