summary | refs | log | tree | commit | diff
path: root/render/html.c
diff options
context:
space:
mode:
author: John Mark Bell <jmb@netsurf-browser.org> 2007-02-10 19:53:41 +0000
committer: John Mark Bell <jmb@netsurf-browser.org> 2007-02-10 19:53:41 +0000
commit: a9b34bd990e5b9073b8069796052364618a5a6ad (patch)
tree: 0ed6ac9e7d9c50da57927ccc5e6e762b8bfff580 /render/html.c
parent: 9549165a2b099c5a418adcf820ef856e34e3f4ae (diff)
download: netsurf-a9b34bd990e5b9073b8069796052364618a5a6ad.tar.gz
download: netsurf-a9b34bd990e5b9073b8069796052364618a5a6ad.tar.bz2
Work around libxml oddness in allowing document meta encoding to
override external charset information. svn path=/trunk/netsurf/; revision=3175
Diffstat (limited to 'render/html.c')
-rw-r--r-- render/html.c 20
1 file changed, 20 insertions, 0 deletions
diff --git a/render/html.c b/render/html.c
index 5ea64a985..b0d0aef7f 100644
--- a/render/html.c
+++ b/render/html.c
@@ -217,6 +217,26 @@ bool html_set_parser_encoding(struct content *c, const char *encoding)
return false;
}
+ /* Dirty hack to get around libxml oddness:
+ * 1) When creating a push parser context, the input flow's encoding
+ * string is not set (whether an encoding is specified or not)
+ * 2) When switching encoding (as above), the input flow's encoding
+ * string is never changed
+ * 3) When handling a meta charset, the input flow's encoding string
+ * is checked to determine if an encoding has already been set.
+ * If it has been set, then the meta charset is ignored.
+ *
+ * The upshot of this is that, if we don't explicitly set the input
+ * flow's encoding string here, any meta charset in the document
+ * will override our setting, which is incorrect behaviour.
+ *
+ * Ideally, this would be fixed in libxml, but that requires rather
+ * more knowledge than I currently have of what libxml is doing.
+ */
+ if (!html->parser->input->encoding)
+ html->parser->input->encoding =
+ xmlStrdup((xmlChar *) encoding);
+
/* Ensure noone else attempts to reset the encoding */
html->getenc = false;