summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John Mark Bell <jmb@netsurf-browser.org> 2009-04-02 00:29:07 +0000
committer John Mark Bell <jmb@netsurf-browser.org> 2009-04-02 00:29:07 +0000
commit 1a8ae5bb267dc3fcd75856dd62b40c252f444c7c (patch)
tree 8a610ce7e97a06d294f30fdc2091ee44ddf44115
parent 9b8208fca65e6ff52634bb5055d2566e1d2c82c1 (diff)
download libcss-1a8ae5bb267dc3fcd75856dd62b40c252f444c7c.tar.gz
libcss-1a8ae5bb267dc3fcd75856dd62b40c252f444c7c.tar.bz2
Reject any non-ASCII charset detected using the ASCII-compatible detector
svn path=/trunk/libcss/; revision=7027
-rw-r--r-- src/charset/detect.c 18
1 files changed, 18 insertions, 0 deletions
diff --git a/src/charset/detect.c b/src/charset/detect.c
index 78b4663..2612fa8 100644
--- a/src/charset/detect.c
+++ b/src/charset/detect.c
@@ -414,6 +414,24 @@ parserutils_error try_ascii_compatible_charset(const uint8_t *data, size_t len,
/* Convert to MIB enum */
charset = parserutils_charset_mibenum_from_name(
(const char *) start, end - start);
+
+ /* Any non-ASCII compatible charset must be ignored, as
+ * we've just used an ASCII parser to read it. */
+ if (charset == parserutils_charset_mibenum_from_name(
+ "UTF-32", SLEN("UTF-32")) ||
+ charset == parserutils_charset_mibenum_from_name(
+ "UTF-32LE", SLEN("UTF-32LE")) ||
+ charset == parserutils_charset_mibenum_from_name(
+ "UTF-32BE", SLEN("UTF-32BE")) ||
+ charset == parserutils_charset_mibenum_from_name(
+ "UTF-16", SLEN("UTF-16")) ||
+ charset == parserutils_charset_mibenum_from_name(
+ "UTF-16LE", SLEN("UTF-16LE")) ||
+ charset == parserutils_charset_mibenum_from_name(
+ "UTF-16BE", SLEN("UTF-16BE"))) {
+
+ charset = 0;
+ }
}
#undef CHARSET