summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author John Mark Bell <jmb@netsurf-browser.org> 2010-10-23 17:24:28 (GMT)
committer John Mark Bell <jmb@netsurf-browser.org>2010-10-23 17:24:28 (GMT)
commit 6537f4f4acc41eb0608fdb1506ff8fc947cfb121 (patch)
tree 84885dbfd3c55920e0b5ba161b038b8b86824ae7
parent 4dfe13fb3772f9f9df944238e2fcf782b8c336aa (diff)
downloadlibparserutils-6537f4f4acc41eb0608fdb1506ff8fc947cfb121.tar.gz
libparserutils-6537f4f4acc41eb0608fdb1506ff8fc947cfb121.tar.bz2
Fix bug where any encoding specified when creating an input stream would be replaced by UTF-8 if no charset detection callback was provided as well.
Tidy up the logic in this area, and add more commentary so it's clear.

svn path=/trunk/libparserutils/; revision=10899
-rw-r--r--src/input/inputstream.c26
1 files changed, 18 insertions, 8 deletions
diff --git a/src/input/inputstream.c b/src/input/inputstream.c
index 03dbf8f..73c038d 100644
--- a/src/input/inputstream.c
+++ b/src/input/inputstream.c
@@ -379,6 +379,9 @@ parserutils_error parserutils_inputstream_refill_buffer(
if (stream->done_first_chunk == false) {
parserutils_filter_optparams params;
+ /* If there is a charset detection routine, give it an
+ * opportunity to override any charset specified when the
+ * inputstream was created */
if (stream->csdetect != NULL) {
error = stream->csdetect(stream->raw->data,
stream->raw->length,
@@ -391,16 +394,23 @@ parserutils_error parserutils_inputstream_refill_buffer(
/* We don't have enough data to detect the
* input encoding, but we're not going to get
* any more as we've been notified of EOF.
- * Therefore, fall back to UTF-8. */
- stream->mibenum =
- parserutils_charset_mibenum_from_name(
- "UTF-8", SLEN("UTF-8"));
- stream->encsrc = 0;
-
+ * Therefore, leave the encoding alone
+ * so that any charset specified when the
+ * inputstream was created will be preserved.
+ * If there was no charset specified, then
+ * we'll default to UTF-8, below */
error = PARSERUTILS_OK;
}
- } else {
- /* Default to UTF-8 */
+ }
+
+ /* Default to UTF-8 if there is still no encoding information
+ * We'll do this if there was no encoding specified up-front
+ * and:
+ * 1) there was no charset detection routine
+ * or 2) there was insufficient data for the charset
+ * detection routine to detect an encoding
+ */
+ if (stream->mibenum == 0) {
stream->mibenum =
parserutils_charset_mibenum_from_name("UTF-8",
SLEN("UTF-8"));