summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John Mark Bell <jmb@netsurf-browser.org>2009-07-25 21:24:25 (GMT)
committer John Mark Bell <jmb@netsurf-browser.org>2009-07-25 21:24:25 (GMT)
commit 27def7b52a24c03f270ce66b9b175ab227cfa21b (patch)
tree d2ac83882f25c160d570e533d58af9c3016ab815
parent dded6b00138b102e627c562f2f52230925c0309e (diff)
download libparserutils-27def7b52a24c03f270ce66b9b175ab227cfa21b.tar.gz
libparserutils-27def7b52a24c03f270ce66b9b175ab227cfa21b.tar.bz2
Correctly handle case where all of the following are true:
1) There's outstanding data to be read
2) We haven't read the first chunk of data
3) We've received EOF
4) There's insufficient input data to detect the charset

svn path=/trunk/libparserutils/; revision=8789
-rw-r--r-- src/input/inputstream.c 20
1 files changed, 17 insertions, 3 deletions
diff --git a/src/input/inputstream.c b/src/input/inputstream.c
index 150dcce..03dbf8f 100644
--- a/src/input/inputstream.c
+++ b/src/input/inputstream.c
@@ -376,15 +376,29 @@ parserutils_error parserutils_inputstream_refill_buffer(
/* If this is the first chunk of data, we must detect the charset and
* strip the BOM, if one exists */
- if (!stream->done_first_chunk) {
+ if (stream->done_first_chunk == false) {
parserutils_filter_optparams params;
if (stream->csdetect != NULL) {
error = stream->csdetect(stream->raw->data,
stream->raw->length,
&stream->mibenum, &stream->encsrc);
- if (error != PARSERUTILS_OK)
- return error;
+ if (error != PARSERUTILS_OK) {
+ if (error != PARSERUTILS_NEEDDATA ||
+ stream->public.had_eof == false)
+ return error;
+
+ /* We don't have enough data to detect the
+ * input encoding, but we're not going to get
+ * any more as we've been notified of EOF.
+ * Therefore, fall back to UTF-8. */
+ stream->mibenum =
+ parserutils_charset_mibenum_from_name(
+ "UTF-8", SLEN("UTF-8"));
+ stream->encsrc = 0;
+
+ error = PARSERUTILS_OK;
+ }
} else {
/* Default to UTF-8 */
stream->mibenum =