From 27def7b52a24c03f270ce66b9b175ab227cfa21b Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Sat, 25 Jul 2009 21:24:25 +0000 Subject: Correctly handle case where all of the following are true: 1) There's outstanding data to be read 2) We haven't read the first chunk of data 3) We've received EOF 4) There's insufficient input data to detect the charset svn path=/trunk/libparserutils/; revision=8789 --- src/input/inputstream.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) diff --git a/src/input/inputstream.c b/src/input/inputstream.c index 150dcce..03dbf8f 100644 --- a/src/input/inputstream.c +++ b/src/input/inputstream.c @@ -376,15 +376,29 @@ parserutils_error parserutils_inputstream_refill_buffer( /* If this is the first chunk of data, we must detect the charset and * strip the BOM, if one exists */ - if (!stream->done_first_chunk) { + if (stream->done_first_chunk == false) { parserutils_filter_optparams params; if (stream->csdetect != NULL) { error = stream->csdetect(stream->raw->data, stream->raw->length, &stream->mibenum, &stream->encsrc); - if (error != PARSERUTILS_OK) - return error; + if (error != PARSERUTILS_OK) { + if (error != PARSERUTILS_NEEDDATA || + stream->public.had_eof == false) + return error; + + /* We don't have enough data to detect the + * input encoding, but we're not going to get + * any more as we've been notified of EOF. + * Therefore, fall back to UTF-8. */ + stream->mibenum = + parserutils_charset_mibenum_from_name( + "UTF-8", SLEN("UTF-8")); + stream->encsrc = 0; + + error = PARSERUTILS_OK; + } } else { /* Default to UTF-8 */ stream->mibenum = -- cgit v1.2.3