From 6df8f99a707326655b4f285920f19fef6d9eb90a Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Tue, 14 Oct 2008 15:44:05 +0000 Subject: Fixup dubious charsets svn path=/trunk/hubbub/; revision=5575 --- src/parser.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'src/parser.c') diff --git a/src/parser.c b/src/parser.c index e43a309..075a0e2 100644 --- a/src/parser.c +++ b/src/parser.c @@ -5,6 +5,9 @@ * Copyright 2007-8 John-Mark Bell */ +#include + +#include #include #include @@ -29,11 +32,12 @@ struct hubbub_parser { * Create a hubbub parser * * \param enc Source document encoding, or NULL to autodetect + * \param fix_enc Permit fixing up of encoding if it's frequently misused * \param alloc Memory (de)allocation function * \param pw Pointer to client-specific private data (may be NULL) * \return Pointer to parser instance, or NULL on error */ -hubbub_parser *hubbub_parser_create(const char *enc, +hubbub_parser *hubbub_parser_create(const char *enc, bool fix_enc, hubbub_alloc alloc, void *pw) { hubbub_parser *parser; @@ -45,6 +49,19 @@ hubbub_parser *hubbub_parser_create(const char *enc, if (parser == NULL) return NULL; + /* If we have an encoding and we're permitted to fix up likely broken + * ones, then attempt to do so. */ + if (enc != NULL && fix_enc == true) { + uint16_t mibenum = parserutils_charset_mibenum_from_name(enc, + strlen(enc)); + + if (mibenum != 0) { + hubbub_charset_fix_charset(&mibenum); + + enc = parserutils_charset_mibenum_to_name(mibenum); + } + } + parser->stream = parserutils_inputstream_create(enc, enc != NULL ? 
HUBBUB_CHARSET_CONFIDENT : HUBBUB_CHARSET_UNKNOWN, hubbub_charset_extract, alloc, pw); @@ -201,7 +218,7 @@ hubbub_error hubbub_parser_parse_chunk(hubbub_parser *parser, * Pass a chunk of extraneous data to a hubbub parser for parsing * * \param parser Parser instance to use - * \param data Data to parse (encoded in internal charset) + * \param data Data to parse (encoded in UTF-8) * \param len Length, in byte, of data * \return HUBBUB_OK on success, appropriate error otherwise */ -- cgit v1.2.3