From 73f43994189d92e43616548f8a1f8b92de3b8887 Mon Sep 17 00:00:00 2001
From: John Mark Bell <jmb@netsurf-browser.org>
Date: Mon, 13 Sep 2010 22:18:51 +0000
Subject: Replace use of iconv with a parserutils inputstream

svn path=/trunk/netsurf/; revision=10770
---
 render/textplain.c | 167 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 98 insertions(+), 69 deletions(-)

(limited to 'render/textplain.c')

diff --git a/render/textplain.c b/render/textplain.c
index 94a5cce66..13ccd2660 100644
--- a/render/textplain.c
+++ b/render/textplain.c
@@ -27,7 +27,9 @@
 #include <string.h>
 #include <strings.h>
 #include <math.h>
-#include <iconv.h>
+
+#include <parserutils/input/inputstream.h>
+
 #include "content/content_protected.h"
 #include "content/hlcache.h"
 #include "css/css.h"
@@ -67,6 +69,10 @@ static plot_font_style_t textplain_style = {
 static int textplain_tab_width = 256;  /* try for a sensible default */
 
 static bool textplain_create_internal(struct content *c, const char *encoding);
+static parserutils_error textplain_charset_hack(const uint8_t *data, size_t len,
+		uint16_t *mibenum, uint32_t *source);
+static bool textplain_drain_input(struct content *c, 
+		parserutils_inputstream *stream, parserutils_error terminator);
 static int textplain_coord_from_offset(const char *text, size_t offset,
 	size_t length);
 static float textplain_line_height(void);
@@ -91,39 +97,54 @@ bool textplain_create(struct content *c, const http_parameter *params)
 	return textplain_create_internal(c, encoding);
 }
 
+/**
+ * Charset detection callback that deliberately does nothing.
+ *
+ * Hack around a bug in libparserutils: if the client provides an encoding
+ * up front but no charset detection callback, libparserutils replaces the
+ * provided encoding with UTF-8, which breaks our input handling. This no-op
+ * callback preserves the charset information chosen in textplain_create.
+ *
+ * \return PARSERUTILS_OK always; no argument is read or written
+ */
+parserutils_error textplain_charset_hack(const uint8_t *data, size_t len,
+		uint16_t *mibenum, uint32_t *source)
+{
+	return PARSERUTILS_OK;
+} 
+
 bool textplain_create_internal(struct content *c, const char *encoding)
 {
 	char *utf8_data;
-	iconv_t iconv_cd;
+	parserutils_inputstream *stream;
+	parserutils_error error;
 	union content_msg_data msg_data;
 
 	utf8_data = talloc_array(c, char, CHUNK);
-	if (!utf8_data)
+	if (utf8_data == NULL)
 		goto no_memory;
 
-	iconv_cd = iconv_open("utf-8", encoding);
-	if (iconv_cd == (iconv_t)(-1) && errno == EINVAL) {
-		LOG(("unsupported encoding \"%s\"", encoding));
-		iconv_cd = iconv_open("utf-8", "iso-8859-1");
+	error = parserutils_inputstream_create(encoding, 0, 
+			textplain_charset_hack, ns_realloc, NULL, &stream);
+	if (error == PARSERUTILS_BADENCODING) {
+		/* Fall back to Windows-1252 */
+		error = parserutils_inputstream_create("Windows-1252", 0,
+				textplain_charset_hack, ns_realloc, NULL,
+				&stream);
 	}
-	if (iconv_cd == (iconv_t)(-1)) {
-		char buf[300];
-
-		snprintf(buf, sizeof buf, "IconvFailed %s", strerror(errno));
-		buf[sizeof buf - 1] = 0;
-
-		msg_data.error = buf;
-		content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
-
-		return false;
+	if (error != PARSERUTILS_OK) {
+		talloc_free(utf8_data);
+		goto no_memory;
 	}
-
+	
 	c->data.textplain.encoding = strdup(encoding);
-	if (c->data.textplain.encoding == NULL)
+	if (c->data.textplain.encoding == NULL) {
+		talloc_free(utf8_data);
+		parserutils_inputstream_destroy(stream);
 		goto no_memory;
+	}
 		
-	c->data.textplain.iconv_cd = iconv_cd;
-	c->data.textplain.converted = 0;
+	c->data.textplain.inputstream = stream;
 	c->data.textplain.utf8_data = utf8_data;
 	c->data.textplain.utf8_data_size = 0;
 	c->data.textplain.utf8_data_allocated = CHUNK;
@@ -139,62 +160,59 @@ no_memory:
 	return false;
 }
 
-
-/**
- * Process data for CONTENT_TEXTPLAIN.
- */
-
-bool textplain_process_data(struct content *c, 
-		const char *data, unsigned int size)
+bool textplain_drain_input(struct content *c, parserutils_inputstream *stream,
+		parserutils_error terminator)
 {
-	iconv_t iconv_cd = c->data.textplain.iconv_cd;
-	size_t count;
-	union content_msg_data msg_data;
-	const char *source_data;
-	unsigned long source_size;
-
-	source_data = content__get_source_data(c, &source_size);
-
-	do {
-		char *inbuf = (char *) source_data + 
-				c->data.textplain.converted;
-		size_t inbytesleft = source_size - c->data.textplain.converted;
-		char *outbuf = c->data.textplain.utf8_data +
-				c->data.textplain.utf8_data_size;
-		size_t outbytesleft = c->data.textplain.utf8_data_allocated -
-				c->data.textplain.utf8_data_size;
-		count = iconv(iconv_cd, &inbuf, &inbytesleft,
-				&outbuf, &outbytesleft);
-		c->data.textplain.converted = inbuf - source_data;
-		c->data.textplain.utf8_data_size = c->data.textplain.
-				utf8_data_allocated - outbytesleft;
-
-		if (count == (size_t)(-1) && errno == E2BIG) {
+	const uint8_t *ch;
+	size_t chlen;
+	parserutils_error error;	/* status of the most recent peek */
+	/** \todo Optimise: stop invoking memcpy for each character */
+	while ((error = parserutils_inputstream_peek(stream, 0, &ch,
+			&chlen)) == PARSERUTILS_OK) {
+		if (c->data.textplain.utf8_data_size + chlen >= 
+				c->data.textplain.utf8_data_allocated) {
 			size_t allocated = CHUNK +
 					c->data.textplain.utf8_data_allocated;
 			char *utf8_data = talloc_realloc(c,
 					c->data.textplain.utf8_data,
 					char, allocated);
-			if (!utf8_data)
-				goto no_memory;
+			if (utf8_data == NULL)
+				return false;
+			/* realloc succeeded: adopt the new buffer */
 			c->data.textplain.utf8_data = utf8_data;
 			c->data.textplain.utf8_data_allocated = allocated;
-		} else if (count == (size_t)(-1) && errno != EINVAL) {
-			char buf[300];
+		}

-			snprintf(buf, sizeof buf, "IconvFailed %s",
-					strerror(errno));
-			buf[sizeof buf - 1] = 0;
+		memcpy(c->data.textplain.utf8_data + 
+				c->data.textplain.utf8_data_size, ch, chlen);
+		c->data.textplain.utf8_data_size += chlen;

-			msg_data.error = buf;
-			content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+		parserutils_inputstream_advance(stream, chlen);
+	}

-			return false;
-		}
+	return error == terminator;	/* any other status is a failure */
+}
 
-		gui_multitask();
-	} while (!(c->data.textplain.converted == source_size ||
-			(count == (size_t)(-1) && errno == EINVAL)));
+
+/**
+ * Process data for CONTENT_TEXTPLAIN.
+ */
+
+bool textplain_process_data(struct content *c, 
+		const char *data, unsigned int size)
+{
+	parserutils_inputstream *stream = c->data.textplain.inputstream;
+	union content_msg_data msg_data;
+	parserutils_error error;
+
+	error = parserutils_inputstream_append(stream, 
+			(const uint8_t *) data, size);
+	if (error != PARSERUTILS_OK) {
+		goto no_memory;
+	}
+
+	if (textplain_drain_input(c, stream, PARSERUTILS_NEEDDATA) == false)
+		goto no_memory;
 
 	return true;
 
@@ -211,8 +229,19 @@ no_memory:
 
 bool textplain_convert(struct content *c)
 {
-	iconv_close(c->data.textplain.iconv_cd);
-	c->data.textplain.iconv_cd = 0;
+	parserutils_inputstream *stream = c->data.textplain.inputstream;
+	parserutils_error error;
+
+	error = parserutils_inputstream_append(stream, NULL, 0);
+	if (error != PARSERUTILS_OK) {
+		return false;
+	}
+
+	if (textplain_drain_input(c, stream, PARSERUTILS_EOF) == false)
+		return false;
+
+	parserutils_inputstream_destroy(stream);
+	c->data.textplain.inputstream = NULL;
 
 	c->status = CONTENT_STATUS_DONE;
 	content_set_status(c, messages_get("Done"));
@@ -324,8 +353,8 @@ void textplain_destroy(struct content *c)
 	if (c->data.textplain.encoding != NULL)
 		free(c->data.textplain.encoding);
 
-	if (c->data.textplain.iconv_cd)
-		iconv_close(c->data.textplain.iconv_cd);
+	if (c->data.textplain.inputstream != NULL)
+		parserutils_inputstream_destroy(c->data.textplain.inputstream);
 }
 
 
-- 
cgit v1.2.3