From b734c12fd6d38d02b2daa5cba69e69b7dcf94182 Mon Sep 17 00:00:00 2001
From: John Mark Bell <jmb@netsurf-browser.org>
Date: Sat, 23 Oct 2010 18:43:48 +0000
Subject: Slightly less braindead textplain handling

svn path=/trunk/netsurf/; revision=10900
---
 render/textplain.c | 96 ++++++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 72 insertions(+), 24 deletions(-)

diff --git a/render/textplain.c b/render/textplain.c
index 716ee24da..547d6a82c 100644
--- a/render/textplain.c
+++ b/render/textplain.c
@@ -51,7 +51,7 @@
 #include "utils/utf8.h"
 
 
-#define CHUNK 20480
+#define CHUNK 32768 /* Must be a power of 2 */
 #define MARGIN 4
 
 
@@ -73,6 +73,8 @@ static parserutils_error textplain_charset_hack(const uint8_t *data, size_t len,
 		uint16_t *mibenum, uint32_t *source);
 static bool textplain_drain_input(struct content *c, 
 		parserutils_inputstream *stream, parserutils_error terminator);
+static bool textplain_copy_utf8_data(struct content *c,
+		const uint8_t *buf, size_t len);
 static int textplain_coord_from_offset(const char *text, size_t offset,
 	size_t length);
 static float textplain_line_height(void);
@@ -165,44 +167,90 @@ bool textplain_drain_input(struct content *c, parserutils_inputstream *stream,
 {
 	static const uint8_t *u_fffd = (const uint8_t *) "\xef\xbf\xfd";
 	const uint8_t *ch;
-	size_t chlen, outlen;
+	size_t chlen, offset = 0;
 
-	/** \todo Optimise: stop invoking memcpy for each character */
-	while (parserutils_inputstream_peek(stream, 0, &ch, &chlen) != 
+	while (parserutils_inputstream_peek(stream, offset, &ch, &chlen) != 
 			terminator) {
-
 		/* Replace all instances of NUL with U+FFFD */
 		if (chlen == 1 && *ch == 0) {
-			ch = u_fffd;
-			outlen = 3;
-		} else {
-			outlen = chlen;
-		}
+			if (offset > 0) {
+				/* Obtain pointer to start of input data */
+				parserutils_inputstream_peek(stream, 0, 
+						&ch, &chlen);
+				/* Copy from it up to the start of the NUL */
+				if (textplain_copy_utf8_data(c, ch, 
+						offset) == false)
+					return false;
+			}
 
-		if (c->data.textplain.utf8_data_size + outlen >= 
-				c->data.textplain.utf8_data_allocated) {
-			size_t allocated = CHUNK +
-					c->data.textplain.utf8_data_allocated;
-			char *utf8_data = talloc_realloc(c,
-					c->data.textplain.utf8_data,
-					char, allocated);
-			if (utf8_data == NULL)
+			/* Emit U+FFFD */
+			if (textplain_copy_utf8_data(c, u_fffd, 3) == false)
 				return false;
 
-			c->data.textplain.utf8_data = utf8_data;
-			c->data.textplain.utf8_data_allocated = allocated;
+			/* Advance inputstream past the NUL we just read */
+			parserutils_inputstream_advance(stream, offset + 1);
+			/* Reset the read offset */
+			offset = 0;
+		} else {
+			/* Accumulate input */
+			offset += chlen;
+
+			if (offset > CHUNK) {
+				/* Obtain pointer to start of input data */
+				parserutils_inputstream_peek(stream, 0, 
+						&ch, &chlen);
+
+				/* Emit the data we've read */
+				if (textplain_copy_utf8_data(c, ch, 
+						offset) == false)
+					return false;
+
+				/* Advance the inputstream */
+				parserutils_inputstream_advance(stream, offset);
+				/* Reset the read offset */
+				offset = 0;
+			}
 		}
+	}
 
-		memcpy(c->data.textplain.utf8_data + 
-				c->data.textplain.utf8_data_size, ch, outlen);
-		c->data.textplain.utf8_data_size += outlen;
+	if (offset > 0) {
+		/* Obtain pointer to start of input data */
+		parserutils_inputstream_peek(stream, 0, &ch, &chlen);	
+		/* Emit any data remaining */
+		if (textplain_copy_utf8_data(c, ch, offset) == false)
+			return false;
 
-		parserutils_inputstream_advance(stream, chlen);
+		/* Advance the inputstream past the data we've read */
+		parserutils_inputstream_advance(stream, offset);
 	}
 
 	return true;
 }
 
+bool textplain_copy_utf8_data(struct content *c, const uint8_t *buf, size_t len)
+{
+	if (c->data.textplain.utf8_data_size + len >= 
+			c->data.textplain.utf8_data_allocated) {
+		/* Compute next multiple of chunk above the required space */
+		size_t allocated = (c->data.textplain.utf8_data_size + len + 
+				CHUNK - 1) & ~(CHUNK - 1);
+		char *utf8_data = talloc_realloc(c,
+				c->data.textplain.utf8_data,
+				char, allocated);
+		if (utf8_data == NULL)
+			return false;
+
+		c->data.textplain.utf8_data = utf8_data;
+		c->data.textplain.utf8_data_allocated = allocated;
+	}
+
+	memcpy(c->data.textplain.utf8_data + 
+			c->data.textplain.utf8_data_size, buf, len);
+	c->data.textplain.utf8_data_size += len;
+
+	return true;
+}
+
 
 /**
  * Process data for CONTENT_TEXTPLAIN.
-- 
cgit v1.2.3