From 12cadfb82f691625b8d0cba40a07a938663261ed Mon Sep 17 00:00:00 2001
From: John Mark Bell <jmb@netsurf-browser.org>
Date: Sat, 29 Nov 2008 14:46:56 +0000
Subject: Fix resumption of decode after EINVAL. iconv() expects to replay the
 whole sequence whereas UnicodeLib expects to resume where it left off.

svn path=/trunk/iconv/; revision=5826
---
 src/iconv.c    | 26 ++++++++++++++++++++++++++
 src/internal.h |  1 +
 2 files changed, 27 insertions(+)

diff --git a/src/iconv.c b/src/iconv.c
index 0cb31bf..a96a5a6 100644
--- a/src/iconv.c
+++ b/src/iconv.c
@@ -268,10 +268,16 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
 	}
 
 	if (inbuf == NULL || *inbuf == NULL) {
+		/* Clear skip */
+		e->skip = 0;
+
+		/* Reset read codec */
 		if (e->in) {
 			encoding_reset(e->in);
 			encoding_set_flags(e->in, e->inflags, e->inflags);
 		}
+
+		/* Reset write codec, flushing shift sequences, if asked */
 		if (e->out) {
 			if (outbuf != NULL) {
 				char *prev_outbuf = *outbuf;
@@ -317,6 +323,21 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
 
 	LOG(("reading"));
 
+	/* If the previous conversion attempt reached the end of the input
+	 * buffer mid-sequence, e->skip holds the number of bytes of that
+	 * sequence which were already read. Skip over those bytes now:
+	 * UnicodeLib expects the next byte it sees to continue the
+	 * sequence, whereas iconv() semantics have the caller replay the
+	 * entire incomplete sequence from the start.
+	 * NOTE(review): assumes *inbytesleft >= e->skip -- confirm.
+	 */
+	if (e->skip != 0) {
+		*inbuf += e->skip;
+		*inbytesleft -= e->skip;
+
+		e->skip = 0;
+	}
+
 	/* Perform the conversion.
 	 *
 	 * To ensure that we detect the correct error conditions
@@ -357,6 +378,8 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
 		 * assume everything's reversible. */
 		return 0;
 	case WRITE_NONE:    /* 3 */
+		/* Mark where we need to start in the next call */
+		e->skip = read;
 		errno = EINVAL;
 		break;
 	case WRITE_NOMEM:   /* 4 */
@@ -415,6 +438,9 @@ int character_callback(void *handle, UCS4 c)
 
 	e = (struct encoding_context*)handle;
 
+	if (c == 0xFFFE)
+		c = 0xFFFD;
+
 	/* Stop on invalid characters if we're not transliterating */
 	/** \todo is this sane? -- we can't distinguish between illegal input 
 	 * or valid input which just happens to correspond with U+fffd. */
diff --git a/src/internal.h b/src/internal.h
index c1d04f3..eb11589 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -29,6 +29,7 @@ struct encoding_context {
 		WRITE_NOMEM, 
 		WRITE_NONE
 	} write_state;
+	int skip;
 	struct encoding_context *prev, *next;
 };
 
-- 
cgit v1.2.3