summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John Mark Bell <jmb@netsurf-browser.org> 2008-11-29 14:46:56 (GMT)
committer John Mark Bell <jmb@netsurf-browser.org> 2008-11-29 14:46:56 (GMT)
commit 12cadfb82f691625b8d0cba40a07a938663261ed (patch)
tree c37da92214f6298faab846f1e7bfa6c03e3bf035
parent f32f802867813d90e06755f295dcfe37fb6093e9 (diff)
download iconv-12cadfb82f691625b8d0cba40a07a938663261ed.tar.gz
iconv-12cadfb82f691625b8d0cba40a07a938663261ed.tar.bz2
Fix resumption of decode after EINVAL.
iconv() expects to replay the whole sequence, whereas UnicodeLib expects to resume where it left off.
svn path=/trunk/iconv/; revision=5826
-rw-r--r-- src/iconv.c 26
-rw-r--r-- src/internal.h 1
2 files changed, 27 insertions, 0 deletions
diff --git a/src/iconv.c b/src/iconv.c
index 0cb31bf..a96a5a6 100644
--- a/src/iconv.c
+++ b/src/iconv.c
@@ -268,10 +268,16 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
}
if (inbuf == NULL || *inbuf == NULL) {
+ /* Clear skip */
+ e->skip = 0;
+
+ /* Reset read codec */
if (e->in) {
encoding_reset(e->in);
encoding_set_flags(e->in, e->inflags, e->inflags);
}
+
+ /* Reset write codec, flushing shift sequences, if asked */
if (e->out) {
if (outbuf != NULL) {
char *prev_outbuf = *outbuf;
@@ -317,6 +323,21 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
LOG(("reading"));
+ /* If, on the previous attempt to convert data, we reached the end
+ * of the input buffer mid-sequence, then we retain the number of
+ * bytes into the sequence we have read so far. We need to skip over
+ * these bytes in the input now because UnicodeLib expects the next
+ * byte to be the next in the sequence rather than the iconv()
+ * semantics of replaying the entire incomplete sequence from the
+ * start.
+ */
+ if (e->skip != 0) {
+ *inbuf += e->skip;
+ *inbytesleft -= e->skip;
+
+ e->skip = 0;
+ }
+
/* Perform the conversion.
*
* To ensure that we detect the correct error conditions
@@ -357,6 +378,8 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
* assume everything's reversible. */
return 0;
case WRITE_NONE: /* 3 */
+ /* Mark where we need to start in the next call */
+ e->skip = read;
errno = EINVAL;
break;
case WRITE_NOMEM: /* 4 */
@@ -415,6 +438,9 @@ int character_callback(void *handle, UCS4 c)
e = (struct encoding_context*)handle;
+ if (c == 0xFFFE)
+ c = 0xFFFD;
+
/* Stop on invalid characters if we're not transliterating */
/** \todo is this sane? -- we can't distinguish between illegal input
* or valid input which just happens to correspond with U+fffd. */
diff --git a/src/internal.h b/src/internal.h
index c1d04f3..eb11589 100644
--- a/src/internal.h
+++ b/src/internal.h
@@ -29,6 +29,7 @@ struct encoding_context {
WRITE_NOMEM,
WRITE_NONE
} write_state;
+ int skip;
struct encoding_context *prev, *next;
};