From 12cadfb82f691625b8d0cba40a07a938663261ed Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Sat, 29 Nov 2008 14:46:56 +0000 Subject: Fix resumption of decode after EINVAL. iconv() expects to replay the whole sequence whereas UnicodeLib expects to resume where it left off. svn path=/trunk/iconv/; revision=5826 --- src/iconv.c | 26 ++++++++++++++++++++++++++ src/internal.h | 1 + 2 files changed, 27 insertions(+) diff --git a/src/iconv.c b/src/iconv.c index 0cb31bf..a96a5a6 100644 --- a/src/iconv.c +++ b/src/iconv.c @@ -268,10 +268,16 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, } if (inbuf == NULL || *inbuf == NULL) { + /* Clear skip */ + e->skip = 0; + + /* Reset read codec */ if (e->in) { encoding_reset(e->in); encoding_set_flags(e->in, e->inflags, e->inflags); } + + /* Reset write codec, flushing shift sequences, if asked */ if (e->out) { if (outbuf != NULL) { char *prev_outbuf = *outbuf; @@ -317,6 +323,21 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, LOG(("reading")); + /* If, on the previous attempt to convert data, we reached the end + * of the input buffer mid-sequence, then we retain the number of + * bytes into the sequence we have read so far. We need to skip over + * these bytes in the input now because UnicodeLib expects the next + * byte to be the next in the sequence rather than the iconv() + * semantics of replaying the entire incomplete sequence from the + * start. + */ + if (e->skip != 0) { + *inbuf += e->skip; + *inbytesleft -= e->skip; + + e->skip = 0; + } + /* Perform the conversion. * * To ensure that we detect the correct error conditions @@ -357,6 +378,8 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, * assume everything's reversible. */ return 0; case WRITE_NONE: /* 3 */ + /* Mark where we need to start in the next call */ + e->skip = read; errno = EINVAL; break; case WRITE_NOMEM: /* 4 */ @@ -415,6 +438,9 @@ int character_callback(void *handle, UCS4 c) e = (struct encoding_context*)handle; + if (c == 0xFFFE) + c = 0xFFFD; + /* Stop on invalid characters if we're not transliterating */ /** \todo is this sane? -- we can't distinguish between illegal input * or valid input which just happens to correspond with U+fffd. */ diff --git a/src/internal.h b/src/internal.h index c1d04f3..eb11589 100644 --- a/src/internal.h +++ b/src/internal.h @@ -29,6 +29,7 @@ struct encoding_context { WRITE_NOMEM, WRITE_NONE } write_state; + int skip; struct encoding_context *prev, *next; }; -- cgit v1.2.3