diff options
author | John-Mark Bell <jmb@netsurf-browser.org> | 2013-01-11 03:33:47 +0000 |
---|---|---|
committer | John-Mark Bell <jmb@netsurf-browser.org> | 2013-01-11 11:20:19 +0000 |
commit | 23deb46db03c3e7a2884a49edcf882d933315e70 (patch) | |
tree | c8d1c3bb87a616b4ce33a5a66ce26e84816f55f3 /src/iconv.c | |
parent | efe52d57b2e0d1cb15ce3ccea5dd7d5e0359dae4 (diff) | |
download | iconv-23deb46db03c3e7a2884a49edcf882d933315e70.tar.gz iconv-23deb46db03c3e7a2884a49edcf882d933315e70.tar.bz2 |
Add proper transliteration support.
Diffstat (limited to 'src/iconv.c')
-rw-r--r-- | src/iconv.c | 77 |
1 file changed, 36 insertions, 41 deletions
diff --git a/src/iconv.c b/src/iconv.c index db47cbc..c81a0b2 100644 --- a/src/iconv.c +++ b/src/iconv.c @@ -234,6 +234,18 @@ iconv_t iconv_open(const char *tocode, const char *fromcode) return (iconv_t)(-1); } + e->transout = encoding_new(to, encoding_WRITE_STRICT); + if (e->transout == NULL) { + if (e->out) + encoding_delete(e->out); + if (e->in) + encoding_delete(e->in); + iconv_eightbit_delete(e); + free(e); + errno = ENOMEM; /* Assume memory exhaustion */ + return (iconv_t)(-1); + } + /* Set encoding flags */ unsigned int flags = 0; if (to_force_le) @@ -243,6 +255,7 @@ iconv_t iconv_open(const char *tocode, const char *fromcode) flags |= encoding_FLAG_NO_HEADER; encoding_set_flags(e->out, flags, flags); + encoding_set_flags(e->transout, flags, flags); e->outflags = flags; } @@ -262,6 +275,7 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, { struct encoding_context *e; unsigned int read = 0; + int ret; /* search for cd in list */ for (e = context_list; e; e = e->next) @@ -289,7 +303,6 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, if (outbuf != NULL) { char *prev_outbuf = *outbuf; size_t prev_outbytesleft = *outbytesleft; - int ret; ret = encoding_write(e->out, NULL_UCS4, outbuf, (int*) outbytesleft); @@ -328,6 +341,13 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, e->outbuf = outbuf; e->outbytesleft = outbytesleft; + /* Flush through any remaining transliteration */ + ret = translit_flush_replacement(e); + if (ret <= 0) { + errno = E2BIG; + return (size_t)-1; + } + LOG(("reading")); /* If, on the previous attempt to convert data, we reached the end @@ -397,6 +417,10 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf, errno = EINVAL; break; case WRITE_NOMEM: /* 4 */ + if (e->substlen > 0) { + /* Buffer full while transliterating: skip input */ + e->skip = read; + } errno = E2BIG; break; case WRITE_FAILED: /* 1 */ @@ -426,6 +450,8 @@ int 
iconv_close(iconv_t cd) encoding_delete(e->in); if (e->out) encoding_delete(e->out); + if (e->transout) + encoding_delete(e->transout); iconv_eightbit_delete(e); /* remove from list */ @@ -495,48 +521,17 @@ int character_callback(void *handle, UCS4 c) (int*)e->outbytesleft); } - e->write_state = ret == -1 ? WRITE_FAILED - : ret == 0 ? WRITE_NOMEM : WRITE_SUCCESS; + if (ret == -1 && e->transliterate) { + /* Transliterate, if we've been asked to. */ + ret = translit_substitute(e, c); + } if (ret == -1) { - /* Transliterate, if we've been asked to. - * Assumes that output is 8bit/8bit multibyte with ASCII G0. - * This should be fine as the only <>8bit encodings are - * UCS{2,4}, UTF-{16,32}, neither of which return -1. - * Also, afaiaa, all supported multibyte encodings are ASCII - * compatible. */ - /** \todo Actually perform some kind of transliteration */ - if (e->transliterate) { - if ((int)*e->outbytesleft > 0) { - if (e->out) { - /* Flush through any pending shift sequences */ - /** \todo this is a bit dodgy, as we only - * really need to ensure that the ASCII set - * is mapped into G0 in ISO2022 encodings. - * This will reset G1->G3, too, which may - * break things. If so, we may have to - * perform some dirty hackery which relies - * upon knowledge of UnicodeLib's internals - */ - encoding_write(e->out, NULL_UCS4, - e->outbuf, - (int*)e->outbytesleft); - } - - if ((int)*e->outbytesleft > 0) { - *(*e->outbuf)++ = '?'; - --*e->outbytesleft; - - e->write_state = WRITE_SUCCESS; - } else { - e->write_state = WRITE_NOMEM; - } - } else { - e->write_state = WRITE_NOMEM; - } - } else { - e->write_state = WRITE_FAILED; - } + e->write_state = WRITE_FAILED; + } else if (ret == 0) { + e->write_state = WRITE_NOMEM; + } else { + e->write_state = WRITE_SUCCESS; } /* Always stop after processing each character */ |