summary | refs | log | tree | commit | diff
path: root/src/iconv.c
diff options
context:
space:
mode:
author John-Mark Bell <jmb@netsurf-browser.org> 2013-01-11 03:33:47 +0000
committer John-Mark Bell <jmb@netsurf-browser.org> 2013-01-11 11:20:19 +0000
commit 23deb46db03c3e7a2884a49edcf882d933315e70 (patch)
tree c8d1c3bb87a616b4ce33a5a66ce26e84816f55f3 /src/iconv.c
parent efe52d57b2e0d1cb15ce3ccea5dd7d5e0359dae4 (diff)
download iconv-23deb46db03c3e7a2884a49edcf882d933315e70.tar.gz
iconv-23deb46db03c3e7a2884a49edcf882d933315e70.tar.bz2
Add proper transliteration support.
Diffstat (limited to 'src/iconv.c')
-rw-r--r-- src/iconv.c 77
1 files changed, 36 insertions, 41 deletions
diff --git a/src/iconv.c b/src/iconv.c
index db47cbc..c81a0b2 100644
--- a/src/iconv.c
+++ b/src/iconv.c
@@ -234,6 +234,18 @@ iconv_t iconv_open(const char *tocode, const char *fromcode)
return (iconv_t)(-1);
}
+ e->transout = encoding_new(to, encoding_WRITE_STRICT);
+ if (e->transout == NULL) {
+ if (e->out)
+ encoding_delete(e->out);
+ if (e->in)
+ encoding_delete(e->in);
+ iconv_eightbit_delete(e);
+ free(e);
+ errno = ENOMEM; /* Assume memory exhaustion */
+ return (iconv_t)(-1);
+ }
+
/* Set encoding flags */
unsigned int flags = 0;
if (to_force_le)
@@ -243,6 +255,7 @@ iconv_t iconv_open(const char *tocode, const char *fromcode)
flags |= encoding_FLAG_NO_HEADER;
encoding_set_flags(e->out, flags, flags);
+ encoding_set_flags(e->transout, flags, flags);
e->outflags = flags;
}
@@ -262,6 +275,7 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
{
struct encoding_context *e;
unsigned int read = 0;
+ int ret;
/* search for cd in list */
for (e = context_list; e; e = e->next)
@@ -289,7 +303,6 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
if (outbuf != NULL) {
char *prev_outbuf = *outbuf;
size_t prev_outbytesleft = *outbytesleft;
- int ret;
ret = encoding_write(e->out, NULL_UCS4,
outbuf, (int*) outbytesleft);
@@ -328,6 +341,13 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
e->outbuf = outbuf;
e->outbytesleft = outbytesleft;
+ /* Flush through any remaining transliteration */
+ ret = translit_flush_replacement(e);
+ if (ret <= 0) {
+ errno = E2BIG;
+ return (size_t)-1;
+ }
+
LOG(("reading"));
/* If, on the previous attempt to convert data, we reached the end
@@ -397,6 +417,10 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
errno = EINVAL;
break;
case WRITE_NOMEM: /* 4 */
+ if (e->substlen > 0) {
+ /* Buffer full while transliterating: skip input */
+ e->skip = read;
+ }
errno = E2BIG;
break;
case WRITE_FAILED: /* 1 */
@@ -426,6 +450,8 @@ int iconv_close(iconv_t cd)
encoding_delete(e->in);
if (e->out)
encoding_delete(e->out);
+ if (e->transout)
+ encoding_delete(e->transout);
iconv_eightbit_delete(e);
/* remove from list */
@@ -495,48 +521,17 @@ int character_callback(void *handle, UCS4 c)
(int*)e->outbytesleft);
}
- e->write_state = ret == -1 ? WRITE_FAILED
- : ret == 0 ? WRITE_NOMEM : WRITE_SUCCESS;
+ if (ret == -1 && e->transliterate) {
+ /* Transliterate, if we've been asked to. */
+ ret = translit_substitute(e, c);
+ }
if (ret == -1) {
- /* Transliterate, if we've been asked to.
- * Assumes that output is 8bit/8bit multibyte with ASCII G0.
- * This should be fine as the only <>8bit encodings are
- * UCS{2,4}, UTF-{16,32}, neither of which return -1.
- * Also, afaiaa, all supported multibyte encodings are ASCII
- * compatible. */
- /** \todo Actually perform some kind of transliteration */
- if (e->transliterate) {
- if ((int)*e->outbytesleft > 0) {
- if (e->out) {
- /* Flush through any pending shift sequences */
- /** \todo this is a bit dodgy, as we only
- * really need to ensure that the ASCII set
- * is mapped into G0 in ISO2022 encodings.
- * This will reset G1->G3, too, which may
- * break things. If so, we may have to
- * perform some dirty hackery which relies
- * upon knowledge of UnicodeLib's internals
- */
- encoding_write(e->out, NULL_UCS4,
- e->outbuf,
- (int*)e->outbytesleft);
- }
-
- if ((int)*e->outbytesleft > 0) {
- *(*e->outbuf)++ = '?';
- --*e->outbytesleft;
-
- e->write_state = WRITE_SUCCESS;
- } else {
- e->write_state = WRITE_NOMEM;
- }
- } else {
- e->write_state = WRITE_NOMEM;
- }
- } else {
- e->write_state = WRITE_FAILED;
- }
+ e->write_state = WRITE_FAILED;
+ } else if (ret == 0) {
+ e->write_state = WRITE_NOMEM;
+ } else {
+ e->write_state = WRITE_SUCCESS;
}
/* Always stop after processing each character */