Add proper transliteration support.

author: John-Mark Bell <jmb@netsurf-browser.org> 2013-01-11 03:33:47 +0000
committer: John-Mark Bell <jmb@netsurf-browser.org> 2013-01-11 11:20:19 +0000
commit: 23deb46db03c3e7a2884a49edcf882d933315e70 (patch)
tree: c8d1c3bb87a616b4ce33a5a66ce26e84816f55f3 /src/iconv.c
parent: efe52d57b2e0d1cb15ce3ccea5dd7d5e0359dae4 (diff)
download: iconv-23deb46db03c3e7a2884a49edcf882d933315e70.tar.gz
iconv-23deb46db03c3e7a2884a49edcf882d933315e70.tar.bz2
1 file changed, 36 insertions, 41 deletions
diff --git a/src/iconv.c b/src/iconv.c
index db47cbc..c81a0b2 100644
--- a/src/iconv.c
+++ b/src/iconv.c
@@ -234,6 +234,18 @@ iconv_t iconv_open(const char *tocode, const char *fromcode)
 			return (iconv_t)(-1);
 		}
 
+		e->transout = encoding_new(to, encoding_WRITE_STRICT);
+		if (e->transout == NULL) {
+			if (e->out)
+				encoding_delete(e->out);
+			if (e->in)
+				encoding_delete(e->in);
+			iconv_eightbit_delete(e);
+			free(e);
+			errno = ENOMEM;		/* Assume memory exhaustion */
+			return (iconv_t)(-1);
+		}
+
 		/* Set encoding flags */
 		unsigned int flags = 0;
 		if (to_force_le)
@@ -243,6 +255,7 @@ iconv_t iconv_open(const char *tocode, const char *fromcode)
 			flags |= encoding_FLAG_NO_HEADER;
 
 		encoding_set_flags(e->out, flags, flags);
+		encoding_set_flags(e->transout, flags, flags);
 
 		e->outflags = flags;
 	}
@@ -262,6 +275,7 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
 {
 	struct encoding_context *e;
 	unsigned int read = 0;
+	int ret;
 
 	/* search for cd in list */
 	for (e = context_list; e; e = e->next)
@@ -289,7 +303,6 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
 			if (outbuf != NULL) {
 				char *prev_outbuf = *outbuf;
 				size_t prev_outbytesleft = *outbytesleft;
-				int ret;
 
 				ret = encoding_write(e->out, NULL_UCS4, 
 						outbuf, (int*) outbytesleft);
@@ -328,6 +341,13 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
 	e->outbuf = outbuf;
 	e->outbytesleft = outbytesleft;
 
+	/* Flush through any remaining transliteration */
+	ret = translit_flush_replacement(e);
+	if (ret <= 0) {
+		errno = E2BIG;
+		return (size_t)-1;
+	}
+
 	LOG(("reading"));
 
 	/* If, on the previous attempt to convert data, we reached the end
@@ -397,6 +417,10 @@ size_t iconv(iconv_t cd, char **inbuf, size_t *inbytesleft, char **outbuf,
 		errno = EINVAL;
 		break;
 	case WRITE_NOMEM:   /* 4 */
+		if (e->substlen > 0) {
+			/* Buffer full while transliterating: skip input */
+			e->skip = read;
+		}
 		errno = E2BIG;
 		break;
 	case WRITE_FAILED:  /* 1 */
@@ -426,6 +450,8 @@ int iconv_close(iconv_t cd)
 		encoding_delete(e->in);
 	if (e->out)
 		encoding_delete(e->out);
+	if (e->transout)
+		encoding_delete(e->transout);
 	iconv_eightbit_delete(e);
 
 	/* remove from list */
@@ -495,48 +521,17 @@ int character_callback(void *handle, UCS4 c)
 				(int*)e->outbytesleft);
 	}
 
-	e->write_state = ret == -1 ? WRITE_FAILED 
-				   : ret == 0 ? WRITE_NOMEM : WRITE_SUCCESS;
+	if (ret == -1 && e->transliterate) {
+		/* Transliterate, if we've been asked to. */
+		ret = translit_substitute(e, c);
+	} 
 
 	if (ret == -1) {
-		/* Transliterate, if we've been asked to.
-		 * Assumes that output is 8bit/8bit multibyte with ASCII G0.
-		 * This should be fine as the only <>8bit encodings are
-		 * UCS{2,4}, UTF-{16,32}, neither of which return -1.
-		 * Also, afaiaa, all supported multibyte encodings are ASCII
-		 * compatible. */
-		/** \todo Actually perform some kind of transliteration */
-		if (e->transliterate) {
-			if ((int)*e->outbytesleft > 0) {
-				if (e->out) {
-				/* Flush through any pending shift sequences */
-				/** \todo this is a bit dodgy, as we only
-				 * really need to ensure that the ASCII set
-				 * is mapped into G0 in ISO2022 encodings.
-				 * This will reset G1->G3, too, which may
-				 * break things. If so, we may have to
-				 * perform some dirty hackery which relies
-				 * upon knowledge of UnicodeLib's internals
-				 */
-					encoding_write(e->out, NULL_UCS4, 
-						e->outbuf,
-						(int*)e->outbytesleft);
-				}
-
-				if ((int)*e->outbytesleft > 0) {
-					*(*e->outbuf)++ = '?';
-					--*e->outbytesleft;
-
-					e->write_state = WRITE_SUCCESS;
-				} else {
-					e->write_state = WRITE_NOMEM;
-				}
-			} else {
-				e->write_state = WRITE_NOMEM;
-			}
-		} else {
-			e->write_state = WRITE_FAILED;
-		}
+		e->write_state = WRITE_FAILED;
+	} else if (ret == 0) {
+		e->write_state = WRITE_NOMEM;
+	} else {
+		e->write_state = WRITE_SUCCESS;
 	}
 
 	/* Always stop after processing each character */
author	John-Mark Bell <jmb@netsurf-browser.org>	2013-01-11 03:33:47 +0000
committer	John-Mark Bell <jmb@netsurf-browser.org>	2013-01-11 11:20:19 +0000
commit	23deb46db03c3e7a2884a49edcf882d933315e70 (patch)
tree	c8d1c3bb87a616b4ce33a5a66ce26e84816f55f3 /src/iconv.c
parent	efe52d57b2e0d1cb15ce3ccea5dd7d5e0359dae4 (diff)
download	iconv-23deb46db03c3e7a2884a49edcf882d933315e70.tar.gz iconv-23deb46db03c3e7a2884a49edcf882d933315e70.tar.bz2