1 files changed, 96 insertions, 111 deletions
diff --git a/riscos/ucstables.c b/riscos/ucstables.c
index 5a8eb8aec..6f8e44d11 100644
--- a/riscos/ucstables.c
+++ b/riscos/ucstables.c
@@ -441,13 +441,9 @@ utf8_convert_ret utf8_to_local_encoding(const char *string, size_t len,
 		char **result)
 {
 	os_error *error;
-	int alphabet, i, offset_count = 0;
-	struct {
-		const struct special *local;	/* local character */
-		size_t offset;		/* byte offset into string */
-	} offsets[CHAR_MAX];
-	size_t off;
-	char *temp;
+	int alphabet, i;
+	size_t off, prev_off;
+	char *temp, *cur_pos;
 	const char *enc;
 	utf8_convert_ret err;
 
@@ -475,8 +471,18 @@ utf8_convert_ret utf8_to_local_encoding(const char *string, size_t len,
 					localencodings[CONT_ENC_END + 1]
 						 : localencodings[0]));
 
-	/* populate offsets array with details of characters that
-	 * will be stripped by iconv */
+	/* create output buffer */
+	*(result) = malloc(len + 1);
+	if (!(*result))
+		return UTF8_CONVERT_NOMEM;
+	*(*result) = '\0';
+
+	prev_off = 0;
+	cur_pos = (*result);
+
+	/* Iterate over string, converting input between unconvertable
+	 * characters and inserting appropriate output for characters
+	 * that iconv can't handle. */
 	for (off = 0; off < len; off = utf8_next(string, len, off)) {
 		if (string[off] != 0xE2 &&
 				string[off] != 0xC5 && string[off] != 0xEF)
@@ -484,69 +490,45 @@ utf8_convert_ret utf8_to_local_encoding(const char *string, size_t len,
 
 		for (i = 0; i != NOF_ELEMENTS(special_chars); i++) {
 			if (strncmp(string + off, special_chars[i].utf,
-					special_chars[i].len) == 0) {
-				/* ensure we don't overflow our buffer */
-				assert(offset_count < CHAR_MAX - 1);
-				offsets[offset_count].local =
-							&special_chars[i];
-				offsets[offset_count].offset = off;
-				offset_count++;
-				break;
-			}
-		}
-	}
+					special_chars[i].len) != 0)
+				continue;
 
-	if (offset_count == 0) {
-		/* No substitutions are required, so exit here */
-		return utf8_to_enc(string, enc, len, result);
-	}
+			/* 0 length has a special meaning to utf8_to_enc */
+			if (off - prev_off > 0) {
+				err = utf8_to_enc(string + prev_off, enc,
+						off - prev_off, &temp);
+				if (err != UTF8_CONVERT_OK) {
+					assert(err != UTF8_CONVERT_BADENC);
+					free(*result);
+					return UTF8_CONVERT_NOMEM;
+				}
 
-	/* create output buffer */
-	*(result) = malloc(len + 1);
-	if (!(*result))
-		return UTF8_CONVERT_NOMEM;
-	*(*result) = '\0';
+				strcat(cur_pos, temp);
 
-	/* convert the chunks between offsets, then copy stripped
-	 * character into output string */
-	for (i = 0; i != offset_count; i++) {
-		off = (i > 0 ? offsets[i-1].offset + offsets[i-1].local->len
-			     : 0);
-
-		/* 0 length has a special meaning to utf8_to_enc */
-		if (offsets[i].offset > off) {
-			err = utf8_to_enc(string + off, enc,
-					offsets[i].offset - off, &temp);
-			if (err != UTF8_CONVERT_OK) {
-				assert(err != UTF8_CONVERT_BADENC);
-				free(*result);
-				return UTF8_CONVERT_NOMEM;
-			}
+				cur_pos += strlen(temp);
 
-			strcat((*result), temp);
+				free(temp);
+			}
 
-			free(temp);
+			*cur_pos = special_chars[i].local;
+			*(++cur_pos) = '\0';
+			prev_off = off + special_chars[i].len;
 		}
-
-		off = strlen(*result);
-		(*result)[off] = offsets[i].local->local;
-		(*result)[off+1] = '\0';
 	}
 
 	/* handle last chunk
 	 * NB. 0 length has a special meaning to utf8_to_enc */
 
-	off = offsets[offset_count - 1].offset +
-			offsets[offset_count - 1].local->len;
-	if (off < len) {
-		err = utf8_to_enc(string + off, enc, len - off, &temp);
+	if (prev_off < len) {
+		err = utf8_to_enc(string + prev_off, enc, len - prev_off,
+				&temp);
 		if (err != UTF8_CONVERT_OK) {
 			assert(err != UTF8_CONVERT_BADENC);
 			free(*result);
 			return UTF8_CONVERT_NOMEM;
 		}
 
-		strcat((*result), temp);
+		strcat(cur_pos, temp);
 
 		free(temp);
 	}
@@ -566,12 +548,9 @@ utf8_convert_ret utf8_from_local_encoding(const char *string, size_t len,
 		char **result)
 {
 	os_error *error;
-	int alphabet, i, offset_count = 0;
-	struct {
-		const struct special *local;	/* utf character */
-		size_t offset;		/* byte offset into string */
-	} offsets[CHAR_MAX];
-	size_t off;
+	int alphabet, i, num_specials = 0, result_alloc;
+#define SPECIAL_CHUNK_SIZE 255
+	size_t off, prev_off, cur_off;
 	char *temp;
 	const char *enc;
 	utf8_convert_ret err;
@@ -603,64 +582,74 @@ utf8_convert_ret utf8_from_local_encoding(const char *string, size_t len,
 					localencodings[CONT_ENC_END + 1]
 						 : localencodings[0]));
 
-	/* populate offsets array with details of characters that
-	 * will be stripped by iconv */
+	/* create output buffer (oversized) */
+	result_alloc = (len * 4) + (3 * SPECIAL_CHUNK_SIZE) + 1;
+
+	*(result) = malloc(result_alloc);
+	if (!(*result))
+		return UTF8_CONVERT_NOMEM;
+	*(*result) = '\0';
+
+	prev_off = 0;
+	cur_off = 0;
+
+	/* Iterate over string, converting input between unconvertable
+	 * characters and inserting appropriate output for characters
+	 * that iconv can't handle. */
 	for (off = 0; off < len; off++) {
 		if (string[off] < 0x80 || string[off] > 0x9f)
 			continue;
 
 		for (i = 0; i != NOF_ELEMENTS(special_chars); i++) {
-			if (string[off] == special_chars[i].local) {
-				/* ensure we don't overflow our buffer */
-				assert(offset_count < CHAR_MAX - 1);
-				offsets[offset_count].local =
-							&special_chars[i];
-				offsets[offset_count].offset = off;
-				offset_count++;
-				break;
-			}
-		}
-	}
+			if (string[off] != special_chars[i].local)
+				continue;
 
-	if (offset_count == 0) {
-		/* No substitutions are required, so exit here */
-		return utf8_from_enc(string, enc, len, result);
-	}
+			/* 0 length has a special meaning to utf8_from_enc */
+			if (off - prev_off > 0) {
+				err = utf8_from_enc(string + prev_off, enc,
+						off - prev_off, &temp);
+				if (err != UTF8_CONVERT_OK) {
+					assert(err != UTF8_CONVERT_BADENC);
+					LOG(("utf8_from_enc failed"));
+					free(*result);
+					return UTF8_CONVERT_NOMEM;
+				}
 
-	/* create output buffer (oversized) */
-	*(result) = malloc((len * 4) + (3 * offset_count) + 1);
-	if (!(*result))
-		return UTF8_CONVERT_NOMEM;
-	*(*result) = '\0';
+				strcat((*result) + cur_off, temp);
+
+				cur_off += strlen(temp);
 
-	/* convert the chunks between offsets, then copy stripped
-	 * UTF-8 character into output string */
-	for (i = 0; i != offset_count; i++) {
-		off = (i > 0 ? offsets[i-1].offset + 1 : 0);
-
-		/* 0 length has a special meaning to utf8_from_enc */
-		if (offsets[i].offset > off) {
-			err = utf8_from_enc(string + off, enc,
-					offsets[i].offset - off, &temp);
-			if (err != UTF8_CONVERT_OK) {
-				assert(err != UTF8_CONVERT_BADENC);
-				LOG(("utf8_from_enc failed"));
-				free(*result);
-				return UTF8_CONVERT_NOMEM;
+				free(temp);
 			}
 
-			strcat((*result), temp);
-			free(temp);
-		}
+			strcat((*result) + cur_off, special_chars[i].utf);
+
+			cur_off += special_chars[i].len;
 
-		strcat((*result), offsets[i].local->utf);
+			prev_off = off + 1;
+
+			num_specials++;
+			if (num_specials % SPECIAL_CHUNK_SIZE ==
+					SPECIAL_CHUNK_SIZE - 1) {
+				char *temp = realloc((*result),
+						result_alloc +
+						(3 * SPECIAL_CHUNK_SIZE));
+				if (!temp) {
+					free(*result);
+					return UTF8_CONVERT_NOMEM;
+				}
+
+				*result = temp;
+				result_alloc += (3 * SPECIAL_CHUNK_SIZE);
+			}
+		}
 	}
 
 	/* handle last chunk
 	 * NB. 0 length has a special meaning to utf8_from_enc */
-	off = offsets[offset_count - 1].offset + 1;
-	if (off < len) {
-		err = utf8_from_enc(string + off, enc, len - off, &temp);
+	if (prev_off < len) {
+		err = utf8_from_enc(string + prev_off, enc, len - prev_off,
+				&temp);
 		if (err != UTF8_CONVERT_OK) {
 			assert(err != UTF8_CONVERT_BADENC);
 			LOG(("utf8_from_enc failed"));
@@ -668,22 +657,18 @@ utf8_convert_ret utf8_from_local_encoding(const char *string, size_t len,
 			return UTF8_CONVERT_NOMEM;
 		}
 
-		strcat((*result), temp);
+		strcat((*result) + cur_off, temp);
 
 		free(temp);
 	}
 
 	/* and copy into more reasonably-sized buffer */
-	temp = malloc(strlen((*result)) + 1);
+	temp = realloc((*result), cur_off + 1);
 	if (!temp) {
-		LOG(("malloc failed"));
+		LOG(("realloc failed"));
 		free(*result);
 		return UTF8_CONVERT_NOMEM;
 	}
-	*temp = '\0';
-
-	strcpy(temp, (*result));
-	free(*result);
 	*result = temp;
 
 	return UTF8_CONVERT_OK;