summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- riscos/ucstables.c 207
1 files changed, 96 insertions, 111 deletions
diff --git a/riscos/ucstables.c b/riscos/ucstables.c
index 5a8eb8aec..6f8e44d11 100644
--- a/riscos/ucstables.c
+++ b/riscos/ucstables.c
@@ -441,13 +441,9 @@ utf8_convert_ret utf8_to_local_encoding(const char *string, size_t len,
char **result)
{
os_error *error;
- int alphabet, i, offset_count = 0;
- struct {
- const struct special *local; /* local character */
- size_t offset; /* byte offset into string */
- } offsets[CHAR_MAX];
- size_t off;
- char *temp;
+ int alphabet, i;
+ size_t off, prev_off;
+ char *temp, *cur_pos;
const char *enc;
utf8_convert_ret err;
@@ -475,8 +471,18 @@ utf8_convert_ret utf8_to_local_encoding(const char *string, size_t len,
localencodings[CONT_ENC_END + 1]
: localencodings[0]));
- /* populate offsets array with details of characters that
- * will be stripped by iconv */
+ /* create output buffer */
+ *(result) = malloc(len + 1);
+ if (!(*result))
+ return UTF8_CONVERT_NOMEM;
+ *(*result) = '\0';
+
+ prev_off = 0;
+ cur_pos = (*result);
+
+ /* Iterate over string, converting input between unconvertible
+ * characters and inserting appropriate output for characters
+ * that iconv can't handle. */
for (off = 0; off < len; off = utf8_next(string, len, off)) {
if (string[off] != 0xE2 &&
string[off] != 0xC5 && string[off] != 0xEF)
@@ -484,69 +490,45 @@ utf8_convert_ret utf8_to_local_encoding(const char *string, size_t len,
for (i = 0; i != NOF_ELEMENTS(special_chars); i++) {
if (strncmp(string + off, special_chars[i].utf,
- special_chars[i].len) == 0) {
- /* ensure we don't overflow our buffer */
- assert(offset_count < CHAR_MAX - 1);
- offsets[offset_count].local =
- &special_chars[i];
- offsets[offset_count].offset = off;
- offset_count++;
- break;
- }
- }
- }
+ special_chars[i].len) != 0)
+ continue;
- if (offset_count == 0) {
- /* No substitutions are required, so exit here */
- return utf8_to_enc(string, enc, len, result);
- }
+ /* 0 length has a special meaning to utf8_to_enc */
+ if (off - prev_off > 0) {
+ err = utf8_to_enc(string + prev_off, enc,
+ off - prev_off, &temp);
+ if (err != UTF8_CONVERT_OK) {
+ assert(err != UTF8_CONVERT_BADENC);
+ free(*result);
+ return UTF8_CONVERT_NOMEM;
+ }
- /* create output buffer */
- *(result) = malloc(len + 1);
- if (!(*result))
- return UTF8_CONVERT_NOMEM;
- *(*result) = '\0';
+ strcat(cur_pos, temp);
- /* convert the chunks between offsets, then copy stripped
- * character into output string */
- for (i = 0; i != offset_count; i++) {
- off = (i > 0 ? offsets[i-1].offset + offsets[i-1].local->len
- : 0);
-
- /* 0 length has a special meaning to utf8_to_enc */
- if (offsets[i].offset > off) {
- err = utf8_to_enc(string + off, enc,
- offsets[i].offset - off, &temp);
- if (err != UTF8_CONVERT_OK) {
- assert(err != UTF8_CONVERT_BADENC);
- free(*result);
- return UTF8_CONVERT_NOMEM;
- }
+ cur_pos += strlen(temp);
- strcat((*result), temp);
+ free(temp);
+ }
- free(temp);
+ *cur_pos = special_chars[i].local;
+ *(++cur_pos) = '\0';
+ prev_off = off + special_chars[i].len;
}
-
- off = strlen(*result);
- (*result)[off] = offsets[i].local->local;
- (*result)[off+1] = '\0';
}
/* handle last chunk
* NB. 0 length has a special meaning to utf8_to_enc */
- off = offsets[offset_count - 1].offset +
- offsets[offset_count - 1].local->len;
- if (off < len) {
- err = utf8_to_enc(string + off, enc, len - off, &temp);
+ if (prev_off < len) {
+ err = utf8_to_enc(string + prev_off, enc, len - prev_off,
+ &temp);
if (err != UTF8_CONVERT_OK) {
assert(err != UTF8_CONVERT_BADENC);
free(*result);
return UTF8_CONVERT_NOMEM;
}
- strcat((*result), temp);
+ strcat(cur_pos, temp);
free(temp);
}
@@ -566,12 +548,9 @@ utf8_convert_ret utf8_from_local_encoding(const char *string, size_t len,
char **result)
{
os_error *error;
- int alphabet, i, offset_count = 0;
- struct {
- const struct special *local; /* utf character */
- size_t offset; /* byte offset into string */
- } offsets[CHAR_MAX];
- size_t off;
+ int alphabet, i, num_specials = 0, result_alloc;
+#define SPECIAL_CHUNK_SIZE 255
+ size_t off, prev_off, cur_off;
char *temp;
const char *enc;
utf8_convert_ret err;
@@ -603,64 +582,74 @@ utf8_convert_ret utf8_from_local_encoding(const char *string, size_t len,
localencodings[CONT_ENC_END + 1]
: localencodings[0]));
- /* populate offsets array with details of characters that
- * will be stripped by iconv */
+ /* create output buffer (oversized) */
+ result_alloc = (len * 4) + (3 * SPECIAL_CHUNK_SIZE) + 1;
+
+ *(result) = malloc(result_alloc);
+ if (!(*result))
+ return UTF8_CONVERT_NOMEM;
+ *(*result) = '\0';
+
+ prev_off = 0;
+ cur_off = 0;
+
+ /* Iterate over string, converting input between unconvertible
+ * characters and inserting appropriate output for characters
+ * that iconv can't handle. */
for (off = 0; off < len; off++) {
if (string[off] < 0x80 || string[off] > 0x9f)
continue;
for (i = 0; i != NOF_ELEMENTS(special_chars); i++) {
- if (string[off] == special_chars[i].local) {
- /* ensure we don't overflow our buffer */
- assert(offset_count < CHAR_MAX - 1);
- offsets[offset_count].local =
- &special_chars[i];
- offsets[offset_count].offset = off;
- offset_count++;
- break;
- }
- }
- }
+ if (string[off] != special_chars[i].local)
+ continue;
- if (offset_count == 0) {
- /* No substitutions are required, so exit here */
- return utf8_from_enc(string, enc, len, result);
- }
+ /* 0 length has a special meaning to utf8_from_enc */
+ if (off - prev_off > 0) {
+ err = utf8_from_enc(string + prev_off, enc,
+ off - prev_off, &temp);
+ if (err != UTF8_CONVERT_OK) {
+ assert(err != UTF8_CONVERT_BADENC);
+ LOG(("utf8_from_enc failed"));
+ free(*result);
+ return UTF8_CONVERT_NOMEM;
+ }
- /* create output buffer (oversized) */
- *(result) = malloc((len * 4) + (3 * offset_count) + 1);
- if (!(*result))
- return UTF8_CONVERT_NOMEM;
- *(*result) = '\0';
+ strcat((*result) + cur_off, temp);
+
+ cur_off += strlen(temp);
- /* convert the chunks between offsets, then copy stripped
- * UTF-8 character into output string */
- for (i = 0; i != offset_count; i++) {
- off = (i > 0 ? offsets[i-1].offset + 1 : 0);
-
- /* 0 length has a special meaning to utf8_from_enc */
- if (offsets[i].offset > off) {
- err = utf8_from_enc(string + off, enc,
- offsets[i].offset - off, &temp);
- if (err != UTF8_CONVERT_OK) {
- assert(err != UTF8_CONVERT_BADENC);
- LOG(("utf8_from_enc failed"));
- free(*result);
- return UTF8_CONVERT_NOMEM;
+ free(temp);
}
- strcat((*result), temp);
- free(temp);
- }
+ strcat((*result) + cur_off, special_chars[i].utf);
+
+ cur_off += special_chars[i].len;
- strcat((*result), offsets[i].local->utf);
+ prev_off = off + 1;
+
+ num_specials++;
+ if (num_specials % SPECIAL_CHUNK_SIZE ==
+ SPECIAL_CHUNK_SIZE - 1) {
+ char *temp = realloc((*result),
+ result_alloc +
+ (3 * SPECIAL_CHUNK_SIZE));
+ if (!temp) {
+ free(*result);
+ return UTF8_CONVERT_NOMEM;
+ }
+
+ *result = temp;
+ result_alloc += (3 * SPECIAL_CHUNK_SIZE);
+ }
+ }
}
/* handle last chunk
* NB. 0 length has a special meaning to utf8_from_enc */
- off = offsets[offset_count - 1].offset + 1;
- if (off < len) {
- err = utf8_from_enc(string + off, enc, len - off, &temp);
+ if (prev_off < len) {
+ err = utf8_from_enc(string + prev_off, enc, len - prev_off,
+ &temp);
if (err != UTF8_CONVERT_OK) {
assert(err != UTF8_CONVERT_BADENC);
LOG(("utf8_from_enc failed"));
@@ -668,22 +657,18 @@ utf8_convert_ret utf8_from_local_encoding(const char *string, size_t len,
return UTF8_CONVERT_NOMEM;
}
- strcat((*result), temp);
+ strcat((*result) + cur_off, temp);
free(temp);
}
/* and copy into more reasonably-sized buffer */
- temp = malloc(strlen((*result)) + 1);
+ temp = realloc((*result), cur_off + 1);
if (!temp) {
- LOG(("malloc failed"));
+ LOG(("realloc failed"));
free(*result);
return UTF8_CONVERT_NOMEM;
}
- *temp = '\0';
-
- strcpy(temp, (*result));
- free(*result);
*result = temp;
return UTF8_CONVERT_OK;