summary | refs | log | tree | commit | diff
path: root/utils/utils.c
diff options
context:
space:
mode:
author: James Bursa <james@netsurf-browser.org> 2003-12-29 00:38:59 +0000
committer: James Bursa <james@netsurf-browser.org> 2003-12-29 00:38:59 +0000
commit: 4fcbc23c1ce263d38973a5ba69dd471c2585050f (patch)
tree: 2fd5602254d569af013d4b6aeac789976bafb50e /utils/utils.c
parent: 3a8b8485adc6a0e5e1d8182b64951d077b842093 (diff)
download: netsurf-4fcbc23c1ce263d38973a5ba69dd471c2585050f.tar.gz
download: netsurf-4fcbc23c1ce263d38973a5ba69dd471c2585050f.tar.bz2
[project @ 2003-12-29 00:38:59 by bursa]
Transliterate Unicode to Latin1 using Markus Kuhn's transtab.
svn path=/import/netsurf/; revision=465
Diffstat (limited to 'utils/utils.c')
-rw-r--r-- utils/utils.c 25
1 files changed, 19 insertions, 6 deletions
diff --git a/utils/utils.c b/utils/utils.c
index ecc31f995..8cd6e1f68 100644
--- a/utils/utils.c
+++ b/utils/utils.c
@@ -117,22 +117,35 @@ char * squash_whitespace(const char * s)
char * tolat1(xmlChar * s)
{
unsigned int length = strlen((char*) s);
- char *d = xcalloc(length + 1, sizeof(char));
+ unsigned int space = length + 100;
+ char *d = xcalloc(space, sizeof(char));
char *d0 = d;
+ char *end = d0 + space - 10;
int u, chars;
while (*s != 0) {
chars = length;
u = xmlGetUTF8Char((unsigned char *) s, &chars);
+ if (chars <= 0) {
+ s += 1;
+ length -= 1;
+ LOG(("UTF-8 error"));
+ continue;
+ }
s += chars;
length -= chars;
if (u == 0x09 || u == 0x0a || u == 0x0d)
- *d = ' ';
+ *d++ = ' ';
else if ((0x20 <= u && u <= 0x7f) || (0xa0 <= u && u <= 0xff))
- *d = u;
- else
- *d = '?';
- d++;
+ *d++ = u;
+ else {
+ unicode_transliterate((unsigned int) u, &d);
+ if (end < d) {
+ space += 100;
+ d0 = xrealloc(d0, space);
+ end = d0 + space - 10;
+ }
+ }
}
*d = 0;