summaryrefslogtreecommitdiff
path: root/utils/tt2code
diff options
context:
space:
mode:
author James Bursa <james@netsurf-browser.org> 2003-12-29 00:38:59 +0000
committer James Bursa <james@netsurf-browser.org> 2003-12-29 00:38:59 +0000
commit 4fcbc23c1ce263d38973a5ba69dd471c2585050f (patch)
tree 2fd5602254d569af013d4b6aeac789976bafb50e /utils/tt2code
parent 3a8b8485adc6a0e5e1d8182b64951d077b842093 (diff)
download netsurf-4fcbc23c1ce263d38973a5ba69dd471c2585050f.tar.gz
netsurf-4fcbc23c1ce263d38973a5ba69dd471c2585050f.tar.bz2
[project @ 2003-12-29 00:38:59 by bursa]
Transliterate Unicode to Latin1 using Markus Kuhn's transtab. svn path=/import/netsurf/; revision=465
Diffstat (limited to 'utils/tt2code')
-rwxr-xr-x utils/tt2code 51
1 files changed, 51 insertions, 0 deletions
diff --git a/utils/tt2code b/utils/tt2code
new file mode 100755
index 000000000..c4dc07820
--- /dev/null
+++ b/utils/tt2code
@@ -0,0 +1,51 @@
+#!/usr/bin/perl -W
+# Turn a transliteration table (lines "<Uxxxx> alt;alt;...") into a C function on stdout.
+print <<END;
+void unicode_transliterate(unsigned int c, char **r)
+{
+ char *s = *r;
+ switch (c) {
+
+END
+# One "case" is emitted per source code point, using its first alternative that fits in 8 bits.
+LINE: while (<>) {
+    chomp;
+    next if m/^%/;      # skip lines starting with '%'
+    next if m/^ *$/;    # skip empty / space-only lines
+    # /g leaves pos() after the source code point, so the \G match below resumes from there.
+    m/^<U([0-9A-F]{4})> /g or die "invalid line '$_'";
+    my $z = $1;
+    next if (hex($z) < 256);    # no case is generated for source code points below 256
+
+    SUBST: while (m/\G"?((<U([0-9A-F]{4})>)*)"?;?/g) {
+        next if $& eq '';    # zero-length match: nothing to emit
+        my $m = $1;
+        if ($m eq '') {
+            print "case 0x$z: break;\n";
+            next LINE;    # label emitted; scanning on could emit a duplicate case for $z
+        }
+        # Hex digits of each code point in this alternative.
+        my @s = $m =~ /<U([0-9A-F]{4})>/g;
+        foreach my $d (@s) {
+            # Alternative not expressible in 8-bit characters: try the next one.
+            next SUBST if 255 < hex($d);
+        }
+
+        print "case 0x$z: ";
+        foreach my $d (@s) {
+            print "*s++ = 0x$d; ";
+        }
+        print "break;\n";
+        next LINE;
+    }
+}
+
+print <<END;
+
+default: *s++ = '?'; break;
+ }
+
+ *r = s;
+}
+END
+