summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: James Bursa <james@netsurf-browser.org> 2003-12-29 00:38:59 +0000
committer: James Bursa <james@netsurf-browser.org> 2003-12-29 00:38:59 +0000
commit: 4fcbc23c1ce263d38973a5ba69dd471c2585050f (patch)
tree: 2fd5602254d569af013d4b6aeac789976bafb50e
parent: 3a8b8485adc6a0e5e1d8182b64951d077b842093 (diff)
download: netsurf-4fcbc23c1ce263d38973a5ba69dd471c2585050f.tar.gz
netsurf-4fcbc23c1ce263d38973a5ba69dd471c2585050f.tar.bz2
[project @ 2003-12-29 00:38:59 by bursa]
Transliterate Unicode to Latin1 using Markus Kuhn's transtab.
svn path=/import/netsurf/; revision=465
-rw-r--r--  makefile        4
-rw-r--r--  utils/transtab  1688
-rwxr-xr-x  utils/tt2code   51
-rw-r--r--  utils/utils.c   25
-rw-r--r--  utils/utils.h   1
5 files changed, 1762 insertions, 7 deletions
diff --git a/makefile b/makefile
index 23d2135c9..ea7de707e 100644
--- a/makefile
+++ b/makefile
@@ -9,7 +9,7 @@ CC_DEBUG = gcc
OBJECTS_COMMON = cache.o content.o fetch.o fetchcache.o other.o \
css.o css_enum.o parser.o ruleset.o scanner.o \
box.o form.o html.o layout.o textplain.o \
- messages.o utils.o
+ messages.o utils.o translit.c
OBJECTS = $(OBJECTS_COMMON) \
browser.o loginlist.o netsurf.o \
htmlinstance.o htmlredraw.o \
@@ -69,6 +69,8 @@ css/parser.c: css/parser.y
-cd css; lemon parser.y
css/scanner.c css/scanner.h: css/scanner.l
cd css; flex scanner.l
+utils/translit.c: transtab
+ cd utils; ./tt2code < transtab > translit.c
# create documentation
$(DOCDIR)/%.html: documentation/%.xml
diff --git a/utils/transtab b/utils/transtab
new file mode 100644
index 000000000..98d063823
--- /dev/null
+++ b/utils/transtab
@@ -0,0 +1,1688 @@
+% Source: http://www.cl.cam.ac.uk/~mgk25/unicode.html#libs
+%
+% "This package contains a table for transliterating ISO 10646 texts into
+% best-effort representations using smaller coded character sets (ASCII,
+% ISO 8859, etc.). It is primarily intended for inclusion into the GNU C
+% library, but might be of use for other applications as well. The table
+% is freely available to anyone."
+
+% APOSTROPHE
+<U0027> <U2019>
+% GRAVE ACCENT
+<U0060> <U201B>;<U2018>
+% NO-BREAK SPACE
+<U00A0> <U0020>
+% INVERTED EXCLAMATION MARK
+<U00A1> <U0021>
+% CENT SIGN
+<U00A2> <U0063>
+% POUND SIGN
+<U00A3> "<U0047><U0042><U0050>"
+% YEN SIGN
+<U00A5> <U0059>
+% BROKEN BAR
+<U00A6> <U007C>
+% SECTION SIGN
+<U00A7> <U0053>
+% DIAERESIS
+<U00A8> <U0022>
+% COPYRIGHT SIGN
+<U00A9> "<U0028><U0063><U0029>";<U0063>
+% FEMININE ORDINAL INDICATOR
+<U00AA> <U0061>
+% LEFT-POINTING DOUBLE ANGLE QUOTATION MARK
+<U00AB> "<U003C><U003C>"
+% NOT SIGN
+<U00AC> <U002D>
+% SOFT HYPHEN
+<U00AD> <U002D>
+% REGISTERED SIGN
+<U00AE> "<U0028><U0052><U0029>"
+% MACRON
+<U00AF> <U002D>
+% DEGREE SIGN
+<U00B0> <U0020>
+% PLUS-MINUS SIGN
+<U00B1> "<U002B><U002F><U002D>"
+% SUPERSCRIPT TWO
+<U00B2> "<U005E><U0032>";<U0032>
+% SUPERSCRIPT THREE
+<U00B3> "<U005E><U0033>";<U0033>
+% ACUTE ACCENT
+<U00B4> <U0027>
+% MICRO SIGN
+<U00B5> <U03BC>;<U0075>
+% PILCROW SIGN
+<U00B6> <U0050>
+% MIDDLE DOT
+<U00B7> <U002E>
+% CEDILLA
+<U00B8> <U002C>
+% SUPERSCRIPT ONE
+<U00B9> "<U005E><U0031>";<U0031>
+% MASCULINE ORDINAL INDICATOR
+<U00BA> <U006F>
+% RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK
+<U00BB> "<U003E><U003E>"
+% VULGAR FRACTION ONE QUARTER
+<U00BC> "<U0020><U0031><U002F><U0034>"
+% VULGAR FRACTION ONE HALF
+<U00BD> "<U0020><U0031><U002F><U0032>"
+% VULGAR FRACTION THREE QUARTERS
+<U00BE> "<U0020><U0033><U002F><U0034>"
+% INVERTED QUESTION MARK
+<U00BF> <U003F>
+% LATIN CAPITAL LETTER A WITH GRAVE
+<U00C0> <U0041>
+% LATIN CAPITAL LETTER A WITH ACUTE
+<U00C1> <U0041>
+% LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+<U00C2> <U0041>
+% LATIN CAPITAL LETTER A WITH TILDE
+<U00C3> <U0041>
+% LATIN CAPITAL LETTER A WITH DIAERESIS
+<U00C4> "<U0041><U0065>";<U0041>
+% LATIN CAPITAL LETTER A WITH RING ABOVE
+<U00C5> "<U0041><U0061>";<U0041>
+% LATIN CAPITAL LETTER AE
+<U00C6> "<U0041><U0045>";<U0041>
+% LATIN CAPITAL LETTER C WITH CEDILLA
+<U00C7> <U0043>
+% LATIN CAPITAL LETTER E WITH GRAVE
+<U00C8> <U0045>
+% LATIN CAPITAL LETTER E WITH ACUTE
+<U00C9> <U0045>
+% LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+<U00CA> <U0045>
+% LATIN CAPITAL LETTER E WITH DIAERESIS
+<U00CB> <U0045>
+% LATIN CAPITAL LETTER I WITH GRAVE
+<U00CC> <U0049>
+% LATIN CAPITAL LETTER I WITH ACUTE
+<U00CD> <U0049>
+% LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+<U00CE> <U0049>
+% LATIN CAPITAL LETTER I WITH DIAERESIS
+<U00CF> <U0049>
+% LATIN CAPITAL LETTER ETH
+<U00D0> <U0044>
+% LATIN CAPITAL LETTER N WITH TILDE
+<U00D1> <U004E>
+% LATIN CAPITAL LETTER O WITH GRAVE
+<U00D2> <U004F>
+% LATIN CAPITAL LETTER O WITH ACUTE
+<U00D3> <U004F>
+% LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+<U00D4> <U004F>
+% LATIN CAPITAL LETTER O WITH TILDE
+<U00D5> <U004F>
+% LATIN CAPITAL LETTER O WITH DIAERESIS
+<U00D6> "<U004F><U0065>";<U004F>
+% MULTIPLICATION SIGN
+<U00D7> <U0078>
+% LATIN CAPITAL LETTER O WITH STROKE
+<U00D8> <U004F>
+% LATIN CAPITAL LETTER U WITH GRAVE
+<U00D9> <U0055>
+% LATIN CAPITAL LETTER U WITH ACUTE
+<U00DA> <U0055>
+% LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+<U00DB> <U0055>
+% LATIN CAPITAL LETTER U WITH DIAERESIS
+<U00DC> "<U0055><U0065>";<U0055>
+% LATIN CAPITAL LETTER Y WITH ACUTE
+<U00DD> <U0059>
+% LATIN CAPITAL LETTER THORN
+<U00DE> "<U0054><U0068>"
+% LATIN SMALL LETTER SHARP S
+<U00DF> "<U0073><U0073>";<U03B2>
+% LATIN SMALL LETTER A WITH GRAVE
+<U00E0> <U0061>
+% LATIN SMALL LETTER A WITH ACUTE
+<U00E1> <U0061>
+% LATIN SMALL LETTER A WITH CIRCUMFLEX
+<U00E2> <U0061>
+% LATIN SMALL LETTER A WITH TILDE
+<U00E3> <U0061>
+% LATIN SMALL LETTER A WITH DIAERESIS
+<U00E4> "<U0061><U0065>";<U0061>
+% LATIN SMALL LETTER A WITH RING ABOVE
+<U00E5> "<U0061><U0061>";<U0061>
+% LATIN SMALL LETTER AE
+<U00E6> "<U0061><U0065>";<U0061>
+% LATIN SMALL LETTER C WITH CEDILLA
+<U00E7> <U0063>
+% LATIN SMALL LETTER E WITH GRAVE
+<U00E8> <U0065>
+% LATIN SMALL LETTER E WITH ACUTE
+<U00E9> <U0065>
+% LATIN SMALL LETTER E WITH CIRCUMFLEX
+<U00EA> <U0065>
+% LATIN SMALL LETTER E WITH DIAERESIS
+<U00EB> <U0065>
+% LATIN SMALL LETTER I WITH GRAVE
+<U00EC> <U0069>
+% LATIN SMALL LETTER I WITH ACUTE
+<U00ED> <U0069>
+% LATIN SMALL LETTER I WITH CIRCUMFLEX
+<U00EE> <U0069>
+% LATIN SMALL LETTER I WITH DIAERESIS
+<U00EF> <U0069>
+% LATIN SMALL LETTER ETH
+<U00F0> <U0064>
+% LATIN SMALL LETTER N WITH TILDE
+<U00F1> <U006E>
+% LATIN SMALL LETTER O WITH GRAVE
+<U00F2> <U006F>
+% LATIN SMALL LETTER O WITH ACUTE
+<U00F3> <U006F>
+% LATIN SMALL LETTER O WITH CIRCUMFLEX
+<U00F4> <U006F>
+% LATIN SMALL LETTER O WITH TILDE
+<U00F5> <U006F>
+% LATIN SMALL LETTER O WITH DIAERESIS
+<U00F6> "<U006F><U0065>";<U006F>
+% DIVISION SIGN
+<U00F7> <U003A>
+% LATIN SMALL LETTER O WITH STROKE
+<U00F8> <U006F>
+% LATIN SMALL LETTER U WITH GRAVE
+<U00F9> <U0075>
+% LATIN SMALL LETTER U WITH ACUTE
+<U00FA> <U0075>
+% LATIN SMALL LETTER U WITH CIRCUMFLEX
+<U00FB> <U0075>
+% LATIN SMALL LETTER U WITH DIAERESIS
+<U00FC> "<U0075><U0065>";<U0075>
+% LATIN SMALL LETTER Y WITH ACUTE
+<U00FD> <U0079>
+% LATIN SMALL LETTER THORN
+<U00FE> "<U0074><U0068>"
+% LATIN SMALL LETTER Y WITH DIAERESIS
+<U00FF> <U0079>
+% LATIN CAPITAL LETTER A WITH MACRON
+<U0100> <U0041>
+% LATIN SMALL LETTER A WITH MACRON
+<U0101> <U0061>
+% LATIN CAPITAL LETTER A WITH BREVE
+<U0102> <U0041>
+% LATIN SMALL LETTER A WITH BREVE
+<U0103> <U0061>
+% LATIN CAPITAL LETTER A WITH OGONEK
+<U0104> <U0041>
+% LATIN SMALL LETTER A WITH OGONEK
+<U0105> <U0061>
+% LATIN CAPITAL LETTER C WITH ACUTE
+<U0106> <U0043>
+% LATIN SMALL LETTER C WITH ACUTE
+<U0107> <U0063>
+% LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+<U0108> "<U0043><U0068>";<U0043>
+% LATIN SMALL LETTER C WITH CIRCUMFLEX
+<U0109> "<U0063><U0068>";<U0063>
+% LATIN CAPITAL LETTER C WITH DOT ABOVE
+<U010A> <U0043>
+% LATIN SMALL LETTER C WITH DOT ABOVE
+<U010B> <U0063>
+% LATIN CAPITAL LETTER C WITH CARON
+<U010C> <U0043>
+% LATIN SMALL LETTER C WITH CARON
+<U010D> <U0063>
+% LATIN CAPITAL LETTER D WITH CARON
+<U010E> <U0044>
+% LATIN SMALL LETTER D WITH CARON
+<U010F> <U0064>
+% LATIN CAPITAL LETTER D WITH STROKE
+<U0110> <U0044>
+% LATIN SMALL LETTER D WITH STROKE
+<U0111> <U0064>
+% LATIN CAPITAL LETTER E WITH MACRON
+<U0112> <U0045>
+% LATIN SMALL LETTER E WITH MACRON
+<U0113> <U0065>
+% LATIN CAPITAL LETTER E WITH BREVE
+<U0114> <U0045>
+% LATIN SMALL LETTER E WITH BREVE
+<U0115> <U0065>
+% LATIN CAPITAL LETTER E WITH DOT ABOVE
+<U0116> <U0045>
+% LATIN SMALL LETTER E WITH DOT ABOVE
+<U0117> <U0065>
+% LATIN CAPITAL LETTER E WITH OGONEK
+<U0118> <U0045>
+% LATIN SMALL LETTER E WITH OGONEK
+<U0119> <U0065>
+% LATIN CAPITAL LETTER E WITH CARON
+<U011A> <U0045>
+% LATIN SMALL LETTER E WITH CARON
+<U011B> <U0065>
+% LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+<U011C> "<U0047><U0068>";<U0047>
+% LATIN SMALL LETTER G WITH CIRCUMFLEX
+<U011D> "<U0067><U0068>";<U0067>
+% LATIN CAPITAL LETTER G WITH BREVE
+<U011E> <U0047>
+% LATIN SMALL LETTER G WITH BREVE
+<U011F> <U0067>
+% LATIN CAPITAL LETTER G WITH DOT ABOVE
+<U0120> <U0047>
+% LATIN SMALL LETTER G WITH DOT ABOVE
+<U0121> <U0067>
+% LATIN CAPITAL LETTER G WITH CEDILLA
+<U0122> <U0047>
+% LATIN SMALL LETTER G WITH CEDILLA
+<U0123> <U0067>
+% LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+<U0124> "<U0048><U0068>";<U0048>
+% LATIN SMALL LETTER H WITH CIRCUMFLEX
+<U0125> "<U0068><U0068>";<U0068>
+% LATIN CAPITAL LETTER H WITH STROKE
+<U0126> <U0048>
+% LATIN SMALL LETTER H WITH STROKE
+<U0127> <U0068>
+% LATIN CAPITAL LETTER I WITH TILDE
+<U0128> <U0049>
+% LATIN SMALL LETTER I WITH TILDE
+<U0129> <U0069>
+% LATIN CAPITAL LETTER I WITH MACRON
+<U012A> <U0049>
+% LATIN SMALL LETTER I WITH MACRON
+<U012B> <U0069>
+% LATIN CAPITAL LETTER I WITH BREVE
+<U012C> <U0049>
+% LATIN SMALL LETTER I WITH BREVE
+<U012D> <U0069>
+% LATIN CAPITAL LETTER I WITH OGONEK
+<U012E> <U0049>
+% LATIN SMALL LETTER I WITH OGONEK
+<U012F> <U0069>
+% LATIN CAPITAL LETTER I WITH DOT ABOVE
+<U0130> <U0049>
+% LATIN SMALL LETTER DOTLESS I
+<U0131> <U0069>
+% LATIN CAPITAL LIGATURE IJ
+<U0132> "<U0049><U004A>"
+% LATIN SMALL LIGATURE IJ
+<U0133> "<U0069><U006A>"
+% LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+<U0134> "<U004A><U0068>";<U004A>
+% LATIN SMALL LETTER J WITH CIRCUMFLEX
+<U0135> "<U006A><U0068>";<U006A>
+% LATIN CAPITAL LETTER K WITH CEDILLA
+<U0136> <U004B>
+% LATIN SMALL LETTER K WITH CEDILLA
+<U0137> <U006B>
+% LATIN SMALL LETTER KRA
+<U0138> <U006B>
+% LATIN CAPITAL LETTER L WITH ACUTE
+<U0139> <U004C>
+% LATIN SMALL LETTER L WITH ACUTE
+<U013A> <U006C>
+% LATIN CAPITAL LETTER L WITH CEDILLA
+<U013B> <U004C>
+% LATIN SMALL LETTER L WITH CEDILLA
+<U013C> <U006C>
+% LATIN CAPITAL LETTER L WITH CARON
+<U013D> <U004C>
+% LATIN SMALL LETTER L WITH CARON
+<U013E> <U006C>
+% LATIN CAPITAL LETTER L WITH MIDDLE DOT
+<U013F> "<U004C><U00B7>";"<U004C><U002E>";<U004C>
+% LATIN SMALL LETTER L WITH MIDDLE DOT
+<U0140> "<U006C><U00B7>";"<U006C><U002E>";<U006C>
+% LATIN CAPITAL LETTER L WITH STROKE
+<U0141> <U004C>
+% LATIN SMALL LETTER L WITH STROKE
+<U0142> <U006C>
+% LATIN CAPITAL LETTER N WITH ACUTE
+<U0143> <U004E>
+% LATIN SMALL LETTER N WITH ACUTE
+<U0144> <U006E>
+% LATIN CAPITAL LETTER N WITH CEDILLA
+<U0145> <U004E>
+% LATIN SMALL LETTER N WITH CEDILLA
+<U0146> <U006E>
+% LATIN CAPITAL LETTER N WITH CARON
+<U0147> <U004E>
+% LATIN SMALL LETTER N WITH CARON
+<U0148> <U006E>
+% LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+<U0149> "<U0027><U006E>"
+% LATIN CAPITAL LETTER ENG
+<U014A> "<U004E><U0047>";<U004E>
+% LATIN SMALL LETTER ENG
+<U014B> "<U006E><U0067>";<U006E>
+% LATIN CAPITAL LETTER O WITH MACRON
+<U014C> <U004F>
+% LATIN SMALL LETTER O WITH MACRON
+<U014D> <U006F>
+% LATIN CAPITAL LETTER O WITH BREVE
+<U014E> <U004F>
+% LATIN SMALL LETTER O WITH BREVE
+<U014F> <U006F>
+% LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+<U0150> <U004F>
+% LATIN SMALL LETTER O WITH DOUBLE ACUTE
+<U0151> <U006F>
+% LATIN CAPITAL LIGATURE OE
+<U0152> "<U004F><U0045>"
+% LATIN SMALL LIGATURE OE
+<U0153> "<U006F><U0065>"
+% LATIN CAPITAL LETTER R WITH ACUTE
+<U0154> <U0052>
+% LATIN SMALL LETTER R WITH ACUTE
+<U0155> <U0072>
+% LATIN CAPITAL LETTER R WITH CEDILLA
+<U0156> <U0052>
+% LATIN SMALL LETTER R WITH CEDILLA
+<U0157> <U0072>
+% LATIN CAPITAL LETTER R WITH CARON
+<U0158> <U0052>
+% LATIN SMALL LETTER R WITH CARON
+<U0159> <U0072>
+% LATIN CAPITAL LETTER S WITH ACUTE
+<U015A> <U0053>
+% LATIN SMALL LETTER S WITH ACUTE
+<U015B> <U0073>
+% LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+<U015C> "<U0053><U0068>";<U0053>
+% LATIN SMALL LETTER S WITH CIRCUMFLEX
+<U015D> "<U0073><U0068>";<U0073>
+% LATIN CAPITAL LETTER S WITH CEDILLA
+<U015E> <U0053>
+% LATIN SMALL LETTER S WITH CEDILLA
+<U015F> <U0073>
+% LATIN CAPITAL LETTER S WITH CARON
+<U0160> <U0053>
+% LATIN SMALL LETTER S WITH CARON
+<U0161> <U0073>
+% LATIN CAPITAL LETTER T WITH CEDILLA
+<U0162> <U0054>
+% LATIN SMALL LETTER T WITH CEDILLA
+<U0163> <U0074>
+% LATIN CAPITAL LETTER T WITH CARON
+<U0164> <U0054>
+% LATIN SMALL LETTER T WITH CARON
+<U0165> <U0074>
+% LATIN CAPITAL LETTER T WITH STROKE
+<U0166> <U0054>
+% LATIN SMALL LETTER T WITH STROKE
+<U0167> <U0074>
+% LATIN CAPITAL LETTER U WITH TILDE
+<U0168> <U0055>
+% LATIN SMALL LETTER U WITH TILDE
+<U0169> <U0075>
+% LATIN CAPITAL LETTER U WITH MACRON
+<U016A> <U0055>
+% LATIN SMALL LETTER U WITH MACRON
+<U016B> <U0075>
+% LATIN CAPITAL LETTER U WITH BREVE
+<U016C> <U0055>
+% LATIN SMALL LETTER U WITH BREVE
+<U016D> <U0075>
+% LATIN CAPITAL LETTER U WITH RING ABOVE
+<U016E> <U0055>
+% LATIN SMALL LETTER U WITH RING ABOVE
+<U016F> <U0075>
+% LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+<U0170> <U0055>
+% LATIN SMALL LETTER U WITH DOUBLE ACUTE
+<U0171> <U0075>
+% LATIN CAPITAL LETTER U WITH OGONEK
+<U0172> <U0055>
+% LATIN SMALL LETTER U WITH OGONEK
+<U0173> <U0075>
+% LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+<U0174> <U0057>
+% LATIN SMALL LETTER W WITH CIRCUMFLEX
+<U0175> <U0077>
+% LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+<U0176> <U0059>
+% LATIN SMALL LETTER Y WITH CIRCUMFLEX
+<U0177> <U0079>
+% LATIN CAPITAL LETTER Y WITH DIAERESIS
+<U0178> <U0059>
+% LATIN CAPITAL LETTER Z WITH ACUTE
+<U0179> <U005A>
+% LATIN SMALL LETTER Z WITH ACUTE
+<U017A> <U007A>
+% LATIN CAPITAL LETTER Z WITH DOT ABOVE
+<U017B> <U005A>
+% LATIN SMALL LETTER Z WITH DOT ABOVE
+<U017C> <U007A>
+% LATIN CAPITAL LETTER Z WITH CARON
+<U017D> <U005A>
+% LATIN SMALL LETTER Z WITH CARON
+<U017E> <U007A>
+% LATIN SMALL LETTER LONG S
+<U017F> <U0073>
+% LATIN SMALL LETTER F WITH HOOK
+<U0192> <U0066>
+% LATIN CAPITAL LETTER S WITH COMMA BELOW
+<U0218> <U015E>;<U0053>
+% LATIN SMALL LETTER S WITH COMMA BELOW
+<U0219> <U015F>;<U0073>
+% LATIN CAPITAL LETTER T WITH COMMA BELOW
+<U021A> <U0162>;<U0054>
+% LATIN SMALL LETTER T WITH COMMA BELOW
+<U021B> <U0163>;<U0074>
+% MODIFIER LETTER PRIME
+<U02B9> <U2032>;<U0027>
+% MODIFIER LETTER TURNED COMMA
+<U02BB> <U2018>
+% MODIFIER LETTER APOSTROPHE
+<U02BC> <U2019>;<U0027>
+% MODIFIER LETTER REVERSED COMMA
+<U02BD> <U201B>
+% MODIFIER LETTER CIRCUMFLEX ACCENT
+<U02C6> <U005E>
+% MODIFIER LETTER VERTICAL LINE
+<U02C8> <U0027>
+% MODIFIER LETTER MACRON
+<U02C9> <U00AF>
+% MODIFIER LETTER LOW VERTICAL LINE
+<U02CC> <U002C>
+% MODIFIER LETTER TRIANGULAR COLON
+<U02D0> <U003A>
+% RING ABOVE
+<U02DA> <U00B0>
+% SMALL TILDE
+<U02DC> <U007E>
+% DOUBLE ACUTE ACCENT
+<U02DD> <U0022>
+% GREEK NUMERAL SIGN
+<U0374> <U0027>
+% GREEK LOWER NUMERAL SIGN
+<U0375> <U002C>
+% GREEK QUESTION MARK
+<U037E> <U003B>
+% LATIN CAPITAL LETTER B WITH DOT ABOVE
+<U1E02> <U0042>
+% LATIN SMALL LETTER B WITH DOT ABOVE
+<U1E03> <U0062>
+% LATIN CAPITAL LETTER D WITH DOT ABOVE
+<U1E0A> <U0044>
+% LATIN SMALL LETTER D WITH DOT ABOVE
+<U1E0B> <U0064>
+% LATIN CAPITAL LETTER F WITH DOT ABOVE
+<U1E1E> <U0046>
+% LATIN SMALL LETTER F WITH DOT ABOVE
+<U1E1F> <U0066>
+% LATIN CAPITAL LETTER M WITH DOT ABOVE
+<U1E40> <U004D>
+% LATIN SMALL LETTER M WITH DOT ABOVE
+<U1E41> <U006D>
+% LATIN CAPITAL LETTER P WITH DOT ABOVE
+<U1E56> <U0050>
+% LATIN SMALL LETTER P WITH DOT ABOVE
+<U1E57> <U0070>
+% LATIN CAPITAL LETTER S WITH DOT ABOVE
+<U1E60> <U0053>
+% LATIN SMALL LETTER S WITH DOT ABOVE
+<U1E61> <U0073>
+% LATIN CAPITAL LETTER T WITH DOT ABOVE
+<U1E6A> <U0054>
+% LATIN SMALL LETTER T WITH DOT ABOVE
+<U1E6B> <U0074>
+% LATIN CAPITAL LETTER W WITH GRAVE
+<U1E80> <U0057>
+% LATIN SMALL LETTER W WITH GRAVE
+<U1E81> <U0077>
+% LATIN CAPITAL LETTER W WITH ACUTE
+<U1E82> <U0057>
+% LATIN SMALL LETTER W WITH ACUTE
+<U1E83> <U0077>
+% LATIN CAPITAL LETTER W WITH DIAERESIS
+<U1E84> <U0057>
+% LATIN SMALL LETTER W WITH DIAERESIS
+<U1E85> <U0077>
+% LATIN CAPITAL LETTER Y WITH GRAVE
+<U1EF2> <U0059>
+% LATIN SMALL LETTER Y WITH GRAVE
+<U1EF3> <U0079>
+% EN QUAD
+<U2000> <U0020>
+% EM QUAD
+<U2001> "<U0020><U0020>"
+% EN SPACE
+<U2002> <U0020>
+% EM SPACE
+<U2003> "<U0020><U0020>"
+% THREE-PER-EM SPACE
+<U2004> <U0020>
+% FOUR-PER-EM SPACE
+<U2005> <U0020>
+% SIX-PER-EM SPACE
+<U2006> <U0020>
+% FIGURE SPACE
+<U2007> <U0020>
+% PUNCTUATION SPACE
+<U2008> <U0020>
+% THIN SPACE
+<U2009> <U0020>
+% HAIR SPACE
+<U200A> ""
+% ZERO WIDTH SPACE
+<U200B> ""
+% ZERO WIDTH NON-JOINER
+<U200C> ""
+% ZERO WIDTH JOINER
+<U200D> ""
+% LEFT-TO-RIGHT MARK
+<U200E> ""
+% RIGHT-TO-LEFT MARK
+<U200F> ""
+% HYPHEN
+<U2010> <U002D>
+% NON-BREAKING HYPHEN
+<U2011> <U002D>
+% FIGURE DASH
+<U2012> <U002D>
+% EN DASH
+<U2013> <U002D>
+% EM DASH
+<U2014> "<U002D><U002D>"
+% HORIZONTAL BAR
+<U2015> "<U002D><U002D>"
+% DOUBLE VERTICAL LINE
+<U2016> "<U007C><U007C>"
+% DOUBLE LOW LINE
+<U2017> <U005F>
+% LEFT SINGLE QUOTATION MARK
+<U2018> <U0027>
+% RIGHT SINGLE QUOTATION MARK
+<U2019> <U0027>
+% SINGLE LOW-9 QUOTATION MARK
+<U201A> <U0027>
+% SINGLE HIGH-REVERSED-9 QUOTATION MARK
+<U201B> <U0027>
+% LEFT DOUBLE QUOTATION MARK
+<U201C> <U0022>
+% RIGHT DOUBLE QUOTATION MARK
+<U201D> <U0022>
+% DOUBLE LOW-9 QUOTATION MARK
+<U201E> <U0022>
+% DOUBLE HIGH-REVERSED-9 QUOTATION MARK
+<U201F> <U0022>
+% DAGGER
+<U2020> <U002B>
+% DOUBLE DAGGER
+<U2021> "<U002B><U002B>"
+% BULLET
+<U2022> <U006F>
+% TRIANGULAR BULLET
+<U2023> <U003E>
+% ONE DOT LEADER
+<U2024> <U002E>
+% TWO DOT LEADER
+<U2025> "<U002E><U002E>"
+% HORIZONTAL ELLIPSIS
+<U2026> "<U002E><U002E><U002E>"
+% HYPHENATION POINT
+<U2027> <U002D>
+% LEFT-TO-RIGHT EMBEDDING
+<U202A> ""
+% RIGHT-TO-LEFT EMBEDDING
+<U202B> ""
+% POP DIRECTIONAL FORMATTING
+<U202C> ""
+% LEFT-TO-RIGHT OVERRIDE
+<U202D> ""
+% RIGHT-TO-LEFT OVERRIDE
+<U202E> ""
+% NARROW NO-BREAK SPACE
+<U202F> <U0020>
+% PER MILLE SIGN
+<U2030> "<U0020><U0030><U002F><U0030><U0030>"
+% PRIME
+<U2032> <U0027>
+% DOUBLE PRIME
+<U2033> <U0022>
+% TRIPLE PRIME
+<U2034> "<U0027><U0027><U0027>"
+% REVERSED PRIME
+<U2035> <U0060>
+% REVERSED DOUBLE PRIME
+<U2036> "<U0060><U0060>"
+% REVERSED TRIPLE PRIME
+<U2037> "<U0060><U0060><U0060>"
+% SINGLE LEFT-POINTING ANGLE QUOTATION MARK
+<U2039> <U003C>
+% SINGLE RIGHT-POINTING ANGLE QUOTATION MARK
+<U203A> <U003E>
+% DOUBLE EXCLAMATION MARK
+<U203C> "<U0021><U0021>"
+% OVERLINE
+<U203E> <U002D>
+% HYPHEN BULLET
+<U2043> <U002D>
+% FRACTION SLASH
+<U2044> <U002F>
+% QUESTION EXCLAMATION MARK
+<U2048> "<U003F><U0021>"
+% EXCLAMATION QUESTION MARK
+<U2049> "<U0021><U003F>"
+% TIRONIAN SIGN ET
+<U204A> <U0037>
+% SUPERSCRIPT ZERO
+<U2070> "<U005E><U0030>";<U0030>
+% SUPERSCRIPT FOUR
+<U2074> "<U005E><U0034>";<U0034>
+% SUPERSCRIPT FIVE
+<U2075> "<U005E><U0035>";<U0035>
+% SUPERSCRIPT SIX
+<U2076> "<U005E><U0036>";<U0036>
+% SUPERSCRIPT SEVEN
+<U2077> "<U005E><U0037>";<U0037>
+% SUPERSCRIPT EIGHT
+<U2078> "<U005E><U0038>";<U0038>
+% SUPERSCRIPT NINE
+<U2079> "<U005E><U0039>";<U0039>
+% SUPERSCRIPT PLUS SIGN
+<U207A> "<U005E><U002B>";<U002B>
+% SUPERSCRIPT MINUS
+<U207B> "<U005E><U002D>";<U002D>
+% SUPERSCRIPT EQUALS SIGN
+<U207C> "<U005E><U003D>";<U003D>
+% SUPERSCRIPT LEFT PARENTHESIS
+<U207D> "<U005E><U0028>";<U0028>
+% SUPERSCRIPT RIGHT PARENTHESIS
+<U207E> "<U005E><U0029>";<U0029>
+% SUPERSCRIPT LATIN SMALL LETTER N
+<U207F> "<U005E><U006E>";<U006E>
+% SUBSCRIPT ZERO
+<U2080> "<U005F><U0030>";<U0030>
+% SUBSCRIPT ONE
+<U2081> "<U005F><U0031>";<U0031>
+% SUBSCRIPT TWO
+<U2082> "<U005F><U0032>";<U0032>
+% SUBSCRIPT THREE
+<U2083> "<U005F><U0033>";<U0033>
+% SUBSCRIPT FOUR
+<U2084> "<U005F><U0034>";<U0034>
+% SUBSCRIPT FIVE
+<U2085> "<U005F><U0035>";<U0035>
+% SUBSCRIPT SIX
+<U2086> "<U005F><U0036>";<U0036>
+% SUBSCRIPT SEVEN
+<U2087> "<U005F><U0037>";<U0037>
+% SUBSCRIPT EIGHT
+<U2088> "<U005F><U0038>";<U0038>
+% SUBSCRIPT NINE
+<U2089> "<U005F><U0039>";<U0039>
+% SUBSCRIPT PLUS SIGN
+<U208A> "<U005F><U002B>";<U002B>
+% SUBSCRIPT MINUS
+<U208B> "<U005F><U002D>";<U002D>
+% SUBSCRIPT EQUALS SIGN
+<U208C> "<U005F><U003D>";<U003D>
+% SUBSCRIPT LEFT PARENTHESIS
+<U208D> "<U005F><U0028>";<U0028>
+% SUBSCRIPT RIGHT PARENTHESIS
+<U208E> "<U005F><U0029>";<U0029>
+% EURO SIGN
+<U20AC> "<U0045><U0055><U0052>";<U0045>
+% ACCOUNT OF
+<U2100> "<U0061><U002F><U0063>"
+% ADDRESSED TO THE SUBJECT
+<U2101> "<U0061><U002F><U0073>"
+% DEGREE CELSIUS
+<U2103> "<U00B0><U0043>";<U0043>
+% CARE OF
+<U2105> "<U0063><U002F><U006F>"
+% CADA UNA
+<U2106> "<U0063><U002F><U0075>"
+% DEGREE FAHRENHEIT
+<U2109> "<U00B0><U0046>";<U0046>
+% SCRIPT SMALL L
+<U2113> <U006C>
+% NUMERO SIGN
+<U2116> "<U004E><U00BA>";"<U004E><U006F>"
+% SOUND RECORDING COPYRIGHT
+<U2117> "<U0028><U0050><U0029>"
+% SERVICE MARK
+<U2120> "<U005B><U0053><U004D><U005D>"
+% TELEPHONE SIGN
+<U2121> "<U0054><U0045><U004C>"
+% TRADE MARK SIGN
+<U2122> "<U005B><U0054><U004D><U005D>"
+% OHM SIGN
+<U2126> <U03A9>;"<U006F><U0068><U006D>";<U004F>
+% KELVIN SIGN
+<U212A> <U004B>
+% ANGSTROM SIGN
+<U212B> <U00C5>
+% ESTIMATED SYMBOL
+<U212E> <U0065>
+% VULGAR FRACTION ONE THIRD
+<U2153> "<U0020><U0031><U002F><U0033>"
+% VULGAR FRACTION TWO THIRDS
+<U2154> "<U0020><U0032><U002F><U0033>"
+% VULGAR FRACTION ONE FIFTH
+<U2155> "<U0020><U0031><U002F><U0035>"
+% VULGAR FRACTION TWO FIFTHS
+<U2156> "<U0020><U0032><U002F><U0035>"
+% VULGAR FRACTION THREE FIFTHS
+<U2157> "<U0020><U0033><U002F><U0035>"
+% VULGAR FRACTION FOUR FIFTHS
+<U2158> "<U0020><U0034><U002F><U0035>"
+% VULGAR FRACTION ONE SIXTH
+<U2159> "<U0020><U0031><U002F><U0036>"
+% VULGAR FRACTION FIVE SIXTHS
+<U215A> "<U0020><U0035><U002F><U0036>"
+% VULGAR FRACTION ONE EIGHTH
+<U215B> "<U0020><U0031><U002F><U0038>"
+% VULGAR FRACTION THREE EIGHTHS
+<U215C> "<U0020><U0033><U002F><U0038>"
+% VULGAR FRACTION FIVE EIGHTHS
+<U215D> "<U0020><U0035><U002F><U0038>"
+% VULGAR FRACTION SEVEN EIGHTHS
+<U215E> "<U0020><U0037><U002F><U0038>"
+% FRACTION NUMERATOR ONE
+<U215F> "<U0020><U0031><U002F>"
+% ROMAN NUMERAL ONE
+<U2160> <U0049>
+% ROMAN NUMERAL TWO
+<U2161> "<U0049><U0049>"
+% ROMAN NUMERAL THREE
+<U2162> "<U0049><U0049><U0049>"
+% ROMAN NUMERAL FOUR
+<U2163> "<U0049><U0056>"
+% ROMAN NUMERAL FIVE
+<U2164> <U0056>
+% ROMAN NUMERAL SIX
+<U2165> "<U0056><U0049>"
+% ROMAN NUMERAL SEVEN
+<U2166> "<U0056><U0049><U0049>"
+% ROMAN NUMERAL EIGHT
+<U2167> "<U0056><U0049><U0049><U0049>"
+% ROMAN NUMERAL NINE
+<U2168> "<U0049><U0058>"
+% ROMAN NUMERAL TEN
+<U2169> <U0058>
+% ROMAN NUMERAL ELEVEN
+<U216A> "<U0058><U0049>"
+% ROMAN NUMERAL TWELVE
+<U216B> "<U0058><U0049><U0049>"
+% ROMAN NUMERAL FIFTY
+<U216C> <U004C>
+% ROMAN NUMERAL ONE HUNDRED
+<U216D> <U0043>
+% ROMAN NUMERAL FIVE HUNDRED
+<U216E> <U0044>
+% ROMAN NUMERAL ONE THOUSAND
+<U216F> <U004D>
+% SMALL ROMAN NUMERAL ONE
+<U2170> <U0069>
+% SMALL ROMAN NUMERAL TWO
+<U2171> "<U0069><U0069>"
+% SMALL ROMAN NUMERAL THREE
+<U2172> "<U0069><U0069><U0069>"
+% SMALL ROMAN NUMERAL FOUR
+<U2173> "<U0069><U0076>"
+% SMALL ROMAN NUMERAL FIVE
+<U2174> <U0076>
+% SMALL ROMAN NUMERAL SIX
+<U2175> "<U0076><U0069>"
+% SMALL ROMAN NUMERAL SEVEN
+<U2176> "<U0076><U0069><U0069>"
+% SMALL ROMAN NUMERAL EIGHT
+<U2177> "<U0076><U0069><U0069><U0069>"
+% SMALL ROMAN NUMERAL NINE
+<U2178> "<U0069><U0078>"
+% SMALL ROMAN NUMERAL TEN
+<U2179> <U0078>
+% SMALL ROMAN NUMERAL ELEVEN
+<U217A> "<U0078><U0069>"
+% SMALL ROMAN NUMERAL TWELVE
+<U217B> "<U0078><U0069><U0069>"
+% SMALL ROMAN NUMERAL FIFTY
+<U217C> <U006C>
+% SMALL ROMAN NUMERAL ONE HUNDRED
+<U217D> <U0063>
+% SMALL ROMAN NUMERAL FIVE HUNDRED
+<U217E> <U0064>
+% SMALL ROMAN NUMERAL ONE THOUSAND
+<U217F> <U006D>
+% LEFTWARDS ARROW
+<U2190> "<U003C><U002D>"
+% UPWARDS ARROW
+<U2191> <U005E>
+% RIGHTWARDS ARROW
+<U2192> "<U002D><U003E>"
+% DOWNWARDS ARROW
+<U2193> <U0076>
+% LEFT RIGHT ARROW
+<U2194> "<U003C><U002D><U003E>"
+% LEFTWARDS DOUBLE ARROW
+<U21D0> "<U003C><U003D>"
+% RIGHTWARDS DOUBLE ARROW
+<U21D2> "<U003D><U003E>"
+% LEFT RIGHT DOUBLE ARROW
+<U21D4> "<U003C><U003D><U003E>"
+% MINUS SIGN
+<U2212> <U2013>;<U002D>
+% DIVISION SLASH
+<U2215> <U002F>
+% SET MINUS
+<U2216> <U005C>
+% ASTERISK OPERATOR
+<U2217> <U002A>
+% RING OPERATOR
+<U2218> <U006F>
+% BULLET OPERATOR
+<U2219> <U00B7>
+% INFINITY
+<U221E> "<U0069><U006E><U0066>"
+% DIVIDES
+<U2223> <U007C>
+% PARALLEL TO
+<U2225> "<U007C><U007C>"
+% RATIO
+<U2236> <U003A>
+% TILDE OPERATOR
+<U223C> <U007E>
+% NOT EQUAL TO
+<U2260> "<U002F><U003D>"
+% IDENTICAL TO
+<U2261> <U003D>
+% LESS-THAN OR EQUAL TO
+<U2264> "<U003C><U003D>"
+% GREATER-THAN OR EQUAL TO
+<U2265> "<U003E><U003D>"
+% MUCH LESS-THAN
+<U226A> "<U003C><U003C>"
+% MUCH GREATER-THAN
+<U226B> "<U003E><U003E>"
+% CIRCLED PLUS
+<U2295> "<U0028><U002B><U0029>"
+% CIRCLED MINUS
+<U2296> "<U0028><U002D><U0029>"
+% CIRCLED TIMES
+<U2297> "<U0028><U0078><U0029>"
+% CIRCLED DIVISION SLASH
+<U2298> "<U0028><U002F><U0029>"
+% RIGHT TACK
+<U22A2> "<U007C><U002D>"
+% LEFT TACK
+<U22A3> "<U002D><U007C>"
+% ASSERTION
+<U22A6> "<U007C><U002D>"
+% MODELS
+<U22A7> "<U007C><U003D>"
+% TRUE
+<U22A8> "<U007C><U003D>"
+% FORCES
+<U22A9> "<U007C><U007C><U002D>"
+% DOT OPERATOR
+<U22C5> <U00B7>
+% STAR OPERATOR
+<U22C6> <U002A>
+% EQUAL AND PARALLEL TO
+<U22D5> <U0023>
+% VERY MUCH LESS-THAN
+<U22D8> "<U003C><U003C><U003C>"
+% VERY MUCH GREATER-THAN
+<U22D9> "<U003E><U003E><U003E>"
+% MIDLINE HORIZONTAL ELLIPSIS
+<U22EF> "<U002E><U002E><U002E>"
+% LEFT-POINTING ANGLE BRACKET
+<U2329> <U003C>
+% RIGHT-POINTING ANGLE BRACKET
+<U232A> <U003E>
+% SYMBOL FOR NULL
+<U2400> "<U004E><U0055><U004C>"
+% SYMBOL FOR START OF HEADING
+<U2401> "<U0053><U004F><U0048>"
+% SYMBOL FOR START OF TEXT
+<U2402> "<U0053><U0054><U0058>"
+% SYMBOL FOR END OF TEXT
+<U2403> "<U0045><U0054><U0058>"
+% SYMBOL FOR END OF TRANSMISSION
+<U2404> "<U0045><U004F><U0054>"
+% SYMBOL FOR ENQUIRY
+<U2405> "<U0045><U004E><U0051>"
+% SYMBOL FOR ACKNOWLEDGE
+<U2406> "<U0041><U0043><U004B>"
+% SYMBOL FOR BELL
+<U2407> "<U0042><U0045><U004C>"
+% SYMBOL FOR BACKSPACE
+<U2408> "<U0042><U0053>"
+% SYMBOL FOR HORIZONTAL TABULATION
+<U2409> "<U0048><U0054>"
+% SYMBOL FOR LINE FEED
+<U240A> "<U004C><U0046>"
+% SYMBOL FOR VERTICAL TABULATION
+<U240B> "<U0056><U0054>"
+% SYMBOL FOR FORM FEED
+<U240C> "<U0046><U0046>"
+% SYMBOL FOR CARRIAGE RETURN
+<U240D> "<U0043><U0052>"
+% SYMBOL FOR SHIFT OUT
+<U240E> "<U0053><U004F>"
+% SYMBOL FOR SHIFT IN
+<U240F> "<U0053><U0049>"
+% SYMBOL FOR DATA LINK ESCAPE
+<U2410> "<U0044><U004C><U0045>"
+% SYMBOL FOR DEVICE CONTROL ONE
+<U2411> "<U0044><U0043><U0031>"
+% SYMBOL FOR DEVICE CONTROL TWO
+<U2412> "<U0044><U0043><U0032>"
+% SYMBOL FOR DEVICE CONTROL THREE
+<U2413> "<U0044><U0043><U0033>"
+% SYMBOL FOR DEVICE CONTROL FOUR
+<U2414> "<U0044><U0043><U0034>"
+% SYMBOL FOR NEGATIVE ACKNOWLEDGE
+<U2415> "<U004E><U0041><U004B>"
+% SYMBOL FOR SYNCHRONOUS IDLE
+<U2416> "<U0053><U0059><U004E>"
+% SYMBOL FOR END OF TRANSMISSION BLOCK
+<U2417> "<U0045><U0054><U0042>"
+% SYMBOL FOR CANCEL
+<U2418> "<U0043><U0041><U004E>"
+% SYMBOL FOR END OF MEDIUM
+<U2419> "<U0045><U004D>"
+% SYMBOL FOR SUBSTITUTE
+<U241A> "<U0053><U0055><U0042>"
+% SYMBOL FOR ESCAPE
+<U241B> "<U0045><U0053><U0043>"
+% SYMBOL FOR FILE SEPARATOR
+<U241C> "<U0046><U0053>"
+% SYMBOL FOR GROUP SEPARATOR
+<U241D> "<U0047><U0053>"
+% SYMBOL FOR RECORD SEPARATOR
+<U241E> "<U0052><U0053>"
+% SYMBOL FOR UNIT SEPARATOR
+<U241F> "<U0055><U0053>"
+% SYMBOL FOR SPACE
+<U2420> "<U0053><U0050>"
+% SYMBOL FOR DELETE
+<U2421> "<U0044><U0045><U004C>"
+% OPEN BOX
+<U2423> <U005F>
+% SYMBOL FOR NEWLINE
+<U2424> "<U004E><U004C>"
+% SYMBOL FOR DELETE FORM TWO
+<U2425> "<U002F><U002F><U002F>"
+% SYMBOL FOR SUBSTITUTE FORM TWO
+<U2426> <U003F>
+% CIRCLED DIGIT ONE
+<U2460> "<U0028><U0031><U0029>";<U0031>
+% CIRCLED DIGIT TWO
+<U2461> "<U0028><U0032><U0029>";<U0032>
+% CIRCLED DIGIT THREE
+<U2462> "<U0028><U0033><U0029>";<U0033>
+% CIRCLED DIGIT FOUR
+<U2463> "<U0028><U0034><U0029>";<U0034>
+% CIRCLED DIGIT FIVE
+<U2464> "<U0028><U0035><U0029>";<U0035>
+% CIRCLED DIGIT SIX
+<U2465> "<U0028><U0036><U0029>";<U0036>
+% CIRCLED DIGIT SEVEN
+<U2466> "<U0028><U0037><U0029>";<U0037>
+% CIRCLED DIGIT EIGHT
+<U2467> "<U0028><U0038><U0029>";<U0038>
+% CIRCLED DIGIT NINE
+<U2468> "<U0028><U0039><U0029>";<U0039>
+% CIRCLED NUMBER TEN
+<U2469> "<U0028><U0031><U0030><U0029>"
+% CIRCLED NUMBER ELEVEN
+<U246A> "<U0028><U0031><U0031><U0029>"
+% CIRCLED NUMBER TWELVE
+<U246B> "<U0028><U0031><U0032><U0029>"
+% CIRCLED NUMBER THIRTEEN
+<U246C> "<U0028><U0031><U0033><U0029>"
+% CIRCLED NUMBER FOURTEEN
+<U246D> "<U0028><U0031><U0034><U0029>"
+% CIRCLED NUMBER FIFTEEN
+<U246E> "<U0028><U0031><U0035><U0029>"
+% CIRCLED NUMBER SIXTEEN
+<U246F> "<U0028><U0031><U0036><U0029>"
+% CIRCLED NUMBER SEVENTEEN
+<U2470> "<U0028><U0031><U0037><U0029>"
+% CIRCLED NUMBER EIGHTEEN
+<U2471> "<U0028><U0031><U0038><U0029>"
+% CIRCLED NUMBER NINETEEN
+<U2472> "<U0028><U0031><U0039><U0029>"
+% CIRCLED NUMBER TWENTY
+<U2473> "<U0028><U0032><U0030><U0029>"
+% PARENTHESIZED DIGIT ONE
+<U2474> "<U0028><U0031><U0029>";<U0031>
+% PARENTHESIZED DIGIT TWO
+<U2475> "<U0028><U0032><U0029>";<U0032>
+% PARENTHESIZED DIGIT THREE
+<U2476> "<U0028><U0033><U0029>";<U0033>
+% PARENTHESIZED DIGIT FOUR
+<U2477> "<U0028><U0034><U0029>";<U0034>
+% PARENTHESIZED DIGIT FIVE
+<U2478> "<U0028><U0035><U0029>";<U0035>
+% PARENTHESIZED DIGIT SIX
+<U2479> "<U0028><U0036><U0029>";<U0036>
+% PARENTHESIZED DIGIT SEVEN
+<U247A> "<U0028><U0037><U0029>";<U0037>
+% PARENTHESIZED DIGIT EIGHT
+<U247B> "<U0028><U0038><U0029>";<U0038>
+% PARENTHESIZED DIGIT NINE
+<U247C> "<U0028><U0039><U0029>";<U0039>
+% PARENTHESIZED NUMBER TEN
+<U247D> "<U0028><U0031><U0030><U0029>"
+% PARENTHESIZED NUMBER ELEVEN
+<U247E> "<U0028><U0031><U0031><U0029>"
+% PARENTHESIZED NUMBER TWELVE
+<U247F> "<U0028><U0031><U0032><U0029>"
+% PARENTHESIZED NUMBER THIRTEEN
+<U2480> "<U0028><U0031><U0033><U0029>"
+% PARENTHESIZED NUMBER FOURTEEN
+<U2481> "<U0028><U0031><U0034><U0029>"
+% PARENTHESIZED NUMBER FIFTEEN
+<U2482> "<U0028><U0031><U0035><U0029>"
+% PARENTHESIZED NUMBER SIXTEEN
+<U2483> "<U0028><U0031><U0036><U0029>"
+% PARENTHESIZED NUMBER SEVENTEEN
+<U2484> "<U0028><U0031><U0037><U0029>"
+% PARENTHESIZED NUMBER EIGHTEEN
+<U2485> "<U0028><U0031><U0038><U0029>"
+% PARENTHESIZED NUMBER NINETEEN
+<U2486> "<U0028><U0031><U0039><U0029>"
+% PARENTHESIZED NUMBER TWENTY
+<U2487> "<U0028><U0032><U0030><U0029>"
+% DIGIT ONE FULL STOP
+<U2488> "<U0031><U002E>";<U0031>
+% DIGIT TWO FULL STOP
+<U2489> "<U0032><U002E>";<U0032>
+% DIGIT THREE FULL STOP
+<U248A> "<U0033><U002E>";<U0033>
+% DIGIT FOUR FULL STOP
+<U248B> "<U0034><U002E>";<U0034>
+% DIGIT FIVE FULL STOP
+<U248C> "<U0035><U002E>";<U0035>
+% DIGIT SIX FULL STOP
+<U248D> "<U0036><U002E>";<U0036>
+% DIGIT SEVEN FULL STOP
+<U248E> "<U0037><U002E>";<U0037>
+% DIGIT EIGHT FULL STOP
+<U248F> "<U0038><U002E>";<U0038>
+% DIGIT NINE FULL STOP
+<U2490> "<U0039><U002E>";<U0039>
+% NUMBER TEN FULL STOP
+<U2491> "<U0031><U0030><U002E>"
+% NUMBER ELEVEN FULL STOP
+<U2492> "<U0031><U0031><U002E>"
+% NUMBER TWELVE FULL STOP
+<U2493> "<U0031><U0032><U002E>"
+% NUMBER THIRTEEN FULL STOP
+<U2494> "<U0031><U0033><U002E>"
+% NUMBER FOURTEEN FULL STOP
+<U2495> "<U0031><U0034><U002E>"
+% NUMBER FIFTEEN FULL STOP
+<U2496> "<U0031><U0035><U002E>"
+% NUMBER SIXTEEN FULL STOP
+<U2497> "<U0031><U0036><U002E>"
+% NUMBER SEVENTEEN FULL STOP
+<U2498> "<U0031><U0037><U002E>"
+% NUMBER EIGHTEEN FULL STOP
+<U2499> "<U0031><U0038><U002E>"
+% NUMBER NINETEEN FULL STOP
+<U249A> "<U0031><U0039><U002E>"
+% NUMBER TWENTY FULL STOP
+<U249B> "<U0032><U0030><U002E>"
+% PARENTHESIZED LATIN SMALL LETTER A
+<U249C> "<U0028><U0061><U0029>";<U0061>
+% PARENTHESIZED LATIN SMALL LETTER B
+<U249D> "<U0028><U0062><U0029>";<U0062>
+% PARENTHESIZED LATIN SMALL LETTER C
+<U249E> "<U0028><U0063><U0029>";<U0063>
+% PARENTHESIZED LATIN SMALL LETTER D
+<U249F> "<U0028><U0064><U0029>";<U0064>
+% PARENTHESIZED LATIN SMALL LETTER E
+<U24A0> "<U0028><U0065><U0029>";<U0065>
+% PARENTHESIZED LATIN SMALL LETTER F
+<U24A1> "<U0028><U0066><U0029>";<U0066>
+% PARENTHESIZED LATIN SMALL LETTER G
+<U24A2> "<U0028><U0067><U0029>";<U0067>
+% PARENTHESIZED LATIN SMALL LETTER H
+<U24A3> "<U0028><U0068><U0029>";<U0068>
+% PARENTHESIZED LATIN SMALL LETTER I
+<U24A4> "<U0028><U0069><U0029>";<U0069>
+% PARENTHESIZED LATIN SMALL LETTER J
+<U24A5> "<U0028><U006A><U0029>";<U006A>
+% PARENTHESIZED LATIN SMALL LETTER K
+<U24A6> "<U0028><U006B><U0029>";<U006B>
+% PARENTHESIZED LATIN SMALL LETTER L
+<U24A7> "<U0028><U006C><U0029>";<U006C>
+% PARENTHESIZED LATIN SMALL LETTER M
+<U24A8> "<U0028><U006D><U0029>";<U006D>
+% PARENTHESIZED LATIN SMALL LETTER N
+<U24A9> "<U0028><U006E><U0029>";<U006E>
+% PARENTHESIZED LATIN SMALL LETTER O
+<U24AA> "<U0028><U006F><U0029>";<U006F>
+% PARENTHESIZED LATIN SMALL LETTER P
+<U24AB> "<U0028><U0070><U0029>";<U0070>
+% PARENTHESIZED LATIN SMALL LETTER Q
+<U24AC> "<U0028><U0071><U0029>";<U0071>
+% PARENTHESIZED LATIN SMALL LETTER R
+<U24AD> "<U0028><U0072><U0029>";<U0072>
+% PARENTHESIZED LATIN SMALL LETTER S
+<U24AE> "<U0028><U0073><U0029>";<U0073>
+% PARENTHESIZED LATIN SMALL LETTER T
+<U24AF> "<U0028><U0074><U0029>";<U0074>
+% PARENTHESIZED LATIN SMALL LETTER U
+<U24B0> "<U0028><U0075><U0029>";<U0075>
+% PARENTHESIZED LATIN SMALL LETTER V
+<U24B1> "<U0028><U0076><U0029>";<U0076>
+% PARENTHESIZED LATIN SMALL LETTER W
+<U24B2> "<U0028><U0077><U0029>";<U0077>
+% PARENTHESIZED LATIN SMALL LETTER X
+<U24B3> "<U0028><U0078><U0029>";<U0078>
+% PARENTHESIZED LATIN SMALL LETTER Y
+<U24B4> "<U0028><U0079><U0029>";<U0079>
+% PARENTHESIZED LATIN SMALL LETTER Z
+<U24B5> "<U0028><U007A><U0029>";<U007A>
+% CIRCLED LATIN CAPITAL LETTER A
+<U24B6> "<U0028><U0041><U0029>";<U0041>
+% CIRCLED LATIN CAPITAL LETTER B
+<U24B7> "<U0028><U0042><U0029>";<U0042>
+% CIRCLED LATIN CAPITAL LETTER C
+<U24B8> "<U0028><U0043><U0029>";<U0043>
+% CIRCLED LATIN CAPITAL LETTER D
+<U24B9> "<U0028><U0044><U0029>";<U0044>
+% CIRCLED LATIN CAPITAL LETTER E
+<U24BA> "<U0028><U0045><U0029>";<U0045>
+% CIRCLED LATIN CAPITAL LETTER F
+<U24BB> "<U0028><U0046><U0029>";<U0046>
+% CIRCLED LATIN CAPITAL LETTER G
+<U24BC> "<U0028><U0047><U0029>";<U0047>
+% CIRCLED LATIN CAPITAL LETTER H
+<U24BD> "<U0028><U0048><U0029>";<U0048>
+% CIRCLED LATIN CAPITAL LETTER I
+<U24BE> "<U0028><U0049><U0029>";<U0049>
+% CIRCLED LATIN CAPITAL LETTER J
+<U24BF> "<U0028><U004A><U0029>";<U004A>
+% CIRCLED LATIN CAPITAL LETTER K
+<U24C0> "<U0028><U004B><U0029>";<U004B>
+% CIRCLED LATIN CAPITAL LETTER L
+<U24C1> "<U0028><U004C><U0029>";<U004C>
+% CIRCLED LATIN CAPITAL LETTER M
+<U24C2> "<U0028><U004D><U0029>";<U004D>
+% CIRCLED LATIN CAPITAL LETTER N
+<U24C3> "<U0028><U004E><U0029>";<U004E>
+% CIRCLED LATIN CAPITAL LETTER O
+<U24C4> "<U0028><U004F><U0029>";<U004F>
+% CIRCLED LATIN CAPITAL LETTER P
+<U24C5> "<U0028><U0050><U0029>";<U0050>
+% CIRCLED LATIN CAPITAL LETTER Q
+<U24C6> "<U0028><U0051><U0029>";<U0051>
+% CIRCLED LATIN CAPITAL LETTER R
+<U24C7> "<U0028><U0052><U0029>";<U0052>
+% CIRCLED LATIN CAPITAL LETTER S
+<U24C8> "<U0028><U0053><U0029>";<U0053>
+% CIRCLED LATIN CAPITAL LETTER T
+<U24C9> "<U0028><U0054><U0029>";<U0054>
+% CIRCLED LATIN CAPITAL LETTER U
+<U24CA> "<U0028><U0055><U0029>";<U0055>
+% CIRCLED LATIN CAPITAL LETTER V
+<U24CB> "<U0028><U0056><U0029>";<U0056>
+% CIRCLED LATIN CAPITAL LETTER W
+<U24CC> "<U0028><U0057><U0029>";<U0057>
+% CIRCLED LATIN CAPITAL LETTER X
+<U24CD> "<U0028><U0058><U0029>";<U0058>
+% CIRCLED LATIN CAPITAL LETTER Y
+<U24CE> "<U0028><U0059><U0029>";<U0059>
+% CIRCLED LATIN CAPITAL LETTER Z
+<U24CF> "<U0028><U005A><U0029>";<U005A>
+% CIRCLED LATIN SMALL LETTER A
+<U24D0> "<U0028><U0061><U0029>";<U0061>
+% CIRCLED LATIN SMALL LETTER B
+<U24D1> "<U0028><U0062><U0029>";<U0062>
+% CIRCLED LATIN SMALL LETTER C
+<U24D2> "<U0028><U0063><U0029>";<U0063>
+% CIRCLED LATIN SMALL LETTER D
+<U24D3> "<U0028><U0064><U0029>";<U0064>
+% CIRCLED LATIN SMALL LETTER E
+<U24D4> "<U0028><U0065><U0029>";<U0065>
+% CIRCLED LATIN SMALL LETTER F
+<U24D5> "<U0028><U0066><U0029>";<U0066>
+% CIRCLED LATIN SMALL LETTER G
+<U24D6> "<U0028><U0067><U0029>";<U0067>
+% CIRCLED LATIN SMALL LETTER H
+<U24D7> "<U0028><U0068><U0029>";<U0068>
+% CIRCLED LATIN SMALL LETTER I
+<U24D8> "<U0028><U0069><U0029>";<U0069>
+% CIRCLED LATIN SMALL LETTER J
+<U24D9> "<U0028><U006A><U0029>";<U006A>
+% CIRCLED LATIN SMALL LETTER K
+<U24DA> "<U0028><U006B><U0029>";<U006B>
+% CIRCLED LATIN SMALL LETTER L
+<U24DB> "<U0028><U006C><U0029>";<U006C>
+% CIRCLED LATIN SMALL LETTER M
+<U24DC> "<U0028><U006D><U0029>";<U006D>
+% CIRCLED LATIN SMALL LETTER N
+<U24DD> "<U0028><U006E><U0029>";<U006E>
+% CIRCLED LATIN SMALL LETTER O
+<U24DE> "<U0028><U006F><U0029>";<U006F>
+% CIRCLED LATIN SMALL LETTER P
+<U24DF> "<U0028><U0070><U0029>";<U0070>
+% CIRCLED LATIN SMALL LETTER Q
+<U24E0> "<U0028><U0071><U0029>";<U0071>
+% CIRCLED LATIN SMALL LETTER R
+<U24E1> "<U0028><U0072><U0029>";<U0072>
+% CIRCLED LATIN SMALL LETTER S
+<U24E2> "<U0028><U0073><U0029>";<U0073>
+% CIRCLED LATIN SMALL LETTER T
+<U24E3> "<U0028><U0074><U0029>";<U0074>
+% CIRCLED LATIN SMALL LETTER U
+<U24E4> "<U0028><U0075><U0029>";<U0075>
+% CIRCLED LATIN SMALL LETTER V
+<U24E5> "<U0028><U0076><U0029>";<U0076>
+% CIRCLED LATIN SMALL LETTER W
+<U24E6> "<U0028><U0077><U0029>";<U0077>
+% CIRCLED LATIN SMALL LETTER X
+<U24E7> "<U0028><U0078><U0029>";<U0078>
+% CIRCLED LATIN SMALL LETTER Y
+<U24E8> "<U0028><U0079><U0029>";<U0079>
+% CIRCLED LATIN SMALL LETTER Z
+<U24E9> "<U0028><U007A><U0029>";<U007A>
+% CIRCLED DIGIT ZERO
+<U24EA> "<U0028><U0030><U0029>";<U0030>
+% BOX DRAWINGS LIGHT HORIZONTAL
+<U2500> <U002D>
+% BOX DRAWINGS HEAVY HORIZONTAL
+<U2501> <U003D>
+% BOX DRAWINGS LIGHT VERTICAL
+<U2502> <U007C>
+% BOX DRAWINGS HEAVY VERTICAL
+<U2503> <U007C>
+% BOX DRAWINGS LIGHT TRIPLE DASH HORIZONTAL
+<U2504> <U002D>
+% BOX DRAWINGS HEAVY TRIPLE DASH HORIZONTAL
+<U2505> <U003D>
+% BOX DRAWINGS LIGHT TRIPLE DASH VERTICAL
+<U2506> <U007C>
+% BOX DRAWINGS HEAVY TRIPLE DASH VERTICAL
+<U2507> <U007C>
+% BOX DRAWINGS LIGHT QUADRUPLE DASH HORIZONTAL
+<U2508> <U002D>
+% BOX DRAWINGS HEAVY QUADRUPLE DASH HORIZONTAL
+<U2509> <U003D>
+% BOX DRAWINGS LIGHT QUADRUPLE DASH VERTICAL
+<U250A> <U007C>
+% BOX DRAWINGS HEAVY QUADRUPLE DASH VERTICAL
+<U250B> <U007C>
+% BOX DRAWINGS LIGHT DOWN AND RIGHT
+<U250C> <U002B>
+% BOX DRAWINGS DOWN LIGHT AND RIGHT HEAVY
+<U250D> <U002B>
+% BOX DRAWINGS DOWN HEAVY AND RIGHT LIGHT
+<U250E> <U002B>
+% BOX DRAWINGS HEAVY DOWN AND RIGHT
+<U250F> <U002B>
+% BOX DRAWINGS LIGHT DOWN AND LEFT
+<U2510> <U002B>
+% BOX DRAWINGS DOWN LIGHT AND LEFT HEAVY
+<U2511> <U002B>
+% BOX DRAWINGS DOWN HEAVY AND LEFT LIGHT
+<U2512> <U002B>
+% BOX DRAWINGS HEAVY DOWN AND LEFT
+<U2513> <U002B>
+% BOX DRAWINGS LIGHT UP AND RIGHT
+<U2514> <U002B>
+% BOX DRAWINGS UP LIGHT AND RIGHT HEAVY
+<U2515> <U002B>
+% BOX DRAWINGS UP HEAVY AND RIGHT LIGHT
+<U2516> <U002B>
+% BOX DRAWINGS HEAVY UP AND RIGHT
+<U2517> <U002B>
+% BOX DRAWINGS LIGHT UP AND LEFT
+<U2518> <U002B>
+% BOX DRAWINGS UP LIGHT AND LEFT HEAVY
+<U2519> <U002B>
+% BOX DRAWINGS UP HEAVY AND LEFT LIGHT
+<U251A> <U002B>
+% BOX DRAWINGS HEAVY UP AND LEFT
+<U251B> <U002B>
+% BOX DRAWINGS LIGHT VERTICAL AND RIGHT
+<U251C> <U002B>
+% BOX DRAWINGS VERTICAL LIGHT AND RIGHT HEAVY
+<U251D> <U002B>
+% BOX DRAWINGS UP HEAVY AND RIGHT DOWN LIGHT
+<U251E> <U002B>
+% BOX DRAWINGS DOWN HEAVY AND RIGHT UP LIGHT
+<U251F> <U002B>
+% BOX DRAWINGS VERTICAL HEAVY AND RIGHT LIGHT
+<U2520> <U002B>
+% BOX DRAWINGS DOWN LIGHT AND RIGHT UP HEAVY
+<U2521> <U002B>
+% BOX DRAWINGS UP LIGHT AND RIGHT DOWN HEAVY
+<U2522> <U002B>
+% BOX DRAWINGS HEAVY VERTICAL AND RIGHT
+<U2523> <U002B>
+% BOX DRAWINGS LIGHT VERTICAL AND LEFT
+<U2524> <U002B>
+% BOX DRAWINGS VERTICAL LIGHT AND LEFT HEAVY
+<U2525> <U002B>
+% BOX DRAWINGS UP HEAVY AND LEFT DOWN LIGHT
+<U2526> <U002B>
+% BOX DRAWINGS DOWN HEAVY AND LEFT UP LIGHT
+<U2527> <U002B>
+% BOX DRAWINGS VERTICAL HEAVY AND LEFT LIGHT
+<U2528> <U002B>
+% BOX DRAWINGS DOWN LIGHT AND LEFT UP HEAVY
+<U2529> <U002B>
+% BOX DRAWINGS UP LIGHT AND LEFT DOWN HEAVY
+<U252A> <U002B>
+% BOX DRAWINGS HEAVY VERTICAL AND LEFT
+<U252B> <U002B>
+% BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
+<U252C> <U002B>
+% BOX DRAWINGS LEFT HEAVY AND RIGHT DOWN LIGHT
+<U252D> <U002B>
+% BOX DRAWINGS RIGHT HEAVY AND LEFT DOWN LIGHT
+<U252E> <U002B>
+% BOX DRAWINGS DOWN LIGHT AND HORIZONTAL HEAVY
+<U252F> <U002B>
+% BOX DRAWINGS DOWN HEAVY AND HORIZONTAL LIGHT
+<U2530> <U002B>
+% BOX DRAWINGS RIGHT LIGHT AND LEFT DOWN HEAVY
+<U2531> <U002B>
+% BOX DRAWINGS LEFT LIGHT AND RIGHT DOWN HEAVY
+<U2532> <U002B>
+% BOX DRAWINGS HEAVY DOWN AND HORIZONTAL
+<U2533> <U002B>
+% BOX DRAWINGS LIGHT UP AND HORIZONTAL
+<U2534> <U002B>
+% BOX DRAWINGS LEFT HEAVY AND RIGHT UP LIGHT
+<U2535> <U002B>
+% BOX DRAWINGS RIGHT HEAVY AND LEFT UP LIGHT
+<U2536> <U002B>
+% BOX DRAWINGS UP LIGHT AND HORIZONTAL HEAVY
+<U2537> <U002B>
+% BOX DRAWINGS UP HEAVY AND HORIZONTAL LIGHT
+<U2538> <U002B>
+% BOX DRAWINGS RIGHT LIGHT AND LEFT UP HEAVY
+<U2539> <U002B>
+% BOX DRAWINGS LEFT LIGHT AND RIGHT UP HEAVY
+<U253A> <U002B>
+% BOX DRAWINGS HEAVY UP AND HORIZONTAL
+<U253B> <U002B>
+% BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
+<U253C> <U002B>
+% BOX DRAWINGS LEFT HEAVY AND RIGHT VERTICAL LIGHT
+<U253D> <U002B>
+% BOX DRAWINGS RIGHT HEAVY AND LEFT VERTICAL LIGHT
+<U253E> <U002B>
+% BOX DRAWINGS VERTICAL LIGHT AND HORIZONTAL HEAVY
+<U253F> <U002B>
+% BOX DRAWINGS UP HEAVY AND DOWN HORIZONTAL LIGHT
+<U2540> <U002B>
+% BOX DRAWINGS DOWN HEAVY AND UP HORIZONTAL LIGHT
+<U2541> <U002B>
+% BOX DRAWINGS VERTICAL HEAVY AND HORIZONTAL LIGHT
+<U2542> <U002B>
+% BOX DRAWINGS LEFT UP HEAVY AND RIGHT DOWN LIGHT
+<U2543> <U002B>
+% BOX DRAWINGS RIGHT UP HEAVY AND LEFT DOWN LIGHT
+<U2544> <U002B>
+% BOX DRAWINGS LEFT DOWN HEAVY AND RIGHT UP LIGHT
+<U2545> <U002B>
+% BOX DRAWINGS RIGHT DOWN HEAVY AND LEFT UP LIGHT
+<U2546> <U002B>
+% BOX DRAWINGS DOWN LIGHT AND UP HORIZONTAL HEAVY
+<U2547> <U002B>
+% BOX DRAWINGS UP LIGHT AND DOWN HORIZONTAL HEAVY
+<U2548> <U002B>
+% BOX DRAWINGS RIGHT LIGHT AND LEFT VERTICAL HEAVY
+<U2549> <U002B>
+% BOX DRAWINGS LEFT LIGHT AND RIGHT VERTICAL HEAVY
+<U254A> <U002B>
+% BOX DRAWINGS HEAVY VERTICAL AND HORIZONTAL
+<U254B> <U002B>
+% BOX DRAWINGS LIGHT DOUBLE DASH HORIZONTAL
+<U254C> <U002D>
+% BOX DRAWINGS HEAVY DOUBLE DASH HORIZONTAL
+<U254D> <U003D>
+% BOX DRAWINGS LIGHT DOUBLE DASH VERTICAL
+<U254E> <U007C>
+% BOX DRAWINGS HEAVY DOUBLE DASH VERTICAL
+<U254F> <U007C>
+% BOX DRAWINGS DOUBLE HORIZONTAL
+<U2550> <U003D>
+% BOX DRAWINGS DOUBLE VERTICAL
+<U2551> <U007C>
+% BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
+<U2552> <U002B>
+% BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
+<U2553> <U002B>
+% BOX DRAWINGS DOUBLE DOWN AND RIGHT
+<U2554> <U002B>
+% BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
+<U2555> <U002B>
+% BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
+<U2556> <U002B>
+% BOX DRAWINGS DOUBLE DOWN AND LEFT
+<U2557> <U002B>
+% BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
+<U2558> <U002B>
+% BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
+<U2559> <U002B>
+% BOX DRAWINGS DOUBLE UP AND RIGHT
+<U255A> <U002B>
+% BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
+<U255B> <U002B>
+% BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
+<U255C> <U002B>
+% BOX DRAWINGS DOUBLE UP AND LEFT
+<U255D> <U002B>
+% BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
+<U255E> <U002B>
+% BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
+<U255F> <U002B>
+% BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
+<U2560> <U002B>
+% BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
+<U2561> <U002B>
+% BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
+<U2562> <U002B>
+% BOX DRAWINGS DOUBLE VERTICAL AND LEFT
+<U2563> <U002B>
+% BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
+<U2564> <U002B>
+% BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
+<U2565> <U002B>
+% BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
+<U2566> <U002B>
+% BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
+<U2567> <U002B>
+% BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
+<U2568> <U002B>
+% BOX DRAWINGS DOUBLE UP AND HORIZONTAL
+<U2569> <U002B>
+% BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
+<U256A> <U002B>
+% BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
+<U256B> <U002B>
+% BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
+<U256C> <U002B>
+% BOX DRAWINGS LIGHT ARC DOWN AND RIGHT
+<U256D> <U002B>
+% BOX DRAWINGS LIGHT ARC DOWN AND LEFT
+<U256E> <U002B>
+% BOX DRAWINGS LIGHT ARC UP AND LEFT
+<U256F> <U002B>
+% BOX DRAWINGS LIGHT ARC UP AND RIGHT
+<U2570> <U002B>
+% BOX DRAWINGS LIGHT DIAGONAL UPPER RIGHT TO LOWER LEFT
+<U2571> <U002F>
+% BOX DRAWINGS LIGHT DIAGONAL UPPER LEFT TO LOWER RIGHT
+<U2572> <U005C>
+% BOX DRAWINGS LIGHT DIAGONAL CROSS
+<U2573> <U0058>
+% BOX DRAWINGS LIGHT LEFT AND HEAVY RIGHT
+<U257C> <U002D>
+% BOX DRAWINGS LIGHT UP AND HEAVY DOWN
+<U257D> <U007C>
+% BOX DRAWINGS HEAVY LEFT AND LIGHT RIGHT
+<U257E> <U002D>
+% BOX DRAWINGS HEAVY UP AND LIGHT DOWN
+<U257F> <U007C>
+% WHITE CIRCLE
+<U25CB> <U006F>
+% WHITE BULLET
+<U25E6> <U006F>
+% BLACK STAR
+<U2605> <U002A>
+% WHITE STAR
+<U2606> <U002A>
+% BALLOT BOX WITH X
+<U2612> <U0058>
+% SALTIRE
+<U2613> <U0058>
+% WHITE FROWNING FACE
+<U2639> "<U003A><U002D><U0028>"
+% WHITE SMILING FACE
+<U263A> "<U003A><U002D><U0029>"
+% BLACK SMILING FACE
+<U263B> "<U0028><U002D><U003A>"
+% MUSIC FLAT SIGN
+<U266D> <U0062>
+% MUSIC SHARP SIGN
+<U266F> <U0023>
+% UPPER BLADE SCISSORS
+<U2701> "<U0025><U003C>"
+% BLACK SCISSORS
+<U2702> "<U0025><U003C>"
+% LOWER BLADE SCISSORS
+<U2703> "<U0025><U003C>"
+% WHITE SCISSORS
+<U2704> "<U0025><U003C>"
+% VICTORY HAND
+<U270C> <U0056>
+% CHECK MARK
+<U2713> <U221A>
+% HEAVY CHECK MARK
+<U2714> <U221A>
+% MULTIPLICATION X
+<U2715> <U0078>
+% HEAVY MULTIPLICATION X
+<U2716> <U0078>
+% BALLOT X
+<U2717> <U0058>
+% HEAVY BALLOT X
+<U2718> <U0058>
+% OUTLINED GREEK CROSS
+<U2719> <U002B>
+% HEAVY GREEK CROSS
+<U271A> <U002B>
+% OPEN CENTRE CROSS
+<U271B> <U002B>
+% HEAVY OPEN CENTRE CROSS
+<U271C> <U002B>
+% LATIN CROSS
+<U271D> <U002B>
+% SHADOWED WHITE LATIN CROSS
+<U271E> <U002B>
+% OUTLINED LATIN CROSS
+<U271F> <U002B>
+% MALTESE CROSS
+<U2720> <U002B>
+% STAR OF DAVID
+<U2721> <U002A>
+% FOUR TEARDROP-SPOKED ASTERISK
+<U2722> <U002B>
+% FOUR BALLOON-SPOKED ASTERISK
+<U2723> <U002B>
+% HEAVY FOUR BALLOON-SPOKED ASTERISK
+<U2724> <U002B>
+% FOUR CLUB-SPOKED ASTERISK
+<U2725> <U002B>
+% BLACK FOUR POINTED STAR
+<U2726> <U002B>
+% WHITE FOUR POINTED STAR
+<U2727> <U002B>
+% STRESS OUTLINED WHITE STAR
+<U2729> <U002A>
+% CIRCLED WHITE STAR
+<U272A> <U002A>
+% OPEN CENTRE BLACK STAR
+<U272B> <U002A>
+% BLACK CENTRE WHITE STAR
+<U272C> <U002A>
+% OUTLINED BLACK STAR
+<U272D> <U002A>
+% HEAVY OUTLINED BLACK STAR
+<U272E> <U002A>
+% PINWHEEL STAR
+<U272F> <U002A>
+% SHADOWED WHITE STAR
+<U2730> <U002A>
+% HEAVY ASTERISK
+<U2731> <U002A>
+% OPEN CENTRE ASTERISK
+<U2732> <U002A>
+% EIGHT SPOKED ASTERISK
+<U2733> <U002A>
+% EIGHT POINTED BLACK STAR
+<U2734> <U002A>
+% EIGHT POINTED PINWHEEL STAR
+<U2735> <U002A>
+% SIX POINTED BLACK STAR
+<U2736> <U002A>
+% EIGHT POINTED RECTILINEAR BLACK STAR
+<U2737> <U002A>
+% HEAVY EIGHT POINTED RECTILINEAR BLACK STAR
+<U2738> <U002A>
+% TWELVE POINTED BLACK STAR
+<U2739> <U002A>
+% SIXTEEN POINTED ASTERISK
+<U273A> <U002A>
+% TEARDROP-SPOKED ASTERISK
+<U273B> <U002A>
+% OPEN CENTRE TEARDROP-SPOKED ASTERISK
+<U273C> <U002A>
+% HEAVY TEARDROP-SPOKED ASTERISK
+<U273D> <U002A>
+% SIX PETALLED BLACK AND WHITE FLORETTE
+<U273E> <U002A>
+% BLACK FLORETTE
+<U273F> <U002A>
+% WHITE FLORETTE
+<U2740> <U002A>
+% EIGHT PETALLED OUTLINED BLACK FLORETTE
+<U2741> <U002A>
+% CIRCLED OPEN CENTRE EIGHT POINTED STAR
+<U2742> <U002A>
+% HEAVY TEARDROP-SPOKED PINWHEEL ASTERISK
+<U2743> <U002A>
+% SNOWFLAKE
+<U2744> <U002A>
+% TIGHT TRIFOLIATE SNOWFLAKE
+<U2745> <U002A>
+% HEAVY CHEVRON SNOWFLAKE
+<U2746> <U002A>
+% SPARKLE
+<U2747> <U002A>
+% HEAVY SPARKLE
+<U2748> <U002A>
+% BALLOON-SPOKED ASTERISK
+<U2749> <U002A>
+% EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
+<U274A> <U002A>
+% HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
+<U274B> <U002A>
+% LATIN SMALL LIGATURE FF
+<UFB00> "<U0066><U0066>"
+% LATIN SMALL LIGATURE FI
+<UFB01> "<U0066><U0069>"
+% LATIN SMALL LIGATURE FL
+<UFB02> "<U0066><U006C>"
+% LATIN SMALL LIGATURE FFI
+<UFB03> "<U0066><U0066><U0069>"
+% LATIN SMALL LIGATURE FFL
+<UFB04> "<U0066><U0066><U006C>"
+% LATIN SMALL LIGATURE LONG S T
+<UFB05> "<U017F><U0074>";"<U0073><U0074>"
+% LATIN SMALL LIGATURE ST
+<UFB06> "<U0073><U0074>"
+% ZERO WIDTH NO-BREAK SPACE
+<UFEFF> ""
+% REPLACEMENT CHARACTER
+<UFFFD> <U003F>
diff --git a/utils/tt2code b/utils/tt2code
new file mode 100755
index 000000000..c4dc07820
--- /dev/null
+++ b/utils/tt2code
@@ -0,0 +1,51 @@
+#!/usr/bin/perl -W
+# Emit the C source of unicode_transliterate() from a transtab table (stdin or file arguments).
+print <<END;
+void unicode_transliterate(unsigned int c, char **r)
+{
+ char *s = *r;
+ switch (c) {
+
+END
+# Table lines read '<Uxxxx> alt;alt;...'; the first alternative made only of code points <= 0xFF becomes the case body.
+LINE: while (<>) {
+	chomp;
+	next if m/^%/;		# transtab comment line
+	next if m/^ *$/;	# blank line
+
+	m/^<U([0-9A-F]{4})> /g or die "invalid line '$_'";	# /g leaves pos() just after the source code point
+	$z = $1;
+	next if (hex($z) < 256);	# no case is generated for code points below 256
+	# Walk the alternatives in order; \G resumes where the previous match stopped.
+	SUBST: while (m/\G"?((<U([0-9A-F]{4})>)*)"?;?/g) {
+		next if $& eq '';	# zero-length match: nothing left to parse on this line
+		$m = $1;
+		if ($m eq '') {
+			print "case 0x$z: break;\n";	# "" maps the character to nothing
+			next LINE;	# finished with this character: a later alternative would emit a duplicate case label
+		}
+		chop $m;	# drop the trailing '>'
+		@s = split /></, substr $m, 1;	# drop the leading '<' and split into 'Uxxxx' items
+		foreach $s (@s) {
+			$s = substr $s, 1;	# strip the 'U', leaving the hex digits
+			next SUBST if 255 < hex($s);	# not representable in one byte: try the next alternative
+		}
+
+		print "case 0x$z: ";
+		foreach $s (@s) {
+			print "*s++ = 0x$s; ";
+		}
+		print "break;\n";
+		next LINE;
+	}
+}
+
+print <<END;
+
+default: *s++ = '?'; break;
+ }
+
+ *r = s;
+}
+END
+
diff --git a/utils/utils.c b/utils/utils.c
index ecc31f995..8cd6e1f68 100644
--- a/utils/utils.c
+++ b/utils/utils.c
@@ -117,22 +117,40 @@ char * squash_whitespace(const char * s)
char * tolat1(xmlChar * s)
{
 unsigned int length = strlen((char*) s);
- char *d = xcalloc(length + 1, sizeof(char));
+ unsigned int space = length + 100;
+ char *d = xcalloc(space, sizeof(char));
 char *d0 = d;
+ char *end = d0 + space - 10;
 int u, chars;
 while (*s != 0) {
+ /* Keep 10 bytes of headroom before every character, not only after a
+ * transliteration: the plain branches below write too. xrealloc() may
+ * move the buffer, so d is rebased onto the new d0 afterwards.
+ * Assumes one transliteration never exceeds 9 bytes - TODO confirm. */
+ if (end < d) {
+ size_t used = (size_t) (d - d0);
+ space += 100;
+ d0 = xrealloc(d0, space);
+ d = d0 + used;
+ end = d0 + space - 10;
+ }
 chars = length;
 u = xmlGetUTF8Char((unsigned char *) s, &chars);
+ if (chars <= 0) {
+ s += 1;
+ length -= 1;
+ LOG(("UTF-8 error"));
+ continue;
+ }
 s += chars;
 length -= chars;
 if (u == 0x09 || u == 0x0a || u == 0x0d)
- *d = ' ';
+ *d++ = ' ';
 else if ((0x20 <= u && u <= 0x7f) || (0xa0 <= u && u <= 0xff))
- *d = u;
 else
- *d = '?';
- d++;
+ *d++ = u;
+ unicode_transliterate((unsigned int) u, &d);
 }
 *d = 0;
diff --git a/utils/utils.h b/utils/utils.h
index 4feeb34a3..02b927711 100644
--- a/utils/utils.h
+++ b/utils/utils.h
@@ -31,5 +31,6 @@ char *get_host_from_url(char* url);
bool is_dir(const char *path);
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags);
void clean_cookiejar(void);
+void unicode_transliterate(unsigned int c, char **r);
#endif