summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Mark Bell <jmb@netsurf-browser.org>2008-11-19 18:14:42 +0000
committerJohn Mark Bell <jmb@netsurf-browser.org>2008-11-19 18:14:42 +0000
commiteaa5a508641a35871452ea3d3413745c58521023 (patch)
tree704e16810c63ad7b2c39245043006b4579c17ba6
parentd22f57165da1f0d63b789bcb36f5c6c2d8461baa (diff)
downloadiconv-eaa5a508641a35871452ea3d3413745c58521023.tar.gz
iconv-eaa5a508641a35871452ea3d3413745c58521023.tar.bz2
Resurrect the sources of the Aliases file generator.
svn path=/trunk/iconv/; revision=5737
-rw-r--r--aliases/Makefile66
-rw-r--r--aliases/data/aliases-bottom43
-rw-r--r--aliases/data/aliases-top10
-rw-r--r--aliases/data/character-sets1868
-rw-r--r--aliases/makealiases.c243
5 files changed, 2230 insertions, 0 deletions
diff --git a/aliases/Makefile b/aliases/Makefile
new file mode 100644
index 0000000..ed19a18
--- /dev/null
+++ b/aliases/Makefile
@@ -0,0 +1,66 @@
+# Child makefile fragment
+#
+# Toolchain is provided by top-level makefile
+#
+# Variables provided by top-level makefile
+#
+# COMPONENT The name of the component
+# EXPORT The location of the export directory
+# TOP The location of the source tree root
+# RELEASEDIR The place to put release objects
+# DEBUGDIR The place to put debug objects
+#
+# do_include Canned command sequence to include a child makefile
+#
+# Variables provided by parent makefile:
+#
+# DIR The name of the directory we're in, relative to $(TOP)
+#
+# Variables we can manipulate:
+#
+# ITEMS_CLEAN The list of items to remove for "make clean"
+# ITEMS_DISTCLEAN The list of items to remove for "make distclean"
+# TARGET_TESTS The list of target names to run for "make test"
+#
+# SOURCES The list of sources to build for $(COMPONENT)
+#
+# Plus anything from the toolchain
+
+# Push onto the directory stack: deepen sp by one ".x", save the parent's $(d) under it, then switch d to $(DIR)
+sp := $(sp).x
+dirstack_$(sp) := $(d)
+d := $(DIR)
+
+# Add $(TOP)/src and this directory to the compiler include path
+CFLAGS := $(CFLAGS) -I$(TOP)/src -I$(d)
+
+SRCS_$(d) := makealiases.c
+
+DATA_$(d) := aliases-top character-sets aliases-bottom
+
+MAKEALIASES := $(d)makealiases
+
+ALIASES := $(d)Aliases
+
+# Register this directory's outputs on the top-level clean and distclean lists
+ITEMS_CLEAN := $(ITEMS_CLEAN) $(MAKEALIASES)
+
+ITEMS_DISTCLEAN := $(ITEMS_DISTCLEAN) $(ALIASES)
+
+.PHONY: aliases
+
+# Target for building the aliases file: runs makealiases with the data files (every prerequisite after the tool itself) followed by $(ALIASES)
+aliases: $(MAKEALIASES) $(addprefix $(d)data/, $(DATA_$(d)))
+	@$(MAKEALIASES) $(wordlist 2,$(words $^),$^) $(ALIASES)
+
+# Target for building the makealiases binary; compiled with CFLAGS, the flags variable this fragment extends with its -I paths
+$(MAKEALIASES): $(addprefix $(d), $(SRCS_$(d)))
+	@$(CC) $(CFLAGS) -o $@ $^
+
+# Include every Makefile found one directory level below this one, expanding do_include for each and evaluating the result
+MAKE_INCLUDES := $(wildcard $(d)*/Makefile)
+$(eval $(foreach INC, $(MAKE_INCLUDES), $(call do_include,$(INC))))
+
+# Finally, pop the directory stack: restore the $(d) saved at this level, then strip the trailing ".x" from sp
+d := $(dirstack_$(sp))
+sp := $(basename $(sp))
diff --git a/aliases/data/aliases-bottom b/aliases/data/aliases-bottom
new file mode 100644
index 0000000..fb68432
--- /dev/null
+++ b/aliases/data/aliases-bottom
@@ -0,0 +1,43 @@
+
+# Additional encodings not defined by IANA
+
+# Arbitrary allocations
+#CP737 3001
+#CP853 3002
+#CP856 3003
+CP874 3004 WINDOWS-874
+#CP922 3005
+#CP1046 3006
+#CP1124 3007
+#CP1125 3008 WINDOWS-1125
+#CP1129 3009
+#CP1133 3010 IBM-CP1133
+#CP1161 3011 IBM-1161 IBM1161 CSIBM1161
+#CP1162 3012 IBM-1162 IBM1162 CSIBM1162
+#CP1163 3013 IBM-1163 IBM1163 CSIBM1163
+#GEORGIAN-ACADEMY 3014
+#GEORGIAN-PS 3015
+#KOI8-RU 3016
+#KOI8-T 3017
+#MACARABIC 3018 X-MAC-ARABIC MAC-ARABIC
+#MACCROATIAN 3019 X-MAC-CROATIAN MAC-CROATIAN
+#MACGREEK 3020 X-MAC-GREEK MAC-GREEK
+#MACHEBREW 3021 X-MAC-HEBREW MAC-HEBREW
+#MACICELAND 3022 X-MAC-ICELAND MAC-ICELAND
+#MACROMANIA 3023 X-MAC-ROMANIA MAC-ROMANIA
+#MACTHAI 3024 X-MAC-THAI MAC-THAI
+#MACTURKISH 3025 X-MAC-TURKISH MAC-TURKISH
+#MULELAO-1 3026
+
+# From Unicode Lib
+ISO-IR-182 4000
+ISO-IR-197 4002
+ISO-2022-JP-1 4008
+MACCYRILLIC 4009 X-MAC-CYRILLIC MAC-CYRILLIC
+MACUKRAINE 4010 X-MAC-UKRAINIAN MAC-UKRAINIAN
+MACCENTRALEUROPE 4011 X-MAC-CENTRALEURROMAN MAC-CENTRALEURROMAN
+JOHAB 4012
+ISO-8859-11 4014 iso-ir-166 ISO_8859-11 ISO8859-11 8859_11
+X-CURRENT 4999 X-SYSTEM
+X-ACORN-LATIN1 5001
+X-ACORN-FUZZY 5002
diff --git a/aliases/data/aliases-top b/aliases/data/aliases-top
new file mode 100644
index 0000000..84bf99c
--- /dev/null
+++ b/aliases/data/aliases-top
@@ -0,0 +1,10 @@
+# > Unicode:Files.Aliases
+# Mapping of character set encoding names to their canonical form
+#
+# Lines starting with a '#' are comments, blank lines are ignored.
+#
+# Based on http://www.iana.org/assignments/character-sets and
+# http://www.iana.org/assignments/ianacharset-mib
+#
+# Canonical Form MIBenum Aliases...
+#
diff --git a/aliases/data/character-sets b/aliases/data/character-sets
new file mode 100644
index 0000000..8cda52d
--- /dev/null
+++ b/aliases/data/character-sets
@@ -0,0 +1,1868 @@
+
+===================================================================
+CHARACTER SETS
+
+(last updated 28 January 2005)
+
+These are the official names for character sets that may be used in
+the Internet and may be referred to in Internet documentation. These
+names are expressed in ANSI_X3.4-1968 which is commonly called
+US-ASCII or simply ASCII. The character set most commonly used in the
+Internet and used especially in protocol standards is US-ASCII; this
+is strongly encouraged. The use of the name US-ASCII is also
+encouraged.
+
+The character set names may be up to 40 characters taken from the
+printable characters of US-ASCII. However, no distinction is made
+between use of upper and lower case letters.
+
+The MIBenum value is a unique value for use in MIBs to identify coded
+character sets.
+
+The value space for MIBenum values has been divided into three
+regions. The first region (3-999) consists of coded character sets
+that have been standardized by some standard setting organization.
+This region is intended for standards that do not have subset
+implementations. The second region (1000-1999) is for the Unicode and
+ISO/IEC 10646 coded character sets together with a specification of a
+(set of) sub-repertoires that may occur. The third region (>1999) is
+intended for vendor specific coded character sets.
+
+ Assigned MIB enum Numbers
+ -------------------------
+ 0-2 Reserved
+ 3-999 Set By Standards Organizations
+ 1000-1999 Unicode / 10646
+ 2000-2999 Vendor
+
+The aliases that start with "cs" have been added for use with the
+IANA-CHARSET-MIB as originally defined in RFC3808, and as currently
+maintained by IANA at http://www.iana.org/assignments/ianacharset-mib.
+Note that the ianacharset-mib needs to be kept in sync with this
+registry. These aliases that start with "cs" contain the standard
+numbers along with suggestive names in order to facilitate applications
+that want to display the names in user interfaces. The "cs" stands
+for character set and is provided for applications that need a lower
+case first letter but want to use mixed case thereafter that cannot
+contain any special characters, such as underbar ("_") and dash ("-").
+
+If the character set is from an ISO standard, its cs alias is the ISO
+standard number or name. If the character set is not from an ISO
+standard, but is registered with ISO (IPSJ/ITSCJ is the current ISO
+Registration Authority), the ISO Registry number is specified as
+ISOnnn followed by letters suggestive of the name or standards number
+of the code set. When a national or international standard is
+revised, the year of revision is added to the cs alias of the new
+character set entry in the IANA Registry in order to distinguish the
+revised character set from the original character set.
+
+
+Character Set Reference
+------------- ---------
+
+Name: ANSI_X3.4-1968 [RFC1345,KXS2]
+MIBenum: 3
+Source: ECMA registry
+Alias: iso-ir-6
+Alias: ANSI_X3.4-1986
+Alias: ISO_646.irv:1991
+Alias: ASCII
+Alias: ISO646-US
+Alias: US-ASCII (preferred MIME name)
+Alias: us
+Alias: IBM367
+Alias: cp367
+Alias: csASCII
+
+Name: ISO-10646-UTF-1
+MIBenum: 27
+Source: Universal Transfer Format (1), this is the multibyte
+ encoding, that subsets ASCII-7. It does not have byte
+ ordering issues.
+Alias: csISO10646UTF1
+
+Name: ISO_646.basic:1983 [RFC1345,KXS2]
+MIBenum: 28
+Source: ECMA registry
+Alias: ref
+Alias: csISO646basic1983
+
+Name: INVARIANT [RFC1345,KXS2]
+MIBenum: 29
+Alias: csINVARIANT
+
+Name: ISO_646.irv:1983 [RFC1345,KXS2]
+MIBenum: 30
+Source: ECMA registry
+Alias: iso-ir-2
+Alias: irv
+Alias: csISO2IntlRefVersion
+
+Name: BS_4730 [RFC1345,KXS2]
+MIBenum: 20
+Source: ECMA registry
+Alias: iso-ir-4
+Alias: ISO646-GB
+Alias: gb
+Alias: uk
+Alias: csISO4UnitedKingdom
+
+Name: NATS-SEFI [RFC1345,KXS2]
+MIBenum: 31
+Source: ECMA registry
+Alias: iso-ir-8-1
+Alias: csNATSSEFI
+
+Name: NATS-SEFI-ADD [RFC1345,KXS2]
+MIBenum: 32
+Source: ECMA registry
+Alias: iso-ir-8-2
+Alias: csNATSSEFIADD
+
+Name: NATS-DANO [RFC1345,KXS2]
+MIBenum: 33
+Source: ECMA registry
+Alias: iso-ir-9-1
+Alias: csNATSDANO
+
+Name: NATS-DANO-ADD [RFC1345,KXS2]
+MIBenum: 34
+Source: ECMA registry
+Alias: iso-ir-9-2
+Alias: csNATSDANOADD
+
+Name: SEN_850200_B [RFC1345,KXS2]
+MIBenum: 35
+Source: ECMA registry
+Alias: iso-ir-10
+Alias: FI
+Alias: ISO646-FI
+Alias: ISO646-SE
+Alias: se
+Alias: csISO10Swedish
+
+Name: SEN_850200_C [RFC1345,KXS2]
+MIBenum: 21
+Source: ECMA registry
+Alias: iso-ir-11
+Alias: ISO646-SE2
+Alias: se2
+Alias: csISO11SwedishForNames
+
+Name: KS_C_5601-1987 [RFC1345,KXS2]
+MIBenum: 36
+Source: ECMA registry
+Alias: iso-ir-149
+Alias: KS_C_5601-1989
+Alias: KSC_5601
+Alias: korean
+Alias: csKSC56011987
+
+Name: ISO-2022-KR (preferred MIME name) [RFC1557,Choi]
+MIBenum: 37
+Source: RFC-1557 (see also KS_C_5601-1987)
+Alias: csISO2022KR
+
+Name: EUC-KR (preferred MIME name) [RFC1557,Choi]
+MIBenum: 38
+Source: RFC-1557 (see also KS_C_5861-1992)
+Alias: csEUCKR
+
+Name: ISO-2022-JP (preferred MIME name) [RFC1468,Murai]
+MIBenum: 39
+Source: RFC-1468 (see also RFC-2237)
+Alias: csISO2022JP
+
+Name: ISO-2022-JP-2 (preferred MIME name) [RFC1554,Ohta]
+MIBenum: 40
+Source: RFC-1554
+Alias: csISO2022JP2
+
+Name: ISO-2022-CN [RFC1922]
+MIBenum: 104
+Source: RFC-1922
+
+Name: ISO-2022-CN-EXT [RFC1922]
+MIBenum: 105
+Source: RFC-1922
+
+Name: JIS_C6220-1969-jp [RFC1345,KXS2]
+MIBenum: 41
+Source: ECMA registry
+Alias: JIS_C6220-1969
+Alias: iso-ir-13
+Alias: katakana
+Alias: x0201-7
+Alias: csISO13JISC6220jp
+
+Name: JIS_C6220-1969-ro [RFC1345,KXS2]
+MIBenum: 42
+Source: ECMA registry
+Alias: iso-ir-14
+Alias: jp
+Alias: ISO646-JP
+Alias: csISO14JISC6220ro
+
+Name: IT [RFC1345,KXS2]
+MIBenum: 22
+Source: ECMA registry
+Alias: iso-ir-15
+Alias: ISO646-IT
+Alias: csISO15Italian
+
+Name: PT [RFC1345,KXS2]
+MIBenum: 43
+Source: ECMA registry
+Alias: iso-ir-16
+Alias: ISO646-PT
+Alias: csISO16Portuguese
+
+Name: ES [RFC1345,KXS2]
+MIBenum: 23
+Source: ECMA registry
+Alias: iso-ir-17
+Alias: ISO646-ES
+Alias: csISO17Spanish
+
+Name: greek7-old [RFC1345,KXS2]
+MIBenum: 44
+Source: ECMA registry
+Alias: iso-ir-18
+Alias: csISO18Greek7Old
+
+Name: latin-greek [RFC1345,KXS2]
+MIBenum: 45
+Source: ECMA registry
+Alias: iso-ir-19
+Alias: csISO19LatinGreek
+
+Name: DIN_66003 [RFC1345,KXS2]
+MIBenum: 24
+Source: ECMA registry
+Alias: iso-ir-21
+Alias: de
+Alias: ISO646-DE
+Alias: csISO21German
+
+Name: NF_Z_62-010_(1973) [RFC1345,KXS2]
+MIBenum: 46
+Source: ECMA registry
+Alias: iso-ir-25
+Alias: ISO646-FR1
+Alias: csISO25French
+
+Name: Latin-greek-1 [RFC1345,KXS2]
+MIBenum: 47
+Source: ECMA registry
+Alias: iso-ir-27
+Alias: csISO27LatinGreek1
+
+Name: ISO_5427 [RFC1345,KXS2]
+MIBenum: 48
+Source: ECMA registry
+Alias: iso-ir-37
+Alias: csISO5427Cyrillic
+
+Name: JIS_C6226-1978 [RFC1345,KXS2]
+MIBenum: 49
+Source: ECMA registry
+Alias: iso-ir-42
+Alias: csISO42JISC62261978
+
+Name: BS_viewdata [RFC1345,KXS2]
+MIBenum: 50
+Source: ECMA registry
+Alias: iso-ir-47
+Alias: csISO47BSViewdata
+
+Name: INIS [RFC1345,KXS2]
+MIBenum: 51
+Source: ECMA registry
+Alias: iso-ir-49
+Alias: csISO49INIS
+
+Name: INIS-8 [RFC1345,KXS2]
+MIBenum: 52
+Source: ECMA registry
+Alias: iso-ir-50
+Alias: csISO50INIS8
+
+Name: INIS-cyrillic [RFC1345,KXS2]
+MIBenum: 53
+Source: ECMA registry
+Alias: iso-ir-51
+Alias: csISO51INISCyrillic
+
+Name: ISO_5427:1981 [RFC1345,KXS2]
+MIBenum: 54
+Source: ECMA registry
+Alias: iso-ir-54
+Alias: ISO5427Cyrillic1981
+
+Name: ISO_5428:1980 [RFC1345,KXS2]
+MIBenum: 55
+Source: ECMA registry
+Alias: iso-ir-55
+Alias: csISO5428Greek
+
+Name: GB_1988-80 [RFC1345,KXS2]
+MIBenum: 56
+Source: ECMA registry
+Alias: iso-ir-57
+Alias: cn
+Alias: ISO646-CN
+Alias: csISO57GB1988
+
+Name: GB_2312-80 [RFC1345,KXS2]
+MIBenum: 57
+Source: ECMA registry
+Alias: iso-ir-58
+Alias: chinese
+Alias: csISO58GB231280
+
+Name: NS_4551-1 [RFC1345,KXS2]
+MIBenum: 25
+Source: ECMA registry
+Alias: iso-ir-60
+Alias: ISO646-NO
+Alias: no
+Alias: csISO60DanishNorwegian
+Alias: csISO60Norwegian1
+
+Name: NS_4551-2 [RFC1345,KXS2]
+MIBenum: 58
+Source: ECMA registry
+Alias: ISO646-NO2
+Alias: iso-ir-61
+Alias: no2
+Alias: csISO61Norwegian2
+
+Name: NF_Z_62-010 [RFC1345,KXS2]
+MIBenum: 26
+Source: ECMA registry
+Alias: iso-ir-69
+Alias: ISO646-FR
+Alias: fr
+Alias: csISO69French
+
+Name: videotex-suppl [RFC1345,KXS2]
+MIBenum: 59
+Source: ECMA registry
+Alias: iso-ir-70
+Alias: csISO70VideotexSupp1
+
+Name: PT2 [RFC1345,KXS2]
+MIBenum: 60
+Source: ECMA registry
+Alias: iso-ir-84
+Alias: ISO646-PT2
+Alias: csISO84Portuguese2
+
+Name: ES2 [RFC1345,KXS2]
+MIBenum: 61
+Source: ECMA registry
+Alias: iso-ir-85
+Alias: ISO646-ES2
+Alias: csISO85Spanish2
+
+Name: MSZ_7795.3 [RFC1345,KXS2]
+MIBenum: 62
+Source: ECMA registry
+Alias: iso-ir-86
+Alias: ISO646-HU
+Alias: hu
+Alias: csISO86Hungarian
+
+Name: JIS_C6226-1983 [RFC1345,KXS2]
+MIBenum: 63
+Source: ECMA registry
+Alias: iso-ir-87
+Alias: x0208
+Alias: JIS_X0208-1983
+Alias: csISO87JISX0208
+
+Name: greek7 [RFC1345,KXS2]
+MIBenum: 64
+Source: ECMA registry
+Alias: iso-ir-88
+Alias: csISO88Greek7
+
+Name: ASMO_449 [RFC1345,KXS2]
+MIBenum: 65
+Source: ECMA registry
+Alias: ISO_9036
+Alias: arabic7
+Alias: iso-ir-89
+Alias: csISO89ASMO449
+
+Name: iso-ir-90 [RFC1345,KXS2]
+MIBenum: 66
+Source: ECMA registry
+Alias: csISO90
+
+Name: JIS_C6229-1984-a [RFC1345,KXS2]
+MIBenum: 67
+Source: ECMA registry
+Alias: iso-ir-91
+Alias: jp-ocr-a
+Alias: csISO91JISC62291984a
+
+Name: JIS_C6229-1984-b [RFC1345,KXS2]
+MIBenum: 68
+Source: ECMA registry
+Alias: iso-ir-92
+Alias: ISO646-JP-OCR-B
+Alias: jp-ocr-b
+Alias: csISO92JISC62991984b
+
+Name: JIS_C6229-1984-b-add [RFC1345,KXS2]
+MIBenum: 69
+Source: ECMA registry
+Alias: iso-ir-93
+Alias: jp-ocr-b-add
+Alias: csISO93JIS62291984badd
+
+Name: JIS_C6229-1984-hand [RFC1345,KXS2]
+MIBenum: 70
+Source: ECMA registry
+Alias: iso-ir-94
+Alias: jp-ocr-hand
+Alias: csISO94JIS62291984hand
+
+Name: JIS_C6229-1984-hand-add [RFC1345,KXS2]
+MIBenum: 71
+Source: ECMA registry
+Alias: iso-ir-95
+Alias: jp-ocr-hand-add
+Alias: csISO95JIS62291984handadd
+
+Name: JIS_C6229-1984-kana [RFC1345,KXS2]
+MIBenum: 72
+Source: ECMA registry
+Alias: iso-ir-96
+Alias: csISO96JISC62291984kana
+
+Name: ISO_2033-1983 [RFC1345,KXS2]
+MIBenum: 73
+Source: ECMA registry
+Alias: iso-ir-98
+Alias: e13b
+Alias: csISO2033
+
+Name: ANSI_X3.110-1983 [RFC1345,KXS2]
+MIBenum: 74
+Source: ECMA registry
+Alias: iso-ir-99
+Alias: CSA_T500-1983
+Alias: NAPLPS
+Alias: csISO99NAPLPS
+
+Name: ISO_8859-1:1987 [RFC1345,KXS2]
+MIBenum: 4
+Source: ECMA registry
+Alias: iso-ir-100
+Alias: ISO_8859-1
+Alias: ISO-8859-1 (preferred MIME name)
+Alias: latin1
+Alias: l1
+Alias: IBM819
+Alias: CP819
+Alias: csISOLatin1
+
+Name: ISO_8859-2:1987 [RFC1345,KXS2]
+MIBenum: 5
+Source: ECMA registry
+Alias: iso-ir-101
+Alias: ISO_8859-2
+Alias: ISO-8859-2 (preferred MIME name)
+Alias: latin2
+Alias: l2
+Alias: csISOLatin2
+
+Name: T.61-7bit [RFC1345,KXS2]
+MIBenum: 75
+Source: ECMA registry
+Alias: iso-ir-102
+Alias: csISO102T617bit
+
+Name: T.61-8bit [RFC1345,KXS2]
+MIBenum: 76
+Alias: T.61
+Source: ECMA registry
+Alias: iso-ir-103
+Alias: csISO103T618bit
+
+Name: ISO_8859-3:1988 [RFC1345,KXS2]
+MIBenum: 6
+Source: ECMA registry
+Alias: iso-ir-109
+Alias: ISO_8859-3
+Alias: ISO-8859-3 (preferred MIME name)
+Alias: latin3
+Alias: l3
+Alias: csISOLatin3
+
+Name: ISO_8859-4:1988 [RFC1345,KXS2]
+MIBenum: 7
+Source: ECMA registry
+Alias: iso-ir-110
+Alias: ISO_8859-4
+Alias: ISO-8859-4 (preferred MIME name)
+Alias: latin4
+Alias: l4
+Alias: csISOLatin4
+
+Name: ECMA-cyrillic
+MIBenum: 77
+Source: ISO registry (formerly ECMA registry)
+ http://www.itscj.ipsj.jp/ISO-IR/111.pdf
+Alias: iso-ir-111
+Alias: KOI8-E
+Alias: csISO111ECMACyrillic
+
+Name: CSA_Z243.4-1985-1 [RFC1345,KXS2]
+MIBenum: 78
+Source: ECMA registry
+Alias: iso-ir-121
+Alias: ISO646-CA
+Alias: csa7-1
+Alias: ca
+Alias: csISO121Canadian1
+
+Name: CSA_Z243.4-1985-2 [RFC1345,KXS2]
+MIBenum: 79
+Source: ECMA registry
+Alias: iso-ir-122
+Alias: ISO646-CA2
+Alias: csa7-2
+Alias: csISO122Canadian2
+
+Name: CSA_Z243.4-1985-gr [RFC1345,KXS2]
+MIBenum: 80
+Source: ECMA registry
+Alias: iso-ir-123
+Alias: csISO123CSAZ24341985gr
+
+Name: ISO_8859-6:1987 [RFC1345,KXS2]
+MIBenum: 9
+Source: ECMA registry
+Alias: iso-ir-127
+Alias: ISO_8859-6
+Alias: ISO-8859-6 (preferred MIME name)
+Alias: ECMA-114
+Alias: ASMO-708
+Alias: arabic
+Alias: csISOLatinArabic
+
+Name: ISO_8859-6-E [RFC1556,IANA]
+MIBenum: 81
+Source: RFC1556
+Alias: csISO88596E
+Alias: ISO-8859-6-E (preferred MIME name)
+
+Name: ISO_8859-6-I [RFC1556,IANA]
+MIBenum: 82
+Source: RFC1556
+Alias: csISO88596I
+Alias: ISO-8859-6-I (preferred MIME name)
+
+Name: ISO_8859-7:1987 [RFC1947,RFC1345,KXS2]
+MIBenum: 10
+Source: ECMA registry
+Alias: iso-ir-126
+Alias: ISO_8859-7
+Alias: ISO-8859-7 (preferred MIME name)
+Alias: ELOT_928
+Alias: ECMA-118
+Alias: greek
+Alias: greek8
+Alias: csISOLatinGreek
+
+Name: T.101-G2 [RFC1345,KXS2]
+MIBenum: 83
+Source: ECMA registry
+Alias: iso-ir-128
+Alias: csISO128T101G2
+
+Name: ISO_8859-8:1988 [RFC1345,KXS2]
+MIBenum: 11
+Source: ECMA registry
+Alias: iso-ir-138
+Alias: ISO_8859-8
+Alias: ISO-8859-8 (preferred MIME name)
+Alias: hebrew
+Alias: csISOLatinHebrew
+
+Name: ISO_8859-8-E [RFC1556,Nussbacher]
+MIBenum: 84
+Source: RFC1556
+Alias: csISO88598E
+Alias: ISO-8859-8-E (preferred MIME name)
+
+Name: ISO_8859-8-I [RFC1556,Nussbacher]
+MIBenum: 85
+Source: RFC1556
+Alias: csISO88598I
+Alias: ISO-8859-8-I (preferred MIME name)
+
+Name: CSN_369103 [RFC1345,KXS2]
+MIBenum: 86
+Source: ECMA registry
+Alias: iso-ir-139
+Alias: csISO139CSN369103
+
+Name: JUS_I.B1.002 [RFC1345,KXS2]
+MIBenum: 87
+Source: ECMA registry
+Alias: iso-ir-141
+Alias: ISO646-YU
+Alias: js
+Alias: yu
+Alias: csISO141JUSIB1002
+
+Name: ISO_6937-2-add [RFC1345,KXS2]
+MIBenum: 14
+Source: ECMA registry and ISO 6937-2:1983
+Alias: iso-ir-142
+Alias: csISOTextComm
+
+Name: IEC_P27-1 [RFC1345,KXS2]
+MIBenum: 88
+Source: ECMA registry
+Alias: iso-ir-143
+Alias: csISO143IECP271
+
+Name: ISO_8859-5:1988 [RFC1345,KXS2]
+MIBenum: 8
+Source: ECMA registry
+Alias: iso-ir-144
+Alias: ISO_8859-5
+Alias: ISO-8859-5 (preferred MIME name)
+Alias: cyrillic
+Alias: csISOLatinCyrillic
+
+Name: JUS_I.B1.003-serb [RFC1345,KXS2]
+MIBenum: 89
+Source: ECMA registry
+Alias: iso-ir-146
+Alias: serbian
+Alias: csISO146Serbian
+
+Name: JUS_I.B1.003-mac [RFC1345,KXS2]
+MIBenum: 90
+Source: ECMA registry
+Alias: macedonian
+Alias: iso-ir-147
+Alias: csISO147Macedonian
+
+Name: ISO_8859-9:1989 [RFC1345,KXS2]
+MIBenum: 12
+Source: ECMA registry
+Alias: iso-ir-148
+Alias: ISO_8859-9
+Alias: ISO-8859-9 (preferred MIME name)
+Alias: latin5
+Alias: l5
+Alias: csISOLatin5
+
+Name: greek-ccitt [RFC1345,KXS2]
+MIBenum: 91
+Source: ECMA registry
+Alias: iso-ir-150
+Alias: csISO150
+Alias: csISO150GreekCCITT
+
+Name: NC_NC00-10:81 [RFC1345,KXS2]
+MIBenum: 92
+Source: ECMA registry
+Alias: cuba
+Alias: iso-ir-151
+Alias: ISO646-CU
+Alias: csISO151Cuba
+
+Name: ISO_6937-2-25 [RFC1345,KXS2]
+MIBenum: 93
+Source: ECMA registry
+Alias: iso-ir-152
+Alias: csISO6937Add
+
+Name: GOST_19768-74 [RFC1345,KXS2]
+MIBenum: 94
+Source: ECMA registry
+Alias: ST_SEV_358-88
+Alias: iso-ir-153
+Alias: csISO153GOST1976874
+
+Name: ISO_8859-supp [RFC1345,KXS2]
+MIBenum: 95
+Source: ECMA registry
+Alias: iso-ir-154
+Alias: latin1-2-5
+Alias: csISO8859Supp
+
+Name: ISO_10367-box [RFC1345,KXS2]
+MIBenum: 96
+Source: ECMA registry
+Alias: iso-ir-155
+Alias: csISO10367Box
+
+Name: ISO-8859-10 (preferred MIME name) [RFC1345,KXS2]
+MIBenum: 13
+Source: ECMA registry
+Alias: iso-ir-157
+Alias: l6
+Alias: ISO_8859-10:1992
+Alias: csISOLatin6
+Alias: latin6
+
+Name: latin-lap [RFC1345,KXS2]
+MIBenum: 97
+Source: ECMA registry
+Alias: lap
+Alias: iso-ir-158
+Alias: csISO158Lap
+
+Name: JIS_X0212-1990 [RFC1345,KXS2]
+MIBenum: 98
+Source: ECMA registry
+Alias: x0212
+Alias: iso-ir-159
+Alias: csISO159JISX02121990
+
+Name: DS_2089 [RFC1345,KXS2]
+MIBenum: 99
+Source: Danish Standard, DS 2089, February 1974
+Alias: DS2089
+Alias: ISO646-DK
+Alias: dk
+Alias: csISO646Danish
+
+Name: us-dk [RFC1345,KXS2]
+MIBenum: 100
+Alias: csUSDK
+
+Name: dk-us [RFC1345,KXS2]
+MIBenum: 101
+Alias: csDKUS
+
+Name: JIS_X0201 [RFC1345,KXS2]
+MIBenum: 15
+Source: JIS X 0201-1976. One byte only, this is equivalent to
+ JIS/Roman (similar to ASCII) plus eight-bit half-width
+ Katakana
+Alias: X0201
+Alias: csHalfWidthKatakana
+
+Name: KSC5636 [RFC1345,KXS2]
+MIBenum: 102
+Alias: ISO646-KR
+Alias: csKSC5636
+
+Name: ISO-10646-UCS-2
+MIBenum: 1000
+Source: the 2-octet Basic Multilingual Plane, aka Unicode
+ this needs to specify network byte order: the standard
+ does not specify (it is a 16-bit integer space)
+Alias: csUnicode
+
+Name: ISO-10646-UCS-4
+MIBenum: 1001
+Source: the full code space. (same comment about byte order,
+ these are 31-bit numbers.)
+Alias: csUCS4
+
+Name: DEC-MCS [RFC1345,KXS2]
+MIBenum: 2008
+Source: VAX/VMS User's Manual,
+ Order Number: AI-Y517A-TE, April 1986.
+Alias: dec
+Alias: csDECMCS
+
+Name: hp-roman8 [HP-PCL5,RFC1345,KXS2]
+MIBenum: 2004
+Source: LaserJet IIP Printer User's Manual,
+ HP part no 33471-90901, Hewlett-Packard, June 1989.
+Alias: roman8
+Alias: r8
+Alias: csHPRoman8
+
+Name: macintosh [RFC1345,KXS2]
+MIBenum: 2027
+Source: The Unicode Standard ver1.0, ISBN 0-201-56788-1, Oct 1991
+Alias: mac
+Alias: csMacintosh
+
+Name: IBM037 [RFC1345,KXS2]
+MIBenum: 2028
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp037
+Alias: ebcdic-cp-us
+Alias: ebcdic-cp-ca
+Alias: ebcdic-cp-wt
+Alias: ebcdic-cp-nl
+Alias: csIBM037
+
+Name: IBM038 [RFC1345,KXS2]
+MIBenum: 2029
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: EBCDIC-INT
+Alias: cp038
+Alias: csIBM038
+
+Name: IBM273 [RFC1345,KXS2]
+MIBenum: 2030
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP273
+Alias: csIBM273
+
+Name: IBM274 [RFC1345,KXS2]
+MIBenum: 2031
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: EBCDIC-BE
+Alias: CP274
+Alias: csIBM274
+
+Name: IBM275 [RFC1345,KXS2]
+MIBenum: 2032
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: EBCDIC-BR
+Alias: cp275
+Alias: csIBM275
+
+Name: IBM277 [RFC1345,KXS2]
+MIBenum: 2033
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: EBCDIC-CP-DK
+Alias: EBCDIC-CP-NO
+Alias: csIBM277
+
+Name: IBM278 [RFC1345,KXS2]
+MIBenum: 2034
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP278
+Alias: ebcdic-cp-fi
+Alias: ebcdic-cp-se
+Alias: csIBM278
+
+Name: IBM280 [RFC1345,KXS2]
+MIBenum: 2035
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP280
+Alias: ebcdic-cp-it
+Alias: csIBM280
+
+Name: IBM281 [RFC1345,KXS2]
+MIBenum: 2036
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: EBCDIC-JP-E
+Alias: cp281
+Alias: csIBM281
+
+Name: IBM284 [RFC1345,KXS2]
+MIBenum: 2037
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP284
+Alias: ebcdic-cp-es
+Alias: csIBM284
+
+Name: IBM285 [RFC1345,KXS2]
+MIBenum: 2038
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP285
+Alias: ebcdic-cp-gb
+Alias: csIBM285
+
+Name: IBM290 [RFC1345,KXS2]
+MIBenum: 2039
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: cp290
+Alias: EBCDIC-JP-kana
+Alias: csIBM290
+
+Name: IBM297 [RFC1345,KXS2]
+MIBenum: 2040
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp297
+Alias: ebcdic-cp-fr
+Alias: csIBM297
+
+Name: IBM420 [RFC1345,KXS2]
+MIBenum: 2041
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990,
+ IBM NLS RM p 11-11
+Alias: cp420
+Alias: ebcdic-cp-ar1
+Alias: csIBM420
+
+Name: IBM423 [RFC1345,KXS2]
+MIBenum: 2042
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp423
+Alias: ebcdic-cp-gr
+Alias: csIBM423
+
+Name: IBM424 [RFC1345,KXS2]
+MIBenum: 2043
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp424
+Alias: ebcdic-cp-he
+Alias: csIBM424
+
+Name: IBM437 [RFC1345,KXS2]
+MIBenum: 2011
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp437
+Alias: 437
+Alias: csPC8CodePage437
+
+Name: IBM500 [RFC1345,KXS2]
+MIBenum: 2044
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP500
+Alias: ebcdic-cp-be
+Alias: ebcdic-cp-ch
+Alias: csIBM500
+
+Name: IBM775 [HP-PCL5]
+MIBenum: 2087
+Source: HP PCL 5 Comparison Guide (P/N 5021-0329) pp B-13, 1996
+Alias: cp775
+Alias: csPC775Baltic
+
+Name: IBM850 [RFC1345,KXS2]
+MIBenum: 2009
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp850
+Alias: 850
+Alias: csPC850Multilingual
+
+Name: IBM851 [RFC1345,KXS2]
+MIBenum: 2045
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp851
+Alias: 851
+Alias: csIBM851
+
+Name: IBM852 [RFC1345,KXS2]
+MIBenum: 2010
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp852
+Alias: 852
+Alias: csPCp852
+
+Name: IBM855 [RFC1345,KXS2]
+MIBenum: 2046
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp855
+Alias: 855
+Alias: csIBM855
+
+Name: IBM857 [RFC1345,KXS2]
+MIBenum: 2047
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp857
+Alias: 857
+Alias: csIBM857
+
+Name: IBM860 [RFC1345,KXS2]
+MIBenum: 2048
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp860
+Alias: 860
+Alias: csIBM860
+
+Name: IBM861 [RFC1345,KXS2]
+MIBenum: 2049
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp861
+Alias: 861
+Alias: cp-is
+Alias: csIBM861
+
+Name: IBM862 [RFC1345,KXS2]
+MIBenum: 2013
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp862
+Alias: 862
+Alias: csPC862LatinHebrew
+
+Name: IBM863 [RFC1345,KXS2]
+MIBenum: 2050
+Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
+Alias: cp863
+Alias: 863
+Alias: csIBM863
+
+Name: IBM864 [RFC1345,KXS2]
+MIBenum: 2051
+Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
+Alias: cp864
+Alias: csIBM864
+
+Name: IBM865 [RFC1345,KXS2]
+MIBenum: 2052
+Source: IBM DOS 3.3 Ref (Abridged), 94X9575 (Feb 1987)
+Alias: cp865
+Alias: 865
+Alias: csIBM865
+
+Name: IBM866 [Pond]
+MIBenum: 2086
+Source: IBM NLDG Volume 2 (SE09-8002-03) August 1994
+Alias: cp866
+Alias: 866
+Alias: csIBM866
+
+Name: IBM868 [RFC1345,KXS2]
+MIBenum: 2053
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP868
+Alias: cp-ar
+Alias: csIBM868
+
+Name: IBM869 [RFC1345,KXS2]
+MIBenum: 2054
+Source: IBM Keyboard layouts and code pages, PN 07G4586 June 1991
+Alias: cp869
+Alias: 869
+Alias: cp-gr
+Alias: csIBM869
+
+Name: IBM870 [RFC1345,KXS2]
+MIBenum: 2055
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP870
+Alias: ebcdic-cp-roece
+Alias: ebcdic-cp-yu
+Alias: csIBM870
+
+Name: IBM871 [RFC1345,KXS2]
+MIBenum: 2056
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP871
+Alias: ebcdic-cp-is
+Alias: csIBM871
+
+Name: IBM880 [RFC1345,KXS2]
+MIBenum: 2057
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp880
+Alias: EBCDIC-Cyrillic
+Alias: csIBM880
+
+Name: IBM891 [RFC1345,KXS2]
+MIBenum: 2058
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp891
+Alias: csIBM891
+
+Name: IBM903 [RFC1345,KXS2]
+MIBenum: 2059
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp903
+Alias: csIBM903
+
+Name: IBM904 [RFC1345,KXS2]
+MIBenum: 2060
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: cp904
+Alias: 904
+Alias: csIBBM904
+
+Name: IBM905 [RFC1345,KXS2]
+MIBenum: 2061
+Source: IBM 3174 Character Set Ref, GA27-3831-02, March 1990
+Alias: CP905
+Alias: ebcdic-cp-tr
+Alias: csIBM905
+
+Name: IBM918 [RFC1345,KXS2]
+MIBenum: 2062
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP918
+Alias: ebcdic-cp-ar2
+Alias: csIBM918
+
+Name: IBM1026 [RFC1345,KXS2]
+MIBenum: 2063
+Source: IBM NLS RM Vol2 SE09-8002-01, March 1990
+Alias: CP1026
+Alias: csIBM1026
+
+Name: EBCDIC-AT-DE [RFC1345,KXS2]
+MIBenum: 2064
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csIBMEBCDICATDE
+
+Name: EBCDIC-AT-DE-A [RFC1345,KXS2]
+MIBenum: 2065
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICATDEA
+
+Name: EBCDIC-CA-FR [RFC1345,KXS2]
+MIBenum: 2066
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICCAFR
+
+Name: EBCDIC-DK-NO [RFC1345,KXS2]
+MIBenum: 2067
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICDKNO
+
+Name: EBCDIC-DK-NO-A [RFC1345,KXS2]
+MIBenum: 2068
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICDKNOA
+
+Name: EBCDIC-FI-SE [RFC1345,KXS2]
+MIBenum: 2069
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICFISE
+
+Name: EBCDIC-FI-SE-A [RFC1345,KXS2]
+MIBenum: 2070
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICFISEA
+
+Name: EBCDIC-FR [RFC1345,KXS2]
+MIBenum: 2071
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICFR
+
+Name: EBCDIC-IT [RFC1345,KXS2]
+MIBenum: 2072
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICIT
+
+Name: EBCDIC-PT [RFC1345,KXS2]
+MIBenum: 2073
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICPT
+
+Name: EBCDIC-ES [RFC1345,KXS2]
+MIBenum: 2074
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICES
+
+Name: EBCDIC-ES-A [RFC1345,KXS2]
+MIBenum: 2075
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICESA
+
+Name: EBCDIC-ES-S [RFC1345,KXS2]
+MIBenum: 2076
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICESS
+
+Name: EBCDIC-UK [RFC1345,KXS2]
+MIBenum: 2077
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICUK
+
+Name: EBCDIC-US [RFC1345,KXS2]
+MIBenum: 2078
+Source: IBM 3270 Char Set Ref Ch 10, GA27-2837-9, April 1987
+Alias: csEBCDICUS
+
+Name: UNKNOWN-8BIT [RFC1428]
+MIBenum: 2079
+Alias: csUnknown8BiT
+
+Name: MNEMONIC [RFC1345,KXS2]
+MIBenum: 2080
+Source: RFC 1345, also known as "mnemonic+ascii+38"
+Alias: csMnemonic
+
+Name: MNEM [RFC1345,KXS2]
+MIBenum: 2081
+Source: RFC 1345, also known as "mnemonic+ascii+8200"
+Alias: csMnem
+
+Name: VISCII [RFC1456]
+MIBenum: 2082
+Source: RFC 1456
+Alias: csVISCII
+
+Name: VIQR [RFC1456]
+MIBenum: 2083
+Source: RFC 1456
+Alias: csVIQR
+
+Name: KOI8-R (preferred MIME name) [RFC1489]
+MIBenum: 2084
+Source: RFC 1489, based on GOST-19768-74, ISO-6937/8,
+ INIS-Cyrillic, ISO-5427.
+Alias: csKOI8R
+
+Name: KOI8-U [RFC2319]
+MIBenum: 2088
+Source: RFC 2319
+
+Name: IBM00858
+MIBenum: 2089
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM00858) [Mahdi]
+Alias: CCSID00858
+Alias: CP00858
+Alias: PC-Multilingual-850+euro
+
+Name: IBM00924
+MIBenum: 2090
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM00924) [Mahdi]
+Alias: CCSID00924
+Alias: CP00924
+Alias: ebcdic-Latin9--euro
+
+Name: IBM01140
+MIBenum: 2091
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01140) [Mahdi]
+Alias: CCSID01140
+Alias: CP01140
+Alias: ebcdic-us-37+euro
+
+Name: IBM01141
+MIBenum: 2092
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01141) [Mahdi]
+Alias: CCSID01141
+Alias: CP01141
+Alias: ebcdic-de-273+euro
+
+Name: IBM01142
+MIBenum: 2093
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01142) [Mahdi]
+Alias: CCSID01142
+Alias: CP01142
+Alias: ebcdic-dk-277+euro
+Alias: ebcdic-no-277+euro
+
+Name: IBM01143
+MIBenum: 2094
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01143) [Mahdi]
+Alias: CCSID01143
+Alias: CP01143
+Alias: ebcdic-fi-278+euro
+Alias: ebcdic-se-278+euro
+
+Name: IBM01144
+MIBenum: 2095
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01144) [Mahdi]
+Alias: CCSID01144
+Alias: CP01144
+Alias: ebcdic-it-280+euro
+
+Name: IBM01145
+MIBenum: 2096
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01145) [Mahdi]
+Alias: CCSID01145
+Alias: CP01145
+Alias: ebcdic-es-284+euro
+
+Name: IBM01146
+MIBenum: 2097
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01146) [Mahdi]
+Alias: CCSID01146
+Alias: CP01146
+Alias: ebcdic-gb-285+euro
+
+Name: IBM01147
+MIBenum: 2098
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01147) [Mahdi]
+Alias: CCSID01147
+Alias: CP01147
+Alias: ebcdic-fr-297+euro
+
+Name: IBM01148
+MIBenum: 2099
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01148) [Mahdi]
+Alias: CCSID01148
+Alias: CP01148
+Alias: ebcdic-international-500+euro
+
+Name: IBM01149
+MIBenum: 2100
+Source: IBM See (http://www.iana.org/assignments/charset-reg/IBM01149) [Mahdi]
+Alias: CCSID01149
+Alias: CP01149
+Alias: ebcdic-is-871+euro
+
+Name: Big5-HKSCS [Yick]
+MIBenum: 2101
+Source: See (http://www.iana.org/assignments/charset-reg/Big5-HKSCS)
+Alias: None
+
+Name: IBM1047 [Robrigado]
+MIBenum: 2102
+Source: IBM1047 (EBCDIC Latin 1/Open Systems)
+http://www-1.ibm.com/servers/eserver/iseries/software/globalization/pdf/cp01047z.pdf
+Alias: IBM-1047
+
+Name: PTCP154 [Uskov]
+MIBenum: 2103
+Source: See (http://www.iana.org/assignments/charset-reg/PTCP154)
+Alias: csPTCP154
+Alias: PT154
+Alias: CP154
+Alias: Cyrillic-Asian
+
+Name: Amiga-1251
+MIBenum: 2104
+Source: See (http://www.amiga.ultranet.ru/Amiga-1251.html)
+Alias: Ami1251
+Alias: Amiga1251
+Alias: Ami-1251
+(Aliases are provided for historical reasons and should not be used)
+ [Malyshev]
+
+Name: KOI7-switched
+MIBenum: 2105
+Source: See <http://www.iana.org/assignments/charset-reg/KOI7-switched>
+Alias: None
+
+Name: UNICODE-1-1 [RFC1641]
+MIBenum: 1010
+Source: RFC 1641
+Alias: csUnicode11
+
+Name: SCSU
+MIBenum: 1011
+Source: SCSU See (http://www.iana.org/assignments/charset-reg/SCSU) [Scherer]
+Alias: None
+
+Name: UTF-7 [RFC2152]
+MIBenum: 1012
+Source: RFC 2152
+Alias: None
+
+Name: UTF-16BE [RFC2781]
+MIBenum: 1013
+Source: RFC 2781
+Alias: None
+
+Name: UTF-16LE [RFC2781]
+MIBenum: 1014
+Source: RFC 2781
+Alias: None
+
+Name: UTF-16 [RFC2781]
+MIBenum: 1015
+Source: RFC 2781
+Alias: None
+
+Name: CESU-8 [Phipps]
+MIBenum: 1016
+Source: <http://www.unicode.org/unicode/reports/tr26>
+Alias: csCESU-8
+
+Name: UTF-32 [Davis]
+MIBenum: 1017
+Source: <http://www.unicode.org/unicode/reports/tr19/>
+Alias: None
+
+Name: UTF-32BE [Davis]
+MIBenum: 1018
+Source: <http://www.unicode.org/unicode/reports/tr19/>
+Alias: None
+
+Name: UTF-32LE [Davis]
+MIBenum: 1019
+Source: <http://www.unicode.org/unicode/reports/tr19/>
+Alias: None
+
+Name: BOCU-1 [Scherer]
+MIBenum: 1020
+Source: http://www.unicode.org/notes/tn6/
+Alias: csBOCU-1
+
+Name: UNICODE-1-1-UTF-7 [RFC1642]
+MIBenum: 103
+Source: RFC 1642
+Alias: csUnicode11UTF7
+
+Name: UTF-8 [RFC3629]
+MIBenum: 106
+Source: RFC 3629
+Alias: None
+
+Name: ISO-8859-13
+MIBenum: 109
+Source: ISO See (http://www.iana.org/assignments/charset-reg/iso-8859-13) [Tumasonis]
+Alias: None
+
+Name: ISO-8859-14
+MIBenum: 110
+Source: ISO See (http://www.iana.org/assignments/charset-reg/iso-8859-14) [Simonsen]
+Alias: iso-ir-199
+Alias: ISO_8859-14:1998
+Alias: ISO_8859-14
+Alias: latin8
+Alias: iso-celtic
+Alias: l8
+
+Name: ISO-8859-15
+MIBenum: 111
+Source: ISO
+ Please see: <http://www.iana.org/assignments/charset-reg/ISO-8859-15>
+Alias: ISO_8859-15
+Alias: Latin-9
+
+Name: ISO-8859-16
+MIBenum: 112
+Source: ISO
+Alias: iso-ir-226
+Alias: ISO_8859-16:2001
+Alias: ISO_8859-16
+Alias: latin10
+Alias: l10
+
+Name: GBK
+MIBenum: 113
+Source: Chinese IT Standardization Technical Committee
+ Please see: <http://www.iana.org/assignments/charset-reg/GBK>
+Alias: CP936
+Alias: MS936
+Alias: windows-936
+
+Name: GB18030
+MIBenum: 114
+Source: Chinese IT Standardization Technical Committee
+ Please see: <http://www.iana.org/assignments/charset-reg/GB18030>
+Alias: None
+
+Name: OSD_EBCDIC_DF04_15
+MIBenum: 115
+Source: Fujitsu-Siemens standard mainframe EBCDIC encoding
+ Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-15>
+Alias: None
+
+Name: OSD_EBCDIC_DF03_IRV
+MIBenum: 116
+Source: Fujitsu-Siemens standard mainframe EBCDIC encoding
+ Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF03-IRV>
+Alias: None
+
+Name: OSD_EBCDIC_DF04_1
+MIBenum: 117
+Source: Fujitsu-Siemens standard mainframe EBCDIC encoding
+ Please see: <http://www.iana.org/assignments/charset-reg/OSD-EBCDIC-DF04-1>
+Alias: None
+
+Name: JIS_Encoding
+MIBenum: 16
+Source: JIS X 0202-1991. Uses ISO 2022 escape sequences to
+ shift code sets as documented in JIS X 0202-1991.
+Alias: csJISEncoding
+
+Name: Shift_JIS (preferred MIME name)
+MIBenum: 17
+Source: This charset is an extension of csHalfWidthKatakana by
+ adding graphic characters in JIS X 0208. The CCS's are
+ JIS X0201:1997 and JIS X0208:1997. The
+ complete definition is shown in Appendix 1 of JIS
+ X0208:1997.
+ This charset can be used for the top-level media type "text".
+Alias: MS_Kanji
+Alias: csShiftJIS
+
+Name: Extended_UNIX_Code_Packed_Format_for_Japanese
+MIBenum: 18
+Source: Standardized by OSF, UNIX International, and UNIX Systems
+ Laboratories Pacific. Uses ISO 2022 rules to select
+ code set 0: US-ASCII (a single 7-bit byte set)
+ code set 1: JIS X0208-1990 (a double 8-bit byte set)
+ restricted to A0-FF in both bytes
+ code set 2: Half Width Katakana (a single 7-bit byte set)
+ requiring SS2 as the character prefix
+ code set 3: JIS X0212-1990 (a double 7-bit byte set)
+ restricted to A0-FF in both bytes
+ requiring SS3 as the character prefix
+Alias: csEUCPkdFmtJapanese
+Alias: EUC-JP (preferred MIME name)
+
+Name: Extended_UNIX_Code_Fixed_Width_for_Japanese
+MIBenum: 19
+Source: Used in Japan. Each character is 2 octets.
+ code set 0: US-ASCII (a single 7-bit byte set)
+ 1st byte = 00
+ 2nd byte = 20-7E
+ code set 1: JIS X0208-1990 (a double 7-bit byte set)
+ restricted to A0-FF in both bytes
+ code set 2: Half Width Katakana (a single 7-bit byte set)
+ 1st byte = 00
+ 2nd byte = A0-FF
+ code set 3: JIS X0212-1990 (a double 7-bit byte set)
+ restricted to A0-FF in
+ the first byte
+ and 21-7E in the second byte
+Alias: csEUCFixWidJapanese
+
+Name: ISO-10646-UCS-Basic
+MIBenum: 1002
+Source: ASCII subset of Unicode. Basic Latin = collection 1
+ See ISO 10646, Appendix A
+Alias: csUnicodeASCII
+
+Name: ISO-10646-Unicode-Latin1
+MIBenum: 1003
+Source: ISO Latin-1 subset of Unicode. Basic Latin and Latin-1
+ Supplement = collections 1 and 2. See ISO 10646,
+ Appendix A. See RFC 1815.
+Alias: csUnicodeLatin1
+Alias: ISO-10646
+
+Name: ISO-10646-J-1
+Source: ISO 10646 Japanese, see RFC 1815.
+
+Name: ISO-Unicode-IBM-1261
+MIBenum: 1005
+Source: IBM Latin-2, -3, -5, Extended Presentation Set, GCSGID: 1261
+Alias: csUnicodeIBM1261
+
+Name: ISO-Unicode-IBM-1268
+MIBenum: 1006
+Source: IBM Latin-4 Extended Presentation Set, GCSGID: 1268
+Alias: csUnicodeIBM1268
+
+Name: ISO-Unicode-IBM-1276
+MIBenum: 1007
+Source: IBM Cyrillic Greek Extended Presentation Set, GCSGID: 1276
+Alias: csUnicodeIBM1276
+
+Name: ISO-Unicode-IBM-1264
+MIBenum: 1008
+Source: IBM Arabic Presentation Set, GCSGID: 1264
+Alias: csUnicodeIBM1264
+
+Name: ISO-Unicode-IBM-1265
+MIBenum: 1009
+Source: IBM Hebrew Presentation Set, GCSGID: 1265
+Alias: csUnicodeIBM1265
+
+Name: ISO-8859-1-Windows-3.0-Latin-1 [HP-PCL5]
+MIBenum: 2000
+Source: Extended ISO 8859-1 Latin-1 for Windows 3.0.
+ PCL Symbol Set id: 9U
+Alias: csWindows30Latin1
+
+Name: ISO-8859-1-Windows-3.1-Latin-1 [HP-PCL5]
+MIBenum: 2001
+Source: Extended ISO 8859-1 Latin-1 for Windows 3.1.
+ PCL Symbol Set id: 19U
+Alias: csWindows31Latin1
+
+Name: ISO-8859-2-Windows-Latin-2 [HP-PCL5]
+MIBenum: 2002
+Source: Extended ISO 8859-2. Latin-2 for Windows 3.1.
+ PCL Symbol Set id: 9E
+Alias: csWindows31Latin2
+
+Name: ISO-8859-9-Windows-Latin-5 [HP-PCL5]
+MIBenum: 2003
+Source: Extended ISO 8859-9. Latin-5 for Windows 3.1
+ PCL Symbol Set id: 5T
+Alias: csWindows31Latin5
+
+Name: Adobe-Standard-Encoding [Adobe]
+MIBenum: 2005
+Source: PostScript Language Reference Manual
+ PCL Symbol Set id: 10J
+Alias: csAdobeStandardEncoding
+
+Name: Ventura-US [HP-PCL5]
+MIBenum: 2006
+Source: Ventura US. ASCII plus characters typically used in
+ publishing, like pilcrow, copyright, registered, trade mark,
+ section, dagger, and double dagger in the range A0 (hex)
+ to FF (hex).
+ PCL Symbol Set id: 14J
+Alias: csVenturaUS
+
+Name: Ventura-International [HP-PCL5]
+MIBenum: 2007
+Source: Ventura International. ASCII plus coded characters similar
+ to Roman8.
+ PCL Symbol Set id: 13J
+Alias: csVenturaInternational
+
+Name: PC8-Danish-Norwegian [HP-PCL5]
+MIBenum: 2012
+Source: PC Danish Norwegian
+ 8-bit PC set for Danish Norwegian
+ PCL Symbol Set id: 11U
+Alias: csPC8DanishNorwegian
+
+Name: PC8-Turkish [HP-PCL5]
+MIBenum: 2014
+Source: PC Latin Turkish. PCL Symbol Set id: 9T
+Alias: csPC8Turkish
+
+Name: IBM-Symbols [IBM-CIDT]
+MIBenum: 2015
+Source: Presentation Set, CPGID: 259
+Alias: csIBMSymbols
+
+Name: IBM-Thai [IBM-CIDT]
+MIBenum: 2016
+Source: Presentation Set, CPGID: 838
+Alias: csIBMThai
+
+Name: HP-Legal [HP-PCL5]
+MIBenum: 2017
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+ HP part number 5961-0510, October 1992
+ PCL Symbol Set id: 1U
+Alias: csHPLegal
+
+Name: HP-Pi-font [HP-PCL5]
+MIBenum: 2018
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+ HP part number 5961-0510, October 1992
+ PCL Symbol Set id: 15U
+Alias: csHPPiFont
+
+Name: HP-Math8 [HP-PCL5]
+MIBenum: 2019
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+ HP part number 5961-0510, October 1992
+ PCL Symbol Set id: 8M
+Alias: csHPMath8
+
+Name: Adobe-Symbol-Encoding [Adobe]
+MIBenum: 2020
+Source: PostScript Language Reference Manual
+ PCL Symbol Set id: 5M
+Alias: csHPPSMath
+
+Name: HP-DeskTop [HP-PCL5]
+MIBenum: 2021
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+ HP part number 5961-0510, October 1992
+ PCL Symbol Set id: 7J
+Alias: csHPDesktop
+
+Name: Ventura-Math [HP-PCL5]
+MIBenum: 2022
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+ HP part number 5961-0510, October 1992
+ PCL Symbol Set id: 6M
+Alias: csVenturaMath
+
+Name: Microsoft-Publishing [HP-PCL5]
+MIBenum: 2023
+Source: PCL 5 Comparison Guide, Hewlett-Packard,
+ HP part number 5961-0510, October 1992
+ PCL Symbol Set id: 6J
+Alias: csMicrosoftPublishing
+
+Name: Windows-31J
+MIBenum: 2024
+Source: Windows Japanese. A further extension of Shift_JIS
+ to include NEC special characters (Row 13), NEC
+ selection of IBM extensions (Rows 89 to 92), and IBM
+ extensions (Rows 115 to 119). The CCS's are
+ JIS X0201:1997, JIS X0208:1997, and these extensions.
+ This charset can be used for the top-level media type "text",
+ but it is of limited or specialized use (see RFC2278).
+ PCL Symbol Set id: 19K
+Alias: csWindows31J
+
+Name: GB2312 (preferred MIME name)
+MIBenum: 2025
+Source: Chinese for People's Republic of China (PRC) mixed one byte,
+ two byte set:
+ 20-7E = one byte ASCII
+ A1-FE = two byte PRC Kanji
+ See GB 2312-80
+ PCL Symbol Set Id: 18C
+Alias: csGB2312
+
+Name: Big5 (preferred MIME name)
+MIBenum: 2026
+Source: Chinese for Taiwan Multi-byte set.
+ PCL Symbol Set Id: 18T
+Alias: csBig5
+
+Name: windows-1250
+MIBenum: 2250
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1250) [Lazhintseva]
+Alias: None
+
+Name: windows-1251
+MIBenum: 2251
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1251) [Lazhintseva]
+Alias: None
+
+Name: windows-1252
+MIBenum: 2252
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1252) [Wendt]
+Alias: None
+
+Name: windows-1253
+MIBenum: 2253
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1253) [Lazhintseva]
+Alias: None
+
+Name: windows-1254
+MIBenum: 2254
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1254) [Lazhintseva]
+Alias: None
+
+Name: windows-1255
+MIBenum: 2255
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1255) [Lazhintseva]
+Alias: None
+
+Name: windows-1256
+MIBenum: 2256
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1256) [Lazhintseva]
+Alias: None
+
+Name: windows-1257
+MIBenum: 2257
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1257) [Lazhintseva]
+Alias: None
+
+Name: windows-1258
+MIBenum: 2258
+Source: Microsoft (http://www.iana.org/assignments/charset-reg/windows-1258) [Lazhintseva]
+Alias: None
+
+Name: TIS-620
+MIBenum: 2259
+Source: Thai Industrial Standards Institute (TISI) [Tantsetthi]
+
+Name: HZ-GB-2312
+MIBenum: 2085
+Source: RFC 1842, RFC 1843 [RFC1842, RFC1843]
+
+
+REFERENCES
+----------
+
+[RFC1345] Simonsen, K., "Character Mnemonics & Character Sets",
+ RFC 1345, Rationel Almen Planlaegning,
+ June 1992.
+
+[RFC1428] Vaudreuil, G., "Transition of Internet Mail from
+ Just-Send-8 to 8bit-SMTP/MIME", RFC1428, CNRI, February
+ 1993.
+
+[RFC1456] Vietnamese Standardization Working Group, "Conventions for
+ Encoding the Vietnamese Language VISCII: VIetnamese
+ Standard Code for Information Interchange VIQR: VIetnamese
+ Quoted-Readable Specification Revision 1.1", RFC 1456, May
+ 1993.
+
+[RFC1468] Murai, J., Crispin, M., and E. van der Poel, "Japanese
+ Character Encoding for Internet Messages", RFC 1468,
+ Keio University, Panda Programming, June 1993.
+
+[RFC1489] Chernov, A., "Registration of a Cyrillic Character Set",
+ RFC1489, RELCOM Development Team, July 1993.
+
+[RFC1554] Ohta, M., and K. Handa, "ISO-2022-JP-2: Multilingual
+ Extension of ISO-2022-JP", RFC1554, Tokyo Institute of
+ Technology, ETL, December 1993.
+
+[RFC1556] Nussbacher, H., "Handling of Bi-directional Texts in MIME",
+ RFC1556, Israeli Inter-University, December 1993.
+
+[RFC1557] Choi, U., Chon, K., and H. Park, "Korean Character Encoding
+ for Internet Messages", RFC 1557, KAIST, Solvit Chosun Media,
+ December 1993.
+
+[RFC1641] Goldsmith, D., and M. Davis, "Using Unicode with MIME",
+ RFC1641, Taligent, Inc., July 1994.
+
+[RFC1642] Goldsmith, D., and M. Davis, "UTF-7", RFC1642, Taligent,
+ Inc., July 1994.
+
+[RFC1815] Ohta, M., "Character Sets ISO-10646 and ISO-10646-J-1",
+ RFC 1815, Tokyo Institute of Technology, July 1995.
+
+
+[Adobe] Adobe Systems Incorporated, PostScript Language Reference
+ Manual, second edition, Addison-Wesley Publishing Company,
+ Inc., 1990.
+
+[ECMA Registry] ISO-IR: International Register of Escape Sequences
+ http://www.itscj.ipsj.or.jp/ISO-IR/ Note: The current
+ registration authority is IPSJ/ITSCJ, Japan.
+
+[HP-PCL5] Hewlett-Packard Company, "HP PCL 5 Comparison Guide",
+ (P/N 5021-0329) pp B-13, 1996.
+
+[IBM-CIDT] IBM Corporation, "ABOUT TYPE: IBM's Technical Reference
+ for Core Interchange Digitized Type", Publication number
+ S544-3708-01
+
+[RFC1842] Wei, Y., J. Li, and Y. Jiang, "ASCII Printable
+ Characters-Based Chinese Character Encoding for Internet
+ Messages", RFC 1842, Harvard University, Rice University,
+ University of Maryland, August 1995.
+
+[RFC1843] Lee, F., "HZ - A Data Format for Exchanging Files of
+ Arbitrarily Mixed Chinese and ASCII Characters", RFC 1843,
+ Stanford University, August 1995.
+
+[RFC2152] Goldsmith, D., M. Davis, "UTF-7: A Mail-Safe Transformation
+ Format of Unicode", RFC 2152, Apple Computer, Inc.,
+ Taligent Inc., May 1997.
+
+[RFC2279] Yergeau, F., "UTF-8, A Transformation Format of ISO 10646",
+ RFC 2279, Alis Technologies, January, 1998.
+
+[RFC2781] Hoffman, P., Yergeau, F., "UTF-16, an encoding of ISO 10646",
+ RFC 2781, February 2000.
+
+[RFC3629] Yergeau, F., "UTF-8, a transformation format of ISO 10646",
+ RFC3629, November 2003.
+
+PEOPLE
+------
+
+[KXS2] Keld Simonsen <Keld.Simonsen@dkuug.dk>
+
+[Choi] Woohyong Choi <whchoi@cosmos.kaist.ac.kr>
+
+[Davis] Mark Davis, <mark@unicode.org>, April 2002.
+
+[Lazhintseva] Katya Lazhintseva, <katyal@MICROSOFT.com>, May 1996.
+
+[Mahdi] Tamer Mahdi, <tamer@ca.ibm.com>, August 2000.
+
+[Malyshev] Michael Malyshev, <michael_malyshev@mail.ru>, January 2004
+
+[Murai] Jun Murai <jun@wide.ad.jp>
+
+[Nussbacher] Hank Nussbacher, <hank@vm.tau.ac.il>
+
+[Ohta] Masataka Ohta, <mohta@cc.titech.ac.jp>, July 1995.
+
+[Phipps] Toby Phipps, <tphipps@peoplesoft.com>, March 2002.
+
+[Pond] Rick Pond, <rickpond@vnet.ibm.com>, March 1997.
+
+[Robrigado] Reuel Robrigado, <reuelr@ca.ibm.com>, September 2002.
+
+[Scherer] Markus Scherer, <markus.scherer@jtcsv.com>, August 2000,
+ September 2002.
+
+[Simonsen] Keld Simonsen, <Keld.Simonsen@rap.dk>, August 2000.
+
+[Tantsetthi] Trin Tantsetthi, <trin@mozart.inet.co.th>, September 1998.
+
+[Tumasonis] Vladas Tumasonis, <vladas.tumasonis@maf.vu.lt>, August 2000.
+
+[Uskov] Alexander Uskov, <auskov@idc.kz>, September 2002.
+
+[Wendt] Chris Wendt, <christw@microsoft.com>, December 1999.
+
+[Yick] Nicky Yick, <cliac@itsd.gcn.gov.hk>, October 2000.
+
+[]
+
+
+
+
+
+
+
diff --git a/aliases/makealiases.c b/aliases/makealiases.c
new file mode 100644
index 0000000..f24370b
--- /dev/null
+++ b/aliases/makealiases.c
@@ -0,0 +1,243 @@
+/**
+ * IANA charset data to Iconv Aliases file convertor
+ *
+ * Version history:
+ *
+ * 0.01 - Initial version
+ * 0.02 - Added "utf8" alias seen in the wild
+ */
+
+#include <ctype.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/**
+ * One row of the supplementary alias table.
+ */
+struct extra {
+	/* Canonical charset name; main() matches it exactly with strcmp() */
+	const char *canon;
+	/* Space-separated aliases written after any aliases parsed from input */
+	const char *aliases;
+};
+
+/**
+ * Supplementary aliases, keyed by canonical name.
+ *
+ * main() scans this table once per emitted record and appends the aliases
+ * of the first entry whose canon equals the record's canonical name.
+ */
+struct extra extras[] = {
+	{ "ISO-8859-1",      "8859_1 ISO8859-1" },
+	{ "ISO-8859-2",      "8859_2 ISO8859-2" },
+	{ "ISO-8859-3",      "8859_3 ISO8859-3" },
+	{ "ISO-8859-4",      "8859_4 ISO8859-4" },
+	{ "ISO-8859-5",      "8859_5 ISO8859-5" },
+	{ "ISO-8859-7",      "8859_7 ISO8859-7" },
+	{ "ISO-8859-8",      "8859_8 ISO8859-8" },
+	{ "ISO-8859-9",      "8859_9 ISO8859-9" },
+	{ "ISO-8859-10",     "8859_10 ISO8859-10" },
+	{ "ISO-8859-13",     "8859_13 ISO8859-13" },
+	{ "ISO-8859-14",     "8859_14 ISO8859-14" },
+	{ "ISO-8859-15",     "8859_15 ISO8859-15" },
+	{ "Shift_JIS",       "X-SJIS Shift-JIS" },
+	{ "EUC-JP",          "EUCJP" },
+	{ "EUC-KR",          "EUCKR" },
+	{ "UTF-8",           "UNICODE-1-1-UTF-8 UNICODE-2-0-UTF-8 utf8" },
+	{ "ISO-10646-UCS-4", "UCS-4 UCS4" },
+	{ "ISO-10646-UCS-2", "UCS-2 UCS2" },
+	{ "GB2312",          "EUC-CN EUCCN CN-GB" },
+	{ "Big5",            "BIG-FIVE BIG-5 CN-BIG5 BIG_FIVE" },
+	{ "macintosh",       "MACROMAN MAC-ROMAN X-MAC-ROMAN" },
+	{ "windows-1250",    "CP1250 MS-EE" },
+	{ "windows-1251",    "CP1251 MS-CYRL" },
+	{ "windows-1252",    "CP1252 MS-ANSI" },
+	{ "windows-1253",    "CP1253 MS-GREEK" },
+	{ "windows-1254",    "CP1254 MS-TURK" },
+	{ "windows-1256",    "CP1256 MS-ARAB" },
+	{ "windows-1257",    "CP1257 WINBALTRIM" },
+};
+
+/* Number of rows in extras[] */
+#define EXTRAS_SIZE (sizeof extras / sizeof *extras)
+
+/*
+ * Make aliases file from IANA charset data.
+ * The canonical name of an encoding is that which follows the "Name:" tag
+ * in the input file. There is an exception, however, for those encodings
+ * which have an alias which is denoted as the "preferred MIME name". For
+ * these encodings, the preferred MIME name is taken as the canonical form.
+ */
+
+/*
+ * Positional command line arguments, as used by main().
+ *
+ * These expand to argv[] subscripts, so they are only meaningful inside a
+ * scope that has an `argv` in it.
+ *
+ * TOP      File copied line by line to the start of the output
+ * SETS     IANA character set data that is parsed into alias records
+ * BOTTOM   File copied line by line to the end of the output
+ * ALIASES  Output file; opened with mode "w"
+ */
+#define TOP argv[1]
+#define SETS argv[2]
+#define BOTTOM argv[3]
+#define ALIASES argv[4]
+
+/**
+ * Remove the line terminator, if there is one, from a line read by fgets().
+ *
+ * Blindly overwriting the last character would eat a real character when
+ * the line has no newline (final line of a file, or an over-long line).
+ *
+ * \param line  NUL-terminated line, modified in place
+ */
+static void strip_eol(char *line)
+{
+	line[strcspn(line, "\r\n")] = '\0';
+}
+
+/**
+ * Skip leading whitespace.
+ *
+ * \param s  NUL-terminated string
+ * \return Pointer to the first non-whitespace character, or the terminator
+ */
+static char *skip_space(char *s)
+{
+	/* isspace() is only defined for unsigned char values and EOF */
+	while (isspace((unsigned char) *s))
+		s++;
+
+	return s;
+}
+
+/**
+ * Measure the whitespace-delimited token at the start of a string.
+ *
+ * \param s  NUL-terminated string
+ * \return Number of characters before the first whitespace or terminator
+ */
+static size_t token_length(const char *s)
+{
+	size_t len = 0;
+
+	while (s[len] != '\0' && !isspace((unsigned char) s[len]))
+		len++;
+
+	return len;
+}
+
+/**
+ * Copy the leading token of a string into a fixed size buffer.
+ *
+ * The result is always NUL-terminated; a token too long for the buffer is
+ * truncated rather than written past the end of it.
+ *
+ * \param dst     Destination buffer
+ * \param dstlen  Size of destination buffer, in bytes (must be non-zero)
+ * \param src     String whose first token is to be copied
+ */
+static void copy_token(char *dst, size_t dstlen, const char *src)
+{
+	size_t len = token_length(src);
+
+	if (len >= dstlen)
+		len = dstlen - 1;
+
+	memcpy(dst, src, len);
+	dst[len] = '\0';
+}
+
+/**
+ * Append an alias to a heap allocated, space-separated alias list.
+ *
+ * \param list   Pointer to list pointer; updated if the list is moved
+ * \param alias  Start of alias text (need not be NUL-terminated at len)
+ * \param len    Length of alias, in bytes; empty aliases are ignored
+ * \return 0 on success, -1 on memory exhaustion (list is left unchanged)
+ */
+static int append_alias(char **list, const char *alias, size_t len)
+{
+	size_t used = strlen(*list);
+	char *grown;
+
+	if (len == 0)
+		return 0;
+
+	/* existing text + separator + alias + terminator */
+	grown = realloc(*list, used + 1 + len + 1);
+	if (!grown)
+		return -1;
+
+	if (used != 0)
+		grown[used++] = ' ';
+	memcpy(grown + used, alias, len);
+	grown[used + len] = '\0';
+
+	*list = grown;
+
+	return 0;
+}
+
+/**
+ * Copy the remainder of one text stream onto another, line by line.
+ *
+ * \param in   Stream to read from
+ * \param out  Stream to write to
+ */
+static void copy_stream(FILE *in, FILE *out)
+{
+	char chunk[200];
+
+	while (fgets(chunk, sizeof chunk, in))
+		fputs(chunk, out);
+}
+
+/**
+ * Program entry point.
+ *
+ * Usage: makealiases <top> <character-sets> <bottom> <aliases>
+ *
+ * Writes <aliases> as: the contents of <top>, one line per record of
+ * <character-sets> that has a MIBenum ("name<TAB>s mibenum<TAB><TAB>aliases"),
+ * then the contents of <bottom>.
+ *
+ * \param argc  Number of command line arguments
+ * \param argv  Argument vector; see TOP, SETS, BOTTOM and ALIASES
+ * \return 0 on success, 1 on bad usage, failure to open a file, memory
+ *         exhaustion or failure to write the output
+ */
+int main(int argc, const char **argv)
+{
+	FILE *in, *out;
+	char buf[200], name[64];
+	int mibenum;
+	char *s, *aliases;
+	size_t i, namelen;
+	int have_line;
+	int status = 0;
+
+	/* TOP..ALIASES index argv[1..4]; don't read past what we were given */
+	if (argc < 5) {
+		fprintf(stderr,
+			"Usage: %s <top> <character-sets> <bottom> <aliases>\n",
+			argc > 0 ? argv[0] : "makealiases");
+		return 1;
+	}
+
+	in = fopen(TOP, "r");
+	if (!in)
+		return 1;
+
+	out = fopen(ALIASES, "w");
+	if (!out) {
+		fclose(in);
+		return 1;
+	}
+
+	copy_stream(in, out);
+
+	fclose(in);
+
+	in = fopen(SETS, "r");
+	if (!in) {
+		fclose(out);
+		return 1;
+	}
+
+	/* have_line tracks whether buf holds a line that is yet to be
+	 * examined; relying on stale buffer contents at EOF could reprocess
+	 * a trailing "Name:" line forever. */
+	have_line = (fgets(buf, sizeof buf, in) != NULL);
+
+	while (have_line) {
+		/* find start of record */
+		if (strncmp(buf, "Name:", 5) != 0) {
+			have_line = (fgets(buf, sizeof buf, in) != NULL);
+			continue;
+		}
+
+		strip_eol(buf);
+
+		/* copy name to buffer */
+		copy_token(name, sizeof name, skip_space(buf + 5));
+
+		/* get mibenum */
+		while ((have_line = (fgets(buf, sizeof buf, in) != NULL))) {
+			if (strncmp(buf, "Name:", 5) == 0)
+				break;
+			if (strncmp(buf, "MIBenum:", 8) == 0)
+				break;
+		}
+		/* Records with no MIBenum are not emitted. If we stopped on
+		 * the next "Name:" line, it is handled by the next pass. */
+		if (!have_line || strncmp(buf, "MIBenum:", 8) != 0)
+			continue;
+
+		strip_eol(buf);
+
+		mibenum = atoi(skip_space(buf + 8));
+
+		aliases = malloc(1);
+		if (!aliases) {
+			status = 1;
+			break;
+		}
+		*aliases = '\0';
+
+		/* parse aliases */
+		while ((have_line = (fgets(buf, sizeof buf, in) != NULL))) {
+			if (strncmp(buf, "Name:", 5) == 0)
+				break;
+			if (strncmp(buf, "Alias:", 6) != 0)
+				continue;
+
+			strip_eol(buf);
+
+			s = skip_space(buf + 6);
+
+			if (*s == '\0' || strncmp(s, "None", 4) == 0)
+				/* ignore this */
+				continue;
+
+			if (strstr(s, "preferred MIME name") != NULL) {
+				/* The name seen so far is demoted to an alias
+				 * and this alias becomes the canonical name.
+				 * (Appending in place avoids sprintf() with
+				 * overlapping source and destination.) */
+				if (append_alias(&aliases, name,
+						strlen(name)) != 0) {
+					status = 1;
+					break;
+				}
+				copy_token(name, sizeof name, s);
+			} else if (append_alias(&aliases, s,
+					token_length(s)) != 0) {
+				status = 1;
+				break;
+			}
+		}
+
+		if (status != 0) {
+			/* realloc() failure leaves the old block allocated */
+			free(aliases);
+			break;
+		}
+
+		fprintf(out, "%s\t", name);
+
+		/* Rounded up to tab stop */
+		namelen = (strlen(name) + 8) & ~(size_t) (8 - 1);
+		while (namelen < 3 * 8) {
+			fputc('\t', out);
+			namelen += 8;
+		}
+
+		fprintf(out, "%d", mibenum);
+
+		if (aliases[0] != '\0')
+			fprintf(out, "\t\t%s", aliases);
+		for (i = 0; i != EXTRAS_SIZE; i++) {
+			if (strcmp(name, extras[i].canon) == 0) {
+				fprintf(out, "%s%s",
+					aliases[0] == '\0' ? "\t\t" : " ",
+					extras[i].aliases);
+				break;
+			}
+		}
+		fprintf(out, "\n");
+
+		free(aliases);
+	}
+
+	fclose(in);
+
+	if (status != 0) {
+		/* Out of memory: the record list is incomplete, so report
+		 * failure instead of finishing off a truncated file. */
+		fclose(out);
+		return status;
+	}
+
+	in = fopen(BOTTOM, "r");
+	if (!in) {
+		fclose(out);
+		return 1;
+	}
+
+	copy_stream(in, out);
+
+	fclose(in);
+
+	/* Report write failures rather than claiming success */
+	if (ferror(out))
+		status = 1;
+	if (fclose(out) != 0)
+		status = 1;
+
+	return status;
+}
+