From 2fee3114b46682b220c73aae7c4f2466dedb81ff Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Sat, 4 Dec 2010 15:28:50 +0000 Subject: Remove init/final code and turn aliases into static data structure. r=vince svn path=/trunk/libparserutils/; revision=10961 --- build/Aliases | 302 +++++++++++++++++++++++ build/make-aliases.pl | 124 ++++++++++ include/parserutils/parserutils.h | 7 - src/Makefile | 2 - src/charset/Makefile | 10 +- src/charset/aliases.c | 459 ++++++----------------------------- src/charset/aliases.h | 14 +- src/charset/charset.c | 54 ----- src/charset/charset.h | 24 -- src/parserutils.c | 54 ----- test/INDEX | 2 - test/Makefile | 4 +- test/aliases.c | 17 -- test/charset.c | 31 --- test/cscodec-8859.c | 7 - test/cscodec-ext8.c | 7 - test/cscodec-utf16.c | 7 - test/cscodec-utf8.c | 7 - test/data/Aliases | 302 ----------------------- test/filter.c | 6 - test/inputstream.c | 6 - test/parserutils.c | 30 --- test/regression/INDEX | 1 - test/regression/Makefile | 2 +- test/regression/cscodec-segv.c | 38 --- test/regression/filter-badenc-segv.c | 5 - test/regression/filter-segv.c | 5 - test/regression/stream-nomem.c | 5 - 28 files changed, 509 insertions(+), 1023 deletions(-) create mode 100644 build/Aliases create mode 100644 build/make-aliases.pl delete mode 100644 src/charset/charset.c delete mode 100644 src/charset/charset.h delete mode 100644 src/parserutils.c delete mode 100644 test/charset.c delete mode 100644 test/data/Aliases delete mode 100644 test/parserutils.c delete mode 100644 test/regression/cscodec-segv.c diff --git a/build/Aliases b/build/Aliases new file mode 100644 index 0000000..db61ff1 --- /dev/null +++ b/build/Aliases @@ -0,0 +1,302 @@ +# > Unicode:Files.Aliases +# Mapping of character set encoding names to their canonical form +# +# Lines starting with a '#' are comments, blank lines are ignored. +# +# Based on http://www.iana.org/assignments/character-sets and +# http://www.iana.org/assignments/ianacharset-mib +# +# Canonical Form MIBenum Aliases... +# +US-ASCII 3 iso-ir-6 ANSI_X3.4-1986 ISO_646.irv:1991 ASCII ISO646-US ANSI_X3.4-1968 us IBM367 cp367 csASCII +ISO-10646-UTF-1 27 csISO10646UTF1 +ISO_646.basic:1983 28 ref csISO646basic1983 +INVARIANT 29 csINVARIANT +ISO_646.irv:1983 30 iso-ir-2 irv csISO2IntlRefVersion +BS_4730 20 iso-ir-4 ISO646-GB gb uk csISO4UnitedKingdom +NATS-SEFI 31 iso-ir-8-1 csNATSSEFI +NATS-SEFI-ADD 32 iso-ir-8-2 csNATSSEFIADD +NATS-DANO 33 iso-ir-9-1 csNATSDANO +NATS-DANO-ADD 34 iso-ir-9-2 csNATSDANOADD +SEN_850200_B 35 iso-ir-10 FI ISO646-FI ISO646-SE se csISO10Swedish +SEN_850200_C 21 iso-ir-11 ISO646-SE2 se2 csISO11SwedishForNames +KS_C_5601-1987 36 iso-ir-149 KS_C_5601-1989 KSC_5601 korean csKSC56011987 +ISO-2022-KR 37 csISO2022KR +EUC-KR 38 csEUCKR EUCKR +ISO-2022-JP 39 csISO2022JP +ISO-2022-JP-2 40 csISO2022JP2 +ISO-2022-CN 104 +ISO-2022-CN-EXT 105 +JIS_C6220-1969-jp 41 JIS_C6220-1969 iso-ir-13 katakana x0201-7 csISO13JISC6220jp +JIS_C6220-1969-ro 42 iso-ir-14 jp ISO646-JP csISO14JISC6220ro +IT 22 iso-ir-15 ISO646-IT csISO15Italian +PT 43 iso-ir-16 ISO646-PT csISO16Portuguese +ES 23 iso-ir-17 ISO646-ES csISO17Spanish +greek7-old 44 iso-ir-18 csISO18Greek7Old +latin-greek 45 iso-ir-19 csISO19LatinGreek +DIN_66003 24 iso-ir-21 de ISO646-DE csISO21German +NF_Z_62-010_(1973) 46 iso-ir-25 ISO646-FR1 csISO25French +Latin-greek-1 47 iso-ir-27 csISO27LatinGreek1 +ISO_5427 48 iso-ir-37 csISO5427Cyrillic +JIS_C6226-1978 49 iso-ir-42 csISO42JISC62261978 +BS_viewdata 50 iso-ir-47 csISO47BSViewdata +INIS 51 iso-ir-49 csISO49INIS +INIS-8 52 iso-ir-50 csISO50INIS8 +INIS-cyrillic 53 iso-ir-51 csISO51INISCyrillic +ISO_5427:1981 54 iso-ir-54 ISO5427Cyrillic1981 +ISO_5428:1980 55 iso-ir-55 csISO5428Greek +GB_1988-80 56 iso-ir-57 cn ISO646-CN csISO57GB1988 +GB_2312-80 57 iso-ir-58 chinese csISO58GB231280 +NS_4551-1 25 iso-ir-60 ISO646-NO no csISO60DanishNorwegian csISO60Norwegian1 +NS_4551-2 58 ISO646-NO2 iso-ir-61 no2 csISO61Norwegian2 +NF_Z_62-010 26 iso-ir-69 ISO646-FR fr csISO69French +videotex-suppl 59 iso-ir-70 csISO70VideotexSupp1 +PT2 60 iso-ir-84 ISO646-PT2 csISO84Portuguese2 +ES2 61 iso-ir-85 ISO646-ES2 csISO85Spanish2 +MSZ_7795.3 62 iso-ir-86 ISO646-HU hu csISO86Hungarian +JIS_C6226-1983 63 iso-ir-87 x0208 JIS_X0208-1983 csISO87JISX0208 +greek7 64 iso-ir-88 csISO88Greek7 +ASMO_449 65 ISO_9036 arabic7 iso-ir-89 csISO89ASMO449 +iso-ir-90 66 csISO90 +JIS_C6229-1984-a 67 iso-ir-91 jp-ocr-a csISO91JISC62291984a +JIS_C6229-1984-b 68 iso-ir-92 ISO646-JP-OCR-B jp-ocr-b csISO92JISC62991984b +JIS_C6229-1984-b-add 69 iso-ir-93 jp-ocr-b-add csISO93JIS62291984badd +JIS_C6229-1984-hand 70 iso-ir-94 jp-ocr-hand csISO94JIS62291984hand +JIS_C6229-1984-hand-add 71 iso-ir-95 jp-ocr-hand-add csISO95JIS62291984handadd +JIS_C6229-1984-kana 72 iso-ir-96 csISO96JISC62291984kana +ISO_2033-1983 73 iso-ir-98 e13b csISO2033 +ANSI_X3.110-1983 74 iso-ir-99 CSA_T500-1983 NAPLPS csISO99NAPLPS +ISO-8859-1 4 iso-ir-100 ISO_8859-1 ISO_8859-1:1987 latin1 l1 IBM819 CP819 csISOLatin1 8859_1 ISO8859-1 +ISO-8859-2 5 iso-ir-101 ISO_8859-2 ISO_8859-2:1987 latin2 l2 csISOLatin2 8859_2 ISO8859-2 +T.61-7bit 75 iso-ir-102 csISO102T617bit +T.61-8bit 76 T.61 iso-ir-103 csISO103T618bit +ISO-8859-3 6 iso-ir-109 ISO_8859-3 ISO_8859-3:1988 latin3 l3 csISOLatin3 8859_3 ISO8859-3 +ISO-8859-4 7 iso-ir-110 ISO_8859-4 ISO_8859-4:1988 latin4 l4 csISOLatin4 8859_4 ISO8859-4 +ECMA-cyrillic 77 iso-ir-111 KOI8-E csISO111ECMACyrillic +CSA_Z243.4-1985-1 78 iso-ir-121 ISO646-CA csa7-1 ca csISO121Canadian1 +CSA_Z243.4-1985-2 79 iso-ir-122 ISO646-CA2 csa7-2 csISO122Canadian2 +CSA_Z243.4-1985-gr 80 iso-ir-123 csISO123CSAZ24341985gr +ISO-8859-6 9 iso-ir-127 ISO_8859-6 ISO_8859-6:1987 ECMA-114 ASMO-708 arabic csISOLatinArabic +ISO-8859-6-E 81 csISO88596E ISO_8859-6-E +ISO-8859-6-I 82 csISO88596I ISO_8859-6-I +ISO-8859-7 10 iso-ir-126 ISO_8859-7 ISO_8859-7:1987 ELOT_928 ECMA-118 greek greek8 csISOLatinGreek 8859_7 ISO8859-7 +T.101-G2 83 iso-ir-128 csISO128T101G2 +ISO-8859-8 11 iso-ir-138 ISO_8859-8 ISO_8859-8:1988 hebrew csISOLatinHebrew 8859_8 ISO8859-8 +ISO-8859-8-E 84 csISO88598E ISO_8859-8-E +ISO-8859-8-I 85 csISO88598I ISO_8859-8-I +CSN_369103 86 iso-ir-139 csISO139CSN369103 +JUS_I.B1.002 87 iso-ir-141 ISO646-YU js yu csISO141JUSIB1002 +ISO_6937-2-add 14 iso-ir-142 csISOTextComm +IEC_P27-1 88 iso-ir-143 csISO143IECP271 +ISO-8859-5 8 iso-ir-144 ISO_8859-5 ISO_8859-5:1988 cyrillic csISOLatinCyrillic 8859_5 ISO8859-5 +JUS_I.B1.003-serb 89 iso-ir-146 serbian csISO146Serbian +JUS_I.B1.003-mac 90 macedonian iso-ir-147 csISO147Macedonian +ISO-8859-9 12 iso-ir-148 ISO_8859-9 ISO_8859-9:1989 latin5 l5 csISOLatin5 8859_9 ISO8859-9 +greek-ccitt 91 iso-ir-150 csISO150 csISO150GreekCCITT +NC_NC00-10:81 92 cuba iso-ir-151 ISO646-CU csISO151Cuba +ISO_6937-2-25 93 iso-ir-152 csISO6937Add +GOST_19768-74 94 ST_SEV_358-88 iso-ir-153 csISO153GOST1976874 +ISO_8859-supp 95 iso-ir-154 latin1-2-5 csISO8859Supp +ISO_10367-box 96 iso-ir-155 csISO10367Box +ISO-8859-10 13 iso-ir-157 l6 ISO_8859-10:1992 csISOLatin6 latin6 8859_10 ISO8859-10 +latin-lap 97 lap iso-ir-158 csISO158Lap +JIS_X0212-1990 98 x0212 iso-ir-159 csISO159JISX02121990 +DS_2089 99 DS2089 ISO646-DK dk csISO646Danish +us-dk 100 csUSDK +dk-us 101 csDKUS +JIS_X0201 15 X0201 csHalfWidthKatakana +KSC5636 102 ISO646-KR csKSC5636 +ISO-10646-UCS-2 1000 csUnicode UCS-2 UCS2 +ISO-10646-UCS-4 1001 csUCS4 UCS-4 UCS4 +DEC-MCS 2008 dec csDECMCS +hp-roman8 2004 roman8 r8 csHPRoman8 +macintosh 2027 mac csMacintosh MACROMAN MAC-ROMAN X-MAC-ROMAN +IBM037 2028 cp037 ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037 +IBM038 2029 EBCDIC-INT cp038 csIBM038 +IBM273 2030 CP273 csIBM273 +IBM274 2031 EBCDIC-BE CP274 csIBM274 +IBM275 2032 EBCDIC-BR cp275 csIBM275 +IBM277 2033 EBCDIC-CP-DK EBCDIC-CP-NO csIBM277 +IBM278 2034 CP278 ebcdic-cp-fi ebcdic-cp-se csIBM278 +IBM280 2035 CP280 ebcdic-cp-it csIBM280 +IBM281 2036 EBCDIC-JP-E cp281 csIBM281 +IBM284 2037 CP284 ebcdic-cp-es csIBM284 +IBM285 2038 CP285 ebcdic-cp-gb csIBM285 +IBM290 2039 cp290 EBCDIC-JP-kana csIBM290 +IBM297 2040 cp297 ebcdic-cp-fr csIBM297 +IBM420 2041 cp420 ebcdic-cp-ar1 csIBM420 +IBM423 2042 cp423 ebcdic-cp-gr csIBM423 +IBM424 2043 cp424 ebcdic-cp-he csIBM424 +IBM437 2011 cp437 437 csPC8CodePage437 +IBM500 2044 CP500 ebcdic-cp-be ebcdic-cp-ch csIBM500 +IBM775 2087 cp775 csPC775Baltic +IBM850 2009 cp850 850 csPC850Multilingual +IBM851 2045 cp851 851 csIBM851 +IBM852 2010 cp852 852 csPCp852 +IBM855 2046 cp855 855 csIBM855 +IBM857 2047 cp857 857 csIBM857 +IBM860 2048 cp860 860 csIBM860 +IBM861 2049 cp861 861 cp-is csIBM861 +IBM862 2013 cp862 862 csPC862LatinHebrew +IBM863 2050 cp863 863 csIBM863 +IBM864 2051 cp864 csIBM864 +IBM865 2052 cp865 865 csIBM865 +IBM866 2086 cp866 866 csIBM866 +IBM868 2053 CP868 cp-ar csIBM868 +IBM869 2054 cp869 869 cp-gr csIBM869 +IBM870 2055 CP870 ebcdic-cp-roece ebcdic-cp-yu csIBM870 +IBM871 2056 CP871 ebcdic-cp-is csIBM871 +IBM880 2057 cp880 EBCDIC-Cyrillic csIBM880 +IBM891 2058 cp891 csIBM891 +IBM903 2059 cp903 csIBM903 +IBM904 2060 cp904 904 csIBBM904 +IBM905 2061 CP905 ebcdic-cp-tr csIBM905 +IBM918 2062 CP918 ebcdic-cp-ar2 csIBM918 +IBM1026 2063 CP1026 csIBM1026 +EBCDIC-AT-DE 2064 csIBMEBCDICATDE +EBCDIC-AT-DE-A 2065 csEBCDICATDEA +EBCDIC-CA-FR 2066 csEBCDICCAFR +EBCDIC-DK-NO 2067 csEBCDICDKNO +EBCDIC-DK-NO-A 2068 csEBCDICDKNOA +EBCDIC-FI-SE 2069 csEBCDICFISE +EBCDIC-FI-SE-A 2070 csEBCDICFISEA +EBCDIC-FR 2071 csEBCDICFR +EBCDIC-IT 2072 csEBCDICIT +EBCDIC-PT 2073 csEBCDICPT +EBCDIC-ES 2074 csEBCDICES +EBCDIC-ES-A 2075 csEBCDICESA +EBCDIC-ES-S 2076 csEBCDICESS +EBCDIC-UK 2077 csEBCDICUK +EBCDIC-US 2078 csEBCDICUS +UNKNOWN-8BIT 2079 csUnknown8BiT +MNEMONIC 2080 csMnemonic +MNEM 2081 csMnem +VISCII 2082 csVISCII +VIQR 2083 csVIQR +KOI8-R 2084 csKOI8R +KOI8-U 2088 +IBM00858 2089 CCSID00858 CP00858 PC-Multilingual-850+euro +IBM00924 2090 CCSID00924 CP00924 ebcdic-Latin9--euro +IBM01140 2091 CCSID01140 CP01140 ebcdic-us-37+euro +IBM01141 2092 CCSID01141 CP01141 ebcdic-de-273+euro +IBM01142 2093 CCSID01142 CP01142 ebcdic-dk-277+euro ebcdic-no-277+euro +IBM01143 2094 CCSID01143 CP01143 ebcdic-fi-278+euro ebcdic-se-278+euro +IBM01144 2095 CCSID01144 CP01144 ebcdic-it-280+euro +IBM01145 2096 CCSID01145 CP01145 ebcdic-es-284+euro +IBM01146 2097 CCSID01146 CP01146 ebcdic-gb-285+euro +IBM01147 2098 CCSID01147 CP01147 ebcdic-fr-297+euro +IBM01148 2099 CCSID01148 CP01148 ebcdic-international-500+euro +IBM01149 2100 CCSID01149 CP01149 ebcdic-is-871+euro +Big5-HKSCS 2101 +IBM1047 2102 IBM-1047 +PTCP154 2103 csPTCP154 PT154 CP154 Cyrillic-Asian +Amiga-1251 2104 Ami1251 Amiga1251 Ami-1251 +KOI7-switched 2105 +UNICODE-1-1 1010 csUnicode11 +SCSU 1011 +UTF-7 1012 +UTF-16BE 1013 +UTF-16LE 1014 +UTF-16 1015 +CESU-8 1016 csCESU-8 +UTF-32 1017 +UTF-32BE 1018 +UTF-32LE 1019 +BOCU-1 1020 csBOCU-1 +UNICODE-1-1-UTF-7 103 csUnicode11UTF7 +UTF-8 106 UNICODE-1-1-UTF-8 UNICODE-2-0-UTF-8 utf8 +ISO-8859-13 109 8859_13 ISO8859-13 +ISO-8859-14 110 iso-ir-199 ISO_8859-14:1998 ISO_8859-14 latin8 iso-celtic l8 8859_14 ISO8859-14 +ISO-8859-15 111 ISO_8859-15 Latin-9 8859_15 ISO8859-15 +ISO-8859-16 112 iso-ir-226 ISO_8859-16:2001 ISO_8859-16 latin10 l10 +GBK 113 CP936 MS936 windows-936 +GB18030 114 +OSD_EBCDIC_DF04_15 115 +OSD_EBCDIC_DF03_IRV 116 +OSD_EBCDIC_DF04_1 117 +JIS_Encoding 16 csJISEncoding +Shift_JIS 17 MS_Kanji csShiftJIS X-SJIS Shift-JIS +EUC-JP 18 csEUCPkdFmtJapanese Extended_UNIX_Code_Packed_Format_for_Japanese EUCJP +Extended_UNIX_Code_Fixed_Width_for_Japanese 19 csEUCFixWidJapanese +ISO-10646-UCS-Basic 1002 csUnicodeASCII +ISO-10646-Unicode-Latin1 1003 csUnicodeLatin1 ISO-10646 +ISO-Unicode-IBM-1261 1005 csUnicodeIBM1261 +ISO-Unicode-IBM-1268 1006 csUnicodeIBM1268 +ISO-Unicode-IBM-1276 1007 csUnicodeIBM1276 +ISO-Unicode-IBM-1264 1008 csUnicodeIBM1264 +ISO-Unicode-IBM-1265 1009 csUnicodeIBM1265 +ISO-8859-1-Windows-3.0-Latin-1 2000 csWindows30Latin1 +ISO-8859-1-Windows-3.1-Latin-1 2001 csWindows31Latin1 +ISO-8859-2-Windows-Latin-2 2002 csWindows31Latin2 +ISO-8859-9-Windows-Latin-5 2003 csWindows31Latin5 +Adobe-Standard-Encoding 2005 csAdobeStandardEncoding +Ventura-US 2006 csVenturaUS +Ventura-International 2007 csVenturaInternational +PC8-Danish-Norwegian 2012 csPC8DanishNorwegian +PC8-Turkish 2014 csPC8Turkish +IBM-Symbols 2015 csIBMSymbols +IBM-Thai 2016 csIBMThai +HP-Legal 2017 csHPLegal +HP-Pi-font 2018 csHPPiFont +HP-Math8 2019 csHPMath8 +Adobe-Symbol-Encoding 2020 csHPPSMath +HP-DeskTop 2021 csHPDesktop +Ventura-Math 2022 csVenturaMath +Microsoft-Publishing 2023 csMicrosoftPublishing +Windows-31J 2024 csWindows31J +GB2312 2025 csGB2312 EUC-CN EUCCN CN-GB +Big5 2026 csBig5 BIG-FIVE BIG-5 CN-BIG5 BIG_FIVE +windows-1250 2250 CP1250 MS-EE +windows-1251 2251 CP1251 MS-CYRL +windows-1252 2252 CP1252 MS-ANSI +windows-1253 2253 CP1253 MS-GREEK +windows-1254 2254 CP1254 MS-TURK +windows-1255 2255 +windows-1256 2256 CP1256 MS-ARAB +windows-1257 2257 CP1257 WINBALTRIM +windows-1258 2258 +TIS-620 2259 +HZ-GB-2312 2085 + +# Additional encodings not defined by IANA + +# Arbitrary allocations +#CP737 3001 +#CP853 3002 +#CP856 3003 +CP874 3004 WINDOWS-874 +#CP922 3005 +#CP1046 3006 +#CP1124 3007 +#CP1125 3008 WINDOWS-1125 +#CP1129 3009 +#CP1133 3010 IBM-CP1133 +#CP1161 3011 IBM-1161 IBM1161 CSIBM1161 +#CP1162 3012 IBM-1162 IBM1162 CSIBM1162 +#CP1163 3013 IBM-1163 IBM1163 CSIBM1163 +#GEORGIAN-ACADEMY 3014 +#GEORGIAN-PS 3015 +#KOI8-RU 3016 +#KOI8-T 3017 +#MACARABIC 3018 X-MAC-ARABIC MAC-ARABIC +#MACCROATIAN 3019 X-MAC-CROATIAN MAC-CROATIAN +#MACGREEK 3020 X-MAC-GREEK MAC-GREEK +#MACHEBREW 3021 X-MAC-HEBREW MAC-HEBREW +#MACICELAND 3022 X-MAC-ICELAND MAC-ICELAND +#MACROMANIA 3023 X-MAC-ROMANIA MAC-ROMANIA +#MACTHAI 3024 X-MAC-THAI MAC-THAI +#MACTURKISH 3025 X-MAC-TURKISH MAC-TURKISH +#MULELAO-1 3026 + +# From Unicode Lib +ISO-IR-182 4000 +ISO-IR-197 4002 +ISO-2022-JP-1 4008 +MACCYRILLIC 4009 X-MAC-CYRILLIC MAC-CYRILLIC +MACUKRAINE 4010 X-MAC-UKRAINIAN MAC-UKRAINIAN +MACCENTRALEUROPE 4011 X-MAC-CENTRALEURROMAN MAC-CENTRALEURROMAN +JOHAB 4012 +ISO-8859-11 4014 iso-ir-166 ISO_8859-11 ISO8859-11 8859_11 +X-CURRENT 4999 X-SYSTEM +X-ACORN-LATIN1 5001 +X-ACORN-FUZZY 5002 diff --git a/build/make-aliases.pl b/build/make-aliases.pl new file mode 100644 index 0000000..f677c06 --- /dev/null +++ b/build/make-aliases.pl @@ -0,0 +1,124 @@ +#!/usr/bin/perl -w +# This file is part of LibParserUtils. +# Licensed under the MIT License, +# http://www.opensource.org/licenses/mit-license.php +# Copyright 2010 Daniel Silverstone +# John-Mark Bell + +use strict; + +use constant ALIAS_FILE => 'build/Aliases'; +use constant ALIAS_INC => 'src/charset/aliases.inc'; + +use constant UNICODE_CHARSETS => + [ + qr'^ISO-10646-UCS-[24]$', + qr'^UTF-16', + qr'^UTF-8$', + qr'^UTF-32' + ]; + +open(INFILE, "<", ALIAS_FILE) || die "Unable to open " . ALIAS_FILE; + +my %charsets; + +while (my $line = ) { + last unless (defined $line); + next if ($line =~ /^#/); + chomp $line; + next if ($line eq ''); + my @elements = split /\s+/, $line; + my $canon = shift @elements; + my $mibenum = shift @elements; + $charsets{$canon} = [$mibenum, \@elements]; +} + +close(INFILE); + +my $unicodeexp = ""; + +my $output = <<'EOH'; +/* + * This file is part of LibParserUtils. + * Licensed under the MIT License, + * http://www.opensource.org/licenses/mit-license.php + * Copyright 2010 The NetSurf Project. + * + * Note: This file is automatically generated by make-aliases.pl + * + * Do not edit file file, changes will be overwritten during build. + */ + +static parserutils_charset_aliases_canon canonical_charset_names[] = { +EOH + +my %aliases; +my $canonnr = 0; +foreach my $canon (sort keys %charsets) { + my ($mibenum, $elements) = @{$charsets{$canon}}; + # Ordering must match struct in src/charset/aliases.h + $output .= "\t{ " . $mibenum . ", " . length($canon) . ', "' . $canon . '" },' . "\n"; + my $isunicode = 0; + foreach my $unirexp (@{UNICODE_CHARSETS()}) { + $isunicode = 1 if ($canon =~ $unirexp); + } + if ($isunicode == 1) { + $unicodeexp .= "((x) == $mibenum) || "; + } + $canon =~ y/A-Z/a-z/; + $canon =~ s/[^a-z0-9]//g; + $aliases{$canon} = $canonnr; + foreach my $alias (@$elements) { + $alias =~ y/A-Z/a-z/; + $alias =~ s/[^a-z0-9]//g; + $aliases{$alias} = $canonnr; + } + $canonnr += 1; +} + +$output .= "};\n\nstatic const uint16_t charset_aliases_canon_count = ${canonnr};\n\n"; + +$output .= <<'EOT'; +typedef struct { + uint16_t name_len; + const char *name; + parserutils_charset_aliases_canon *canon; +} parserutils_charset_aliases_alias; + +static parserutils_charset_aliases_alias charset_aliases[] = { +EOT + +my $aliascount = 0; + +foreach my $alias (sort keys %aliases) { + my $canonnr = $aliases{$alias}; + $output .= "\t{ " . length($alias) . ', "' . $alias . '", &canonical_charset_names[' . $canonnr . "] },\n"; + $aliascount += 1; +} + +$output .= "};\n\n"; + +# Drop the final " || " +chop $unicodeexp; +chop $unicodeexp; +chop $unicodeexp; +chop $unicodeexp; + +$output .= <<"EOS"; +static const uint16_t charset_aliases_count = ${aliascount}; + +#define MIBENUM_IS_UNICODE(x) ($unicodeexp) +EOS + +if (open(EXISTING, "<", ALIAS_INC)) { + local $/ = undef(); + my $now = ; + undef($output) if ($output eq $now); + close(EXISTING); +} + +if (defined($output)) { + open(OUTF, ">", ALIAS_INC); + print OUTF $output; + close(OUTF); +} diff --git a/include/parserutils/parserutils.h b/include/parserutils/parserutils.h index aa2b5ac..8518f42 100644 --- a/include/parserutils/parserutils.h +++ b/include/parserutils/parserutils.h @@ -17,13 +17,6 @@ extern "C" #include #include -/* Initialise the ParserUtils library for use */ -parserutils_error parserutils_initialise(const char *aliases_file, - parserutils_alloc alloc, void *pw); - -/* Clean up after ParserUtils */ -parserutils_error parserutils_finalise(parserutils_alloc alloc, void *pw); - #ifdef __cplusplus } #endif diff --git a/src/Makefile b/src/Makefile index 334dd43..3cbaf86 100644 --- a/src/Makefile +++ b/src/Makefile @@ -1,4 +1,2 @@ -# Sources -DIR_SOURCES := parserutils.c include build/makefiles/Makefile.subdir diff --git a/src/charset/Makefile b/src/charset/Makefile index a4c8f64..d851b8e 100644 --- a/src/charset/Makefile +++ b/src/charset/Makefile @@ -1,4 +1,12 @@ # Sources -DIR_SOURCES := aliases.c charset.c codec.c +DIR_SOURCES := aliases.c codec.c + +$(DIR)aliases.c: $(DIR)aliases.inc + +$(DIR)aliases.inc: build/make-aliases.pl build/Aliases + $(VQ)$(ECHO) " ALIAS: $@" + $(Q)$(PERL) build/make-aliases.pl + +CLEAN_ITEMS := $(CLEAN_ITEMS) $(DIR)aliases.inc include build/makefiles/Makefile.subdir diff --git a/src/charset/aliases.c b/src/charset/aliases.c index 188a275..5c173d0 100644 --- a/src/charset/aliases.c +++ b/src/charset/aliases.c @@ -17,151 +17,77 @@ #include "charset/aliases.h" #include "utils/utils.h" -struct alias { - struct alias *next; - parserutils_charset_aliases_canon *canon; - uint16_t name_len; - char name[1]; -}; +/* Bring in the aliases tables */ +#include "aliases.inc" -#define HASH_SIZE (43) -static parserutils_charset_aliases_canon *canon_tab[HASH_SIZE]; -static struct alias *alias_tab[HASH_SIZE]; +typedef struct { + size_t slen; + const char *s; +} lengthed_string; -static parserutils_error parserutils_charset_create_alias(const char *alias, - parserutils_charset_aliases_canon *c, - parserutils_alloc alloc, void *pw); -static parserutils_charset_aliases_canon *parserutils_charset_create_canon( - const char *canon, uint16_t mibenum, - parserutils_alloc alloc, void *pw); -static int aliascmp(const char *s1, const char *s2, size_t s2_len); -static uint32_t parserutils_charset_hash_val(const char *alias, size_t len); -/** - * Create alias data from Aliases file - * - * \param filename The path to the Aliases file - * \param alloc Memory (de)allocation function - * \param pw Pointer to client-specific private data (may be NULL) - * \return PARSERUTILS_OK on success, appropriate error otherwise. - */ -parserutils_error parserutils_charset_aliases_create(const char *filename, - parserutils_alloc alloc, void *pw) -{ - char buf[300]; - FILE *fp; - - if (filename == NULL || alloc == NULL) - return PARSERUTILS_BADPARM; - - fp = fopen(filename, "r"); - if (fp == NULL) - return PARSERUTILS_FILENOTFOUND; - - while (fgets(buf, sizeof buf, fp)) { - char *p, *aliases = 0, *mib, *end; - parserutils_charset_aliases_canon *cf; - - if (buf[0] == 0 || buf[0] == '#') - /* skip blank lines or comments */ - continue; - - buf[strlen(buf) - 1] = 0; /* lose terminating newline */ - end = buf + strlen(buf); - - /* find end of canonical form */ - for (p = buf; *p && !isspace(*p) && !iscntrl(*p); p++) - ; /* do nothing */ - if (p >= end) - continue; - *p++ = '\0'; /* terminate canonical form */ - - /* skip whitespace */ - for (; *p && isspace(*p); p++) - ; /* do nothing */ - if (p >= end) - continue; - mib = p; - - /* find end of mibenum */ - for (; *p && !isspace(*p) && !iscntrl(*p); p++) - ; /* do nothing */ - if (p < end) - *p++ = '\0'; /* terminate mibenum */ - - cf = parserutils_charset_create_canon(buf, atoi(mib), alloc, pw); - if (cf == NULL) - continue; - - /* skip whitespace */ - for (; p < end && *p && isspace(*p); p++) - ; /* do nothing */ - if (p >= end) - continue; - aliases = p; - - while (p < end) { - /* find end of alias */ - for (; *p && !isspace(*p) && !iscntrl(*p); p++) - ; /* do nothing */ - if (p > end) - /* stop if we've gone past the end */ - break; - /* terminate current alias */ - *p++ = '\0'; +#define IS_PUNCT_OR_SPACE(x) \ + (!(((x) >= 'A' && (x) <= 'Z') || \ + ((x) >= 'a' && (x) <= 'z') || \ + ((x) >= '0' && (x) <= '9'))) - if (parserutils_charset_create_alias(aliases, cf, - alloc, pw) != PARSERUTILS_OK) - break; - /* in terminating, we may have advanced - * past the end - check this here */ - if (p >= end) - break; - - /* skip whitespace */ - for (; *p && isspace(*p); p++) - ; /* do nothing */ - - if (p >= end) - /* gone past end => stop */ - break; - - /* update pointer to current alias */ - aliases = p; - } - } - - fclose(fp); - - return PARSERUTILS_OK; +static int parserutils_charset_alias_match(const void *a, const void *b) +{ + lengthed_string *s = (lengthed_string *)a; + parserutils_charset_aliases_alias *alias = (parserutils_charset_aliases_alias*)b; + size_t key_left = s->slen; + size_t alias_left = alias->name_len; + const char *s_alias = alias->name; + const char *s_key = s->s; + int cmpret; + + while ((key_left > 0) && (alias_left > 0)) { + while ((key_left > 0) && IS_PUNCT_OR_SPACE(*s_key)) { + key_left--; s_key++; + } + + if (key_left == 0) + break; + + cmpret = tolower(*s_key) - *s_alias; + + if (cmpret != 0) { + return cmpret; + } + + key_left--; + s_key++; + alias_left--; + s_alias++; + } + + return key_left - alias_left; } /** - * Free all alias data + * Retrieve the canonical form of an alias name * - * \param alloc Memory (de)allocation function - * \param pw Pointer to client-specific private data + * \param alias The alias name + * \param len The length of the alias name + * \return Pointer to canonical form or NULL if not found */ -void parserutils_charset_aliases_destroy(parserutils_alloc alloc, void *pw) +parserutils_charset_aliases_canon *parserutils_charset_alias_canonicalise( + const char *alias, size_t len) { - parserutils_charset_aliases_canon *c, *d; - struct alias *a, *b; - int i; - - for (i = 0; i != HASH_SIZE; i++) { - for (c = canon_tab[i]; c; c = d) { - d = c->next; - alloc(c, 0, pw); - } - canon_tab[i] = NULL; - - for (a = alias_tab[i]; a; a = b) { - b = a->next; - alloc(a, 0, pw); - } - alias_tab[i] = NULL; - } + parserutils_charset_aliases_alias *c; + lengthed_string s = {len, alias}; + + c = (parserutils_charset_aliases_alias*)bsearch(&s, + &charset_aliases[0], + charset_aliases_count, + sizeof(parserutils_charset_aliases_alias), + parserutils_charset_alias_match); + + if (c == NULL) + return NULL; + + return c->canon; } /** @@ -195,13 +121,14 @@ const char *parserutils_charset_mibenum_to_name(uint16_t mibenum) { int i; parserutils_charset_aliases_canon *c; - - for (i = 0; i != HASH_SIZE; i++) - for (c = canon_tab[i]; c; c = c->next) - if (c->mib_enum == mibenum) - return c->name; - - return NULL; + + for (i = 0; i < charset_aliases_canon_count; ++i) { + c = &canonical_charset_names[i]; + if (c->mib_enum == mibenum) + return c->name; + } + + return NULL; } /** @@ -212,253 +139,5 @@ const char *parserutils_charset_mibenum_to_name(uint16_t mibenum) */ bool parserutils_charset_mibenum_is_unicode(uint16_t mibenum) { - static uint16_t ucs4; - static uint16_t ucs2; - static uint16_t utf8; - static uint16_t utf16; - static uint16_t utf16be; - static uint16_t utf16le; - static uint16_t utf32; - static uint16_t utf32be; - static uint16_t utf32le; - - if (ucs4 == 0) { - ucs4 = parserutils_charset_mibenum_from_name("UCS-4", - SLEN("UCS-4")); - ucs2 = parserutils_charset_mibenum_from_name("UCS-2", - SLEN("UCS-2")); - utf8 = parserutils_charset_mibenum_from_name("UTF-8", - SLEN("UTF-8")); - utf16 = parserutils_charset_mibenum_from_name("UTF-16", - SLEN("UTF-16")); - utf16be = parserutils_charset_mibenum_from_name("UTF-16BE", - SLEN("UTF-16BE")); - utf16le = parserutils_charset_mibenum_from_name("UTF-16LE", - SLEN("UTF-16LE")); - utf32 = parserutils_charset_mibenum_from_name("UTF-32", - SLEN("UTF-32")); - utf32be = parserutils_charset_mibenum_from_name("UTF-32BE", - SLEN("UTF-32BE")); - utf32le = parserutils_charset_mibenum_from_name("UTF-32LE", - SLEN("UTF-32LE")); - } - - return (mibenum == ucs4 || mibenum == ucs2 || mibenum == utf8 || - mibenum == utf16 || mibenum == utf16be || - mibenum == utf16le || mibenum == utf32 || - mibenum == utf32be || mibenum == utf32le); -} - -#define IS_PUNCT_OR_SPACE(x) \ - ((0x09 <= (x) && (x) <= 0x0D) || \ - (0x20 <= (x) && (x) <= 0x2F) || \ - (0x3A <= (x) && (x) <= 0x40) || \ - (0x5B <= (x) && (x) <= 0x60) || \ - (0x7B <= (x) && (x) <= 0x7E)) - - -/** - * Compare name "s1" to name "s2" (of size s2_len) case-insensitively - * and ignoring ASCII punctuation characters. - * - * See http://www.whatwg.org/specs/web-apps/current-work/#character0 - * - * \param s1 Alias to compare to - * \param s2 Alias to compare - * \param s2_len Length of "s2" - * \returns 0 if equal, 1 otherwise - */ -int aliascmp(const char *s1, const char *s2, size_t s2_len) -{ - size_t s2_pos = 0; - - if (s1 == NULL || s2_len == 0) - return 1; - - while (true) { - while (IS_PUNCT_OR_SPACE(*s1)) - s1++; - while (s2_pos < s2_len && - IS_PUNCT_OR_SPACE(s2[s2_pos])) { - s2_pos++; - } - - if (s2_pos == s2_len) - return (*s1 != '\0') ? 1 : 0; - - if (tolower(*s1) != tolower(s2[s2_pos])) - break; - s1++; - s2_pos++; - } - - return 1; -} - - -/** - * Retrieve the canonical form of an alias name - * - * \param alias The alias name - * \param len The length of the alias name - * \return Pointer to canonical form or NULL if not found - */ -parserutils_charset_aliases_canon *parserutils_charset_alias_canonicalise( - const char *alias, size_t len) -{ - uint32_t hash; - parserutils_charset_aliases_canon *c; - struct alias *a; - - if (alias == NULL) - return NULL; - - hash = parserutils_charset_hash_val(alias, len); - - for (c = canon_tab[hash]; c; c = c->next) - if (aliascmp(c->name, alias, len) == 0) - break; - if (c) - return c; - - for (a = alias_tab[hash]; a; a = a->next) - if (aliascmp(a->name, alias, len) == 0) - break; - if (a) - return a->canon; - - return NULL; -} - - -/** - * Create an alias - * - * \param alias The alias name - * \param c The canonical form - * \param alloc Memory (de)allocation function - * \param pw Pointer to client-specific private data (may be NULL) - * \return PARSERUTILS_OK on success, appropriate error otherwise - */ -parserutils_error parserutils_charset_create_alias(const char *alias, - parserutils_charset_aliases_canon *c, - parserutils_alloc alloc, void *pw) -{ - struct alias *a; - uint32_t hash; - - if (alias == NULL || c == NULL || alloc == NULL) - return PARSERUTILS_BADPARM; - - a = alloc(NULL, sizeof(struct alias) + strlen(alias) + 1, pw); - if (a == NULL) - return PARSERUTILS_NOMEM; - - a->canon = c; - a->name_len = strlen(alias); - strcpy(a->name, alias); - a->name[a->name_len] = '\0'; - - hash = parserutils_charset_hash_val(alias, a->name_len); - - a->next = alias_tab[hash]; - alias_tab[hash] = a; - - return PARSERUTILS_OK; -} - -/** - * Create a canonical form - * - * \param canon The canonical name - * \param mibenum The MIB enum value - * \param alloc Memory (de)allocation function - * \param pw Pointer to client-specific private data (may be NULL) - * \return Pointer to canonical form or NULL on error - */ -parserutils_charset_aliases_canon *parserutils_charset_create_canon( - const char *canon, uint16_t mibenum, - parserutils_alloc alloc, void *pw) -{ - parserutils_charset_aliases_canon *c; - uint32_t hash, len; - - if (canon == NULL || alloc == NULL) - return NULL; - - len = strlen(canon); - - c = alloc(NULL, sizeof(parserutils_charset_aliases_canon) + len + 1, pw); - if (c == NULL) - return NULL; - - c->mib_enum = mibenum; - c->name_len = len; - strcpy(c->name, canon); - c->name[len] = '\0'; - - hash = parserutils_charset_hash_val(canon, len); - - c->next = canon_tab[hash]; - canon_tab[hash] = c; - - return c; -} - -/** - * Hash function - * - * \param alias String to hash - * \param len Number of bytes to hash (<= strlen(alias)) - * \return The hashed value - */ -uint32_t parserutils_charset_hash_val(const char *alias, size_t len) -{ - const char *s = alias; - uint32_t h = 5381; - - if (alias == NULL) - return 0; - - while (len--) { - if (IS_PUNCT_OR_SPACE(*s)) { - s++; - } else { - h = (h * 33) ^ (*s++ & ~0x20); /* case insensitive */ - } - } - - return h % HASH_SIZE; -} - - -#ifndef NDEBUG -/** - * Dump all alias data to stdout - */ -void parserutils_charset_aliases_dump(void) -{ - parserutils_charset_aliases_canon *c; - struct alias *a; - int i; - size_t size = 0; - - for (i = 0; i != HASH_SIZE; i++) { - for (c = canon_tab[i]; c; c = c->next) { - printf("%d %s\n", i, c->name); - size += offsetof(parserutils_charset_aliases_canon, - name) + c->name_len; - } - - for (a = alias_tab[i]; a; a = a->next) { - printf("%d %s\n", i, a->name); - size += offsetof(struct alias, name) + a->name_len; - } - } - - size += (sizeof(canon_tab) / sizeof(canon_tab[0])); - size += (sizeof(alias_tab) / sizeof(alias_tab[0])); - - printf("%u\n", (unsigned int) size); + return MIBENUM_IS_UNICODE(mibenum); } -#endif diff --git a/src/charset/aliases.h b/src/charset/aliases.h index 9abd2c8..189f8d5 100644 --- a/src/charset/aliases.h +++ b/src/charset/aliases.h @@ -13,24 +13,14 @@ #include typedef struct parserutils_charset_aliases_canon { - struct parserutils_charset_aliases_canon *next; + /* Do not change the ordering here without changing make-aliases.pl */ uint16_t mib_enum; uint16_t name_len; - char name[1]; + const char *name; } parserutils_charset_aliases_canon; -/* Load encoding aliases from file */ -parserutils_error parserutils_charset_aliases_create(const char *filename, - parserutils_alloc alloc, void *pw); -/* Destroy encoding aliases */ -void parserutils_charset_aliases_destroy(parserutils_alloc alloc, void *pw); - /* Canonicalise an alias name */ parserutils_charset_aliases_canon *parserutils_charset_alias_canonicalise( const char *alias, size_t len); -#ifndef NDEBUG -void parserutils_charset_aliases_dump(void); -#endif - #endif diff --git a/src/charset/charset.c b/src/charset/charset.c deleted file mode 100644 index 3ef1a71..0000000 --- a/src/charset/charset.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * This file is part of LibParserUtils. - * Licensed under the MIT License, - * http://www.opensource.org/licenses/mit-license.php - * Copyright 2007 John-Mark Bell - */ - -#include "charset/aliases.h" -#include "charset/charset.h" - -/** - * Initialise the Charset library for use. - * - * This _must_ be called before using any libparserutils charset functions - * - * \param aliases_file Pointer to name of file containing encoding alias data - * \param alloc Pointer to (de)allocation function - * \param pw Pointer to client-specific private data (may be NULL) - * \return PARSERUTILS_OK on success, applicable error otherwise. - */ -parserutils_error parserutils_charset_initialise(const char *aliases_file, - parserutils_alloc alloc, void *pw) -{ - parserutils_error error; - - if (aliases_file == NULL || alloc == NULL) - return PARSERUTILS_BADPARM; - - error = parserutils_charset_aliases_create(aliases_file, alloc, pw); - if (error != PARSERUTILS_OK) - return error; - - return PARSERUTILS_OK; -} - -/** - * Clean up after Libparserutils - * - * \param alloc Pointer to (de)allocation function - * \param pw Pointer to client-specific private data (may be NULL) - * \return PARSERUTILS_OK on success, applicable error otherwise. - */ -parserutils_error parserutils_charset_finalise(parserutils_alloc alloc, - void *pw) -{ - if (alloc == NULL) - return PARSERUTILS_BADPARM; - - parserutils_charset_aliases_destroy(alloc, pw); - - return PARSERUTILS_OK; -} - - diff --git a/src/charset/charset.h b/src/charset/charset.h deleted file mode 100644 index 4b07577..0000000 --- a/src/charset/charset.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * This file is part of LibParserUtils. - * Licensed under the MIT License, - * http://www.opensource.org/licenses/mit-license.php - * Copyright 2007 John-Mark Bell - */ - -#ifndef parserutils_charset_charset_h_ -#define parserutils_charset_charset_h_ - -#include -#include -#include - -/* Initialise the Charset library for use */ -parserutils_error parserutils_charset_initialise(const char *aliases_file, - parserutils_alloc alloc, void *pw); - -/* Clean up after Charset */ -parserutils_error parserutils_charset_finalise(parserutils_alloc alloc, - void *pw); - -#endif - diff --git a/src/parserutils.c b/src/parserutils.c deleted file mode 100644 index ed9b21f..0000000 --- a/src/parserutils.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * This file is part of LibParserUtils. - * Licensed under the MIT License, - * http://www.opensource.org/licenses/mit-license.php - * Copyright 2007 John-Mark Bell - */ - -#include - -#include "charset/charset.h" - -/** - * Initialise the ParserUtils library for use. - * - * This _must_ be called before using any libparserutils functions - * - * \param aliases_file Pointer to name of file containing encoding alias data - * \param alloc Pointer to (de)allocation function - * \param pw Pointer to client-specific private data (may be NULL) - * \return PARSERUTILS_OK on success, applicable error otherwise. - */ -parserutils_error parserutils_initialise(const char *aliases_file, - parserutils_alloc alloc, void *pw) -{ - parserutils_error error; - - if (aliases_file == NULL || alloc == NULL) - return PARSERUTILS_BADPARM; - - error = parserutils_charset_initialise(aliases_file, alloc, pw); - if (error != PARSERUTILS_OK) - return error; - - return PARSERUTILS_OK; -} - -/** - * Clean up after Libparserutils - * - * \param alloc Pointer to (de)allocation function - * \param pw Pointer to client-specific private data (may be NULL) - * \return PARSERUTILS_OK on success, applicable error otherwise. - */ -parserutils_error parserutils_finalise(parserutils_alloc alloc, void *pw) -{ - if (alloc == NULL) - return PARSERUTILS_BADPARM; - - parserutils_charset_finalise(alloc, pw); - - return PARSERUTILS_OK; -} - - diff --git a/test/INDEX b/test/INDEX index 537124b..2ada1a8 100644 --- a/test/INDEX +++ b/test/INDEX @@ -2,8 +2,6 @@ # # Test Description DataDir -charset Charset initialisation/finalisation -parserutils Library initialisation/finalisation aliases Encoding alias handling cscodec-utf8 UTF-8 charset codec implementation cscodec-utf8 cscodec-utf16 UTF-16 charset codec implementation cscodec-utf16 diff --git a/test/Makefile b/test/Makefile index 4dc7c7b..afd38a7 100644 --- a/test/Makefile +++ b/test/Makefile @@ -1,7 +1,7 @@ # Tests DIR_TEST_ITEMS := aliases:aliases.c cscodec-8859:cscodec-8859.c \ cscodec-ext8:cscodec-ext8.c cscodec-utf8:cscodec-utf8.c \ - cscodec-utf16:cscodec-utf16.c charset:charset.c filter:filter.c \ - inputstream:inputstream.c parserutils:parserutils.c + cscodec-utf16:cscodec-utf16.c filter:filter.c \ + inputstream:inputstream.c include build/makefiles/Makefile.subdir diff --git a/test/aliases.c b/test/aliases.c index b0e8e60..9a9f962 100644 --- a/test/aliases.c +++ b/test/aliases.c @@ -5,15 +5,6 @@ #include "testutils.h" -extern void charset_aliases_dump(void); - -static void *myrealloc(void *ptr, size_t len, void *pw) -{ - UNUSED(pw); - - return realloc(ptr, len); -} - int main (int argc, char **argv) { parserutils_charset_aliases_canon *c; @@ -23,12 +14,6 @@ int main (int argc, char **argv) return 1; } - parserutils_charset_aliases_create(argv[1], myrealloc, NULL); - -#ifndef NDEBUG - parserutils_charset_aliases_dump(); -#endif - c = parserutils_charset_alias_canonicalise("moose", 5); if (c) { printf("FAIL - found invalid encoding 'moose'\n"); @@ -65,8 +50,6 @@ int main (int argc, char **argv) return 1; } - parserutils_charset_aliases_destroy(myrealloc, NULL); - printf("PASS\n"); return 0; diff --git a/test/charset.c b/test/charset.c deleted file mode 100644 index a793e7e..0000000 --- a/test/charset.c +++ /dev/null @@ -1,31 +0,0 @@ -#include -#include - -#include "charset/charset.h" - -#include "testutils.h" - -static void *myrealloc(void *ptr, size_t len, void *pw) -{ - UNUSED(pw); - - return realloc(ptr, len); -} - -int main(int argc, char **argv) -{ - if (argc != 2) { - printf("Usage: %s \n", argv[0]); - return 1; - } - - assert(parserutils_charset_initialise(argv[1], myrealloc, NULL) == - PARSERUTILS_OK); - - assert (parserutils_charset_finalise(myrealloc, NULL) == - PARSERUTILS_OK); - - printf("PASS\n"); - - return 0; -} diff --git a/test/cscodec-8859.c b/test/cscodec-8859.c index 4d0c8b4..7fd5a2f 100644 --- a/test/cscodec-8859.c +++ b/test/cscodec-8859.c @@ -2,7 +2,6 @@ #include #include -#include "charset/charset.h" #include #include "utils/utils.h" @@ -48,9 +47,6 @@ int main(int argc, char **argv) return 1; } - assert(parserutils_charset_initialise(argv[1], myrealloc, NULL) == - PARSERUTILS_OK); - assert(parserutils_charset_codec_create("NATS-SEFI-ADD", myrealloc, NULL, &codec) == PARSERUTILS_BADENCODING); @@ -92,9 +88,6 @@ int main(int argc, char **argv) parserutils_charset_codec_destroy(ctx.codec); - assert(parserutils_charset_finalise(myrealloc, NULL) == - PARSERUTILS_OK); - printf("PASS\n"); return 0; diff --git a/test/cscodec-ext8.c b/test/cscodec-ext8.c index 4d0c8b4..7fd5a2f 100644 --- a/test/cscodec-ext8.c +++ b/test/cscodec-ext8.c @@ -2,7 +2,6 @@ #include #include -#include "charset/charset.h" #include #include "utils/utils.h" @@ -48,9 +47,6 @@ int main(int argc, char **argv) return 1; } - assert(parserutils_charset_initialise(argv[1], myrealloc, NULL) == - PARSERUTILS_OK); - assert(parserutils_charset_codec_create("NATS-SEFI-ADD", myrealloc, NULL, &codec) == PARSERUTILS_BADENCODING); @@ -92,9 +88,6 @@ int main(int argc, char **argv) parserutils_charset_codec_destroy(ctx.codec); - assert(parserutils_charset_finalise(myrealloc, NULL) == - PARSERUTILS_OK); - printf("PASS\n"); return 0; diff --git a/test/cscodec-utf16.c b/test/cscodec-utf16.c index e2f3c52..dd0ebb1 100644 --- a/test/cscodec-utf16.c +++ b/test/cscodec-utf16.c @@ -6,7 +6,6 @@ #include #include -#include "charset/charset.h" #include #include "utils/utils.h" @@ -51,9 +50,6 @@ int main(int argc, char **argv) return 1; } - assert(parserutils_charset_initialise(argv[1], myrealloc, NULL) == - PARSERUTILS_OK); - assert(parserutils_charset_codec_create("NATS-SEFI-ADD", myrealloc, NULL, &codec) == PARSERUTILS_BADENCODING); @@ -101,9 +97,6 @@ int main(int argc, char **argv) parserutils_charset_codec_destroy(ctx.codec); - assert(parserutils_charset_finalise(myrealloc, NULL) == - PARSERUTILS_OK); - printf("PASS\n"); return 0; diff --git a/test/cscodec-utf8.c b/test/cscodec-utf8.c index 3e18e9e..b0dcb89 100644 --- a/test/cscodec-utf8.c +++ b/test/cscodec-utf8.c @@ -1,7 +1,6 @@ #include #include -#include "charset/charset.h" #include #include "utils/utils.h" @@ -46,9 +45,6 @@ int main(int argc, char **argv) return 1; } - assert(parserutils_charset_initialise(argv[1], myrealloc, NULL) == - PARSERUTILS_OK); - assert(parserutils_charset_codec_create("NATS-SEFI-ADD", myrealloc, NULL, &codec) == PARSERUTILS_BADENCODING); @@ -92,9 +88,6 @@ int main(int argc, char **argv) parserutils_charset_codec_destroy(ctx.codec); - assert(parserutils_charset_finalise(myrealloc, NULL) == - PARSERUTILS_OK); - printf("PASS\n"); return 0; diff --git a/test/data/Aliases b/test/data/Aliases deleted file mode 100644 index db61ff1..0000000 --- a/test/data/Aliases +++ /dev/null @@ -1,302 +0,0 @@ -# > Unicode:Files.Aliases -# Mapping of character set encoding names to their canonical form -# -# Lines starting with a '#' are comments, blank lines are ignored. -# -# Based on http://www.iana.org/assignments/character-sets and -# http://www.iana.org/assignments/ianacharset-mib -# -# Canonical Form MIBenum Aliases... -# -US-ASCII 3 iso-ir-6 ANSI_X3.4-1986 ISO_646.irv:1991 ASCII ISO646-US ANSI_X3.4-1968 us IBM367 cp367 csASCII -ISO-10646-UTF-1 27 csISO10646UTF1 -ISO_646.basic:1983 28 ref csISO646basic1983 -INVARIANT 29 csINVARIANT -ISO_646.irv:1983 30 iso-ir-2 irv csISO2IntlRefVersion -BS_4730 20 iso-ir-4 ISO646-GB gb uk csISO4UnitedKingdom -NATS-SEFI 31 iso-ir-8-1 csNATSSEFI -NATS-SEFI-ADD 32 iso-ir-8-2 csNATSSEFIADD -NATS-DANO 33 iso-ir-9-1 csNATSDANO -NATS-DANO-ADD 34 iso-ir-9-2 csNATSDANOADD -SEN_850200_B 35 iso-ir-10 FI ISO646-FI ISO646-SE se csISO10Swedish -SEN_850200_C 21 iso-ir-11 ISO646-SE2 se2 csISO11SwedishForNames -KS_C_5601-1987 36 iso-ir-149 KS_C_5601-1989 KSC_5601 korean csKSC56011987 -ISO-2022-KR 37 csISO2022KR -EUC-KR 38 csEUCKR EUCKR -ISO-2022-JP 39 csISO2022JP -ISO-2022-JP-2 40 csISO2022JP2 -ISO-2022-CN 104 -ISO-2022-CN-EXT 105 -JIS_C6220-1969-jp 41 JIS_C6220-1969 iso-ir-13 katakana x0201-7 csISO13JISC6220jp -JIS_C6220-1969-ro 42 iso-ir-14 jp ISO646-JP csISO14JISC6220ro -IT 22 iso-ir-15 ISO646-IT csISO15Italian -PT 43 iso-ir-16 ISO646-PT csISO16Portuguese -ES 23 iso-ir-17 ISO646-ES csISO17Spanish -greek7-old 44 iso-ir-18 csISO18Greek7Old -latin-greek 45 iso-ir-19 csISO19LatinGreek -DIN_66003 24 iso-ir-21 de ISO646-DE csISO21German -NF_Z_62-010_(1973) 46 iso-ir-25 ISO646-FR1 csISO25French -Latin-greek-1 47 iso-ir-27 csISO27LatinGreek1 -ISO_5427 48 iso-ir-37 csISO5427Cyrillic -JIS_C6226-1978 49 iso-ir-42 csISO42JISC62261978 -BS_viewdata 50 iso-ir-47 csISO47BSViewdata -INIS 51 iso-ir-49 csISO49INIS -INIS-8 52 iso-ir-50 csISO50INIS8 -INIS-cyrillic 53 iso-ir-51 csISO51INISCyrillic -ISO_5427:1981 54 iso-ir-54 ISO5427Cyrillic1981 -ISO_5428:1980 55 iso-ir-55 csISO5428Greek -GB_1988-80 56 iso-ir-57 cn ISO646-CN csISO57GB1988 -GB_2312-80 57 iso-ir-58 chinese csISO58GB231280 -NS_4551-1 25 iso-ir-60 ISO646-NO no csISO60DanishNorwegian csISO60Norwegian1 -NS_4551-2 58 ISO646-NO2 iso-ir-61 no2 csISO61Norwegian2 -NF_Z_62-010 26 iso-ir-69 ISO646-FR fr csISO69French -videotex-suppl 59 iso-ir-70 csISO70VideotexSupp1 -PT2 60 iso-ir-84 ISO646-PT2 csISO84Portuguese2 -ES2 61 iso-ir-85 ISO646-ES2 csISO85Spanish2 -MSZ_7795.3 62 iso-ir-86 ISO646-HU hu csISO86Hungarian -JIS_C6226-1983 63 iso-ir-87 x0208 JIS_X0208-1983 csISO87JISX0208 -greek7 64 iso-ir-88 csISO88Greek7 -ASMO_449 65 ISO_9036 arabic7 iso-ir-89 csISO89ASMO449 -iso-ir-90 66 csISO90 -JIS_C6229-1984-a 67 iso-ir-91 jp-ocr-a csISO91JISC62291984a -JIS_C6229-1984-b 68 iso-ir-92 ISO646-JP-OCR-B jp-ocr-b csISO92JISC62991984b -JIS_C6229-1984-b-add 69 iso-ir-93 jp-ocr-b-add csISO93JIS62291984badd -JIS_C6229-1984-hand 70 iso-ir-94 jp-ocr-hand csISO94JIS62291984hand -JIS_C6229-1984-hand-add 71 iso-ir-95 jp-ocr-hand-add csISO95JIS62291984handadd -JIS_C6229-1984-kana 72 iso-ir-96 csISO96JISC62291984kana -ISO_2033-1983 73 iso-ir-98 e13b csISO2033 -ANSI_X3.110-1983 74 iso-ir-99 CSA_T500-1983 NAPLPS csISO99NAPLPS -ISO-8859-1 4 iso-ir-100 ISO_8859-1 ISO_8859-1:1987 latin1 l1 IBM819 CP819 csISOLatin1 8859_1 ISO8859-1 -ISO-8859-2 5 iso-ir-101 ISO_8859-2 ISO_8859-2:1987 latin2 l2 csISOLatin2 8859_2 ISO8859-2 -T.61-7bit 75 iso-ir-102 csISO102T617bit -T.61-8bit 76 T.61 iso-ir-103 csISO103T618bit -ISO-8859-3 6 iso-ir-109 ISO_8859-3 ISO_8859-3:1988 latin3 l3 csISOLatin3 8859_3 ISO8859-3 -ISO-8859-4 7 iso-ir-110 ISO_8859-4 ISO_8859-4:1988 latin4 l4 csISOLatin4 8859_4 ISO8859-4 -ECMA-cyrillic 77 iso-ir-111 KOI8-E csISO111ECMACyrillic -CSA_Z243.4-1985-1 78 iso-ir-121 ISO646-CA csa7-1 ca csISO121Canadian1 -CSA_Z243.4-1985-2 79 iso-ir-122 ISO646-CA2 csa7-2 csISO122Canadian2 -CSA_Z243.4-1985-gr 80 iso-ir-123 csISO123CSAZ24341985gr -ISO-8859-6 9 iso-ir-127 ISO_8859-6 ISO_8859-6:1987 ECMA-114 ASMO-708 arabic csISOLatinArabic -ISO-8859-6-E 81 csISO88596E ISO_8859-6-E -ISO-8859-6-I 82 csISO88596I ISO_8859-6-I -ISO-8859-7 10 iso-ir-126 ISO_8859-7 ISO_8859-7:1987 ELOT_928 ECMA-118 greek greek8 csISOLatinGreek 8859_7 ISO8859-7 -T.101-G2 83 iso-ir-128 csISO128T101G2 -ISO-8859-8 11 iso-ir-138 ISO_8859-8 ISO_8859-8:1988 hebrew csISOLatinHebrew 8859_8 ISO8859-8 -ISO-8859-8-E 84 csISO88598E ISO_8859-8-E -ISO-8859-8-I 85 csISO88598I ISO_8859-8-I -CSN_369103 86 iso-ir-139 csISO139CSN369103 -JUS_I.B1.002 87 iso-ir-141 ISO646-YU js yu csISO141JUSIB1002 -ISO_6937-2-add 14 iso-ir-142 csISOTextComm -IEC_P27-1 88 iso-ir-143 csISO143IECP271 -ISO-8859-5 8 iso-ir-144 ISO_8859-5 ISO_8859-5:1988 cyrillic csISOLatinCyrillic 8859_5 ISO8859-5 -JUS_I.B1.003-serb 89 iso-ir-146 serbian csISO146Serbian -JUS_I.B1.003-mac 90 macedonian iso-ir-147 csISO147Macedonian -ISO-8859-9 12 iso-ir-148 ISO_8859-9 ISO_8859-9:1989 latin5 l5 csISOLatin5 8859_9 ISO8859-9 -greek-ccitt 91 iso-ir-150 csISO150 csISO150GreekCCITT -NC_NC00-10:81 92 cuba iso-ir-151 ISO646-CU csISO151Cuba -ISO_6937-2-25 93 iso-ir-152 csISO6937Add -GOST_19768-74 94 ST_SEV_358-88 iso-ir-153 csISO153GOST1976874 -ISO_8859-supp 95 iso-ir-154 latin1-2-5 csISO8859Supp -ISO_10367-box 96 iso-ir-155 csISO10367Box -ISO-8859-10 13 iso-ir-157 l6 ISO_8859-10:1992 csISOLatin6 latin6 8859_10 ISO8859-10 -latin-lap 97 lap iso-ir-158 csISO158Lap -JIS_X0212-1990 98 x0212 iso-ir-159 csISO159JISX02121990 -DS_2089 99 DS2089 ISO646-DK dk csISO646Danish -us-dk 100 csUSDK -dk-us 101 csDKUS -JIS_X0201 15 X0201 csHalfWidthKatakana -KSC5636 102 ISO646-KR csKSC5636 -ISO-10646-UCS-2 1000 csUnicode UCS-2 UCS2 -ISO-10646-UCS-4 1001 csUCS4 UCS-4 UCS4 -DEC-MCS 2008 dec csDECMCS -hp-roman8 2004 roman8 r8 csHPRoman8 -macintosh 2027 mac csMacintosh MACROMAN MAC-ROMAN X-MAC-ROMAN -IBM037 2028 cp037 ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037 -IBM038 2029 EBCDIC-INT cp038 csIBM038 -IBM273 2030 CP273 csIBM273 -IBM274 2031 EBCDIC-BE CP274 csIBM274 -IBM275 2032 EBCDIC-BR cp275 csIBM275 -IBM277 2033 EBCDIC-CP-DK EBCDIC-CP-NO csIBM277 -IBM278 2034 CP278 ebcdic-cp-fi ebcdic-cp-se csIBM278 -IBM280 2035 CP280 ebcdic-cp-it csIBM280 -IBM281 2036 EBCDIC-JP-E cp281 csIBM281 -IBM284 2037 CP284 ebcdic-cp-es csIBM284 -IBM285 2038 CP285 ebcdic-cp-gb csIBM285 -IBM290 2039 cp290 EBCDIC-JP-kana csIBM290 -IBM297 2040 cp297 ebcdic-cp-fr csIBM297 -IBM420 2041 cp420 ebcdic-cp-ar1 csIBM420 -IBM423 2042 cp423 ebcdic-cp-gr csIBM423 -IBM424 2043 cp424 ebcdic-cp-he csIBM424 -IBM437 2011 cp437 437 csPC8CodePage437 -IBM500 2044 CP500 ebcdic-cp-be ebcdic-cp-ch csIBM500 -IBM775 2087 cp775 csPC775Baltic -IBM850 2009 cp850 850 csPC850Multilingual -IBM851 2045 cp851 851 csIBM851 -IBM852 2010 cp852 852 csPCp852 -IBM855 2046 cp855 855 csIBM855 -IBM857 2047 cp857 857 csIBM857 -IBM860 2048 cp860 860 csIBM860 -IBM861 2049 cp861 861 cp-is csIBM861 -IBM862 2013 cp862 862 csPC862LatinHebrew -IBM863 2050 cp863 863 csIBM863 -IBM864 2051 cp864 csIBM864 -IBM865 2052 cp865 865 csIBM865 -IBM866 2086 cp866 866 csIBM866 -IBM868 2053 CP868 cp-ar csIBM868 -IBM869 2054 cp869 869 cp-gr csIBM869 -IBM870 2055 CP870 ebcdic-cp-roece ebcdic-cp-yu csIBM870 -IBM871 2056 CP871 ebcdic-cp-is csIBM871 -IBM880 2057 cp880 EBCDIC-Cyrillic csIBM880 -IBM891 2058 cp891 csIBM891 -IBM903 2059 cp903 csIBM903 -IBM904 2060 cp904 904 csIBBM904 -IBM905 2061 CP905 ebcdic-cp-tr csIBM905 -IBM918 2062 CP918 ebcdic-cp-ar2 csIBM918 -IBM1026 2063 CP1026 csIBM1026 -EBCDIC-AT-DE 2064 csIBMEBCDICATDE -EBCDIC-AT-DE-A 2065 csEBCDICATDEA -EBCDIC-CA-FR 2066 csEBCDICCAFR -EBCDIC-DK-NO 2067 csEBCDICDKNO -EBCDIC-DK-NO-A 2068 csEBCDICDKNOA -EBCDIC-FI-SE 2069 csEBCDICFISE -EBCDIC-FI-SE-A 2070 csEBCDICFISEA -EBCDIC-FR 2071 csEBCDICFR -EBCDIC-IT 2072 csEBCDICIT -EBCDIC-PT 2073 csEBCDICPT -EBCDIC-ES 2074 csEBCDICES -EBCDIC-ES-A 2075 csEBCDICESA -EBCDIC-ES-S 2076 csEBCDICESS -EBCDIC-UK 2077 csEBCDICUK -EBCDIC-US 2078 csEBCDICUS -UNKNOWN-8BIT 2079 csUnknown8BiT -MNEMONIC 2080 csMnemonic -MNEM 2081 csMnem -VISCII 2082 csVISCII -VIQR 2083 csVIQR -KOI8-R 2084 csKOI8R -KOI8-U 2088 -IBM00858 2089 CCSID00858 CP00858 PC-Multilingual-850+euro -IBM00924 2090 CCSID00924 CP00924 ebcdic-Latin9--euro -IBM01140 2091 CCSID01140 CP01140 ebcdic-us-37+euro -IBM01141 2092 CCSID01141 CP01141 ebcdic-de-273+euro -IBM01142 2093 CCSID01142 CP01142 ebcdic-dk-277+euro ebcdic-no-277+euro -IBM01143 2094 CCSID01143 CP01143 ebcdic-fi-278+euro ebcdic-se-278+euro -IBM01144 2095 CCSID01144 CP01144 ebcdic-it-280+euro -IBM01145 2096 CCSID01145 CP01145 ebcdic-es-284+euro -IBM01146 2097 CCSID01146 CP01146 ebcdic-gb-285+euro -IBM01147 2098 CCSID01147 CP01147 ebcdic-fr-297+euro -IBM01148 2099 CCSID01148 CP01148 ebcdic-international-500+euro -IBM01149 2100 CCSID01149 CP01149 ebcdic-is-871+euro -Big5-HKSCS 2101 -IBM1047 2102 IBM-1047 -PTCP154 2103 csPTCP154 PT154 CP154 Cyrillic-Asian -Amiga-1251 2104 Ami1251 Amiga1251 Ami-1251 -KOI7-switched 2105 -UNICODE-1-1 1010 csUnicode11 -SCSU 1011 -UTF-7 1012 -UTF-16BE 1013 -UTF-16LE 1014 -UTF-16 1015 -CESU-8 1016 csCESU-8 -UTF-32 1017 -UTF-32BE 1018 -UTF-32LE 1019 -BOCU-1 1020 csBOCU-1 -UNICODE-1-1-UTF-7 103 csUnicode11UTF7 -UTF-8 106 UNICODE-1-1-UTF-8 UNICODE-2-0-UTF-8 utf8 -ISO-8859-13 109 8859_13 ISO8859-13 -ISO-8859-14 110 iso-ir-199 ISO_8859-14:1998 ISO_8859-14 latin8 iso-celtic l8 8859_14 ISO8859-14 -ISO-8859-15 111 ISO_8859-15 Latin-9 8859_15 ISO8859-15 -ISO-8859-16 112 iso-ir-226 ISO_8859-16:2001 ISO_8859-16 latin10 l10 -GBK 113 CP936 MS936 windows-936 -GB18030 114 -OSD_EBCDIC_DF04_15 115 -OSD_EBCDIC_DF03_IRV 116 -OSD_EBCDIC_DF04_1 117 -JIS_Encoding 16 csJISEncoding -Shift_JIS 17 MS_Kanji csShiftJIS X-SJIS Shift-JIS -EUC-JP 18 csEUCPkdFmtJapanese Extended_UNIX_Code_Packed_Format_for_Japanese EUCJP -Extended_UNIX_Code_Fixed_Width_for_Japanese 19 csEUCFixWidJapanese -ISO-10646-UCS-Basic 1002 csUnicodeASCII -ISO-10646-Unicode-Latin1 1003 csUnicodeLatin1 ISO-10646 -ISO-Unicode-IBM-1261 1005 csUnicodeIBM1261 -ISO-Unicode-IBM-1268 1006 csUnicodeIBM1268 -ISO-Unicode-IBM-1276 1007 csUnicodeIBM1276 -ISO-Unicode-IBM-1264 1008 csUnicodeIBM1264 -ISO-Unicode-IBM-1265 1009 csUnicodeIBM1265 -ISO-8859-1-Windows-3.0-Latin-1 2000 csWindows30Latin1 -ISO-8859-1-Windows-3.1-Latin-1 2001 csWindows31Latin1 -ISO-8859-2-Windows-Latin-2 2002 csWindows31Latin2 -ISO-8859-9-Windows-Latin-5 2003 csWindows31Latin5 -Adobe-Standard-Encoding 2005 csAdobeStandardEncoding -Ventura-US 2006 csVenturaUS -Ventura-International 2007 csVenturaInternational -PC8-Danish-Norwegian 2012 csPC8DanishNorwegian -PC8-Turkish 2014 csPC8Turkish -IBM-Symbols 2015 csIBMSymbols -IBM-Thai 2016 csIBMThai -HP-Legal 2017 csHPLegal -HP-Pi-font 2018 csHPPiFont -HP-Math8 2019 csHPMath8 -Adobe-Symbol-Encoding 2020 csHPPSMath -HP-DeskTop 2021 csHPDesktop -Ventura-Math 2022 csVenturaMath -Microsoft-Publishing 2023 csMicrosoftPublishing -Windows-31J 2024 csWindows31J -GB2312 2025 csGB2312 EUC-CN EUCCN CN-GB -Big5 2026 csBig5 BIG-FIVE BIG-5 CN-BIG5 BIG_FIVE -windows-1250 2250 CP1250 MS-EE -windows-1251 2251 CP1251 MS-CYRL -windows-1252 2252 CP1252 MS-ANSI -windows-1253 2253 CP1253 MS-GREEK -windows-1254 2254 CP1254 MS-TURK -windows-1255 2255 -windows-1256 2256 CP1256 MS-ARAB -windows-1257 2257 CP1257 WINBALTRIM -windows-1258 2258 -TIS-620 2259 -HZ-GB-2312 2085 - -# Additional encodings not defined by IANA - -# Arbitrary allocations -#CP737 3001 -#CP853 3002 -#CP856 3003 -CP874 3004 WINDOWS-874 -#CP922 3005 -#CP1046 3006 -#CP1124 3007 -#CP1125 3008 WINDOWS-1125 -#CP1129 3009 -#CP1133 3010 IBM-CP1133 -#CP1161 3011 IBM-1161 IBM1161 CSIBM1161 -#CP1162 3012 IBM-1162 IBM1162 CSIBM1162 -#CP1163 3013 IBM-1163 IBM1163 CSIBM1163 -#GEORGIAN-ACADEMY 3014 -#GEORGIAN-PS 3015 -#KOI8-RU 3016 -#KOI8-T 3017 -#MACARABIC 3018 X-MAC-ARABIC MAC-ARABIC -#MACCROATIAN 3019 X-MAC-CROATIAN MAC-CROATIAN -#MACGREEK 3020 X-MAC-GREEK MAC-GREEK -#MACHEBREW 3021 X-MAC-HEBREW MAC-HEBREW -#MACICELAND 3022 X-MAC-ICELAND MAC-ICELAND -#MACROMANIA 3023 X-MAC-ROMANIA MAC-ROMANIA -#MACTHAI 3024 X-MAC-THAI MAC-THAI -#MACTURKISH 3025 X-MAC-TURKISH MAC-TURKISH -#MULELAO-1 3026 - -# From Unicode Lib -ISO-IR-182 4000 -ISO-IR-197 4002 -ISO-2022-JP-1 4008 -MACCYRILLIC 4009 X-MAC-CYRILLIC MAC-CYRILLIC -MACUKRAINE 4010 X-MAC-UKRAINIAN MAC-UKRAINIAN -MACCENTRALEUROPE 4011 X-MAC-CENTRALEURROMAN MAC-CENTRALEURROMAN -JOHAB 4012 -ISO-8859-11 4014 iso-ir-166 ISO_8859-11 ISO8859-11 8859_11 -X-CURRENT 4999 X-SYSTEM -X-ACORN-LATIN1 5001 -X-ACORN-FUZZY 5002 diff --git a/test/filter.c b/test/filter.c index 044a772..c66b684 100644 --- a/test/filter.c +++ b/test/filter.c @@ -32,10 +32,6 @@ int main(int argc, char **argv) return 1; } - /* Initialise library */ - assert(parserutils_initialise(argv[1], myrealloc, NULL) == - PARSERUTILS_OK); - /* Create input filter */ assert(parserutils_filter_create("UTF-8", myrealloc, NULL, &input) == PARSERUTILS_OK); @@ -349,8 +345,6 @@ int main(int argc, char **argv) /* Clean up */ parserutils_filter_destroy(input); - assert(parserutils_finalise(myrealloc, NULL) == PARSERUTILS_OK); - printf("PASS\n"); return 0; diff --git a/test/inputstream.c b/test/inputstream.c index ec72629..99d3a3d 100644 --- a/test/inputstream.c +++ b/test/inputstream.c @@ -36,10 +36,6 @@ int main(int argc, char **argv) return 1; } - /* Initialise library */ - assert(parserutils_initialise(argv[1], myrealloc, NULL) == - PARSERUTILS_OK); - assert(parserutils_inputstream_create("UTF-8", 1, NULL, myrealloc, NULL, &stream) == PARSERUTILS_OK); @@ -94,8 +90,6 @@ int main(int argc, char **argv) parserutils_inputstream_destroy(stream); - assert(parserutils_finalise(myrealloc, NULL) == PARSERUTILS_OK); - printf("PASS\n"); return 0; diff --git a/test/parserutils.c b/test/parserutils.c deleted file mode 100644 index c6d671a..0000000 --- a/test/parserutils.c +++ /dev/null @@ -1,30 +0,0 @@ -#include -#include - -#include - -#include "testutils.h" - -static void *myrealloc(void *ptr, size_t len, void *pw) -{ - UNUSED(pw); - - return realloc(ptr, len); -} - -int main(int argc, char **argv) -{ - if (argc != 2) { - printf("Usage: %s \n", argv[0]); - return 1; - } - - assert(parserutils_initialise(argv[1], myrealloc, NULL) == - PARSERUTILS_OK); - - assert (parserutils_finalise(myrealloc, NULL) == PARSERUTILS_OK); - - printf("PASS\n"); - - return 0; -} diff --git a/test/regression/INDEX b/test/regression/INDEX index 135e761..f6de6cf 100644 --- a/test/regression/INDEX +++ b/test/regression/INDEX @@ -2,7 +2,6 @@ # # Test Description DataDir -cscodec-segv Segfault in charset codecs filter-segv Segfault in input filtering stream-nomem Inputstream buffer expansion filter-badenc-segv Segfault on resetting bad encoding in filter diff --git a/test/regression/Makefile b/test/regression/Makefile index 0799ea4..2cdeaf7 100644 --- a/test/regression/Makefile +++ b/test/regression/Makefile @@ -1,5 +1,5 @@ # Tests -DIR_TEST_ITEMS := cscodec-segv:cscodec-segv.c filter-segv:filter-segv.c \ +DIR_TEST_ITEMS := filter-segv:filter-segv.c \ stream-nomem:stream-nomem.c filter-badenc-segv:filter-badenc-segv.c CFLAGS := $(CFLAGS) -I$(CURDIR)/test diff --git a/test/regression/cscodec-segv.c b/test/regression/cscodec-segv.c deleted file mode 100644 index 0cf9f69..0000000 --- a/test/regression/cscodec-segv.c +++ /dev/null @@ -1,38 +0,0 @@ -#include - -#include "charset/charset.h" -#include - -#include "testutils.h" - -static void *myrealloc(void *ptr, size_t len, void *pw) -{ - UNUSED(pw); - - return realloc(ptr, len); -} - -int main(int argc, char **argv) -{ - parserutils_charset_codec *codec; - - if (argc != 2) { - printf("Usage: %s \n", argv[0]); - return 1; - } - - assert(parserutils_charset_initialise(argv[1], myrealloc, NULL) == - PARSERUTILS_OK); - - assert(parserutils_charset_codec_create("UTF-8", myrealloc, NULL, - &codec) == PARSERUTILS_OK); - - parserutils_charset_codec_destroy(codec); - - assert(parserutils_charset_finalise(myrealloc, NULL) == - PARSERUTILS_OK); - - printf("PASS\n"); - - return 0; -} diff --git a/test/regression/filter-badenc-segv.c b/test/regression/filter-badenc-segv.c index 09ae864..0c9877a 100644 --- a/test/regression/filter-badenc-segv.c +++ b/test/regression/filter-badenc-segv.c @@ -31,9 +31,6 @@ int main(int argc, char **argv) return 1; } - assert(parserutils_initialise(argv[1], myrealloc, NULL) == - PARSERUTILS_OK); - assert(parserutils_filter_create("UTF-8", myrealloc, NULL, &input) == PARSERUTILS_OK); @@ -49,8 +46,6 @@ int main(int argc, char **argv) parserutils_filter_destroy(input); - assert(parserutils_finalise(myrealloc, NULL) == PARSERUTILS_OK); - printf("PASS\n"); return 0; diff --git a/test/regression/filter-segv.c b/test/regression/filter-segv.c index a884b6d..58905f9 100644 --- a/test/regression/filter-segv.c +++ b/test/regression/filter-segv.c @@ -23,16 +23,11 @@ int main(int argc, char **argv) return 1; } - assert(parserutils_initialise(argv[1], myrealloc, NULL) == - PARSERUTILS_OK); - assert(parserutils_filter_create("UTF-8", myrealloc, NULL, &input) == PARSERUTILS_OK); parserutils_filter_destroy(input); - assert(parserutils_finalise(myrealloc, NULL) == PARSERUTILS_OK); - printf("PASS\n"); return 0; diff --git a/test/regression/stream-nomem.c b/test/regression/stream-nomem.c index fc8d514..2f7707e 100644 --- a/test/regression/stream-nomem.c +++ b/test/regression/stream-nomem.c @@ -49,9 +49,6 @@ int main(int argc, char **argv) input_buffer[BUFFER_SIZE - 7] = '2'; input_buffer[BUFFER_SIZE - 8] = '1'; - assert(parserutils_initialise(argv[1], myrealloc, NULL) == - PARSERUTILS_OK); - assert(parserutils_inputstream_create("UTF-8", 0, NULL, myrealloc, NULL, &stream) == PARSERUTILS_OK); @@ -84,8 +81,6 @@ int main(int argc, char **argv) parserutils_inputstream_destroy(stream); - assert(parserutils_finalise(myrealloc, NULL) == PARSERUTILS_OK); - printf("PASS\n"); return 0; -- cgit v1.2.3