diff options
Diffstat (limited to 'utils')
66 files changed, 4812 insertions, 6003 deletions
diff --git a/utils/DerivedJoiningType.txt b/utils/DerivedJoiningType.txt deleted file mode 100644 index d4dcc85f6..000000000 --- a/utils/DerivedJoiningType.txt +++ /dev/null @@ -1,318 +0,0 @@ -# DerivedJoiningType-5.2.0.txt -# Date: 2009-05-28, 20:37:39 GMT [MD] -# -# Unicode Character Database -# Copyright (c) 1991-2009 Unicode, Inc. -# For terms of use, see http://www.unicode.org/terms_of_use.html -# For documentation, see http://www.unicode.org/reports/tr44/ - -# ================================================ - -# Type T is derived, as described in ArabicShaping.txt - -# All code points not explicitly listed for Joining_Type -# have the value Non_Joining (U). - -# @missing: 0000..10FFFF; Non_Joining - -# ================================================ - -# Joining_Type=Join_Causing - -0640 ; C # Lm ARABIC TATWEEL -07FA ; C # Lm NKO LAJANYALAN -200D ; C # Cf ZERO WIDTH JOINER - -# Total code points: 3 - -# ================================================ - -# Joining_Type=Dual_Joining - -0626 ; D # Lo ARABIC LETTER YEH WITH HAMZA ABOVE -0628 ; D # Lo ARABIC LETTER BEH -062A..062E ; D # Lo [5] ARABIC LETTER TEH..ARABIC LETTER KHAH -0633..063F ; D # Lo [13] ARABIC LETTER SEEN..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE -0641..0647 ; D # Lo [7] ARABIC LETTER FEH..ARABIC LETTER HEH -0649..064A ; D # Lo [2] ARABIC LETTER ALEF MAKSURA..ARABIC LETTER YEH -066E..066F ; D # Lo [2] ARABIC LETTER DOTLESS BEH..ARABIC LETTER DOTLESS QAF -0678..0687 ; D # Lo [16] ARABIC LETTER HIGH HAMZA YEH..ARABIC LETTER TCHEHEH -069A..06BF ; D # Lo [38] ARABIC LETTER SEEN WITH DOT BELOW AND DOT ABOVE..ARABIC LETTER TCHEH WITH DOT ABOVE -06C1..06C2 ; D # Lo [2] ARABIC LETTER HEH GOAL..ARABIC LETTER HEH GOAL WITH HAMZA ABOVE -06CC ; D # Lo ARABIC LETTER FARSI YEH -06CE ; D # Lo ARABIC LETTER YEH WITH SMALL V -06D0..06D1 ; D # Lo [2] ARABIC LETTER E..ARABIC LETTER YEH WITH THREE DOTS BELOW -06FA..06FC ; D # Lo [3] ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER GHAIN WITH DOT BELOW 
-06FF ; D # Lo ARABIC LETTER HEH WITH INVERTED V -0712..0714 ; D # Lo [3] SYRIAC LETTER BETH..SYRIAC LETTER GAMAL GARSHUNI -071A..071D ; D # Lo [4] SYRIAC LETTER HETH..SYRIAC LETTER YUDH -071F..0727 ; D # Lo [9] SYRIAC LETTER KAPH..SYRIAC LETTER REVERSED PE -0729 ; D # Lo SYRIAC LETTER QAPH -072B ; D # Lo SYRIAC LETTER SHIN -072D..072E ; D # Lo [2] SYRIAC LETTER PERSIAN BHETH..SYRIAC LETTER PERSIAN GHAMAL -074E..0758 ; D # Lo [11] SYRIAC LETTER SOGDIAN KHAPH..ARABIC LETTER HAH WITH THREE DOTS POINTING UPWARDS BELOW -075C..076A ; D # Lo [15] ARABIC LETTER SEEN WITH FOUR DOTS ABOVE..ARABIC LETTER LAM WITH BAR -076D..0770 ; D # Lo [4] ARABIC LETTER SEEN WITH TWO DOTS VERTICALLY ABOVE..ARABIC LETTER SEEN WITH SMALL ARABIC LETTER TAH AND TWO DOTS -0772 ; D # Lo ARABIC LETTER HAH WITH SMALL ARABIC LETTER TAH ABOVE -0775..0777 ; D # Lo [3] ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER FARSI YEH WITH EXTENDED ARABIC-INDIC DIGIT FOUR BELOW -077A..077F ; D # Lo [6] ARABIC LETTER YEH BARREE WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER KAF WITH TWO DOTS ABOVE -07CA..07EA ; D # Lo [33] NKO LETTER A..NKO LETTER JONA RA - -# Total code points: 188 - -# ================================================ - -# Joining_Type=Right_Joining - -0622..0625 ; R # Lo [4] ARABIC LETTER ALEF WITH MADDA ABOVE..ARABIC LETTER ALEF WITH HAMZA BELOW -0627 ; R # Lo ARABIC LETTER ALEF -0629 ; R # Lo ARABIC LETTER TEH MARBUTA -062F..0632 ; R # Lo [4] ARABIC LETTER DAL..ARABIC LETTER ZAIN -0648 ; R # Lo ARABIC LETTER WAW -0671..0673 ; R # Lo [3] ARABIC LETTER ALEF WASLA..ARABIC LETTER ALEF WITH WAVY HAMZA BELOW -0675..0677 ; R # Lo [3] ARABIC LETTER HIGH HAMZA ALEF..ARABIC LETTER U WITH HAMZA ABOVE -0688..0699 ; R # Lo [18] ARABIC LETTER DDAL..ARABIC LETTER REH WITH FOUR DOTS ABOVE -06C0 ; R # Lo ARABIC LETTER HEH WITH YEH ABOVE -06C3..06CB ; R # Lo [9] ARABIC LETTER TEH MARBUTA GOAL..ARABIC LETTER VE -06CD ; R # Lo ARABIC LETTER YEH WITH TAIL -06CF ; R # 
Lo ARABIC LETTER WAW WITH DOT ABOVE -06D2..06D3 ; R # Lo [2] ARABIC LETTER YEH BARREE..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE -06D5 ; R # Lo ARABIC LETTER AE -06EE..06EF ; R # Lo [2] ARABIC LETTER DAL WITH INVERTED V..ARABIC LETTER REH WITH INVERTED V -0710 ; R # Lo SYRIAC LETTER ALAPH -0715..0719 ; R # Lo [5] SYRIAC LETTER DALATH..SYRIAC LETTER ZAIN -071E ; R # Lo SYRIAC LETTER YUDH HE -0728 ; R # Lo SYRIAC LETTER SADHE -072A ; R # Lo SYRIAC LETTER RISH -072C ; R # Lo SYRIAC LETTER TAW -072F ; R # Lo SYRIAC LETTER PERSIAN DHALATH -074D ; R # Lo SYRIAC LETTER SOGDIAN ZHAIN -0759..075B ; R # Lo [3] ARABIC LETTER DAL WITH TWO DOTS VERTICALLY BELOW AND SMALL TAH..ARABIC LETTER REH WITH STROKE -076B..076C ; R # Lo [2] ARABIC LETTER REH WITH TWO DOTS VERTICALLY ABOVE..ARABIC LETTER REH WITH HAMZA ABOVE -0771 ; R # Lo ARABIC LETTER REH WITH SMALL ARABIC LETTER TAH AND TWO DOTS -0773..0774 ; R # Lo [2] ARABIC LETTER ALEF WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER ALEF WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE -0778..0779 ; R # Lo [2] ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT TWO ABOVE..ARABIC LETTER WAW WITH EXTENDED ARABIC-INDIC DIGIT THREE ABOVE - -# Total code points: 74 - -# ================================================ - -# Joining_Type=Transparent - -00AD ; T # Cf SOFT HYPHEN -0300..036F ; T # Mn [112] COMBINING GRAVE ACCENT..COMBINING LATIN SMALL LETTER X -0483..0487 ; T # Mn [5] COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE -0488..0489 ; T # Me [2] COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..COMBINING CYRILLIC MILLIONS SIGN -0591..05BD ; T # Mn [45] HEBREW ACCENT ETNAHTA..HEBREW POINT METEG -05BF ; T # Mn HEBREW POINT RAFE -05C1..05C2 ; T # Mn [2] HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT -05C4..05C5 ; T # Mn [2] HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT -05C7 ; T # Mn HEBREW POINT QAMATS QATAN -0610..061A ; T # Mn [11] ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA -064B..065E ; T # Mn [20] ARABIC 
FATHATAN..ARABIC FATHA WITH TWO DOTS -0670 ; T # Mn ARABIC LETTER SUPERSCRIPT ALEF -06D6..06DC ; T # Mn [7] ARABIC SMALL HIGH LIGATURE SAD WITH LAM WITH ALEF MAKSURA..ARABIC SMALL HIGH SEEN -06DE ; T # Me ARABIC START OF RUB EL HIZB -06DF..06E4 ; T # Mn [6] ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH MADDA -06E7..06E8 ; T # Mn [2] ARABIC SMALL HIGH YEH..ARABIC SMALL HIGH NOON -06EA..06ED ; T # Mn [4] ARABIC EMPTY CENTRE LOW STOP..ARABIC SMALL LOW MEEM -070F ; T # Cf SYRIAC ABBREVIATION MARK -0711 ; T # Mn SYRIAC LETTER SUPERSCRIPT ALAPH -0730..074A ; T # Mn [27] SYRIAC PTHAHA ABOVE..SYRIAC BARREKH -07A6..07B0 ; T # Mn [11] THAANA ABAFILI..THAANA SUKUN -07EB..07F3 ; T # Mn [9] NKO COMBINING SHORT HIGH TONE..NKO COMBINING DOUBLE DOT ABOVE -0816..0819 ; T # Mn [4] SAMARITAN MARK IN..SAMARITAN MARK DAGESH -081B..0823 ; T # Mn [9] SAMARITAN MARK EPENTHETIC YUT..SAMARITAN VOWEL SIGN A -0825..0827 ; T # Mn [3] SAMARITAN VOWEL SIGN SHORT A..SAMARITAN VOWEL SIGN U -0829..082D ; T # Mn [5] SAMARITAN VOWEL SIGN LONG I..SAMARITAN MARK NEQUDAA -0900..0902 ; T # Mn [3] DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI SIGN ANUSVARA -093C ; T # Mn DEVANAGARI SIGN NUKTA -0941..0948 ; T # Mn [8] DEVANAGARI VOWEL SIGN U..DEVANAGARI VOWEL SIGN AI -094D ; T # Mn DEVANAGARI SIGN VIRAMA -0951..0955 ; T # Mn [5] DEVANAGARI STRESS SIGN UDATTA..DEVANAGARI VOWEL SIGN CANDRA LONG E -0962..0963 ; T # Mn [2] DEVANAGARI VOWEL SIGN VOCALIC L..DEVANAGARI VOWEL SIGN VOCALIC LL -0981 ; T # Mn BENGALI SIGN CANDRABINDU -09BC ; T # Mn BENGALI SIGN NUKTA -09C1..09C4 ; T # Mn [4] BENGALI VOWEL SIGN U..BENGALI VOWEL SIGN VOCALIC RR -09CD ; T # Mn BENGALI SIGN VIRAMA -09E2..09E3 ; T # Mn [2] BENGALI VOWEL SIGN VOCALIC L..BENGALI VOWEL SIGN VOCALIC LL -0A01..0A02 ; T # Mn [2] GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN BINDI -0A3C ; T # Mn GURMUKHI SIGN NUKTA -0A41..0A42 ; T # Mn [2] GURMUKHI VOWEL SIGN U..GURMUKHI VOWEL SIGN UU -0A47..0A48 ; T # Mn [2] GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI 
-0A4B..0A4D ; T # Mn [3] GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA -0A51 ; T # Mn GURMUKHI SIGN UDAAT -0A70..0A71 ; T # Mn [2] GURMUKHI TIPPI..GURMUKHI ADDAK -0A75 ; T # Mn GURMUKHI SIGN YAKASH -0A81..0A82 ; T # Mn [2] GUJARATI SIGN CANDRABINDU..GUJARATI SIGN ANUSVARA -0ABC ; T # Mn GUJARATI SIGN NUKTA -0AC1..0AC5 ; T # Mn [5] GUJARATI VOWEL SIGN U..GUJARATI VOWEL SIGN CANDRA E -0AC7..0AC8 ; T # Mn [2] GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN AI -0ACD ; T # Mn GUJARATI SIGN VIRAMA -0AE2..0AE3 ; T # Mn [2] GUJARATI VOWEL SIGN VOCALIC L..GUJARATI VOWEL SIGN VOCALIC LL -0B01 ; T # Mn ORIYA SIGN CANDRABINDU -0B3C ; T # Mn ORIYA SIGN NUKTA -0B3F ; T # Mn ORIYA VOWEL SIGN I -0B41..0B44 ; T # Mn [4] ORIYA VOWEL SIGN U..ORIYA VOWEL SIGN VOCALIC RR -0B4D ; T # Mn ORIYA SIGN VIRAMA -0B56 ; T # Mn ORIYA AI LENGTH MARK -0B62..0B63 ; T # Mn [2] ORIYA VOWEL SIGN VOCALIC L..ORIYA VOWEL SIGN VOCALIC LL -0B82 ; T # Mn TAMIL SIGN ANUSVARA -0BC0 ; T # Mn TAMIL VOWEL SIGN II -0BCD ; T # Mn TAMIL SIGN VIRAMA -0C3E..0C40 ; T # Mn [3] TELUGU VOWEL SIGN AA..TELUGU VOWEL SIGN II -0C46..0C48 ; T # Mn [3] TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI -0C4A..0C4D ; T # Mn [4] TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA -0C55..0C56 ; T # Mn [2] TELUGU LENGTH MARK..TELUGU AI LENGTH MARK -0C62..0C63 ; T # Mn [2] TELUGU VOWEL SIGN VOCALIC L..TELUGU VOWEL SIGN VOCALIC LL -0CBC ; T # Mn KANNADA SIGN NUKTA -0CBF ; T # Mn KANNADA VOWEL SIGN I -0CC6 ; T # Mn KANNADA VOWEL SIGN E -0CCC..0CCD ; T # Mn [2] KANNADA VOWEL SIGN AU..KANNADA SIGN VIRAMA -0CE2..0CE3 ; T # Mn [2] KANNADA VOWEL SIGN VOCALIC L..KANNADA VOWEL SIGN VOCALIC LL -0D41..0D44 ; T # Mn [4] MALAYALAM VOWEL SIGN U..MALAYALAM VOWEL SIGN VOCALIC RR -0D4D ; T # Mn MALAYALAM SIGN VIRAMA -0D62..0D63 ; T # Mn [2] MALAYALAM VOWEL SIGN VOCALIC L..MALAYALAM VOWEL SIGN VOCALIC LL -0DCA ; T # Mn SINHALA SIGN AL-LAKUNA -0DD2..0DD4 ; T # Mn [3] SINHALA VOWEL SIGN KETTI IS-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA -0DD6 ; T # Mn SINHALA VOWEL SIGN DIGA 
PAA-PILLA -0E31 ; T # Mn THAI CHARACTER MAI HAN-AKAT -0E34..0E3A ; T # Mn [7] THAI CHARACTER SARA I..THAI CHARACTER PHINTHU -0E47..0E4E ; T # Mn [8] THAI CHARACTER MAITAIKHU..THAI CHARACTER YAMAKKAN -0EB1 ; T # Mn LAO VOWEL SIGN MAI KAN -0EB4..0EB9 ; T # Mn [6] LAO VOWEL SIGN I..LAO VOWEL SIGN UU -0EBB..0EBC ; T # Mn [2] LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN LO -0EC8..0ECD ; T # Mn [6] LAO TONE MAI EK..LAO NIGGAHITA -0F18..0F19 ; T # Mn [2] TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS -0F35 ; T # Mn TIBETAN MARK NGAS BZUNG NYI ZLA -0F37 ; T # Mn TIBETAN MARK NGAS BZUNG SGOR RTAGS -0F39 ; T # Mn TIBETAN MARK TSA -PHRU -0F71..0F7E ; T # Mn [14] TIBETAN VOWEL SIGN AA..TIBETAN SIGN RJES SU NGA RO -0F80..0F84 ; T # Mn [5] TIBETAN VOWEL SIGN REVERSED I..TIBETAN MARK HALANTA -0F86..0F87 ; T # Mn [2] TIBETAN SIGN LCI RTAGS..TIBETAN SIGN YANG RTAGS -0F90..0F97 ; T # Mn [8] TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER JA -0F99..0FBC ; T # Mn [36] TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER FIXED-FORM RA -0FC6 ; T # Mn TIBETAN SYMBOL PADMA GDAN -102D..1030 ; T # Mn [4] MYANMAR VOWEL SIGN I..MYANMAR VOWEL SIGN UU -1032..1037 ; T # Mn [6] MYANMAR VOWEL SIGN AI..MYANMAR SIGN DOT BELOW -1039..103A ; T # Mn [2] MYANMAR SIGN VIRAMA..MYANMAR SIGN ASAT -103D..103E ; T # Mn [2] MYANMAR CONSONANT SIGN MEDIAL WA..MYANMAR CONSONANT SIGN MEDIAL HA -1058..1059 ; T # Mn [2] MYANMAR VOWEL SIGN VOCALIC L..MYANMAR VOWEL SIGN VOCALIC LL -105E..1060 ; T # Mn [3] MYANMAR CONSONANT SIGN MON MEDIAL NA..MYANMAR CONSONANT SIGN MON MEDIAL LA -1071..1074 ; T # Mn [4] MYANMAR VOWEL SIGN GEBA KAREN I..MYANMAR VOWEL SIGN KAYAH EE -1082 ; T # Mn MYANMAR CONSONANT SIGN SHAN MEDIAL WA -1085..1086 ; T # Mn [2] MYANMAR VOWEL SIGN SHAN E ABOVE..MYANMAR VOWEL SIGN SHAN FINAL Y -108D ; T # Mn MYANMAR SIGN SHAN COUNCIL EMPHATIC TONE -109D ; T # Mn MYANMAR VOWEL SIGN AITON AI -135F ; T # Mn ETHIOPIC COMBINING GEMINATION MARK -1712..1714 ; T # Mn [3] TAGALOG 
VOWEL SIGN I..TAGALOG SIGN VIRAMA -1732..1734 ; T # Mn [3] HANUNOO VOWEL SIGN I..HANUNOO SIGN PAMUDPOD -1752..1753 ; T # Mn [2] BUHID VOWEL SIGN I..BUHID VOWEL SIGN U -1772..1773 ; T # Mn [2] TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U -17B4..17B5 ; T # Cf [2] KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA -17B7..17BD ; T # Mn [7] KHMER VOWEL SIGN I..KHMER VOWEL SIGN UA -17C6 ; T # Mn KHMER SIGN NIKAHIT -17C9..17D3 ; T # Mn [11] KHMER SIGN MUUSIKATOAN..KHMER SIGN BATHAMASAT -17DD ; T # Mn KHMER SIGN ATTHACAN -180B..180D ; T # Mn [3] MONGOLIAN FREE VARIATION SELECTOR ONE..MONGOLIAN FREE VARIATION SELECTOR THREE -18A9 ; T # Mn MONGOLIAN LETTER ALI GALI DAGALGA -1920..1922 ; T # Mn [3] LIMBU VOWEL SIGN A..LIMBU VOWEL SIGN U -1927..1928 ; T # Mn [2] LIMBU VOWEL SIGN E..LIMBU VOWEL SIGN O -1932 ; T # Mn LIMBU SMALL LETTER ANUSVARA -1939..193B ; T # Mn [3] LIMBU SIGN MUKPHRENG..LIMBU SIGN SA-I -1A17..1A18 ; T # Mn [2] BUGINESE VOWEL SIGN I..BUGINESE VOWEL SIGN U -1A56 ; T # Mn TAI THAM CONSONANT SIGN MEDIAL LA -1A58..1A5E ; T # Mn [7] TAI THAM SIGN MAI KANG LAI..TAI THAM CONSONANT SIGN SA -1A60 ; T # Mn TAI THAM SIGN SAKOT -1A62 ; T # Mn TAI THAM VOWEL SIGN MAI SAT -1A65..1A6C ; T # Mn [8] TAI THAM VOWEL SIGN I..TAI THAM VOWEL SIGN OA BELOW -1A73..1A7C ; T # Mn [10] TAI THAM VOWEL SIGN OA ABOVE..TAI THAM SIGN KHUEN-LUE KARAN -1A7F ; T # Mn TAI THAM COMBINING CRYPTOGRAMMIC DOT -1B00..1B03 ; T # Mn [4] BALINESE SIGN ULU RICEM..BALINESE SIGN SURANG -1B34 ; T # Mn BALINESE SIGN REREKAN -1B36..1B3A ; T # Mn [5] BALINESE VOWEL SIGN ULU..BALINESE VOWEL SIGN RA REPA -1B3C ; T # Mn BALINESE VOWEL SIGN LA LENGA -1B42 ; T # Mn BALINESE VOWEL SIGN PEPET -1B6B..1B73 ; T # Mn [9] BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG -1B80..1B81 ; T # Mn [2] SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PANGLAYAR -1BA2..1BA5 ; T # Mn [4] SUNDANESE CONSONANT SIGN PANYAKRA..SUNDANESE VOWEL SIGN PANYUKU -1BA8..1BA9 ; T # Mn [2] SUNDANESE VOWEL SIGN 
PAMEPET..SUNDANESE VOWEL SIGN PANEULEUNG -1C2C..1C33 ; T # Mn [8] LEPCHA VOWEL SIGN E..LEPCHA CONSONANT SIGN T -1C36..1C37 ; T # Mn [2] LEPCHA SIGN RAN..LEPCHA SIGN NUKTA -1CD0..1CD2 ; T # Mn [3] VEDIC TONE KARSHANA..VEDIC TONE PRENKHA -1CD4..1CE0 ; T # Mn [13] VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC TONE RIGVEDIC KASHMIRI INDEPENDENT SVARITA -1CE2..1CE8 ; T # Mn [7] VEDIC SIGN VISARGA SVARITA..VEDIC SIGN VISARGA ANUDATTA WITH TAIL -1CED ; T # Mn VEDIC SIGN TIRYAK -1DC0..1DE6 ; T # Mn [39] COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z -1DFD..1DFF ; T # Mn [3] COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW -200B ; T # Cf ZERO WIDTH SPACE -200E..200F ; T # Cf [2] LEFT-TO-RIGHT MARK..RIGHT-TO-LEFT MARK -202A..202E ; T # Cf [5] LEFT-TO-RIGHT EMBEDDING..RIGHT-TO-LEFT OVERRIDE -2060..2064 ; T # Cf [5] WORD JOINER..INVISIBLE PLUS -206A..206F ; T # Cf [6] INHIBIT SYMMETRIC SWAPPING..NOMINAL DIGIT SHAPES -20D0..20DC ; T # Mn [13] COMBINING LEFT HARPOON ABOVE..COMBINING FOUR DOTS ABOVE -20DD..20E0 ; T # Me [4] COMBINING ENCLOSING CIRCLE..COMBINING ENCLOSING CIRCLE BACKSLASH -20E1 ; T # Mn COMBINING LEFT RIGHT ARROW ABOVE -20E2..20E4 ; T # Me [3] COMBINING ENCLOSING SCREEN..COMBINING ENCLOSING UPWARD POINTING TRIANGLE -20E5..20F0 ; T # Mn [12] COMBINING REVERSE SOLIDUS OVERLAY..COMBINING ASTERISK ABOVE -2CEF..2CF1 ; T # Mn [3] COPTIC COMBINING NI ABOVE..COPTIC COMBINING SPIRITUS LENIS -2DE0..2DFF ; T # Mn [32] COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS -302A..302F ; T # Mn [6] IDEOGRAPHIC LEVEL TONE MARK..HANGUL DOUBLE DOT TONE MARK -3099..309A ; T # Mn [2] COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK -A66F ; T # Mn COMBINING CYRILLIC VZMET -A670..A672 ; T # Me [3] COMBINING CYRILLIC TEN MILLIONS SIGN..COMBINING CYRILLIC THOUSAND MILLIONS SIGN -A67C..A67D ; T # Mn [2] COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK -A6F0..A6F1 ; T 
# Mn [2] BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS -A802 ; T # Mn SYLOTI NAGRI SIGN DVISVARA -A806 ; T # Mn SYLOTI NAGRI SIGN HASANTA -A80B ; T # Mn SYLOTI NAGRI SIGN ANUSVARA -A825..A826 ; T # Mn [2] SYLOTI NAGRI VOWEL SIGN U..SYLOTI NAGRI VOWEL SIGN E -A8C4 ; T # Mn SAURASHTRA SIGN VIRAMA -A8E0..A8F1 ; T # Mn [18] COMBINING DEVANAGARI DIGIT ZERO..COMBINING DEVANAGARI SIGN AVAGRAHA -A926..A92D ; T # Mn [8] KAYAH LI VOWEL UE..KAYAH LI TONE CALYA PLOPHU -A947..A951 ; T # Mn [11] REJANG VOWEL SIGN I..REJANG CONSONANT SIGN R -A980..A982 ; T # Mn [3] JAVANESE SIGN PANYANGGA..JAVANESE SIGN LAYAR -A9B3 ; T # Mn JAVANESE SIGN CECAK TELU -A9B6..A9B9 ; T # Mn [4] JAVANESE VOWEL SIGN WULU..JAVANESE VOWEL SIGN SUKU MENDUT -A9BC ; T # Mn JAVANESE VOWEL SIGN PEPET -AA29..AA2E ; T # Mn [6] CHAM VOWEL SIGN AA..CHAM VOWEL SIGN OE -AA31..AA32 ; T # Mn [2] CHAM VOWEL SIGN AU..CHAM VOWEL SIGN UE -AA35..AA36 ; T # Mn [2] CHAM CONSONANT SIGN LA..CHAM CONSONANT SIGN WA -AA43 ; T # Mn CHAM CONSONANT SIGN FINAL NG -AA4C ; T # Mn CHAM CONSONANT SIGN FINAL M -AAB0 ; T # Mn TAI VIET MAI KANG -AAB2..AAB4 ; T # Mn [3] TAI VIET VOWEL I..TAI VIET VOWEL U -AAB7..AAB8 ; T # Mn [2] TAI VIET MAI KHIT..TAI VIET VOWEL IA -AABE..AABF ; T # Mn [2] TAI VIET VOWEL AM..TAI VIET TONE MAI EK -AAC1 ; T # Mn TAI VIET TONE MAI THO -ABE5 ; T # Mn MEETEI MAYEK VOWEL SIGN ANAP -ABE8 ; T # Mn MEETEI MAYEK VOWEL SIGN UNAP -ABED ; T # Mn MEETEI MAYEK APUN IYEK -FB1E ; T # Mn HEBREW POINT JUDEO-SPANISH VARIKA -FE00..FE0F ; T # Mn [16] VARIATION SELECTOR-1..VARIATION SELECTOR-16 -FE20..FE26 ; T # Mn [7] COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON -FEFF ; T # Cf ZERO WIDTH NO-BREAK SPACE -FFF9..FFFB ; T # Cf [3] INTERLINEAR ANNOTATION ANCHOR..INTERLINEAR ANNOTATION TERMINATOR -101FD ; T # Mn PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE -10A01..10A03 ; T # Mn [3] KHAROSHTHI VOWEL SIGN I..KHAROSHTHI VOWEL SIGN VOCALIC R -10A05..10A06 ; T # Mn [2] KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL 
SIGN O -10A0C..10A0F ; T # Mn [4] KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI SIGN VISARGA -10A38..10A3A ; T # Mn [3] KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW -10A3F ; T # Mn KHAROSHTHI VIRAMA -11080..11081 ; T # Mn [2] KAITHI SIGN CANDRABINDU..KAITHI SIGN ANUSVARA -110B3..110B6 ; T # Mn [4] KAITHI VOWEL SIGN U..KAITHI VOWEL SIGN AI -110B9..110BA ; T # Mn [2] KAITHI SIGN VIRAMA..KAITHI SIGN NUKTA -110BD ; T # Cf KAITHI NUMBER SIGN -1D167..1D169 ; T # Mn [3] MUSICAL SYMBOL COMBINING TREMOLO-1..MUSICAL SYMBOL COMBINING TREMOLO-3 -1D173..1D17A ; T # Cf [8] MUSICAL SYMBOL BEGIN BEAM..MUSICAL SYMBOL END PHRASE -1D17B..1D182 ; T # Mn [8] MUSICAL SYMBOL COMBINING ACCENT..MUSICAL SYMBOL COMBINING LOURE -1D185..1D18B ; T # Mn [7] MUSICAL SYMBOL COMBINING DOIT..MUSICAL SYMBOL COMBINING TRIPLE TONGUE -1D1AA..1D1AD ; T # Mn [4] MUSICAL SYMBOL COMBINING DOWN BOW..MUSICAL SYMBOL COMBINING SNAP PIZZICATO -1D242..1D244 ; T # Mn [3] COMBINING GREEK MUSICAL TRISEME..COMBINING GREEK MUSICAL PENTASEME -E0001 ; T # Cf LANGUAGE TAG -E0020..E007F ; T # Cf [96] TAG SPACE..CANCEL TAG -E0100..E01EF ; T # Mn [240] VARIATION SELECTOR-17..VARIATION SELECTOR-256 - -# Total code points: 1308 - -# EOF diff --git a/utils/Makefile b/utils/Makefile index 2f59501c2..b186cd116 100644 --- a/utils/Makefile +++ b/utils/Makefile @@ -6,13 +6,16 @@ S_UTILS := \ file.c \ filename.c \ filepath.c \ + hashmap.c \ hashtable.c \ idna.c \ libdom.c \ log.c \ messages.c \ + nscolour.c \ nsoption.c \ punycode.c \ + ssl_certs.c \ talloc.c \ time.c \ url.c \ diff --git a/utils/config.h b/utils/config.h index ddd1c6e8e..f54326dba 100644 --- a/utils/config.h +++ b/utils/config.h @@ -16,21 +16,38 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -#ifndef _NETSURF_UTILS_CONFIG_H_ -#define _NETSURF_UTILS_CONFIG_H_ +#ifndef NETSURF_UTILS_CONFIG_H_ +#define NETSURF_UTILS_CONFIG_H_ #include <stddef.h> +#if defined(__NetBSD__) +#include <sys/param.h> +#if (defined(__NetBSD_Version__) && __NetBSD_Prereq__(8,0,0)) +#define NetBSD_v8 +#endif +#endif + /* Try to detect which features the target OS supports */ -#if (defined(_GNU_SOURCE) && !defined(__APPLE__) || defined(__amigaos4__) || defined(__HAIKU__) || (defined(_POSIX_C_SOURCE) && ((_POSIX_C_SOURCE - 0) >= 200809L)) && !defined(__riscos__)) +#if (defined(_GNU_SOURCE) && \ + !defined(__APPLE__) || \ + defined(__amigaos4__) || \ + defined(__HAIKU__) || \ + (defined(_POSIX_C_SOURCE) && ((_POSIX_C_SOURCE - 0) >= 200809L)) && \ + !defined(__riscos__)) #define HAVE_STRNDUP #else #undef HAVE_STRNDUP char *strndup(const char *s, size_t n); #endif -#if (defined(_GNU_SOURCE) || defined(__APPLE__) || defined(__HAIKU__) || defined(__OpenBSD__)) +#if ((defined(_GNU_SOURCE) || \ + defined(__APPLE__) || \ + defined(__HAIKU__) || \ + defined(__NetBSD__) || \ + defined(__OpenBSD__)) && \ + !defined(__serenity__)) #define HAVE_STRCASESTR #else #undef HAVE_STRCASESTR @@ -40,7 +57,13 @@ char *strcasestr(const char *haystack, const char *needle); /* Although these platforms might have strftime or strptime they * appear not to support the time_t seconds format specifier. */ -#if (defined(_WIN32) || defined(__riscos__) || defined(__HAIKU__) || defined(__BEOS__) || defined(__amigaos4__) || defined(__AMIGA__) || defined(__MINT__)) +#if (defined(_WIN32) || \ + defined(__riscos__) || \ + defined(__HAIKU__) || \ + defined(__BEOS__) || \ + defined(__amigaos4__) || \ + defined(__AMIGA__) || \ + defined(__MINT__)) #undef HAVE_STRPTIME #undef HAVE_STRFTIME #else @@ -51,22 +74,36 @@ char *strcasestr(const char *haystack, const char *needle); /* For some reason, UnixLib defines this unconditionally. Assume we're using * UnixLib if building for RISC OS. 
*/ -#if ((defined(_GNU_SOURCE) && !defined(__APPLE__)) || defined(__riscos__)) +#if ((defined(_GNU_SOURCE) && !defined(__APPLE__)) || \ + defined(__riscos__) || \ + defined(NetBSD_v8)) #define HAVE_STRCHRNUL #else #undef HAVE_STRCHRNUL char *strchrnul(const char *s, int c); #endif +/* + * amigaos3 declares this but does not have it in its actual library + */ +#define HAVE_STRTOULL +#if !defined(__amigaos4__) && defined(__AMIGA__) +#undef HAVE_STRTOULL +#endif + #define HAVE_SYS_SELECT -#define HAVE_INETATON #define HAVE_POSIX_INET_HEADERS #if (defined(_WIN32)) -#undef HAVE_INETATON #undef HAVE_SYS_SELECT #undef HAVE_POSIX_INET_HEADERS #endif +#define HAVE_INETATON +#if (defined(_WIN32) || \ + defined(__serenity__)) +#undef HAVE_INETATON +#endif + #define HAVE_INETPTON #if (defined(_WIN32)) #undef HAVE_INETPTON @@ -104,10 +141,31 @@ char *realpath(const char *path, char *resolved_path); #endif #define HAVE_SCANDIR -#if (defined(_WIN32)) +#if (defined(_WIN32) || \ + defined(__serenity__)) #undef HAVE_SCANDIR #endif +#define HAVE_DIRFD +#define HAVE_UNLINKAT +#define HAVE_FSTATAT +#if (defined(_WIN32) || defined(__riscos__) || defined(__HAIKU__) || defined(__BEOS__) || defined(__amigaos4__) || defined(__AMIGA__) || defined(__MINT__)) +#undef HAVE_DIRFD +#undef HAVE_UNLINKAT +#undef HAVE_FSTATAT +#endif + +#define HAVE_REGEX +#if (defined(__serenity__)) +#undef HAVE_REGEX +#endif + +/* execinfo available for backtrace */ +#if ((defined(__linux__) && defined(__GLIBC__) && !defined(__UCLIBC__)) || \ + defined(__APPLE__)) +#define HAVE_EXECINFO +#endif + /* This section toggles build options on and off. * Simply undefine a symbol to turn the relevant feature off. 
* @@ -133,9 +191,11 @@ char *realpath(const char *path, char *resolved_path); #define WITH_MMAP #endif -/* amiga */ -#if defined(__amigaos4__) || defined(__AMIGA__) || \ - defined(nsatari) +/* IPv6 */ +#if (defined(__amigaos4__) || \ + defined(__AMIGA__) || \ + defined(nsatari) || \ + defined(__serenity__)) #define NO_IPV6 #endif diff --git a/utils/corestringlist.h b/utils/corestringlist.h index def5a73bf..5cdbb3af7 100644 --- a/utils/corestringlist.h +++ b/utils/corestringlist.h @@ -70,6 +70,7 @@ CORESTRING_LWC_STRING(filename); CORESTRING_LWC_STRING(font); CORESTRING_LWC_STRING(frame); CORESTRING_LWC_STRING(frameset); +CORESTRING_LWC_STRING(ftp); CORESTRING_LWC_STRING(h1); CORESTRING_LWC_STRING(h2); CORESTRING_LWC_STRING(h3); @@ -86,6 +87,7 @@ CORESTRING_LWC_STRING(icon); CORESTRING_LWC_STRING(iframe); CORESTRING_LWC_STRING(image); CORESTRING_LWC_STRING(img); +CORESTRING_LWC_STRING(includesubdomains); CORESTRING_LWC_STRING(input); CORESTRING_LWC_STRING(javascript); CORESTRING_LWC_STRING(justify); @@ -137,10 +139,19 @@ CORESTRING_LWC_STRING(_blank); CORESTRING_LWC_STRING(_parent); CORESTRING_LWC_STRING(_self); CORESTRING_LWC_STRING(_top); +CORESTRING_LWC_STRING(443); /* unusual lwc strings */ CORESTRING_LWC_VALUE(shortcut_icon, "shortcut icon"); CORESTRING_LWC_VALUE(slash_, "/"); +CORESTRING_LWC_VALUE(max_age, "max-age"); +CORESTRING_LWC_VALUE(no_cache, "no-cache"); +CORESTRING_LWC_VALUE(no_store, "no-store"); +CORESTRING_LWC_VALUE(query_auth, "query/auth"); +CORESTRING_LWC_VALUE(query_ssl, "query/ssl"); +CORESTRING_LWC_VALUE(query_timeout, "query/timeout"); +CORESTRING_LWC_VALUE(query_fetcherror, "query/fetcherror"); +CORESTRING_LWC_VALUE(x_ns_css, "x-ns-css"); /* mime types */ CORESTRING_LWC_VALUE(multipart_form_data, "multipart/form-data"); @@ -156,6 +167,7 @@ CORESTRING_LWC_VALUE(application_octet_stream, "application/octet-stream"); CORESTRING_LWC_VALUE(image_gif, "image/gif"); CORESTRING_LWC_VALUE(image_png, "image/png"); CORESTRING_LWC_VALUE(image_jpeg, 
"image/jpeg"); +CORESTRING_LWC_VALUE(image_jxl, "image/jxl"); CORESTRING_LWC_VALUE(image_bmp, "image/bmp"); CORESTRING_LWC_VALUE(image_vnd_microsoft_icon, "image/vnd.microsoft.icon"); CORESTRING_LWC_VALUE(image_webp, "image/webp"); @@ -180,6 +192,10 @@ CORESTRING_DOM_STRING(afterprint); CORESTRING_DOM_STRING(align); CORESTRING_DOM_STRING(alt); CORESTRING_DOM_STRING(area); +CORESTRING_DOM_STRING(ArrowDown); +CORESTRING_DOM_STRING(ArrowLeft); +CORESTRING_DOM_STRING(ArrowRight); +CORESTRING_DOM_STRING(ArrowUp); CORESTRING_DOM_STRING(async); CORESTRING_DOM_STRING(background); CORESTRING_DOM_STRING(beforeprint); @@ -223,12 +239,15 @@ CORESTRING_DOM_STRING(dragstart); CORESTRING_DOM_STRING(drop); CORESTRING_DOM_STRING(durationchange); CORESTRING_DOM_STRING(emptied); +CORESTRING_DOM_STRING(End); CORESTRING_DOM_STRING(ended); CORESTRING_DOM_STRING(error); +CORESTRING_DOM_STRING(Escape); CORESTRING_DOM_STRING(focus); CORESTRING_DOM_STRING(frameborder); CORESTRING_DOM_STRING(hashchange); CORESTRING_DOM_STRING(height); +CORESTRING_DOM_STRING(Home); CORESTRING_DOM_STRING(href); CORESTRING_DOM_STRING(hreflang); CORESTRING_DOM_STRING(hspace); @@ -240,6 +259,7 @@ CORESTRING_DOM_STRING(keydown); CORESTRING_DOM_STRING(keypress); CORESTRING_DOM_STRING(keyup); CORESTRING_DOM_STRING(link); +CORESTRING_DOM_STRING(languagechange); CORESTRING_DOM_STRING(load); CORESTRING_DOM_STRING(loadeddata); CORESTRING_DOM_STRING(loadedmetadata); @@ -261,8 +281,10 @@ CORESTRING_DOM_STRING(noresize); CORESTRING_DOM_STRING(nowrap); CORESTRING_DOM_STRING(offline); CORESTRING_DOM_STRING(online); +CORESTRING_DOM_STRING(PageDown); CORESTRING_DOM_STRING(pagehide); CORESTRING_DOM_STRING(pageshow); +CORESTRING_DOM_STRING(PageUp); CORESTRING_DOM_STRING(pause); CORESTRING_DOM_STRING(play); CORESTRING_DOM_STRING(playing); @@ -274,6 +296,7 @@ CORESTRING_DOM_STRING(rect); CORESTRING_DOM_STRING(rel); CORESTRING_DOM_STRING(reset); CORESTRING_DOM_STRING(resize); +CORESTRING_DOM_STRING(reversed); 
CORESTRING_DOM_STRING(rows); CORESTRING_DOM_STRING(rowspan); CORESTRING_DOM_STRING(scroll); @@ -288,6 +311,7 @@ CORESTRING_DOM_STRING(size); CORESTRING_DOM_STRING(sizes); CORESTRING_DOM_STRING(src); CORESTRING_DOM_STRING(stalled); +CORESTRING_DOM_STRING(start); CORESTRING_DOM_STRING(storage); CORESTRING_DOM_STRING(style); CORESTRING_DOM_STRING(submit); @@ -307,11 +331,14 @@ CORESTRING_DOM_STRING(waiting); CORESTRING_DOM_STRING(width); /* DOM node names, not really CSS */ CORESTRING_DOM_STRING(BUTTON); +CORESTRING_DOM_STRING(HTML); CORESTRING_DOM_STRING(INPUT); CORESTRING_DOM_STRING(SELECT); CORESTRING_DOM_STRING(TEXTAREA); +CORESTRING_DOM_STRING(TITLE); CORESTRING_DOM_STRING(BODY); CORESTRING_DOM_STRING(HEAD); +CORESTRING_DOM_STRING(SCRIPT); /* DOM input types, not really CSS */ CORESTRING_DOM_STRING(button); CORESTRING_DOM_STRING(image); @@ -342,6 +369,7 @@ CORESTRING_DOM_STRING(__ns_key_libcss_node_data); CORESTRING_DOM_STRING(__ns_key_file_name_node_data); CORESTRING_DOM_STRING(__ns_key_image_coords_node_data); CORESTRING_DOM_STRING(__ns_key_html_content_data); +CORESTRING_DOM_STRING(__ns_key_canvas_node_data); /* unusual DOM strings */ CORESTRING_DOM_VALUE(text_javascript, "text/javascript"); @@ -349,6 +377,10 @@ CORESTRING_DOM_VALUE(http_equiv, "http-equiv"); CORESTRING_DOM_VALUE(html_namespace, "http://www.w3.org/1999/xhtml"); CORESTRING_NSURL(about_blank, "about:blank"); +CORESTRING_NSURL(about_query_ssl, "about:query/ssl"); +CORESTRING_NSURL(about_query_auth, "about:query/auth"); +CORESTRING_NSURL(about_query_timeout, "about:query/timeout"); +CORESTRING_NSURL(about_query_fetcherror, "about:query/fetcherror"); #undef CORESTRING_LWC_STRING #undef CORESTRING_DOM_STRING diff --git a/utils/coverity-build.sh b/utils/coverity-build.sh deleted file mode 100755 index b51562f8f..000000000 --- a/utils/coverity-build.sh +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/bash -# -# Copyright © 2013 Vincent Sanders <vince@netsurf-browser.org> -# -# Permission is hereby granted, free 
of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# * The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. 
- -# CI system coverity build and submission script -# -# Usage: coverity-build.sh -# - -# environment variables -# -# HOST The ABI to be compiled for -# COVERITY_TOKEN -# COVERITY_USER -# COVERITY_PREFIX path to tools else default is used -# -# either PREFIX or JENKINS_HOME - -COVERITY_PROJECT="NetSurf+Browser" - -# build gtk, framebuffer and monkey frontend by default -TARGETS="gtk framebuffer monkey" - -# setup build environment -export PREFIX=${PREFIX:-${JENKINS_HOME}/artifacts-${HOST}} -export PKG_CONFIG_PATH=${PREFIX}/lib/pkgconfig -export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${PREFIX}/lib -export PATH=${PATH}:${PREFIX}/bin - -# Coverity tools location -COVERITY_PREFIX=${COVERITY_PREFIX:-/opt/coverity/cov-analysis-linux64-7.5.0} -COVERITY_VERSION=$(git rev-parse HEAD) - -export PATH=${PATH}:${COVERITY_PREFIX}/bin - -COVERITY_TAR=coverity-scan.tar - -# cleanup before we start -rm -rf cov-int/ ${COVERITY_TAR} ${COVERITY_TAR}.gz - -for TARGET in ${TARGETS}; do - make clean TARGET=${TARGET} -done - -# Do the builds using coverity data gathering tool -for TARGET in ${TARGETS}; do - cov-build --dir cov-int make CCACHE= TARGET=${TARGET} -done - -tar cf ${COVERITY_TAR} cov-int - -gzip -9 ${COVERITY_TAR} - -curl --form "project=${COVERITY_PROJECT}" --form "token=${COVERITY_TOKEN}" --form "email=${COVERITY_USER}" --form "file=@${COVERITY_TAR}.gz" --form "version=${COVERITY_VERSION}" --form "description=Git Head build" "https://scan.coverity.com/builds?project=${COVERITY_PROJECT}" diff --git a/utils/dirent.h b/utils/dirent.h index fa78e1491..318734c30 100644 --- a/utils/dirent.h +++ b/utils/dirent.h @@ -28,8 +28,8 @@ * utils config header include. 
*/ -#ifndef _NETSURF_UTILS_DIRENT_H_ -#define _NETSURF_UTILS_DIRENT_H_ +#ifndef NETSURF_UTILS_DIRENT_H +#define NETSURF_UTILS_DIRENT_H #include "utils/config.h" diff --git a/utils/errors.h b/utils/errors.h index 9a0a9bc04..ac4d38e2b 100644 --- a/utils/errors.h +++ b/utils/errors.h @@ -59,6 +59,10 @@ typedef enum { NSERROR_NOSPACE, /**< Insufficient space */ NSERROR_BAD_SIZE, /**< Bad size */ NSERROR_NOT_IMPLEMENTED, /**< Functionality is not implemented */ + NSERROR_BAD_REDIRECT, /**< Fetch encountered a bad redirect */ + NSERROR_BAD_AUTH, /**< Fetch needs authentication data */ + NSERROR_BAD_CERTS, /**< Fetch needs certificate chain check */ + NSERROR_TIMEOUT, /**< Operation timed out */ } nserror; #endif diff --git a/utils/fetch-transifex.pl b/utils/fetch-transifex.pl deleted file mode 100644 index 4d40062c9..000000000 --- a/utils/fetch-transifex.pl +++ /dev/null @@ -1,127 +0,0 @@ -#!/usr/bin/perl -# -# Copyright © 2013 Vincent Sanders <vince@netsurf-browser.org> -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# * The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. - -=head1 - -retrive resource from transifex service - -=cut - -use strict; -use Getopt::Long (); -use LWP::UserAgent; -use JSON qw( decode_json ); -use Data::Dumper; -use Fcntl qw( O_CREAT O_EXCL O_WRONLY O_APPEND O_RDONLY O_WRONLY ); - -use constant GETOPT_OPTS => qw( auto_abbrev no_getopt_compat bundling ); -use constant GETOPT_SPEC => - qw( output|o=s - lang|l=s - resource|res|r=s - project|prj|p=s - user|u=s - password|w=s - help|h|? ); - -# ensure no locale translation is applied and leave it all in UTF-8 -use bytes; - -# default option values: -my %opt = qw( resource messagesany project netsurf user netsurf ); - -sub output_stream (); -sub usage (); - -sub main () -{ - my $output; - my $opt_ok; - - # option parsing: - Getopt::Long::Configure( GETOPT_OPTS ); - $opt_ok = Getopt::Long::GetOptions( \%opt, GETOPT_SPEC ); - - if( $opt_ok ) - { - $output = output_stream(); - } - - # double check the options are sane (and we weren't asked for the help) - if( !$opt_ok || $opt{help} || $opt{lang} !~ /^[a-z]{2}$/ ) - { - usage(); - } - - my $transifexurl = "https://www.transifex.com/api/2/project/" . $opt{project} . "/resource/" . $opt{resource} . "/translation/" . $opt{lang} . "/"; - - my $ua = LWP::UserAgent->new; - $ua->credentials( - 'www.transifex.com:443', - 'Transifex API', - $opt{user} => $opt{password} - ); - - my $response = $ua->get( $transifexurl ); - if (!$response->is_success) { - die $response->status_line . " When fetching " . 
$transifexurl; - } - - # Decode the entire JSON - my $decoded_json = decode_json( $response->decoded_content ); - - print ( $output $decoded_json->{'content'} ); -} - -main(); - -sub usage () -{ - print(STDERR <<TXT ); -usage: - $0 -l lang-code \ - [-o output-file] [-r resource] [-p project] [-u user] [-w password] - - lang-code : en fr ko ... (no default) - project : transifex project (default 'netsurf') - resource : transifex resource (default 'messagesany') - user : transifex resource (default 'netsurf') - password : transifex resource (no default) - output-file: defaults to standard output -TXT - exit(1); -} - -sub output_stream () -{ - if( $opt{output} ) - { - my $ofh; - - sysopen( $ofh, $opt{output}, O_CREAT|O_EXCL|O_APPEND|O_WRONLY ) || - die( "$0: Failed to open output file $opt{output}: $!\n" ); - - return $ofh; - } - - return \*STDOUT; -} diff --git a/utils/file.c b/utils/file.c index cc82657c9..75a8a1c03 100644 --- a/utils/file.c +++ b/utils/file.c @@ -26,6 +26,8 @@ #include <sys/types.h> #include <sys/stat.h> #include <unistd.h> +#include <fcntl.h> +#include <errno.h> #include "desktop/gui_internal.h" @@ -35,6 +37,11 @@ #include "utils/nsurl.h" #include "utils/string.h" #include "utils/file.h" +#include "utils/dirent.h" + +#ifdef nsamiga +#include "frontends/amiga/os3support.h" +#endif /** * Generate a posix path from one or more component elemnts. 
@@ -307,3 +314,82 @@ nserror netsurf_mkdir_all(const char *fname) { return guit->file->mkdir_all(fname); } + +/* exported interface documented in utils/file.h */ +nserror +netsurf_recursive_rm(const char *path) +{ + DIR *parent; + struct dirent *entry; + nserror ret = NSERROR_OK; + struct stat ent_stat; /* stat result of leaf entry */ + + parent = opendir(path); + if (parent == NULL) { + switch (errno) { + case ENOENT: + return NSERROR_NOT_FOUND; + default: + return NSERROR_UNKNOWN; + } + } + + while ((entry = readdir(parent))) { + char *leafpath = NULL; + + if (strcmp(entry->d_name, ".") == 0 || + strcmp(entry->d_name, "..") == 0) + continue; + + ret = netsurf_mkpath(&leafpath, NULL, 2, path, entry->d_name); + if (ret != NSERROR_OK) + goto out; + +#if (defined(HAVE_DIRFD) && defined(HAVE_FSTATAT)) + if (fstatat(dirfd(parent), entry->d_name, &ent_stat, + AT_SYMLINK_NOFOLLOW) != 0) { +#else + if (stat(leafpath, &ent_stat) != 0) { +#endif + free(leafpath); + goto out_via_errno; + } + if (S_ISDIR(ent_stat.st_mode)) { + ret = netsurf_recursive_rm(leafpath); + if (ret != NSERROR_OK) { + free(leafpath); + goto out; + } + } else { +#if (defined(HAVE_DIRFD) && defined(HAVE_UNLINKAT)) + if (unlinkat(dirfd(parent), entry->d_name, 0) != 0) { +#else + if (unlink(leafpath) != 0) { +#endif + free(leafpath); + goto out_via_errno; + } + } + + free(leafpath); + } + + if (rmdir(path) != 0) { + goto out_via_errno; + } + + goto out; + +out_via_errno: + switch (errno) { + case ENOENT: + ret = NSERROR_NOT_FOUND; + break; + default: + ret = NSERROR_UNKNOWN; + } +out: + closedir(parent); + + return ret; +} diff --git a/utils/file.h b/utils/file.h index 5ee13b5aa..809ffe419 100644 --- a/utils/file.h +++ b/utils/file.h @@ -117,7 +117,7 @@ struct gui_file_table { }; /** Default (posix) file operation table. */ -struct gui_file_table *default_file_table; +extern struct gui_file_table *default_file_table; /** * Generate a path from one or more component elemnts. 
@@ -172,4 +172,15 @@ nserror netsurf_path_to_nsurl(const char *path, struct nsurl **url); */ nserror netsurf_mkdir_all(const char *fname); +/** + * Recursively remove a directory + * + * If this returns a failure code, there's an unpredictable amount left + * unremoved. + * + * @param path The path to recursively remove + * @return NSERROR_OK on success, or an error code on failure. + */ +nserror netsurf_recursive_rm(const char *path); + #endif diff --git a/utils/filename.c b/utils/filename.c index 01a403fd9..346fa85cc 100644 --- a/utils/filename.c +++ b/utils/filename.c @@ -29,10 +29,13 @@ #include <stdio.h> #include <stdlib.h> #include <errno.h> +#include <fcntl.h> #include <sys/stat.h> #include <unistd.h> #include "utils/dirent.h" +#include "utils/errors.h" +#include "utils/file.h" #include "utils/filename.h" #include "utils/log.h" #include "utils/utils.h" @@ -55,7 +58,6 @@ static char filename_directory[256]; static struct directory *filename_create_directory(const char *prefix); static bool filename_flush_directory(const char *folder, int depth); -static bool filename_delete_recursive(char *folder); /** * Request a new, unique, filename. @@ -96,7 +98,9 @@ const char *filename_request(void) else dir->high_used |= (1 << (i - 32)); - sprintf(filename_buffer, "%s%.2i", dir->prefix, i); + i = i % 99; + + snprintf(filename_buffer, sizeof(filename_buffer), "%s%.2i", dir->prefix, i); return filename_buffer; } @@ -270,19 +274,30 @@ bool filename_flush_directory(const char *folder, int depth) } parent = opendir(folder); + if (parent == NULL) + return false; while ((entry = readdir(parent))) { - struct stat statbuf; + int written; + struct stat statbuf; /* Ignore '.' and '..' 
*/ if (strcmp(entry->d_name, ".") == 0 || strcmp(entry->d_name, "..") == 0) continue; - snprintf(child, sizeof(child), "%s/%s", folder, entry->d_name); - child[sizeof(child) - 1] = '\0'; + written = snprintf(child, sizeof(child), "%s/%s", + folder, entry->d_name); + if (written == sizeof(child)) { + child[sizeof(child) - 1] = '\0'; + } +#if (defined(HAVE_DIRFD) && defined(HAVE_FSTATAT)) + if (fstatat(dirfd(parent), entry->d_name, &statbuf, + AT_SYMLINK_NOFOLLOW) == -1) { +#else if (stat(child, &statbuf) == -1) { +#endif NSLOG(netsurf, INFO, "Unable to stat %s: %s", child, strerror(errno)); continue; @@ -348,14 +363,20 @@ bool filename_flush_directory(const char *folder, int depth) /* delete or recurse */ if (del) { - if (S_ISDIR(statbuf.st_mode)) - filename_delete_recursive(child); - - if (remove(child)) - NSLOG(netsurf, INFO, "Failed to remove '%s'", - child); - else - changed = true; + if (S_ISDIR(statbuf.st_mode)) { + changed = (netsurf_recursive_rm(child) == + NSERROR_OK); + } else { +#if (defined(HAVE_DIRFD) && defined(HAVE_UNLINKAT)) + if (unlinkat(dirfd(parent), entry->d_name, 0)) { +#else + if (unlink(child)) { +#endif + NSLOG(netsurf, INFO, + "Failed to remove '%s'", child); + } else + changed = true; + } } else { while (filename_flush_directory(child, depth + 1)); } @@ -368,56 +389,6 @@ bool filename_flush_directory(const char *folder, int depth) /** - * Recursively deletes the contents of a directory - * - * \param folder the directory to delete - * \return true on success, false otherwise - */ -bool filename_delete_recursive(char *folder) -{ - DIR *parent; - struct dirent *entry; - char child[256]; - struct stat statbuf; - - parent = opendir(folder); - - while ((entry = readdir(parent))) { - /* Ignore '.' and '..' 
*/ - if (strcmp(entry->d_name, ".") == 0 || - strcmp(entry->d_name, "..") == 0) - continue; - - snprintf(child, sizeof(child), "%s/%s", folder, entry->d_name); - child[sizeof(child) - 1] = '\0'; - - if (stat(child, &statbuf) == -1) { - NSLOG(netsurf, INFO, "Unable to stat %s: %s", child, - strerror(errno)); - continue; - } - - if (S_ISDIR(statbuf.st_mode)) { - if (!filename_delete_recursive(child)) { - closedir(parent); - return false; - } - } - - if (remove(child)) { - NSLOG(netsurf, INFO, "Failed to remove '%s'", child); - closedir(parent); - return false; - } - } - - closedir(parent); - - return true; -} - - -/** * Creates a new directory. * * \param prefix the prefix to use, or NULL to allocate a new one diff --git a/utils/filepath.c b/utils/filepath.c index 156ba2d75..b87e2bf0d 100644 --- a/utils/filepath.c +++ b/utils/filepath.c @@ -16,10 +16,12 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -/** \file +/** + * \file * Provides utility functions for finding readable files. * - * These functions are intended to make finding resource files more straightforward. + * These functions are intended to make finding resource files more + * straightforward. */ #include <sys/types.h> @@ -37,7 +39,7 @@ #include "utils/filepath.h" /** maximum number of elements in the resource vector */ -#define MAX_RESPATH 128 +#define MAX_RESPATH 128 /* exported interface documented in filepath.h */ char *filepath_vsfindfile(char *str, const char *format, va_list ap) @@ -61,20 +63,21 @@ char *filepath_vsfindfile(char *str, const char *format, va_list ap) } realpathname = realpath(pathname, str); - + free(pathname); - + if (realpathname != NULL) { /* sucessfully expanded pathname */ if (access(realpathname, R_OK) != 0) { /* unable to read the file */ return NULL; - } + } } return realpathname; } + /* exported interface documented in filepath.h */ char *filepath_sfindfile(char *str, const char *format, ...) 
{ @@ -88,6 +91,7 @@ char *filepath_sfindfile(char *str, const char *format, ...) return ret; } + /* exported interface documented in filepath.h */ char *filepath_findfile(const char *format, ...) { @@ -120,6 +124,7 @@ char *filepath_sfind(char **respathv, char *filepath, const char *filename) return NULL; } + /* exported interface documented in filepath.h */ char *filepath_find(char **respathv, const char *filename) { @@ -141,8 +146,13 @@ char *filepath_find(char **respathv, const char *filename) return ret; } + /* exported interface documented in filepath.h */ -char *filepath_sfinddef(char **respathv, char *filepath, const char *filename, const char *def) +char * +filepath_sfinddef(char **respathv, + char *filepath, + const char *filename, + const char *def) { char t[PATH_MAX]; char *ret; @@ -159,9 +169,9 @@ char *filepath_sfinddef(char **respathv, char *filepath, const char *filename, c snprintf(t, PATH_MAX, "%s/%s/%s", getenv("HOME"), def + 1, filename); } else { snprintf(t, PATH_MAX, "%s/%s", def, filename); - } + } if (realpath(t, ret) == NULL) { - strcpy(ret, t); + strncpy(ret, t, PATH_MAX); } } @@ -178,23 +188,41 @@ filepath_generate(char * const *pathv, const char * const *langv) int langc = 0; int respathc = 0; struct stat dstat; - char tmppath[PATH_MAX]; + char *tmppath; + int tmppathlen; respath = calloc(MAX_RESPATH, sizeof(char *)); while ((respath != NULL) && (pathv[pathc] != NULL)) { - if ((stat(pathv[pathc], &dstat) == 0) && + if ((stat(pathv[pathc], &dstat) == 0) && S_ISDIR(dstat.st_mode)) { /* path element exists and is a directory */ langc = 0; while (langv[langc] != NULL) { - snprintf(tmppath, sizeof tmppath, "%s/%s", pathv[pathc],langv[langc]); - if ((stat(tmppath, &dstat) == 0) && + tmppathlen = snprintf(NULL, + 0, + "%s/%s", + pathv[pathc], + langv[langc]); + tmppath = malloc(tmppathlen + 1); + if (tmppath == NULL) { + break; + } + snprintf(tmppath, + tmppathlen + 1, + "%s/%s", + pathv[pathc], + langv[langc]); + + if ((stat(tmppath, &dstat) == 
0) && S_ISDIR(dstat.st_mode)) { /* path element exists and is a directory */ - respath[respathc++] = strdup(tmppath); + respath[respathc++] = tmppath; + } else { + free(tmppath); } + langc++; } respath[respathc++] = strdup(pathv[pathc]); @@ -204,6 +232,7 @@ filepath_generate(char * const *pathv, const char * const *langv) return respath; } + /** * expand ${} in a string into environment variables. * @@ -232,20 +261,20 @@ expand_path(const char *path, int pathlen) explen = pathlen; while (exp[cloop] != 0) { - if ((exp[cloop] == '$') && + if ((exp[cloop] == '$') && (exp[cloop + 1] == '{')) { cstart = cloop; cloop++; - } - + } + if ((cstart != -1) && (exp[cloop] == '}')) { replen = cloop - cstart; exp[cloop] = 0; envv = getenv(exp + cstart + 2); if (envv == NULL) { - memmove(exp + cstart, - exp + cloop + 1, + memmove(exp + cstart, + exp + cloop + 1, explen - cloop); explen -= replen; } else { @@ -257,8 +286,8 @@ expand_path(const char *path, int pathlen) return NULL; } exp = tmp; - memmove(exp + cstart + envlen, - exp + cloop + 1, + memmove(exp + cstart + envlen, + exp + cloop + 1, explen - cloop ); memmove(exp + cstart, envv, envlen); explen += envlen - replen; @@ -278,6 +307,7 @@ expand_path(const char *path, int pathlen) return exp; } + /* exported interface documented in filepath.h */ char ** filepath_path_to_strvec(const char *path) @@ -315,12 +345,13 @@ filepath_path_to_strvec(const char *path) /* check for termination */ if (*eend == 0) break; - + estart = eend; } return strvec; } + /* exported interface documented in filepath.h */ void filepath_free_strvec(char **pathv) { @@ -331,4 +362,3 @@ void filepath_free_strvec(char **pathv) } free(pathv); } - diff --git a/utils/filepath.h b/utils/filepath.h index ad077e2ce..784264b33 100644 --- a/utils/filepath.h +++ b/utils/filepath.h @@ -69,10 +69,10 @@ char *filepath_findfile(const char *format, ...); * normalised file name of the first acessible file or NULL if no file * can be found in any of the resource paths. 
* - * @param respathv The resource path vector to iterate. - * @param filepath The buffer to place the result in. - * @param filename The filename of the resource to search for. - * @return A pointer to filepath if a target is found or NULL if not. + * \param respathv The resource path vector to iterate. + * \param filepath The buffer to place the result in. + * \param filename The filename of the resource to search for. + * \return A pointer to filepath if a target is found or NULL if not. */ char *filepath_sfind(char **respathv, char *filepath, const char *filename); @@ -93,6 +93,12 @@ char *filepath_find(char **respathv, const char *filename); * is used as an additional path element to search, if that still * fails the returned path is set to the concatination of the default * path and the filename. + * + * \param respathv The resource path vector to iterate. + * \param filepath The buffer to place the result in. Must have space for PATH_MAX bytes. + * \param filename The filename of the resource to search for. 
+ * \param def The default path to use + * \return A pointer to filepath if a target is found or the default if not */ char *filepath_sfinddef(char **respathv, char *filepath, const char *filename, const char *def); diff --git a/utils/git-date.sh b/utils/git-date.sh deleted file mode 100755 index d9a9059a0..000000000 --- a/utils/git-date.sh +++ /dev/null @@ -1,36 +0,0 @@ -#!/bin/bash -# -# Copyright © 2016 Michael Drake <tlsa@netsurf-browser.org> -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# * The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. - -# Sets the timestamp of files to the last time a commit touched them in -# the given subtree. 
-# -# Usage: git-date.sh <PATH> -# -# Example: /utils/git-date.sh riscos/distribution - -set -e - -for FILE in $(git ls-files ${1}) -do - TIME=$(git log --pretty=format:%cd -n 1 --date=iso "$FILE") - touch -m -d "$TIME" "$FILE" -done diff --git a/utils/git-testament.pl b/utils/git-testament.pl deleted file mode 100644 index 5a71a16df..000000000 --- a/utils/git-testament.pl +++ /dev/null @@ -1,227 +0,0 @@ -#!/usr/bin/perl -w - -use strict; - -=head1 - -Generate a testament describing the current Git status. This gets written -out in a C form which can be used to construct the NetSurf Git testament -file for signon notification. - -If there is no Git in place, the data is invented arbitrarily. - -=cut - -$ENV{LC_ALL} = 'C'; - -my $root = shift @ARGV; -my $targetfile = shift @ARGV; - -my %gitinfo; # The Git information - -$root .= "/" unless ($root =~ m@/$@); - -my $git_present = 0; -if ( -d ".git" ) { - $git_present = 1; -} - -sub compat_tmpnam { - # File::Temp was introduced in Perl 5.6.1 - my $have_file_tmp = eval { require File::Temp }; - - if ( ! $have_file_tmp ) { - return "$$.gitt"; - } else { - return File::Temp::tmpnam(); - } -} - -sub compat_md5_hex { - # Digest::MD5 was introduced in Perl 5.7.1 - my $have_digest_md5 = eval { require Digest::MD5 }; - my $have_md5 = eval { require MD5 }; - my $data = shift; - - if ( ! 
$have_digest_md5 ) { - return MD5->hexhash($data); - } else { - return Digest::MD5->new->add($data)->hexdigest; - } -} - -sub gather_output { - my $cmd = shift; - my $tmpfile = compat_tmpnam(); - local $/ = undef(); - system("$cmd > $tmpfile"); - open(my $CMDH, "<", $tmpfile); - my $ret = <$CMDH>; - close($CMDH); - unlink($tmpfile); - return $ret; -} - -if ( $git_present ) { - my @bits = split /\s+/, `git config --get-regexp "^remote.*.url\$"`; - $gitinfo{url} = $bits[1]; - chomp $gitinfo{url}; - $gitinfo{revision} = `git rev-parse HEAD`; - chomp $gitinfo{revision}; - $gitinfo{branch} = `git for-each-ref --format="\%(refname:short)" \$(git symbolic-ref HEAD 2>/dev/null || git show-ref -s HEAD)`; - chomp $gitinfo{branch}; - @bits = split /\s+/, `git describe --tags --exact-match HEAD 2>/dev/null`; - $bits[0] = "" unless exists $bits[0]; - $gitinfo{tag} = $bits[0]; - $gitinfo{branch} = $gitinfo{tag} if ($gitinfo{tag} =~ m@.@); -} else { - $gitinfo{url} = "http://nowhere/tarball/"; - $gitinfo{revision} = "unknown"; - $gitinfo{branch} = "tarball"; - $gitinfo{tag} = ""; -} - -my %gitstatus; # The Git status output - -if ( $git_present ) { - foreach my $line (split(/\n/, gather_output("git status --porcelain"))) { - chomp $line; - my ($X, $Y, $fp) = ($line =~ /^(.)(.) 
(.+)$/); - my $fn = $fp; - $fn = ($fp =~ /(.+) ->/) if ($fp =~ / -> /); - next unless (care_about_file($fn)); - # Normalise $X and $Y (WT and index) into a simple A/M/D etc - - $gitstatus{$fn} = "$X$Y"; - } -} - -my %userinfo; # The information about the current user - -{ - my @pwent = getpwuid($<); - $userinfo{USERNAME} = $pwent[0]; - my $gecos = $pwent[6]; - $gecos =~ s/,.+//g; - $gecos =~ s/"/'/g; - $gecos =~ s/\\/\\\\/g; - $userinfo{GECOS} = $gecos; -} - -# The current date, in AmigaOS version friendly format (dd.mm.yyyy) - -my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = localtime(); -my $compiledate = sprintf("%02d.%02d.%d",$mday,$mon+1,$year+1900); -chomp $compiledate; - -# Spew the testament out - -my $testament = ""; - -$testament .= "#define USERNAME \"$userinfo{USERNAME}\"\n"; -$testament .= "#define GECOS \"$userinfo{GECOS}\"\n"; - -my $qroot = $root; -$qroot =~ s/"/\\"/g; - -my $hostname = $ENV{HOSTNAME}; - -unless ( defined($hostname) && $hostname ne "") { - # Try hostname command if env-var empty - $hostname = gather_output("hostname"); - chomp $hostname; -} - -$hostname = "unknown-host" unless (defined($hostname) && $hostname ne ""); -$hostname =~ s/"/\\"/g; - -$testament .= "#define WT_ROOT \"$qroot\"\n"; -$testament .= "#define WT_HOSTNAME \"$hostname\"\n"; -$testament .= "#define WT_COMPILEDATE \"$compiledate\"\n"; - -my $cibuild = $ENV{CI_BUILD}; -if (defined ($cibuild) && ($cibuild ne '')) { - $testament .= "#define CI_BUILD \"$cibuild\"\n"; -} - -$testament .= "#define WT_BRANCHPATH \"$gitinfo{branch}\"\n"; - -if ($gitinfo{branch} =~ m@^master$@) { - $testament .= "#define WT_BRANCHISMASTER 1\n"; -} -if ($gitinfo{tag} =~ m@.@) { - $testament .= "#define WT_BRANCHISTAG 1\n"; - $testament .= "#define WT_TAGIS \"$gitinfo{tag}\"\n"; -} -if ($gitinfo{url} =~ m@/tarball/@) { - $testament .= "#define WT_NO_GIT 1\n"; -} -$testament .= "#define WT_REVID \"$gitinfo{revision}\"\n"; -$testament .= "#define WT_MODIFIED " . 
scalar(keys %gitstatus) . "\n"; -$testament .= "#define WT_MODIFICATIONS {\\\n"; -my $doneone = 0; -foreach my $filename (sort keys %gitstatus) { - if ($doneone) { - $testament .= ", \\\n"; - } - $testament .= " { \"$filename\", \"$gitstatus{$filename}\" }"; - $doneone = 1; -} -$testament .= " \\\n}\n"; - -my $oldcsum = ""; -if ( -e $targetfile ) { - open(my $OLDVALUES, "<", $targetfile); - foreach my $line (readline($OLDVALUES)) { - if ($line =~ /MD5:([0-9a-f]+)/) { - $oldcsum = $1; - } - } - close($OLDVALUES); -} - -my $newcsum = compat_md5_hex($testament); - -if ($oldcsum ne $newcsum) { - print "TESTMENT: $targetfile\n"; - open(my $NEWVALUES, ">", $targetfile) or die "$!"; - print $NEWVALUES "/* ", $targetfile,"\n"; - print $NEWVALUES <<'EOS'; - * - * Revision testament. - * - * *WARNING* this file is automatically generated by git-testament.pl - * - * Copyright 2012 NetSurf Browser Project - */ - -EOS - - print $NEWVALUES "#ifndef NETSURF_REVISION_TESTAMENT\n"; - print $NEWVALUES "#define NETSURF_REVISION_TESTAMENT \"$newcsum\"\n\n"; - print $NEWVALUES "/* Revision testament checksum:\n"; - print $NEWVALUES " * MD5:", $newcsum,"\n */\n\n"; - print $NEWVALUES "/* Revision testament: */\n"; - print $NEWVALUES $testament; - print $NEWVALUES "\n#endif\n"; - close($NEWVALUES); - foreach my $unwanted (@ARGV) { - next unless(-e $unwanted); - print "TESTAMENT: Removing $unwanted\n"; - system("rm", "-f", "--", $unwanted); - } -} else { - print "TESTMENT: unchanged\n"; -} - -exit 0; - -sub care_about_file { - my ($fn) = @_; - return 0 if ($fn =~ /\.d$/); # Don't care for extraneous DEP files - return 0 if ($fn =~ /\.a$/); # Don't care for extraneous archive files - return 0 if ($fn =~ /\.md5$/); # Don't care for md5sum files - return 0 if ($fn =~ /\.map$/); # Don't care for map files - return 0 if ($fn =~ /\.gitt$/); # Don't care for testament temp files - return 1; -} diff --git a/utils/hashmap.c b/utils/hashmap.c new file mode 100644 index 000000000..4d9c78869 --- 
/dev/null +++ b/utils/hashmap.c @@ -0,0 +1,255 @@ +/* + * Copyright 2020 Daniel Silverstone <dsilvers@netsurf-browser.org> + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdlib.h> +#include <string.h> + +#include "utils/hashmap.h" + +/** + * The default number of buckets in the hashmaps we create. + */ +#define DEFAULT_HASHMAP_BUCKETS (4091) + +/** + * Hashmaps have chains of entries in buckets. 
+ */ +typedef struct hashmap_entry_s { + struct hashmap_entry_s **prevptr; + struct hashmap_entry_s *next; + void *key; + void *value; + uint32_t key_hash; +} hashmap_entry_t; + +/** + * The content of a hashmap + */ +struct hashmap_s { + /** + * The parameters to be used for this hashmap + */ + hashmap_parameters_t *params; + + /** + * The buckets for the hash chains + */ + hashmap_entry_t **buckets; + + /** + * The number of buckets in this map + */ + uint32_t bucket_count; + + /** + * The number of entries in this map + */ + size_t entry_count; +}; + +/* Exported function, documented in hashmap.h */ +hashmap_t * +hashmap_create(hashmap_parameters_t *params) +{ + hashmap_t *ret = malloc(sizeof(hashmap_t)); + if (ret == NULL) { + return NULL; + } + + ret->params = params; + ret->bucket_count = DEFAULT_HASHMAP_BUCKETS; + ret->entry_count = 0; + ret->buckets = malloc(ret->bucket_count * sizeof(hashmap_entry_t *)); + + if (ret->buckets == NULL) { + free(ret); + return NULL; + } + + memset(ret->buckets, 0, ret->bucket_count * sizeof(hashmap_entry_t *)); + + return ret; +} + +/* Exported function, documented in hashmap.h */ +void +hashmap_destroy(hashmap_t *hashmap) +{ + uint32_t bucket; + hashmap_entry_t *entry; + + for (bucket = 0; bucket < hashmap->bucket_count; bucket++) { + for (entry = hashmap->buckets[bucket]; + entry != NULL;) { + hashmap_entry_t *next = entry->next; + hashmap->params->value_destroy(entry->value); + hashmap->params->key_destroy(entry->key); + free(entry); + entry = next; + } + } + + free(hashmap->buckets); + free(hashmap); +} + +/* Exported function, documented in hashmap.h */ +void * +hashmap_lookup(hashmap_t *hashmap, void *key) +{ + uint32_t hash = hashmap->params->key_hash(key); + hashmap_entry_t *entry = hashmap->buckets[hash % hashmap->bucket_count]; + + for(;entry != NULL; entry = entry->next) { + if (entry->key_hash == hash) { + if (hashmap->params->key_eq(key, entry->key)) { + return entry->value; + } + } + } + + return NULL; +} + +/* 
Exported function, documented in hashmap.h */ +void * +hashmap_insert(hashmap_t *hashmap, void *key) +{ + uint32_t hash = hashmap->params->key_hash(key); + uint32_t bucket = hash % hashmap->bucket_count; + hashmap_entry_t *entry = hashmap->buckets[bucket]; + void *new_key, *new_value; + + for(;entry != NULL; entry = entry->next) { + if (entry->key_hash == hash) { + if (hashmap->params->key_eq(key, entry->key)) { + /* This key is already here */ + new_key = hashmap->params->key_clone(key); + if (new_key == NULL) { + /* Allocation failed */ + return NULL; + } + new_value = hashmap->params->value_alloc(entry->key); + if (new_value == NULL) { + /* Allocation failed */ + hashmap->params->key_destroy(new_key); + return NULL; + } + hashmap->params->value_destroy(entry->value); + hashmap->params->key_destroy(entry->key); + entry->value = new_value; + entry->key = new_key; + return entry->value; + } + } + } + + /* The key was not found in the map, so allocate a new entry */ + entry = malloc(sizeof(*entry)); + + if (entry == NULL) { + return NULL; + } + + memset(entry, 0, sizeof(*entry)); + + entry->key = hashmap->params->key_clone(key); + if (entry->key == NULL) { + goto err; + } + entry->key_hash = hash; + + entry->value = hashmap->params->value_alloc(entry->key); + if (entry->value == NULL) { + goto err; + } + + entry->prevptr = &(hashmap->buckets[bucket]); + entry->next = hashmap->buckets[bucket]; + if (entry->next != NULL) { + entry->next->prevptr = &entry->next; + } + + hashmap->buckets[bucket] = entry; + + hashmap->entry_count++; + + return entry->value; + +err: + if (entry->value != NULL) + hashmap->params->value_destroy(entry->value); + if (entry->key != NULL) + hashmap->params->key_destroy(entry->key); + free(entry); + + return NULL; +} + +/* Exported function, documented in hashmap.h */ +bool +hashmap_remove(hashmap_t *hashmap, void *key) +{ + uint32_t hash = hashmap->params->key_hash(key); + + hashmap_entry_t *entry = hashmap->buckets[hash % 
hashmap->bucket_count]; + + for(;entry != NULL; entry = entry->next) { + if (entry->key_hash == hash) { + if (hashmap->params->key_eq(key, entry->key)) { + hashmap->params->value_destroy(entry->value); + hashmap->params->key_destroy(entry->key); + if (entry->next != NULL) { + entry->next->prevptr = entry->prevptr; + } + *entry->prevptr = entry->next; + free(entry); + hashmap->entry_count--; + return true; + } + } + } + + return false; +} + +/* Exported function, documented in hashmap.h */ +bool +hashmap_iterate(hashmap_t *hashmap, hashmap_iteration_cb_t cb, void *ctx) +{ + for (uint32_t bucket = 0; + bucket < hashmap->bucket_count; + bucket++) { + for (hashmap_entry_t *entry = hashmap->buckets[bucket]; + entry != NULL; + entry = entry->next) { + /* If the callback returns true, we early-exit */ + if (cb(entry->key, entry->value, ctx)) + return true; + } + } + + return false; +} + +/* Exported function, documented in hashmap.h */ +size_t +hashmap_count(hashmap_t *hashmap) +{ + return hashmap->entry_count; +} diff --git a/utils/hashmap.h b/utils/hashmap.h new file mode 100644 index 000000000..3968fd3fe --- /dev/null +++ b/utils/hashmap.h @@ -0,0 +1,197 @@ +/* + * Copyright 2020 Daniel Silverstone <dsilvers@netsurf-browser.org> + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef NETSURF_HASHMAP_H +#define NETSURF_HASHMAP_H + +#include <stdint.h> +#include <stdbool.h> + +/** + * Generic hashmap. + * + * Hashmaps take ownership of the keys inserted into them by means of a + * clone function in their parameters. They also manage the value memory + * directly. + */ +typedef struct hashmap_s hashmap_t; + +/** + * Key cloning function type + */ +typedef void* (*hashmap_key_clone_t)(void *); + +/** + * Key destructor function type + */ +typedef void (*hashmap_key_destroy_t)(void *); + +/** + * Key hashing function type + */ +typedef uint32_t (*hashmap_key_hash_t)(void *); + +/** + * Key comparison function type + */ +typedef bool (*hashmap_key_eq_t)(void *, void*); + +/** + * Value allocation function type + */ +typedef void* (*hashmap_value_alloc_t)(void *); + +/** + * Value destructor function type + */ +typedef void (*hashmap_value_destroy_t)(void *); + +/** + * Hashmap iteration callback function type. + * + * First parameter is the key, second is the value. + * The final parameter is the context pointer for the iteration. + * + * Return true to stop iterating early + */ +typedef bool (*hashmap_iteration_cb_t)(void *, void *, void *); + +/** + * Parameters for hashmaps + */ +typedef struct { + /** + * A function which when called will clone a key and give + * ownership of the returned object to the hashmap + */ + hashmap_key_clone_t key_clone; + + /** + * A function which when given a key will return its hash. + */ + hashmap_key_hash_t key_hash; + + /** + * A function to compare two keys and return if they are equal. + * Note: identity is not necessary, nor strict equality, so long + * as the function is a full equality model. + * (i.e. 
key1 == key2 => key2 == key1) + */ + hashmap_key_eq_t key_eq; + + /** + * A function which when called will destroy a key object + */ + hashmap_key_destroy_t key_destroy; + + /** + * A function which when called will allocate a value object + */ + hashmap_value_alloc_t value_alloc; + + /** + * A function which when called will destroy a value object + */ + hashmap_value_destroy_t value_destroy; +} hashmap_parameters_t; + + +/** + * Create a hashmap + * + * The provided hashmap parameter table will be used for map operations + * which need to allocate/free etc. + * + * \param params The hashmap parameters for this map + */ +hashmap_t* hashmap_create(hashmap_parameters_t *params); + +/** + * Destroy a hashmap + * + * After this, all keys and values will have been destroyed and all memory + * associated with this hashmap will be invalidated. + * + * \param hashmap The hashmap to destroy + */ +void hashmap_destroy(hashmap_t *hashmap); + +/** + * Look up a key in a hashmap + * + * If the key has an associated value in the hashmap then the pointer to it + * is returned, otherwise NULL. + * + * \param hashmap The hashmap to look up the key inside + * \param key The key to look up in the hashmap + * \return A pointer to the value if found, NULL otherwise + */ +void* hashmap_lookup(hashmap_t *hashmap, void *key); + +/** + * Create an entry in a hashmap + * + * This creates a blank value using the parameters and then associates it with + * a clone of the given key, inserting it into the hashmap. If a value was + * present for the given key already, then it is destroyed first. + * + * NOTE: If allocation of the new value object fails, then any existing entry + * will be left alone, but NULL will be returned. + * + * \param hashmap The hashmap to insert into + * \param key The key to insert an entry for + * \return The value pointer for that key, or NULL if allocation failed. 
+ */ +void *hashmap_insert(hashmap_t *hashmap, void *key); + +/** + * Remove an entry from the hashmap + * + * This will remove the entry for the given key from the hashmap + * If there is no such entry, this will safely do nothing. + * The value associated with the entry will be destroyed and so should not + * be used beyond calling this function. + * + * \param hashmap The hashmap to remove the entry from + * \param key The key to remove the entry for + * \return true if an entry was removed, false otherwise + */ +bool hashmap_remove(hashmap_t *hashmap, void *key); + +/** + * Iterate the hashmap + * + * For each key/value pair in the hashmap, call the callback passing in + * the key and value. During iteration you MUST NOT mutate the hashmap. + * + * \param hashmap The hashmap to iterate + * \param cb The callback for each key,value pair + * \param ctx The callback context + * \return Whether or not we stopped iteration early + */ +bool hashmap_iterate(hashmap_t *hashmap, hashmap_iteration_cb_t cb, void *ctx); + +/** + * Get the number of entries in this map + * + * \param hashmap The hashmap to retrieve the entry count from + * \return The number of entries in the hashmap + */ +size_t hashmap_count(hashmap_t *hashmap); + +#endif diff --git a/utils/hashtable.c b/utils/hashtable.c index 3a1711da0..aa162cbc4 100644 --- a/utils/hashtable.c +++ b/utils/hashtable.c @@ -28,11 +28,15 @@ * it that has good coverage along side the other tests. */ +#include <stdint.h> +#include <stdbool.h> #include <stdlib.h> #include <string.h> -#include <stdbool.h> -#include "utils/hashtable.h" +#include <zlib.h> +#include <errno.h> + #include "utils/log.h" +#include "utils/hashtable.h" struct hash_entry { @@ -46,6 +50,8 @@ struct hash_table { struct hash_entry **chain; }; +/** maximum length of line for file or inline add */ +#define LINE_BUFFER_SIZE 512 /** * Hash a string, returning a 32bit value. 
The hash algorithm used is @@ -75,6 +81,175 @@ static inline unsigned int hash_string_fnv(const char *datum, unsigned int *len) } + +/** + * process a line of input. + * + * \param hash The hash table to add the line to + * \param ln The line to process + * \param lnlen The length of \ln + * \return NSERROR_OK on success else NSERROR_INVALID + */ +static nserror +process_line(struct hash_table *hash, uint8_t *ln, int lnlen) +{ + uint8_t *key; + uint8_t *value; + uint8_t *colon; + + key = ln; /* set key to start of line */ + value = ln + lnlen; /* set value to end of line */ + + /* skip leading whitespace */ + while ((key < value) && + ((*key == ' ') || (*key == '\t'))) { + key++; + } + + /* empty or comment lines */ + if ((*key == 0) || (*key == '#')) { + return NSERROR_OK; + } + + /* find first colon as key/value separator */ + for (colon = key; colon < value; colon++) { + if (*colon == ':') { + break; + } + } + if (colon == value) { + /* no colon found */ + return NSERROR_INVALID; + } + + *colon = 0; /* terminate key */ + value = colon + 1; + + if (hash_add(hash, (char *)key, (char *)value) == false) { + NSLOG(netsurf, INFO, + "Unable to add %s:%s to hash table", ln, value); + return NSERROR_INVALID; + } + return NSERROR_OK; +} + + +/** + * adds key/value pairs to a hash from a memory area + */ +static nserror +hash_add_inline_plain(struct hash_table *ht, const uint8_t *data, size_t size) +{ + uint8_t s[LINE_BUFFER_SIZE]; /* line buffer */ + unsigned int slen = 0; + nserror res = NSERROR_OK; + + while (size > 0) { + s[slen] = *data; + + if (s[slen] == '\n') { + s[slen] = 0; /* replace newline with null termination */ + res = process_line(ht, s, slen); + slen = 0; + if (res != NSERROR_OK) { + break; + } + } else { + slen++; + if (slen > sizeof s) { + NSLOG(netsurf, INFO, "Overlength line\n"); + slen = 0; + } + } + + size--; + data++; + } + if (slen > 0) { + s[slen] = 0; + res = process_line(ht, s, slen); + } + + return res; +} + +/** + * adds key/value pairs to a 
hash from a compressed memory area + */ +static nserror +hash_add_inline_gzip(struct hash_table *ht, const uint8_t *data, size_t size) +{ + nserror res; + int ret; /* zlib return value */ + z_stream strm; + uint8_t s[LINE_BUFFER_SIZE]; /* line buffer */ + size_t used = 0; /* number of bytes in buffer in use */ + uint8_t *nl; + + strm.zalloc = Z_NULL; + strm.zfree = Z_NULL; + strm.opaque = Z_NULL; + + strm.next_in = (uint8_t *)data; + strm.avail_in = size; + + ret = inflateInit2(&strm, 32 + MAX_WBITS); + if (ret != Z_OK) { + NSLOG(netsurf, INFO, "inflateInit returned %d", ret); + return NSERROR_INVALID; + } + + do { + strm.next_out = s + used; + strm.avail_out = sizeof(s) - used; + + ret = inflate(&strm, Z_NO_FLUSH); + if ((ret != Z_OK) && (ret != Z_STREAM_END)) { + break; + } + + used = sizeof(s) - strm.avail_out; + while (used > 0) { + /* find nl */ + for (nl = &s[0]; nl < &s[used]; nl++) { + if (*nl == '\n') { + break; + } + } + if (nl == &s[used]) { + /* no nl found */ + break; + } + /* found newline */ + *nl = 0; /* null terminate line */ + res = process_line(ht, &s[0], nl - &s[0]); + if (res != NSERROR_OK) { + inflateEnd(&strm); + return res; + } + + /* move data down */ + memmove(&s[0], nl + 1, used - ((nl + 1) - &s[0]) ); + used -= ((nl +1) - &s[0]); + } + if (used == sizeof(s)) { + /* entire buffer used and no newline */ + NSLOG(netsurf, INFO, "Overlength line"); + used = 0; + } + } while (ret != Z_STREAM_END); + + inflateEnd(&strm); + + if (ret != Z_STREAM_END) { + NSLOG(netsurf, INFO, "inflate returned %d", ret); + return NSERROR_INVALID; + } + return NSERROR_OK; + +} + + /* exported interface documented in utils/hashtable.h */ struct hash_table *hash_create(unsigned int chains) { @@ -172,10 +347,60 @@ const char *hash_get(struct hash_table *ht, const char *key) h = hash_string_fnv(key, &key_length); c = h % ht->nchains; - for (e = ht->chain[c]; e; e = e->next) + for (e = ht->chain[c]; e; e = e->next) { if ((key_length == e->key_length) && - (memcmp(key, 
e->pairing, key_length) == 0)) + (memcmp(key, e->pairing, key_length) == 0)) { return e->pairing + key_length + 1; + } + } return NULL; } + + + +/* exported interface documented in utils/hashtable.h */ +nserror hash_add_file(struct hash_table *ht, const char *path) +{ + nserror res = NSERROR_OK; + char s[LINE_BUFFER_SIZE]; /* line buffer */ + gzFile fp; /* compressed file handle */ + + if (path == NULL) { + return NSERROR_BAD_PARAMETER; + } + + fp = gzopen(path, "r"); + if (!fp) { + NSLOG(netsurf, INFO, + "Unable to open file \"%.100s\": %s", path, + strerror(errno)); + + return NSERROR_NOT_FOUND; + } + + while (gzgets(fp, s, sizeof s)) { + int slen = strlen(s); + s[--slen] = 0; /* remove \n at end */ + + res = process_line(ht, (uint8_t *)s, slen); + if (res != NSERROR_OK) { + break; + } + } + + gzclose(fp); + + return res; +} + + +/* exported interface documented in utils/hashtable.h */ +nserror hash_add_inline(struct hash_table *ht, const uint8_t *data, size_t size) +{ + if ((data[0]==0x1f) && (data[1] == 0x8b)) { + /* gzip header detected */ + return hash_add_inline_gzip(ht, data, size); + } + return hash_add_inline_plain(ht, data, size); +} diff --git a/utils/hashtable.h b/utils/hashtable.h index b0e7392c6..b1c0d5c41 100644 --- a/utils/hashtable.h +++ b/utils/hashtable.h @@ -29,8 +29,11 @@ struct hash_table; /** - * Create a new hash table, and return a context for it. The memory consumption - * of a hash table is approximately 8 + (nchains * 12) bytes if it is empty. + * Create a new hash table + * + * Allocate a new hash table and return a context for it. The memory + * consumption of a hash table is approximately 8 + (nchains * 12) + * bytes if it is empty. * * \param chains Number of chains/buckets this hash table will have. This * should be a prime number, and ideally a prime number just @@ -41,18 +44,22 @@ struct hash_table; struct hash_table *hash_create(unsigned int chains); /** - * Destroys a hash table, freeing all memory associated with it. 
+ * Destroys a hash table + * + * Destroy a hash table freeing all memory associated with it. * * \param ht Hash table to destroy. After the function returns, this - * will nolonger be valid. + * will no longer be valid. */ void hash_destroy(struct hash_table *ht); /** - * Adds a key/value pair to a hash table. If the key you're adding is already - * in the hash table, it does not replace it, but it does take precedent over - * it. The old key/value pair will be inaccessable but still in memory until - * hash_destroy() is called on the hash table. + * Adds a key/value pair to a hash table. + * + * If the key you're adding is already in the hash table, it does not + * replace it, but it does take precedent over it. The old key/value + * pair will be inaccessable but still in memory until hash_destroy() + * is called on the hash table. * * \param ht The hash table context to add the key/value pair to. * \param key The key to associate the value with. A copy is made. @@ -71,4 +78,34 @@ bool hash_add(struct hash_table *ht, const char *key, const char *value); */ const char *hash_get(struct hash_table *ht, const char *key); +/** + * Add key/value pairs to a hash table with data from a file + * + * The file should be formatted as a series of lines terminated with + * newline character. Each line should contain a key/value pair + * separated by a colon. If a line is empty or starts with a # + * character it will be ignored. + * + * The file may be optionally gzip compressed. + * + * \param ht The hash table context to add the key/value pairs to. + * \param path Path to file with key/value pairs in. + * \return NSERROR_OK on success else error code + */ +nserror hash_add_file(struct hash_table *ht, const char *path); + +/** + * Add key/value pairs to a hash table with data from a memory buffer + * + * The data format is the same as in hash_add_file() but held in memory + * + * The data may optionally be gzip compressed. 
+ * + * \param ht The hash table context to add the key/value pairs to. + * \param data Source of key/value pairs + * \param size length of \a data + * \return NSERROR_OK on success else error code + */ +nserror hash_add_inline(struct hash_table *ht, const uint8_t *data, size_t size); + #endif diff --git a/utils/http.h b/utils/http.h index 173604fb4..8da4f3fe3 100644 --- a/utils/http.h +++ b/utils/http.h @@ -27,8 +27,10 @@ #include "utils/errors.h" +#include "utils/http/cache-control.h" #include "utils/http/content-disposition.h" #include "utils/http/content-type.h" +#include "utils/http/strict-transport-security.h" #include "utils/http/www-authenticate.h" #endif diff --git a/utils/http/Makefile b/utils/http/Makefile index 198588bd4..b60f8f60a 100644 --- a/utils/http/Makefile +++ b/utils/http/Makefile @@ -1,6 +1,7 @@ # http utils sources S_HTTP := challenge.c generics.c primitives.c parameter.c \ - content-disposition.c content-type.c www-authenticate.c + cache-control.c content-disposition.c content-type.c \ + strict-transport-security.c www-authenticate.c -S_HTTP := $(addprefix utils/http/,$(S_HTTP))
\ No newline at end of file +S_HTTP := $(addprefix utils/http/,$(S_HTTP)) diff --git a/utils/http/cache-control.c b/utils/http/cache-control.c new file mode 100644 index 000000000..44700821d --- /dev/null +++ b/utils/http/cache-control.c @@ -0,0 +1,353 @@ +/* + * Copyright 2019 John-Mark Bell <jmb@netsurf-browser.org> + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <limits.h> +#include <stdlib.h> + +#include "utils/corestrings.h" +#include "utils/http.h" + +#include "utils/http/generics.h" +#include "utils/http/primitives.h" + +/** + * Representation of a Cache-Control + */ +struct http_cache_control { + uint32_t max_age; /**< Max age (delta seconds) */ + bool max_age_valid; /**< Whether max-age is valid */ + bool no_cache; /**< Whether caching is forbidden */ + bool no_store; /**< Whether persistent caching is forbidden */ +}; + +/** + * Representation of a directive + */ +typedef struct http_directive { + http__item base; + + lwc_string *name; /**< Parameter name */ + lwc_string *value; /**< Parameter value (optional) */ +} http_directive; + + +static void http_destroy_directive(http_directive *self) +{ + lwc_string_unref(self->name); + if (self->value != NULL) { + lwc_string_unref(self->value); + } + free(self); +} + +static nserror http__parse_directive(const char **input, + http_directive **result) +{ + const char *pos = *input; + 
lwc_string *name; + lwc_string *value = NULL; + http_directive *directive; + nserror error; + + /* token [ "=" ( token | quoted-string ) ] */ + + error = http__parse_token(&pos, &name); + if (error != NSERROR_OK) + return error; + + http__skip_LWS(&pos); + + if (*pos == '=') { + pos++; + + http__skip_LWS(&pos); + + if (*pos == '"') + error = http__parse_quoted_string(&pos, &value); + else + error = http__parse_token(&pos, &value); + + if (error != NSERROR_OK) { + lwc_string_unref(name); + return error; + } + } + + directive = malloc(sizeof(*directive)); + if (directive == NULL) { + if (value != NULL) { + lwc_string_unref(value); + } + lwc_string_unref(name); + return NSERROR_NOMEM; + } + + HTTP__ITEM_INIT(directive, NULL, http_destroy_directive); + directive->name = name; + directive->value = value; + + *result = directive; + *input = pos; + + return NSERROR_OK; +} + +static void http_directive_list_destroy(http_directive *list) +{ + http__item_list_destroy(list); +} + +static nserror http_directive_list_find_item(const http_directive *list, + lwc_string *name, lwc_string **value) +{ + bool match; + + while (list != NULL) { + if (lwc_string_caseless_isequal(name, list->name, + &match) == lwc_error_ok && match) + break; + + list = (http_directive *) list->base.next; + } + + if (list == NULL) + return NSERROR_NOT_FOUND; + + if (list->value != NULL) { + *value = lwc_string_ref(list->value); + } else { + *value = NULL; + } + + return NSERROR_OK; +} + +static const http_directive *http_directive_list_iterate( + const http_directive *cur, + lwc_string **name, lwc_string **value) +{ + if (cur == NULL) + return NULL; + + *name = lwc_string_ref(cur->name); + if (cur->value != NULL) { + *value = lwc_string_ref(cur->value); + } else { + *value = NULL; + } + + return (http_directive *) cur->base.next; +} + +static uint32_t count(const http_directive *list, lwc_string *key) +{ + uint32_t count = 0; + bool match; + + while (list != NULL) { + if (lwc_string_caseless_isequal(key, 
list->name, + &match) == lwc_error_ok && match) { + count++; + } + + list = (http_directive *) list->base.next; + } + + return count; +} + +static bool check_duplicates(const http_directive *directives) +{ + bool result = true; + const http_directive *key = directives; + + if (key == NULL) { + /* No directives, so there can't be any duplicates */ + return true; + } + + do { + lwc_string *name = NULL, *value = NULL; + + key = http_directive_list_iterate(key, &name, &value); + + result &= (count(directives, name) == 1); + + lwc_string_unref(name); + if (value != NULL) { + lwc_string_unref(value); + } + } while (key != NULL); + + return result; +} + +static nserror parse_max_age(lwc_string *value, uint32_t *result) +{ + const char *pos = lwc_string_data(value); + const char *end = pos + lwc_string_length(value); + uint32_t val = 0; + + /* 1*DIGIT */ + + if (pos == end) { + /* Blank value */ + return NSERROR_NOT_FOUND; + } + + while (pos < end) { + if ('0' <= *pos && *pos <= '9') { + uint32_t nv = val * 10 + (*pos - '0'); + if (nv < val) { + val = UINT_MAX; + } else { + val = nv; + } + } else { + /* Non-digit */ + return NSERROR_NOT_FOUND; + } + + pos++; + } + + *result = val; + + return NSERROR_OK; +} + +/* See cache-control.h for documentation */ +nserror http_parse_cache_control(const char *header_value, + http_cache_control **result) +{ + const char *pos = header_value; + http_cache_control *cc; + http_directive *first = NULL; + http_directive *directives = NULL; + lwc_string *value_str = NULL; + uint32_t max_age = 0; + bool max_age_valid = false; + bool no_cache = false; + bool no_store = false; + nserror error; + + /* 1#cache-directive */ + + http__skip_LWS(&pos); + + error = http__parse_directive(&pos, &first); + if (error != NSERROR_OK) { + return error; + } + + http__skip_LWS(&pos); + + if (*pos == ',') { + error = http__item_list_parse(&pos, + http__parse_directive, first, &directives); + if (error != NSERROR_OK) { + if (directives != NULL) { + 
http_directive_list_destroy(directives); + } + return error; + } + } else { + directives = first; + } + + /* Each directive must only appear once */ + if (check_duplicates(directives) == false) { + http_directive_list_destroy(directives); + return NSERROR_NOT_FOUND; + } + + /* Find max-age */ + error = http_directive_list_find_item(directives, + corestring_lwc_max_age, &value_str); + if (error == NSERROR_OK && value_str != NULL) { + error = parse_max_age(value_str, &max_age); + max_age_valid = (error == NSERROR_OK); + lwc_string_unref(value_str); + } + + /* Find no-cache */ + error = http_directive_list_find_item(directives, + corestring_lwc_no_cache, &value_str); + if (error == NSERROR_OK) { + no_cache = true; + if (value_str != NULL) { + lwc_string_unref(value_str); + } + } + + /* Find no-store */ + error = http_directive_list_find_item(directives, + corestring_lwc_no_store, &value_str); + if (error == NSERROR_OK) { + no_store = true; + if (value_str != NULL) { + lwc_string_unref(value_str); + } + } + + http_directive_list_destroy(directives); + + cc = malloc(sizeof(*cc)); + if (cc == NULL) { + return NSERROR_NOMEM; + } + + cc->max_age = max_age; + cc->max_age_valid = max_age_valid; + cc->no_cache = no_cache; + cc->no_store = no_store; + + *result = cc; + + return NSERROR_OK; +} + +/* See cache-control.h for documentation */ +void http_cache_control_destroy(http_cache_control *victim) +{ + free(victim); +} + +/* See cache-control.h for documentation */ +bool http_cache_control_has_max_age(http_cache_control *cc) +{ + return cc->max_age_valid; +} + +/* See cache-control.h for documentation */ +uint32_t http_cache_control_max_age(http_cache_control *cc) +{ + return cc->max_age; +} + +/* See cache-control.h for documentation */ +bool http_cache_control_no_cache(http_cache_control *cc) +{ + return cc->no_cache; +} + +/* See cache-control.h for documentation */ +bool http_cache_control_no_store(http_cache_control *cc) +{ + return cc->no_store; +} diff --git 
a/utils/http/cache-control.h b/utils/http/cache-control.h new file mode 100644 index 000000000..945cfcec0 --- /dev/null +++ b/utils/http/cache-control.h @@ -0,0 +1,77 @@ +/* + * Copyright 2019 John-Mark Bell <jmb@netsurf-browser.org> + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef NETSURF_UTILS_HTTP_CACHE_CONTROL_H_ +#define NETSURF_UTILS_HTTP_CACHE_CONTROL_H_ + +#include <libwapcaplet/libwapcaplet.h> + +typedef struct http_cache_control http_cache_control; + +/** + * Parse an HTTP Cache-Control header value + * + * \param header_value Header value to parse + * \param result Pointer to location to receive result + * \return NSERROR_OK on success, + * NSERROR_NOMEM on memory exhaustion, + * appropriate error otherwise + */ +nserror http_parse_cache_control(const char *header_value, + http_cache_control **result); + +/** + * Destroy a cache_control object + * + * \param victim Object to destroy + */ +void http_cache_control_destroy(http_cache_control *victim); + +/** + * Determine if a valid max-age directive is present + * + * \param cc Object to inspect + * \return Whether max-age is valid + */ +bool http_cache_control_has_max_age(http_cache_control *cc); + +/** + * Get the value of a cache control's max-age + * + * \param cc Object to inspect + * \return Max age, in delta-seconds + */ +uint32_t 
http_cache_control_max_age(http_cache_control *cc); + +/** + * Get the value of a cache control's no-cache flag + * + * \param cc Object to inspect + * \return Whether caching is forbidden + */ +bool http_cache_control_no_cache(http_cache_control *cc); + +/** + * Get the value of a cache control's no-store flag + * + * \param cc Object to inspect + * \return Whether persistent caching is forbidden + */ +bool http_cache_control_no_store(http_cache_control *cc); + +#endif diff --git a/utils/http/challenge.c b/utils/http/challenge.c index 578532e97..9b85fccbc 100644 --- a/utils/http/challenge.c +++ b/utils/http/challenge.c @@ -92,7 +92,7 @@ nserror http__parse_challenge(const char **input, http_challenge **challenge) http__skip_LWS(&pos); if (*pos == ',') { - error = http__item_list_parse(&pos, + error = http__item_list_parse(&pos, http__parse_parameter, first, &params); if (error != NSERROR_OK && error != NSERROR_NOT_FOUND) { lwc_string_unref(scheme); diff --git a/utils/http/content-disposition.c b/utils/http/content-disposition.c index 5d5e94c26..03bd12bd3 100644 --- a/utils/http/content-disposition.c +++ b/utils/http/content-disposition.c @@ -45,7 +45,7 @@ nserror http_parse_content_disposition(const char *header_value, http__skip_LWS(&pos); if (*pos == ';') { - error = http__item_list_parse(&pos, + error = http__item_list_parse(&pos, http__parse_parameter, NULL, &params); if (error != NSERROR_OK && error != NSERROR_NOT_FOUND) { lwc_string_unref(mtype); diff --git a/utils/http/content-type.c b/utils/http/content-type.c index f84da8c8e..d4279f512 100644 --- a/utils/http/content-type.c +++ b/utils/http/content-type.c @@ -68,7 +68,7 @@ nserror http_parse_content_type(const char *header_value, http__skip_LWS(&pos); if (*pos == ';') { - error = http__item_list_parse(&pos, + error = http__item_list_parse(&pos, http__parse_parameter, NULL, &params); if (error != NSERROR_OK && error != NSERROR_NOT_FOUND) { lwc_string_unref(subtype); diff --git a/utils/http/generics.h
b/utils/http/generics.h index 8c391c4af..a5af73458 100644 --- a/utils/http/generics.h +++ b/utils/http/generics.h @@ -19,6 +19,8 @@ #ifndef NETSURF_UTILS_HTTP_GENERICS_H_ #define NETSURF_UTILS_HTTP_GENERICS_H_ +#include <stdbool.h> + #include "utils/errors.h" /** diff --git a/utils/http/strict-transport-security.c b/utils/http/strict-transport-security.c new file mode 100644 index 000000000..9de610c73 --- /dev/null +++ b/utils/http/strict-transport-security.c @@ -0,0 +1,341 @@ +/* + * Copyright 2018 John-Mark Bell <jmb@netsurf-browser.org> + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <limits.h> +#include <stdlib.h> + +#include "utils/corestrings.h" +#include "utils/http.h" + +#include "utils/http/generics.h" +#include "utils/http/primitives.h" + +/** + * Representation of a Strict-Transport-Security + */ +struct http_strict_transport_security { + uint32_t max_age; /**< Max age (delta seconds) */ + bool include_sub_domains; /**< Whether subdomains are included */ +}; + +/** + * Representation of a directive + */ +typedef struct http_directive { + http__item base; + + lwc_string *name; /**< Parameter name */ + lwc_string *value; /**< Parameter value (optional) */ +} http_directive; + + +static void http_destroy_directive(http_directive *self) +{ + lwc_string_unref(self->name); + if (self->value != NULL) { + lwc_string_unref(self->value); + } + free(self); +} + +static nserror http__parse_directive(const char **input, + http_directive **result) +{ + const char *pos = *input; + lwc_string *name; + lwc_string *value = NULL; + http_directive *directive; + nserror error; + + /* token [ "=" ( token | quoted-string ) ] */ + + error = http__parse_token(&pos, &name); + if (error != NSERROR_OK) + return error; + + http__skip_LWS(&pos); + + if (*pos == '=') { + pos++; + + http__skip_LWS(&pos); + + if (*pos == '"') + error = http__parse_quoted_string(&pos, &value); + else + error = http__parse_token(&pos, &value); + + if (error != NSERROR_OK) { + lwc_string_unref(name); + return error; + } + } + + directive = malloc(sizeof(*directive)); + if (directive == NULL) { + if (value != NULL) { + lwc_string_unref(value); + } + lwc_string_unref(name); + return NSERROR_NOMEM; + } + + HTTP__ITEM_INIT(directive, NULL, http_destroy_directive); + directive->name = name; + directive->value = value; + + *result = directive; + *input = pos; + + return NSERROR_OK; +} + +static void http_directive_list_destroy(http_directive *list) +{ + http__item_list_destroy(list); +} + +static nserror http_directive_list_find_item(const http_directive *list, + lwc_string 
*name, lwc_string **value) +{ + bool match; + + while (list != NULL) { + if (lwc_string_caseless_isequal(name, list->name, + &match) == lwc_error_ok && match) + break; + + list = (http_directive *) list->base.next; + } + + if (list == NULL) + return NSERROR_NOT_FOUND; + + if (list->value != NULL) { + *value = lwc_string_ref(list->value); + } else { + *value = NULL; + } + + return NSERROR_OK; +} + +static const http_directive *http_directive_list_iterate( + const http_directive *cur, + lwc_string **name, lwc_string **value) +{ + if (cur == NULL) + return NULL; + + *name = lwc_string_ref(cur->name); + if (cur->value != NULL) { + *value = lwc_string_ref(cur->value); + } else { + *value = NULL; + } + + return (http_directive *) cur->base.next; +} + +static uint32_t count(const http_directive *list, lwc_string *key) +{ + uint32_t count = 0; + bool match; + + while (list != NULL) { + if (lwc_string_caseless_isequal(key, list->name, + &match) == lwc_error_ok && match) { + count++; + } + + list = (http_directive *) list->base.next; + } + + return count; +} + +static bool check_duplicates(const http_directive *directives) +{ + bool result = true; + const http_directive *key = directives; + + if (key == NULL) { + /* No directives, so there can't be any duplicates */ + return true; + } + + do { + lwc_string *name = NULL, *value = NULL; + + key = http_directive_list_iterate(key, &name, &value); + + result &= (count(directives, name) == 1); + + lwc_string_unref(name); + if (value != NULL) { + lwc_string_unref(value); + } + } while (key != NULL); + + return result; +} + +static nserror parse_max_age(lwc_string *value, uint32_t *result) +{ + const char *pos = lwc_string_data(value); + const char *end = pos + lwc_string_length(value); + uint32_t val = 0; + + /* 1*DIGIT */ + + if (pos == end) { + /* Blank value */ + return NSERROR_NOT_FOUND; + } + + while (pos < end) { + if ('0' <= *pos && *pos <= '9') { + uint32_t nv = val * 10 + (*pos - '0'); + if (nv < val) { + val = UINT_MAX; + 
} else { + val = nv; + } + } else { + /* Non-digit */ + return NSERROR_NOT_FOUND; + } + + pos++; + } + + *result = val; + + return NSERROR_OK; +} + +/* See strict-transport-security.h for documentation */ +nserror http_parse_strict_transport_security(const char *header_value, + http_strict_transport_security **result) +{ + const char *pos = header_value; + http_strict_transport_security *sts; + http_directive *first = NULL; + http_directive *directives = NULL; + lwc_string *max_age_str = NULL, *isd_str = NULL; + uint32_t max_age; + bool include_sub_domains = false; + nserror error; + + /* directive *( ";" directive ) */ + + http__skip_LWS(&pos); + + error = http__parse_directive(&pos, &first); + if (error != NSERROR_OK) { + return error; + } + + http__skip_LWS(&pos); + + if (*pos == ';') { + error = http__item_list_parse(&pos, + http__parse_directive, first, &directives); + if (error != NSERROR_OK) { + if (directives != NULL) { + http_directive_list_destroy(directives); + } + return error; + } + } else { + directives = first; + } + + /* Each directive must only appear once */ + if (check_duplicates(directives) == false) { + http_directive_list_destroy(directives); + return NSERROR_NOT_FOUND; + } + + /* max-age is required */ + error = http_directive_list_find_item(directives, + corestring_lwc_max_age, &max_age_str); + if (error != NSERROR_OK || max_age_str == NULL) { + http_directive_list_destroy(directives); + return NSERROR_NOT_FOUND; + } + + error = parse_max_age(max_age_str, &max_age); + if (error != NSERROR_OK) { + lwc_string_unref(max_age_str); + http_directive_list_destroy(directives); + return NSERROR_NOT_FOUND; + } + lwc_string_unref(max_age_str); + + /* includeSubDomains is optional and valueless */ + error = http_directive_list_find_item(directives, + corestring_lwc_includesubdomains, &isd_str); + if (error != NSERROR_OK && error != NSERROR_NOT_FOUND) { + http_directive_list_destroy(directives); + return NSERROR_NOT_FOUND; + } else if (error == 
NSERROR_OK) { + if (isd_str != NULL) { + /* Present, but not valueless: invalid */ + lwc_string_unref(isd_str); + http_directive_list_destroy(directives); + return NSERROR_NOT_FOUND; + } + include_sub_domains = true; + } + http_directive_list_destroy(directives); + + sts = malloc(sizeof(*sts)); + if (sts == NULL) { + return NSERROR_NOMEM; + } + + sts->max_age = max_age; + sts->include_sub_domains = include_sub_domains; + + *result = sts; + + return NSERROR_OK; +} + +/* See strict-transport-security.h for documentation */ +void http_strict_transport_security_destroy( + http_strict_transport_security *victim) +{ + free(victim); +} + +/* See strict-transport-security.h for documentation */ +uint32_t http_strict_transport_security_max_age( + http_strict_transport_security *sts) +{ + return sts->max_age; +} + +/* See strict-transport-security.h for documentation */ +bool http_strict_transport_security_include_subdomains( + http_strict_transport_security *sts) +{ + return sts->include_sub_domains; +} + diff --git a/utils/http/strict-transport-security.h b/utils/http/strict-transport-security.h new file mode 100644 index 000000000..4e52419fc --- /dev/null +++ b/utils/http/strict-transport-security.h @@ -0,0 +1,64 @@ +/* + * Copyright 2018 John-Mark Bell <jmb@netsurf-browser.org> + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef NETSURF_UTILS_HTTP_STRICT_TRANSPORT_SECURITY_H_ +#define NETSURF_UTILS_HTTP_STRICT_TRANSPORT_SECURITY_H_ + +#include <libwapcaplet/libwapcaplet.h> + +typedef struct http_strict_transport_security http_strict_transport_security; + +/** + * Parse an HTTP Strict-Transport-Security header value + * + * \param header_value Header value to parse + * \param result Pointer to location to receive result + * \return NSERROR_OK on success, + * NSERROR_NOMEM on memory exhaustion, + * appropriate error otherwise + */ +nserror http_parse_strict_transport_security(const char *header_value, + http_strict_transport_security **result); + +/** + * Destroy a strict transport security object + * + * \param victim Object to destroy + */ +void http_strict_transport_security_destroy( + http_strict_transport_security *victim); + +/** + * Get the value of a strict transport security's max-age + * + * \param sts Object to inspect + * \return Max age, in delta-seconds + */ +uint32_t http_strict_transport_security_max_age( + http_strict_transport_security *sts); + +/** + * Get the value of a strict transport security's includeSubDomains flag + * + * \param sts Object to inspect + * \return Whether subdomains should be included + */ +bool http_strict_transport_security_include_subdomains( + http_strict_transport_security *sts); + +#endif diff --git a/utils/idna-derived-props-gen.pl b/utils/idna-derived-props-gen.pl deleted file mode 100644 index 515f62a40..000000000 --- a/utils/idna-derived-props-gen.pl +++ /dev/null @@ -1,96 +0,0 @@ -#!/usr/bin/perl -# -# Copyright 2014 Chris Young <chris@unsatisfactorysoftware.co.uk> -# -# This file is part of NetSurf, http://www.netsurf-browser.org/ -# -# NetSurf is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. 
-# -# NetSurf is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program. If not, see <http://www.gnu.org/licenses/>. - -print <<HEADER; -/* This file is generated by idna-derived-props-gen.pl - * DO NOT EDIT BY HAND - */ -#ifndef _NETSURF_UTILS_IDNA_PROPS_H_ -#define _NETSURF_UTILS_IDNA_PROPS_H_ - -typedef enum idna_property { - IDNA_P_PVALID = 1, - IDNA_P_CONTEXTJ = 2, - IDNA_P_CONTEXTO = 3, - IDNA_P_DISALLOWED = 4, - IDNA_P_UNASSIGNED = 5 -} idna_property; - -typedef enum idna_unicode_jt { - IDNA_UNICODE_JT_U = 0, - IDNA_UNICODE_JT_C = 1, - IDNA_UNICODE_JT_D = 2, - IDNA_UNICODE_JT_R = 3, - IDNA_UNICODE_JT_T = 4, - IDNA_UNICODE_JT_L = 5 -} idna_unicode_jt; - - -typedef struct idna_table { - int32_t start; - int32_t end; - union p { - idna_property property; - idna_unicode_jt jt; - } p; -} idna_table; - -idna_table idna_derived[] = { -HEADER - -open(CSVFILE, "idna-tables-5.2.0-properties.csv"); -$line = <CSVFILE>; # discard header line - -while($line = <CSVFILE>) { - @items = split(/\,/, $line); - @codepoints = split(/-/, $items[0]); - if($#codepoints == 0) { $codepoints[1] = $codepoints[0]; } - print "\t{ 0x" . $codepoints[0] . ", 0x" . $codepoints[1] . ", .p.property = IDNA_P_" . $items[1] . " },\n"; -} - -close(CSVFILE); - -print <<HEADER; - { 0, 0, .p.property = 0} -}; - -idna_table idna_joiningtype[] = { -HEADER - - -open(TXTFILE, "DerivedJoiningType.txt"); - -while($line = <TXTFILE>) { - chop($line); - if(substr($line, 0, 1) eq '#') {next;} - if(length($line) == 0) {next;} - @items = split(/;/, $line); - @codepoints = split(/\./, $items[0]); - if($#codepoints == 0) { $codepoints[2] = $codepoints[0]; } - print "\t{ 0x" . $codepoints[0] . ", 0x" . $codepoints[2] . ", .p.jt = IDNA_UNICODE_JT_" . 
substr($items[1], 1, 1) . " },\n"; -} - -close(TXTFILE); - -print <<HEADER; - { 0, 0, .p.jt = 0} -}; -#endif -HEADER - - diff --git a/utils/idna-tables-5.2.0-properties.csv b/utils/idna-tables-5.2.0-properties.csv deleted file mode 100644 index a74547bc9..000000000 --- a/utils/idna-tables-5.2.0-properties.csv +++ /dev/null @@ -1,2322 +0,0 @@ -Codepoint,Property,Description
-0000-002C,DISALLOWED,NULL..COMMA
-002D,PVALID,HYPHEN-MINUS
-002E-002F,DISALLOWED,FULL STOP..SOLIDUS
-0030-0039,PVALID,DIGIT ZERO..DIGIT NINE
-003A-0060,DISALLOWED,COLON..GRAVE ACCENT
-0061-007A,PVALID,LATIN SMALL LETTER A..LATIN SMALL LETTER Z
-007B-00B6,DISALLOWED,LEFT CURLY BRACKET..PILCROW SIGN
-00B7,CONTEXTO,MIDDLE DOT
-00B8-00DE,DISALLOWED,CEDILLA..LATIN CAPITAL LETTER THORN
-00DF-00F6,PVALID,LATIN SMALL LETTER SHARP S..LATIN SMALL LETTER O WITH DIAERESIS
-00F7,DISALLOWED,DIVISION SIGN
-00F8-00FF,PVALID,LATIN SMALL LETTER O WITH STROKE..LATIN SMALL LETTER Y WITH DIAERESIS
-0100,DISALLOWED,LATIN CAPITAL LETTER A WITH MACRON
-0101,PVALID,LATIN SMALL LETTER A WITH MACRON
-0102,DISALLOWED,LATIN CAPITAL LETTER A WITH BREVE
-0103,PVALID,LATIN SMALL LETTER A WITH BREVE
-0104,DISALLOWED,LATIN CAPITAL LETTER A WITH OGONEK
-0105,PVALID,LATIN SMALL LETTER A WITH OGONEK
-0106,DISALLOWED,LATIN CAPITAL LETTER C WITH ACUTE
-0107,PVALID,LATIN SMALL LETTER C WITH ACUTE
-0108,DISALLOWED,LATIN CAPITAL LETTER C WITH CIRCUMFLEX
-0109,PVALID,LATIN SMALL LETTER C WITH CIRCUMFLEX
-010A,DISALLOWED,LATIN CAPITAL LETTER C WITH DOT ABOVE
-010B,PVALID,LATIN SMALL LETTER C WITH DOT ABOVE
-010C,DISALLOWED,LATIN CAPITAL LETTER C WITH CARON
-010D,PVALID,LATIN SMALL LETTER C WITH CARON
-010E,DISALLOWED,LATIN CAPITAL LETTER D WITH CARON
-010F,PVALID,LATIN SMALL LETTER D WITH CARON
-0110,DISALLOWED,LATIN CAPITAL LETTER D WITH STROKE
-0111,PVALID,LATIN SMALL LETTER D WITH STROKE
-0112,DISALLOWED,LATIN CAPITAL LETTER E WITH MACRON
-0113,PVALID,LATIN SMALL LETTER E WITH MACRON
-0114,DISALLOWED,LATIN CAPITAL LETTER E WITH BREVE
-0115,PVALID,LATIN SMALL LETTER E WITH BREVE
-0116,DISALLOWED,LATIN CAPITAL LETTER E WITH DOT ABOVE
-0117,PVALID,LATIN SMALL LETTER E WITH DOT ABOVE
-0118,DISALLOWED,LATIN CAPITAL LETTER E WITH OGONEK
-0119,PVALID,LATIN SMALL LETTER E WITH OGONEK
-011A,DISALLOWED,LATIN CAPITAL LETTER E WITH CARON
-011B,PVALID,LATIN SMALL LETTER E WITH CARON
-011C,DISALLOWED,LATIN CAPITAL LETTER G WITH CIRCUMFLEX
-011D,PVALID,LATIN SMALL LETTER G WITH CIRCUMFLEX
-011E,DISALLOWED,LATIN CAPITAL LETTER G WITH BREVE
-011F,PVALID,LATIN SMALL LETTER G WITH BREVE
-0120,DISALLOWED,LATIN CAPITAL LETTER G WITH DOT ABOVE
-0121,PVALID,LATIN SMALL LETTER G WITH DOT ABOVE
-0122,DISALLOWED,LATIN CAPITAL LETTER G WITH CEDILLA
-0123,PVALID,LATIN SMALL LETTER G WITH CEDILLA
-0124,DISALLOWED,LATIN CAPITAL LETTER H WITH CIRCUMFLEX
-0125,PVALID,LATIN SMALL LETTER H WITH CIRCUMFLEX
-0126,DISALLOWED,LATIN CAPITAL LETTER H WITH STROKE
-0127,PVALID,LATIN SMALL LETTER H WITH STROKE
-0128,DISALLOWED,LATIN CAPITAL LETTER I WITH TILDE
-0129,PVALID,LATIN SMALL LETTER I WITH TILDE
-012A,DISALLOWED,LATIN CAPITAL LETTER I WITH MACRON
-012B,PVALID,LATIN SMALL LETTER I WITH MACRON
-012C,DISALLOWED,LATIN CAPITAL LETTER I WITH BREVE
-012D,PVALID,LATIN SMALL LETTER I WITH BREVE
-012E,DISALLOWED,LATIN CAPITAL LETTER I WITH OGONEK
-012F,PVALID,LATIN SMALL LETTER I WITH OGONEK
-0130,DISALLOWED,LATIN CAPITAL LETTER I WITH DOT ABOVE
-0131,PVALID,LATIN SMALL LETTER DOTLESS I
-0132-0134,DISALLOWED,LATIN CAPITAL LIGATURE IJ..LATIN CAPITAL LETTER J WITH CIRCUMFLEX
-0135,PVALID,LATIN SMALL LETTER J WITH CIRCUMFLEX
-0136,DISALLOWED,LATIN CAPITAL LETTER K WITH CEDILLA
-0137-0138,PVALID,LATIN SMALL LETTER K WITH CEDILLA..LATIN SMALL LETTER KRA
-0139,DISALLOWED,LATIN CAPITAL LETTER L WITH ACUTE
-013A,PVALID,LATIN SMALL LETTER L WITH ACUTE
-013B,DISALLOWED,LATIN CAPITAL LETTER L WITH CEDILLA
-013C,PVALID,LATIN SMALL LETTER L WITH CEDILLA
-013D,DISALLOWED,LATIN CAPITAL LETTER L WITH CARON
-013E,PVALID,LATIN SMALL LETTER L WITH CARON
-013F-0141,DISALLOWED,LATIN CAPITAL LETTER L WITH MIDDLE DOT..LATIN CAPITAL LETTER L WITH STROKE
-0142,PVALID,LATIN SMALL LETTER L WITH STROKE
-0143,DISALLOWED,LATIN CAPITAL LETTER N WITH ACUTE
-0144,PVALID,LATIN SMALL LETTER N WITH ACUTE
-0145,DISALLOWED,LATIN CAPITAL LETTER N WITH CEDILLA
-0146,PVALID,LATIN SMALL LETTER N WITH CEDILLA
-0147,DISALLOWED,LATIN CAPITAL LETTER N WITH CARON
-0148,PVALID,LATIN SMALL LETTER N WITH CARON
-0149-014A,DISALLOWED,LATIN SMALL LETTER N PRECEDED BY APOSTROPHE..LATIN CAPITAL LETTER ENG
-014B,PVALID,LATIN SMALL LETTER ENG
-014C,DISALLOWED,LATIN CAPITAL LETTER O WITH MACRON
-014D,PVALID,LATIN SMALL LETTER O WITH MACRON
-014E,DISALLOWED,LATIN CAPITAL LETTER O WITH BREVE
-014F,PVALID,LATIN SMALL LETTER O WITH BREVE
-0150,DISALLOWED,LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
-0151,PVALID,LATIN SMALL LETTER O WITH DOUBLE ACUTE
-0152,DISALLOWED,LATIN CAPITAL LIGATURE OE
-0153,PVALID,LATIN SMALL LIGATURE OE
-0154,DISALLOWED,LATIN CAPITAL LETTER R WITH ACUTE
-0155,PVALID,LATIN SMALL LETTER R WITH ACUTE
-0156,DISALLOWED,LATIN CAPITAL LETTER R WITH CEDILLA
-0157,PVALID,LATIN SMALL LETTER R WITH CEDILLA
-0158,DISALLOWED,LATIN CAPITAL LETTER R WITH CARON
-0159,PVALID,LATIN SMALL LETTER R WITH CARON
-015A,DISALLOWED,LATIN CAPITAL LETTER S WITH ACUTE
-015B,PVALID,LATIN SMALL LETTER S WITH ACUTE
-015C,DISALLOWED,LATIN CAPITAL LETTER S WITH CIRCUMFLEX
-015D,PVALID,LATIN SMALL LETTER S WITH CIRCUMFLEX
-015E,DISALLOWED,LATIN CAPITAL LETTER S WITH CEDILLA
-015F,PVALID,LATIN SMALL LETTER S WITH CEDILLA
-0160,DISALLOWED,LATIN CAPITAL LETTER S WITH CARON
-0161,PVALID,LATIN SMALL LETTER S WITH CARON
-0162,DISALLOWED,LATIN CAPITAL LETTER T WITH CEDILLA
-0163,PVALID,LATIN SMALL LETTER T WITH CEDILLA
-0164,DISALLOWED,LATIN CAPITAL LETTER T WITH CARON
-0165,PVALID,LATIN SMALL LETTER T WITH CARON
-0166,DISALLOWED,LATIN CAPITAL LETTER T WITH STROKE
-0167,PVALID,LATIN SMALL LETTER T WITH STROKE
-0168,DISALLOWED,LATIN CAPITAL LETTER U WITH TILDE
-0169,PVALID,LATIN SMALL LETTER U WITH TILDE
-016A,DISALLOWED,LATIN CAPITAL LETTER U WITH MACRON
-016B,PVALID,LATIN SMALL LETTER U WITH MACRON
-016C,DISALLOWED,LATIN CAPITAL LETTER U WITH BREVE
-016D,PVALID,LATIN SMALL LETTER U WITH BREVE
-016E,DISALLOWED,LATIN CAPITAL LETTER U WITH RING ABOVE
-016F,PVALID,LATIN SMALL LETTER U WITH RING ABOVE
-0170,DISALLOWED,LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
-0171,PVALID,LATIN SMALL LETTER U WITH DOUBLE ACUTE
-0172,DISALLOWED,LATIN CAPITAL LETTER U WITH OGONEK
-0173,PVALID,LATIN SMALL LETTER U WITH OGONEK
-0174,DISALLOWED,LATIN CAPITAL LETTER W WITH CIRCUMFLEX
-0175,PVALID,LATIN SMALL LETTER W WITH CIRCUMFLEX
-0176,DISALLOWED,LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
-0177,PVALID,LATIN SMALL LETTER Y WITH CIRCUMFLEX
-0178-0179,DISALLOWED,LATIN CAPITAL LETTER Y WITH DIAERESIS..LATIN CAPITAL LETTER Z WITH ACUTE
-017A,PVALID,LATIN SMALL LETTER Z WITH ACUTE
-017B,DISALLOWED,LATIN CAPITAL LETTER Z WITH DOT ABOVE
-017C,PVALID,LATIN SMALL LETTER Z WITH DOT ABOVE
-017D,DISALLOWED,LATIN CAPITAL LETTER Z WITH CARON
-017E,PVALID,LATIN SMALL LETTER Z WITH CARON
-017F,DISALLOWED,LATIN SMALL LETTER LONG S
-0180,PVALID,LATIN SMALL LETTER B WITH STROKE
-0181-0182,DISALLOWED,LATIN CAPITAL LETTER B WITH HOOK..LATIN CAPITAL LETTER B WITH TOPBAR
-0183,PVALID,LATIN SMALL LETTER B WITH TOPBAR
-0184,DISALLOWED,LATIN CAPITAL LETTER TONE SIX
-0185,PVALID,LATIN SMALL LETTER TONE SIX
-0186-0187,DISALLOWED,LATIN CAPITAL LETTER OPEN O..LATIN CAPITAL LETTER C WITH HOOK
-0188,PVALID,LATIN SMALL LETTER C WITH HOOK
-0189-018B,DISALLOWED,LATIN CAPITAL LETTER AFRICAN D..LATIN CAPITAL LETTER D WITH TOPBAR
-018C-018D,PVALID,LATIN SMALL LETTER D WITH TOPBAR..LATIN SMALL LETTER TURNED DELTA
-018E-0191,DISALLOWED,LATIN CAPITAL LETTER REVERSED E..LATIN CAPITAL LETTER F WITH HOOK
-0192,PVALID,LATIN SMALL LETTER F WITH HOOK
-0193-0194,DISALLOWED,LATIN CAPITAL LETTER G WITH HOOK..LATIN CAPITAL LETTER GAMMA
-0195,PVALID,LATIN SMALL LETTER HV
-0196-0198,DISALLOWED,LATIN CAPITAL LETTER IOTA..LATIN CAPITAL LETTER K WITH HOOK
-0199-019B,PVALID,LATIN SMALL LETTER K WITH HOOK..LATIN SMALL LETTER LAMBDA WITH STROKE
-019C-019D,DISALLOWED,LATIN CAPITAL LETTER TURNED M..LATIN CAPITAL LETTER N WITH LEFT HOOK
-019E,PVALID,LATIN SMALL LETTER N WITH LONG RIGHT LEG
-019F-01A0,DISALLOWED,LATIN CAPITAL LETTER O WITH MIDDLE TILDE..LATIN CAPITAL LETTER O WITH HORN
-01A1,PVALID,LATIN SMALL LETTER O WITH HORN
-01A2,DISALLOWED,LATIN CAPITAL LETTER OI
-01A3,PVALID,LATIN SMALL LETTER OI
-01A4,DISALLOWED,LATIN CAPITAL LETTER P WITH HOOK
-01A5,PVALID,LATIN SMALL LETTER P WITH HOOK
-01A6-01A7,DISALLOWED,LATIN LETTER YR..LATIN CAPITAL LETTER TONE TWO
-01A8,PVALID,LATIN SMALL LETTER TONE TWO
-01A9,DISALLOWED,LATIN CAPITAL LETTER ESH
-01AA-01AB,PVALID,LATIN LETTER REVERSED ESH LOOP..LATIN SMALL LETTER T WITH PALATAL HOOK
-01AC,DISALLOWED,LATIN CAPITAL LETTER T WITH HOOK
-01AD,PVALID,LATIN SMALL LETTER T WITH HOOK
-01AE-01AF,DISALLOWED,LATIN CAPITAL LETTER T WITH RETROFLEX HOOK..LATIN CAPITAL LETTER U WITH HORN
-01B0,PVALID,LATIN SMALL LETTER U WITH HORN
-01B1-01B3,DISALLOWED,LATIN CAPITAL LETTER UPSILON..LATIN CAPITAL LETTER Y WITH HOOK
-01B4,PVALID,LATIN SMALL LETTER Y WITH HOOK
-01B5,DISALLOWED,LATIN CAPITAL LETTER Z WITH STROKE
-01B6,PVALID,LATIN SMALL LETTER Z WITH STROKE
-01B7-01B8,DISALLOWED,LATIN CAPITAL LETTER EZH..LATIN CAPITAL LETTER EZH REVERSED
-01B9-01BB,PVALID,LATIN SMALL LETTER EZH REVERSED..LATIN LETTER TWO WITH STROKE
-01BC,DISALLOWED,LATIN CAPITAL LETTER TONE FIVE
-01BD-01C3,PVALID,LATIN SMALL LETTER TONE FIVE..LATIN LETTER RETROFLEX CLICK
-01C4-01CD,DISALLOWED,LATIN CAPITAL LETTER DZ WITH CARON..LATIN CAPITAL LETTER A WITH CARON
-01CE,PVALID,LATIN SMALL LETTER A WITH CARON
-01CF,DISALLOWED,LATIN CAPITAL LETTER I WITH CARON
-01D0,PVALID,LATIN SMALL LETTER I WITH CARON
-01D1,DISALLOWED,LATIN CAPITAL LETTER O WITH CARON
-01D2,PVALID,LATIN SMALL LETTER O WITH CARON
-01D3,DISALLOWED,LATIN CAPITAL LETTER U WITH CARON
-01D4,PVALID,LATIN SMALL LETTER U WITH CARON
-01D5,DISALLOWED,LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
-01D6,PVALID,LATIN SMALL LETTER U WITH DIAERESIS AND MACRON
-01D7,DISALLOWED,LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
-01D8,PVALID,LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE
-01D9,DISALLOWED,LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
-01DA,PVALID,LATIN SMALL LETTER U WITH DIAERESIS AND CARON
-01DB,DISALLOWED,LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
-01DC-01DD,PVALID,LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE..LATIN SMALL LETTER TURNED E
-01DE,DISALLOWED,LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
-01DF,PVALID,LATIN SMALL LETTER A WITH DIAERESIS AND MACRON
-01E0,DISALLOWED,LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
-01E1,PVALID,LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON
-01E2,DISALLOWED,LATIN CAPITAL LETTER AE WITH MACRON
-01E3,PVALID,LATIN SMALL LETTER AE WITH MACRON
-01E4,DISALLOWED,LATIN CAPITAL LETTER G WITH STROKE
-01E5,PVALID,LATIN SMALL LETTER G WITH STROKE
-01E6,DISALLOWED,LATIN CAPITAL LETTER G WITH CARON
-01E7,PVALID,LATIN SMALL LETTER G WITH CARON
-01E8,DISALLOWED,LATIN CAPITAL LETTER K WITH CARON
-01E9,PVALID,LATIN SMALL LETTER K WITH CARON
-01EA,DISALLOWED,LATIN CAPITAL LETTER O WITH OGONEK
-01EB,PVALID,LATIN SMALL LETTER O WITH OGONEK
-01EC,DISALLOWED,LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
-01ED,PVALID,LATIN SMALL LETTER O WITH OGONEK AND MACRON
-01EE,DISALLOWED,LATIN CAPITAL LETTER EZH WITH CARON
-01EF-01F0,PVALID,LATIN SMALL LETTER EZH WITH CARON..LATIN SMALL LETTER J WITH CARON
-01F1-01F4,DISALLOWED,LATIN CAPITAL LETTER DZ..LATIN CAPITAL LETTER G WITH ACUTE
-01F5,PVALID,LATIN SMALL LETTER G WITH ACUTE
-01F6-01F8,DISALLOWED,LATIN CAPITAL LETTER HWAIR..LATIN CAPITAL LETTER N WITH GRAVE
-01F9,PVALID,LATIN SMALL LETTER N WITH GRAVE
-01FA,DISALLOWED,LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
-01FB,PVALID,LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE
-01FC,DISALLOWED,LATIN CAPITAL LETTER AE WITH ACUTE
-01FD,PVALID,LATIN SMALL LETTER AE WITH ACUTE
-01FE,DISALLOWED,LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
-01FF,PVALID,LATIN SMALL LETTER O WITH STROKE AND ACUTE
-0200,DISALLOWED,LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
-0201,PVALID,LATIN SMALL LETTER A WITH DOUBLE GRAVE
-0202,DISALLOWED,LATIN CAPITAL LETTER A WITH INVERTED BREVE
-0203,PVALID,LATIN SMALL LETTER A WITH INVERTED BREVE
-0204,DISALLOWED,LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
-0205,PVALID,LATIN SMALL LETTER E WITH DOUBLE GRAVE
-0206,DISALLOWED,LATIN CAPITAL LETTER E WITH INVERTED BREVE
-0207,PVALID,LATIN SMALL LETTER E WITH INVERTED BREVE
-0208,DISALLOWED,LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
-0209,PVALID,LATIN SMALL LETTER I WITH DOUBLE GRAVE
-020A,DISALLOWED,LATIN CAPITAL LETTER I WITH INVERTED BREVE
-020B,PVALID,LATIN SMALL LETTER I WITH INVERTED BREVE
-020C,DISALLOWED,LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
-020D,PVALID,LATIN SMALL LETTER O WITH DOUBLE GRAVE
-020E,DISALLOWED,LATIN CAPITAL LETTER O WITH INVERTED BREVE
-020F,PVALID,LATIN SMALL LETTER O WITH INVERTED BREVE
-0210,DISALLOWED,LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
-0211,PVALID,LATIN SMALL LETTER R WITH DOUBLE GRAVE
-0212,DISALLOWED,LATIN CAPITAL LETTER R WITH INVERTED BREVE
-0213,PVALID,LATIN SMALL LETTER R WITH INVERTED BREVE
-0214,DISALLOWED,LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
-0215,PVALID,LATIN SMALL LETTER U WITH DOUBLE GRAVE
-0216,DISALLOWED,LATIN CAPITAL LETTER U WITH INVERTED BREVE
-0217,PVALID,LATIN SMALL LETTER U WITH INVERTED BREVE
-0218,DISALLOWED,LATIN CAPITAL LETTER S WITH COMMA BELOW
-0219,PVALID,LATIN SMALL LETTER S WITH COMMA BELOW
-021A,DISALLOWED,LATIN CAPITAL LETTER T WITH COMMA BELOW
-021B,PVALID,LATIN SMALL LETTER T WITH COMMA BELOW
-021C,DISALLOWED,LATIN CAPITAL LETTER YOGH
-021D,PVALID,LATIN SMALL LETTER YOGH
-021E,DISALLOWED,LATIN CAPITAL LETTER H WITH CARON
-021F,PVALID,LATIN SMALL LETTER H WITH CARON
-0220,DISALLOWED,LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
-0221,PVALID,LATIN SMALL LETTER D WITH CURL
-0222,DISALLOWED,LATIN CAPITAL LETTER OU
-0223,PVALID,LATIN SMALL LETTER OU
-0224,DISALLOWED,LATIN CAPITAL LETTER Z WITH HOOK
-0225,PVALID,LATIN SMALL LETTER Z WITH HOOK
-0226,DISALLOWED,LATIN CAPITAL LETTER A WITH DOT ABOVE
-0227,PVALID,LATIN SMALL LETTER A WITH DOT ABOVE
-0228,DISALLOWED,LATIN CAPITAL LETTER E WITH CEDILLA
-0229,PVALID,LATIN SMALL LETTER E WITH CEDILLA
-022A,DISALLOWED,LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
-022B,PVALID,LATIN SMALL LETTER O WITH DIAERESIS AND MACRON
-022C,DISALLOWED,LATIN CAPITAL LETTER O WITH TILDE AND MACRON
-022D,PVALID,LATIN SMALL LETTER O WITH TILDE AND MACRON
-022E,DISALLOWED,LATIN CAPITAL LETTER O WITH DOT ABOVE
-022F,PVALID,LATIN SMALL LETTER O WITH DOT ABOVE
-0230,DISALLOWED,LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
-0231,PVALID,LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON
-0232,DISALLOWED,LATIN CAPITAL LETTER Y WITH MACRON
-0233-0239,PVALID,LATIN SMALL LETTER Y WITH MACRON..LATIN SMALL LETTER QP DIGRAPH
-023A-023B,DISALLOWED,LATIN CAPITAL LETTER A WITH STROKE..LATIN CAPITAL LETTER C WITH STROKE
-023C,PVALID,LATIN SMALL LETTER C WITH STROKE
-023D-023E,DISALLOWED,LATIN CAPITAL LETTER L WITH BAR..LATIN CAPITAL LETTER T WITH DIAGONAL STROKE
-023F-0240,PVALID,LATIN SMALL LETTER S WITH SWASH TAIL..LATIN SMALL LETTER Z WITH SWASH TAIL
-0241,DISALLOWED,LATIN CAPITAL LETTER GLOTTAL STOP
-0242,PVALID,LATIN SMALL LETTER GLOTTAL STOP
-0243-0246,DISALLOWED,LATIN CAPITAL LETTER B WITH STROKE..LATIN CAPITAL LETTER E WITH STROKE
-0247,PVALID,LATIN SMALL LETTER E WITH STROKE
-0248,DISALLOWED,LATIN CAPITAL LETTER J WITH STROKE
-0249,PVALID,LATIN SMALL LETTER J WITH STROKE
-024A,DISALLOWED,LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL
-024B,PVALID,LATIN SMALL LETTER Q WITH HOOK TAIL
-024C,DISALLOWED,LATIN CAPITAL LETTER R WITH STROKE
-024D,PVALID,LATIN SMALL LETTER R WITH STROKE
-024E,DISALLOWED,LATIN CAPITAL LETTER Y WITH STROKE
-024F-02AF,PVALID,LATIN SMALL LETTER Y WITH STROKE..LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL
-02B0-02B8,DISALLOWED,MODIFIER LETTER SMALL H..MODIFIER LETTER SMALL Y
-02B9-02C1,PVALID,MODIFIER LETTER PRIME..MODIFIER LETTER REVERSED GLOTTAL STOP
-02C2-02C5,DISALLOWED,MODIFIER LETTER LEFT ARROWHEAD..MODIFIER LETTER DOWN ARROWHEAD
-02C6-02D1,PVALID,MODIFIER LETTER CIRCUMFLEX ACCENT..MODIFIER LETTER HALF TRIANGULAR COLON
-02D2-02EB,DISALLOWED,MODIFIER LETTER CENTRED RIGHT HALF RING..MODIFIER LETTER YANG DEPARTING TONE MARK
-02EC,PVALID,MODIFIER LETTER VOICING
-02ED,DISALLOWED,MODIFIER LETTER UNASPIRATED
-02EE,PVALID,MODIFIER LETTER DOUBLE APOSTROPHE
-02EF-02FF,DISALLOWED,MODIFIER LETTER LOW DOWN ARROWHEAD..MODIFIER LETTER LOW LEFT ARROW
-0300-033F,PVALID,COMBINING GRAVE ACCENT..COMBINING DOUBLE OVERLINE
-0340-0341,DISALLOWED,COMBINING GRAVE TONE MARK..COMBINING ACUTE TONE MARK
-0342,PVALID,COMBINING GREEK PERISPOMENI
-0343-0345,DISALLOWED,COMBINING GREEK KORONIS..COMBINING GREEK YPOGEGRAMMENI
-0346-034E,PVALID,COMBINING BRIDGE ABOVE..COMBINING UPWARDS ARROW BELOW
-034F,DISALLOWED,COMBINING GRAPHEME JOINER
-0350-036F,PVALID,COMBINING RIGHT ARROWHEAD ABOVE..COMBINING LATIN SMALL LETTER X
-0370,DISALLOWED,GREEK CAPITAL LETTER HETA
-0371,PVALID,GREEK SMALL LETTER HETA
-0372,DISALLOWED,GREEK CAPITAL LETTER ARCHAIC SAMPI
-0373,PVALID,GREEK SMALL LETTER ARCHAIC SAMPI
-0374,DISALLOWED,GREEK NUMERAL SIGN
-0375,CONTEXTO,GREEK LOWER NUMERAL SIGN
-0376,DISALLOWED,GREEK CAPITAL LETTER PAMPHYLIAN DIGAMMA
-0377,PVALID,GREEK SMALL LETTER PAMPHYLIAN DIGAMMA
-0378-0379,UNASSIGNED,<RESERVED>..<RESERVED>
-037A,DISALLOWED,GREEK YPOGEGRAMMENI
-037B-037D,PVALID,GREEK SMALL REVERSED LUNATE SIGMA SYMBOL..GREEK SMALL REVERSED DOTTED LUNATE SIGMA SYMBOL
-037E,DISALLOWED,GREEK QUESTION MARK
-037F-0383,UNASSIGNED,<RESERVED>..<RESERVED>
-0384-038A,DISALLOWED,GREEK TONOS..GREEK CAPITAL LETTER IOTA WITH TONOS
-038B,UNASSIGNED,<RESERVED>
-038C,DISALLOWED,GREEK CAPITAL LETTER OMICRON WITH TONOS
-038D,UNASSIGNED,<RESERVED>
-038E-038F,DISALLOWED,GREEK CAPITAL LETTER UPSILON WITH TONOS..GREEK CAPITAL LETTER OMEGA WITH TONOS
-0390,PVALID,GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
-0391-03A1,DISALLOWED,GREEK CAPITAL LETTER ALPHA..GREEK CAPITAL LETTER RHO
-03A2,UNASSIGNED,<RESERVED>
-03A3-03AB,DISALLOWED,GREEK CAPITAL LETTER SIGMA..GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
-03AC-03CE,PVALID,GREEK SMALL LETTER ALPHA WITH TONOS..GREEK SMALL LETTER OMEGA WITH TONOS
-03CF-03D6,DISALLOWED,GREEK CAPITAL KAI SYMBOL..GREEK PI SYMBOL
-03D7,PVALID,GREEK KAI SYMBOL
-03D8,DISALLOWED,GREEK LETTER ARCHAIC KOPPA
-03D9,PVALID,GREEK SMALL LETTER ARCHAIC KOPPA
-03DA,DISALLOWED,GREEK LETTER STIGMA
-03DB,PVALID,GREEK SMALL LETTER STIGMA
-03DC,DISALLOWED,GREEK LETTER DIGAMMA
-03DD,PVALID,GREEK SMALL LETTER DIGAMMA
-03DE,DISALLOWED,GREEK LETTER KOPPA
-03DF,PVALID,GREEK SMALL LETTER KOPPA
-03E0,DISALLOWED,GREEK LETTER SAMPI
-03E1,PVALID,GREEK SMALL LETTER SAMPI
-03E2,DISALLOWED,COPTIC CAPITAL LETTER SHEI
-03E3,PVALID,COPTIC SMALL LETTER SHEI
-03E4,DISALLOWED,COPTIC CAPITAL LETTER FEI
-03E5,PVALID,COPTIC SMALL LETTER FEI
-03E6,DISALLOWED,COPTIC CAPITAL LETTER KHEI
-03E7,PVALID,COPTIC SMALL LETTER KHEI
-03E8,DISALLOWED,COPTIC CAPITAL LETTER HORI
-03E9,PVALID,COPTIC SMALL LETTER HORI
-03EA,DISALLOWED,COPTIC CAPITAL LETTER GANGIA
-03EB,PVALID,COPTIC SMALL LETTER GANGIA
-03EC,DISALLOWED,COPTIC CAPITAL LETTER SHIMA
-03ED,PVALID,COPTIC SMALL LETTER SHIMA
-03EE,DISALLOWED,COPTIC CAPITAL LETTER DEI
-03EF,PVALID,COPTIC SMALL LETTER DEI
-03F0-03F2,DISALLOWED,GREEK KAPPA SYMBOL..GREEK LUNATE SIGMA SYMBOL
-03F3,PVALID,GREEK LETTER YOT
-03F4-03F7,DISALLOWED,GREEK CAPITAL THETA SYMBOL..GREEK CAPITAL LETTER SHO
-03F8,PVALID,GREEK SMALL LETTER SHO
-03F9-03FA,DISALLOWED,GREEK CAPITAL LUNATE SIGMA SYMBOL..GREEK CAPITAL LETTER SAN
-03FB-03FC,PVALID,GREEK SMALL LETTER SAN..GREEK RHO WITH STROKE SYMBOL
-03FD-042F,DISALLOWED,GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL..CYRILLIC CAPITAL LETTER YA
-0430-045F,PVALID,CYRILLIC SMALL LETTER A..CYRILLIC SMALL LETTER DZHE
-0460,DISALLOWED,CYRILLIC CAPITAL LETTER OMEGA
-0461,PVALID,CYRILLIC SMALL LETTER OMEGA
-0462,DISALLOWED,CYRILLIC CAPITAL LETTER YAT
-0463,PVALID,CYRILLIC SMALL LETTER YAT
-0464,DISALLOWED,CYRILLIC CAPITAL LETTER IOTIFIED E
-0465,PVALID,CYRILLIC SMALL LETTER IOTIFIED E
-0466,DISALLOWED,CYRILLIC CAPITAL LETTER LITTLE YUS
-0467,PVALID,CYRILLIC SMALL LETTER LITTLE YUS
-0468,DISALLOWED,CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
-0469,PVALID,CYRILLIC SMALL LETTER IOTIFIED LITTLE YUS
-046A,DISALLOWED,CYRILLIC CAPITAL LETTER BIG YUS
-046B,PVALID,CYRILLIC SMALL LETTER BIG YUS
-046C,DISALLOWED,CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
-046D,PVALID,CYRILLIC SMALL LETTER IOTIFIED BIG YUS
-046E,DISALLOWED,CYRILLIC CAPITAL LETTER KSI
-046F,PVALID,CYRILLIC SMALL LETTER KSI
-0470,DISALLOWED,CYRILLIC CAPITAL LETTER PSI
-0471,PVALID,CYRILLIC SMALL LETTER PSI
-0472,DISALLOWED,CYRILLIC CAPITAL LETTER FITA
-0473,PVALID,CYRILLIC SMALL LETTER FITA
-0474,DISALLOWED,CYRILLIC CAPITAL LETTER IZHITSA
-0475,PVALID,CYRILLIC SMALL LETTER IZHITSA
-0476,DISALLOWED,CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
-0477,PVALID,CYRILLIC SMALL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
-0478,DISALLOWED,CYRILLIC CAPITAL LETTER UK
-0479,PVALID,CYRILLIC SMALL LETTER UK
-047A,DISALLOWED,CYRILLIC CAPITAL LETTER ROUND OMEGA
-047B,PVALID,CYRILLIC SMALL LETTER ROUND OMEGA
-047C,DISALLOWED,CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
-047D,PVALID,CYRILLIC SMALL LETTER OMEGA WITH TITLO
-047E,DISALLOWED,CYRILLIC CAPITAL LETTER OT
-047F,PVALID,CYRILLIC SMALL LETTER OT
-0480,DISALLOWED,CYRILLIC CAPITAL LETTER KOPPA
-0481,PVALID,CYRILLIC SMALL LETTER KOPPA
-0482,DISALLOWED,CYRILLIC THOUSANDS SIGN
-0483-0487,PVALID,COMBINING CYRILLIC TITLO..COMBINING CYRILLIC POKRYTIE
-0488-048A,DISALLOWED,COMBINING CYRILLIC HUNDRED THOUSANDS SIGN..CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
-048B,PVALID,CYRILLIC SMALL LETTER SHORT I WITH TAIL
-048C,DISALLOWED,CYRILLIC CAPITAL LETTER SEMISOFT SIGN
-048D,PVALID,CYRILLIC SMALL LETTER SEMISOFT SIGN
-048E,DISALLOWED,CYRILLIC CAPITAL LETTER ER WITH TICK
-048F,PVALID,CYRILLIC SMALL LETTER ER WITH TICK
-0490,DISALLOWED,CYRILLIC CAPITAL LETTER GHE WITH UPTURN
-0491,PVALID,CYRILLIC SMALL LETTER GHE WITH UPTURN
-0492,DISALLOWED,CYRILLIC CAPITAL LETTER GHE WITH STROKE
-0493,PVALID,CYRILLIC SMALL LETTER GHE WITH STROKE
-0494,DISALLOWED,CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
-0495,PVALID,CYRILLIC SMALL LETTER GHE WITH MIDDLE HOOK
-0496,DISALLOWED,CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
-0497,PVALID,CYRILLIC SMALL LETTER ZHE WITH DESCENDER
-0498,DISALLOWED,CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
-0499,PVALID,CYRILLIC SMALL LETTER ZE WITH DESCENDER
-049A,DISALLOWED,CYRILLIC CAPITAL LETTER KA WITH DESCENDER
-049B,PVALID,CYRILLIC SMALL LETTER KA WITH DESCENDER
-049C,DISALLOWED,CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
-049D,PVALID,CYRILLIC SMALL LETTER KA WITH VERTICAL STROKE
-049E,DISALLOWED,CYRILLIC CAPITAL LETTER KA WITH STROKE
-049F,PVALID,CYRILLIC SMALL LETTER KA WITH STROKE
-04A0,DISALLOWED,CYRILLIC CAPITAL LETTER BASHKIR KA
-04A1,PVALID,CYRILLIC SMALL LETTER BASHKIR KA
-04A2,DISALLOWED,CYRILLIC CAPITAL LETTER EN WITH DESCENDER
-04A3,PVALID,CYRILLIC SMALL LETTER EN WITH DESCENDER
-04A4,DISALLOWED,CYRILLIC CAPITAL LIGATURE EN GHE
-04A5,PVALID,CYRILLIC SMALL LIGATURE EN GHE
-04A6,DISALLOWED,CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
-04A7,PVALID,CYRILLIC SMALL LETTER PE WITH MIDDLE HOOK
-04A8,DISALLOWED,CYRILLIC CAPITAL LETTER ABKHASIAN HA
-04A9,PVALID,CYRILLIC SMALL LETTER ABKHASIAN HA
-04AA,DISALLOWED,CYRILLIC CAPITAL LETTER ES WITH DESCENDER
-04AB,PVALID,CYRILLIC SMALL LETTER ES WITH DESCENDER
-04AC,DISALLOWED,CYRILLIC CAPITAL LETTER TE WITH DESCENDER
-04AD,PVALID,CYRILLIC SMALL LETTER TE WITH DESCENDER
-04AE,DISALLOWED,CYRILLIC CAPITAL LETTER STRAIGHT U
-04AF,PVALID,CYRILLIC SMALL LETTER STRAIGHT U
-04B0,DISALLOWED,CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
-04B1,PVALID,CYRILLIC SMALL LETTER STRAIGHT U WITH STROKE
-04B2,DISALLOWED,CYRILLIC CAPITAL LETTER HA WITH DESCENDER
-04B3,PVALID,CYRILLIC SMALL LETTER HA WITH DESCENDER
-04B4,DISALLOWED,CYRILLIC CAPITAL LIGATURE TE TSE
-04B5,PVALID,CYRILLIC SMALL LIGATURE TE TSE
-04B6,DISALLOWED,CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
-04B7,PVALID,CYRILLIC SMALL LETTER CHE WITH DESCENDER
-04B8,DISALLOWED,CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
-04B9,PVALID,CYRILLIC SMALL LETTER CHE WITH VERTICAL STROKE
-04BA,DISALLOWED,CYRILLIC CAPITAL LETTER SHHA
-04BB,PVALID,CYRILLIC SMALL LETTER SHHA
-04BC,DISALLOWED,CYRILLIC CAPITAL LETTER ABKHASIAN CHE
-04BD,PVALID,CYRILLIC SMALL LETTER ABKHASIAN CHE
-04BE,DISALLOWED,CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
-04BF,PVALID,CYRILLIC SMALL LETTER ABKHASIAN CHE WITH DESCENDER
-04C0-04C1,DISALLOWED,CYRILLIC LETTER PALOCHKA..CYRILLIC CAPITAL LETTER ZHE WITH BREVE
-04C2,PVALID,CYRILLIC SMALL LETTER ZHE WITH BREVE
-04C3,DISALLOWED,CYRILLIC CAPITAL LETTER KA WITH HOOK
-04C4,PVALID,CYRILLIC SMALL LETTER KA WITH HOOK
-04C5,DISALLOWED,CYRILLIC CAPITAL LETTER EL WITH TAIL
-04C6,PVALID,CYRILLIC SMALL LETTER EL WITH TAIL
-04C7,DISALLOWED,CYRILLIC CAPITAL LETTER EN WITH HOOK
-04C8,PVALID,CYRILLIC SMALL LETTER EN WITH HOOK
-04C9,DISALLOWED,CYRILLIC CAPITAL LETTER EN WITH TAIL
-04CA,PVALID,CYRILLIC SMALL LETTER EN WITH TAIL
-04CB,DISALLOWED,CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
-04CC,PVALID,CYRILLIC SMALL LETTER KHAKASSIAN CHE
-04CD,DISALLOWED,CYRILLIC CAPITAL LETTER EM WITH TAIL
-04CE-04CF,PVALID,CYRILLIC SMALL LETTER EM WITH TAIL..CYRILLIC SMALL LETTER PALOCHKA
-04D0,DISALLOWED,CYRILLIC CAPITAL LETTER A WITH BREVE
-04D1,PVALID,CYRILLIC SMALL LETTER A WITH BREVE
-04D2,DISALLOWED,CYRILLIC CAPITAL LETTER A WITH DIAERESIS
-04D3,PVALID,CYRILLIC SMALL LETTER A WITH DIAERESIS
-04D4,DISALLOWED,CYRILLIC CAPITAL LIGATURE A IE
-04D5,PVALID,CYRILLIC SMALL LIGATURE A IE
-04D6,DISALLOWED,CYRILLIC CAPITAL LETTER IE WITH BREVE
-04D7,PVALID,CYRILLIC SMALL LETTER IE WITH BREVE
-04D8,DISALLOWED,CYRILLIC CAPITAL LETTER SCHWA
-04D9,PVALID,CYRILLIC SMALL LETTER SCHWA
-04DA,DISALLOWED,CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
-04DB,PVALID,CYRILLIC SMALL LETTER SCHWA WITH DIAERESIS
-04DC,DISALLOWED,CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
-04DD,PVALID,CYRILLIC SMALL LETTER ZHE WITH DIAERESIS
-04DE,DISALLOWED,CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
-04DF,PVALID,CYRILLIC SMALL LETTER ZE WITH DIAERESIS
-04E0,DISALLOWED,CYRILLIC CAPITAL LETTER ABKHASIAN DZE
-04E1,PVALID,CYRILLIC SMALL LETTER ABKHASIAN DZE
-04E2,DISALLOWED,CYRILLIC CAPITAL LETTER I WITH MACRON
-04E3,PVALID,CYRILLIC SMALL LETTER I WITH MACRON
-04E4,DISALLOWED,CYRILLIC CAPITAL LETTER I WITH DIAERESIS
-04E5,PVALID,CYRILLIC SMALL LETTER I WITH DIAERESIS
-04E6,DISALLOWED,CYRILLIC CAPITAL LETTER O WITH DIAERESIS
-04E7,PVALID,CYRILLIC SMALL LETTER O WITH DIAERESIS
-04E8,DISALLOWED,CYRILLIC CAPITAL LETTER BARRED O
-04E9,PVALID,CYRILLIC SMALL LETTER BARRED O
-04EA,DISALLOWED,CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
-04EB,PVALID,CYRILLIC SMALL LETTER BARRED O WITH DIAERESIS
-04EC,DISALLOWED,CYRILLIC CAPITAL LETTER E WITH DIAERESIS
-04ED,PVALID,CYRILLIC SMALL LETTER E WITH DIAERESIS
-04EE,DISALLOWED,CYRILLIC CAPITAL LETTER U WITH MACRON
-04EF,PVALID,CYRILLIC SMALL LETTER U WITH MACRON
-04F0,DISALLOWED,CYRILLIC CAPITAL LETTER U WITH DIAERESIS
-04F1,PVALID,CYRILLIC SMALL LETTER U WITH DIAERESIS
-04F2,DISALLOWED,CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
-04F3,PVALID,CYRILLIC SMALL LETTER U WITH DOUBLE ACUTE
-04F4,DISALLOWED,CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
-04F5,PVALID,CYRILLIC SMALL LETTER CHE WITH DIAERESIS
-04F6,DISALLOWED,CYRILLIC CAPITAL LETTER GHE WITH DESCENDER
-04F7,PVALID,CYRILLIC SMALL LETTER GHE WITH DESCENDER
-04F8,DISALLOWED,CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
-04F9,PVALID,CYRILLIC SMALL LETTER YERU WITH DIAERESIS
-04FA,DISALLOWED,CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK
-04FB,PVALID,CYRILLIC SMALL LETTER GHE WITH STROKE AND HOOK
-04FC,DISALLOWED,CYRILLIC CAPITAL LETTER HA WITH HOOK
-04FD,PVALID,CYRILLIC SMALL LETTER HA WITH HOOK
-04FE,DISALLOWED,CYRILLIC CAPITAL LETTER HA WITH STROKE
-04FF,PVALID,CYRILLIC SMALL LETTER HA WITH STROKE
-0500,DISALLOWED,CYRILLIC CAPITAL LETTER KOMI DE
-0501,PVALID,CYRILLIC SMALL LETTER KOMI DE
-0502,DISALLOWED,CYRILLIC CAPITAL LETTER KOMI DJE
-0503,PVALID,CYRILLIC SMALL LETTER KOMI DJE
-0504,DISALLOWED,CYRILLIC CAPITAL LETTER KOMI ZJE
-0505,PVALID,CYRILLIC SMALL LETTER KOMI ZJE
-0506,DISALLOWED,CYRILLIC CAPITAL LETTER KOMI DZJE
-0507,PVALID,CYRILLIC SMALL LETTER KOMI DZJE
-0508,DISALLOWED,CYRILLIC CAPITAL LETTER KOMI LJE
-0509,PVALID,CYRILLIC SMALL LETTER KOMI LJE
-050A,DISALLOWED,CYRILLIC CAPITAL LETTER KOMI NJE
-050B,PVALID,CYRILLIC SMALL LETTER KOMI NJE
-050C,DISALLOWED,CYRILLIC CAPITAL LETTER KOMI SJE
-050D,PVALID,CYRILLIC SMALL LETTER KOMI SJE
-050E,DISALLOWED,CYRILLIC CAPITAL LETTER KOMI TJE
-050F,PVALID,CYRILLIC SMALL LETTER KOMI TJE
-0510,DISALLOWED,CYRILLIC CAPITAL LETTER REVERSED ZE
-0511,PVALID,CYRILLIC SMALL LETTER REVERSED ZE
-0512,DISALLOWED,CYRILLIC CAPITAL LETTER EL WITH HOOK
-0513,PVALID,CYRILLIC SMALL LETTER EL WITH HOOK
-0514,DISALLOWED,CYRILLIC CAPITAL LETTER LHA
-0515,PVALID,CYRILLIC SMALL LETTER LHA
-0516,DISALLOWED,CYRILLIC CAPITAL LETTER RHA
-0517,PVALID,CYRILLIC SMALL LETTER RHA
-0518,DISALLOWED,CYRILLIC CAPITAL LETTER YAE
-0519,PVALID,CYRILLIC SMALL LETTER YAE
-051A,DISALLOWED,CYRILLIC CAPITAL LETTER QA
-051B,PVALID,CYRILLIC SMALL LETTER QA
-051C,DISALLOWED,CYRILLIC CAPITAL LETTER WE
-051D,PVALID,CYRILLIC SMALL LETTER WE
-051E,DISALLOWED,CYRILLIC CAPITAL LETTER ALEUT KA
-051F,PVALID,CYRILLIC SMALL LETTER ALEUT KA
-0520,DISALLOWED,CYRILLIC CAPITAL LETTER EL WITH MIDDLE HOOK
-0521,PVALID,CYRILLIC SMALL LETTER EL WITH MIDDLE HOOK
-0522,DISALLOWED,CYRILLIC CAPITAL LETTER EN WITH MIDDLE HOOK
-0523,PVALID,CYRILLIC SMALL LETTER EN WITH MIDDLE HOOK
-0524,DISALLOWED,CYRILLIC CAPITAL LETTER PE WITH DESCENDER
-0525,PVALID,CYRILLIC SMALL LETTER PE WITH DESCENDER
-0526-0530,UNASSIGNED,<RESERVED>..<RESERVED>
-0531-0556,DISALLOWED,ARMENIAN CAPITAL LETTER AYB..ARMENIAN CAPITAL LETTER FEH
-0557-0558,UNASSIGNED,<RESERVED>..<RESERVED>
-0559,PVALID,ARMENIAN MODIFIER LETTER LEFT HALF RING
-055A-055F,DISALLOWED,ARMENIAN APOSTROPHE..ARMENIAN ABBREVIATION MARK
-0560,UNASSIGNED,<RESERVED>
-0561-0586,PVALID,ARMENIAN SMALL LETTER AYB..ARMENIAN SMALL LETTER FEH
-0587,DISALLOWED,ARMENIAN SMALL LIGATURE ECH YIWN
-0588,UNASSIGNED,<RESERVED>
-0589-058A,DISALLOWED,ARMENIAN FULL STOP..ARMENIAN HYPHEN
-058B-0590,UNASSIGNED,<RESERVED>..<RESERVED>
-0591-05BD,PVALID,HEBREW ACCENT ETNAHTA..HEBREW POINT METEG
-05BE,DISALLOWED,HEBREW PUNCTUATION MAQAF
-05BF,PVALID,HEBREW POINT RAFE
-05C0,DISALLOWED,HEBREW PUNCTUATION PASEQ
-05C1-05C2,PVALID,HEBREW POINT SHIN DOT..HEBREW POINT SIN DOT
-05C3,DISALLOWED,HEBREW PUNCTUATION SOF PASUQ
-05C4-05C5,PVALID,HEBREW MARK UPPER DOT..HEBREW MARK LOWER DOT
-05C6,DISALLOWED,HEBREW PUNCTUATION NUN HAFUKHA
-05C7,PVALID,HEBREW POINT QAMATS QATAN
-05C8-05CF,UNASSIGNED,<RESERVED>..<RESERVED>
-05D0-05EA,PVALID,HEBREW LETTER ALEF..HEBREW LETTER TAV
-05EB-05EF,UNASSIGNED,<RESERVED>..<RESERVED>
-05F0-05F2,PVALID,HEBREW LIGATURE YIDDISH DOUBLE VAV..HEBREW LIGATURE YIDDISH DOUBLE YOD
-05F3-05F4,CONTEXTO,HEBREW PUNCTUATION GERESH..HEBREW PUNCTUATION GERSHAYIM
-05F5-05FF,UNASSIGNED,<RESERVED>..<RESERVED>
-0600-0603,DISALLOWED,ARABIC NUMBER SIGN..ARABIC SIGN SAFHA
-0604-0605,UNASSIGNED,<RESERVED>..<RESERVED>
-0606-060F,DISALLOWED,ARABIC-INDIC CUBE ROOT..ARABIC SIGN MISRA
-0610-061A,PVALID,ARABIC SIGN SALLALLAHOU ALAYHE WASSALLAM..ARABIC SMALL KASRA
-061B,DISALLOWED,ARABIC SEMICOLON
-061C-061D,UNASSIGNED,<RESERVED>..<RESERVED>
-061E-061F,DISALLOWED,ARABIC TRIPLE DOT PUNCTUATION MARK..ARABIC QUESTION MARK
-0620,UNASSIGNED,<RESERVED>
-0621-063F,PVALID,ARABIC LETTER HAMZA..ARABIC LETTER FARSI YEH WITH THREE DOTS ABOVE
-0640,DISALLOWED,ARABIC TATWEEL
-0641-065E,PVALID,ARABIC LETTER FEH..ARABIC FATHA WITH TWO DOTS
-065F,UNASSIGNED,<RESERVED>
-0660-0669,CONTEXTO,ARABIC-INDIC DIGIT ZERO..ARABIC-INDIC DIGIT NINE
-066A-066D,DISALLOWED,ARABIC PERCENT SIGN..ARABIC FIVE POINTED STAR
-066E-0674,PVALID,ARABIC LETTER DOTLESS BEH..ARABIC LETTER HIGH HAMZA
-0675-0678,DISALLOWED,ARABIC LETTER HIGH HAMZA ALEF..ARABIC LETTER HIGH HAMZA YEH
-0679-06D3,PVALID,ARABIC LETTER TTEH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
-06D4,DISALLOWED,ARABIC FULL STOP
-06D5-06DC,PVALID,ARABIC LETTER AE..ARABIC SMALL HIGH SEEN
-06DD-06DE,DISALLOWED,ARABIC END OF AYAH..ARABIC START OF RUB EL HIZB
-06DF-06E8,PVALID,ARABIC SMALL HIGH ROUNDED ZERO..ARABIC SMALL HIGH NOON
-06E9,DISALLOWED,ARABIC PLACE OF SAJDAH
-06EA-06EF,PVALID,ARABIC EMPTY CENTRE LOW STOP..ARABIC LETTER REH WITH INVERTED V
-06F0-06F9,CONTEXTO,EXTENDED ARABIC-INDIC DIGIT ZERO..EXTENDED ARABIC-INDIC DIGIT NINE
-06FA-06FF,PVALID,ARABIC LETTER SHEEN WITH DOT BELOW..ARABIC LETTER HEH WITH INVERTED V
-0700-070D,DISALLOWED,SYRIAC END OF PARAGRAPH..SYRIAC HARKLEAN ASTERISCUS
-070E,UNASSIGNED,<RESERVED>
-070F,DISALLOWED,SYRIAC ABBREVIATION MARK
-0710-074A,PVALID,SYRIAC LETTER ALAPH..SYRIAC BARREKH
-074B-074C,UNASSIGNED,<RESERVED>..<RESERVED>
-074D-07B1,PVALID,SYRIAC LETTER SOGDIAN ZHAIN..THAANA LETTER NAA
-07B2-07BF,UNASSIGNED,<RESERVED>..<RESERVED>
-07C0-07F5,PVALID,NKO DIGIT ZERO..NKO LOW TONE APOSTROPHE
-07F6-07FA,DISALLOWED,NKO SYMBOL OO DENNEN..NKO LAJANYALAN
-07FB-07FF,UNASSIGNED,<RESERVED>..<RESERVED>
-0800-082D,PVALID,SAMARITAN LETTER ALAF..SAMARITAN MARK NEQUDAA
-082E-082F,UNASSIGNED,<RESERVED>..<RESERVED>
-0830-083E,DISALLOWED,SAMARITAN PUNCTUATION NEQUDAA..SAMARITAN PUNCTUATION ANNAAU
-083F-08FF,UNASSIGNED,<RESERVED>..<RESERVED>
-0900-0939,PVALID,DEVANAGARI SIGN INVERTED CANDRABINDU..DEVANAGARI LETTER HA
-093A-093B,UNASSIGNED,<RESERVED>..<RESERVED>
-093C-094E,PVALID,DEVANAGARI SIGN NUKTA..DEVANAGARI VOWEL SIGN PRISHTHAMATRA E
-094F,UNASSIGNED,<RESERVED>
-0950-0955,PVALID,DEVANAGARI OM..DEVANAGARI VOWEL SIGN CANDRA LONG E
-0956-0957,UNASSIGNED,<RESERVED>..<RESERVED>
-0958-095F,DISALLOWED,DEVANAGARI LETTER QA..DEVANAGARI LETTER YYA
-0960-0963,PVALID,DEVANAGARI LETTER VOCALIC RR..DEVANAGARI VOWEL SIGN VOCALIC LL
-0964-0965,DISALLOWED,DEVANAGARI DANDA..DEVANAGARI DOUBLE DANDA
-0966-096F,PVALID,DEVANAGARI DIGIT ZERO..DEVANAGARI DIGIT NINE
-0970,DISALLOWED,DEVANAGARI ABBREVIATION SIGN
-0971-0972,PVALID,DEVANAGARI SIGN HIGH SPACING DOT..DEVANAGARI LETTER CANDRA A
-0973-0978,UNASSIGNED,<RESERVED>..<RESERVED>
-0979-097F,PVALID,DEVANAGARI LETTER ZHA..DEVANAGARI LETTER BBA
-0980,UNASSIGNED,<RESERVED>
-0981-0983,PVALID,BENGALI SIGN CANDRABINDU..BENGALI SIGN VISARGA
-0984,UNASSIGNED,<RESERVED>
-0985-098C,PVALID,BENGALI LETTER A..BENGALI LETTER VOCALIC L
-098D-098E,UNASSIGNED,<RESERVED>..<RESERVED>
-098F-0990,PVALID,BENGALI LETTER E..BENGALI LETTER AI
-0991-0992,UNASSIGNED,<RESERVED>..<RESERVED>
-0993-09A8,PVALID,BENGALI LETTER O..BENGALI LETTER NA
-09A9,UNASSIGNED,<RESERVED>
-09AA-09B0,PVALID,BENGALI LETTER PA..BENGALI LETTER RA
-09B1,UNASSIGNED,<RESERVED>
-09B2,PVALID,BENGALI LETTER LA
-09B3-09B5,UNASSIGNED,<RESERVED>..<RESERVED>
-09B6-09B9,PVALID,BENGALI LETTER SHA..BENGALI LETTER HA
-09BA-09BB,UNASSIGNED,<RESERVED>..<RESERVED>
-09BC-09C4,PVALID,BENGALI SIGN NUKTA..BENGALI VOWEL SIGN VOCALIC RR
-09C5-09C6,UNASSIGNED,<RESERVED>..<RESERVED>
-09C7-09C8,PVALID,BENGALI VOWEL SIGN E..BENGALI VOWEL SIGN AI
-09C9-09CA,UNASSIGNED,<RESERVED>..<RESERVED>
-09CB-09CE,PVALID,BENGALI VOWEL SIGN O..BENGALI LETTER KHANDA TA
-09CF-09D6,UNASSIGNED,<RESERVED>..<RESERVED>
-09D7,PVALID,BENGALI AU LENGTH MARK
-09D8-09DB,UNASSIGNED,<RESERVED>..<RESERVED>
-09DC-09DD,DISALLOWED,BENGALI LETTER RRA..BENGALI LETTER RHA
-09DE,UNASSIGNED,<RESERVED>
-09DF,DISALLOWED,BENGALI LETTER YYA
-09E0-09E3,PVALID,BENGALI LETTER VOCALIC RR..BENGALI VOWEL SIGN VOCALIC LL
-09E4-09E5,UNASSIGNED,<RESERVED>..<RESERVED>
-09E6-09F1,PVALID,BENGALI DIGIT ZERO..BENGALI LETTER RA WITH LOWER DIAGONAL
-09F2-09FB,DISALLOWED,BENGALI RUPEE MARK..BENGALI GANDA MARK
-09FC-0A00,UNASSIGNED,<RESERVED>..<RESERVED>
-0A01-0A03,PVALID,GURMUKHI SIGN ADAK BINDI..GURMUKHI SIGN VISARGA
-0A04,UNASSIGNED,<RESERVED>
-0A05-0A0A,PVALID,GURMUKHI LETTER A..GURMUKHI LETTER UU
-0A0B-0A0E,UNASSIGNED,<RESERVED>..<RESERVED>
-0A0F-0A10,PVALID,GURMUKHI LETTER EE..GURMUKHI LETTER AI
-0A11-0A12,UNASSIGNED,<RESERVED>..<RESERVED>
-0A13-0A28,PVALID,GURMUKHI LETTER OO..GURMUKHI LETTER NA
-0A29,UNASSIGNED,<RESERVED>
-0A2A-0A30,PVALID,GURMUKHI LETTER PA..GURMUKHI LETTER RA
-0A31,UNASSIGNED,<RESERVED>
-0A32,PVALID,GURMUKHI LETTER LA
-0A33,DISALLOWED,GURMUKHI LETTER LLA
-0A34,UNASSIGNED,<RESERVED>
-0A35,PVALID,GURMUKHI LETTER VA
-0A36,DISALLOWED,GURMUKHI LETTER SHA
-0A37,UNASSIGNED,<RESERVED>
-0A38-0A39,PVALID,GURMUKHI LETTER SA..GURMUKHI LETTER HA
-0A3A-0A3B,UNASSIGNED,<RESERVED>..<RESERVED>
-0A3C,PVALID,GURMUKHI SIGN NUKTA
-0A3D,UNASSIGNED,<RESERVED>
-0A3E-0A42,PVALID,GURMUKHI VOWEL SIGN AA..GURMUKHI VOWEL SIGN UU
-0A43-0A46,UNASSIGNED,<RESERVED>..<RESERVED>
-0A47-0A48,PVALID,GURMUKHI VOWEL SIGN EE..GURMUKHI VOWEL SIGN AI
-0A49-0A4A,UNASSIGNED,<RESERVED>..<RESERVED>
-0A4B-0A4D,PVALID,GURMUKHI VOWEL SIGN OO..GURMUKHI SIGN VIRAMA
-0A4E-0A50,UNASSIGNED,<RESERVED>..<RESERVED>
-0A51,PVALID,GURMUKHI SIGN UDAAT
-0A52-0A58,UNASSIGNED,<RESERVED>..<RESERVED>
-0A59-0A5B,DISALLOWED,GURMUKHI LETTER KHHA..GURMUKHI LETTER ZA
-0A5C,PVALID,GURMUKHI LETTER RRA
-0A5D,UNASSIGNED,<RESERVED>
-0A5E,DISALLOWED,GURMUKHI LETTER FA
-0A5F-0A65,UNASSIGNED,<RESERVED>..<RESERVED>
-0A66-0A75,PVALID,GURMUKHI DIGIT ZERO..GURMUKHI SIGN YAKASH
-0A76-0A80,UNASSIGNED,<RESERVED>..<RESERVED>
-0A81-0A83,PVALID,GUJARATI SIGN CANDRABINDU..GUJARATI SIGN VISARGA
-0A84,UNASSIGNED,<RESERVED>
-0A85-0A8D,PVALID,GUJARATI LETTER A..GUJARATI VOWEL CANDRA E
-0A8E,UNASSIGNED,<RESERVED>
-0A8F-0A91,PVALID,GUJARATI LETTER E..GUJARATI VOWEL CANDRA O
-0A92,UNASSIGNED,<RESERVED>
-0A93-0AA8,PVALID,GUJARATI LETTER O..GUJARATI LETTER NA
-0AA9,UNASSIGNED,<RESERVED>
-0AAA-0AB0,PVALID,GUJARATI LETTER PA..GUJARATI LETTER RA
-0AB1,UNASSIGNED,<RESERVED>
-0AB2-0AB3,PVALID,GUJARATI LETTER LA..GUJARATI LETTER LLA
-0AB4,UNASSIGNED,<RESERVED>
-0AB5-0AB9,PVALID,GUJARATI LETTER VA..GUJARATI LETTER HA
-0ABA-0ABB,UNASSIGNED,<RESERVED>..<RESERVED>
-0ABC-0AC5,PVALID,GUJARATI SIGN NUKTA..GUJARATI VOWEL SIGN CANDRA E
-0AC6,UNASSIGNED,<RESERVED>
-0AC7-0AC9,PVALID,GUJARATI VOWEL SIGN E..GUJARATI VOWEL SIGN CANDRA O
-0ACA,UNASSIGNED,<RESERVED>
-0ACB-0ACD,PVALID,GUJARATI VOWEL SIGN O..GUJARATI SIGN VIRAMA
-0ACE-0ACF,UNASSIGNED,<RESERVED>..<RESERVED>
-0AD0,PVALID,GUJARATI OM
-0AD1-0ADF,UNASSIGNED,<RESERVED>..<RESERVED>
-0AE0-0AE3,PVALID,GUJARATI LETTER VOCALIC RR..GUJARATI VOWEL SIGN VOCALIC LL
-0AE4-0AE5,UNASSIGNED,<RESERVED>..<RESERVED>
-0AE6-0AEF,PVALID,GUJARATI DIGIT ZERO..GUJARATI DIGIT NINE
-0AF0,UNASSIGNED,<RESERVED>
-0AF1,DISALLOWED,GUJARATI RUPEE SIGN
-0AF2-0B00,UNASSIGNED,<RESERVED>..<RESERVED>
-0B01-0B03,PVALID,ORIYA SIGN CANDRABINDU..ORIYA SIGN VISARGA
-0B04,UNASSIGNED,<RESERVED>
-0B05-0B0C,PVALID,ORIYA LETTER A..ORIYA LETTER VOCALIC L
-0B0D-0B0E,UNASSIGNED,<RESERVED>..<RESERVED>
-0B0F-0B10,PVALID,ORIYA LETTER E..ORIYA LETTER AI
-0B11-0B12,UNASSIGNED,<RESERVED>..<RESERVED>
-0B13-0B28,PVALID,ORIYA LETTER O..ORIYA LETTER NA
-0B29,UNASSIGNED,<RESERVED>
-0B2A-0B30,PVALID,ORIYA LETTER PA..ORIYA LETTER RA
-0B31,UNASSIGNED,<RESERVED>
-0B32-0B33,PVALID,ORIYA LETTER LA..ORIYA LETTER LLA
-0B34,UNASSIGNED,<RESERVED>
-0B35-0B39,PVALID,ORIYA LETTER VA..ORIYA LETTER HA
-0B3A-0B3B,UNASSIGNED,<RESERVED>..<RESERVED>
-0B3C-0B44,PVALID,ORIYA SIGN NUKTA..ORIYA VOWEL SIGN VOCALIC RR
-0B45-0B46,UNASSIGNED,<RESERVED>..<RESERVED>
-0B47-0B48,PVALID,ORIYA VOWEL SIGN E..ORIYA VOWEL SIGN AI
-0B49-0B4A,UNASSIGNED,<RESERVED>..<RESERVED>
-0B4B-0B4D,PVALID,ORIYA VOWEL SIGN O..ORIYA SIGN VIRAMA
-0B4E-0B55,UNASSIGNED,<RESERVED>..<RESERVED>
-0B56-0B57,PVALID,ORIYA AI LENGTH MARK..ORIYA AU LENGTH MARK
-0B58-0B5B,UNASSIGNED,<RESERVED>..<RESERVED>
-0B5C-0B5D,DISALLOWED,ORIYA LETTER RRA..ORIYA LETTER RHA
-0B5E,UNASSIGNED,<RESERVED>
-0B5F-0B63,PVALID,ORIYA LETTER YYA..ORIYA VOWEL SIGN VOCALIC LL
-0B64-0B65,UNASSIGNED,<RESERVED>..<RESERVED>
-0B66-0B6F,PVALID,ORIYA DIGIT ZERO..ORIYA DIGIT NINE
-0B70,DISALLOWED,ORIYA ISSHAR
-0B71,PVALID,ORIYA LETTER WA
-0B72-0B81,UNASSIGNED,<RESERVED>..<RESERVED>
-0B82-0B83,PVALID,TAMIL SIGN ANUSVARA..TAMIL SIGN VISARGA
-0B84,UNASSIGNED,<RESERVED>
-0B85-0B8A,PVALID,TAMIL LETTER A..TAMIL LETTER UU
-0B8B-0B8D,UNASSIGNED,<RESERVED>..<RESERVED>
-0B8E-0B90,PVALID,TAMIL LETTER E..TAMIL LETTER AI
-0B91,UNASSIGNED,<RESERVED>
-0B92-0B95,PVALID,TAMIL LETTER O..TAMIL LETTER KA
-0B96-0B98,UNASSIGNED,<RESERVED>..<RESERVED>
-0B99-0B9A,PVALID,TAMIL LETTER NGA..TAMIL LETTER CA
-0B9B,UNASSIGNED,<RESERVED>
-0B9C,PVALID,TAMIL LETTER JA
-0B9D,UNASSIGNED,<RESERVED>
-0B9E-0B9F,PVALID,TAMIL LETTER NYA..TAMIL LETTER TTA
-0BA0-0BA2,UNASSIGNED,<RESERVED>..<RESERVED>
-0BA3-0BA4,PVALID,TAMIL LETTER NNA..TAMIL LETTER TA
-0BA5-0BA7,UNASSIGNED,<RESERVED>..<RESERVED>
-0BA8-0BAA,PVALID,TAMIL LETTER NA..TAMIL LETTER PA
-0BAB-0BAD,UNASSIGNED,<RESERVED>..<RESERVED>
-0BAE-0BB9,PVALID,TAMIL LETTER MA..TAMIL LETTER HA
-0BBA-0BBD,UNASSIGNED,<RESERVED>..<RESERVED>
-0BBE-0BC2,PVALID,TAMIL VOWEL SIGN AA..TAMIL VOWEL SIGN UU
-0BC3-0BC5,UNASSIGNED,<RESERVED>..<RESERVED>
-0BC6-0BC8,PVALID,TAMIL VOWEL SIGN E..TAMIL VOWEL SIGN AI
-0BC9,UNASSIGNED,<RESERVED>
-0BCA-0BCD,PVALID,TAMIL VOWEL SIGN O..TAMIL SIGN VIRAMA
-0BCE-0BCF,UNASSIGNED,<RESERVED>..<RESERVED>
-0BD0,PVALID,TAMIL OM
-0BD1-0BD6,UNASSIGNED,<RESERVED>..<RESERVED>
-0BD7,PVALID,TAMIL AU LENGTH MARK
-0BD8-0BE5,UNASSIGNED,<RESERVED>..<RESERVED>
-0BE6-0BEF,PVALID,TAMIL DIGIT ZERO..TAMIL DIGIT NINE
-0BF0-0BFA,DISALLOWED,TAMIL NUMBER TEN..TAMIL NUMBER SIGN
-0BFB-0C00,UNASSIGNED,<RESERVED>..<RESERVED>
-0C01-0C03,PVALID,TELUGU SIGN CANDRABINDU..TELUGU SIGN VISARGA
-0C04,UNASSIGNED,<RESERVED>
-0C05-0C0C,PVALID,TELUGU LETTER A..TELUGU LETTER VOCALIC L
-0C0D,UNASSIGNED,<RESERVED>
-0C0E-0C10,PVALID,TELUGU LETTER E..TELUGU LETTER AI
-0C11,UNASSIGNED,<RESERVED>
-0C12-0C28,PVALID,TELUGU LETTER O..TELUGU LETTER NA
-0C29,UNASSIGNED,<RESERVED>
-0C2A-0C33,PVALID,TELUGU LETTER PA..TELUGU LETTER LLA
-0C34,UNASSIGNED,<RESERVED>
-0C35-0C39,PVALID,TELUGU LETTER VA..TELUGU LETTER HA
-0C3A-0C3C,UNASSIGNED,<RESERVED>..<RESERVED>
-0C3D-0C44,PVALID,TELUGU SIGN AVAGRAHA..TELUGU VOWEL SIGN VOCALIC RR
-0C45,UNASSIGNED,<RESERVED>
-0C46-0C48,PVALID,TELUGU VOWEL SIGN E..TELUGU VOWEL SIGN AI
-0C49,UNASSIGNED,<RESERVED>
-0C4A-0C4D,PVALID,TELUGU VOWEL SIGN O..TELUGU SIGN VIRAMA
-0C4E-0C54,UNASSIGNED,<RESERVED>..<RESERVED>
-0C55-0C56,PVALID,TELUGU LENGTH MARK..TELUGU AI LENGTH MARK
-0C57,UNASSIGNED,<RESERVED>
-0C58-0C59,PVALID,TELUGU LETTER TSA..TELUGU LETTER DZA
-0C5A-0C5F,UNASSIGNED,<RESERVED>..<RESERVED>
-0C60-0C63,PVALID,TELUGU LETTER VOCALIC RR..TELUGU VOWEL SIGN VOCALIC LL
-0C64-0C65,UNASSIGNED,<RESERVED>..<RESERVED>
-0C66-0C6F,PVALID,TELUGU DIGIT ZERO..TELUGU DIGIT NINE
-0C70-0C77,UNASSIGNED,<RESERVED>..<RESERVED>
-0C78-0C7F,DISALLOWED,TELUGU FRACTION DIGIT ZERO FOR ODD POWERS OF FOUR..TELUGU SIGN TUUMU
-0C80-0C81,UNASSIGNED,<RESERVED>..<RESERVED>
-0C82-0C83,PVALID,KANNADA SIGN ANUSVARA..KANNADA SIGN VISARGA
-0C84,UNASSIGNED,<RESERVED>
-0C85-0C8C,PVALID,KANNADA LETTER A..KANNADA LETTER VOCALIC L
-0C8D,UNASSIGNED,<RESERVED>
-0C8E-0C90,PVALID,KANNADA LETTER E..KANNADA LETTER AI
-0C91,UNASSIGNED,<RESERVED>
-0C92-0CA8,PVALID,KANNADA LETTER O..KANNADA LETTER NA
-0CA9,UNASSIGNED,<RESERVED>
-0CAA-0CB3,PVALID,KANNADA LETTER PA..KANNADA LETTER LLA
-0CB4,UNASSIGNED,<RESERVED>
-0CB5-0CB9,PVALID,KANNADA LETTER VA..KANNADA LETTER HA
-0CBA-0CBB,UNASSIGNED,<RESERVED>..<RESERVED>
-0CBC-0CC4,PVALID,KANNADA SIGN NUKTA..KANNADA VOWEL SIGN VOCALIC RR
-0CC5,UNASSIGNED,<RESERVED>
-0CC6-0CC8,PVALID,KANNADA VOWEL SIGN E..KANNADA VOWEL SIGN AI
-0CC9,UNASSIGNED,<RESERVED>
-0CCA-0CCD,PVALID,KANNADA VOWEL SIGN O..KANNADA SIGN VIRAMA
-0CCE-0CD4,UNASSIGNED,<RESERVED>..<RESERVED>
-0CD5-0CD6,PVALID,KANNADA LENGTH MARK..KANNADA AI LENGTH MARK
-0CD7-0CDD,UNASSIGNED,<RESERVED>..<RESERVED>
-0CDE,PVALID,KANNADA LETTER FA
-0CDF,UNASSIGNED,<RESERVED>
-0CE0-0CE3,PVALID,KANNADA LETTER VOCALIC RR..KANNADA VOWEL SIGN VOCALIC LL
-0CE4-0CE5,UNASSIGNED,<RESERVED>..<RESERVED>
-0CE6-0CEF,PVALID,KANNADA DIGIT ZERO..KANNADA DIGIT NINE
-0CF0,UNASSIGNED,<RESERVED>
-0CF1-0CF2,DISALLOWED,KANNADA SIGN JIHVAMULIYA..KANNADA SIGN UPADHMANIYA
-0CF3-0D01,UNASSIGNED,<RESERVED>..<RESERVED>
-0D02-0D03,PVALID,MALAYALAM SIGN ANUSVARA..MALAYALAM SIGN VISARGA
-0D04,UNASSIGNED,<RESERVED>
-0D05-0D0C,PVALID,MALAYALAM LETTER A..MALAYALAM LETTER VOCALIC L
-0D0D,UNASSIGNED,<RESERVED>
-0D0E-0D10,PVALID,MALAYALAM LETTER E..MALAYALAM LETTER AI
-0D11,UNASSIGNED,<RESERVED>
-0D12-0D28,PVALID,MALAYALAM LETTER O..MALAYALAM LETTER NA
-0D29,UNASSIGNED,<RESERVED>
-0D2A-0D39,PVALID,MALAYALAM LETTER PA..MALAYALAM LETTER HA
-0D3A-0D3C,UNASSIGNED,<RESERVED>..<RESERVED>
-0D3D-0D44,PVALID,MALAYALAM SIGN AVAGRAHA..MALAYALAM VOWEL SIGN VOCALIC RR
-0D45,UNASSIGNED,<RESERVED>
-0D46-0D48,PVALID,MALAYALAM VOWEL SIGN E..MALAYALAM VOWEL SIGN AI
-0D49,UNASSIGNED,<RESERVED>
-0D4A-0D4D,PVALID,MALAYALAM VOWEL SIGN O..MALAYALAM SIGN VIRAMA
-0D4E-0D56,UNASSIGNED,<RESERVED>..<RESERVED>
-0D57,PVALID,MALAYALAM AU LENGTH MARK
-0D58-0D5F,UNASSIGNED,<RESERVED>..<RESERVED>
-0D60-0D63,PVALID,MALAYALAM LETTER VOCALIC RR..MALAYALAM VOWEL SIGN VOCALIC LL
-0D64-0D65,UNASSIGNED,<RESERVED>..<RESERVED>
-0D66-0D6F,PVALID,MALAYALAM DIGIT ZERO..MALAYALAM DIGIT NINE
-0D70-0D75,DISALLOWED,MALAYALAM NUMBER TEN..MALAYALAM FRACTION THREE QUARTERS
-0D76-0D78,UNASSIGNED,<RESERVED>..<RESERVED>
-0D79,DISALLOWED,MALAYALAM DATE MARK
-0D7A-0D7F,PVALID,MALAYALAM LETTER CHILLU NN..MALAYALAM LETTER CHILLU K
-0D80-0D81,UNASSIGNED,<RESERVED>..<RESERVED>
-0D82-0D83,PVALID,SINHALA SIGN ANUSVARAYA..SINHALA SIGN VISARGAYA
-0D84,UNASSIGNED,<RESERVED>
-0D85-0D96,PVALID,SINHALA LETTER AYANNA..SINHALA LETTER AUYANNA
-0D97-0D99,UNASSIGNED,<RESERVED>..<RESERVED>
-0D9A-0DB1,PVALID,SINHALA LETTER ALPAPRAANA KAYANNA..SINHALA LETTER DANTAJA NAYANNA
-0DB2,UNASSIGNED,<RESERVED>
-0DB3-0DBB,PVALID,SINHALA LETTER SANYAKA DAYANNA..SINHALA LETTER RAYANNA
-0DBC,UNASSIGNED,<RESERVED>
-0DBD,PVALID,SINHALA LETTER DANTAJA LAYANNA
-0DBE-0DBF,UNASSIGNED,<RESERVED>..<RESERVED>
-0DC0-0DC6,PVALID,SINHALA LETTER VAYANNA..SINHALA LETTER FAYANNA
-0DC7-0DC9,UNASSIGNED,<RESERVED>..<RESERVED>
-0DCA,PVALID,SINHALA SIGN AL-LAKUNA
-0DCB-0DCE,UNASSIGNED,<RESERVED>..<RESERVED>
-0DCF-0DD4,PVALID,SINHALA VOWEL SIGN AELA-PILLA..SINHALA VOWEL SIGN KETTI PAA-PILLA
-0DD5,UNASSIGNED,<RESERVED>
-0DD6,PVALID,SINHALA VOWEL SIGN DIGA PAA-PILLA
-0DD7,UNASSIGNED,<RESERVED>
-0DD8-0DDF,PVALID,SINHALA VOWEL SIGN GAETTA-PILLA..SINHALA VOWEL SIGN GAYANUKITTA
-0DE0-0DF1,UNASSIGNED,<RESERVED>..<RESERVED>
-0DF2-0DF3,PVALID,SINHALA VOWEL SIGN DIGA GAETTA-PILLA..SINHALA VOWEL SIGN DIGA GAYANUKITTA
-0DF4,DISALLOWED,SINHALA PUNCTUATION KUNDDALIYA
-0DF5-0E00,UNASSIGNED,<RESERVED>..<RESERVED>
-0E01-0E32,PVALID,THAI CHARACTER KO KAI..THAI CHARACTER SARA AA
-0E33,DISALLOWED,THAI CHARACTER SARA AM
-0E34-0E3A,PVALID,THAI CHARACTER SARA I..THAI CHARACTER PHINTHU
-0E3B-0E3E,UNASSIGNED,<RESERVED>..<RESERVED>
-0E3F,DISALLOWED,THAI CURRENCY SYMBOL BAHT
-0E40-0E4E,PVALID,THAI CHARACTER SARA E..THAI CHARACTER YAMAKKAN
-0E4F,DISALLOWED,THAI CHARACTER FONGMAN
-0E50-0E59,PVALID,THAI DIGIT ZERO..THAI DIGIT NINE
-0E5A-0E5B,DISALLOWED,THAI CHARACTER ANGKHANKHU..THAI CHARACTER KHOMUT
-0E5C-0E80,UNASSIGNED,<RESERVED>..<RESERVED>
-0E81-0E82,PVALID,LAO LETTER KO..LAO LETTER KHO SUNG
-0E83,UNASSIGNED,<RESERVED>
-0E84,PVALID,LAO LETTER KHO TAM
-0E85-0E86,UNASSIGNED,<RESERVED>..<RESERVED>
-0E87-0E88,PVALID,LAO LETTER NGO..LAO LETTER CO
-0E89,UNASSIGNED,<RESERVED>
-0E8A,PVALID,LAO LETTER SO TAM
-0E8B-0E8C,UNASSIGNED,<RESERVED>..<RESERVED>
-0E8D,PVALID,LAO LETTER NYO
-0E8E-0E93,UNASSIGNED,<RESERVED>..<RESERVED>
-0E94-0E97,PVALID,LAO LETTER DO..LAO LETTER THO TAM
-0E98,UNASSIGNED,<RESERVED>
-0E99-0E9F,PVALID,LAO LETTER NO..LAO LETTER FO SUNG
-0EA0,UNASSIGNED,<RESERVED>
-0EA1-0EA3,PVALID,LAO LETTER MO..LAO LETTER LO LING
-0EA4,UNASSIGNED,<RESERVED>
-0EA5,PVALID,LAO LETTER LO LOOT
-0EA6,UNASSIGNED,<RESERVED>
-0EA7,PVALID,LAO LETTER WO
-0EA8-0EA9,UNASSIGNED,<RESERVED>..<RESERVED>
-0EAA-0EAB,PVALID,LAO LETTER SO SUNG..LAO LETTER HO SUNG
-0EAC,UNASSIGNED,<RESERVED>
-0EAD-0EB2,PVALID,LAO LETTER O..LAO VOWEL SIGN AA
-0EB3,DISALLOWED,LAO VOWEL SIGN AM
-0EB4-0EB9,PVALID,LAO VOWEL SIGN I..LAO VOWEL SIGN UU
-0EBA,UNASSIGNED,<RESERVED>
-0EBB-0EBD,PVALID,LAO VOWEL SIGN MAI KON..LAO SEMIVOWEL SIGN NYO
-0EBE-0EBF,UNASSIGNED,<RESERVED>..<RESERVED>
-0EC0-0EC4,PVALID,LAO VOWEL SIGN E..LAO VOWEL SIGN AI
-0EC5,UNASSIGNED,<RESERVED>
-0EC6,PVALID,LAO KO LA
-0EC7,UNASSIGNED,<RESERVED>
-0EC8-0ECD,PVALID,LAO TONE MAI EK..LAO NIGGAHITA
-0ECE-0ECF,UNASSIGNED,<RESERVED>..<RESERVED>
-0ED0-0ED9,PVALID,LAO DIGIT ZERO..LAO DIGIT NINE
-0EDA-0EDB,UNASSIGNED,<RESERVED>..<RESERVED>
-0EDC-0EDD,DISALLOWED,LAO HO NO..LAO HO MO
-0EDE-0EFF,UNASSIGNED,<RESERVED>..<RESERVED>
-0F00,PVALID,TIBETAN SYLLABLE OM
-0F01-0F0A,DISALLOWED,TIBETAN MARK GTER YIG MGO TRUNCATED A..TIBETAN MARK BKA- SHOG YIG MGO
-0F0B,PVALID,TIBETAN MARK INTERSYLLABIC TSHEG
-0F0C-0F17,DISALLOWED,TIBETAN MARK DELIMITER TSHEG BSTAR..TIBETAN ASTROLOGICAL SIGN SGRA GCAN -CHAR RTAGS
-0F18-0F19,PVALID,TIBETAN ASTROLOGICAL SIGN -KHYUD PA..TIBETAN ASTROLOGICAL SIGN SDONG TSHUGS
-0F1A-0F1F,DISALLOWED,TIBETAN SIGN RDEL DKAR GCIG..TIBETAN SIGN RDEL DKAR RDEL NAG
-0F20-0F29,PVALID,TIBETAN DIGIT ZERO..TIBETAN DIGIT NINE
-0F2A-0F34,DISALLOWED,TIBETAN DIGIT HALF ONE..TIBETAN MARK BSDUS RTAGS
-0F35,PVALID,TIBETAN MARK NGAS BZUNG NYI ZLA
-0F36,DISALLOWED,TIBETAN MARK CARET -DZUD RTAGS BZHI MIG CAN
-0F37,PVALID,TIBETAN MARK NGAS BZUNG SGOR RTAGS
-0F38,DISALLOWED,TIBETAN MARK CHE MGO
-0F39,PVALID,TIBETAN MARK TSA -PHRU
-0F3A-0F3D,DISALLOWED,TIBETAN MARK GUG RTAGS GYON..TIBETAN MARK ANG KHANG GYAS
-0F3E-0F42,PVALID,TIBETAN SIGN YAR TSHES..TIBETAN LETTER GA
-0F43,DISALLOWED,TIBETAN LETTER GHA
-0F44-0F47,PVALID,TIBETAN LETTER NGA..TIBETAN LETTER JA
-0F48,UNASSIGNED,<RESERVED>
-0F49-0F4C,PVALID,TIBETAN LETTER NYA..TIBETAN LETTER DDA
-0F4D,DISALLOWED,TIBETAN LETTER DDHA
-0F4E-0F51,PVALID,TIBETAN LETTER NNA..TIBETAN LETTER DA
-0F52,DISALLOWED,TIBETAN LETTER DHA
-0F53-0F56,PVALID,TIBETAN LETTER NA..TIBETAN LETTER BA
-0F57,DISALLOWED,TIBETAN LETTER BHA
-0F58-0F5B,PVALID,TIBETAN LETTER MA..TIBETAN LETTER DZA
-0F5C,DISALLOWED,TIBETAN LETTER DZHA
-0F5D-0F68,PVALID,TIBETAN LETTER WA..TIBETAN LETTER A
-0F69,DISALLOWED,TIBETAN LETTER KSSA
-0F6A-0F6C,PVALID,TIBETAN LETTER FIXED-FORM RA..TIBETAN LETTER RRA
-0F6D-0F70,UNASSIGNED,<RESERVED>..<RESERVED>
-0F71-0F72,PVALID,TIBETAN VOWEL SIGN AA..TIBETAN VOWEL SIGN I
-0F73,DISALLOWED,TIBETAN VOWEL SIGN II
-0F74,PVALID,TIBETAN VOWEL SIGN U
-0F75-0F79,DISALLOWED,TIBETAN VOWEL SIGN UU..TIBETAN VOWEL SIGN VOCALIC LL
-0F7A-0F80,PVALID,TIBETAN VOWEL SIGN E..TIBETAN VOWEL SIGN REVERSED I
-0F81,DISALLOWED,TIBETAN VOWEL SIGN REVERSED II
-0F82-0F84,PVALID,TIBETAN SIGN NYI ZLA NAA DA..TIBETAN MARK HALANTA
-0F85,DISALLOWED,TIBETAN MARK PALUTA
-0F86-0F8B,PVALID,TIBETAN SIGN LCI RTAGS..TIBETAN SIGN GRU MED RGYINGS
-0F8C-0F8F,UNASSIGNED,<RESERVED>..<RESERVED>
-0F90-0F92,PVALID,TIBETAN SUBJOINED LETTER KA..TIBETAN SUBJOINED LETTER GA
-0F93,DISALLOWED,TIBETAN SUBJOINED LETTER GHA
-0F94-0F97,PVALID,TIBETAN SUBJOINED LETTER NGA..TIBETAN SUBJOINED LETTER JA
-0F98,UNASSIGNED,<RESERVED>
-0F99-0F9C,PVALID,TIBETAN SUBJOINED LETTER NYA..TIBETAN SUBJOINED LETTER DDA
-0F9D,DISALLOWED,TIBETAN SUBJOINED LETTER DDHA
-0F9E-0FA1,PVALID,TIBETAN SUBJOINED LETTER NNA..TIBETAN SUBJOINED LETTER DA
-0FA2,DISALLOWED,TIBETAN SUBJOINED LETTER DHA
-0FA3-0FA6,PVALID,TIBETAN SUBJOINED LETTER NA..TIBETAN SUBJOINED LETTER BA
-0FA7,DISALLOWED,TIBETAN SUBJOINED LETTER BHA
-0FA8-0FAB,PVALID,TIBETAN SUBJOINED LETTER MA..TIBETAN SUBJOINED LETTER DZA
-0FAC,DISALLOWED,TIBETAN SUBJOINED LETTER DZHA
-0FAD-0FB8,PVALID,TIBETAN SUBJOINED LETTER WA..TIBETAN SUBJOINED LETTER A
-0FB9,DISALLOWED,TIBETAN SUBJOINED LETTER KSSA
-0FBA-0FBC,PVALID,TIBETAN SUBJOINED LETTER FIXED-FORM WA..TIBETAN SUBJOINED LETTER FIXED-FORM RA
-0FBD,UNASSIGNED,<RESERVED>
-0FBE-0FC5,DISALLOWED,TIBETAN KU RU KHA..TIBETAN SYMBOL RDO RJE
-0FC6,PVALID,TIBETAN SYMBOL PADMA GDAN
-0FC7-0FCC,DISALLOWED,TIBETAN SYMBOL RDO RJE RGYA GRAM..TIBETAN SYMBOL NOR BU BZHI -KHYIL
-0FCD,UNASSIGNED,<RESERVED>
-0FCE-0FD8,DISALLOWED,TIBETAN SIGN RDEL NAG RDEL DKAR..LEFT-FACING SVASTI SIGN WITH DOTS
-0FD9-0FFF,UNASSIGNED,<RESERVED>..<RESERVED>
-1000-1049,PVALID,MYANMAR LETTER KA..MYANMAR DIGIT NINE
-104A-104F,DISALLOWED,MYANMAR SIGN LITTLE SECTION..MYANMAR SYMBOL GENITIVE
-1050-109D,PVALID,MYANMAR LETTER SHA..MYANMAR VOWEL SIGN AITON AI
-109E-10C5,DISALLOWED,MYANMAR SYMBOL SHAN ONE..GEORGIAN CAPITAL LETTER HOE
-10C6-10CF,UNASSIGNED,<RESERVED>..<RESERVED>
-10D0-10FA,PVALID,GEORGIAN LETTER AN..GEORGIAN LETTER AIN
-10FB-10FC,DISALLOWED,GEORGIAN PARAGRAPH SEPARATOR..MODIFIER LETTER GEORGIAN NAR
-10FD-10FF,UNASSIGNED,<RESERVED>..<RESERVED>
-1100-11FF,DISALLOWED,HANGUL CHOSEONG KIYEOK..HANGUL JONGSEONG SSANGNIEUN
-1200-1248,PVALID,ETHIOPIC SYLLABLE HA..ETHIOPIC SYLLABLE QWA
-1249,UNASSIGNED,<RESERVED>
-124A-124D,PVALID,ETHIOPIC SYLLABLE QWI..ETHIOPIC SYLLABLE QWE
-124E-124F,UNASSIGNED,<RESERVED>..<RESERVED>
-1250-1256,PVALID,ETHIOPIC SYLLABLE QHA..ETHIOPIC SYLLABLE QHO
-1257,UNASSIGNED,<RESERVED>
-1258,PVALID,ETHIOPIC SYLLABLE QHWA
-1259,UNASSIGNED,<RESERVED>
-125A-125D,PVALID,ETHIOPIC SYLLABLE QHWI..ETHIOPIC SYLLABLE QHWE
-125E-125F,UNASSIGNED,<RESERVED>..<RESERVED>
-1260-1288,PVALID,ETHIOPIC SYLLABLE BA..ETHIOPIC SYLLABLE XWA
-1289,UNASSIGNED,<RESERVED>
-128A-128D,PVALID,ETHIOPIC SYLLABLE XWI..ETHIOPIC SYLLABLE XWE
-128E-128F,UNASSIGNED,<RESERVED>..<RESERVED>
-1290-12B0,PVALID,ETHIOPIC SYLLABLE NA..ETHIOPIC SYLLABLE KWA
-12B1,UNASSIGNED,<RESERVED>
-12B2-12B5,PVALID,ETHIOPIC SYLLABLE KWI..ETHIOPIC SYLLABLE KWE
-12B6-12B7,UNASSIGNED,<RESERVED>..<RESERVED>
-12B8-12BE,PVALID,ETHIOPIC SYLLABLE KXA..ETHIOPIC SYLLABLE KXO
-12BF,UNASSIGNED,<RESERVED>
-12C0,PVALID,ETHIOPIC SYLLABLE KXWA
-12C1,UNASSIGNED,<RESERVED>
-12C2-12C5,PVALID,ETHIOPIC SYLLABLE KXWI..ETHIOPIC SYLLABLE KXWE
-12C6-12C7,UNASSIGNED,<RESERVED>..<RESERVED>
-12C8-12D6,PVALID,ETHIOPIC SYLLABLE WA..ETHIOPIC SYLLABLE PHARYNGEAL O
-12D7,UNASSIGNED,<RESERVED>
-12D8-1310,PVALID,ETHIOPIC SYLLABLE ZA..ETHIOPIC SYLLABLE GWA
-1311,UNASSIGNED,<RESERVED>
-1312-1315,PVALID,ETHIOPIC SYLLABLE GWI..ETHIOPIC SYLLABLE GWE
-1316-1317,UNASSIGNED,<RESERVED>..<RESERVED>
-1318-135A,PVALID,ETHIOPIC SYLLABLE GGA..ETHIOPIC SYLLABLE FYA
-135B-135E,UNASSIGNED,<RESERVED>..<RESERVED>
-135F,PVALID,ETHIOPIC COMBINING GEMINATION MARK
-1360-137C,DISALLOWED,ETHIOPIC SECTION MARK..ETHIOPIC NUMBER TEN THOUSAND
-137D-137F,UNASSIGNED,<RESERVED>..<RESERVED>
-1380-138F,PVALID,ETHIOPIC SYLLABLE SEBATBEIT MWA..ETHIOPIC SYLLABLE PWE
-1390-1399,DISALLOWED,ETHIOPIC TONAL MARK YIZET..ETHIOPIC TONAL MARK KURT
-139A-139F,UNASSIGNED,<RESERVED>..<RESERVED>
-13A0-13F4,PVALID,CHEROKEE LETTER A..CHEROKEE LETTER YV
-13F5-13FF,UNASSIGNED,<RESERVED>..<RESERVED>
-1400,DISALLOWED,CANADIAN SYLLABICS HYPHEN
-1401-166C,PVALID,CANADIAN SYLLABICS E..CANADIAN SYLLABICS CARRIER TTSA
-166D-166E,DISALLOWED,CANADIAN SYLLABICS CHI SIGN..CANADIAN SYLLABICS FULL STOP
-166F-167F,PVALID,CANADIAN SYLLABICS QAI..CANADIAN SYLLABICS BLACKFOOT W
-1680,DISALLOWED,OGHAM SPACE MARK
-1681-169A,PVALID,OGHAM LETTER BEITH..OGHAM LETTER PEITH
-169B-169C,DISALLOWED,OGHAM FEATHER MARK..OGHAM REVERSED FEATHER MARK
-169D-169F,UNASSIGNED,<RESERVED>..<RESERVED>
-16A0-16EA,PVALID,RUNIC LETTER FEHU FEOH FE F..RUNIC LETTER X
-16EB-16F0,DISALLOWED,RUNIC SINGLE PUNCTUATION..RUNIC BELGTHOR SYMBOL
-16F1-16FF,UNASSIGNED,<RESERVED>..<RESERVED>
-1700-170C,PVALID,TAGALOG LETTER A..TAGALOG LETTER YA
-170D,UNASSIGNED,<RESERVED>
-170E-1714,PVALID,TAGALOG LETTER LA..TAGALOG SIGN VIRAMA
-1715-171F,UNASSIGNED,<RESERVED>..<RESERVED>
-1720-1734,PVALID,HANUNOO LETTER A..HANUNOO SIGN PAMUDPOD
-1735-1736,DISALLOWED,PHILIPPINE SINGLE PUNCTUATION..PHILIPPINE DOUBLE PUNCTUATION
-1737-173F,UNASSIGNED,<RESERVED>..<RESERVED>
-1740-1753,PVALID,BUHID LETTER A..BUHID VOWEL SIGN U
-1754-175F,UNASSIGNED,<RESERVED>..<RESERVED>
-1760-176C,PVALID,TAGBANWA LETTER A..TAGBANWA LETTER YA
-176D,UNASSIGNED,<RESERVED>
-176E-1770,PVALID,TAGBANWA LETTER LA..TAGBANWA LETTER SA
-1771,UNASSIGNED,<RESERVED>
-1772-1773,PVALID,TAGBANWA VOWEL SIGN I..TAGBANWA VOWEL SIGN U
-1774-177F,UNASSIGNED,<RESERVED>..<RESERVED>
-1780-17B3,PVALID,KHMER LETTER KA..KHMER INDEPENDENT VOWEL QAU
-17B4-17B5,DISALLOWED,KHMER VOWEL INHERENT AQ..KHMER VOWEL INHERENT AA
-17B6-17D3,PVALID,KHMER VOWEL SIGN AA..KHMER SIGN BATHAMASAT
-17D4-17D6,DISALLOWED,KHMER SIGN KHAN..KHMER SIGN CAMNUC PII KUUH
-17D7,PVALID,KHMER SIGN LEK TOO
-17D8-17DB,DISALLOWED,KHMER SIGN BEYYAL..KHMER CURRENCY SYMBOL RIEL
-17DC-17DD,PVALID,KHMER SIGN AVAKRAHASANYA..KHMER SIGN ATTHACAN
-17DE-17DF,UNASSIGNED,<RESERVED>..<RESERVED>
-17E0-17E9,PVALID,KHMER DIGIT ZERO..KHMER DIGIT NINE
-17EA-17EF,UNASSIGNED,<RESERVED>..<RESERVED>
-17F0-17F9,DISALLOWED,KHMER SYMBOL LEK ATTAK SON..KHMER SYMBOL LEK ATTAK PRAM-BUON
-17FA-17FF,UNASSIGNED,<RESERVED>..<RESERVED>
-1800-180E,DISALLOWED,MONGOLIAN BIRGA..MONGOLIAN VOWEL SEPARATOR
-180F,UNASSIGNED,<RESERVED>
-1810-1819,PVALID,MONGOLIAN DIGIT ZERO..MONGOLIAN DIGIT NINE
-181A-181F,UNASSIGNED,<RESERVED>..<RESERVED>
-1820-1877,PVALID,MONGOLIAN LETTER A..MONGOLIAN LETTER MANCHU ZHA
-1878-187F,UNASSIGNED,<RESERVED>..<RESERVED>
-1880-18AA,PVALID,MONGOLIAN LETTER ALI GALI ANUSVARA ONE..MONGOLIAN LETTER MANCHU ALI GALI LHA
-18AB-18AF,UNASSIGNED,<RESERVED>..<RESERVED>
-18B0-18F5,PVALID,CANADIAN SYLLABICS OY..CANADIAN SYLLABICS CARRIER DENTAL S
-18F6-18FF,UNASSIGNED,<RESERVED>..<RESERVED>
-1900-191C,PVALID,LIMBU VOWEL-CARRIER LETTER..LIMBU LETTER HA
-191D-191F,UNASSIGNED,<RESERVED>..<RESERVED>
-1920-192B,PVALID,LIMBU VOWEL SIGN A..LIMBU SUBJOINED LETTER WA
-192C-192F,UNASSIGNED,<RESERVED>..<RESERVED>
-1930-193B,PVALID,LIMBU SMALL LETTER KA..LIMBU SIGN SA-I
-193C-193F,UNASSIGNED,<RESERVED>..<RESERVED>
-1940,DISALLOWED,LIMBU SIGN LOO
-1941-1943,UNASSIGNED,<RESERVED>..<RESERVED>
-1944-1945,DISALLOWED,LIMBU EXCLAMATION MARK..LIMBU QUESTION MARK
-1946-196D,PVALID,LIMBU DIGIT ZERO..TAI LE LETTER AI
-196E-196F,UNASSIGNED,<RESERVED>..<RESERVED>
-1970-1974,PVALID,TAI LE LETTER TONE-2..TAI LE LETTER TONE-6
-1975-197F,UNASSIGNED,<RESERVED>..<RESERVED>
-1980-19AB,PVALID,NEW TAI LUE LETTER HIGH QA..NEW TAI LUE LETTER LOW SUA
-19AC-19AF,UNASSIGNED,<RESERVED>..<RESERVED>
-19B0-19C9,PVALID,NEW TAI LUE VOWEL SIGN VOWEL SHORTENER..NEW TAI LUE TONE MARK-2
-19CA-19CF,UNASSIGNED,<RESERVED>..<RESERVED>
-19D0-19DA,PVALID,NEW TAI LUE DIGIT ZERO..NEW TAI LUE THAM DIGIT ONE
-19DB-19DD,UNASSIGNED,<RESERVED>..<RESERVED>
-19DE-19FF,DISALLOWED,NEW TAI LUE SIGN LAE..KHMER SYMBOL DAP-PRAM ROC
-1A00-1A1B,PVALID,BUGINESE LETTER KA..BUGINESE VOWEL SIGN AE
-1A1C-1A1D,UNASSIGNED,<RESERVED>..<RESERVED>
-1A1E-1A1F,DISALLOWED,BUGINESE PALLAWA..BUGINESE END OF SECTION
-1A20-1A5E,PVALID,TAI THAM LETTER HIGH KA..TAI THAM CONSONANT SIGN SA
-1A5F,UNASSIGNED,<RESERVED>
-1A60-1A7C,PVALID,TAI THAM SIGN SAKOT..TAI THAM SIGN KHUEN-LUE KARAN
-1A7D-1A7E,UNASSIGNED,<RESERVED>..<RESERVED>
-1A7F-1A89,PVALID,TAI THAM COMBINING CRYPTOGRAMMIC DOT..TAI THAM HORA DIGIT NINE
-1A8A-1A8F,UNASSIGNED,<RESERVED>..<RESERVED>
-1A90-1A99,PVALID,TAI THAM THAM DIGIT ZERO..TAI THAM THAM DIGIT NINE
-1A9A-1A9F,UNASSIGNED,<RESERVED>..<RESERVED>
-1AA0-1AA6,DISALLOWED,TAI THAM SIGN WIANG..TAI THAM SIGN REVERSED ROTATED RANA
-1AA7,PVALID,TAI THAM SIGN MAI YAMOK
-1AA8-1AAD,DISALLOWED,TAI THAM SIGN KAAN..TAI THAM SIGN CAANG
-1AAE-1AFF,UNASSIGNED,<RESERVED>..<RESERVED>
-1B00-1B4B,PVALID,BALINESE SIGN ULU RICEM..BALINESE LETTER ASYURA SASAK
-1B4C-1B4F,UNASSIGNED,<RESERVED>..<RESERVED>
-1B50-1B59,PVALID,BALINESE DIGIT ZERO..BALINESE DIGIT NINE
-1B5A-1B6A,DISALLOWED,BALINESE PANTI..BALINESE MUSICAL SYMBOL DANG GEDE
-1B6B-1B73,PVALID,BALINESE MUSICAL SYMBOL COMBINING TEGEH..BALINESE MUSICAL SYMBOL COMBINING GONG
-1B74-1B7C,DISALLOWED,BALINESE MUSICAL SYMBOL RIGHT-HAND OPEN DUG..BALINESE MUSICAL SYMBOL LEFT-HAND OPEN PING
-1B7D-1B7F,UNASSIGNED,<RESERVED>..<RESERVED>
-1B80-1BAA,PVALID,SUNDANESE SIGN PANYECEK..SUNDANESE SIGN PAMAAEH
-1BAB-1BAD,UNASSIGNED,<RESERVED>..<RESERVED>
-1BAE-1BB9,PVALID,SUNDANESE LETTER KHA..SUNDANESE DIGIT NINE
-1BBA-1BFF,UNASSIGNED,<RESERVED>..<RESERVED>
-1C00-1C37,PVALID,LEPCHA LETTER KA..LEPCHA SIGN NUKTA
-1C38-1C3A,UNASSIGNED,<RESERVED>..<RESERVED>
-1C3B-1C3F,DISALLOWED,LEPCHA PUNCTUATION TA-ROL..LEPCHA PUNCTUATION TSHOOK
-1C40-1C49,PVALID,LEPCHA DIGIT ZERO..LEPCHA DIGIT NINE
-1C4A-1C4C,UNASSIGNED,<RESERVED>..<RESERVED>
-1C4D-1C7D,PVALID,LEPCHA LETTER TTA..OL CHIKI AHAD
-1C7E-1C7F,DISALLOWED,OL CHIKI PUNCTUATION MUCAAD..OL CHIKI PUNCTUATION DOUBLE MUCAAD
-1C80-1CCF,UNASSIGNED,<RESERVED>..<RESERVED>
-1CD0-1CD2,PVALID,VEDIC TONE KARSHANA..VEDIC TONE PRENKHA
-1CD3,DISALLOWED,VEDIC SIGN NIHSHVASA
-1CD4-1CF2,PVALID,VEDIC SIGN YAJURVEDIC MIDLINE SVARITA..VEDIC SIGN ARDHAVISARGA
-1CF3-1CFF,UNASSIGNED,<RESERVED>..<RESERVED>
-1D00-1D2B,PVALID,LATIN LETTER SMALL CAPITAL A..CYRILLIC LETTER SMALL CAPITAL EL
-1D2C-1D2E,DISALLOWED,MODIFIER LETTER CAPITAL A..MODIFIER LETTER CAPITAL B
-1D2F,PVALID,MODIFIER LETTER CAPITAL BARRED B
-1D30-1D3A,DISALLOWED,MODIFIER LETTER CAPITAL D..MODIFIER LETTER CAPITAL N
-1D3B,PVALID,MODIFIER LETTER CAPITAL REVERSED N
-1D3C-1D4D,DISALLOWED,MODIFIER LETTER CAPITAL O..MODIFIER LETTER SMALL G
-1D4E,PVALID,MODIFIER LETTER SMALL TURNED I
-1D4F-1D6A,DISALLOWED,MODIFIER LETTER SMALL K..GREEK SUBSCRIPT SMALL LETTER CHI
-1D6B-1D77,PVALID,LATIN SMALL LETTER UE..LATIN SMALL LETTER TURNED G
-1D78,DISALLOWED,MODIFIER LETTER CYRILLIC EN
-1D79-1D9A,PVALID,LATIN SMALL LETTER INSULAR G..LATIN SMALL LETTER EZH WITH RETROFLEX HOOK
-1D9B-1DBF,DISALLOWED,MODIFIER LETTER SMALL TURNED ALPHA..MODIFIER LETTER SMALL THETA
-1DC0-1DE6,PVALID,COMBINING DOTTED GRAVE ACCENT..COMBINING LATIN SMALL LETTER Z
-1DE7-1DFC,UNASSIGNED,<RESERVED>..<RESERVED>
-1DFD-1DFF,PVALID,COMBINING ALMOST EQUAL TO BELOW..COMBINING RIGHT ARROWHEAD AND DOWN ARROWHEAD BELOW
-1E00,DISALLOWED,LATIN CAPITAL LETTER A WITH RING BELOW
-1E01,PVALID,LATIN SMALL LETTER A WITH RING BELOW
-1E02,DISALLOWED,LATIN CAPITAL LETTER B WITH DOT ABOVE
-1E03,PVALID,LATIN SMALL LETTER B WITH DOT ABOVE
-1E04,DISALLOWED,LATIN CAPITAL LETTER B WITH DOT BELOW
-1E05,PVALID,LATIN SMALL LETTER B WITH DOT BELOW
-1E06,DISALLOWED,LATIN CAPITAL LETTER B WITH LINE BELOW
-1E07,PVALID,LATIN SMALL LETTER B WITH LINE BELOW
-1E08,DISALLOWED,LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
-1E09,PVALID,LATIN SMALL LETTER C WITH CEDILLA AND ACUTE
-1E0A,DISALLOWED,LATIN CAPITAL LETTER D WITH DOT ABOVE
-1E0B,PVALID,LATIN SMALL LETTER D WITH DOT ABOVE
-1E0C,DISALLOWED,LATIN CAPITAL LETTER D WITH DOT BELOW
-1E0D,PVALID,LATIN SMALL LETTER D WITH DOT BELOW
-1E0E,DISALLOWED,LATIN CAPITAL LETTER D WITH LINE BELOW
-1E0F,PVALID,LATIN SMALL LETTER D WITH LINE BELOW
-1E10,DISALLOWED,LATIN CAPITAL LETTER D WITH CEDILLA
-1E11,PVALID,LATIN SMALL LETTER D WITH CEDILLA
-1E12,DISALLOWED,LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
-1E13,PVALID,LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW
-1E14,DISALLOWED,LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
-1E15,PVALID,LATIN SMALL LETTER E WITH MACRON AND GRAVE
-1E16,DISALLOWED,LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
-1E17,PVALID,LATIN SMALL LETTER E WITH MACRON AND ACUTE
-1E18,DISALLOWED,LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
-1E19,PVALID,LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW
-1E1A,DISALLOWED,LATIN CAPITAL LETTER E WITH TILDE BELOW
-1E1B,PVALID,LATIN SMALL LETTER E WITH TILDE BELOW
-1E1C,DISALLOWED,LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
-1E1D,PVALID,LATIN SMALL LETTER E WITH CEDILLA AND BREVE
-1E1E,DISALLOWED,LATIN CAPITAL LETTER F WITH DOT ABOVE
-1E1F,PVALID,LATIN SMALL LETTER F WITH DOT ABOVE
-1E20,DISALLOWED,LATIN CAPITAL LETTER G WITH MACRON
-1E21,PVALID,LATIN SMALL LETTER G WITH MACRON
-1E22,DISALLOWED,LATIN CAPITAL LETTER H WITH DOT ABOVE
-1E23,PVALID,LATIN SMALL LETTER H WITH DOT ABOVE
-1E24,DISALLOWED,LATIN CAPITAL LETTER H WITH DOT BELOW
-1E25,PVALID,LATIN SMALL LETTER H WITH DOT BELOW
-1E26,DISALLOWED,LATIN CAPITAL LETTER H WITH DIAERESIS
-1E27,PVALID,LATIN SMALL LETTER H WITH DIAERESIS
-1E28,DISALLOWED,LATIN CAPITAL LETTER H WITH CEDILLA
-1E29,PVALID,LATIN SMALL LETTER H WITH CEDILLA
-1E2A,DISALLOWED,LATIN CAPITAL LETTER H WITH BREVE BELOW
-1E2B,PVALID,LATIN SMALL LETTER H WITH BREVE BELOW
-1E2C,DISALLOWED,LATIN CAPITAL LETTER I WITH TILDE BELOW
-1E2D,PVALID,LATIN SMALL LETTER I WITH TILDE BELOW
-1E2E,DISALLOWED,LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
-1E2F,PVALID,LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE
-1E30,DISALLOWED,LATIN CAPITAL LETTER K WITH ACUTE
-1E31,PVALID,LATIN SMALL LETTER K WITH ACUTE
-1E32,DISALLOWED,LATIN CAPITAL LETTER K WITH DOT BELOW
-1E33,PVALID,LATIN SMALL LETTER K WITH DOT BELOW
-1E34,DISALLOWED,LATIN CAPITAL LETTER K WITH LINE BELOW
-1E35,PVALID,LATIN SMALL LETTER K WITH LINE BELOW
-1E36,DISALLOWED,LATIN CAPITAL LETTER L WITH DOT BELOW
-1E37,PVALID,LATIN SMALL LETTER L WITH DOT BELOW
-1E38,DISALLOWED,LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
-1E39,PVALID,LATIN SMALL LETTER L WITH DOT BELOW AND MACRON
-1E3A,DISALLOWED,LATIN CAPITAL LETTER L WITH LINE BELOW
-1E3B,PVALID,LATIN SMALL LETTER L WITH LINE BELOW
-1E3C,DISALLOWED,LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
-1E3D,PVALID,LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW
-1E3E,DISALLOWED,LATIN CAPITAL LETTER M WITH ACUTE
-1E3F,PVALID,LATIN SMALL LETTER M WITH ACUTE
-1E40,DISALLOWED,LATIN CAPITAL LETTER M WITH DOT ABOVE
-1E41,PVALID,LATIN SMALL LETTER M WITH DOT ABOVE
-1E42,DISALLOWED,LATIN CAPITAL LETTER M WITH DOT BELOW
-1E43,PVALID,LATIN SMALL LETTER M WITH DOT BELOW
-1E44,DISALLOWED,LATIN CAPITAL LETTER N WITH DOT ABOVE
-1E45,PVALID,LATIN SMALL LETTER N WITH DOT ABOVE
-1E46,DISALLOWED,LATIN CAPITAL LETTER N WITH DOT BELOW
-1E47,PVALID,LATIN SMALL LETTER N WITH DOT BELOW
-1E48,DISALLOWED,LATIN CAPITAL LETTER N WITH LINE BELOW
-1E49,PVALID,LATIN SMALL LETTER N WITH LINE BELOW
-1E4A,DISALLOWED,LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
-1E4B,PVALID,LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW
-1E4C,DISALLOWED,LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
-1E4D,PVALID,LATIN SMALL LETTER O WITH TILDE AND ACUTE
-1E4E,DISALLOWED,LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
-1E4F,PVALID,LATIN SMALL LETTER O WITH TILDE AND DIAERESIS
-1E50,DISALLOWED,LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
-1E51,PVALID,LATIN SMALL LETTER O WITH MACRON AND GRAVE
-1E52,DISALLOWED,LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
-1E53,PVALID,LATIN SMALL LETTER O WITH MACRON AND ACUTE
-1E54,DISALLOWED,LATIN CAPITAL LETTER P WITH ACUTE
-1E55,PVALID,LATIN SMALL LETTER P WITH ACUTE
-1E56,DISALLOWED,LATIN CAPITAL LETTER P WITH DOT ABOVE
-1E57,PVALID,LATIN SMALL LETTER P WITH DOT ABOVE
-1E58,DISALLOWED,LATIN CAPITAL LETTER R WITH DOT ABOVE
-1E59,PVALID,LATIN SMALL LETTER R WITH DOT ABOVE
-1E5A,DISALLOWED,LATIN CAPITAL LETTER R WITH DOT BELOW
-1E5B,PVALID,LATIN SMALL LETTER R WITH DOT BELOW
-1E5C,DISALLOWED,LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
-1E5D,PVALID,LATIN SMALL LETTER R WITH DOT BELOW AND MACRON
-1E5E,DISALLOWED,LATIN CAPITAL LETTER R WITH LINE BELOW
-1E5F,PVALID,LATIN SMALL LETTER R WITH LINE BELOW
-1E60,DISALLOWED,LATIN CAPITAL LETTER S WITH DOT ABOVE
-1E61,PVALID,LATIN SMALL LETTER S WITH DOT ABOVE
-1E62,DISALLOWED,LATIN CAPITAL LETTER S WITH DOT BELOW
-1E63,PVALID,LATIN SMALL LETTER S WITH DOT BELOW
-1E64,DISALLOWED,LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
-1E65,PVALID,LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE
-1E66,DISALLOWED,LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
-1E67,PVALID,LATIN SMALL LETTER S WITH CARON AND DOT ABOVE
-1E68,DISALLOWED,LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
-1E69,PVALID,LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE
-1E6A,DISALLOWED,LATIN CAPITAL LETTER T WITH DOT ABOVE
-1E6B,PVALID,LATIN SMALL LETTER T WITH DOT ABOVE
-1E6C,DISALLOWED,LATIN CAPITAL LETTER T WITH DOT BELOW
-1E6D,PVALID,LATIN SMALL LETTER T WITH DOT BELOW
-1E6E,DISALLOWED,LATIN CAPITAL LETTER T WITH LINE BELOW
-1E6F,PVALID,LATIN SMALL LETTER T WITH LINE BELOW
-1E70,DISALLOWED,LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
-1E71,PVALID,LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW
-1E72,DISALLOWED,LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
-1E73,PVALID,LATIN SMALL LETTER U WITH DIAERESIS BELOW
-1E74,DISALLOWED,LATIN CAPITAL LETTER U WITH TILDE BELOW
-1E75,PVALID,LATIN SMALL LETTER U WITH TILDE BELOW
-1E76,DISALLOWED,LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
-1E77,PVALID,LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW
-1E78,DISALLOWED,LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
-1E79,PVALID,LATIN SMALL LETTER U WITH TILDE AND ACUTE
-1E7A,DISALLOWED,LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
-1E7B,PVALID,LATIN SMALL LETTER U WITH MACRON AND DIAERESIS
-1E7C,DISALLOWED,LATIN CAPITAL LETTER V WITH TILDE
-1E7D,PVALID,LATIN SMALL LETTER V WITH TILDE
-1E7E,DISALLOWED,LATIN CAPITAL LETTER V WITH DOT BELOW
-1E7F,PVALID,LATIN SMALL LETTER V WITH DOT BELOW
-1E80,DISALLOWED,LATIN CAPITAL LETTER W WITH GRAVE
-1E81,PVALID,LATIN SMALL LETTER W WITH GRAVE
-1E82,DISALLOWED,LATIN CAPITAL LETTER W WITH ACUTE
-1E83,PVALID,LATIN SMALL LETTER W WITH ACUTE
-1E84,DISALLOWED,LATIN CAPITAL LETTER W WITH DIAERESIS
-1E85,PVALID,LATIN SMALL LETTER W WITH DIAERESIS
-1E86,DISALLOWED,LATIN CAPITAL LETTER W WITH DOT ABOVE
-1E87,PVALID,LATIN SMALL LETTER W WITH DOT ABOVE
-1E88,DISALLOWED,LATIN CAPITAL LETTER W WITH DOT BELOW
-1E89,PVALID,LATIN SMALL LETTER W WITH DOT BELOW
-1E8A,DISALLOWED,LATIN CAPITAL LETTER X WITH DOT ABOVE
-1E8B,PVALID,LATIN SMALL LETTER X WITH DOT ABOVE
-1E8C,DISALLOWED,LATIN CAPITAL LETTER X WITH DIAERESIS
-1E8D,PVALID,LATIN SMALL LETTER X WITH DIAERESIS
-1E8E,DISALLOWED,LATIN CAPITAL LETTER Y WITH DOT ABOVE
-1E8F,PVALID,LATIN SMALL LETTER Y WITH DOT ABOVE
-1E90,DISALLOWED,LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
-1E91,PVALID,LATIN SMALL LETTER Z WITH CIRCUMFLEX
-1E92,DISALLOWED,LATIN CAPITAL LETTER Z WITH DOT BELOW
-1E93,PVALID,LATIN SMALL LETTER Z WITH DOT BELOW
-1E94,DISALLOWED,LATIN CAPITAL LETTER Z WITH LINE BELOW
-1E95-1E99,PVALID,LATIN SMALL LETTER Z WITH LINE BELOW..LATIN SMALL LETTER Y WITH RING ABOVE
-1E9A-1E9B,DISALLOWED,LATIN SMALL LETTER A WITH RIGHT HALF RING..LATIN SMALL LETTER LONG S WITH DOT ABOVE
-1E9C-1E9D,PVALID,LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE..LATIN SMALL LETTER LONG S WITH HIGH STROKE
-1E9E,DISALLOWED,LATIN CAPITAL LETTER SHARP S
-1E9F,PVALID,LATIN SMALL LETTER DELTA
-1EA0,DISALLOWED,LATIN CAPITAL LETTER A WITH DOT BELOW
-1EA1,PVALID,LATIN SMALL LETTER A WITH DOT BELOW
-1EA2,DISALLOWED,LATIN CAPITAL LETTER A WITH HOOK ABOVE
-1EA3,PVALID,LATIN SMALL LETTER A WITH HOOK ABOVE
-1EA4,DISALLOWED,LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
-1EA5,PVALID,LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE
-1EA6,DISALLOWED,LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
-1EA7,PVALID,LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE
-1EA8,DISALLOWED,LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
-1EA9,PVALID,LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
-1EAA,DISALLOWED,LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
-1EAB,PVALID,LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE
-1EAC,DISALLOWED,LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
-1EAD,PVALID,LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW
-1EAE,DISALLOWED,LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
-1EAF,PVALID,LATIN SMALL LETTER A WITH BREVE AND ACUTE
-1EB0,DISALLOWED,LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
-1EB1,PVALID,LATIN SMALL LETTER A WITH BREVE AND GRAVE
-1EB2,DISALLOWED,LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
-1EB3,PVALID,LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE
-1EB4,DISALLOWED,LATIN CAPITAL LETTER A WITH BREVE AND TILDE
-1EB5,PVALID,LATIN SMALL LETTER A WITH BREVE AND TILDE
-1EB6,DISALLOWED,LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
-1EB7,PVALID,LATIN SMALL LETTER A WITH BREVE AND DOT BELOW
-1EB8,DISALLOWED,LATIN CAPITAL LETTER E WITH DOT BELOW
-1EB9,PVALID,LATIN SMALL LETTER E WITH DOT BELOW
-1EBA,DISALLOWED,LATIN CAPITAL LETTER E WITH HOOK ABOVE
-1EBB,PVALID,LATIN SMALL LETTER E WITH HOOK ABOVE
-1EBC,DISALLOWED,LATIN CAPITAL LETTER E WITH TILDE
-1EBD,PVALID,LATIN SMALL LETTER E WITH TILDE
-1EBE,DISALLOWED,LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
-1EBF,PVALID,LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE
-1EC0,DISALLOWED,LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
-1EC1,PVALID,LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE
-1EC2,DISALLOWED,LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
-1EC3,PVALID,LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
-1EC4,DISALLOWED,LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
-1EC5,PVALID,LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE
-1EC6,DISALLOWED,LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
-1EC7,PVALID,LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW
-1EC8,DISALLOWED,LATIN CAPITAL LETTER I WITH HOOK ABOVE
-1EC9,PVALID,LATIN SMALL LETTER I WITH HOOK ABOVE
-1ECA,DISALLOWED,LATIN CAPITAL LETTER I WITH DOT BELOW
-1ECB,PVALID,LATIN SMALL LETTER I WITH DOT BELOW
-1ECC,DISALLOWED,LATIN CAPITAL LETTER O WITH DOT BELOW
-1ECD,PVALID,LATIN SMALL LETTER O WITH DOT BELOW
-1ECE,DISALLOWED,LATIN CAPITAL LETTER O WITH HOOK ABOVE
-1ECF,PVALID,LATIN SMALL LETTER O WITH HOOK ABOVE
-1ED0,DISALLOWED,LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
-1ED1,PVALID,LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE
-1ED2,DISALLOWED,LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
-1ED3,PVALID,LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE
-1ED4,DISALLOWED,LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
-1ED5,PVALID,LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
-1ED6,DISALLOWED,LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
-1ED7,PVALID,LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE
-1ED8,DISALLOWED,LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
-1ED9,PVALID,LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW
-1EDA,DISALLOWED,LATIN CAPITAL LETTER O WITH HORN AND ACUTE
-1EDB,PVALID,LATIN SMALL LETTER O WITH HORN AND ACUTE
-1EDC,DISALLOWED,LATIN CAPITAL LETTER O WITH HORN AND GRAVE
-1EDD,PVALID,LATIN SMALL LETTER O WITH HORN AND GRAVE
-1EDE,DISALLOWED,LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
-1EDF,PVALID,LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE
-1EE0,DISALLOWED,LATIN CAPITAL LETTER O WITH HORN AND TILDE
-1EE1,PVALID,LATIN SMALL LETTER O WITH HORN AND TILDE
-1EE2,DISALLOWED,LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
-1EE3,PVALID,LATIN SMALL LETTER O WITH HORN AND DOT BELOW
-1EE4,DISALLOWED,LATIN CAPITAL LETTER U WITH DOT BELOW
-1EE5,PVALID,LATIN SMALL LETTER U WITH DOT BELOW
-1EE6,DISALLOWED,LATIN CAPITAL LETTER U WITH HOOK ABOVE
-1EE7,PVALID,LATIN SMALL LETTER U WITH HOOK ABOVE
-1EE8,DISALLOWED,LATIN CAPITAL LETTER U WITH HORN AND ACUTE
-1EE9,PVALID,LATIN SMALL LETTER U WITH HORN AND ACUTE
-1EEA,DISALLOWED,LATIN CAPITAL LETTER U WITH HORN AND GRAVE
-1EEB,PVALID,LATIN SMALL LETTER U WITH HORN AND GRAVE
-1EEC,DISALLOWED,LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
-1EED,PVALID,LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE
-1EEE,DISALLOWED,LATIN CAPITAL LETTER U WITH HORN AND TILDE
-1EEF,PVALID,LATIN SMALL LETTER U WITH HORN AND TILDE
-1EF0,DISALLOWED,LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
-1EF1,PVALID,LATIN SMALL LETTER U WITH HORN AND DOT BELOW
-1EF2,DISALLOWED,LATIN CAPITAL LETTER Y WITH GRAVE
-1EF3,PVALID,LATIN SMALL LETTER Y WITH GRAVE
-1EF4,DISALLOWED,LATIN CAPITAL LETTER Y WITH DOT BELOW
-1EF5,PVALID,LATIN SMALL LETTER Y WITH DOT BELOW
-1EF6,DISALLOWED,LATIN CAPITAL LETTER Y WITH HOOK ABOVE
-1EF7,PVALID,LATIN SMALL LETTER Y WITH HOOK ABOVE
-1EF8,DISALLOWED,LATIN CAPITAL LETTER Y WITH TILDE
-1EF9,PVALID,LATIN SMALL LETTER Y WITH TILDE
-1EFA,DISALLOWED,LATIN CAPITAL LETTER MIDDLE-WELSH LL
-1EFB,PVALID,LATIN SMALL LETTER MIDDLE-WELSH LL
-1EFC,DISALLOWED,LATIN CAPITAL LETTER MIDDLE-WELSH V
-1EFD,PVALID,LATIN SMALL LETTER MIDDLE-WELSH V
-1EFE,DISALLOWED,LATIN CAPITAL LETTER Y WITH LOOP
-1EFF-1F07,PVALID,LATIN SMALL LETTER Y WITH LOOP..GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI
-1F08-1F0F,DISALLOWED,GREEK CAPITAL LETTER ALPHA WITH PSILI..GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
-1F10-1F15,PVALID,GREEK SMALL LETTER EPSILON WITH PSILI..GREEK SMALL LETTER EPSILON WITH DASIA AND OXIA
-1F16-1F17,UNASSIGNED,<RESERVED>..<RESERVED>
-1F18-1F1D,DISALLOWED,GREEK CAPITAL LETTER EPSILON WITH PSILI..GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
-1F1E-1F1F,UNASSIGNED,<RESERVED>..<RESERVED>
-1F20-1F27,PVALID,GREEK SMALL LETTER ETA WITH PSILI..GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI
-1F28-1F2F,DISALLOWED,GREEK CAPITAL LETTER ETA WITH PSILI..GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
-1F30-1F37,PVALID,GREEK SMALL LETTER IOTA WITH PSILI..GREEK SMALL LETTER IOTA WITH DASIA AND PERISPOMENI
-1F38-1F3F,DISALLOWED,GREEK CAPITAL LETTER IOTA WITH PSILI..GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
-1F40-1F45,PVALID,GREEK SMALL LETTER OMICRON WITH PSILI..GREEK SMALL LETTER OMICRON WITH DASIA AND OXIA
-1F46-1F47,UNASSIGNED,<RESERVED>..<RESERVED>
-1F48-1F4D,DISALLOWED,GREEK CAPITAL LETTER OMICRON WITH PSILI..GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
-1F4E-1F4F,UNASSIGNED,<RESERVED>..<RESERVED>
-1F50-1F57,PVALID,GREEK SMALL LETTER UPSILON WITH PSILI..GREEK SMALL LETTER UPSILON WITH DASIA AND PERISPOMENI
-1F58,UNASSIGNED,<RESERVED>
-1F59,DISALLOWED,GREEK CAPITAL LETTER UPSILON WITH DASIA
-1F5A,UNASSIGNED,<RESERVED>
-1F5B,DISALLOWED,GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
-1F5C,UNASSIGNED,<RESERVED>
-1F5D,DISALLOWED,GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
-1F5E,UNASSIGNED,<RESERVED>
-1F5F,DISALLOWED,GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
-1F60-1F67,PVALID,GREEK SMALL LETTER OMEGA WITH PSILI..GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI
-1F68-1F6F,DISALLOWED,GREEK CAPITAL LETTER OMEGA WITH PSILI..GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
-1F70,PVALID,GREEK SMALL LETTER ALPHA WITH VARIA
-1F71,DISALLOWED,GREEK SMALL LETTER ALPHA WITH OXIA
-1F72,PVALID,GREEK SMALL LETTER EPSILON WITH VARIA
-1F73,DISALLOWED,GREEK SMALL LETTER EPSILON WITH OXIA
-1F74,PVALID,GREEK SMALL LETTER ETA WITH VARIA
-1F75,DISALLOWED,GREEK SMALL LETTER ETA WITH OXIA
-1F76,PVALID,GREEK SMALL LETTER IOTA WITH VARIA
-1F77,DISALLOWED,GREEK SMALL LETTER IOTA WITH OXIA
-1F78,PVALID,GREEK SMALL LETTER OMICRON WITH VARIA
-1F79,DISALLOWED,GREEK SMALL LETTER OMICRON WITH OXIA
-1F7A,PVALID,GREEK SMALL LETTER UPSILON WITH VARIA
-1F7B,DISALLOWED,GREEK SMALL LETTER UPSILON WITH OXIA
-1F7C,PVALID,GREEK SMALL LETTER OMEGA WITH VARIA
-1F7D,DISALLOWED,GREEK SMALL LETTER OMEGA WITH OXIA
-1F7E-1F7F,UNASSIGNED,<RESERVED>..<RESERVED>
-1F80-1FAF,DISALLOWED,GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI..GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
-1FB0-1FB1,PVALID,GREEK SMALL LETTER ALPHA WITH VRACHY..GREEK SMALL LETTER ALPHA WITH MACRON
-1FB2-1FB4,DISALLOWED,GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
-1FB5,UNASSIGNED,<RESERVED>
-1FB6,PVALID,GREEK SMALL LETTER ALPHA WITH PERISPOMENI
-1FB7-1FC4,DISALLOWED,GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI..GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
-1FC5,UNASSIGNED,<RESERVED>
-1FC6,PVALID,GREEK SMALL LETTER ETA WITH PERISPOMENI
-1FC7-1FCF,DISALLOWED,GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI..GREEK PSILI AND PERISPOMENI
-1FD0-1FD2,PVALID,GREEK SMALL LETTER IOTA WITH VRACHY..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
-1FD3,DISALLOWED,GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
-1FD4-1FD5,UNASSIGNED,<RESERVED>..<RESERVED>
-1FD6-1FD7,PVALID,GREEK SMALL LETTER IOTA WITH PERISPOMENI..GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
-1FD8-1FDB,DISALLOWED,GREEK CAPITAL LETTER IOTA WITH VRACHY..GREEK CAPITAL LETTER IOTA WITH OXIA
-1FDC,UNASSIGNED,<RESERVED>
-1FDD-1FDF,DISALLOWED,GREEK DASIA AND VARIA..GREEK DASIA AND PERISPOMENI
-1FE0-1FE2,PVALID,GREEK SMALL LETTER UPSILON WITH VRACHY..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
-1FE3,DISALLOWED,GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
-1FE4-1FE7,PVALID,GREEK SMALL LETTER RHO WITH PSILI..GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
-1FE8-1FEF,DISALLOWED,GREEK CAPITAL LETTER UPSILON WITH VRACHY..GREEK VARIA
-1FF0-1FF1,UNASSIGNED,<RESERVED>..<RESERVED>
-1FF2-1FF4,DISALLOWED,GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI..GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
-1FF5,UNASSIGNED,<RESERVED>
-1FF6,PVALID,GREEK SMALL LETTER OMEGA WITH PERISPOMENI
-1FF7-1FFE,DISALLOWED,GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI..GREEK DASIA
-1FFF,UNASSIGNED,<RESERVED>
-2000-200B,DISALLOWED,EN QUAD..ZERO WIDTH SPACE
-200C-200D,CONTEXTJ,ZERO WIDTH NON-JOINER..ZERO WIDTH JOINER
-200E-2064,DISALLOWED,LEFT-TO-RIGHT MARK..INVISIBLE PLUS
-2065-2069,UNASSIGNED,<RESERVED>..<RESERVED>
-206A-2071,DISALLOWED,INHIBIT SYMMETRIC SWAPPING..SUPERSCRIPT LATIN SMALL LETTER I
-2072-2073,UNASSIGNED,<RESERVED>..<RESERVED>
-2074-208E,DISALLOWED,SUPERSCRIPT FOUR..SUBSCRIPT RIGHT PARENTHESIS
-208F,UNASSIGNED,<RESERVED>
-2090-2094,DISALLOWED,LATIN SUBSCRIPT SMALL LETTER A..LATIN SUBSCRIPT SMALL LETTER SCHWA
-2095-209F,UNASSIGNED,<RESERVED>..<RESERVED>
-20A0-20B8,DISALLOWED,EURO-CURRENCY SIGN..TENGE SIGN
-20B9-20CF,UNASSIGNED,<RESERVED>..<RESERVED>
-20D0-20F0,DISALLOWED,COMBINING LEFT HARPOON ABOVE..COMBINING ASTERISK ABOVE
-20F1-20FF,UNASSIGNED,<RESERVED>..<RESERVED>
-2100-214D,DISALLOWED,ACCOUNT OF..AKTIESELSKAB
-214E,PVALID,TURNED SMALL F
-214F-2183,DISALLOWED,SYMBOL FOR SAMARITAN SOURCE..ROMAN NUMERAL REVERSED ONE HUNDRED
-2184,PVALID,LATIN SMALL LETTER REVERSED C
-2185-2189,DISALLOWED,ROMAN NUMERAL SIX LATE FORM..VULGAR FRACTION ZERO THIRDS
-218A-218F,UNASSIGNED,<RESERVED>..<RESERVED>
-2190-23E8,DISALLOWED,LEFTWARDS ARROW..DECIMAL EXPONENT SYMBOL
-23E9-23FF,UNASSIGNED,<RESERVED>..<RESERVED>
-2400-2426,DISALLOWED,SYMBOL FOR NULL..SYMBOL FOR SUBSTITUTE FORM TWO
-2427-243F,UNASSIGNED,<RESERVED>..<RESERVED>
-2440-244A,DISALLOWED,OCR HOOK..OCR DOUBLE BACKSLASH
-244B-245F,UNASSIGNED,<RESERVED>..<RESERVED>
-2460-26CD,DISALLOWED,CIRCLED DIGIT ONE..DISABLED CAR
-26CE,UNASSIGNED,<RESERVED>
-26CF-26E1,DISALLOWED,PICK..RESTRICTED LEFT ENTRY-2
-26E2,UNASSIGNED,<RESERVED>
-26E3,DISALLOWED,HEAVY CIRCLE WITH STROKE AND TWO DOTS ABOVE
-26E4-26E7,UNASSIGNED,<RESERVED>..<RESERVED>
-26E8-26FF,DISALLOWED,BLACK CROSS ON SHIELD..WHITE FLAG WITH HORIZONTAL MIDDLE BLACK STRIPE
-2700,UNASSIGNED,<RESERVED>
-2701-2704,DISALLOWED,UPPER BLADE SCISSORS..WHITE SCISSORS
-2705,UNASSIGNED,<RESERVED>
-2706-2709,DISALLOWED,TELEPHONE LOCATION SIGN..ENVELOPE
-270A-270B,UNASSIGNED,<RESERVED>..<RESERVED>
-270C-2727,DISALLOWED,VICTORY HAND..WHITE FOUR POINTED STAR
-2728,UNASSIGNED,<RESERVED>
-2729-274B,DISALLOWED,STRESS OUTLINED WHITE STAR..HEAVY EIGHT TEARDROP-SPOKED PROPELLER ASTERISK
-274C,UNASSIGNED,<RESERVED>
-274D,DISALLOWED,SHADOWED WHITE CIRCLE
-274E,UNASSIGNED,<RESERVED>
-274F-2752,DISALLOWED,LOWER RIGHT DROP-SHADOWED WHITE SQUARE..UPPER RIGHT SHADOWED WHITE SQUARE
-2753-2755,UNASSIGNED,<RESERVED>..<RESERVED>
-2756-275E,DISALLOWED,BLACK DIAMOND MINUS WHITE X..HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT
-275F-2760,UNASSIGNED,<RESERVED>..<RESERVED>
-2761-2794,DISALLOWED,CURVED STEM PARAGRAPH SIGN ORNAMENT..HEAVY WIDE-HEADED RIGHTWARDS ARROW
-2795-2797,UNASSIGNED,<RESERVED>..<RESERVED>
-2798-27AF,DISALLOWED,HEAVY SOUTH EAST ARROW..NOTCHED LOWER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW
-27B0,UNASSIGNED,<RESERVED>
-27B1-27BE,DISALLOWED,NOTCHED UPPER RIGHT-SHADOWED WHITE RIGHTWARDS ARROW..OPEN-OUTLINED RIGHTWARDS ARROW
-27BF,UNASSIGNED,<RESERVED>
-27C0-27CA,DISALLOWED,THREE DIMENSIONAL ANGLE..VERTICAL BAR WITH HORIZONTAL STROKE
-27CB,UNASSIGNED,<RESERVED>
-27CC,DISALLOWED,LONG DIVISION
-27CD-27CF,UNASSIGNED,<RESERVED>..<RESERVED>
-27D0-2B4C,DISALLOWED,WHITE DIAMOND WITH CENTRED DOT..RIGHTWARDS ARROW ABOVE REVERSE TILDE OPERATOR
-2B4D-2B4F,UNASSIGNED,<RESERVED>..<RESERVED>
-2B50-2B59,DISALLOWED,WHITE MEDIUM STAR..HEAVY CIRCLED SALTIRE
-2B5A-2BFF,UNASSIGNED,<RESERVED>..<RESERVED>
-2C00-2C2E,DISALLOWED,GLAGOLITIC CAPITAL LETTER AZU..GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE
-2C2F,UNASSIGNED,<RESERVED>
-2C30-2C5E,PVALID,GLAGOLITIC SMALL LETTER AZU..GLAGOLITIC SMALL LETTER LATINATE MYSLITE
-2C5F,UNASSIGNED,<RESERVED>
-2C60,DISALLOWED,LATIN CAPITAL LETTER L WITH DOUBLE BAR
-2C61,PVALID,LATIN SMALL LETTER L WITH DOUBLE BAR
-2C62-2C64,DISALLOWED,LATIN CAPITAL LETTER L WITH MIDDLE TILDE..LATIN CAPITAL LETTER R WITH TAIL
-2C65-2C66,PVALID,LATIN SMALL LETTER A WITH STROKE..LATIN SMALL LETTER T WITH DIAGONAL STROKE
-2C67,DISALLOWED,LATIN CAPITAL LETTER H WITH DESCENDER
-2C68,PVALID,LATIN SMALL LETTER H WITH DESCENDER
-2C69,DISALLOWED,LATIN CAPITAL LETTER K WITH DESCENDER
-2C6A,PVALID,LATIN SMALL LETTER K WITH DESCENDER
-2C6B,DISALLOWED,LATIN CAPITAL LETTER Z WITH DESCENDER
-2C6C,PVALID,LATIN SMALL LETTER Z WITH DESCENDER
-2C6D-2C70,DISALLOWED,LATIN CAPITAL LETTER ALPHA..LATIN CAPITAL LETTER TURNED ALPHA
-2C71,PVALID,LATIN SMALL LETTER V WITH RIGHT HOOK
-2C72,DISALLOWED,LATIN CAPITAL LETTER W WITH HOOK
-2C73-2C74,PVALID,LATIN SMALL LETTER W WITH HOOK..LATIN SMALL LETTER V WITH CURL
-2C75,DISALLOWED,LATIN CAPITAL LETTER HALF H
-2C76-2C7B,PVALID,LATIN SMALL LETTER HALF H..LATIN LETTER SMALL CAPITAL TURNED E
-2C7C-2C80,DISALLOWED,LATIN SUBSCRIPT SMALL LETTER J..COPTIC CAPITAL LETTER ALFA
-2C81,PVALID,COPTIC SMALL LETTER ALFA
-2C82,DISALLOWED,COPTIC CAPITAL LETTER VIDA
-2C83,PVALID,COPTIC SMALL LETTER VIDA
-2C84,DISALLOWED,COPTIC CAPITAL LETTER GAMMA
-2C85,PVALID,COPTIC SMALL LETTER GAMMA
-2C86,DISALLOWED,COPTIC CAPITAL LETTER DALDA
-2C87,PVALID,COPTIC SMALL LETTER DALDA
-2C88,DISALLOWED,COPTIC CAPITAL LETTER EIE
-2C89,PVALID,COPTIC SMALL LETTER EIE
-2C8A,DISALLOWED,COPTIC CAPITAL LETTER SOU
-2C8B,PVALID,COPTIC SMALL LETTER SOU
-2C8C,DISALLOWED,COPTIC CAPITAL LETTER ZATA
-2C8D,PVALID,COPTIC SMALL LETTER ZATA
-2C8E,DISALLOWED,COPTIC CAPITAL LETTER HATE
-2C8F,PVALID,COPTIC SMALL LETTER HATE
-2C90,DISALLOWED,COPTIC CAPITAL LETTER THETHE
-2C91,PVALID,COPTIC SMALL LETTER THETHE
-2C92,DISALLOWED,COPTIC CAPITAL LETTER IAUDA
-2C93,PVALID,COPTIC SMALL LETTER IAUDA
-2C94,DISALLOWED,COPTIC CAPITAL LETTER KAPA
-2C95,PVALID,COPTIC SMALL LETTER KAPA
-2C96,DISALLOWED,COPTIC CAPITAL LETTER LAULA
-2C97,PVALID,COPTIC SMALL LETTER LAULA
-2C98,DISALLOWED,COPTIC CAPITAL LETTER MI
-2C99,PVALID,COPTIC SMALL LETTER MI
-2C9A,DISALLOWED,COPTIC CAPITAL LETTER NI
-2C9B,PVALID,COPTIC SMALL LETTER NI
-2C9C,DISALLOWED,COPTIC CAPITAL LETTER KSI
-2C9D,PVALID,COPTIC SMALL LETTER KSI
-2C9E,DISALLOWED,COPTIC CAPITAL LETTER O
-2C9F,PVALID,COPTIC SMALL LETTER O
-2CA0,DISALLOWED,COPTIC CAPITAL LETTER PI
-2CA1,PVALID,COPTIC SMALL LETTER PI
-2CA2,DISALLOWED,COPTIC CAPITAL LETTER RO
-2CA3,PVALID,COPTIC SMALL LETTER RO
-2CA4,DISALLOWED,COPTIC CAPITAL LETTER SIMA
-2CA5,PVALID,COPTIC SMALL LETTER SIMA
-2CA6,DISALLOWED,COPTIC CAPITAL LETTER TAU
-2CA7,PVALID,COPTIC SMALL LETTER TAU
-2CA8,DISALLOWED,COPTIC CAPITAL LETTER UA
-2CA9,PVALID,COPTIC SMALL LETTER UA
-2CAA,DISALLOWED,COPTIC CAPITAL LETTER FI
-2CAB,PVALID,COPTIC SMALL LETTER FI
-2CAC,DISALLOWED,COPTIC CAPITAL LETTER KHI
-2CAD,PVALID,COPTIC SMALL LETTER KHI
-2CAE,DISALLOWED,COPTIC CAPITAL LETTER PSI
-2CAF,PVALID,COPTIC SMALL LETTER PSI
-2CB0,DISALLOWED,COPTIC CAPITAL LETTER OOU
-2CB1,PVALID,COPTIC SMALL LETTER OOU
-2CB2,DISALLOWED,COPTIC CAPITAL LETTER DIALECT-P ALEF
-2CB3,PVALID,COPTIC SMALL LETTER DIALECT-P ALEF
-2CB4,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC AIN
-2CB5,PVALID,COPTIC SMALL LETTER OLD COPTIC AIN
-2CB6,DISALLOWED,COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE
-2CB7,PVALID,COPTIC SMALL LETTER CRYPTOGRAMMIC EIE
-2CB8,DISALLOWED,COPTIC CAPITAL LETTER DIALECT-P KAPA
-2CB9,PVALID,COPTIC SMALL LETTER DIALECT-P KAPA
-2CBA,DISALLOWED,COPTIC CAPITAL LETTER DIALECT-P NI
-2CBB,PVALID,COPTIC SMALL LETTER DIALECT-P NI
-2CBC,DISALLOWED,COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI
-2CBD,PVALID,COPTIC SMALL LETTER CRYPTOGRAMMIC NI
-2CBE,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC OOU
-2CBF,PVALID,COPTIC SMALL LETTER OLD COPTIC OOU
-2CC0,DISALLOWED,COPTIC CAPITAL LETTER SAMPI
-2CC1,PVALID,COPTIC SMALL LETTER SAMPI
-2CC2,DISALLOWED,COPTIC CAPITAL LETTER CROSSED SHEI
-2CC3,PVALID,COPTIC SMALL LETTER CROSSED SHEI
-2CC4,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC SHEI
-2CC5,PVALID,COPTIC SMALL LETTER OLD COPTIC SHEI
-2CC6,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC ESH
-2CC7,PVALID,COPTIC SMALL LETTER OLD COPTIC ESH
-2CC8,DISALLOWED,COPTIC CAPITAL LETTER AKHMIMIC KHEI
-2CC9,PVALID,COPTIC SMALL LETTER AKHMIMIC KHEI
-2CCA,DISALLOWED,COPTIC CAPITAL LETTER DIALECT-P HORI
-2CCB,PVALID,COPTIC SMALL LETTER DIALECT-P HORI
-2CCC,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC HORI
-2CCD,PVALID,COPTIC SMALL LETTER OLD COPTIC HORI
-2CCE,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC HA
-2CCF,PVALID,COPTIC SMALL LETTER OLD COPTIC HA
-2CD0,DISALLOWED,COPTIC CAPITAL LETTER L-SHAPED HA
-2CD1,PVALID,COPTIC SMALL LETTER L-SHAPED HA
-2CD2,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC HEI
-2CD3,PVALID,COPTIC SMALL LETTER OLD COPTIC HEI
-2CD4,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC HAT
-2CD5,PVALID,COPTIC SMALL LETTER OLD COPTIC HAT
-2CD6,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC GANGIA
-2CD7,PVALID,COPTIC SMALL LETTER OLD COPTIC GANGIA
-2CD8,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC DJA
-2CD9,PVALID,COPTIC SMALL LETTER OLD COPTIC DJA
-2CDA,DISALLOWED,COPTIC CAPITAL LETTER OLD COPTIC SHIMA
-2CDB,PVALID,COPTIC SMALL LETTER OLD COPTIC SHIMA
-2CDC,DISALLOWED,COPTIC CAPITAL LETTER OLD NUBIAN SHIMA
-2CDD,PVALID,COPTIC SMALL LETTER OLD NUBIAN SHIMA
-2CDE,DISALLOWED,COPTIC CAPITAL LETTER OLD NUBIAN NGI
-2CDF,PVALID,COPTIC SMALL LETTER OLD NUBIAN NGI
-2CE0,DISALLOWED,COPTIC CAPITAL LETTER OLD NUBIAN NYI
-2CE1,PVALID,COPTIC SMALL LETTER OLD NUBIAN NYI
-2CE2,DISALLOWED,COPTIC CAPITAL LETTER OLD NUBIAN WAU
-2CE3-2CE4,PVALID,COPTIC SMALL LETTER OLD NUBIAN WAU..COPTIC SYMBOL KAI
-2CE5-2CEB,DISALLOWED,COPTIC SYMBOL MI RO..COPTIC CAPITAL LETTER CRYPTOGRAMMIC SHEI
-2CEC,PVALID,COPTIC SMALL LETTER CRYPTOGRAMMIC SHEI
-2CED,DISALLOWED,COPTIC CAPITAL LETTER CRYPTOGRAMMIC GANGIA
-2CEE-2CF1,PVALID,COPTIC SMALL LETTER CRYPTOGRAMMIC GANGIA..COPTIC COMBINING SPIRITUS LENIS
-2CF2-2CF8,UNASSIGNED,<RESERVED>..<RESERVED>
-2CF9-2CFF,DISALLOWED,COPTIC OLD NUBIAN FULL STOP..COPTIC MORPHOLOGICAL DIVIDER
-2D00-2D25,PVALID,GEORGIAN SMALL LETTER AN..GEORGIAN SMALL LETTER HOE
-2D26-2D2F,UNASSIGNED,<RESERVED>..<RESERVED>
-2D30-2D65,PVALID,TIFINAGH LETTER YA..TIFINAGH LETTER YAZZ
-2D66-2D6E,UNASSIGNED,<RESERVED>..<RESERVED>
-2D6F,DISALLOWED,TIFINAGH MODIFIER LETTER LABIALIZATION MARK
-2D70-2D7F,UNASSIGNED,<RESERVED>..<RESERVED>
-2D80-2D96,PVALID,ETHIOPIC SYLLABLE LOA..ETHIOPIC SYLLABLE GGWE
-2D97-2D9F,UNASSIGNED,<RESERVED>..<RESERVED>
-2DA0-2DA6,PVALID,ETHIOPIC SYLLABLE SSA..ETHIOPIC SYLLABLE SSO
-2DA7,UNASSIGNED,<RESERVED>
-2DA8-2DAE,PVALID,ETHIOPIC SYLLABLE CCA..ETHIOPIC SYLLABLE CCO
-2DAF,UNASSIGNED,<RESERVED>
-2DB0-2DB6,PVALID,ETHIOPIC SYLLABLE ZZA..ETHIOPIC SYLLABLE ZZO
-2DB7,UNASSIGNED,<RESERVED>
-2DB8-2DBE,PVALID,ETHIOPIC SYLLABLE CCHA..ETHIOPIC SYLLABLE CCHO
-2DBF,UNASSIGNED,<RESERVED>
-2DC0-2DC6,PVALID,ETHIOPIC SYLLABLE QYA..ETHIOPIC SYLLABLE QYO
-2DC7,UNASSIGNED,<RESERVED>
-2DC8-2DCE,PVALID,ETHIOPIC SYLLABLE KYA..ETHIOPIC SYLLABLE KYO
-2DCF,UNASSIGNED,<RESERVED>
-2DD0-2DD6,PVALID,ETHIOPIC SYLLABLE XYA..ETHIOPIC SYLLABLE XYO
-2DD7,UNASSIGNED,<RESERVED>
-2DD8-2DDE,PVALID,ETHIOPIC SYLLABLE GYA..ETHIOPIC SYLLABLE GYO
-2DDF,UNASSIGNED,<RESERVED>
-2DE0-2DFF,PVALID,COMBINING CYRILLIC LETTER BE..COMBINING CYRILLIC LETTER IOTIFIED BIG YUS
-2E00-2E2E,DISALLOWED,RIGHT ANGLE SUBSTITUTION MARKER..REVERSED QUESTION MARK
-2E2F,PVALID,VERTICAL TILDE
-2E30-2E31,DISALLOWED,RING POINT..WORD SEPARATOR MIDDLE DOT
-2E32-2E7F,UNASSIGNED,<RESERVED>..<RESERVED>
-2E80-2E99,DISALLOWED,CJK RADICAL REPEAT..CJK RADICAL RAP
-2E9A,UNASSIGNED,<RESERVED>
-2E9B-2EF3,DISALLOWED,CJK RADICAL CHOKE..CJK RADICAL C-SIMPLIFIED TURTLE
-2EF4-2EFF,UNASSIGNED,<RESERVED>..<RESERVED>
-2F00-2FD5,DISALLOWED,KANGXI RADICAL ONE..KANGXI RADICAL FLUTE
-2FD6-2FEF,UNASSIGNED,<RESERVED>..<RESERVED>
-2FF0-2FFB,DISALLOWED,IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO RIGHT..IDEOGRAPHIC DESCRIPTION CHARACTER OVERLAID
-2FFC-2FFF,UNASSIGNED,<RESERVED>..<RESERVED>
-3000-3004,DISALLOWED,IDEOGRAPHIC SPACE..JAPANESE INDUSTRIAL STANDARD SYMBOL
-3005-3007,PVALID,IDEOGRAPHIC ITERATION MARK..IDEOGRAPHIC NUMBER ZERO
-3008-3029,DISALLOWED,LEFT ANGLE BRACKET..HANGZHOU NUMERAL NINE
-302A-302D,PVALID,IDEOGRAPHIC LEVEL TONE MARK..IDEOGRAPHIC ENTERING TONE MARK
-302E-303B,DISALLOWED,HANGUL SINGLE DOT TONE MARK..VERTICAL IDEOGRAPHIC ITERATION MARK
-303C,PVALID,MASU MARK
-303D-303F,DISALLOWED,PART ALTERNATION MARK..IDEOGRAPHIC HALF FILL SPACE
-3040,UNASSIGNED,<RESERVED>
-3041-3096,PVALID,HIRAGANA LETTER SMALL A..HIRAGANA LETTER SMALL KE
-3097-3098,UNASSIGNED,<RESERVED>..<RESERVED>
-3099-309A,PVALID,COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK..COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
-309B-309C,DISALLOWED,KATAKANA-HIRAGANA VOICED SOUND MARK..KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
-309D-309E,PVALID,HIRAGANA ITERATION MARK..HIRAGANA VOICED ITERATION MARK
-309F-30A0,DISALLOWED,HIRAGANA DIGRAPH YORI..KATAKANA-HIRAGANA DOUBLE HYPHEN
-30A1-30FA,PVALID,KATAKANA LETTER SMALL A..KATAKANA LETTER VO
-30FB,CONTEXTO,KATAKANA MIDDLE DOT
-30FC-30FE,PVALID,KATAKANA-HIRAGANA PROLONGED SOUND MARK..KATAKANA VOICED ITERATION MARK
-30FF,DISALLOWED,KATAKANA DIGRAPH KOTO
-3100-3104,UNASSIGNED,<RESERVED>..<RESERVED>
-3105-312D,PVALID,BOPOMOFO LETTER B..BOPOMOFO LETTER IH
-312E-3130,UNASSIGNED,<RESERVED>..<RESERVED>
-3131-318E,DISALLOWED,HANGUL LETTER KIYEOK..HANGUL LETTER ARAEAE
-318F,UNASSIGNED,<RESERVED>
-3190-319F,DISALLOWED,IDEOGRAPHIC ANNOTATION LINKING MARK..IDEOGRAPHIC ANNOTATION MAN MARK
-31A0-31B7,PVALID,BOPOMOFO LETTER BU..BOPOMOFO FINAL LETTER H
-31B8-31BF,UNASSIGNED,<RESERVED>..<RESERVED>
-31C0-31E3,DISALLOWED,CJK STROKE T..CJK STROKE Q
-31E4-31EF,UNASSIGNED,<RESERVED>..<RESERVED>
-31F0-31FF,PVALID,KATAKANA LETTER SMALL KU..KATAKANA LETTER SMALL RO
-3200-321E,DISALLOWED,PARENTHESIZED HANGUL KIYEOK..PARENTHESIZED KOREAN CHARACTER O HU
-321F,UNASSIGNED,<RESERVED>
-3220-32FE,DISALLOWED,PARENTHESIZED IDEOGRAPH ONE..CIRCLED KATAKANA WO
-32FF,UNASSIGNED,<RESERVED>
-3300-33FF,DISALLOWED,SQUARE APAATO..SQUARE GAL
-3400-4DB5,PVALID,"<CJK IDEOGRAPH EXTENSION A, FIRST>..<CJK IDEOGRAPH EXTENSION A, LAST>"
-4DB6-4DBF,UNASSIGNED,<RESERVED>..<RESERVED>
-4DC0-4DFF,DISALLOWED,HEXAGRAM FOR THE CREATIVE HEAVEN..HEXAGRAM FOR BEFORE COMPLETION
-4E00-9FCB,PVALID,"<CJK IDEOGRAPH, FIRST>..<CJK IDEOGRAPH, LAST>"
-9FCC-9FFF,UNASSIGNED,<RESERVED>..<RESERVED>
-A000-A48C,PVALID,YI SYLLABLE IT..YI SYLLABLE YYR
-A48D-A48F,UNASSIGNED,<RESERVED>..<RESERVED>
-A490-A4C6,DISALLOWED,YI RADICAL QOT..YI RADICAL KE
-A4C7-A4CF,UNASSIGNED,<RESERVED>..<RESERVED>
-A4D0-A4FD,PVALID,LISU LETTER BA..LISU LETTER TONE MYA JEU
-A4FE-A4FF,DISALLOWED,LISU PUNCTUATION COMMA..LISU PUNCTUATION FULL STOP
-A500-A60C,PVALID,VAI SYLLABLE EE..VAI SYLLABLE LENGTHENER
-A60D-A60F,DISALLOWED,VAI COMMA..VAI QUESTION MARK
-A610-A62B,PVALID,VAI SYLLABLE NDOLE FA..VAI SYLLABLE NDOLE DO
-A62C-A63F,UNASSIGNED,<RESERVED>..<RESERVED>
-A640,DISALLOWED,CYRILLIC CAPITAL LETTER ZEMLYA
-A641,PVALID,CYRILLIC SMALL LETTER ZEMLYA
-A642,DISALLOWED,CYRILLIC CAPITAL LETTER DZELO
-A643,PVALID,CYRILLIC SMALL LETTER DZELO
-A644,DISALLOWED,CYRILLIC CAPITAL LETTER REVERSED DZE
-A645,PVALID,CYRILLIC SMALL LETTER REVERSED DZE
-A646,DISALLOWED,CYRILLIC CAPITAL LETTER IOTA
-A647,PVALID,CYRILLIC SMALL LETTER IOTA
-A648,DISALLOWED,CYRILLIC CAPITAL LETTER DJERV
-A649,PVALID,CYRILLIC SMALL LETTER DJERV
-A64A,DISALLOWED,CYRILLIC CAPITAL LETTER MONOGRAPH UK
-A64B,PVALID,CYRILLIC SMALL LETTER MONOGRAPH UK
-A64C,DISALLOWED,CYRILLIC CAPITAL LETTER BROAD OMEGA
-A64D,PVALID,CYRILLIC SMALL LETTER BROAD OMEGA
-A64E,DISALLOWED,CYRILLIC CAPITAL LETTER NEUTRAL YER
-A64F,PVALID,CYRILLIC SMALL LETTER NEUTRAL YER
-A650,DISALLOWED,CYRILLIC CAPITAL LETTER YERU WITH BACK YER
-A651,PVALID,CYRILLIC SMALL LETTER YERU WITH BACK YER
-A652,DISALLOWED,CYRILLIC CAPITAL LETTER IOTIFIED YAT
-A653,PVALID,CYRILLIC SMALL LETTER IOTIFIED YAT
-A654,DISALLOWED,CYRILLIC CAPITAL LETTER REVERSED YU
-A655,PVALID,CYRILLIC SMALL LETTER REVERSED YU
-A656,DISALLOWED,CYRILLIC CAPITAL LETTER IOTIFIED A
-A657,PVALID,CYRILLIC SMALL LETTER IOTIFIED A
-A658,DISALLOWED,CYRILLIC CAPITAL LETTER CLOSED LITTLE YUS
-A659,PVALID,CYRILLIC SMALL LETTER CLOSED LITTLE YUS
-A65A,DISALLOWED,CYRILLIC CAPITAL LETTER BLENDED YUS
-A65B,PVALID,CYRILLIC SMALL LETTER BLENDED YUS
-A65C,DISALLOWED,CYRILLIC CAPITAL LETTER IOTIFIED CLOSED LITTLE YUS
-A65D,PVALID,CYRILLIC SMALL LETTER IOTIFIED CLOSED LITTLE YUS
-A65E,DISALLOWED,CYRILLIC CAPITAL LETTER YN
-A65F,PVALID,CYRILLIC SMALL LETTER YN
-A660-A661,UNASSIGNED,<RESERVED>..<RESERVED>
-A662,DISALLOWED,CYRILLIC CAPITAL LETTER SOFT DE
-A663,PVALID,CYRILLIC SMALL LETTER SOFT DE
-A664,DISALLOWED,CYRILLIC CAPITAL LETTER SOFT EL
-A665,PVALID,CYRILLIC SMALL LETTER SOFT EL
-A666,DISALLOWED,CYRILLIC CAPITAL LETTER SOFT EM
-A667,PVALID,CYRILLIC SMALL LETTER SOFT EM
-A668,DISALLOWED,CYRILLIC CAPITAL LETTER MONOCULAR O
-A669,PVALID,CYRILLIC SMALL LETTER MONOCULAR O
-A66A,DISALLOWED,CYRILLIC CAPITAL LETTER BINOCULAR O
-A66B,PVALID,CYRILLIC SMALL LETTER BINOCULAR O
-A66C,DISALLOWED,CYRILLIC CAPITAL LETTER DOUBLE MONOCULAR O
-A66D-A66F,PVALID,CYRILLIC SMALL LETTER DOUBLE MONOCULAR O..COMBINING CYRILLIC VZMET
-A670-A673,DISALLOWED,COMBINING CYRILLIC TEN MILLIONS SIGN..SLAVONIC ASTERISK
-A674-A67B,UNASSIGNED,<RESERVED>..<RESERVED>
-A67C-A67D,PVALID,COMBINING CYRILLIC KAVYKA..COMBINING CYRILLIC PAYEROK
-A67E,DISALLOWED,CYRILLIC KAVYKA
-A67F,PVALID,CYRILLIC PAYEROK
-A680,DISALLOWED,CYRILLIC CAPITAL LETTER DWE
-A681,PVALID,CYRILLIC SMALL LETTER DWE
-A682,DISALLOWED,CYRILLIC CAPITAL LETTER DZWE
-A683,PVALID,CYRILLIC SMALL LETTER DZWE
-A684,DISALLOWED,CYRILLIC CAPITAL LETTER ZHWE
-A685,PVALID,CYRILLIC SMALL LETTER ZHWE
-A686,DISALLOWED,CYRILLIC CAPITAL LETTER CCHE
-A687,PVALID,CYRILLIC SMALL LETTER CCHE
-A688,DISALLOWED,CYRILLIC CAPITAL LETTER DZZE
-A689,PVALID,CYRILLIC SMALL LETTER DZZE
-A68A,DISALLOWED,CYRILLIC CAPITAL LETTER TE WITH MIDDLE HOOK
-A68B,PVALID,CYRILLIC SMALL LETTER TE WITH MIDDLE HOOK
-A68C,DISALLOWED,CYRILLIC CAPITAL LETTER TWE
-A68D,PVALID,CYRILLIC SMALL LETTER TWE
-A68E,DISALLOWED,CYRILLIC CAPITAL LETTER TSWE
-A68F,PVALID,CYRILLIC SMALL LETTER TSWE
-A690,DISALLOWED,CYRILLIC CAPITAL LETTER TSSE
-A691,PVALID,CYRILLIC SMALL LETTER TSSE
-A692,DISALLOWED,CYRILLIC CAPITAL LETTER TCHE
-A693,PVALID,CYRILLIC SMALL LETTER TCHE
-A694,DISALLOWED,CYRILLIC CAPITAL LETTER HWE
-A695,PVALID,CYRILLIC SMALL LETTER HWE
-A696,DISALLOWED,CYRILLIC CAPITAL LETTER SHWE
-A697,PVALID,CYRILLIC SMALL LETTER SHWE
-A698-A69F,UNASSIGNED,<RESERVED>..<RESERVED>
-A6A0-A6E5,PVALID,BAMUM LETTER A..BAMUM LETTER KI
-A6E6-A6EF,DISALLOWED,BAMUM LETTER MO..BAMUM LETTER KOGHOM
-A6F0-A6F1,PVALID,BAMUM COMBINING MARK KOQNDON..BAMUM COMBINING MARK TUKWENTIS
-A6F2-A6F7,DISALLOWED,BAMUM NJAEMLI..BAMUM QUESTION MARK
-A6F8-A6FF,UNASSIGNED,<RESERVED>..<RESERVED>
-A700-A716,DISALLOWED,MODIFIER LETTER CHINESE TONE YIN PING..MODIFIER LETTER EXTRA-LOW LEFT-STEM TONE BAR
-A717-A71F,PVALID,MODIFIER LETTER DOT VERTICAL BAR..MODIFIER LETTER LOW INVERTED EXCLAMATION MARK
-A720-A722,DISALLOWED,MODIFIER LETTER STRESS AND HIGH TONE..LATIN CAPITAL LETTER EGYPTOLOGICAL ALEF
-A723,PVALID,LATIN SMALL LETTER EGYPTOLOGICAL ALEF
-A724,DISALLOWED,LATIN CAPITAL LETTER EGYPTOLOGICAL AIN
-A725,PVALID,LATIN SMALL LETTER EGYPTOLOGICAL AIN
-A726,DISALLOWED,LATIN CAPITAL LETTER HENG
-A727,PVALID,LATIN SMALL LETTER HENG
-A728,DISALLOWED,LATIN CAPITAL LETTER TZ
-A729,PVALID,LATIN SMALL LETTER TZ
-A72A,DISALLOWED,LATIN CAPITAL LETTER TRESILLO
-A72B,PVALID,LATIN SMALL LETTER TRESILLO
-A72C,DISALLOWED,LATIN CAPITAL LETTER CUATRILLO
-A72D,PVALID,LATIN SMALL LETTER CUATRILLO
-A72E,DISALLOWED,LATIN CAPITAL LETTER CUATRILLO WITH COMMA
-A72F-A731,PVALID,LATIN SMALL LETTER CUATRILLO WITH COMMA..LATIN LETTER SMALL CAPITAL S
-A732,DISALLOWED,LATIN CAPITAL LETTER AA
-A733,PVALID,LATIN SMALL LETTER AA
-A734,DISALLOWED,LATIN CAPITAL LETTER AO
-A735,PVALID,LATIN SMALL LETTER AO
-A736,DISALLOWED,LATIN CAPITAL LETTER AU
-A737,PVALID,LATIN SMALL LETTER AU
-A738,DISALLOWED,LATIN CAPITAL LETTER AV
-A739,PVALID,LATIN SMALL LETTER AV
-A73A,DISALLOWED,LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR
-A73B,PVALID,LATIN SMALL LETTER AV WITH HORIZONTAL BAR
-A73C,DISALLOWED,LATIN CAPITAL LETTER AY
-A73D,PVALID,LATIN SMALL LETTER AY
-A73E,DISALLOWED,LATIN CAPITAL LETTER REVERSED C WITH DOT
-A73F,PVALID,LATIN SMALL LETTER REVERSED C WITH DOT
-A740,DISALLOWED,LATIN CAPITAL LETTER K WITH STROKE
-A741,PVALID,LATIN SMALL LETTER K WITH STROKE
-A742,DISALLOWED,LATIN CAPITAL LETTER K WITH DIAGONAL STROKE
-A743,PVALID,LATIN SMALL LETTER K WITH DIAGONAL STROKE
-A744,DISALLOWED,LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE
-A745,PVALID,LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE
-A746,DISALLOWED,LATIN CAPITAL LETTER BROKEN L
-A747,PVALID,LATIN SMALL LETTER BROKEN L
-A748,DISALLOWED,LATIN CAPITAL LETTER L WITH HIGH STROKE
-A749,PVALID,LATIN SMALL LETTER L WITH HIGH STROKE
-A74A,DISALLOWED,LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY
-A74B,PVALID,LATIN SMALL LETTER O WITH LONG STROKE OVERLAY
-A74C,DISALLOWED,LATIN CAPITAL LETTER O WITH LOOP
-A74D,PVALID,LATIN SMALL LETTER O WITH LOOP
-A74E,DISALLOWED,LATIN CAPITAL LETTER OO
-A74F,PVALID,LATIN SMALL LETTER OO
-A750,DISALLOWED,LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER
-A751,PVALID,LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER
-A752,DISALLOWED,LATIN CAPITAL LETTER P WITH FLOURISH
-A753,PVALID,LATIN SMALL LETTER P WITH FLOURISH
-A754,DISALLOWED,LATIN CAPITAL LETTER P WITH SQUIRREL TAIL
-A755,PVALID,LATIN SMALL LETTER P WITH SQUIRREL TAIL
-A756,DISALLOWED,LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER
-A757,PVALID,LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER
-A758,DISALLOWED,LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE
-A759,PVALID,LATIN SMALL LETTER Q WITH DIAGONAL STROKE
-A75A,DISALLOWED,LATIN CAPITAL LETTER R ROTUNDA
-A75B,PVALID,LATIN SMALL LETTER R ROTUNDA
-A75C,DISALLOWED,LATIN CAPITAL LETTER RUM ROTUNDA
-A75D,PVALID,LATIN SMALL LETTER RUM ROTUNDA
-A75E,DISALLOWED,LATIN CAPITAL LETTER V WITH DIAGONAL STROKE
-A75F,PVALID,LATIN SMALL LETTER V WITH DIAGONAL STROKE
-A760,DISALLOWED,LATIN CAPITAL LETTER VY
-A761,PVALID,LATIN SMALL LETTER VY
-A762,DISALLOWED,LATIN CAPITAL LETTER VISIGOTHIC Z
-A763,PVALID,LATIN SMALL LETTER VISIGOTHIC Z
-A764,DISALLOWED,LATIN CAPITAL LETTER THORN WITH STROKE
-A765,PVALID,LATIN SMALL LETTER THORN WITH STROKE
-A766,DISALLOWED,LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER
-A767,PVALID,LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER
-A768,DISALLOWED,LATIN CAPITAL LETTER VEND
-A769,PVALID,LATIN SMALL LETTER VEND
-A76A,DISALLOWED,LATIN CAPITAL LETTER ET
-A76B,PVALID,LATIN SMALL LETTER ET
-A76C,DISALLOWED,LATIN CAPITAL LETTER IS
-A76D,PVALID,LATIN SMALL LETTER IS
-A76E,DISALLOWED,LATIN CAPITAL LETTER CON
-A76F,PVALID,LATIN SMALL LETTER CON
-A770,DISALLOWED,MODIFIER LETTER US
-A771-A778,PVALID,LATIN SMALL LETTER DUM..LATIN SMALL LETTER UM
-A779,DISALLOWED,LATIN CAPITAL LETTER INSULAR D
-A77A,PVALID,LATIN SMALL LETTER INSULAR D
-A77B,DISALLOWED,LATIN CAPITAL LETTER INSULAR F
-A77C,PVALID,LATIN SMALL LETTER INSULAR F
-A77D-A77E,DISALLOWED,LATIN CAPITAL LETTER INSULAR G..LATIN CAPITAL LETTER TURNED INSULAR G
-A77F,PVALID,LATIN SMALL LETTER TURNED INSULAR G
-A780,DISALLOWED,LATIN CAPITAL LETTER TURNED L
-A781,PVALID,LATIN SMALL LETTER TURNED L
-A782,DISALLOWED,LATIN CAPITAL LETTER INSULAR R
-A783,PVALID,LATIN SMALL LETTER INSULAR R
-A784,DISALLOWED,LATIN CAPITAL LETTER INSULAR S
-A785,PVALID,LATIN SMALL LETTER INSULAR S
-A786,DISALLOWED,LATIN CAPITAL LETTER INSULAR T
-A787-A788,PVALID,LATIN SMALL LETTER INSULAR T..MODIFIER LETTER LOW CIRCUMFLEX ACCENT
-A789-A78B,DISALLOWED,MODIFIER LETTER COLON..LATIN CAPITAL LETTER SALTILLO
-A78C,PVALID,LATIN SMALL LETTER SALTILLO
-A78D-A7FA,UNASSIGNED,<RESERVED>..<RESERVED>
-A7FB-A827,PVALID,LATIN EPIGRAPHIC LETTER REVERSED F..SYLOTI NAGRI VOWEL SIGN OO
-A828-A82B,DISALLOWED,SYLOTI NAGRI POETRY MARK-1..SYLOTI NAGRI POETRY MARK-4
-A82C-A82F,UNASSIGNED,<RESERVED>..<RESERVED>
-A830-A839,DISALLOWED,NORTH INDIC FRACTION ONE QUARTER..NORTH INDIC QUANTITY MARK
-A83A-A83F,UNASSIGNED,<RESERVED>..<RESERVED>
-A840-A873,PVALID,PHAGS-PA LETTER KA..PHAGS-PA LETTER CANDRABINDU
-A874-A877,DISALLOWED,PHAGS-PA SINGLE HEAD MARK..PHAGS-PA MARK DOUBLE SHAD
-A878-A87F,UNASSIGNED,<RESERVED>..<RESERVED>
-A880-A8C4,PVALID,SAURASHTRA SIGN ANUSVARA..SAURASHTRA SIGN VIRAMA
-A8C5-A8CD,UNASSIGNED,<RESERVED>..<RESERVED>
-A8CE-A8CF,DISALLOWED,SAURASHTRA DANDA..SAURASHTRA DOUBLE DANDA
-A8D0-A8D9,PVALID,SAURASHTRA DIGIT ZERO..SAURASHTRA DIGIT NINE
-A8DA-A8DF,UNASSIGNED,<RESERVED>..<RESERVED>
-A8E0-A8F7,PVALID,COMBINING DEVANAGARI DIGIT ZERO..DEVANAGARI SIGN CANDRABINDU AVAGRAHA
-A8F8-A8FA,DISALLOWED,DEVANAGARI SIGN PUSHPIKA..DEVANAGARI CARET
-A8FB,PVALID,DEVANAGARI HEADSTROKE
-A8FC-A8FF,UNASSIGNED,<RESERVED>..<RESERVED>
-A900-A92D,PVALID,KAYAH LI DIGIT ZERO..KAYAH LI TONE CALYA PLOPHU
-A92E-A92F,DISALLOWED,KAYAH LI SIGN CWI..KAYAH LI SIGN SHYA
-A930-A953,PVALID,REJANG LETTER KA..REJANG VIRAMA
-A954-A95E,UNASSIGNED,<RESERVED>..<RESERVED>
-A95F-A97C,DISALLOWED,REJANG SECTION MARK..HANGUL CHOSEONG SSANGYEORINHIEUH
-A97D-A97F,UNASSIGNED,<RESERVED>..<RESERVED>
-A980-A9C0,PVALID,JAVANESE SIGN PANYANGGA..JAVANESE PANGKON
-A9C1-A9CD,DISALLOWED,JAVANESE LEFT RERENGGAN..JAVANESE TURNED PADA PISELEH
-A9CE,UNASSIGNED,<RESERVED>
-A9CF-A9D9,PVALID,JAVANESE PANGRANGKEP..JAVANESE DIGIT NINE
-A9DA-A9DD,UNASSIGNED,<RESERVED>..<RESERVED>
-A9DE-A9DF,DISALLOWED,JAVANESE PADA TIRTA TUMETES..JAVANESE PADA ISEN-ISEN
-A9E0-A9FF,UNASSIGNED,<RESERVED>..<RESERVED>
-AA00-AA36,PVALID,CHAM LETTER A..CHAM CONSONANT SIGN WA
-AA37-AA3F,UNASSIGNED,<RESERVED>..<RESERVED>
-AA40-AA4D,PVALID,CHAM LETTER FINAL K..CHAM CONSONANT SIGN FINAL H
-AA4E-AA4F,UNASSIGNED,<RESERVED>..<RESERVED>
-AA50-AA59,PVALID,CHAM DIGIT ZERO..CHAM DIGIT NINE
-AA5A-AA5B,UNASSIGNED,<RESERVED>..<RESERVED>
-AA5C-AA5F,DISALLOWED,CHAM PUNCTUATION SPIRAL..CHAM PUNCTUATION TRIPLE DANDA
-AA60-AA76,PVALID,MYANMAR LETTER KHAMTI GA..MYANMAR LOGOGRAM KHAMTI HM
-AA77-AA79,DISALLOWED,MYANMAR SYMBOL AITON EXCLAMATION..MYANMAR SYMBOL AITON TWO
-AA7A-AA7B,PVALID,MYANMAR LETTER AITON RA..MYANMAR SIGN PAO KAREN TONE
-AA7C-AA7F,UNASSIGNED,<RESERVED>..<RESERVED>
-AA80-AAC2,PVALID,TAI VIET LETTER LOW KO..TAI VIET TONE MAI SONG
-AAC3-AADA,UNASSIGNED,<RESERVED>..<RESERVED>
-AADB-AADD,PVALID,TAI VIET SYMBOL KON..TAI VIET SYMBOL SAM
-AADE-AADF,DISALLOWED,TAI VIET SYMBOL HO HOI..TAI VIET SYMBOL KOI KOI
-AAE0-ABBF,UNASSIGNED,<RESERVED>..<RESERVED>
-ABC0-ABEA,PVALID,MEETEI MAYEK LETTER KOK..MEETEI MAYEK VOWEL SIGN NUNG
-ABEB,DISALLOWED,MEETEI MAYEK CHEIKHEI
-ABEC-ABED,PVALID,MEETEI MAYEK LUM IYEK..MEETEI MAYEK APUN IYEK
-ABEE-ABEF,UNASSIGNED,<RESERVED>..<RESERVED>
-ABF0-ABF9,PVALID,MEETEI MAYEK DIGIT ZERO..MEETEI MAYEK DIGIT NINE
-ABFA-ABFF,UNASSIGNED,<RESERVED>..<RESERVED>
-AC00-D7A3,PVALID,"<HANGUL SYLLABLE, FIRST>..<HANGUL SYLLABLE, LAST>"
-D7A4-D7AF,UNASSIGNED,<RESERVED>..<RESERVED>
-D7B0-D7C6,DISALLOWED,HANGUL JUNGSEONG O-YEO..HANGUL JUNGSEONG ARAEA-E
-D7C7-D7CA,UNASSIGNED,<RESERVED>..<RESERVED>
-D7CB-D7FB,DISALLOWED,HANGUL JONGSEONG NIEUN-RIEUL..HANGUL JONGSEONG PHIEUPH-THIEUTH
-D7FC-D7FF,UNASSIGNED,<RESERVED>..<RESERVED>
-D800-FA0D,DISALLOWED,"<NON PRIVATE USE HIGH SURROGATE, FIRST>..CJK COMPATIBILITY IDEOGRAPH-FA0D"
-FA0E-FA0F,PVALID,CJK COMPATIBILITY IDEOGRAPH-FA0E..CJK COMPATIBILITY IDEOGRAPH-FA0F
-FA10,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-FA10
-FA11,PVALID,CJK COMPATIBILITY IDEOGRAPH-FA11
-FA12,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-FA12
-FA13-FA14,PVALID,CJK COMPATIBILITY IDEOGRAPH-FA13..CJK COMPATIBILITY IDEOGRAPH-FA14
-FA15-FA1E,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-FA15..CJK COMPATIBILITY IDEOGRAPH-FA1E
-FA1F,PVALID,CJK COMPATIBILITY IDEOGRAPH-FA1F
-FA20,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-FA20
-FA21,PVALID,CJK COMPATIBILITY IDEOGRAPH-FA21
-FA22,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-FA22
-FA23-FA24,PVALID,CJK COMPATIBILITY IDEOGRAPH-FA23..CJK COMPATIBILITY IDEOGRAPH-FA24
-FA25-FA26,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-FA25..CJK COMPATIBILITY IDEOGRAPH-FA26
-FA27-FA29,PVALID,CJK COMPATIBILITY IDEOGRAPH-FA27..CJK COMPATIBILITY IDEOGRAPH-FA29
-FA2A-FA2D,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-FA2A..CJK COMPATIBILITY IDEOGRAPH-FA2D
-FA2E-FA2F,UNASSIGNED,<RESERVED>..<RESERVED>
-FA30-FA6D,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-FA30..CJK COMPATIBILITY IDEOGRAPH-FA6D
-FA6E-FA6F,UNASSIGNED,<RESERVED>..<RESERVED>
-FA70-FAD9,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-FA70..CJK COMPATIBILITY IDEOGRAPH-FAD9
-FADA-FAFF,UNASSIGNED,<RESERVED>..<RESERVED>
-FB00-FB06,DISALLOWED,LATIN SMALL LIGATURE FF..LATIN SMALL LIGATURE ST
-FB07-FB12,UNASSIGNED,<RESERVED>..<RESERVED>
-FB13-FB17,DISALLOWED,ARMENIAN SMALL LIGATURE MEN NOW..ARMENIAN SMALL LIGATURE MEN XEH
-FB18-FB1C,UNASSIGNED,<RESERVED>..<RESERVED>
-FB1D,DISALLOWED,HEBREW LETTER YOD WITH HIRIQ
-FB1E,PVALID,HEBREW POINT JUDEO-SPANISH VARIKA
-FB1F-FB36,DISALLOWED,HEBREW LIGATURE YIDDISH YOD YOD PATAH..HEBREW LETTER ZAYIN WITH DAGESH
-FB37,UNASSIGNED,<RESERVED>
-FB38-FB3C,DISALLOWED,HEBREW LETTER TET WITH DAGESH..HEBREW LETTER LAMED WITH DAGESH
-FB3D,UNASSIGNED,<RESERVED>
-FB3E,DISALLOWED,HEBREW LETTER MEM WITH DAGESH
-FB3F,UNASSIGNED,<RESERVED>
-FB40-FB41,DISALLOWED,HEBREW LETTER NUN WITH DAGESH..HEBREW LETTER SAMEKH WITH DAGESH
-FB42,UNASSIGNED,<RESERVED>
-FB43-FB44,DISALLOWED,HEBREW LETTER FINAL PE WITH DAGESH..HEBREW LETTER PE WITH DAGESH
-FB45,UNASSIGNED,<RESERVED>
-FB46-FBB1,DISALLOWED,HEBREW LETTER TSADI WITH DAGESH..ARABIC LETTER YEH BARREE WITH HAMZA ABOVE FINAL FORM
-FBB2-FBD2,UNASSIGNED,<RESERVED>..<RESERVED>
-FBD3-FD3F,DISALLOWED,ARABIC LETTER NG ISOLATED FORM..ORNATE RIGHT PARENTHESIS
-FD40-FD4F,UNASSIGNED,<RESERVED>..<RESERVED>
-FD50-FD8F,DISALLOWED,ARABIC LIGATURE TEH WITH JEEM WITH MEEM INITIAL FORM..ARABIC LIGATURE MEEM WITH KHAH WITH MEEM INITIAL FORM
-FD90-FD91,UNASSIGNED,<RESERVED>..<RESERVED>
-FD92-FDC7,DISALLOWED,ARABIC LIGATURE MEEM WITH JEEM WITH KHAH INITIAL FORM..ARABIC LIGATURE NOON WITH JEEM WITH YEH FINAL FORM
-FDC8-FDCF,UNASSIGNED,<RESERVED>..<RESERVED>
-FDD0-FDFD,DISALLOWED,<NOT A CHARACTER>..ARABIC LIGATURE BISMILLAH AR-RAHMAN AR-RAHEEM
-FDFE-FDFF,UNASSIGNED,<RESERVED>..<RESERVED>
-FE00-FE19,DISALLOWED,VARIATION SELECTOR-1..PRESENTATION FORM FOR VERTICAL HORIZONTAL ELLIPSIS
-FE1A-FE1F,UNASSIGNED,<RESERVED>..<RESERVED>
-FE20-FE26,PVALID,COMBINING LIGATURE LEFT HALF..COMBINING CONJOINING MACRON
-FE27-FE2F,UNASSIGNED,<RESERVED>..<RESERVED>
-FE30-FE52,DISALLOWED,PRESENTATION FORM FOR VERTICAL TWO DOT LEADER..SMALL FULL STOP
-FE53,UNASSIGNED,<RESERVED>
-FE54-FE66,DISALLOWED,SMALL SEMICOLON..SMALL EQUALS SIGN
-FE67,UNASSIGNED,<RESERVED>
-FE68-FE6B,DISALLOWED,SMALL REVERSE SOLIDUS..SMALL COMMERCIAL AT
-FE6C-FE6F,UNASSIGNED,<RESERVED>..<RESERVED>
-FE70-FE72,DISALLOWED,ARABIC FATHATAN ISOLATED FORM..ARABIC DAMMATAN ISOLATED FORM
-FE73,PVALID,ARABIC TAIL FRAGMENT
-FE74,DISALLOWED,ARABIC KASRATAN ISOLATED FORM
-FE75,UNASSIGNED,<RESERVED>
-FE76-FEFC,DISALLOWED,ARABIC FATHA ISOLATED FORM..ARABIC LIGATURE LAM WITH ALEF FINAL FORM
-FEFD-FEFE,UNASSIGNED,<RESERVED>..<RESERVED>
-FEFF,DISALLOWED,ZERO WIDTH NO-BREAK SPACE
-FF00,UNASSIGNED,<RESERVED>
-FF01-FFBE,DISALLOWED,FULLWIDTH EXCLAMATION MARK..HALFWIDTH HANGUL LETTER HIEUH
-FFBF-FFC1,UNASSIGNED,<RESERVED>..<RESERVED>
-FFC2-FFC7,DISALLOWED,HALFWIDTH HANGUL LETTER A..HALFWIDTH HANGUL LETTER E
-FFC8-FFC9,UNASSIGNED,<RESERVED>..<RESERVED>
-FFCA-FFCF,DISALLOWED,HALFWIDTH HANGUL LETTER YEO..HALFWIDTH HANGUL LETTER OE
-FFD0-FFD1,UNASSIGNED,<RESERVED>..<RESERVED>
-FFD2-FFD7,DISALLOWED,HALFWIDTH HANGUL LETTER YO..HALFWIDTH HANGUL LETTER YU
-FFD8-FFD9,UNASSIGNED,<RESERVED>..<RESERVED>
-FFDA-FFDC,DISALLOWED,HALFWIDTH HANGUL LETTER EU..HALFWIDTH HANGUL LETTER I
-FFDD-FFDF,UNASSIGNED,<RESERVED>..<RESERVED>
-FFE0-FFE6,DISALLOWED,FULLWIDTH CENT SIGN..FULLWIDTH WON SIGN
-FFE7,UNASSIGNED,<RESERVED>
-FFE8-FFEE,DISALLOWED,HALFWIDTH FORMS LIGHT VERTICAL..HALFWIDTH WHITE CIRCLE
-FFEF-FFF8,UNASSIGNED,<RESERVED>..<RESERVED>
-FFF9-FFFF,DISALLOWED,INTERLINEAR ANNOTATION ANCHOR..<NOT A CHARACTER>
-10000-1000B,PVALID,LINEAR B SYLLABLE B008 A..LINEAR B SYLLABLE B046 JE
-1000C,UNASSIGNED,<RESERVED>
-1000D-10026,PVALID,LINEAR B SYLLABLE B036 JO..LINEAR B SYLLABLE B032 QO
-10027,UNASSIGNED,<RESERVED>
-10028-1003A,PVALID,LINEAR B SYLLABLE B060 RA..LINEAR B SYLLABLE B042 WO
-1003B,UNASSIGNED,<RESERVED>
-1003C-1003D,PVALID,LINEAR B SYLLABLE B017 ZA..LINEAR B SYLLABLE B074 ZE
-1003E,UNASSIGNED,<RESERVED>
-1003F-1004D,PVALID,LINEAR B SYLLABLE B020 ZO..LINEAR B SYLLABLE B091 TWO
-1004E-1004F,UNASSIGNED,<RESERVED>..<RESERVED>
-10050-1005D,PVALID,LINEAR B SYMBOL B018..LINEAR B SYMBOL B089
-1005E-1007F,UNASSIGNED,<RESERVED>..<RESERVED>
-10080-100FA,PVALID,LINEAR B IDEOGRAM B100 MAN..LINEAR B IDEOGRAM VESSEL B305
-100FB-100FF,UNASSIGNED,<RESERVED>..<RESERVED>
-10100-10102,DISALLOWED,AEGEAN WORD SEPARATOR LINE..AEGEAN CHECK MARK
-10103-10106,UNASSIGNED,<RESERVED>..<RESERVED>
-10107-10133,DISALLOWED,AEGEAN NUMBER ONE..AEGEAN NUMBER NINETY THOUSAND
-10134-10136,UNASSIGNED,<RESERVED>..<RESERVED>
-10137-1018A,DISALLOWED,AEGEAN WEIGHT BASE UNIT..GREEK ZERO SIGN
-1018B-1018F,UNASSIGNED,<RESERVED>..<RESERVED>
-10190-1019B,DISALLOWED,ROMAN SEXTANS SIGN..ROMAN CENTURIAL SIGN
-1019C-101CF,UNASSIGNED,<RESERVED>..<RESERVED>
-101D0-101FC,DISALLOWED,PHAISTOS DISC SIGN PEDESTRIAN..PHAISTOS DISC SIGN WAVY BAND
-101FD,PVALID,PHAISTOS DISC SIGN COMBINING OBLIQUE STROKE
-101FE-1027F,UNASSIGNED,<RESERVED>..<RESERVED>
-10280-1029C,PVALID,LYCIAN LETTER A..LYCIAN LETTER X
-1029D-1029F,UNASSIGNED,<RESERVED>..<RESERVED>
-102A0-102D0,PVALID,CARIAN LETTER A..CARIAN LETTER UUU3
-102D1-102FF,UNASSIGNED,<RESERVED>..<RESERVED>
-10300-1031E,PVALID,OLD ITALIC LETTER A..OLD ITALIC LETTER UU
-1031F,UNASSIGNED,<RESERVED>
-10320-10323,DISALLOWED,OLD ITALIC NUMERAL ONE..OLD ITALIC NUMERAL FIFTY
-10324-1032F,UNASSIGNED,<RESERVED>..<RESERVED>
-10330-10340,PVALID,GOTHIC LETTER AHSA..GOTHIC LETTER PAIRTHRA
-10341,DISALLOWED,GOTHIC LETTER NINETY
-10342-10349,PVALID,GOTHIC LETTER RAIDA..GOTHIC LETTER OTHAL
-1034A,DISALLOWED,GOTHIC LETTER NINE HUNDRED
-1034B-1037F,UNASSIGNED,<RESERVED>..<RESERVED>
-10380-1039D,PVALID,UGARITIC LETTER ALPA..UGARITIC LETTER SSU
-1039E,UNASSIGNED,<RESERVED>
-1039F,DISALLOWED,UGARITIC WORD DIVIDER
-103A0-103C3,PVALID,OLD PERSIAN SIGN A..OLD PERSIAN SIGN HA
-103C4-103C7,UNASSIGNED,<RESERVED>..<RESERVED>
-103C8-103CF,PVALID,OLD PERSIAN SIGN AURAMAZDAA..OLD PERSIAN SIGN BUUMISH
-103D0-103D5,DISALLOWED,OLD PERSIAN WORD DIVIDER..OLD PERSIAN NUMBER HUNDRED
-103D6-103FF,UNASSIGNED,<RESERVED>..<RESERVED>
-10400-10427,DISALLOWED,DESERET CAPITAL LETTER LONG I..DESERET CAPITAL LETTER EW
-10428-1049D,PVALID,DESERET SMALL LETTER LONG I..OSMANYA LETTER OO
-1049E-1049F,UNASSIGNED,<RESERVED>..<RESERVED>
-104A0-104A9,PVALID,OSMANYA DIGIT ZERO..OSMANYA DIGIT NINE
-104AA-107FF,UNASSIGNED,<RESERVED>..<RESERVED>
-10800-10805,PVALID,CYPRIOT SYLLABLE A..CYPRIOT SYLLABLE JA
-10806-10807,UNASSIGNED,<RESERVED>..<RESERVED>
-10808,PVALID,CYPRIOT SYLLABLE JO
-10809,UNASSIGNED,<RESERVED>
-1080A-10835,PVALID,CYPRIOT SYLLABLE KA..CYPRIOT SYLLABLE WO
-10836,UNASSIGNED,<RESERVED>
-10837-10838,PVALID,CYPRIOT SYLLABLE XA..CYPRIOT SYLLABLE XE
-10839-1083B,UNASSIGNED,<RESERVED>..<RESERVED>
-1083C,PVALID,CYPRIOT SYLLABLE ZA
-1083D-1083E,UNASSIGNED,<RESERVED>..<RESERVED>
-1083F-10855,PVALID,CYPRIOT SYLLABLE ZO..IMPERIAL ARAMAIC LETTER TAW
-10856,UNASSIGNED,<RESERVED>
-10857-1085F,DISALLOWED,IMPERIAL ARAMAIC SECTION SIGN..IMPERIAL ARAMAIC NUMBER TEN THOUSAND
-10860-108FF,UNASSIGNED,<RESERVED>..<RESERVED>
-10900-10915,PVALID,PHOENICIAN LETTER ALF..PHOENICIAN LETTER TAU
-10916-1091B,DISALLOWED,PHOENICIAN NUMBER ONE..PHOENICIAN NUMBER THREE
-1091C-1091E,UNASSIGNED,<RESERVED>..<RESERVED>
-1091F,DISALLOWED,PHOENICIAN WORD SEPARATOR
-10920-10939,PVALID,LYDIAN LETTER A..LYDIAN LETTER C
-1093A-1093E,UNASSIGNED,<RESERVED>..<RESERVED>
-1093F,DISALLOWED,LYDIAN TRIANGULAR MARK
-10940-109FF,UNASSIGNED,<RESERVED>..<RESERVED>
-10A00-10A03,PVALID,KHAROSHTHI LETTER A..KHAROSHTHI VOWEL SIGN VOCALIC R
-10A04,UNASSIGNED,<RESERVED>
-10A05-10A06,PVALID,KHAROSHTHI VOWEL SIGN E..KHAROSHTHI VOWEL SIGN O
-10A07-10A0B,UNASSIGNED,<RESERVED>..<RESERVED>
-10A0C-10A13,PVALID,KHAROSHTHI VOWEL LENGTH MARK..KHAROSHTHI LETTER GHA
-10A14,UNASSIGNED,<RESERVED>
-10A15-10A17,PVALID,KHAROSHTHI LETTER CA..KHAROSHTHI LETTER JA
-10A18,UNASSIGNED,<RESERVED>
-10A19-10A33,PVALID,KHAROSHTHI LETTER NYA..KHAROSHTHI LETTER TTTHA
-10A34-10A37,UNASSIGNED,<RESERVED>..<RESERVED>
-10A38-10A3A,PVALID,KHAROSHTHI SIGN BAR ABOVE..KHAROSHTHI SIGN DOT BELOW
-10A3B-10A3E,UNASSIGNED,<RESERVED>..<RESERVED>
-10A3F,PVALID,KHAROSHTHI VIRAMA
-10A40-10A47,DISALLOWED,KHAROSHTHI DIGIT ONE..KHAROSHTHI NUMBER ONE THOUSAND
-10A48-10A4F,UNASSIGNED,<RESERVED>..<RESERVED>
-10A50-10A58,DISALLOWED,KHAROSHTHI PUNCTUATION DOT..KHAROSHTHI PUNCTUATION LINES
-10A59-10A5F,UNASSIGNED,<RESERVED>..<RESERVED>
-10A60-10A7C,PVALID,OLD SOUTH ARABIAN LETTER HE..OLD SOUTH ARABIAN LETTER THETH
-10A7D-10A7F,DISALLOWED,OLD SOUTH ARABIAN NUMBER ONE..OLD SOUTH ARABIAN NUMERIC INDICATOR
-10A80-10AFF,UNASSIGNED,<RESERVED>..<RESERVED>
-10B00-10B35,PVALID,AVESTAN LETTER A..AVESTAN LETTER HE
-10B36-10B38,UNASSIGNED,<RESERVED>..<RESERVED>
-10B39-10B3F,DISALLOWED,AVESTAN ABBREVIATION MARK..LARGE ONE RING OVER TWO RINGS PUNCTUATION
-10B40-10B55,PVALID,INSCRIPTIONAL PARTHIAN LETTER ALEPH..INSCRIPTIONAL PARTHIAN LETTER TAW
-10B56-10B57,UNASSIGNED,<RESERVED>..<RESERVED>
-10B58-10B5F,DISALLOWED,INSCRIPTIONAL PARTHIAN NUMBER ONE..INSCRIPTIONAL PARTHIAN NUMBER ONE THOUSAND
-10B60-10B72,PVALID,INSCRIPTIONAL PAHLAVI LETTER ALEPH..INSCRIPTIONAL PAHLAVI LETTER TAW
-10B73-10B77,UNASSIGNED,<RESERVED>..<RESERVED>
-10B78-10B7F,DISALLOWED,INSCRIPTIONAL PAHLAVI NUMBER ONE..INSCRIPTIONAL PAHLAVI NUMBER ONE THOUSAND
-10B80-10BFF,UNASSIGNED,<RESERVED>..<RESERVED>
-10C00-10C48,PVALID,OLD TURKIC LETTER ORKHON A..OLD TURKIC LETTER ORKHON BASH
-10C49-10E5F,UNASSIGNED,<RESERVED>..<RESERVED>
-10E60-10E7E,DISALLOWED,RUMI DIGIT ONE..RUMI FRACTION TWO THIRDS
-10E7F-1107F,UNASSIGNED,<RESERVED>..<RESERVED>
-11080-110BA,PVALID,KAITHI SIGN CANDRABINDU..KAITHI SIGN NUKTA
-110BB-110C1,DISALLOWED,KAITHI ABBREVIATION SIGN..KAITHI DOUBLE DANDA
-110C2-11FFF,UNASSIGNED,<RESERVED>..<RESERVED>
-12000-1236E,PVALID,CUNEIFORM SIGN A..CUNEIFORM SIGN ZUM
-1236F-123FF,UNASSIGNED,<RESERVED>..<RESERVED>
-12400-12462,DISALLOWED,CUNEIFORM NUMERIC SIGN TWO ASH..CUNEIFORM NUMERIC SIGN OLD ASSYRIAN ONE QUARTER
-12463-1246F,UNASSIGNED,<RESERVED>..<RESERVED>
-12470-12473,DISALLOWED,CUNEIFORM PUNCTUATION SIGN OLD ASSYRIAN WORD DIVIDER..CUNEIFORM PUNCTUATION SIGN DIAGONAL TRICOLON
-12474-12FFF,UNASSIGNED,<RESERVED>..<RESERVED>
-13000-1342E,PVALID,EGYPTIAN HIEROGLYPH A001..EGYPTIAN HIEROGLYPH AA032
-1342F-1CFFF,UNASSIGNED,<RESERVED>..<RESERVED>
-1D000-1D0F5,DISALLOWED,BYZANTINE MUSICAL SYMBOL PSILI..BYZANTINE MUSICAL SYMBOL GORGON NEO KATO
-1D0F6-1D0FF,UNASSIGNED,<RESERVED>..<RESERVED>
-1D100-1D126,DISALLOWED,MUSICAL SYMBOL SINGLE BARLINE..MUSICAL SYMBOL DRUM CLEF-2
-1D127-1D128,UNASSIGNED,<RESERVED>..<RESERVED>
-1D129-1D1DD,DISALLOWED,MUSICAL SYMBOL MULTIPLE MEASURE REST..MUSICAL SYMBOL PES SUBPUNCTIS
-1D1DE-1D1FF,UNASSIGNED,<RESERVED>..<RESERVED>
-1D200-1D245,DISALLOWED,GREEK VOCAL NOTATION SYMBOL-1..GREEK MUSICAL LEIMMA
-1D246-1D2FF,UNASSIGNED,<RESERVED>..<RESERVED>
-1D300-1D356,DISALLOWED,MONOGRAM FOR EARTH..TETRAGRAM FOR FOSTERING
-1D357-1D35F,UNASSIGNED,<RESERVED>..<RESERVED>
-1D360-1D371,DISALLOWED,COUNTING ROD UNIT DIGIT ONE..COUNTING ROD TENS DIGIT NINE
-1D372-1D3FF,UNASSIGNED,<RESERVED>..<RESERVED>
-1D400-1D454,DISALLOWED,MATHEMATICAL BOLD CAPITAL A..MATHEMATICAL ITALIC SMALL G
-1D455,UNASSIGNED,<RESERVED>
-1D456-1D49C,DISALLOWED,MATHEMATICAL ITALIC SMALL I..MATHEMATICAL SCRIPT CAPITAL A
-1D49D,UNASSIGNED,<RESERVED>
-1D49E-1D49F,DISALLOWED,MATHEMATICAL SCRIPT CAPITAL C..MATHEMATICAL SCRIPT CAPITAL D
-1D4A0-1D4A1,UNASSIGNED,<RESERVED>..<RESERVED>
-1D4A2,DISALLOWED,MATHEMATICAL SCRIPT CAPITAL G
-1D4A3-1D4A4,UNASSIGNED,<RESERVED>..<RESERVED>
-1D4A5-1D4A6,DISALLOWED,MATHEMATICAL SCRIPT CAPITAL J..MATHEMATICAL SCRIPT CAPITAL K
-1D4A7-1D4A8,UNASSIGNED,<RESERVED>..<RESERVED>
-1D4A9-1D4AC,DISALLOWED,MATHEMATICAL SCRIPT CAPITAL N..MATHEMATICAL SCRIPT CAPITAL Q
-1D4AD,UNASSIGNED,<RESERVED>
-1D4AE-1D4B9,DISALLOWED,MATHEMATICAL SCRIPT CAPITAL S..MATHEMATICAL SCRIPT SMALL D
-1D4BA,UNASSIGNED,<RESERVED>
-1D4BB,DISALLOWED,MATHEMATICAL SCRIPT SMALL F
-1D4BC,UNASSIGNED,<RESERVED>
-1D4BD-1D4C3,DISALLOWED,MATHEMATICAL SCRIPT SMALL H..MATHEMATICAL SCRIPT SMALL N
-1D4C4,UNASSIGNED,<RESERVED>
-1D4C5-1D505,DISALLOWED,MATHEMATICAL SCRIPT SMALL P..MATHEMATICAL FRAKTUR CAPITAL B
-1D506,UNASSIGNED,<RESERVED>
-1D507-1D50A,DISALLOWED,MATHEMATICAL FRAKTUR CAPITAL D..MATHEMATICAL FRAKTUR CAPITAL G
-1D50B-1D50C,UNASSIGNED,<RESERVED>..<RESERVED>
-1D50D-1D514,DISALLOWED,MATHEMATICAL FRAKTUR CAPITAL J..MATHEMATICAL FRAKTUR CAPITAL Q
-1D515,UNASSIGNED,<RESERVED>
-1D516-1D51C,DISALLOWED,MATHEMATICAL FRAKTUR CAPITAL S..MATHEMATICAL FRAKTUR CAPITAL Y
-1D51D,UNASSIGNED,<RESERVED>
-1D51E-1D539,DISALLOWED,MATHEMATICAL FRAKTUR SMALL A..MATHEMATICAL DOUBLE-STRUCK CAPITAL B
-1D53A,UNASSIGNED,<RESERVED>
-1D53B-1D53E,DISALLOWED,MATHEMATICAL DOUBLE-STRUCK CAPITAL D..MATHEMATICAL DOUBLE-STRUCK CAPITAL G
-1D53F,UNASSIGNED,<RESERVED>
-1D540-1D544,DISALLOWED,MATHEMATICAL DOUBLE-STRUCK CAPITAL I..MATHEMATICAL DOUBLE-STRUCK CAPITAL M
-1D545,UNASSIGNED,<RESERVED>
-1D546,DISALLOWED,MATHEMATICAL DOUBLE-STRUCK CAPITAL O
-1D547-1D549,UNASSIGNED,<RESERVED>..<RESERVED>
-1D54A-1D550,DISALLOWED,MATHEMATICAL DOUBLE-STRUCK CAPITAL S..MATHEMATICAL DOUBLE-STRUCK CAPITAL Y
-1D551,UNASSIGNED,<RESERVED>
-1D552-1D6A5,DISALLOWED,MATHEMATICAL DOUBLE-STRUCK SMALL A..MATHEMATICAL ITALIC SMALL DOTLESS J
-1D6A6-1D6A7,UNASSIGNED,<RESERVED>..<RESERVED>
-1D6A8-1D7CB,DISALLOWED,MATHEMATICAL BOLD CAPITAL ALPHA..MATHEMATICAL BOLD SMALL DIGAMMA
-1D7CC-1D7CD,UNASSIGNED,<RESERVED>..<RESERVED>
-1D7CE-1D7FF,DISALLOWED,MATHEMATICAL BOLD DIGIT ZERO..MATHEMATICAL MONOSPACE DIGIT NINE
-1D800-1EFFF,UNASSIGNED,<RESERVED>..<RESERVED>
-1F000-1F02B,DISALLOWED,MAHJONG TILE EAST WIND..MAHJONG TILE BACK
-1F02C-1F02F,UNASSIGNED,<RESERVED>..<RESERVED>
-1F030-1F093,DISALLOWED,DOMINO TILE HORIZONTAL BACK..DOMINO TILE VERTICAL-06-06
-1F094-1F0FF,UNASSIGNED,<RESERVED>..<RESERVED>
-1F100-1F10A,DISALLOWED,DIGIT ZERO FULL STOP..DIGIT NINE COMMA
-1F10B-1F10F,UNASSIGNED,<RESERVED>..<RESERVED>
-1F110-1F12E,DISALLOWED,PARENTHESIZED LATIN CAPITAL LETTER A..CIRCLED WZ
-1F12F-1F130,UNASSIGNED,<RESERVED>..<RESERVED>
-1F131,DISALLOWED,SQUARED LATIN CAPITAL LETTER B
-1F132-1F13C,UNASSIGNED,<RESERVED>..<RESERVED>
-1F13D,DISALLOWED,SQUARED LATIN CAPITAL LETTER N
-1F13E,UNASSIGNED,<RESERVED>
-1F13F,DISALLOWED,SQUARED LATIN CAPITAL LETTER P
-1F140-1F141,UNASSIGNED,<RESERVED>..<RESERVED>
-1F142,DISALLOWED,SQUARED LATIN CAPITAL LETTER S
-1F143-1F145,UNASSIGNED,<RESERVED>..<RESERVED>
-1F146,DISALLOWED,SQUARED LATIN CAPITAL LETTER W
-1F147-1F149,UNASSIGNED,<RESERVED>..<RESERVED>
-1F14A-1F14E,DISALLOWED,SQUARED HV..SQUARED PPV
-1F14F-1F156,UNASSIGNED,<RESERVED>..<RESERVED>
-1F157,DISALLOWED,NEGATIVE CIRCLED LATIN CAPITAL LETTER H
-1F158-1F15E,UNASSIGNED,<RESERVED>..<RESERVED>
-1F15F,DISALLOWED,NEGATIVE CIRCLED LATIN CAPITAL LETTER P
-1F160-1F178,UNASSIGNED,<RESERVED>..<RESERVED>
-1F179,DISALLOWED,NEGATIVE SQUARED LATIN CAPITAL LETTER J
-1F17A,UNASSIGNED,<RESERVED>
-1F17B-1F17C,DISALLOWED,NEGATIVE SQUARED LATIN CAPITAL LETTER L..NEGATIVE SQUARED LATIN CAPITAL LETTER M
-1F17D-1F17E,UNASSIGNED,<RESERVED>..<RESERVED>
-1F17F,DISALLOWED,NEGATIVE SQUARED LATIN CAPITAL LETTER P
-1F180-1F189,UNASSIGNED,<RESERVED>..<RESERVED>
-1F18A-1F18D,DISALLOWED,CROSSED NEGATIVE SQUARED LATIN CAPITAL LETTER P..NEGATIVE SQUARED SA
-1F18E-1F18F,UNASSIGNED,<RESERVED>..<RESERVED>
-1F190,DISALLOWED,SQUARE DJ
-1F191-1F1FF,UNASSIGNED,<RESERVED>..<RESERVED>
-1F200,DISALLOWED,SQUARE HIRAGANA HOKA
-1F201-1F20F,UNASSIGNED,<RESERVED>..<RESERVED>
-1F210-1F231,DISALLOWED,SQUARED CJK UNIFIED IDEOGRAPH-624B..SQUARED CJK UNIFIED IDEOGRAPH-6253
-1F232-1F23F,UNASSIGNED,<RESERVED>..<RESERVED>
-1F240-1F248,DISALLOWED,TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-672C..TORTOISE SHELL BRACKETED CJK UNIFIED IDEOGRAPH-6557
-1F249-1FFFD,UNASSIGNED,<RESERVED>..<RESERVED>
-1FFFE-1FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
-20000-2A6D6,PVALID,"<CJK IDEOGRAPH EXTENSION B, FIRST>..<CJK IDEOGRAPH EXTENSION B, LAST>"
-2A6D7-2A6FF,UNASSIGNED,<RESERVED>..<RESERVED>
-2A700-2B734,PVALID,"<CJK IDEOGRAPH EXTENSION C, FIRST>..<CJK IDEOGRAPH EXTENSION C, LAST>"
-2B735-2F7FF,UNASSIGNED,<RESERVED>..<RESERVED>
-2F800-2FA1D,DISALLOWED,CJK COMPATIBILITY IDEOGRAPH-2F800..CJK COMPATIBILITY IDEOGRAPH-2FA1D
-2FA1E-2FFFD,UNASSIGNED,<RESERVED>..<RESERVED>
-2FFFE-2FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
-30000-3FFFD,UNASSIGNED,<RESERVED>..<RESERVED>
-3FFFE-3FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
-40000-4FFFD,UNASSIGNED,<RESERVED>..<RESERVED>
-4FFFE-4FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
-50000-5FFFD,UNASSIGNED,<RESERVED>..<RESERVED>
-5FFFE-5FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
-60000-6FFFD,UNASSIGNED,<RESERVED>..<RESERVED>
-6FFFE-6FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
-70000-7FFFD,UNASSIGNED,<RESERVED>..<RESERVED>
-7FFFE-7FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
-80000-8FFFD,UNASSIGNED,<RESERVED>..<RESERVED>
-8FFFE-8FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
-90000-9FFFD,UNASSIGNED,<RESERVED>..<RESERVED>
-9FFFE-9FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
-A0000-AFFFD,UNASSIGNED,<RESERVED>..<RESERVED>
-AFFFE-AFFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
-B0000-BFFFD,UNASSIGNED,<RESERVED>..<RESERVED>
-BFFFE-BFFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
-C0000-CFFFD,UNASSIGNED,<RESERVED>..<RESERVED>
-CFFFE-CFFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
-D0000-DFFFD,UNASSIGNED,<RESERVED>..<RESERVED>
-DFFFE-DFFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
-E0000,UNASSIGNED,<RESERVED>
-E0001,DISALLOWED,LANGUAGE TAG
-E0002-E001F,UNASSIGNED,<RESERVED>..<RESERVED>
-E0020-E007F,DISALLOWED,TAG SPACE..CANCEL TAG
-E0080-E00FF,UNASSIGNED,<RESERVED>..<RESERVED>
-E0100-E01EF,DISALLOWED,VARIATION SELECTOR-17..VARIATION SELECTOR-256
-E01F0-EFFFD,UNASSIGNED,<RESERVED>..<RESERVED>
-EFFFE-10FFFF,DISALLOWED,<NOT A CHARACTER>..<NOT A CHARACTER>
diff --git a/utils/idna.c b/utils/idna.c index 572882ecb..3ce3e1be1 100644 --- a/utils/idna.c +++ b/utils/idna.c @@ -26,7 +26,7 @@ #include <stdint.h> #include <stdlib.h> #include <string.h> -#include <libutf8proc/utf8proc.h> +#include <sys/types.h> #include "netsurf/inttypes.h" @@ -39,14 +39,6 @@ #include "utils/utils.h" -int32_t idna_contexto[] = { - /* CONTEXTO codepoints which have a rule defined */ - 0x00b7, 0x0375, 0x05f3, 0x05f4, 0x30fb, 0x0660, 0x0661, - 0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668, - 0x0669, 0x06f0, 0x06f1, 0x06f2, 0x06f3, 0x06f4, 0x06f5, - 0x06f6, 0x06f7, 0x06f8, 0x06f9, 0 -}; - /** * Convert punycode status into nserror. * @@ -83,6 +75,108 @@ static nserror punycode_status_to_nserror(enum punycode_status status) return ret; } + +/** + * Convert a host label in UCS-4 to an ACE version + * + * \param ucs4_label UCS-4 NFC string containing host label + * \param len Length of host label (in characters/codepoints) + * \param ace_label ASCII-compatible encoded version + * \param out_len Length of ace_label + * \return NSERROR_OK on success, appropriate error otherwise + * + * If return value != NSERROR_OK, output will be left untouched. 
+ */ +static nserror +idna__ucs4_to_ace(int32_t *ucs4_label, + size_t len, + char **ace_label, + size_t *out_len) +{ + char punycode[65]; /* max length of host label + NULL */ + size_t output_length = 60; /* punycode length - 4 - 1 */ + nserror ret; + + punycode[0] = 'x'; + punycode[1] = 'n'; + punycode[2] = '-'; + punycode[3] = '-'; + + ret = punycode_status_to_nserror(punycode_encode(len, + (const punycode_uint *)ucs4_label, NULL, + &output_length, punycode + 4)); + if (ret != NSERROR_OK) { + return ret; + } + + output_length += SLEN("xn--"); + punycode[output_length] = '\0'; + + *ace_label = strdup(punycode); + *out_len = output_length; + + return NSERROR_OK; +} + + +/** + * Convert a host label in ACE format to UCS-4 + * + * \param ace_label ASCII string containing host label + * \param ace_len Length of host label + * \param ucs4_label Pointer to hold UCS4 decoded version + * \param ucs4_len Pointer to hold length of ucs4_label + * \return NSERROR_OK on success, appropriate error otherwise + * + * If return value != NSERROR_OK, output will be left untouched. 
+ */ +static nserror +idna__ace_to_ucs4(const char *ace_label, + size_t ace_len, + int32_t **ucs4_label, + size_t *ucs4_len) +{ + int32_t *ucs4; + nserror ret; + size_t output_length = ace_len; /* never exceeds input length */ + + /* The header should always have been checked before calling */ + assert((ace_label[0] == 'x') && (ace_label[1] == 'n') && + (ace_label[2] == '-') && (ace_label[3] == '-')); + + ucs4 = malloc(output_length * 4); + if (ucs4 == NULL) { + return NSERROR_NOMEM; + } + + ret = punycode_status_to_nserror(punycode_decode(ace_len - 4, + ace_label + 4, &output_length, (punycode_uint *)ucs4, NULL)); + if (ret != NSERROR_OK) { + free(ucs4); + return ret; + } + + ucs4[output_length] = '\0'; + + *ucs4_label = ucs4; + *ucs4_len = output_length; + + return NSERROR_OK; +} + + +#ifdef WITH_UTF8PROC + +#include <libutf8proc/utf8proc.h> + +int32_t idna_contexto[] = { + /* CONTEXTO codepoints which have a rule defined */ + 0x00b7, 0x0375, 0x05f3, 0x05f4, 0x30fb, 0x0660, 0x0661, + 0x0662, 0x0663, 0x0664, 0x0665, 0x0666, 0x0667, 0x0668, + 0x0669, 0x06f0, 0x06f1, 0x06f2, 0x06f3, 0x06f4, 0x06f5, + 0x06f6, 0x06f7, 0x06f8, 0x06f9, 0 +}; + /** * Find the IDNA property of a UCS-4 codepoint * @@ -305,119 +399,6 @@ idna__ucs4_to_utf8(const int32_t *ucs4_label, /** - * Convert a host label in UCS-4 to an ACE version - * - * \param ucs4_label UCS-4 NFC string containing host label - * \param len Length of host label (in characters/codepoints) - * \param ace_label ASCII-compatible encoded version - * \param out_len Length of ace_label - * \return NSERROR_OK on success, appropriate error otherwise - * - * If return value != NSERROR_OK, output will be left untouched. 
- */ -static nserror -idna__ucs4_to_ace(int32_t *ucs4_label, - size_t len, - char **ace_label, - size_t *out_len) -{ - char punycode[65]; /* max length of host label + NULL */ - size_t output_length = 60; /* punycode length - 4 - 1 */ - nserror ret; - - punycode[0] = 'x'; - punycode[1] = 'n'; - punycode[2] = '-'; - punycode[3] = '-'; - - ret = punycode_status_to_nserror(punycode_encode(len, - (const punycode_uint *)ucs4_label, NULL, - &output_length, punycode + 4)); - if (ret != NSERROR_OK) { - return ret; - } - - output_length += SLEN("xn--"); - punycode[output_length] = '\0'; - - *ace_label = strdup(punycode); - *out_len = output_length; - - return NSERROR_OK; -} - - -/** - * Convert a host label in ACE format to UCS-4 - * - * \param ace_label ASCII string containing host label - * \param ace_len Length of host label - * \param ucs4_label Pointer to hold UCS4 decoded version - * \param ucs4_len Pointer to hold length of ucs4_label - * \return NSERROR_OK on success, appropriate error otherwise - * - * If return value != NSERROR_OK, output will be left untouched. 
- */ -static nserror -idna__ace_to_ucs4(const char *ace_label, - size_t ace_len, - int32_t **ucs4_label, - size_t *ucs4_len) -{ - int32_t *ucs4; - nserror ret; - size_t output_length = ace_len; /* never exceeds input length */ - - /* The header should always have been checked before calling */ - assert((ace_label[0] == 'x') && (ace_label[1] == 'n') && - (ace_label[2] == '-') && (ace_label[3] == '-')); - - ucs4 = malloc(output_length * 4); - if (ucs4 == NULL) { - return NSERROR_NOMEM; - } - - ret = punycode_status_to_nserror(punycode_decode(ace_len - 4, - ace_label + 4, &output_length, (punycode_uint *)ucs4, NULL)); - if (ret != NSERROR_OK) { - free(ucs4); - return ret; - } - - ucs4[output_length] = '\0'; - - *ucs4_label = ucs4; - *ucs4_len = output_length; - - return NSERROR_OK; -} - - -/** - * Find the length of a host label - * - * \param host String containing a host or FQDN - * \param max_length Length of host string to search (in bytes) - * \return Distance to next separator character or end of string - */ -static size_t idna__host_label_length(const char *host, size_t max_length) -{ - const char *p = host; - size_t length = 0; - - while (length < max_length) { - if ((*p == '.') || (*p == ':') || (*p == '\0')) { - break; - } - length++; - p++; - } - - return length; -} - - -/** * Check if a host label is valid for IDNA2008 * * \param label Host label to check (UCS-4) @@ -436,7 +417,7 @@ static bool idna__is_valid(int32_t *label, size_t len) */ /* 2. Check characters 3 and 4 are not '--'. */ - if ((label[2] == 0x002d) && (label[3] == 0x002d)) { + if ((len >= 4) && (label[2] == 0x002d) && (label[3] == 0x002d)) { NSLOG(netsurf, INFO, "Check failed: characters 2 and 3 are '--'"); return false; @@ -459,7 +440,7 @@ static bool idna__is_valid(int32_t *label, size_t len) /* 4. 
Check characters not DISALLOWED by RFC5892 */ if (idna_prop == IDNA_P_DISALLOWED) { NSLOG(netsurf, INFO, - "Check failed: character %"PRIsizet" (%x) is DISALLOWED", + "Check failed: character %"PRIsizet" (%"PRIx32") is DISALLOWED", i, label[i]); return false; @@ -469,7 +450,7 @@ static bool idna__is_valid(int32_t *label, size_t len) if (idna_prop == IDNA_P_CONTEXTJ) { if (idna__contextj_rule(label, i, len) == false) { NSLOG(netsurf, INFO, - "Check failed: character %"PRIsizet" (%x) does not conform to CONTEXTJ rule", + "Check failed: character %"PRIsizet" (%"PRIx32") does not conform to CONTEXTJ rule", i, label[i]); return false; @@ -481,7 +462,7 @@ static bool idna__is_valid(int32_t *label, size_t len) if (idna_prop == IDNA_P_CONTEXTO) { if (idna__contexto_rule(label[i]) == false) { NSLOG(netsurf, INFO, - "Check failed: character %"PRIsizet" (%x) has no CONTEXTO rule defined", + "Check failed: character %"PRIsizet" (%"PRIx32") has no CONTEXTO rule defined", i, label[i]); return false; @@ -491,7 +472,7 @@ static bool idna__is_valid(int32_t *label, size_t len) /* 7. 
Check characters are not UNASSIGNED */ if (idna_prop == IDNA_P_UNASSIGNED) { NSLOG(netsurf, INFO, - "Check failed: character %"PRIsizet" (%x) is UNASSIGNED", + "Check failed: character %"PRIsizet" (%"PRIx32") is UNASSIGNED", i, label[i]); return false; @@ -505,6 +486,155 @@ static bool idna__is_valid(int32_t *label, size_t len) /** + * Verify an ACE label is valid + * + * \param label Host label to check + * \param len Length of label + * \return true if valid, false otherwise + */ +static bool idna__verify(const char *label, size_t len) +{ + nserror error; + int32_t *ucs4; + char *ace; + ssize_t ucs4_len; + size_t u_ucs4_len, ace_len; + + /* Convert our ACE label back to UCS-4 */ + error = idna__ace_to_ucs4(label, len, &ucs4, &u_ucs4_len); + if (error != NSERROR_OK) { + return false; + } + + /* Perform NFC normalisation */ + ucs4_len = utf8proc_normalize_utf32(ucs4, u_ucs4_len, + UTF8PROC_STABLE | UTF8PROC_COMPOSE); + if (ucs4_len < 0) { + free(ucs4); + return false; + } + + /* Convert the UCS-4 label back to ACE */ + error = idna__ucs4_to_ace(ucs4, (size_t)ucs4_len, + &ace, &ace_len); + free(ucs4); + if (error != NSERROR_OK) { + return false; + } + + /* Check if it matches the input */ + if ((len == ace_len) && (strncmp(label, ace, len) == 0)) { + free(ace); + return true; + } + + NSLOG(netsurf, INFO, "Re-encoded ACE label %s does not match input", + ace); + free(ace); + + return false; +} + + +#else /* WITH_UTF8PROC */ + + +/** + * Convert a UTF-8 string to UCS-4 + * + * \param utf8_label UTF-8 string containing host label + * \param len Length of host label (in bytes) + * \param ucs4_label Pointer to update with the output + * \param ucs4_len Pointer to update with the length + * \return NSERROR_OK on success, appropriate error otherwise + * + * If return value != NSERROR_OK, output will be left untouched. 
+ */ +static nserror +idna__utf8_to_ucs4(const char *utf8_label, + size_t len, + int32_t **ucs4_label, + size_t *ucs4_len) +{ + return NSERROR_NOT_IMPLEMENTED; +} + + +/** + * Convert a UCS-4 string to UTF-8 + * + * \param ucs4_label UCS-4 string containing host label + * \param ucs4_len Length of host label (in bytes) + * \param utf8_label Pointer to update with the output + * \param utf8_len Pointer to update with the length + * \return NSERROR_OK on success, appropriate error otherwise + * + * If return value != NSERROR_OK, output will be left untouched. + */ +static nserror +idna__ucs4_to_utf8(const int32_t *ucs4_label, + size_t ucs4_len, + char **utf8_label, + size_t *utf8_len) +{ + return NSERROR_NOT_IMPLEMENTED; +} + + +/** + * Check if a host label is valid for IDNA2008 + * + * \param label Host label to check (UCS-4) + * \param len Length of host label (in characters/codepoints) + * \return true if compliant, false otherwise + */ +static bool idna__is_valid(int32_t *label, size_t len) +{ + return true; +} + + +/** + * Verify an ACE label is valid + * + * \param label Host label to check + * \param len Length of label + * \return true if valid, false otherwise + */ +static bool idna__verify(const char *label, size_t len) +{ + return true; +} + + +#endif /* WITH_UTF8PROC */ + + +/** + * Find the length of a host label + * + * \param host String containing a host or FQDN + * \param max_length Length of host string to search (in bytes) + * \return Distance to next separator character or end of string + */ +static size_t idna__host_label_length(const char *host, size_t max_length) +{ + const char *p = host; + size_t length = 0; + + while (length < max_length) { + if ((*p == '.') || (*p == ':') || (*p == '\0')) { + break; + } + length++; + p++; + } + + return length; +} + + +/** * Check if a host label is LDH * * \param label Host label to check @@ -559,57 +689,6 @@ static bool idna__is_ace(const char *label, size_t len) } -/** - * Verify an ACE label is valid - 
* - * \param label Host label to check - * \param len Length of label - * \return true if valid, false otherwise - */ -static bool idna__verify(const char *label, size_t len) -{ - nserror error; - int32_t *ucs4; - char *ace; - ssize_t ucs4_len; - size_t u_ucs4_len, ace_len; - - /* Convert our ACE label back to UCS-4 */ - error = idna__ace_to_ucs4(label, len, &ucs4, &u_ucs4_len); - if (error != NSERROR_OK) { - return false; - } - - /* Perform NFC normalisation */ - ucs4_len = utf8proc_normalize_utf32(ucs4, u_ucs4_len, - UTF8PROC_STABLE | UTF8PROC_COMPOSE); - if (ucs4_len < 0) { - free(ucs4); - return false; - } - - /* Convert the UCS-4 label back to ACE */ - error = idna__ucs4_to_ace(ucs4, (size_t)ucs4_len, - &ace, &ace_len); - free(ucs4); - if (error != NSERROR_OK) { - return false; - } - - /* Check if it matches the input */ - if ((len == ace_len) && (strncmp(label, ace, len) == 0)) { - free(ace); - return true; - } - - NSLOG(netsurf, INFO, "Re-encoded ACE label %s does not match input", - ace); - free(ace); - - return false; -} - - /* exported interface documented in idna.h */ nserror idna_encode(const char *host, size_t len, char **ace_host, size_t *ace_len) @@ -630,8 +709,9 @@ idna_encode(const char *host, size_t len, char **ace_host, size_t *ace_len) /* This string is IDN or invalid */ /* Convert to Unicode */ - if ((error = idna__utf8_to_ucs4(host, label_len, - &ucs4_host, &ucs4_len)) != NSERROR_OK) { + error = idna__utf8_to_ucs4(host, label_len, + &ucs4_host, &ucs4_len); + if (error != NSERROR_OK) { return error; } @@ -709,7 +789,7 @@ idna_decode(const char *ace_host, size_t ace_len, char **host, size_t *host_len) /* Decode to Unicode */ error = idna__ace_to_ucs4(ace_host, label_len, - &ucs4_host, &ucs4_len); + &ucs4_host, &ucs4_len); if (error != NSERROR_OK) { return error; } @@ -722,7 +802,7 @@ idna_decode(const char *ace_host, size_t ace_len, char **host, size_t *host_len) return error; } - memcpy(fqdn_p, output, output_len * 4); + memcpy(fqdn_p, output, 
output_len); free(output); fqdn_p += output_len; fqdn_len += output_len; diff --git a/utils/idna.h b/utils/idna.h index 1fb344730..efc73eb72 100644 --- a/utils/idna.h +++ b/utils/idna.h @@ -16,12 +16,13 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. */ -/** \file - * NetSurf international domain name handling (interface). +/** + * \file + * interface to international domain name handling. */ -#ifndef _NETSURF_UTILS_IDNA_H_ -#define _NETSURF_UTILS_IDNA_H_ +#ifndef NETSURF_UTILS_IDNA_H_ +#define NETSURF_UTILS_IDNA_H_ /** * Unicode canonical combining class for virama diff --git a/utils/idna_props.h b/utils/idna_props.h index b5ca966c0..05546c51a 100644 --- a/utils/idna_props.h +++ b/utils/idna_props.h @@ -343,8 +343,8 @@ idna_table idna_derived[] = { { 0x0378, 0x0379, .p.property = IDNA_P_UNASSIGNED }, { 0x037A, 0x037A, .p.property = IDNA_P_DISALLOWED }, { 0x037B, 0x037D, .p.property = IDNA_P_PVALID }, - { 0x037E, 0x037E, .p.property = IDNA_P_DISALLOWED }, - { 0x037F, 0x0383, .p.property = IDNA_P_UNASSIGNED }, + { 0x037E, 0x037F, .p.property = IDNA_P_DISALLOWED }, + { 0x0380, 0x0383, .p.property = IDNA_P_UNASSIGNED }, { 0x0384, 0x038A, .p.property = IDNA_P_DISALLOWED }, { 0x038B, 0x038B, .p.property = IDNA_P_UNASSIGNED }, { 0x038C, 0x038C, .p.property = IDNA_P_DISALLOWED }, @@ -579,17 +579,28 @@ idna_table idna_derived[] = { { 0x0523, 0x0523, .p.property = IDNA_P_PVALID }, { 0x0524, 0x0524, .p.property = IDNA_P_DISALLOWED }, { 0x0525, 0x0525, .p.property = IDNA_P_PVALID }, - { 0x0526, 0x0530, .p.property = IDNA_P_UNASSIGNED }, + { 0x0526, 0x0526, .p.property = IDNA_P_DISALLOWED }, + { 0x0527, 0x0527, .p.property = IDNA_P_PVALID }, + { 0x0528, 0x0528, .p.property = IDNA_P_DISALLOWED }, + { 0x0529, 0x0529, .p.property = IDNA_P_PVALID }, + { 0x052A, 0x052A, .p.property = IDNA_P_DISALLOWED }, + { 0x052B, 0x052B, .p.property = IDNA_P_PVALID }, + { 0x052C, 0x052C, .p.property = IDNA_P_DISALLOWED }, + { 0x052D, 0x052D, .p.property = IDNA_P_PVALID 
}, + { 0x052E, 0x052E, .p.property = IDNA_P_DISALLOWED }, + { 0x052F, 0x052F, .p.property = IDNA_P_PVALID }, + { 0x0530, 0x0530, .p.property = IDNA_P_UNASSIGNED }, { 0x0531, 0x0556, .p.property = IDNA_P_DISALLOWED }, { 0x0557, 0x0558, .p.property = IDNA_P_UNASSIGNED }, { 0x0559, 0x0559, .p.property = IDNA_P_PVALID }, { 0x055A, 0x055F, .p.property = IDNA_P_DISALLOWED }, - { 0x0560, 0x0560, .p.property = IDNA_P_UNASSIGNED }, - { 0x0561, 0x0586, .p.property = IDNA_P_PVALID }, + { 0x0560, 0x0586, .p.property = IDNA_P_PVALID }, { 0x0587, 0x0587, .p.property = IDNA_P_DISALLOWED }, - { 0x0588, 0x0588, .p.property = IDNA_P_UNASSIGNED }, + { 0x0588, 0x0588, .p.property = IDNA_P_PVALID }, { 0x0589, 0x058A, .p.property = IDNA_P_DISALLOWED }, - { 0x058B, 0x0590, .p.property = IDNA_P_UNASSIGNED }, + { 0x058B, 0x058C, .p.property = IDNA_P_UNASSIGNED }, + { 0x058D, 0x058F, .p.property = IDNA_P_DISALLOWED }, + { 0x0590, 0x0590, .p.property = IDNA_P_UNASSIGNED }, { 0x0591, 0x05BD, .p.property = IDNA_P_PVALID }, { 0x05BE, 0x05BE, .p.property = IDNA_P_DISALLOWED }, { 0x05BF, 0x05BF, .p.property = IDNA_P_PVALID }, @@ -601,22 +612,18 @@ idna_table idna_derived[] = { { 0x05C7, 0x05C7, .p.property = IDNA_P_PVALID }, { 0x05C8, 0x05CF, .p.property = IDNA_P_UNASSIGNED }, { 0x05D0, 0x05EA, .p.property = IDNA_P_PVALID }, - { 0x05EB, 0x05EF, .p.property = IDNA_P_UNASSIGNED }, - { 0x05F0, 0x05F2, .p.property = IDNA_P_PVALID }, + { 0x05EB, 0x05EE, .p.property = IDNA_P_UNASSIGNED }, + { 0x05EF, 0x05F2, .p.property = IDNA_P_PVALID }, { 0x05F3, 0x05F4, .p.property = IDNA_P_CONTEXTO }, { 0x05F5, 0x05FF, .p.property = IDNA_P_UNASSIGNED }, - { 0x0600, 0x0603, .p.property = IDNA_P_DISALLOWED }, - { 0x0604, 0x0605, .p.property = IDNA_P_UNASSIGNED }, - { 0x0606, 0x060F, .p.property = IDNA_P_DISALLOWED }, + { 0x0600, 0x060F, .p.property = IDNA_P_DISALLOWED }, { 0x0610, 0x061A, .p.property = IDNA_P_PVALID }, - { 0x061B, 0x061B, .p.property = IDNA_P_DISALLOWED }, - { 0x061C, 0x061D, .p.property = 
IDNA_P_UNASSIGNED }, + { 0x061B, 0x061C, .p.property = IDNA_P_DISALLOWED }, + { 0x061D, 0x061D, .p.property = IDNA_P_UNASSIGNED }, { 0x061E, 0x061F, .p.property = IDNA_P_DISALLOWED }, - { 0x0620, 0x0620, .p.property = IDNA_P_UNASSIGNED }, - { 0x0621, 0x063F, .p.property = IDNA_P_PVALID }, + { 0x0620, 0x063F, .p.property = IDNA_P_PVALID }, { 0x0640, 0x0640, .p.property = IDNA_P_DISALLOWED }, - { 0x0641, 0x065E, .p.property = IDNA_P_PVALID }, - { 0x065F, 0x065F, .p.property = IDNA_P_UNASSIGNED }, + { 0x0641, 0x065F, .p.property = IDNA_P_PVALID }, { 0x0660, 0x0669, .p.property = IDNA_P_CONTEXTO }, { 0x066A, 0x066D, .p.property = IDNA_P_DISALLOWED }, { 0x066E, 0x0674, .p.property = IDNA_P_PVALID }, @@ -639,27 +646,32 @@ idna_table idna_derived[] = { { 0x07B2, 0x07BF, .p.property = IDNA_P_UNASSIGNED }, { 0x07C0, 0x07F5, .p.property = IDNA_P_PVALID }, { 0x07F6, 0x07FA, .p.property = IDNA_P_DISALLOWED }, - { 0x07FB, 0x07FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x07FB, 0x07FC, .p.property = IDNA_P_UNASSIGNED }, + { 0x07FD, 0x07FD, .p.property = IDNA_P_PVALID }, + { 0x07FE, 0x07FF, .p.property = IDNA_P_DISALLOWED }, { 0x0800, 0x082D, .p.property = IDNA_P_PVALID }, { 0x082E, 0x082F, .p.property = IDNA_P_UNASSIGNED }, { 0x0830, 0x083E, .p.property = IDNA_P_DISALLOWED }, - { 0x083F, 0x08FF, .p.property = IDNA_P_UNASSIGNED }, - { 0x0900, 0x0939, .p.property = IDNA_P_PVALID }, - { 0x093A, 0x093B, .p.property = IDNA_P_UNASSIGNED }, - { 0x093C, 0x094E, .p.property = IDNA_P_PVALID }, - { 0x094F, 0x094F, .p.property = IDNA_P_UNASSIGNED }, - { 0x0950, 0x0955, .p.property = IDNA_P_PVALID }, - { 0x0956, 0x0957, .p.property = IDNA_P_UNASSIGNED }, + { 0x083F, 0x083F, .p.property = IDNA_P_UNASSIGNED }, + { 0x0840, 0x085B, .p.property = IDNA_P_PVALID }, + { 0x085C, 0x085D, .p.property = IDNA_P_UNASSIGNED }, + { 0x085E, 0x085E, .p.property = IDNA_P_DISALLOWED }, + { 0x085F, 0x085F, .p.property = IDNA_P_UNASSIGNED }, + { 0x0860, 0x086A, .p.property = IDNA_P_PVALID }, + { 0x086B, 0x089F, 
.p.property = IDNA_P_UNASSIGNED }, + { 0x08A0, 0x08B4, .p.property = IDNA_P_PVALID }, + { 0x08B5, 0x08B5, .p.property = IDNA_P_UNASSIGNED }, + { 0x08B6, 0x08BD, .p.property = IDNA_P_PVALID }, + { 0x08BE, 0x08D2, .p.property = IDNA_P_UNASSIGNED }, + { 0x08D3, 0x08E1, .p.property = IDNA_P_PVALID }, + { 0x08E2, 0x08E2, .p.property = IDNA_P_DISALLOWED }, + { 0x08E3, 0x0957, .p.property = IDNA_P_PVALID }, { 0x0958, 0x095F, .p.property = IDNA_P_DISALLOWED }, { 0x0960, 0x0963, .p.property = IDNA_P_PVALID }, { 0x0964, 0x0965, .p.property = IDNA_P_DISALLOWED }, { 0x0966, 0x096F, .p.property = IDNA_P_PVALID }, { 0x0970, 0x0970, .p.property = IDNA_P_DISALLOWED }, - { 0x0971, 0x0972, .p.property = IDNA_P_PVALID }, - { 0x0973, 0x0978, .p.property = IDNA_P_UNASSIGNED }, - { 0x0979, 0x097F, .p.property = IDNA_P_PVALID }, - { 0x0980, 0x0980, .p.property = IDNA_P_UNASSIGNED }, - { 0x0981, 0x0983, .p.property = IDNA_P_PVALID }, + { 0x0971, 0x0983, .p.property = IDNA_P_PVALID }, { 0x0984, 0x0984, .p.property = IDNA_P_UNASSIGNED }, { 0x0985, 0x098C, .p.property = IDNA_P_PVALID }, { 0x098D, 0x098E, .p.property = IDNA_P_UNASSIGNED }, @@ -688,7 +700,10 @@ idna_table idna_derived[] = { { 0x09E4, 0x09E5, .p.property = IDNA_P_UNASSIGNED }, { 0x09E6, 0x09F1, .p.property = IDNA_P_PVALID }, { 0x09F2, 0x09FB, .p.property = IDNA_P_DISALLOWED }, - { 0x09FC, 0x0A00, .p.property = IDNA_P_UNASSIGNED }, + { 0x09FC, 0x09FC, .p.property = IDNA_P_PVALID }, + { 0x09FD, 0x09FD, .p.property = IDNA_P_DISALLOWED }, + { 0x09FE, 0x09FE, .p.property = IDNA_P_PVALID }, + { 0x09FF, 0x0A00, .p.property = IDNA_P_UNASSIGNED }, { 0x0A01, 0x0A03, .p.property = IDNA_P_PVALID }, { 0x0A04, 0x0A04, .p.property = IDNA_P_UNASSIGNED }, { 0x0A05, 0x0A0A, .p.property = IDNA_P_PVALID }, @@ -723,7 +738,8 @@ idna_table idna_derived[] = { { 0x0A5E, 0x0A5E, .p.property = IDNA_P_DISALLOWED }, { 0x0A5F, 0x0A65, .p.property = IDNA_P_UNASSIGNED }, { 0x0A66, 0x0A75, .p.property = IDNA_P_PVALID }, - { 0x0A76, 0x0A80, .p.property = 
IDNA_P_UNASSIGNED }, + { 0x0A76, 0x0A76, .p.property = IDNA_P_DISALLOWED }, + { 0x0A77, 0x0A80, .p.property = IDNA_P_UNASSIGNED }, { 0x0A81, 0x0A83, .p.property = IDNA_P_PVALID }, { 0x0A84, 0x0A84, .p.property = IDNA_P_UNASSIGNED }, { 0x0A85, 0x0A8D, .p.property = IDNA_P_PVALID }, @@ -749,9 +765,10 @@ idna_table idna_derived[] = { { 0x0AE0, 0x0AE3, .p.property = IDNA_P_PVALID }, { 0x0AE4, 0x0AE5, .p.property = IDNA_P_UNASSIGNED }, { 0x0AE6, 0x0AEF, .p.property = IDNA_P_PVALID }, - { 0x0AF0, 0x0AF0, .p.property = IDNA_P_UNASSIGNED }, - { 0x0AF1, 0x0AF1, .p.property = IDNA_P_DISALLOWED }, - { 0x0AF2, 0x0B00, .p.property = IDNA_P_UNASSIGNED }, + { 0x0AF0, 0x0AF1, .p.property = IDNA_P_DISALLOWED }, + { 0x0AF2, 0x0AF8, .p.property = IDNA_P_UNASSIGNED }, + { 0x0AF9, 0x0AFF, .p.property = IDNA_P_PVALID }, + { 0x0B00, 0x0B00, .p.property = IDNA_P_UNASSIGNED }, { 0x0B01, 0x0B03, .p.property = IDNA_P_PVALID }, { 0x0B04, 0x0B04, .p.property = IDNA_P_UNASSIGNED }, { 0x0B05, 0x0B0C, .p.property = IDNA_P_PVALID }, @@ -781,7 +798,8 @@ idna_table idna_derived[] = { { 0x0B66, 0x0B6F, .p.property = IDNA_P_PVALID }, { 0x0B70, 0x0B70, .p.property = IDNA_P_DISALLOWED }, { 0x0B71, 0x0B71, .p.property = IDNA_P_PVALID }, - { 0x0B72, 0x0B81, .p.property = IDNA_P_UNASSIGNED }, + { 0x0B72, 0x0B77, .p.property = IDNA_P_DISALLOWED }, + { 0x0B78, 0x0B81, .p.property = IDNA_P_UNASSIGNED }, { 0x0B82, 0x0B83, .p.property = IDNA_P_PVALID }, { 0x0B84, 0x0B84, .p.property = IDNA_P_UNASSIGNED }, { 0x0B85, 0x0B8A, .p.property = IDNA_P_PVALID }, @@ -814,18 +832,14 @@ idna_table idna_derived[] = { { 0x0BD8, 0x0BE5, .p.property = IDNA_P_UNASSIGNED }, { 0x0BE6, 0x0BEF, .p.property = IDNA_P_PVALID }, { 0x0BF0, 0x0BFA, .p.property = IDNA_P_DISALLOWED }, - { 0x0BFB, 0x0C00, .p.property = IDNA_P_UNASSIGNED }, - { 0x0C01, 0x0C03, .p.property = IDNA_P_PVALID }, - { 0x0C04, 0x0C04, .p.property = IDNA_P_UNASSIGNED }, - { 0x0C05, 0x0C0C, .p.property = IDNA_P_PVALID }, + { 0x0BFB, 0x0BFF, .p.property = 
IDNA_P_UNASSIGNED }, + { 0x0C00, 0x0C0C, .p.property = IDNA_P_PVALID }, { 0x0C0D, 0x0C0D, .p.property = IDNA_P_UNASSIGNED }, { 0x0C0E, 0x0C10, .p.property = IDNA_P_PVALID }, { 0x0C11, 0x0C11, .p.property = IDNA_P_UNASSIGNED }, { 0x0C12, 0x0C28, .p.property = IDNA_P_PVALID }, { 0x0C29, 0x0C29, .p.property = IDNA_P_UNASSIGNED }, - { 0x0C2A, 0x0C33, .p.property = IDNA_P_PVALID }, - { 0x0C34, 0x0C34, .p.property = IDNA_P_UNASSIGNED }, - { 0x0C35, 0x0C39, .p.property = IDNA_P_PVALID }, + { 0x0C2A, 0x0C39, .p.property = IDNA_P_PVALID }, { 0x0C3A, 0x0C3C, .p.property = IDNA_P_UNASSIGNED }, { 0x0C3D, 0x0C44, .p.property = IDNA_P_PVALID }, { 0x0C45, 0x0C45, .p.property = IDNA_P_UNASSIGNED }, @@ -835,16 +849,15 @@ idna_table idna_derived[] = { { 0x0C4E, 0x0C54, .p.property = IDNA_P_UNASSIGNED }, { 0x0C55, 0x0C56, .p.property = IDNA_P_PVALID }, { 0x0C57, 0x0C57, .p.property = IDNA_P_UNASSIGNED }, - { 0x0C58, 0x0C59, .p.property = IDNA_P_PVALID }, - { 0x0C5A, 0x0C5F, .p.property = IDNA_P_UNASSIGNED }, + { 0x0C58, 0x0C5A, .p.property = IDNA_P_PVALID }, + { 0x0C5B, 0x0C5F, .p.property = IDNA_P_UNASSIGNED }, { 0x0C60, 0x0C63, .p.property = IDNA_P_PVALID }, { 0x0C64, 0x0C65, .p.property = IDNA_P_UNASSIGNED }, { 0x0C66, 0x0C6F, .p.property = IDNA_P_PVALID }, { 0x0C70, 0x0C77, .p.property = IDNA_P_UNASSIGNED }, { 0x0C78, 0x0C7F, .p.property = IDNA_P_DISALLOWED }, - { 0x0C80, 0x0C81, .p.property = IDNA_P_UNASSIGNED }, - { 0x0C82, 0x0C83, .p.property = IDNA_P_PVALID }, - { 0x0C84, 0x0C84, .p.property = IDNA_P_UNASSIGNED }, + { 0x0C80, 0x0C83, .p.property = IDNA_P_PVALID }, + { 0x0C84, 0x0C84, .p.property = IDNA_P_DISALLOWED }, { 0x0C85, 0x0C8C, .p.property = IDNA_P_PVALID }, { 0x0C8D, 0x0C8D, .p.property = IDNA_P_UNASSIGNED }, { 0x0C8E, 0x0C90, .p.property = IDNA_P_PVALID }, @@ -869,32 +882,27 @@ idna_table idna_derived[] = { { 0x0CE4, 0x0CE5, .p.property = IDNA_P_UNASSIGNED }, { 0x0CE6, 0x0CEF, .p.property = IDNA_P_PVALID }, { 0x0CF0, 0x0CF0, .p.property = IDNA_P_UNASSIGNED }, - { 
0x0CF1, 0x0CF2, .p.property = IDNA_P_DISALLOWED }, - { 0x0CF3, 0x0D01, .p.property = IDNA_P_UNASSIGNED }, - { 0x0D02, 0x0D03, .p.property = IDNA_P_PVALID }, + { 0x0CF1, 0x0CF2, .p.property = IDNA_P_PVALID }, + { 0x0CF3, 0x0CFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x0D00, 0x0D03, .p.property = IDNA_P_PVALID }, { 0x0D04, 0x0D04, .p.property = IDNA_P_UNASSIGNED }, { 0x0D05, 0x0D0C, .p.property = IDNA_P_PVALID }, { 0x0D0D, 0x0D0D, .p.property = IDNA_P_UNASSIGNED }, { 0x0D0E, 0x0D10, .p.property = IDNA_P_PVALID }, { 0x0D11, 0x0D11, .p.property = IDNA_P_UNASSIGNED }, - { 0x0D12, 0x0D28, .p.property = IDNA_P_PVALID }, - { 0x0D29, 0x0D29, .p.property = IDNA_P_UNASSIGNED }, - { 0x0D2A, 0x0D39, .p.property = IDNA_P_PVALID }, - { 0x0D3A, 0x0D3C, .p.property = IDNA_P_UNASSIGNED }, - { 0x0D3D, 0x0D44, .p.property = IDNA_P_PVALID }, + { 0x0D12, 0x0D44, .p.property = IDNA_P_PVALID }, { 0x0D45, 0x0D45, .p.property = IDNA_P_UNASSIGNED }, { 0x0D46, 0x0D48, .p.property = IDNA_P_PVALID }, { 0x0D49, 0x0D49, .p.property = IDNA_P_UNASSIGNED }, - { 0x0D4A, 0x0D4D, .p.property = IDNA_P_PVALID }, - { 0x0D4E, 0x0D56, .p.property = IDNA_P_UNASSIGNED }, - { 0x0D57, 0x0D57, .p.property = IDNA_P_PVALID }, - { 0x0D58, 0x0D5F, .p.property = IDNA_P_UNASSIGNED }, - { 0x0D60, 0x0D63, .p.property = IDNA_P_PVALID }, + { 0x0D4A, 0x0D4E, .p.property = IDNA_P_PVALID }, + { 0x0D4F, 0x0D4F, .p.property = IDNA_P_DISALLOWED }, + { 0x0D50, 0x0D53, .p.property = IDNA_P_UNASSIGNED }, + { 0x0D54, 0x0D57, .p.property = IDNA_P_PVALID }, + { 0x0D58, 0x0D5E, .p.property = IDNA_P_DISALLOWED }, + { 0x0D5F, 0x0D63, .p.property = IDNA_P_PVALID }, { 0x0D64, 0x0D65, .p.property = IDNA_P_UNASSIGNED }, { 0x0D66, 0x0D6F, .p.property = IDNA_P_PVALID }, - { 0x0D70, 0x0D75, .p.property = IDNA_P_DISALLOWED }, - { 0x0D76, 0x0D78, .p.property = IDNA_P_UNASSIGNED }, - { 0x0D79, 0x0D79, .p.property = IDNA_P_DISALLOWED }, + { 0x0D70, 0x0D79, .p.property = IDNA_P_DISALLOWED }, { 0x0D7A, 0x0D7F, .p.property = IDNA_P_PVALID }, { 
0x0D80, 0x0D81, .p.property = IDNA_P_UNASSIGNED }, { 0x0D82, 0x0D83, .p.property = IDNA_P_PVALID }, @@ -916,7 +924,9 @@ idna_table idna_derived[] = { { 0x0DD6, 0x0DD6, .p.property = IDNA_P_PVALID }, { 0x0DD7, 0x0DD7, .p.property = IDNA_P_UNASSIGNED }, { 0x0DD8, 0x0DDF, .p.property = IDNA_P_PVALID }, - { 0x0DE0, 0x0DF1, .p.property = IDNA_P_UNASSIGNED }, + { 0x0DE0, 0x0DE5, .p.property = IDNA_P_UNASSIGNED }, + { 0x0DE6, 0x0DEF, .p.property = IDNA_P_PVALID }, + { 0x0DF0, 0x0DF1, .p.property = IDNA_P_UNASSIGNED }, { 0x0DF2, 0x0DF3, .p.property = IDNA_P_PVALID }, { 0x0DF4, 0x0DF4, .p.property = IDNA_P_DISALLOWED }, { 0x0DF5, 0x0E00, .p.property = IDNA_P_UNASSIGNED }, @@ -967,7 +977,8 @@ idna_table idna_derived[] = { { 0x0ED0, 0x0ED9, .p.property = IDNA_P_PVALID }, { 0x0EDA, 0x0EDB, .p.property = IDNA_P_UNASSIGNED }, { 0x0EDC, 0x0EDD, .p.property = IDNA_P_DISALLOWED }, - { 0x0EDE, 0x0EFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x0EDE, 0x0EDF, .p.property = IDNA_P_PVALID }, + { 0x0EE0, 0x0EFF, .p.property = IDNA_P_UNASSIGNED }, { 0x0F00, 0x0F00, .p.property = IDNA_P_PVALID }, { 0x0F01, 0x0F0A, .p.property = IDNA_P_DISALLOWED }, { 0x0F0B, 0x0F0B, .p.property = IDNA_P_PVALID }, @@ -1006,9 +1017,7 @@ idna_table idna_derived[] = { { 0x0F81, 0x0F81, .p.property = IDNA_P_DISALLOWED }, { 0x0F82, 0x0F84, .p.property = IDNA_P_PVALID }, { 0x0F85, 0x0F85, .p.property = IDNA_P_DISALLOWED }, - { 0x0F86, 0x0F8B, .p.property = IDNA_P_PVALID }, - { 0x0F8C, 0x0F8F, .p.property = IDNA_P_UNASSIGNED }, - { 0x0F90, 0x0F92, .p.property = IDNA_P_PVALID }, + { 0x0F86, 0x0F92, .p.property = IDNA_P_PVALID }, { 0x0F93, 0x0F93, .p.property = IDNA_P_DISALLOWED }, { 0x0F94, 0x0F97, .p.property = IDNA_P_PVALID }, { 0x0F98, 0x0F98, .p.property = IDNA_P_UNASSIGNED }, @@ -1028,16 +1037,20 @@ idna_table idna_derived[] = { { 0x0FC6, 0x0FC6, .p.property = IDNA_P_PVALID }, { 0x0FC7, 0x0FCC, .p.property = IDNA_P_DISALLOWED }, { 0x0FCD, 0x0FCD, .p.property = IDNA_P_UNASSIGNED }, - { 0x0FCE, 0x0FD8, .p.property 
= IDNA_P_DISALLOWED }, - { 0x0FD9, 0x0FFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x0FCE, 0x0FDA, .p.property = IDNA_P_DISALLOWED }, + { 0x0FDB, 0x0FFF, .p.property = IDNA_P_UNASSIGNED }, { 0x1000, 0x1049, .p.property = IDNA_P_PVALID }, { 0x104A, 0x104F, .p.property = IDNA_P_DISALLOWED }, { 0x1050, 0x109D, .p.property = IDNA_P_PVALID }, { 0x109E, 0x10C5, .p.property = IDNA_P_DISALLOWED }, - { 0x10C6, 0x10CF, .p.property = IDNA_P_UNASSIGNED }, + { 0x10C6, 0x10C6, .p.property = IDNA_P_UNASSIGNED }, + { 0x10C7, 0x10C7, .p.property = IDNA_P_DISALLOWED }, + { 0x10C8, 0x10CC, .p.property = IDNA_P_UNASSIGNED }, + { 0x10CD, 0x10CD, .p.property = IDNA_P_DISALLOWED }, + { 0x10CE, 0x10CF, .p.property = IDNA_P_UNASSIGNED }, { 0x10D0, 0x10FA, .p.property = IDNA_P_PVALID }, { 0x10FB, 0x10FC, .p.property = IDNA_P_DISALLOWED }, - { 0x10FD, 0x10FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x10FD, 0x10FF, .p.property = IDNA_P_PVALID }, { 0x1100, 0x11FF, .p.property = IDNA_P_DISALLOWED }, { 0x1200, 0x1248, .p.property = IDNA_P_PVALID }, { 0x1249, 0x1249, .p.property = IDNA_P_UNASSIGNED }, @@ -1070,15 +1083,17 @@ idna_table idna_derived[] = { { 0x1312, 0x1315, .p.property = IDNA_P_PVALID }, { 0x1316, 0x1317, .p.property = IDNA_P_UNASSIGNED }, { 0x1318, 0x135A, .p.property = IDNA_P_PVALID }, - { 0x135B, 0x135E, .p.property = IDNA_P_UNASSIGNED }, - { 0x135F, 0x135F, .p.property = IDNA_P_PVALID }, + { 0x135B, 0x135C, .p.property = IDNA_P_UNASSIGNED }, + { 0x135D, 0x135F, .p.property = IDNA_P_PVALID }, { 0x1360, 0x137C, .p.property = IDNA_P_DISALLOWED }, { 0x137D, 0x137F, .p.property = IDNA_P_UNASSIGNED }, { 0x1380, 0x138F, .p.property = IDNA_P_PVALID }, { 0x1390, 0x1399, .p.property = IDNA_P_DISALLOWED }, { 0x139A, 0x139F, .p.property = IDNA_P_UNASSIGNED }, - { 0x13A0, 0x13F4, .p.property = IDNA_P_PVALID }, - { 0x13F5, 0x13FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x13A0, 0x13F5, .p.property = IDNA_P_PVALID }, + { 0x13F6, 0x13F7, .p.property = IDNA_P_UNASSIGNED }, + { 0x13F8, 0x13FD, 
.p.property = IDNA_P_DISALLOWED }, + { 0x13FE, 0x13FF, .p.property = IDNA_P_UNASSIGNED }, { 0x1400, 0x1400, .p.property = IDNA_P_DISALLOWED }, { 0x1401, 0x166C, .p.property = IDNA_P_PVALID }, { 0x166D, 0x166E, .p.property = IDNA_P_DISALLOWED }, @@ -1089,7 +1104,8 @@ idna_table idna_derived[] = { { 0x169D, 0x169F, .p.property = IDNA_P_UNASSIGNED }, { 0x16A0, 0x16EA, .p.property = IDNA_P_PVALID }, { 0x16EB, 0x16F0, .p.property = IDNA_P_DISALLOWED }, - { 0x16F1, 0x16FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x16F1, 0x16F8, .p.property = IDNA_P_PVALID }, + { 0x16F9, 0x16FF, .p.property = IDNA_P_UNASSIGNED }, { 0x1700, 0x170C, .p.property = IDNA_P_PVALID }, { 0x170D, 0x170D, .p.property = IDNA_P_UNASSIGNED }, { 0x170E, 0x1714, .p.property = IDNA_P_PVALID }, @@ -1121,14 +1137,14 @@ idna_table idna_derived[] = { { 0x180F, 0x180F, .p.property = IDNA_P_UNASSIGNED }, { 0x1810, 0x1819, .p.property = IDNA_P_PVALID }, { 0x181A, 0x181F, .p.property = IDNA_P_UNASSIGNED }, - { 0x1820, 0x1877, .p.property = IDNA_P_PVALID }, - { 0x1878, 0x187F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1820, 0x1878, .p.property = IDNA_P_PVALID }, + { 0x1879, 0x187F, .p.property = IDNA_P_UNASSIGNED }, { 0x1880, 0x18AA, .p.property = IDNA_P_PVALID }, { 0x18AB, 0x18AF, .p.property = IDNA_P_UNASSIGNED }, { 0x18B0, 0x18F5, .p.property = IDNA_P_PVALID }, { 0x18F6, 0x18FF, .p.property = IDNA_P_UNASSIGNED }, - { 0x1900, 0x191C, .p.property = IDNA_P_PVALID }, - { 0x191D, 0x191F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1900, 0x191E, .p.property = IDNA_P_PVALID }, + { 0x191F, 0x191F, .p.property = IDNA_P_UNASSIGNED }, { 0x1920, 0x192B, .p.property = IDNA_P_PVALID }, { 0x192C, 0x192F, .p.property = IDNA_P_UNASSIGNED }, { 0x1930, 0x193B, .p.property = IDNA_P_PVALID }, @@ -1144,7 +1160,8 @@ idna_table idna_derived[] = { { 0x19AC, 0x19AF, .p.property = IDNA_P_UNASSIGNED }, { 0x19B0, 0x19C9, .p.property = IDNA_P_PVALID }, { 0x19CA, 0x19CF, .p.property = IDNA_P_UNASSIGNED }, - { 0x19D0, 0x19DA, .p.property = 
IDNA_P_PVALID }, + { 0x19D0, 0x19D9, .p.property = IDNA_P_PVALID }, + { 0x19DA, 0x19DA, .p.property = IDNA_P_DISALLOWED }, { 0x19DB, 0x19DD, .p.property = IDNA_P_UNASSIGNED }, { 0x19DE, 0x19FF, .p.property = IDNA_P_DISALLOWED }, { 0x1A00, 0x1A1B, .p.property = IDNA_P_PVALID }, @@ -1161,7 +1178,10 @@ idna_table idna_derived[] = { { 0x1AA0, 0x1AA6, .p.property = IDNA_P_DISALLOWED }, { 0x1AA7, 0x1AA7, .p.property = IDNA_P_PVALID }, { 0x1AA8, 0x1AAD, .p.property = IDNA_P_DISALLOWED }, - { 0x1AAE, 0x1AFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1AAE, 0x1AAF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1AB0, 0x1ABD, .p.property = IDNA_P_PVALID }, + { 0x1ABE, 0x1ABE, .p.property = IDNA_P_DISALLOWED }, + { 0x1ABF, 0x1AFF, .p.property = IDNA_P_UNASSIGNED }, { 0x1B00, 0x1B4B, .p.property = IDNA_P_PVALID }, { 0x1B4C, 0x1B4F, .p.property = IDNA_P_UNASSIGNED }, { 0x1B50, 0x1B59, .p.property = IDNA_P_PVALID }, @@ -1169,22 +1189,25 @@ idna_table idna_derived[] = { { 0x1B6B, 0x1B73, .p.property = IDNA_P_PVALID }, { 0x1B74, 0x1B7C, .p.property = IDNA_P_DISALLOWED }, { 0x1B7D, 0x1B7F, .p.property = IDNA_P_UNASSIGNED }, - { 0x1B80, 0x1BAA, .p.property = IDNA_P_PVALID }, - { 0x1BAB, 0x1BAD, .p.property = IDNA_P_UNASSIGNED }, - { 0x1BAE, 0x1BB9, .p.property = IDNA_P_PVALID }, - { 0x1BBA, 0x1BFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1B80, 0x1BF3, .p.property = IDNA_P_PVALID }, + { 0x1BF4, 0x1BFB, .p.property = IDNA_P_UNASSIGNED }, + { 0x1BFC, 0x1BFF, .p.property = IDNA_P_DISALLOWED }, { 0x1C00, 0x1C37, .p.property = IDNA_P_PVALID }, { 0x1C38, 0x1C3A, .p.property = IDNA_P_UNASSIGNED }, { 0x1C3B, 0x1C3F, .p.property = IDNA_P_DISALLOWED }, { 0x1C40, 0x1C49, .p.property = IDNA_P_PVALID }, { 0x1C4A, 0x1C4C, .p.property = IDNA_P_UNASSIGNED }, { 0x1C4D, 0x1C7D, .p.property = IDNA_P_PVALID }, - { 0x1C7E, 0x1C7F, .p.property = IDNA_P_DISALLOWED }, - { 0x1C80, 0x1CCF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1C7E, 0x1C88, .p.property = IDNA_P_DISALLOWED }, + { 0x1C89, 0x1C8F, .p.property = 
IDNA_P_UNASSIGNED }, + { 0x1C90, 0x1CBA, .p.property = IDNA_P_DISALLOWED }, + { 0x1CBB, 0x1CBC, .p.property = IDNA_P_UNASSIGNED }, + { 0x1CBD, 0x1CC7, .p.property = IDNA_P_DISALLOWED }, + { 0x1CC8, 0x1CCF, .p.property = IDNA_P_UNASSIGNED }, { 0x1CD0, 0x1CD2, .p.property = IDNA_P_PVALID }, { 0x1CD3, 0x1CD3, .p.property = IDNA_P_DISALLOWED }, - { 0x1CD4, 0x1CF2, .p.property = IDNA_P_PVALID }, - { 0x1CF3, 0x1CFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1CD4, 0x1CF9, .p.property = IDNA_P_PVALID }, + { 0x1CFA, 0x1CFF, .p.property = IDNA_P_UNASSIGNED }, { 0x1D00, 0x1D2B, .p.property = IDNA_P_PVALID }, { 0x1D2C, 0x1D2E, .p.property = IDNA_P_DISALLOWED }, { 0x1D2F, 0x1D2F, .p.property = IDNA_P_PVALID }, @@ -1197,9 +1220,9 @@ idna_table idna_derived[] = { { 0x1D78, 0x1D78, .p.property = IDNA_P_DISALLOWED }, { 0x1D79, 0x1D9A, .p.property = IDNA_P_PVALID }, { 0x1D9B, 0x1DBF, .p.property = IDNA_P_DISALLOWED }, - { 0x1DC0, 0x1DE6, .p.property = IDNA_P_PVALID }, - { 0x1DE7, 0x1DFC, .p.property = IDNA_P_UNASSIGNED }, - { 0x1DFD, 0x1DFF, .p.property = IDNA_P_PVALID }, + { 0x1DC0, 0x1DF9, .p.property = IDNA_P_PVALID }, + { 0x1DFA, 0x1DFA, .p.property = IDNA_P_UNASSIGNED }, + { 0x1DFB, 0x1DFF, .p.property = IDNA_P_PVALID }, { 0x1E00, 0x1E00, .p.property = IDNA_P_DISALLOWED }, { 0x1E01, 0x1E01, .p.property = IDNA_P_PVALID }, { 0x1E02, 0x1E02, .p.property = IDNA_P_DISALLOWED }, @@ -1518,65 +1541,35 @@ idna_table idna_derived[] = { { 0x2000, 0x200B, .p.property = IDNA_P_DISALLOWED }, { 0x200C, 0x200D, .p.property = IDNA_P_CONTEXTJ }, { 0x200E, 0x2064, .p.property = IDNA_P_DISALLOWED }, - { 0x2065, 0x2069, .p.property = IDNA_P_UNASSIGNED }, - { 0x206A, 0x2071, .p.property = IDNA_P_DISALLOWED }, + { 0x2065, 0x2065, .p.property = IDNA_P_UNASSIGNED }, + { 0x2066, 0x2071, .p.property = IDNA_P_DISALLOWED }, { 0x2072, 0x2073, .p.property = IDNA_P_UNASSIGNED }, { 0x2074, 0x208E, .p.property = IDNA_P_DISALLOWED }, { 0x208F, 0x208F, .p.property = IDNA_P_UNASSIGNED }, - { 0x2090, 0x2094, 
.p.property = IDNA_P_DISALLOWED }, - { 0x2095, 0x209F, .p.property = IDNA_P_UNASSIGNED }, - { 0x20A0, 0x20B8, .p.property = IDNA_P_DISALLOWED }, - { 0x20B9, 0x20CF, .p.property = IDNA_P_UNASSIGNED }, + { 0x2090, 0x209C, .p.property = IDNA_P_DISALLOWED }, + { 0x209D, 0x209F, .p.property = IDNA_P_UNASSIGNED }, + { 0x20A0, 0x20BF, .p.property = IDNA_P_DISALLOWED }, + { 0x20C0, 0x20CF, .p.property = IDNA_P_UNASSIGNED }, { 0x20D0, 0x20F0, .p.property = IDNA_P_DISALLOWED }, { 0x20F1, 0x20FF, .p.property = IDNA_P_UNASSIGNED }, { 0x2100, 0x214D, .p.property = IDNA_P_DISALLOWED }, { 0x214E, 0x214E, .p.property = IDNA_P_PVALID }, { 0x214F, 0x2183, .p.property = IDNA_P_DISALLOWED }, { 0x2184, 0x2184, .p.property = IDNA_P_PVALID }, - { 0x2185, 0x2189, .p.property = IDNA_P_DISALLOWED }, - { 0x218A, 0x218F, .p.property = IDNA_P_UNASSIGNED }, - { 0x2190, 0x23E8, .p.property = IDNA_P_DISALLOWED }, - { 0x23E9, 0x23FF, .p.property = IDNA_P_UNASSIGNED }, - { 0x2400, 0x2426, .p.property = IDNA_P_DISALLOWED }, + { 0x2185, 0x218B, .p.property = IDNA_P_DISALLOWED }, + { 0x218C, 0x218F, .p.property = IDNA_P_UNASSIGNED }, + { 0x2190, 0x2426, .p.property = IDNA_P_DISALLOWED }, { 0x2427, 0x243F, .p.property = IDNA_P_UNASSIGNED }, { 0x2440, 0x244A, .p.property = IDNA_P_DISALLOWED }, { 0x244B, 0x245F, .p.property = IDNA_P_UNASSIGNED }, - { 0x2460, 0x26CD, .p.property = IDNA_P_DISALLOWED }, - { 0x26CE, 0x26CE, .p.property = IDNA_P_UNASSIGNED }, - { 0x26CF, 0x26E1, .p.property = IDNA_P_DISALLOWED }, - { 0x26E2, 0x26E2, .p.property = IDNA_P_UNASSIGNED }, - { 0x26E3, 0x26E3, .p.property = IDNA_P_DISALLOWED }, - { 0x26E4, 0x26E7, .p.property = IDNA_P_UNASSIGNED }, - { 0x26E8, 0x26FF, .p.property = IDNA_P_DISALLOWED }, - { 0x2700, 0x2700, .p.property = IDNA_P_UNASSIGNED }, - { 0x2701, 0x2704, .p.property = IDNA_P_DISALLOWED }, - { 0x2705, 0x2705, .p.property = IDNA_P_UNASSIGNED }, - { 0x2706, 0x2709, .p.property = IDNA_P_DISALLOWED }, - { 0x270A, 0x270B, .p.property = IDNA_P_UNASSIGNED }, - { 
0x270C, 0x2727, .p.property = IDNA_P_DISALLOWED }, - { 0x2728, 0x2728, .p.property = IDNA_P_UNASSIGNED }, - { 0x2729, 0x274B, .p.property = IDNA_P_DISALLOWED }, - { 0x274C, 0x274C, .p.property = IDNA_P_UNASSIGNED }, - { 0x274D, 0x274D, .p.property = IDNA_P_DISALLOWED }, - { 0x274E, 0x274E, .p.property = IDNA_P_UNASSIGNED }, - { 0x274F, 0x2752, .p.property = IDNA_P_DISALLOWED }, - { 0x2753, 0x2755, .p.property = IDNA_P_UNASSIGNED }, - { 0x2756, 0x275E, .p.property = IDNA_P_DISALLOWED }, - { 0x275F, 0x2760, .p.property = IDNA_P_UNASSIGNED }, - { 0x2761, 0x2794, .p.property = IDNA_P_DISALLOWED }, - { 0x2795, 0x2797, .p.property = IDNA_P_UNASSIGNED }, - { 0x2798, 0x27AF, .p.property = IDNA_P_DISALLOWED }, - { 0x27B0, 0x27B0, .p.property = IDNA_P_UNASSIGNED }, - { 0x27B1, 0x27BE, .p.property = IDNA_P_DISALLOWED }, - { 0x27BF, 0x27BF, .p.property = IDNA_P_UNASSIGNED }, - { 0x27C0, 0x27CA, .p.property = IDNA_P_DISALLOWED }, - { 0x27CB, 0x27CB, .p.property = IDNA_P_UNASSIGNED }, - { 0x27CC, 0x27CC, .p.property = IDNA_P_DISALLOWED }, - { 0x27CD, 0x27CF, .p.property = IDNA_P_UNASSIGNED }, - { 0x27D0, 0x2B4C, .p.property = IDNA_P_DISALLOWED }, - { 0x2B4D, 0x2B4F, .p.property = IDNA_P_UNASSIGNED }, - { 0x2B50, 0x2B59, .p.property = IDNA_P_DISALLOWED }, - { 0x2B5A, 0x2BFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x2460, 0x2B73, .p.property = IDNA_P_DISALLOWED }, + { 0x2B74, 0x2B75, .p.property = IDNA_P_UNASSIGNED }, + { 0x2B76, 0x2B95, .p.property = IDNA_P_DISALLOWED }, + { 0x2B96, 0x2B97, .p.property = IDNA_P_UNASSIGNED }, + { 0x2B98, 0x2BC8, .p.property = IDNA_P_DISALLOWED }, + { 0x2BC9, 0x2BC9, .p.property = IDNA_P_UNASSIGNED }, + { 0x2BCA, 0x2BFE, .p.property = IDNA_P_DISALLOWED }, + { 0x2BFF, 0x2BFF, .p.property = IDNA_P_UNASSIGNED }, { 0x2C00, 0x2C2E, .p.property = IDNA_P_DISALLOWED }, { 0x2C2F, 0x2C2F, .p.property = IDNA_P_UNASSIGNED }, { 0x2C30, 0x2C5E, .p.property = IDNA_P_PVALID }, @@ -1701,15 +1694,21 @@ idna_table idna_derived[] = { { 0x2CEC, 0x2CEC, .p.property = 
IDNA_P_PVALID }, { 0x2CED, 0x2CED, .p.property = IDNA_P_DISALLOWED }, { 0x2CEE, 0x2CF1, .p.property = IDNA_P_PVALID }, - { 0x2CF2, 0x2CF8, .p.property = IDNA_P_UNASSIGNED }, + { 0x2CF2, 0x2CF2, .p.property = IDNA_P_DISALLOWED }, + { 0x2CF3, 0x2CF3, .p.property = IDNA_P_PVALID }, + { 0x2CF4, 0x2CF8, .p.property = IDNA_P_UNASSIGNED }, { 0x2CF9, 0x2CFF, .p.property = IDNA_P_DISALLOWED }, { 0x2D00, 0x2D25, .p.property = IDNA_P_PVALID }, - { 0x2D26, 0x2D2F, .p.property = IDNA_P_UNASSIGNED }, - { 0x2D30, 0x2D65, .p.property = IDNA_P_PVALID }, - { 0x2D66, 0x2D6E, .p.property = IDNA_P_UNASSIGNED }, - { 0x2D6F, 0x2D6F, .p.property = IDNA_P_DISALLOWED }, - { 0x2D70, 0x2D7F, .p.property = IDNA_P_UNASSIGNED }, - { 0x2D80, 0x2D96, .p.property = IDNA_P_PVALID }, + { 0x2D26, 0x2D26, .p.property = IDNA_P_UNASSIGNED }, + { 0x2D27, 0x2D27, .p.property = IDNA_P_PVALID }, + { 0x2D28, 0x2D2C, .p.property = IDNA_P_UNASSIGNED }, + { 0x2D2D, 0x2D2D, .p.property = IDNA_P_PVALID }, + { 0x2D2E, 0x2D2F, .p.property = IDNA_P_UNASSIGNED }, + { 0x2D30, 0x2D67, .p.property = IDNA_P_PVALID }, + { 0x2D68, 0x2D6E, .p.property = IDNA_P_UNASSIGNED }, + { 0x2D6F, 0x2D70, .p.property = IDNA_P_DISALLOWED }, + { 0x2D71, 0x2D7E, .p.property = IDNA_P_UNASSIGNED }, + { 0x2D7F, 0x2D96, .p.property = IDNA_P_PVALID }, { 0x2D97, 0x2D9F, .p.property = IDNA_P_UNASSIGNED }, { 0x2DA0, 0x2DA6, .p.property = IDNA_P_PVALID }, { 0x2DA7, 0x2DA7, .p.property = IDNA_P_UNASSIGNED }, @@ -1730,8 +1729,8 @@ idna_table idna_derived[] = { { 0x2DE0, 0x2DFF, .p.property = IDNA_P_PVALID }, { 0x2E00, 0x2E2E, .p.property = IDNA_P_DISALLOWED }, { 0x2E2F, 0x2E2F, .p.property = IDNA_P_PVALID }, - { 0x2E30, 0x2E31, .p.property = IDNA_P_DISALLOWED }, - { 0x2E32, 0x2E7F, .p.property = IDNA_P_UNASSIGNED }, + { 0x2E30, 0x2E4E, .p.property = IDNA_P_DISALLOWED }, + { 0x2E4F, 0x2E7F, .p.property = IDNA_P_UNASSIGNED }, { 0x2E80, 0x2E99, .p.property = IDNA_P_DISALLOWED }, { 0x2E9A, 0x2E9A, .p.property = IDNA_P_UNASSIGNED }, { 0x2E9B, 0x2EF3, 
.p.property = IDNA_P_DISALLOWED }, @@ -1759,13 +1758,13 @@ idna_table idna_derived[] = { { 0x30FC, 0x30FE, .p.property = IDNA_P_PVALID }, { 0x30FF, 0x30FF, .p.property = IDNA_P_DISALLOWED }, { 0x3100, 0x3104, .p.property = IDNA_P_UNASSIGNED }, - { 0x3105, 0x312D, .p.property = IDNA_P_PVALID }, - { 0x312E, 0x3130, .p.property = IDNA_P_UNASSIGNED }, + { 0x3105, 0x312F, .p.property = IDNA_P_PVALID }, + { 0x3130, 0x3130, .p.property = IDNA_P_UNASSIGNED }, { 0x3131, 0x318E, .p.property = IDNA_P_DISALLOWED }, { 0x318F, 0x318F, .p.property = IDNA_P_UNASSIGNED }, { 0x3190, 0x319F, .p.property = IDNA_P_DISALLOWED }, - { 0x31A0, 0x31B7, .p.property = IDNA_P_PVALID }, - { 0x31B8, 0x31BF, .p.property = IDNA_P_UNASSIGNED }, + { 0x31A0, 0x31BA, .p.property = IDNA_P_PVALID }, + { 0x31BB, 0x31BF, .p.property = IDNA_P_UNASSIGNED }, { 0x31C0, 0x31E3, .p.property = IDNA_P_DISALLOWED }, { 0x31E4, 0x31EF, .p.property = IDNA_P_UNASSIGNED }, { 0x31F0, 0x31FF, .p.property = IDNA_P_PVALID }, @@ -1777,8 +1776,8 @@ idna_table idna_derived[] = { { 0x3400, 0x4DB5, .p.property = IDNA_P_PVALID }, { 0x4DB6, 0x4DBF, .p.property = IDNA_P_UNASSIGNED }, { 0x4DC0, 0x4DFF, .p.property = IDNA_P_DISALLOWED }, - { 0x4E00, 0x9FCB, .p.property = IDNA_P_PVALID }, - { 0x9FCC, 0x9FFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x4E00, 0x9FEF, .p.property = IDNA_P_PVALID }, + { 0x9FF0, 0x9FFF, .p.property = IDNA_P_UNASSIGNED }, { 0xA000, 0xA48C, .p.property = IDNA_P_PVALID }, { 0xA48D, 0xA48F, .p.property = IDNA_P_UNASSIGNED }, { 0xA490, 0xA4C6, .p.property = IDNA_P_DISALLOWED }, @@ -1821,7 +1820,8 @@ idna_table idna_derived[] = { { 0xA65D, 0xA65D, .p.property = IDNA_P_PVALID }, { 0xA65E, 0xA65E, .p.property = IDNA_P_DISALLOWED }, { 0xA65F, 0xA65F, .p.property = IDNA_P_PVALID }, - { 0xA660, 0xA661, .p.property = IDNA_P_UNASSIGNED }, + { 0xA660, 0xA660, .p.property = IDNA_P_DISALLOWED }, + { 0xA661, 0xA661, .p.property = IDNA_P_PVALID }, { 0xA662, 0xA662, .p.property = IDNA_P_DISALLOWED }, { 0xA663, 0xA663, 
.p.property = IDNA_P_PVALID }, { 0xA664, 0xA664, .p.property = IDNA_P_DISALLOWED }, @@ -1835,8 +1835,7 @@ idna_table idna_derived[] = { { 0xA66C, 0xA66C, .p.property = IDNA_P_DISALLOWED }, { 0xA66D, 0xA66F, .p.property = IDNA_P_PVALID }, { 0xA670, 0xA673, .p.property = IDNA_P_DISALLOWED }, - { 0xA674, 0xA67B, .p.property = IDNA_P_UNASSIGNED }, - { 0xA67C, 0xA67D, .p.property = IDNA_P_PVALID }, + { 0xA674, 0xA67D, .p.property = IDNA_P_PVALID }, { 0xA67E, 0xA67E, .p.property = IDNA_P_DISALLOWED }, { 0xA67F, 0xA67F, .p.property = IDNA_P_PVALID }, { 0xA680, 0xA680, .p.property = IDNA_P_DISALLOWED }, @@ -1863,8 +1862,12 @@ idna_table idna_derived[] = { { 0xA695, 0xA695, .p.property = IDNA_P_PVALID }, { 0xA696, 0xA696, .p.property = IDNA_P_DISALLOWED }, { 0xA697, 0xA697, .p.property = IDNA_P_PVALID }, - { 0xA698, 0xA69F, .p.property = IDNA_P_UNASSIGNED }, - { 0xA6A0, 0xA6E5, .p.property = IDNA_P_PVALID }, + { 0xA698, 0xA698, .p.property = IDNA_P_DISALLOWED }, + { 0xA699, 0xA699, .p.property = IDNA_P_PVALID }, + { 0xA69A, 0xA69A, .p.property = IDNA_P_DISALLOWED }, + { 0xA69B, 0xA69B, .p.property = IDNA_P_PVALID }, + { 0xA69C, 0xA69D, .p.property = IDNA_P_DISALLOWED }, + { 0xA69E, 0xA6E5, .p.property = IDNA_P_PVALID }, { 0xA6E6, 0xA6EF, .p.property = IDNA_P_DISALLOWED }, { 0xA6F0, 0xA6F1, .p.property = IDNA_P_PVALID }, { 0xA6F2, 0xA6F7, .p.property = IDNA_P_DISALLOWED }, @@ -1965,8 +1968,44 @@ idna_table idna_derived[] = { { 0xA787, 0xA788, .p.property = IDNA_P_PVALID }, { 0xA789, 0xA78B, .p.property = IDNA_P_DISALLOWED }, { 0xA78C, 0xA78C, .p.property = IDNA_P_PVALID }, - { 0xA78D, 0xA7FA, .p.property = IDNA_P_UNASSIGNED }, - { 0xA7FB, 0xA827, .p.property = IDNA_P_PVALID }, + { 0xA78D, 0xA78D, .p.property = IDNA_P_DISALLOWED }, + { 0xA78E, 0xA78F, .p.property = IDNA_P_PVALID }, + { 0xA790, 0xA790, .p.property = IDNA_P_DISALLOWED }, + { 0xA791, 0xA791, .p.property = IDNA_P_PVALID }, + { 0xA792, 0xA792, .p.property = IDNA_P_DISALLOWED }, + { 0xA793, 0xA795, .p.property = 
IDNA_P_PVALID }, + { 0xA796, 0xA796, .p.property = IDNA_P_DISALLOWED }, + { 0xA797, 0xA797, .p.property = IDNA_P_PVALID }, + { 0xA798, 0xA798, .p.property = IDNA_P_DISALLOWED }, + { 0xA799, 0xA799, .p.property = IDNA_P_PVALID }, + { 0xA79A, 0xA79A, .p.property = IDNA_P_DISALLOWED }, + { 0xA79B, 0xA79B, .p.property = IDNA_P_PVALID }, + { 0xA79C, 0xA79C, .p.property = IDNA_P_DISALLOWED }, + { 0xA79D, 0xA79D, .p.property = IDNA_P_PVALID }, + { 0xA79E, 0xA79E, .p.property = IDNA_P_DISALLOWED }, + { 0xA79F, 0xA79F, .p.property = IDNA_P_PVALID }, + { 0xA7A0, 0xA7A0, .p.property = IDNA_P_DISALLOWED }, + { 0xA7A1, 0xA7A1, .p.property = IDNA_P_PVALID }, + { 0xA7A2, 0xA7A2, .p.property = IDNA_P_DISALLOWED }, + { 0xA7A3, 0xA7A3, .p.property = IDNA_P_PVALID }, + { 0xA7A4, 0xA7A4, .p.property = IDNA_P_DISALLOWED }, + { 0xA7A5, 0xA7A5, .p.property = IDNA_P_PVALID }, + { 0xA7A6, 0xA7A6, .p.property = IDNA_P_DISALLOWED }, + { 0xA7A7, 0xA7A7, .p.property = IDNA_P_PVALID }, + { 0xA7A8, 0xA7A8, .p.property = IDNA_P_DISALLOWED }, + { 0xA7A9, 0xA7A9, .p.property = IDNA_P_PVALID }, + { 0xA7AA, 0xA7AE, .p.property = IDNA_P_DISALLOWED }, + { 0xA7AF, 0xA7AF, .p.property = IDNA_P_PVALID }, + { 0xA7B0, 0xA7B4, .p.property = IDNA_P_DISALLOWED }, + { 0xA7B5, 0xA7B5, .p.property = IDNA_P_PVALID }, + { 0xA7B6, 0xA7B6, .p.property = IDNA_P_DISALLOWED }, + { 0xA7B7, 0xA7B7, .p.property = IDNA_P_PVALID }, + { 0xA7B8, 0xA7B8, .p.property = IDNA_P_DISALLOWED }, + { 0xA7B9, 0xA7B9, .p.property = IDNA_P_PVALID }, + { 0xA7BA, 0xA7F6, .p.property = IDNA_P_UNASSIGNED }, + { 0xA7F7, 0xA7F7, .p.property = IDNA_P_PVALID }, + { 0xA7F8, 0xA7F9, .p.property = IDNA_P_DISALLOWED }, + { 0xA7FA, 0xA827, .p.property = IDNA_P_PVALID }, { 0xA828, 0xA82B, .p.property = IDNA_P_DISALLOWED }, { 0xA82C, 0xA82F, .p.property = IDNA_P_UNASSIGNED }, { 0xA830, 0xA839, .p.property = IDNA_P_DISALLOWED }, @@ -1974,16 +2013,16 @@ idna_table idna_derived[] = { { 0xA840, 0xA873, .p.property = IDNA_P_PVALID }, { 0xA874, 0xA877, 
.p.property = IDNA_P_DISALLOWED }, { 0xA878, 0xA87F, .p.property = IDNA_P_UNASSIGNED }, - { 0xA880, 0xA8C4, .p.property = IDNA_P_PVALID }, - { 0xA8C5, 0xA8CD, .p.property = IDNA_P_UNASSIGNED }, + { 0xA880, 0xA8C5, .p.property = IDNA_P_PVALID }, + { 0xA8C6, 0xA8CD, .p.property = IDNA_P_UNASSIGNED }, { 0xA8CE, 0xA8CF, .p.property = IDNA_P_DISALLOWED }, { 0xA8D0, 0xA8D9, .p.property = IDNA_P_PVALID }, { 0xA8DA, 0xA8DF, .p.property = IDNA_P_UNASSIGNED }, { 0xA8E0, 0xA8F7, .p.property = IDNA_P_PVALID }, { 0xA8F8, 0xA8FA, .p.property = IDNA_P_DISALLOWED }, { 0xA8FB, 0xA8FB, .p.property = IDNA_P_PVALID }, - { 0xA8FC, 0xA8FF, .p.property = IDNA_P_UNASSIGNED }, - { 0xA900, 0xA92D, .p.property = IDNA_P_PVALID }, + { 0xA8FC, 0xA8FC, .p.property = IDNA_P_DISALLOWED }, + { 0xA8FD, 0xA92D, .p.property = IDNA_P_PVALID }, { 0xA92E, 0xA92F, .p.property = IDNA_P_DISALLOWED }, { 0xA930, 0xA953, .p.property = IDNA_P_PVALID }, { 0xA954, 0xA95E, .p.property = IDNA_P_UNASSIGNED }, @@ -1995,7 +2034,8 @@ idna_table idna_derived[] = { { 0xA9CF, 0xA9D9, .p.property = IDNA_P_PVALID }, { 0xA9DA, 0xA9DD, .p.property = IDNA_P_UNASSIGNED }, { 0xA9DE, 0xA9DF, .p.property = IDNA_P_DISALLOWED }, - { 0xA9E0, 0xA9FF, .p.property = IDNA_P_UNASSIGNED }, + { 0xA9E0, 0xA9FE, .p.property = IDNA_P_PVALID }, + { 0xA9FF, 0xA9FF, .p.property = IDNA_P_UNASSIGNED }, { 0xAA00, 0xAA36, .p.property = IDNA_P_PVALID }, { 0xAA37, 0xAA3F, .p.property = IDNA_P_UNASSIGNED }, { 0xAA40, 0xAA4D, .p.property = IDNA_P_PVALID }, @@ -2005,13 +2045,29 @@ idna_table idna_derived[] = { { 0xAA5C, 0xAA5F, .p.property = IDNA_P_DISALLOWED }, { 0xAA60, 0xAA76, .p.property = IDNA_P_PVALID }, { 0xAA77, 0xAA79, .p.property = IDNA_P_DISALLOWED }, - { 0xAA7A, 0xAA7B, .p.property = IDNA_P_PVALID }, - { 0xAA7C, 0xAA7F, .p.property = IDNA_P_UNASSIGNED }, - { 0xAA80, 0xAAC2, .p.property = IDNA_P_PVALID }, + { 0xAA7A, 0xAAC2, .p.property = IDNA_P_PVALID }, { 0xAAC3, 0xAADA, .p.property = IDNA_P_UNASSIGNED }, { 0xAADB, 0xAADD, .p.property = 
IDNA_P_PVALID }, { 0xAADE, 0xAADF, .p.property = IDNA_P_DISALLOWED }, - { 0xAAE0, 0xABBF, .p.property = IDNA_P_UNASSIGNED }, + { 0xAAE0, 0xAAEF, .p.property = IDNA_P_PVALID }, + { 0xAAF0, 0xAAF1, .p.property = IDNA_P_DISALLOWED }, + { 0xAAF2, 0xAAF6, .p.property = IDNA_P_PVALID }, + { 0xAAF7, 0xAB00, .p.property = IDNA_P_UNASSIGNED }, + { 0xAB01, 0xAB06, .p.property = IDNA_P_PVALID }, + { 0xAB07, 0xAB08, .p.property = IDNA_P_UNASSIGNED }, + { 0xAB09, 0xAB0E, .p.property = IDNA_P_PVALID }, + { 0xAB0F, 0xAB10, .p.property = IDNA_P_UNASSIGNED }, + { 0xAB11, 0xAB16, .p.property = IDNA_P_PVALID }, + { 0xAB17, 0xAB1F, .p.property = IDNA_P_UNASSIGNED }, + { 0xAB20, 0xAB26, .p.property = IDNA_P_PVALID }, + { 0xAB27, 0xAB27, .p.property = IDNA_P_UNASSIGNED }, + { 0xAB28, 0xAB2E, .p.property = IDNA_P_PVALID }, + { 0xAB2F, 0xAB2F, .p.property = IDNA_P_UNASSIGNED }, + { 0xAB30, 0xAB5A, .p.property = IDNA_P_PVALID }, + { 0xAB5B, 0xAB5F, .p.property = IDNA_P_DISALLOWED }, + { 0xAB60, 0xAB65, .p.property = IDNA_P_PVALID }, + { 0xAB66, 0xAB6F, .p.property = IDNA_P_UNASSIGNED }, + { 0xAB70, 0xABBF, .p.property = IDNA_P_DISALLOWED }, { 0xABC0, 0xABEA, .p.property = IDNA_P_PVALID }, { 0xABEB, 0xABEB, .p.property = IDNA_P_DISALLOWED }, { 0xABEC, 0xABED, .p.property = IDNA_P_PVALID }, @@ -2038,9 +2094,7 @@ idna_table idna_derived[] = { { 0xFA23, 0xFA24, .p.property = IDNA_P_PVALID }, { 0xFA25, 0xFA26, .p.property = IDNA_P_DISALLOWED }, { 0xFA27, 0xFA29, .p.property = IDNA_P_PVALID }, - { 0xFA2A, 0xFA2D, .p.property = IDNA_P_DISALLOWED }, - { 0xFA2E, 0xFA2F, .p.property = IDNA_P_UNASSIGNED }, - { 0xFA30, 0xFA6D, .p.property = IDNA_P_DISALLOWED }, + { 0xFA2A, 0xFA6D, .p.property = IDNA_P_DISALLOWED }, { 0xFA6E, 0xFA6F, .p.property = IDNA_P_UNASSIGNED }, { 0xFA70, 0xFAD9, .p.property = IDNA_P_DISALLOWED }, { 0xFADA, 0xFAFF, .p.property = IDNA_P_UNASSIGNED }, @@ -2060,8 +2114,8 @@ idna_table idna_derived[] = { { 0xFB42, 0xFB42, .p.property = IDNA_P_UNASSIGNED }, { 0xFB43, 0xFB44, 
.p.property = IDNA_P_DISALLOWED }, { 0xFB45, 0xFB45, .p.property = IDNA_P_UNASSIGNED }, - { 0xFB46, 0xFBB1, .p.property = IDNA_P_DISALLOWED }, - { 0xFBB2, 0xFBD2, .p.property = IDNA_P_UNASSIGNED }, + { 0xFB46, 0xFBC1, .p.property = IDNA_P_DISALLOWED }, + { 0xFBC2, 0xFBD2, .p.property = IDNA_P_UNASSIGNED }, { 0xFBD3, 0xFD3F, .p.property = IDNA_P_DISALLOWED }, { 0xFD40, 0xFD4F, .p.property = IDNA_P_UNASSIGNED }, { 0xFD50, 0xFD8F, .p.property = IDNA_P_DISALLOWED }, @@ -2072,8 +2126,7 @@ idna_table idna_derived[] = { { 0xFDFE, 0xFDFF, .p.property = IDNA_P_UNASSIGNED }, { 0xFE00, 0xFE19, .p.property = IDNA_P_DISALLOWED }, { 0xFE1A, 0xFE1F, .p.property = IDNA_P_UNASSIGNED }, - { 0xFE20, 0xFE26, .p.property = IDNA_P_PVALID }, - { 0xFE27, 0xFE2F, .p.property = IDNA_P_UNASSIGNED }, + { 0xFE20, 0xFE2F, .p.property = IDNA_P_PVALID }, { 0xFE30, 0xFE52, .p.property = IDNA_P_DISALLOWED }, { 0xFE53, 0xFE53, .p.property = IDNA_P_UNASSIGNED }, { 0xFE54, 0xFE66, .p.property = IDNA_P_DISALLOWED }, @@ -2121,26 +2174,32 @@ idna_table idna_derived[] = { { 0x10103, 0x10106, .p.property = IDNA_P_UNASSIGNED }, { 0x10107, 0x10133, .p.property = IDNA_P_DISALLOWED }, { 0x10134, 0x10136, .p.property = IDNA_P_UNASSIGNED }, - { 0x10137, 0x1018A, .p.property = IDNA_P_DISALLOWED }, - { 0x1018B, 0x1018F, .p.property = IDNA_P_UNASSIGNED }, + { 0x10137, 0x1018E, .p.property = IDNA_P_DISALLOWED }, + { 0x1018F, 0x1018F, .p.property = IDNA_P_UNASSIGNED }, { 0x10190, 0x1019B, .p.property = IDNA_P_DISALLOWED }, - { 0x1019C, 0x101CF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1019C, 0x1019F, .p.property = IDNA_P_UNASSIGNED }, + { 0x101A0, 0x101A0, .p.property = IDNA_P_DISALLOWED }, + { 0x101A1, 0x101CF, .p.property = IDNA_P_UNASSIGNED }, { 0x101D0, 0x101FC, .p.property = IDNA_P_DISALLOWED }, { 0x101FD, 0x101FD, .p.property = IDNA_P_PVALID }, { 0x101FE, 0x1027F, .p.property = IDNA_P_UNASSIGNED }, { 0x10280, 0x1029C, .p.property = IDNA_P_PVALID }, { 0x1029D, 0x1029F, .p.property = IDNA_P_UNASSIGNED }, { 
0x102A0, 0x102D0, .p.property = IDNA_P_PVALID }, - { 0x102D1, 0x102FF, .p.property = IDNA_P_UNASSIGNED }, - { 0x10300, 0x1031E, .p.property = IDNA_P_PVALID }, - { 0x1031F, 0x1031F, .p.property = IDNA_P_UNASSIGNED }, + { 0x102D1, 0x102DF, .p.property = IDNA_P_UNASSIGNED }, + { 0x102E0, 0x102E0, .p.property = IDNA_P_PVALID }, + { 0x102E1, 0x102FB, .p.property = IDNA_P_DISALLOWED }, + { 0x102FC, 0x102FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x10300, 0x1031F, .p.property = IDNA_P_PVALID }, { 0x10320, 0x10323, .p.property = IDNA_P_DISALLOWED }, - { 0x10324, 0x1032F, .p.property = IDNA_P_UNASSIGNED }, - { 0x10330, 0x10340, .p.property = IDNA_P_PVALID }, + { 0x10324, 0x1032C, .p.property = IDNA_P_UNASSIGNED }, + { 0x1032D, 0x10340, .p.property = IDNA_P_PVALID }, { 0x10341, 0x10341, .p.property = IDNA_P_DISALLOWED }, { 0x10342, 0x10349, .p.property = IDNA_P_PVALID }, { 0x1034A, 0x1034A, .p.property = IDNA_P_DISALLOWED }, - { 0x1034B, 0x1037F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1034B, 0x1034F, .p.property = IDNA_P_UNASSIGNED }, + { 0x10350, 0x1037A, .p.property = IDNA_P_PVALID }, + { 0x1037B, 0x1037F, .p.property = IDNA_P_UNASSIGNED }, { 0x10380, 0x1039D, .p.property = IDNA_P_PVALID }, { 0x1039E, 0x1039E, .p.property = IDNA_P_UNASSIGNED }, { 0x1039F, 0x1039F, .p.property = IDNA_P_DISALLOWED }, @@ -2153,7 +2212,23 @@ idna_table idna_derived[] = { { 0x10428, 0x1049D, .p.property = IDNA_P_PVALID }, { 0x1049E, 0x1049F, .p.property = IDNA_P_UNASSIGNED }, { 0x104A0, 0x104A9, .p.property = IDNA_P_PVALID }, - { 0x104AA, 0x107FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x104AA, 0x104AF, .p.property = IDNA_P_UNASSIGNED }, + { 0x104B0, 0x104D3, .p.property = IDNA_P_DISALLOWED }, + { 0x104D4, 0x104D7, .p.property = IDNA_P_UNASSIGNED }, + { 0x104D8, 0x104FB, .p.property = IDNA_P_PVALID }, + { 0x104FC, 0x104FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x10500, 0x10527, .p.property = IDNA_P_PVALID }, + { 0x10528, 0x1052F, .p.property = IDNA_P_UNASSIGNED }, + { 0x10530, 0x10563, 
.p.property = IDNA_P_PVALID }, + { 0x10564, 0x1056E, .p.property = IDNA_P_UNASSIGNED }, + { 0x1056F, 0x1056F, .p.property = IDNA_P_DISALLOWED }, + { 0x10570, 0x105FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x10600, 0x10736, .p.property = IDNA_P_PVALID }, + { 0x10737, 0x1073F, .p.property = IDNA_P_UNASSIGNED }, + { 0x10740, 0x10755, .p.property = IDNA_P_PVALID }, + { 0x10756, 0x1075F, .p.property = IDNA_P_UNASSIGNED }, + { 0x10760, 0x10767, .p.property = IDNA_P_PVALID }, + { 0x10768, 0x107FF, .p.property = IDNA_P_UNASSIGNED }, { 0x10800, 0x10805, .p.property = IDNA_P_PVALID }, { 0x10806, 0x10807, .p.property = IDNA_P_UNASSIGNED }, { 0x10808, 0x10808, .p.property = IDNA_P_PVALID }, @@ -2167,7 +2242,17 @@ idna_table idna_derived[] = { { 0x1083F, 0x10855, .p.property = IDNA_P_PVALID }, { 0x10856, 0x10856, .p.property = IDNA_P_UNASSIGNED }, { 0x10857, 0x1085F, .p.property = IDNA_P_DISALLOWED }, - { 0x10860, 0x108FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x10860, 0x10876, .p.property = IDNA_P_PVALID }, + { 0x10877, 0x1087F, .p.property = IDNA_P_DISALLOWED }, + { 0x10880, 0x1089E, .p.property = IDNA_P_PVALID }, + { 0x1089F, 0x108A6, .p.property = IDNA_P_UNASSIGNED }, + { 0x108A7, 0x108AF, .p.property = IDNA_P_DISALLOWED }, + { 0x108B0, 0x108DF, .p.property = IDNA_P_UNASSIGNED }, + { 0x108E0, 0x108F2, .p.property = IDNA_P_PVALID }, + { 0x108F3, 0x108F3, .p.property = IDNA_P_UNASSIGNED }, + { 0x108F4, 0x108F5, .p.property = IDNA_P_PVALID }, + { 0x108F6, 0x108FA, .p.property = IDNA_P_UNASSIGNED }, + { 0x108FB, 0x108FF, .p.property = IDNA_P_DISALLOWED }, { 0x10900, 0x10915, .p.property = IDNA_P_PVALID }, { 0x10916, 0x1091B, .p.property = IDNA_P_DISALLOWED }, { 0x1091C, 0x1091E, .p.property = IDNA_P_UNASSIGNED }, @@ -2175,7 +2260,14 @@ idna_table idna_derived[] = { { 0x10920, 0x10939, .p.property = IDNA_P_PVALID }, { 0x1093A, 0x1093E, .p.property = IDNA_P_UNASSIGNED }, { 0x1093F, 0x1093F, .p.property = IDNA_P_DISALLOWED }, - { 0x10940, 0x109FF, .p.property = IDNA_P_UNASSIGNED 
}, + { 0x10940, 0x1097F, .p.property = IDNA_P_UNASSIGNED }, + { 0x10980, 0x109B7, .p.property = IDNA_P_PVALID }, + { 0x109B8, 0x109BB, .p.property = IDNA_P_UNASSIGNED }, + { 0x109BC, 0x109BD, .p.property = IDNA_P_DISALLOWED }, + { 0x109BE, 0x109BF, .p.property = IDNA_P_PVALID }, + { 0x109C0, 0x109CF, .p.property = IDNA_P_DISALLOWED }, + { 0x109D0, 0x109D1, .p.property = IDNA_P_UNASSIGNED }, + { 0x109D2, 0x109FF, .p.property = IDNA_P_DISALLOWED }, { 0x10A00, 0x10A03, .p.property = IDNA_P_PVALID }, { 0x10A04, 0x10A04, .p.property = IDNA_P_UNASSIGNED }, { 0x10A05, 0x10A06, .p.property = IDNA_P_PVALID }, @@ -2184,18 +2276,26 @@ idna_table idna_derived[] = { { 0x10A14, 0x10A14, .p.property = IDNA_P_UNASSIGNED }, { 0x10A15, 0x10A17, .p.property = IDNA_P_PVALID }, { 0x10A18, 0x10A18, .p.property = IDNA_P_UNASSIGNED }, - { 0x10A19, 0x10A33, .p.property = IDNA_P_PVALID }, - { 0x10A34, 0x10A37, .p.property = IDNA_P_UNASSIGNED }, + { 0x10A19, 0x10A35, .p.property = IDNA_P_PVALID }, + { 0x10A36, 0x10A37, .p.property = IDNA_P_UNASSIGNED }, { 0x10A38, 0x10A3A, .p.property = IDNA_P_PVALID }, { 0x10A3B, 0x10A3E, .p.property = IDNA_P_UNASSIGNED }, { 0x10A3F, 0x10A3F, .p.property = IDNA_P_PVALID }, - { 0x10A40, 0x10A47, .p.property = IDNA_P_DISALLOWED }, - { 0x10A48, 0x10A4F, .p.property = IDNA_P_UNASSIGNED }, + { 0x10A40, 0x10A48, .p.property = IDNA_P_DISALLOWED }, + { 0x10A49, 0x10A4F, .p.property = IDNA_P_UNASSIGNED }, { 0x10A50, 0x10A58, .p.property = IDNA_P_DISALLOWED }, { 0x10A59, 0x10A5F, .p.property = IDNA_P_UNASSIGNED }, { 0x10A60, 0x10A7C, .p.property = IDNA_P_PVALID }, { 0x10A7D, 0x10A7F, .p.property = IDNA_P_DISALLOWED }, - { 0x10A80, 0x10AFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x10A80, 0x10A9C, .p.property = IDNA_P_PVALID }, + { 0x10A9D, 0x10A9F, .p.property = IDNA_P_DISALLOWED }, + { 0x10AA0, 0x10ABF, .p.property = IDNA_P_UNASSIGNED }, + { 0x10AC0, 0x10AC7, .p.property = IDNA_P_PVALID }, + { 0x10AC8, 0x10AC8, .p.property = IDNA_P_DISALLOWED }, + { 0x10AC9, 0x10AE6, 
.p.property = IDNA_P_PVALID }, + { 0x10AE7, 0x10AEA, .p.property = IDNA_P_UNASSIGNED }, + { 0x10AEB, 0x10AF6, .p.property = IDNA_P_DISALLOWED }, + { 0x10AF7, 0x10AFF, .p.property = IDNA_P_UNASSIGNED }, { 0x10B00, 0x10B35, .p.property = IDNA_P_PVALID }, { 0x10B36, 0x10B38, .p.property = IDNA_P_UNASSIGNED }, { 0x10B39, 0x10B3F, .p.property = IDNA_P_DISALLOWED }, @@ -2205,34 +2305,312 @@ idna_table idna_derived[] = { { 0x10B60, 0x10B72, .p.property = IDNA_P_PVALID }, { 0x10B73, 0x10B77, .p.property = IDNA_P_UNASSIGNED }, { 0x10B78, 0x10B7F, .p.property = IDNA_P_DISALLOWED }, - { 0x10B80, 0x10BFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x10B80, 0x10B91, .p.property = IDNA_P_PVALID }, + { 0x10B92, 0x10B98, .p.property = IDNA_P_UNASSIGNED }, + { 0x10B99, 0x10B9C, .p.property = IDNA_P_DISALLOWED }, + { 0x10B9D, 0x10BA8, .p.property = IDNA_P_UNASSIGNED }, + { 0x10BA9, 0x10BAF, .p.property = IDNA_P_DISALLOWED }, + { 0x10BB0, 0x10BFF, .p.property = IDNA_P_UNASSIGNED }, { 0x10C00, 0x10C48, .p.property = IDNA_P_PVALID }, - { 0x10C49, 0x10E5F, .p.property = IDNA_P_UNASSIGNED }, + { 0x10C49, 0x10C7F, .p.property = IDNA_P_UNASSIGNED }, + { 0x10C80, 0x10CB2, .p.property = IDNA_P_DISALLOWED }, + { 0x10CB3, 0x10CBF, .p.property = IDNA_P_UNASSIGNED }, + { 0x10CC0, 0x10CF2, .p.property = IDNA_P_PVALID }, + { 0x10CF3, 0x10CF9, .p.property = IDNA_P_UNASSIGNED }, + { 0x10CFA, 0x10CFF, .p.property = IDNA_P_DISALLOWED }, + { 0x10D00, 0x10D27, .p.property = IDNA_P_PVALID }, + { 0x10D28, 0x10D2F, .p.property = IDNA_P_UNASSIGNED }, + { 0x10D30, 0x10D39, .p.property = IDNA_P_PVALID }, + { 0x10D3A, 0x10E5F, .p.property = IDNA_P_UNASSIGNED }, { 0x10E60, 0x10E7E, .p.property = IDNA_P_DISALLOWED }, - { 0x10E7F, 0x1107F, .p.property = IDNA_P_UNASSIGNED }, - { 0x11080, 0x110BA, .p.property = IDNA_P_PVALID }, + { 0x10E7F, 0x10EFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x10F00, 0x10F1C, .p.property = IDNA_P_PVALID }, + { 0x10F1D, 0x10F26, .p.property = IDNA_P_DISALLOWED }, + { 0x10F27, 0x10F27, 
.p.property = IDNA_P_PVALID }, + { 0x10F28, 0x10F2F, .p.property = IDNA_P_UNASSIGNED }, + { 0x10F30, 0x10F50, .p.property = IDNA_P_PVALID }, + { 0x10F51, 0x10F59, .p.property = IDNA_P_DISALLOWED }, + { 0x10F5A, 0x10FFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x11000, 0x11046, .p.property = IDNA_P_PVALID }, + { 0x11047, 0x1104D, .p.property = IDNA_P_DISALLOWED }, + { 0x1104E, 0x11051, .p.property = IDNA_P_UNASSIGNED }, + { 0x11052, 0x11065, .p.property = IDNA_P_DISALLOWED }, + { 0x11066, 0x1106F, .p.property = IDNA_P_PVALID }, + { 0x11070, 0x1107E, .p.property = IDNA_P_UNASSIGNED }, + { 0x1107F, 0x110BA, .p.property = IDNA_P_PVALID }, { 0x110BB, 0x110C1, .p.property = IDNA_P_DISALLOWED }, - { 0x110C2, 0x11FFF, .p.property = IDNA_P_UNASSIGNED }, - { 0x12000, 0x1236E, .p.property = IDNA_P_PVALID }, - { 0x1236F, 0x123FF, .p.property = IDNA_P_UNASSIGNED }, - { 0x12400, 0x12462, .p.property = IDNA_P_DISALLOWED }, - { 0x12463, 0x1246F, .p.property = IDNA_P_UNASSIGNED }, - { 0x12470, 0x12473, .p.property = IDNA_P_DISALLOWED }, - { 0x12474, 0x12FFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x110C2, 0x110CC, .p.property = IDNA_P_UNASSIGNED }, + { 0x110CD, 0x110CD, .p.property = IDNA_P_DISALLOWED }, + { 0x110CE, 0x110CF, .p.property = IDNA_P_UNASSIGNED }, + { 0x110D0, 0x110E8, .p.property = IDNA_P_PVALID }, + { 0x110E9, 0x110EF, .p.property = IDNA_P_UNASSIGNED }, + { 0x110F0, 0x110F9, .p.property = IDNA_P_PVALID }, + { 0x110FA, 0x110FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x11100, 0x11134, .p.property = IDNA_P_PVALID }, + { 0x11135, 0x11135, .p.property = IDNA_P_UNASSIGNED }, + { 0x11136, 0x1113F, .p.property = IDNA_P_PVALID }, + { 0x11140, 0x11143, .p.property = IDNA_P_DISALLOWED }, + { 0x11144, 0x11146, .p.property = IDNA_P_PVALID }, + { 0x11147, 0x1114F, .p.property = IDNA_P_UNASSIGNED }, + { 0x11150, 0x11173, .p.property = IDNA_P_PVALID }, + { 0x11174, 0x11175, .p.property = IDNA_P_DISALLOWED }, + { 0x11176, 0x11176, .p.property = IDNA_P_PVALID }, + { 0x11177, 0x1117F, 
.p.property = IDNA_P_UNASSIGNED }, + { 0x11180, 0x111C4, .p.property = IDNA_P_PVALID }, + { 0x111C5, 0x111C8, .p.property = IDNA_P_DISALLOWED }, + { 0x111C9, 0x111CC, .p.property = IDNA_P_PVALID }, + { 0x111CD, 0x111CD, .p.property = IDNA_P_DISALLOWED }, + { 0x111CE, 0x111CF, .p.property = IDNA_P_UNASSIGNED }, + { 0x111D0, 0x111DA, .p.property = IDNA_P_PVALID }, + { 0x111DB, 0x111DB, .p.property = IDNA_P_DISALLOWED }, + { 0x111DC, 0x111DC, .p.property = IDNA_P_PVALID }, + { 0x111DD, 0x111DF, .p.property = IDNA_P_DISALLOWED }, + { 0x111E0, 0x111E0, .p.property = IDNA_P_UNASSIGNED }, + { 0x111E1, 0x111F4, .p.property = IDNA_P_DISALLOWED }, + { 0x111F5, 0x111FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x11200, 0x11211, .p.property = IDNA_P_PVALID }, + { 0x11212, 0x11212, .p.property = IDNA_P_UNASSIGNED }, + { 0x11213, 0x11237, .p.property = IDNA_P_PVALID }, + { 0x11238, 0x1123D, .p.property = IDNA_P_DISALLOWED }, + { 0x1123E, 0x1123E, .p.property = IDNA_P_PVALID }, + { 0x1123F, 0x1127F, .p.property = IDNA_P_UNASSIGNED }, + { 0x11280, 0x11286, .p.property = IDNA_P_PVALID }, + { 0x11287, 0x11287, .p.property = IDNA_P_UNASSIGNED }, + { 0x11288, 0x11288, .p.property = IDNA_P_PVALID }, + { 0x11289, 0x11289, .p.property = IDNA_P_UNASSIGNED }, + { 0x1128A, 0x1128D, .p.property = IDNA_P_PVALID }, + { 0x1128E, 0x1128E, .p.property = IDNA_P_UNASSIGNED }, + { 0x1128F, 0x1129D, .p.property = IDNA_P_PVALID }, + { 0x1129E, 0x1129E, .p.property = IDNA_P_UNASSIGNED }, + { 0x1129F, 0x112A8, .p.property = IDNA_P_PVALID }, + { 0x112A9, 0x112A9, .p.property = IDNA_P_DISALLOWED }, + { 0x112AA, 0x112AF, .p.property = IDNA_P_UNASSIGNED }, + { 0x112B0, 0x112EA, .p.property = IDNA_P_PVALID }, + { 0x112EB, 0x112EF, .p.property = IDNA_P_UNASSIGNED }, + { 0x112F0, 0x112F9, .p.property = IDNA_P_PVALID }, + { 0x112FA, 0x112FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x11300, 0x11303, .p.property = IDNA_P_PVALID }, + { 0x11304, 0x11304, .p.property = IDNA_P_UNASSIGNED }, + { 0x11305, 0x1130C, 
.p.property = IDNA_P_PVALID }, + { 0x1130D, 0x1130E, .p.property = IDNA_P_UNASSIGNED }, + { 0x1130F, 0x11310, .p.property = IDNA_P_PVALID }, + { 0x11311, 0x11312, .p.property = IDNA_P_UNASSIGNED }, + { 0x11313, 0x11328, .p.property = IDNA_P_PVALID }, + { 0x11329, 0x11329, .p.property = IDNA_P_UNASSIGNED }, + { 0x1132A, 0x11330, .p.property = IDNA_P_PVALID }, + { 0x11331, 0x11331, .p.property = IDNA_P_UNASSIGNED }, + { 0x11332, 0x11333, .p.property = IDNA_P_PVALID }, + { 0x11334, 0x11334, .p.property = IDNA_P_UNASSIGNED }, + { 0x11335, 0x11339, .p.property = IDNA_P_PVALID }, + { 0x1133A, 0x1133A, .p.property = IDNA_P_UNASSIGNED }, + { 0x1133B, 0x11344, .p.property = IDNA_P_PVALID }, + { 0x11345, 0x11346, .p.property = IDNA_P_UNASSIGNED }, + { 0x11347, 0x11348, .p.property = IDNA_P_PVALID }, + { 0x11349, 0x1134A, .p.property = IDNA_P_UNASSIGNED }, + { 0x1134B, 0x1134D, .p.property = IDNA_P_PVALID }, + { 0x1134E, 0x1134F, .p.property = IDNA_P_UNASSIGNED }, + { 0x11350, 0x11350, .p.property = IDNA_P_PVALID }, + { 0x11351, 0x11356, .p.property = IDNA_P_UNASSIGNED }, + { 0x11357, 0x11357, .p.property = IDNA_P_PVALID }, + { 0x11358, 0x1135C, .p.property = IDNA_P_UNASSIGNED }, + { 0x1135D, 0x11363, .p.property = IDNA_P_PVALID }, + { 0x11364, 0x11365, .p.property = IDNA_P_UNASSIGNED }, + { 0x11366, 0x1136C, .p.property = IDNA_P_PVALID }, + { 0x1136D, 0x1136F, .p.property = IDNA_P_UNASSIGNED }, + { 0x11370, 0x11374, .p.property = IDNA_P_PVALID }, + { 0x11375, 0x113FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x11400, 0x1144A, .p.property = IDNA_P_PVALID }, + { 0x1144B, 0x1144F, .p.property = IDNA_P_DISALLOWED }, + { 0x11450, 0x11459, .p.property = IDNA_P_PVALID }, + { 0x1145A, 0x1145A, .p.property = IDNA_P_UNASSIGNED }, + { 0x1145B, 0x1145B, .p.property = IDNA_P_DISALLOWED }, + { 0x1145C, 0x1145C, .p.property = IDNA_P_UNASSIGNED }, + { 0x1145D, 0x1145D, .p.property = IDNA_P_DISALLOWED }, + { 0x1145E, 0x1145E, .p.property = IDNA_P_PVALID }, + { 0x1145F, 0x1147F, .p.property = 
IDNA_P_UNASSIGNED }, + { 0x11480, 0x114C5, .p.property = IDNA_P_PVALID }, + { 0x114C6, 0x114C6, .p.property = IDNA_P_DISALLOWED }, + { 0x114C7, 0x114C7, .p.property = IDNA_P_PVALID }, + { 0x114C8, 0x114CF, .p.property = IDNA_P_UNASSIGNED }, + { 0x114D0, 0x114D9, .p.property = IDNA_P_PVALID }, + { 0x114DA, 0x1157F, .p.property = IDNA_P_UNASSIGNED }, + { 0x11580, 0x115B5, .p.property = IDNA_P_PVALID }, + { 0x115B6, 0x115B7, .p.property = IDNA_P_UNASSIGNED }, + { 0x115B8, 0x115C0, .p.property = IDNA_P_PVALID }, + { 0x115C1, 0x115D7, .p.property = IDNA_P_DISALLOWED }, + { 0x115D8, 0x115DD, .p.property = IDNA_P_PVALID }, + { 0x115DE, 0x115FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x11600, 0x11640, .p.property = IDNA_P_PVALID }, + { 0x11641, 0x11643, .p.property = IDNA_P_DISALLOWED }, + { 0x11644, 0x11644, .p.property = IDNA_P_PVALID }, + { 0x11645, 0x1164F, .p.property = IDNA_P_UNASSIGNED }, + { 0x11650, 0x11659, .p.property = IDNA_P_PVALID }, + { 0x1165A, 0x1165F, .p.property = IDNA_P_UNASSIGNED }, + { 0x11660, 0x1166C, .p.property = IDNA_P_DISALLOWED }, + { 0x1166D, 0x1167F, .p.property = IDNA_P_UNASSIGNED }, + { 0x11680, 0x116B7, .p.property = IDNA_P_PVALID }, + { 0x116B8, 0x116BF, .p.property = IDNA_P_UNASSIGNED }, + { 0x116C0, 0x116C9, .p.property = IDNA_P_PVALID }, + { 0x116CA, 0x116FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x11700, 0x1171A, .p.property = IDNA_P_PVALID }, + { 0x1171B, 0x1171C, .p.property = IDNA_P_UNASSIGNED }, + { 0x1171D, 0x1172B, .p.property = IDNA_P_PVALID }, + { 0x1172C, 0x1172F, .p.property = IDNA_P_UNASSIGNED }, + { 0x11730, 0x11739, .p.property = IDNA_P_PVALID }, + { 0x1173A, 0x1173F, .p.property = IDNA_P_DISALLOWED }, + { 0x11740, 0x117FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x11800, 0x1183A, .p.property = IDNA_P_PVALID }, + { 0x1183B, 0x1183B, .p.property = IDNA_P_DISALLOWED }, + { 0x1183C, 0x1189F, .p.property = IDNA_P_UNASSIGNED }, + { 0x118A0, 0x118BF, .p.property = IDNA_P_DISALLOWED }, + { 0x118C0, 0x118E9, .p.property = 
IDNA_P_PVALID }, + { 0x118EA, 0x118F2, .p.property = IDNA_P_DISALLOWED }, + { 0x118F3, 0x118FE, .p.property = IDNA_P_UNASSIGNED }, + { 0x118FF, 0x118FF, .p.property = IDNA_P_PVALID }, + { 0x11900, 0x119FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x11A00, 0x11A3E, .p.property = IDNA_P_PVALID }, + { 0x11A3F, 0x11A46, .p.property = IDNA_P_DISALLOWED }, + { 0x11A47, 0x11A47, .p.property = IDNA_P_PVALID }, + { 0x11A48, 0x11A4F, .p.property = IDNA_P_UNASSIGNED }, + { 0x11A50, 0x11A83, .p.property = IDNA_P_PVALID }, + { 0x11A84, 0x11A85, .p.property = IDNA_P_UNASSIGNED }, + { 0x11A86, 0x11A99, .p.property = IDNA_P_PVALID }, + { 0x11A9A, 0x11A9C, .p.property = IDNA_P_DISALLOWED }, + { 0x11A9D, 0x11A9D, .p.property = IDNA_P_PVALID }, + { 0x11A9E, 0x11AA2, .p.property = IDNA_P_DISALLOWED }, + { 0x11AA3, 0x11ABF, .p.property = IDNA_P_UNASSIGNED }, + { 0x11AC0, 0x11AF8, .p.property = IDNA_P_PVALID }, + { 0x11AF9, 0x11BFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x11C00, 0x11C08, .p.property = IDNA_P_PVALID }, + { 0x11C09, 0x11C09, .p.property = IDNA_P_UNASSIGNED }, + { 0x11C0A, 0x11C36, .p.property = IDNA_P_PVALID }, + { 0x11C37, 0x11C37, .p.property = IDNA_P_UNASSIGNED }, + { 0x11C38, 0x11C40, .p.property = IDNA_P_PVALID }, + { 0x11C41, 0x11C45, .p.property = IDNA_P_DISALLOWED }, + { 0x11C46, 0x11C4F, .p.property = IDNA_P_UNASSIGNED }, + { 0x11C50, 0x11C59, .p.property = IDNA_P_PVALID }, + { 0x11C5A, 0x11C6C, .p.property = IDNA_P_DISALLOWED }, + { 0x11C6D, 0x11C6F, .p.property = IDNA_P_UNASSIGNED }, + { 0x11C70, 0x11C71, .p.property = IDNA_P_DISALLOWED }, + { 0x11C72, 0x11C8F, .p.property = IDNA_P_PVALID }, + { 0x11C90, 0x11C91, .p.property = IDNA_P_UNASSIGNED }, + { 0x11C92, 0x11CA7, .p.property = IDNA_P_PVALID }, + { 0x11CA8, 0x11CA8, .p.property = IDNA_P_UNASSIGNED }, + { 0x11CA9, 0x11CB6, .p.property = IDNA_P_PVALID }, + { 0x11CB7, 0x11CFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x11D00, 0x11D06, .p.property = IDNA_P_PVALID }, + { 0x11D07, 0x11D07, .p.property = 
IDNA_P_UNASSIGNED }, + { 0x11D08, 0x11D09, .p.property = IDNA_P_PVALID }, + { 0x11D0A, 0x11D0A, .p.property = IDNA_P_UNASSIGNED }, + { 0x11D0B, 0x11D36, .p.property = IDNA_P_PVALID }, + { 0x11D37, 0x11D39, .p.property = IDNA_P_UNASSIGNED }, + { 0x11D3A, 0x11D3A, .p.property = IDNA_P_PVALID }, + { 0x11D3B, 0x11D3B, .p.property = IDNA_P_UNASSIGNED }, + { 0x11D3C, 0x11D3D, .p.property = IDNA_P_PVALID }, + { 0x11D3E, 0x11D3E, .p.property = IDNA_P_UNASSIGNED }, + { 0x11D3F, 0x11D47, .p.property = IDNA_P_PVALID }, + { 0x11D48, 0x11D4F, .p.property = IDNA_P_UNASSIGNED }, + { 0x11D50, 0x11D59, .p.property = IDNA_P_PVALID }, + { 0x11D5A, 0x11D5F, .p.property = IDNA_P_UNASSIGNED }, + { 0x11D60, 0x11D65, .p.property = IDNA_P_PVALID }, + { 0x11D66, 0x11D66, .p.property = IDNA_P_UNASSIGNED }, + { 0x11D67, 0x11D68, .p.property = IDNA_P_PVALID }, + { 0x11D69, 0x11D69, .p.property = IDNA_P_UNASSIGNED }, + { 0x11D6A, 0x11D8E, .p.property = IDNA_P_PVALID }, + { 0x11D8F, 0x11D8F, .p.property = IDNA_P_UNASSIGNED }, + { 0x11D90, 0x11D91, .p.property = IDNA_P_PVALID }, + { 0x11D92, 0x11D92, .p.property = IDNA_P_UNASSIGNED }, + { 0x11D93, 0x11D98, .p.property = IDNA_P_PVALID }, + { 0x11D99, 0x11D9F, .p.property = IDNA_P_UNASSIGNED }, + { 0x11DA0, 0x11DA9, .p.property = IDNA_P_PVALID }, + { 0x11DAA, 0x11EDF, .p.property = IDNA_P_UNASSIGNED }, + { 0x11EE0, 0x11EF6, .p.property = IDNA_P_PVALID }, + { 0x11EF7, 0x11EF8, .p.property = IDNA_P_DISALLOWED }, + { 0x11EF9, 0x11FFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x12000, 0x12399, .p.property = IDNA_P_PVALID }, + { 0x1239A, 0x123FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x12400, 0x1246E, .p.property = IDNA_P_DISALLOWED }, + { 0x1246F, 0x1246F, .p.property = IDNA_P_UNASSIGNED }, + { 0x12470, 0x12474, .p.property = IDNA_P_DISALLOWED }, + { 0x12475, 0x1247F, .p.property = IDNA_P_UNASSIGNED }, + { 0x12480, 0x12543, .p.property = IDNA_P_PVALID }, + { 0x12544, 0x12FFF, .p.property = IDNA_P_UNASSIGNED }, { 0x13000, 0x1342E, .p.property = 
IDNA_P_PVALID }, - { 0x1342F, 0x1CFFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1342F, 0x143FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x14400, 0x14646, .p.property = IDNA_P_PVALID }, + { 0x14647, 0x167FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x16800, 0x16A38, .p.property = IDNA_P_PVALID }, + { 0x16A39, 0x16A3F, .p.property = IDNA_P_UNASSIGNED }, + { 0x16A40, 0x16A5E, .p.property = IDNA_P_PVALID }, + { 0x16A5F, 0x16A5F, .p.property = IDNA_P_UNASSIGNED }, + { 0x16A60, 0x16A69, .p.property = IDNA_P_PVALID }, + { 0x16A6A, 0x16A6D, .p.property = IDNA_P_UNASSIGNED }, + { 0x16A6E, 0x16A6F, .p.property = IDNA_P_DISALLOWED }, + { 0x16A70, 0x16ACF, .p.property = IDNA_P_UNASSIGNED }, + { 0x16AD0, 0x16AED, .p.property = IDNA_P_PVALID }, + { 0x16AEE, 0x16AEF, .p.property = IDNA_P_UNASSIGNED }, + { 0x16AF0, 0x16AF4, .p.property = IDNA_P_PVALID }, + { 0x16AF5, 0x16AF5, .p.property = IDNA_P_DISALLOWED }, + { 0x16AF6, 0x16AFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x16B00, 0x16B36, .p.property = IDNA_P_PVALID }, + { 0x16B37, 0x16B3F, .p.property = IDNA_P_DISALLOWED }, + { 0x16B40, 0x16B43, .p.property = IDNA_P_PVALID }, + { 0x16B44, 0x16B45, .p.property = IDNA_P_DISALLOWED }, + { 0x16B46, 0x16B4F, .p.property = IDNA_P_UNASSIGNED }, + { 0x16B50, 0x16B59, .p.property = IDNA_P_PVALID }, + { 0x16B5A, 0x16B5A, .p.property = IDNA_P_UNASSIGNED }, + { 0x16B5B, 0x16B61, .p.property = IDNA_P_DISALLOWED }, + { 0x16B62, 0x16B62, .p.property = IDNA_P_UNASSIGNED }, + { 0x16B63, 0x16B77, .p.property = IDNA_P_PVALID }, + { 0x16B78, 0x16B7C, .p.property = IDNA_P_UNASSIGNED }, + { 0x16B7D, 0x16B8F, .p.property = IDNA_P_PVALID }, + { 0x16B90, 0x16E3F, .p.property = IDNA_P_UNASSIGNED }, + { 0x16E40, 0x16E5F, .p.property = IDNA_P_DISALLOWED }, + { 0x16E60, 0x16E7F, .p.property = IDNA_P_PVALID }, + { 0x16E80, 0x16E9A, .p.property = IDNA_P_DISALLOWED }, + { 0x16E9B, 0x16EFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x16F00, 0x16F44, .p.property = IDNA_P_PVALID }, + { 0x16F45, 0x16F4F, .p.property = 
IDNA_P_UNASSIGNED }, + { 0x16F50, 0x16F7E, .p.property = IDNA_P_PVALID }, + { 0x16F7F, 0x16F8E, .p.property = IDNA_P_UNASSIGNED }, + { 0x16F8F, 0x16F9F, .p.property = IDNA_P_PVALID }, + { 0x16FA0, 0x16FDF, .p.property = IDNA_P_UNASSIGNED }, + { 0x16FE0, 0x16FE1, .p.property = IDNA_P_PVALID }, + { 0x16FE2, 0x16FFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x17000, 0x187F1, .p.property = IDNA_P_PVALID }, + { 0x187F2, 0x187FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x18800, 0x18AF2, .p.property = IDNA_P_PVALID }, + { 0x18AF3, 0x1AFFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1B000, 0x1B11E, .p.property = IDNA_P_PVALID }, + { 0x1B11F, 0x1B16F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1B170, 0x1B2FB, .p.property = IDNA_P_PVALID }, + { 0x1B2FC, 0x1BBFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1BC00, 0x1BC6A, .p.property = IDNA_P_PVALID }, + { 0x1BC6B, 0x1BC6F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1BC70, 0x1BC7C, .p.property = IDNA_P_PVALID }, + { 0x1BC7D, 0x1BC7F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1BC80, 0x1BC88, .p.property = IDNA_P_PVALID }, + { 0x1BC89, 0x1BC8F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1BC90, 0x1BC99, .p.property = IDNA_P_PVALID }, + { 0x1BC9A, 0x1BC9B, .p.property = IDNA_P_UNASSIGNED }, + { 0x1BC9C, 0x1BC9C, .p.property = IDNA_P_DISALLOWED }, + { 0x1BC9D, 0x1BC9E, .p.property = IDNA_P_PVALID }, + { 0x1BC9F, 0x1BCA3, .p.property = IDNA_P_DISALLOWED }, + { 0x1BCA4, 0x1CFFF, .p.property = IDNA_P_UNASSIGNED }, { 0x1D000, 0x1D0F5, .p.property = IDNA_P_DISALLOWED }, { 0x1D0F6, 0x1D0FF, .p.property = IDNA_P_UNASSIGNED }, { 0x1D100, 0x1D126, .p.property = IDNA_P_DISALLOWED }, { 0x1D127, 0x1D128, .p.property = IDNA_P_UNASSIGNED }, - { 0x1D129, 0x1D1DD, .p.property = IDNA_P_DISALLOWED }, - { 0x1D1DE, 0x1D1FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1D129, 0x1D1E8, .p.property = IDNA_P_DISALLOWED }, + { 0x1D1E9, 0x1D1FF, .p.property = IDNA_P_UNASSIGNED }, { 0x1D200, 0x1D245, .p.property = IDNA_P_DISALLOWED }, - { 0x1D246, 0x1D2FF, .p.property = 
IDNA_P_UNASSIGNED }, + { 0x1D246, 0x1D2DF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1D2E0, 0x1D2F3, .p.property = IDNA_P_DISALLOWED }, + { 0x1D2F4, 0x1D2FF, .p.property = IDNA_P_UNASSIGNED }, { 0x1D300, 0x1D356, .p.property = IDNA_P_DISALLOWED }, { 0x1D357, 0x1D35F, .p.property = IDNA_P_UNASSIGNED }, - { 0x1D360, 0x1D371, .p.property = IDNA_P_DISALLOWED }, - { 0x1D372, 0x1D3FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1D360, 0x1D378, .p.property = IDNA_P_DISALLOWED }, + { 0x1D379, 0x1D3FF, .p.property = IDNA_P_UNASSIGNED }, { 0x1D400, 0x1D454, .p.property = IDNA_P_DISALLOWED }, { 0x1D455, 0x1D455, .p.property = IDNA_P_UNASSIGNED }, { 0x1D456, 0x1D49C, .p.property = IDNA_P_DISALLOWED }, @@ -2273,53 +2651,191 @@ idna_table idna_derived[] = { { 0x1D6A6, 0x1D6A7, .p.property = IDNA_P_UNASSIGNED }, { 0x1D6A8, 0x1D7CB, .p.property = IDNA_P_DISALLOWED }, { 0x1D7CC, 0x1D7CD, .p.property = IDNA_P_UNASSIGNED }, - { 0x1D7CE, 0x1D7FF, .p.property = IDNA_P_DISALLOWED }, - { 0x1D800, 0x1EFFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1D7CE, 0x1D9FF, .p.property = IDNA_P_DISALLOWED }, + { 0x1DA00, 0x1DA36, .p.property = IDNA_P_PVALID }, + { 0x1DA37, 0x1DA3A, .p.property = IDNA_P_DISALLOWED }, + { 0x1DA3B, 0x1DA6C, .p.property = IDNA_P_PVALID }, + { 0x1DA6D, 0x1DA74, .p.property = IDNA_P_DISALLOWED }, + { 0x1DA75, 0x1DA75, .p.property = IDNA_P_PVALID }, + { 0x1DA76, 0x1DA83, .p.property = IDNA_P_DISALLOWED }, + { 0x1DA84, 0x1DA84, .p.property = IDNA_P_PVALID }, + { 0x1DA85, 0x1DA8B, .p.property = IDNA_P_DISALLOWED }, + { 0x1DA8C, 0x1DA9A, .p.property = IDNA_P_UNASSIGNED }, + { 0x1DA9B, 0x1DA9F, .p.property = IDNA_P_PVALID }, + { 0x1DAA0, 0x1DAA0, .p.property = IDNA_P_UNASSIGNED }, + { 0x1DAA1, 0x1DAAF, .p.property = IDNA_P_PVALID }, + { 0x1DAB0, 0x1DFFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1E000, 0x1E006, .p.property = IDNA_P_PVALID }, + { 0x1E007, 0x1E007, .p.property = IDNA_P_UNASSIGNED }, + { 0x1E008, 0x1E018, .p.property = IDNA_P_PVALID }, + { 0x1E019, 0x1E01A, .p.property 
= IDNA_P_UNASSIGNED }, + { 0x1E01B, 0x1E021, .p.property = IDNA_P_PVALID }, + { 0x1E022, 0x1E022, .p.property = IDNA_P_UNASSIGNED }, + { 0x1E023, 0x1E024, .p.property = IDNA_P_PVALID }, + { 0x1E025, 0x1E025, .p.property = IDNA_P_UNASSIGNED }, + { 0x1E026, 0x1E02A, .p.property = IDNA_P_PVALID }, + { 0x1E02B, 0x1E7FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1E800, 0x1E8C4, .p.property = IDNA_P_PVALID }, + { 0x1E8C5, 0x1E8C6, .p.property = IDNA_P_UNASSIGNED }, + { 0x1E8C7, 0x1E8CF, .p.property = IDNA_P_DISALLOWED }, + { 0x1E8D0, 0x1E8D6, .p.property = IDNA_P_PVALID }, + { 0x1E8D7, 0x1E8FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1E900, 0x1E921, .p.property = IDNA_P_DISALLOWED }, + { 0x1E922, 0x1E94A, .p.property = IDNA_P_PVALID }, + { 0x1E94B, 0x1E94F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1E950, 0x1E959, .p.property = IDNA_P_PVALID }, + { 0x1E95A, 0x1E95D, .p.property = IDNA_P_UNASSIGNED }, + { 0x1E95E, 0x1E95F, .p.property = IDNA_P_DISALLOWED }, + { 0x1E960, 0x1EC70, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EC71, 0x1ECB4, .p.property = IDNA_P_DISALLOWED }, + { 0x1ECB5, 0x1EDFF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE00, 0x1EE03, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE04, 0x1EE04, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE05, 0x1EE1F, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE20, 0x1EE20, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE21, 0x1EE22, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE23, 0x1EE23, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE24, 0x1EE24, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE25, 0x1EE26, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE27, 0x1EE27, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE28, 0x1EE28, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE29, 0x1EE32, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE33, 0x1EE33, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE34, 0x1EE37, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE38, 0x1EE38, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE39, 0x1EE39, .p.property = IDNA_P_DISALLOWED }, + { 
0x1EE3A, 0x1EE3A, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE3B, 0x1EE3B, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE3C, 0x1EE41, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE42, 0x1EE42, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE43, 0x1EE46, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE47, 0x1EE47, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE48, 0x1EE48, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE49, 0x1EE49, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE4A, 0x1EE4A, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE4B, 0x1EE4B, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE4C, 0x1EE4C, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE4D, 0x1EE4F, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE50, 0x1EE50, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE51, 0x1EE52, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE53, 0x1EE53, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE54, 0x1EE54, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE55, 0x1EE56, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE57, 0x1EE57, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE58, 0x1EE58, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE59, 0x1EE59, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE5A, 0x1EE5A, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE5B, 0x1EE5B, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE5C, 0x1EE5C, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE5D, 0x1EE5D, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE5E, 0x1EE5E, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE5F, 0x1EE5F, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE60, 0x1EE60, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE61, 0x1EE62, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE63, 0x1EE63, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE64, 0x1EE64, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE65, 0x1EE66, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE67, 0x1EE6A, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE6B, 0x1EE6B, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE6C, 0x1EE72, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE73, 0x1EE73, .p.property = IDNA_P_UNASSIGNED }, + { 
0x1EE74, 0x1EE77, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE78, 0x1EE78, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE79, 0x1EE7C, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE7D, 0x1EE7D, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE7E, 0x1EE7E, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE7F, 0x1EE7F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE80, 0x1EE89, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE8A, 0x1EE8A, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EE8B, 0x1EE9B, .p.property = IDNA_P_DISALLOWED }, + { 0x1EE9C, 0x1EEA0, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EEA1, 0x1EEA3, .p.property = IDNA_P_DISALLOWED }, + { 0x1EEA4, 0x1EEA4, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EEA5, 0x1EEA9, .p.property = IDNA_P_DISALLOWED }, + { 0x1EEAA, 0x1EEAA, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EEAB, 0x1EEBB, .p.property = IDNA_P_DISALLOWED }, + { 0x1EEBC, 0x1EEEF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1EEF0, 0x1EEF1, .p.property = IDNA_P_DISALLOWED }, + { 0x1EEF2, 0x1EFFF, .p.property = IDNA_P_UNASSIGNED }, { 0x1F000, 0x1F02B, .p.property = IDNA_P_DISALLOWED }, { 0x1F02C, 0x1F02F, .p.property = IDNA_P_UNASSIGNED }, { 0x1F030, 0x1F093, .p.property = IDNA_P_DISALLOWED }, - { 0x1F094, 0x1F0FF, .p.property = IDNA_P_UNASSIGNED }, - { 0x1F100, 0x1F10A, .p.property = IDNA_P_DISALLOWED }, - { 0x1F10B, 0x1F10F, .p.property = IDNA_P_UNASSIGNED }, - { 0x1F110, 0x1F12E, .p.property = IDNA_P_DISALLOWED }, - { 0x1F12F, 0x1F130, .p.property = IDNA_P_UNASSIGNED }, - { 0x1F131, 0x1F131, .p.property = IDNA_P_DISALLOWED }, - { 0x1F132, 0x1F13C, .p.property = IDNA_P_UNASSIGNED }, - { 0x1F13D, 0x1F13D, .p.property = IDNA_P_DISALLOWED }, - { 0x1F13E, 0x1F13E, .p.property = IDNA_P_UNASSIGNED }, - { 0x1F13F, 0x1F13F, .p.property = IDNA_P_DISALLOWED }, - { 0x1F140, 0x1F141, .p.property = IDNA_P_UNASSIGNED }, - { 0x1F142, 0x1F142, .p.property = IDNA_P_DISALLOWED }, - { 0x1F143, 0x1F145, .p.property = IDNA_P_UNASSIGNED }, - { 0x1F146, 0x1F146, .p.property = IDNA_P_DISALLOWED }, - { 0x1F147, 
0x1F149, .p.property = IDNA_P_UNASSIGNED }, - { 0x1F14A, 0x1F14E, .p.property = IDNA_P_DISALLOWED }, - { 0x1F14F, 0x1F156, .p.property = IDNA_P_UNASSIGNED }, - { 0x1F157, 0x1F157, .p.property = IDNA_P_DISALLOWED }, - { 0x1F158, 0x1F15E, .p.property = IDNA_P_UNASSIGNED }, - { 0x1F15F, 0x1F15F, .p.property = IDNA_P_DISALLOWED }, - { 0x1F160, 0x1F178, .p.property = IDNA_P_UNASSIGNED }, - { 0x1F179, 0x1F179, .p.property = IDNA_P_DISALLOWED }, - { 0x1F17A, 0x1F17A, .p.property = IDNA_P_UNASSIGNED }, - { 0x1F17B, 0x1F17C, .p.property = IDNA_P_DISALLOWED }, - { 0x1F17D, 0x1F17E, .p.property = IDNA_P_UNASSIGNED }, - { 0x1F17F, 0x1F17F, .p.property = IDNA_P_DISALLOWED }, - { 0x1F180, 0x1F189, .p.property = IDNA_P_UNASSIGNED }, - { 0x1F18A, 0x1F18D, .p.property = IDNA_P_DISALLOWED }, - { 0x1F18E, 0x1F18F, .p.property = IDNA_P_UNASSIGNED }, - { 0x1F190, 0x1F190, .p.property = IDNA_P_DISALLOWED }, - { 0x1F191, 0x1F1FF, .p.property = IDNA_P_UNASSIGNED }, - { 0x1F200, 0x1F200, .p.property = IDNA_P_DISALLOWED }, - { 0x1F201, 0x1F20F, .p.property = IDNA_P_UNASSIGNED }, - { 0x1F210, 0x1F231, .p.property = IDNA_P_DISALLOWED }, - { 0x1F232, 0x1F23F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F094, 0x1F09F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F0A0, 0x1F0AE, .p.property = IDNA_P_DISALLOWED }, + { 0x1F0AF, 0x1F0B0, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F0B1, 0x1F0BF, .p.property = IDNA_P_DISALLOWED }, + { 0x1F0C0, 0x1F0C0, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F0C1, 0x1F0CF, .p.property = IDNA_P_DISALLOWED }, + { 0x1F0D0, 0x1F0D0, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F0D1, 0x1F0F5, .p.property = IDNA_P_DISALLOWED }, + { 0x1F0F6, 0x1F0FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F100, 0x1F10C, .p.property = IDNA_P_DISALLOWED }, + { 0x1F10D, 0x1F10F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F110, 0x1F16B, .p.property = IDNA_P_DISALLOWED }, + { 0x1F16C, 0x1F16F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F170, 0x1F1AC, .p.property = IDNA_P_DISALLOWED }, + { 0x1F1AD, 
0x1F1E5, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F1E6, 0x1F202, .p.property = IDNA_P_DISALLOWED }, + { 0x1F203, 0x1F20F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F210, 0x1F23B, .p.property = IDNA_P_DISALLOWED }, + { 0x1F23C, 0x1F23F, .p.property = IDNA_P_UNASSIGNED }, { 0x1F240, 0x1F248, .p.property = IDNA_P_DISALLOWED }, - { 0x1F249, 0x1FFFD, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F249, 0x1F24F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F250, 0x1F251, .p.property = IDNA_P_DISALLOWED }, + { 0x1F252, 0x1F25F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F260, 0x1F265, .p.property = IDNA_P_DISALLOWED }, + { 0x1F266, 0x1F2FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F300, 0x1F6D4, .p.property = IDNA_P_DISALLOWED }, + { 0x1F6D5, 0x1F6DF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F6E0, 0x1F6EC, .p.property = IDNA_P_DISALLOWED }, + { 0x1F6ED, 0x1F6EF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F6F0, 0x1F6F9, .p.property = IDNA_P_DISALLOWED }, + { 0x1F6FA, 0x1F6FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F700, 0x1F773, .p.property = IDNA_P_DISALLOWED }, + { 0x1F774, 0x1F77F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F780, 0x1F7D8, .p.property = IDNA_P_DISALLOWED }, + { 0x1F7D9, 0x1F7FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F800, 0x1F80B, .p.property = IDNA_P_DISALLOWED }, + { 0x1F80C, 0x1F80F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F810, 0x1F847, .p.property = IDNA_P_DISALLOWED }, + { 0x1F848, 0x1F84F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F850, 0x1F859, .p.property = IDNA_P_DISALLOWED }, + { 0x1F85A, 0x1F85F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F860, 0x1F887, .p.property = IDNA_P_DISALLOWED }, + { 0x1F888, 0x1F88F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F890, 0x1F8AD, .p.property = IDNA_P_DISALLOWED }, + { 0x1F8AE, 0x1F8FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F900, 0x1F90B, .p.property = IDNA_P_DISALLOWED }, + { 0x1F90C, 0x1F90F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F910, 0x1F93E, .p.property = IDNA_P_DISALLOWED }, + { 0x1F93F, 
0x1F93F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F940, 0x1F970, .p.property = IDNA_P_DISALLOWED }, + { 0x1F971, 0x1F972, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F973, 0x1F976, .p.property = IDNA_P_DISALLOWED }, + { 0x1F977, 0x1F979, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F97A, 0x1F97A, .p.property = IDNA_P_DISALLOWED }, + { 0x1F97B, 0x1F97B, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F97C, 0x1F9A2, .p.property = IDNA_P_DISALLOWED }, + { 0x1F9A3, 0x1F9AF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F9B0, 0x1F9B9, .p.property = IDNA_P_DISALLOWED }, + { 0x1F9BA, 0x1F9BF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F9C0, 0x1F9C2, .p.property = IDNA_P_DISALLOWED }, + { 0x1F9C3, 0x1F9CF, .p.property = IDNA_P_UNASSIGNED }, + { 0x1F9D0, 0x1F9FF, .p.property = IDNA_P_DISALLOWED }, + { 0x1FA00, 0x1FA5F, .p.property = IDNA_P_UNASSIGNED }, + { 0x1FA60, 0x1FA6D, .p.property = IDNA_P_DISALLOWED }, + { 0x1FA6E, 0x1FFFD, .p.property = IDNA_P_UNASSIGNED }, { 0x1FFFE, 0x1FFFF, .p.property = IDNA_P_DISALLOWED }, { 0x20000, 0x2A6D6, .p.property = IDNA_P_PVALID }, { 0x2A6D7, 0x2A6FF, .p.property = IDNA_P_UNASSIGNED }, { 0x2A700, 0x2B734, .p.property = IDNA_P_PVALID }, - { 0x2B735, 0x2F7FF, .p.property = IDNA_P_UNASSIGNED }, + { 0x2B735, 0x2B73F, .p.property = IDNA_P_UNASSIGNED }, + { 0x2B740, 0x2B81D, .p.property = IDNA_P_PVALID }, + { 0x2B81E, 0x2B81F, .p.property = IDNA_P_UNASSIGNED }, + { 0x2B820, 0x2CEA1, .p.property = IDNA_P_PVALID }, + { 0x2CEA2, 0x2CEAF, .p.property = IDNA_P_UNASSIGNED }, + { 0x2CEB0, 0x2EBE0, .p.property = IDNA_P_PVALID }, + { 0x2EBE1, 0x2F7FF, .p.property = IDNA_P_UNASSIGNED }, { 0x2F800, 0x2FA1D, .p.property = IDNA_P_DISALLOWED }, { 0x2FA1E, 0x2FFFD, .p.property = IDNA_P_UNASSIGNED }, { 0x2FFFE, 0x2FFFF, .p.property = IDNA_P_DISALLOWED }, @@ -2359,7 +2875,9 @@ idna_table idna_derived[] = { idna_table idna_joiningtype[] = { { 0x0640 , 0x0640 , .p.jt = IDNA_UNICODE_JT_C }, { 0x07FA , 0x07FA , .p.jt = IDNA_UNICODE_JT_C }, + { 0x180A , 0x180A , .p.jt = 
IDNA_UNICODE_JT_C }, { 0x200D , 0x200D , .p.jt = IDNA_UNICODE_JT_C }, + { 0x0620 , 0x0620 , .p.jt = IDNA_UNICODE_JT_D }, { 0x0626 , 0x0626 , .p.jt = IDNA_UNICODE_JT_D }, { 0x0628 , 0x0628 , .p.jt = IDNA_UNICODE_JT_D }, { 0x062A, 0x062E , .p.jt = IDNA_UNICODE_JT_D }, @@ -2388,6 +2906,43 @@ idna_table idna_joiningtype[] = { { 0x0775, 0x0777 , .p.jt = IDNA_UNICODE_JT_D }, { 0x077A, 0x077F , .p.jt = IDNA_UNICODE_JT_D }, { 0x07CA, 0x07EA , .p.jt = IDNA_UNICODE_JT_D }, + { 0x0841, 0x0845 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x0848 , 0x0848 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x084A, 0x0853 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x0855 , 0x0855 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x0860 , 0x0860 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x0862, 0x0865 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x0868 , 0x0868 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x08A0, 0x08A9 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x08AF, 0x08B0 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x08B3, 0x08B4 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x08B6, 0x08B8 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x08BA, 0x08BD , .p.jt = IDNA_UNICODE_JT_D }, + { 0x1807 , 0x1807 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x1820, 0x1842 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x1843 , 0x1843 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x1844, 0x1878 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x1887, 0x18A8 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x18AA , 0x18AA , .p.jt = IDNA_UNICODE_JT_D }, + { 0xA840, 0xA871 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x10AC0, 0x10AC4 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x10AD3, 0x10AD6 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x10AD8, 0x10ADC , .p.jt = IDNA_UNICODE_JT_D }, + { 0x10ADE, 0x10AE0 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x10AEB, 0x10AEE , .p.jt = IDNA_UNICODE_JT_D }, + { 0x10B80 , 0x10B80 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x10B82 , 0x10B82 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x10B86, 0x10B88 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x10B8A, 0x10B8B , .p.jt = IDNA_UNICODE_JT_D }, + { 0x10B8D , 0x10B8D , .p.jt = IDNA_UNICODE_JT_D }, + { 0x10B90 , 0x10B90 , .p.jt = IDNA_UNICODE_JT_D 
}, + { 0x10BAD, 0x10BAE , .p.jt = IDNA_UNICODE_JT_D }, + { 0x10D01, 0x10D21 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x10D23 , 0x10D23 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x10F30, 0x10F32 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x10F34, 0x10F44 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x10F51, 0x10F53 , .p.jt = IDNA_UNICODE_JT_D }, + { 0x1E900, 0x1E943 , .p.jt = IDNA_UNICODE_JT_D }, { 0x0622, 0x0625 , .p.jt = IDNA_UNICODE_JT_R }, { 0x0627 , 0x0627 , .p.jt = IDNA_UNICODE_JT_R }, { 0x0629 , 0x0629 , .p.jt = IDNA_UNICODE_JT_R }, @@ -2416,6 +2971,38 @@ idna_table idna_joiningtype[] = { { 0x0771 , 0x0771 , .p.jt = IDNA_UNICODE_JT_R }, { 0x0773, 0x0774 , .p.jt = IDNA_UNICODE_JT_R }, { 0x0778, 0x0779 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x0840 , 0x0840 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x0846, 0x0847 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x0849 , 0x0849 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x0854 , 0x0854 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x0867 , 0x0867 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x0869, 0x086A , .p.jt = IDNA_UNICODE_JT_R }, + { 0x08AA, 0x08AC , .p.jt = IDNA_UNICODE_JT_R }, + { 0x08AE , 0x08AE , .p.jt = IDNA_UNICODE_JT_R }, + { 0x08B1, 0x08B2 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x08B9 , 0x08B9 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x10AC5 , 0x10AC5 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x10AC7 , 0x10AC7 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x10AC9, 0x10ACA , .p.jt = IDNA_UNICODE_JT_R }, + { 0x10ACE, 0x10AD2 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x10ADD , 0x10ADD , .p.jt = IDNA_UNICODE_JT_R }, + { 0x10AE1 , 0x10AE1 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x10AE4 , 0x10AE4 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x10AEF , 0x10AEF , .p.jt = IDNA_UNICODE_JT_R }, + { 0x10B81 , 0x10B81 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x10B83, 0x10B85 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x10B89 , 0x10B89 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x10B8C , 0x10B8C , .p.jt = IDNA_UNICODE_JT_R }, + { 0x10B8E, 0x10B8F , .p.jt = IDNA_UNICODE_JT_R }, + { 0x10B91 , 0x10B91 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x10BA9, 0x10BAC , .p.jt = 
IDNA_UNICODE_JT_R }, + { 0x10D22 , 0x10D22 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x10F33 , 0x10F33 , .p.jt = IDNA_UNICODE_JT_R }, + { 0x10F54 , 0x10F54 , .p.jt = IDNA_UNICODE_JT_R }, + { 0xA872 , 0xA872 , .p.jt = IDNA_UNICODE_JT_L }, + { 0x10ACD , 0x10ACD , .p.jt = IDNA_UNICODE_JT_L }, + { 0x10AD7 , 0x10AD7 , .p.jt = IDNA_UNICODE_JT_L }, + { 0x10D00 , 0x10D00 , .p.jt = IDNA_UNICODE_JT_L }, { 0x00AD , 0x00AD , .p.jt = IDNA_UNICODE_JT_T }, { 0x0300, 0x036F , .p.jt = IDNA_UNICODE_JT_T }, { 0x0483, 0x0487 , .p.jt = IDNA_UNICODE_JT_T }, @@ -2426,10 +3013,10 @@ idna_table idna_joiningtype[] = { { 0x05C4, 0x05C5 , .p.jt = IDNA_UNICODE_JT_T }, { 0x05C7 , 0x05C7 , .p.jt = IDNA_UNICODE_JT_T }, { 0x0610, 0x061A , .p.jt = IDNA_UNICODE_JT_T }, - { 0x064B, 0x065E , .p.jt = IDNA_UNICODE_JT_T }, + { 0x061C , 0x061C , .p.jt = IDNA_UNICODE_JT_T }, + { 0x064B, 0x065F , .p.jt = IDNA_UNICODE_JT_T }, { 0x0670 , 0x0670 , .p.jt = IDNA_UNICODE_JT_T }, { 0x06D6, 0x06DC , .p.jt = IDNA_UNICODE_JT_T }, - { 0x06DE , 0x06DE , .p.jt = IDNA_UNICODE_JT_T }, { 0x06DF, 0x06E4 , .p.jt = IDNA_UNICODE_JT_T }, { 0x06E7, 0x06E8 , .p.jt = IDNA_UNICODE_JT_T }, { 0x06EA, 0x06ED , .p.jt = IDNA_UNICODE_JT_T }, @@ -2438,21 +3025,26 @@ idna_table idna_joiningtype[] = { { 0x0730, 0x074A , .p.jt = IDNA_UNICODE_JT_T }, { 0x07A6, 0x07B0 , .p.jt = IDNA_UNICODE_JT_T }, { 0x07EB, 0x07F3 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x07FD , 0x07FD , .p.jt = IDNA_UNICODE_JT_T }, { 0x0816, 0x0819 , .p.jt = IDNA_UNICODE_JT_T }, { 0x081B, 0x0823 , .p.jt = IDNA_UNICODE_JT_T }, { 0x0825, 0x0827 , .p.jt = IDNA_UNICODE_JT_T }, { 0x0829, 0x082D , .p.jt = IDNA_UNICODE_JT_T }, - { 0x0900, 0x0902 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x0859, 0x085B , .p.jt = IDNA_UNICODE_JT_T }, + { 0x08D3, 0x08E1 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x08E3, 0x0902 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x093A , 0x093A , .p.jt = IDNA_UNICODE_JT_T }, { 0x093C , 0x093C , .p.jt = IDNA_UNICODE_JT_T }, { 0x0941, 0x0948 , .p.jt = IDNA_UNICODE_JT_T }, { 0x094D , 0x094D , .p.jt 
= IDNA_UNICODE_JT_T }, - { 0x0951, 0x0955 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x0951, 0x0957 , .p.jt = IDNA_UNICODE_JT_T }, { 0x0962, 0x0963 , .p.jt = IDNA_UNICODE_JT_T }, { 0x0981 , 0x0981 , .p.jt = IDNA_UNICODE_JT_T }, { 0x09BC , 0x09BC , .p.jt = IDNA_UNICODE_JT_T }, { 0x09C1, 0x09C4 , .p.jt = IDNA_UNICODE_JT_T }, { 0x09CD , 0x09CD , .p.jt = IDNA_UNICODE_JT_T }, { 0x09E2, 0x09E3 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x09FE , 0x09FE , .p.jt = IDNA_UNICODE_JT_T }, { 0x0A01, 0x0A02 , .p.jt = IDNA_UNICODE_JT_T }, { 0x0A3C , 0x0A3C , .p.jt = IDNA_UNICODE_JT_T }, { 0x0A41, 0x0A42 , .p.jt = IDNA_UNICODE_JT_T }, @@ -2467,6 +3059,7 @@ idna_table idna_joiningtype[] = { { 0x0AC7, 0x0AC8 , .p.jt = IDNA_UNICODE_JT_T }, { 0x0ACD , 0x0ACD , .p.jt = IDNA_UNICODE_JT_T }, { 0x0AE2, 0x0AE3 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x0AFA, 0x0AFF , .p.jt = IDNA_UNICODE_JT_T }, { 0x0B01 , 0x0B01 , .p.jt = IDNA_UNICODE_JT_T }, { 0x0B3C , 0x0B3C , .p.jt = IDNA_UNICODE_JT_T }, { 0x0B3F , 0x0B3F , .p.jt = IDNA_UNICODE_JT_T }, @@ -2477,16 +3070,21 @@ idna_table idna_joiningtype[] = { { 0x0B82 , 0x0B82 , .p.jt = IDNA_UNICODE_JT_T }, { 0x0BC0 , 0x0BC0 , .p.jt = IDNA_UNICODE_JT_T }, { 0x0BCD , 0x0BCD , .p.jt = IDNA_UNICODE_JT_T }, + { 0x0C00 , 0x0C00 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x0C04 , 0x0C04 , .p.jt = IDNA_UNICODE_JT_T }, { 0x0C3E, 0x0C40 , .p.jt = IDNA_UNICODE_JT_T }, { 0x0C46, 0x0C48 , .p.jt = IDNA_UNICODE_JT_T }, { 0x0C4A, 0x0C4D , .p.jt = IDNA_UNICODE_JT_T }, { 0x0C55, 0x0C56 , .p.jt = IDNA_UNICODE_JT_T }, { 0x0C62, 0x0C63 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x0C81 , 0x0C81 , .p.jt = IDNA_UNICODE_JT_T }, { 0x0CBC , 0x0CBC , .p.jt = IDNA_UNICODE_JT_T }, { 0x0CBF , 0x0CBF , .p.jt = IDNA_UNICODE_JT_T }, { 0x0CC6 , 0x0CC6 , .p.jt = IDNA_UNICODE_JT_T }, { 0x0CCC, 0x0CCD , .p.jt = IDNA_UNICODE_JT_T }, { 0x0CE2, 0x0CE3 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x0D00, 0x0D01 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x0D3B, 0x0D3C , .p.jt = IDNA_UNICODE_JT_T }, { 0x0D41, 0x0D44 , .p.jt = IDNA_UNICODE_JT_T }, { 
0x0D4D , 0x0D4D , .p.jt = IDNA_UNICODE_JT_T }, { 0x0D62, 0x0D63 , .p.jt = IDNA_UNICODE_JT_T }, @@ -2507,7 +3105,7 @@ idna_table idna_joiningtype[] = { { 0x0F71, 0x0F7E , .p.jt = IDNA_UNICODE_JT_T }, { 0x0F80, 0x0F84 , .p.jt = IDNA_UNICODE_JT_T }, { 0x0F86, 0x0F87 , .p.jt = IDNA_UNICODE_JT_T }, - { 0x0F90, 0x0F97 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x0F8D, 0x0F97 , .p.jt = IDNA_UNICODE_JT_T }, { 0x0F99, 0x0FBC , .p.jt = IDNA_UNICODE_JT_T }, { 0x0FC6 , 0x0FC6 , .p.jt = IDNA_UNICODE_JT_T }, { 0x102D, 0x1030 , .p.jt = IDNA_UNICODE_JT_T }, @@ -2521,7 +3119,7 @@ idna_table idna_joiningtype[] = { { 0x1085, 0x1086 , .p.jt = IDNA_UNICODE_JT_T }, { 0x108D , 0x108D , .p.jt = IDNA_UNICODE_JT_T }, { 0x109D , 0x109D , .p.jt = IDNA_UNICODE_JT_T }, - { 0x135F , 0x135F , .p.jt = IDNA_UNICODE_JT_T }, + { 0x135D, 0x135F , .p.jt = IDNA_UNICODE_JT_T }, { 0x1712, 0x1714 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1732, 0x1734 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1752, 0x1753 , .p.jt = IDNA_UNICODE_JT_T }, @@ -2532,12 +3130,14 @@ idna_table idna_joiningtype[] = { { 0x17C9, 0x17D3 , .p.jt = IDNA_UNICODE_JT_T }, { 0x17DD , 0x17DD , .p.jt = IDNA_UNICODE_JT_T }, { 0x180B, 0x180D , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1885, 0x1886 , .p.jt = IDNA_UNICODE_JT_T }, { 0x18A9 , 0x18A9 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1920, 0x1922 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1927, 0x1928 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1932 , 0x1932 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1939, 0x193B , .p.jt = IDNA_UNICODE_JT_T }, { 0x1A17, 0x1A18 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1A1B , 0x1A1B , .p.jt = IDNA_UNICODE_JT_T }, { 0x1A56 , 0x1A56 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1A58, 0x1A5E , .p.jt = IDNA_UNICODE_JT_T }, { 0x1A60 , 0x1A60 , .p.jt = IDNA_UNICODE_JT_T }, @@ -2545,6 +3145,8 @@ idna_table idna_joiningtype[] = { { 0x1A65, 0x1A6C , .p.jt = IDNA_UNICODE_JT_T }, { 0x1A73, 0x1A7C , .p.jt = IDNA_UNICODE_JT_T }, { 0x1A7F , 0x1A7F , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1AB0, 0x1ABD , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1ABE , 0x1ABE , 
.p.jt = IDNA_UNICODE_JT_T }, { 0x1B00, 0x1B03 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1B34 , 0x1B34 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1B36, 0x1B3A , .p.jt = IDNA_UNICODE_JT_T }, @@ -2554,14 +3156,21 @@ idna_table idna_joiningtype[] = { { 0x1B80, 0x1B81 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1BA2, 0x1BA5 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1BA8, 0x1BA9 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1BAB, 0x1BAD , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1BE6 , 0x1BE6 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1BE8, 0x1BE9 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1BED , 0x1BED , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1BEF, 0x1BF1 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1C2C, 0x1C33 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1C36, 0x1C37 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1CD0, 0x1CD2 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1CD4, 0x1CE0 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1CE2, 0x1CE8 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1CED , 0x1CED , .p.jt = IDNA_UNICODE_JT_T }, - { 0x1DC0, 0x1DE6 , .p.jt = IDNA_UNICODE_JT_T }, - { 0x1DFD, 0x1DFF , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1CF4 , 0x1CF4 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1CF8, 0x1CF9 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1DC0, 0x1DF9 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1DFB, 0x1DFF , .p.jt = IDNA_UNICODE_JT_T }, { 0x200B , 0x200B , .p.jt = IDNA_UNICODE_JT_T }, { 0x200E, 0x200F , .p.jt = IDNA_UNICODE_JT_T }, { 0x202A, 0x202E , .p.jt = IDNA_UNICODE_JT_T }, @@ -2573,59 +3182,156 @@ idna_table idna_joiningtype[] = { { 0x20E2, 0x20E4 , .p.jt = IDNA_UNICODE_JT_T }, { 0x20E5, 0x20F0 , .p.jt = IDNA_UNICODE_JT_T }, { 0x2CEF, 0x2CF1 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x2D7F , 0x2D7F , .p.jt = IDNA_UNICODE_JT_T }, { 0x2DE0, 0x2DFF , .p.jt = IDNA_UNICODE_JT_T }, - { 0x302A, 0x302F , .p.jt = IDNA_UNICODE_JT_T }, + { 0x302A, 0x302D , .p.jt = IDNA_UNICODE_JT_T }, { 0x3099, 0x309A , .p.jt = IDNA_UNICODE_JT_T }, { 0xA66F , 0xA66F , .p.jt = IDNA_UNICODE_JT_T }, { 0xA670, 0xA672 , .p.jt = IDNA_UNICODE_JT_T }, - { 0xA67C, 0xA67D , .p.jt = IDNA_UNICODE_JT_T }, + { 0xA674, 0xA67D , .p.jt = 
IDNA_UNICODE_JT_T }, + { 0xA69E, 0xA69F , .p.jt = IDNA_UNICODE_JT_T }, { 0xA6F0, 0xA6F1 , .p.jt = IDNA_UNICODE_JT_T }, { 0xA802 , 0xA802 , .p.jt = IDNA_UNICODE_JT_T }, { 0xA806 , 0xA806 , .p.jt = IDNA_UNICODE_JT_T }, { 0xA80B , 0xA80B , .p.jt = IDNA_UNICODE_JT_T }, { 0xA825, 0xA826 , .p.jt = IDNA_UNICODE_JT_T }, - { 0xA8C4 , 0xA8C4 , .p.jt = IDNA_UNICODE_JT_T }, + { 0xA8C4, 0xA8C5 , .p.jt = IDNA_UNICODE_JT_T }, { 0xA8E0, 0xA8F1 , .p.jt = IDNA_UNICODE_JT_T }, + { 0xA8FF , 0xA8FF , .p.jt = IDNA_UNICODE_JT_T }, { 0xA926, 0xA92D , .p.jt = IDNA_UNICODE_JT_T }, { 0xA947, 0xA951 , .p.jt = IDNA_UNICODE_JT_T }, { 0xA980, 0xA982 , .p.jt = IDNA_UNICODE_JT_T }, { 0xA9B3 , 0xA9B3 , .p.jt = IDNA_UNICODE_JT_T }, { 0xA9B6, 0xA9B9 , .p.jt = IDNA_UNICODE_JT_T }, { 0xA9BC , 0xA9BC , .p.jt = IDNA_UNICODE_JT_T }, + { 0xA9E5 , 0xA9E5 , .p.jt = IDNA_UNICODE_JT_T }, { 0xAA29, 0xAA2E , .p.jt = IDNA_UNICODE_JT_T }, { 0xAA31, 0xAA32 , .p.jt = IDNA_UNICODE_JT_T }, { 0xAA35, 0xAA36 , .p.jt = IDNA_UNICODE_JT_T }, { 0xAA43 , 0xAA43 , .p.jt = IDNA_UNICODE_JT_T }, { 0xAA4C , 0xAA4C , .p.jt = IDNA_UNICODE_JT_T }, + { 0xAA7C , 0xAA7C , .p.jt = IDNA_UNICODE_JT_T }, { 0xAAB0 , 0xAAB0 , .p.jt = IDNA_UNICODE_JT_T }, { 0xAAB2, 0xAAB4 , .p.jt = IDNA_UNICODE_JT_T }, { 0xAAB7, 0xAAB8 , .p.jt = IDNA_UNICODE_JT_T }, { 0xAABE, 0xAABF , .p.jt = IDNA_UNICODE_JT_T }, { 0xAAC1 , 0xAAC1 , .p.jt = IDNA_UNICODE_JT_T }, + { 0xAAEC, 0xAAED , .p.jt = IDNA_UNICODE_JT_T }, + { 0xAAF6 , 0xAAF6 , .p.jt = IDNA_UNICODE_JT_T }, { 0xABE5 , 0xABE5 , .p.jt = IDNA_UNICODE_JT_T }, { 0xABE8 , 0xABE8 , .p.jt = IDNA_UNICODE_JT_T }, { 0xABED , 0xABED , .p.jt = IDNA_UNICODE_JT_T }, { 0xFB1E , 0xFB1E , .p.jt = IDNA_UNICODE_JT_T }, { 0xFE00, 0xFE0F , .p.jt = IDNA_UNICODE_JT_T }, - { 0xFE20, 0xFE26 , .p.jt = IDNA_UNICODE_JT_T }, + { 0xFE20, 0xFE2F , .p.jt = IDNA_UNICODE_JT_T }, { 0xFEFF , 0xFEFF , .p.jt = IDNA_UNICODE_JT_T }, { 0xFFF9, 0xFFFB , .p.jt = IDNA_UNICODE_JT_T }, { 0x101FD , 0x101FD , .p.jt = IDNA_UNICODE_JT_T }, + { 0x102E0 , 
0x102E0 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x10376, 0x1037A , .p.jt = IDNA_UNICODE_JT_T }, { 0x10A01, 0x10A03 , .p.jt = IDNA_UNICODE_JT_T }, { 0x10A05, 0x10A06 , .p.jt = IDNA_UNICODE_JT_T }, { 0x10A0C, 0x10A0F , .p.jt = IDNA_UNICODE_JT_T }, { 0x10A38, 0x10A3A , .p.jt = IDNA_UNICODE_JT_T }, { 0x10A3F , 0x10A3F , .p.jt = IDNA_UNICODE_JT_T }, - { 0x11080, 0x11081 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x10AE5, 0x10AE6 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x10D24, 0x10D27 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x10F46, 0x10F50 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11001 , 0x11001 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11038, 0x11046 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1107F, 0x11081 , .p.jt = IDNA_UNICODE_JT_T }, { 0x110B3, 0x110B6 , .p.jt = IDNA_UNICODE_JT_T }, { 0x110B9, 0x110BA , .p.jt = IDNA_UNICODE_JT_T }, - { 0x110BD , 0x110BD , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11100, 0x11102 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11127, 0x1112B , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1112D, 0x11134 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11173 , 0x11173 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11180, 0x11181 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x111B6, 0x111BE , .p.jt = IDNA_UNICODE_JT_T }, + { 0x111C9, 0x111CC , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1122F, 0x11231 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11234 , 0x11234 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11236, 0x11237 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1123E , 0x1123E , .p.jt = IDNA_UNICODE_JT_T }, + { 0x112DF , 0x112DF , .p.jt = IDNA_UNICODE_JT_T }, + { 0x112E3, 0x112EA , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11300, 0x11301 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1133B, 0x1133C , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11340 , 0x11340 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11366, 0x1136C , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11370, 0x11374 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11438, 0x1143F , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11442, 0x11444 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11446 , 0x11446 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1145E , 0x1145E , .p.jt = 
IDNA_UNICODE_JT_T }, + { 0x114B3, 0x114B8 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x114BA , 0x114BA , .p.jt = IDNA_UNICODE_JT_T }, + { 0x114BF, 0x114C0 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x114C2, 0x114C3 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x115B2, 0x115B5 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x115BC, 0x115BD , .p.jt = IDNA_UNICODE_JT_T }, + { 0x115BF, 0x115C0 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x115DC, 0x115DD , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11633, 0x1163A , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1163D , 0x1163D , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1163F, 0x11640 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x116AB , 0x116AB , .p.jt = IDNA_UNICODE_JT_T }, + { 0x116AD , 0x116AD , .p.jt = IDNA_UNICODE_JT_T }, + { 0x116B0, 0x116B5 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x116B7 , 0x116B7 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1171D, 0x1171F , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11722, 0x11725 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11727, 0x1172B , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1182F, 0x11837 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11839, 0x1183A , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11A01, 0x11A0A , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11A33, 0x11A38 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11A3B, 0x11A3E , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11A47 , 0x11A47 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11A51, 0x11A56 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11A59, 0x11A5B , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11A8A, 0x11A96 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11A98, 0x11A99 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11C30, 0x11C36 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11C38, 0x11C3D , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11C3F , 0x11C3F , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11C92, 0x11CA7 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11CAA, 0x11CB0 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11CB2, 0x11CB3 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11CB5, 0x11CB6 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11D31, 0x11D36 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11D3A , 0x11D3A , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11D3C, 0x11D3D , .p.jt = 
IDNA_UNICODE_JT_T }, + { 0x11D3F, 0x11D45 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11D47 , 0x11D47 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11D90, 0x11D91 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11D95 , 0x11D95 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11D97 , 0x11D97 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x11EF3, 0x11EF4 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x16AF0, 0x16AF4 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x16B30, 0x16B36 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x16F8F, 0x16F92 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1BC9D, 0x1BC9E , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1BCA0, 0x1BCA3 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1D167, 0x1D169 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1D173, 0x1D17A , .p.jt = IDNA_UNICODE_JT_T }, { 0x1D17B, 0x1D182 , .p.jt = IDNA_UNICODE_JT_T }, { 0x1D185, 0x1D18B , .p.jt = IDNA_UNICODE_JT_T }, { 0x1D1AA, 0x1D1AD , .p.jt = IDNA_UNICODE_JT_T }, { 0x1D242, 0x1D244 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1DA00, 0x1DA36 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1DA3B, 0x1DA6C , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1DA75 , 0x1DA75 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1DA84 , 0x1DA84 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1DA9B, 0x1DA9F , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1DAA1, 0x1DAAF , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1E000, 0x1E006 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1E008, 0x1E018 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1E01B, 0x1E021 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1E023, 0x1E024 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1E026, 0x1E02A , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1E8D0, 0x1E8D6 , .p.jt = IDNA_UNICODE_JT_T }, + { 0x1E944, 0x1E94A , .p.jt = IDNA_UNICODE_JT_T }, { 0xE0001 , 0xE0001 , .p.jt = IDNA_UNICODE_JT_T }, { 0xE0020, 0xE007F , .p.jt = IDNA_UNICODE_JT_T }, { 0xE0100, 0xE01EF , .p.jt = IDNA_UNICODE_JT_T }, diff --git a/utils/import-messages.pl b/utils/import-messages.pl deleted file mode 100644 index 4c13a859e..000000000 --- a/utils/import-messages.pl +++ /dev/null @@ -1,326 +0,0 @@ -#!/usr/bin/perl -# -# Copyright © 2013 Vivek Dasmohapatra <vivek@collabora.co.uk> -# -# 
Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# * The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. - -=head1 - -Take a single-language messages file and merge it back in to the -NetSurf master messaged (i10n) file. - -=cut - -use strict; - -use Getopt::Long (); -use Fcntl qw( O_CREAT O_EXCL O_WRONLY O_APPEND O_RDONLY O_WRONLY O_TRUNC ); - -use constant GETOPT_OPTS => qw( auto_abbrev no_getopt_compat bundling ); -use constant GETOPT_SPEC => - qw( output|o=s - input|i=s - lang|l=s - plat|platform|p=s - format|fmt|f=s - import|I=s - help|h|? 
); - -# default option values: -my %opt = qw( plat any format messages ); - -sub input_stream ($;$); -sub output_stream (); -sub usage (); -sub parser (); - -sub main () -{ - my $input; - my $output; - my $import; - my $parser; - my $opt_ok; - my @input; - my %message; - my $last_key; - my $last_plat; - - # option parsing: - Getopt::Long::Configure( GETOPT_OPTS ); - $opt_ok = Getopt::Long::GetOptions( \%opt, GETOPT_SPEC ); - - # allow input, import & output to be specified as non-option arguments: - if( @ARGV ) { $opt{input } ||= shift( @ARGV ) } - if( @ARGV ) { $opt{import} ||= shift( @ARGV ) } - if( @ARGV ) { $opt{output} ||= shift( @ARGV ) } - - # open the appropriate streams and get the formatter and headers: - if( $opt_ok ) - { - $input = input_stream( $opt{input} ); - $import = input_stream( $opt{import}, 'import-file' ); - $parser = parser(); - $opt{plat} ||= 'any'; - } - - # double check the options are sane (and we weren't asked for the help) - if( !$opt_ok || $opt{help} || $opt{lang} !~ /^[a-z]{2}$/ ) - { - usage(); - } - - @input = <$input>; - $output = output_stream(); - - $parser->( \%message, $import ); - - foreach ( @input ) - { - use bytes; - - my( $lang, $plat, $key ); - - if( /^([a-z]{2})\.([^.]+)\.([^:]+):/ ) - { - ( $lang, $plat, $key ) = ( $1, $2, $3 ); - } - - if( $key || $message{ $last_key } ) - { - #print( $output "## $last_key -> $key\n" ); - # the key changed but we have a message for it still pending: - if( $last_key && $message{ $last_key } && ($key ne $last_key) ) - { - my $plt = $last_plat; - my $str = $message{ $last_key }; - my $msg = qq|$opt{lang}.$last_plat.$last_key:$str\n|; - - print( $output $msg ); - delete( $message{ $last_key } ); - - # if the line following our new translation is not blank, - # generate a synthetic group-separator: - if( !/^\s*$/ ) { print( $output "\n") } - } - - $last_key = $key; - $last_plat = $plat; - - if( $lang eq $opt{lang} ) - { - my $val = $message{ $key }; - if( $val && - ( $opt{plat} eq 'any' || 
# all platforms ok - $opt{plat} eq $plat ) ) # specified platform matched - { - print( $output qq|$1.$2.$3:$val\n| ); - delete( $message{ $key } ); - next; - } - } - } - - print( $output $_ ); - } -} - -main(); - -sub usage () -{ - my @fmt = map { s/::$//; $_ } keys(%{$::{'msgfmt::'}}); - print( STDERR <<TXT ); -usage: - $0 -l lang-code \ - [-p platform] [-f format] \ - [-o output-file] [-i input-file] [-I import-file] - - $0 -l lang-code … [input-file [import-file [output-file]]] - - lang-code : en fr ko … (no default) - platform : any gtk ami (default 'any') - format : @fmt (default 'messages') - input-file : defaults to standard input - output-file: defaults to standard output - import-file: no default - - The input-file may be the same as the output-file, in which case - it will be altered in place. -TXT - exit(1); -} - -sub input_stream ($;$) -{ - my $file = shift(); - my $must_exist = shift(); - - if( $file ) - { - my $ifh; - - sysopen( $ifh, $file, O_RDONLY ) || - die( "$0: Failed to open input file $file: $!\n" ); - - return $ifh; - } - - if( $must_exist ) - { - print( STDERR "No file specified for $must_exist\n" ); - usage(); - } - - return \*STDIN; -} - -sub output_stream () -{ - if( $opt{output} ) - { - my $ofh; - - sysopen( $ofh, $opt{output}, O_CREAT|O_TRUNC|O_WRONLY ) || - die( "$0: Failed to open output file $opt{output}: $!\n" ); - - return $ofh; - } - - return \*STDOUT; -} - -sub parser () -{ - my $name = $opt{format}; - my $func = "msgfmt::$name"->UNIVERSAL::can("parse"); - - return $func || die( "No handler found for format '$name'\n" ); -} - -# format implementations: -{ - package msgfmt::java; - - sub unescape { $_[0] =~ s/\\([^abfnrtv])/$1/g; $_[0] } - sub parse - { - my $cache = shift(); - my $stream = shift(); - - while ( <$stream> ) - { - if( /([^#]\S+)\s*=\s?(.*)/ ) - { - my $key = $1; - my $val = $2; - $cache->{ $key } = unescape( $val ); - } - } - } -} - -{ - package msgfmt::messages; # native netsurf format - - sub parse - { - my $cache 
= shift(); - my $stream = shift(); - - while ( <$stream> ) - { - if( /^([a-z]{2})\.([^.]+)\.([^:]+):(.*)/ ) - { - my( $lang, $plat, $key, $val ) = ( $1, $2, $3, $4 ); - - if( $lang ne $opt{lang} ) { next } - if( $opt{plat} ne 'any' && - $opt{plat} ne $plat && - 'all' ne $plat ) { next } - - $cache->{ $key } = $val; - } - } - } -} - -{ - package msgfmt::transifex; - use base 'msgfmt::java'; - - # the differences between transifex and java properties only matter in - # the outward direction: During import they can be treated the same way -} - -{ - package msgfmt::android; - - ANDROID_XML: - { - package msgfmt::android::xml; - - my @stack; - my $data; - my $key; - our $cache; - - sub StartDocument ($) { @stack = (); $key = '' } - sub Text ($) { if( $key ) { $data .= $_ } } - sub PI ($$$) { } - sub EndDocument ($) { } - - sub EndTag ($$) - { - pop( @stack ); - - if( !$key ) { return; } - - $cache->{ $key } = $data; - $data = $key = ''; - } - - sub StartTag ($$) - { - push( @stack, $_[1] ); - - if( "@stack" eq "resources string" ) - { - $data = ''; - $key = $_{ name }; - } - } - } - - sub parse - { - require XML::Parser; - - if( !$XML::Parser::VERSION ) - { - die("XML::Parser required for android format support\n"); - } - - $msgfmt::android::xml::cache = shift(); - my $stream = shift(); - my $parser = XML::Parser->new( Style => 'Stream', - Pkg => 'msgfmt::android::xml' ); - $parser->parse( $stream ); - } -} diff --git a/utils/inet.h b/utils/inet.h index da1798432..29010efc1 100644 --- a/utils/inet.h +++ b/utils/inet.h @@ -40,6 +40,15 @@ #include <arpa/inet.h> #include <sys/select.h> +#define ns_close_socket close + +#ifdef WITH_AMISSL +/* AmiSSL needs everything to be using bsdsocket directly to avoid conflicts */ +#include <proto/bsdsocket.h> +#undef ns_close_socket +#define ns_close_socket CloseSocket +#endif + #else #include <winsock2.h> @@ -49,6 +58,8 @@ #define EAFNOSUPPORT WSAEAFNOSUPPORT #endif +#define ns_close_socket closesocket + #endif diff --git 
a/utils/jenkins-build.sh b/utils/jenkins-build.sh deleted file mode 100755 index b6ca21dd7..000000000 --- a/utils/jenkins-build.sh +++ /dev/null @@ -1,437 +0,0 @@ -#!/bin/bash -# -# Copyright © 2013 Vincent Sanders <vince@netsurf-browser.org> -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# * The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. 
- -# NetSurf continuous integration build script for jenkins -# -# This script is executed by jenkins to build netsurf itself -# -# Usage: jenkins-build.sh -# - -# TARGET is set to the frontend target to build -# HOST is set to the identifier of the toolchain doing the building -# CC is the compiler (gcc or clang) -# BUILD_NUMBER is the CI build number - -##### - -# set defaults - this is not retrivable from the jenkins environment -OLD_ARTIFACT_COUNT=25 - -################# Parameter and environment setup ##################### - -#identifier for this specific build -IDENTIFIER="$CC-${BUILD_NUMBER}" - -# Identifier for build which will be cleaned -OLD_IDENTIFIER="$CC-$((BUILD_NUMBER - ${OLD_ARTIFACT_COUNT}))" - -# default atari architecture - bletch -ATARIARCH=68020-60 - -# make tool -MAKE=make - -# Ensure the combination of target and toolchain works and set build -# specific parameters too -case ${TARGET} in - "riscos") - case ${HOST} in - "arm-unknown-riscos") - ;; - - *) - echo "Target \"${TARGET}\" cannot be built on \"${HOST})\"" - exit 1 - ;; - - esac - - PKG_SRC=netsurf - PKG_SFX=.zip - ;; - - "haiku") - case ${HOST} in - "i586-pc-haiku") - ;; - - *) - echo "Target \"${TARGET}\" cannot be built on \"${HOST})\"" - exit 1 - ;; - - esac - - PKG_SRC=NetSurf - PKG_SFX= - ;; - - - "windows") - case ${HOST} in - "i686-w64-mingw32") - ;; - - *) - echo "Target \"${TARGET}\" cannot be built on \"${HOST})\"" - exit 1 - ;; - - esac - - PKG_SRC=netsurf-installer - PKG_SFX=.exe - ;; - - - "cocoa") - case ${HOST} in - "x86_64-apple-darwin14.5.0") - PATH=/opt/local/bin:/opt/local/sbin:${PATH} - ;; - - "i686-apple-darwin10") - ;; - - "powerpc-apple-darwin9") - ;; - - *) - echo "Target \"${TARGET}\" cannot be built on \"${HOST})\"" - exit 1 - ;; - - esac - - IDENTIFIER="${HOST}-${IDENTIFIER}" - OLD_IDENTIFIER="${HOST}-${OLD_IDENTIFIER}" - PKG_SRC=NetSurf - PKG_SFX=.dmg - ;; - - - "amiga") - case ${HOST} in - "ppc-amigaos") - ;; - - *) - echo "Target \"${TARGET}\" cannot be 
built on \"${HOST})\"" - exit 1 - ;; - - esac - - PKG_SRC=NetSurf_Amiga/netsurf - PKG_SFX=.lha - ;; - - - "atari") - case ${HOST} in - "m68k-atari-mint") - PKG_SRC=ns020 - PKG_SFX=.zip - ;; - - "m5475-atari-mint") - export GCCSDK_INSTALL_ENV=/opt/netsurf/m5475-atari-mint/env - export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/m5475-atari-mint/cross/bin - ATARIARCH=v4e - PKG_SRC=nsv4e - PKG_SFX=.zip - ;; - - *) - echo "Target \"${TARGET}\" cannot be built on \"${HOST})\"" - exit 1 - ;; - - esac - - IDENTIFIER="${HOST}-${IDENTIFIER}" - OLD_IDENTIFIER="${HOST}-${OLD_IDENTIFIER}" - ;; - - - "gtk") - case ${HOST} in - "x86_64-linux-gnu") - ;; - - "arm-linux-gnueabihf") - ;; - - "aarch64-linux-gnu") - ;; - - amd64-unknown-openbsd*) - MAKE=gmake - ;; - - x86_64-unknown-freebsd*) - MAKE=gmake - ;; - - *) - echo "Target \"${TARGET}\" cannot be built on \"${HOST}\"" - exit 1 - ;; - - esac - - IDENTIFIER="${HOST}-${IDENTIFIER}" - OLD_IDENTIFIER="${HOST}-${OLD_IDENTIFIER}" - PKG_SRC=nsgtk - PKG_SFX= - ;; - - - "gtk3") - case ${HOST} in - "x86_64-linux-gnu") - ;; - - "arm-linux-gnueabihf") - ;; - - "aarch64-linux-gnu") - ;; - - amd64-unknown-openbsd*) - MAKE=gmake - ;; - - x86_64-unknown-freebsd*) - MAKE=gmake - ;; - - *) - echo "Target \"${TARGET}\" cannot be built on \"${HOST}\"" - exit 1 - ;; - - esac - - IDENTIFIER="${HOST}-${IDENTIFIER}" - OLD_IDENTIFIER="${HOST}-${OLD_IDENTIFIER}" - PKG_SRC=nsgtk3 - PKG_SFX= - ;; - - - "framebuffer") - case ${HOST} in - "x86_64-linux-gnu") - ;; - - arm-linux-gnueabihf) - ;; - - "aarch64-linux-gnu") - ;; - - "i686-apple-darwin10") - ;; - - "powerpc-apple-darwin9") - ;; - - amd64-unknown-openbsd*) - MAKE=gmake - ;; - - x86_64-unknown-freebsd*) - MAKE=gmake - ;; - - "arm-unknown-riscos") - export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env - export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin - ;; - - "m68k-atari-mint") - export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env - export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin - 
;; - - "m5475-atari-mint") - ATARIARCH=v4e - export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env - export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin - ;; - - "i686-w64-mingw32") - export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env - export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin - ;; - - "ppc-amigaos") - export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env - export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin - ;; - - *) - echo "Target \"${TARGET}\" cannot be built on \"${HOST})\"" - exit 1 - ;; - - esac - - IDENTIFIER="${HOST}-${IDENTIFIER}" - OLD_IDENTIFIER="${HOST}-${OLD_IDENTIFIER}" - PKG_SRC=nsfb - PKG_SFX= - ;; - - - "monkey") - # monkey target can be built anywhere - case ${HOST} in - amd64-unknown-openbsd*) - MAKE=gmake - ;; - - x86_64-unknown-freebsd*) - MAKE=gmake - ;; - - "arm-unknown-riscos") - export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env - export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin - # headers and compiler combination throw these warnings - export CFLAGS="-Wno-redundant-decls -Wno-parentheses" - export LDFLAGS=-lcares - ;; - - "m68k-atari-mint") - export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env - export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin - ;; - - "m5475-atari-mint") - ATARIARCH=v4e - export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env - export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin - ;; - - "i686-w64-mingw32") - export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env - export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin - ;; - - "ppc-amigaos") - export GCCSDK_INSTALL_ENV=/opt/netsurf/${HOST}/env - export GCCSDK_INSTALL_CROSSBIN=/opt/netsurf/${HOST}/cross/bin - ;; - - *) - echo "Target \"${TARGET}\" generic build on \"${HOST})\"" - ;; - - esac - - IDENTIFIER="${HOST}-${IDENTIFIER}" - OLD_IDENTIFIER="${HOST}-${OLD_IDENTIFIER}" - PKG_SRC=nsmonkey - PKG_SFX= - ;; - - *) - # TARGET must be in the environment and set correctly - echo "Unkown TARGET 
\"${TARGET}\"" - exit 1 - ;; - -esac - -# setup environment -export PREFIX=${JENKINS_HOME}/artifacts-${HOST} -export PKG_CONFIG_PATH=${PREFIX}/lib/pkgconfig -export LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:${PREFIX}/lib -export PATH=${PATH}:${PREFIX}/bin - -# configure ccache for clang -if [ "${CC}" = "clang" ];then - export CCACHE_CPP2=yes - export CC="clang -Qunused-arguments" -fi - -########### Use distcc if present ###### - -DISTCC=distcc -PARALLEL=1 -HAVE_DISTCC=$(${DISTCC} --version >/dev/null 2>&1 && echo "true" || echo "false") -if [ ${HAVE_DISTCC} = "true" ];then - PARALLEL=$(${DISTCC} -j) - export PATH=/usr/lib/distcc:${PATH} - export DISTCC_DIR=${JENKINS_HOME} -fi - - - -########### Build from source ################## - -# Clean first -${MAKE} clean - -# Do the Build -${MAKE} -j ${PARALLEL} -k CI_BUILD=${BUILD_NUMBER} ATARIARCH=${ATARIARCH} Q= - - - -############ Package artifact construction ################ - -# build the package file -${MAKE} -k CI_BUILD=${BUILD_NUMBER} ATARIARCH=${ATARIARCH} package Q= - -if [ ! 
-f "${PKG_SRC}${PKG_SFX}" ]; then - # unable to find package file - exit 1 -fi - - - -############ Package artifact deployment ################ - -#destination for package artifacts -DESTDIR=/srv/ci.netsurf-browser.org/html/builds/${TARGET}/ - -NEW_ARTIFACT_TARGET="NetSurf-${IDENTIFIER}${PKG_SFX}" - -# copy the file into the output - always use scp as it works local or remote -scp "${PKG_SRC}${PKG_SFX}" netsurf@ci.netsurf-browser.org:${DESTDIR}/${NEW_ARTIFACT_TARGET} - -# remove the local package file artifact -rm -f "${PKG_SRC}${PKG_SFX}" - -# setup latest link -ssh netsurf@ci.netsurf-browser.org "rm -f ${DESTDIR}/LATEST && echo "${NEW_ARTIFACT_TARGET}" > ${DESTDIR}/LATEST" - - - -############ Package artifact cleanup ################ - -OLD_ARTIFACT_TARGET="NetSurf-${OLD_IDENTIFIER}${PKG_SFX}" - -ssh netsurf@ci.netsurf-browser.org "rm -f ${DESTDIR}/${OLD_ARTIFACT_TARGET}" diff --git a/utils/libdom.c b/utils/libdom.c index a996e98bf..c39aad343 100644 --- a/utils/libdom.c +++ b/utils/libdom.c @@ -28,129 +28,6 @@ #include "utils/log.h" #include "utils/libdom.h" -/* exported interface documented in libdom.h */ -bool libdom_treewalk(dom_node *root, - bool (*callback)(dom_node *node, dom_string *name, void *ctx), - void *ctx) -{ - dom_node *node; - bool result = true; - - node = dom_node_ref(root); /* tree root */ - - while (node != NULL) { - dom_node *next = NULL; - dom_node_type type; - dom_string *name; - dom_exception exc; - - exc = dom_node_get_first_child(node, &next); - if (exc != DOM_NO_ERR) { - dom_node_unref(node); - break; - } - - if (next != NULL) { - /* 1. Got children */ - dom_node_unref(node); - node = next; - } else { - /* No children; siblings & ancestor's siblings */ - while (node != NULL) { - exc = dom_node_get_next_sibling(node, &next); - if (exc != DOM_NO_ERR) { - dom_node_unref(node); - node = NULL; - break; - } - - if (next != NULL) { - /* 2. 
Got sibling */ - break; - } - - exc = dom_node_get_parent_node(node, &next); - if (exc != DOM_NO_ERR) { - dom_node_unref(node); - node = NULL; - break; - } - - /* 3. Try parent */ - dom_node_unref(node); - node = next; - } - - if (node == NULL) - break; - - dom_node_unref(node); - node = next; - } - - assert(node != NULL); - - exc = dom_node_get_node_type(node, &type); - if ((exc != DOM_NO_ERR) || (type != DOM_ELEMENT_NODE)) - continue; - - exc = dom_node_get_node_name(node, &name); - if (exc != DOM_NO_ERR) - continue; - - result = callback(node, name, ctx); - - dom_string_unref(name); - - if (result == false) { - break; /* callback caused early termination */ - } - - } - return result; -} - - -/* libdom_treewalk context for libdom_find_element */ -struct find_element_ctx { - lwc_string *search; - dom_node *found; -}; - -/* libdom_treewalk callback for libdom_find_element */ -static bool libdom_find_element_callback(dom_node *node, dom_string *name, - void *ctx) -{ - struct find_element_ctx *data = ctx; - - if (dom_string_caseless_lwc_isequal(name, data->search)) { - /* Found element */ - data->found = node; - return false; /* Discontinue search */ - } - - return true; /* Continue search */ -} - - -/* exported interface documented in libdom.h */ -dom_node *libdom_find_element(dom_node *node, lwc_string *element_name) -{ - struct find_element_ctx data; - - assert(element_name != NULL); - - if (node == NULL) - return NULL; - - data.search = element_name; - data.found = NULL; - - libdom_treewalk(node, libdom_find_element_callback, &data); - - return data.found; -} - /* exported interface documented in libdom.h */ dom_node *libdom_find_first_element(dom_node *parent, lwc_string *element_name) diff --git a/utils/libdom.h b/utils/libdom.h index 306aa0f8b..63841934f 100644 --- a/utils/libdom.h +++ b/utils/libdom.h @@ -33,29 +33,6 @@ #include <dom/bindings/hubbub/errors.h> /** - * depth-first walk the dom calling callback for each element - * - * \param root the dom node 
to use as the root of the tree walk - * \param callback The function called for each element - * \param ctx The context passed to the callback. - * \return true if all nodes were examined, false if the callback terminated - * the walk early. - */ -bool libdom_treewalk(dom_node *root, - bool (*callback)(dom_node *node, dom_string *name, void *ctx), - void *ctx); - -/** - * Search the descendants of a node for an element. - * - * \param node dom_node to search children of, or NULL - * \param element_name name of element to find - * \return first child of node which is an element and matches name, or - * NULL if not found or parameter node is NULL - */ -dom_node *libdom_find_element(dom_node *node, lwc_string *element_name); - -/** * Search children of a node for first named element * \param parent dom_node to search children of, or NULL * \param element_name name of element to find diff --git a/utils/log.c b/utils/log.c index e267b3179..a083b5720 100644 --- a/utils/log.c +++ b/utils/log.c @@ -94,11 +94,14 @@ static const char *nslog_gettime(void) NSLOG_DEFINE_CATEGORY(netsurf, "NetSurf default logging"); NSLOG_DEFINE_CATEGORY(llcache, "Low level cache"); -NSLOG_DEFINE_CATEGORY(fetch, "objet fetching"); -NSLOG_DEFINE_CATEGORY(plot, "rendering system"); -NSLOG_DEFINE_CATEGORY(schedule, "scheduler"); +NSLOG_DEFINE_CATEGORY(fetch, "Object fetching"); +NSLOG_DEFINE_CATEGORY(plot, "Rendering system"); +NSLOG_DEFINE_CATEGORY(schedule, "Scheduler"); NSLOG_DEFINE_CATEGORY(fbtk, "Framebuffer toolkit"); NSLOG_DEFINE_CATEGORY(layout, "Layout"); +NSLOG_DEFINE_CATEGORY(flex, "Flex"); +NSLOG_DEFINE_CATEGORY(dukky, "Duktape JavaScript Binding"); +NSLOG_DEFINE_CATEGORY(jserrors, "JavaScript error messages"); static void netsurf_render_log(void *_ctx, @@ -107,8 +110,11 @@ netsurf_render_log(void *_ctx, va_list args) { fprintf(logfile, - "%s %.*s:%i %.*s: ", + "%s [%s %.*s] %.*s:%i %.*s: ", nslog_gettime(), + nslog_short_level_name(ctx->level), + ctx->category->namelen, + 
ctx->category->name, ctx->filenamelen, ctx->filename, ctx->lineno, @@ -280,7 +286,7 @@ nserror nslog_init(nslog_ensure_t *ensure, int *pargc, char **argv) /* exported interface documented in utils/log.h */ nserror -nslog_set_filter_by_options() +nslog_set_filter_by_options(void) { if (verbose_log) return nslog_set_filter(nsoption_charp(verbose_filter)); @@ -290,7 +296,7 @@ nslog_set_filter_by_options() /* exported interface documented in utils/log.h */ void -nslog_finalise() +nslog_finalise(void) { NSLOG(netsurf, INFO, "Finalising logging, please report any further messages"); diff --git a/utils/log.h b/utils/log.h index b773ec4a2..b7aa83358 100644 --- a/utils/log.h +++ b/utils/log.h @@ -84,6 +84,9 @@ NSLOG_DECLARE_CATEGORY(plot); NSLOG_DECLARE_CATEGORY(schedule); NSLOG_DECLARE_CATEGORY(fbtk); NSLOG_DECLARE_CATEGORY(layout); +NSLOG_DECLARE_CATEGORY(flex); +NSLOG_DECLARE_CATEGORY(dukky); +NSLOG_DECLARE_CATEGORY(jserrors); #else /* WITH_NSLOG */ diff --git a/utils/memanalyze.pl b/utils/memanalyze.pl deleted file mode 100755 index 57e107d11..000000000 --- a/utils/memanalyze.pl +++ /dev/null @@ -1,380 +0,0 @@ -#!/usr/bin/env perl -# -# Example input: -# -# MEM mprintf.c:1094 malloc(32) = e5718 -# MEM mprintf.c:1103 realloc(e5718, 64) = e6118 -# MEM sendf.c:232 free(f6520) - -my $mallocs=0; -my $callocs=0; -my $reallocs=0; -my $strdups=0; -my $showlimit; - -while(1) { - if($ARGV[0] eq "-v") { - $verbose=1; - shift @ARGV; - } - elsif($ARGV[0] eq "-t") { - $trace=1; - shift @ARGV; - } - elsif($ARGV[0] eq "-l") { - # only show what alloc that caused a memlimit failure - $showlimit=1; - shift @ARGV; - } - else { - last; - } -} - -my $maxmem; - -sub newtotal { - my ($newtot)=@_; - # count a max here - - if($newtot > $maxmem) { - $maxmem= $newtot; - } -} - -my $file = $ARGV[0]; - -if(! 
-f $file) { - print "Usage: memanalyze.pl [options] <dump file>\n", - "Options:\n", - " -l memlimit failure displayed\n", - " -v Verbose\n", - " -t Trace\n"; - exit; -} - -open(FILE, "<$file"); - -if($showlimit) { - while(<FILE>) { - if(/^LIMIT.*memlimit$/) { - print $_; - last; - } - } - close(FILE); - exit; -} - - - -while(<FILE>) { - chomp $_; - $line = $_; - - if($line =~ /^LIMIT ([^ ]*):(\d*) (.*)/) { - # new memory limit test prefix - my $i = $3; - my ($source, $linenum) = ($1, $2); - if($trace && ($i =~ /([^ ]*) reached memlimit/)) { - print "LIMIT: $1 returned error at $source:$linenum\n"; - } - } - elsif($line =~ /^MEM ([^ ]*):(\d*) (.*)/) { - # generic match for the filename+linenumber - $source = $1; - $linenum = $2; - $function = $3; - - if($function =~ /free\(0x([0-9a-f]*)/) { - $addr = $1; - if(!exists $sizeataddr{$addr}) { - print "FREE ERROR: No memory allocated: $line\n"; - } - elsif(-1 == $sizeataddr{$addr}) { - print "FREE ERROR: Memory freed twice: $line\n"; - print "FREE ERROR: Previously freed at: ".$getmem{$addr}."\n"; - } - else { - $totalmem -= $sizeataddr{$addr}; - if($trace) { - print "FREE: malloc at ".$getmem{$addr}." 
is freed again at $source:$linenum\n"; - printf("FREE: %d bytes freed, left allocated: $totalmem bytes\n", $sizeataddr{$addr}); - } - - newtotal($totalmem); - $frees++; - - $sizeataddr{$addr}=-1; # set -1 to mark as freed - $getmem{$addr}="$source:$linenum"; - - } - } - elsif($function =~ /malloc\((\d*)\) = 0x([0-9a-f]*)/) { - $size = $1; - $addr = $2; - - if($sizeataddr{$addr}>0) { - # this means weeeeeirdo - print "Mixed debug compile, rebuild curl now\n"; - } - - $sizeataddr{$addr}=$size; - $totalmem += $size; - - if($trace) { - print "MALLOC: malloc($size) at $source:$linenum", - " makes totally $totalmem bytes\n"; - } - - newtotal($totalmem); - $mallocs++; - - $getmem{$addr}="$source:$linenum"; - } - elsif($function =~ /calloc\((\d*),(\d*)\) = 0x([0-9a-f]*)/) { - $size = $1*$2; - $addr = $3; - - $arg1 = $1; - $arg2 = $2; - - if($sizeataddr{$addr}>0) { - # this means weeeeeirdo - print "Mixed debug compile, rebuild curl now\n"; - } - - $sizeataddr{$addr}=$size; - $totalmem += $size; - - if($trace) { - print "CALLOC: calloc($arg1,$arg2) at $source:$linenum", - " makes totally $totalmem bytes\n"; - } - - newtotal($totalmem); - $callocs++; - - $getmem{$addr}="$source:$linenum"; - } - elsif($function =~ /realloc\(0x([0-9a-f]*), (\d*)\) = 0x([0-9a-f]*)/) { - $oldaddr = $1; - $newsize = $2; - $newaddr = $3; - - $totalmem -= $sizeataddr{$oldaddr}; - if($trace) { - printf("REALLOC: %d less bytes and ", $sizeataddr{$oldaddr}); - } - $sizeataddr{$oldaddr}=0; - - $totalmem += $newsize; - $sizeataddr{$newaddr}=$newsize; - - if($trace) { - printf("%d more bytes ($source:$linenum)\n", $newsize); - } - - newtotal($totalmem); - $reallocs++; - - $getmem{$oldaddr}=""; - $getmem{$newaddr}="$source:$linenum"; - } - elsif($function =~ /strdup\(0x([0-9a-f]*)\) \((\d*)\) = 0x([0-9a-f]*)/) { - # strdup(a5b50) (8) = df7c0 - - $dup = $1; - $size = $2; - $addr = $3; - $getmem{$addr}="$source:$linenum"; - $sizeataddr{$addr}=$size; - - $totalmem += $size; - - if($trace) { - printf("STRDUP: 
$size bytes at %s, makes totally: %d bytes\n", - $getmem{$addr}, $totalmem); - } - - newtotal($totalmem); - $strdups++; - } - elsif($function =~ /strndup\(0x([0-9a-f]*), (\d*)\) \((\d*)\) = 0x([0-9a-f]*)/) { - # strndup(a5b50, 20) (8) = df7c0 - - $dup = $1; - $limit = $2; - $size = $3; - $addr = $4; - $getmem{$addr}="$source:$linenum"; - $sizeataddr{$addr}=$size; - - $totalmem += $size; - - if($trace) { - printf("STRDUP: $size bytes at %s, makes totally: %d bytes\n", - $getmem{$addr}, $totalmem); - } - - newtotal($totalmem); - $strdups++; - } - else { - print "Not recognized input line: $function\n"; - } - } - # FD url.c:1282 socket() = 5 - elsif($_ =~ /^FD ([^ ]*):(\d*) (.*)/) { - # generic match for the filename+linenumber - $source = $1; - $linenum = $2; - $function = $3; - - if($function =~ /socket\(\) = (\d*)/) { - $filedes{$1}=1; - $getfile{$1}="$source:$linenum"; - $openfile++; - } - elsif($function =~ /accept\(\) = (\d*)/) { - $filedes{$1}=1; - $getfile{$1}="$source:$linenum"; - $openfile++; - } - elsif($function =~ /sclose\((\d*)\)/) { - if($filedes{$1} != 1) { - print "Close without open: $line\n"; - } - else { - $filedes{$1}=0; # closed now - $openfile--; - } - } - } - # FILE url.c:1282 fopen("blabla") = 0x5ddd - elsif($_ =~ /^FILE ([^ ]*):(\d*) (.*)/) { - # generic match for the filename+linenumber - $source = $1; - $linenum = $2; - $function = $3; - - if($function =~ /fopen\(\"([^\"]*)\",\"([^\"]*)\"\) = (\(nil\)|0x([0-9a-f]*))/) { - if($3 eq "(nil)") { - ; - } - else { - $fopen{$4}=1; - $fopenfile{$4}="$source:$linenum"; - $fopens++; - } - } - # fclose(0x1026c8) - elsif($function =~ /fclose\(0x([0-9a-f]*)\)/) { - if(!$fopen{$1}) { - print "fclose() without fopen(): $line\n"; - } - else { - $fopen{$1}=0; - $fopens--; - } - } - } - # GETNAME url.c:1901 getnameinfo() - elsif($_ =~ /^GETNAME ([^ ]*):(\d*) (.*)/) { - # not much to do - } - - # ADDR url.c:1282 getaddrinfo() = 0x5ddd - elsif($_ =~ /^ADDR ([^ ]*):(\d*) (.*)/) { - # generic match for the 
filename+linenumber - $source = $1; - $linenum = $2; - $function = $3; - - if($function =~ /getaddrinfo\(\) = (\(nil\)|0x([0-9a-f]*))/) { - my $add = $2; - if($add eq "(nil)") { - ; - } - else { - $addrinfo{$add}=1; - $addrinfofile{$add}="$source:$linenum"; - $addrinfos++; - } - } - # fclose(0x1026c8) - elsif($function =~ /freeaddrinfo\(0x([0-9a-f]*)\)/) { - if(!$addrinfo{$1}) { - print "freeaddrinfo() without getaddrinfo(): $line\n"; - } - else { - $addrinfo{$1}=0; - $addrinfos--; - } - } - - - } - else { - print "Not recognized prefix line: $line\n"; - } -} -close(FILE); - -if($totalmem) { - print "Leak detected: memory still allocated: $totalmem bytes\n"; - - for(keys %sizeataddr) { - $addr = $_; - $size = $sizeataddr{$addr}; - if($size > 0) { - print "At $addr, there's $size bytes.\t"; - print " allocated by ".$getmem{$addr}."\n"; - $allocs{$getmem{$addr}}++; - $amount{$getmem{$addr}} += $size; - } - } - - print "Summary by location of allocation:\n"; - print "Allocs\tBytes\tLocation\n"; - for (sort { $amount{$b} <=> $amount{$a} } keys %allocs) { - print "$allocs{$_}\t$amount{$_}\t$_\n"; - } -} - -if($openfile) { - for(keys %filedes) { - if($filedes{$_} == 1) { - print "Open file descriptor created at ".$getfile{$_}."\n"; - } - } -} - -if($fopens) { - print "Open FILE handles left at:\n"; - for(keys %fopen) { - if($fopen{$_} == 1) { - print "fopen() called at ".$fopenfile{$_}."\n"; - } - } -} - -if($addrinfos) { - print "IPv6-style name resolve data left at:\n"; - for(keys %addrinfofile) { - if($addrinfo{$_} == 1) { - print "getaddrinfo() called at ".$addrinfofile{$_}."\n"; - } - } -} - -if($verbose) { - print "Mallocs: $mallocs\n", - "Reallocs: $reallocs\n", - "Callocs: $callocs\n", - "Strdups: $strdups\n", - "Frees: $frees\n", - "Allocations: ".($mallocs + $callocs + $reallocs + $strdups)."\n"; - - print "Maximum allocated: $maxmem\n"; -} diff --git a/utils/merge-messages.lua b/utils/merge-messages.lua deleted file mode 100755 index 3aeac697c..000000000 --- 
a/utils/merge-messages.lua +++ /dev/null @@ -1,83 +0,0 @@ -#!/usr/bin/env lua5.1 - -local lfs = require "lfs" - -local en_stat = assert(lfs.attributes "!NetSurf/Resources/en/Messages") -local language = { } -local sorted = { } - -io.stderr:write("loading non-en languages...\n"); - -for dir in lfs.dir "!NetSurf/Resources" do - local path = "!NetSurf/Resources/" .. dir - if dir ~= "en" and lfs.attributes(path .. "/Messages") then - local f = io.open(path .. "/Messages", "r") - local c = 0 - io.stderr:write(dir, ":") - language[dir] = { } - sorted[#sorted + 1] = dir - for l in f:lines() do - if l:sub(1, 1) ~= "#" then - local tag, msg = l:match "^([^:]*):(.*)$" - if tag then - language[dir][tag] = msg - c = c + 1 - end - end - end - f:close() - io.stderr:write(tostring(c), " entries.\n") - end -end - -table.sort(sorted) - -io.stderr:write("working through en...\n") - -local manipulators = { - { "^(ami.*)", "ami.%1" }, - { "^(gtk.*)", "gtk.%1" }, - { "^(Help.*)", "ro.%1" }, - { "^(ARexx.*)", "ami.%1" }, - - { "^(.*)$", "all.%1" } -- must be last -} - -local function manipulate_tag(t) - for _, m in ipairs(manipulators) do - local r, s = t:gsub(m[1], m[2]) - if s > 0 then return r end - end - return t -end - -local f = io.open("!NetSurf/Resources/en/Messages", "r") - -for l in f:lines() do - if l:sub(1,1) == "#" then - print(l) - else - local tag, msg = l:match "^([^:]*):(.*)$" - if not tag then - print(l) - else - local mtag = manipulate_tag(tag) - io.stdout:write("en.", mtag, ":", msg, "\n") - for _, langname in ipairs(sorted) do - local trans = language[langname][tag] - if not trans then - io.stderr:write("*** language ", langname, " lacks translation for ", mtag, "/", tag, "\n") - trans = msg - end - io.stdout:write(langname, ".", mtag, ":", trans, "\n") - language[langname][tag] = nil - end - end - end -end - -for _, langname in ipairs(sorted) do - for tag in pairs(language[langname]) do - io.stderr:write("*** language ", langname, " contains orphan tag ", tag, 
"\n") - end -end
\ No newline at end of file diff --git a/utils/messages.c b/utils/messages.c index e2d45e9da..418276ec3 100644 --- a/utils/messages.c +++ b/utils/messages.c @@ -42,116 +42,104 @@ /** Messages are stored in a fixed-size hash table. */ #define HASH_SIZE 101 -/** The hash table used to store the standard Messages file for the old API */ +/** + * The hash table used to store the standard Messages file for the old API + */ static struct hash_table *messages_hash = NULL; + /** - * process a line of input. + * Create a message context + * + * generate a message context populated with english fallbacks for + * some formatted messages. */ -static nserror -message_process_line(struct hash_table *hash, uint8_t *ln, int lnlen) +static struct hash_table *messages_create_ctx(int hash_size) { - uint8_t *value; - uint8_t *colon; - - /* empty or comment lines */ - if (ln[0] == 0 || ln[0] == '#') { - return NSERROR_OK; - } - - /* find first colon as key/value separator */ - for (colon = ln; colon < (ln + lnlen); colon++) { - if (*colon == ':') { - break; + struct hash_table *nctx; + const struct { + const char *key; + const char *value; + } fallback[] = { + { "LoginDescription", + "The site %s is requesting your username and password. " + "The realm is \"%s\""}, + { "PrivacyDescription", + "A privacy error occurred while communicating with %s this " + "may be a site configuration error or an attempt to steal " + "private information (passwords, messages or credit cards)"}, + { "TimeoutDescription", + "A connection to %s could not be established. 
The site may " + "be temporarily unavailable or too busy to respond."}, + { "FetchErrorDescription", + "An error occurred when connecting to %s"}, + { NULL, NULL} + }; + nctx = hash_create(hash_size); + + if (nctx != NULL) { + int floop; + for (floop = 0; fallback[floop].key != NULL; floop++) { + hash_add(nctx, + fallback[floop].key, + fallback[floop].value); } } - if (colon == (ln + lnlen)) { - /* no colon found */ - return NSERROR_INVALID; - } - *colon = 0; /* terminate key */ - value = colon + 1; + return nctx; +} - if (hash_add(hash, (char *)ln, (char *)value) == false) { - NSLOG(netsurf, INFO, "Unable to add %s:%s to hash table", ln, - value); - return NSERROR_INVALID; - } - return NSERROR_OK; +/** + * Free memory used by a messages hash. + * The context will not be valid after this function returns. + * + * \param ctx context of messages file to free + */ +static void messages_destroy_ctx(struct hash_table *ctx) +{ + if (ctx == NULL) + return; + + hash_destroy(ctx); } + /** * Read keys and values from messages file. * * \param path pathname of messages file - * \param ctx reference of hash table to merge with. + * \param ctx reference of hash table to merge with or NULL to create one. * \return NSERROR_OK on sucess and ctx updated or error code on faliure. */ static nserror messages_load_ctx(const char *path, struct hash_table **ctx) { - char s[400]; /* line buffer */ - gzFile fp; /* compressed file handle */ struct hash_table *nctx; /* new context */ + nserror res; - assert(path != NULL); - - fp = gzopen(path, "r"); - if (!fp) { - NSLOG(netsurf, INFO, - "Unable to open messages file \"%.100s\": %s", path, - strerror(errno)); - - return NSERROR_NOT_FOUND; - } - - if (*ctx == NULL) { - nctx = hash_create(HASH_SIZE); - } else { + if (*ctx != NULL) { /** * \note The passed hash is not copied here so this * updates in place. 
*/ - nctx = *ctx; + return hash_add_file(*ctx, path); } + + nctx = messages_create_ctx(HASH_SIZE); if (nctx == NULL) { NSLOG(netsurf, INFO, "Unable to create hash table for messages file %s", path); - gzclose(fp); return NSERROR_NOMEM; } - while (gzgets(fp, s, sizeof s)) { - char *colon, *value; - - if (s[0] == 0 || s[0] == '#') - continue; - - s[strlen(s) - 1] = 0; /* remove \n at end */ - colon = strchr(s, ':'); - if (!colon) - continue; - *colon = 0; /* terminate key */ - value = colon + 1; - - if (hash_add(nctx, s, value) == false) { - NSLOG(netsurf, INFO, - "Unable to add %s:%s to hash table of %s", s, - value, path); - gzclose(fp); - if (*ctx == NULL) { - hash_destroy(nctx); - } - return NSERROR_INVALID; - } + res = hash_add_file(nctx, path); + if (res == NSERROR_OK) { + *ctx = nctx; + } else { + hash_destroy(nctx); } - gzclose(fp); - - *ctx = nctx; - - return NSERROR_OK; + return res; } @@ -185,113 +173,34 @@ messages_get_ctx(const char *key, struct hash_table *ctx) } -/** - * Free memory used by a messages hash. - * The context will not be valid after this function returns. 
- * - * \param ctx context of messages file to free - */ -static void messages_destroy_ctx(struct hash_table *ctx) -{ - if (ctx == NULL) - return; - - hash_destroy(ctx); -} - - /* exported interface documented in messages.h */ nserror messages_add_from_file(const char *path) { - nserror err; - if (path == NULL) { return NSERROR_BAD_PARAMETER; } NSLOG(netsurf, INFO, "Loading Messages from '%s'", path); - err = messages_load_ctx(path, &messages_hash); - - - return err; + return messages_load_ctx(path, &messages_hash); } /* exported interface documented in messages.h */ -nserror messages_add_from_inline(const uint8_t *data, size_t data_size) +nserror messages_add_from_inline(const uint8_t *data, size_t size) { - z_stream strm; - int ret; - uint8_t s[512]; /* line buffer */ - size_t used = 0; /* number of bytes in buffer in use */ - uint8_t *nl; - /* ensure the hash table is initialised */ if (messages_hash == NULL) { - messages_hash = hash_create(HASH_SIZE); + messages_hash = messages_create_ctx(HASH_SIZE); } if (messages_hash == NULL) { NSLOG(netsurf, INFO, "Unable to create hash table"); return NSERROR_NOMEM; } - - strm.zalloc = Z_NULL; - strm.zfree = Z_NULL; - strm.opaque = Z_NULL; - - strm.next_in = (uint8_t *)data; - strm.avail_in = data_size; - - ret = inflateInit2(&strm, 32 + MAX_WBITS); - if (ret != Z_OK) { - NSLOG(netsurf, INFO, "inflateInit returned %d", ret); - return NSERROR_INVALID; - } - - do { - strm.next_out = s + used; - strm.avail_out = sizeof(s) - used; - - ret = inflate(&strm, Z_NO_FLUSH); - if ((ret != Z_OK) && (ret != Z_STREAM_END)) { - break; - } - - used = sizeof(s) - strm.avail_out; - while (used > 0) { - /* find nl */ - for (nl = &s[0]; nl < &s[used]; nl++) { - if (*nl == '\n') { - break; - } - } - if (nl == &s[used]) { - /* no nl found */ - break; - } - /* found newline */ - *nl = 0; /* null terminate line */ - message_process_line(messages_hash, &s[0], nl - &s[0]); - memmove(&s[0], nl + 1, used - ((nl + 1) - &s[0]) ); - used -= ((nl +1) - 
&s[0]); - } - if (used == sizeof(s)) { - /* entire buffer used and no newline */ - NSLOG(netsurf, INFO, "Overlength line"); - used = 0; - } - } while (ret != Z_STREAM_END); - - inflateEnd(&strm); - - if (ret != Z_STREAM_END) { - NSLOG(netsurf, INFO, "inflate returned %d", ret); - return NSERROR_INVALID; - } - return NSERROR_OK; + return hash_add_inline(messages_hash, data, size); } + /* exported interface documented in messages.h */ char *messages_get_buff(const char *key, ...) { @@ -300,7 +209,17 @@ char *messages_get_buff(const char *key, ...) int buff_len = 0; va_list ap; - msg_fmt = messages_get_ctx(key, messages_hash); + assert(key != NULL); + + if (messages_hash == NULL) { + return NULL; + } + + msg_fmt = hash_get(messages_hash, key); + + if (msg_fmt == NULL) { + return NULL; + } va_start(ap, key); buff_len = vsnprintf(buff, buff_len, msg_fmt, ap); @@ -456,6 +375,22 @@ const char *messages_get_errorcode(nserror code) case NSERROR_UNKNOWN: /* Unknown error */ return messages_get_ctx("Unknown", messages_hash); + + case NSERROR_BAD_AUTH: + /* Authentication required */ + return messages_get_ctx("BadAuth", messages_hash); + + case NSERROR_BAD_REDIRECT: + /* To many redirects */ + return messages_get_ctx("TooManyRedirects", messages_hash); + + case NSERROR_BAD_CERTS: + /* Certificate chain verification failure */ + return messages_get_ctx("CertificateVerificationNeeded", messages_hash); + + case NSERROR_TIMEOUT: + /* Operation timed out */ + return messages_get_ctx("Timeout", messages_hash); } /* The switch has no default, so the compiler should tell us when we @@ -466,6 +401,63 @@ const char *messages_get_errorcode(nserror code) return messages_get_ctx("Unknown", messages_hash); } +/* exported function documented in utils/messages.h */ +const char *messages_get_sslcode(ssl_cert_err code) +{ + switch (code) { + case SSL_CERT_ERR_OK: + /* Nothing wrong with this certificate */ + return messages_get_ctx("SSLCertErrOk", messages_hash); + + case SSL_CERT_ERR_UNKNOWN: 
+ /* Unknown error */ + return messages_get_ctx("SSLCertErrUnknown", messages_hash); + + case SSL_CERT_ERR_BAD_ISSUER: + /* Bad issuer */ + return messages_get_ctx("SSLCertErrBadIssuer", messages_hash); + + case SSL_CERT_ERR_BAD_SIG: + /* Bad signature on this certificate */ + return messages_get_ctx("SSLCertErrBadSig", messages_hash); + + case SSL_CERT_ERR_TOO_YOUNG: + /* This certificate is not yet valid */ + return messages_get_ctx("SSLCertErrTooYoung", messages_hash); + + case SSL_CERT_ERR_TOO_OLD: + /* This certificate is no longer valid */ + return messages_get_ctx("SSLCertErrTooOld", messages_hash); + + case SSL_CERT_ERR_SELF_SIGNED: + /* This certificate is self signed */ + return messages_get_ctx("SSLCertErrSelfSigned", messages_hash); + + case SSL_CERT_ERR_CHAIN_SELF_SIGNED: + /* This certificate chain is self signed */ + return messages_get_ctx("SSLCertErrChainSelfSigned", messages_hash); + + case SSL_CERT_ERR_REVOKED: + /* This certificate has been revoked */ + return messages_get_ctx("SSLCertErrRevoked", messages_hash); + + case SSL_CERT_ERR_HOSTNAME_MISMATCH: + /* Common name is invalid */ + return messages_get_ctx("SSLCertErrHostnameMismatch", messages_hash); + + case SSL_CERT_ERR_CERT_MISSING: + /* Common name is invalid */ + return messages_get_ctx("SSLCertErrCertMissing", messages_hash); + + } + + /* The switch has no default, so the compiler should tell us when we + * forget to add messages for new error codes. As such, we should + * never get here. + */ + assert(0); + return messages_get_ctx("Unknown", messages_hash); +} /* exported function documented in utils/messages.h */ void messages_destroy(void) @@ -473,4 +465,3 @@ void messages_destroy(void) messages_destroy_ctx(messages_hash); messages_hash = NULL; } - diff --git a/utils/messages.h b/utils/messages.h index 4024f7e77..5da35e4ad 100644 --- a/utils/messages.h +++ b/utils/messages.h @@ -16,7 +16,8 @@ * along with this program. If not, see <http://www.gnu.org/licenses/>. 
*/ -/** \file +/** + * \file * Localised message support (interface). * * The messages module loads a file of keys and associated strings, and @@ -30,12 +31,13 @@ * file table. Use the _ctx versions of the functions to do this. */ -#ifndef _NETSURF_UTILS_MESSAGES_H_ -#define _NETSURF_UTILS_MESSAGES_H_ +#ifndef NETSURF_UTILS_MESSAGES_H_ +#define NETSURF_UTILS_MESSAGES_H_ #include <stdint.h> #include "utils/errors.h" +#include "netsurf/ssl_certs.h" /** * Read keys and values from messages file into the standard Messages hash. @@ -79,15 +81,22 @@ const char *messages_get(const char *key); const char *messages_get_errorcode(nserror code); /** + * lookup of a message by SSL error code from the standard Messages hash. + * + * \param code ssl error code + * \return message text + */ +const char *messages_get_sslcode(ssl_cert_err code); + +/** * Formatted message from a key in the global message hash. * - * \param key key of message + * \param key key of message * \param ... message parameters - * \return buffer containing formatted message text or NULL if memory - * is unavailable. The caller owns the returned buffer and is - * responsible for freeing it. + * \return buffer containing formatted message text or NULL if key is + * unavailable or memory allocation failed. The caller owns the + * returned buffer and is responsible for freeing it. */ - char *messages_get_buff(const char *key, ...); /** diff --git a/utils/nscolour.c b/utils/nscolour.c new file mode 100644 index 000000000..5a772b8a6 --- /dev/null +++ b/utils/nscolour.c @@ -0,0 +1,282 @@ +/* + * Copyright 2020 Michael Drake <tlsa@netsurf-browser.org> + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. 
+ * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * \file + * NetSurf UI colours (implementation). + * + * Builds common colours used throughout NetSurf's interface. + */ + +#include <stdio.h> +#include <assert.h> +#include <stddef.h> +#include <stdbool.h> + +#include "netsurf/inttypes.h" +#include "netsurf/plot_style.h" + +#include "utils/errors.h" +#include "utils/nscolour.h" +#include "desktop/system_colour.h" + +colour nscolours[NSCOLOUR__COUNT]; + +/** + * Set some colours up from a couple of system colour entries. + * + * \param[in] name_bg Name of choices string for background colour lookup. + * \param[in] name_fg Name of choices string for foreground colour lookup. + * \param[in] bg_num Numerator for background adjustment ratio. + * \param[in] bg_den Denominator for backfground adjustment ratio. + * \param[out] bg Returns the background colour. + * \param[out] bg_hover Returns the hovered background colour. + * \param[out] fg Returns the foreground colour. + * \param[out] fg_subtle Returns the subtle foreground colour. + * \param[out] fg_faded Returns the faded foreground colour. + * \param[out] fg_good Returns the good foreground colour. + * \param[out] fg_bad Returns the bad foreground colour. + * \param[out] border Returns the border colour. 
+ */ +static nserror nscolour__get( + const char *name_bg, + const char *name_fg, + unsigned bg_num, + unsigned bg_den, + colour *bg, + colour *bg_hover, + colour *fg, + colour *fg_subtle, + colour *fg_faded, + colour *fg_good, + colour *fg_bad, + colour *border) +{ + nserror res; + bool dark_mode; + colour bg_sys; + + assert(name_bg != NULL); + assert(name_fg != NULL); + assert(bg != NULL); + assert(fg != NULL); + + /* user configured background colour */ + res = ns_system_colour_char(name_bg, &bg_sys); + if (res != NSERROR_OK) { + return res; + } + + /* user configured foreground colour */ + res = ns_system_colour_char(name_fg, fg); + if (res != NSERROR_OK) { + return res; + } + + /* if there is a valid background fraction apply it */ + if (bg_num < bg_den) { + *bg = mix_colour(bg_sys, *fg, 255 * bg_num / bg_den); + } else { + *bg = bg_sys; + } + + dark_mode = colour_lightness(*fg) > colour_lightness(*bg); + + if (bg_hover != NULL) { + *bg_hover = dark_mode ? + half_lighten_colour(*bg) : + half_darken_colour(*bg); + } + + if (fg_subtle != NULL) { + *fg_subtle = mix_colour(*fg, *bg, 255 * 25 / 32); + } + + if (fg_faded != NULL) { + *fg_faded = mix_colour(*fg, *bg, 255 * 20 / 32); + } + + if (fg_good != NULL) { + *fg_good = colour_engorge_component(*fg, !dark_mode, + PLOT_COLOUR_COMPONENT_GREEN); + } + + if (fg_bad != NULL) { + *fg_bad = colour_engorge_component(*fg, !dark_mode, + PLOT_COLOUR_COMPONENT_RED); + } + + if (border != NULL) { + *border = mix_colour(*fg, bg_sys, 255 * 8 / 32); + } + + return NSERROR_OK; +} + +/* Exported interface, documented in utils/nscolour.h */ +nserror nscolour_update(void) +{ + nserror res; + + res = nscolour__get("Window", "WindowText", 16, 16, + &nscolours[NSCOLOUR_WIN_EVEN_BG], + &nscolours[NSCOLOUR_WIN_EVEN_BG_HOVER], + &nscolours[NSCOLOUR_WIN_EVEN_FG], + &nscolours[NSCOLOUR_WIN_EVEN_FG_SUBTLE], + &nscolours[NSCOLOUR_WIN_EVEN_FG_FADED], + &nscolours[NSCOLOUR_WIN_EVEN_FG_GOOD], + &nscolours[NSCOLOUR_WIN_EVEN_FG_BAD], + 
&nscolours[NSCOLOUR_WIN_EVEN_BORDER]); + if (res != NSERROR_OK) { + return res; + } + + res = nscolour__get("Window", "WindowText", 15, 16, + &nscolours[NSCOLOUR_WIN_ODD_BG], + &nscolours[NSCOLOUR_WIN_ODD_BG_HOVER], + &nscolours[NSCOLOUR_WIN_ODD_FG], + &nscolours[NSCOLOUR_WIN_ODD_FG_SUBTLE], + &nscolours[NSCOLOUR_WIN_ODD_FG_FADED], + &nscolours[NSCOLOUR_WIN_ODD_FG_GOOD], + &nscolours[NSCOLOUR_WIN_ODD_FG_BAD], + &nscolours[NSCOLOUR_WIN_ODD_BORDER]); + if (res != NSERROR_OK) { + return res; + } + + res = nscolour__get("Highlight", "HighlightText", 16, 16, + &nscolours[NSCOLOUR_SEL_BG], + NULL, + &nscolours[NSCOLOUR_SEL_FG], + &nscolours[NSCOLOUR_SEL_FG_SUBTLE], + NULL, + NULL, + NULL, + NULL); + if (res != NSERROR_OK) { + return res; + } + + res = ns_system_colour_char("Scrollbar", + &nscolours[NSCOLOUR_SCROLL_WELL]); + if (res != NSERROR_OK) { + return res; + } + + res = nscolour__get("ButtonFace", "ButtonText", 16, 16, + &nscolours[NSCOLOUR_BUTTON_BG], + NULL, + &nscolours[NSCOLOUR_BUTTON_FG], + NULL, + NULL, + NULL, + NULL, + NULL); + if (res != NSERROR_OK) { + return res; + } + + nscolours[NSCOLOUR_TEXT_INPUT_BG] = + colour_to_bw_nearest(nscolours[NSCOLOUR_WIN_EVEN_BG]); + nscolours[NSCOLOUR_TEXT_INPUT_FG] = + colour_to_bw_nearest(nscolours[NSCOLOUR_WIN_EVEN_FG]); + nscolours[NSCOLOUR_TEXT_INPUT_FG_SUBTLE] = + blend_colour(nscolours[NSCOLOUR_TEXT_INPUT_BG], + nscolours[NSCOLOUR_TEXT_INPUT_FG]); + + return NSERROR_OK; +} + +/* Exported interface, documented in utils/nscolour.h */ +nserror nscolour_get_stylesheet(const char **stylesheet_out) +{ + static char buffer[640]; + int ret; + + assert(stylesheet_out != NULL); + + ret = snprintf(buffer, sizeof(buffer), + ".ns-odd-bg {\n" + "\tbackground-color: #%06"PRIx32";\n" + "}\n" + ".ns-odd-bg-hover {\n" + "\tbackground-color: #%06"PRIx32";\n" + "}\n" + ".ns-odd-fg {\n" + "\tcolor: #%06"PRIx32";\n" + "}\n" + ".ns-odd-fg-subtle {\n" + "\tcolor: #%06"PRIx32";\n" + "}\n" + ".ns-odd-fg-faded {\n" + "\tcolor: 
#%06"PRIx32";\n" + "}\n" + ".ns-odd-fg-good {\n" + "\tcolor: #%06"PRIx32";\n" + "}\n" + ".ns-odd-fg-bad {\n" + "\tcolor: #%06"PRIx32";\n" + "}\n" + ".ns-even-bg {\n" + "\tbackground-color: #%06"PRIx32";\n" + "}\n" + ".ns-even-bg-hover {\n" + "\tbackground-color: #%06"PRIx32";\n" + "}\n" + ".ns-even-fg {\n" + "\tcolor: #%06"PRIx32";\n" + "}\n" + ".ns-even-fg-subtle {\n" + "\tcolor: #%06"PRIx32";\n" + "}\n" + ".ns-even-fg-faded {\n" + "\tcolor: #%06"PRIx32";\n" + "}\n" + ".ns-even-fg-good {\n" + "\tcolor: #%06"PRIx32";\n" + "}\n" + ".ns-even-fg-bad {\n" + "\tcolor: #%06"PRIx32";\n" + "}\n" + ".ns-border {\n" + "\tborder-color: #%06"PRIx32";\n" + "}\n", + colour_rb_swap(nscolours[NSCOLOUR_WIN_ODD_BG]), + colour_rb_swap(nscolours[NSCOLOUR_WIN_ODD_BG_HOVER]), + colour_rb_swap(nscolours[NSCOLOUR_WIN_ODD_FG]), + colour_rb_swap(nscolours[NSCOLOUR_WIN_ODD_FG_SUBTLE]), + colour_rb_swap(nscolours[NSCOLOUR_WIN_ODD_FG_FADED]), + colour_rb_swap(nscolours[NSCOLOUR_WIN_ODD_FG_GOOD]), + colour_rb_swap(nscolours[NSCOLOUR_WIN_ODD_FG_BAD]), + colour_rb_swap(nscolours[NSCOLOUR_WIN_EVEN_BG]), + colour_rb_swap(nscolours[NSCOLOUR_WIN_EVEN_BG_HOVER]), + colour_rb_swap(nscolours[NSCOLOUR_WIN_EVEN_FG]), + colour_rb_swap(nscolours[NSCOLOUR_WIN_EVEN_FG_SUBTLE]), + colour_rb_swap(nscolours[NSCOLOUR_WIN_EVEN_FG_FADED]), + colour_rb_swap(nscolours[NSCOLOUR_WIN_EVEN_FG_GOOD]), + colour_rb_swap(nscolours[NSCOLOUR_WIN_EVEN_FG_BAD]), + colour_rb_swap(nscolours[NSCOLOUR_WIN_EVEN_BORDER])); + assert(ret > 0 && (size_t)ret < sizeof(buffer)); + if (ret < 0 || (size_t)ret >= sizeof(buffer)) { + /* Error or buffer too small */ + return NSERROR_NOSPACE; + } + + *stylesheet_out = buffer; + return NSERROR_OK; +} diff --git a/utils/nscolour.h b/utils/nscolour.h new file mode 100644 index 000000000..855a39806 --- /dev/null +++ b/utils/nscolour.h @@ -0,0 +1,81 @@ +/* + * Copyright 2020 Michael Drake <tlsa@netsurf-browser.org> + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf 
is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** \file + * NetSurf UI colours (interface). + * + * Interface to acquire common colours used throughout NetSurf's interface. + */ + +#ifndef _NETSURF_UTILS_NSCOLOUR_H_ +#define _NETSURF_UTILS_NSCOLOUR_H_ + +#include "netsurf/types.h" + +/** + * NetSurf UI colour key. + */ +enum nscolour { + NSCOLOUR_WIN_ODD_BG, + NSCOLOUR_WIN_ODD_BG_HOVER, + NSCOLOUR_WIN_ODD_FG, + NSCOLOUR_WIN_ODD_FG_SUBTLE, + NSCOLOUR_WIN_ODD_FG_FADED, + NSCOLOUR_WIN_ODD_FG_GOOD, + NSCOLOUR_WIN_ODD_FG_BAD, + NSCOLOUR_WIN_ODD_BORDER, + NSCOLOUR_WIN_EVEN_BG, + NSCOLOUR_WIN_EVEN_BG_HOVER, + NSCOLOUR_WIN_EVEN_FG, + NSCOLOUR_WIN_EVEN_FG_SUBTLE, + NSCOLOUR_WIN_EVEN_FG_FADED, + NSCOLOUR_WIN_EVEN_FG_GOOD, + NSCOLOUR_WIN_EVEN_FG_BAD, + NSCOLOUR_WIN_EVEN_BORDER, + NSCOLOUR_TEXT_INPUT_BG, + NSCOLOUR_TEXT_INPUT_FG, + NSCOLOUR_TEXT_INPUT_FG_SUBTLE, + NSCOLOUR_SEL_BG, + NSCOLOUR_SEL_FG, + NSCOLOUR_SEL_FG_SUBTLE, + NSCOLOUR_SCROLL_WELL, + NSCOLOUR_BUTTON_BG, + NSCOLOUR_BUTTON_FG, + NSCOLOUR__COUNT, +}; + +/** + * NetSurf UI colour table. + */ +extern colour nscolours[]; + +/** + * Update the nscolour table from the current nsoptions. + * + * \return NSERROR_OK on success, or appropriate error otherwise. + */ +nserror nscolour_update(void); + +/** + * Get a pointer to a stylesheet for nscolours. + * + * \return NSERROR_OK on success, or appropriate error otherwise. 
+ */ +nserror nscolour_get_stylesheet(const char **stylesheet_out); + +#endif diff --git a/utils/nsoption.c b/utils/nsoption.c index 09529a0d0..7306dee1b 100644 --- a/utils/nsoption.c +++ b/utils/nsoption.c @@ -32,6 +32,7 @@ #include <string.h> #include <strings.h> +#include "netsurf/inttypes.h" #include "netsurf/plot_style.h" #include "utils/errors.h" #include "utils/log.h" @@ -112,7 +113,7 @@ strtooption(const char *value, struct nsoption_s *option) break; case OPTION_COLOUR: - if (sscanf(value, "%x", &rgbcolour) == 1) { + if (sscanf(value, "%"SCNx32"", &rgbcolour) == 1) { option->value.c = (((0x000000FF & rgbcolour) << 16) | ((0x0000FF00 & rgbcolour) << 0) | ((0x00FF0000 & rgbcolour) >> 16)); @@ -323,7 +324,7 @@ nsoption_output(FILE *fp, rgbcolour = (((0x000000FF & opts[entry].value.c) << 16) | ((0x0000FF00 & opts[entry].value.c) << 0) | ((0x00FF0000 & opts[entry].value.c) >> 16)); - fprintf(fp, "%s:%06x\n", + fprintf(fp, "%s:%06"PRIx32"\n", opts[entry].key, rgbcolour); @@ -383,17 +384,20 @@ nsoption_output_value_html(struct nsoption_s *option, break; case OPTION_COLOUR: - rgbcolour = (((0x000000FF & option->value.c) << 16) | - ((0x0000FF00 & option->value.c) << 0) | - ((0x00FF0000 & option->value.c) >> 16)); + rgbcolour = colour_rb_swap(option->value.c); slen = snprintf(string + pos, size - pos, - "<span style=\"background-color: #%06x; " - "color: #%06x; " - "font-family:Monospace; \">#%06X</span>", + "<span style=\"font-family:Monospace;\">" + "#%06"PRIX32 + "</span> " + "<span style=\"background-color: #%06"PRIx32"; " + "border: 1px solid #%06"PRIx32"; " + "display: inline-block; " + "width: 1em; height: 1em;\">" + "</span>", rgbcolour, - colour_to_bw_furthest(rgbcolour), - rgbcolour); + rgbcolour, + colour_to_bw_furthest(rgbcolour)); break; case OPTION_STRING: @@ -456,7 +460,7 @@ nsoption_output_value_text(struct nsoption_s *option, rgbcolour = (((0x000000FF & option->value.c) << 16) | ((0x0000FF00 & option->value.c) << 0) | ((0x00FF0000 & option->value.c) 
>> 16)); - slen = snprintf(string + pos, size - pos, "%06x", rgbcolour); + slen = snprintf(string + pos, size - pos, "%06"PRIx32, rgbcolour); break; case OPTION_STRING: @@ -537,6 +541,89 @@ nsoption_free(struct nsoption_s *opts) } +/** + * extract key/value from a line of input + * The line is edited in place (the colon and a trailing newline become NUL); key_out and value_out point into it. + * \return NSERROR_OK and key_out and value_out updated + * NSERROR_NOT_FOUND if not a key/value input line + * NSERROR_INVALID if the line is an invalid format (missing colon) + */ +static nserror +get_key_value(char *line, int linelen, char **key_out, char **value_out) +{ + char *key; + char *value; + + /* skip leading whitespace for start of key */ + for (key = line; *key != 0; key++) { + if ((*key != ' ') && (*key != '\t') && (*key != '\n')) { + break; + } + } + + /* empty line or only whitespace */ + if (*key == 0) { + return NSERROR_NOT_FOUND; + } + + /* comment */ + if (*key == '#') { + return NSERROR_NOT_FOUND; + } + + /* get start of value */ + for (value = key; *value != 0; value++) { + if (*value == ':') { + *value = 0; + value++; + break; + } + } + + /* missing colon separator: the key was never NUL terminated above (an empty value after a colon is still valid) */ + if (*(value - 1) != 0) { + return NSERROR_INVALID; + } + + /* remove delimiter from value */ + if (line[linelen - 1] == '\n') { + linelen--; + line[linelen] = 0; + } + + *key_out = key; + *value_out = value; + return NSERROR_OK; +} + + +/** + * Process a line from a user option file: a key matching an entry in opts (compared case-insensitively) has its value parsed by strtooption, unknown keys are ignored; returns the get_key_value result + */ +static nserror optionline(struct nsoption_s *opts, char *line, int linelen) +{ + nserror res; + char *key; + char *value; + int idx; + + res = get_key_value(line, linelen, &key, &value); + if (res != NSERROR_OK) { + /* skip line as no valid key value pair found */ + return res; + } + + for (idx = 0; opts[idx].key != NULL; idx++) { + if (strcasecmp(key, opts[idx].key) == 0) { + strtooption(value, &opts[idx]); + break; + } + } + + return res; +} + + /* exported interface documented in utils/nsoption.h */ nserror nsoption_init(nsoption_set_default_t *set_defaults, @@ -642,7 +729,9 @@ nsoption_read(const char *path, struct 
nsoption_s *opts) opts = nsoptions; } - /** @todo is this and API bug not being a parameter */ + /** + * @todo is this an API bug not being a parameter + */ defs = nsoptions_default; if ((opts == NULL) || (defs == NULL)) { @@ -655,34 +744,10 @@ nsoption_read(const char *path, struct nsoption_s *opts) return NSERROR_NOT_FOUND; } - NSLOG(netsurf, INFO, "Successfully opened '%s' for Options file", - path); + NSLOG(netsurf, INFO, "Successfully opened '%s' for Options file", path); while (fgets(s, NSOPTION_MAX_LINE_LEN, fp)) { - char *colon, *value; - unsigned int idx; - - if ((s[0] == 0) || (s[0] == '#')) { - continue; - } - - colon = strchr(s, ':'); - if (colon == 0) { - continue; - } - - s[strlen(s) - 1] = 0; /* remove \n at end */ - *colon = 0; /* terminate key */ - value = colon + 1; - - for (idx = 0; opts[idx].key != NULL; idx++) { - if (strcasecmp(s, opts[idx].key) != 0) { - continue; - } - - strtooption(value, &opts[idx]); - break; - } + optionline(opts, s, strlen(s)); } fclose(fp); diff --git a/utils/nsurl.h b/utils/nsurl.h index f97562bf0..fcae12e0c 100644 --- a/utils/nsurl.h +++ b/utils/nsurl.h @@ -30,6 +30,16 @@ /** NetSurf URL object */ typedef struct nsurl nsurl; +/** A type for URL schemes */ +enum nsurl_scheme_type { + NSURL_SCHEME_OTHER, + NSURL_SCHEME_HTTP, + NSURL_SCHEME_HTTPS, + NSURL_SCHEME_FILE, + NSURL_SCHEME_FTP, + NSURL_SCHEME_MAILTO, + NSURL_SCHEME_DATA +}; typedef enum nsurl_component { NSURL_SCHEME = (1 << 0), @@ -146,6 +156,15 @@ lwc_string *nsurl_get_component(const nsurl *url, nsurl_component part); /** + * Get the scheme type from a NetSurf URL object + * + * \param url NetSurf URL object + * \return The URL scheme type. + */ +enum nsurl_scheme_type nsurl_get_scheme_type(const nsurl *url); + + +/** * Enquire about the existence of componenets in a given URL * * \param url NetSurf URL object @@ -181,6 +200,24 @@ const char *nsurl_access(const nsurl *url); /** + * Variant of \ref nsurl_access for logging. 
+ * + * \param url NetSurf URL to retrieve a string pointer for. + * \return the required string + * + * This will not necessarily return the actual nsurl's URL, but something + * that is suitable for recording to logs. E.g. URLs with the `data` scheme + * will return a simple place holder, to avoid repeatedly dumping loads of data. + * + * The returned string is owned by the NetSurf URL object. It will die + * with the NetSurf URL object. Keep a reference to the URL if you need it. + * + * The returned string has a trailing '\0'. + */ +const char *nsurl_access_log(const nsurl *url); + + +/** * Get a UTF-8 string (for human readable IDNs) from a NetSurf URL object * * \param url NetSurf URL object @@ -295,12 +332,34 @@ nserror nsurl_refragment(const nsurl *url, lwc_string *frag, nsurl **new_url); * the created object. * * Any query component in url is replaced with query in new_url. + * + * Passing the empty string as a replacement will result in the query + * component being removed. */ nserror nsurl_replace_query(const nsurl *url, const char *query, nsurl **new_url); /** + * Create a NetSurf URL object, with scheme replaced + * + * \param url NetSurf URL to create new NetSurf URL from + * \param scheme Scheme to use + * \param new_url Returns new NetSurf URL with scheme provided + * \return NSERROR_OK on success, appropriate error otherwise + * + * If return value != NSERROR_OK, nothing will be returned in new_url. + * + * It is up to the client to call nsurl_unref when they are finished with + * the created object. + * + * Any scheme component in url is replaced with scheme in new_url. + */ +nserror nsurl_replace_scheme(const nsurl *url, lwc_string *scheme, + nsurl **new_url); + + +/** * Attempt to find a nice filename for a URL. 
* * \param url A NetSurf URL object to create a filename from @@ -329,4 +388,14 @@ nserror nsurl_nice(const nsurl *url, char **result, bool remove_extensions); */ nserror nsurl_parent(const nsurl *url, nsurl **new_url); +/** + * Dump a NetSurf URL's internal components to stderr + * + * This is helper functionality for developers, and shouldn't be called + * generally. + * + * \param url The NetSurf URL to dump components of + */ +void nsurl_dump(const nsurl *url); + #endif diff --git a/utils/nsurl/nsurl.c b/utils/nsurl/nsurl.c index 3b0af9328..63619af15 100644 --- a/utils/nsurl/nsurl.c +++ b/utils/nsurl/nsurl.c @@ -36,6 +36,7 @@ #include <stdlib.h> #include <string.h> #include <strings.h> +#include <inttypes.h> #include "utils/ascii.h" #include "utils/corestrings.h" @@ -54,7 +55,7 @@ * Does nothing if the components are the same, so ensure match is * preset to true. */ -#define nsurl__component_compare(c1, c2, match) \ +#define nsurl__component_compare(c1, c2, match) \ if (c1 && c2 && lwc_error_ok == \ lwc_string_isequal(c1, c2, match)) { \ /* do nothing */ \ @@ -88,10 +89,6 @@ void nsurl_unref(nsurl *url) if (--url->count > 0) return; -#ifdef NSURL_DEBUG - nsurl__dump(url); -#endif - /* Release lwc strings */ nsurl__components_destroy(&url->components); @@ -240,6 +237,15 @@ lwc_string *nsurl_get_component(const nsurl *url, nsurl_component part) /* exported interface, documented in nsurl.h */ +enum nsurl_scheme_type nsurl_get_scheme_type(const nsurl *url) +{ + assert(url != NULL); + + return url->components.scheme_type; +} + + +/* exported interface, documented in nsurl.h */ bool nsurl_has_component(const nsurl *url, nsurl_component part) { assert(url != NULL); @@ -316,6 +322,19 @@ const char *nsurl_access(const nsurl *url) /* exported interface, documented in nsurl.h */ +const char *nsurl_access_log(const nsurl *url) +{ + assert(url != NULL); + + if (url->components.scheme_type == NSURL_SCHEME_DATA) { + return "[data url]"; + } + + return url->string; +} + + +/* 
exported interface, documented in nsurl.h */ nserror nsurl_get_utf8(const nsurl *url, char **url_s, size_t *url_l) { nserror err; @@ -355,7 +374,7 @@ nserror nsurl_get_utf8(const nsurl *url, char **url_s, size_t *url_l) } *url_l = scheme_len + idna_host_len + path_len + 1; /* +1 for \0 */ - *url_s = malloc(*url_l); + *url_s = malloc(*url_l); if (*url_s == NULL) { err = NSERROR_NOMEM; @@ -565,57 +584,67 @@ nserror nsurl_refragment(const nsurl *url, lwc_string *frag, nsurl **new_url) nserror nsurl_replace_query(const nsurl *url, const char *query, nsurl **new_url) { - int query_len; /* Length of new query string, including '?' */ - int frag_len = 0; /* Length of fragment, including '#' */ - int base_len; /* Length of URL up to start of query */ - char *pos; - size_t len; - lwc_string *lwc_query; + int query_len; /* Length of new query string excluding '?' */ + int frag_len = 0; /* Length of fragment, excluding '#' */ + int base_len; /* Length of URL up to start of query */ + char *pos; /* current position in output string */ + size_t length; /* new url string length */ + lwc_string *lwc_query = NULL; assert(url != NULL); assert(query != NULL); - assert(query[0] == '?'); - /* Get the length of the new query */ - query_len = strlen(query); + length = query_len = strlen(query); + if (query_len > 0) { + length++; /* allow for '?' 
*/ + + /* intern string */ + if (lwc_intern_string(query, + query_len, + &lwc_query) != lwc_error_ok) { + return NSERROR_NOMEM; + } + } /* Find the change in length from url to new_url */ base_len = url->length; if (url->components.query != NULL) { - base_len -= lwc_string_length(url->components.query); + base_len -= (1 + lwc_string_length(url->components.query)); } if (url->components.fragment != NULL) { - frag_len = 1 + lwc_string_length(url->components.fragment); - base_len -= frag_len; + frag_len = lwc_string_length(url->components.fragment); + base_len -= (1 + frag_len); + length += frag_len + 1; /* allow for '#' */ } - /* Set new_url's length */ - len = base_len + query_len + frag_len; + /* compute new url string length */ + length += base_len; /* Create NetSurf URL object */ - *new_url = malloc(sizeof(nsurl) + len + 1); /* Add 1 for \0 */ + *new_url = malloc(sizeof(nsurl) + length + 1); /* Add 1 for \0 */ if (*new_url == NULL) { + if (query_len > 0) { + lwc_string_unref(lwc_query); + } return NSERROR_NOMEM; } - if (lwc_intern_string(query, query_len, &lwc_query) != lwc_error_ok) { - free(*new_url); - return NSERROR_NOMEM; - } - - (*new_url)->length = len; + (*new_url)->length = length; /* Set string */ pos = (*new_url)->string; memcpy(pos, url->string, base_len); pos += base_len; - memcpy(pos, query, query_len); - pos += query_len; + if (query_len > 0) { + *pos = '?'; + memcpy(++pos, query, query_len); + pos += query_len; + } if (url->components.fragment != NULL) { const char *frag = lwc_string_data(url->components.fragment); *pos = '#'; - memcpy(++pos, frag, frag_len - 1); - pos += frag_len - 1; + memcpy(++pos, frag, frag_len); + pos += frag_len; } *pos = '\0'; @@ -648,6 +677,93 @@ nserror nsurl_replace_query(const nsurl *url, const char *query, } +/* exported interface, documented in nsurl.h */ +nserror nsurl_replace_scheme(const nsurl *url, lwc_string *scheme, + nsurl **new_url) +{ + int scheme_len; + int base_len; + char *pos; + size_t len; + bool match; 
+ + assert(url != NULL); + assert(scheme != NULL); + + /* Get the length of the new scheme */ + scheme_len = lwc_string_length(scheme); + + /* Find the change in length from url to new_url */ + base_len = url->length; + if (url->components.scheme != NULL) { + base_len -= lwc_string_length(url->components.scheme); + } + + /* Set new_url's length */ + len = base_len + scheme_len; + + /* Create NetSurf URL object */ + *new_url = malloc(sizeof(nsurl) + len + 1); /* Add 1 for \0 */ + if (*new_url == NULL) { + return NSERROR_NOMEM; + } + + (*new_url)->length = len; + + /* Set string */ + pos = (*new_url)->string; + memcpy(pos, lwc_string_data(scheme), scheme_len); + memcpy(pos + scheme_len, + url->string + url->length - base_len, base_len); + pos[len] = '\0'; + + /* Copy components */ + (*new_url)->components.scheme = lwc_string_ref(scheme); + (*new_url)->components.username = + nsurl__component_copy(url->components.username); + (*new_url)->components.password = + nsurl__component_copy(url->components.password); + (*new_url)->components.host = + nsurl__component_copy(url->components.host); + (*new_url)->components.port = + nsurl__component_copy(url->components.port); + (*new_url)->components.path = + nsurl__component_copy(url->components.path); + (*new_url)->components.query = + nsurl__component_copy(url->components.query); + (*new_url)->components.fragment = + nsurl__component_copy(url->components.fragment); + + /* Compute new scheme type */ + if (lwc_string_caseless_isequal(scheme, corestring_lwc_http, + &match) == lwc_error_ok && match == true) { + (*new_url)->components.scheme_type = NSURL_SCHEME_HTTP; + } else if (lwc_string_caseless_isequal(scheme, corestring_lwc_https, + &match) == lwc_error_ok && match == true) { + (*new_url)->components.scheme_type = NSURL_SCHEME_HTTPS; + } else if (lwc_string_caseless_isequal(scheme, corestring_lwc_file, + &match) == lwc_error_ok && match == true) { + (*new_url)->components.scheme_type = NSURL_SCHEME_FILE; + } else if 
(lwc_string_caseless_isequal(scheme, corestring_lwc_ftp, + &match) == lwc_error_ok && match == true) { + (*new_url)->components.scheme_type = NSURL_SCHEME_FTP; + } else if (lwc_string_caseless_isequal(scheme, corestring_lwc_mailto, + &match) == lwc_error_ok && match == true) { + (*new_url)->components.scheme_type = NSURL_SCHEME_MAILTO; + } else { + (*new_url)->components.scheme_type = NSURL_SCHEME_OTHER; + } + + /* Get the nsurl's hash */ + nsurl__calc_hash(*new_url); + + /* Give the URL a reference */ + (*new_url)->count = 1; + + return NSERROR_OK; +} + + /* exported interface documented in utils/nsurl.h */ nserror nsurl_nice(const nsurl *url, char **result, bool remove_extensions) { @@ -809,7 +925,7 @@ nserror nsurl_parent(const nsurl *url, nsurl **new_url) } else if (old_path_len == new_path_len) { lwc_path = lwc_string_ref(url->components.path); } else { - if (lwc_intern_string(path, old_path_len - new_path_len, + if (lwc_intern_string(path, new_path_len, &lwc_path) != lwc_error_ok) { free(*new_url); return NSERROR_NOMEM; @@ -850,3 +966,35 @@ nserror nsurl_parent(const nsurl *url, nsurl **new_url) return NSERROR_OK; } +/* exported interface, documented in nsurl.h */ +void nsurl_dump(const nsurl *url) +{ + fprintf(stderr, "nsurl components for %p " + "(refs: %i hash: %"PRIx32"):\n", + url, url->count, url->hash); + + if (url->components.scheme) + fprintf(stderr, " Scheme: %s\n", + lwc_string_data(url->components.scheme)); + if (url->components.username) + fprintf(stderr, "Username: %s\n", + lwc_string_data(url->components.username)); + if (url->components.password) + fprintf(stderr, "Password: %s\n", + lwc_string_data(url->components.password)); + if (url->components.host) + fprintf(stderr, " Host: %s\n", + lwc_string_data(url->components.host)); + if (url->components.port) + fprintf(stderr, " Port: %s\n", + lwc_string_data(url->components.port)); + if (url->components.path) + fprintf(stderr, " Path: %s\n", + lwc_string_data(url->components.path)); + if 
(url->components.query) + fprintf(stderr, " Query: %s\n", + lwc_string_data(url->components.query)); + if (url->components.fragment) + fprintf(stderr, "Fragment: %s\n", + lwc_string_data(url->components.fragment)); +} diff --git a/utils/nsurl/parse.c b/utils/nsurl/parse.c index ce6f4435d..dbf0c6e2c 100644 --- a/utils/nsurl/parse.c +++ b/utils/nsurl/parse.c @@ -37,6 +37,8 @@ #include <string.h> #include <strings.h> +#include "netsurf/inttypes.h" + #include "utils/ascii.h" #include "utils/corestrings.h" #include "utils/errors.h" @@ -302,6 +304,16 @@ static void nsurl__get_string_markers(const char * const url_s, ((*(pos - off + 5) == 'o') || (*(pos - off + 5) == 'O')))) { marker.scheme_type = NSURL_SCHEME_MAILTO; + } else if (off == SLEN("data") && + (((*(pos - off + 0) == 'd') || + (*(pos - off + 0) == 'D')) && + ((*(pos - off + 1) == 'a') || + (*(pos - off + 1) == 'A')) && + ((*(pos - off + 2) == 't') || + (*(pos - off + 2) == 'T')) && + ((*(pos - off + 3) == 'a') || + (*(pos - off + 3) == 'A')))) { + marker.scheme_type = NSURL_SCHEME_DATA; } /* Skip over colon */ @@ -332,7 +344,7 @@ static void nsurl__get_string_markers(const char * const url_s, * and in the case of mailto: when we assume there is an authority. 
*/ if ((*pos == '/' && *(pos + 1) == '/') || - (is_http && ((joining && *pos == '/') || + (is_http && ((joining && *pos == '/') || (joining == false && marker.scheme_end != marker.start))) || marker.scheme_type == NSURL_SCHEME_MAILTO) { @@ -450,21 +462,29 @@ static void nsurl__get_string_markers(const char * const url_s, marker.fragment = marker.end; } -#ifdef NSURL_DEBUG - NSLOG(netsurf, INFO, "marker.start: %i", marker.start); - NSLOG(netsurf, INFO, "marker.scheme_end: %i", marker.scheme_end); - NSLOG(netsurf, INFO, "marker.authority: %i", marker.authority); - - NSLOG(netsurf, INFO, "marker.colon_first: %i", marker.colon_first); - NSLOG(netsurf, INFO, "marker.at: %i", marker.at); - NSLOG(netsurf, INFO, "marker.colon_last: %i", marker.colon_last); - - NSLOG(netsurf, INFO, "marker.path: %i", marker.path); - NSLOG(netsurf, INFO, "marker.query: %i", marker.query); - NSLOG(netsurf, INFO, "marker.fragment: %i", marker.fragment); - - NSLOG(netsurf, INFO, "marker.end: %i", marker.end); -#endif + NSLOG(netsurf, DEEPDEBUG, + "marker.start: %"PRIsizet, marker.start); + NSLOG(netsurf, DEEPDEBUG, + "marker.scheme_end: %"PRIsizet, marker.scheme_end); + NSLOG(netsurf, DEEPDEBUG, + "marker.authority: %"PRIsizet, marker.authority); + + NSLOG(netsurf, DEEPDEBUG, + "marker.colon_first: %"PRIsizet, marker.colon_first); + NSLOG(netsurf, DEEPDEBUG, + "marker.at: %"PRIsizet, marker.at); + NSLOG(netsurf, DEEPDEBUG, + "marker.colon_last: %"PRIsizet, marker.colon_last); + + NSLOG(netsurf, DEEPDEBUG, + "marker.path: %"PRIsizet, marker.path); + NSLOG(netsurf, DEEPDEBUG, + "marker.query: %"PRIsizet, marker.query); + NSLOG(netsurf, DEEPDEBUG, + "marker.fragment: %"PRIsizet, marker.fragment); + + NSLOG(netsurf, DEEPDEBUG, + "marker.end: %"PRIsizet, marker.end); /* Got all the URL components pegged out now */ *markers = marker; @@ -484,10 +504,10 @@ static size_t nsurl__remove_dot_segments(char *path, char *output) char *output_pos = output; while (*path_pos != '\0') { -#ifdef NSURL_DEBUG - 
NSLOG(netsurf, INFO, " in:%s", path_pos); - NSLOG(netsurf, INFO, "out:%.*s", output_pos - output, output); -#endif + NSLOG(netsurf, DEEPDEBUG, " in:%s", path_pos); + NSLOG(netsurf, DEEPDEBUG, "out:%.*s", + (int)(output_pos - output), output); + if (*path_pos == '.') { if (*(path_pos + 1) == '.' && *(path_pos + 2) == '/') { @@ -557,7 +577,7 @@ static size_t nsurl__remove_dot_segments(char *path, char *output) /* Copy up to but not including next '/' */ while ((*path_pos != '/') && (*path_pos != '\0')) - *output_pos++ = *path_pos++; + *output_pos++ = *path_pos++; } return output_pos - output; @@ -651,7 +671,9 @@ static nserror nsurl__create_from_section(const char * const url_s, break; case URL_QUERY: - start = pegs->query; + start = (*(url_s + pegs->query) != '?') ? + pegs->query : + pegs->query + 1; end = pegs->fragment; break; @@ -1065,6 +1087,15 @@ static void nsurl__get_string_data(const struct nsurl_components *url, *url_l += SLEN("@"); } + /* spanned query question mark */ + if ((flags & ~(NSURL_F_QUERY | NSURL_F_FRAGMENT)) && + (flags & NSURL_F_QUERY)) { + flags |= NSURL_F_QUERY_PUNCTUATION; + + *url_l += SLEN("?"); + } + + /* spanned fragment hash mark */ if ((flags & ~NSURL_F_FRAGMENT) && (flags & NSURL_F_FRAGMENT)) { flags |= NSURL_F_FRAGMENT_PUNCTUATION; @@ -1138,6 +1169,8 @@ static void nsurl__get_string(const struct nsurl_components *url, char *url_s, } if (flags & NSURL_F_QUERY) { + if (flags & NSURL_F_QUERY_PUNCTUATION) + *(pos++) = '?'; memcpy(pos, lwc_string_data(url->query), l->query); pos += l->query; } @@ -1283,7 +1316,7 @@ nserror nsurl_create(const char * const url_s, nsurl **url) } e = nsurl__components_to_string(&c, NSURL_WITH_FRAGMENT, - sizeof(nsurl), (char **)url, &length); + offsetof(nsurl, string), (char **)url, &length); if (e != NSERROR_OK) { return e; } @@ -1323,10 +1356,8 @@ nserror nsurl_join(const nsurl *base, const char *rel, nsurl **joined) assert(base != NULL); assert(rel != NULL); -#ifdef NSURL_DEBUG - NSLOG(netsurf, INFO, 
"base: \"%s\", rel: \"%s\"", nsurl_access(base), - rel); -#endif + NSLOG(netsurf, DEEPDEBUG, "base: \"%s\", rel: \"%s\"", + nsurl_access(base), rel); /* Peg out the URL sections */ nsurl__get_string_markers(rel, &m, true); @@ -1523,7 +1554,7 @@ nserror nsurl_join(const nsurl *base, const char *rel, nsurl **joined) } error = nsurl__components_to_string(&c, NSURL_WITH_FRAGMENT, - sizeof(nsurl), (char **)joined, &length); + offsetof(nsurl, string), (char **)joined, &length); if (error != NSERROR_OK) { return error; } @@ -1539,4 +1570,3 @@ nserror nsurl_join(const nsurl *base, const char *rel, nsurl **joined) return NSERROR_OK; } - diff --git a/utils/nsurl/private.h b/utils/nsurl/private.h index bc6737cd6..b099a451c 100644 --- a/utils/nsurl/private.h +++ b/utils/nsurl/private.h @@ -25,21 +25,6 @@ #include "utils/utils.h" -/* Define to enable NSURL debugging */ -#undef NSURL_DEBUG - - -/** A type for URL schemes */ -enum nsurl_scheme_type { - NSURL_SCHEME_OTHER, - NSURL_SCHEME_HTTP, - NSURL_SCHEME_HTTPS, - NSURL_SCHEME_FILE, - NSURL_SCHEME_FTP, - NSURL_SCHEME_MAILTO -}; - - /** * nsurl components * @@ -108,9 +93,10 @@ enum nsurl_string_flags { NSURL_F_HOST | NSURL_F_PORT), NSURL_F_PATH = (1 << 8), - NSURL_F_QUERY = (1 << 9), - NSURL_F_FRAGMENT_PUNCTUATION = (1 << 10), - NSURL_F_FRAGMENT = (1 << 11) + NSURL_F_QUERY_PUNCTUATION = (1 << 9), + NSURL_F_QUERY = (1 << 10), + NSURL_F_FRAGMENT_PUNCTUATION = (1 << 11), + NSURL_F_FRAGMENT = (1 << 12) }; /** @@ -176,57 +162,4 @@ static inline void nsurl__components_destroy(struct nsurl_components *c) lwc_string_unref(c->fragment); } - - -#ifdef NSURL_DEBUG -/** - * Dump a NetSurf URL's internal components - * - * \param url The NetSurf URL to dump components of - */ -static inline void nsurl__dump(const nsurl *url) -{ - if (url->components.scheme) - NSLOG(netsurf, INFO,netsurf, INFO, - " Scheme: %s", - lwc_string_data(url->components.scheme)); - - if (url->components.username) - NSLOG(netsurf, INFO, - "Username: %s", - 
lwc_string_data(url->components.username)); - - if (url->components.password) - NSLOG(netsurf, INFO, - "Password: %s", - lwc_string_data(url->components.password)); - - if (url->components.host) - NSLOG(netsurf, INFO, - " Host: %s", - lwc_string_data(url->components.host)); - - if (url->components.port) - NSLOG(netsurf, INFO, - " Port: %s", - lwc_string_data(url->components.port)); - - if (url->components.path) - NSLOG(netsurf, INFO, - " Path: %s", - lwc_string_data(url->components.path)); - - if (url->components.query) - NSLOG(netsurf, INFO, - " Query: %s", - lwc_string_data(url->components.query)); - - if (url->components.fragment) - NSLOG(netsurf, INFO, - "Fragment: %s", - lwc_string_data(url->components.fragment)); -} -#endif - - #endif diff --git a/utils/regex.h b/utils/regex.h new file mode 100644 index 000000000..a415239d6 --- /dev/null +++ b/utils/regex.h @@ -0,0 +1,63 @@ +/* + * Copyright 2019 Vincent Sanders <vince@netxurf-browser.org> + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef NETSURF_UTILS_REGEX_H_ +#define NETSURF_UTILS_REGEX_H_ + +#include "utils/config.h" + +#ifdef HAVE_REGEX +#include <sys/types.h> +#include <regex.h> +#else /* no system regex header: declare a minimal POSIX-shaped interface instead */ + +#define REG_NOMATCH 1 + +#define REG_EXTENDED 1 +#define REG_ICASE (1 << 1) +#define REG_NEWLINE (1 << 2) +#define REG_NOSUB (1 << 3) + +typedef ssize_t regoff_t; + +typedef struct { + size_t re_nsub; /* Number of parenthesized subexpressions.*/ +} regex_t; + + +typedef struct { + regoff_t rm_so; /* Byte offset from start of string to start + * of substring. + */ + regoff_t rm_eo; /* Byte offset from start of string of the + * first character after the end of substring. + */ +} regmatch_t; + + +int regcomp(regex_t *restrict preg, const char *restrict regex, int cflags); + +size_t regerror(int errorcode, const regex_t *restrict preg, char *restrict errbuf, size_t errbuf_size); + +int regexec(const regex_t *restrict preg, const char *restrict string, size_t nmatch, regmatch_t pmatch[restrict], int eflags); + +void regfree(regex_t *preg); + +#endif + +#endif diff --git a/utils/ring.h b/utils/ring.h index 1802b4484..55880a695 100644 --- a/utils/ring.h +++ b/utils/ring.h @@ -66,24 +66,6 @@ /** Find the element (by hostname) in the given ring, leave it in the * provided element variable */ -#define RING_FINDBYHOST(ring, element, hostname) \ - /*LOG("RING_FINDBYHOST(%s, %s)", #ring, hostname);*/ \ - if (ring) { \ - bool found = false; \ - element = ring; \ - do { \ - if (strcasecmp(element->host, hostname) == 0) { \ - found = true; \ - break; \ - } \ - element = element->r_next; \ - } while (element != ring); \ - if (!found) element = 0; \ - } else element = 0 - -/** Find the element (by hostname) in the given ring, leave it in the - * provided element variable - */ #define RING_FINDBYLWCHOST(ring, element, lwc_hostname) \ /*LOG("RING_FINDBYHOST(%s, %s)", #ring, hostname);*/ \ if (ring) { \ @@ -112,19 +94,6 @@ } while (p != ring); \ } else sizevar = 0 -/** Count the number of elements in the ring 
which match the provided hostname */ -#define RING_COUNTBYHOST(ringtype, ring, sizevar, hostname) \ - /*LOG("RING_COUNTBYHOST(%s, %s)", #ring, hostname);*/ \ - if (ring) { \ - ringtype *p = ring; \ - sizevar = 0; \ - do { \ - if (strcasecmp(p->host, hostname) == 0) \ - sizevar++; \ - p = p->r_next; \ - } while (p != ring); \ - } else sizevar = 0 - /** Count the number of elements in the ring which match the provided lwc_hostname */ #define RING_COUNTBYLWCHOST(ringtype, ring, sizevar, lwc_hostname) \ /*LOG("RING_COUNTBYHOST(%s, %s)", #ring, hostname);*/ \ @@ -136,7 +105,7 @@ /* nsurl guarantees lowercase host */ \ if (lwc_string_isequal(p->host, lwc_hostname, \ &matches) == lwc_error_ok) \ - if (matches) \ + if (matches) \ sizevar++; \ p = p->r_next; \ } while (p != ring); \ diff --git a/utils/split-messages.pl b/utils/split-messages.pl deleted file mode 100644 index 570ae03ca..000000000 --- a/utils/split-messages.pl +++ /dev/null @@ -1,303 +0,0 @@ -#!/usr/bin/perl -# -# Copyright 2013 Vivek Dasmohapatra <vivek@collabora.co.uk> -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to deal -# in the Software without restriction, including without limitation the rights -# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -# copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# * The above copyright notice and this permission notice shall be included in -# all copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THE SOFTWARE. - -=head1 - -Filter the NetSurf combined messages (i10n) file according to language -and platform and generate output in a selection of formats for use -both internally within netsurf and externally for translation -services. - -=cut - -use strict; - -use Getopt::Long (); -use Fcntl qw( O_CREAT O_EXCL O_WRONLY O_APPEND O_RDONLY O_WRONLY ); - -use constant GETOPT_OPTS => qw( auto_abbrev no_getopt_compat bundling ); -use constant GETOPT_SPEC => - qw( output|o=s - input|i=s - lang|l=s - dlang|d=s - plat|platform|p=s - format|fmt|f=s - warning|W=s - help|h|? ); - -# default option values: -my %opt = qw( dlang en plat any format messages warning none ); - -sub input_stream (); -sub output_stream (); -sub formatter (); -sub static_section($); -sub usage (); - -sub main () -{ - my $input; - my $output; - my $format; - my $header; - my $footer; - my $opt_ok; - - # option parsing: - Getopt::Long::Configure( GETOPT_OPTS ); - $opt_ok = Getopt::Long::GetOptions( \%opt, GETOPT_SPEC ); - - # allow input and output to be specified as non-option arguments: - if( @ARGV ) { $opt{input } ||= shift( @ARGV ) } - if( @ARGV ) { $opt{output} ||= shift( @ARGV ) } - - # open the appropriate streams and get the formatter and headers: - if( $opt_ok ) - { - $input = input_stream(); - $output = output_stream(); - $format = formatter(); - $header = static_section('header'); - $footer = static_section('footer'); - } - - # double check the options are sane (and we weren't asked for the help) - if( !$opt_ok || $opt{help} || $opt{lang} !~ /^[a-z]{2}$/ || $opt{dlang} !~ /^[a-z]{2}$/ ) - { - usage(); - } - - # we are good to go: - print( $output $header ); - - my $cur_key; - - my $dlang_key; - my $dlang_val; - - my $tran_out = 1; - my 
$tran_val; - my $tran_key; - - while (<$input>) - { - # skip comment and empty lines - /^#/ && next; - /^\s*$/ && next; - - # only parsing things that look like message lines: - if( /^([a-z]{2}).([^.]+).([^:]+):(.*)/ ) - { - my( $lang, $plat, $key, $val ) = ( $1, $2, $3, $4 ); - - # skip the line if it is not for our target platform - if( $opt{plat} ne 'any' && - $opt{plat} ne $plat && - 'all' ne $plat ) - { - next; - } - - # On key change ensure a translation has been generated - if ($cur_key ne $key) - { - if ($tran_out == 0) - { - # No translaton for previous key - if ($cur_key eq $dlang_key) - { - print( $output $format->( $dlang_key, $dlang_val ) ); - if( $opt{warning} eq "fb" ) - { - warn( "warning: $dlang_key missing translation in $opt{lang} using $opt{dlang} instead" ); - } - } - else - { - # No translation and nothing in default language - warn( "warning: $dlang_key missing translation in $opt{lang} and no fallback in $opt{dlang}" ); - } - } - else - { - if (($opt{dlang} ne $opt{lang} ) && ($tran_key eq $dlang_key) && ($tran_val eq $dlang_val)) - { - if( $opt{warning} eq "dup" ) - { - warn( "warning: $tran_key value in $opt{lang} is same as in default $opt{dlang}" ); - } - } - } - $cur_key = $key; - $tran_out = 0; - } - - # capture the key/value in the default language - if( $lang eq $opt{dlang} ) - { - $dlang_key = $key; - $dlang_val = $val; - } - - # output if its the target language - if( $lang eq $opt{lang} ) { - print( $output $format->( $key, $val ) ); - $tran_out = 1; - $tran_val = $val; - $tran_key = $key; - } - } - else - { - warn( "Malformed entry: $_" ); - } - } - - print( $output $footer ); -} - -main(); - -sub usage () -{ - my @fmt = map { s/::$//; $_ } keys(%{$::{'msgfmt::'}}); - print(STDERR <<TXT ); -usage: - $0 -l lang-code [-d def-lang-code] [-W warning] \ - [-o output-file] [-i input-file] [-p platform] [-f format] - - $0 -l lang-code ... [input-file [output-file]] - - lang-code : en fr ko ... (no default) - def-lang-code : en fr ko ... 
(default 'en') - warning : none, all (default 'none') - platform : any gtk ami (default 'any') - format : @fmt (default 'messages') - input-file : defaults to standard input - output-file : defaults to standard output -TXT - exit(1); -} - -sub input_stream () -{ - if( $opt{input} ) - { - my $ifh; - - sysopen( $ifh, $opt{input}, O_RDONLY ) || - die( "$0: Failed to open input file $opt{input}: $!\n" ); - - return $ifh; - } - - return \*STDIN; -} - -sub output_stream () -{ - if( $opt{output} ) - { - my $ofh; - - sysopen( $ofh, $opt{output}, O_CREAT|O_EXCL|O_APPEND|O_WRONLY ) || - die( "$0: Failed to open output file $opt{output}: $!\n" ); - - return $ofh; - } - - return \*STDOUT; -} - -sub formatter () -{ - my $name = $opt{format}; - my $func = "msgfmt::$name"->UNIVERSAL::can("format"); - - return $func || die( "No handler found for format '$name'\n" ); -} - -sub static_section ($) -{ - my $name = $opt{format}; - my $sect = shift(); - my $func = "msgfmt::$name"->UNIVERSAL::can( $sect ); - - return $func ? $func->() : ""; -} - -# format implementations: -{ - package msgfmt::java; - - sub escape { $_[0] =~ s/([:'\\])/\\$1/g; $_[0] } - sub format { return join(' = ', $_[0], escape( $_[1] ) ) . "\n" } - sub header { "# autogenerated from " . ($opt{input} || '-stdin-') . "\n" } -} - -{ - package msgfmt::messages; # native netsurf format - - sub format { return join( ":", @_ ) . "\n" } - sub header - { - my $in = $opt{input} || '-stdin-'; - return <<TXT; -# This messages file is automatically generated from $in -# at build-time. 
Please go and edit that instead of this.\n -TXT - } -} - -{ - package msgfmt::transifex; - use base 'msgfmt::java'; - - # transifex has the following quirks: - # \ processing is buggy - they re-process every \\ as a \ - # so \\n, instead or producing literal '\n', is interpreted as \ ^J - # Additionally, although the java properties format specifies - # that ' should be \ escaped, transifex does not allow/support this: - sub escape { $_[0] =~ s/(:|\\(?![abfnrtv]))/\\$1/g; $_[0] } - sub format { return join(' = ', $_[0], escape( $_[1] ) ) . "\n" } -} - -########### YAML ########### -#{ -# package msgfmt::yaml; -# use YAML qw(Dump Bless); -# print Dump %data; -#} - -{ - package msgfmt::android; - - sub header { qq|<?xml version="1.0" encoding="utf-8"?>\n<resources>\n| } - sub footer { qq|</resources>| } - sub format - { - use HTML::Entities qw(encode_entities); - my $escaped = encode_entities( $_[1], '<>&"' ); - qq| <string name="$_[0]">$escaped</string>\n|; - } -} diff --git a/utils/ssl_certs.c b/utils/ssl_certs.c new file mode 100644 index 000000000..8546165ac --- /dev/null +++ b/utils/ssl_certs.c @@ -0,0 +1,356 @@ +/* + * Copyright 2020 Vincent Sanders <vince@netsurf-browser.org> + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +/** + * \file + * helpers for X509 certificate chains + */ + +#include <stdlib.h> +#include <stdint.h> +#include <string.h> +#include <nsutils/base64.h> + +#include "utils/errors.h" +#include "utils/log.h" +#include "utils/nsurl.h" + +#include "netsurf/ssl_certs.h" + +/* + * create new certificate chain + * + * exported interface documented in netsurf/ssl_certs.h + */ +nserror +cert_chain_alloc(size_t depth, struct cert_chain **chain_out) +{ + struct cert_chain* chain; + + chain = calloc(1, sizeof(struct cert_chain)); + if (chain == NULL) { + return NSERROR_NOMEM; + } + + chain->depth = depth; + + *chain_out = chain; + + return NSERROR_OK; +} + + +/* + * duplicate certificate chain into existing chain + * + * exported interface documented in netsurf/ssl_certs.h + */ +nserror +cert_chain_dup_into(const struct cert_chain *src, struct cert_chain *dst) +{ + size_t depth; + for (depth = 0; depth < dst->depth; depth++) { + if (dst->certs[depth].der != NULL) { + free(dst->certs[depth].der); + dst->certs[depth].der = NULL; + } + } + + dst->depth = src->depth; + + for (depth = 0; depth < src->depth; depth++) { + dst->certs[depth].err = src->certs[depth].err; + dst->certs[depth].der_length = src->certs[depth].der_length; + if (src->certs[depth].der != NULL) { + dst->certs[depth].der = malloc(src->certs[depth].der_length); + if (dst->certs[depth].der == NULL) { + return NSERROR_NOMEM; + } + memcpy(dst->certs[depth].der, + src->certs[depth].der, + src->certs[depth].der_length); + } + + } + + return NSERROR_OK; +} + + +/* + * duplicate certificate chain + * + * exported interface documented in netsurf/ssl_certs.h + */ +nserror +cert_chain_dup(const struct cert_chain *src, struct cert_chain **dst_out) +{ + struct cert_chain* dst; + size_t depth; + nserror res; + + res = cert_chain_alloc(src->depth, &dst); + if (res != NSERROR_OK) { + return res; + } + + for (depth = 0; depth < src->depth; depth++) { + dst->certs[depth].err = src->certs[depth].err; + 
dst->certs[depth].der_length = src->certs[depth].der_length; + if (src->certs[depth].der != NULL) { + dst->certs[depth].der = malloc(src->certs[depth].der_length); + if (dst->certs[depth].der == NULL) { + cert_chain_free(dst); + return NSERROR_NOMEM; + } + memcpy(dst->certs[depth].der, + src->certs[depth].der, + src->certs[depth].der_length); + } + + } + + *dst_out = dst; + return NSERROR_OK; +} + + +#define MIN_CERT_LEN 64 + +/** + * process a part of a query extracting the certificate of an error code + */ +static nserror +process_query_section(const char *str, size_t len, struct cert_chain* chain) +{ + nsuerror nsures; + + if ((len > (5 + MIN_CERT_LEN)) && + (strncmp(str, "cert=", 5) == 0)) { + /* possible certificate entry */ + nsures = nsu_base64_decode_alloc_url( + (const uint8_t *)str + 5, + len - 5, + &chain->certs[chain->depth].der, + &chain->certs[chain->depth].der_length); + if (nsures == NSUERROR_OK) { + chain->depth++; + } + } else if ((len > 8) && + (strncmp(str, "certerr=", 8) == 0)) { + /* certificate entry error code */ + if (chain->depth > 0) { + chain->certs[chain->depth - 1].err = strtoul(str + 8, NULL, 10); + } + } + return NSERROR_OK; +} + +/* + * create a certificate chain from a fetch query string + * + * exported interface documented in netsurf/ssl_certs.h + */ +nserror cert_chain_from_query(struct nsurl *url, struct cert_chain **chain_out) +{ + struct cert_chain* chain; + nserror res; + char *querystr; + size_t querylen; + size_t kvstart; + size_t kvlen; + + res = nsurl_get(url, NSURL_QUERY, &querystr, &querylen); + if (res != NSERROR_OK) { + return res; + } + + if (querylen < MIN_CERT_LEN) { + free(querystr); + return NSERROR_NEED_DATA; + } + + res = cert_chain_alloc(0, &chain); + if (res != NSERROR_OK) { + free(querystr); + return res; + } + + for (kvlen = 0, kvstart = 0; kvstart < querylen; kvstart += kvlen) { + /* get query section length */ + kvlen = 0; + while (((kvstart + kvlen) < querylen) && + (querystr[kvstart + kvlen] != '&')) { 
+ kvlen++; + } + + res = process_query_section(querystr + kvstart, kvlen, chain); + if (res != NSERROR_OK) { + break; + } + kvlen++; /* account for & separator */ + } + free(querystr); + + if (chain->depth > 0) { + *chain_out = chain; + } else { + free(chain); + return NSERROR_INVALID; + } + + return NSERROR_OK; +} + + +/* + * create a fetch query string from a certificate chain + * + * exported interface documented in netsurf/ssl_certs.h + */ +nserror cert_chain_to_query(struct cert_chain *chain, struct nsurl **url_out ) +{ + nserror res; + nsurl *url; + size_t allocsize; + size_t urlstrlen; + uint8_t *urlstr; + size_t depth; + + allocsize = 20; + for (depth = 0; depth < chain->depth; depth++) { + allocsize += 7; /* allow for &cert= */ + allocsize += 4 * ((chain->certs[depth].der_length + 2) / 3); + if (chain->certs[depth].err != SSL_CERT_ERR_OK) { + allocsize += 20; /* allow for &certerr=4000000000 */ + } + } + + urlstr = malloc(allocsize); + if (urlstr == NULL) { + return NSERROR_NOMEM; + } + + urlstrlen = snprintf((char *)urlstr, allocsize, "about:certificate"); + for (depth = 0; depth < chain->depth; depth++) { + int written; + nsuerror nsures; + size_t output_length; + + written = snprintf((char *)urlstr + urlstrlen, + allocsize - urlstrlen, + "&cert="); + if (written < 0) { + free(urlstr); + return NSERROR_UNKNOWN; + } + if ((size_t)written >= allocsize - urlstrlen) { + free(urlstr); + return NSERROR_UNKNOWN; + } + + urlstrlen += (size_t)written; + + output_length = allocsize - urlstrlen; + nsures = nsu_base64_encode_url( + chain->certs[depth].der, + chain->certs[depth].der_length, + (uint8_t *)urlstr + urlstrlen, + &output_length); + if (nsures != NSUERROR_OK) { + free(urlstr); + return (nserror)nsures; + } + urlstrlen += output_length; + + if (chain->certs[depth].err != SSL_CERT_ERR_OK) { + written = snprintf((char *)urlstr + urlstrlen, + allocsize - urlstrlen, + "&certerr=%d", + chain->certs[depth].err); + if (written < 0) { + free(urlstr); + return 
NSERROR_UNKNOWN; + } + if ((size_t)written >= allocsize - urlstrlen) { + free(urlstr); + return NSERROR_UNKNOWN; + } + + urlstrlen += (size_t)written; + } + + } + urlstr[17] = '?'; + urlstr[urlstrlen] = 0; + + res = nsurl_create((const char *)urlstr, &url); + free(urlstr); + + if (res == NSERROR_OK) { + *url_out = url; + } + + return res; +} + +/* + * free certificate chain + * + * exported interface documented in netsurf/ssl_certs.h + */ +nserror cert_chain_free(struct cert_chain* chain) +{ + size_t depth; + + if (chain != NULL) { + for (depth = 0; depth < chain->depth; depth++) { + if (chain->certs[depth].der != NULL) { + free(chain->certs[depth].der); + } + } + + free(chain); + } + + return NSERROR_OK; +} + + +/* + * calculate storage used of certificate chain + * + * exported interface documented in netsurf/ssl_certs.h + */ +size_t cert_chain_size(const struct cert_chain *chain) +{ + size_t size = 0; + size_t depth; + + if (chain != NULL) { + size += sizeof(struct cert_chain); + + for (depth = 0; depth < chain->depth; depth++) { + if (chain->certs[depth].der != NULL) { + size += chain->certs[depth].der_length; + } + } + } + + return size; +} diff --git a/utils/string.h b/utils/string.h index 03d891700..abb343154 100644 --- a/utils/string.h +++ b/utils/string.h @@ -64,7 +64,7 @@ char *cnv_space2nbsp(const char *s); * @param bytesize The size in bytes. * @return A human readable string representing the size. */ -char *human_friendly_bytesize(unsigned long bytesize); +char *human_friendly_bytesize(unsigned long long int bytesize); /** diff --git a/utils/sys_time.h b/utils/sys_time.h index 82d88fd25..c9ce8202f 100644 --- a/utils/sys_time.h +++ b/utils/sys_time.h @@ -18,14 +18,26 @@ /** * \file - * \brief BSD style time functions + * BSD style timeval macros + * + * BSD added macros for manipulating timeval which have become standard on + * modern c libraries but for compatability where they are missing it is + * necessary to provide fallbacks. 
*/ -#ifndef _NETSURF_UTILS_SYS_TIME_H_ -#define _NETSURF_UTILS_SYS_TIME_H_ +#ifndef NETSURF_UTILS_SYS_TIME_H_ +#define NETSURF_UTILS_SYS_TIME_H_ #include <sys/time.h> +#ifndef timerclear +#define timerclear(a) ((a)->tv_sec = (a)->tv_usec = 0) +#endif + +#ifndef timerisset +#define timerisset(a) ((a)->tv_sec || (a)->tv_usec) +#endif + #ifndef timeradd #define timeradd(a, aa, result) \ do { \ @@ -50,4 +62,10 @@ } while (0) #endif +#ifndef timercmp +#define timercmp(a, aa, cmp) \ + (((a)->tv_sec cmp (aa)->tv_sec) || \ + ((a)->tv_sec == (aa)->tv_sec && (a)->tv_usec cmp (aa)->tv_usec)) +#endif + #endif diff --git a/utils/test-netsurf b/utils/test-netsurf deleted file mode 100755 index 90c7e121b..000000000 --- a/utils/test-netsurf +++ /dev/null @@ -1,35 +0,0 @@ -#!/bin/sh -# This file is part of NetSurf, http://netsurf-browser.org/ -# Licensed under the GNU General Public License, -# http://www.opensource.org/licenses/gpl-license -# Copyright 2007 Rob Kendrick <rjek@netsurf-browser.org> -# -# This launcher script is meant only for running nsgtk from inside the -# build tree, with some debugging enabled. It is not meant for day-to-day -# or packaged use! 
- -if [ -d ~/.netsurf ]; then - LOG=~/.netsurf/log.txt -elif [ -d /tmp ]; then - LOG=/tmp/netsurf-log.txt -else - LOG=netsurf-log.txt -fi - -echo $0: using $LOG as logfile - -ulimit -c unlimited - -if [ "x$1" = "x--gdb" ]; then - GDB="gdb --args" - echo - echo - echo "**********************************************************" - echo "Remember to type 'run' and press ENTER once gdb has loaded" - echo "**********************************************************" - echo - echo - shift -fi - -exec $GDB $PREFIX/bin/netsurf -v "$@" 2>&1 | tee $LOG diff --git a/utils/time.c b/utils/time.c index 71fa30fe3..e1f76ca54 100644 --- a/utils/time.c +++ b/utils/time.c @@ -109,7 +109,7 @@ static const char * const months[NSC_TIME_MONTH__COUNT] = { /* exported interface documented in utils/time.h */ const char *rfc1123_date(time_t t) { - static char ret[30]; + static char ret[31]; struct tm *tm = gmtime(&t); diff --git a/utils/useragent.c b/utils/useragent.c index b528dce4f..547999c7c 100644 --- a/utils/useragent.c +++ b/utils/useragent.c @@ -19,6 +19,7 @@ #include <stdio.h> #include <stdlib.h> +#include <string.h> #include "utils/config.h" #include "utils/utsname.h" @@ -29,7 +30,7 @@ static const char *core_user_agent_string = NULL; #ifndef NETSURF_UA_FORMAT_STRING -#define NETSURF_UA_FORMAT_STRING "NetSurf/%d.%d (%s)" +#define NETSURF_UA_FORMAT_STRING "Mozilla/5.0 (%s) NetSurf/%d.%d" #endif /** @@ -46,12 +47,16 @@ user_agent_build_string(void) if (uname(&un) >= 0) { sysname = un.sysname; + if (strcmp(sysname, "Linux") == 0) { + /* Force desktop, not mobile */ + sysname = "X11; Linux"; + } } len = snprintf(NULL, 0, NETSURF_UA_FORMAT_STRING, + sysname, netsurf_version_major, - netsurf_version_minor, - sysname); + netsurf_version_minor); ua_string = malloc(len + 1); if (!ua_string) { /** \todo this needs handling better */ @@ -59,9 +64,9 @@ user_agent_build_string(void) } snprintf(ua_string, len + 1, NETSURF_UA_FORMAT_STRING, + sysname, netsurf_version_major, - netsurf_version_minor, 
- sysname); + netsurf_version_minor); core_user_agent_string = ua_string; @@ -78,3 +83,13 @@ user_agent_string(void) return core_user_agent_string; } +/* Public API documented in useragent.h */ +void +free_user_agent_string(void) +{ + if (core_user_agent_string != NULL) { + /* Nasty cast because we need to de-const it to free it */ + free((void *)core_user_agent_string); + core_user_agent_string = NULL; + } +} diff --git a/utils/useragent.h b/utils/useragent.h index 87677e7bc..6eb309a62 100644 --- a/utils/useragent.h +++ b/utils/useragent.h @@ -27,4 +27,11 @@ */ const char * user_agent_string(void); +/** Free any memory allocated for the user_agent_string + * + * After calling this, the value returned by \ref user_agent_string() + * is to be considered invalid. + */ +void free_user_agent_string(void); + #endif diff --git a/utils/utf8.c b/utils/utf8.c index f0ac0c9b2..3eedd0810 100644 --- a/utils/utf8.c +++ b/utils/utf8.c @@ -32,6 +32,7 @@ #include "utils/log.h" #include "utils/utf8.h" +#include "netsurf/inttypes.h" #include "netsurf/utf8.h" #include "desktop/gui_internal.h" @@ -44,7 +45,7 @@ uint32_t utf8_to_ucs4(const char *s_in, size_t l) parserutils_error perror; perror = parserutils_charset_utf8_to_ucs4((const uint8_t *) s_in, l, - &ucs4, &len); + &ucs4, &len); if (perror != PARSERUTILS_OK) ucs4 = 0xfffd; @@ -106,7 +107,7 @@ size_t utf8_char_byte_length(const char *s) parserutils_error perror; perror = parserutils_charset_utf8_char_byte_length((const uint8_t *) s, - &len); + &len); assert(perror == PARSERUTILS_OK); return len; @@ -131,7 +132,7 @@ size_t utf8_next(const char *s, size_t l, size_t o) parserutils_error perror; perror = parserutils_charset_utf8_next((const uint8_t *) s, l, o, - &next); + &next); assert(perror == PARSERUTILS_OK); return next; @@ -151,6 +152,47 @@ static inline void utf8_clear_cd_cache(void) last_cd.cd = 0; } +/** + * obtain a cached conversion descriptor + * + * either return the cached conversion descriptor or create one if required 
+ */ +static nserror +get_cached_cd(const char *enc_from, const char *enc_to, iconv_t *cd_out) +{ + iconv_t cd; + /* we cache the last used conversion descriptor, + * so check if we're trying to use it here */ + if (strncasecmp(last_cd.from, enc_from, sizeof(last_cd.from)) == 0 && + strncasecmp(last_cd.to, enc_to, sizeof(last_cd.to)) == 0 && + last_cd.cd != 0) { + *cd_out = last_cd.cd; + return NSERROR_OK; + } + + /* no match, so create a new cd */ + cd = iconv_open(enc_to, enc_from); + if (cd == (iconv_t) -1) { + if (errno == EINVAL) { + return NSERROR_BAD_ENCODING; + } + /* default to no memory */ + return NSERROR_NOMEM; + } + + /* close the last cd - we don't care if this fails */ + if (last_cd.cd) { + iconv_close(last_cd.cd); + } + + /* and safely copy the to/from/cd data into last_cd */ + snprintf(last_cd.from, sizeof(last_cd.from), "%s", enc_from); + snprintf(last_cd.to, sizeof(last_cd.to), "%s", enc_to); + *cd_out = last_cd.cd = cd; + + return NSERROR_OK; +} + /* exported interface documented in utils/utf8.h */ nserror utf8_finalise(void) { @@ -168,95 +210,72 @@ nserror utf8_finalise(void) * Convert a string from one encoding to another * * \param string The NULL-terminated string to convert - * \param len Length of input string to consider (in bytes), or 0 + * \param slen Length of input string to consider (in bytes), or 0 * \param from The encoding name to convert from * \param to The encoding name to convert to - * \param result Pointer to location in which to store result. - * \param result_len Pointer to location in which to store result length. + * \param result_out Pointer to location in which to store result. + * \param result_len_out Pointer to location in which to store result length. 
* \return NSERROR_OK for no error, NSERROR_NOMEM on allocation error, * NSERROR_BAD_ENCODING for a bad character encoding */ static nserror utf8_convert(const char *string, - size_t len, + size_t slen, const char *from, const char *to, - char **result, - size_t *result_len) + char **result_out, + size_t *result_len_out) { iconv_t cd; - char *temp, *out, *in; - size_t slen, rlen; - - assert(string && from && to && result); - - if (string[0] == '\0') { - /* On AmigaOS, iconv() returns an error if we pass an - * empty string. This prevents iconv() being called as - * there is no conversion necessary anyway. */ - *result = strdup(""); - if (!(*result)) { - *result = NULL; - return NSERROR_NOMEM; - } + char *temp, *out, *in, *result; + size_t result_len; + nserror res; - return NSERROR_OK; + assert(string && from && to && result_out); + + /* calculate the source length if not given */ + if (slen==0) { + slen = strlen(string); } - if (strcasecmp(from, to) == 0) { - /* conversion from an encoding to itself == strdup */ - slen = len ? len : strlen(string); - *(result) = strndup(string, slen); - if (!(*result)) { - *(result) = NULL; + /* process the empty string separately avoiding any conversion + * check for the source and destination encoding being the same + * + * This optimisation is necessary on AmigaOS as iconv() + * returns an error if an empty string is passed. 
+ */ + if ((slen == 0) || (strcasecmp(from, to) == 0)) { + *result_out = strndup(string, slen); + if (*result_out == NULL) { return NSERROR_NOMEM; } + if (result_len_out != NULL) { + *result_len_out = slen; + } return NSERROR_OK; } in = (char *)string; - /* we cache the last used conversion descriptor, - * so check if we're trying to use it here */ - if (strncasecmp(last_cd.from, from, sizeof(last_cd.from)) == 0 && - strncasecmp(last_cd.to, to, sizeof(last_cd.to)) == 0) { - cd = last_cd.cd; - } - else { - /* no match, so create a new cd */ - cd = iconv_open(to, from); - if (cd == (iconv_t)-1) { - if (errno == EINVAL) - return NSERROR_BAD_ENCODING; - /* default to no memory */ - return NSERROR_NOMEM; - } - - /* close the last cd - we don't care if this fails */ - if (last_cd.cd) - iconv_close(last_cd.cd); - - /* and copy the to/from/cd data into last_cd */ - snprintf(last_cd.from, sizeof(last_cd.from), "%s", from); - snprintf(last_cd.to, sizeof(last_cd.to), "%s", to); - last_cd.cd = cd; + res = get_cached_cd(from, to, &cd); + if (res != NSERROR_OK) { + return res; } - slen = len ? 
len : strlen(string); /* Worst case = ASCII -> UCS4, so allocate an output buffer * 4 times larger than the input buffer, and add 4 bytes at * the end for the NULL terminator */ - rlen = slen * 4 + 4; + result_len = slen * 4 + 4; - temp = out = malloc(rlen); + temp = out = malloc(result_len); if (!out) { return NSERROR_NOMEM; } /* perform conversion */ - if (iconv(cd, (void *) &in, &slen, &out, &rlen) == (size_t)-1) { + if (iconv(cd, (void *) &in, &slen, &out, &result_len) == (size_t)-1) { free(temp); /* clear the cached conversion descriptor as it's invalid */ if (last_cd.cd) @@ -270,19 +289,22 @@ utf8_convert(const char *string, return NSERROR_NOMEM; } - *(result) = realloc(temp, out - temp + 4); - if (!(*result)) { + result_len = out - temp; + + /* resize buffer allowing for null termination */ + result = realloc(temp, result_len + 4); + if (result == NULL) { free(temp); - *(result) = NULL; /* for sanity's sake */ return NSERROR_NOMEM; } /* NULL terminate - needs 4 characters as we may have * converted to UTF-32 */ - memset((*result) + (out - temp), 0, 4); + memset(result + result_len, 0, 4); - if (result_len != NULL) { - *result_len = (out - temp); + *result_out = result; + if (result_len_out != NULL) { + *result_len_out = result_len; } return NSERROR_OK; @@ -290,14 +312,14 @@ utf8_convert(const char *string, /* exported interface documented in utils/utf8.h */ nserror utf8_to_enc(const char *string, const char *encname, - size_t len, char **result) + size_t len, char **result) { return utf8_convert(string, len, "UTF-8", encname, result, NULL); } /* exported interface documented in utils/utf8.h */ nserror utf8_from_enc(const char *string, const char *encname, - size_t len, char **result, size_t *result_len) + size_t len, char **result, size_t *result_len) { return utf8_convert(string, len, encname, "UTF-8", result, result_len); } @@ -325,10 +347,10 @@ utf8_convert_html_chunk(iconv_t cd, return NSERROR_NOMEM; ucs4 = utf8_to_ucs4(chunk, inlen); - esclen = 
snprintf(escape, sizeof(escape), "&#x%06x;", ucs4); + esclen = snprintf(escape, sizeof(escape), "&#x%06"PRIx32";", ucs4); pescape = escape; ret = iconv(cd, (void *) &pescape, &esclen, - (void *) out, outlen); + (void *) out, outlen); if (ret == (size_t) -1) return NSERROR_NOMEM; @@ -340,45 +362,26 @@ utf8_convert_html_chunk(iconv_t cd, return NSERROR_OK; } + + /* exported interface documented in utils/utf8.h */ nserror -utf8_to_html(const char *string, const char *encname, size_t len, char **result) +utf8_to_html(const char *string, const char *encname, size_t len, char **result_out) { iconv_t cd; const char *in; - char *out, *origout; + char *out, *origout, *result; size_t off, prev_off, inlen, outlen, origoutlen, esclen; nserror ret; char *pescape, escape[11]; + nserror res; if (len == 0) len = strlen(string); - /* we cache the last used conversion descriptor, - * so check if we're trying to use it here */ - if (strncasecmp(last_cd.from, "UTF-8", sizeof(last_cd.from)) == 0 && - strncasecmp(last_cd.to, encname, - sizeof(last_cd.to)) == 0 && - last_cd.cd != 0) { - cd = last_cd.cd; - } else { - /* no match, so create a new cd */ - cd = iconv_open(encname, "UTF-8"); - if (cd == (iconv_t) -1) { - if (errno == EINVAL) - return NSERROR_BAD_ENCODING; - /* default to no memory */ - return NSERROR_NOMEM; - } - - /* close the last cd - we don't care if this fails */ - if (last_cd.cd) - iconv_close(last_cd.cd); - - /* and safely copy the to/from/cd data into last_cd */ - snprintf(last_cd.from, sizeof(last_cd.from), "UTF-8"); - snprintf(last_cd.to, sizeof(last_cd.to), "%s", encname); - last_cd.cd = cd; + res = get_cached_cd("UTF-8", encname, &cd); + if (res != NSERROR_OK) { + return res; } /* Worst case is ASCII -> UCS4, with all characters escaped: @@ -398,13 +401,13 @@ utf8_to_html(const char *string, const char *encname, size_t len, char **result) while (off < len) { /* Must escape '&', '<', and '>' */ if (string[off] == '&' || string[off] == '<' || - string[off] == '>') { 
+ string[off] == '>') { if (off - prev_off > 0) { /* Emit chunk */ in = string + prev_off; inlen = off - prev_off; ret = utf8_convert_html_chunk(cd, in, inlen, - &out, &outlen); + &out, &outlen); if (ret != NSERROR_OK) { free(origout); iconv_close(cd); @@ -415,10 +418,10 @@ utf8_to_html(const char *string, const char *encname, size_t len, char **result) /* Emit mandatory escape */ esclen = snprintf(escape, sizeof(escape), - "&#x%06x;", string[off]); + "&#x%06x;", string[off]); pescape = escape; ret = utf8_convert_html_chunk(cd, pescape, esclen, - &out, &outlen); + &out, &outlen); if (ret != NSERROR_OK) { free(origout); iconv_close(cd); @@ -450,11 +453,12 @@ utf8_to_html(const char *string, const char *encname, size_t len, char **result) outlen -= 4; /* Shrink-wrap */ - *result = realloc(origout, origoutlen - outlen); - if (*result == NULL) { + result = realloc(origout, origoutlen - outlen); + if (result == NULL) { free(origout); return NSERROR_NOMEM; } + *result_out = result; return NSERROR_OK; } diff --git a/utils/utils.c b/utils/utils.c index 15c91c621..c64718531 100644 --- a/utils/utils.c +++ b/utils/utils.c @@ -24,7 +24,9 @@ #include <stdio.h> #include <string.h> #include <strings.h> +#include <sys/types.h> #include <sys/stat.h> +#include <unistd.h> #include "utils/messages.h" #include "utils/dirent.h" @@ -172,6 +174,7 @@ nserror vsnstrjoin(char **str, size_t *size, char sep, size_t nelm, va_list ap) return NSERROR_OK; } + /* exported interface documented in utils/utils.h */ nserror snstrjoin(char **str, size_t *size, char sep, size_t nelm, ...) { @@ -189,26 +192,43 @@ nserror snstrjoin(char **str, size_t *size, char sep, size_t nelm, ...) /** * The size of buffers within human_friendly_bytesize. * - * We can have a fairly good estimate of how long the buffer needs to - * be. The unsigned long can store a value representing a maximum - * size of around 4 GB. Therefore the greatest space required is to - * represent 1023MB. 
Currently that would be represented as "1023MB" - * so 12 including a null terminator. Ideally we would be able to - * know this value for sure, in the mean time the following should - * suffice. + * We can have a fairly good estimate of the output buffers maximum length. + * + * The unsigned long long int can store a value representing a maximum + * size of 16 EiB (exibytes). Therefore the greatest space required is to + * represent 1023 PiB. + * Currently that would be represented as "1023.00PiBytes" in english + * giving a 15 byte length including a null terminator. + * Ideally we would be able to accurately know this length for other + * languages, in the mean time a largeish buffer size is selected + * and should suffice. */ -#define BYTESIZE_BUFFER_SIZE 20 +#define BYTESIZE_BUFFER_SIZE 32 /* exported interface documented in utils/string.h */ -char *human_friendly_bytesize(unsigned long bsize) { +char *human_friendly_bytesize(unsigned long long int bsize) { static char buffer1[BYTESIZE_BUFFER_SIZE]; static char buffer2[BYTESIZE_BUFFER_SIZE]; static char buffer3[BYTESIZE_BUFFER_SIZE]; static char *curbuffer = buffer3; - enum {bytes, kilobytes, megabytes, gigabytes} unit = bytes; - static char units[][7] = {"Bytes", "kBytes", "MBytes", "GBytes"}; - - float bytesize = (float)bsize; + enum { + bytes, + kilobytes, + megabytes, + gibibytes, + tebibytes, + pebibytes, + exbibytes } unit = bytes; + static const char *const units[] = { + "Bytes", + "KiBytes", + "MiBytes", + "GiBytes", + "TiBytes", + "PiBytes", + "EiBytes" }; + double bytesize = (double)bsize; + const char *fmt; if (curbuffer == buffer1) curbuffer = buffer2; @@ -229,15 +249,55 @@ char *human_friendly_bytesize(unsigned long bsize) { if (bytesize > 1024) { bytesize /= 1024; - unit = gigabytes; + unit = gibibytes; + } + + if (bytesize > 1024) { + bytesize /= 1024; + unit = tebibytes; } - snprintf(curbuffer, BYTESIZE_BUFFER_SIZE, "%3.2f%s", bytesize, messages_get(units[unit])); + if (bytesize > 1024) { + 
bytesize /= 1024; + unit = pebibytes; + } + + if (bytesize > 1024) { + bytesize /= 1024; + unit = exbibytes; + } + + if (unit == bytes) { + fmt = "%.0f%s"; + } else { + fmt = "%3.2f%s"; + } + + snprintf(curbuffer, + BYTESIZE_BUFFER_SIZE, + fmt, + bytesize, + messages_get(units[unit])); return curbuffer; } +#ifndef HAVE_STRTOULL +#include <stdlib.h> + +/** + * string to unsigned long long + * + */ +unsigned long long int strtoull(const char *nptr, char **endptr, int base) +{ + return (unsigned long long int)strtoul(nptr, endptr, base); +} + +#endif + + #ifndef HAVE_STRCASESTR /** @@ -263,12 +323,12 @@ char *strcasestr(const char *haystack, const char *needle) #endif + #ifndef HAVE_STRNDUP /** * Duplicate up to n characters of a string. */ - char *strndup(const char *s, size_t n) { size_t len; @@ -393,6 +453,7 @@ char *strchrnul (const char *s, int c_in) #endif #ifndef HAVE_UTSNAME + #include "utils/utsname.h" int uname(struct utsname *buf) { @@ -404,9 +465,11 @@ int uname(struct utsname *buf) { return 0; } + #endif #ifndef HAVE_REALPATH + char *realpath(const char *path, char *resolved_path) { char *ret; @@ -419,8 +482,9 @@ char *realpath(const char *path, char *resolved_path) return ret; } -#ifndef HAVE_INETATON +#endif +#ifndef HAVE_INETATON int inet_aton(const char *cp, struct in_addr *inp) { @@ -470,4 +534,40 @@ int inet_pton(int af, const char *src, void *dst) #endif +#ifndef HAVE_REGEX + +#include "utils/regex.h" + +int +regcomp(regex_t *restrict preg, const char *restrictregex, int cflags) +{ + return 0; +} + +size_t +regerror(int errorcode, + const regex_t *restrict preg, + char *restrict errbuf, + size_t errbuf_size) +{ + if ((errbuf != NULL) && (errbuf_size != 0)) { + *errbuf = 0; + } + return 0; +} + +int +regexec(const regex_t *restrict preg, + const char *restrict string, + size_t nmatch, + regmatch_t pmatch[restrict], + int eflags) +{ + return REG_NOMATCH; +} + +void regfree(regex_t *preg) +{ +} + #endif diff --git a/utils/utils.h b/utils/utils.h index 
3995071cd..cb9e04b45 100644 --- a/utils/utils.h +++ b/utils/utils.h @@ -89,4 +89,31 @@ */ bool is_dir(const char *path); +/** + * switch fall through + */ +#if defined __cplusplus && defined __has_cpp_attribute + #if __has_cpp_attribute(fallthrough) && __cplusplus >= __has_cpp_attribute(fallthrough) + #define fallthrough [[fallthrough]] + #elif __has_cpp_attribute(gnu::fallthrough) && __STDC_VERSION__ >= __has_cpp_attribute(gnu::fallthrough) + #define fallthrough [[gnu::fallthrough]] + #elif __has_cpp_attribute(clang::fallthrough) && __STDC_VERSION__ >= __has_cpp_attribute(clang::fallthrough) + #define fallthrough [[clang::fallthrough]] + #endif +#elif defined __STDC_VERSION__ && defined __has_c_attribute + #if __has_c_attribute(fallthrough) && __STDC_VERSION__ >= __has_c_attribute(fallthrough) + #define fallthrough [[fallthrough]] + #endif +#endif +#if !defined fallthrough && defined __has_attribute + #if __has_attribute(__fallthrough__) + #define fallthrough __attribute__((__fallthrough__)) + #endif +#endif +#if !defined fallthrough +/* early gcc and clang have no implicit fallthrough warning */ + #define fallthrough do {} while(0) +#endif + + #endif diff --git a/utils/valgrind.supp b/utils/valgrind.supp deleted file mode 100644 index f1a27f4b4..000000000 --- a/utils/valgrind.supp +++ /dev/null @@ -1,14 +0,0 @@ -# Valgrind suppression file for NetSurf - -# Suppress a valgrind message about use of uninitialized memory in strchrnul(). -# This use is OK because it provides only a speedup. -{ - strchrnul-addr4 - Memcheck:Addr4 - fun:strchrnul -} -{ - strchrnul-addr8 - Memcheck:Addr8 - fun:strchrnul -} |