summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJohn Mark Bell <jmb@netsurf-browser.org>2008-08-11 08:17:48 +0000
committerJohn Mark Bell <jmb@netsurf-browser.org>2008-08-11 08:17:48 +0000
commit191d876e414047232c8ad252862fc2b97ca71ee8 (patch)
tree0f4638a21af24af459233062b46c6b24de77e16b
parent058fcac22516008f3247cad9641cc91139b21e8c (diff)
downloadnetsurf-191d876e414047232c8ad252862fc2b97ca71ee8.tar.gz
netsurf-191d876e414047232c8ad252862fc2b97ca71ee8.tar.bz2
Merged revisions 4859-5013,5016-5018 via svnmerge from
svn://source.netsurf-browser.org/branches/takkaria/netsurf-hubbub ........ r4860 | takkaria | 2008-08-02 03:51:35 +0100 (Sat, 02 Aug 2008) | 2 lines A really horribly rough first go at integrating hubbub with NetSurf. Segfaults, but I'm not sure what I've done wrong. ........ r4861 | jmb | 2008-08-02 05:01:19 +0100 (Sat, 02 Aug 2008) | 3 lines Fix segfault caused by mismatched struct layout expectations. Fix warnings, too. ........ r4862 | jmb | 2008-08-02 05:11:02 +0100 (Sat, 02 Aug 2008) | 2 lines Destroy parser as soon as it's no longer needed, and flag this so that html_destroy doesn't cause things to trample all over the heap. ........ r4863 | takkaria | 2008-08-02 15:30:34 +0100 (Sat, 02 Aug 2008) | 2 lines Remove deugging printf()s, add Aliases file, add script { display: none; } block. ........ r4868 | takkaria | 2008-08-02 22:14:55 +0100 (Sat, 02 Aug 2008) | 2 lines Fix a segfault bug. ........ r4869 | takkaria | 2008-08-02 22:17:58 +0100 (Sat, 02 Aug 2008) | 2 lines Add gtk/res/Aliases as a symlink to the one in !NS/Resources ........ r4870 | takkaria | 2008-08-02 22:26:31 +0100 (Sat, 02 Aug 2008) | 2 lines Fix symlink. ........ r4885 | jmb | 2008-08-03 23:26:54 +0100 (Sun, 03 Aug 2008) | 2 lines Make multiple parallel parser instances work correctly. ........ r4886 | tlsa | 2008-08-04 00:21:29 +0100 (Mon, 04 Aug 2008) | 1 line Don't display contents of STYLE. ........ r4891 | jmb | 2008-08-04 01:18:07 +0100 (Mon, 04 Aug 2008) | 5 lines Fix reparent_children to actually work Make get_parent_node pay attention to the element_only flag Fixup node referencing when appending a text child Make clone_node clone attributes and namespace information in the non-deep case ........ r4918 | jmb | 2008-08-05 15:27:03 +0100 (Tue, 05 Aug 2008) | 2 lines Fix debug target ........ r4944 | takkaria | 2008-08-07 12:56:50 +0100 (Thu, 07 Aug 2008) | 2 lines Use talloc to allocate space for Hubbub. ........ 
r4993 | takkaria | 2008-08-10 17:49:47 +0100 (Sun, 10 Aug 2008) | 2 lines Stub out the encoding change callback so NS-hubbub compiles again. ........ r4994 | takkaria | 2008-08-10 18:02:33 +0100 (Sun, 10 Aug 2008) | 2 lines Tell Hubbub the encoding that HTTP gives us, if we have one. ........ r5001 | takkaria | 2008-08-11 02:53:24 +0100 (Mon, 11 Aug 2008) | 2 lines First go at implementing proper <meta charset> support in NetSurf, amongst some refactoring. Probably works, but I have no pages around to test it on. ........ r5002 | takkaria | 2008-08-11 02:56:35 +0100 (Mon, 11 Aug 2008) | 2 lines Fix (I hope) a 64-bit compiler warning. ........ r5012 | takkaria | 2008-08-11 08:40:28 +0100 (Mon, 11 Aug 2008) | 2 lines Fix some nits from jmb. (Remove const from html->encoding, set encoding_source to something about right.) ........ r5013 | takkaria | 2008-08-11 08:48:50 +0100 (Mon, 11 Aug 2008) | 2 lines Properly fix <meta charset> handling, by passing in the right thing as the context. No idea how this worked before. :) (credit: jmb) ........ r5017 | jmb | 2008-08-11 09:13:22 +0100 (Mon, 11 Aug 2008) | 7 lines Pedantic typo fix. Make Hubbub YES when building for RISC OS. Make Hubbub AUTO when building for GTK Ignore the presence of Hubbub on other platforms. Remove the explicit libparserutils pkg-config stuff from the makefile (see r5016) Add some logic that means Hubbub gets enabled correctly when building on RISC OS and when cross-compiling for it. (This is ugly and will go away when pkg-config is available on RO) ........ svn path=/trunk/netsurf/; revision=5019
-rw-r--r--!NetSurf/Resources/Aliases302
-rw-r--r--!NetSurf/Resources/CSS,f792
-rw-r--r--Makefile18
-rw-r--r--Makefile.config8
-rw-r--r--debug/fontd.c17
-rw-r--r--gtk/gtk_gui.c14
l---------gtk/res/Aliases1
-rw-r--r--render/directory.c27
-rw-r--r--render/html.c471
-rw-r--r--render/html.h14
10 files changed, 869 insertions, 5 deletions
diff --git a/!NetSurf/Resources/Aliases b/!NetSurf/Resources/Aliases
new file mode 100644
index 000000000..db61ff13e
--- /dev/null
+++ b/!NetSurf/Resources/Aliases
@@ -0,0 +1,302 @@
+# > Unicode:Files.Aliases
+# Mapping of character set encoding names to their canonical form
+#
+# Lines starting with a '#' are comments, blank lines are ignored.
+#
+# Based on http://www.iana.org/assignments/character-sets and
+# http://www.iana.org/assignments/ianacharset-mib
+#
+# Canonical Form MIBenum Aliases...
+#
+US-ASCII 3 iso-ir-6 ANSI_X3.4-1986 ISO_646.irv:1991 ASCII ISO646-US ANSI_X3.4-1968 us IBM367 cp367 csASCII
+ISO-10646-UTF-1 27 csISO10646UTF1
+ISO_646.basic:1983 28 ref csISO646basic1983
+INVARIANT 29 csINVARIANT
+ISO_646.irv:1983 30 iso-ir-2 irv csISO2IntlRefVersion
+BS_4730 20 iso-ir-4 ISO646-GB gb uk csISO4UnitedKingdom
+NATS-SEFI 31 iso-ir-8-1 csNATSSEFI
+NATS-SEFI-ADD 32 iso-ir-8-2 csNATSSEFIADD
+NATS-DANO 33 iso-ir-9-1 csNATSDANO
+NATS-DANO-ADD 34 iso-ir-9-2 csNATSDANOADD
+SEN_850200_B 35 iso-ir-10 FI ISO646-FI ISO646-SE se csISO10Swedish
+SEN_850200_C 21 iso-ir-11 ISO646-SE2 se2 csISO11SwedishForNames
+KS_C_5601-1987 36 iso-ir-149 KS_C_5601-1989 KSC_5601 korean csKSC56011987
+ISO-2022-KR 37 csISO2022KR
+EUC-KR 38 csEUCKR EUCKR
+ISO-2022-JP 39 csISO2022JP
+ISO-2022-JP-2 40 csISO2022JP2
+ISO-2022-CN 104
+ISO-2022-CN-EXT 105
+JIS_C6220-1969-jp 41 JIS_C6220-1969 iso-ir-13 katakana x0201-7 csISO13JISC6220jp
+JIS_C6220-1969-ro 42 iso-ir-14 jp ISO646-JP csISO14JISC6220ro
+IT 22 iso-ir-15 ISO646-IT csISO15Italian
+PT 43 iso-ir-16 ISO646-PT csISO16Portuguese
+ES 23 iso-ir-17 ISO646-ES csISO17Spanish
+greek7-old 44 iso-ir-18 csISO18Greek7Old
+latin-greek 45 iso-ir-19 csISO19LatinGreek
+DIN_66003 24 iso-ir-21 de ISO646-DE csISO21German
+NF_Z_62-010_(1973) 46 iso-ir-25 ISO646-FR1 csISO25French
+Latin-greek-1 47 iso-ir-27 csISO27LatinGreek1
+ISO_5427 48 iso-ir-37 csISO5427Cyrillic
+JIS_C6226-1978 49 iso-ir-42 csISO42JISC62261978
+BS_viewdata 50 iso-ir-47 csISO47BSViewdata
+INIS 51 iso-ir-49 csISO49INIS
+INIS-8 52 iso-ir-50 csISO50INIS8
+INIS-cyrillic 53 iso-ir-51 csISO51INISCyrillic
+ISO_5427:1981 54 iso-ir-54 ISO5427Cyrillic1981
+ISO_5428:1980 55 iso-ir-55 csISO5428Greek
+GB_1988-80 56 iso-ir-57 cn ISO646-CN csISO57GB1988
+GB_2312-80 57 iso-ir-58 chinese csISO58GB231280
+NS_4551-1 25 iso-ir-60 ISO646-NO no csISO60DanishNorwegian csISO60Norwegian1
+NS_4551-2 58 ISO646-NO2 iso-ir-61 no2 csISO61Norwegian2
+NF_Z_62-010 26 iso-ir-69 ISO646-FR fr csISO69French
+videotex-suppl 59 iso-ir-70 csISO70VideotexSupp1
+PT2 60 iso-ir-84 ISO646-PT2 csISO84Portuguese2
+ES2 61 iso-ir-85 ISO646-ES2 csISO85Spanish2
+MSZ_7795.3 62 iso-ir-86 ISO646-HU hu csISO86Hungarian
+JIS_C6226-1983 63 iso-ir-87 x0208 JIS_X0208-1983 csISO87JISX0208
+greek7 64 iso-ir-88 csISO88Greek7
+ASMO_449 65 ISO_9036 arabic7 iso-ir-89 csISO89ASMO449
+iso-ir-90 66 csISO90
+JIS_C6229-1984-a 67 iso-ir-91 jp-ocr-a csISO91JISC62291984a
+JIS_C6229-1984-b 68 iso-ir-92 ISO646-JP-OCR-B jp-ocr-b csISO92JISC62991984b
+JIS_C6229-1984-b-add 69 iso-ir-93 jp-ocr-b-add csISO93JIS62291984badd
+JIS_C6229-1984-hand 70 iso-ir-94 jp-ocr-hand csISO94JIS62291984hand
+JIS_C6229-1984-hand-add 71 iso-ir-95 jp-ocr-hand-add csISO95JIS62291984handadd
+JIS_C6229-1984-kana 72 iso-ir-96 csISO96JISC62291984kana
+ISO_2033-1983 73 iso-ir-98 e13b csISO2033
+ANSI_X3.110-1983 74 iso-ir-99 CSA_T500-1983 NAPLPS csISO99NAPLPS
+ISO-8859-1 4 iso-ir-100 ISO_8859-1 ISO_8859-1:1987 latin1 l1 IBM819 CP819 csISOLatin1 8859_1 ISO8859-1
+ISO-8859-2 5 iso-ir-101 ISO_8859-2 ISO_8859-2:1987 latin2 l2 csISOLatin2 8859_2 ISO8859-2
+T.61-7bit 75 iso-ir-102 csISO102T617bit
+T.61-8bit 76 T.61 iso-ir-103 csISO103T618bit
+ISO-8859-3 6 iso-ir-109 ISO_8859-3 ISO_8859-3:1988 latin3 l3 csISOLatin3 8859_3 ISO8859-3
+ISO-8859-4 7 iso-ir-110 ISO_8859-4 ISO_8859-4:1988 latin4 l4 csISOLatin4 8859_4 ISO8859-4
+ECMA-cyrillic 77 iso-ir-111 KOI8-E csISO111ECMACyrillic
+CSA_Z243.4-1985-1 78 iso-ir-121 ISO646-CA csa7-1 ca csISO121Canadian1
+CSA_Z243.4-1985-2 79 iso-ir-122 ISO646-CA2 csa7-2 csISO122Canadian2
+CSA_Z243.4-1985-gr 80 iso-ir-123 csISO123CSAZ24341985gr
+ISO-8859-6 9 iso-ir-127 ISO_8859-6 ISO_8859-6:1987 ECMA-114 ASMO-708 arabic csISOLatinArabic
+ISO-8859-6-E 81 csISO88596E ISO_8859-6-E
+ISO-8859-6-I 82 csISO88596I ISO_8859-6-I
+ISO-8859-7 10 iso-ir-126 ISO_8859-7 ISO_8859-7:1987 ELOT_928 ECMA-118 greek greek8 csISOLatinGreek 8859_7 ISO8859-7
+T.101-G2 83 iso-ir-128 csISO128T101G2
+ISO-8859-8 11 iso-ir-138 ISO_8859-8 ISO_8859-8:1988 hebrew csISOLatinHebrew 8859_8 ISO8859-8
+ISO-8859-8-E 84 csISO88598E ISO_8859-8-E
+ISO-8859-8-I 85 csISO88598I ISO_8859-8-I
+CSN_369103 86 iso-ir-139 csISO139CSN369103
+JUS_I.B1.002 87 iso-ir-141 ISO646-YU js yu csISO141JUSIB1002
+ISO_6937-2-add 14 iso-ir-142 csISOTextComm
+IEC_P27-1 88 iso-ir-143 csISO143IECP271
+ISO-8859-5 8 iso-ir-144 ISO_8859-5 ISO_8859-5:1988 cyrillic csISOLatinCyrillic 8859_5 ISO8859-5
+JUS_I.B1.003-serb 89 iso-ir-146 serbian csISO146Serbian
+JUS_I.B1.003-mac 90 macedonian iso-ir-147 csISO147Macedonian
+ISO-8859-9 12 iso-ir-148 ISO_8859-9 ISO_8859-9:1989 latin5 l5 csISOLatin5 8859_9 ISO8859-9
+greek-ccitt 91 iso-ir-150 csISO150 csISO150GreekCCITT
+NC_NC00-10:81 92 cuba iso-ir-151 ISO646-CU csISO151Cuba
+ISO_6937-2-25 93 iso-ir-152 csISO6937Add
+GOST_19768-74 94 ST_SEV_358-88 iso-ir-153 csISO153GOST1976874
+ISO_8859-supp 95 iso-ir-154 latin1-2-5 csISO8859Supp
+ISO_10367-box 96 iso-ir-155 csISO10367Box
+ISO-8859-10 13 iso-ir-157 l6 ISO_8859-10:1992 csISOLatin6 latin6 8859_10 ISO8859-10
+latin-lap 97 lap iso-ir-158 csISO158Lap
+JIS_X0212-1990 98 x0212 iso-ir-159 csISO159JISX02121990
+DS_2089 99 DS2089 ISO646-DK dk csISO646Danish
+us-dk 100 csUSDK
+dk-us 101 csDKUS
+JIS_X0201 15 X0201 csHalfWidthKatakana
+KSC5636 102 ISO646-KR csKSC5636
+ISO-10646-UCS-2 1000 csUnicode UCS-2 UCS2
+ISO-10646-UCS-4 1001 csUCS4 UCS-4 UCS4
+DEC-MCS 2008 dec csDECMCS
+hp-roman8 2004 roman8 r8 csHPRoman8
+macintosh 2027 mac csMacintosh MACROMAN MAC-ROMAN X-MAC-ROMAN
+IBM037 2028 cp037 ebcdic-cp-us ebcdic-cp-ca ebcdic-cp-wt ebcdic-cp-nl csIBM037
+IBM038 2029 EBCDIC-INT cp038 csIBM038
+IBM273 2030 CP273 csIBM273
+IBM274 2031 EBCDIC-BE CP274 csIBM274
+IBM275 2032 EBCDIC-BR cp275 csIBM275
+IBM277 2033 EBCDIC-CP-DK EBCDIC-CP-NO csIBM277
+IBM278 2034 CP278 ebcdic-cp-fi ebcdic-cp-se csIBM278
+IBM280 2035 CP280 ebcdic-cp-it csIBM280
+IBM281 2036 EBCDIC-JP-E cp281 csIBM281
+IBM284 2037 CP284 ebcdic-cp-es csIBM284
+IBM285 2038 CP285 ebcdic-cp-gb csIBM285
+IBM290 2039 cp290 EBCDIC-JP-kana csIBM290
+IBM297 2040 cp297 ebcdic-cp-fr csIBM297
+IBM420 2041 cp420 ebcdic-cp-ar1 csIBM420
+IBM423 2042 cp423 ebcdic-cp-gr csIBM423
+IBM424 2043 cp424 ebcdic-cp-he csIBM424
+IBM437 2011 cp437 437 csPC8CodePage437
+IBM500 2044 CP500 ebcdic-cp-be ebcdic-cp-ch csIBM500
+IBM775 2087 cp775 csPC775Baltic
+IBM850 2009 cp850 850 csPC850Multilingual
+IBM851 2045 cp851 851 csIBM851
+IBM852 2010 cp852 852 csPCp852
+IBM855 2046 cp855 855 csIBM855
+IBM857 2047 cp857 857 csIBM857
+IBM860 2048 cp860 860 csIBM860
+IBM861 2049 cp861 861 cp-is csIBM861
+IBM862 2013 cp862 862 csPC862LatinHebrew
+IBM863 2050 cp863 863 csIBM863
+IBM864 2051 cp864 csIBM864
+IBM865 2052 cp865 865 csIBM865
+IBM866 2086 cp866 866 csIBM866
+IBM868 2053 CP868 cp-ar csIBM868
+IBM869 2054 cp869 869 cp-gr csIBM869
+IBM870 2055 CP870 ebcdic-cp-roece ebcdic-cp-yu csIBM870
+IBM871 2056 CP871 ebcdic-cp-is csIBM871
+IBM880 2057 cp880 EBCDIC-Cyrillic csIBM880
+IBM891 2058 cp891 csIBM891
+IBM903 2059 cp903 csIBM903
+IBM904 2060 cp904 904 csIBBM904
+IBM905 2061 CP905 ebcdic-cp-tr csIBM905
+IBM918 2062 CP918 ebcdic-cp-ar2 csIBM918
+IBM1026 2063 CP1026 csIBM1026
+EBCDIC-AT-DE 2064 csIBMEBCDICATDE
+EBCDIC-AT-DE-A 2065 csEBCDICATDEA
+EBCDIC-CA-FR 2066 csEBCDICCAFR
+EBCDIC-DK-NO 2067 csEBCDICDKNO
+EBCDIC-DK-NO-A 2068 csEBCDICDKNOA
+EBCDIC-FI-SE 2069 csEBCDICFISE
+EBCDIC-FI-SE-A 2070 csEBCDICFISEA
+EBCDIC-FR 2071 csEBCDICFR
+EBCDIC-IT 2072 csEBCDICIT
+EBCDIC-PT 2073 csEBCDICPT
+EBCDIC-ES 2074 csEBCDICES
+EBCDIC-ES-A 2075 csEBCDICESA
+EBCDIC-ES-S 2076 csEBCDICESS
+EBCDIC-UK 2077 csEBCDICUK
+EBCDIC-US 2078 csEBCDICUS
+UNKNOWN-8BIT 2079 csUnknown8BiT
+MNEMONIC 2080 csMnemonic
+MNEM 2081 csMnem
+VISCII 2082 csVISCII
+VIQR 2083 csVIQR
+KOI8-R 2084 csKOI8R
+KOI8-U 2088
+IBM00858 2089 CCSID00858 CP00858 PC-Multilingual-850+euro
+IBM00924 2090 CCSID00924 CP00924 ebcdic-Latin9--euro
+IBM01140 2091 CCSID01140 CP01140 ebcdic-us-37+euro
+IBM01141 2092 CCSID01141 CP01141 ebcdic-de-273+euro
+IBM01142 2093 CCSID01142 CP01142 ebcdic-dk-277+euro ebcdic-no-277+euro
+IBM01143 2094 CCSID01143 CP01143 ebcdic-fi-278+euro ebcdic-se-278+euro
+IBM01144 2095 CCSID01144 CP01144 ebcdic-it-280+euro
+IBM01145 2096 CCSID01145 CP01145 ebcdic-es-284+euro
+IBM01146 2097 CCSID01146 CP01146 ebcdic-gb-285+euro
+IBM01147 2098 CCSID01147 CP01147 ebcdic-fr-297+euro
+IBM01148 2099 CCSID01148 CP01148 ebcdic-international-500+euro
+IBM01149 2100 CCSID01149 CP01149 ebcdic-is-871+euro
+Big5-HKSCS 2101
+IBM1047 2102 IBM-1047
+PTCP154 2103 csPTCP154 PT154 CP154 Cyrillic-Asian
+Amiga-1251 2104 Ami1251 Amiga1251 Ami-1251
+KOI7-switched 2105
+UNICODE-1-1 1010 csUnicode11
+SCSU 1011
+UTF-7 1012
+UTF-16BE 1013
+UTF-16LE 1014
+UTF-16 1015
+CESU-8 1016 csCESU-8
+UTF-32 1017
+UTF-32BE 1018
+UTF-32LE 1019
+BOCU-1 1020 csBOCU-1
+UNICODE-1-1-UTF-7 103 csUnicode11UTF7
+UTF-8 106 UNICODE-1-1-UTF-8 UNICODE-2-0-UTF-8 utf8
+ISO-8859-13 109 8859_13 ISO8859-13
+ISO-8859-14 110 iso-ir-199 ISO_8859-14:1998 ISO_8859-14 latin8 iso-celtic l8 8859_14 ISO8859-14
+ISO-8859-15 111 ISO_8859-15 Latin-9 8859_15 ISO8859-15
+ISO-8859-16 112 iso-ir-226 ISO_8859-16:2001 ISO_8859-16 latin10 l10
+GBK 113 CP936 MS936 windows-936
+GB18030 114
+OSD_EBCDIC_DF04_15 115
+OSD_EBCDIC_DF03_IRV 116
+OSD_EBCDIC_DF04_1 117
+JIS_Encoding 16 csJISEncoding
+Shift_JIS 17 MS_Kanji csShiftJIS X-SJIS Shift-JIS
+EUC-JP 18 csEUCPkdFmtJapanese Extended_UNIX_Code_Packed_Format_for_Japanese EUCJP
+Extended_UNIX_Code_Fixed_Width_for_Japanese 19 csEUCFixWidJapanese
+ISO-10646-UCS-Basic 1002 csUnicodeASCII
+ISO-10646-Unicode-Latin1 1003 csUnicodeLatin1 ISO-10646
+ISO-Unicode-IBM-1261 1005 csUnicodeIBM1261
+ISO-Unicode-IBM-1268 1006 csUnicodeIBM1268
+ISO-Unicode-IBM-1276 1007 csUnicodeIBM1276
+ISO-Unicode-IBM-1264 1008 csUnicodeIBM1264
+ISO-Unicode-IBM-1265 1009 csUnicodeIBM1265
+ISO-8859-1-Windows-3.0-Latin-1 2000 csWindows30Latin1
+ISO-8859-1-Windows-3.1-Latin-1 2001 csWindows31Latin1
+ISO-8859-2-Windows-Latin-2 2002 csWindows31Latin2
+ISO-8859-9-Windows-Latin-5 2003 csWindows31Latin5
+Adobe-Standard-Encoding 2005 csAdobeStandardEncoding
+Ventura-US 2006 csVenturaUS
+Ventura-International 2007 csVenturaInternational
+PC8-Danish-Norwegian 2012 csPC8DanishNorwegian
+PC8-Turkish 2014 csPC8Turkish
+IBM-Symbols 2015 csIBMSymbols
+IBM-Thai 2016 csIBMThai
+HP-Legal 2017 csHPLegal
+HP-Pi-font 2018 csHPPiFont
+HP-Math8 2019 csHPMath8
+Adobe-Symbol-Encoding 2020 csHPPSMath
+HP-DeskTop 2021 csHPDesktop
+Ventura-Math 2022 csVenturaMath
+Microsoft-Publishing 2023 csMicrosoftPublishing
+Windows-31J 2024 csWindows31J
+GB2312 2025 csGB2312 EUC-CN EUCCN CN-GB
+Big5 2026 csBig5 BIG-FIVE BIG-5 CN-BIG5 BIG_FIVE
+windows-1250 2250 CP1250 MS-EE
+windows-1251 2251 CP1251 MS-CYRL
+windows-1252 2252 CP1252 MS-ANSI
+windows-1253 2253 CP1253 MS-GREEK
+windows-1254 2254 CP1254 MS-TURK
+windows-1255 2255
+windows-1256 2256 CP1256 MS-ARAB
+windows-1257 2257 CP1257 WINBALTRIM
+windows-1258 2258
+TIS-620 2259
+HZ-GB-2312 2085
+
+# Additional encodings not defined by IANA
+
+# Arbitrary allocations
+#CP737 3001
+#CP853 3002
+#CP856 3003
+CP874 3004 WINDOWS-874
+#CP922 3005
+#CP1046 3006
+#CP1124 3007
+#CP1125 3008 WINDOWS-1125
+#CP1129 3009
+#CP1133 3010 IBM-CP1133
+#CP1161 3011 IBM-1161 IBM1161 CSIBM1161
+#CP1162 3012 IBM-1162 IBM1162 CSIBM1162
+#CP1163 3013 IBM-1163 IBM1163 CSIBM1163
+#GEORGIAN-ACADEMY 3014
+#GEORGIAN-PS 3015
+#KOI8-RU 3016
+#KOI8-T 3017
+#MACARABIC 3018 X-MAC-ARABIC MAC-ARABIC
+#MACCROATIAN 3019 X-MAC-CROATIAN MAC-CROATIAN
+#MACGREEK 3020 X-MAC-GREEK MAC-GREEK
+#MACHEBREW 3021 X-MAC-HEBREW MAC-HEBREW
+#MACICELAND 3022 X-MAC-ICELAND MAC-ICELAND
+#MACROMANIA 3023 X-MAC-ROMANIA MAC-ROMANIA
+#MACTHAI 3024 X-MAC-THAI MAC-THAI
+#MACTURKISH 3025 X-MAC-TURKISH MAC-TURKISH
+#MULELAO-1 3026
+
+# From Unicode Lib
+ISO-IR-182 4000
+ISO-IR-197 4002
+ISO-2022-JP-1 4008
+MACCYRILLIC 4009 X-MAC-CYRILLIC MAC-CYRILLIC
+MACUKRAINE 4010 X-MAC-UKRAINIAN MAC-UKRAINIAN
+MACCENTRALEUROPE 4011 X-MAC-CENTRALEURROMAN MAC-CENTRALEURROMAN
+JOHAB 4012
+ISO-8859-11 4014 iso-ir-166 ISO_8859-11 ISO8859-11 8859_11
+X-CURRENT 4999 X-SYSTEM
+X-ACORN-LATIN1 5001
+X-ACORN-FUZZY 5002
diff --git a/!NetSurf/Resources/CSS,f79 b/!NetSurf/Resources/CSS,f79
index 640c8450a..c4ba88846 100644
--- a/!NetSurf/Resources/CSS,f79
+++ b/!NetSurf/Resources/CSS,f79
@@ -174,3 +174,5 @@ fieldset { display: block; border: thin solid #888; margin: 1.12em 0; }
[align=left] { text-align: left; }
[align=center] { text-align: center; }
[align=right] { text-align: right; }
+
+script, style { display: none; }
diff --git a/Makefile b/Makefile
index 439f49287..a207877bf 100644
--- a/Makefile
+++ b/Makefile
@@ -245,6 +245,12 @@ ifeq ($(TARGET),riscos)
$(eval $(call feature_enabled,SPRITE,-DWITH_SPRITE,,RISC OS sprite rendering))
$(eval $(call feature_enabled,ARTWORKS,-DWITH_ARTWORKS,,ArtWorks rendering))
$(eval $(call feature_enabled,PLUGINS,-DWITH_PLUGIN,,Plugin protocol support))
+ ifeq ($(HOST),riscos)
+ $(eval $(call feature_enabled,HUBBUB,-DWITH_HUBBUB,-lhubbub -lparserutils,Hubbub HTML parser))
+ else
+ NETSURF_FEATURE_HUBBUB_CFLAGS := -DWITH_HUBBUB
+ $(eval $(call pkg_config_find_and_add,HUBBUB,libhubbub,Hubbub HTML parser))
+ endif
endif
# ----------------------------------------------------------------------------
@@ -267,10 +273,12 @@ ifeq ($(TARGET),gtk)
# define additional CFLAGS and LDFLAGS requirements for pkg-configed libs here
NETSURF_FEATURE_RSVG_CFLAGS := -DWITH_RSVG
NETSURF_FEATURE_ROSPRITE_CFLAGS := -DWITH_NSSPRITE
+ NETSURF_FEATURE_HUBBUB_CFLAGS := -DWITH_HUBBUB
# add a line similar to below for each optional pkg-configed lib here
$(eval $(call pkg_config_find_and_add,RSVG,librsvg-2.0,SVG rendering))
$(eval $(call pkg_config_find_and_add,ROSPRITE,librosprite,RISC OS sprite rendering))
+ $(eval $(call pkg_config_find_and_add,HUBBUB,libhubbub,Hubbub HTML parser))
GTKCFLAGS := -std=c99 -Dgtk -Dnsgtk \
-DGTK_DISABLE_DEPRECATED \
@@ -399,10 +407,14 @@ ifeq ($(TARGET),debug)
-D_XOPEN_SOURCE=600 \
-D_POSIX_C_SOURCE=200112L \
-D_NETBSD_SOURCE \
- $(WARNFLAGS) -I. -I../../libsprite/trunk/ -g $(OPT0FLAGS) \
- $(shell $(PKG_CONFIG) --cflags librosprite) \
+ $(WARNFLAGS) -I. -g $(OPT0FLAGS) \
$(shell xml2-config --cflags)
- LDFLAGS += $(shell $(PKG_CONFIG) --libs librosprite)
+ LDFLAGS += $(shell $(PKG_CONFIG) --libs libxml-2.0 libcurl openssl)
+
+ $(eval $(call pkg_config_find_and_add,RSVG,librsvg-2.0,SVG rendering))
+ $(eval $(call pkg_config_find_and_add,ROSPRITE,librosprite,RISC OS sprite rendering))
+ $(eval $(call pkg_config_find_and_add,HUBBUB,libhubbub,Hubbub HTML parser))
+ $(eval $(call pkg_config_find_and_add,HUBBUB,libparserutils,Hubbub HTML parser))
endif
# ----------------------------------------------------------------------------
diff --git a/Makefile.config b/Makefile.config
index dad9c0273..362db38f9 100644
--- a/Makefile.config
+++ b/Makefile.config
@@ -53,6 +53,10 @@ NETSURF_USE_LIBICONV_PLUG := YES
# ----------------------------------------------------------------------------
ifeq ($(TARGET),riscos)
+ # Enable using Hubbub to parse HTML rather than libxml2
+ # Valid options: YES, NO
+ NETSURF_USE_HUBBUB := YES
+
# Use James Bursa's libsvgtiny for rendering SVG images
# Valid options: YES, NO
NETSURF_USE_NSSVG := YES
@@ -87,6 +91,10 @@ ifeq ($(TARGET),gtk)
# Where to install the netsurf binary
NETSURF_GTK_BIN := /usr/local/bin/
+ # Enable using Hubbub to parse HTML rather than libxml2
+ # Valid options: YES, NO, AUTO
+ NETSURF_USE_HUBBUB := AUTO
+
# Use librsvg in conjunction with Cairo to render SVG images
# Valid options: YES, NO, AUTO
NETSURF_USE_RSVG := AUTO
diff --git a/debug/fontd.c b/debug/fontd.c
index 50fabc59f..fa64b67c4 100644
--- a/debug/fontd.c
+++ b/debug/fontd.c
@@ -21,6 +21,22 @@
#include "render/font.h"
+/* Forward declarations of the font callbacks defined later in this file,
+ * needed so the nsfont table below can refer to them. */
+static bool nsfont_width(const struct css_style *style,
+ const char *string, size_t length, int *width);
+static bool nsfont_position_in_string(const struct css_style *style,
+ const char *string, size_t length,
+ int x, size_t *char_offset, int *actual_x);
+static bool nsfont_split(const struct css_style *style,
+ const char *string, size_t length,
+ int x, size_t *char_offset, int *actual_x);
+
+/* Font function table for the debug build: width, position-in-string and
+ * split callbacks, in that order. */
+const struct font_functions nsfont = {
+ nsfont_width,
+ nsfont_position_in_string,
+ nsfont_split
+};
+
+
bool nsfont_width(const struct css_style *style,
const char *string, size_t length,
int *width)
@@ -63,3 +79,4 @@ bool nsfont_split(const struct css_style *style,
*actual_x = *char_offset * 10;
return true;
}
+
diff --git a/gtk/gtk_gui.c b/gtk/gtk_gui.c
index 73e3068be..3967cd1cb 100644
--- a/gtk/gtk_gui.c
+++ b/gtk/gtk_gui.c
@@ -31,6 +31,9 @@
#include <gdk/gdkkeysyms.h>
#include <gtk/gtk.h>
#include <glade/glade.h>
+#ifdef WITH_HUBBUB
+#include <hubbub/hubbub.h>
+#endif
#include "content/content.h"
#include "content/fetch.h"
#include "content/fetchers/fetch_curl.h"
@@ -165,6 +168,13 @@ static void check_homedir(void)
}
}
+
+/**
+ * Allocation hook handed to hubbub_initialise().
+ *
+ * \param ptr  Block to resize, or NULL to allocate a fresh one
+ * \param len  Requested size in bytes; 0 releases \a ptr
+ * \param pw   Client data (unused)
+ * \return Pointer to the (re)allocated block, or NULL on failure or release
+ */
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	(void) pw;
+
+	/* realloc(ptr, 0) is implementation-defined: it may or may not free
+	 * the block, so release explicitly to get one behaviour everywhere. */
+	if (len == 0) {
+		free(ptr);
+		return NULL;
+	}
+
+	return realloc(ptr, len);
+}
+
+
void gui_init(int argc, char** argv)
{
char buf[PATH_MAX];
@@ -182,6 +192,10 @@ void gui_init(int argc, char** argv)
LOG(("Using '%s' as Resources directory", buf));
res_dir_location = strdup(buf);
+ find_resource(buf, "Aliases", "./gtk/res/Aliases");
+ LOG(("Using '%s' as Aliases file", buf));
+ hubbub_initialise(buf, myrealloc, NULL);
+
glade_init();
gladeWindows = glade_xml_new(glade_file_location, NULL, NULL);
if (gladeWindows == NULL)
diff --git a/gtk/res/Aliases b/gtk/res/Aliases
new file mode 120000
index 000000000..a95a734da
--- /dev/null
+++ b/gtk/res/Aliases
@@ -0,0 +1 @@
+../../!NetSurf/Resources/Aliases \ No newline at end of file
diff --git a/render/directory.c b/render/directory.c
index 0f3dda03b..754449df5 100644
--- a/render/directory.c
+++ b/render/directory.c
@@ -27,6 +27,9 @@
#include <stdlib.h>
#include <sys/stat.h>
#include <time.h>
+#ifdef WITH_HUBBUB
+#include <hubbub/parser.h>
+#endif
#include <libxml/HTMLparser.h>
#include "content/content.h"
#include "render/directory.h"
@@ -45,7 +48,12 @@ bool directory_create(struct content *c, const char *params[]) {
/* html_create() must have broadcast MSG_ERROR already, so we
* don't need to. */
return false;
+#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, header, sizeof(header) - 1, 0);
+#else
+ hubbub_parser_parse_chunk(c->data.html.parser,
+ (uint8_t *) header, sizeof(header) - 1);
+#endif
return true;
}
@@ -92,7 +100,11 @@ bool directory_convert(struct content *c, int width, int height) {
"<body>\n<h1>\nIndex of %s</h1>\n<hr><pre>",
nice_path, nice_path);
free(nice_path);
+#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, buffer, strlen(buffer), 0);
+#else
+ hubbub_parser_parse_chunk(c->data.html.parser, buffer, strlen(buffer));
+#endif
res = url_parent(c->url, &up);
if (res == URL_FUNC_OK) {
@@ -100,8 +112,13 @@ bool directory_convert(struct content *c, int width, int height) {
if ((res == URL_FUNC_OK) && !compare) {
snprintf(buffer, sizeof(buffer),
"<a href=\"..\">[..]</a>\n");
+#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, buffer,
strlen(buffer), 0);
+#else
+ hubbub_parser_parse_chunk(c->data.html.parser,
+ buffer, strlen(buffer));
+#endif
}
free(up);
}
@@ -118,11 +135,21 @@ bool directory_convert(struct content *c, int width, int height) {
snprintf(buffer, sizeof(buffer), "<a href=\"%s/%s\">%s</a>\n",
c->url, entry->d_name, entry->d_name);
+#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, buffer, strlen(buffer), 0);
+#else
+ hubbub_parser_parse_chunk(c->data.html.parser,
+ buffer, strlen(buffer));
+#endif
}
closedir(parent);
+#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, footer, sizeof(footer) - 1, 0);
+#else
+ hubbub_parser_parse_chunk(c->data.html.parser,
+ (uint8_t *) footer, sizeof(footer) - 1);
+#endif
c->type = CONTENT_HTML;
return html_convert(c, width, height);
}
diff --git a/render/html.c b/render/html.c
index cc581a771..7d4a55a5d 100644
--- a/render/html.c
+++ b/render/html.c
@@ -20,12 +20,21 @@
* Content for text/html (implementation).
*/
+#define _GNU_SOURCE /* for strndup() */
+
#include <assert.h>
#include <ctype.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <stdlib.h>
+#ifdef WITH_HUBBUB
+#include <hubbub/hubbub.h>
+#include <hubbub/parser.h>
+#include <hubbub/tree.h>
+#endif
+#include <libxml/tree.h>
+#include <libxml/parser.h>
#include <libxml/parserInternals.h>
#include "utils/config.h"
#include "content/content.h"
@@ -87,6 +96,380 @@ static const char empty_document[] =
"</html>";
+#ifdef WITH_HUBBUB
+
+
+/* Size of the namespace tables below.  They are indexed by the ns field of
+ * the tags and attributes Hubbub hands to the tree callbacks. */
+#define NUM_NAMESPACES 7
+
+/* Prefix used when declaring each namespace; the first two entries have
+ * none.  Both the strings and the table itself are read-only. */
+const char * const ns_prefixes[NUM_NAMESPACES] =
+	{ NULL, NULL, "math", "svg", "xlink", "xml", "xmlns" };
+
+/* Namespace URI for each entry, parallel to ns_prefixes. */
+const char * const ns_urls[NUM_NAMESPACES] = {
+	NULL,
+	"http://www.w3.org/1999/xhtml",
+	"http://www.w3.org/1998/Math/MathML",
+	"http://www.w3.org/2000/svg",
+	"http://www.w3.org/1999/xlink",
+	"http://www.w3.org/XML/1998/namespace",
+	"http://www.w3.org/2000/xmlns/"
+};
+
+/* libxml2 namespace objects, parallel to the tables above.  Entries 1 and up
+ * are filled in by create_element() on a document's first element; entry 0
+ * is never written and stays NULL. */
+xmlNs *ns_ns[NUM_NAMESPACES];
+
+/* Tree-building callbacks handed to Hubbub through tree_handler.  They build
+ * a libxml2 tree; each returns 0 on success (reparent_children() returns 1
+ * when a child cannot be re-added). */
+static int create_comment(void *ctx, const hubbub_string *data, void **result);
+static int create_doctype(void *ctx, const hubbub_doctype *doctype,
+ void **result);
+static int create_element(void *ctx, const hubbub_tag *tag, void **result);
+static int create_text(void *ctx, const hubbub_string *data, void **result);
+static int ref_node(void *ctx, void *node);
+static int unref_node(void *ctx, void *node);
+static int append_child(void *ctx, void *parent, void *child, void **result);
+static int insert_before(void *ctx, void *parent, void *child, void *ref_child,
+ void **result);
+static int remove_child(void *ctx, void *parent, void *child, void **result);
+static int clone_node(void *ctx, void *node, bool deep, void **result);
+static int reparent_children(void *ctx, void *node, void *new_parent);
+static int get_parent(void *ctx, void *node, bool element_only, void **result);
+static int has_children(void *ctx, void *node, bool *result);
+static int form_associate(void *ctx, void *form, void *node);
+static int add_attributes(void *ctx, void *node,
+ const hubbub_attribute *attributes, uint32_t n_attributes);
+static int set_quirks_mode(void *ctx, hubbub_quirks_mode mode);
+static int change_encoding(void *ctx, const char *mibenum);
+
+/* Template callback table.  html_create_parser() copies it into each
+ * document's content data and then sets that copy's ctx member, so this
+ * shared instance is never handed to a parser directly. */
+static hubbub_tree_handler tree_handler = {
+ create_comment,
+ create_doctype,
+ create_element,
+ create_text,
+ ref_node,
+ unref_node,
+ append_child,
+ insert_before,
+ remove_child,
+ clone_node,
+ reparent_children,
+ get_parent,
+ has_children,
+ form_associate,
+ add_attributes,
+ set_quirks_mode,
+ change_encoding,
+ NULL /* presumably the ctx member, set per document -- confirm field order */
+};
+
+
+
+/*** Tree construction functions ***/
+
+/**
+ * Hubbub tree callback: create a comment node.
+ *
+ * \param ctx     Unused
+ * \param data    Comment text as a pointer/length pair
+ * \param result  Updated to the new libxml2 node, holding one reference
+ * \return 0 on success, non-zero if memory could not be allocated
+ */
+int create_comment(void *ctx, const hubbub_string *data, void **result)
+{
+	xmlNode *node = xmlNewComment(NULL);
+	if (node == NULL) {
+		return 1;
+	}
+
+	/* The text is not NUL-terminated, so copy it by length */
+	node->content = xmlStrndup(data->ptr, data->len);
+	if (node->content == NULL && data->len > 0) {
+		xmlFreeNode(node);
+		return 1;
+	}
+
+	/* _private doubles as the node's reference count */
+	node->_private = (void *)1;
+	*result = node;
+
+	return 0;
+}
+
+/**
+ * Hubbub tree callback: create a doctype node.
+ *
+ * No real doctype is built; an empty comment node stands in for it.
+ *
+ * \param ctx      Unused
+ * \param doctype  Doctype details (ignored)
+ * \param result   Updated to the placeholder node, holding one reference
+ * \return 0 on success, non-zero if memory could not be allocated
+ */
+int create_doctype(void *ctx, const hubbub_doctype *doctype, void **result)
+{
+	/* Make a node that doesn't really exist, then don't append it
+	 * later. */
+	xmlNode *node = xmlNewComment(NULL);
+	if (node == NULL) {
+		return 1;
+	}
+
+	node->_private = (void *)1;
+	*result = node;
+
+	return 0;
+}
+
+/**
+ * Hubbub tree callback: create an element node with its attributes.
+ *
+ * The first element created for a document also declares every namespace
+ * in ns_urls/ns_prefixes on itself and records them in ns_ns.
+ *
+ * \param ctx     The struct content being parsed
+ * \param tag     Element name, namespace and attributes
+ * \param result  Updated to the new libxml2 node, holding one reference
+ * \return 0 on success, non-zero if memory could not be allocated
+ */
+int create_element(void *ctx, const hubbub_tag *tag, void **result)
+{
+	struct content *c = ctx;
+	struct content_html_data *html = &c->data.html;
+	bool declared_ns = false;
+	xmlNode *node;
+
+	/* Tag names are not NUL-terminated; libxml2 wants a C string */
+	char *name = strndup((const char *) tag->name.ptr, tag->name.len);
+	if (name == NULL) {
+		return 1;
+	}
+
+	node = xmlNewNode(NULL, BAD_CAST name);
+	free(name);
+	if (node == NULL) {
+		return 1;
+	}
+	node->_private = (void *)1;
+
+	if (html->firstelem == true) {
+		for (size_t i = 1; i < NUM_NAMESPACES; i++) {
+			ns_ns[i] = xmlNewNs(node,
+					BAD_CAST ns_urls[i],
+					BAD_CAST ns_prefixes[i]);
+		}
+		html->firstelem = false;
+		declared_ns = true;
+	}
+
+	xmlSetNs(node, ns_ns[tag->ns]);
+
+	if (add_attributes(ctx, node, tag->attributes,
+			tag->n_attributes) != 0) {
+		if (declared_ns) {
+			/* The namespace declarations die with the node, so
+			 * forget them and let the next element redo them. */
+			for (size_t i = 1; i < NUM_NAMESPACES; i++) {
+				ns_ns[i] = NULL;
+			}
+			html->firstelem = true;
+		}
+		xmlFreeNode(node);
+		return 1;
+	}
+
+	*result = node;
+
+	return 0;
+}
+
+/**
+ * Hubbub tree callback: create a text node.
+ *
+ * \param ctx     Unused
+ * \param data    Text as a pointer/length pair
+ * \param result  Updated to the new libxml2 node, holding one reference
+ * \return 0 on success, non-zero if memory could not be allocated
+ */
+int create_text(void *ctx, const hubbub_string *data, void **result)
+{
+	xmlNode *node = xmlNewTextLen(BAD_CAST data->ptr, data->len);
+	if (node == NULL) {
+		return 1;
+	}
+
+	node->_private = (void *)1;
+	*result = node;
+
+	return 0;
+}
+
+/**
+ * Hubbub tree callback: take a reference to a node.
+ *
+ * The count lives in the node's _private pointer, stored as an integer.
+ *
+ * \param ctx   Unused
+ * \param node  libxml2 node to reference
+ * \return 0 always
+ */
+int ref_node(void *ctx, void *node)
+{
+	xmlNode *target = node;
+	uintptr_t refs = (uintptr_t) target->_private;
+
+	target->_private = (void *) (refs + 1);
+
+	return 0;
+}
+
+/**
+ * Hubbub tree callback: drop a reference to a node.
+ *
+ * A node is freed once its count reaches zero, but only if it is not
+ * attached to a parent.
+ *
+ * \param ctx   Unused
+ * \param node  libxml2 node to release
+ * \return 0 always
+ */
+int unref_node(void *ctx, void *node)
+{
+	xmlNode *target = node;
+	uintptr_t refs = (uintptr_t) target->_private - 1;
+
+	target->_private = (void *) refs;
+
+	if (refs == 0 && target->parent == NULL)
+		xmlFreeNode(target);
+
+	return 0;
+}
+
+/**
+ * Hubbub tree callback: append a child to a parent's child list.
+ *
+ * \param ctx     Passed through to clone_node()/ref_node()
+ * \param parent  libxml2 node to append to
+ * \param child   libxml2 node to append
+ * \param result  Updated to the node now in the tree, with a reference taken
+ * \return 0 on success, non-zero on failure
+ */
+int append_child(void *ctx, void *parent, void *child, void **result)
+{
+	xmlNode *nparent = parent;
+	xmlNode *nchild = child;
+	xmlNode *last = nparent->last;
+	xmlNode *added;
+
+	if (nchild->type == XML_TEXT_NODE &&
+			last != NULL && last->type == XML_TEXT_NODE) {
+		/* xmlAddChild() merges adjacent text nodes and frees the one
+		 * it was given.  Hand it a clone so the caller's child stays
+		 * valid. */
+		void *copy = NULL;
+		xmlNode *clone;
+
+		if (clone_node(ctx, nchild, false, &copy) != 0 ||
+				copy == NULL) {
+			return 1;
+		}
+		clone = copy;
+
+		added = xmlAddChild(nparent, clone);
+		if (added == NULL) {
+			xmlFreeNode(clone);
+			return 1;
+		}
+
+		/* After a merge the clone (and its reference) is gone and the
+		 * pre-existing text node is returned, so reference that. */
+		if (added == last) {
+			ref_node(ctx, added);
+		}
+	} else {
+		added = xmlAddChild(nparent, nchild);
+		if (added == NULL) {
+			return 1;
+		}
+		ref_node(ctx, added);
+	}
+
+	*result = added;
+
+	return 0;
+}
+
+/**
+ * Hubbub tree callback: insert 'child' before 'ref_child', under 'parent'.
+ *
+ * \param ctx        Passed through to ref_node()
+ * \param parent     Parent node (unused; implied by \a ref_child)
+ * \param child      libxml2 node to insert
+ * \param ref_child  Existing sibling that \a child goes in front of
+ * \param result     Updated to the inserted node, with a reference taken
+ * \return 0 on success, non-zero if libxml2 rejected the insertion
+ */
+int insert_before(void *ctx, void *parent, void *child, void *ref_child,
+		void **result)
+{
+	xmlNode *added = xmlAddPrevSibling(ref_child, child);
+	if (added == NULL) {
+		return 1;
+	}
+
+	ref_node(ctx, added);
+	*result = added;
+
+	return 0;
+}
+
+/**
+ * Hubbub tree callback: detach a child from its parent.
+ *
+ * \param ctx     Passed through to ref_node()
+ * \param parent  Parent node (unused)
+ * \param child   libxml2 node to unlink
+ * \param result  Updated to the detached node, with a reference taken
+ * \return 0 always
+ */
+int remove_child(void *ctx, void *parent, void *child, void **result)
+{
+	xmlNode *detached = child;
+
+	xmlUnlinkNode(detached);
+	*result = detached;
+	ref_node(ctx, detached);
+
+	return 0;
+}
+
+/**
+ * Hubbub tree callback: copy a node.
+ *
+ * \param ctx     Unused
+ * \param node    libxml2 node to copy
+ * \param deep    True to copy the whole subtree; false to copy only the
+ *                node with its attributes and namespace information
+ * \param result  Updated to the copy, holding one reference
+ * \return 0 on success, non-zero if the copy could not be made
+ */
+int clone_node(void *ctx, void *node, bool deep, void **result)
+{
+	/* xmlCopyNode(): 1 is a recursive copy, 2 copies properties and
+	 * namespaces but not children */
+	xmlNode *n = xmlCopyNode(node, deep ? 1 : 2);
+	if (n == NULL) {
+		return 1;
+	}
+
+	n->_private = (void *)1;
+	*result = n;
+
+	return 0;
+}
+
+/**
+ * Hubbub tree callback: move every child of "node" to the end of
+ * "new_parent"'s child list.
+ *
+ * \param ctx         Unused
+ * \param node        libxml2 node whose children are moved
+ * \param new_parent  libxml2 node that receives them
+ * \return 0 on success, 1 if a child could not be added to \a new_parent
+ */
+int reparent_children(void *ctx, void *node, void *new_parent)
+{
+	xmlNode *from = (xmlNode *) node;
+	xmlNode *to = (xmlNode *) new_parent;
+	xmlNode *cur = from->children;
+
+	while (cur != NULL) {
+		/* Remember the sibling first: unlinking clears cur->next */
+		xmlNode *following = cur->next;
+
+		xmlUnlinkNode(cur);
+
+		if (xmlAddChild(to, cur) == NULL)
+			return 1;
+
+		cur = following;
+	}
+
+	return 0;
+}
+
+/**
+ * Hubbub tree callback: look up a node's parent.
+ *
+ * \param ctx           Passed through to ref_node()
+ * \param node          libxml2 node to query
+ * \param element_only  True to report only parents that are element nodes
+ * \param result        Updated to the parent (with a reference taken), or
+ *                      NULL if there is none or it was filtered out
+ * \return 0 always
+ */
+int get_parent(void *ctx, void *node, bool element_only, void **result)
+{
+	xmlNode *up = ((xmlNode *) node)->parent;
+
+	if (up != NULL && element_only && up->type != XML_ELEMENT_NODE)
+		up = NULL;
+
+	*result = up;
+
+	if (up != NULL)
+		ref_node(ctx, up);
+
+	return 0;
+}
+
+/**
+ * Hubbub tree callback: report whether a node has any children.
+ *
+ * \param ctx     Unused
+ * \param node    libxml2 node to query
+ * \param result  Updated to true if the node has at least one child
+ * \return 0 always
+ */
+int has_children(void *ctx, void *node, bool *result)
+{
+	const xmlNode *n = node;
+
+	*result = (n->children != NULL);
+
+	return 0;
+}
+
+/**
+ * Hubbub tree callback: associate a node with a form.
+ *
+ * Nothing is recorded; the request is accepted and ignored.
+ *
+ * \return 0 always
+ */
+int form_associate(void *ctx, void *form, void *node)
+{
+	(void) ctx;
+	(void) form;
+	(void) node;
+
+	return 0;
+}
+
+/**
+ * Hubbub tree callback: add attributes to an element node.
+ *
+ * \param ctx           Unused
+ * \param node          libxml2 node to receive the attributes
+ * \param attributes    Array of attributes to add
+ * \param n_attributes  Number of entries in \a attributes
+ * \return 0 on success, non-zero if memory could not be allocated
+ */
+int add_attributes(void *ctx, void *node,
+		const hubbub_attribute *attributes, uint32_t n_attributes)
+{
+	for (size_t i = 0; i < n_attributes; i++) {
+		const hubbub_attribute *attr = &attributes[i];
+		xmlAttr *prop;
+
+		/* Names and values are not NUL-terminated; libxml2 wants
+		 * C strings */
+		char *name = strndup((const char *) attr->name.ptr,
+				attr->name.len);
+		char *value = strndup((const char *) attr->value.ptr,
+				attr->value.len);
+
+		if (name == NULL || value == NULL) {
+			free(name);
+			free(value);
+			return 1;
+		}
+
+		if (attr->ns == HUBBUB_NS_NULL) {
+			prop = xmlNewProp(node, BAD_CAST name, BAD_CAST value);
+		} else {
+			prop = xmlNewNsProp(node, ns_ns[attr->ns],
+					BAD_CAST name, BAD_CAST value);
+		}
+
+		free(name);
+		free(value);
+
+		if (prop == NULL) {
+			return 1;
+		}
+	}
+
+	return 0;
+}
+
+/**
+ * Hubbub tree callback: note the document's quirks mode.
+ *
+ * The mode is not stored anywhere; the notification is ignored.
+ *
+ * \return 0 always
+ */
+int set_quirks_mode(void *ctx, hubbub_quirks_mode mode)
+{
+	(void) ctx;
+	(void) mode;
+
+	return 0;
+}
+
+/**
+ * Hubbub tree callback: the document has declared its own encoding.
+ *
+ * \param ctx   The struct content being parsed
+ * \param name  Name of the declared encoding (the prototype calls this
+ *              parameter mibenum, but it is used as a name string here)
+ * \return 0 if the current encoding stands, 1 if it differs from \a name.
+ *         NOTE(review): presumably a non-zero return is what makes the
+ *         parse call report HUBBUB_ENCODINGCHANGE -- confirm against Hubbub.
+ */
+int change_encoding(void *ctx, const char *name)
+{
+ struct content *c = ctx;
+ struct content_html_data *html = &c->data.html;
+
+ /* If we have an encoding here, it means we are *certain* */
+ if (html->encoding) {
+ return 0;
+ }
+
+ /* Find the confidence otherwise (can only be from a BOM) */
+ uint32_t source;
+ const char *charset = hubbub_parser_read_charset(html->parser, &source);
+
+ if (source == HUBBUB_CHARSET_CONFIDENT) {
+ html->encoding_source = ENCODING_SOURCE_DETECTED;
+ /* NOTE(review): stores a pointer returned by Hubbub with const cast
+ * away; whether html->encoding is later freed is not visible here */
+ html->encoding = (char *) charset;
+ return 0;
+ }
+
+ /* So here we have something of confidence tentative... */
+ /* http://www.whatwg.org/specs/web-apps/current-work/#change */
+
+ /* 2. "If the new encoding is identical or equivalent to the encoding
+ * that is already being used to interpret the input stream, then set
+ * the confidence to confident and abort these steps." */
+
+ /* Whatever happens, the encoding should be set here; either for
+ * reprocessing with a different charset, or for confirming that the
+ * charset is in fact correct */
+ html->encoding = (char *) name;
+ html->encoding_source = ENCODING_SOURCE_META;
+
+ /* Equal encodings will have the same string pointers */
+ return (charset == name) ? 0 : 1;
+}
+
+
+/**
+ * Allocation hook given to Hubbub, backed by talloc.
+ *
+ * \param ptr  Block to resize
+ * \param len  Requested size in bytes
+ * \param pw   talloc context the block is allocated under
+ * \return Whatever talloc_realloc_size() returns for the request
+ */
+static void *html_hubbub_realloc(void *ptr, size_t len, void *pw)
+{
+	void *resized = talloc_realloc_size(pw, ptr, len);
+
+	return resized;
+}
+
+
+
+/**
+ * Create, set up, and whatnot, a Hubbub parser instance, along with the
+ * relevant libxml2 bits.
+ *
+ * On success html->parser and html->document are both set, and the parser
+ * uses a per-document copy of tree_handler whose ctx is \a c.
+ *
+ * \param c  Content to create the parser and document for
+ * \return 0 on success, 1 on failure (html->parser is then NULL)
+ */
+static int html_create_parser(struct content *c)
+{
+	struct content_html_data *html = &c->data.html;
+	hubbub_parser_optparams param;
+
+	html->parser = hubbub_parser_create(html->encoding,
+			html_hubbub_realloc,
+			c);
+	if (!html->parser)
+		return 1;
+
+	html->document = xmlNewDoc(BAD_CAST "1.0");
+	if (!html->document) {
+		/* Don't leave a parser behind with no document to build */
+		hubbub_parser_destroy(html->parser);
+		html->parser = NULL;
+		return 1;
+	}
+
+	/* Each document gets its own handler copy so ctx can differ */
+	html->tree_handler = tree_handler;
+	html->tree_handler.ctx = c;
+	param.tree_handler = &html->tree_handler;
+	hubbub_parser_setopt(html->parser, HUBBUB_PARSER_TREE_HANDLER, &param);
+
+	param.document_node = html->document;
+	hubbub_parser_setopt(html->parser, HUBBUB_PARSER_DOCUMENT_NODE, &param);
+
+	return 0;
+}
+
+
+
+#endif
+
+
+
+
/**
* Create a CONTENT_HTML.
*
@@ -101,6 +484,10 @@ bool html_create(struct content *c, const char *params[])
union content_msg_data msg_data;
html->parser = 0;
+#ifdef WITH_HUBBUB
+ html->document = 0;
+ html->firstelem = true;
+#endif
html->encoding_handler = 0;
html->encoding = 0;
html->getenc = true;
@@ -135,16 +522,26 @@ bool html_create(struct content *c, const char *params[])
}
}
+#ifndef WITH_HUBBUB
html->parser = htmlCreatePushParserCtxt(0, 0, "", 0, 0,
XML_CHAR_ENCODING_NONE);
if (!html->parser)
goto no_memory;
+#else
+
+ /* Set up the parser, libxml2 document, and that */
+ if (html_create_parser(c) != 0)
+ goto no_memory;
+#endif
+
+#ifndef WITH_HUBBUB
if (html->encoding) {
/* an encoding was specified in the Content-Type header */
if (!html_set_parser_encoding(c, html->encoding))
return false;
}
+#endif
return true;
@@ -165,6 +562,7 @@ bool html_process_data(struct content *c, char *data, unsigned int size)
{
unsigned long x;
+#ifndef WITH_HUBBUB
if (c->data.html.getenc) {
/* No encoding was specified in the Content-Type header.
* Attempt to detect if the encoding is not 8-bit. If the
@@ -190,13 +588,36 @@ bool html_process_data(struct content *c, char *data, unsigned int size)
if (size == 0)
return true;
}
+#endif
+
+#ifdef WITH_HUBBUB
+ hubbub_error err;
+#endif
for (x = 0; x + CHUNK <= size; x += CHUNK) {
+#ifdef WITH_HUBBUB
+ err = hubbub_parser_parse_chunk(
+ c->data.html.parser, data + x, CHUNK);
+ if (err == HUBBUB_ENCODINGCHANGE) {
+ goto encoding_change;
+ }
+#else
htmlParseChunk(c->data.html.parser, data + x, CHUNK, 0);
+#endif
gui_multitask();
}
+
+#ifdef WITH_HUBBUB
+ err = hubbub_parser_parse_chunk(
+ c->data.html.parser, data + x, (size - x));
+ if (err == HUBBUB_ENCODINGCHANGE) {
+ goto encoding_change;
+ }
+#else
htmlParseChunk(c->data.html.parser, data + x, (int) (size - x), 0);
+#endif
+#ifndef WITH_HUBBUB
if (!c->data.html.encoding && c->data.html.parser->input->encoding) {
/* The encoding was not in headers or detected,
* and the parser found a <meta http-equiv="content-type"
@@ -259,8 +680,36 @@ bool html_process_data(struct content *c, char *data, unsigned int size)
if (!html_process_data(c, c->source_data, c->source_size))
return false;
}
+#endif
return true;
+
+#ifdef WITH_HUBBUB
+
+encoding_change:
+
+ /* Free up hubbub, libxml2 etc */
+ hubbub_parser_destroy(c->data.html.parser);
+ if (c->data.html.document) {
+ xmlFreeDoc(c->data.html.document);
+ }
+
+ /* Set up the parser, libxml2 document, and that */
+ if (html_create_parser(c) != 0) {
+ union content_msg_data msg_data;
+
+ msg_data.error = messages_get("NoMemory");
+ content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+ return false;
+ }
+
+ /* Recurse to reprocess all that data. This is safe because
+ * the encoding is now specified at parser-start which means
+ * it cannot be changed again. */
+ return html_process_data(c, c->source_data, c->source_size);
+
+#endif
+
}
@@ -274,6 +723,7 @@ bool html_process_data(struct content *c, char *data, unsigned int size)
bool html_set_parser_encoding(struct content *c, const char *encoding)
{
+#ifndef WITH_HUBBUB
struct content_html_data *html = &c->data.html;
xmlError *error;
char error_message[500];
@@ -322,6 +772,7 @@ bool html_set_parser_encoding(struct content *c, const char *encoding)
/* Ensure noone else attempts to reset the encoding */
html->getenc = false;
+#endif
return true;
}
@@ -412,14 +863,28 @@ bool html_convert(struct content *c, int width, int height)
/* finish parsing */
if (c->source_size == 0)
+#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, empty_document,
sizeof empty_document, 0);
+#else
+ hubbub_parser_parse_chunk(c->data.html.parser,
+ (uint8_t *) empty_document,
+ sizeof empty_document);
+#endif
+
+#ifndef WITH_HUBBUB
htmlParseChunk(c->data.html.parser, "", 0, 1);
document = c->data.html.parser->myDoc;
/*xmlDebugDumpDocument(stderr, c->data.html.parser->myDoc);*/
htmlFreeParserCtxt(c->data.html.parser);
c->data.html.parser = 0;
-
+#else
+ hubbub_parser_completed(c->data.html.parser);
+ hubbub_parser_destroy(c->data.html.parser);
+ c->data.html.parser = 0;
+ document = c->data.html.document;
+ /*xmlDebugDumpDocument(stderr, document);*/
+#endif
if (!document) {
LOG(("Parsing failed"));
msg_data.error = messages_get("ParsingFail");
@@ -1733,7 +2198,11 @@ void html_destroy(struct content *c)
}
if (c->data.html.parser)
+#ifndef WITH_HUBBUB
htmlFreeParserCtxt(c->data.html.parser);
+#else
+ hubbub_parser_destroy(c->data.html.parser);
+#endif
/* Free base target */
if (c->data.html.base_target) {
diff --git a/render/html.h b/render/html.h
index 5851b83b8..29691ea26 100644
--- a/render/html.h
+++ b/render/html.h
@@ -26,6 +26,10 @@
#define _NETSURF_RENDER_HTML_H_
#include <stdbool.h>
+#ifdef WITH_HUBBUB
+#include <hubbub/parser.h>
+#include <hubbub/tree.h>
+#endif
#include <libxml/HTMLparser.h>
#include "content/content_type.h"
#include "css/css.h"
@@ -114,11 +118,19 @@ struct content_html_iframe {
/** Data specific to CONTENT_HTML. */
struct content_html_data {
+#ifndef WITH_HUBBUB
htmlParserCtxt *parser; /**< HTML parser context. */
+#else
+ hubbub_parser *parser; /**< HTML parser context. */
+ hubbub_tree_handler tree_handler;
+ xmlDoc *document;
+ bool firstelem;
+#endif
+
/** HTML parser encoding handler. */
xmlCharEncodingHandler *encoding_handler;
- char *encoding; /**< Encoding of source, 0 if unknown. */
+ char *encoding; /**< Encoding of source, 0 if unknown. */
enum { ENCODING_SOURCE_HEADER, ENCODING_SOURCE_DETECTED,
ENCODING_SOURCE_META } encoding_source;
/**< Source of encoding information. */