summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author: Michael Drake <michael.drake@codethink.co.uk> 2021-05-15 19:41:55 +0100
committer: Michael Drake <michael.drake@codethink.co.uk> 2021-05-15 19:59:24 +0100
commit: 4de031adb16019295d67fe02e515f9982b32a74b (patch)
tree: 862c501a041c5e2cd4a987226175b40c6774a6ac /src
parent: 6c69e82879901a3a8f5eb19914e7ffc4224d0eca (diff)
download: libhubbub-4de031adb16019295d67fe02e515f9982b32a74b.tar.gz
libhubbub-4de031adb16019295d67fe02e515f9982b32a74b.tar.bz2
Treebuilder: Massively optimise element type from name with gperf.
Loading the html5 single page spec:
* We were spending 10.81% of total runtime in element_type_from_name. Now it takes 0.66% of total runtime.
* Total instruction fetch cost is reduced from 5,660,475,511 to 4,523,112,517.
Diffstat (limited to 'src')
-rw-r--r-- src/treebuilder/Makefile 8
-rw-r--r-- src/treebuilder/element-type.gperf 131
-rw-r--r-- src/treebuilder/element-type.h 49
-rw-r--r-- src/treebuilder/internal.h 27
-rw-r--r-- src/treebuilder/treebuilder.c 140
5 files changed, 194 insertions, 161 deletions
diff --git a/src/treebuilder/Makefile b/src/treebuilder/Makefile
index 31feae1..ce00a4c 100644
--- a/src/treebuilder/Makefile
+++ b/src/treebuilder/Makefile
@@ -6,6 +6,12 @@ DIR_SOURCES := treebuilder.c \
in_cell.c in_select.c in_select_in_table.c \
in_foreign_content.c after_body.c in_frameset.c \
after_frameset.c after_after_body.c after_after_frameset.c \
- generic_rcdata.c
+ generic_rcdata.c element-type.c
+
+$(DIR)element-type.c: $(DIR)element-type.gperf
+ $(VQ)$(ECHO) " GPERF: $<"
+ $(Q)gperf --output-file=$@ $<
+# element-type.c is produced by the gperf rule above. NOTE(review): presumably the shared build system's clean target consumes CLEAN_ITEMS; `:=` replaces any earlier value - confirm none is needed.
+CLEAN_ITEMS := $(DIR)element-type.c
include $(NSBUILD)/Makefile.subdir
diff --git a/src/treebuilder/element-type.gperf b/src/treebuilder/element-type.gperf
new file mode 100644
index 0000000..d4f2aa2
--- /dev/null
+++ b/src/treebuilder/element-type.gperf
@@ -0,0 +1,131 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2021 Michael Drake <tlsa@netsurf-browser.org>
+ */
+
+%language=ANSI-C
+%compare-strncmp
+%readonly-tables
+%ignore-case
+%struct-type
+%switch=1
+%define hash-function-name hubbub_element_type_hash
+%define lookup-function-name hubbub_element_type_lookup
+
+%{
+#include <string.h>
+/* Declares struct element_type_map and the element_type enumerators used as keyword values below. */
+#include "treebuilder/element-type.h"
+
+%}
+
+struct element_type_map;
+%%
+a, A
+address, ADDRESS
+annotation-xml, ANNOTATION_XML
+applet, APPLET
+area, AREA
+article, ARTICLE
+aside, ASIDE
+b, B
+base, BASE
+basefont, BASEFONT
+bgsound, BGSOUND
+big, BIG
+blockquote, BLOCKQUOTE
+body, BODY
+br, BR
+button, BUTTON
+caption, CAPTION
+center, CENTER
+col, COL
+colgroup, COLGROUP
+command, COMMAND
+dd, DD
+desc, DESC
+details, DETAILS
+dialog, DIALOG
+dir, DIR
+div, DIV
+dl, DL
+dt, DT
+em, EM
+embed, EMBED
+fieldset, FIELDSET
+figcaption, FIGCAPTION
+figure, FIGURE
+font, FONT
+footer, FOOTER
+foreignobject, FOREIGNOBJECT
+form, FORM
+frame, FRAME
+frameset, FRAMESET
+h1, H1
+h2, H2
+h3, H3
+h4, H4
+h5, H5
+h6, H6
+head, HEAD
+hr, HR
+html, HTML
+i, I
+iframe, IFRAME
+image, IMAGE
+img, IMG
+input, INPUT
+isindex, ISINDEX
+li, LI
+link, LINK
+listing, LISTING
+malignmark, MALIGNMARK
+marquee, MARQUEE
+math, MATH
+menu, MENU
+meta, META
+mglyph, MGLYPH
+mi, MI
+mn, MN
+mo, MO
+ms, MS
+mtext, MTEXT
+nobr, NOBR
+noembed, NOEMBED
+noframes, NOFRAMES
+noscript, NOSCRIPT
+object, OBJECT
+ol, OL
+optgroup, OPTGROUP
+option, OPTION
+output, OUTPUT
+p, P
+param, PARAM
+plaintext, PLAINTEXT
+pre, PRE
+s, S
+script, SCRIPT
+select, SELECT
+small, SMALL
+spacer, SPACER
+strike, STRIKE
+strong, STRONG
+style, STYLE
+summary, SUMMARY
+svg, SVG
+table, TABLE
+tbody, TBODY
+td, TD
+textarea, TEXTAREA
+tfoot, TFOOT
+th, TH
+thead, THEAD
+title, TITLE
+tr, TR
+tt, TT
+u, U
+ul, UL
+wbr, WBR
+xmp, XMP
diff --git a/src/treebuilder/element-type.h b/src/treebuilder/element-type.h
new file mode 100644
index 0000000..08f58de
--- /dev/null
+++ b/src/treebuilder/element-type.h
@@ -0,0 +1,49 @@
+/*
+ * This file is part of Hubbub.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2008 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+#ifndef hubbub_treebuilder_element_type_h_
+#define hubbub_treebuilder_element_type_h_
+
+#include "treebuilder/treebuilder.h"
+
+typedef enum /* Element type identifiers, grouped by the category comments below */
+{
+/* Special */
+ ADDRESS, AREA, ARTICLE, ASIDE, BASE, BASEFONT, BGSOUND, BLOCKQUOTE,
+ BODY, BR, CENTER, COL, COLGROUP, COMMAND, DATAGRID, DD, DETAILS,
+ DIALOG, DIR, DIV, DL, DT, EMBED, FIELDSET, FIGCAPTION, FIGURE, FOOTER,
+ FORM, FRAME, FRAMESET, H1, H2, H3, H4, H5, H6, HEAD, HEADER, HR, IFRAME,
+ IMAGE, IMG, INPUT, ISINDEX, LI, LINK, LISTING, MAIN, MENU, META, NAV,
+ NOEMBED, NOFRAMES, NOSCRIPT, OL, OPTGROUP, OPTION, P, PARAM, PLAINTEXT,
+ PRE, SCRIPT, SECTION, SELECT, SPACER, STYLE, SUMMARY, TBODY, TEXTAREA,
+ TFOOT, THEAD, TITLE, TR, UL, WBR,
+/* Scoping */
+ APPLET, BUTTON, CAPTION, HTML, MARQUEE, OBJECT, TABLE, TD, TH,
+/* Formatting */
+ A, B, BIG, CODE, EM, FONT, I, NOBR, S, SMALL, STRIKE, STRONG, TT, U,
+/* Phrasing */
+ /**< \todo Enumerate phrasing elements */
+ LABEL, OUTPUT, RP, RT, RUBY, SPAN, SUB, SUP, VAR, XMP,
+/* MathML */
+ MATH, MGLYPH, MALIGNMARK, MI, MO, MN, MS, MTEXT, ANNOTATION_XML,
+/* SVG */
+ SVG, FOREIGNOBJECT, /* foreignobject is scoping, but only in SVG ns */
+ DESC,
+ UNKNOWN /* element_type_from_name() returns this when the name lookup yields NULL */
+} element_type; /* NOTE(review): some enumerators (e.g. DATAGRID, HEADER, MAIN, NAV, SECTION, CODE, LABEL, SPAN) have no keyword in element-type.gperf - confirm this is intended */
+
+struct element_type_map { /* One keyword entry of the table described in element-type.gperf */
+ const char *name; /* Element name keyword, e.g. "blockquote" */
+ element_type type; /* Element type paired with that name, e.g. BLOCKQUOTE */
+};
+
+const struct element_type_map *hubbub_element_type_lookup( /* Name set via lookup-function-name in element-type.gperf; the caller treats a NULL result as "no match" */
+ register const char *str, /* Element name to look up; element-type.gperf requests %ignore-case matching */
+ register size_t len); /* Length of str; the caller passes tag_name->len */
+
+#endif
+
diff --git a/src/treebuilder/internal.h b/src/treebuilder/internal.h
index 3c112c9..d9e1a00 100644
--- a/src/treebuilder/internal.h
+++ b/src/treebuilder/internal.h
@@ -9,32 +9,7 @@
#define hubbub_treebuilder_internal_h_
#include "treebuilder/treebuilder.h"
-
-typedef enum
-{
-/* Special */
- ADDRESS, AREA, ARTICLE, ASIDE, BASE, BASEFONT, BGSOUND, BLOCKQUOTE,
- BODY, BR, CENTER, COL, COLGROUP, COMMAND, DATAGRID, DD, DETAILS,
- DIALOG, DIR, DIV, DL, DT, EMBED, FIELDSET, FIGCAPTION, FIGURE, FOOTER,
- FORM, FRAME, FRAMESET, H1, H2, H3, H4, H5, H6, HEAD, HEADER, HR, IFRAME,
- IMAGE, IMG, INPUT, ISINDEX, LI, LINK, LISTING, MAIN, MENU, META, NAV,
- NOEMBED, NOFRAMES, NOSCRIPT, OL, OPTGROUP, OPTION, P, PARAM, PLAINTEXT,
- PRE, SCRIPT, SECTION, SELECT, SPACER, STYLE, SUMMARY, TBODY, TEXTAREA,
- TFOOT, THEAD, TITLE, TR, UL, WBR,
-/* Scoping */
- APPLET, BUTTON, CAPTION, HTML, MARQUEE, OBJECT, TABLE, TD, TH,
-/* Formatting */
- A, B, BIG, CODE, EM, FONT, I, NOBR, S, SMALL, STRIKE, STRONG, TT, U,
-/* Phrasing */
- /**< \todo Enumerate phrasing elements */
- LABEL, OUTPUT, RP, RT, RUBY, SPAN, SUB, SUP, VAR, XMP,
-/* MathML */
- MATH, MGLYPH, MALIGNMARK, MI, MO, MN, MS, MTEXT, ANNOTATION_XML,
-/* SVG */
- SVG, FOREIGNOBJECT, /* foreignobject is scoping, but only in SVG ns */
- DESC,
- UNKNOWN
-} element_type;
+#include "treebuilder/element-type.h"
/**
* Item on the element stack
diff --git a/src/treebuilder/treebuilder.c b/src/treebuilder/treebuilder.c
index b84ca11..d2a186d 100644
--- a/src/treebuilder/treebuilder.c
+++ b/src/treebuilder/treebuilder.c
@@ -17,127 +17,6 @@
#include "utils/utils.h"
#include "utils/string.h"
-
-#define S(x) x, SLEN(x)
-
-static const struct {
- const char *name;
- size_t len;
- element_type type;
-} name_type_map[] = {
- { S("address"), ADDRESS },
- { S("area"), AREA },
- { S("article"), ARTICLE },
- { S("aside"), ASIDE },
- { S("base"), BASE },
- { S("basefont"), BASEFONT },
- { S("bgsound"), BGSOUND },
- { S("blockquote"), BLOCKQUOTE },
- { S("body"), BODY },
- { S("br"), BR },
- { S("center"), CENTER },
- { S("col"), COL },
- { S("colgroup"), COLGROUP },
- { S("command"), COMMAND },
- { S("dd"), DD },
- { S("details"), DETAILS },
- { S("dialog"), DIALOG },
- { S("dir"), DIR },
- { S("div"), DIV },
- { S("dl"), DL },
- { S("dt"), DT },
- { S("embed"), EMBED },
- { S("fieldset"), FIELDSET },
- { S("figcaption"), FIGCAPTION },
- { S("figure"), FIGURE },
- { S("footer"), FOOTER },
- { S("form"), FORM },
- { S("frame"), FRAME },
- { S("frameset"), FRAMESET },
- { S("h1"), H1 },
- { S("h2"), H2 },
- { S("h3"), H3 },
- { S("h4"), H4 },
- { S("h5"), H5 },
- { S("h6"), H6 },
- { S("head"), HEAD },
- { S("hr"), HR },
- { S("iframe"), IFRAME },
- { S("image"), IMAGE },
- { S("img"), IMG },
- { S("input"), INPUT },
- { S("isindex"), ISINDEX },
- { S("li"), LI },
- { S("link"), LINK },
- { S("listing"), LISTING },
- { S("menu"), MENU },
- { S("meta"), META },
- { S("noembed"), NOEMBED },
- { S("noframes"), NOFRAMES },
- { S("noscript"), NOSCRIPT },
- { S("ol"), OL },
- { S("optgroup"), OPTGROUP },
- { S("option"), OPTION },
- { S("output"), OUTPUT },
- { S("p"), P },
- { S("param"), PARAM },
- { S("plaintext"), PLAINTEXT },
- { S("pre"), PRE },
- { S("script"), SCRIPT },
- { S("select"), SELECT },
- { S("spacer"), SPACER },
- { S("style"), STYLE },
- { S("summary"), SUMMARY },
- { S("tbody"), TBODY },
- { S("textarea"), TEXTAREA },
- { S("tfoot"), TFOOT },
- { S("thead"), THEAD },
- { S("title"), TITLE },
- { S("tr"), TR },
- { S("ul"), UL },
- { S("wbr"), WBR },
-
- { S("applet"), APPLET },
- { S("button"), BUTTON },
- { S("caption"), CAPTION },
- { S("html"), HTML },
- { S("marquee"), MARQUEE },
- { S("object"), OBJECT },
- { S("table"), TABLE },
- { S("td"), TD },
- { S("th"), TH },
-
- { S("a"), A },
- { S("b"), B },
- { S("big"), BIG },
- { S("em"), EM },
- { S("font"), FONT },
- { S("i"), I },
- { S("nobr"), NOBR },
- { S("s"), S },
- { S("small"), SMALL },
- { S("strike"), STRIKE },
- { S("strong"), STRONG },
- { S("tt"), TT },
- { S("u"), U },
-
- { S("xmp"), XMP },
-
- { S("math"), MATH },
- { S("mglyph"), MGLYPH },
- { S("malignmark"), MALIGNMARK },
- { S("mi"), MI },
- { S("mo"), MO },
- { S("mn"), MN },
- { S("ms"), MS },
- { S("mtext"), MTEXT },
- { S("annotation-xml"), ANNOTATION_XML },
-
- { S("svg"), SVG },
- { S("desc"), DESC },
- { S("foreignobject"), FOREIGNOBJECT },
-};
-
static bool is_form_associated(element_type type);
/**
@@ -1045,24 +924,17 @@ hubbub_error append_text(hubbub_treebuilder *treebuilder,
element_type element_type_from_name(hubbub_treebuilder *treebuilder,
const hubbub_string *tag_name)
{
- const uint8_t *name = tag_name->ptr;
- size_t len = tag_name->len;
- uint32_t i;
+ const struct element_type_map *value; /* Lookup result; checked for NULL below */
UNUSED(treebuilder);
- /** \todo optimise this */
-
- for (i = 0; i < N_ELEMENTS(name_type_map); i++) {
- if (name_type_map[i].len != len)
- continue;
-
- if (strncasecmp(name_type_map[i].name,
- (const char *) name, len) == 0)
- return name_type_map[i].type;
+ value = hubbub_element_type_lookup((const char *)tag_name->ptr, /* Replaces the linear scan of name_type_map removed above */
+ tag_name->len);
+ if (value == NULL) { /* No entry for this name */
+ return UNKNOWN;
}
- return UNKNOWN;
+ return value->type;
}
/**