summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVincent Sanders <vince@kyllikki.org>2016-09-15 21:40:16 (GMT)
committer Vincent Sanders <vince@kyllikki.org>2016-09-15 21:40:16 (GMT)
commit125245cafceff46686b0858098d62d645956389a (patch)
treece638f234d467ab5e67a107651882529b23622de
parent7465cfc292d7104acb3f50fec5211ad13639a670 (diff)
downloadlibnspsl-125245cafceff46686b0858098d62d645956389a.tar.gz
libnspsl-125245cafceff46686b0858098d62d645956389a.tar.bz2
improve the node tree representation to halve its memory usage
-rw-r--r--src/genpubsuffix.pl86
-rw-r--r--src/nspsl.c216
2 files changed, 163 insertions, 139 deletions
diff --git a/src/genpubsuffix.pl b/src/genpubsuffix.pl
index 6e90751..e95529b 100644
--- a/src/genpubsuffix.pl
+++ b/src/genpubsuffix.pl
@@ -92,7 +92,7 @@ sub treesubdom
$tldtree_ref->{$domelem_puny} = \%node;
$$nodeidx_ref += 1;
}
-
+
# recurse down if there are more parts to the domain
if (($isexception == 0) && (scalar(@{$parts_ref}) > 0)) {
treesubdom($tldtree_ref->{$domelem_puny}, $nodeidx_ref, $strtab_ref, $stridx_ref, $parts_ref);
@@ -193,41 +193,39 @@ sub calc_pnode
my $lineidx = -1;
# update the output index to after this node
- $$opidx_ref += scalar keys %$parent_ref;
+ # need to allow for an additional node for each entry with children
+
+ # iterate over each child element domain/ref pair
+ while ( my ($cdom, $cref) = each(%$parent_ref) ) {
+ if (scalar keys (%$cref) != 0) {
+ $$opidx_ref += 2;
+ } else {
+ $$opidx_ref += 1;
+ }
+ }
# entry block
if ($startidx == ($$opidx_ref - 1)) {
- $our_dat = "\n /* entry " . $startidx . " */\n ";
+ $our_dat = "\n /* entry " . $startidx . " */\n";
} else {
- $our_dat = "\n /* entries " . $startidx . " to " . ($$opidx_ref - 1) . " */\n ";
+ $our_dat = "\n /* entries " . $startidx . " to " . ($$opidx_ref - 1) . " */\n";
}
# iterate over each child element domain/ref pair
while ( my ($cdom, $cref) = each(%$parent_ref) ) {
- # make array look pretty by limiting entries per line
- if ($lineidx == 3) {
- $our_dat .= "\n ";
- $lineidx = 0;
- } elsif ($lineidx == -1) {
- $lineidx = 1;
- } else {
- $our_dat .= " ";
- $lineidx += 1;
- }
-
- $our_dat .= "{ ";
- $our_dat .= $strtab_ref->{$cdom} . ", ";
my $child_count = scalar keys (%$cref);
- $our_dat .= $child_count . ", ";
- if ($child_count != 0) {
- $our_dat .= $$opidx_ref . ", ";
- $child_dat .= calc_pnode($cref, $strtab_ref, $opidx_ref);
+
+ $our_dat .= " { ";
+ $our_dat .= ".label = {" . $strtab_ref->{$cdom} . ", ". pstr_len($cdom) ;
+ if ($child_count == 0) {
+ # complete label for no children
+ $our_dat .= ", 0 } },\n";
} else {
- $our_dat .= 0 . ", ";
+ # complete label with children
+ $our_dat .= ", 1 } }, ";
+ $our_dat .= "{ .child = { " . $$opidx_ref . ", " . $child_count . " } },\n";
+ $child_dat .= calc_pnode($cref, $strtab_ref, $opidx_ref);
}
- $our_dat .= pstr_len($cdom) ;
- $our_dat .= " },";
-
}
return $our_dat . $child_dat;
@@ -303,21 +301,33 @@ print "};\n\n";
# As labels cannot be more than 63 characters a byte length is more
# than sufficient.
-print "struct pnode {\n";
-print " uint16_t label; /**< index of domain element in string table */\n";
-print " uint16_t child_count; /* number of children of this node */\n";
-print " uint16_t child_index; /**< index of first child node */\n";
-print " uint8_t label_len; /**< length of domain element in string table */\n";
-print "};\n\n";
-my $opidx = 1; # output index of node
+my $opidx = 2; # output index of node
+my $opnodes = ""; # output pnode initialisers
+
+# root node initialiser
+$opnodes .= " /* root entry */\n";
+$opnodes .= " { .label = { 0, 0, 1 } }, { .child = { " . $opidx . ", " . scalar keys(%tldtree) . " } },";
+
+# generate node initialiser
+$opnodes .= calc_pnode(\%tldtree, \%strtab, \$opidx);
+
+
+print "union pnode {\n";
+print " struct {\n";
+print " uint16_t idx; /**< index of domain element in string table */\n";
+print " uint8_t len; /**< length of domain element in string table */\n";
+print " uint8_t children; /**< has children */\n";
+print " } label;\n";
+print " struct {\n";
+print " uint16_t index; /**< index of first child node */\n";
+print " uint16_t count; /* number of children of this node */\n";
+print " } child;\n";
+print "};\n\n";
-print "static const struct pnode pnodes[" . $nodeidx . "] = {\n";
+print "static const union pnode pnodes[" . $opidx . "] = {\n";
-# root node
-print " /* root entry */\n";
-print " { 0, " . scalar keys(%tldtree) . ", " . $opidx . ", 0 },";
+# output node initialisors
+print $opnodes;
-# all subsequent nodes
-print calc_pnode(\%tldtree, \%strtab, \$opidx);
print "\n};\n\n";
diff --git a/src/nspsl.c b/src/nspsl.c
index 2e6e4dd..83e0f4e 100644
--- a/src/nspsl.c
+++ b/src/nspsl.c
@@ -16,113 +16,127 @@
#define DOMSEP '.'
+/**
+ * match the label element of the domain from a point in the tree
+ */
static int matchlabel(int parent, const char *start, int len)
{
- int clast = pnodes[parent].child_index + pnodes[parent].child_count;
- int cidx; /*child node index */
- int ridx = -1; /* index of match or -1 */
-
- if (pnodes[parent].child_count != 0) {
- /* there are child nodes present to scan */
-
- for (cidx = pnodes[parent].child_index; cidx < clast; cidx++) {
- if (pnodes[cidx].label == STAB_WILDCARD) {
- /* wildcard match */
- ridx = cidx;
- } else {
- if ((pnodes[cidx].label_len == len) &&
- (strncasecmp(&stab[pnodes[cidx].label],
- start,
- len) == 0)) {
-
- if ((pnodes[cidx].child_count == 1) &&
- (pnodes[pnodes[cidx].child_index].label == STAB_EXCEPTION)) {
- /* exception to previous */
- ridx = -1;
- } else {
- ridx = cidx;
- }
- break;
- }
- }
- }
- }
- return ridx;
+ int cidx; /* child node index */
+ int ccount; /* child node count */
+ int ridx = -1; /* index of match or -1 */
+
+ if (pnodes[parent].label.children != 0) {
+ /* there are child nodes present to scan */
+
+ cidx = pnodes[parent + 1].child.index;
+
+ for (ccount = pnodes[parent + 1].child.count;
+ ccount > 0;
+ ccount--) {
+ if (pnodes[cidx].label.idx == STAB_WILDCARD) {
+ /* wildcard match */
+ ridx = cidx;
+ } else {
+ if ((pnodes[cidx].label.len == len) &&
+ (strncasecmp(&stab[pnodes[cidx].label.idx],
+ start,
+ len) == 0)) {
+ /* label match */
+ if ((pnodes[cidx].label.children != 0) &&
+ (pnodes[cidx + 1].child.count == 1) &&
+ (pnodes[pnodes[cidx + 1].child.index].label.idx == STAB_EXCEPTION)) {
+ /* exception to previous */
+ ridx = -1;
+ } else {
+ ridx = cidx;
+ }
+ break;
+ }
+ }
+
+ /* move index to next sibling */
+ if (pnodes[cidx].label.children != 0) {
+ cidx += 2;
+ } else {
+ cidx += 1;
+ }
+ }
+ }
+ return ridx;
}
/*
- * Exported public API
+ * Exported public API
*/
const char *nspsl_getpublicsuffix(const char *hostname)
{
- int treeidx = 0; /* index to current tree node */
- const char *elem_start;
- const char *elem_end;
- int lab_count = 0;
-
- /* deal with obviously bad hostname */
- if ((hostname == NULL) ||
- (hostname[0]) == 0 ||
- (hostname[0] == DOMSEP)) {
- return NULL;
- }
-
- /* hostnames are ass backwards and we need to consider elemets
- * from the end first.
- */
- elem_end = hostname + strlen(hostname);
- /* fqdn have a separator on the end */
- if (elem_end[-1] == DOMSEP) {
- elem_end--;
- }
- elem_start = elem_end;
-
- /* extract the element and check for a match in our tree */
- for(;;) {
- /* find the start of the element */
- while ((elem_start > hostname) && (*elem_start != DOMSEP)) {
- elem_start--;
- }
- if (*elem_start == DOMSEP) {
- elem_start++;
- }
-
- lab_count++;
-
- /* search child nodes for label */
- treeidx = matchlabel(treeidx, elem_start, elem_end - elem_start);
- if (treeidx == -1) {
- break;
- }
-
- if (elem_start == hostname) {
- /* not valid */
- return NULL;
- }
-
- elem_end = elem_start - 1;
- elem_start = elem_end - 1;
- }
-
- /* The public suffix algorithm says: "the domain must match
- * the public suffix plus one additional label." This
- * requires there to be at least two labels so we need to
- * check
- */
- if (lab_count == 1) {
- if (elem_start == hostname) {
- elem_start = NULL;
- } else {
- /* strip the non matching part */
- elem_start -= 2;
- while (elem_start > hostname && *elem_start != DOMSEP) {
- elem_start--;
- }
- if (*elem_start == DOMSEP)
- elem_start++;
- }
- }
-
-
- return elem_start;
+ int treeidx = 0; /* index to current tree node */
+ const char *elem_start;
+ const char *elem_end;
+ int lab_count = 0;
+
+ /* deal with obviously bad hostname */
+ if ((hostname == NULL) ||
+ (hostname[0]) == 0 ||
+ (hostname[0] == DOMSEP)) {
+ return NULL;
+ }
+
+ /* hostnames are ass backwards and we need to consider elemets
+ * from the end first.
+ */
+ elem_end = hostname + strlen(hostname);
+ /* fqdn have a separator on the end */
+ if (elem_end[-1] == DOMSEP) {
+ elem_end--;
+ }
+ elem_start = elem_end;
+
+ /* extract the element and check for a match in our tree */
+ for(;;) {
+ /* find the start of the element */
+ while ((elem_start > hostname) && (*elem_start != DOMSEP)) {
+ elem_start--;
+ }
+ if (*elem_start == DOMSEP) {
+ elem_start++;
+ }
+
+ lab_count++;
+
+ /* search child nodes for label */
+ treeidx = matchlabel(treeidx, elem_start, elem_end - elem_start);
+ if (treeidx == -1) {
+ break;
+ }
+
+ if (elem_start == hostname) {
+ /* not valid */
+ return NULL;
+ }
+
+ elem_end = elem_start - 1;
+ elem_start = elem_end - 1;
+ }
+
+ /* The public suffix algorithm says: "the domain must match
+ * the public suffix plus one additional label." This
+ * requires there to be at least two labels so we need to
+ * check
+ */
+ if (lab_count == 1) {
+ if (elem_start == hostname) {
+ elem_start = NULL;
+ } else {
+ /* strip the non matching part */
+ elem_start -= 2;
+ while (elem_start > hostname && *elem_start != DOMSEP) {
+ elem_start--;
+ }
+ if (*elem_start == DOMSEP)
+ elem_start++;
+ }
+ }
+
+ return elem_start;
}