summaryrefslogtreecommitdiff
path: root/content
diff options
context:
space:
mode:
authorJohn Mark Bell <jmb@netsurf-browser.org>2006-04-09 23:21:13 +0000
committerJohn Mark Bell <jmb@netsurf-browser.org>2006-04-09 23:21:13 +0000
commitc09eb457df1962f5b014214874b2beffd69141a4 (patch)
treea7c30e8b57b1d8bdeb87127c8f1ba16e91bf3971 /content
parente5e1b982d55636b409b194cf0488ebafe9c6d519 (diff)
downloadnetsurf-c09eb457df1962f5b014214874b2beffd69141a4.tar.gz
netsurf-c09eb457df1962f5b014214874b2beffd69141a4.tar.bz2
Unify information databases
svn path=/trunk/netsurf/; revision=2519
Diffstat (limited to 'content')
-rw-r--r--content/authdb.c366
-rw-r--r--content/authdb.h18
-rw-r--r--content/certdb.c154
-rw-r--r--content/certdb.h18
-rw-r--r--content/fetch.c15
-rw-r--r--content/fetchcache.c1
-rw-r--r--content/url_store.c750
-rw-r--r--content/url_store.h61
-rw-r--r--content/urldb.c2231
-rw-r--r--content/urldb.h65
10 files changed, 2301 insertions, 1378 deletions
diff --git a/content/authdb.c b/content/authdb.c
deleted file mode 100644
index f97adb1b0..000000000
--- a/content/authdb.c
+++ /dev/null
@@ -1,366 +0,0 @@
-/*
- * This file is part of NetSurf, http://netsurf.sourceforge.net/
- * Licensed under the GNU General Public License,
- * http://www.opensource.org/licenses/gpl-license
- * Copyright 2006 John M Bell <jmb202@ecs.soton.ac.uk>
- */
-
-/** \file
- * HTTP authentication database (implementation)
- *
- * Authentication details are stored hashed by canonical root URI
- * (absoluteURI with no abs_path part - see RFC 2617) for fast lookup.
- *
- * A protection space is specified by the root URI and a case sensitive
- * realm match. User-agents may preemptively send authentication details
- * for locations within a currently known protected space (i.e:
- * Given a known realm URI of scheme://authority/path/to/realm/
- * the URI scheme://authority/path/to/realm/foo/ can be assumed to
- * be within the protection space.)
- *
- * In order to deal with realms within realms, the realm details are stored
- * such that the most specific URI comes first (where "most specific" is
- * classed as the one with the longest abs_path segment).
- *
- * Realms spanning domains are stored multiple times (once per domain).
- *
- * Where a higher level resource is found to be within a known realm, the
- * existing match is replaced with the new one (i.e:
- * Given a known realm of scheme://authority/path/to/realm/ (uri1)
- * and the newly-acquired knowledge that scheme://authority/path/to/ (uri2)
- * lies within the same realm, the realm details for uri1 are replaced with
- * those for uri2. - in most cases, this is likely to be a simple
- * replacement of the realm URI)
- *
- * There is currently no mechanism for retaining authentication details over
- * sessions.
- */
-#include <assert.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <string.h>
-#include "netsurf/content/authdb.h"
-#define NDEBUG
-#include "netsurf/utils/log.h"
-#include "netsurf/utils/url.h"
-
-#define HASH_SIZE 77
-
-struct realm_details {
- char *realm; /**< Realm identifier */
- char *url; /**< Base URL of realm */
- char *auth; /**< Authentication details */
- struct realm_details *next;
- struct realm_details *prev;
-};
-
-struct auth_entry {
- char *root_url; /**< Canonical root URL of realms */
- struct realm_details *realms; /**< List of realms on this host */
- struct auth_entry *next;
-};
-
-static struct auth_entry *auth_table[HASH_SIZE];
-
-static unsigned int authdb_hash(const char *s);
-static struct realm_details *authdb_get_rd(const char *canon,
- const char *url, const char *realm);
-static void authdb_dump(void);
-
-/**
- * Insert an entry into the database, potentially replacing any
- * existing entry.
- *
- * \param url Absolute URL to resource
- * \param realm Authentication realm containing resource
- * \param auth Authentication details in form "username:password"
- * \return true on success, false on error.
- */
-bool authdb_insert(const char *url, const char *realm, const char *auth)
-{
- char *canon, *stripped;
- unsigned int hash;
- struct realm_details *rd;
- struct auth_entry *entry;
- url_func_result ret;
-
- assert(url && realm && auth);
-
- LOG(("Adding '%s' - '%s'", url, realm));
-
- ret = url_canonical_root(url, &canon);
- if (ret != URL_FUNC_OK)
- return false;
-
- LOG(("'%s'", canon));
-
- ret = url_strip_lqf(url, &stripped);
- if (ret != URL_FUNC_OK) {
- free(canon);
- return false;
- }
-
- hash = authdb_hash(canon);
-
- /* Look for existing entry */
- for (entry = auth_table[hash]; entry; entry = entry->next)
- if (strcmp(entry->root_url, canon) == 0)
- break;
-
- rd = authdb_get_rd(canon, stripped, realm);
- if (rd) {
- /* We have a match */
- if (strlen(stripped) < strlen(rd->url)) {
- /* more generic, so update URL and move to
- * appropriate location in list (s.t. the invariant
- * that most specific URLs come first is maintained)
- */
- struct realm_details *r, *s;
- char *temp = strdup(auth);
-
- if (!temp) {
- free(temp);
- free(stripped);
- free(canon);
- return false;
- }
-
- free(rd->url);
- rd->url = stripped;
-
- free(rd->auth);
- rd->auth = temp;
-
- for (r = rd->next; r; r = s) {
- s = r->next;
- if (strlen(r->url) > strlen(rd->url)) {
- rd->next->prev = rd->prev;
- if (rd->prev)
- rd->prev->next = rd->next;
- else
- entry->realms = r;
-
- rd->prev = r;
- rd->next = r->next;
- if (r->next)
- r->next->prev = rd;
- r->next = rd;
- }
- }
- }
- else if (strlen(stripped) == strlen(rd->url)) {
- /* exact match, so replace auth details */
- char *temp = strdup(auth);
- if (!temp) {
- free(stripped);
- free(canon);
- return false;
- }
-
- free(rd->auth);
- rd->auth = temp;
-
- free(stripped);
- }
- /* otherwise, nothing to do */
-
- free(canon);
- return true;
- }
-
- /* no existing entry => create one */
- rd = malloc(sizeof(struct realm_details));
- if (!rd) {
- free(stripped);
- free(canon);
- return false;
- }
-
- rd->realm = strdup(realm);
- rd->auth = strdup(auth);
- rd->url = stripped;
- rd->prev = 0;
-
- if (!rd->realm || !rd->auth || ret != URL_FUNC_OK) {
- free(rd->url);
- free(rd->auth);
- free(rd->realm);
- free(rd);
- free(canon);
- return false;
- }
-
- if (entry) {
- /* found => add to it */
- rd->next = entry->realms;
- if (entry->realms)
- entry->realms->prev = rd;
- entry->realms = rd;
-
- free(canon);
- return true;
- }
-
- /* not found => create new */
- entry = malloc(sizeof(struct auth_entry));
- if (!entry) {
- free(rd->url);
- free(rd->auth);
- free(rd->realm);
- free(rd);
- free(canon);
- return false;
- }
-
- rd->next = 0;
- entry->root_url = canon;
- entry->realms = rd;
- entry->next = auth_table[hash];
- auth_table[hash] = entry;
-
- return true;
-}
-
-/**
- * Find realm details entry
- *
- * \param canon Canonical root URL
- * \param url Stripped URL to resource
- * \param realm Realm containing resource
- * \return Realm details or NULL if not found
- */
-struct realm_details *authdb_get_rd(const char *canon, const char *url,
- const char *realm)
-{
- struct auth_entry *entry;
- struct realm_details *ret;
-
- assert(canon && url);
-
- for (entry = auth_table[authdb_hash(canon)]; entry;
- entry = entry->next)
- if (strcmp(entry->root_url, canon) == 0)
- break;
-
- if (!entry)
- return NULL;
-
- for (ret = entry->realms; ret; ret = ret->next) {
- if (strcmp(ret->realm, realm))
- /* skip realms that don't match */
- continue;
- if (strlen(url) >= strlen(ret->url) &&
- !strncmp(url, ret->url, strlen(ret->url)))
- /* If the requested URL is of equal or greater
- * specificity than the stored one, but is within
- * the same realm, then use the more generic details
- */
- return ret;
- else if (strncmp(url, ret->url, strlen(url)) == 0) {
- /* We have a more general URL in the same realm */
- return ret;
- }
- }
-
- return NULL;
-}
-
-/**
- * Retrieve authentication details for an URL from the database
- *
- * \param url Absolute URL to consider
- * \return authentication details, or NULL if none found.
- */
-const char *authdb_get(const char *url)
-{
- char *canon, *stripped;
- struct auth_entry *entry;
- struct realm_details *rd;
- url_func_result ret;
-
- assert(url);
-
- LOG(("Searching for '%s'", url));
-
- authdb_dump();
-
- ret = url_canonical_root(url, &canon);
- if (ret != URL_FUNC_OK)
- return NULL;
-
- ret = url_strip_lqf(url, &stripped);
- if (ret != URL_FUNC_OK) {
- free(canon);
- return NULL;
- }
-
- /* Find auth entry */
- for (entry = auth_table[authdb_hash(canon)]; entry;
- entry = entry->next)
- if (strcmp(entry->root_url, canon) == 0)
- break;
-
- if (!entry) {
- free(stripped);
- free(canon);
- return NULL;
- }
-
- LOG(("Found entry"));
-
- /* Find realm details */
- for (rd = entry->realms; rd; rd = rd->next)
- if (strlen(stripped) >= strlen(rd->url) &&
- !strncmp(stripped, rd->url, strlen(rd->url)))
- break;
-
- if (!rd) {
- free(stripped);
- free(canon);
- return NULL;
- }
-
- LOG(("Found realm"));
-
- free(stripped);
- free(canon);
- return rd->auth;
-}
-
-/**
- * Hash function for keys.
- */
-unsigned int authdb_hash(const char *s)
-{
- unsigned int i, z = 0, m;
- if (!s)
- return 0;
-
- m = strlen(s);
-
- for (i = 0; i != m && s[i]; i++)
- z += s[i] & 0x1f; /* lower 5 bits, case insensitive */
- return z % HASH_SIZE;
-}
-
-/**
- * Dump contents of auth db to stderr
- */
-void authdb_dump(void)
-{
-#ifndef NDEBUG
- int i;
- struct auth_entry *e;
- struct realm_details *r;
-
- for (i = 0; i != HASH_SIZE; i++) {
- LOG(("%d:", i));
- for (e = auth_table[i]; e; e = e->next) {
- LOG(("\t%s", e->root_url));
- for (r = e->realms; r; r = r->next) {
- LOG(("\t\t%s - %s", r->url, r->realm));
- }
- }
- }
-#endif
-}
diff --git a/content/authdb.h b/content/authdb.h
deleted file mode 100644
index ece7b763d..000000000
--- a/content/authdb.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * This file is part of NetSurf, http://netsurf.sourceforge.net/
- * Licensed under the GNU General Public License,
- * http://www.opensource.org/licenses/gpl-license
- * Copyright 2006 John M Bell <jmb202@ecs.soton.ac.uk>
- */
-
-/** \file
- * HTTP authentication database (interface)
- */
-
-#ifndef _NETSURF_CONTENT_AUTHDB_H_
-#define _NETSURF_CONTENT_AUTHDB_H_
-
-bool authdb_insert(const char *url, const char *realm, const char *auth);
-const char *authdb_get(const char *url);
-
-#endif
diff --git a/content/certdb.c b/content/certdb.c
deleted file mode 100644
index 78c6ec04f..000000000
--- a/content/certdb.c
+++ /dev/null
@@ -1,154 +0,0 @@
-/*
- * This file is part of NetSurf, http://netsurf.sourceforge.net/
- * Licensed under the GNU General Public License,
- * http://www.opensource.org/licenses/gpl-license
- * Copyright 2006 John M Bell <jmb202@ecs.soton.ac.uk>
- */
-
-/** \file
- * HTTPS certificate verification database (implementation)
- *
- * URLs of servers with invalid SSL certificates are stored hashed by
- * canonical root URI (absoluteURI with no abs_path part - see RFC 2617)
- * for fast lookup.
- */
-#include <assert.h>
-#include <stdbool.h>
-#include <stdlib.h>
-#include <string.h>
-#include "netsurf/utils/config.h"
-#include "netsurf/content/certdb.h"
-#define NDEBUG
-#include "netsurf/utils/log.h"
-#include "netsurf/utils/url.h"
-
-#define HASH_SIZE 77
-
-#ifdef WITH_SSL
-
-struct cert_entry {
- char *root_url; /**< Canonical root URL */
- struct cert_entry *next;
-};
-
-static struct cert_entry *cert_table[HASH_SIZE];
-
-static unsigned int certdb_hash(const char *s);
-static void certdb_dump(void);
-
-/**
- * Insert an entry into the database
- *
- * \param url Absolute URL to resource
- * \return true on success, false on error.
- */
-bool certdb_insert(const char *url)
-{
- char *canon;
- unsigned int hash;
- struct cert_entry *entry;
- url_func_result ret;
-
- assert(url);
-
- LOG(("Adding '%s'", url));
-
- ret = url_canonical_root(url, &canon);
- if (ret != URL_FUNC_OK)
- return false;
-
- LOG(("'%s'", canon));
-
- hash = certdb_hash(canon);
-
- /* Look for existing entry */
- for (entry = cert_table[hash]; entry; entry = entry->next) {
- if (strcmp(entry->root_url, canon) == 0) {
- free(canon);
- return true;
- }
- }
-
- /* not found => create new */
- entry = malloc(sizeof(struct cert_entry));
- if (!entry) {
- free(canon);
- return false;
- }
-
- entry->root_url = canon;
- entry->next = cert_table[hash];
- cert_table[hash] = entry;
-
- return true;
-}
-
-/**
- * Retrieve certificate details for an URL from the database
- *
- * \param url Absolute URL to consider
- * \return certificate details, or NULL if none found.
- */
-const char *certdb_get(const char *url)
-{
- char *canon;
- struct cert_entry *entry;
- url_func_result ret;
-
- assert(url);
-
- LOG(("Searching for '%s'", url));
-
- certdb_dump();
-
- ret = url_canonical_root(url, &canon);
- if (ret != URL_FUNC_OK)
- return NULL;
-
- /* Find cert entry */
- for (entry = cert_table[certdb_hash(canon)]; entry;
- entry = entry->next) {
- if (strcmp(entry->root_url, canon) == 0) {
- free(canon);
- return entry->root_url;
- }
- }
-
- return NULL;
-}
-
-/**
- * Hash function for keys.
- */
-unsigned int certdb_hash(const char *s)
-{
- unsigned int i, z = 0, m;
- if (!s)
- return 0;
-
- m = strlen(s);
-
- for (i = 0; i != m && s[i]; i++)
- z += s[i] & 0x1f; /* lower 5 bits, case insensitive */
- return z % HASH_SIZE;
-}
-
-/**
- * Dump contents of auth db to stderr
- */
-void certdb_dump(void)
-{
-#ifndef NDEBUG
- int i;
- struct cert_entry *e;
-
- for (i = 0; i != HASH_SIZE; i++) {
- LOG(("%d:", i));
- for (e = cert_table[i]; e; e = e->next) {
- LOG(("\t%s", e->root_url));
- }
- }
-#endif
-}
-
-#endif
diff --git a/content/certdb.h b/content/certdb.h
deleted file mode 100644
index 28aa88664..000000000
--- a/content/certdb.h
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * This file is part of NetSurf, http://netsurf.sourceforge.net/
- * Licensed under the GNU General Public License,
- * http://www.opensource.org/licenses/gpl-license
- * Copyright 2006 John M Bell <jmb202@ecs.soton.ac.uk>
- */
-
-/** \file
- * HTTPS certificate verification database (interface)
- */
-
-#ifndef _NETSURF_CONTENT_CERTDB_H_
-#define _NETSURF_CONTENT_CERTDB_H_
-
-bool certdb_insert(const char *url);
-const char *certdb_get(const char *url);
-
-#endif
diff --git a/content/fetch.c b/content/fetch.c
index 4ba322067..bfbf715a0 100644
--- a/content/fetch.c
+++ b/content/fetch.c
@@ -31,13 +31,8 @@
#ifdef WITH_SSL
#include "openssl/ssl.h"
#endif
-#ifdef WITH_AUTH
-#include "netsurf/content/authdb.h"
-#endif
-#ifdef WITH_SSL
-#include "netsurf/content/certdb.h"
-#endif
#include "netsurf/content/fetch.h"
+#include "netsurf/content/urldb.h"
#include "netsurf/desktop/options.h"
#include "netsurf/render/form.h"
#define NDEBUG
@@ -158,7 +153,7 @@ static int fetch_cert_verify_callback(X509_STORE_CTX *x509_ctx, void *parm);
ring = 0; \
} \
element->r_next = element->r_prev = 0
-
+
/** Find the element (by hostname) in the given ring, leave it in the
* provided element variable
*/
@@ -483,7 +478,7 @@ static bool ns_internal_initiate_fetch(struct fetch *fetch, CURL *handle)
fetch->curl_handle = 0;
return false;
}
-
+
/* add to the global curl multi handle */
codem = curl_multi_add_handle(fetch_curl_multi, fetch->curl_handle);
assert(codem == CURLM_OK || codem == CURLM_CALL_MULTI_PERFORM);
@@ -649,7 +644,7 @@ CURLcode fetch_set_options(struct fetch *f)
SETOPT(CURLOPT_COOKIEJAR, 0);
}
#ifdef WITH_AUTH
- if ((auth = authdb_get(f->url)) != NULL) {
+ if ((auth = urldb_get_auth_details(f->url)) != NULL) {
SETOPT(CURLOPT_HTTPAUTH, CURLAUTH_ANY);
SETOPT(CURLOPT_USERPWD, auth);
} else {
@@ -677,7 +672,7 @@ CURLcode fetch_set_options(struct fetch *f)
}
#ifdef WITH_SSL
- if (certdb_get(f->url) != NULL) {
+ if (urldb_get_cert_permissions(f->url)) {
/* Disable certificate verification */
SETOPT(CURLOPT_SSL_VERIFYPEER, 0L);
SETOPT(CURLOPT_SSL_VERIFYHOST, 0L);
diff --git a/content/fetchcache.c b/content/fetchcache.c
index 47f24e89c..bc8907f14 100644
--- a/content/fetchcache.c
+++ b/content/fetchcache.c
@@ -23,7 +23,6 @@
#include "netsurf/content/content.h"
#include "netsurf/content/fetchcache.h"
#include "netsurf/content/fetch.h"
-#include "netsurf/content/url_store.h"
#include "netsurf/utils/log.h"
#include "netsurf/utils/messages.h"
#include "netsurf/utils/talloc.h"
diff --git a/content/url_store.c b/content/url_store.c
deleted file mode 100644
index fde956e46..000000000
--- a/content/url_store.c
+++ /dev/null
@@ -1,750 +0,0 @@
-/*
- * This file is part of NetSurf, http://netsurf.sourceforge.net/
- * Licensed under the GNU General Public License,
- * http://www.opensource.org/licenses/gpl-license
- * Copyright 2005 Richard Wilson <info@tinct.net>
- */
-
-/** \file
- * Central repository for URL data (implementation).
- */
-
-#include <assert.h>
-#include <stdbool.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
-#include "netsurf/content/url_store.h"
-#include "netsurf/image/bitmap.h"
-#include "netsurf/desktop/options.h"
-#ifdef riscos
-#include "netsurf/riscos/bitmap.h"
-#endif
-#include "netsurf/utils/log.h"
-#include "netsurf/utils/url.h"
-#include "netsurf/utils/utils.h"
-
-
-#define ITERATIONS_BEFORE_TEST 32
-#define MAXIMUM_URL_LENGTH 1024
-
-struct hostname_data *url_store_hostnames = NULL;
-
-static struct hostname_data *url_store_find_hostname(const char *url);
-static struct hostname_data *url_store_match_hostname(
- struct hostname_data *previous);
-
-/* used for faster matching */
-static size_t current_match_url_length;
-static char *current_match_scheme;
-static int current_match_scheme_length;
-static char *current_match_hostname;
-static int current_match_hostname_length;
-static bool current_match_www_test;
-
-/* used for faster searching */
-static struct hostname_data *last_hostname_found = NULL;
-
-/**
- * Returns the hostname data for the specified URL. If no hostname
- * data is currently available then it is created.
- *
- * \param url the url to find hostname data for
- * \return the current hostname data, or NULL if memory exhausted
- */
-struct hostname_data *url_store_find_hostname(const char *url)
-{
- struct hostname_data *first = url_store_hostnames;
- struct hostname_data *search;
- struct hostname_data *result;
- url_func_result res;
- char *hostname = NULL;
- int hostname_length;
- int compare;
- int fast_exit_counter = ITERATIONS_BEFORE_TEST;
- const char *host_test;
-
- assert(url);
-
- /* as the URL is normalised, we optimise the hostname finding for http:// */
- if (!strncmp("http://", url, 7)) {
- /* check for duplicate hostname calls */
- if ((last_hostname_found) &&
- (!strncmp(last_hostname_found->hostname, url + 7,
- last_hostname_found->hostname_length))) {
- /* ensure it isn't comparing 'foo.com' to 'foo.com.au' etc */
- if (url[last_hostname_found->hostname_length + 7] != '.')
- return last_hostname_found;
- }
-
- /* check for a hostname match */
- for (host_test = url + 7;
- ((*host_test > 32) && (*host_test != '/'));
- *host_test++);
- hostname_length = host_test - url - 7;
- host_test = url + 7;
- if ((last_hostname_found) &&
- (strncmp(host_test,
- last_hostname_found->hostname,
- hostname_length) > 0))
- first = last_hostname_found;
- for (search = first; search; search = search->next) {
- if (search->hostname_length == hostname_length) {
- compare = strncmp(host_test, search->hostname,
- hostname_length);
- if (compare == 0) {
- last_hostname_found = search;
- return search;
- } else if (compare < 0)
- break;
- }
- }
-
- /* allocate a new hostname */
- hostname = malloc(hostname_length + 1);
- if (!hostname)
- return NULL;
- memcpy(hostname, host_test, hostname_length);
- hostname[hostname_length] = '\0';
- } else {
- /* no quick match found, fallback */
- res = url_host(url, &hostname);
- switch (res) {
- case URL_FUNC_OK:
- break;
- case URL_FUNC_NOMEM:
- return NULL;
- case URL_FUNC_FAILED:
- hostname = strdup("file:/"); /* for 'file:/' */
- if (!hostname)
- return NULL;
- break;
- default:
- assert(0);
- }
- hostname_length = strlen(hostname);
- }
-
- /* try to find a matching hostname fairly quickly */
- if ((last_hostname_found) &&
- (strcmp(hostname, last_hostname_found->hostname) > 0))
- first = last_hostname_found;
- for (search = first; search; search = search->next) {
- if ((fast_exit_counter <= 0) ||
- (search->hostname_length == hostname_length)) {
- compare = strcmp(hostname, search->hostname);
- if (compare == 0) {
- free(hostname);
- last_hostname_found = search;
- return search;
- } else if (compare < 0)
- break;
- fast_exit_counter = ITERATIONS_BEFORE_TEST;
- } else {
- fast_exit_counter--;
- }
- }
-
- /* no hostname is available: create a new one */
- result = malloc(sizeof *result);
- if (!result) {
- free(hostname);
- return NULL;
- }
- result->hostname = hostname;
- result->hostname_length = hostname_length;
- result->url = 0;
- result->previous = 0;
- result->next = 0;
- last_hostname_found = result;
-
- /* simple case: no current hostnames */
- if (!url_store_hostnames) {
- url_store_hostnames = result;
- return result;
- }
-
- /* worst case scenario: the place we need to link is within the last
- * section of the hostname list so we have no reference to work back
- * from. rather than slowing with the very common case of searching,
- * we take a speed hit for this case and simply move to the very end
- * of the hostname list ready to work backwards. */
- if (!search)
- for (search = url_store_hostnames; search->next;
- search = search->next)
- ;
-
- /* we can now simply scan backwards as we know roughly where we need
- * to link to (we either had an early exit from the searching so we
- * know we're in the block following where we need to link, or we're
- * at the very end of the list as we were in the last block.) */
- while ((search) && (strcmp(hostname, search->hostname) < 0))
- search = search->previous;
-
- /* simple case: our new hostname is the first in the list */
- if (!search) {
- result->next = url_store_hostnames;
- url_store_hostnames->previous = result;
- url_store_hostnames = result;
- return result;
- }
-
- /* general case: link in after the found hostname */
- result->previous = search;
- result->next = search->next;
- if (search->next)
- search->next->previous = result;
- search->next = result;
- return result;
-}
-
-
-/**
- * Returns the url data for the specified URL. If no url
- * data is currently available then it is created.
- *
- * \param url a normalized url to find hostname data for
- * \return the current hostname data, or NULL if memory exhausted
- */
-struct url_content *url_store_find(const char *url) {
- struct hostname_data *hostname_data;
- struct url_data *search;
- struct url_data *result;
- size_t url_length;
- int compare;
- int fast_exit_counter = ITERATIONS_BEFORE_TEST;
-
- assert(url);
-
- /* find the corresponding hostname data */
- hostname_data = url_store_find_hostname(url);
- if (!hostname_data)
- return NULL;
-
- /* move to the start of the leafname */
- url_length = strlen(url);
-
- /* try to find a matching url fairly quickly */
- for (search = hostname_data->url; search; search = search->next) {
- if ((fast_exit_counter <= 0) ||
- (search->data.url_length == url_length)) {
- compare = strcmp(url, search->data.url);
- if (compare == 0)
- return &search->data;
- else if (compare < 0)
- break;
- fast_exit_counter = ITERATIONS_BEFORE_TEST;
- } else {
- fast_exit_counter--;
- }
- }
-
- /* no URL is available: create a new one */
- result = calloc(1, sizeof(struct url_data));
- if (!result)
- return NULL;
- result->data.url = malloc(url_length + 1);
- if (!result->data.url) {
- free(result);
- return NULL;
- }
- memcpy(result->data.url, url, url_length + 1);
- result->data.url_length = url_length;
- result->parent = hostname_data;
-
- /* simple case: no current URLs */
- if (!hostname_data->url) {
- hostname_data->url = result;
- return &result->data;
- }
-
- /* worst case scenario: the place we need to link is within the last
- * section of the URL list so we have no reference to work back
- * from. rather than slowing with the very common case of searching,
- * we take a speed hit for this case and simply move to the very end
- * of the URL list ready to work backwards. */
- if (!search)
- for (search = hostname_data->url; search->next;
- search = search->next)
- ;
-
- /* we can now simply scan backwards as we know roughly where we need
- * to link to (we either had an early exit from the searching so we
- * know we're in the block following where we need to link, or we're
- * at the very end of the list as we were in the last block.) */
- while ((search) && (strcmp(url, search->data.url) < 0))
- search = search->previous;
-
- /* simple case: our new hostname is the first in the list */
- if (!search) {
- result->next = hostname_data->url;
- hostname_data->url->previous = result;
- hostname_data->url = result;
- return &result->data;
- }
-
- /* general case: link in after the found hostname */
- result->previous = search;
- result->next = search->next;
- if (search->next)
- search->next->previous = result;
- search->next = result;
- return &result->data;
-}
-
-
-/**
- * Returns the next hostname that matches a part of the specified URL.
- *
- * The following variables must be initialised prior to calling:
- *
- * - current_match_scheme
- * - current_match_hostname
- * - current_match_hostname_length;
- *
- * \param url a normalized url to find the next match for
- * \param current the current hostname to search forward from, or NULL
- * \return the next matching hostname, or NULL
- */
-struct hostname_data *url_store_match_hostname(
- struct hostname_data *current) {
- int compare;
-
- assert(current_match_hostname);
-
- /* advance to the next hostname */
- if (!current)
- current = url_store_hostnames;
- else
- current = current->next;
-
- /* skip past hostname data without URLs */
- for (; current && (!current->url); current = current->next);
-
- while (current) {
- if (current->hostname_length >= current_match_hostname_length) {
- compare = strncmp(current_match_hostname, current->hostname,
- current_match_hostname_length);
- if (compare == 0)
- return current;
- else if ((compare < 0) && !current_match_www_test)
- break;
- }
- /* special case: if hostname is not www then try it */
- if (current_match_www_test && ((current->hostname_length - 4) >=
- current_match_hostname_length) &&
- (!strncmp(current->hostname, "www.", 4)) &&
- (!strncmp(current_match_hostname,
- current->hostname + 4,
- current_match_hostname_length)))
- return current;
-
- /* move to next hostname with URLs */
- current = current->next;
- for (; current && (!current->url); current = current->next);
- }
- return NULL;
-}
-
-
-
-/**
- * Returns the complete URL for the next matched stored URL.
- *
- * \param url a normalized url to find the next match for
- * \param reference internal reference (NULL for first call)
- * \return the next URL that matches
- */
-struct url_content *url_store_match(const char *url, struct url_data **reference) {
- struct hostname_data *hostname;
- struct url_data *search = NULL;
- url_func_result res;
-
- assert(url);
-
- if (!url_store_hostnames)
- return NULL;
-
- /* find the scheme and first URL, not necessarily matching */
- if (!*reference) {
- /* the hostname match is constant throughout */
- if (current_match_hostname)
- free(current_match_hostname);
- current_match_hostname = NULL;
- res = url_host(url, &current_match_hostname);
- switch (res) {
- case URL_FUNC_OK:
- break;
- case URL_FUNC_NOMEM:
- return NULL;
- case URL_FUNC_FAILED:
- /* for 'file:/' */
- current_match_hostname = strdup("file:/");
- if (!current_match_hostname)
- return NULL;
- break;
- default:
- assert(0);
- }
- current_match_hostname_length = strlen(current_match_hostname);
- /* the scheme is constant throughout */
- if (current_match_scheme)
- free(current_match_scheme);
- current_match_scheme = NULL;
- res = url_scheme(url, &current_match_scheme);
- if (res != URL_FUNC_OK)
- return NULL;
- current_match_scheme_length = strlen(current_match_scheme);
- /* the url is constant throughout */
- current_match_url_length = strlen(url);
- current_match_www_test = (!strcmp(current_match_scheme, "http") &&
- strncmp(url + 4 + 3, "www.", 4)); /* 'http' + '://' */
- /* get our initial reference */
- hostname = url_store_match_hostname(NULL);
- if (!hostname)
- return NULL;
- } else {
- search = *reference;
- hostname = search->parent;
- }
-
- /* work through all our strings, ignoring the scheme and 'www.' */
- while (hostname) {
-
- /* get the next URL to test */
- if (!search)
- search = hostname->url;
- else
- search = search->next;
-
- /* loop past end of list, or search */
- if (!search) {
- hostname = url_store_match_hostname(hostname);
- if (!hostname)
- return NULL;
- } else if (search->data.visits > 0) {
- /* straight match */
- if ((search->data.url_length >= current_match_url_length) &&
- (!strncmp(search->data.url, url,
- current_match_url_length))) {
- *reference = search;
- return &search->data;
- }
- /* try with 'www.' inserted after the scheme */
- if (current_match_www_test &&
- ((search->data.url_length - 4) >=
- current_match_url_length) &&
- (!strncmp(search->data.url,
- current_match_scheme,
- current_match_scheme_length)) &&
- (!strncmp(search->data.url +
- current_match_scheme_length + 3,
- "www.", 4)) &&
- (!strncmp(search->data.url +
- current_match_scheme_length + 7,
- url +
- current_match_scheme_length + 3,
- current_match_url_length -
- current_match_scheme_length - 3))) {
- *reference = search;
- return &search->data;
- }
- }
- }
- return NULL;
-}
-
-
-/**
- * Converts a text string into one suitable for URL matching.
- *
- * \param text the text to search with
- * \return URL matching string allocated on heap, or NULL on error
- */
-char *url_store_match_string(const char *text) {
- url_func_result res;
- char *url;
-
- assert(text);
-
- res = url_normalize(text, &url);
- if (res != URL_FUNC_OK)
- return NULL;
-
- /* drop the '/' from the end if it was added when normalizing */
- if ((url[strlen(url) - 1] == '/') && (text[strlen(text) - 1] != '/'))
- url[strlen(url) - 1] = '\0';
- return url;
-}
-
-
-/**
- * Loads the current contents of the URL store from disk
- *
- * \param file the file to load options from
- */
-void url_store_load(const char *file) {
- char s[MAXIMUM_URL_LENGTH];
- struct hostname_data *hostname;
- struct url_data *result;
- int urls;
- int i;
- int version;
- int length;
- FILE *fp;
-
- LOG(("Loading URL file"));
-
- fp = fopen(file, "r");
- if (!fp) {
- LOG(("Failed to open file '%s' for reading", file));
- return;
- }
-
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- return;
- version = atoi(s);
- if (version < 102) {
- LOG(("Unsupported URL file version."));
- return;
- }
- if (version > 105) {
- LOG(("Unknown URL file version."));
- return;
- }
-
- last_hostname_found = NULL;
- while (fgets(s, MAXIMUM_URL_LENGTH, fp)) {
- /* get the hostname */
- length = strlen(s) - 1;
- s[length] = '\0';
-
- /* skip data that has ended up with a host of '' */
- if (length == 0) {
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
- urls = atoi(s);
- for (i = 0; i < (6 * urls); i++)
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
- continue;
- }
-
- /* add the host at the tail */
- if (version == 105) {
- hostname = malloc(sizeof *hostname);
- if (!hostname)
- die("Insufficient memory to create hostname");
- hostname->hostname = malloc(length + 1);
- if (!hostname->hostname)
- die("Insufficient memory to create hostname");
- memcpy(hostname->hostname, s, length + 1);
- hostname->hostname_length = length;
- hostname->url = 0;
- hostname->previous = last_hostname_found;
- if (!hostname->previous)
- url_store_hostnames = hostname;
- else
- last_hostname_found->next = hostname;
- hostname->next = 0;
- last_hostname_found = hostname;
- } else {
- hostname = url_store_find_hostname(s);
- if (!hostname)
- break;
- }
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
- urls = atoi(s);
-
- /* load the non-corrupt data */
- for (i = 0; i < urls; i++) {
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
- length = strlen(s) - 1;
- s[length] = '\0';
- result = calloc(1, sizeof(struct url_data));
- if (!result)
- die("Insufficient memory to create URL");
- result->data.url_length = length;
- result->data.url = malloc(length + 1);
- if (!result->data.url)
- die("Insufficient memory to create URL");
- memcpy(result->data.url, s, length + 1);
- result->parent = hostname;
- result->next = hostname->url;
- if (hostname->url)
- hostname->url->previous = result;
- hostname->url = result;
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
- result->data.visits = atoi(s);
- if (version == 102) {
- /* ignore requests */
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
- /* ignore thumbnail size */
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
- /* set last visit as today to retain */
- result->data.last_visit = time(NULL);
- } else {
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
- result->data.last_visit = atoi(s);
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
- result->data.type = atoi(s);
- }
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
-#ifdef riscos
- if (strlen(s) == 12) {
- /* ensure filename is 'XX.XX.XX.XX' */
- if ((s[2] == '.') && (s[5] == '.') &&
- (s[8] == '.')) {
- s[11] = '\0';
- result->data.thumbnail =
- bitmap_create_file(s);
- }
- }
-#endif
- if (version >= 104) {
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
- length = strlen(s) - 1;
- if (length > 0) {
- s[length] = '\0';
- result->data.title = malloc(length + 1);
- if (result->data.title)
- memcpy(result->data.title, s,
- length + 1);
- }
- }
- }
- }
- fclose(fp);
- LOG(("Successfully loaded URL file"));
-}
-
-
-/**
- * Saves the current contents of the URL store to disk
- *
- * \param file the file to load options from
- */
-void url_store_save(const char *file) {
- struct hostname_data *search;
- struct url_data *url;
- int url_count;
- const char *thumb_file;
- char *s;
- int i;
- FILE *fp;
-#ifdef riscos
- struct bitmap *bitmap;
-#endif
- time_t min_date;
- char *title;
-
- fp = fopen(file, "w");
- if (!fp) {
- LOG(("Failed to open file '%s' for writing", file));
- return;
- }
-
- /* get the minimum date for expiry */
- min_date = time(NULL) - (60 * 60 * 24) * option_expire_url;
-
- /* file format version number */
- fprintf(fp, "105\n");
- for (search = url_store_hostnames; search; search = search->next) {
- url_count = 0;
- for (url = search->url; url; url = url->next)
- if ((url->data.last_visit > min_date) &&
- (url->data.visits > 0) &&
- (url->data.url_length <
- MAXIMUM_URL_LENGTH)) {
- url_count++;
- }
- if (url_count > 0) {
- fprintf(fp, "%s\n%i\n", search->hostname, url_count);
- for (url = search->url; url && url->next;
- url = url->next);
- for (; url; url = url->previous)
- if ((url->data.last_visit > min_date) &&
- (url->data.visits > 0) &&
- (url->data.url_length <
- MAXIMUM_URL_LENGTH)) {
- thumb_file = "";
-#ifdef riscos
- bitmap = url->data.thumbnail;
- if (bitmap)
- thumb_file = bitmap->filename;
-#endif
-
- if (url->data.title) {
- s = url->data.title;
- for (i = 0; s[i] != '\0';
- i++)
- if (s[i] < 32)
- s[i] = ' ';
- for (--i;
- ((i > 0) &&
- (s[i] == ' '));
- i--)
- s[i] = '\0';
-
- title = url->data.title;
- }
- else
- title = "";
- fprintf(fp, "%s\n%i\n%i\n%i\n%s\n%s\n",
- url->data.url,
- url->data.visits,
- (int) url->data.
- last_visit,
- url->data.type,
- thumb_file,
- title);
- }
- }
- }
- fclose(fp);
-}
-
-
-/**
- * Associates a thumbnail with a specified URL.
- */
-void url_store_add_thumbnail(const char *url, struct bitmap *bitmap) {
- struct url_content *content;
-
- content = url_store_find(url);
- if (content) {
- if (content->thumbnail)
- bitmap_destroy(content->thumbnail);
- content->thumbnail = bitmap;
- }
-}
-
-
-/**
- * Gets the thumbnail associated with a given URL.
- */
-struct bitmap *url_store_get_thumbnail(const char *url) {
- struct url_content *content;
-
- content = url_store_find(url);
- if (content)
- return content->thumbnail;
- return NULL;
-}
-
-
-int url_store_compare_last_visit(const void *a, const void *b) {
- struct url_content * const *url_a = (struct url_content * const *)a;
- struct url_content * const *url_b = (struct url_content * const *)b;
- return ((*url_a)->last_visit - (*url_b)->last_visit);
-}
diff --git a/content/url_store.h b/content/url_store.h
deleted file mode 100644
index c10bc90d0..000000000
--- a/content/url_store.h
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * This file is part of NetSurf, http://netsurf.sourceforge.net/
- * Licensed under the GNU General Public License,
- * http://www.opensource.org/licenses/gpl-license
- * Copyright 2005 Richard Wilson <info@tinct.net>
- */
-
-/** \file
- * Central repository for URL data (interface).
- */
-
-#ifndef _NETSURF_CONTENT_URLSTORE_H_
-#define _NETSURF_CONTENT_URLSTORE_H_
-
-#include <time.h>
-#include "netsurf/content/content_type.h"
-
-struct bitmap;
-
-
-struct hostname_data {
- char *hostname; /**< Hostname (lowercase) */
- int hostname_length; /**< Length of hostname */
- struct url_data *url; /**< URLs for this host */
- struct hostname_data *previous; /**< Previous hostname */
- struct hostname_data *next; /**< Next hostname */
-};
-
-
-struct url_content {
- struct bitmap *thumbnail; /**< Thumbnail, or NULL */
- char *url; /**< URL (including hostname) */
- char *title; /**< Page title */
- size_t url_length; /**< Length of URL (including hostname) */
- unsigned int visits; /**< Number of times visited */
- time_t last_visit; /**< The time() of the last visit */
- content_type type; /**< The content type */
-};
-
-struct url_data {
- struct url_content data; /**< Stored URL content data */
- struct url_data *previous; /**< Previous URL */
- struct url_data *next; /**< Next URL */
- struct hostname_data *parent; /**< Parent hostname data */
-};
-
-extern struct hostname_data *url_store_hostnames;
-
-struct url_content *url_store_find(const char *url);
-struct url_content *url_store_match(const char *url, struct url_data **reference);
-char *url_store_match_string(const char *text);
-
-void url_store_add_thumbnail(const char *url, struct bitmap *bitmap);
-struct bitmap *url_store_get_thumbnail(const char *url);
-
-void url_store_load(const char *file);
-void url_store_save(const char *file);
-
-int url_store_compare_last_visit(const void *, const void *);
-
-#endif
diff --git a/content/urldb.c b/content/urldb.c
new file mode 100644
index 000000000..c7a798a92
--- /dev/null
+++ b/content/urldb.c
@@ -0,0 +1,2231 @@
+/*
+ * This file is part of NetSurf, http://netsurf.sourceforge.net/
+ * Licensed under the GNU General Public License,
+ * http://www.opensource.org/licenses/gpl-license
+ * Copyright 2006 John M Bell <jmb202@ecs.soton.ac.uk>
+ */
+
+/** \file
+ * Unified URL information database (implementation)
+ *
+ * URLs are stored in a tree-based structure as follows:
+ *
+ * The host component is extracted from each URL and, if a FQDN, split on
+ * every '.'.The tree is constructed by inserting each FQDN segment in
+ * reverse order. Duplicate nodes are merged.
+ *
+ * If the host part of an URL is an IP address, then this is added to the
+ * tree verbatim (as if it were a TLD).
+ *
+ * This provides something looking like:
+ *
+ * root (a sentinel)
+ * |
+ * -------------------------------------------------
+ * | | | | | | |
+ * com edu gov 127.0.0.1 net org uk TLDs
+ * | | | | | |
+ * google ... ... ... ... co 2LDs
+ * | |
+ * www bbc Hosts/Subdomains
+ * |
+ * www ...
+ *
+ * Each of the nodes in this tree is a struct host_part. This stores the
+ * FQDN segment (or IP address) with which the node is concerned. Each node
+ * may contain further information about paths on a host (struct path_data)
+ * or SSL certificate processing on a host-wide basis
+ * (host_part::permit_invalid_certs).
+ *
+ * Path data is concerned with storing various metadata about the path in
+ * question. This includes global history data, HTTP authentication details
+ * and any associated HTTP cookies. This is stored as a tree of path segments
+ * hanging off the relevant host_part node.
+ *
+ * Therefore, to find the last visited time of the URL
+ * http://www.example.com/path/to/resource.html, the FQDN tree would be
+ * traversed in the order root -> "com" -> "example" -> "www". The "www"
+ * node would have attached to it a tree of struct path_data:
+ *
+ * (sentinel)
+ * |
+ * path
+ * |
+ * to
+ * |
+ * resource.html
+ *
+ * This represents the absolute path "/path/to/resource.html". The leaf node
+ * "resource.html" contains the last visited time of the resource.
+ *
+ * The mechanism described above is, however, not particularly conducive to
+ * fast searching of the database for a given URL (or URLs beginning with a
+ * given prefix). Therefore, an anciliary data structure is used to enable
+ * fast searching. This structure simply reflects the contents of the
+ * database, with entries being added/removed at the same time as for the
+ * core database. In order to ensure that degenerate cases are kept to a
+ * minimum, we use an AAtree. This is an approximation of a Red-Black tree
+ * with similar performance characteristics, but with a significantly
+ * simpler implementation. Entries in this tree comprise pointers to the
+ * leaf nodes of the host tree described above.
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "netsurf/image/bitmap.h"
+#include "netsurf/content/urldb.h"
+#include "netsurf/desktop/options.h"
+#ifdef riscos
+/** \todo lose this */
+#include "netsurf/riscos/bitmap.h"
+#endif
+#include "netsurf/utils/log.h"
+#include "netsurf/utils/url.h"
+#include "netsurf/utils/utils.h"
+
+struct cookie {
+ char *name; /**< Cookie name */
+ char *value; /**< Cookie value */
+ char *comment; /**< Cookie comment */
+ time_t expires; /**< Expiry timestamp, or 0 for session */
+ time_t last_used; /**< Last used time */
+ bool secure; /**< Only send for HTTPS requests */
+ enum { COOKIE_NETSCAPE = 0,
+ COOKIE_RFC2109 = 1,
+ COOKIE_RFC2965 = 2
+ } version; /**< Specification compliance */
+ bool no_destroy; /**< Never destroy this cookie,
+ * unless it's expired */
+
+ struct cookie *next; /**< Next in list */
+};
+
+struct auth_data {
+ char *realm; /**< Protection realm */
+ char *auth; /**< Authentication details in form
+ * username:password */
+};
+
+struct url_internal_data {
+ char *title; /**< Resource title */
+ unsigned int visits; /**< Visit count */
+ time_t last_visit; /**< Last visit time */
+ content_type type; /**< Type of resource */
+};
+
+struct path_data {
+ char *scheme; /**< URL scheme for data */
+ unsigned int port; /**< Port number for data */
+ char *segment; /**< Path segment for this node */
+ unsigned int frag_cnt; /**< Number of entries in ::fragment */
+ char **fragment; /**< Array of fragments */
+
+ struct bitmap *thumb; /**< Thumbnail image of resource */
+ struct url_internal_data url; /**< URL data for resource */
+ struct auth_data auth; /**< Authentication data for resource */
+ struct cookie *cookies; /**< Cookies associated with resource */
+
+ struct path_data *next; /**< Next sibling */
+ struct path_data *prev; /**< Previous sibling */
+ struct path_data *parent; /**< Parent path segment */
+ struct path_data *children; /**< Child path segments */
+ struct path_data *last; /**< Last child */
+};
+
+struct host_part {
+ /**< Known paths on this host. This _must_ be first so that
+ * struct host_part *h = (struct host_part *)mypath; works */
+ struct path_data paths;
+ bool permit_invalid_certs; /**< Allow access to SSL protected
+ * resources on this host without
+ * verifying certificate authenticity
+ */
+
+ char *part; /**< Part of host string */
+
+ struct host_part *next; /**< Next sibling */
+ struct host_part *prev; /**< Previous sibling */
+ struct host_part *parent; /**< Parent host part */
+ struct host_part *children; /**< Child host parts */
+};
+
+struct search_node {
+ const struct host_part *data; /**< Host tree entry */
+
+ unsigned int level; /**< Node level */
+
+ struct search_node *left; /**< Left subtree */
+ struct search_node *right; /**< Right subtree */
+};
+
+/* Saving */
+static void urldb_save_search_tree(struct search_node *root, FILE *fp);
+static void urldb_count_urls(const struct path_data *root, time_t expiry,
+ unsigned int *count);
+static void urldb_write_urls(const struct path_data *parent,
+ const char *host, FILE *fp, char **path, int *path_alloc,
+ int *path_used, time_t expiry);
+
+/* Iteration */
+static bool urldb_iterate_partial_host(struct search_node *root,
+ const char *prefix, bool (*callback)(const char *url));
+static bool urldb_iterate_partial_path(const struct path_data *parent,
+ const char *host, const char *prefix,
+ char **path, int *path_alloc, int *path_used,
+ bool (*callback)(const char *url));
+static bool urldb_iterate_entries_host(struct search_node *parent,
+ bool (*callback)(const char *url));
+static bool urldb_iterate_entries_path(const char *host, char **path,
+ int *path_alloc, int *path_used,
+ const struct path_data *parent,
+ bool (*callback)(const char *url));
+
+/* Insertion */
+static struct host_part *urldb_add_host_node(const char *part,
+ struct host_part *parent);
+static struct host_part *urldb_add_host(const char *host);
+static struct path_data *urldb_add_path_node(const char *scheme,
+ unsigned int port, const char *segment, const char *fragment,
+ struct path_data *parent);
+static struct path_data *urldb_add_path(const char *scheme,
+ unsigned int port, const struct host_part *host,
+ const char *path, const char *fragment);
+static int urldb_add_path_fragment_cmp(const void *a, const void *b);
+static struct path_data *urldb_add_path_fragment(struct path_data *segment,
+ const char *fragment);
+
+/* Lookup */
+static struct path_data *urldb_find_url(const char *url);
+static struct path_data *urldb_match_path(const struct path_data *parent,
+ const char *path, const char *scheme, unsigned short port);
+
+/* Dump */
+static void urldb_dump_hosts(struct host_part *parent);
+static void urldb_dump_paths(struct path_data *parent);
+static void urldb_dump_search(struct search_node *parent, int depth);
+
+/* Search tree */
+static struct search_node *urldb_search_insert(struct search_node *root,
+ const struct host_part *data);
+static struct search_node *urldb_search_insert_internal(
+ struct search_node *root, struct search_node *n);
+static struct search_node *urldb_search_remove(struct search_node *root,
+ const struct host_part *data);
+static const struct host_part *urldb_search_find(struct search_node *root,
+ const char *host);
+static struct search_node *urldb_search_skew(struct search_node *root);
+static struct search_node *urldb_search_split(struct search_node *root);
+static int urldb_search_match_host(const struct host_part *a,
+ const struct host_part *b);
+static int urldb_search_match_string(const struct host_part *a,
+ const char *b);
+static int urldb_search_match_prefix(const struct host_part *a,
+ const char *b);
+
+/** Root database handle */
+static struct host_part db_root;
+
+/** Search trees - one per letter + 1 for IPs */
+#define NUM_SEARCH_TREES 27
+#define ST_IP 0
+#define ST_DN 1
+static struct search_node empty = { 0, 0, &empty, &empty };
+static struct search_node *search_trees[NUM_SEARCH_TREES] = {
+ &empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
+ &empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
+ &empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty,
+ &empty, &empty, &empty
+};
+
+/**
+ * Import an URL database from file, replacing any existing database
+ *
+ * \param filename Name of file containing data
+ */
+void urldb_load(const char *filename)
+{
+#define MAXIMUM_URL_LENGTH 4096
+ char s[MAXIMUM_URL_LENGTH];
+ struct host_part *h;
+ int urls;
+ int i;
+ int version;
+ int length;
+ FILE *fp;
+
+ /** \todo optimise */
+
+ assert(filename);
+
+ LOG(("Loading URL file"));
+
+ fp = fopen(filename, "r");
+ if (!fp) {
+ LOG(("Failed to open file '%s' for reading", filename));
+ return;
+ }
+
+ if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+ return;
+ version = atoi(s);
+ if (version < 105) {
+ LOG(("Unsupported URL file version."));
+ return;
+ }
+ if (version > 105) {
+ LOG(("Unknown URL file version."));
+ return;
+ }
+
+ while (fgets(s, MAXIMUM_URL_LENGTH, fp)) {
+ /* get the hostname */
+ length = strlen(s) - 1;
+ s[length] = '\0';
+
+ /* skip data that has ended up with a host of '' */
+ if (length == 0) {
+ if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+ break;
+ urls = atoi(s);
+ for (i = 0; i < (6 * urls); i++)
+ if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+ break;
+ continue;
+ }
+
+ h = urldb_add_host(s);
+ if (!h)
+ die("Memory exhausted whilst loading URL file");
+
+ if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+ break;
+ urls = atoi(s);
+
+ /* load the non-corrupt data */
+ for (i = 0; i < urls; i++) {
+ struct path_data *p = NULL;
+
+ if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+ break;
+ length = strlen(s) - 1;
+ s[length] = '\0';
+
+ urldb_add_url(s);
+ p = urldb_find_url(s);
+
+ if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+ break;
+ if (p)
+ p->url.visits = (unsigned int)atoi(s);
+
+ if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+ break;
+ if (p)
+ p->url.last_visit = (time_t)atoi(s);
+
+ if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+ break;
+ if (p)
+ p->url.type = (content_type)atoi(s);
+
+ if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+ break;
+#ifdef riscos
+ if (p && strlen(s) == 12) {
+ /* ensure filename is 'XX.XX.XX.XX' */
+ if ((s[2] == '.') && (s[5] == '.') &&
+ (s[8] == '.')) {
+ s[11] = '\0';
+ p->thumb = bitmap_create_file(s);
+ }
+ }
+#endif
+
+ if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+ break;
+ length = strlen(s) - 1;
+ if (p && length > 0) {
+ s[length] = '\0';
+ p->url.title = malloc(length + 1);
+ if (p->url.title)
+ memcpy(p->url.title, s, length + 1);
+ }
+ }
+ }
+
+ fclose(fp);
+ LOG(("Successfully loaded URL file"));
+#undef MAXIMUM_URL_LENGTH
+}
+
+/**
+ * Export the current database to file
+ *
+ * \param filename Name of file to export to
+ */
+void urldb_save(const char *filename)
+{
+ FILE *fp;
+ int i;
+
+ assert(filename);
+
+ fp = fopen(filename, "w");
+ if (!fp) {
+ LOG(("Failed to open file '%s' for writing", filename));
+ return;
+ }
+
+ /* file format version number */
+ fprintf(fp, "105\n");
+
+ for (i = 0; i != NUM_SEARCH_TREES; i++) {
+ urldb_save_search_tree(search_trees[i], fp);
+ }
+
+ fclose(fp);
+}
+
+/**
+ * Save a search (sub)tree
+ *
+ * \param root Root of (sub)tree to save
+ * \param fp File to write to
+ */
+void urldb_save_search_tree(struct search_node *parent, FILE *fp)
+{
+ char host[256];
+ const struct host_part *h;
+ unsigned int path_count = 0;
+ char *path, *p, *end;
+ int path_alloc = 64, path_used = 2;
+ time_t expiry = time(NULL) - (60 * 60 * 24) * option_expire_url;
+
+ if (parent == &empty)
+ return;
+
+ urldb_save_search_tree(parent->left, fp);
+
+ path = malloc(path_alloc);
+ if (!path)
+ return;
+
+ path[0] = '/';
+ path[1] = '\0';
+
+ for (h = parent->data, p = host, end = host + sizeof host;
+ h && h != &db_root && p < end; h = h->parent) {
+ int written = snprintf(p, end - p, "%s%s", h->part,
+ (h->parent && h->parent->parent) ? "." : "");
+ if (written < 0) {
+ free(path);
+ return;
+ }
+ p += written;
+ }
+
+ urldb_count_urls(&parent->data->paths, expiry, &path_count);
+
+ if (path_count > 0) {
+ fprintf(fp, "%s\n%i\n", host, path_count);
+
+ urldb_write_urls(&parent->data->paths, host, fp,
+ &path, &path_alloc, &path_used, expiry);
+ }
+
+ free(path);
+
+ urldb_save_search_tree(parent->right, fp);
+}
+
+/**
+ * Count number of URLs associated with a host
+ *
+ * \param root Root of path data tree
+ * \param expiry Expiry time for URLs
+ * \param count Pointer to count
+ */
+void urldb_count_urls(const struct path_data *root, time_t expiry,
+ unsigned int *count)
+{
+ const struct path_data *p;
+
+ if (!root->children) {
+ if ((root->url.last_visit > expiry) &&
+ (root->url.visits > 0))
+ (*count)++;
+ }
+
+ for (p = root->children; p; p = p->next)
+ urldb_count_urls(p, expiry, count);
+}
+
+/**
+ * Write URLs associated with a host
+ *
+ * \param parent Root of (sub)tree to write
+ * \param host Current host name
+ * \param fp File to write to
+ * \param path Current path string
+ * \param path_alloc Allocated size of path
+ * \param path_used Used size of path
+ * \param expiry Expiry time of URLs
+ */
+void urldb_write_urls(const struct path_data *parent, const char *host,
+ FILE *fp, char **path, int *path_alloc, int *path_used,
+ time_t expiry)
+{
+ const struct path_data *p;
+ int i;
+ int pused = *path_used;
+
+ if (!parent->children) {
+ /* leaf node */
+ if (!((parent->url.last_visit > expiry) &&
+ (parent->url.visits > 0)))
+ /* expired */
+ return;
+
+ fprintf(fp, "%s://%s", parent->scheme, host);
+
+ if (parent->port)
+ fprintf(fp,":%d", parent->port);
+
+ fprintf(fp, "%s\n", *path);
+
+ /** \todo handle fragments? */
+
+ fprintf(fp, "%i\n%i\n%i\n", parent->url.visits,
+ (int)parent->url.last_visit,
+ (int)parent->url.type);
+
+#ifdef riscos
+ if (parent->thumb)
+ fprintf(fp, "%s\n", parent->thumb->filename);
+#else
+ fprintf(fp, "\n");
+#endif
+
+ if (parent->url.title) {
+ char *s = parent->url.title;
+ for (i = 0; s[i] != '\0'; i++)
+ if (s[i] < 32)
+ s[i] = ' ';
+ for (--i; ((i > 0) && (s[i] == ' ')); i--)
+ s[i] = '\0';
+ fprintf(fp, "%s\n", parent->url.title);
+ } else
+ fprintf(fp, "\n");
+ }
+
+ for (p = parent->children; p; p = p->next) {
+ int len = *path_used + strlen(p->segment) + 1;
+ if (*path_alloc < len) {
+ char *temp = realloc(*path,
+ (len > 64) ? len : *path_alloc + 64);
+ if (!temp)
+ return;
+ *path = temp;
+ *path_alloc = (len > 64) ? len : *path_alloc + 64;
+ }
+
+ strcat(*path, p->segment);
+ if (p->children) {
+ strcat(*path, "/");
+ } else {
+ len -= 1;
+ }
+
+ *path_used = len;
+
+ urldb_write_urls(p, host, fp, path, path_alloc, path_used,
+ expiry);
+
+ /* restore path to its state on entry to this function */
+ *path_used = pused;
+ (*path)[pused - 1] = '\0';
+ }
+}
+
+/**
+ * Insert an URL into the database
+ *
+ * \param url URL to insert
+ * \return true on success, false otherwise
+ */
+bool urldb_add_url(const char *url)
+{
+ struct host_part *h;
+ struct path_data *p;
+ char *fragment = NULL, *host, *plq, *scheme, *colon;
+ unsigned short port;
+ url_func_result ret;
+ assert(url);
+
+ /** \todo consider file: URLs */
+
+ host = strchr(url, '#');
+ if (host) {
+ fragment = strdup(host+1);
+ if (!fragment)
+ return false;
+ }
+
+ /* extract host */
+ ret = url_host(url, &host);
+ if (ret != URL_FUNC_OK)
+ return false;
+
+ /* extract path, leafname, query */
+ ret = url_plq(url, &plq);
+ if (ret != URL_FUNC_OK) {
+ free(host);
+ free(fragment);
+ return false;
+ }
+
+ /* extract scheme */
+ ret = url_scheme(url, &scheme);
+ if (ret != URL_FUNC_OK) {
+ free(plq);
+ free(host);
+ free(fragment);
+ return false;
+ }
+
+ colon = strrchr(host, ':');
+ if (!colon) {
+ port = 0;
+ } else {
+ *colon = '\0';
+ port = atoi(colon + 1);
+ }
+
+ /* Get host entry */
+ h = urldb_add_host(host);
+ if (!h) {
+ free(scheme);
+ free(plq);
+ free(host);
+ free(fragment);
+ return false;
+ }
+
+ /* Get path entry */
+ p = urldb_add_path(scheme, port, h, plq, fragment);
+ if (!p) {
+ free(scheme);
+ free(plq);
+ free(host);
+ free(fragment);
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * Set an URL's title string, replacing any existing one
+ *
+ * \param url The URL to look for
+ * \param title The title string to use (copied)
+ */
+void urldb_set_url_title(const char *url, const char *title)
+{
+ struct path_data *p;
+ char *temp;
+
+ assert(url && title);
+
+ p = urldb_find_url(url);
+ if (!p)
+ return;
+
+ temp = strdup(title);
+ if (!temp)
+ return;
+
+ free(p->url.title);
+ p->url.title = temp;
+}
+
+/**
+ * Set an URL's content type
+ *
+ * \param url The URL to look for
+ * \param type The type to set
+ */
+void urldb_set_url_content_type(const char *url, content_type type)
+{
+ struct path_data *p;
+
+ assert(url);
+
+ p = urldb_find_url(url);
+ if (!p)
+ return;
+
+ p->url.type = type;
+}
+
+/**
+ * Update an URL's visit data
+ *
+ * \param url The URL to update
+ */
+void urldb_update_url_visit_data(const char *url)
+{
+ struct path_data *p;
+
+ assert(url);
+
+ p = urldb_find_url(url);
+ if (!p)
+ return;
+
+ p->url.last_visit = time(NULL);
+ p->url.visits++;
+}
+
+/**
+ * Reset an URL's visit statistics
+ *
+ * \param url The URL to reset
+ */
+void urldb_reset_url_visit_data(const char *url)
+{
+ struct path_data *p;
+
+ assert(url);
+
+ p = urldb_find_url(url);
+ if (!p)
+ return;
+
+ p->url.last_visit = (time_t)0;
+ p->url.visits = 0;
+}
+
+
+/**
+ * Find data for an URL.
+ *
+ * \param url Absolute URL to look for
+ * \return Pointer to result struct, or NULL
+ */
+const struct url_data *urldb_get_url_data(const char *url)
+{
+ struct path_data *p;
+
+ assert(url);
+
+ p = urldb_find_url(url);
+ if (!p)
+ return NULL;
+
+ return (struct url_data *)&p->url;
+}
+
+/**
+ * Look up authentication details in database
+ *
+ * \param url Absolute URL to search for
+ * \return Pointer to authentication details, or NULL if not found
+ */
+const char *urldb_get_auth_details(const char *url)
+{
+ struct path_data *p, *q;
+
+ assert(url);
+
+ /* add to the db, so our lookup will work */
+ urldb_add_url(url);
+
+ p = urldb_find_url(url);
+ if (!p)
+ return NULL;
+
+ for (; p; p = p->parent) {
+ /* The parent path entry is stored hung off the
+ * parent entry with an empty (not NULL) segment string.
+ * We look for this here.
+ */
+ for (q = p->children; q; q = q->next) {
+ if (strlen(q->segment) == 0)
+ break;
+ }
+
+ if (q && q->auth.realm && q->auth.auth)
+ break;
+ }
+
+ if (!q)
+ return NULL;
+
+ return q->auth.auth;
+}
+
+/**
+ * Retrieve certificate verification permissions from database
+ *
+ * \param url Absolute URL to search for
+ * \return true to permit connections to hosts with invalid certificates,
+ * false otherwise.
+ */
+bool urldb_get_cert_permissions(const char *url)
+{
+ struct path_data *p;
+ struct host_part *h;
+
+ assert(url);
+
+ p = urldb_find_url(url);
+ if (!p)
+ return false;
+
+ for (; p && p->parent; p = p->parent)
+ /* do nothing */;
+
+ h = (struct host_part *)p;
+
+ return h->permit_invalid_certs;
+}
+
+/**
+ * Set authentication data for an URL
+ *
+ * \param url The URL to consider
+ * \param realm The authentication realm
+ * \param auth The authentication details (in form username:password)
+ */
+void urldb_set_auth_details(const char *url, const char *realm,
+ const char *auth)
+{
+ struct path_data *p;
+ char *t1, *t2;
+
+ assert(url && realm && auth);
+
+ /* add url, in case it's missing */
+ urldb_add_url(url);
+
+ p = urldb_find_url(url);
+ if (!p)
+ return;
+
+ /** \todo search subtree for same realm/auth details
+ * and remove them (as the lookup routine searches up the tree) */
+
+ t1 = strdup(realm);
+ t2 = strdup(auth);
+
+ if (!t1 || !t2) {
+ free(t1);
+ free(t2);
+ return;
+ }
+
+ free(p->auth.realm);
+ free(p->auth.auth);
+
+ p->auth.realm = t1;
+ p->auth.auth = t2;
+}
+
+/**
+ * Set certificate verification permissions
+ *
+ * \param url URL to consider
+ * \param permit Set to true to allow invalid certificates
+ */
+void urldb_set_cert_permissions(const char *url, bool permit)
+{
+ struct path_data *p;
+ struct host_part *h;
+
+ assert(url);
+
+ /* add url, in case it's missing */
+ urldb_add_url(url);
+
+ p = urldb_find_url(url);
+ if (!p)
+ return;
+
+ for (; p && p->parent; p = p->parent)
+ /* do nothing */;
+
+ h = (struct host_part *)p;
+
+ h->permit_invalid_certs = permit;
+}
+
+/**
+ * Set thumbnail for url, replacing any existing thumbnail
+ *
+ * \param url Absolute URL to consider
+ * \param bitmap Opaque pointer to thumbnail data
+ */
+void urldb_set_thumbnail(const char *url, struct bitmap *bitmap)
+{
+ struct path_data *p;
+
+ assert(url && bitmap);
+
+ p = urldb_find_url(url);
+ if (!p)
+ return;
+
+ if (p->thumb)
+ bitmap_destroy(p->thumb);
+
+ p->thumb = bitmap;
+}
+
+/**
+ * Retrieve thumbnail data for given URL
+ *
+ * \param url Absolute URL to search for
+ * \return Pointer to thumbnail data, or NULL if not found.
+ */
+const struct bitmap *urldb_get_thumbnail(const char *url)
+{
+ struct path_data *p;
+
+ assert(url);
+
+ p = urldb_find_url(url);
+ if (!p)
+ return NULL;
+
+ return p->thumb;
+}
+
+/**
+ * Iterate over entries in the database which match the given prefix
+ *
+ * \param prefix Prefix to match
+ * \param callback Callback function
+ */
+void urldb_iterate_partial(const char *prefix,
+ bool (*callback)(const char *url))
+{
+ char host[256];
+ char buf[260]; /* max domain + "www." */
+ const char *slash;
+ struct search_node *tree;
+ const struct host_part *h;
+
+ assert(prefix && callback);
+
+ slash = strchr(prefix, '/');
+
+ if (*prefix >= '0' && *prefix <= '9')
+ tree = search_trees[ST_IP];
+ else if (isalpha(*prefix))
+ tree = search_trees[ST_DN + tolower(*prefix) - 'a'];
+ else
+ return;
+
+ if (slash) {
+ /* if there's a slash in the input, then we can
+ * assume that we're looking for a path */
+ char *path, *domain = host;
+ int path_alloc = 64, path_used = 2;
+
+ snprintf(host, sizeof host, "%.*s", slash - prefix, prefix);
+
+ h = urldb_search_find(tree, host);
+ if (!h) {
+ int len = slash - prefix;
+
+ if ((len == 1 && tolower(host[0]) != 'w') ||
+ (len == 2 && (tolower(host[0]) != 'w' ||
+ tolower(host[1]) != 'w')) ||
+ (len >= 3 &&
+ strncasecmp(host, "www", 3))) {
+ snprintf(buf, sizeof buf, "www.%s", host);
+ h = urldb_search_find(
+ search_trees[ST_DN + 'w' - 'a'],
+ buf);
+ if (!h)
+ return;
+ domain = buf;
+ } else
+ return;
+ }
+
+ path = malloc(path_alloc);
+ if (!path)
+ return;
+
+ path[0] = '/';
+ path[1] = '\0';
+
+ urldb_iterate_partial_path(&h->paths, domain, slash + 1,
+ &path, &path_alloc, &path_used, callback);
+
+ free(path);
+ } else {
+ int len = strlen(prefix);
+
+ /* looking for hosts */
+ if (!urldb_iterate_partial_host(tree, prefix, callback))
+ return;
+
+ if ((len == 1 && tolower(prefix[0]) != 'w') ||
+ (len == 2 && (tolower(prefix[0]) != 'w' ||
+ tolower(prefix[1]) != 'w')) ||
+ (len >= 3 &&
+ strncasecmp(prefix, "www", 3))) {
+ /* now look for www.prefix */
+ snprintf(buf, sizeof buf, "www.%s", prefix);
+ if(!urldb_iterate_partial_host(
+ search_trees[ST_DN + 'w' - 'a'],
+ buf, callback))
+ return;
+ }
+ }
+}
+
+/**
+ * Partial host iterator (internal)
+ *
+ * \param root Root of (sub)tree to traverse
+ * \param prefix Prefix to match
+ * \param callback Callback function
+ * \return true to continue, false otherwise
+ */
+bool urldb_iterate_partial_host(struct search_node *root, const char *prefix,
+ bool (*callback)(const char *url))
+{
+ int c;
+ const struct host_part *h;
+ char domain[256], *p, *end;
+ char *path;
+ int path_alloc = 64, path_used = 2;
+
+ assert(root && prefix && callback);
+
+ if (root == &empty)
+ return true;
+
+ c = urldb_search_match_prefix(root->data, prefix);
+
+ if (c > 0)
+ /* No match => look in left subtree */
+ return urldb_iterate_partial_host(root->left, prefix,
+ callback);
+ else if (c < 0)
+ /* No match => look in right subtree */
+ return urldb_iterate_partial_host(root->right, prefix,
+ callback);
+ else {
+ /* Match => iterate over l/r subtrees & process this node */
+ if (!urldb_iterate_partial_host(root->left, prefix,
+ callback))
+ return false;
+
+ /* Generate host string */
+ for (h = root->data, p = domain,
+ end = domain + sizeof domain;
+ h && h != &db_root && p < end;
+ h = h->parent) {
+ int written = snprintf(p, end - p, "%s%s", h->part,
+ (h->parent && h->parent->parent) ? "." : "");
+ if (written < 0)
+ return false;
+ p += written;
+ }
+
+ path = malloc(path_alloc);
+ if (!path)
+ return false;
+
+ path[0] = '/';
+ path[1] = '\0';
+
+ /* and extract all paths attached to this host */
+ if (!urldb_iterate_entries_path(domain, &path, &path_alloc,
+ &path_used, &root->data->paths, callback)) {
+ free(path);
+ return false;
+ }
+
+ free(path);
+
+ if (!urldb_iterate_partial_host(root->right, prefix,
+ callback))
+ return false;
+ }
+
+ return true;
+}
+
+/**
+ * Partial path iterator (internal)
+ *
+ * \param parent Root of (sub)tree to traverse
+ * \param host Host string
+ * \param prefix Prefix to match
+ * \param path The built path up to this point
+ * \param path_alloc Allocated size of path
+ * \param path_used Used size of path
+ * \param callback Callback function
+ * \return true to continue, false otherwise
+ */
+bool urldb_iterate_partial_path(const struct path_data *parent,
+ const char *host, const char *prefix,
+ char **path, int *path_alloc, int *path_used,
+ bool (*callback)(const char *url))
+{
+ const struct path_data *p;
+ const char *slash, *end = prefix + strlen(prefix);
+ int pused = *path_used;
+ int c;
+
+ slash = strchr(prefix, '/');
+ if (!slash)
+ slash = end;
+
+ if (slash == prefix && *prefix == '/')
+ /* Ignore "//" */
+ return true;
+
+ for (p = parent->children; p; p = p->next) {
+ if ((c = strncasecmp(p->segment, prefix, slash - prefix)) < 0)
+ /* didn't match, but may be more */
+ continue;
+ else if (c > 0)
+ /* no more possible matches */
+ break;
+
+ /* prefix matches so far */
+ int len = *path_used + strlen(p->segment) + 1;
+ if (*path_alloc < len) {
+ char *temp = realloc(*path,
+ (len > 64) ? len : *path_alloc + 64);
+ if (!temp)
+ return false;
+ *path = temp;
+ *path_alloc = (len > 64) ? len : *path_alloc + 64;
+ }
+
+ strcat(*path, p->segment);
+ if (p->children)
+ strcat(*path, "/");
+ else
+ len -= 1;
+
+ *path_used = len;
+
+ if (slash == end) {
+ /* we've run out of prefix, so all
+ * paths below this one match */
+ if (!urldb_iterate_entries_path(host, path,
+ path_alloc, path_used, p, callback))
+ return false;
+ } else {
+ /* more prefix to go => recurse */
+ if (!urldb_iterate_partial_path(p, host, slash + 1,
+ path, path_alloc, path_used,
+ callback))
+ return false;
+ }
+
+ /* restore path to that from input for next child */
+ *path_used = pused;
+ (*path)[pused - 1] = '\0';
+ }
+
+ return true;
+}
+
+/**
+ * Iterate over all entries in database
+ *
+ * Walks every search tree (one per initial letter plus one for IP
+ * addresses), invoking the callback once per stored URL.
+ *
+ * \param callback Function to callback for each entry; return false
+ *                 from the callback to abort the iteration early
+ */
+void urldb_iterate_entries(bool (*callback)(const char *url))
+{
+	int i;
+
+	assert(callback);
+
+	for (i = 0; i < NUM_SEARCH_TREES; i++) {
+		/* A false return from the host iterator means the
+		 * callback requested termination */
+		if (!urldb_iterate_entries_host(search_trees[i],
+				callback))
+			break;
+	}
+}
+
+/**
+ * Host data iterator (internal)
+ *
+ * \param parent Root of subtree to iterate over
+ * \param callback Callback function
+ * \return true to continue, false otherwise
+ */
+bool urldb_iterate_entries_host(struct search_node *parent,
+ bool (*callback)(const char *url))
+{
+ char domain[256], *p, *end;
+ const struct host_part *h;
+ char *path;
+ int path_alloc = 64, path_used = 2;
+
+ if (parent == &empty)
+ return true;
+
+ if (!urldb_iterate_entries_host(parent->left, callback))
+ return false;
+
+ for (h = parent->data, p = domain, end = domain + sizeof domain;
+ h && h != &db_root && p < end; h = h->parent) {
+ int written = snprintf(p, end - p, "%s%s", h->part,
+ (h->parent && h->parent->parent) ? "." : "");
+ if (written < 0)
+ return false;
+ p += written;
+ }
+
+ path = malloc(path_alloc);
+ if (!path)
+ return false;
+
+ path[0] = '/';
+ path[1] = '\0';
+
+ if (!urldb_iterate_entries_path(domain, &path, &path_alloc,
+ &path_used, &parent->data->paths, callback)) {
+ free(path);
+ return false;
+ }
+
+ free(path);
+
+ if (!urldb_iterate_entries_host(parent->right, callback))
+ return false;
+
+ return true;
+}
+
+/**
+ * Path data iterator (internal)
+ *
+ * \param host Host component of output URI
+ * \param path The built path up to this point
+ * \param path_alloc Allocated size of path
+ * \param path_used Used size of path
+ * \param parent Root of subtree to iterate over
+ * \param callback Callback function to call
+ * \return true to continue, false otherwise
+ */
+bool urldb_iterate_entries_path(const char *host, char **path,
+ int *path_alloc, int *path_used,
+ const struct path_data *parent,
+ bool (*callback)(const char *url))
+{
+ const struct path_data *p;
+ int pused = *path_used;
+
+ if (!parent->children) {
+ /* leaf node */
+ int schemelen = strlen(parent->scheme);
+ int hostlen = strlen(host);
+ int prefixlen = schemelen + 3 /* :// */ +
+ hostlen + 6 /* :NNNNN */;
+ static char *url;
+ static int url_alloc;
+ int written;
+
+ if (url_alloc < *path_used + prefixlen + 2) {
+ char *temp = realloc(url, *path_used + prefixlen + 2);
+ if (!temp)
+ return false;
+ url = temp;
+ url_alloc = *path_used + prefixlen + 2;
+ }
+
+ written = sprintf(url, "%s://%s", parent->scheme, host);
+ if (written < 0) {
+ return false;
+ }
+
+ if (parent->port) {
+ written = sprintf(url + schemelen + 3 + hostlen,
+ ":%d", parent->port);
+ if (written < 0) {
+ return false;
+ }
+ written += schemelen + 3 + hostlen;
+ }
+
+ written = sprintf(url + written, "%s", *path);
+ if (written < 0) {
+ return false;
+ }
+
+ /** \todo handle fragments? */
+
+ if (!callback(url))
+ return false;
+ }
+
+ for (p = parent->children; p; p = p->next) {
+ int len = *path_used + strlen(p->segment) + 1;
+ if (*path_alloc < len) {
+ char *temp = realloc(*path,
+ (len > 64) ? len : *path_alloc + 64);
+ if (!temp)
+ return false;
+ *path = temp;
+ *path_alloc = (len > 64) ? len : *path_alloc + 64;
+ }
+
+ strcat(*path, p->segment);
+ if (p->children) {
+ strcat(*path, "/");
+ } else {
+ len -= 1;
+ }
+
+ *path_used = len;
+
+ if (!urldb_iterate_entries_path(host, path, path_alloc,
+ path_used, p, callback))
+ return false;
+
+ /* restore path to its state on entry to this function */
+ *path_used = pused;
+ (*path)[pused - 1] = '\0';
+ }
+
+ return true;
+}
+
+/**
+ * Add a host node to the tree
+ *
+ * Allocates a new host_part and links it at the head of the parent's
+ * child list. No attempt is made to keep children sorted.
+ *
+ * \param part Host segment to add (or whole IP address) (copied)
+ * \param parent Parent node to add to
+ * \return Pointer to added node, or NULL on memory exhaustion
+ */
+struct host_part *urldb_add_host_node(const char *part,
+		struct host_part *parent)
+{
+	struct host_part *d;
+
+	assert(part && parent);
+
+	/* calloc => all child/sibling pointers and flags start NULL/0 */
+	d = calloc(1, sizeof(struct host_part));
+	if (!d)
+		return NULL;
+
+	d->part = strdup(part);
+	if (!d->part) {
+		free(d);
+		return NULL;
+	}
+
+	/* Insert at head of parent's (doubly-linked) child list */
+	d->next = parent->children;
+	if (parent->children)
+		parent->children->prev = d;
+	d->parent = parent;
+	parent->children = d;
+
+	return d;
+}
+
+/**
+ * Add a host to the database, creating any intermediate entries
+ *
+ * Hosts beginning with a digit are treated as IP addresses and stored
+ * whole as a top-level entry; FQDNs are split on '.' and stored as a
+ * chain of host_part nodes from TLD down to the leaf segment.
+ *
+ * \param host Hostname (FQDN) or IP address to add
+ * \return Pointer to leaf node, or NULL on memory exhaustion
+ */
+struct host_part *urldb_add_host(const char *host)
+{
+	struct host_part *d = (struct host_part *) &db_root, *e;
+	struct search_node *s;
+	char buf[256]; /* 256 bytes is sufficient - domain names are
+			* limited to 255 chars. */
+	char *part;
+
+	assert(host);
+
+	if (*(host) >= '0' && *(host) <= '9') {
+		/* Host is an IP, so simply add as TLD */
+
+		/* Check for existing entry */
+		for (e = d->children; e; e = e->next)
+			if (strcasecmp(host, e->part) == 0)
+				/* found => return it */
+				return e;
+
+		d = urldb_add_host_node(host, d);
+		if (!d)
+			/* Memory exhausted. Must bail out here:
+			 * urldb_search_insert() asserts its data
+			 * argument is non-NULL. */
+			return NULL;
+
+		s = urldb_search_insert(search_trees[ST_IP], d);
+		if (!s) {
+			/* failed */
+			d = NULL;
+		} else {
+			search_trees[ST_IP] = s;
+		}
+
+		return d;
+	}
+
+	/* Copy host string, so we can corrupt it */
+	strncpy(buf, host, sizeof buf);
+	buf[sizeof buf - 1] = '\0';
+
+	/* Process FQDN segments backwards (TLD first) */
+	do {
+		part = strrchr(buf, '.');
+		if (!part) {
+			/* last segment */
+			/* Check for existing entry */
+			for (e = d->children; e; e = e->next)
+				if (strcasecmp(buf, e->part) == 0)
+					break;
+
+			if (e) {
+				d = e;
+			} else {
+				d = urldb_add_host_node(buf, d);
+			}
+
+			/* And insert into search tree, keyed on the
+			 * initial letter of the leaf segment */
+			if (d) {
+				if (isalpha(*buf)) {
+					struct search_node **r;
+					r = &search_trees[
+						tolower(*buf) - 'a' + ST_DN];
+
+					s = urldb_search_insert(*r, d);
+					if (!s) {
+						/* failed */
+						d = NULL;
+					} else {
+						*r = s;
+					}
+				} else {
+					/* leaf segment doesn't start
+					 * with a letter => unsupported */
+					d = NULL;
+				}
+			}
+			break;
+		}
+
+		/* Check for existing entry */
+		for (e = d->children; e; e = e->next)
+			if (strcasecmp(part + 1, e->part) == 0)
+				break;
+
+		d = e ? e : urldb_add_host_node(part + 1, d);
+		if (!d)
+			break;
+
+		/* Truncate buf at the dot and continue with the
+		 * remaining (more specific) segments */
+		*part = '\0';
+	} while (1);
+
+	return d;
+}
+
+/**
+ * Add a path node to the tree
+ *
+ * \param scheme URL scheme associated with path (copied)
+ * \param port Port number on host associated with path
+ * \param segment Path segment to add (copied)
+ * \param fragment URL fragment (copied), or NULL
+ * \param parent Parent node to add to
+ * \return Pointer to added node, or NULL on memory exhaustion
+ */
+struct path_data *urldb_add_path_node(const char *scheme, unsigned int port,
+ const char *segment, const char *fragment,
+ struct path_data *parent)
+{
+ struct path_data *d, *e;
+
+ assert(scheme && segment && parent);
+
+ d = calloc(1, sizeof(struct path_data));
+ if (!d)
+ return NULL;
+
+ d->scheme = strdup(scheme);
+ if (!d->scheme) {
+ free(d);
+ return NULL;
+ }
+
+ d->port = port;
+
+ d->segment = strdup(segment);
+ if (!d->segment) {
+ free(d->scheme);
+ free(d);
+ return NULL;
+ }
+
+ if (fragment) {
+ if (!urldb_add_path_fragment(d, fragment)) {
+ free(d->segment);
+ free(d->scheme);
+ free(d);
+ return NULL;
+ }
+ }
+
+ for (e = parent->children; e; e = e->next)
+ if (strcmp(e->segment, d->segment) > 0)
+ break;
+
+ if (e) {
+ d->prev = e->prev;
+ d->next = e;
+ if (e->prev)
+ e->prev->next = d;
+ else
+ parent->children = d;
+ e->prev = d;
+ } else if (!parent->children) {
+ d->prev = d->next = NULL;
+ parent->children = parent->last = d;
+ } else {
+ d->next = NULL;
+ d->prev = parent->last;
+ parent->last->next = d;
+ parent->last = d;
+ }
+ d->parent = parent;
+
+ return d;
+}
+
+/**
+ * Add a path to the database, creating any intermediate entries
+ *
+ * \param scheme URL scheme associated with path
+ * \param port Port number on host associated with path
+ * \param host Host tree node to attach to
+ * \param path Absolute path to add
+ * \param fragment URL fragment, or NULL
+ * \return Pointer to leaf node, or NULL on memory exhaustion
+ */
+struct path_data *urldb_add_path(const char *scheme, unsigned int port,
+ const struct host_part *host, const char *path,
+ const char *fragment)
+{
+ struct path_data *d, *e;
+ char *buf;
+ char *segment, *slash;
+
+ assert(scheme && host && path);
+
+ d = (struct path_data *) &host->paths;
+
+ /* Copy path string, so we can corrupt it */
+ buf = malloc(strlen(path) + 1);
+ if (!buf)
+ return NULL;
+
+ /* + 1 to strip leading '/' */
+ strcpy(buf, path + 1);
+
+ segment = buf;
+
+ /* Process path segments */
+ do {
+ slash = strchr(segment, '/');
+ if (!slash) {
+ /* last segment */
+ /* look for existing entry */
+ for (e = d->children; e; e = e->next)
+ if (strcmp(segment, e->segment) == 0 &&
+ strcasecmp(scheme,
+ e->scheme) == 0 &&
+ e->port == port)
+ break;
+
+ d = e ? urldb_add_path_fragment(e, fragment) :
+ urldb_add_path_node(scheme, port,
+ segment, fragment, d);
+ break;
+ }
+
+ *slash = '\0';
+
+ /* look for existing entry */
+ for (e = d->children; e; e = e->next)
+ if (strcmp(segment, e->segment) == 0 &&
+ strcasecmp(scheme, e->scheme) == 0 &&
+ e->port == port)
+ break;
+
+ d = e ? e : urldb_add_path_node(scheme, port, segment,
+ NULL, d);
+ if (!d)
+ break;
+
+ segment = slash + 1;
+ } while (1);
+
+ free(buf);
+
+ return d;
+}
+
+/**
+ * Fragment comparator callback for qsort
+ *
+ * Arguments point at (char *) elements of the fragment array; compares
+ * the strings case-insensitively.
+ */
+int urldb_add_path_fragment_cmp(const void *a, const void *b)
+{
+	return strcasecmp(*((const char **) a), *((const char **) b));
+}
+
+/**
+ * Add a fragment to a path segment
+ *
+ * Grows the segment's fragment array by one, appends a copy of the
+ * fragment string, and keeps the array sorted alphabetically.
+ *
+ * \param segment Path segment to add to
+ * \param fragment Fragment to add (copied), or NULL
+ * \return segment or NULL on memory exhaustion
+ */
+struct path_data *urldb_add_path_fragment(struct path_data *segment,
+		const char *fragment)
+{
+	char **temp;
+
+	assert(segment);
+
+	/* If no fragment, this function is a NOP
+	 * This may seem strange, but it makes the rest
+	 * of the code cleaner */
+	if (!fragment)
+		return segment;
+
+	/* Grow the pointer array by one slot */
+	temp = realloc(segment->fragment,
+			(segment->frag_cnt + 1) * sizeof(char *));
+	if (!temp)
+		return NULL;
+
+	segment->fragment = temp;
+	segment->fragment[segment->frag_cnt] = strdup(fragment);
+	if (!segment->fragment[segment->frag_cnt]) {
+		/* Don't free temp - it's now our buffer */
+		/* frag_cnt is not incremented, so the failed slot is
+		 * simply reused by a subsequent call */
+		return NULL;
+	}
+
+	segment->frag_cnt++;
+
+	/* We want fragments in alphabetical order, so sort them
+	 * It may prove better to insert in alphabetical order instead */
+	qsort(segment->fragment, segment->frag_cnt, sizeof (char *),
+			urldb_add_path_fragment_cmp);
+
+	return segment;
+}
+
+/**
+ * Find an URL in the database
+ *
+ * \param url The URL to find
+ * \return Pointer to path data, or NULL if not found
+ */
+struct path_data *urldb_find_url(const char *url)
+{
+ const struct host_part *h;
+ struct path_data *p;
+ struct search_node *tree;
+ char *host, *plq, *scheme, *colon;
+ unsigned short port;
+ url_func_result ret;
+
+ assert(url);
+
+ /** \todo consider file: URLs */
+
+ /* extract host */
+ ret = url_host(url, &host);
+ if (ret != URL_FUNC_OK)
+ return NULL;
+
+ /* extract path, leafname, query */
+ ret = url_plq(url, &plq);
+ if (ret != URL_FUNC_OK) {
+ free(host);
+ return NULL;
+ }
+
+ /* extract scheme */
+ ret = url_scheme(url, &scheme);
+ if (ret != URL_FUNC_OK) {
+ free(plq);
+ free(host);
+ return NULL;
+ }
+
+ colon = strrchr(host, ':');
+ if (!colon) {
+ port = 0;
+ } else {
+ *colon = '\0';
+ port = atoi(colon + 1);
+ }
+
+ if (*host >= '0' && *host <= '9')
+ tree = search_trees[ST_IP];
+ else if (isalpha(*host))
+ tree = search_trees[ST_DN + tolower(*host) - 'a'];
+ else {
+ free(plq);
+ free(host);
+ free(scheme);
+ return NULL;
+ }
+
+ h = urldb_search_find(tree, host);
+ if (!h) {
+ free(plq);
+ free(host);
+ free(scheme);
+ return NULL;
+ }
+
+ p = urldb_match_path(&h->paths, plq, scheme, port);
+
+ free(plq);
+ free(host);
+ free(scheme);
+
+ return p;
+}
+
+/**
+ * Match a path string
+ *
+ * Recursively consumes one '/'-separated segment of the sought path
+ * per level, descending into the matching child node.
+ *
+ * \param parent Path (sub)tree to look in
+ * \param path The path to search for (begins with '/')
+ * \param scheme The URL scheme associated with the path
+ * \param port The port associated with the path
+ * \return Pointer to path data or NULL if not found.
+ */
+struct path_data *urldb_match_path(const struct path_data *parent,
+		const char *path, const char *scheme, unsigned short port)
+{
+	struct path_data *p;
+	const char *slash;
+
+	/* Entire input consumed => current node is the match */
+	if (*path == '\0')
+		return (struct path_data *)parent;
+
+	slash = strchr(path + 1, '/');
+	if (!slash)
+		slash = path + strlen(path);
+
+	for (p = parent->children; p; p = p->next) {
+		/* The length-limited strncmp alone would accept a
+		 * stored segment that merely starts with the sought
+		 * one (e.g. stored "foobar" vs sought "foo"), so also
+		 * require the stored segment to end at that length. */
+		if (strncmp(p->segment, path + 1, slash - path - 1) == 0 &&
+				p->segment[slash - path - 1] == '\0' &&
+				strcmp(p->scheme, scheme) == 0 &&
+				p->port == port)
+			break;
+	}
+
+	if (p) {
+		return urldb_match_path(p, slash, scheme, port);
+	}
+
+	return NULL;
+}
+
+/**
+ * Dump URL database to stderr
+ *
+ * Dumps the host/path tree first, then each of the per-letter and IP
+ * search trees.
+ */
+void urldb_dump(void)
+{
+	int i;
+
+	urldb_dump_hosts(&db_root);
+
+	for (i = 0; i != NUM_SEARCH_TREES; i++)
+		urldb_dump_search(search_trees[i], 0);
+}
+
+/**
+ * Dump URL database hosts to stderr
+ *
+ * \param parent Parent node of tree to dump
+ */
+void urldb_dump_hosts(struct host_part *parent)
+{
+ struct host_part *h;
+
+ if (parent->part) {
+ LOG(("%s", parent->part));
+
+ LOG(("\t%s invalid SSL certs",
+ parent->permit_invalid_certs ? "Permits" : "Denies"));
+ }
+
+ /* Dump path data */
+ urldb_dump_paths(&parent->paths);
+
+ /* and recurse */
+ for (h = parent->children; h; h = h->next)
+ urldb_dump_hosts(h);
+}
+
+/**
+ * Dump URL database paths to stderr
+ *
+ * \param parent Parent node of tree to dump
+ */
+void urldb_dump_paths(struct path_data *parent)
+{
+ struct path_data *p;
+ unsigned int i;
+
+ if (parent->segment) {
+ LOG(("\t%s : %u", parent->scheme, parent->port));
+
+ LOG(("\t\t'%s'", parent->segment));
+
+ for (i = 0; i != parent->frag_cnt; i++)
+ LOG(("\t\t\t#%s", parent->fragment[i]));
+ }
+
+ /* and recurse */
+ for (p = parent->children; p; p = p->next)
+ urldb_dump_paths(p);
+}
+
+/**
+ * Dump search tree
+ *
+ * \param parent Parent node of tree to dump
+ * \param depth Tree depth
+ */
+void urldb_dump_search(struct search_node *parent, int depth)
+{
+ const struct host_part *h;
+ int i;
+
+ if (parent == &empty)
+ return;
+
+ urldb_dump_search(parent->left, depth + 1);
+
+ for (i = 0; i != depth; i++)
+ fputc(' ', stderr);
+
+ for (h = parent->data; h; h = h->parent) {
+ fprintf(stderr, "%s", h->part);
+ if (h->parent && h->parent->parent)
+ fputc('.', stderr);
+ }
+
+ fputc('\n', stderr);
+
+ urldb_dump_search(parent->right, depth + 1);
+}
+
+/**
+ * Insert a node into the search tree
+ *
+ * Allocates a wrapper search_node for the host data and inserts it.
+ * If the data is already present, the duplicate wrapper is freed by
+ * the internal insert and the tree is unchanged.
+ *
+ * \param root Root of tree to insert into
+ * \param data User data to insert
+ * \return Pointer to updated root, or NULL if failed
+ */
+struct search_node *urldb_search_insert(struct search_node *root,
+		const struct host_part *data)
+{
+	struct search_node *n;
+
+	assert(root && data);
+
+	n = malloc(sizeof(struct search_node));
+	if (!n)
+		return NULL;
+
+	/* New nodes enter the AA tree as leaves at level 1 */
+	n->level = 1;
+	n->data = data;
+	n->left = n->right = &empty;
+
+	root = urldb_search_insert_internal(root, n);
+
+	return root;
+}
+
+/**
+ * Insert node into search tree
+ *
+ * \param root Root of (sub)tree to insert into
+ * \param n Node to insert
+ * \return Pointer to updated root
+ */
+struct search_node *urldb_search_insert_internal(struct search_node *root,
+ struct search_node *n)
+{
+ assert(root && n);
+
+ if (root == &empty) {
+ root = n;
+ } else {
+ int c = urldb_search_match_host(root->data, n->data);
+
+ if (c > 0) {
+ root->left = urldb_search_insert_internal(
+ root->left, n);
+ } else if (c < 0) {
+ root->right = urldb_search_insert_internal(
+ root->right, n);
+ } else {
+ /* exact match */
+ free(n);
+ return root;
+ }
+
+ root = urldb_search_skew(root);
+ root = urldb_search_split(root);
+ }
+
+ return root;
+}
+
+/**
+ * Delete a node from a search tree
+ *
+ * Standard AA-tree deletion: descend to the bottom remembering the
+ * last node visited and the candidate to delete, swap data at the
+ * bottom, then rebalance with skew/split on the way back up.
+ *
+ * NOTE(review): 'last' and 'deleted' are static so they persist
+ * across the recursive calls of a single deletion — this makes the
+ * function non-reentrant; confirm it is never called concurrently.
+ *
+ * \param root Tree to remove from
+ * \param data Data to delete
+ * \return Updated root of tree
+ */
+struct search_node *urldb_search_remove(struct search_node *root,
+		const struct host_part *data)
+{
+	static struct search_node *last, *deleted;
+
+	assert(root && data);
+
+	if (root != &empty) {
+		int c = urldb_search_match_host(root->data, data);
+
+		last = root;
+		if (c > 0) {
+			root->left = urldb_search_remove(root->left, data);
+		} else {
+			deleted = root;
+			root->right = urldb_search_remove(root->right, data);
+		}
+	}
+
+	if (root == last) {
+		/* At the bottom: if we found the sought node, move the
+		 * bottom node's data into it and unlink the bottom */
+		if (deleted != &empty &&
+				urldb_search_match_host(deleted->data,
+					data) == 0) {
+			deleted->data = last->data;
+			deleted = &empty;
+			root = root->right;
+		}
+	} else {
+		/* On the way back up: restore the AA invariants */
+		if (root->left->level < root->level - 1 ||
+				root->right->level < root->level - 1) {
+			if (root->right->level > --root->level)
+				root->right->level = root->level;
+
+			root = urldb_search_skew(root);
+			root->right = urldb_search_skew(root->right);
+			root->right->right =
+					urldb_search_skew(root->right->right);
+			root = urldb_search_split(root);
+			root->right = urldb_search_split(root->right);
+		}
+	}
+
+	return root;
+}
+
+/**
+ * Find a node in a search tree
+ *
+ * Binary search descent keyed by urldb_search_match_string().
+ *
+ * \param root Tree to look in
+ * \param host Host string (FQDN or IP address) to find
+ * \return Pointer to host tree node, or NULL if not found
+ */
+const struct host_part *urldb_search_find(struct search_node *root,
+		const char *host)
+{
+	int c;
+
+	assert(root && host);
+
+	/* &empty is the shared sentinel leaf => not found */
+	if (root == &empty) {
+		return NULL;
+	}
+
+	c = urldb_search_match_string(root->data, host);
+
+	if (c > 0)
+		return urldb_search_find(root->left, host);
+	else if (c < 0)
+		return urldb_search_find(root->right, host);
+	else
+		return root->data;
+}
+
+/**
+ * Compare a pair of host_parts
+ *
+ * Hosts are compared segment by segment, walking both chains towards
+ * the tree root (i.e. from the most-specific segment to the TLD).
+ *
+ * \param a First host tree node
+ * \param b Second host tree node
+ * \return 0 if they match, non-zero otherwise (sign gives ordering)
+ */
+int urldb_search_match_host(const struct host_part *a,
+		const struct host_part *b)
+{
+	int ret;
+
+	assert(a && b);
+
+	/* traverse up tree to root, comparing parts as we go. */
+	for (; a && b; a = a->parent, b = b->parent)
+		if ((ret = strcasecmp(a->part, b->part)) != 0)
+			/* They differ => return the difference here */
+			return ret;
+
+	/* If we get here then either:
+	 * a) The path lengths differ
+	 * or b) The hosts are identical
+	 */
+	if (a && !b)
+		/* len(a) > len(b) */
+		return 1;
+	else if (!a && b)
+		/* len(a) < len(b) */
+		return -1;
+
+	/* identical */
+	return 0;
+}
+
+/**
+ * Compare a host_part (walked towards the tree root) with a host string
+ *
+ * \param a Host tree node to compare
+ * \param b Host string (FQDN or IP address)
+ * \return 0 if they match, non-zero otherwise (sign gives ordering)
+ */
+int urldb_search_match_string(const struct host_part *a,
+ const char *b)
+{
+ const char *end, *dot;
+ int plen, ret;
+
+ assert(a && b);
+
+ if (*b >= '0' && *b <= '9') {
+ /* IP address */
+ return strcasecmp(a->part, b);
+ }
+
+ end = b + strlen(b);
+
+ while (b < end && a) {
+ dot = strchr(b, '.');
+ if (!dot) {
+ /* last segment */
+ dot = end;
+ }
+
+ /* Compare strings (length limited) */
+ if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
+ /* didn't match => return difference */
+ return ret;
+
+ /* The strings matched, now check that the lengths do, too */
+ plen = strlen(a->part);
+
+ if (plen > dot - b)
+ /* len(a) > len(b) */
+ return 1;
+ else if (plen < dot - b)
+ /* len(a) < len(b) */
+ return -1;
+
+ b = dot + 1;
+ a = a->parent;
+ }
+
+ /* If we get here then either:
+ * a) The path lengths differ
+ * or b) The hosts are identical
+ */
+ if (a && a != &db_root && b >= end)
+ /* len(a) > len(b) */
+ return 1;
+ else if (!a && b < end)
+ /* len(a) < len(b) */
+ return -1;
+
+ /* Identical */
+ return 0;
+}
+
+/**
+ * Compare a host_part (walked towards the tree root) with a host prefix
+ *
+ * \param a Host tree node to compare
+ * \param b Prefix string to match
+ * \return 0 if the prefix matches, non-zero otherwise
+ */
+int urldb_search_match_prefix(const struct host_part *a,
+ const char *b)
+{
+ const char *end, *dot;
+ int plen, ret;
+
+ assert(a && b);
+
+ if (*b >= '0' && *b <= '9') {
+ /* IP address */
+ return strncasecmp(a->part, b, strlen(b));
+ }
+
+ end = b + strlen(b);
+
+ while (b < end && a) {
+ dot = strchr(b, '.');
+ if (!dot) {
+ /* last segment */
+ dot = end;
+ }
+
+ /* Compare strings (length limited) */
+ if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
+ /* didn't match => return difference */
+ return ret;
+
+ /* The strings matched */
+ if (dot < end) {
+ /* Consider segment lengths only in the case
+ * where the prefix contains segments */
+ plen = strlen(a->part);
+ if (plen > dot - b)
+ /* len(a) > len(b) */
+ return 1;
+ else if (plen < dot - b)
+ /* len(a) < len(b) */
+ return -1;
+ }
+
+ b = dot + 1;
+ a = a->parent;
+ }
+
+ /* If we get here then either:
+ * a) The path lengths differ
+ * or b) The hosts are identical
+ */
+ if (a && a != &db_root && b >= end)
+ /* len(a) > len(b) => prefix matches */
+ return 0;
+ else if (!a && b < end)
+ /* len(a) < len(b) => prefix does not match */
+ return -1;
+
+ /* Identical */
+ return 0;
+}
+
+/**
+ * Rotate a subtree right
+ *
+ * AA-tree "skew": if the left child is at the same level as the root
+ * (a left horizontal link), rotate right to turn it into a right
+ * horizontal link.
+ *
+ * \param root Root of subtree to rotate
+ * \return new root of subtree
+ */
+struct search_node *urldb_search_skew(struct search_node *root)
+{
+	struct search_node *temp;
+
+	assert(root);
+
+	if (root->left->level == root->level) {
+		temp = root->left;
+		root->left = temp->right;
+		temp->right = root;
+		root = temp;
+	}
+
+	return root;
+}
+
+/**
+ * Rotate a node left, increasing the parent's level
+ *
+ * AA-tree "split": if there are two consecutive right horizontal
+ * links (root->right->right at the root's level), rotate left and
+ * raise the new root's level by one.
+ *
+ * \param root Root of subtree to rotate
+ * \return New root of subtree
+ */
+struct search_node *urldb_search_split(struct search_node *root)
+{
+	struct search_node *temp;
+
+	assert(root);
+
+	if (root->right->right->level == root->level) {
+		temp = root->right;
+		root->right = temp->left;
+		temp->left = root;
+		root = temp;
+
+		root->level++;
+	}
+
+	return root;
+}
+
+#ifdef TEST
+int main(void)
+{
+ struct host_part *h;
+ struct path_data *p;
+
+ h = urldb_add_host("127.0.0.1");
+ if (!h) {
+ LOG(("failed adding host"));
+ return 1;
+ }
+
+ /* Get host entry */
+ h = urldb_add_host("netsurf.strcprstskrzkrk.co.uk");
+ if (!h) {
+ LOG(("failed adding host"));
+ return 1;
+ }
+
+ /* Get path entry */
+ p = urldb_add_path("http", 80, h, "/path/to/resource.htm?a=b", "zz");
+ if (!p) {
+ LOG(("failed adding path"));
+ return 1;
+ }
+
+ p = urldb_add_path("http", 80, h, "/path/to/resource.htm?a=b", "aa");
+ if (!p) {
+ LOG(("failed adding path"));
+ return 1;
+ }
+
+ p = urldb_add_path("http", 80, h, "/path/to/resource.htm?a=b", "yy");
+ if (!p) {
+ LOG(("failed adding path"));
+ return 1;
+ }
+
+ urldb_dump();
+
+ return 0;
+}
+#endif
diff --git a/content/urldb.h b/content/urldb.h
new file mode 100644
index 000000000..9d59271d2
--- /dev/null
+++ b/content/urldb.h
@@ -0,0 +1,65 @@
+/*
+ * This file is part of NetSurf, http://netsurf.sourceforge.net/
+ * Licensed under the GNU General Public License,
+ * http://www.opensource.org/licenses/gpl-license
+ * Copyright 2006 John M Bell <jmb202@ecs.soton.ac.uk>
+ */
+
+/** \file
+ * Unified URL information database (interface)
+ */
+
+#ifndef _NETSURF_CONTENT_URLDB_H_
+#define _NETSURF_CONTENT_URLDB_H_
+
+#include <stdbool.h>
+#include <time.h>
+#include "netsurf/content/content_type.h"
+
+struct url_data {
+ const char *title; /**< Resource title */
+ unsigned int visits; /**< Visit count */
+ time_t last_visit; /**< Last visit time */
+ content_type type; /**< Type of resource */
+};
+
+struct bitmap;
+
+/* Persistence support */
+void urldb_load(const char *filename);
+void urldb_save(const char *filename);
+
+/* URL insertion */
+bool urldb_add_url(const char *url);
+
+/* URL data modification / lookup */
+void urldb_set_url_title(const char *url, const char *title);
+void urldb_set_url_content_type(const char *url, content_type type);
+void urldb_update_url_visit_data(const char *url);
+void urldb_reset_url_visit_data(const char *url);
+const struct url_data *urldb_get_url_data(const char *url);
+
+/* Authentication modification / lookup */
+void urldb_set_auth_details(const char *url, const char *realm,
+ const char *auth);
+const char *urldb_get_auth_details(const char *url);
+
+/* SSL certificate permissions */
+void urldb_set_cert_permissions(const char *url, bool permit);
+bool urldb_get_cert_permissions(const char *url);
+
+/* Thumbnail handling */
+void urldb_set_thumbnail(const char *url, struct bitmap *bitmap);
+const struct bitmap *urldb_get_thumbnail(const char *url);
+
+/* URL completion */
+void urldb_iterate_partial(const char *prefix,
+ bool (*callback)(const char *url));
+
+/* Iteration */
+void urldb_iterate_entries(bool (*callback)(const char *url));
+
+/* Debug */
+void urldb_dump(void);
+
+#endif