From c09eb457df1962f5b014214874b2beffd69141a4 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Sun, 9 Apr 2006 23:21:13 +0000 Subject: Unify information databases svn path=/trunk/netsurf/; revision=2519 --- content/authdb.c | 366 --------- content/authdb.h | 18 - content/certdb.c | 154 ---- content/certdb.h | 18 - content/fetch.c | 15 +- content/fetchcache.c | 1 - content/url_store.c | 750 ----------------- content/url_store.h | 61 -- content/urldb.c | 2231 ++++++++++++++++++++++++++++++++++++++++++++++++++ content/urldb.h | 65 ++ 10 files changed, 2301 insertions(+), 1378 deletions(-) delete mode 100644 content/authdb.c delete mode 100644 content/authdb.h delete mode 100644 content/certdb.c delete mode 100644 content/certdb.h delete mode 100644 content/url_store.c delete mode 100644 content/url_store.h create mode 100644 content/urldb.c create mode 100644 content/urldb.h (limited to 'content') diff --git a/content/authdb.c b/content/authdb.c deleted file mode 100644 index f97adb1b0..000000000 --- a/content/authdb.c +++ /dev/null @@ -1,366 +0,0 @@ -/* - * This file is part of NetSurf, http://netsurf.sourceforge.net/ - * Licensed under the GNU General Public License, - * http://www.opensource.org/licenses/gpl-license - * Copyright 2006 John M Bell - */ - -/** \file - * HTTP authentication database (implementation) - * - * Authentication details are stored hashed by canonical root URI - * (absoluteURI with no abs_path part - see RFC 2617) for fast lookup. - * - * A protection space is specified by the root URI and a case sensitive - * realm match. User-agents may preemptively send authentication details - * for locations within a currently known protected space (i.e: - * Given a known realm URI of scheme://authority/path/to/realm/ - * the URI scheme://authority/path/to/realm/foo/ can be assumed to - * be within the protection space.) 
- * - * In order to deal with realms within realms, the realm details are stored - * such that the most specific URI comes first (where "most specific" is - * classed as the one with the longest abs_path segment). - * - * Realms spanning domains are stored multiple times (once per domain). - * - * Where a higher level resource is found to be within a known realm, the - * existing match is replaced with the new one (i.e: - * Given a known realm of scheme://authority/path/to/realm/ (uri1) - * and the newly-acquired knowledge that scheme://authority/path/to/ (uri2) - * lies within the same realm, the realm details for uri1 are replaced with - * those for uri2. - in most cases, this is likely to be a simple - * replacement of the realm URI) - * - * There is currently no mechanism for retaining authentication details over - * sessions. - */ -#include -#include -#include -#include -#include "netsurf/content/authdb.h" -#define NDEBUG -#include "netsurf/utils/log.h" -#include "netsurf/utils/url.h" - -#define HASH_SIZE 77 - -struct realm_details { - char *realm; /**< Realm identifier */ - char *url; /**< Base URL of realm */ - char *auth; /**< Authentication details */ - struct realm_details *next; - struct realm_details *prev; -}; - -struct auth_entry { - char *root_url; /**< Canonical root URL of realms */ - struct realm_details *realms; /**< List of realms on this host */ - struct auth_entry *next; -}; - -static struct auth_entry *auth_table[HASH_SIZE]; - -static unsigned int authdb_hash(const char *s); -static struct realm_details *authdb_get_rd(const char *canon, - const char *url, const char *realm); -static void authdb_dump(void); - -/** - * Insert an entry into the database, potentially replacing any - * existing entry. - * - * \param url Absolute URL to resource - * \param realm Authentication realm containing resource - * \param auth Authentication details in form "username:password" - * \return true on success, false on error. 
- */ -bool authdb_insert(const char *url, const char *realm, const char *auth) -{ - char *canon, *stripped; - unsigned int hash; - struct realm_details *rd; - struct auth_entry *entry; - url_func_result ret; - - assert(url && realm && auth); - - LOG(("Adding '%s' - '%s'", url, realm)); - - ret = url_canonical_root(url, &canon); - if (ret != URL_FUNC_OK) - return false; - - LOG(("'%s'", canon)); - - ret = url_strip_lqf(url, &stripped); - if (ret != URL_FUNC_OK) { - free(canon); - return false; - } - - hash = authdb_hash(canon); - - /* Look for existing entry */ - for (entry = auth_table[hash]; entry; entry = entry->next) - if (strcmp(entry->root_url, canon) == 0) - break; - - rd = authdb_get_rd(canon, stripped, realm); - if (rd) { - /* We have a match */ - if (strlen(stripped) < strlen(rd->url)) { - /* more generic, so update URL and move to - * appropriate location in list (s.t. the invariant - * that most specific URLs come first is maintained) - */ - struct realm_details *r, *s; - char *temp = strdup(auth); - - if (!temp) { - free(temp); - free(stripped); - free(canon); - return false; - } - - free(rd->url); - rd->url = stripped; - - free(rd->auth); - rd->auth = temp; - - for (r = rd->next; r; r = s) { - s = r->next; - if (strlen(r->url) > strlen(rd->url)) { - rd->next->prev = rd->prev; - if (rd->prev) - rd->prev->next = rd->next; - else - entry->realms = r; - - rd->prev = r; - rd->next = r->next; - if (r->next) - r->next->prev = rd; - r->next = rd; - } - } - } - else if (strlen(stripped) == strlen(rd->url)) { - /* exact match, so replace auth details */ - char *temp = strdup(auth); - if (!temp) { - free(stripped); - free(canon); - return false; - } - - free(rd->auth); - rd->auth = temp; - - free(stripped); - } - /* otherwise, nothing to do */ - - free(canon); - return true; - } - - /* no existing entry => create one */ - rd = malloc(sizeof(struct realm_details)); - if (!rd) { - free(stripped); - free(canon); - return false; - } - - rd->realm = strdup(realm); - 
rd->auth = strdup(auth); - rd->url = stripped; - rd->prev = 0; - - if (!rd->realm || !rd->auth || ret != URL_FUNC_OK) { - free(rd->url); - free(rd->auth); - free(rd->realm); - free(rd); - free(canon); - return false; - } - - if (entry) { - /* found => add to it */ - rd->next = entry->realms; - if (entry->realms) - entry->realms->prev = rd; - entry->realms = rd; - - free(canon); - return true; - } - - /* not found => create new */ - entry = malloc(sizeof(struct auth_entry)); - if (!entry) { - free(rd->url); - free(rd->auth); - free(rd->realm); - free(rd); - free(canon); - return false; - } - - rd->next = 0; - entry->root_url = canon; - entry->realms = rd; - entry->next = auth_table[hash]; - auth_table[hash] = entry; - - return true; -} - -/** - * Find realm details entry - * - * \param canon Canonical root URL - * \param url Stripped URL to resource - * \param realm Realm containing resource - * \return Realm details or NULL if not found - */ -struct realm_details *authdb_get_rd(const char *canon, const char *url, - const char *realm) -{ - struct auth_entry *entry; - struct realm_details *ret; - - assert(canon && url); - - for (entry = auth_table[authdb_hash(canon)]; entry; - entry = entry->next) - if (strcmp(entry->root_url, canon) == 0) - break; - - if (!entry) - return NULL; - - for (ret = entry->realms; ret; ret = ret->next) { - if (strcmp(ret->realm, realm)) - /* skip realms that don't match */ - continue; - if (strlen(url) >= strlen(ret->url) && - !strncmp(url, ret->url, strlen(ret->url))) - /* If the requested URL is of equal or greater - * specificity than the stored one, but is within - * the same realm, then use the more generic details - */ - return ret; - else if (strncmp(url, ret->url, strlen(url)) == 0) { - /* We have a more general URL in the same realm */ - return ret; - } - } - - return NULL; -} - -/** - * Retrieve authentication details for an URL from the database - * - * \param url Absolute URL to consider - * \return authentication details, or 
NULL if none found. - */ -const char *authdb_get(const char *url) -{ - char *canon, *stripped; - struct auth_entry *entry; - struct realm_details *rd; - url_func_result ret; - - assert(url); - - LOG(("Searching for '%s'", url)); - - authdb_dump(); - - ret = url_canonical_root(url, &canon); - if (ret != URL_FUNC_OK) - return NULL; - - ret = url_strip_lqf(url, &stripped); - if (ret != URL_FUNC_OK) { - free(canon); - return NULL; - } - - /* Find auth entry */ - for (entry = auth_table[authdb_hash(canon)]; entry; - entry = entry->next) - if (strcmp(entry->root_url, canon) == 0) - break; - - if (!entry) { - free(stripped); - free(canon); - return NULL; - } - - LOG(("Found entry")); - - /* Find realm details */ - for (rd = entry->realms; rd; rd = rd->next) - if (strlen(stripped) >= strlen(rd->url) && - !strncmp(stripped, rd->url, strlen(rd->url))) - break; - - if (!rd) { - free(stripped); - free(canon); - return NULL; - } - - LOG(("Found realm")); - - free(stripped); - free(canon); - return rd->auth; -} - -/** - * Hash function for keys. 
- */ -unsigned int authdb_hash(const char *s) -{ - unsigned int i, z = 0, m; - if (!s) - return 0; - - m = strlen(s); - - for (i = 0; i != m && s[i]; i++) - z += s[i] & 0x1f; /* lower 5 bits, case insensitive */ - return z % HASH_SIZE; -} - -/** - * Dump contents of auth db to stderr - */ -void authdb_dump(void) -{ -#ifndef NDEBUG - int i; - struct auth_entry *e; - struct realm_details *r; - - for (i = 0; i != HASH_SIZE; i++) { - LOG(("%d:", i)); - for (e = auth_table[i]; e; e = e->next) { - LOG(("\t%s", e->root_url)); - for (r = e->realms; r; r = r->next) { - LOG(("\t\t%s - %s", r->url, r->realm)); - } - } - } -#endif -} diff --git a/content/authdb.h b/content/authdb.h deleted file mode 100644 index ece7b763d..000000000 --- a/content/authdb.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * This file is part of NetSurf, http://netsurf.sourceforge.net/ - * Licensed under the GNU General Public License, - * http://www.opensource.org/licenses/gpl-license - * Copyright 2006 John M Bell - */ - -/** \file - * HTTP authentication database (interface) - */ - -#ifndef _NETSURF_CONTENT_AUTHDB_H_ -#define _NETSURF_CONTENT_AUTHDB_H_ - -bool authdb_insert(const char *url, const char *realm, const char *auth); -const char *authdb_get(const char *url); - -#endif diff --git a/content/certdb.c b/content/certdb.c deleted file mode 100644 index 78c6ec04f..000000000 --- a/content/certdb.c +++ /dev/null @@ -1,154 +0,0 @@ -/* - * This file is part of NetSurf, http://netsurf.sourceforge.net/ - * Licensed under the GNU General Public License, - * http://www.opensource.org/licenses/gpl-license - * Copyright 2006 John M Bell - */ - -/** \file - * HTTPS certificate verification database (implementation) - * - * URLs of servers with invalid SSL certificates are stored hashed by - * canonical root URI (absoluteURI with no abs_path part - see RFC 2617) - * for fast lookup. 
- */ -#include -#include -#include -#include -#include "netsurf/utils/config.h" -#include "netsurf/content/certdb.h" -#define NDEBUG -#include "netsurf/utils/log.h" -#include "netsurf/utils/url.h" - -#define HASH_SIZE 77 - -#ifdef WITH_SSL - -struct cert_entry { - char *root_url; /**< Canonical root URL */ - struct cert_entry *next; -}; - -static struct cert_entry *cert_table[HASH_SIZE]; - -static unsigned int certdb_hash(const char *s); -static void certdb_dump(void); - -/** - * Insert an entry into the database - * - * \param url Absolute URL to resource - * \return true on success, false on error. - */ -bool certdb_insert(const char *url) -{ - char *canon; - unsigned int hash; - struct cert_entry *entry; - url_func_result ret; - - assert(url); - - LOG(("Adding '%s'", url)); - - ret = url_canonical_root(url, &canon); - if (ret != URL_FUNC_OK) - return false; - - LOG(("'%s'", canon)); - - hash = certdb_hash(canon); - - /* Look for existing entry */ - for (entry = cert_table[hash]; entry; entry = entry->next) { - if (strcmp(entry->root_url, canon) == 0) { - free(canon); - return true; - } - } - - /* not found => create new */ - entry = malloc(sizeof(struct cert_entry)); - if (!entry) { - free(canon); - return false; - } - - entry->root_url = canon; - entry->next = cert_table[hash]; - cert_table[hash] = entry; - - return true; -} - -/** - * Retrieve certificate details for an URL from the database - * - * \param url Absolute URL to consider - * \return certificate details, or NULL if none found. 
- */ -const char *certdb_get(const char *url) -{ - char *canon; - struct cert_entry *entry; - url_func_result ret; - - assert(url); - - LOG(("Searching for '%s'", url)); - - certdb_dump(); - - ret = url_canonical_root(url, &canon); - if (ret != URL_FUNC_OK) - return NULL; - - /* Find cert entry */ - for (entry = cert_table[certdb_hash(canon)]; entry; - entry = entry->next) { - if (strcmp(entry->root_url, canon) == 0) { - free(canon); - return entry->root_url; - } - } - - return NULL; -} - -/** - * Hash function for keys. - */ -unsigned int certdb_hash(const char *s) -{ - unsigned int i, z = 0, m; - if (!s) - return 0; - - m = strlen(s); - - for (i = 0; i != m && s[i]; i++) - z += s[i] & 0x1f; /* lower 5 bits, case insensitive */ - return z % HASH_SIZE; -} - -/** - * Dump contents of auth db to stderr - */ -void certdb_dump(void) -{ -#ifndef NDEBUG - int i; - struct cert_entry *e; - - for (i = 0; i != HASH_SIZE; i++) { - LOG(("%d:", i)); - for (e = cert_table[i]; e; e = e->next) { - LOG(("\t%s", e->root_url)); - } - } -#endif -} - -#endif diff --git a/content/certdb.h b/content/certdb.h deleted file mode 100644 index 28aa88664..000000000 --- a/content/certdb.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * This file is part of NetSurf, http://netsurf.sourceforge.net/ - * Licensed under the GNU General Public License, - * http://www.opensource.org/licenses/gpl-license - * Copyright 2006 John M Bell - */ - -/** \file - * HTTPS certificate verification database (interface) - */ - -#ifndef _NETSURF_CONTENT_CERTDB_H_ -#define _NETSURF_CONTENT_CERTDB_H_ - -bool certdb_insert(const char *url); -const char *certdb_get(const char *url); - -#endif diff --git a/content/fetch.c b/content/fetch.c index 4ba322067..bfbf715a0 100644 --- a/content/fetch.c +++ b/content/fetch.c @@ -31,13 +31,8 @@ #ifdef WITH_SSL #include "openssl/ssl.h" #endif -#ifdef WITH_AUTH -#include "netsurf/content/authdb.h" -#endif -#ifdef WITH_SSL -#include "netsurf/content/certdb.h" -#endif #include 
"netsurf/content/fetch.h" +#include "netsurf/content/urldb.h" #include "netsurf/desktop/options.h" #include "netsurf/render/form.h" #define NDEBUG @@ -158,7 +153,7 @@ static int fetch_cert_verify_callback(X509_STORE_CTX *x509_ctx, void *parm); ring = 0; \ } \ element->r_next = element->r_prev = 0 - + /** Find the element (by hostname) in the given ring, leave it in the * provided element variable */ @@ -483,7 +478,7 @@ static bool ns_internal_initiate_fetch(struct fetch *fetch, CURL *handle) fetch->curl_handle = 0; return false; } - + /* add to the global curl multi handle */ codem = curl_multi_add_handle(fetch_curl_multi, fetch->curl_handle); assert(codem == CURLM_OK || codem == CURLM_CALL_MULTI_PERFORM); @@ -649,7 +644,7 @@ CURLcode fetch_set_options(struct fetch *f) SETOPT(CURLOPT_COOKIEJAR, 0); } #ifdef WITH_AUTH - if ((auth = authdb_get(f->url)) != NULL) { + if ((auth = urldb_get_auth_details(f->url)) != NULL) { SETOPT(CURLOPT_HTTPAUTH, CURLAUTH_ANY); SETOPT(CURLOPT_USERPWD, auth); } else { @@ -677,7 +672,7 @@ CURLcode fetch_set_options(struct fetch *f) } #ifdef WITH_SSL - if (certdb_get(f->url) != NULL) { + if (urldb_get_cert_permissions(f->url)) { /* Disable certificate verification */ SETOPT(CURLOPT_SSL_VERIFYPEER, 0L); SETOPT(CURLOPT_SSL_VERIFYHOST, 0L); diff --git a/content/fetchcache.c b/content/fetchcache.c index 47f24e89c..bc8907f14 100644 --- a/content/fetchcache.c +++ b/content/fetchcache.c @@ -23,7 +23,6 @@ #include "netsurf/content/content.h" #include "netsurf/content/fetchcache.h" #include "netsurf/content/fetch.h" -#include "netsurf/content/url_store.h" #include "netsurf/utils/log.h" #include "netsurf/utils/messages.h" #include "netsurf/utils/talloc.h" diff --git a/content/url_store.c b/content/url_store.c deleted file mode 100644 index fde956e46..000000000 --- a/content/url_store.c +++ /dev/null @@ -1,750 +0,0 @@ -/* - * This file is part of NetSurf, http://netsurf.sourceforge.net/ - * Licensed under the GNU General Public License, - * 
http://www.opensource.org/licenses/gpl-license - * Copyright 2005 Richard Wilson - */ - -/** \file - * Central repository for URL data (implementation). - */ - -#include -#include -#include -#include -#include -#include -#include "netsurf/content/url_store.h" -#include "netsurf/image/bitmap.h" -#include "netsurf/desktop/options.h" -#ifdef riscos -#include "netsurf/riscos/bitmap.h" -#endif -#include "netsurf/utils/log.h" -#include "netsurf/utils/url.h" -#include "netsurf/utils/utils.h" - - -#define ITERATIONS_BEFORE_TEST 32 -#define MAXIMUM_URL_LENGTH 1024 - -struct hostname_data *url_store_hostnames = NULL; - -static struct hostname_data *url_store_find_hostname(const char *url); -static struct hostname_data *url_store_match_hostname( - struct hostname_data *previous); - -/* used for faster matching */ -static size_t current_match_url_length; -static char *current_match_scheme; -static int current_match_scheme_length; -static char *current_match_hostname; -static int current_match_hostname_length; -static bool current_match_www_test; - -/* used for faster searching */ -static struct hostname_data *last_hostname_found = NULL; - -/** - * Returns the hostname data for the specified URL. If no hostname - * data is currently available then it is created. 
- * - * \param url the url to find hostname data for - * \return the current hostname data, or NULL if memory exhausted - */ -struct hostname_data *url_store_find_hostname(const char *url) -{ - struct hostname_data *first = url_store_hostnames; - struct hostname_data *search; - struct hostname_data *result; - url_func_result res; - char *hostname = NULL; - int hostname_length; - int compare; - int fast_exit_counter = ITERATIONS_BEFORE_TEST; - const char *host_test; - - assert(url); - - /* as the URL is normalised, we optimise the hostname finding for http:// */ - if (!strncmp("http://", url, 7)) { - /* check for duplicate hostname calls */ - if ((last_hostname_found) && - (!strncmp(last_hostname_found->hostname, url + 7, - last_hostname_found->hostname_length))) { - /* ensure it isn't comparing 'foo.com' to 'foo.com.au' etc */ - if (url[last_hostname_found->hostname_length + 7] != '.') - return last_hostname_found; - } - - /* check for a hostname match */ - for (host_test = url + 7; - ((*host_test > 32) && (*host_test != '/')); - *host_test++); - hostname_length = host_test - url - 7; - host_test = url + 7; - if ((last_hostname_found) && - (strncmp(host_test, - last_hostname_found->hostname, - hostname_length) > 0)) - first = last_hostname_found; - for (search = first; search; search = search->next) { - if (search->hostname_length == hostname_length) { - compare = strncmp(host_test, search->hostname, - hostname_length); - if (compare == 0) { - last_hostname_found = search; - return search; - } else if (compare < 0) - break; - } - } - - /* allocate a new hostname */ - hostname = malloc(hostname_length + 1); - if (!hostname) - return NULL; - memcpy(hostname, host_test, hostname_length); - hostname[hostname_length] = '\0'; - } else { - /* no quick match found, fallback */ - res = url_host(url, &hostname); - switch (res) { - case URL_FUNC_OK: - break; - case URL_FUNC_NOMEM: - return NULL; - case URL_FUNC_FAILED: - hostname = strdup("file:/"); /* for 'file:/' */ - if 
(!hostname) - return NULL; - break; - default: - assert(0); - } - hostname_length = strlen(hostname); - } - - /* try to find a matching hostname fairly quickly */ - if ((last_hostname_found) && - (strcmp(hostname, last_hostname_found->hostname) > 0)) - first = last_hostname_found; - for (search = first; search; search = search->next) { - if ((fast_exit_counter <= 0) || - (search->hostname_length == hostname_length)) { - compare = strcmp(hostname, search->hostname); - if (compare == 0) { - free(hostname); - last_hostname_found = search; - return search; - } else if (compare < 0) - break; - fast_exit_counter = ITERATIONS_BEFORE_TEST; - } else { - fast_exit_counter--; - } - } - - /* no hostname is available: create a new one */ - result = malloc(sizeof *result); - if (!result) { - free(hostname); - return NULL; - } - result->hostname = hostname; - result->hostname_length = hostname_length; - result->url = 0; - result->previous = 0; - result->next = 0; - last_hostname_found = result; - - /* simple case: no current hostnames */ - if (!url_store_hostnames) { - url_store_hostnames = result; - return result; - } - - /* worst case scenario: the place we need to link is within the last - * section of the hostname list so we have no reference to work back - * from. rather than slowing with the very common case of searching, - * we take a speed hit for this case and simply move to the very end - * of the hostname list ready to work backwards. */ - if (!search) - for (search = url_store_hostnames; search->next; - search = search->next) - ; - - /* we can now simply scan backwards as we know roughly where we need - * to link to (we either had an early exit from the searching so we - * know we're in the block following where we need to link, or we're - * at the very end of the list as we were in the last block.) 
*/ - while ((search) && (strcmp(hostname, search->hostname) < 0)) - search = search->previous; - - /* simple case: our new hostname is the first in the list */ - if (!search) { - result->next = url_store_hostnames; - url_store_hostnames->previous = result; - url_store_hostnames = result; - return result; - } - - /* general case: link in after the found hostname */ - result->previous = search; - result->next = search->next; - if (search->next) - search->next->previous = result; - search->next = result; - return result; -} - - -/** - * Returns the url data for the specified URL. If no url - * data is currently available then it is created. - * - * \param url a normalized url to find hostname data for - * \return the current hostname data, or NULL if memory exhausted - */ -struct url_content *url_store_find(const char *url) { - struct hostname_data *hostname_data; - struct url_data *search; - struct url_data *result; - size_t url_length; - int compare; - int fast_exit_counter = ITERATIONS_BEFORE_TEST; - - assert(url); - - /* find the corresponding hostname data */ - hostname_data = url_store_find_hostname(url); - if (!hostname_data) - return NULL; - - /* move to the start of the leafname */ - url_length = strlen(url); - - /* try to find a matching url fairly quickly */ - for (search = hostname_data->url; search; search = search->next) { - if ((fast_exit_counter <= 0) || - (search->data.url_length == url_length)) { - compare = strcmp(url, search->data.url); - if (compare == 0) - return &search->data; - else if (compare < 0) - break; - fast_exit_counter = ITERATIONS_BEFORE_TEST; - } else { - fast_exit_counter--; - } - } - - /* no URL is available: create a new one */ - result = calloc(1, sizeof(struct url_data)); - if (!result) - return NULL; - result->data.url = malloc(url_length + 1); - if (!result->data.url) { - free(result); - return NULL; - } - memcpy(result->data.url, url, url_length + 1); - result->data.url_length = url_length; - result->parent = hostname_data; - 
- /* simple case: no current URLs */ - if (!hostname_data->url) { - hostname_data->url = result; - return &result->data; - } - - /* worst case scenario: the place we need to link is within the last - * section of the URL list so we have no reference to work back - * from. rather than slowing with the very common case of searching, - * we take a speed hit for this case and simply move to the very end - * of the URL list ready to work backwards. */ - if (!search) - for (search = hostname_data->url; search->next; - search = search->next) - ; - - /* we can now simply scan backwards as we know roughly where we need - * to link to (we either had an early exit from the searching so we - * know we're in the block following where we need to link, or we're - * at the very end of the list as we were in the last block.) */ - while ((search) && (strcmp(url, search->data.url) < 0)) - search = search->previous; - - /* simple case: our new hostname is the first in the list */ - if (!search) { - result->next = hostname_data->url; - hostname_data->url->previous = result; - hostname_data->url = result; - return &result->data; - } - - /* general case: link in after the found hostname */ - result->previous = search; - result->next = search->next; - if (search->next) - search->next->previous = result; - search->next = result; - return &result->data; -} - - -/** - * Returns the next hostname that matches a part of the specified URL. 
- * - * The following variables must be initialised prior to calling: - * - * - current_match_scheme - * - current_match_hostname - * - current_match_hostname_length; - * - * \param url a normalized url to find the next match for - * \param current the current hostname to search forward from, or NULL - * \return the next matching hostname, or NULL - */ -struct hostname_data *url_store_match_hostname( - struct hostname_data *current) { - int compare; - - assert(current_match_hostname); - - /* advance to the next hostname */ - if (!current) - current = url_store_hostnames; - else - current = current->next; - - /* skip past hostname data without URLs */ - for (; current && (!current->url); current = current->next); - - while (current) { - if (current->hostname_length >= current_match_hostname_length) { - compare = strncmp(current_match_hostname, current->hostname, - current_match_hostname_length); - if (compare == 0) - return current; - else if ((compare < 0) && !current_match_www_test) - break; - } - /* special case: if hostname is not www then try it */ - if (current_match_www_test && ((current->hostname_length - 4) >= - current_match_hostname_length) && - (!strncmp(current->hostname, "www.", 4)) && - (!strncmp(current_match_hostname, - current->hostname + 4, - current_match_hostname_length))) - return current; - - /* move to next hostname with URLs */ - current = current->next; - for (; current && (!current->url); current = current->next); - } - return NULL; -} - - - -/** - * Returns the complete URL for the next matched stored URL. 
- * - * \param url a normalized url to find the next match for - * \param reference internal reference (NULL for first call) - * \return the next URL that matches - */ -struct url_content *url_store_match(const char *url, struct url_data **reference) { - struct hostname_data *hostname; - struct url_data *search = NULL; - url_func_result res; - - assert(url); - - if (!url_store_hostnames) - return NULL; - - /* find the scheme and first URL, not necessarily matching */ - if (!*reference) { - /* the hostname match is constant throughout */ - if (current_match_hostname) - free(current_match_hostname); - current_match_hostname = NULL; - res = url_host(url, ¤t_match_hostname); - switch (res) { - case URL_FUNC_OK: - break; - case URL_FUNC_NOMEM: - return NULL; - case URL_FUNC_FAILED: - /* for 'file:/' */ - current_match_hostname = strdup("file:/"); - if (!current_match_hostname) - return NULL; - break; - default: - assert(0); - } - current_match_hostname_length = strlen(current_match_hostname); - /* the scheme is constant throughout */ - if (current_match_scheme) - free(current_match_scheme); - current_match_scheme = NULL; - res = url_scheme(url, ¤t_match_scheme); - if (res != URL_FUNC_OK) - return NULL; - current_match_scheme_length = strlen(current_match_scheme); - /* the url is constant throughout */ - current_match_url_length = strlen(url); - current_match_www_test = (!strcmp(current_match_scheme, "http") && - strncmp(url + 4 + 3, "www.", 4)); /* 'http' + '://' */ - /* get our initial reference */ - hostname = url_store_match_hostname(NULL); - if (!hostname) - return NULL; - } else { - search = *reference; - hostname = search->parent; - } - - /* work through all our strings, ignoring the scheme and 'www.' 
*/ - while (hostname) { - - /* get the next URL to test */ - if (!search) - search = hostname->url; - else - search = search->next; - - /* loop past end of list, or search */ - if (!search) { - hostname = url_store_match_hostname(hostname); - if (!hostname) - return NULL; - } else if (search->data.visits > 0) { - /* straight match */ - if ((search->data.url_length >= current_match_url_length) && - (!strncmp(search->data.url, url, - current_match_url_length))) { - *reference = search; - return &search->data; - } - /* try with 'www.' inserted after the scheme */ - if (current_match_www_test && - ((search->data.url_length - 4) >= - current_match_url_length) && - (!strncmp(search->data.url, - current_match_scheme, - current_match_scheme_length)) && - (!strncmp(search->data.url + - current_match_scheme_length + 3, - "www.", 4)) && - (!strncmp(search->data.url + - current_match_scheme_length + 7, - url + - current_match_scheme_length + 3, - current_match_url_length - - current_match_scheme_length - 3))) { - *reference = search; - return &search->data; - } - } - } - return NULL; -} - - -/** - * Converts a text string into one suitable for URL matching. 
- * - * \param text the text to search with - * \return URL matching string allocated on heap, or NULL on error - */ -char *url_store_match_string(const char *text) { - url_func_result res; - char *url; - - assert(text); - - res = url_normalize(text, &url); - if (res != URL_FUNC_OK) - return NULL; - - /* drop the '/' from the end if it was added when normalizing */ - if ((url[strlen(url) - 1] == '/') && (text[strlen(text) - 1] != '/')) - url[strlen(url) - 1] = '\0'; - return url; -} - - -/** - * Loads the current contents of the URL store from disk - * - * \param file the file to load options from - */ -void url_store_load(const char *file) { - char s[MAXIMUM_URL_LENGTH]; - struct hostname_data *hostname; - struct url_data *result; - int urls; - int i; - int version; - int length; - FILE *fp; - - LOG(("Loading URL file")); - - fp = fopen(file, "r"); - if (!fp) { - LOG(("Failed to open file '%s' for reading", file)); - return; - } - - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - return; - version = atoi(s); - if (version < 102) { - LOG(("Unsupported URL file version.")); - return; - } - if (version > 105) { - LOG(("Unknown URL file version.")); - return; - } - - last_hostname_found = NULL; - while (fgets(s, MAXIMUM_URL_LENGTH, fp)) { - /* get the hostname */ - length = strlen(s) - 1; - s[length] = '\0'; - - /* skip data that has ended up with a host of '' */ - if (length == 0) { - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - urls = atoi(s); - for (i = 0; i < (6 * urls); i++) - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - continue; - } - - /* add the host at the tail */ - if (version == 105) { - hostname = malloc(sizeof *hostname); - if (!hostname) - die("Insufficient memory to create hostname"); - hostname->hostname = malloc(length + 1); - if (!hostname->hostname) - die("Insufficient memory to create hostname"); - memcpy(hostname->hostname, s, length + 1); - hostname->hostname_length = length; - hostname->url = 0; - hostname->previous = last_hostname_found; - 
if (!hostname->previous) - url_store_hostnames = hostname; - else - last_hostname_found->next = hostname; - hostname->next = 0; - last_hostname_found = hostname; - } else { - hostname = url_store_find_hostname(s); - if (!hostname) - break; - } - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - urls = atoi(s); - - /* load the non-corrupt data */ - for (i = 0; i < urls; i++) { - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - length = strlen(s) - 1; - s[length] = '\0'; - result = calloc(1, sizeof(struct url_data)); - if (!result) - die("Insufficient memory to create URL"); - result->data.url_length = length; - result->data.url = malloc(length + 1); - if (!result->data.url) - die("Insufficient memory to create URL"); - memcpy(result->data.url, s, length + 1); - result->parent = hostname; - result->next = hostname->url; - if (hostname->url) - hostname->url->previous = result; - hostname->url = result; - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - result->data.visits = atoi(s); - if (version == 102) { - /* ignore requests */ - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - /* ignore thumbnail size */ - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - /* set last visit as today to retain */ - result->data.last_visit = time(NULL); - } else { - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - result->data.last_visit = atoi(s); - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - result->data.type = atoi(s); - } - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; -#ifdef riscos - if (strlen(s) == 12) { - /* ensure filename is 'XX.XX.XX.XX' */ - if ((s[2] == '.') && (s[5] == '.') && - (s[8] == '.')) { - s[11] = '\0'; - result->data.thumbnail = - bitmap_create_file(s); - } - } -#endif - if (version >= 104) { - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - length = strlen(s) - 1; - if (length > 0) { - s[length] = '\0'; - result->data.title = malloc(length + 1); - if (result->data.title) - memcpy(result->data.title, s, - length + 1); - } - } - } - } - fclose(fp); 
- LOG(("Successfully loaded URL file")); -} - - -/** - * Saves the current contents of the URL store to disk - * - * \param file the file to load options from - */ -void url_store_save(const char *file) { - struct hostname_data *search; - struct url_data *url; - int url_count; - const char *thumb_file; - char *s; - int i; - FILE *fp; -#ifdef riscos - struct bitmap *bitmap; -#endif - time_t min_date; - char *title; - - fp = fopen(file, "w"); - if (!fp) { - LOG(("Failed to open file '%s' for writing", file)); - return; - } - - /* get the minimum date for expiry */ - min_date = time(NULL) - (60 * 60 * 24) * option_expire_url; - - /* file format version number */ - fprintf(fp, "105\n"); - for (search = url_store_hostnames; search; search = search->next) { - url_count = 0; - for (url = search->url; url; url = url->next) - if ((url->data.last_visit > min_date) && - (url->data.visits > 0) && - (url->data.url_length < - MAXIMUM_URL_LENGTH)) { - url_count++; - } - if (url_count > 0) { - fprintf(fp, "%s\n%i\n", search->hostname, url_count); - for (url = search->url; url && url->next; - url = url->next); - for (; url; url = url->previous) - if ((url->data.last_visit > min_date) && - (url->data.visits > 0) && - (url->data.url_length < - MAXIMUM_URL_LENGTH)) { - thumb_file = ""; -#ifdef riscos - bitmap = url->data.thumbnail; - if (bitmap) - thumb_file = bitmap->filename; -#endif - - if (url->data.title) { - s = url->data.title; - for (i = 0; s[i] != '\0'; - i++) - if (s[i] < 32) - s[i] = ' '; - for (--i; - ((i > 0) && - (s[i] == ' ')); - i--) - s[i] = '\0'; - - title = url->data.title; - } - else - title = ""; - fprintf(fp, "%s\n%i\n%i\n%i\n%s\n%s\n", - url->data.url, - url->data.visits, - (int) url->data. - last_visit, - url->data.type, - thumb_file, - title); - } - } - } - fclose(fp); -} - - -/** - * Associates a thumbnail with a specified URL. 
- */ -void url_store_add_thumbnail(const char *url, struct bitmap *bitmap) { - struct url_content *content; - - content = url_store_find(url); - if (content) { - if (content->thumbnail) - bitmap_destroy(content->thumbnail); - content->thumbnail = bitmap; - } -} - - -/** - * Gets the thumbnail associated with a given URL. - */ -struct bitmap *url_store_get_thumbnail(const char *url) { - struct url_content *content; - - content = url_store_find(url); - if (content) - return content->thumbnail; - return NULL; -} - - -int url_store_compare_last_visit(const void *a, const void *b) { - struct url_content * const *url_a = (struct url_content * const *)a; - struct url_content * const *url_b = (struct url_content * const *)b; - return ((*url_a)->last_visit - (*url_b)->last_visit); -} diff --git a/content/url_store.h b/content/url_store.h deleted file mode 100644 index c10bc90d0..000000000 --- a/content/url_store.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * This file is part of NetSurf, http://netsurf.sourceforge.net/ - * Licensed under the GNU General Public License, - * http://www.opensource.org/licenses/gpl-license - * Copyright 2005 Richard Wilson - */ - -/** \file - * Central repository for URL data (interface). 
- */ - -#ifndef _NETSURF_CONTENT_URLSTORE_H_ -#define _NETSURF_CONTENT_URLSTORE_H_ - -#include -#include "netsurf/content/content_type.h" - -struct bitmap; - - -struct hostname_data { - char *hostname; /**< Hostname (lowercase) */ - int hostname_length; /**< Length of hostname */ - struct url_data *url; /**< URLs for this host */ - struct hostname_data *previous; /**< Previous hostname */ - struct hostname_data *next; /**< Next hostname */ -}; - - -struct url_content { - struct bitmap *thumbnail; /**< Thumbnail, or NULL */ - char *url; /**< URL (including hostname) */ - char *title; /**< Page title */ - size_t url_length; /**< Length of URL (including hostname) */ - unsigned int visits; /**< Number of times visited */ - time_t last_visit; /**< The time() of the last visit */ - content_type type; /**< The content type */ -}; - -struct url_data { - struct url_content data; /**< Stored URL content data */ - struct url_data *previous; /**< Previous URL */ - struct url_data *next; /**< Next URL */ - struct hostname_data *parent; /**< Parent hostname data */ -}; - -extern struct hostname_data *url_store_hostnames; - -struct url_content *url_store_find(const char *url); -struct url_content *url_store_match(const char *url, struct url_data **reference); -char *url_store_match_string(const char *text); - -void url_store_add_thumbnail(const char *url, struct bitmap *bitmap); -struct bitmap *url_store_get_thumbnail(const char *url); - -void url_store_load(const char *file); -void url_store_save(const char *file); - -int url_store_compare_last_visit(const void *, const void *); - -#endif diff --git a/content/urldb.c b/content/urldb.c new file mode 100644 index 000000000..c7a798a92 --- /dev/null +++ b/content/urldb.c @@ -0,0 +1,2231 @@ +/* + * This file is part of NetSurf, http://netsurf.sourceforge.net/ + * Licensed under the GNU General Public License, + * http://www.opensource.org/licenses/gpl-license + * Copyright 2006 John M Bell + */ + +/** \file + * Unified URL 
information database (implementation) + * + * URLs are stored in a tree-based structure as follows: + * + * The host component is extracted from each URL and, if a FQDN, split on + * every '.'.The tree is constructed by inserting each FQDN segment in + * reverse order. Duplicate nodes are merged. + * + * If the host part of an URL is an IP address, then this is added to the + * tree verbatim (as if it were a TLD). + * + * This provides something looking like: + * + * root (a sentinel) + * | + * ------------------------------------------------- + * | | | | | | | + * com edu gov 127.0.0.1 net org uk TLDs + * | | | | | | + * google ... ... ... ... co 2LDs + * | | + * www bbc Hosts/Subdomains + * | + * www ... + * + * Each of the nodes in this tree is a struct host_part. This stores the + * FQDN segment (or IP address) with which the node is concerned. Each node + * may contain further information about paths on a host (struct path_data) + * or SSL certificate processing on a host-wide basis + * (host_part::permit_invalid_certs). + * + * Path data is concerned with storing various metadata about the path in + * question. This includes global history data, HTTP authentication details + * and any associated HTTP cookies. This is stored as a tree of path segments + * hanging off the relevant host_part node. + * + * Therefore, to find the last visited time of the URL + * http://www.example.com/path/to/resource.html, the FQDN tree would be + * traversed in the order root -> "com" -> "example" -> "www". The "www" + * node would have attached to it a tree of struct path_data: + * + * (sentinel) + * | + * path + * | + * to + * | + * resource.html + * + * This represents the absolute path "/path/to/resource.html". The leaf node + * "resource.html" contains the last visited time of the resource. + * + * The mechanism described above is, however, not particularly conducive to + * fast searching of the database for a given URL (or URLs beginning with a + * given prefix). 
Therefore, an ancillary data structure is used to enable
+ * fast searching. This structure simply reflects the contents of the
+ * database, with entries being added/removed at the same time as for the
+ * core database. In order to ensure that degenerate cases are kept to a
+ * minimum, we use an AAtree. This is an approximation of a Red-Black tree
+ * with similar performance characteristics, but with a significantly
+ * simpler implementation. Entries in this tree comprise pointers to the
+ * leaf nodes of the host tree described above.
+ */
+
+#include <assert.h>
+#include <ctype.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include "netsurf/image/bitmap.h"
of entries in ::fragment */ + char **fragment; /**< Array of fragments */ + + struct bitmap *thumb; /**< Thumbnail image of resource */ + struct url_internal_data url; /**< URL data for resource */ + struct auth_data auth; /**< Authentication data for resource */ + struct cookie *cookies; /**< Cookies associated with resource */ + + struct path_data *next; /**< Next sibling */ + struct path_data *prev; /**< Previous sibling */ + struct path_data *parent; /**< Parent path segment */ + struct path_data *children; /**< Child path segments */ + struct path_data *last; /**< Last child */ +}; + +struct host_part { + /**< Known paths on this host. This _must_ be first so that + * struct host_part *h = (struct host_part *)mypath; works */ + struct path_data paths; + bool permit_invalid_certs; /**< Allow access to SSL protected + * resources on this host without + * verifying certificate authenticity + */ + + char *part; /**< Part of host string */ + + struct host_part *next; /**< Next sibling */ + struct host_part *prev; /**< Previous sibling */ + struct host_part *parent; /**< Parent host part */ + struct host_part *children; /**< Child host parts */ +}; + +struct search_node { + const struct host_part *data; /**< Host tree entry */ + + unsigned int level; /**< Node level */ + + struct search_node *left; /**< Left subtree */ + struct search_node *right; /**< Right subtree */ +}; + +/* Saving */ +static void urldb_save_search_tree(struct search_node *root, FILE *fp); +static void urldb_count_urls(const struct path_data *root, time_t expiry, + unsigned int *count); +static void urldb_write_urls(const struct path_data *parent, + const char *host, FILE *fp, char **path, int *path_alloc, + int *path_used, time_t expiry); + +/* Iteration */ +static bool urldb_iterate_partial_host(struct search_node *root, + const char *prefix, bool (*callback)(const char *url)); +static bool urldb_iterate_partial_path(const struct path_data *parent, + const char *host, const char *prefix, + 
char **path, int *path_alloc, int *path_used, + bool (*callback)(const char *url)); +static bool urldb_iterate_entries_host(struct search_node *parent, + bool (*callback)(const char *url)); +static bool urldb_iterate_entries_path(const char *host, char **path, + int *path_alloc, int *path_used, + const struct path_data *parent, + bool (*callback)(const char *url)); + +/* Insertion */ +static struct host_part *urldb_add_host_node(const char *part, + struct host_part *parent); +static struct host_part *urldb_add_host(const char *host); +static struct path_data *urldb_add_path_node(const char *scheme, + unsigned int port, const char *segment, const char *fragment, + struct path_data *parent); +static struct path_data *urldb_add_path(const char *scheme, + unsigned int port, const struct host_part *host, + const char *path, const char *fragment); +static int urldb_add_path_fragment_cmp(const void *a, const void *b); +static struct path_data *urldb_add_path_fragment(struct path_data *segment, + const char *fragment); + +/* Lookup */ +static struct path_data *urldb_find_url(const char *url); +static struct path_data *urldb_match_path(const struct path_data *parent, + const char *path, const char *scheme, unsigned short port); + +/* Dump */ +static void urldb_dump_hosts(struct host_part *parent); +static void urldb_dump_paths(struct path_data *parent); +static void urldb_dump_search(struct search_node *parent, int depth); + +/* Search tree */ +static struct search_node *urldb_search_insert(struct search_node *root, + const struct host_part *data); +static struct search_node *urldb_search_insert_internal( + struct search_node *root, struct search_node *n); +static struct search_node *urldb_search_remove(struct search_node *root, + const struct host_part *data); +static const struct host_part *urldb_search_find(struct search_node *root, + const char *host); +static struct search_node *urldb_search_skew(struct search_node *root); +static struct search_node 
*urldb_search_split(struct search_node *root); +static int urldb_search_match_host(const struct host_part *a, + const struct host_part *b); +static int urldb_search_match_string(const struct host_part *a, + const char *b); +static int urldb_search_match_prefix(const struct host_part *a, + const char *b); + +/** Root database handle */ +static struct host_part db_root; + +/** Search trees - one per letter + 1 for IPs */ +#define NUM_SEARCH_TREES 27 +#define ST_IP 0 +#define ST_DN 1 +static struct search_node empty = { 0, 0, &empty, &empty }; +static struct search_node *search_trees[NUM_SEARCH_TREES] = { + &empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty, + &empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty, + &empty, &empty, &empty, &empty, &empty, &empty, &empty, &empty, + &empty, &empty, &empty +}; + +/** + * Import an URL database from file, replacing any existing database + * + * \param filename Name of file containing data + */ +void urldb_load(const char *filename) +{ +#define MAXIMUM_URL_LENGTH 4096 + char s[MAXIMUM_URL_LENGTH]; + struct host_part *h; + int urls; + int i; + int version; + int length; + FILE *fp; + + /** \todo optimise */ + + assert(filename); + + LOG(("Loading URL file")); + + fp = fopen(filename, "r"); + if (!fp) { + LOG(("Failed to open file '%s' for reading", filename)); + return; + } + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + return; + version = atoi(s); + if (version < 105) { + LOG(("Unsupported URL file version.")); + return; + } + if (version > 105) { + LOG(("Unknown URL file version.")); + return; + } + + while (fgets(s, MAXIMUM_URL_LENGTH, fp)) { + /* get the hostname */ + length = strlen(s) - 1; + s[length] = '\0'; + + /* skip data that has ended up with a host of '' */ + if (length == 0) { + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + urls = atoi(s); + for (i = 0; i < (6 * urls); i++) + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + continue; + } + + h = urldb_add_host(s); + if (!h) + 
die("Memory exhausted whilst loading URL file"); + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + urls = atoi(s); + + /* load the non-corrupt data */ + for (i = 0; i < urls; i++) { + struct path_data *p = NULL; + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + length = strlen(s) - 1; + s[length] = '\0'; + + urldb_add_url(s); + p = urldb_find_url(s); + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + if (p) + p->url.visits = (unsigned int)atoi(s); + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + if (p) + p->url.last_visit = (time_t)atoi(s); + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + if (p) + p->url.type = (content_type)atoi(s); + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; +#ifdef riscos + if (p && strlen(s) == 12) { + /* ensure filename is 'XX.XX.XX.XX' */ + if ((s[2] == '.') && (s[5] == '.') && + (s[8] == '.')) { + s[11] = '\0'; + p->thumb = bitmap_create_file(s); + } + } +#endif + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + length = strlen(s) - 1; + if (p && length > 0) { + s[length] = '\0'; + p->url.title = malloc(length + 1); + if (p->url.title) + memcpy(p->url.title, s, length + 1); + } + } + } + + fclose(fp); + LOG(("Successfully loaded URL file")); +#undef MAXIMUM_URL_LENGTH +} + +/** + * Export the current database to file + * + * \param filename Name of file to export to + */ +void urldb_save(const char *filename) +{ + FILE *fp; + int i; + + assert(filename); + + fp = fopen(filename, "w"); + if (!fp) { + LOG(("Failed to open file '%s' for writing", filename)); + return; + } + + /* file format version number */ + fprintf(fp, "105\n"); + + for (i = 0; i != NUM_SEARCH_TREES; i++) { + urldb_save_search_tree(search_trees[i], fp); + } + + fclose(fp); +} + +/** + * Save a search (sub)tree + * + * \param root Root of (sub)tree to save + * \param fp File to write to + */ +void urldb_save_search_tree(struct search_node *parent, FILE *fp) +{ + char host[256]; + const struct host_part *h; + unsigned int path_count = 0; + char 
*path, *p, *end; + int path_alloc = 64, path_used = 2; + time_t expiry = time(NULL) - (60 * 60 * 24) * option_expire_url; + + if (parent == &empty) + return; + + urldb_save_search_tree(parent->left, fp); + + path = malloc(path_alloc); + if (!path) + return; + + path[0] = '/'; + path[1] = '\0'; + + for (h = parent->data, p = host, end = host + sizeof host; + h && h != &db_root && p < end; h = h->parent) { + int written = snprintf(p, end - p, "%s%s", h->part, + (h->parent && h->parent->parent) ? "." : ""); + if (written < 0) { + free(path); + return; + } + p += written; + } + + urldb_count_urls(&parent->data->paths, expiry, &path_count); + + if (path_count > 0) { + fprintf(fp, "%s\n%i\n", host, path_count); + + urldb_write_urls(&parent->data->paths, host, fp, + &path, &path_alloc, &path_used, expiry); + } + + free(path); + + urldb_save_search_tree(parent->right, fp); +} + +/** + * Count number of URLs associated with a host + * + * \param root Root of path data tree + * \param expiry Expiry time for URLs + * \param count Pointer to count + */ +void urldb_count_urls(const struct path_data *root, time_t expiry, + unsigned int *count) +{ + const struct path_data *p; + + if (!root->children) { + if ((root->url.last_visit > expiry) && + (root->url.visits > 0)) + (*count)++; + } + + for (p = root->children; p; p = p->next) + urldb_count_urls(p, expiry, count); +} + +/** + * Write URLs associated with a host + * + * \param parent Root of (sub)tree to write + * \param host Current host name + * \param fp File to write to + * \param path Current path string + * \param path_alloc Allocated size of path + * \param path_used Used size of path + * \param expiry Expiry time of URLs + */ +void urldb_write_urls(const struct path_data *parent, const char *host, + FILE *fp, char **path, int *path_alloc, int *path_used, + time_t expiry) +{ + const struct path_data *p; + int i; + int pused = *path_used; + + if (!parent->children) { + /* leaf node */ + if (!((parent->url.last_visit > 
expiry) && + (parent->url.visits > 0))) + /* expired */ + return; + + fprintf(fp, "%s://%s", parent->scheme, host); + + if (parent->port) + fprintf(fp,":%d", parent->port); + + fprintf(fp, "%s\n", *path); + + /** \todo handle fragments? */ + + fprintf(fp, "%i\n%i\n%i\n", parent->url.visits, + (int)parent->url.last_visit, + (int)parent->url.type); + +#ifdef riscos + if (parent->thumb) + fprintf(fp, "%s\n", parent->thumb->filename); +#else + fprintf(fp, "\n"); +#endif + + if (parent->url.title) { + char *s = parent->url.title; + for (i = 0; s[i] != '\0'; i++) + if (s[i] < 32) + s[i] = ' '; + for (--i; ((i > 0) && (s[i] == ' ')); i--) + s[i] = '\0'; + fprintf(fp, "%s\n", parent->url.title); + } else + fprintf(fp, "\n"); + } + + for (p = parent->children; p; p = p->next) { + int len = *path_used + strlen(p->segment) + 1; + if (*path_alloc < len) { + char *temp = realloc(*path, + (len > 64) ? len : *path_alloc + 64); + if (!temp) + return; + *path = temp; + *path_alloc = (len > 64) ? len : *path_alloc + 64; + } + + strcat(*path, p->segment); + if (p->children) { + strcat(*path, "/"); + } else { + len -= 1; + } + + *path_used = len; + + urldb_write_urls(p, host, fp, path, path_alloc, path_used, + expiry); + + /* restore path to its state on entry to this function */ + *path_used = pused; + (*path)[pused - 1] = '\0'; + } +} + +/** + * Insert an URL into the database + * + * \param url URL to insert + * \return true on success, false otherwise + */ +bool urldb_add_url(const char *url) +{ + struct host_part *h; + struct path_data *p; + char *fragment = NULL, *host, *plq, *scheme, *colon; + unsigned short port; + url_func_result ret; + assert(url); + + /** \todo consider file: URLs */ + + host = strchr(url, '#'); + if (host) { + fragment = strdup(host+1); + if (!fragment) + return false; + } + + /* extract host */ + ret = url_host(url, &host); + if (ret != URL_FUNC_OK) + return false; + + /* extract path, leafname, query */ + ret = url_plq(url, &plq); + if (ret != 
URL_FUNC_OK) { + free(host); + free(fragment); + return false; + } + + /* extract scheme */ + ret = url_scheme(url, &scheme); + if (ret != URL_FUNC_OK) { + free(plq); + free(host); + free(fragment); + return false; + } + + colon = strrchr(host, ':'); + if (!colon) { + port = 0; + } else { + *colon = '\0'; + port = atoi(colon + 1); + } + + /* Get host entry */ + h = urldb_add_host(host); + if (!h) { + free(scheme); + free(plq); + free(host); + free(fragment); + return false; + } + + /* Get path entry */ + p = urldb_add_path(scheme, port, h, plq, fragment); + if (!p) { + free(scheme); + free(plq); + free(host); + free(fragment); + return false; + } + + return true; +} + +/** + * Set an URL's title string, replacing any existing one + * + * \param url The URL to look for + * \param title The title string to use (copied) + */ +void urldb_set_url_title(const char *url, const char *title) +{ + struct path_data *p; + char *temp; + + assert(url && title); + + p = urldb_find_url(url); + if (!p) + return; + + temp = strdup(title); + if (!temp) + return; + + free(p->url.title); + p->url.title = temp; +} + +/** + * Set an URL's content type + * + * \param url The URL to look for + * \param type The type to set + */ +void urldb_set_url_content_type(const char *url, content_type type) +{ + struct path_data *p; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return; + + p->url.type = type; +} + +/** + * Update an URL's visit data + * + * \param url The URL to update + */ +void urldb_update_url_visit_data(const char *url) +{ + struct path_data *p; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return; + + p->url.last_visit = time(NULL); + p->url.visits++; +} + +/** + * Reset an URL's visit statistics + * + * \param url The URL to reset + */ +void urldb_reset_url_visit_data(const char *url) +{ + struct path_data *p; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return; + + p->url.last_visit = (time_t)0; + p->url.visits = 0; +} + + +/** + * Find data 
for an URL. + * + * \param url Absolute URL to look for + * \return Pointer to result struct, or NULL + */ +const struct url_data *urldb_get_url_data(const char *url) +{ + struct path_data *p; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return NULL; + + return (struct url_data *)&p->url; +} + +/** + * Look up authentication details in database + * + * \param url Absolute URL to search for + * \return Pointer to authentication details, or NULL if not found + */ +const char *urldb_get_auth_details(const char *url) +{ + struct path_data *p, *q; + + assert(url); + + /* add to the db, so our lookup will work */ + urldb_add_url(url); + + p = urldb_find_url(url); + if (!p) + return NULL; + + for (; p; p = p->parent) { + /* The parent path entry is stored hung off the + * parent entry with an empty (not NULL) segment string. + * We look for this here. + */ + for (q = p->children; q; q = q->next) { + if (strlen(q->segment) == 0) + break; + } + + if (q && q->auth.realm && q->auth.auth) + break; + } + + if (!q) + return NULL; + + return q->auth.auth; +} + +/** + * Retrieve certificate verification permissions from database + * + * \param url Absolute URL to search for + * \return true to permit connections to hosts with invalid certificates, + * false otherwise. 
+ */ +bool urldb_get_cert_permissions(const char *url) +{ + struct path_data *p; + struct host_part *h; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return false; + + for (; p && p->parent; p = p->parent) + /* do nothing */; + + h = (struct host_part *)p; + + return h->permit_invalid_certs; +} + +/** + * Set authentication data for an URL + * + * \param url The URL to consider + * \param realm The authentication realm + * \param auth The authentication details (in form username:password) + */ +void urldb_set_auth_details(const char *url, const char *realm, + const char *auth) +{ + struct path_data *p; + char *t1, *t2; + + assert(url && realm && auth); + + /* add url, in case it's missing */ + urldb_add_url(url); + + p = urldb_find_url(url); + if (!p) + return; + + /** \todo search subtree for same realm/auth details + * and remove them (as the lookup routine searches up the tree) */ + + t1 = strdup(realm); + t2 = strdup(auth); + + if (!t1 || !t2) { + free(t1); + free(t2); + return; + } + + free(p->auth.realm); + free(p->auth.auth); + + p->auth.realm = t1; + p->auth.auth = t2; +} + +/** + * Set certificate verification permissions + * + * \param url URL to consider + * \param permit Set to true to allow invalid certificates + */ +void urldb_set_cert_permissions(const char *url, bool permit) +{ + struct path_data *p; + struct host_part *h; + + assert(url); + + /* add url, in case it's missing */ + urldb_add_url(url); + + p = urldb_find_url(url); + if (!p) + return; + + for (; p && p->parent; p = p->parent) + /* do nothing */; + + h = (struct host_part *)p; + + h->permit_invalid_certs = permit; +} + +/** + * Set thumbnail for url, replacing any existing thumbnail + * + * \param url Absolute URL to consider + * \param bitmap Opaque pointer to thumbnail data + */ +void urldb_set_thumbnail(const char *url, struct bitmap *bitmap) +{ + struct path_data *p; + + assert(url && bitmap); + + p = urldb_find_url(url); + if (!p) + return; + + if (p->thumb) + 
bitmap_destroy(p->thumb); + + p->thumb = bitmap; +} + +/** + * Retrieve thumbnail data for given URL + * + * \param url Absolute URL to search for + * \return Pointer to thumbnail data, or NULL if not found. + */ +const struct bitmap *urldb_get_thumbnail(const char *url) +{ + struct path_data *p; + + assert(url); + + p = urldb_find_url(url); + if (!p) + return NULL; + + return p->thumb; +} + +/** + * Iterate over entries in the database which match the given prefix + * + * \param prefix Prefix to match + * \param callback Callback function + */ +void urldb_iterate_partial(const char *prefix, + bool (*callback)(const char *url)) +{ + char host[256]; + char buf[260]; /* max domain + "www." */ + const char *slash; + struct search_node *tree; + const struct host_part *h; + + assert(prefix && callback); + + slash = strchr(prefix, '/'); + + if (*prefix >= '0' && *prefix <= '9') + tree = search_trees[ST_IP]; + else if (isalpha(*prefix)) + tree = search_trees[ST_DN + tolower(*prefix) - 'a']; + else + return; + + if (slash) { + /* if there's a slash in the input, then we can + * assume that we're looking for a path */ + char *path, *domain = host; + int path_alloc = 64, path_used = 2; + + snprintf(host, sizeof host, "%.*s", slash - prefix, prefix); + + h = urldb_search_find(tree, host); + if (!h) { + int len = slash - prefix; + + if ((len == 1 && tolower(host[0]) != 'w') || + (len == 2 && (tolower(host[0]) != 'w' || + tolower(host[1]) != 'w')) || + (len >= 3 && + strncasecmp(host, "www", 3))) { + snprintf(buf, sizeof buf, "www.%s", host); + h = urldb_search_find( + search_trees[ST_DN + 'w' - 'a'], + buf); + if (!h) + return; + domain = buf; + } else + return; + } + + path = malloc(path_alloc); + if (!path) + return; + + path[0] = '/'; + path[1] = '\0'; + + urldb_iterate_partial_path(&h->paths, domain, slash + 1, + &path, &path_alloc, &path_used, callback); + + free(path); + } else { + int len = strlen(prefix); + + /* looking for hosts */ + if 
(!urldb_iterate_partial_host(tree, prefix, callback)) + return; + + if ((len == 1 && tolower(prefix[0]) != 'w') || + (len == 2 && (tolower(prefix[0]) != 'w' || + tolower(prefix[1]) != 'w')) || + (len >= 3 && + strncasecmp(prefix, "www", 3))) { + /* now look for www.prefix */ + snprintf(buf, sizeof buf, "www.%s", prefix); + if(!urldb_iterate_partial_host( + search_trees[ST_DN + 'w' - 'a'], + buf, callback)) + return; + } + } +} + +/** + * Partial host iterator (internal) + * + * \param root Root of (sub)tree to traverse + * \param prefix Prefix to match + * \param callback Callback function + * \return true to continue, false otherwise + */ +bool urldb_iterate_partial_host(struct search_node *root, const char *prefix, + bool (*callback)(const char *url)) +{ + int c; + const struct host_part *h; + char domain[256], *p, *end; + char *path; + int path_alloc = 64, path_used = 2; + + assert(root && prefix && callback); + + if (root == &empty) + return true; + + c = urldb_search_match_prefix(root->data, prefix); + + if (c > 0) + /* No match => look in left subtree */ + return urldb_iterate_partial_host(root->left, prefix, + callback); + else if (c < 0) + /* No match => look in right subtree */ + return urldb_iterate_partial_host(root->right, prefix, + callback); + else { + /* Match => iterate over l/r subtrees & process this node */ + if (!urldb_iterate_partial_host(root->left, prefix, + callback)) + return false; + + /* Generate host string */ + for (h = root->data, p = domain, + end = domain + sizeof domain; + h && h != &db_root && p < end; + h = h->parent) { + int written = snprintf(p, end - p, "%s%s", h->part, + (h->parent && h->parent->parent) ? "." 
: ""); + if (written < 0) + return false; + p += written; + } + + path = malloc(path_alloc); + if (!path) + return false; + + path[0] = '/'; + path[1] = '\0'; + + /* and extract all paths attached to this host */ + if (!urldb_iterate_entries_path(domain, &path, &path_alloc, + &path_used, &root->data->paths, callback)) { + free(path); + return false; + } + + free(path); + + if (!urldb_iterate_partial_host(root->right, prefix, + callback)) + return false; + } + + return true; +} + +/** + * Partial path iterator (internal) + * + * \param parent Root of (sub)tree to traverse + * \param host Host string + * \param prefix Prefix to match + * \param path The built path up to this point + * \param path_alloc Allocated size of path + * \param path_used Used size of path + * \param callback Callback function + * \return true to continue, false otherwise + */ +bool urldb_iterate_partial_path(const struct path_data *parent, + const char *host, const char *prefix, + char **path, int *path_alloc, int *path_used, + bool (*callback)(const char *url)) +{ + const struct path_data *p; + const char *slash, *end = prefix + strlen(prefix); + int pused = *path_used; + int c; + + slash = strchr(prefix, '/'); + if (!slash) + slash = end; + + if (slash == prefix && *prefix == '/') + /* Ignore "//" */ + return true; + + for (p = parent->children; p; p = p->next) { + if ((c = strncasecmp(p->segment, prefix, slash - prefix)) < 0) + /* didn't match, but may be more */ + continue; + else if (c > 0) + /* no more possible matches */ + break; + + /* prefix matches so far */ + int len = *path_used + strlen(p->segment) + 1; + if (*path_alloc < len) { + char *temp = realloc(*path, + (len > 64) ? len : *path_alloc + 64); + if (!temp) + return false; + *path = temp; + *path_alloc = (len > 64) ? 
len : *path_alloc + 64; + } + + strcat(*path, p->segment); + if (p->children) + strcat(*path, "/"); + else + len -= 1; + + *path_used = len; + + if (slash == end) { + /* we've run out of prefix, so all + * paths below this one match */ + if (!urldb_iterate_entries_path(host, path, + path_alloc, path_used, p, callback)) + return false; + } else { + /* more prefix to go => recurse */ + if (!urldb_iterate_partial_path(p, host, slash + 1, + path, path_alloc, path_used, + callback)) + return false; + } + + /* restore path to that from input for next child */ + *path_used = pused; + (*path)[pused - 1] = '\0'; + } + + return true; +} + +/** + * Iterate over all entries in database + * + * \param callback Function to callback for each entry + */ +void urldb_iterate_entries(bool (*callback)(const char *url)) +{ + int i; + + assert(callback); + + for (i = 0; i < NUM_SEARCH_TREES; i++) { + if (!urldb_iterate_entries_host(search_trees[i], + callback)) + break; + } +} + +/** + * Host data iterator (internal) + * + * \param parent Root of subtree to iterate over + * \param callback Callback function + * \return true to continue, false otherwise + */ +bool urldb_iterate_entries_host(struct search_node *parent, + bool (*callback)(const char *url)) +{ + char domain[256], *p, *end; + const struct host_part *h; + char *path; + int path_alloc = 64, path_used = 2; + + if (parent == &empty) + return true; + + if (!urldb_iterate_entries_host(parent->left, callback)) + return false; + + for (h = parent->data, p = domain, end = domain + sizeof domain; + h && h != &db_root && p < end; h = h->parent) { + int written = snprintf(p, end - p, "%s%s", h->part, + (h->parent && h->parent->parent) ? "." 
: ""); + if (written < 0) + return false; + p += written; + } + + path = malloc(path_alloc); + if (!path) + return false; + + path[0] = '/'; + path[1] = '\0'; + + if (!urldb_iterate_entries_path(domain, &path, &path_alloc, + &path_used, &parent->data->paths, callback)) { + free(path); + return false; + } + + free(path); + + if (!urldb_iterate_entries_host(parent->right, callback)) + return false; + + return true; +} + +/** + * Path data iterator (internal) + * + * \param host Host component of output URI + * \param path The built path up to this point + * \param path_alloc Allocated size of path + * \param path_used Used size of path + * \param parent Root of subtree to iterate over + * \param callback Callback function to call + * \return true to continue, false otherwise + */ +bool urldb_iterate_entries_path(const char *host, char **path, + int *path_alloc, int *path_used, + const struct path_data *parent, + bool (*callback)(const char *url)) +{ + const struct path_data *p; + int pused = *path_used; + + if (!parent->children) { + /* leaf node */ + int schemelen = strlen(parent->scheme); + int hostlen = strlen(host); + int prefixlen = schemelen + 3 /* :// */ + + hostlen + 6 /* :NNNNN */; + static char *url; + static int url_alloc; + int written; + + if (url_alloc < *path_used + prefixlen + 2) { + char *temp = realloc(url, *path_used + prefixlen + 2); + if (!temp) + return false; + url = temp; + url_alloc = *path_used + prefixlen + 2; + } + + written = sprintf(url, "%s://%s", parent->scheme, host); + if (written < 0) { + return false; + } + + if (parent->port) { + written = sprintf(url + schemelen + 3 + hostlen, + ":%d", parent->port); + if (written < 0) { + return false; + } + written += schemelen + 3 + hostlen; + } + + written = sprintf(url + written, "%s", *path); + if (written < 0) { + return false; + } + + /** \todo handle fragments? 
*/ + + if (!callback(url)) + return false; + } + + for (p = parent->children; p; p = p->next) { + int len = *path_used + strlen(p->segment) + 1; + if (*path_alloc < len) { + char *temp = realloc(*path, + (len > 64) ? len : *path_alloc + 64); + if (!temp) + return false; + *path = temp; + *path_alloc = (len > 64) ? len : *path_alloc + 64; + } + + strcat(*path, p->segment); + if (p->children) { + strcat(*path, "/"); + } else { + len -= 1; + } + + *path_used = len; + + if (!urldb_iterate_entries_path(host, path, path_alloc, + path_used, p, callback)) + return false; + + /* restore path to its state on entry to this function */ + *path_used = pused; + (*path)[pused - 1] = '\0'; + } + + return true; +} + +/** + * Add a host node to the tree + * + * \param part Host segment to add (or whole IP address) (copied) + * \param parent Parent node to add to + * \return Pointer to added node, or NULL on memory exhaustion + */ +struct host_part *urldb_add_host_node(const char *part, + struct host_part *parent) +{ + struct host_part *d; + + assert(part && parent); + + d = calloc(1, sizeof(struct host_part)); + if (!d) + return NULL; + + d->part = strdup(part); + if (!d->part) { + free(d); + return NULL; + } + + d->next = parent->children; + if (parent->children) + parent->children->prev = d; + d->parent = parent; + parent->children = d; + + return d; +} + +/** + * Add a host to the database, creating any intermediate entries + * + * \param host Hostname to add + * \return Pointer to leaf node, or NULL on memory exhaustion + */ +struct host_part *urldb_add_host(const char *host) +{ + struct host_part *d = (struct host_part *) &db_root, *e; + struct search_node *s; + char buf[256]; /* 256 bytes is sufficient - domain names are + * limited to 255 chars. 
*/ + char *part; + + assert(host); + + if (*(host) >= '0' && *(host) <= '9') { + /* Host is an IP, so simply add as TLD */ + + /* Check for existing entry */ + for (e = d->children; e; e = e->next) + if (strcasecmp(host, e->part) == 0) + /* found => return it */ + return e; + + d = urldb_add_host_node(host, d); + + s = urldb_search_insert(search_trees[ST_IP], d); + if (!s) { + /* failed */ + d = NULL; + } else { + search_trees[ST_IP] = s; + } + + return d; + } + + /* Copy host string, so we can corrupt it */ + strncpy(buf, host, sizeof buf); + buf[sizeof buf - 1] = '\0'; + + /* Process FQDN segments backwards */ + do { + part = strrchr(buf, '.'); + if (!part) { + /* last segment */ + /* Check for existing entry */ + for (e = d->children; e; e = e->next) + if (strcasecmp(buf, e->part) == 0) + break; + + if (e) { + d = e; + } else { + d = urldb_add_host_node(buf, d); + } + + /* And insert into search tree */ + if (d) { + if (isalpha(*buf)) { + struct search_node **r; + r = &search_trees[ + tolower(*buf) - 'a' + ST_DN]; + + s = urldb_search_insert(*r, d); + if (!s) { + /* failed */ + d = NULL; + } else { + *r = s; + } + } else { + d = NULL; + } + } + break; + } + + /* Check for existing entry */ + for (e = d->children; e; e = e->next) + if (strcasecmp(part + 1, e->part) == 0) + break; + + d = e ? 
e : urldb_add_host_node(part + 1, d); + if (!d) + break; + + *part = '\0'; + } while (1); + + return d; +} + +/** + * Add a path node to the tree + * + * \param scheme URL scheme associated with path (copied) + * \param port Port number on host associated with path + * \param segment Path segment to add (copied) + * \param fragment URL fragment (copied), or NULL + * \param parent Parent node to add to + * \return Pointer to added node, or NULL on memory exhaustion + */ +struct path_data *urldb_add_path_node(const char *scheme, unsigned int port, + const char *segment, const char *fragment, + struct path_data *parent) +{ + struct path_data *d, *e; + + assert(scheme && segment && parent); + + d = calloc(1, sizeof(struct path_data)); + if (!d) + return NULL; + + d->scheme = strdup(scheme); + if (!d->scheme) { + free(d); + return NULL; + } + + d->port = port; + + d->segment = strdup(segment); + if (!d->segment) { + free(d->scheme); + free(d); + return NULL; + } + + if (fragment) { + if (!urldb_add_path_fragment(d, fragment)) { + free(d->segment); + free(d->scheme); + free(d); + return NULL; + } + } + + for (e = parent->children; e; e = e->next) + if (strcmp(e->segment, d->segment) > 0) + break; + + if (e) { + d->prev = e->prev; + d->next = e; + if (e->prev) + e->prev->next = d; + else + parent->children = d; + e->prev = d; + } else if (!parent->children) { + d->prev = d->next = NULL; + parent->children = parent->last = d; + } else { + d->next = NULL; + d->prev = parent->last; + parent->last->next = d; + parent->last = d; + } + d->parent = parent; + + return d; +} + +/** + * Add a path to the database, creating any intermediate entries + * + * \param scheme URL scheme associated with path + * \param port Port number on host associated with path + * \param host Host tree node to attach to + * \param path Absolute path to add + * \param fragment URL fragment, or NULL + * \return Pointer to leaf node, or NULL on memory exhaustion + */ +struct path_data 
*urldb_add_path(const char *scheme, unsigned int port, + const struct host_part *host, const char *path, + const char *fragment) +{ + struct path_data *d, *e; + char *buf; + char *segment, *slash; + + assert(scheme && host && path); + + d = (struct path_data *) &host->paths; + + /* Copy path string, so we can corrupt it */ + buf = malloc(strlen(path) + 1); + if (!buf) + return NULL; + + /* + 1 to strip leading '/' */ + strcpy(buf, path + 1); + + segment = buf; + + /* Process path segments */ + do { + slash = strchr(segment, '/'); + if (!slash) { + /* last segment */ + /* look for existing entry */ + for (e = d->children; e; e = e->next) + if (strcmp(segment, e->segment) == 0 && + strcasecmp(scheme, + e->scheme) == 0 && + e->port == port) + break; + + d = e ? urldb_add_path_fragment(e, fragment) : + urldb_add_path_node(scheme, port, + segment, fragment, d); + break; + } + + *slash = '\0'; + + /* look for existing entry */ + for (e = d->children; e; e = e->next) + if (strcmp(segment, e->segment) == 0 && + strcasecmp(scheme, e->scheme) == 0 && + e->port == port) + break; + + d = e ? 
e : urldb_add_path_node(scheme, port, segment, + NULL, d); + if (!d) + break; + + segment = slash + 1; + } while (1); + + free(buf); + + return d; +} + +/** + * Fragment comparator callback for qsort + */ +int urldb_add_path_fragment_cmp(const void *a, const void *b) +{ + return strcasecmp(*((const char **) a), *((const char **) b)); +} + +/** + * Add a fragment to a path segment + * + * \param segment Path segment to add to + * \param fragment Fragment to add (copied), or NULL + * \return segment or NULL on memory exhaustion + */ +struct path_data *urldb_add_path_fragment(struct path_data *segment, + const char *fragment) +{ + char **temp; + + assert(segment); + + /* If no fragment, this function is a NOP + * This may seem strange, but it makes the rest + * of the code cleaner */ + if (!fragment) + return segment; + + temp = realloc(segment->fragment, + (segment->frag_cnt + 1) * sizeof(char *)); + if (!temp) + return NULL; + + segment->fragment = temp; + segment->fragment[segment->frag_cnt] = strdup(fragment); + if (!segment->fragment[segment->frag_cnt]) { + /* Don't free temp - it's now our buffer */ + return NULL; + } + + segment->frag_cnt++; + + /* We want fragments in alphabetical order, so sort them + * It may prove better to insert in alphabetical order instead */ + qsort(segment->fragment, segment->frag_cnt, sizeof (char *), + urldb_add_path_fragment_cmp); + + return segment; +} + +/** + * Find an URL in the database + * + * \param url The URL to find + * \return Pointer to path data, or NULL if not found + */ +struct path_data *urldb_find_url(const char *url) +{ + const struct host_part *h; + struct path_data *p; + struct search_node *tree; + char *host, *plq, *scheme, *colon; + unsigned short port; + url_func_result ret; + + assert(url); + + /** \todo consider file: URLs */ + + /* extract host */ + ret = url_host(url, &host); + if (ret != URL_FUNC_OK) + return NULL; + + /* extract path, leafname, query */ + ret = url_plq(url, &plq); + if (ret != 
URL_FUNC_OK) { + free(host); + return NULL; + } + + /* extract scheme */ + ret = url_scheme(url, &scheme); + if (ret != URL_FUNC_OK) { + free(plq); + free(host); + return NULL; + } + + colon = strrchr(host, ':'); + if (!colon) { + port = 0; + } else { + *colon = '\0'; + port = atoi(colon + 1); + } + + if (*host >= '0' && *host <= '9') + tree = search_trees[ST_IP]; + else if (isalpha(*host)) + tree = search_trees[ST_DN + tolower(*host) - 'a']; + else { + free(plq); + free(host); + free(scheme); + return NULL; + } + + h = urldb_search_find(tree, host); + if (!h) { + free(plq); + free(host); + free(scheme); + return NULL; + } + + p = urldb_match_path(&h->paths, plq, scheme, port); + + free(plq); + free(host); + free(scheme); + + return p; +} + +/** + * Match a path string + * + * \param parent Path (sub)tree to look in + * \param path The path to search for + * \param scheme The URL scheme associated with the path + * \param port The port associated with the path + * \return Pointer to path data or NULL if not found. 
+ */ +struct path_data *urldb_match_path(const struct path_data *parent, + const char *path, const char *scheme, unsigned short port) +{ + struct path_data *p; + const char *slash; + + if (*path == '\0') + return (struct path_data *)parent; + + slash = strchr(path + 1, '/'); + if (!slash) + slash = path + strlen(path); + + for (p = parent->children; p; p = p->next) { + if (strncmp(p->segment, path + 1, slash - path - 1) == 0 && + strcmp(p->scheme, scheme) == 0 && + p->port == port) + break; + } + + if (p) { + return urldb_match_path(p, slash, scheme, port); + } + + return NULL; +} + +/** + * Dump URL database to stderr + */ +void urldb_dump(void) +{ + int i; + + urldb_dump_hosts(&db_root); + + for (i = 0; i != NUM_SEARCH_TREES; i++) + urldb_dump_search(search_trees[i], 0); +} + +/** + * Dump URL database hosts to stderr + * + * \param parent Parent node of tree to dump + */ +void urldb_dump_hosts(struct host_part *parent) +{ + struct host_part *h; + + if (parent->part) { + LOG(("%s", parent->part)); + + LOG(("\t%s invalid SSL certs", + parent->permit_invalid_certs ? 
"Permits" : "Denies")); + } + + /* Dump path data */ + urldb_dump_paths(&parent->paths); + + /* and recurse */ + for (h = parent->children; h; h = h->next) + urldb_dump_hosts(h); +} + +/** + * Dump URL database paths to stderr + * + * \param parent Parent node of tree to dump + */ +void urldb_dump_paths(struct path_data *parent) +{ + struct path_data *p; + unsigned int i; + + if (parent->segment) { + LOG(("\t%s : %u", parent->scheme, parent->port)); + + LOG(("\t\t'%s'", parent->segment)); + + for (i = 0; i != parent->frag_cnt; i++) + LOG(("\t\t\t#%s", parent->fragment[i])); + } + + /* and recurse */ + for (p = parent->children; p; p = p->next) + urldb_dump_paths(p); +} + +/** + * Dump search tree + * + * \param parent Parent node of tree to dump + * \param depth Tree depth + */ +void urldb_dump_search(struct search_node *parent, int depth) +{ + const struct host_part *h; + int i; + + if (parent == &empty) + return; + + urldb_dump_search(parent->left, depth + 1); + + for (i = 0; i != depth; i++) + fputc(' ', stderr); + + for (h = parent->data; h; h = h->parent) { + fprintf(stderr, "%s", h->part); + if (h->parent && h->parent->parent) + fputc('.', stderr); + } + + fputc('\n', stderr); + + urldb_dump_search(parent->right, depth + 1); +} + +/** + * Insert a node into the search tree + * + * \param root Root of tree to insert into + * \param data User data to insert + * \return Pointer to updated root, or NULL if failed + */ +struct search_node *urldb_search_insert(struct search_node *root, + const struct host_part *data) +{ + struct search_node *n; + + assert(root && data); + + n = malloc(sizeof(struct search_node)); + if (!n) + return NULL; + + n->level = 1; + n->data = data; + n->left = n->right = ∅ + + root = urldb_search_insert_internal(root, n); + + return root; +} + +/** + * Insert node into search tree + * + * \param root Root of (sub)tree to insert into + * \param n Node to insert + * \return Pointer to updated root + */ +struct search_node 
*urldb_search_insert_internal(struct search_node *root, + struct search_node *n) +{ + assert(root && n); + + if (root == &empty) { + root = n; + } else { + int c = urldb_search_match_host(root->data, n->data); + + if (c > 0) { + root->left = urldb_search_insert_internal( + root->left, n); + } else if (c < 0) { + root->right = urldb_search_insert_internal( + root->right, n); + } else { + /* exact match */ + free(n); + return root; + } + + root = urldb_search_skew(root); + root = urldb_search_split(root); + } + + return root; +} + +/** + * Delete a node from a search tree + * + * \param root Tree to remove from + * \param data Data to delete + * \return Updated root of tree + */ +struct search_node *urldb_search_remove(struct search_node *root, + const struct host_part *data) +{ + static struct search_node *last, *deleted; + + assert(root && data); + + if (root != &empty) { + int c = urldb_search_match_host(root->data, data); + + last = root; + if (c > 0) { + root->left = urldb_search_remove(root->left, data); + } else { + deleted = root; + root->right = urldb_search_remove(root->right, data); + } + } + + if (root == last) { + if (deleted != &empty && + urldb_search_match_host(deleted->data, + data) == 0) { + deleted->data = last->data; + deleted = ∅ + root = root->right; + } + } else { + if (root->left->level < root->level - 1 || + root->right->level < root->level - 1) { + if (root->right->level > --root->level) + root->right->level = root->level; + + root = urldb_search_skew(root); + root->right = urldb_search_skew(root->right); + root->right->right = + urldb_search_skew(root->right->right); + root = urldb_search_split(root); + root->right = urldb_search_split(root->right); + } + } + + return root; +} + +/** + * Find a node in a search tree + * + * \param root Tree to look in + * \param host Host to find + * \return Pointer to host tree node, or NULL if not found + */ +const struct host_part *urldb_search_find(struct search_node *root, + const char *host) +{ + int 
c; + + assert(root && host); + + if (root == &empty) { + return NULL; + } + + c = urldb_search_match_string(root->data, host); + + if (c > 0) + return urldb_search_find(root->left, host); + else if (c < 0) + return urldb_search_find(root->right, host); + else + return root->data; +} + +/** + * Compare a pair of host_parts + * + * \param a + * \param b + * \return 0 if match, non-zero, otherwise + */ +int urldb_search_match_host(const struct host_part *a, + const struct host_part *b) +{ + int ret; + + assert(a && b); + + /* traverse up tree to root, comparing parts as we go. */ + for (; a && b; a = a->parent, b = b->parent) + if ((ret = strcasecmp(a->part, b->part)) != 0) + /* They differ => return the difference here */ + return ret; + + /* If we get here then either: + * a) The path lengths differ + * or b) The hosts are identical + */ + if (a && !b) + /* len(a) > len(b) */ + return 1; + else if (!a && b) + /* len(a) < len(b) */ + return -1; + + /* identical */ + return 0; +} + +/** + * Compare host_part with a string + * + * \param a + * \param b + * \return 0 if match, non-zero, otherwise + */ +int urldb_search_match_string(const struct host_part *a, + const char *b) +{ + const char *end, *dot; + int plen, ret; + + assert(a && b); + + if (*b >= '0' && *b <= '9') { + /* IP address */ + return strcasecmp(a->part, b); + } + + end = b + strlen(b); + + while (b < end && a) { + dot = strchr(b, '.'); + if (!dot) { + /* last segment */ + dot = end; + } + + /* Compare strings (length limited) */ + if ((ret = strncasecmp(a->part, b, dot - b)) != 0) + /* didn't match => return difference */ + return ret; + + /* The strings matched, now check that the lengths do, too */ + plen = strlen(a->part); + + if (plen > dot - b) + /* len(a) > len(b) */ + return 1; + else if (plen < dot - b) + /* len(a) < len(b) */ + return -1; + + b = dot + 1; + a = a->parent; + } + + /* If we get here then either: + * a) The path lengths differ + * or b) The hosts are identical + */ + if (a && a != 
&db_root && b >= end) + /* len(a) > len(b) */ + return 1; + else if (!a && b < end) + /* len(a) < len(b) */ + return -1; + + /* Identical */ + return 0; +} + +/** + * Compare host_part with prefix + * + * \param a + * \param b + * \return 0 if match, non-zero, otherwise + */ +int urldb_search_match_prefix(const struct host_part *a, + const char *b) +{ + const char *end, *dot; + int plen, ret; + + assert(a && b); + + if (*b >= '0' && *b <= '9') { + /* IP address */ + return strncasecmp(a->part, b, strlen(b)); + } + + end = b + strlen(b); + + while (b < end && a) { + dot = strchr(b, '.'); + if (!dot) { + /* last segment */ + dot = end; + } + + /* Compare strings (length limited) */ + if ((ret = strncasecmp(a->part, b, dot - b)) != 0) + /* didn't match => return difference */ + return ret; + + /* The strings matched */ + if (dot < end) { + /* Consider segment lengths only in the case + * where the prefix contains segments */ + plen = strlen(a->part); + if (plen > dot - b) + /* len(a) > len(b) */ + return 1; + else if (plen < dot - b) + /* len(a) < len(b) */ + return -1; + } + + b = dot + 1; + a = a->parent; + } + + /* If we get here then either: + * a) The path lengths differ + * or b) The hosts are identical + */ + if (a && a != &db_root && b >= end) + /* len(a) > len(b) => prefix matches */ + return 0; + else if (!a && b < end) + /* len(a) < len(b) => prefix does not match */ + return -1; + + /* Identical */ + return 0; +} + +/** + * Rotate a subtree right + * + * \param root Root of subtree to rotate + * \return new root of subtree + */ +struct search_node *urldb_search_skew(struct search_node *root) +{ + struct search_node *temp; + + assert(root); + + if (root->left->level == root->level) { + temp = root->left; + root->left = temp->right; + temp->right = root; + root = temp; + } + + return root; +} + +/** + * Rotate a node left, increasing the parent's level + * + * \param root Root of subtree to rotate + * \return New root of subtree + */ +struct search_node 
*urldb_search_split(struct search_node *root) +{ + struct search_node *temp; + + assert(root); + + if (root->right->right->level == root->level) { + temp = root->right; + root->right = temp->left; + temp->left = root; + root = temp; + + root->level++; + } + + return root; +} + +#ifdef TEST +int main(void) +{ + struct host_part *h; + struct path_data *p; + + h = urldb_add_host("127.0.0.1"); + if (!h) { + LOG(("failed adding host")); + return 1; + } + + /* Get host entry */ + h = urldb_add_host("netsurf.strcprstskrzkrk.co.uk"); + if (!h) { + LOG(("failed adding host")); + return 1; + } + + /* Get path entry */ + p = urldb_add_path("http", 80, h, "/path/to/resource.htm?a=b", "zz"); + if (!p) { + LOG(("failed adding path")); + return 1; + } + + p = urldb_add_path("http", 80, h, "/path/to/resource.htm?a=b", "aa"); + if (!p) { + LOG(("failed adding path")); + return 1; + } + + p = urldb_add_path("http", 80, h, "/path/to/resource.htm?a=b", "yy"); + if (!p) { + LOG(("failed adding path")); + return 1; + } + + urldb_dump(); + + return 0; +} +#endif diff --git a/content/urldb.h b/content/urldb.h new file mode 100644 index 000000000..9d59271d2 --- /dev/null +++ b/content/urldb.h @@ -0,0 +1,65 @@ +/* + * This file is part of NetSurf, http://netsurf.sourceforge.net/ + * Licensed under the GNU General Public License, + * http://www.opensource.org/licenses/gpl-license + * Copyright 2006 John M Bell + */ + +/** \file + * Unified URL information database (interface) + */ + +#ifndef _NETSURF_CONTENT_URLDB_H_ +#define _NETSURF_CONTENT_URLDB_H_ + +#include +#include +#include "netsurf/content/content_type.h" + +struct url_data { + const char *title; /**< Resource title */ + unsigned int visits; /**< Visit count */ + time_t last_visit; /**< Last visit time */ + content_type type; /**< Type of resource */ +}; + +struct bitmap; + +/* Persistence support */ +void urldb_load(const char *filename); +void urldb_save(const char *filename); + +/* URL insertion */ +bool urldb_add_url(const char 
*url); + +/* URL data modification / lookup */ +void urldb_set_url_title(const char *url, const char *title); +void urldb_set_url_content_type(const char *url, content_type type); +void urldb_update_url_visit_data(const char *url); +void urldb_reset_url_visit_data(const char *url); +const struct url_data *urldb_get_url_data(const char *url); + +/* Authentication modification / lookup */ +void urldb_set_auth_details(const char *url, const char *realm, + const char *auth); +const char *urldb_get_auth_details(const char *url); + +/* SSL certificate permissions */ +void urldb_set_cert_permissions(const char *url, bool permit); +bool urldb_get_cert_permissions(const char *url); + +/* Thumbnail handling */ +void urldb_set_thumbnail(const char *url, struct bitmap *bitmap); +const struct bitmap *urldb_get_thumbnail(const char *url); + +/* URL completion */ +void urldb_iterate_partial(const char *prefix, + bool (*callback)(const char *url)); + +/* Iteration */ +void urldb_iterate_entries(bool (*callback)(const char *url)); + +/* Debug */ +void urldb_dump(void); + +#endif -- cgit v1.2.3