From cf7abb4a0ad6a6de3acf3215ca6d31fdebbf4708 Mon Sep 17 00:00:00 2001
From: Vincent Sanders <vince@kyllikki.org>
Date: Sat, 1 Nov 2014 23:27:29 +0000
Subject: restructure urldb source

remove forward declarations and restructure. exported functions are
also now documented in the urldb.h header.
---
 content/urldb.c | 5796 +++++++++++++++++++++++++++----------------------------
 content/urldb.h |  216 ++-
 2 files changed, 3028 insertions(+), 2984 deletions(-)

diff --git a/content/urldb.c b/content/urldb.c
index bf873c62e..8af6ae150 100644
--- a/content/urldb.c
+++ b/content/urldb.c
@@ -17,8 +17,9 @@
  * along with this program.  If not, see <http://www.gnu.org/licenses/>.
  */
 
-/** \file
- * Unified URL information database (implementation)
+/**
+ * \file
+ * Unified URL information database implementation
  *
  * URLs are stored in a tree-based structure as follows:
  *
@@ -81,8 +82,8 @@
  * simpler implementation. Entries in this tree comprise pointers to the
  * leaf nodes of the host tree described above.
  *
- * REALLY IMPORTANT NOTE: urldb expects all URLs to be normalised. Use of 
- * non-normalised URLs with urldb will result in undefined behaviour and 
+ * REALLY IMPORTANT NOTE: urldb expects all URLs to be normalised. Use of
+ * non-normalised URLs with urldb will result in undefined behaviour and
  * potential crashes.
  */
 
@@ -217,94 +218,6 @@ struct search_node {
 	struct search_node *right;	/**< Right subtree */
 };
 
-/* Destruction */
-static void urldb_destroy_host_tree(struct host_part *root);
-static void urldb_destroy_path_tree(struct path_data *root);
-static void urldb_destroy_path_node_content(struct path_data *node);
-static void urldb_destroy_cookie(struct cookie_internal_data *c);
-static void urldb_destroy_prot_space(struct prot_space_data *space);
-static void urldb_destroy_search_tree(struct search_node *root);
-
-/* Saving */
-static void urldb_save_search_tree(struct search_node *root, FILE *fp);
-static void urldb_count_urls(const struct path_data *root, time_t expiry,
-		unsigned int *count);
-static void urldb_write_paths(const struct path_data *parent,
-		const char *host, FILE *fp, char **path, int *path_alloc,
-		int *path_used, time_t expiry);
-
-/* Iteration */
-static bool urldb_iterate_partial_host(struct search_node *root,
-		const char *prefix, bool (*callback)(nsurl *url,
-		const struct url_data *data));
-static bool urldb_iterate_partial_path(const struct path_data *parent,
-		const char *prefix, bool (*callback)(nsurl *url,
-		const struct url_data *data));
-static bool urldb_iterate_entries_host(struct search_node *parent,
-		bool (*url_callback)(nsurl *url,
-		const struct url_data *data),
-		bool (*cookie_callback)(const struct cookie_data *data));
-static bool urldb_iterate_entries_path(const struct path_data *parent,
-		bool (*url_callback)(nsurl *url,
-		const struct url_data *data),
-		bool (*cookie_callback)(const struct cookie_data *data));
-
-/* Insertion */
-static struct host_part *urldb_add_host_node(const char *part,
-		struct host_part *parent);
-static struct path_data *urldb_add_path_node(lwc_string *scheme,
-		unsigned int port, const char *segment, lwc_string *fragment,
-		struct path_data *parent);
-static int urldb_add_path_fragment_cmp(const void *a, const void *b);
-static struct path_data *urldb_add_path_fragment(struct path_data *segment,
-		lwc_string *fragment);
-
-/* Lookup */
-static struct path_data *urldb_find_url(nsurl *url);
-static struct path_data *urldb_match_path(const struct path_data *parent,
-		const char *path, lwc_string *scheme, unsigned short port);
-static struct search_node **urldb_get_search_tree_direct(const char *host);
-static struct search_node *urldb_get_search_tree(const char *host);
-
-/* Dump */
-static void urldb_dump_hosts(struct host_part *parent);
-static void urldb_dump_paths(struct path_data *parent);
-static void urldb_dump_search(struct search_node *parent, int depth);
-
-/* Search tree */
-static struct search_node *urldb_search_insert(struct search_node *root,
-		const struct host_part *data);
-static struct search_node *urldb_search_insert_internal(
-		struct search_node *root, struct search_node *n);
-/* for urldb_search_remove, see r5531 which removed it */
-static const struct host_part *urldb_search_find(struct search_node *root,
-		const char *host);
-static struct search_node *urldb_search_skew(struct search_node *root);
-static struct search_node *urldb_search_split(struct search_node *root);
-static int urldb_search_match_host(const struct host_part *a,
-		const struct host_part *b);
-static int urldb_search_match_string(const struct host_part *a,
-		const char *b);
-static int urldb_search_match_prefix(const struct host_part *a,
-		const char *b);
-
-/* Cookies */
-static struct cookie_internal_data *urldb_parse_cookie(nsurl *url,
-		const char **cookie);
-static bool urldb_parse_avpair(struct cookie_internal_data *c, char *n, 
-		char *v, bool was_quoted);
-static bool urldb_insert_cookie(struct cookie_internal_data *c, 
-		lwc_string *scheme, nsurl *url);
-static void urldb_free_cookie(struct cookie_internal_data *c);
-static bool urldb_concat_cookie(struct cookie_internal_data *c, int version,
-		int *used, int *alloc, char **buf);
-static void urldb_delete_cookie_hosts(const char *domain, const char *path, 
-		const char *name, struct host_part *parent);
-static void urldb_delete_cookie_paths(const char *domain, const char *path, 
-		const char *name, struct path_data *parent);
-static void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent);
-static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent);
-
 /** Root database handle */
 static struct host_part db_root;
 
@@ -325,6 +238,8 @@ static struct search_node *search_trees[NUM_SEARCH_TREES] = {
 #define COOKIE_FILE_VERSION 102
 static int loaded_cookie_file_version;
 #define MIN_URL_FILE_VERSION 106
+
+/** URL database file version */
 #define URL_FILE_VERSION 106
 
 /* Bloom filter used for short-circuting the false case of "is this
@@ -338,241 +253,163 @@ static int loaded_cookie_file_version;
 static struct bloom_filter *url_bloom;
 #define BLOOM_SIZE (1024 * 32)
 
+
+
 /**
- * Import an URL database from file, replacing any existing database
+ * Write paths associated with a host
  *
- * \param filename Name of file containing data
+ * \param parent Root of (sub)tree to write
+ * \param host Current host name
+ * \param fp File to write to
+ * \param path Current path string
+ * \param path_alloc Allocated size of path
+ * \param path_used Used size of path
+ * \param expiry Expiry time of URLs
  */
-nserror urldb_load(const char *filename)
+static void urldb_write_paths(const struct path_data *parent, const char *host,
+		FILE *fp, char **path, int *path_alloc, int *path_used,
+		time_t expiry)
 {
-#define MAXIMUM_URL_LENGTH 4096
-	char s[MAXIMUM_URL_LENGTH];
-	char host[256];
-	struct host_part *h;
-	int urls;
+	const struct path_data *p = parent;
 	int i;
-	int version;
-	int length;
-	FILE *fp;
-
-	assert(filename);
-
-	LOG(("Loading URL file %s", filename));
-
-        if (url_bloom == NULL)
-                url_bloom = bloom_create(BLOOM_SIZE);
-
-	fp = fopen(filename, "r");
-	if (!fp) {
-		LOG(("Failed to open file '%s' for reading", filename));
-		return NSERROR_NOT_FOUND;
-	}
-
-	if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) {
-		fclose(fp);
-		return NSERROR_NEED_DATA;
-	}
-
-	version = atoi(s);
-	if (version < MIN_URL_FILE_VERSION) {
-		LOG(("Unsupported URL file version."));
-		fclose(fp);
-		return NSERROR_INVALID;
-	}
-	if (version > URL_FILE_VERSION) {
-		LOG(("Unknown URL file version."));
-		fclose(fp);
-		return NSERROR_INVALID;
-	}
 
-	while (fgets(host, sizeof host, fp)) {
-		/* get the hostname */
-		length = strlen(host) - 1;
-		host[length] = '\0';
+	do {
+		int seglen = p->segment != NULL ? strlen(p->segment) : 0;
+		int len = *path_used + seglen + 1;
 
-		/* skip data that has ended up with a host of '' */
-		if (length == 0) {
-			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
-				break;
-			urls = atoi(s);
-			/* Eight fields/url */
-			for (i = 0; i < (8 * urls); i++) {
-				if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
-					break;
-			}
-			continue;
+		if (*path_alloc < len) {
+			char *temp = realloc(*path,
+					(len > 64) ? len : *path_alloc + 64);
+			if (!temp)
+				return;
+			*path = temp;
+			*path_alloc = (len > 64) ? len : *path_alloc + 64;
 		}
 
-		/* read number of URLs */
-		if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
-			break;
-		urls = atoi(s);
-
-		/* no URLs => try next host */
-		if (urls == 0) {
-			LOG(("No URLs for '%s'", host));
-			continue;
-		}
+		if (p->segment != NULL)
+			memcpy(*path + *path_used - 1, p->segment, seglen);
 
-		h = urldb_add_host(host);
-		if (!h) {
-			LOG(("Failed adding host: '%s'", host));
-			fclose(fp);
-			return NSERROR_NOMEM;
+		if (p->children != NULL) {
+			(*path)[*path_used + seglen - 1] = '/';
+			(*path)[*path_used + seglen] = '\0';
+		} else {
+			(*path)[*path_used + seglen - 1] = '\0';
+			len -= 1;
 		}
 
-		/* load the non-corrupt data */
-		for (i = 0; i < urls; i++) {
-			struct path_data *p = NULL;
-			char scheme[64], ports[10];
-			char url[64 + 3 + 256 + 6 + 4096 + 1];
-			unsigned int port;
-			bool is_file = false;
-			nsurl *nsurl;
-			lwc_string *scheme_lwc, *fragment_lwc;
-			char *path_query;
-			size_t len;
+		*path_used = len;
 
-			if (!fgets(scheme, sizeof scheme, fp))
-				break;
-			length = strlen(scheme) - 1;
-			scheme[length] = '\0';
+		if (p->children != NULL) {
+			/* Drill down into children */
+			p = p->children;
+		} else {
+			/* leaf node */
+			if (p->persistent ||((p->urld.last_visit > expiry) &&
+					     (p->urld.visits > 0))) {
+				fprintf(fp, "%s\n", lwc_string_data(p->scheme));
 
-			if (!fgets(ports, sizeof ports, fp))
-				break;
-			length = strlen(ports) - 1;
-			ports[length] = '\0';
-			port = atoi(ports);
+				if (p->port)
+					fprintf(fp,"%d\n", p->port);
+				else
+					fprintf(fp, "\n");
 
-			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
-				break;
-			length = strlen(s) - 1;
-			s[length] = '\0';
+				fprintf(fp, "%s\n", *path);
 
-			if (!strcasecmp(host, "localhost") &&
-					!strcasecmp(scheme, "file"))
-				is_file = true;
+				/** \todo handle fragments? */
 
-			snprintf(url, sizeof url, "%s://%s%s%s%s",
-					scheme,
-					/* file URLs have no host */
-					(is_file ? "" : host),
-					(port ? ":" : ""),
-					(port ? ports : ""),
-					s);
+				fprintf(fp, "%i\n%i\n%i\n", p->urld.visits,
+					(int)p->urld.last_visit,
+					(int)p->urld.type);
 
-			/* TODO: store URLs in pre-parsed state, and make
-			 *       a nsurl_load to generate the nsurl more
-			 *       swiftly.
-			 *       Need a nsurl_save too.
-			 */
-			if (nsurl_create(url, &nsurl) != NSERROR_OK) {
-				LOG(("Failed inserting '%s'", url));
-				fclose(fp);
-				return NSERROR_NOMEM;
-			}
-                        
-			if (url_bloom != NULL) {
-				uint32_t hash = nsurl_hash(nsurl);
-				bloom_insert_hash(url_bloom, hash);
-			}
+				fprintf(fp, "\n");
 
-			/* Copy and merge path/query strings */
-			if (nsurl_get(nsurl, NSURL_PATH | NSURL_QUERY,
-					&path_query, &len) != NSERROR_OK) {
-				LOG(("Failed inserting '%s'", url));
-				fclose(fp);
-				return NSERROR_NOMEM;
-			}
+				if (p->urld.title) {
+					uint8_t *s = (uint8_t *) p->urld.title;
 
-			scheme_lwc = nsurl_get_component(nsurl, NSURL_SCHEME);
-			fragment_lwc = nsurl_get_component(nsurl,
-					NSURL_FRAGMENT);
-			p = urldb_add_path(scheme_lwc, port, h, path_query,
-					fragment_lwc, nsurl);
-			if (!p) {
-				LOG(("Failed inserting '%s'", url));
-				fclose(fp);
-				return NSERROR_NOMEM;
+					for (i = 0; s[i] != '\0'; i++)
+						if (s[i] < 32)
+							s[i] = ' ';
+					for (--i; ((i > 0) && (s[i] == ' '));
+					     i--)
+						s[i] = '\0';
+					fprintf(fp, "%s\n", p->urld.title);
+				} else
+					fprintf(fp, "\n");
 			}
-			nsurl_unref(nsurl);
-			lwc_string_unref(scheme_lwc);
-			if (fragment_lwc != NULL)
-				lwc_string_unref(fragment_lwc);
 
-			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
-				break;
-			if (p)
-				p->urld.visits = (unsigned int)atoi(s);
-
-			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
-				break;
-			if (p)
-				p->urld.last_visit = (time_t)atoi(s);
+			/* Now, find next node to process. */
+			while (p != parent) {
+				int seglen = p->segment != NULL
+					? strlen(p->segment) : 0;
 
-			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
-				break;
-			if (p)
-				p->urld.type = (content_type)atoi(s);
+				/* Remove our segment from the path */
+				*path_used -= seglen;
+				(*path)[*path_used - 1] = '\0';
 
-			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
-				break;
+				if (p->next != NULL) {
+					/* Have a sibling, process that */
+					p = p->next;
+					break;
+				}
 
+				/* Going up, so remove '/' */
+				*path_used -= 1;
+				(*path)[*path_used - 1] = '\0';
 
-			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
-				break;
-			length = strlen(s) - 1;
-			if (p && length > 0) {
-				s[length] = '\0';
-				p->urld.title = malloc(length + 1);
-				if (p->urld.title)
-					memcpy(p->urld.title, s, length + 1);
+				/* Ascend tree */
+				p = p->parent;
 			}
 		}
-	}
-
-	fclose(fp);
-	LOG(("Successfully loaded URL file"));
-#undef MAXIMUM_URL_LENGTH
-
-	return NSERROR_OK;
+	} while (p != parent);
 }
 
+
 /**
- * Export the current database to file
+ * Count number of URLs associated with a host
  *
- * \param filename Name of file to export to
+ * \param root Root of path data tree
+ * \param expiry Expiry time for URLs
+ * \param count Pointer to count
  */
-void urldb_save(const char *filename)
+static void urldb_count_urls(const struct path_data *root, time_t expiry,
+		unsigned int *count)
 {
-	FILE *fp;
-	int i;
-
-	assert(filename);
-
-	fp = fopen(filename, "w");
-	if (!fp) {
-		LOG(("Failed to open file '%s' for writing", filename));
-		return;
-	}
+	const struct path_data *p = root;
 
-	/* file format version number */
-	fprintf(fp, "%d\n", URL_FILE_VERSION);
+	do {
+		if (p->children != NULL) {
+			/* Drill down into children */
+			p = p->children;
+		} else {
+			/* No more children, increment count if required */
+			if (p->persistent || ((p->urld.last_visit > expiry) &&
+					      (p->urld.visits > 0))) {
+				(*count)++;
+			}
 
-	for (i = 0; i != NUM_SEARCH_TREES; i++) {
-		urldb_save_search_tree(search_trees[i], fp);
-	}
+			/* Now, find next node to process. */
+			while (p != root) {
+				if (p->next != NULL) {
+					/* Have a sibling, process that */
+					p = p->next;
+					break;
+				}
 
-	fclose(fp);
+				/* Ascend tree */
+				p = p->parent;
+			}
+		}
+	} while (p != root);
 }
 
+
 /**
  * Save a search (sub)tree
  *
  * \param root Root of (sub)tree to save
  * \param fp File to write to
  */
-void urldb_save_search_tree(struct search_node *parent, FILE *fp)
+static void urldb_save_search_tree(struct search_node *parent, FILE *fp)
 {
 	char host[256];
 	const struct host_part *h;
@@ -619,30 +456,52 @@ void urldb_save_search_tree(struct search_node *parent, FILE *fp)
 	urldb_save_search_tree(parent->right, fp);
 }
 
+
 /**
- * Count number of URLs associated with a host
+ * Path data iterator (internal)
  *
- * \param root Root of path data tree
- * \param expiry Expiry time for URLs
- * \param count Pointer to count
+ * \param parent Root of subtree to iterate over
+ * \param url_callback Callback function
+ * \param cookie_callback Callback function
+ * \return true to continue, false otherwise
  */
-void urldb_count_urls(const struct path_data *root, time_t expiry,
-		unsigned int *count)
+static bool urldb_iterate_entries_path(const struct path_data *parent,
+		bool (*url_callback)(nsurl *url, const struct url_data *data),
+		bool (*cookie_callback)(const struct cookie_data *data))
 {
-	const struct path_data *p = root;
+	const struct path_data *p = parent;
+	const struct cookie_data *c;
 
 	do {
 		if (p->children != NULL) {
 			/* Drill down into children */
 			p = p->children;
 		} else {
-			/* No more children, increment count if required */
-			if (p->persistent || ((p->urld.last_visit > expiry) &&
-					(p->urld.visits > 0)))
-				(*count)++;
+			/* All leaf nodes in the path tree should have an URL or
+			 * cookies attached to them. If this is not the case, it
+			 * indicates that there's a bug in the file loader/URL
+			 * insertion code. Therefore, assert this here. */
+			assert(url_callback || cookie_callback);
+
+			/** \todo handle fragments? */
+			if (url_callback) {
+				const struct url_internal_data *u = &p->urld;
+
+				assert(p->url);
+
+				if (!url_callback(p->url,
+						(const struct url_data *) u))
+					return false;
+			} else {
+				c = (const struct cookie_data *)p->cookies;
+				for (; c != NULL; c = c->next) {
+					if (!cookie_callback(c))
+						return false;
+				}
+			}
 
 			/* Now, find next node to process. */
-			while (p != root) {
+			while (p != parent) {
 				if (p->next != NULL) {
 					/* Have a sibling, process that */
 					p = p->next;
@@ -653,1521 +512,1750 @@ void urldb_count_urls(const struct path_data *root, time_t expiry,
 				p = p->parent;
 			}
 		}
-	} while (p != root);
+	} while (p != parent);
+
+	return true;
 }
 
+
 /**
- * Write paths associated with a host
+ * Check whether a host string is an IP address.
  *
- * \param parent Root of (sub)tree to write
- * \param host Current host name
- * \param fp File to write to
- * \param path Current path string
- * \param path_alloc Allocated size of path
- * \param path_used Used size of path
- * \param expiry Expiry time of URLs
+ * This call detects IPv4 addresses (all of dotted-quad or subsets,
+ * decimal or hexadecimal notations) and IPv6 addresses (including
+ * those containing embedded IPv4 addresses.)
+ *
+ * \param host a hostname terminated by '\0'
+ * \return true if the hostname is an IP address, false otherwise
  */
-void urldb_write_paths(const struct path_data *parent, const char *host,
-		FILE *fp, char **path, int *path_alloc, int *path_used,
-		time_t expiry)
+static bool urldb__host_is_ip_address(const char *host)
 {
-	const struct path_data *p = parent;
-	int i;
+	struct in_addr ipv4;
+	size_t host_len = strlen(host);
+	const char *sane_host;
+	const char *slash;
+#ifndef NO_IPV6
+	struct in6_addr ipv6;
+	char ipv6_addr[64];
+#endif
+	/** @todo FIXME Some parts of urldb.c confuse hosts with
+	 * "prefixes", so we can sometimes be erroneously passed more than
+	 * just a host.  Sometimes we may be passed trailing slashes, or even
+	 * whole path segments.  A specific criminal in this class is
+	 * urldb_iterate_partial, which takes a prefix to search for, but
+	 * passes that prefix to functions that expect only hosts.
+	 *
+	 * For the time being, we will accept such calls; we check if there
+	 * is a / in the host parameter, and if there is, we take a copy and
+	 * replace the / with a \0.  This is not a permanent solution; we
+	 * should search through NetSurf and find all the callers that are
+	 * in error and fix them.  When doing this task, it might be wise
+	 * to replace the hideousness below with code that doesn't have to do
+	 * this, and add assert(strchr(host, '/') == NULL); somewhere.
+	 * -- rjek - 2010-11-04
+	 */
 
-	do {
-		int seglen = p->segment != NULL ? strlen(p->segment) : 0;
-		int len = *path_used + seglen + 1;
+	slash = strchr(host, '/');
+	if (slash == NULL) {
+		sane_host = host;
+	} else {
+		char *c = strdup(host);
+		c[slash - host] = '\0';
+		sane_host = c;
+		host_len = slash - host - 1;
+		LOG(("WARNING: called with non-host '%s'", host));
+	}
 
-		if (*path_alloc < len) {
-			char *temp = realloc(*path,
-					(len > 64) ? len : *path_alloc + 64);
-			if (!temp)
-				return;
-			*path = temp;
-			*path_alloc = (len > 64) ? len : *path_alloc + 64;
-		}
+	if (strspn(sane_host, "0123456789abcdefABCDEF[].:") < host_len)
+		goto out_false;
 
-		if (p->segment != NULL)
-			memcpy(*path + *path_used - 1, p->segment, seglen);
+	if (inet_aton(sane_host, &ipv4) != 0) {
+		/* This can only be a sane IPv4 address if it contains 3 dots.
+		 * Helpfully, inet_aton is happy to treat "a", "a.b", "a.b.c",
+		 * and "a.b.c.d" as valid IPv4 address strings where we only
+		 * support the full, dotted-quad, form.
+		 */
+		int num_dots = 0;
+		size_t index;
 
-		if (p->children != NULL) {
-			(*path)[*path_used + seglen - 1] = '/';
-			(*path)[*path_used + seglen] = '\0';
-		} else {
-			(*path)[*path_used + seglen - 1] = '\0';
-			len -= 1;
+		for (index = 0; index < host_len; index++) {
+			if (sane_host[index] == '.')
+				num_dots++;
 		}
 
-		*path_used = len;
+		if (num_dots == 3)
+			goto out_true;
+		else
+			goto out_false;
+	}
 
-		if (p->children != NULL) {
-			/* Drill down into children */
-			p = p->children;
-		} else {
-			/* leaf node */
-			if (p->persistent ||((p->urld.last_visit > expiry) &&
-					(p->urld.visits > 0))) {
-				fprintf(fp, "%s\n", lwc_string_data(p->scheme));
+#ifndef NO_IPV6
+	if (sane_host[0] != '[' || sane_host[host_len] != ']')
+		goto out_false;
 
-				if (p->port)
-					fprintf(fp,"%d\n", p->port);
-				else
-					fprintf(fp, "\n");
+	strncpy(ipv6_addr, sane_host + 1, sizeof(ipv6_addr));
+	ipv6_addr[sizeof(ipv6_addr) - 1] = '\0';
 
-				fprintf(fp, "%s\n", *path);
+	if (inet_pton(AF_INET6, ipv6_addr, &ipv6) == 1)
+		goto out_true;
+#endif
 
-				/** \todo handle fragments? */
+out_false:
+	if (slash != NULL) free((void *)sane_host);
+	return false;
 
-				fprintf(fp, "%i\n%i\n%i\n", p->urld.visits,
-						(int)p->urld.last_visit,
-						(int)p->urld.type);
+out_true:
+	if (slash != NULL) free((void *)sane_host);
+	return true;
+}
 
-				fprintf(fp, "\n");
 
-				if (p->urld.title) {
-					uint8_t *s = (uint8_t *) p->urld.title;
+/**
+ * Compare host_part with prefix
+ *
+ * \param a Host part to compare (must not be the database root)
+ * \param b Prefix string to match against
+ * \return 0 if match, non-zero otherwise
+ */
+static int urldb_search_match_prefix(const struct host_part *a, const char *b)
+{
+	const char *end, *dot;
+	int plen, ret;
 
-					for (i = 0; s[i] != '\0'; i++)
-						if (s[i] < 32)
-							s[i] = ' ';
-					for (--i; ((i > 0) && (s[i] == ' ')); 
-							i--)
-						s[i] = '\0';
-					fprintf(fp, "%s\n", p->urld.title);
-				} else
-					fprintf(fp, "\n");
-			}
+	assert(a && a != &db_root && b);
 
-			/* Now, find next node to process. */
-			while (p != parent) {
-				int seglen = p->segment != NULL 
-						? strlen(p->segment) : 0;
+	if (urldb__host_is_ip_address(b)) {
+		/* IP address */
+		return strncasecmp(a->part, b, strlen(b));
+	}
 
-				/* Remove our segment from the path */
-				*path_used -= seglen;
-				(*path)[*path_used - 1] = '\0';
+	end = b + strlen(b) + 1;
 
-				if (p->next != NULL) {
-					/* Have a sibling, process that */
-					p = p->next;
-					break;
-				}
+	while (b < end && a && a != &db_root) {
+		dot = strchr(b, '.');
+		if (!dot) {
+			/* last segment */
+			dot = end - 1;
+		}
 
-				/* Going up, so remove '/' */
-				*path_used -= 1;
-				(*path)[*path_used - 1] = '\0';
+		/* Compare strings (length limited) */
+		if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
+			/* didn't match => return difference */
+			return ret;
 
-				/* Ascend tree */
-				p = p->parent;
-			}
+		/* The strings matched */
+		if (dot < end - 1) {
+			/* Consider segment lengths only in the case
+			 * where the prefix contains segments */
+			plen = strlen(a->part);
+			if (plen > dot - b)
+				/* len(a) > len(b) */
+				return 1;
+			else if (plen < dot - b)
+				/* len(a) < len(b) */
+				return -1;
 		}
-	} while (p != parent);
-}
-
-/**
- * Set the cross-session persistence of the entry for an URL
- *
- * \param url Absolute URL to persist
- * \param persist True to persist, false otherwise
- */
-void urldb_set_url_persistence(nsurl *url, bool persist)
-{
-	struct path_data *p;
 
-	assert(url);
+		b = dot + 1;
+		a = a->parent;
+	}
 
-	p = urldb_find_url(url);
-	if (!p)
-		return;
+	/* If we get here then either:
+	 *    a) The path lengths differ
+	 * or b) The hosts are identical
+	 */
+	if (a && a != &db_root && b >= end)
+		/* len(a) > len(b) => prefix matches */
+		return 0;
+	else if ((!a || a == &db_root) && b < end)
+		/* len(a) < len(b) => prefix does not match */
+		return -1;
 
-	p->persistent = persist;
+	/* Identical */
+	return 0;
 }
 
+
 /**
- * Insert an URL into the database
+ * Partial host iterator (internal)
  *
- * \param url Absolute URL to insert
- * \return true on success, false otherwise
+ * \param root Root of (sub)tree to traverse
+ * \param prefix Prefix to match
+ * \param callback Callback function
+ * \return true to continue, false otherwise
  */
-bool urldb_add_url(nsurl *url)
+static bool
+urldb_iterate_partial_host(struct search_node *root,
+		const char *prefix,
+		bool (*callback)(nsurl *url, const struct url_data *data))
 {
-	struct host_part *h;
-	struct path_data *p;
-	lwc_string *scheme;
-	lwc_string *port;
-	lwc_string *host;
-	lwc_string *fragment;
-	const char *host_str;
-	char *path_query = NULL;
-	size_t len;
-	bool match;
-	unsigned int port_int;
-
-	assert(url);
-        
-        if (url_bloom == NULL)
-                url_bloom = bloom_create(BLOOM_SIZE);
-        
-        if (url_bloom != NULL) {
-                uint32_t hash = nsurl_hash(url);
-                bloom_insert_hash(url_bloom, hash);
-        }
-
-	/* Copy and merge path/query strings */
-	if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &path_query, &len) !=
-			NSERROR_OK) {
-		return false;
-	}
-	assert(path_query != NULL);
+	int c;
 
-	scheme = nsurl_get_component(url, NSURL_SCHEME);
-	if (scheme == NULL) {
-		free(path_query);
-		return false;
-	}
+	assert(root && prefix && callback);
 
-	host = nsurl_get_component(url, NSURL_HOST);
-	if (host != NULL) {
-		host_str = lwc_string_data(host);
-		lwc_string_unref(host);
+	if (root == &empty)
+		return true;
 
-	} else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) ==
-			lwc_error_ok && match == true) {
-		host_str = "localhost";
+	c = urldb_search_match_prefix(root->data, prefix);
 
-	} else {
-		lwc_string_unref(scheme);
-		free(path_query);
-		return false;
-	}
+	if (c > 0)
+		/* No match => look in left subtree */
+		return urldb_iterate_partial_host(root->left, prefix,
+				callback);
+	else if (c < 0)
+		/* No match => look in right subtree */
+		return urldb_iterate_partial_host(root->right, prefix,
+				callback);
+	else {
+		/* Match => iterate over l/r subtrees & process this node */
+		if (!urldb_iterate_partial_host(root->left, prefix,
+				callback))
+			return false;
 
-	fragment = nsurl_get_component(url, NSURL_FRAGMENT);
+		if (root->data->paths.children) {
+			/* and extract all paths attached to this host */
+			if (!urldb_iterate_entries_path(&root->data->paths,
+					callback, NULL)) {
+				return false;
+			}
+		}
 
-	port = nsurl_get_component(url, NSURL_PORT);
-	if (port != NULL) {
-		port_int = atoi(lwc_string_data(port));
-		lwc_string_unref(port);
-	} else {
-		port_int = 0;
+		if (!urldb_iterate_partial_host(root->right, prefix,
+				callback))
+			return false;
 	}
 
-	/* Get host entry */
-	h = urldb_add_host(host_str);
-
-	/* Get path entry */
-	p = (h != NULL) ? urldb_add_path(scheme, port_int, h, path_query,
-			fragment, url) : NULL;
-
-	lwc_string_unref(scheme);
-	if (fragment != NULL)
-		lwc_string_unref(fragment);
-
-	return (p != NULL);
+	return true;
 }
 
+
 /**
- * Set an URL's title string, replacing any existing one
+ * Partial path iterator (internal)
  *
- * \param url The URL to look for
- * \param title The title string to use (copied)
+ * \param parent Root of (sub)tree to traverse
+ * \param prefix Prefix to match
+ * \param callback Callback function
+ * \return true to continue, false otherwise
  */
-void urldb_set_url_title(nsurl *url, const char *title)
+static bool urldb_iterate_partial_path(const struct path_data *parent,
+		const char *prefix, bool (*callback)(nsurl *url,
+		const struct url_data *data))
 {
-	struct path_data *p;
-	char *temp;
-
-	assert(url && title);
+	const struct path_data *p = parent->children;
+	const char *slash, *end = prefix + strlen(prefix);
 
-	p = urldb_find_url(url);
-	if (!p)
-		return;
+	/*
+	 * Given: http://www.example.org/a/b/c/d//e
+	 * and assuming a path tree:
+	 *     .
+	 *    / \
+	 *   a1 b1
+	 *  / \
+	 * a2 b2
+	 *    /|\
+	 *   a b c
+	 *   3 3 |
+	 *       d
+	 *       |
+	 *       e
+	 *      / \
+	 *      f g
+	 *
+	 * Prefix will be:	p will be:
+	 *
+	 * a/b/c/d//e		a1
+	 *   b/c/d//e		a2
+	 *   b/c/d//e		b2
+	 *     c/d//e		a3
+	 *     c/d//e		b3
+	 *     c/d//e		c
+	 *       d//e		d
+	 *         /e		e		(skip /)
+	 *          e		e
+	 *
+	 * I.E. we perform a breadth-first search of the tree.
+	 */
 
-	temp = strdup(title);
-	if (!temp)
-		return;
+	do {
+		slash = strchr(prefix, '/');
+		if (!slash)
+			slash = end;
 
-	free(p->urld.title);
-	p->urld.title = temp;
-}
+		if (slash == prefix && *prefix == '/') {
+			/* Ignore "//" */
+			prefix++;
+			continue;
+		}
 
-/**
- * Set an URL's content type
- *
- * \param url The URL to look for
- * \param type The type to set
- */
-void urldb_set_url_content_type(nsurl *url, content_type type)
-{
-	struct path_data *p;
+		if (strncasecmp(p->segment, prefix, slash - prefix) == 0) {
+			/* prefix matches so far */
+			if (slash == end) {
+				/* we've run out of prefix, so all
+				 * paths below this one match */
+				if (!urldb_iterate_entries_path(p, callback,
+						NULL))
+					return false;
 
-	assert(url);
+				/* Progress to next sibling */
+				p = p->next;
+			} else {
+				/* Skip over this segment */
+				prefix = slash + 1;
 
-	p = urldb_find_url(url);
-	if (!p)
-		return;
+				p = p->children;
+			}
+		} else {
+			/* Doesn't match this segment, try next sibling */
+			p = p->next;
+		}
+	} while (p != NULL);
 
-	p->urld.type = type;
+	return true;
 }
 
+
 /**
- * Update an URL's visit data
+ * Host data iterator (internal)
  *
- * \param url The URL to update
+ * \param parent Root of subtree to iterate over
+ * \param url_callback Callback function
+ * \param cookie_callback Callback function
+ * \return true to continue, false otherwise
  */
-void urldb_update_url_visit_data(nsurl *url)
+static bool urldb_iterate_entries_host(struct search_node *parent,
+		bool (*url_callback)(nsurl *url,
+				const struct url_data *data),
+		bool (*cookie_callback)(const struct cookie_data *data))
 {
-	struct path_data *p;
+	if (parent == &empty)
+		return true;
 
-	assert(url);
+	if (!urldb_iterate_entries_host(parent->left,
+			url_callback, cookie_callback))
+		return false;
 
-	p = urldb_find_url(url);
-	if (!p)
-		return;
+	if ((parent->data->paths.children) || ((cookie_callback) &&
+			(parent->data->paths.cookies))) {
+		/* We have paths (or domain cookies), so iterate them */
+		if (!urldb_iterate_entries_path(&parent->data->paths,
+				url_callback, cookie_callback)) {
+			return false;
+		}
+	}
 
-	p->urld.last_visit = time(NULL);
-	p->urld.visits++;
+	if (!urldb_iterate_entries_host(parent->right,
+			url_callback, cookie_callback))
+		return false;
+
+	return true;
 }
 
+
 /**
- * Reset an URL's visit statistics
+ * Add a host node to the tree
  *
- * \param url The URL to reset
+ * \param part Host segment to add (or whole IP address) (copied)
+ * \param parent Parent node to add to
+ * \return Pointer to added node, or NULL on memory exhaustion
  */
-void urldb_reset_url_visit_data(nsurl *url)
+static struct host_part *urldb_add_host_node(const char *part,
+		struct host_part *parent)
 {
-	struct path_data *p;
+	struct host_part *d;
 
-	assert(url);
+	assert(part && parent);
 
-	p = urldb_find_url(url);
-	if (!p)
-		return;
+	d = calloc(1, sizeof(struct host_part));
+	if (!d)
+		return NULL;
 
-	p->urld.last_visit = (time_t)0;
-	p->urld.visits = 0;
+	d->part = strdup(part);
+	if (!d->part) {
+		free(d);
+		return NULL;
+	}
+
+	d->next = parent->children;
+	if (parent->children)
+		parent->children->prev = d;
+	d->parent = parent;
+	parent->children = d;
+
+	return d;
 }
 
 
 /**
- * Find data for an URL.
- *
- * \param url Absolute URL to look for
- * \return Pointer to result struct, or NULL
+ * Fragment comparator callback for qsort
  */
-const struct url_data *urldb_get_url_data(nsurl *url)
+static int urldb_add_path_fragment_cmp(const void *a, const void *b)
 {
-	struct path_data *p;
-	struct url_internal_data *u;
-
-	assert(url);
-
-	p = urldb_find_url(url);
-	if (!p)
-		return NULL;
-
-	u = &p->urld;
-
-	return (const struct url_data *) u;
+	return strcasecmp(*((const char **) a), *((const char **) b));
 }
 
+
 /**
- * Extract an URL from the db
+ * Add a fragment to a path segment
  *
- * \param url URL to extract
- * \return Pointer to database's copy of URL or NULL if not found
+ * \param segment Path segment to add to
+ * \param fragment Fragment to add (copied), or NULL
+ * \return segment or NULL on memory exhaustion
  */
-nsurl *urldb_get_url(nsurl *url)
+static struct path_data *
+urldb_add_path_fragment(struct path_data *segment, lwc_string *fragment)
 {
-	struct path_data *p;
+	char **temp;
 
-	assert(url);
+	assert(segment);
 
-	p = urldb_find_url(url);
-	if (!p)
+	/* If no fragment, this function is a NOP
+	 * This may seem strange, but it makes the rest
+	 * of the code cleaner */
+	if (!fragment)
+		return segment;
+
+	temp = realloc(segment->fragment,
+			(segment->frag_cnt + 1) * sizeof(char *));
+	if (!temp)
 		return NULL;
 
-	return p->url;
+	segment->fragment = temp;
+	segment->fragment[segment->frag_cnt] =
+			strdup(lwc_string_data(fragment));
+	if (!segment->fragment[segment->frag_cnt]) {
+		/* Don't free temp - it's now our buffer */
+		return NULL;
+	}
+
+	segment->frag_cnt++;
+
+	/* We want fragments in alphabetical order, so sort them
+	 * It may prove better to insert in alphabetical order instead */
+	qsort(segment->fragment, segment->frag_cnt, sizeof (char *),
+			urldb_add_path_fragment_cmp);
+
+	return segment;
 }
 
+
 /**
- * Look up authentication details in database
+ * Add a path node to the tree
  *
- * \param url Absolute URL to search for
- * \param realm When non-NULL, it is realm which can be used to determine
- * the protection space when that's not been done before for given URL.
- * \return Pointer to authentication details, or NULL if not found
+ * \param scheme URL scheme associated with path (copied)
+ * \param port Port number on host associated with path
+ * \param segment Path segment to add (copied)
+ * \param fragment URL fragment (copied), or NULL
+ * \param parent Parent node to add to
+ * \return Pointer to added node, or NULL on memory exhaustion
  */
-const char *urldb_get_auth_details(nsurl *url, const char *realm)
+static struct path_data *
+urldb_add_path_node(lwc_string *scheme, unsigned int port,
+		const char *segment, lwc_string *fragment,
+		struct path_data *parent)
 {
-	struct path_data *p, *p_cur, *p_top;
+	struct path_data *d, *e;
 
-	assert(url);
+	assert(scheme && segment && parent);
 
-	/* add to the db, so our lookup will work */
-	urldb_add_url(url);
+	d = calloc(1, sizeof(struct path_data));
+	if (!d)
+		return NULL;
 
-	p = urldb_find_url(url);
-	if (!p)
+	d->scheme = lwc_string_ref(scheme);
+
+	d->port = port;
+
+	d->segment = strdup(segment);
+	if (!d->segment) {
+		lwc_string_unref(d->scheme);
+		free(d);
 		return NULL;
+	}
 
-	/* Check for any auth details attached to the path_data node or any of
-	 * its parents. */
-	for (p_cur = p; p_cur != NULL; p_top = p_cur, p_cur = p_cur->parent) {
-		if (p_cur->prot_space) {
-			return p_cur->prot_space->auth;
+	if (fragment) {
+		if (!urldb_add_path_fragment(d, fragment)) {
+			free(d->segment);
+			lwc_string_unref(d->scheme);
+			free(d);
+			return NULL;
 		}
 	}
 
-	/* Only when we have a realm (and canonical root of given URL), we can
-	 * uniquely locate the protection space. */
-	if (realm != NULL) {
-		const struct host_part *h = (const struct host_part *)p_top;
-		const struct prot_space_data *space;
-		bool match;
+	for (e = parent->children; e; e = e->next) {
+		if (strcmp(e->segment, d->segment) > 0)
+			break;
+	}
 
-		/* Search for a possible matching protection space. */
-		for (space = h->prot_space; space != NULL;
-				space = space->next) {
-			if (!strcmp(space->realm, realm) &&
-					lwc_string_isequal(space->scheme,
-							p->scheme, &match) ==
-							lwc_error_ok &&
-					match == true &&
-					space->port == p->port) {
-				p->prot_space = space;
-				return p->prot_space->auth;
-			}
-		}
+	if (e) {
+		d->prev = e->prev;
+		d->next = e;
+		if (e->prev)
+			e->prev->next = d;
+		else
+			parent->children = d;
+		e->prev = d;
+	} else if (!parent->children) {
+		d->prev = d->next = NULL;
+		parent->children = parent->last = d;
+	} else {
+		d->next = NULL;
+		d->prev = parent->last;
+		parent->last->next = d;
+		parent->last = d;
 	}
+	d->parent = parent;
 
-	return NULL;
+	return d;
 }
 
+
 /**
- * Retrieve certificate verification permissions from database
+ * Get the address of the search tree root for a particular host
  *
- * \param url Absolute URL to search for
- * \return true to permit connections to hosts with invalid certificates,
- * false otherwise.
+ * \param host  the host to lookup
+ * \return pointer to the search_trees entry holding the tree's root
  */
-bool urldb_get_cert_permissions(nsurl *url)
+static struct search_node **urldb_get_search_tree_direct(const char *host)
 {
-	struct path_data *p;
-	const struct host_part *h;
-
-	assert(url);
-
-	p = urldb_find_url(url);
-	if (!p)
-		return false;
+	assert(host);
 
-	for (; p && p->parent; p = p->parent)
-		/* do nothing */;
-	assert(p);
+	if (urldb__host_is_ip_address(host))
+		return &search_trees[ST_IP];
+	else if (isalpha((unsigned char) *host)) /* ctype needs uchar range */
+		return &search_trees[ST_DN + tolower((unsigned char) *host) - 'a'];
+	return &search_trees[ST_EE];
+}
 
-	h = (const struct host_part *)p;
 
-	return h->permit_invalid_certs;
+/**
+ * Get the search tree for a particular host
+ *
+ * \param host  the host to lookup
+ * \return the corresponding search tree
+ */
+static struct search_node *urldb_get_search_tree(const char *host)
+{
+	return *urldb_get_search_tree_direct(host);
 }
 
+
 /**
- * Set authentication data for an URL
+ * Compare host_part with a string
  *
- * \param url The URL to consider
- * \param realm The authentication realm
- * \param auth The authentication details (in form username:password)
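+ * \param a Host tree node to compare (must not be the root node)
+ * \param b Host name or IP address string to compare against
+ * \return 0 if match, non-zero otherwise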
  */
-void urldb_set_auth_details(nsurl *url, const char *realm,
-		const char *auth)
+static int urldb_search_match_string(const struct host_part *a, const char *b)
 {
-	struct path_data *p, *pi;
-	struct host_part *h;
-	struct prot_space_data *space, *space_alloc;
-	char *realm_alloc, *auth_alloc;
-	bool match;
+	const char *end, *dot;
+	int plen, ret;
 
-	assert(url && realm && auth);
+	assert(a && a != &db_root && b);
 
-	/* add url, in case it's missing */
-	urldb_add_url(url);
+	if (urldb__host_is_ip_address(b)) {
+		/* IP address */
+		return strcasecmp(a->part, b);
+	}
 
-	p = urldb_find_url(url);
+	end = b + strlen(b) + 1;
 
-	if (!p)
-		return;
+	while (b < end && a && a != &db_root) {
+		dot = strchr(b, '.');
+		if (!dot) {
+			/* last segment */
+			dot = end - 1;
+		}
 
-	/* Search for host_part */
-	for (pi = p; pi->parent != NULL; pi = pi->parent)
-		;
-	h = (struct host_part *)pi;
+		/* Compare strings (length limited) */
+		if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
+			/* didn't match => return difference */
+			return ret;
 
-	/* Search if given URL belongs to a protection space we already know of. */
-	for (space = h->prot_space; space; space = space->next) {
-		if (!strcmp(space->realm, realm) &&
-				lwc_string_isequal(space->scheme, p->scheme,
-						&match) == lwc_error_ok &&
-				match == true &&
-				space->port == p->port)
-			break;
-	}
-
-	if (space != NULL) {
-		/* Overrule existing auth. */
-		free(space->auth);
-		space->auth = strdup(auth);
-	} else {
-		/* Create a new protection space. */
-		space = space_alloc = malloc(sizeof(struct prot_space_data));
-		realm_alloc = strdup(realm);
-		auth_alloc = strdup(auth);
+		/* The strings matched, now check that the lengths do, too */
+		plen = strlen(a->part);
 
-		if (!space_alloc || !realm_alloc || !auth_alloc) {
-			free(space_alloc);
-			free(realm_alloc);
-			free(auth_alloc);
-			return;
-		}
+		if (plen > dot - b)
+			/* len(a) > len(b) */
+			return 1;
+		else if (plen < dot - b)
+			/* len(a) < len(b) */
+			return -1;
 
-		space->scheme = lwc_string_ref(p->scheme);
-		space->port = p->port;
-		space->realm = realm_alloc;
-		space->auth = auth_alloc;
-		space->next = h->prot_space;
-		h->prot_space = space;
+		b = dot + 1;
+		a = a->parent;
 	}
 
-	p->prot_space = space;
+	/* If we get here then either:
+	 *    a) The path lengths differ
+	 * or b) The hosts are identical
+	 */
+	if (a && a != &db_root && b >= end)
+		/* len(a) > len(b) */
+		return 1;
+	else if ((!a || a == &db_root) && b < end)
+		/* len(a) < len(b) */
+		return -1;
+
+	/* Identical */
+	return 0;
 }
 
+
 /**
- * Set certificate verification permissions
+ * Find a node in a search tree
  *
- * \param url URL to consider
- * \param permit Set to true to allow invalid certificates
+ * \param root Tree to look in
+ * \param host Host to find
+ * \return Pointer to host tree node, or NULL if not found
  */
-void urldb_set_cert_permissions(nsurl *url, bool permit)
+static const struct host_part *
+urldb_search_find(struct search_node *root, const char *host)
 {
-	struct path_data *p;
-	struct host_part *h;
-
-	assert(url);
-
-	/* add url, in case it's missing */
-	urldb_add_url(url);
+	int c;
 
-	p = urldb_find_url(url);
-	if (!p)
-		return;
+	assert(root && host);
 
-	for (; p && p->parent; p = p->parent)
-		/* do nothing */;
-	assert(p);
+	if (root == &empty) {
+		return NULL;
+	}
 
-	h = (struct host_part *)p;
+	c = urldb_search_match_string(root->data, host);
 
-	h->permit_invalid_certs = permit;
+	if (c > 0)
+		return urldb_search_find(root->left, host);
+	else if (c < 0)
+		return urldb_search_find(root->right, host);
+	else
+		return root->data;
 }
 
+
 /**
- * Set thumbnail for url, replacing any existing thumbnail
+ * Match a path string
  *
- * \param url Absolute URL to consider
- * \param bitmap Opaque pointer to thumbnail data, or NULL to invalidate
+ * \param parent Path (sub)tree to look in
+ * \param path The path to search for
+ * \param scheme The URL scheme associated with the path
+ * \param port The port associated with the path
+ * \return Pointer to path data or NULL if not found.
  */
-void urldb_set_thumbnail(nsurl *url, struct bitmap *bitmap)
+static struct path_data *urldb_match_path(const struct path_data *parent,
+		const char *path, lwc_string *scheme, unsigned short port)
 {
-	struct path_data *p;
+	const struct path_data *p;
+	const char *slash;
+	bool match;
 
-	assert(url);
+	assert(parent != NULL);
+	assert(parent->segment == NULL);
 
-	p = urldb_find_url(url);
-	if (!p)
-		return;
+	if (path[0] != '/') {
+		LOG(("path is %s", path));
+	}
 
-	if (p->thumb && p->thumb != bitmap)
-		bitmap_destroy(p->thumb);
+	assert(path[0] == '/');
 
-	p->thumb = bitmap;
-}
+	/* Start with children, as parent has no segment */
+	p = parent->children;
 
-/**
- * Retrieve thumbnail data for given URL
- *
- * \param url Absolute URL to search for
- * \return Pointer to thumbnail data, or NULL if not found.
- */
-struct bitmap *urldb_get_thumbnail(nsurl *url)
-{
-	struct path_data *p;
+	while (p != NULL) {
+		slash = strchr(path + 1, '/');
+		if (!slash)
+			slash = path + strlen(path);
 
-	assert(url);
+		/* Segment must match in full, not merely as a prefix */
+		if (strncmp(p->segment, path + 1, slash - path - 1) == 0 &&
+				p->segment[slash - path - 1] == '\0' &&
+				lwc_string_isequal(p->scheme, scheme, &match) ==
+				lwc_error_ok && match && p->port == port) {
+			if (*slash == '\0') {
+				/* Complete match */
+				return (struct path_data *) p;
+			}
 
-	p = urldb_find_url(url);
-	if (!p)
-		return NULL;
+			/* Match so far, go down tree */
+			p = p->children;
 
-	return p->thumb;
+			path = slash;
+		} else {
+			/* No match, try next sibling */
+			p = p->next;
+		}
+	}
+
+	return NULL;
 }
 
+
 /**
- * Iterate over entries in the database which match the given prefix
+ * Find an URL in the database
  *
- * \param prefix Prefix to match
- * \param callback Callback function
+ * \param url Absolute URL to find
+ * \return Pointer to path data, or NULL if not found
  */
-void urldb_iterate_partial(const char *prefix,
-		bool (*callback)(nsurl *url,
-		const struct url_data *data))
+static struct path_data *urldb_find_url(nsurl *url)
 {
-	char host[256];
-	char buf[260]; /* max domain + "www." */
-	const char *slash, *scheme_sep;
-	struct search_node *tree;
 	const struct host_part *h;
+	struct path_data *p;
+	struct search_node *tree;
+	char *plq;
+	const char *host_str;
+	lwc_string *scheme, *host, *port;
+	size_t len = 0;
+	unsigned int port_int;
+	bool match;
 
-	assert(prefix && callback);
+	assert(url);
 
-	/* strip scheme */
-	scheme_sep = strstr(prefix, "://");
-	if (scheme_sep)
-		prefix = scheme_sep + 3;
+	if (url_bloom != NULL) {
+		if (bloom_search_hash(url_bloom,
+					nsurl_hash(url)) == false) {
+					return NULL;
+		}
+	}
 
-	slash = strchr(prefix, '/');
-	tree = urldb_get_search_tree(prefix);
+	scheme = nsurl_get_component(url, NSURL_SCHEME);
+	if (scheme == NULL)
+		return NULL;
 
-	if (slash) {
-		/* if there's a slash in the input, then we can
-		 * assume that we're looking for a path */
-		snprintf(host, sizeof host, "%.*s",
-				(int) (slash - prefix), prefix);
+	host = nsurl_get_component(url, NSURL_HOST);
+	if (host != NULL) {
+		host_str = lwc_string_data(host);
+		lwc_string_unref(host);
 
-		h = urldb_search_find(tree, host);
-		if (!h) {
-			int len = slash - prefix;
+	} else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) ==
+			lwc_error_ok && match == true) {
+		host_str = "localhost";
 
-			if (len <= 3 || strncasecmp(host, "www.", 4) != 0) {
-				snprintf(buf, sizeof buf, "www.%s", host);
-				h = urldb_search_find(
-					search_trees[ST_DN + 'w' - 'a'],
-					buf);
-				if (!h)
-					return;
-			} else
-				return;
-		}
+	} else {
+		lwc_string_unref(scheme);
+		return NULL;
+	}
 
-		if (h->paths.children) {
-			/* Have paths, iterate them */
-			urldb_iterate_partial_path(&h->paths, slash + 1,
-					callback);
-		}
+	tree = urldb_get_search_tree(host_str);
+	h = urldb_search_find(tree, host_str);
+	if (!h) {
+		lwc_string_unref(scheme);
+		return NULL;
+	}
+
+	/* generate plq (path, leaf, query) */
+	if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &plq, &len) !=
+			NSERROR_OK) {
+		lwc_string_unref(scheme);
+		return NULL;
+	}
 
+	/* Get port */
+	port = nsurl_get_component(url, NSURL_PORT);
+	if (port != NULL) {
+		port_int = atoi(lwc_string_data(port));
+		lwc_string_unref(port);
 	} else {
-		int len = strlen(prefix);
+		port_int = 0;
+	}
 
-		/* looking for hosts */
-		if (!urldb_iterate_partial_host(tree, prefix, callback))
-			return;
+	p = urldb_match_path(&h->paths, plq, scheme, port_int);
 
-		if (len <= 3 || strncasecmp(prefix, "www.", 4) != 0) {
-			/* now look for www.prefix */
-			snprintf(buf, sizeof buf, "www.%s", prefix);
-			if(!urldb_iterate_partial_host(
-					search_trees[ST_DN + 'w' - 'a'],
-					buf, callback))
-				return;
-		}
-	}
+	free(plq);
+	lwc_string_unref(scheme);
+
+	return p;
 }
 
+
 /**
- * Partial host iterator (internal)
+ * Dump URL database paths to stderr
  *
- * \param root Root of (sub)tree to traverse
- * \param prefix Prefix to match
- * \param callback Callback function
- * \return true to continue, false otherwise
+ * \param parent Parent node of tree to dump
  */
-bool urldb_iterate_partial_host(struct search_node *root, const char *prefix,
-		bool (*callback)(nsurl *url, const struct url_data *data))
+static void urldb_dump_paths(struct path_data *parent)
 {
-	int c;
+	const struct path_data *p = parent;
+	unsigned int i;
 
-	assert(root && prefix && callback);
+	do {
+		if (p->segment != NULL) {
+			LOG(("\t%s : %u", lwc_string_data(p->scheme), p->port));
 
-	if (root == &empty)
-		return true;
+			LOG(("\t\t'%s'", p->segment));
 
-	c = urldb_search_match_prefix(root->data, prefix);
+			for (i = 0; i != p->frag_cnt; i++)
+				LOG(("\t\t\t#%s", p->fragment[i]));
+		}
 
-	if (c > 0)
-		/* No match => look in left subtree */
-		return urldb_iterate_partial_host(root->left, prefix,
-				callback);
-	else if (c < 0)
-		/* No match => look in right subtree */
-		return urldb_iterate_partial_host(root->right, prefix,
-				callback);
-	else {
-		/* Match => iterate over l/r subtrees & process this node */
-		if (!urldb_iterate_partial_host(root->left, prefix,
-				callback))
-			return false;
+		if (p->children != NULL) {
+			p = p->children;
+		} else {
+			while (p != parent) {
+				if (p->next != NULL) {
+					p = p->next;
+					break;
+				}
 
-		if (root->data->paths.children) {
-			/* and extract all paths attached to this host */
-			if (!urldb_iterate_entries_path(&root->data->paths,
-					callback, NULL)) {
-				return false;
+				p = p->parent;
 			}
 		}
-
-		if (!urldb_iterate_partial_host(root->right, prefix,
-				callback))
-			return false;
-	}
-
-	return true;
+	} while (p != parent);
 }
 
+
 /**
- * Partial path iterator (internal)
+ * Dump URL database hosts to stderr
  *
- * \param parent Root of (sub)tree to traverse
- * \param prefix Prefix to match
- * \param callback Callback function
- * \return true to continue, false otherwise
+ * \param parent Parent node of tree to dump
  */
-bool urldb_iterate_partial_path(const struct path_data *parent,
-		const char *prefix, bool (*callback)(nsurl *url,
-		const struct url_data *data))
+static void urldb_dump_hosts(struct host_part *parent)
 {
-	const struct path_data *p = parent->children;
-	const char *slash, *end = prefix + strlen(prefix);
-
-	/* 
-	 * Given: http://www.example.org/a/b/c/d//e
-	 * and assuming a path tree:
-	 *     .
-	 *    / \
-	 *   a1 b1
-	 *  / \
-	 * a2 b2
-	 *    /|\
-	 *   a b c
-	 *   3 3 |
-	 *       d
-	 *       |
-	 *       e
-	 *      / \
-	 *      f g
-	 *
-	 * Prefix will be:	p will be:
-	 *
-	 * a/b/c/d//e		a1
-	 *   b/c/d//e		a2
-	 *   b/c/d//e		b3
-	 *     c/d//e		a3
-	 *     c/d//e		b3
-	 *     c/d//e		c
-	 *       d//e		d
-	 *         /e		e		(skip /)
-	 *          e		e
-	 *
-	 * I.E. we perform a breadth-first search of the tree.
-	 */
-
-	do {
-		slash = strchr(prefix, '/');
-		if (!slash)
-			slash = end;
+	struct host_part *h;
 
-		if (slash == prefix && *prefix == '/') {
-			/* Ignore "//" */
-			prefix++;
-			continue;
-		}
-	
-		if (strncasecmp(p->segment, prefix, slash - prefix) == 0) {
-			/* prefix matches so far */
-			if (slash == end) {
-				/* we've run out of prefix, so all
-				 * paths below this one match */
-				if (!urldb_iterate_entries_path(p, callback, 
-						NULL))
-					return false;
+	if (parent->part) {
+		LOG(("%s", parent->part));
 
-				/* Progress to next sibling */
-				p = p->next;
-			} else {
-				/* Skip over this segment */
-				prefix = slash + 1;
+		LOG(("\t%s invalid SSL certs",
+			parent->permit_invalid_certs ? "Permits" : "Denies"));
+	}
 
-				p = p->children;
-			}
-		} else {
-			/* Doesn't match this segment, try next sibling */
-			p = p->next;
-		}
-	} while (p != NULL);
+	/* Dump path data */
+	urldb_dump_paths(&parent->paths);
 
-	return true;
+	/* and recurse */
+	for (h = parent->children; h; h = h->next)
+		urldb_dump_hosts(h);
 }
 
+
 /**
- * Iterate over all entries in database
+ * Dump search tree
  *
- * \param callback Function to callback for each entry
+ * \param parent Parent node of tree to dump
+ * \param depth Tree depth
  */
-void urldb_iterate_entries(bool (*callback)(nsurl *url,
-		const struct url_data *data))
+static void urldb_dump_search(struct search_node *parent, int depth)
 {
+	const struct host_part *h;
 	int i;
 
-	assert(callback);
+	if (parent == &empty)
+		return;
 
-	for (i = 0; i < NUM_SEARCH_TREES; i++) {
-		if (!urldb_iterate_entries_host(search_trees[i],
-				callback, NULL))
-			break;
+	urldb_dump_search(parent->left, depth + 1);
+
+	for (i = 0; i != depth; i++)
+			fputc(' ', stderr);
+
+	for (h = parent->data; h; h = h->parent) {
+		if (h->part)
+			fprintf(stderr, "%s", h->part);
+
+		if (h->parent && h->parent->parent)
+			fputc('.', stderr);
 	}
+
+	fputc('\n', stderr);
+
+	urldb_dump_search(parent->right, depth + 1);
 }
 
+
 /**
- * Iterate over all cookies in database
+ * Compare a pair of host_parts
  *
- * \param callback Function to callback for each entry
+ * \param a First host tree node to compare
+ * \param b Second host tree node to compare
+ * \return 0 if match, non-zero otherwise
  */
-void urldb_iterate_cookies(bool (*callback)(const struct cookie_data *data))
+static int
+urldb_search_match_host(const struct host_part *a, const struct host_part *b)
 {
-	int i;
+	int ret;
 
-	assert(callback);
+	assert(a && b);
 
-	for (i = 0; i < NUM_SEARCH_TREES; i++) {
-		if (!urldb_iterate_entries_host(search_trees[i],
-				NULL, callback))
-			break;
-	}
+	/* traverse up tree to root, comparing parts as we go. */
+	for (; a && a != &db_root && b && b != &db_root;
+			a = a->parent, b = b->parent)
+		if ((ret = strcasecmp(a->part, b->part)) != 0)
+			/* They differ => return the difference here */
+			return ret;
+
+	/* If we get here then either:
+	 *    a) The path lengths differ
+	 * or b) The hosts are identical
+	 */
+	if (a && a != &db_root && (!b || b == &db_root))
+		/* len(a) > len(b) */
+		return 1;
+	else if ((!a || a == &db_root) && b && b != &db_root)
+		/* len(a) < len(b) */
+		return -1;
+
+	/* identical */
+	return 0;
 }
 
+
 /**
- * Host data iterator (internal)
+ * Rotate a subtree right
  *
- * \param parent Root of subtree to iterate over
- * \param url_callback Callback function
- * \param cookie_callback Callback function
- * \return true to continue, false otherwise
+ * \param root Root of subtree to rotate
+ * \return new root of subtree
  */
-bool urldb_iterate_entries_host(struct search_node *parent,
-		bool (*url_callback)(nsurl *url,
-				const struct url_data *data),
-		bool (*cookie_callback)(const struct cookie_data *data))
+static struct search_node *urldb_search_skew(struct search_node *root)
 {
-	if (parent == &empty)
-		return true;
+	struct search_node *temp;
 
-	if (!urldb_iterate_entries_host(parent->left,
-			url_callback, cookie_callback))
-		return false;
+	assert(root);
 
-	if ((parent->data->paths.children) || ((cookie_callback) &&
-			(parent->data->paths.cookies))) {
-		/* We have paths (or domain cookies), so iterate them */
-		if (!urldb_iterate_entries_path(&parent->data->paths,
-				url_callback, cookie_callback)) {
-			return false;
-		}
+	if (root->left->level == root->level) {
+		temp = root->left;
+		root->left = temp->right;
+		temp->right = root;
+		root = temp;
 	}
 
-	if (!urldb_iterate_entries_host(parent->right,
-			url_callback, cookie_callback))
-		return false;
-
-	return true;
+	return root;
 }
 
+
 /**
- * Path data iterator (internal)
+ * Rotate a node left, increasing the parent's level
  *
- * \param parent Root of subtree to iterate over
- * \param url_callback Callback function
- * \param cookie_callback Callback function
- * \return true to continue, false otherwise
+ * \param root Root of subtree to rotate
+ * \return New root of subtree
  */
-bool urldb_iterate_entries_path(const struct path_data *parent,
-		bool (*url_callback)(nsurl *url,
-				const struct url_data *data),
-		bool (*cookie_callback)(const struct cookie_data *data))
+static struct search_node *urldb_search_split(struct search_node *root)
 {
-	const struct path_data *p = parent;
-	const struct cookie_data *c;
-	
-	do {
-		if (p->children != NULL) {
-			/* Drill down into children */
-			p = p->children;
-		} else {
-			/* All leaf nodes in the path tree should have an URL or
-			 * cookies attached to them. If this is not the case, it
-			 * indicates that there's a bug in the file loader/URL
-			 * insertion code. Therefore, assert this here. */
-			assert(url_callback || cookie_callback);
-
-			/** \todo handle fragments? */
-			if (url_callback) {
-				const struct url_internal_data *u = &p->urld;
-
-				assert(p->url);
+	struct search_node *temp;
 
-				if (!url_callback(p->url,
-						(const struct url_data *) u))
-					return false;
-			} else {
-				c = (const struct cookie_data *)p->cookies;
-				for (; c != NULL; c = c->next)
-					if (!cookie_callback(c))
-						return false;
-			}
+	assert(root);
 
-			/* Now, find next node to process. */
-			while (p != parent) {
-				if (p->next != NULL) {
-					/* Have a sibling, process that */
-					p = p->next;
-					break;
-				}
+	if (root->right->right->level == root->level) {
+		temp = root->right;
+		root->right = temp->left;
+		temp->left = root;
+		root = temp;
 
-				/* Ascend tree */
-				p = p->parent;
-			}
-		}
-	} while (p != parent);
+		root->level++;
+	}
 
-	return true;
+	return root;
 }
 
+
 /**
- * Add a host node to the tree
+ * Insert node into search tree
  *
- * \param part Host segment to add (or whole IP address) (copied)
- * \param parent Parent node to add to
- * \return Pointer to added node, or NULL on memory exhaustion
+ * \param root Root of (sub)tree to insert into
+ * \param n Node to insert
+ * \return Pointer to updated root
  */
-struct host_part *urldb_add_host_node(const char *part,
-		struct host_part *parent)
+static struct search_node *
+urldb_search_insert_internal(struct search_node *root, struct search_node *n)
 {
-	struct host_part *d;
+	assert(root && n);
 
-	assert(part && parent);
+	if (root == &empty) {
+		root = n;
+	} else {
+		int c = urldb_search_match_host(root->data, n->data);
 
-	d = calloc(1, sizeof(struct host_part));
-	if (!d)
-		return NULL;
+		if (c > 0) {
+			root->left = urldb_search_insert_internal(
+					root->left, n);
+		} else if (c < 0) {
+			root->right = urldb_search_insert_internal(
+					root->right, n);
+		} else {
+			/* exact match */
+			free(n);
+			return root;
+		}
 
-	d->part = strdup(part);
-	if (!d->part) {
-		free(d);
-		return NULL;
+		root = urldb_search_skew(root);
+		root = urldb_search_split(root);
 	}
 
-	d->next = parent->children;
-	if (parent->children)
-		parent->children->prev = d;
-	d->parent = parent;
-	parent->children = d;
-
-	return d;
+	return root;
 }
 
 
 /**
- * Check whether a host string is an IP address.
- *
- * This call detects IPv4 addresses (all of dotted-quad or subsets,
- * decimal or hexadecimal notations) and IPv6 addresses (including
- * those containing embedded IPv4 addresses.)
+ * Insert a node into the search tree
  *
- * \param host a hostname terminated by '\0'
- * \return true if the hostname is an IP address, false otherwise
+ * \param root Root of tree to insert into
+ * \param data User data to insert
+ * \return Pointer to updated root, or NULL if failed
  */
-static bool urldb__host_is_ip_address(const char *host)
+static struct search_node *
+urldb_search_insert(struct search_node *root, const struct host_part *data)
 {
-	struct in_addr ipv4;
-	size_t host_len = strlen(host);
-	const char *sane_host;
-	const char *slash;
-#ifndef NO_IPV6
-	struct in6_addr ipv6;
-	char ipv6_addr[64];
-#endif
-	/** @todo FIXME Some parts of urldb.c make confusions between hosts
-	 * and "prefixes", we can sometimes be erroneously passed more than
-	 * just a host.  Sometimes we may be passed trailing slashes, or even
-	 * whole path segments.  A specific criminal in this class is
-	 * urldb_iterate_partial, which takes a prefix to search for, but
-	 * passes that prefix to functions that expect only hosts.
-	 *
-	 * For the time being, we will accept such calls; we check if there
-	 * is a / in the host parameter, and if there is, we take a copy and
-	 * replace the / with a \0.  This is not a permanent solution; we
-	 * should search through NetSurf and find all the callers that are
-	 * in error and fix them.  When doing this task, it might be wise
-	 * to replace the hideousness below with code that doesn't have to do
-	 * this, and add assert(strchr(host, '/') == NULL); somewhere.
-	 * -- rjek - 2010-11-04
-	 */
-
-	slash = strchr(host, '/');
-	if (slash == NULL) {
-		sane_host = host;
-	} else {
-		char *c = strdup(host);
-		c[slash - host] = '\0';
-		sane_host = c;
-		host_len = slash - host - 1;
-		LOG(("WARNING: called with non-host '%s'", host));
-	}
-
-	if (strspn(sane_host, "0123456789abcdefABCDEF[].:") < host_len)
-		goto out_false;
-
-	if (inet_aton(sane_host, &ipv4) != 0) {
-		/* This can only be a sane IPv4 address if it contains 3 dots.
-		 * Helpfully, inet_aton is happy to treat "a", "a.b", "a.b.c",
-		 * and "a.b.c.d" as valid IPv4 address strings where we only
-		 * support the full, dotted-quad, form.
-		 */
-		int num_dots = 0;
-		size_t index;
-
-		for (index = 0; index < host_len; index++) {
-			if (sane_host[index] == '.')
-				num_dots++;
-		}
-
-		if (num_dots == 3)
-			goto out_true;
-		else
-			goto out_false;
-	}
+	struct search_node *n;
 
-#ifndef NO_IPV6
-	if (sane_host[0] != '[' || sane_host[host_len] != ']')
-		goto out_false;
+	assert(root && data);
 
-	strncpy(ipv6_addr, sane_host + 1, sizeof(ipv6_addr));
-	ipv6_addr[sizeof(ipv6_addr) - 1] = '\0';
+	n = malloc(sizeof(struct search_node));
+	if (!n)
+		return NULL;
 
-	if (inet_pton(AF_INET6, ipv6_addr, &ipv6) == 1)
-		goto out_true;
-#endif
+	n->level = 1;
+	n->data = data;
+	n->left = n->right = &empty;
 
-out_false:
-	if (slash != NULL) free((void *)sane_host);
-	return false;
+	root = urldb_search_insert_internal(root, n);
 
-out_true:
-	if (slash != NULL) free((void *)sane_host);
-	return true;
+	return root;
 }
 
 
 /**
- * Add a host to the database, creating any intermediate entries
+ * Parse a cookie avpair
  *
- * \param host Hostname to add
- * \return Pointer to leaf node, or NULL on memory exhaustion
+ * \param c Cookie struct to populate
+ * \param n Name component
+ * \param v Value component
+ * \param was_quoted Whether ::v was quoted in the input
+ * \return true on success, false on memory exhaustion
  */
-struct host_part *urldb_add_host(const char *host)
+static bool urldb_parse_avpair(struct cookie_internal_data *c, char *n,
+			       char *v, bool was_quoted)
 {
-	struct host_part *d = (struct host_part *) &db_root, *e;
-	struct search_node *s;
-	char buf[256]; /* 256 bytes is sufficient - domain names are
-			* limited to 255 chars. */
-	char *part;
-
-	assert(host);
-
-	if (urldb__host_is_ip_address(host)) {
-		/* Host is an IP, so simply add as TLD */
-
-		/* Check for existing entry */
-		for (e = d->children; e; e = e->next)
-			if (strcasecmp(host, e->part) == 0)
-				/* found => return it */
-				return e;
+	int vlen;
 
-		d = urldb_add_host_node(host, d);
+	assert(c && n && v);
 
-		s = urldb_search_insert(search_trees[ST_IP], d);
-		if (!s) {
-			/* failed */
-			d = NULL;
-		} else {
-			search_trees[ST_IP] = s;
-		}
+	/* Strip whitespace from start of name */
+	for (; *n; n++) {
+		if (*n != ' ' && *n != '\t')
+			break;
+	}
 
-		return d;
+	/* Strip whitespace from end of name (last char is n[vlen - 1]) */
+	for (vlen = strlen(n); vlen; vlen--) {
+		if (n[vlen - 1] == ' ' || n[vlen - 1] == '\t')
+			n[vlen - 1] = '\0';
+		else
+			break;
 	}
 
-	/* Copy host string, so we can corrupt it */
-	strncpy(buf, host, sizeof buf);
-	buf[sizeof buf - 1] = '\0';
+	/* Strip whitespace from start of value */
+	for (; *v; v++) {
+		if (*v != ' ' && *v != '\t')
+			break;
+	}
 
-	/* Process FQDN segments backwards */
-	do {
-		part = strrchr(buf, '.');
-		if (!part) {
-			/* last segment */
-			/* Check for existing entry */
-			for (e = d->children; e; e = e->next)
-				if (strcasecmp(buf, e->part) == 0)
-					break;
+	/* Strip whitespace from end of value (last char is v[vlen - 1]) */
+	for (vlen = strlen(v); vlen; vlen--) {
+		if (v[vlen - 1] == ' ' || v[vlen - 1] == '\t')
+			v[vlen - 1] = '\0';
+		else
+			break;
+	}
 
-			if (e) {
-				d = e;
-			} else {
-				d = urldb_add_host_node(buf, d);
-			}
+	if (!c->comment && strcasecmp(n, "Comment") == 0) {
+		c->comment = strdup(v);
+		if (!c->comment)
+			return false;
+	} else if (!c->domain && strcasecmp(n, "Domain") == 0) {
+		if (v[0] == '.') {
+			/* Domain must start with a dot */
+			c->domain_from_set = true;
+			c->domain = strdup(v);
+			if (!c->domain)
+				return false;
+		}
+	} else if (strcasecmp(n, "Max-Age") == 0) {
+		int temp = atoi(v);
+		if (temp == 0)
+			/* Special case - 0 means delete */
+			c->expires = 0;
+		else
+			c->expires = time(NULL) + temp;
+	} else if (!c->path && strcasecmp(n, "Path") == 0) {
+		c->path_from_set = true;
+		c->path = strdup(v);
+		if (!c->path)
+			return false;
+	} else if (strcasecmp(n, "Version") == 0) {
+		c->version = atoi(v);
+	} else if (strcasecmp(n, "Expires") == 0) {
+		char *datenoday;
+		time_t expires;
 
-			/* And insert into search tree */
-			if (d) {
-				struct search_node **r;
+		/* Strip dayname from date (these are hugely
+		 * variable and liable to break the parser.
+		 * They also serve no useful purpose) */
+		for (datenoday = v; *datenoday && !isdigit(*datenoday);
+				datenoday++)
+			; /* do nothing */
 
-				r = urldb_get_search_tree_direct(buf);
-				s = urldb_search_insert(*r, d);
-				if (!s) {
-					/* failed */
-					d = NULL;
-				} else {
-					*r = s;
-				}
-			}
-			break;
+		expires = curl_getdate(datenoday, NULL);
+		if (expires == -1) {
+			/* assume we have an unrepresentable
+			 * date => force it to the maximum
+			 * possible value of a 32bit time_t
+			 * (this may break in 2038. We'll
+			 * deal with that once we come to
+			 * it) */
+			expires = (time_t)0x7fffffff;
 		}
+		c->expires = expires;
+	} else if (strcasecmp(n, "Secure") == 0) {
+		c->secure = true;
+	} else if (strcasecmp(n, "HttpOnly") == 0) {
+		c->http_only = true;
+	} else if (!c->name) {
+		c->name = strdup(n);
+		c->value = strdup(v);
+		c->value_was_quoted = was_quoted;
+		if (!c->name || !c->value)
+			return false;
+	}
 
-		/* Check for existing entry */
-		for (e = d->children; e; e = e->next)
-			if (strcasecmp(part + 1, e->part) == 0)
-				break;
+	return true;
+}
 
-		d = e ? e : urldb_add_host_node(part + 1, d);
-		if (!d)
-			break;
 
-		*part = '\0';
-	} while (1);
+/**
+ * Free a cookie
+ *
+ * \param c The cookie to free
+ */
+static void urldb_free_cookie(struct cookie_internal_data *c)
+{
+	assert(c);
 
-	return d;
+	free(c->comment);
+	free(c->domain);
+	free(c->path);
+	free(c->name);
+	free(c->value);
+	free(c);
 }
 
+
 /**
- * Add a path node to the tree
+ * Parse a cookie
  *
- * \param scheme URL scheme associated with path (copied)
- * \param port Port number on host associated with path
- * \param segment Path segment to add (copied)
- * \param fragment URL fragment (copied), or NULL
- * \param parent Parent node to add to
- * \return Pointer to added node, or NULL on memory exhaustion
+ * \param url URL being fetched
+ * \param cookie Pointer to cookie string (updated on exit)
+ * \return Pointer to cookie structure (on heap, caller frees) or NULL
  */
-struct path_data *urldb_add_path_node(lwc_string *scheme, unsigned int port,
-		const char *segment, lwc_string *fragment,
-		struct path_data *parent)
+static struct cookie_internal_data *
+urldb_parse_cookie(nsurl *url, const char **cookie)
 {
-	struct path_data *d, *e;
+	struct cookie_internal_data *c;
+	const char *cur;
+	char name[1024], value[4096];
+	char *n = name, *v = value;
+	bool in_value = false;
+	bool had_value_data = false;
+	bool value_verbatim = false;
+	bool quoted = false;
+	bool was_quoted = false;
 
-	assert(scheme && segment && parent);
+	assert(url && cookie && *cookie);
 
-	d = calloc(1, sizeof(struct path_data));
-	if (!d)
+	c = calloc(1, sizeof(struct cookie_internal_data));
+	if (c == NULL)
 		return NULL;
 
-	d->scheme = lwc_string_ref(scheme);
+	c->expires = -1;
 
-	d->port = port;
+	name[0] = '\0';
+	value[0] = '\0';
 
-	d->segment = strdup(segment);
-	if (!d->segment) {
-		lwc_string_unref(d->scheme);
-		free(d);
-		return NULL;
-	}
+	for (cur = *cookie; *cur; cur++) {
+		if (*cur == '\r' && *(cur + 1) == '\n') {
+			/* End of header */
+			if (quoted) {
+				/* Unmatched quote encountered */
 
-	if (fragment) {
-		if (!urldb_add_path_fragment(d, fragment)) {
-			free(d->segment);
-			lwc_string_unref(d->scheme);
-			free(d);
-			return NULL;
-		}
-	}
+				/* Match Firefox 2.0.0.11 */
+				value[0] = '\0';
 
-	for (e = parent->children; e; e = e->next)
-		if (strcmp(e->segment, d->segment) > 0)
-			break;
+			}
 
-	if (e) {
-		d->prev = e->prev;
-		d->next = e;
-		if (e->prev)
-			e->prev->next = d;
-		else
-			parent->children = d;
-		e->prev = d;
-	} else if (!parent->children) {
-		d->prev = d->next = NULL;
-		parent->children = parent->last = d;
-	} else {
-		d->next = NULL;
-		d->prev = parent->last;
-		parent->last->next = d;
-		parent->last = d;
-	}
-	d->parent = parent;
+			break;
+		} else if (*cur == '\r') {
+			/* Spurious carriage return */
+			continue;
+		} else if (*cur == '\n') {
+			/* Spurious newline */
+			continue;
+		}
 
-	return d;
-}
+		if (in_value && !had_value_data) {
+			if (*cur == ' ' || *cur == '\t') {
+				/* Strip leading whitespace from value */
+				continue;
+			} else {
+				had_value_data = true;
 
-/**
- * Add a path to the database, creating any intermediate entries
- *
- * \param scheme URL scheme associated with path
- * \param port Port number on host associated with path
- * \param host Host tree node to attach to
- * \param path_query Absolute path plus query to add (freed)
- * \param fragment URL fragment, or NULL
- * \param url URL (fragment ignored)
- * \return Pointer to leaf node, or NULL on memory exhaustion
- */
-struct path_data *urldb_add_path(lwc_string *scheme, unsigned int port,
-		const struct host_part *host, char *path_query,
-		lwc_string *fragment, nsurl *url)
-{
-	struct path_data *d, *e;
-	char *buf = path_query;
-	char *segment, *slash;
-	bool match;
+				/* Value is taken verbatim if first non-space
+				 * character is not a " */
+				if (*cur != '"') {
+					value_verbatim = true;
+				}
+			}
+		}
 
-	assert(scheme && host && url);
+		if (in_value && !value_verbatim && (*cur == '"')) {
+			/* Only non-verbatim values may be quoted */
+			if (cur == *cookie || *(cur - 1) != '\\') {
+				/* Only unescaped quotes count */
+				was_quoted = quoted;
+				quoted = !quoted;
 
-	d = (struct path_data *) &host->paths;
+				continue;
+			}
+		}
 
-	/* skip leading '/' */
-	segment = buf;
-	if (*segment == '/')
-		segment++;
+		if (!quoted && !in_value && *cur == '=') {
+			/* First equals => attr-value separator */
+			in_value = true;
+			continue;
+		}
 
-	/* Process path segments */
-	do {
-		slash = strchr(segment, '/');
-		if (!slash) {
-			/* last segment */
-			/* look for existing entry */
-			for (e = d->children; e; e = e->next)
-				if (strcmp(segment, e->segment) == 0 &&
-						lwc_string_isequal(scheme,
-						e->scheme, &match) ==
-						lwc_error_ok &&
-						match == true &&
-						e->port == port)
-					break;
+		if (!quoted && (was_quoted || *cur == ';')) {
+			/* Semicolon or after quoted value
+			 * => end of current avpair */
 
-			d = e ? urldb_add_path_fragment(e, fragment) :
-					urldb_add_path_node(scheme, port,
-					segment, fragment, d);
-			break;
-		}
+			/* NUL-terminate tokens */
+			*n = '\0';
+			*v = '\0';
 
-		*slash = '\0';
+			if (!urldb_parse_avpair(c, name, value, was_quoted)) {
+				/* Memory exhausted */
+				urldb_free_cookie(c);
+				return NULL;
+			}
 
-		/* look for existing entry */
-		for (e = d->children; e; e = e->next)
-			if (strcmp(segment, e->segment) == 0 &&
-					lwc_string_isequal(scheme, e->scheme,
-						&match) == lwc_error_ok &&
-						match == true &&
-					e->port == port)
+			/* And reset to start */
+			n = name;
+			v = value;
+			in_value = false;
+			had_value_data = false;
+			value_verbatim = false;
+			was_quoted = false;
+
+			/* Now, if the current input is anything other than a
+			 * semicolon, we must be sure to reprocess it */
+			if (*cur != ';') {
+				cur--;
+			}
+
+			continue;
+		}
+
+		/* And now handle commas. These are a pain as they may mean
+		 * any of the following:
+		 *
+		 * + End of cookie
+		 * + Day separator in Expires avpair
+		 * + (Invalid) comma in unquoted value
+		 *
+		 * Therefore, in order to handle all 3 cases (2 and 3 are
+		 * identical, the difference being that 2 is in the spec and
+		 * 3 isn't), we need to determine where the comma actually
+		 * lies. We use the following heuristic:
+		 *
+		 *   Given a comma at the current input position, find the
+		 *   immediately following semicolon (or end of input if none
+		 *   found). Then, consider the input characters between
+		 *   these two positions. If any of these characters is an
+		 *   '=', we must assume that the comma signified the end of
+		 *   the current cookie.
+		 *
+		 * This holds as the first avpair of any cookie must be
+		 * NAME=VALUE, so the '=' is guaranteed to appear in the
+		 * case where the comma marks the end of a cookie.
+		 *
+		 * This will fail, however, in the case where '=' appears in
+		 * the value of the current avpair after the comma or the
+		 * subsequent cookie does not start with NAME=VALUE. Neither
+		 * of these is particularly likely and if they do occur, the
+		 * website is more broken than we can be bothered to handle.
+		 */
+		if (!quoted && *cur == ',') {
+			/* Find semi-colon, if any */
+			const char *p;
+			const char *semi = strchr(cur + 1, ';');
+			if (!semi)
+				semi = cur + strlen(cur) - 2 /* CRLF */;
+
+			/* Look for equals sign between comma and semi */
+			for (p = cur + 1; p < semi; p++)
+				if (*p == '=')
+					break;
+
+			if (p == semi) {
+				/* none found => comma internal to value */
+				/* do nothing */
+			} else {
+				/* found one => comma marks end of cookie */
+				cur++;
 				break;
+			}
+		}
 
-		d = e ? e : urldb_add_path_node(scheme, port, segment, NULL, d);
-		if (!d)
-			break;
+		/* Accumulate into buffers, always leaving space for a NUL */
+		/** \todo is silently truncating overlong names/values wise? */
+		if (!in_value) {
+			if (n < name + (sizeof(name) - 1))
+				*n++ = *cur;
+		} else {
+			if (v < value + (sizeof(value) - 1))
+				*v++ = *cur;
+		}
+	}
 
-		segment = slash + 1;
-	} while (1);
+	/* Parse final avpair */
+	*n = '\0';
+	*v = '\0';
 
-	free(path_query);
+	if (!urldb_parse_avpair(c, name, value, was_quoted)) {
+		/* Memory exhausted */
+		urldb_free_cookie(c);
+		return NULL;
+	}
 
-	if (d && !d->url) {
-		/* Insert defragmented URL */
-		if (nsurl_defragment(url, &d->url) != NSERROR_OK)
+	/* Now fix-up default values */
+	if (c->domain == NULL) {
+		lwc_string *host = nsurl_get_component(url, NSURL_HOST);
+		if (host == NULL) {
+			urldb_free_cookie(c);
 			return NULL;
+		}
+		c->domain = strdup(lwc_string_data(host));
+		lwc_string_unref(host);
 	}
 
-	return d;
-}
-
-/**
- * Fragment comparator callback for qsort
- */
-int urldb_add_path_fragment_cmp(const void *a, const void *b)
-{
-	return strcasecmp(*((const char **) a), *((const char **) b));
-}
+	if (c->path == NULL) {
+		const char *path_data;
+		char *path, *slash;
+		lwc_string *path_lwc;
 
-/**
- * Add a fragment to a path segment
- *
- * \param segment Path segment to add to
- * \param fragment Fragment to add (copied), or NULL
- * \return segment or NULL on memory exhaustion
- */
-struct path_data *urldb_add_path_fragment(struct path_data *segment,
-		lwc_string *fragment)
-{
-	char **temp;
+		path_lwc = nsurl_get_component(url, NSURL_PATH);
+		if (path_lwc == NULL) {
+			urldb_free_cookie(c);
+			return NULL;
+		}
+		path_data = lwc_string_data(path_lwc);
 
-	assert(segment);
+		/* Strip leafname and trailing slash (4.3.1) */
+		slash = strrchr(path_data, '/');
+		if (slash != NULL) {
+			/* Special case: retain first slash in path */
+			if (slash == path_data)
+				slash++;
 
-	/* If no fragment, this function is a NOP
-	 * This may seem strange, but it makes the rest
-	 * of the code cleaner */
-	if (!fragment)
-		return segment;
+			slash = strndup(path_data, slash - path_data);
+			if (slash == NULL) {
+				lwc_string_unref(path_lwc);
+				urldb_free_cookie(c);
+				return NULL;
+			}
 
-	temp = realloc(segment->fragment,
-			(segment->frag_cnt + 1) * sizeof(char *));
-	if (!temp)
-		return NULL;
+			path = slash;
+			lwc_string_unref(path_lwc);
+		} else {
+			path = strdup(lwc_string_data(path_lwc));
+			lwc_string_unref(path_lwc);
+			if (path == NULL) {
+				urldb_free_cookie(c);
+				return NULL;
+			}
+		}
 
-	segment->fragment = temp;
-	segment->fragment[segment->frag_cnt] =
-			strdup(lwc_string_data(fragment));
-	if (!segment->fragment[segment->frag_cnt]) {
-		/* Don't free temp - it's now our buffer */
-		return NULL;
+		c->path = path;
 	}
 
-	segment->frag_cnt++;
-
-	/* We want fragments in alphabetical order, so sort them
-	 * It may prove better to insert in alphabetical order instead */
-	qsort(segment->fragment, segment->frag_cnt, sizeof (char *),
-			urldb_add_path_fragment_cmp);
+	/* Write back current position */
+	*cookie = cur;
 
-	return segment;
+	return c;
 }
 
+
 /**
- * Find an URL in the database
+ * Insert a cookie into the database
  *
- * \param url Absolute URL to find
- * \return Pointer to path data, or NULL if not found
+ * \param c The cookie to insert
+ * \param scheme URL scheme associated with cookie path
+ * \param url URL (sans fragment) associated with cookie
+ * \return true on success, false on memory exhaustion (c will be freed)
  */
-struct path_data *urldb_find_url(nsurl *url)
+static bool urldb_insert_cookie(struct cookie_internal_data *c,
+				lwc_string *scheme, nsurl *url)
 {
+	struct cookie_internal_data *d;
 	const struct host_part *h;
 	struct path_data *p;
-	struct search_node *tree;
-	char *plq;
-	const char *host_str;
-	lwc_string *scheme, *host, *port;
-	size_t len = 0;
-	unsigned int port_int;
-	bool match;
+	time_t now = time(NULL);
 
-	assert(url);
-        
-	if (url_bloom != NULL) {
-		if (bloom_search_hash(url_bloom,
-					nsurl_hash(url)) == false) {
-					return NULL;
+	assert(c);
+
+	if (c->domain[0] == '.') {
+		h = urldb_search_find(
+			urldb_get_search_tree(&(c->domain[1])),
+			c->domain + 1);
+		if (!h) {
+			h = urldb_add_host(c->domain + 1);
+			if (!h) {
+				urldb_free_cookie(c);
+				return false;
+			}
 		}
-	}
 
-	scheme = nsurl_get_component(url, NSURL_SCHEME);
-	if (scheme == NULL)
-		return NULL;
+		p = (struct path_data *) &h->paths;
+	} else {
+		/* Need to have a URL and scheme, if it's not a domain cookie */
+		assert(url != NULL);
+		assert(scheme != NULL);
 
-	host = nsurl_get_component(url, NSURL_HOST);
-	if (host != NULL) {
-		host_str = lwc_string_data(host);
-		lwc_string_unref(host);
+		h = urldb_search_find(
+				urldb_get_search_tree(c->domain),
+				c->domain);
 
-	} else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) ==
-			lwc_error_ok && match == true) {
-		host_str = "localhost";
+		if (!h) {
+			h = urldb_add_host(c->domain);
+			if (!h) {
+				urldb_free_cookie(c);
+				return false;
+			}
+		}
 
-	} else {
-		lwc_string_unref(scheme);
-		return NULL;
+		/* find path */
+		p = urldb_add_path(scheme, 0, h,
+				strdup(c->path), NULL, url);
+		if (!p) {
+			urldb_free_cookie(c);
+			return false;
+		}
 	}
 
-	tree = urldb_get_search_tree(host_str);
-	h = urldb_search_find(tree, host_str);
-	if (!h) {
-		lwc_string_unref(scheme);
-		return NULL;
+	/* add cookie */
+	for (d = p->cookies; d; d = d->next) {
+		if (!strcmp(d->domain, c->domain) &&
+				!strcmp(d->path, c->path) &&
+				!strcmp(d->name, c->name))
+			break;
 	}
 
-	/* generate plq (path, leaf, query) */
-	if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &plq, &len) !=
-			NSERROR_OK) {
-		lwc_string_unref(scheme);
-		return NULL;
-	}
+	if (d) {
+		if (c->expires != -1 && c->expires < now) {
+			/* remove cookie */
+			if (d->next)
+				d->next->prev = d->prev;
+			else
+				p->cookies_end = d->prev;
+			if (d->prev)
+				d->prev->next = d->next;
+			else
+				p->cookies = d->next;
 
-	/* Get port */
-	port = nsurl_get_component(url, NSURL_PORT);
-	if (port != NULL) {
-		port_int = atoi(lwc_string_data(port));
-		lwc_string_unref(port);
-	} else {
-		port_int = 0;
-	}
+			cookie_manager_remove((struct cookie_data *)d);
 
-	p = urldb_match_path(&h->paths, plq, scheme, port_int);
+			urldb_free_cookie(d);
+			urldb_free_cookie(c);
+		} else {
+			/* replace d with c */
+			c->prev = d->prev;
+			c->next = d->next;
+			if (c->next)
+				c->next->prev = c;
+			else
+				p->cookies_end = c;
+			if (c->prev)
+				c->prev->next = c;
+			else
+				p->cookies = c;
 
-	free(plq);
-	lwc_string_unref(scheme);
+			cookie_manager_remove((struct cookie_data *)d);
+			urldb_free_cookie(d);
 
-	return p;
+			cookie_manager_add((struct cookie_data *)c);
+		}
+	} else {
+		c->prev = p->cookies_end;
+		c->next = NULL;
+		if (p->cookies_end)
+			p->cookies_end->next = c;
+		else
+			p->cookies = c;
+		p->cookies_end = c;
+
+		cookie_manager_add((struct cookie_data *)c);
+	}
+
+	return true;
 }
 
+
 /**
- * Match a path string
+ * Concatenate a cookie into the provided buffer
  *
- * \param parent Path (sub)tree to look in
- * \param path The path to search for
- * \param scheme The URL scheme associated with the path
- * \param port The port associated with the path
- * \return Pointer to path data or NULL if not found.
+ * \param c Cookie to concatenate
+ * \param version The version of the cookie string to output
+ * \param used Pointer to amount of buffer used (updated)
+ * \param alloc Pointer to allocated size of buffer (updated)
+ * \param buf Pointer to Pointer to buffer (updated)
+ * \return true on success, false on memory exhaustion
  */
-struct path_data *urldb_match_path(const struct path_data *parent,
-		const char *path, lwc_string *scheme, unsigned short port)
+static bool urldb_concat_cookie(struct cookie_internal_data *c, int version,
+		int *used, int *alloc, char **buf)
 {
-	const struct path_data *p;
-	const char *slash;
-	bool match;
+	/* Combined (A)BNF for the Cookie: request header:
+	 *
+	 * CHAR           = <any US-ASCII character (octets 0 - 127)>
+	 * CTL            = <any US-ASCII control character
+	 *                  (octets 0 - 31) and DEL (127)>
+	 * CR             = <US-ASCII CR, carriage return (13)>
+	 * LF             = <US-ASCII LF, linefeed (10)>
+	 * SP             = <US-ASCII SP, space (32)>
+	 * HT             = <US-ASCII HT, horizontal-tab (9)>
+	 * <">            = <US-ASCII double-quote mark (34)>
+	 *
+	 * CRLF           = CR LF
+	 *
+	 * LWS            = [CRLF] 1*( SP | HT )
+	 *
+	 * TEXT           = <any OCTET except CTLs,
+	 *                  but including LWS>
+	 *
+	 * token          = 1*<any CHAR except CTLs or separators>
+	 * separators     = "(" | ")" | "<" | ">" | "@"
+	 *                | "," | ";" | ":" | "\" | <">
+	 *                | "/" | "[" | "]" | "?" | "="
+	 *                | "{" | "}" | SP | HT
+	 *
+	 * quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
+	 * qdtext         = <any TEXT except <">>
+	 * quoted-pair    = "\" CHAR
+	 *
+	 * attr            =       token
+	 * value           =       word
+	 * word            =       token | quoted-string
+	 *
+	 * cookie          =       "Cookie:" cookie-version
+	 *                         1*((";" | ",") cookie-value)
+	 * cookie-value    =       NAME "=" VALUE [";" path] [";" domain]
+	 * cookie-version  =       "$Version" "=" value
+	 * NAME            =       attr
+	 * VALUE           =       value
+	 * path            =       "$Path" "=" value
+	 * domain          =       "$Domain" "=" value
+	 *
+	 * A note on quoted-string handling:
+	 *   The cookie data stored in the db is verbatim (i.e. sans enclosing
+	 *   <">, if any, and with all quoted-pairs intact) thus all that we
+	 *   need to do here is ensure that value strings which were quoted
+	 *   in Set-Cookie or which include any of the separators are quoted
+	 *   before use.
+	 *
+	 * A note on cookie-value separation:
+	 *   We use semicolons for all separators, including between
+	 *   cookie-values. This simplifies things and is backwards compatible.
+	 */
+	const char * const separators = "()<>@,;:\\\"/[]?={} \t";
 
-	assert(parent != NULL);
-	assert(parent->segment == NULL);
-	assert(path[0] == '/');
+	int max_len;
 
-	/* Start with children, as parent has no segment */
-	p = parent->children;
+	assert(c && used && alloc && buf && *buf);
 
-	while (p != NULL) {
-		slash = strchr(path + 1, '/');
-		if (!slash)
-			slash = path + strlen(path);
+	/* "; " cookie-value
+	 * We allow for the possibility that values are quoted
+	 */
+	max_len = 2 + strlen(c->name) + 1 + strlen(c->value) + 2 +
+			(c->path_from_set ?
+				8 + strlen(c->path) + 2 : 0) +
+			(c->domain_from_set ?
+				10 + strlen(c->domain) + 2 : 0);
 
-		if (strncmp(p->segment, path + 1, slash - path - 1) == 0 &&
-				lwc_string_isequal(p->scheme, scheme, &match) ==
-						lwc_error_ok &&
-				match == true &&
-				p->port == port) {
-			if (*slash == '\0') {
-				/* Complete match */
-				return (struct path_data *) p;
-			}
+	if (*used + max_len >= *alloc) {
+		char *temp = realloc(*buf, *alloc + 4096);
+		if (!temp) {
+			return false;
+		}
+		*buf = temp;
+		*alloc += 4096;
+	}
 
-			/* Match so far, go down tree */
-			p = p->children;
+	if (version == COOKIE_NETSCAPE) {
+		/* Original Netscape cookie */
+		sprintf(*buf + *used - 1, "; %s=", c->name);
+		*used += 2 + strlen(c->name) + 1;
 
-			path = slash;
+		/* The Netscape spec doesn't mention quoting of cookie values.
+		 * RFC 2109 $10.1.3 indicates that values must not be quoted.
+		 *
+		 * However, other browsers preserve quoting, so we should, too
+		 */
+		if (c->value_was_quoted) {
+			sprintf(*buf + *used - 1, "\"%s\"", c->value);
+			*used += 1 + strlen(c->value) + 1;
 		} else {
-			/* No match, try next sibling */
-			p = p->next;
+			/** \todo should we %XX-encode [;HT,SP] ? */
+			/** \todo Should we strip escaping backslashes? */
+			sprintf(*buf + *used - 1, "%s", c->value);
+			*used += strlen(c->value);
+		}
+
+		/* We don't send path/domain information -- that's what the
+		 * Netscape spec suggests we should do, anyway. */
+	} else {
+		/* RFC2109 or RFC2965 cookie */
+		sprintf(*buf + *used - 1, "; %s=", c->name);
+		*used += 2 + strlen(c->name) + 1;
+
+		/* Value needs quoting if it contains any separator or if
+		 * it needs preserving from the Set-Cookie header */
+		if (c->value_was_quoted ||
+				strpbrk(c->value, separators) != NULL) {
+			sprintf(*buf + *used - 1, "\"%s\"", c->value);
+			*used += 1 + strlen(c->value) + 1;
+		} else {
+			sprintf(*buf + *used - 1, "%s", c->value);
+			*used += strlen(c->value);
+		}
+
+		if (c->path_from_set) {
+			/* Path, quoted if necessary */
+			sprintf(*buf + *used - 1, "; $Path=");
+			*used += 8;
+
+			if (strpbrk(c->path, separators) != NULL) {
+				sprintf(*buf + *used - 1, "\"%s\"", c->path);
+				*used += 1 + strlen(c->path) + 1;
+			} else {
+				sprintf(*buf + *used - 1, "%s", c->path);
+				*used += strlen(c->path);
+			}
+		}
+
+		if (c->domain_from_set) {
+			/* Domain, quoted if necessary */
+			sprintf(*buf + *used - 1, "; $Domain=");
+			*used += 10;
+
+			if (strpbrk(c->domain, separators) != NULL) {
+				sprintf(*buf + *used - 1, "\"%s\"", c->domain);
+				*used += 1 + strlen(c->domain) + 1;
+			} else {
+				sprintf(*buf + *used - 1, "%s", c->domain);
+				*used += strlen(c->domain);
+			}
 		}
 	}
 
-	return NULL;
+	return true;
 }
 
+
 /**
- * Get the search tree for a particular host
- *
- * \param host  the host to lookup
- * \return the corresponding search tree
+ * Delete the first matching cookie found in a path subtree.
  */
-struct search_node **urldb_get_search_tree_direct(const char *host) {
-	assert(host);
+static void urldb_delete_cookie_paths(const char *domain, const char *path,
+		const char *name, struct path_data *parent)
+{
+	struct cookie_internal_data *c;
+	struct path_data *p = parent;
 
-	if (urldb__host_is_ip_address(host))
-		return &search_trees[ST_IP];
-	else if (isalpha(*host))
-		return &search_trees[ST_DN + tolower(*host) - 'a'];
-	return &search_trees[ST_EE];
-}
+	assert(parent);
 
-/**
- * Get the search tree for a particular host
- *
- * \param host  the host to lookup
- * \return the corresponding search tree
- */
-struct search_node *urldb_get_search_tree(const char *host) {
-  	return *urldb_get_search_tree_direct(host);
-}
+	do {
+		for (c = p->cookies; c; c = c->next) {
+			if (strcmp(c->domain, domain) == 0 &&
+					strcmp(c->path, path) == 0 &&
+					strcmp(c->name, name) == 0) {
+				if (c->prev)
+					c->prev->next = c->next;
+				else
+					p->cookies = c->next;
 
-/**
- * Dump URL database to stderr
- */
-void urldb_dump(void)
-{
-	int i;
+				if (c->next)
+					c->next->prev = c->prev;
+				else
+					p->cookies_end = c->prev;
 
-	urldb_dump_hosts(&db_root);
+				urldb_free_cookie(c);
 
-	for (i = 0; i != NUM_SEARCH_TREES; i++)
-		urldb_dump_search(search_trees[i], 0);
+				return;
+			}
+		}
+
+		if (p->children) {
+			p = p->children;
+		} else {
+			while (p != parent) {
+				if (p->next != NULL) {
+					p = p->next;
+					break;
+				}
+
+				p = p->parent;
+			}
+		}
+	} while (p != parent);
 }
 
+
 /**
- * Dump URL database hosts to stderr
- *
- * \param parent Parent node of tree to dump
+ * Delete matching cookies from a host subtree and its associated paths
  */
-void urldb_dump_hosts(struct host_part *parent)
+static void urldb_delete_cookie_hosts(const char *domain, const char *path,
+		const char *name, struct host_part *parent)
 {
 	struct host_part *h;
+	assert(parent);
 
-	if (parent->part) {
-		LOG(("%s", parent->part));
-
-		LOG(("\t%s invalid SSL certs",
-			parent->permit_invalid_certs ? "Permits" : "Denies"));
-	}
-
-	/* Dump path data */
-	urldb_dump_paths(&parent->paths);
+	urldb_delete_cookie_paths(domain, path, name, &parent->paths);
 
-	/* and recurse */
 	for (h = parent->children; h; h = h->next)
-		urldb_dump_hosts(h);
+		urldb_delete_cookie_hosts(domain, path, name, h);
 }
 
+
 /**
- * Dump URL database paths to stderr
+ * Save a path subtree's cookies
  *
- * \param parent Parent node of tree to dump
+ * \param fp File pointer to write to
+ * \param parent Parent path
  */
-void urldb_dump_paths(struct path_data *parent)
+static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent)
 {
-	const struct path_data *p = parent;
-	unsigned int i;
+	struct path_data *p = parent;
+	time_t now = time(NULL);
+
+	assert(fp && parent);
 
 	do {
-		if (p->segment != NULL) {
-			LOG(("\t%s : %u", lwc_string_data(p->scheme), p->port));
+		if (p->cookies != NULL) {
+			struct cookie_internal_data *c;
 
-			LOG(("\t\t'%s'", p->segment));
+			for (c = p->cookies; c != NULL; c = c->next) {
+				if (c->expires == -1 || c->expires < now)
+					/* Skip expired & session cookies */
+					continue;
 
-			for (i = 0; i != p->frag_cnt; i++)
-				LOG(("\t\t\t#%s", p->fragment[i]));
+				fprintf(fp,
+					"%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t"
+					"%s\t%s\t%d\t%s\t%s\t%s\n",
+					c->version, c->domain,
+					c->domain_from_set, c->path,
+					c->path_from_set, c->secure,
+					c->http_only,
+					(int)c->expires, (int)c->last_used,
+					c->no_destroy, c->name, c->value,
+					c->value_was_quoted,
+					p->scheme ? lwc_string_data(p->scheme) :
+							"unused",
+					p->url ? nsurl_access(p->url) :
+							"unused",
+					c->comment ? c->comment : "");
+			}
 		}
 
 		if (p->children != NULL) {
@@ -2185,1539 +2273,1447 @@ void urldb_dump_paths(struct path_data *parent)
 	} while (p != parent);
 }
 
+
 /**
- * Dump search tree
+ * Save a host subtree's cookies
  *
- * \param parent Parent node of tree to dump
- * \param depth Tree depth
+ * \param fp File pointer to write to
+ * \param parent Parent host
  */
-void urldb_dump_search(struct search_node *parent, int depth)
+static void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent)
 {
-	const struct host_part *h;
-	int i;
-
-	if (parent == &empty)
-		return;
-
-	urldb_dump_search(parent->left, depth + 1);
-
-	for (i = 0; i != depth; i++)
-			fputc(' ', stderr);
-
-	for (h = parent->data; h; h = h->parent) {
-		if (h->part)
-			fprintf(stderr, "%s", h->part);
-
-		if (h->parent && h->parent->parent)
-			fputc('.', stderr);
-	}
+	struct host_part *h;
+	assert(fp && parent);
 
-	fputc('\n', stderr);
+	urldb_save_cookie_paths(fp, &parent->paths);
 
-	urldb_dump_search(parent->right, depth + 1);
+	for (h = parent->children; h; h = h->next)
+		urldb_save_cookie_hosts(fp, h);
 }
 
+
 /**
- * Insert a node into the search tree
+ * Destroy a cookie node
  *
- * \param root Root of tree to insert into
- * \param data User data to insert
- * \return Pointer to updated root, or NULL if failed
+ * \param c Cookie to destroy
  */
-struct search_node *urldb_search_insert(struct search_node *root,
-		const struct host_part *data)
+static void urldb_destroy_cookie(struct cookie_internal_data *c)
 {
-	struct search_node *n;
-
-	assert(root && data);
-
-	n = malloc(sizeof(struct search_node));
-	if (!n)
-		return NULL;
-
-	n->level = 1;
-	n->data = data;
-	n->left = n->right = &empty;
-
-	root = urldb_search_insert_internal(root, n);
+	free(c->name);
+	free(c->value);
+	free(c->comment);
+	free(c->domain);
+	free(c->path);
 
-	return root;
+	free(c);
 }
 
+
 /**
- * Insert node into search tree
+ * Destroy the contents of a path node
  *
- * \param root Root of (sub)tree to insert into
- * \param n Node to insert
- * \return Pointer to updated root
+ * \param node Node to destroy contents of (does not destroy node)
  */
-struct search_node *urldb_search_insert_internal(struct search_node *root,
-		struct search_node *n)
+static void urldb_destroy_path_node_content(struct path_data *node)
 {
-	assert(root && n);
+	struct cookie_internal_data *a, *b;
+	unsigned int i;
 
-	if (root == &empty) {
-		root = n;
-	} else {
-		int c = urldb_search_match_host(root->data, n->data);
+	if (node->url != NULL)
+		nsurl_unref(node->url);
 
-		if (c > 0) {
-			root->left = urldb_search_insert_internal(
-					root->left, n);
-		} else if (c < 0) {
-			root->right = urldb_search_insert_internal(
-					root->right, n);
-		} else {
-			/* exact match */
-			free(n);
-			return root;
-		}
+	if (node->scheme != NULL)
+		lwc_string_unref(node->scheme);
 
-		root = urldb_search_skew(root);
-		root = urldb_search_split(root);
-	}
+	free(node->segment);
+	for (i = 0; i < node->frag_cnt; i++)
+		free(node->fragment[i]);
+	free(node->fragment);
 
-	return root;
+	if (node->thumb)
+		bitmap_destroy(node->thumb);
+
+	free(node->urld.title);
+
+	for (a = node->cookies; a; a = b) {
+		b = a->next;
+		urldb_destroy_cookie(a);
+	}
 }
 
+
 /**
- * Find a node in a search tree
+ * Destroy protection space data
  *
- * \param root Tree to look in
- * \param host Host to find
- * \return Pointer to host tree node, or NULL if not found
+ * \param space Protection space to destroy
  */
-const struct host_part *urldb_search_find(struct search_node *root,
-		const char *host)
+static void urldb_destroy_prot_space(struct prot_space_data *space)
 {
-	int c;
-
-	assert(root && host);
-
-	if (root == &empty) {
-		return NULL;
-	}
-
-	c = urldb_search_match_string(root->data, host);
+	lwc_string_unref(space->scheme);
+	free(space->realm);
+	free(space->auth);
 
-	if (c > 0)
-		return urldb_search_find(root->left, host);
-	else if (c < 0)
-		return urldb_search_find(root->right, host);
-	else
-		return root->data;
+	free(space);
 }
 
+
 /**
- * Compare a pair of host_parts
+ * Destroy a path tree
  *
- * \param a
- * \param b
- * \return 0 if match, non-zero, otherwise
+ * \param root Root node of tree to destroy
  */
-int urldb_search_match_host(const struct host_part *a,
-		const struct host_part *b)
+static void urldb_destroy_path_tree(struct path_data *root)
 {
-	int ret;
+	struct path_data *p = root;
 
-	assert(a && b);
+	do {
+		if (p->children != NULL) {
+			p = p->children;
+		} else {
+			struct path_data *q = p;
 
-	/* traverse up tree to root, comparing parts as we go. */
-	for (; a && a != &db_root && b && b != &db_root;
-			a = a->parent, b = b->parent)
-		if ((ret = strcasecmp(a->part, b->part)) != 0)
-			/* They differ => return the difference here */
-			return ret;
-
-	/* If we get here then either:
-	 *    a) The path lengths differ
-	 * or b) The hosts are identical
-	 */
-	if (a && a != &db_root && (!b || b == &db_root))
-		/* len(a) > len(b) */
-		return 1;
-	else if ((!a || a == &db_root) && b && b != &db_root)
-		/* len(a) < len(b) */
-		return -1;
-
-	/* identical */
-	return 0;
-}
-
-/**
- * Compare host_part with a string
- *
- * \param a
- * \param b
- * \return 0 if match, non-zero, otherwise
- */
-int urldb_search_match_string(const struct host_part *a,
-		const char *b)
-{
-	const char *end, *dot;
-	int plen, ret;
+			while (p != root) {
+				if (p->next != NULL) {
+					p = p->next;
+					break;
+				}
 
-	assert(a && a != &db_root && b);
+				p = p->parent;
 
-	if (urldb__host_is_ip_address(b)) {
-		/* IP address */
-		return strcasecmp(a->part, b);
-	}
+				urldb_destroy_path_node_content(q);
+				free(q);
 
-	end = b + strlen(b) + 1;
+				q = p;
+			}
 
-	while (b < end && a && a != &db_root) {
-		dot = strchr(b, '.');
-		if (!dot) {
-			/* last segment */
-			dot = end - 1;
+			urldb_destroy_path_node_content(q);
+			free(q);
 		}
-
-		/* Compare strings (length limited) */
-		if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
-			/* didn't match => return difference */
-			return ret;
-
-		/* The strings matched, now check that the lengths do, too */
-		plen = strlen(a->part);
-
-		if (plen > dot - b)
-			/* len(a) > len(b) */
-			return 1;
-		else if (plen < dot - b)
-			/* len(a) < len(b) */
-			return -1;
-
-		b = dot + 1;
-		a = a->parent;
-	}
-
-	/* If we get here then either:
-	 *    a) The path lengths differ
-	 * or b) The hosts are identical
-	 */
-	if (a && a != &db_root && b >= end)
-		/* len(a) > len(b) */
-		return 1;
-	else if ((!a || a == &db_root) && b < end)
-		/* len(a) < len(b) */
-		return -1;
-
-	/* Identical */
-	return 0;
+	} while (p != root);
 }
 
+
 /**
- * Compare host_part with prefix
+ * Destroy a host tree
  *
- * \param a
- * \param b
- * \return 0 if match, non-zero, otherwise
+ * \param root Root node of tree to destroy
  */
-int urldb_search_match_prefix(const struct host_part *a,
-		const char *b)
+static void urldb_destroy_host_tree(struct host_part *root)
 {
-	const char *end, *dot;
-	int plen, ret;
-
-	assert(a && a != &db_root && b);
+	struct host_part *a, *b;
+	struct path_data *p, *q;
+	struct prot_space_data *s, *t;
 
-	if (urldb__host_is_ip_address(b)) {
-		/* IP address */
-		return strncasecmp(a->part, b, strlen(b));
+	/* Destroy children */
+	for (a = root->children; a; a = b) {
+		b = a->next;
+		urldb_destroy_host_tree(a);
 	}
 
-	end = b + strlen(b) + 1;
-
-	while (b < end && a && a != &db_root) {
-		dot = strchr(b, '.');
-		if (!dot) {
-			/* last segment */
-			dot = end - 1;
-		}
-
-		/* Compare strings (length limited) */
-		if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
-			/* didn't match => return difference */
-			return ret;
+	/* Now clean up paths */
+	for (p = root->paths.children; p; p = q) {
+		q = p->next;
+		urldb_destroy_path_tree(p);
+	}
 
-		/* The strings matched */
-		if (dot < end - 1) {
-			/* Consider segment lengths only in the case
-			 * where the prefix contains segments */
-			plen = strlen(a->part);
-			if (plen > dot - b)
-				/* len(a) > len(b) */
-				return 1;
-			else if (plen < dot - b)
-				/* len(a) < len(b) */
-				return -1;
-		}
+	/* Root path */
+	urldb_destroy_path_node_content(&root->paths);
 
-		b = dot + 1;
-		a = a->parent;
+	/* Protection space data */
+	for (s = root->prot_space; s; s = t) {
+		t = s->next;
+		urldb_destroy_prot_space(s);
 	}
 
-	/* If we get here then either:
-	 *    a) The path lengths differ
-	 * or b) The hosts are identical
-	 */
-	if (a && a != &db_root && b >= end)
-		/* len(a) > len(b) => prefix matches */
-		return 0;
-	else if ((!a || a == &db_root) && b < end)
-		/* len(a) < len(b) => prefix does not match */
-		return -1;
-
-	/* Identical */
-	return 0;
+	/* And ourselves */
+	free(root->part);
+	free(root);
 }
 
+
 /**
- * Rotate a subtree right
+ * Destroy a search tree
  *
- * \param root Root of subtree to rotate
- * \return new root of subtree
+ * \param root Root node of tree to destroy
  */
-struct search_node *urldb_search_skew(struct search_node *root)
+static void urldb_destroy_search_tree(struct search_node *root)
 {
-	struct search_node *temp;
+	/* Destroy children */
+	if (root->left != &empty)
+		urldb_destroy_search_tree(root->left);
+	if (root->right != &empty)
+		urldb_destroy_search_tree(root->right);
 
-	assert(root);
+	/* And destroy ourselves */
+	free(root);
+}
 
-	if (root->left->level == root->level) {
-		temp = root->left;
-		root->left = temp->right;
-		temp->right = root;
-		root = temp;
-	}
 
-	return root;
-}
+/*************** External interface ***************/
 
-/**
- * Rotate a node left, increasing the parent's level
- *
- * \param root Root of subtree to rotate
- * \return New root of subtree
- */
-struct search_node *urldb_search_split(struct search_node *root)
-{
-	struct search_node *temp;
 
-	assert(root);
+/* exported interface documented in content/urldb.h */
+void urldb_destroy(void)
+{
+	struct host_part *a, *b;
+	int i;
 
-	if (root->right->right->level == root->level) {
-		temp = root->right;
-		root->right = temp->left;
-		temp->left = root;
-		root = temp;
+	/* Clean up search trees */
+	for (i = 0; i < NUM_SEARCH_TREES; i++) {
+		if (search_trees[i] != &empty)
+			urldb_destroy_search_tree(search_trees[i]);
+	}
 
-		root->level++;
+	/* And database */
+	for (a = db_root.children; a; a = b) {
+		b = a->next;
+		urldb_destroy_host_tree(a);
 	}
 
-	return root;
+	/* And the bloom filter */
+	if (url_bloom != NULL)
+		bloom_destroy(url_bloom);
 }
 
-/**
- * Retrieve cookies for an URL
- *
- * \param url URL being fetched
- * \param include_http_only Whether to include HTTP(S) only cookies.
- * \return Cookies string for libcurl (on heap), or NULL on error/no cookies
- */
-char *urldb_get_cookie(nsurl *url, bool include_http_only)
+
+/* exported interface documented in content/urldb.h */
+nserror urldb_load(const char *filename)
 {
-	const struct path_data *p, *q;
-	const struct host_part *h;
-	lwc_string *path_lwc;
-	struct cookie_internal_data *c;
-	int count = 0, version = COOKIE_RFC2965;
-	struct cookie_internal_data **matched_cookies;
-	int matched_cookies_size = 20;
-	int ret_alloc = 4096, ret_used = 1;
-	const char *path;
-	char *ret;
-	lwc_string *scheme;
-	time_t now;
+#define MAXIMUM_URL_LENGTH 4096
+	char s[MAXIMUM_URL_LENGTH];
+	char host[256];
+	struct host_part *h;
+	int urls;
 	int i;
-	bool match;
+	int version;
+	int length;
+	FILE *fp;
 
-	assert(url != NULL);
+	assert(filename);
 
-	/* The URL must exist in the db in order to find relevant cookies, since
-	 * we search up the tree from the URL node, and cookies from further
-	 * up also apply. */
-	urldb_add_url(url);
+	LOG(("Loading URL file %s", filename));
 
-	p = urldb_find_url(url);
-	if (!p)
-		return NULL;
+	if (url_bloom == NULL)
+		url_bloom = bloom_create(BLOOM_SIZE);
 
-	scheme = p->scheme;
+	fp = fopen(filename, "r");
+	if (!fp) {
+		LOG(("Failed to open file '%s' for reading", filename));
+		return NSERROR_NOT_FOUND;
+	}
 
-	matched_cookies = malloc(matched_cookies_size * 
-			sizeof(struct cookie_internal_data *));
-	if (!matched_cookies)
-		return NULL;
-
-#define GROW_MATCHED_COOKIES						\
-	do {								\
-		if (count == matched_cookies_size) {			\
-			struct cookie_internal_data **temp;		\
-			temp = realloc(matched_cookies,			\
-				(matched_cookies_size + 20) *		\
-				sizeof(struct cookie_internal_data *));	\
-									\
-			if (temp == NULL) {				\
-				free(ret);				\
-				free(matched_cookies);			\
-				return NULL;				\
-			}						\
-									\
-			matched_cookies = temp;				\
-			matched_cookies_size += 20;			\
-		}							\
-	} while(0)
+	if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) {
+		fclose(fp);
+		return NSERROR_NEED_DATA;
+	}
 
-	ret = malloc(ret_alloc);
-	if (!ret) {
-		free(matched_cookies);
-		return NULL;
+	version = atoi(s);
+	if (version < MIN_URL_FILE_VERSION) {
+		LOG(("Unsupported URL file version."));
+		fclose(fp);
+		return NSERROR_INVALID;
+	}
+	if (version > URL_FILE_VERSION) {
+		LOG(("Unknown URL file version."));
+		fclose(fp);
+		return NSERROR_INVALID;
 	}
 
-	ret[0] = '\0';
+	while (fgets(host, sizeof host, fp)) {
+		/* get the hostname */
+		length = strlen(host) - 1;
+		host[length] = '\0';
 
-	path_lwc = nsurl_get_component(url, NSURL_PATH);
-	if (path_lwc == NULL) {
-		free(ret);
-		free(matched_cookies);
-		return NULL;
-	}
-	path = lwc_string_data(path_lwc);
-	lwc_string_unref(path_lwc);
+		/* skip data that has ended up with a host of '' */
+		if (length == 0) {
+			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+				break;
+			urls = atoi(s);
+			/* Eight fields/url */
+			for (i = 0; i < (8 * urls); i++) {
+				if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+					break;
+			}
+			continue;
+		}
 
-	now = time(NULL);
+		/* read number of URLs */
+		if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+			break;
+		urls = atoi(s);
 
-	if (*(p->segment) != '\0') {
-		/* Match exact path, unless directory, when prefix matching
-		 * will handle this case for us. */
-		for (q = p->parent->children; q; q = q->next) {
-			if (strcmp(q->segment, p->segment))
-				continue;
+		/* no URLs => try next host */
+		if (urls == 0) {
+			LOG(("No URLs for '%s'", host));
+			continue;
+		}
 
-			/* Consider all cookies associated with
-			 * this exact path */
-			for (c = q->cookies; c; c = c->next) {
-				if (c->expires != -1 && c->expires < now)
-					/* cookie has expired => ignore */
-					continue;
+		h = urldb_add_host(host);
+		if (!h) {
+			LOG(("Failed adding host: '%s'", host));
+			fclose(fp);
+			return NSERROR_NOMEM;
+		}
 
-				if (c->secure && lwc_string_isequal(
-							q->scheme,
-							corestring_lwc_https,
-							&match) &&
-						match == false)
-					/* secure cookie for insecure host.
-					 * ignore */
-					continue;
+		/* load the non-corrupt data */
+		for (i = 0; i < urls; i++) {
+			struct path_data *p = NULL;
+			char scheme[64], ports[10];
+			char url[64 + 3 + 256 + 6 + 4096 + 1];
+			unsigned int port;
+			bool is_file = false;
+			nsurl *nsurl;
+			lwc_string *scheme_lwc, *fragment_lwc;
+			char *path_query;
+			size_t len;
 
-				if (c->http_only && !include_http_only)
-					/* Ignore HttpOnly */
-					continue;
+			if (!fgets(scheme, sizeof scheme, fp))
+				break;
+			length = strlen(scheme) - 1;
+			scheme[length] = '\0';
 
-				matched_cookies[count++] = c;
+			if (!fgets(ports, sizeof ports, fp))
+				break;
+			length = strlen(ports) - 1;
+			ports[length] = '\0';
+			port = atoi(ports);
 
-				GROW_MATCHED_COOKIES;
+			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+				break;
+			length = strlen(s) - 1;
+			s[length] = '\0';
 
-				if (c->version < (unsigned int)version)
-					version = c->version;
+			if (!strcasecmp(host, "localhost") &&
+					!strcasecmp(scheme, "file"))
+				is_file = true;
 
-				c->last_used = now;
+			snprintf(url, sizeof url, "%s://%s%s%s%s",
+					scheme,
+					/* file URLs have no host */
+					(is_file ? "" : host),
+					(port ? ":" : ""),
+					(port ? ports : ""),
+					s);
 
-				cookie_manager_add((struct cookie_data *)c);
+			/* TODO: store URLs in pre-parsed state, and make
+			 *       a nsurl_load to generate the nsurl more
+			 *       swiftly.
+			 *       Need a nsurl_save too.
+			 */
+			if (nsurl_create(url, &nsurl) != NSERROR_OK) {
+				LOG(("Failed inserting '%s'", url));
+				fclose(fp);
+				return NSERROR_NOMEM;
 			}
-		}
-	}
 
-	/* Now consider cookies whose paths prefix-match ours */
-	for (p = p->parent; p; p = p->parent) {
-		/* Find directory's path entry(ies) */
-		/* There are potentially multiple due to differing schemes */
-		for (q = p->children; q; q = q->next) {
-			if (*(q->segment) != '\0')
-				continue;
+			if (url_bloom != NULL) {
+				uint32_t hash = nsurl_hash(nsurl);
+				bloom_insert_hash(url_bloom, hash);
+			}
 
-			for (c = q->cookies; c; c = c->next) {
-				if (c->expires != -1 && c->expires < now)
-					/* cookie has expired => ignore */
-					continue;
+			/* Copy and merge path/query strings */
+			if (nsurl_get(nsurl, NSURL_PATH | NSURL_QUERY,
+					&path_query, &len) != NSERROR_OK) {
+				LOG(("Failed inserting '%s'", url));
+				fclose(fp);
+				return NSERROR_NOMEM;
+			}
 
-				if (c->secure && lwc_string_isequal(
-							q->scheme,
-							corestring_lwc_https,
-							&match) &&
-						match == false)
-					/* Secure cookie for insecure server
-					 * => ignore */
-					continue;
+			scheme_lwc = nsurl_get_component(nsurl, NSURL_SCHEME);
+			fragment_lwc = nsurl_get_component(nsurl,
+					NSURL_FRAGMENT);
+			p = urldb_add_path(scheme_lwc, port, h, path_query,
+					fragment_lwc, nsurl);
+			if (!p) {
+				LOG(("Failed inserting '%s'", url));
+				fclose(fp);
+				return NSERROR_NOMEM;
+			}
+			nsurl_unref(nsurl);
+			lwc_string_unref(scheme_lwc);
+			if (fragment_lwc != NULL)
+				lwc_string_unref(fragment_lwc);
 
-				matched_cookies[count++] = c;
+			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+				break;
+			if (p)
+				p->urld.visits = (unsigned int)atoi(s);
 
-				GROW_MATCHED_COOKIES;
+			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+				break;
+			if (p)
+				p->urld.last_visit = (time_t)atoi(s);
 
-				if (c->version < (unsigned int) version)
-					version = c->version;
+			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+				break;
+			if (p)
+				p->urld.type = (content_type)atoi(s);
 
-				c->last_used = now;
+			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+				break;
 
-				cookie_manager_add((struct cookie_data *)c);
+
+			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+				break;
+			length = strlen(s) - 1;
+			if (p && length > 0) {
+				s[length] = '\0';
+				p->urld.title = malloc(length + 1);
+				if (p->urld.title)
+					memcpy(p->urld.title, s, length + 1);
 			}
 		}
+	}
 
-		if (!p->parent) {
-			/* No parent, so bail here. This can't go in
-			 * the loop exit condition as we also want to
-			 * process the top-level node.
-                         *
-                         * If p->parent is NULL then p->cookies are
-                         * the domain cookies and thus we don't even
-                         * try matching against them.
-                         */
-			break;
-		}
+	fclose(fp);
+	LOG(("Successfully loaded URL file"));
+#undef MAXIMUM_URL_LENGTH
 
-		/* Consider p itself - may be the result of Path=/foo */
-		for (c = p->cookies; c; c = c->next) {
-			if (c->expires != -1 && c->expires < now)
-				/* cookie has expired => ignore */
-				continue;
+	return NSERROR_OK;
+}
 
-			/* Ensure cookie path is a prefix of the resource */
-			if (strncmp(c->path, path, strlen(c->path)) != 0)
-				/* paths don't match => ignore */
-				continue;
+/* exported interface documented in content/urldb.h */
+nserror urldb_save(const char *filename)
+{
+	FILE *fp;
+	int i;
 
-			if (c->secure && lwc_string_isequal(p->scheme,
-						corestring_lwc_https,
-						&match) &&
-					match == false)
-				/* Secure cookie for insecure server
-				 * => ignore */
-				continue;
+	assert(filename);
 
-			matched_cookies[count++] = c;
+	fp = fopen(filename, "w");
+	if (!fp) {
+		LOG(("Failed to open file '%s' for writing", filename));
+		return NSERROR_SAVE_FAILED;
+	}
 
-			GROW_MATCHED_COOKIES;
+	/* file format version number */
+	fprintf(fp, "%d\n", URL_FILE_VERSION);
 
-			if (c->version < (unsigned int) version)
-				version = c->version;
+	for (i = 0; i != NUM_SEARCH_TREES; i++) {
+		urldb_save_search_tree(search_trees[i], fp);
+	}
 
-			c->last_used = now;
+	fclose(fp);
 
-			cookie_manager_add((struct cookie_data *)c);
-		}
+	return NSERROR_OK;
+}
 
-	}
 
-	/* Finally consider domain cookies for hosts which domain match ours */
-	for (h = (const struct host_part *)p; h && h != &db_root;
-			h = h->parent) {
-		for (c = h->paths.cookies; c; c = c->next) {
-			if (c->expires != -1 && c->expires < now)
-				/* cookie has expired => ignore */
-				continue;
+/* exported interface documented in content/urldb.h */
+void urldb_set_url_persistence(nsurl *url, bool persist)
+{
+	struct path_data *p;
 
-			/* Ensure cookie path is a prefix of the resource */
-			if (strncmp(c->path, path, strlen(c->path)) != 0)
-				/* paths don't match => ignore */
-				continue;
+	assert(url);
 
-			if (c->secure && lwc_string_isequal(scheme,
-						corestring_lwc_https,
-						&match) &&
-					match == false)
-				/* secure cookie for insecure host. ignore */
-				continue;
+	p = urldb_find_url(url);
+	if (!p)
+		return;
 
-			matched_cookies[count++] = c;
+	p->persistent = persist;
+}
 
-			GROW_MATCHED_COOKIES;
 
-			if (c->version < (unsigned int)version)
-				version = c->version;
+/* exported interface documented in content/urldb.h */
+bool urldb_add_url(nsurl *url)
+{
+	struct host_part *h;
+	struct path_data *p;
+	lwc_string *scheme;
+	lwc_string *port;
+	lwc_string *host;
+	lwc_string *fragment;
+	const char *host_str;
+	char *path_query = NULL;
+	size_t len;
+	bool match;
+	unsigned int port_int;
 
-			c->last_used = now;
+	assert(url);
 
-			cookie_manager_add((struct cookie_data *)c);
-		}
-	}
+	if (url_bloom == NULL)
+		url_bloom = bloom_create(BLOOM_SIZE);
 
-	if (count == 0) {
-		/* No cookies found */
-		free(ret);
-		free(matched_cookies);
-		return NULL;
+	if (url_bloom != NULL) {
+		uint32_t hash = nsurl_hash(url);
+		bloom_insert_hash(url_bloom, hash);
 	}
 
-	/* and build output string */
-	if (version > COOKIE_NETSCAPE) {
-		sprintf(ret, "$Version=%d", version);
-		ret_used = strlen(ret) + 1;
+	/* Copy and merge path/query strings */
+	if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &path_query, &len) !=
+			NSERROR_OK) {
+		return false;
 	}
+	assert(path_query != NULL);
 
-	for (i = 0; i < count; i++) {
-		if (!urldb_concat_cookie(matched_cookies[i], version,
-				&ret_used, &ret_alloc, &ret)) {
-			free(ret);
-			free(matched_cookies);
-			return NULL;
-		}
+	scheme = nsurl_get_component(url, NSURL_SCHEME);
+	if (scheme == NULL) {
+		free(path_query);
+		return false;
 	}
 
-	if (version == COOKIE_NETSCAPE) {
-		/* Old-style cookies => no version & skip "; " */
-		memmove(ret, ret + 2, ret_used - 2);
-		ret_used -= 2;
+	host = nsurl_get_component(url, NSURL_HOST);
+	if (host != NULL) {
+		host_str = lwc_string_data(host);
+		lwc_string_unref(host);
+
+	} else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) ==
+			lwc_error_ok && match == true) {
+		host_str = "localhost";
+
+	} else {
+		lwc_string_unref(scheme);
+		free(path_query);
+		return false;
 	}
 
-	/* Now, shrink the output buffer to the required size */
-	{
-		char *temp = realloc(ret, ret_used);
-		if (!temp) {
-			free(ret);
-			free(matched_cookies);
-			return NULL;
-		}
+	fragment = nsurl_get_component(url, NSURL_FRAGMENT);
 
-		ret = temp;
+	port = nsurl_get_component(url, NSURL_PORT);
+	if (port != NULL) {
+		port_int = atoi(lwc_string_data(port));
+		lwc_string_unref(port);
+	} else {
+		port_int = 0;
 	}
 
-	free(matched_cookies);
+	/* Get host entry */
+	h = urldb_add_host(host_str);
 
-	return ret;
+	/* Get path entry */
+	p = (h != NULL) ? urldb_add_path(scheme, port_int, h, path_query,
+			fragment, url) : NULL;
 
-#undef GROW_MATCHED_COOKIES
+	lwc_string_unref(scheme);
+	if (fragment != NULL)
+		lwc_string_unref(fragment);
+
+	return (p != NULL);
 }
 
-/**
- * Parse Set-Cookie header and insert cookie(s) into database
- *
- * \param header Header to parse, with Set-Cookie: stripped
- * \param url URL being fetched
- * \param referer Referring resource, or 0 for verifiable transaction
- * \return true on success, false otherwise
- */
-bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer)
+
+/* exported interface documented in content/urldb.h */
+void urldb_set_url_title(nsurl *url, const char *title)
 {
-	const char *cur = header, *end;
-	lwc_string *path, *host, *scheme;
-	nsurl *urlt;
-	bool match;
+	struct path_data *p;
+	char *temp;
 
-	assert(url && header);
+	assert(url && title);
 
-	/* Get defragmented URL, as 'urlt' */
-	if (nsurl_defragment(url, &urlt) != NSERROR_OK)
-		return NULL;
+	p = urldb_find_url(url);
+	if (!p)
+		return;
 
-	scheme = nsurl_get_component(url, NSURL_SCHEME);
-	if (scheme == NULL) {
-		nsurl_unref(urlt);
-		return false;
-	}
+	temp = strdup(title);
+	if (!temp)
+		return;
 
-	path = nsurl_get_component(url, NSURL_PATH);
-	if (path == NULL) {
-		lwc_string_unref(scheme);
-		nsurl_unref(urlt);
-		return false;
-	}
+	free(p->urld.title);
+	p->urld.title = temp;
+}
 
-	host = nsurl_get_component(url, NSURL_HOST);
-	if (host == NULL) {
-		lwc_string_unref(path);
-		lwc_string_unref(scheme);
-		nsurl_unref(urlt);
-		return false;
-	}
 
-	if (referer) {
-		lwc_string *rhost;
+/* exported interface documented in content/urldb.h */
+void urldb_set_url_content_type(nsurl *url, content_type type)
+{
+	struct path_data *p;
 
-		/* Ensure that url's host name domain matches
-		 * referer's (4.3.5) */
-		rhost = nsurl_get_component(referer, NSURL_HOST);
-		if (rhost == NULL) {
-			goto error;
-		}
+	assert(url);
 
-		/* Domain match host names */
-		if (lwc_string_isequal(host, rhost, &match) == lwc_error_ok &&
-				match == false) {
-			const char *hptr;
-			const char *rptr;
-			const char *dot;
-			const char *host_data = lwc_string_data(host);
-			const char *rhost_data = lwc_string_data(rhost);
+	p = urldb_find_url(url);
+	if (!p)
+		return;
 
-			/* Ensure neither host nor rhost are IP addresses */
-			if (urldb__host_is_ip_address(host_data) ||
-					urldb__host_is_ip_address(rhost_data)) {
-				/* IP address, so no partial match */
-				lwc_string_unref(rhost);
-				goto error;
-			}
+	p->urld.type = type;
+}
 
-			/* Not exact match, so try the following:
-			 * 
-			 * 1) Find the longest common suffix of host and rhost
-			 *    (may be all of host/rhost)
-			 * 2) Discard characters from the start of the suffix
-			 *    until the suffix starts with a dot
-			 *    (prevents foobar.com matching bar.com)
-			 * 3) Ensure the suffix is non-empty and contains 
-			 *    embedded dots (to avoid permitting .com as a 
-			 *    suffix)
-			 *
-			 * Note that the above in no way resembles the
-			 * domain matching algorithm found in RFC2109.
-			 * It does, however, model the real world rather
-			 * more accurately.
-			 */
 
-			/** \todo In future, we should consult a TLD service
-			 * instead of just looking for embedded dots.
-			 */
+/* exported interface documented in content/urldb.h */
+void urldb_update_url_visit_data(nsurl *url)
+{
+	struct path_data *p;
 
-			hptr = host_data + lwc_string_length(host) - 1;
-			rptr = rhost_data + lwc_string_length(rhost) - 1;
+	assert(url);
 
-			/* 1 */
-			while (hptr >= host_data && rptr >= rhost_data) {
-				if (*hptr != *rptr)
-					break;
-				hptr--;
-				rptr--;
-			}
-			/* Ensure we end up pointing at the start of the 
-			 * common suffix. The above loop will exit pointing
-			 * to the byte before the start of the suffix. */
-			hptr++;
+	p = urldb_find_url(url);
+	if (!p)
+		return;
 
-			/* 2 */
-			while (*hptr != '\0' && *hptr != '.')
-				hptr++;
+	p->urld.last_visit = time(NULL);
+	p->urld.visits++;
+}
 
-			/* 3 */
-			if (*hptr == '\0' || 
-				(dot = strchr(hptr + 1, '.')) == NULL ||
-					*(dot + 1) == '\0') {
-				lwc_string_unref(rhost);
-				goto error;
-			}
-		}
 
-		lwc_string_unref(rhost);
-	}
+/* exported interface documented in content/urldb.h */
+void urldb_reset_url_visit_data(nsurl *url)
+{
+	struct path_data *p;
 
-	end = cur + strlen(cur) - 2 /* Trailing CRLF */;
+	assert(url);
 
-	do {
-		struct cookie_internal_data *c;
-		char *dot;
-		size_t len;
+	p = urldb_find_url(url);
+	if (!p)
+		return;
 
-		c = urldb_parse_cookie(url, &cur);
-		if (!c) {
-			/* failed => stop parsing */
-			goto error;
-		}
+	p->urld.last_visit = (time_t)0;
+	p->urld.visits = 0;
+}
 
-		/* validate cookie */
 
-		/* 4.2.2:i Cookie must have NAME and VALUE */
-		if (!c->name || !c->value) {
-			urldb_free_cookie(c);
-			goto error;
-		}
+/* exported interface documented in content/urldb.h */
+const struct url_data *urldb_get_url_data(nsurl *url)
+{
+	struct path_data *p;
+	struct url_internal_data *u;
 
-		/* 4.3.2:i Cookie path must be a prefix of URL path */
-		len = strlen(c->path);
-		if (len > lwc_string_length(path) ||
-				strncmp(c->path, lwc_string_data(path),
-						len) != 0) {
-			urldb_free_cookie(c);
-			goto error;
-		}
+	assert(url);
 
-		/* 4.3.2:ii Cookie domain must contain embedded dots */
-		dot = strchr(c->domain + 1, '.');
-		if (!dot || *(dot + 1) == '\0') {
-			/* no embedded dots */
-			urldb_free_cookie(c);
-			goto error;
-		}
+	p = urldb_find_url(url);
+	if (!p)
+		return NULL;
 
-		/* Domain match fetch host with cookie domain */
-		if (strcasecmp(lwc_string_data(host), c->domain) != 0) {
-			int hlen, dlen;
-			char *domain = c->domain;
+	u = &p->urld;
 
-			/* c->domain must be a domain cookie here because:
-			 * c->domain is either:
-			 *   + specified in the header as a domain cookie
-			 *     (non-domain cookies in the header are ignored
-			 *      by urldb_parse_cookie / urldb_parse_avpair)
-			 *   + defaulted to the URL's host part
-			 *     (by urldb_parse_cookie if no valid domain was
-			 *      specified in the header)
-			 *
-			 * The latter will pass the strcasecmp above, which 
-			 * leaves the former (i.e. a domain cookie)
-			 */
-			assert(c->domain[0] == '.');
+	return (const struct url_data *) u;
+}
 
-			/* 4.3.2:iii */
-			if (urldb__host_is_ip_address(lwc_string_data(host))) {
-				/* IP address, so no partial match */
-				urldb_free_cookie(c);
-				goto error;
-			}
 
-			hlen = lwc_string_length(host);
-			dlen = strlen(c->domain);
+/* exported interface documented in content/urldb.h */
+nsurl *urldb_get_url(nsurl *url)
+{
+	struct path_data *p;
 
-			if (hlen <= dlen && hlen != dlen - 1) {
-				/* Partial match not possible */
-				urldb_free_cookie(c);
-				goto error;
-			}
+	assert(url);
 
-			if (hlen == dlen - 1) {
-				/* Relax matching to allow
-				 * host a.com to match .a.com */
-				domain++;
-				dlen--;
-			}
+	p = urldb_find_url(url);
+	if (!p)
+		return NULL;
 
-			if (strcasecmp(lwc_string_data(host) + (hlen - dlen),
-					domain)) {
-				urldb_free_cookie(c);
-				goto error;
-			}
+	return p->url;
+}
 
-			/* 4.3.2:iv Ensure H contains no dots
-			 *
-			 * If you believe the spec, H should contain no
-			 * dots in _any_ cookie. Unfortunately, however,
-			 * reality differs in that many sites send domain
-			 * cookies of the form .foo.com from hosts such
-			 * as bar.bat.foo.com and then expect domain
-			 * matching to work. Thus we have to do what they
-			 * expect, regardless of any potential security
-			 * implications.
-			 *
-			 * This is what code conforming to the spec would
-			 * look like:
-			 *
-			 * for (int i = 0; i < (hlen - dlen); i++) {
-			 *	if (host[i] == '.') {
-			 *		urldb_free_cookie(c);
-			 *		goto error;
-			 *	}
-			 * }
-			 */
-		}
 
-		/* Now insert into database */
-		if (!urldb_insert_cookie(c, scheme, urlt))
-			goto error;
-	} while (cur < end);
+/* exported interface documented in content/urldb.h */
+void urldb_set_auth_details(nsurl *url, const char *realm, const char *auth)
+{
+	struct path_data *p, *pi;
+	struct host_part *h;
+	struct prot_space_data *space, *space_alloc;
+	char *realm_alloc, *auth_alloc;
+	bool match;
 
-	lwc_string_unref(host);
-	lwc_string_unref(path);
-	lwc_string_unref(scheme);
-	nsurl_unref(urlt);
+	assert(url && realm && auth);
 
-	return true;
+	/* add url, in case it's missing */
+	urldb_add_url(url);
 
-error:
-	lwc_string_unref(host);
-	lwc_string_unref(path);
-	lwc_string_unref(scheme);
-	nsurl_unref(urlt);
+	p = urldb_find_url(url);
 
-	return false;
-}
+	if (!p)
+		return;
 
-/**
- * Parse a cookie
- *
- * \param url URL being fetched
- * \param cookie Pointer to cookie string (updated on exit)
- * \return Pointer to cookie structure (on heap, caller frees) or NULL
- */
-struct cookie_internal_data *urldb_parse_cookie(nsurl *url,
-		const char **cookie)
-{
-	struct cookie_internal_data *c;
-	const char *cur;
-	char name[1024], value[4096];
-	char *n = name, *v = value;
-	bool in_value = false;
-	bool had_value_data = false;
-	bool value_verbatim = false;
-	bool quoted = false;
-	bool was_quoted = false;
+	/* Search for host_part */
+	for (pi = p; pi->parent != NULL; pi = pi->parent)
+		;
+	h = (struct host_part *)pi;
 
-	assert(url && cookie && *cookie);
+	/* Search if given URL belongs to a protection space we already know of. */
+	for (space = h->prot_space; space; space = space->next) {
+		if (!strcmp(space->realm, realm) &&
+				lwc_string_isequal(space->scheme, p->scheme,
+						&match) == lwc_error_ok &&
+				match == true &&
+				space->port == p->port)
+			break;
+	}
 
-	c = calloc(1, sizeof(struct cookie_internal_data));
-	if (c == NULL)
-		return NULL;
+	if (space != NULL) {
+		/* Overrule existing auth. */
+		free(space->auth);
+		space->auth = strdup(auth);
+	} else {
+		/* Create a new protection space. */
+		space = space_alloc = malloc(sizeof(struct prot_space_data));
+		realm_alloc = strdup(realm);
+		auth_alloc = strdup(auth);
 
-	c->expires = -1;
+		if (!space_alloc || !realm_alloc || !auth_alloc) {
+			free(space_alloc);
+			free(realm_alloc);
+			free(auth_alloc);
+			return;
+		}
 
-	name[0] = '\0';
-	value[0] = '\0';
+		space->scheme = lwc_string_ref(p->scheme);
+		space->port = p->port;
+		space->realm = realm_alloc;
+		space->auth = auth_alloc;
+		space->next = h->prot_space;
+		h->prot_space = space;
+	}
 
-	for (cur = *cookie; *cur; cur++) {
-		if (*cur == '\r' && *(cur + 1) == '\n') {
-			/* End of header */
-			if (quoted) {
-				/* Unmatched quote encountered */
+	p->prot_space = space;
+}
 
-				/* Match Firefox 2.0.0.11 */
-				value[0] = '\0';
 
-#if 0
-				/* This is what IE6/7 & Safari 3 do */
-				/* Opera 9.25 discards the entire cookie */
-
-				/* Shuffle value up by 1 */
-				memmove(value + 1, value, 
-					min(v - value, sizeof(value) - 2));
-				v++;
-				/* And insert " character at the start */
-				value[0] = '"';
-
-				/* Now, run forwards through the value
-				 * looking for a semicolon. If one exists,
-				 * terminate the value at this point. */
-				for (char *s = value; s < v; s++) {
-					if (*s == ';') {
-						*s = '\0';
-						v = s;
-						break;
-					}
-				}
-#endif
-			}
+/* exported interface documented in content/urldb.h */
+const char *urldb_get_auth_details(nsurl *url, const char *realm)
+{
+	struct path_data *p, *p_cur, *p_top;
 
-			break;
-		} else if (*cur == '\r') {
-			/* Spurious linefeed */
-			continue;	
-		} else if (*cur == '\n') {
-			/* Spurious newline */
-			continue;
-		}
+	assert(url);
 
-		if (in_value && !had_value_data) {
-			if (*cur == ' ' || *cur == '\t') {
-				/* Strip leading whitespace from value */
-				continue;
-			} else {
-				had_value_data = true;
+	/* add to the db, so our lookup will work */
+	urldb_add_url(url);
 
-				/* Value is taken verbatim if first non-space 
-				 * character is not a " */
-				if (*cur != '"') {
-					value_verbatim = true;
-				}
-			}
+	p = urldb_find_url(url);
+	if (!p)
+		return NULL;
+
+	/* Check for any auth details attached to the path_data node or any of
+	 * its parents.
+	 */
+	for (p_cur = p; p_cur != NULL; p_top = p_cur, p_cur = p_cur->parent) {
+		if (p_cur->prot_space) {
+			return p_cur->prot_space->auth;
 		}
+	}
 
-		if (in_value && !value_verbatim && (*cur == '"')) {
-			/* Only non-verbatim values may be quoted */
-			if (cur == *cookie || *(cur - 1) != '\\') {
-				/* Only unescaped quotes count */
-				was_quoted = quoted;
-				quoted = !quoted;
+	/* Only when we have a realm (and canonical root of given URL), we can
+	 * uniquely locate the protection space.
+	 */
+	if (realm != NULL) {
+		const struct host_part *h = (const struct host_part *)p_top;
+		const struct prot_space_data *space;
+		bool match;
 
-				continue;
+		/* Search for a possible matching protection space. */
+		for (space = h->prot_space; space != NULL;
+		     space = space->next) {
+			if (!strcmp(space->realm, realm) &&
+			    lwc_string_isequal(space->scheme,
+					       p->scheme, &match) ==
+			    lwc_error_ok &&
+			    match == true &&
+			    space->port == p->port) {
+				p->prot_space = space;
+				return p->prot_space->auth;
 			}
 		}
+	}
 
-		if (!quoted && !in_value && *cur == '=') {
-			/* First equals => attr-value separator */
-			in_value = true;
-			continue;
-		}
+	return NULL;
+}
 
-		if (!quoted && (was_quoted || *cur == ';')) {
-			/* Semicolon or after quoted value 
-			 * => end of current avpair */
 
-			/* NUL-terminate tokens */
-			*n = '\0';
-			*v = '\0';
+/* exported interface documented in content/urldb.h */
+void urldb_set_cert_permissions(nsurl *url, bool permit)
+{
+	struct path_data *p;
+	struct host_part *h;
 
-			if (!urldb_parse_avpair(c, name, value, was_quoted)) {
-				/* Memory exhausted */
-				urldb_free_cookie(c);
-				return NULL;
-			}
+	assert(url);
 
-			/* And reset to start */
-			n = name;
-			v = value;
-			in_value = false;
-			had_value_data = false;
-			value_verbatim = false;
-			was_quoted = false;
+	/* add url, in case it's missing */
+	urldb_add_url(url);
 
-			/* Now, if the current input is anything other than a
-			 * semicolon, we must be sure to reprocess it */
-			if (*cur != ';') {
-				cur--;
-			}
+	p = urldb_find_url(url);
+	if (!p)
+		return;
 
-			continue;
-		}
+	for (; p && p->parent; p = p->parent)
+		/* do nothing */;
+	assert(p);
 
-		/* And now handle commas. These are a pain as they may mean
-		 * any of the following:
-		 *
-		 * + End of cookie
-		 * + Day separator in Expires avpair
-		 * + (Invalid) comma in unquoted value
-		 *
-		 * Therefore, in order to handle all 3 cases (2 and 3 are
-		 * identical, the difference being that 2 is in the spec and
-		 * 3 isn't), we need to determine where the comma actually
-		 * lies. We use the following heuristic:
-		 *
-		 *   Given a comma at the current input position, find the
-		 *   immediately following semicolon (or end of input if none
-		 *   found). Then, consider the input characters between
-		 *   these two positions. If any of these characters is an
-		 *   '=', we must assume that the comma signified the end of
-		 *   the current cookie.
-		 *
-		 * This holds as the first avpair of any cookie must be
-		 * NAME=VALUE, so the '=' is guaranteed to appear in the
-		 * case where the comma marks the end of a cookie.
-		 *
-		 * This will fail, however, in the case where '=' appears in
-		 * the value of the current avpair after the comma or the
-		 * subsequent cookie does not start with NAME=VALUE. Neither
-		 * of these is particularly likely and if they do occur, the
-		 * website is more broken than we can be bothered to handle.
-		 */
-		if (!quoted && *cur == ',') {
-			/* Find semi-colon, if any */
-			const char *p;
-			const char *semi = strchr(cur + 1, ';');
-			if (!semi)
-				semi = cur + strlen(cur) - 2 /* CRLF */;
+	h = (struct host_part *)p;
 
-			/* Look for equals sign between comma and semi */
-			for (p = cur + 1; p < semi; p++)
-				if (*p == '=')
-					break;
+	h->permit_invalid_certs = permit;
+}
 
-			if (p == semi) {
-				/* none found => comma internal to value */
-				/* do nothing */
-			} else {
-				/* found one => comma marks end of cookie */
-				cur++;
-				break;
-			}
-		}
 
-		/* Accumulate into buffers, always leaving space for a NUL */
-		/** \todo is silently truncating overlong names/values wise? */
-		if (!in_value) {
-			if (n < name + (sizeof(name) - 1))
-				*n++ = *cur;
-		} else {
-			if (v < value + (sizeof(value) - 1))
-				*v++ = *cur;
-		}
-	}
+/* exported interface documented in content/urldb.h */
+bool urldb_get_cert_permissions(nsurl *url)
+{
+	struct path_data *p;
+	const struct host_part *h;
 
-	/* Parse final avpair */
-	*n = '\0';
-	*v = '\0';
+	assert(url);
 
-	if (!urldb_parse_avpair(c, name, value, was_quoted)) {
-		/* Memory exhausted */
-		urldb_free_cookie(c);
-		return NULL;
-	}
+	p = urldb_find_url(url);
+	if (!p)
+		return false;
 
-	/* Now fix-up default values */
-	if (c->domain == NULL) {
-		lwc_string *host = nsurl_get_component(url, NSURL_HOST);
-		if (host == NULL) {
-			urldb_free_cookie(c);
-			return NULL;
-		}
-		c->domain = strdup(lwc_string_data(host));
-		lwc_string_unref(host);
-	}
+	for (; p && p->parent; p = p->parent)
+		/* do nothing */;
+	assert(p);
 
-	if (c->path == NULL) {
-		const char *path_data;
-		char *path, *slash;
-		lwc_string *path_lwc;
+	h = (const struct host_part *)p;
 
-		path_lwc = nsurl_get_component(url, NSURL_PATH);
-		if (path_lwc == NULL) {
-			urldb_free_cookie(c);
-			return NULL;
-		}
-		path_data = lwc_string_data(path_lwc);
+	return h->permit_invalid_certs;
+}
 
-		/* Strip leafname and trailing slash (4.3.1) */
-		slash = strrchr(path_data, '/');
-		if (slash != NULL) {
-			/* Special case: retain first slash in path */
-			if (slash == path_data)
-				slash++;
 
-			slash = strndup(path_data, slash - path_data);
-			if (slash == NULL) {
-				lwc_string_unref(path_lwc);
-				urldb_free_cookie(c);
-				return NULL;
-			}
+/* exported interface documented in content/urldb.h */
+void urldb_set_thumbnail(nsurl *url, struct bitmap *bitmap)
+{
+	struct path_data *p;
 
-			path = slash;
-			lwc_string_unref(path_lwc);
-		} else {
-			path = strdup(lwc_string_data(path_lwc));
-			lwc_string_unref(path_lwc);
-			if (path == NULL) {
-				urldb_free_cookie(c);
-				return NULL;
-			}
-		}
+	assert(url);
 
-		c->path = path;
-	}
+	p = urldb_find_url(url);
+	if (!p)
+		return;
 
-	/* Write back current position */
-	*cookie = cur;
+	if (p->thumb && p->thumb != bitmap)
+		bitmap_destroy(p->thumb);
 
-	return c;
+	p->thumb = bitmap;
 }
 
-/**
- * Parse a cookie avpair
- *
- * \param c Cookie struct to populate
- * \param n Name component
- * \param v Value component
- * \param was_quoted Whether ::v was quoted in the input
- * \return true on success, false on memory exhaustion
- */
-bool urldb_parse_avpair(struct cookie_internal_data *c, char *n, char *v,
-		bool was_quoted)
+
+/* exported interface documented in content/urldb.h */
+struct bitmap *urldb_get_thumbnail(nsurl *url)
 {
-	int vlen;
+	struct path_data *p;
 
-	assert(c && n && v);
+	assert(url);
 
-	/* Strip whitespace from start of name */
-	for (; *n; n++) {
-		if (*n != ' ' && *n != '\t')
-			break;
-	}
+	p = urldb_find_url(url);
+	if (!p)
+		return NULL;
 
-	/* Strip whitespace from end of name */
-	for (vlen = strlen(n); vlen; vlen--) {
-		if (n[vlen] == ' ' || n[vlen] == '\t')
-			n[vlen] = '\0';
-		else
-			break;
-	}
+	return p->thumb;
+}
 
-	/* Strip whitespace from start of value */
-	for (; *v; v++) {
-		if (*v != ' ' && *v != '\t')
-			break;
-	}
 
-	/* Strip whitespace from end of value */
-	for (vlen = strlen(v); vlen; vlen--) {
-		if (v[vlen] == ' ' || v[vlen] == '\t')
-			v[vlen] = '\0';
-		else
-			break;
-	}
+/* exported interface documented in content/urldb.h */
+void urldb_iterate_partial(const char *prefix,
+		bool (*callback)(nsurl *url,
+		const struct url_data *data))
+{
+	char host[256];
+	char buf[260]; /* max domain + "www." */
+	const char *slash, *scheme_sep;
+	struct search_node *tree;
+	const struct host_part *h;
 
-	if (!c->comment && strcasecmp(n, "Comment") == 0) {
-		c->comment = strdup(v);
-		if (!c->comment)
-			return false;
-	} else if (!c->domain && strcasecmp(n, "Domain") == 0) {
-		if (v[0] == '.') {
-			/* Domain must start with a dot */
-			c->domain_from_set = true;
-			c->domain = strdup(v);
-			if (!c->domain)
-				return false;
-		}
-	} else if (strcasecmp(n, "Max-Age") == 0) {
-		int temp = atoi(v);
-		if (temp == 0)
-			/* Special case - 0 means delete */
-			c->expires = 0;
-		else
-			c->expires = time(NULL) + temp;
-	} else if (!c->path && strcasecmp(n, "Path") == 0) {
-		c->path_from_set = true;
-		c->path = strdup(v);
-		if (!c->path)
-			return false;
-	} else if (strcasecmp(n, "Version") == 0) {
-		c->version = atoi(v);
-	} else if (strcasecmp(n, "Expires") == 0) {
-		char *datenoday;
-		time_t expires;
+	assert(prefix && callback);
 
-		/* Strip dayname from date (these are hugely
-		 * variable and liable to break the parser.
-		 * They also serve no useful purpose) */
-		for (datenoday = v; *datenoday && !isdigit(*datenoday);
-				datenoday++)
-			; /* do nothing */
+	/* strip scheme */
+	scheme_sep = strstr(prefix, "://");
+	if (scheme_sep)
+		prefix = scheme_sep + 3;
 
-		expires = curl_getdate(datenoday, NULL);
-		if (expires == -1) {
-			/* assume we have an unrepresentable
-			 * date => force it to the maximum
-			 * possible value of a 32bit time_t
-			 * (this may break in 2038. We'll
-			 * deal with that once we come to
-			 * it) */
-			expires = (time_t)0x7fffffff;
-		}
-		c->expires = expires;
-	} else if (strcasecmp(n, "Secure") == 0) {
-		c->secure = true;
-	} else if (strcasecmp(n, "HttpOnly") == 0) {
-		c->http_only = true;
-	} else if (!c->name) {
-		c->name = strdup(n);
-		c->value = strdup(v);
-		c->value_was_quoted = was_quoted;
-		if (!c->name || !c->value)
-			return false;
-	}
+	slash = strchr(prefix, '/');
+	tree = urldb_get_search_tree(prefix);
 
-	return true;
-}
+	if (slash) {
+		/* if there's a slash in the input, then we can
+		 * assume that we're looking for a path */
+		snprintf(host, sizeof host, "%.*s",
+				(int) (slash - prefix), prefix);
 
-/**
- * Insert a cookie into the database
- *
- * \param c The cookie to insert
- * \param scheme URL scheme associated with cookie path
- * \param url URL (sans fragment) associated with cookie
- * \return true on success, false on memory exhaustion (c will be freed)
- */
-bool urldb_insert_cookie(struct cookie_internal_data *c, lwc_string *scheme,
-		nsurl *url)
-{
-	struct cookie_internal_data *d;
-	const struct host_part *h;
-	struct path_data *p;
-	time_t now = time(NULL);
+		h = urldb_search_find(tree, host);
+		if (!h) {
+			int len = slash - prefix;
 
-	assert(c);
+			if (len <= 3 || strncasecmp(host, "www.", 4) != 0) {
+				snprintf(buf, sizeof buf, "www.%s", host);
+				h = urldb_search_find(
+					search_trees[ST_DN + 'w' - 'a'],
+					buf);
+				if (!h)
+					return;
+			} else
+				return;
+		}
 
-	if (c->domain[0] == '.') {
-		h = urldb_search_find(
-			urldb_get_search_tree(&(c->domain[1])),
-			c->domain + 1);
-		if (!h) {
-			h = urldb_add_host(c->domain + 1);
-			if (!h) {
-				urldb_free_cookie(c);
-				return false;
-			}
+		if (h->paths.children) {
+			/* Have paths, iterate them */
+			urldb_iterate_partial_path(&h->paths, slash + 1,
+					callback);
 		}
 
-		p = (struct path_data *) &h->paths;
 	} else {
-		/* Need to have a URL and scheme, if it's not a domain cookie */
-		assert(url != NULL);
-		assert(scheme != NULL);
-
-		h = urldb_search_find(
-				urldb_get_search_tree(c->domain),
-				c->domain);
+		int len = strlen(prefix);
 
-		if (!h) {
-			h = urldb_add_host(c->domain);
-			if (!h) {
-				urldb_free_cookie(c);
-				return false;
-			}
-		}
+		/* looking for hosts */
+		if (!urldb_iterate_partial_host(tree, prefix, callback))
+			return;
 
-		/* find path */
-		p = urldb_add_path(scheme, 0, h,
-				strdup(c->path), NULL, url);
-		if (!p) {
-			urldb_free_cookie(c);
-			return false;
+		if (len <= 3 || strncasecmp(prefix, "www.", 4) != 0) {
+			/* now look for www.prefix */
+			snprintf(buf, sizeof buf, "www.%s", prefix);
+			if(!urldb_iterate_partial_host(
+					search_trees[ST_DN + 'w' - 'a'],
+					buf, callback))
+				return;
 		}
 	}
+}
 
-	/* add cookie */
-	for (d = p->cookies; d; d = d->next) {
-		if (!strcmp(d->domain, c->domain) &&
-				!strcmp(d->path, c->path) &&
-				!strcmp(d->name, c->name))
-			break;
-	}
 
-	if (d) {
-		if (c->expires != -1 && c->expires < now) {
-			/* remove cookie */
-			if (d->next)
-				d->next->prev = d->prev;
-			else
-				p->cookies_end = d->prev;
-			if (d->prev)
-				d->prev->next = d->next;
-			else
-				p->cookies = d->next;
+/* exported interface documented in content/urldb.h */
+void urldb_iterate_entries(bool (*callback)(nsurl *url,
+		const struct url_data *data))
+{
+	int i;
 
-			cookie_manager_remove((struct cookie_data *)d);
+	assert(callback);
 
-			urldb_free_cookie(d);
-			urldb_free_cookie(c);
-		} else {
-			/* replace d with c */
-			c->prev = d->prev;
-			c->next = d->next;
-			if (c->next)
-				c->next->prev = c;
-			else
-				p->cookies_end = c;
-			if (c->prev)
-				c->prev->next = c;
-			else
-				p->cookies = c;
+	for (i = 0; i < NUM_SEARCH_TREES; i++) {
+		if (!urldb_iterate_entries_host(search_trees[i],
+				callback, NULL))
+			break;
+	}
+}
 
-			cookie_manager_remove((struct cookie_data *)d);
-			urldb_free_cookie(d);
 
-			cookie_manager_add((struct cookie_data *)c);
-		}
-	} else {
-		c->prev = p->cookies_end;
-		c->next = NULL;
-		if (p->cookies_end)
-			p->cookies_end->next = c;
-		else
-			p->cookies = c;
-		p->cookies_end = c;
+/* exported interface documented in content/urldb.h */
+void urldb_iterate_cookies(bool (*callback)(const struct cookie_data *data))
+{
+	int i;
 
-		cookie_manager_add((struct cookie_data *)c);
-	}
+	assert(callback);
 
-	return true;
+	for (i = 0; i < NUM_SEARCH_TREES; i++) {
+		if (!urldb_iterate_entries_host(search_trees[i],
+				NULL, callback))
+			break;
+	}
 }
 
-/**
- * Free a cookie
- *
- * \param c The cookie to free
- */
-void urldb_free_cookie(struct cookie_internal_data *c)
+
+/* exported interface documented in content/urldb.h */
+bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer)
 {
-	assert(c);
+	const char *cur = header, *end;
+	lwc_string *path, *host, *scheme;
+	nsurl *urlt;
+	bool match;
 
-	free(c->comment);
-	free(c->domain);
-	free(c->path);
-	free(c->name);
-	free(c->value);
-	free(c);
-}
+	assert(url && header);
 
-/**
- * Concatenate a cookie into the provided buffer
- *
- * \param c Cookie to concatenate
- * \param version The version of the cookie string to output
- * \param used Pointer to amount of buffer used (updated)
- * \param alloc Pointer to allocated size of buffer (updated)
- * \param buf Pointer to Pointer to buffer (updated)
- * \return true on success, false on memory exhaustion
- */
-bool urldb_concat_cookie(struct cookie_internal_data *c, int version,
-		int *used, int *alloc, char **buf)
+	/* Get defragmented URL, as 'urlt' */
+	if (nsurl_defragment(url, &urlt) != NSERROR_OK)
+		return NULL;
+
+	scheme = nsurl_get_component(url, NSURL_SCHEME);
+	if (scheme == NULL) {
+		nsurl_unref(urlt);
+		return false;
+	}
+
+	path = nsurl_get_component(url, NSURL_PATH);
+	if (path == NULL) {
+		lwc_string_unref(scheme);
+		nsurl_unref(urlt);
+		return false;
+	}
+
+	host = nsurl_get_component(url, NSURL_HOST);
+	if (host == NULL) {
+		lwc_string_unref(path);
+		lwc_string_unref(scheme);
+		nsurl_unref(urlt);
+		return false;
+	}
+
+	if (referer) {
+		lwc_string *rhost;
+
+		/* Ensure that url's host name domain matches
+		 * referer's (4.3.5) */
+		rhost = nsurl_get_component(referer, NSURL_HOST);
+		if (rhost == NULL) {
+			goto error;
+		}
+
+		/* Domain match host names */
+		if (lwc_string_isequal(host, rhost, &match) == lwc_error_ok &&
+				match == false) {
+			const char *hptr;
+			const char *rptr;
+			const char *dot;
+			const char *host_data = lwc_string_data(host);
+			const char *rhost_data = lwc_string_data(rhost);
+
+			/* Ensure neither host nor rhost are IP addresses */
+			if (urldb__host_is_ip_address(host_data) ||
+					urldb__host_is_ip_address(rhost_data)) {
+				/* IP address, so no partial match */
+				lwc_string_unref(rhost);
+				goto error;
+			}
+
+			/* Not exact match, so try the following:
+			 *
+			 * 1) Find the longest common suffix of host and rhost
+			 *    (may be all of host/rhost)
+			 * 2) Discard characters from the start of the suffix
+			 *    until the suffix starts with a dot
+			 *    (prevents foobar.com matching bar.com)
+			 * 3) Ensure the suffix is non-empty and contains
+			 *    embedded dots (to avoid permitting .com as a
+			 *    suffix)
+			 *
+			 * Note that the above in no way resembles the
+			 * domain matching algorithm found in RFC2109.
+			 * It does, however, model the real world rather
+			 * more accurately.
+			 */
+
+			/** \todo In future, we should consult a TLD service
+			 * instead of just looking for embedded dots.
+			 */
+
+			hptr = host_data + lwc_string_length(host) - 1;
+			rptr = rhost_data + lwc_string_length(rhost) - 1;
+
+			/* 1 */
+			while (hptr >= host_data && rptr >= rhost_data) {
+				if (*hptr != *rptr)
+					break;
+				hptr--;
+				rptr--;
+			}
+			/* Ensure we end up pointing at the start of the
+			 * common suffix. The above loop will exit pointing
+			 * to the byte before the start of the suffix. */
+			hptr++;
+
+			/* 2 */
+			while (*hptr != '\0' && *hptr != '.')
+				hptr++;
+
+			/* 3 */
+			if (*hptr == '\0' ||
+				(dot = strchr(hptr + 1, '.')) == NULL ||
+					*(dot + 1) == '\0') {
+				lwc_string_unref(rhost);
+				goto error;
+			}
+		}
+
+		lwc_string_unref(rhost);
+	}
+
+	end = cur + strlen(cur) - 2 /* Trailing CRLF */;
+
+	do {
+		struct cookie_internal_data *c;
+		char *dot;
+		size_t len;
+
+		c = urldb_parse_cookie(url, &cur);
+		if (!c) {
+			/* failed => stop parsing */
+			goto error;
+		}
+
+		/* validate cookie */
+
+		/* 4.2.2:i Cookie must have NAME and VALUE */
+		if (!c->name || !c->value) {
+			urldb_free_cookie(c);
+			goto error;
+		}
+
+		/* 4.3.2:i Cookie path must be a prefix of URL path */
+		len = strlen(c->path);
+		if (len > lwc_string_length(path) ||
+				strncmp(c->path, lwc_string_data(path),
+						len) != 0) {
+			urldb_free_cookie(c);
+			goto error;
+		}
+
+		/* 4.3.2:ii Cookie domain must contain embedded dots */
+		dot = strchr(c->domain + 1, '.');
+		if (!dot || *(dot + 1) == '\0') {
+			/* no embedded dots */
+			urldb_free_cookie(c);
+			goto error;
+		}
+
+		/* Domain match fetch host with cookie domain */
+		if (strcasecmp(lwc_string_data(host), c->domain) != 0) {
+			int hlen, dlen;
+			char *domain = c->domain;
+
+			/* c->domain must be a domain cookie here because:
+			 * c->domain is either:
+			 *   + specified in the header as a domain cookie
+			 *     (non-domain cookies in the header are ignored
+			 *      by urldb_parse_cookie / urldb_parse_avpair)
+			 *   + defaulted to the URL's host part
+			 *     (by urldb_parse_cookie if no valid domain was
+			 *      specified in the header)
+			 *
+			 * The latter will pass the strcasecmp above, which
+			 * leaves the former (i.e. a domain cookie)
+			 */
+			assert(c->domain[0] == '.');
+
+			/* 4.3.2:iii */
+			if (urldb__host_is_ip_address(lwc_string_data(host))) {
+				/* IP address, so no partial match */
+				urldb_free_cookie(c);
+				goto error;
+			}
+
+			hlen = lwc_string_length(host);
+			dlen = strlen(c->domain);
+
+			if (hlen <= dlen && hlen != dlen - 1) {
+				/* Partial match not possible */
+				urldb_free_cookie(c);
+				goto error;
+			}
+
+			if (hlen == dlen - 1) {
+				/* Relax matching to allow
+				 * host a.com to match .a.com */
+				domain++;
+				dlen--;
+			}
+
+			if (strcasecmp(lwc_string_data(host) + (hlen - dlen),
+					domain)) {
+				urldb_free_cookie(c);
+				goto error;
+			}
+
+			/* 4.3.2:iv Ensure H contains no dots
+			 *
+			 * If you believe the spec, H should contain no
+			 * dots in _any_ cookie. Unfortunately, however,
+			 * reality differs in that many sites send domain
+			 * cookies of the form .foo.com from hosts such
+			 * as bar.bat.foo.com and then expect domain
+			 * matching to work. Thus we have to do what they
+			 * expect, regardless of any potential security
+			 * implications.
+			 *
+			 * This is what code conforming to the spec would
+			 * look like:
+			 *
+			 * for (int i = 0; i < (hlen - dlen); i++) {
+			 *	if (host[i] == '.') {
+			 *		urldb_free_cookie(c);
+			 *		goto error;
+			 *	}
+			 * }
+			 */
+		}
+
+		/* Now insert into database */
+		if (!urldb_insert_cookie(c, scheme, urlt))
+			goto error;
+	} while (cur < end);
+
+	lwc_string_unref(host);
+	lwc_string_unref(path);
+	lwc_string_unref(scheme);
+	nsurl_unref(urlt);
+
+	return true;
+
+error:
+	lwc_string_unref(host);
+	lwc_string_unref(path);
+	lwc_string_unref(scheme);
+	nsurl_unref(urlt);
+
+	return false;
+}
+
+
+/* exported interface documented in content/urldb.h */
+char *urldb_get_cookie(nsurl *url, bool include_http_only)
 {
-	/* Combined (A)BNF for the Cookie: request header:
-	 * 
-	 * CHAR           = <any US-ASCII character (octets 0 - 127)>
-	 * CTL            = <any US-ASCII control character
-	 *                  (octets 0 - 31) and DEL (127)>
-	 * CR             = <US-ASCII CR, carriage return (13)>
-	 * LF             = <US-ASCII LF, linefeed (10)> 
-	 * SP             = <US-ASCII SP, space (32)>
-	 * HT             = <US-ASCII HT, horizontal-tab (9)>
-	 * <">            = <US-ASCII double-quote mark (34)>
-	 *
-	 * CRLF           = CR LF
-	 *
-	 * LWS            = [CRLF] 1*( SP | HT )
-	 *
-	 * TEXT           = <any OCTET except CTLs,
-	 *                  but including LWS>
-	 *
-	 * token          = 1*<any CHAR except CTLs or separators>
-	 * separators     = "(" | ")" | "<" | ">" | "@"
-	 *                | "," | ";" | ":" | "\" | <">
-	 *                | "/" | "[" | "]" | "?" | "="
-	 *                | "{" | "}" | SP | HT
-	 *
-	 * quoted-string  = ( <"> *(qdtext | quoted-pair ) <"> )
-	 * qdtext         = <any TEXT except <">>
-	 * quoted-pair    = "\" CHAR
-	 *
-	 * attr            =       token
-	 * value           =       word
-	 * word            =       token | quoted-string
-	 *
-	 * cookie          =       "Cookie:" cookie-version
-	 *                         1*((";" | ",") cookie-value)
-	 * cookie-value    =       NAME "=" VALUE [";" path] [";" domain]
-	 * cookie-version  =       "$Version" "=" value
-	 * NAME            =       attr
-	 * VALUE           =       value
-	 * path            =       "$Path" "=" value
-	 * domain          =       "$Domain" "=" value
-	 *
-	 * A note on quoted-string handling:
-	 *   The cookie data stored in the db is verbatim (i.e. sans enclosing
-	 *   <">, if any, and with all quoted-pairs intact) thus all that we 
-	 *   need to do here is ensure that value strings which were quoted
-	 *   in Set-Cookie or which include any of the separators are quoted 
-	 *   before use.
-	 *
-	 * A note on cookie-value separation:
-	 *   We use semicolons for all separators, including between 
-	 *   cookie-values. This simplifies things and is backwards compatible.
-	 */		      
-	const char * const separators = "()<>@,;:\\\"/[]?={} \t";
+	const struct path_data *p, *q;
+	const struct host_part *h;
+	lwc_string *path_lwc;
+	struct cookie_internal_data *c;
+	int count = 0, version = COOKIE_RFC2965;
+	struct cookie_internal_data **matched_cookies;
+	int matched_cookies_size = 20;
+	int ret_alloc = 4096, ret_used = 1;
+	const char *path;
+	char *ret;
+	lwc_string *scheme;
+	time_t now;
+	int i;
+	bool match;
+
+	assert(url != NULL);
+
+	/* The URL must exist in the db in order to find relevant cookies, since
+	 * we search up the tree from the URL node, and cookies from further
+	 * up also apply. */
+	urldb_add_url(url);
+
+	p = urldb_find_url(url);
+	if (!p)
+		return NULL;
+
+	scheme = p->scheme;
+
+	matched_cookies = malloc(matched_cookies_size *
+			sizeof(struct cookie_internal_data *));
+	if (!matched_cookies)
+		return NULL;
+
+#define GROW_MATCHED_COOKIES						\
+	do {								\
+		if (count == matched_cookies_size) {			\
+			struct cookie_internal_data **temp;		\
+			temp = realloc(matched_cookies,			\
+				(matched_cookies_size + 20) *		\
+				sizeof(struct cookie_internal_data *));	\
+									\
+			if (temp == NULL) {				\
+				free(ret);				\
+				free(matched_cookies);			\
+				return NULL;				\
+			}						\
+									\
+			matched_cookies = temp;				\
+			matched_cookies_size += 20;			\
+		}							\
+	} while(0)
+
+	ret = malloc(ret_alloc);
+	if (!ret) {
+		free(matched_cookies);
+		return NULL;
+	}
+
+	ret[0] = '\0';
+
+	path_lwc = nsurl_get_component(url, NSURL_PATH);
+	if (path_lwc == NULL) {
+		free(ret);
+		free(matched_cookies);
+		return NULL;
+	}
+	path = lwc_string_data(path_lwc);
+	lwc_string_unref(path_lwc);
+
+	now = time(NULL);
+
+	if (*(p->segment) != '\0') {
+		/* Match exact path, unless directory, when prefix matching
+		 * will handle this case for us. */
+		for (q = p->parent->children; q; q = q->next) {
+			if (strcmp(q->segment, p->segment))
+				continue;
+
+			/* Consider all cookies associated with
+			 * this exact path */
+			for (c = q->cookies; c; c = c->next) {
+				if (c->expires != -1 && c->expires < now)
+					/* cookie has expired => ignore */
+					continue;
+
+				if (c->secure && lwc_string_isequal(
+							q->scheme,
+							corestring_lwc_https,
+							&match) &&
+						match == false)
+					/* secure cookie for insecure host.
+					 * ignore */
+					continue;
+
+				if (c->http_only && !include_http_only)
+					/* Ignore HttpOnly */
+					continue;
+
+				matched_cookies[count++] = c;
+
+				GROW_MATCHED_COOKIES;
+
+				if (c->version < (unsigned int)version)
+					version = c->version;
+
+				c->last_used = now;
+
+				cookie_manager_add((struct cookie_data *)c);
+			}
+		}
+	}
+
+	/* Now consider cookies whose paths prefix-match ours */
+	for (p = p->parent; p; p = p->parent) {
+		/* Find directory's path entry(ies) */
+		/* There are potentially multiple due to differing schemes */
+		for (q = p->children; q; q = q->next) {
+			if (*(q->segment) != '\0')
+				continue;
+
+			for (c = q->cookies; c; c = c->next) {
+				if (c->expires != -1 && c->expires < now)
+					/* cookie has expired => ignore */
+					continue;
+
+				if (c->secure && lwc_string_isequal(
+							q->scheme,
+							corestring_lwc_https,
+							&match) &&
+						match == false)
+					/* Secure cookie for insecure server
+					 * => ignore */
+					continue;
+
+				matched_cookies[count++] = c;
+
+				GROW_MATCHED_COOKIES;
+
+				if (c->version < (unsigned int) version)
+					version = c->version;
+
+				c->last_used = now;
+
+				cookie_manager_add((struct cookie_data *)c);
+			}
+		}
+
+		if (!p->parent) {
+			/* No parent, so bail here. This can't go in
+			 * the loop exit condition as we also want to
+			 * process the top-level node.
+			 *
+			 * If p->parent is NULL then p->cookies are
+			 * the domain cookies and thus we don't even
+			 * try matching against them.
+			 */
+			break;
+		}
+
+		/* Consider p itself - may be the result of Path=/foo */
+		for (c = p->cookies; c; c = c->next) {
+			if (c->expires != -1 && c->expires < now)
+				/* cookie has expired => ignore */
+				continue;
+
+			/* Ensure cookie path is a prefix of the resource */
+			if (strncmp(c->path, path, strlen(c->path)) != 0)
+				/* paths don't match => ignore */
+				continue;
+
+			if (c->secure && lwc_string_isequal(p->scheme,
+						corestring_lwc_https,
+						&match) &&
+					match == false)
+				/* Secure cookie for insecure server
+				 * => ignore */
+				continue;
+
+			matched_cookies[count++] = c;
+
+			GROW_MATCHED_COOKIES;
+
+			if (c->version < (unsigned int) version)
+				version = c->version;
+
+			c->last_used = now;
+
+			cookie_manager_add((struct cookie_data *)c);
+		}
+
+	}
+
+	/* Finally consider domain cookies for hosts which domain match ours */
+	for (h = (const struct host_part *)p; h && h != &db_root;
+			h = h->parent) {
+		for (c = h->paths.cookies; c; c = c->next) {
+			if (c->expires != -1 && c->expires < now)
+				/* cookie has expired => ignore */
+				continue;
+
+			/* Ensure cookie path is a prefix of the resource */
+			if (strncmp(c->path, path, strlen(c->path)) != 0)
+				/* paths don't match => ignore */
+				continue;
+
+			if (c->secure && lwc_string_isequal(scheme,
+						corestring_lwc_https,
+						&match) &&
+					match == false)
+				/* secure cookie for insecure host. ignore */
+				continue;
 
-	int max_len;
+			matched_cookies[count++] = c;
 
-	assert(c && used && alloc && buf && *buf);
+			GROW_MATCHED_COOKIES;
 
-	/* "; " cookie-value 
-	 * We allow for the possibility that values are quoted
-	 */
-	max_len = 2 + strlen(c->name) + 1 + strlen(c->value) + 2 +
-			(c->path_from_set ?
-				8 + strlen(c->path) + 2 : 0) +
-			(c->domain_from_set ?
-				10 + strlen(c->domain) + 2 : 0);
+			if (c->version < (unsigned int)version)
+				version = c->version;
 
-	if (*used + max_len >= *alloc) {
-		char *temp = realloc(*buf, *alloc + 4096);
-		if (!temp) {
-			return false;
+			c->last_used = now;
+
+			cookie_manager_add((struct cookie_data *)c);
 		}
-		*buf = temp;
-		*alloc += 4096;
 	}
 
-	if (version == COOKIE_NETSCAPE) {
-		/* Original Netscape cookie */
-		sprintf(*buf + *used - 1, "; %s=", c->name);
-		*used += 2 + strlen(c->name) + 1;
+	if (count == 0) {
+		/* No cookies found */
+		free(ret);
+		free(matched_cookies);
+		return NULL;
+	}
 
-		/* The Netscape spec doesn't mention quoting of cookie values.
-		 * RFC 2109 $10.1.3 indicates that values must not be quoted.
-		 *
-		 * However, other browsers preserve quoting, so we should, too
-		 */
-		if (c->value_was_quoted) {
-			sprintf(*buf + *used - 1, "\"%s\"", c->value);
-			*used += 1 + strlen(c->value) + 1;
-		} else {
-			/** \todo should we %XX-encode [;HT,SP] ? */
-			/** \todo Should we strip escaping backslashes? */
-			sprintf(*buf + *used - 1, "%s", c->value);
-			*used += strlen(c->value);
+	/* and build output string */
+	if (version > COOKIE_NETSCAPE) {
+		sprintf(ret, "$Version=%d", version);
+		ret_used = strlen(ret) + 1;
+	}
+
+	for (i = 0; i < count; i++) {
+		if (!urldb_concat_cookie(matched_cookies[i], version,
+				&ret_used, &ret_alloc, &ret)) {
+			free(ret);
+			free(matched_cookies);
+			return NULL;
 		}
+	}
 
-		/* We don't send path/domain information -- that's what the 
-		 * Netscape spec suggests we should do, anyway. */
-	} else {
-		/* RFC2109 or RFC2965 cookie */
-		sprintf(*buf + *used - 1, "; %s=", c->name);
-		*used += 2 + strlen(c->name) + 1;
+	if (version == COOKIE_NETSCAPE) {
+		/* Old-style cookies => no version & skip "; " */
+		memmove(ret, ret + 2, ret_used - 2);
+		ret_used -= 2;
+	}
 
-		/* Value needs quoting if it contains any separator or if
-		 * it needs preserving from the Set-Cookie header */
-		if (c->value_was_quoted ||
-				strpbrk(c->value, separators) != NULL) {
-			sprintf(*buf + *used - 1, "\"%s\"", c->value);
-			*used += 1 + strlen(c->value) + 1;
-		} else {
-			sprintf(*buf + *used - 1, "%s", c->value);
-			*used += strlen(c->value);
+	/* Now, shrink the output buffer to the required size */
+	{
+		char *temp = realloc(ret, ret_used);
+		if (!temp) {
+			free(ret);
+			free(matched_cookies);
+			return NULL;
 		}
 
-		if (c->path_from_set) {
-			/* Path, quoted if necessary */
-			sprintf(*buf + *used - 1, "; $Path=");
-			*used += 8;
+		ret = temp;
+	}
 
-			if (strpbrk(c->path, separators) != NULL) {
-				sprintf(*buf + *used - 1, "\"%s\"", c->path);
-				*used += 1 + strlen(c->path) + 1;
-			} else {
-				sprintf(*buf + *used - 1, "%s", c->path);
-				*used += strlen(c->path);
-			}
-		}
+	free(matched_cookies);
 
-		if (c->domain_from_set) {
-			/* Domain, quoted if necessary */
-			sprintf(*buf + *used - 1, "; $Domain=");
-			*used += 10;
+	return ret;
 
-			if (strpbrk(c->domain, separators) != NULL) {
-				sprintf(*buf + *used - 1, "\"%s\"", c->domain);
-				*used += 1 + strlen(c->domain) + 1;
-			} else {
-				sprintf(*buf + *used - 1, "%s", c->domain);
-				*used += strlen(c->domain);
-			}
-		}
-	}
+#undef GROW_MATCHED_COOKIES
+}
 
-	return true;
+
+/* exported interface documented in content/urldb.h; starts at db_root */
+void urldb_delete_cookie(const char *domain, const char *path,
+		const char *name)
+{
+	urldb_delete_cookie_hosts(domain, path, name, &db_root);
 }
 
-/**
- * Load a cookie file into the database
- *
- * \param filename File to load
- */
+
+/* exported interface documented in content/urldb.h */
 void urldb_load_cookies(const char *filename)
 {
 	FILE *fp;
@@ -3770,7 +3766,7 @@ void urldb_load_cookies(const char *filename)
 		if (strncasecmp(s, "Version:", 8) == 0) {
 			FIND_T; SKIP_T; loaded_cookie_file_version = atoi(p);
 
-			if (loaded_cookie_file_version < 
+			if (loaded_cookie_file_version <
 					MIN_COOKIE_FILE_VERSION) {
 				LOG(("Unsupported Cookie file version"));
 				break;
@@ -3882,84 +3878,12 @@ void urldb_load_cookies(const char *filename)
 	fclose(fp);
 }
 
-/**
- * Delete a cookie
- *
- * \param domain The cookie's domain
- * \param path The cookie's path
- * \param name The cookie's name
- */
-void urldb_delete_cookie(const char *domain, const char *path,
-		const char *name)
-{
-	urldb_delete_cookie_hosts(domain, path, name, &db_root);
-}
-
-void urldb_delete_cookie_hosts(const char *domain, const char *path,
-		const char *name, struct host_part *parent)
-{
-	struct host_part *h;
-	assert(parent);
-
-	urldb_delete_cookie_paths(domain, path, name, &parent->paths);
-
-	for (h = parent->children; h; h = h->next)
-		urldb_delete_cookie_hosts(domain, path, name, h);
-}
-
-void urldb_delete_cookie_paths(const char *domain, const char *path,
-		const char *name, struct path_data *parent)
-{
-	struct cookie_internal_data *c;
-	struct path_data *p = parent;
-
-	assert(parent);
-
-	do {
-		for (c = p->cookies; c; c = c->next) {
-			if (strcmp(c->domain, domain) == 0 && 
-					strcmp(c->path, path) == 0 &&
-					strcmp(c->name, name) == 0) {
-				if (c->prev)
-					c->prev->next = c->next;
-				else
-					p->cookies = c->next;
-
-				if (c->next)
-					c->next->prev = c->prev;
-				else
-					p->cookies_end = c->prev;
-
-				urldb_free_cookie(c);
-
-				return;
-			}
-		}
-
-		if (p->children) {
-			p = p->children;
-		} else {
-			while (p != parent) {
-				if (p->next != NULL) {
-					p = p->next;
-					break;
-				}
-
-				p = p->parent;
-			}
-		}
-	} while(p != parent);
-}
 
-/**
- * Save persistent cookies to file
- *
- * \param filename Path to save to
- */
+/* exported interface documented in content/urldb.h */
 void urldb_save_cookies(const char *filename)
 {
 	FILE *fp;
-	int cookie_file_version = max(loaded_cookie_file_version, 
+	int cookie_file_version = max(loaded_cookie_file_version,
 			COOKIE_FILE_VERSION);
 
 	assert(filename);
@@ -3985,256 +3909,176 @@ void urldb_save_cookies(const char *filename)
 
 	urldb_save_cookie_hosts(fp, &db_root);
 
-	fclose(fp);
-}
-
-/**
- * Save a host subtree's cookies
- *
- * \param fp File pointer to write to
- * \param parent Parent host
- */
-void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent)
-{
-	struct host_part *h;
-	assert(fp && parent);
-
-	urldb_save_cookie_paths(fp, &parent->paths);
-
-	for (h = parent->children; h; h = h->next)
-		urldb_save_cookie_hosts(fp, h);
-}
-
-/**
- * Save a path subtree's cookies
- *
- * \param fp File pointer to write to
- * \param parent Parent path
- */
-void urldb_save_cookie_paths(FILE *fp, struct path_data *parent)
-{
-	struct path_data *p = parent;
-	time_t now = time(NULL);
-
-	assert(fp && parent);
-
-	do {
-		if (p->cookies != NULL) {
-			struct cookie_internal_data *c;
-
-			for (c = p->cookies; c != NULL; c = c->next) {
-				if (c->expires == -1 || c->expires < now)
-					/* Skip expired & session cookies */
-					continue;
-
-				fprintf(fp, 
-					"%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t"
-					"%s\t%s\t%d\t%s\t%s\t%s\n",
-					c->version, c->domain,
-					c->domain_from_set, c->path,
-					c->path_from_set, c->secure,
-					c->http_only,
-					(int)c->expires, (int)c->last_used,
-					c->no_destroy, c->name, c->value,
-					c->value_was_quoted,
-					p->scheme ? lwc_string_data(p->scheme) :
-							"unused",
-					p->url ? nsurl_access(p->url) :
-							"unused",
-					c->comment ? c->comment : "");
-			}
-		}
-
-		if (p->children != NULL) {
-			p = p->children;
-		} else {
-			while (p != parent) {
-				if (p->next != NULL) {
-					p = p->next;
-					break;
-				}
-
-				p = p->parent;
-			}
-		}
-	} while (p != parent);
+	fclose(fp);
 }
 
 
-/**
- * Destroy urldb
- */
-void urldb_destroy(void)
+/* exported interface documented in content/urldb.h */
+void urldb_dump(void)
 {
-	struct host_part *a, *b;
 	int i;
 
-	/* Clean up search trees */
-	for (i = 0; i < NUM_SEARCH_TREES; i++) {
-		if (search_trees[i] != &empty)
-			urldb_destroy_search_tree(search_trees[i]);
-	}
+	urldb_dump_hosts(&db_root); /* host tree first, from the root */
 
-	/* And database */
-	for (a = db_root.children; a; a = b) {
-		b = a->next;
-		urldb_destroy_host_tree(a);
-	}
-        
-        /* And the bloom filter */
-        if (url_bloom != NULL)
-                bloom_destroy(url_bloom);
+	for (i = 0; i != NUM_SEARCH_TREES; i++) /* then each search tree */
+		urldb_dump_search(search_trees[i], 0);
 }
 
-/**
- * Destroy a host tree
- *
- * \param root Root node of tree to destroy
- */
-void urldb_destroy_host_tree(struct host_part *root)
+
+/* exported interface documented in content/urldb.h */
+struct host_part *urldb_add_host(const char *host)
 {
-	struct host_part *a, *b;
-	struct path_data *p, *q;
-	struct prot_space_data *s, *t;
+	struct host_part *d = (struct host_part *) &db_root, *e;
+	struct search_node *s;
+	char buf[256]; /* 256 bytes is sufficient - domain names are
+			* limited to 255 chars. */
+	char *part;
 
-	/* Destroy children */
-	for (a = root->children; a; a = b) {
-		b = a->next;
-		urldb_destroy_host_tree(a);
-	}
+	assert(host);
 
-	/* Now clean up paths */
-	for (p = root->paths.children; p; p = q) {
-		q = p->next;
-		urldb_destroy_path_tree(p);
-	}
+	if (urldb__host_is_ip_address(host)) {
+		/* Host is an IP: add it whole as a child of the root */
 
-	/* Root path */
-	urldb_destroy_path_node_content(&root->paths);
+		/* Check for existing entry (case-insensitive) */
+		for (e = d->children; e; e = e->next)
+			if (strcasecmp(host, e->part) == 0)
+				/* found => return it */
+				return e;
 
-	/* Proctection space data */
-	for (s = root->prot_space; s; s = t) {
-		t = s->next;
-		urldb_destroy_prot_space(s);
-	}
+		d = urldb_add_host_node(host, d);
 
-	/* And ourselves */
-	free(root->part);
-	free(root);
-}
+		s = urldb_search_insert(search_trees[ST_IP], d);
+		if (!s) {
+			/* failed. NOTE(review): d was not NULL-checked before insert */
+			d = NULL;
+		} else {
+			search_trees[ST_IP] = s;
+		}
 
-/**
- * Destroy a path tree
- *
- * \param root Root node of tree to destroy
- */
-void urldb_destroy_path_tree(struct path_data *root)
-{
-	struct path_data *p = root;
+		return d;
+	}
 
-	do {
-		if (p->children != NULL) {
-			p = p->children;
-		} else {
-			struct path_data *q = p;
+	/* Copy host (truncated to fit buf) so it can be split in place */
+	strncpy(buf, host, sizeof buf);
+	buf[sizeof buf - 1] = '\0';
 
-			while (p != root) {
-				if (p->next != NULL) {
-					p = p->next;
+	/* Process FQDN segments backwards, i.e. rightmost label first */
+	do {
+		part = strrchr(buf, '.');
+		if (!part) {
+			/* no '.' left: buf is the last segment */
+			/* Check for existing entry */
+			for (e = d->children; e; e = e->next)
+				if (strcasecmp(buf, e->part) == 0)
 					break;
-				}
 
-				p = p->parent;
+			if (e) {
+				d = e;
+			} else {
+				d = urldb_add_host_node(buf, d);
+			}
 
-				urldb_destroy_path_node_content(q);
-				free(q);
+			/* And insert the final node into its search tree */
+			if (d) {
+				struct search_node **r;
 
-				q = p;
+				r = urldb_get_search_tree_direct(buf);
+				s = urldb_search_insert(*r, d);
+				if (!s) {
+					/* insertion failed */
+					d = NULL;
+				} else {
+					*r = s;
+				}
 			}
-
-			urldb_destroy_path_node_content(q);
-			free(q);
+			break;
 		}
-	} while (p != root);
+
+		/* Check for existing entry for the rightmost label */
+		for (e = d->children; e; e = e->next)
+			if (strcasecmp(part + 1, e->part) == 0)
+				break;
+
+		d = e ? e : urldb_add_host_node(part + 1, d);
+		if (!d)
+			break;
+
+		*part = '\0'; /* strip the label just handled */
+	} while (1);
+
+	return d;
 }
 
-/**
- * Destroy the contents of a path node
- *
- * \param node Node to destroy contents of (does not destroy node)
- */
-void urldb_destroy_path_node_content(struct path_data *node)
-{
-	struct cookie_internal_data *a, *b;
-	unsigned int i;
 
-	if (node->url != NULL)
-		nsurl_unref(node->url);
+/* exported interface documented in content/urldb.h */
+struct path_data *
+urldb_add_path(lwc_string *scheme,
+	       unsigned int port,
+	       const struct host_part *host,
+	       char *path_query,
+	       lwc_string *fragment,
+	       nsurl *url)
+{
+	struct path_data *d, *e;
+	char *buf = path_query;
+	char *segment, *slash;
+	bool match;
 
-	if (node->scheme != NULL)
-		lwc_string_unref(node->scheme);
+	assert(scheme && host && url);
 
-	free(node->segment);
-	for (i = 0; i < node->frag_cnt; i++)
-		free(node->fragment[i]);
-	free(node->fragment);
+	d = (struct path_data *) &host->paths;
 
-	if (node->thumb)
-		bitmap_destroy(node->thumb);
+	/* skip leading '/' */
+	segment = buf;
+	if (*segment == '/')
+		segment++;
 
-	free(node->urld.title);
+	/* Process path segments, splitting path_query in place at each '/' */
+	do {
+		slash = strchr(segment, '/');
+		if (!slash) {
+			/* last segment */
+			/* look for entry with same segment/scheme/port */
+			for (e = d->children; e; e = e->next)
+				if (strcmp(segment, e->segment) == 0 &&
+						lwc_string_isequal(scheme,
+						e->scheme, &match) ==
+						lwc_error_ok &&
+						match == true &&
+						e->port == port)
+					break;
 
-	for (a = node->cookies; a; a = b) {
-		b = a->next;
-		urldb_destroy_cookie(a);
-	}
-}
+			d = e ? urldb_add_path_fragment(e, fragment) :
+					urldb_add_path_node(scheme, port,
+					segment, fragment, d);
+			break;
+		}
 
-/**
- * Destroy a cookie node
- *
- * \param c Cookie to destroy
- */
-void urldb_destroy_cookie(struct cookie_internal_data *c)
-{
-	free(c->name);
-	free(c->value);
-	free(c->comment);
-	free(c->domain);
-	free(c->path);
+		*slash = '\0';
 
-	free(c);
-}
+		/* look for existing intermediate entry */
+		for (e = d->children; e; e = e->next)
+			if (strcmp(segment, e->segment) == 0 &&
+					lwc_string_isequal(scheme, e->scheme,
+						&match) == lwc_error_ok &&
+						match == true &&
+					e->port == port)
+				break;
 
-/**
- * Destroy protection space data
- *
- * \param space Protection space to destroy
- */
-void urldb_destroy_prot_space(struct prot_space_data *space)
-{
-	lwc_string_unref(space->scheme);
-	free(space->realm);
-	free(space->auth);
+		d = e ? e : urldb_add_path_node(scheme, port, segment, NULL, d);
+		if (!d)
+			break;
 
-	free(space);
-}
+		segment = slash + 1;
+	} while (1);
 
+	free(path_query); /* this function consumes path_query */
 
-/**
- * Destroy a search tree
- *
- * \param root Root node of tree to destroy
- */
-void urldb_destroy_search_tree(struct search_node *root)
-{
-	/* Destroy children */
-	if (root->left != &empty)
-		urldb_destroy_search_tree(root->left);
-	if (root->right != &empty)
-		urldb_destroy_search_tree(root->right);
+	if (d && !d->url) {
+		/* No URL stored on this node yet: insert defragmented URL */
+		if (nsurl_defragment(url, &d->url) != NSERROR_OK)
+			return NULL;
+	}
 
-	/* And destroy ourselves */
-	free(root);
+	return d;
 }
-
diff --git a/content/urldb.h b/content/urldb.h
index c0fece24e..d7ca8b0f8 100644
--- a/content/urldb.h
+++ b/content/urldb.h
@@ -64,62 +64,262 @@ struct cookie_data {
 
 struct bitmap;
 
-/* Destruction */
+/**
+ * Destroy urldb
+ */
 void urldb_destroy(void);
 
+
 /* Persistence support */
+
+/**
+ * Import an URL database from file, replacing any existing database
+ *
+ * \param filename Name of file containing data
+ */
 nserror urldb_load(const char *filename);
-void urldb_save(const char *filename);
+
+/**
+ * Export the current database to file
+ *
+ * \param filename Name of file to export to
+ */
+nserror urldb_save(const char *filename);
+
+/**
+ * Set the cross-session persistence of the entry for an URL
+ *
+ * \param url Absolute URL to persist
+ * \param persist True to persist, false otherwise
+ */
 void urldb_set_url_persistence(nsurl *url, bool persist);
 
+
 /* URL insertion */
+
+/**
+ * Insert an URL into the database
+ *
+ * \param url Absolute URL to insert
+ * \return true on success, false otherwise
+ */
 bool urldb_add_url(nsurl *url);
 
 
 /* URL data modification / lookup */
+
+/**
+ * Set an URL's title string, replacing any existing one
+ *
+ * \param url The URL to look for
+ * \param title The title string to use (copied)
+ */
 void urldb_set_url_title(nsurl *url, const char *title);
+
+/**
+ * Set an URL's content type
+ *
+ * \param url The URL to look for
+ * \param type The type to set
+ */
 void urldb_set_url_content_type(nsurl *url, content_type type);
+
+/**
+ * Update an URL's visit data
+ *
+ * \param url The URL to update
+ */
 void urldb_update_url_visit_data(nsurl *url);
+
+/**
+ * Reset an URL's visit statistics
+ *
+ * \param url The URL to reset
+ */
 void urldb_reset_url_visit_data(nsurl *url);
+
+/**
+ * Find data for an URL.
+ *
+ * \param url Absolute URL to look for
+ * \return Pointer to result struct, or NULL
+ */
 const struct url_data *urldb_get_url_data(nsurl *url);
+
+/**
+ * Extract an URL from the db
+ *
+ * \param url URL to extract
+ * \return Pointer to database's copy of URL or NULL if not found
+ */
 nsurl *urldb_get_url(nsurl *url);
 
+
 /* Authentication modification / lookup */
-void urldb_set_auth_details(nsurl *url, const char *realm,
-		const char *auth);
+
+/**
+ * Set authentication data for an URL
+ *
+ * \param url The URL to consider
+ * \param realm The authentication realm
+ * \param auth The authentication details (in form username:password)
+ */
+void urldb_set_auth_details(nsurl *url, const char *realm, const char *auth);
+
+/**
+ * Look up authentication details in database
+ *
+ * \param url Absolute URL to search for
+ * \param realm When non-NULL, the realm used to determine the protection
+ *        space if that has not already been done for the given URL.
+ * \return Pointer to authentication details, or NULL if not found
+ */
 const char *urldb_get_auth_details(nsurl *url, const char *realm);
 
+
 /* SSL certificate permissions */
+
+/**
+ * Set certificate verification permissions
+ *
+ * \param url URL to consider
+ * \param permit Set to true to allow invalid certificates
+ */
 void urldb_set_cert_permissions(nsurl *url, bool permit);
+
+/**
+ * Retrieve certificate verification permissions from database
+ *
+ * \param url Absolute URL to search for
+ * \return true to permit connections to hosts with invalid certificates,
+ * false otherwise.
+ */
 bool urldb_get_cert_permissions(nsurl *url);
 
+
 /* Thumbnail handling */
+
+/**
+ * Set thumbnail for url, replacing any existing thumbnail
+ *
+ * \param url Absolute URL to consider
+ * \param bitmap Opaque pointer to thumbnail data, or NULL to invalidate
+ */
 void urldb_set_thumbnail(nsurl *url, struct bitmap *bitmap);
+
+/**
+ * Retrieve thumbnail data for given URL
+ *
+ * \param url Absolute URL to search for
+ * \return Pointer to thumbnail data, or NULL if not found.
+ */
 struct bitmap *urldb_get_thumbnail(nsurl *url);
 
+
 /* URL completion */
+
+/**
+ * Iterate over entries in the database which match the given prefix
+ *
+ * \param prefix Prefix to match
+ * \param callback Callback function
+ */
 void urldb_iterate_partial(const char *prefix,
-		bool (*callback)(nsurl *url,
-		const struct url_data *data));
+		bool (*callback)(nsurl *url, const struct url_data *data));
+
 
 /* Iteration */
+
+/**
+ * Iterate over all entries in database
+ *
+ * \param callback Function to call back for each entry
+ */
 void urldb_iterate_entries(bool (*callback)(nsurl *url,
 		const struct url_data *data));
+
+/**
+ * Iterate over all cookies in database
+ *
+ * \param callback Function to call back for each entry
+ */
 void urldb_iterate_cookies(bool (*callback)(const struct cookie_data *cookie));
 
-/* Debug */
-void urldb_dump(void);
 
 /* Cookies */
+
+/**
+ * Parse Set-Cookie header and insert cookie(s) into database
+ *
+ * \param header Header to parse, with Set-Cookie: stripped
+ * \param url URL being fetched
+ * \param referer Referring resource, or NULL for verifiable transaction
+ * \return true on success, false otherwise
+ */
 bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer);
+
+/**
+ * Retrieve cookies for an URL
+ *
+ * \param url URL being fetched
+ * \param include_http_only Whether to include HTTP(S) only cookies.
+ * \return Cookies string for libcurl (on heap), or NULL on error/no cookies
+ */
 char *urldb_get_cookie(nsurl *url, bool include_http_only);
+
+/**
+ * Delete a cookie
+ *
+ * \param domain The cookie's domain
+ * \param path The cookie's path
+ * \param name The cookie's name
+ */
 void urldb_delete_cookie(const char *domain, const char *path, const char *name);
+
+/**
+ * Load a cookie file into the database
+ *
+ * \param filename File to load
+ */
 void urldb_load_cookies(const char *filename);
+
+/**
+ * Save persistent cookies to file
+ *
+ * \param filename Path to save to
+ */
 void urldb_save_cookies(const char *filename);
 
 
+/* Debug */
+
+/**
+ * Dump URL database to stderr
+ */
+void urldb_dump(void);
+
+
 /* test harness only */
+
+/**
+ * Add a host to the database, creating any intermediate entries
+ *
+ * \param host Hostname to add
+ * \return Pointer to leaf node, or NULL on memory exhaustion
+ */
 struct host_part *urldb_add_host(const char *host);
+
+/**
+ * Add a path to the database, creating any intermediate entries
+ *
+ * \param scheme URL scheme associated with path
+ * \param port Port number on host associated with path
+ * \param host Host tree node to attach to
+ * \param path_query Absolute path plus query to add (freed)
+ * \param fragment URL fragment, or NULL
+ * \param url URL (fragment ignored)
+ * \return Pointer to leaf node, or NULL on memory exhaustion
+ */
 struct path_data *urldb_add_path(lwc_string *scheme, unsigned int port,
 		const struct host_part *host, char *path_query,
 		lwc_string *fragment, nsurl *url);
-- 
cgit v1.2.3