summaryrefslogtreecommitdiff
path: root/content/urldb.c
diff options
context:
space:
mode:
Diffstat (limited to 'content/urldb.c')
-rw-r--r-- content/urldb.c 4800
1 files changed, 2322 insertions, 2478 deletions
diff --git a/content/urldb.c b/content/urldb.c
index bf873c62e..8af6ae150 100644
--- a/content/urldb.c
+++ b/content/urldb.c
@@ -17,8 +17,9 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
-/** \file
- * Unified URL information database (implementation)
+/**
+ * \file
+ * Unified URL information database implementation
*
* URLs are stored in a tree-based structure as follows:
*
@@ -81,8 +82,8 @@
* simpler implementation. Entries in this tree comprise pointers to the
* leaf nodes of the host tree described above.
*
- * REALLY IMPORTANT NOTE: urldb expects all URLs to be normalised. Use of
- * non-normalised URLs with urldb will result in undefined behaviour and
+ * REALLY IMPORTANT NOTE: urldb expects all URLs to be normalised. Use of
+ * non-normalised URLs with urldb will result in undefined behaviour and
* potential crashes.
*/
@@ -217,94 +218,6 @@ struct search_node {
struct search_node *right; /**< Right subtree */
};
-/* Destruction */
-static void urldb_destroy_host_tree(struct host_part *root);
-static void urldb_destroy_path_tree(struct path_data *root);
-static void urldb_destroy_path_node_content(struct path_data *node);
-static void urldb_destroy_cookie(struct cookie_internal_data *c);
-static void urldb_destroy_prot_space(struct prot_space_data *space);
-static void urldb_destroy_search_tree(struct search_node *root);
-
-/* Saving */
-static void urldb_save_search_tree(struct search_node *root, FILE *fp);
-static void urldb_count_urls(const struct path_data *root, time_t expiry,
- unsigned int *count);
-static void urldb_write_paths(const struct path_data *parent,
- const char *host, FILE *fp, char **path, int *path_alloc,
- int *path_used, time_t expiry);
-
-/* Iteration */
-static bool urldb_iterate_partial_host(struct search_node *root,
- const char *prefix, bool (*callback)(nsurl *url,
- const struct url_data *data));
-static bool urldb_iterate_partial_path(const struct path_data *parent,
- const char *prefix, bool (*callback)(nsurl *url,
- const struct url_data *data));
-static bool urldb_iterate_entries_host(struct search_node *parent,
- bool (*url_callback)(nsurl *url,
- const struct url_data *data),
- bool (*cookie_callback)(const struct cookie_data *data));
-static bool urldb_iterate_entries_path(const struct path_data *parent,
- bool (*url_callback)(nsurl *url,
- const struct url_data *data),
- bool (*cookie_callback)(const struct cookie_data *data));
-
-/* Insertion */
-static struct host_part *urldb_add_host_node(const char *part,
- struct host_part *parent);
-static struct path_data *urldb_add_path_node(lwc_string *scheme,
- unsigned int port, const char *segment, lwc_string *fragment,
- struct path_data *parent);
-static int urldb_add_path_fragment_cmp(const void *a, const void *b);
-static struct path_data *urldb_add_path_fragment(struct path_data *segment,
- lwc_string *fragment);
-
-/* Lookup */
-static struct path_data *urldb_find_url(nsurl *url);
-static struct path_data *urldb_match_path(const struct path_data *parent,
- const char *path, lwc_string *scheme, unsigned short port);
-static struct search_node **urldb_get_search_tree_direct(const char *host);
-static struct search_node *urldb_get_search_tree(const char *host);
-
-/* Dump */
-static void urldb_dump_hosts(struct host_part *parent);
-static void urldb_dump_paths(struct path_data *parent);
-static void urldb_dump_search(struct search_node *parent, int depth);
-
-/* Search tree */
-static struct search_node *urldb_search_insert(struct search_node *root,
- const struct host_part *data);
-static struct search_node *urldb_search_insert_internal(
- struct search_node *root, struct search_node *n);
-/* for urldb_search_remove, see r5531 which removed it */
-static const struct host_part *urldb_search_find(struct search_node *root,
- const char *host);
-static struct search_node *urldb_search_skew(struct search_node *root);
-static struct search_node *urldb_search_split(struct search_node *root);
-static int urldb_search_match_host(const struct host_part *a,
- const struct host_part *b);
-static int urldb_search_match_string(const struct host_part *a,
- const char *b);
-static int urldb_search_match_prefix(const struct host_part *a,
- const char *b);
-
-/* Cookies */
-static struct cookie_internal_data *urldb_parse_cookie(nsurl *url,
- const char **cookie);
-static bool urldb_parse_avpair(struct cookie_internal_data *c, char *n,
- char *v, bool was_quoted);
-static bool urldb_insert_cookie(struct cookie_internal_data *c,
- lwc_string *scheme, nsurl *url);
-static void urldb_free_cookie(struct cookie_internal_data *c);
-static bool urldb_concat_cookie(struct cookie_internal_data *c, int version,
- int *used, int *alloc, char **buf);
-static void urldb_delete_cookie_hosts(const char *domain, const char *path,
- const char *name, struct host_part *parent);
-static void urldb_delete_cookie_paths(const char *domain, const char *path,
- const char *name, struct path_data *parent);
-static void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent);
-static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent);
-
/** Root database handle */
static struct host_part db_root;
@@ -325,6 +238,8 @@ static struct search_node *search_trees[NUM_SEARCH_TREES] = {
#define COOKIE_FILE_VERSION 102
static int loaded_cookie_file_version;
#define MIN_URL_FILE_VERSION 106
+
+/** URL database file version */
#define URL_FILE_VERSION 106
/* Bloom filter used for short-circuting the false case of "is this
@@ -338,323 +253,7 @@ static int loaded_cookie_file_version;
static struct bloom_filter *url_bloom;
#define BLOOM_SIZE (1024 * 32)
-/**
- * Import an URL database from file, replacing any existing database
- *
- * \param filename Name of file containing data
- */
-nserror urldb_load(const char *filename)
-{
-#define MAXIMUM_URL_LENGTH 4096
- char s[MAXIMUM_URL_LENGTH];
- char host[256];
- struct host_part *h;
- int urls;
- int i;
- int version;
- int length;
- FILE *fp;
-
- assert(filename);
-
- LOG(("Loading URL file %s", filename));
- if (url_bloom == NULL)
- url_bloom = bloom_create(BLOOM_SIZE);
-
- fp = fopen(filename, "r");
- if (!fp) {
- LOG(("Failed to open file '%s' for reading", filename));
- return NSERROR_NOT_FOUND;
- }
-
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) {
- fclose(fp);
- return NSERROR_NEED_DATA;
- }
-
- version = atoi(s);
- if (version < MIN_URL_FILE_VERSION) {
- LOG(("Unsupported URL file version."));
- fclose(fp);
- return NSERROR_INVALID;
- }
- if (version > URL_FILE_VERSION) {
- LOG(("Unknown URL file version."));
- fclose(fp);
- return NSERROR_INVALID;
- }
-
- while (fgets(host, sizeof host, fp)) {
- /* get the hostname */
- length = strlen(host) - 1;
- host[length] = '\0';
-
- /* skip data that has ended up with a host of '' */
- if (length == 0) {
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
- urls = atoi(s);
- /* Eight fields/url */
- for (i = 0; i < (8 * urls); i++) {
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
- }
- continue;
- }
-
- /* read number of URLs */
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
- urls = atoi(s);
-
- /* no URLs => try next host */
- if (urls == 0) {
- LOG(("No URLs for '%s'", host));
- continue;
- }
-
- h = urldb_add_host(host);
- if (!h) {
- LOG(("Failed adding host: '%s'", host));
- fclose(fp);
- return NSERROR_NOMEM;
- }
-
- /* load the non-corrupt data */
- for (i = 0; i < urls; i++) {
- struct path_data *p = NULL;
- char scheme[64], ports[10];
- char url[64 + 3 + 256 + 6 + 4096 + 1];
- unsigned int port;
- bool is_file = false;
- nsurl *nsurl;
- lwc_string *scheme_lwc, *fragment_lwc;
- char *path_query;
- size_t len;
-
- if (!fgets(scheme, sizeof scheme, fp))
- break;
- length = strlen(scheme) - 1;
- scheme[length] = '\0';
-
- if (!fgets(ports, sizeof ports, fp))
- break;
- length = strlen(ports) - 1;
- ports[length] = '\0';
- port = atoi(ports);
-
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
- length = strlen(s) - 1;
- s[length] = '\0';
-
- if (!strcasecmp(host, "localhost") &&
- !strcasecmp(scheme, "file"))
- is_file = true;
-
- snprintf(url, sizeof url, "%s://%s%s%s%s",
- scheme,
- /* file URLs have no host */
- (is_file ? "" : host),
- (port ? ":" : ""),
- (port ? ports : ""),
- s);
-
- /* TODO: store URLs in pre-parsed state, and make
- * a nsurl_load to generate the nsurl more
- * swiftly.
- * Need a nsurl_save too.
- */
- if (nsurl_create(url, &nsurl) != NSERROR_OK) {
- LOG(("Failed inserting '%s'", url));
- fclose(fp);
- return NSERROR_NOMEM;
- }
-
- if (url_bloom != NULL) {
- uint32_t hash = nsurl_hash(nsurl);
- bloom_insert_hash(url_bloom, hash);
- }
-
- /* Copy and merge path/query strings */
- if (nsurl_get(nsurl, NSURL_PATH | NSURL_QUERY,
- &path_query, &len) != NSERROR_OK) {
- LOG(("Failed inserting '%s'", url));
- fclose(fp);
- return NSERROR_NOMEM;
- }
-
- scheme_lwc = nsurl_get_component(nsurl, NSURL_SCHEME);
- fragment_lwc = nsurl_get_component(nsurl,
- NSURL_FRAGMENT);
- p = urldb_add_path(scheme_lwc, port, h, path_query,
- fragment_lwc, nsurl);
- if (!p) {
- LOG(("Failed inserting '%s'", url));
- fclose(fp);
- return NSERROR_NOMEM;
- }
- nsurl_unref(nsurl);
- lwc_string_unref(scheme_lwc);
- if (fragment_lwc != NULL)
- lwc_string_unref(fragment_lwc);
-
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
- if (p)
- p->urld.visits = (unsigned int)atoi(s);
-
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
- if (p)
- p->urld.last_visit = (time_t)atoi(s);
-
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
- if (p)
- p->urld.type = (content_type)atoi(s);
-
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
-
-
- if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
- break;
- length = strlen(s) - 1;
- if (p && length > 0) {
- s[length] = '\0';
- p->urld.title = malloc(length + 1);
- if (p->urld.title)
- memcpy(p->urld.title, s, length + 1);
- }
- }
- }
-
- fclose(fp);
- LOG(("Successfully loaded URL file"));
-#undef MAXIMUM_URL_LENGTH
-
- return NSERROR_OK;
-}
-
-/**
- * Export the current database to file
- *
- * \param filename Name of file to export to
- */
-void urldb_save(const char *filename)
-{
- FILE *fp;
- int i;
-
- assert(filename);
-
- fp = fopen(filename, "w");
- if (!fp) {
- LOG(("Failed to open file '%s' for writing", filename));
- return;
- }
-
- /* file format version number */
- fprintf(fp, "%d\n", URL_FILE_VERSION);
-
- for (i = 0; i != NUM_SEARCH_TREES; i++) {
- urldb_save_search_tree(search_trees[i], fp);
- }
-
- fclose(fp);
-}
-
-/**
- * Save a search (sub)tree
- *
- * \param root Root of (sub)tree to save
- * \param fp File to write to
- */
-void urldb_save_search_tree(struct search_node *parent, FILE *fp)
-{
- char host[256];
- const struct host_part *h;
- unsigned int path_count = 0;
- char *path, *p, *end;
- int path_alloc = 64, path_used = 1;
- time_t expiry;
-
- expiry = time(NULL) - ((60 * 60 * 24) * nsoption_int(expire_url));
-
- if (parent == &empty)
- return;
-
- urldb_save_search_tree(parent->left, fp);
-
- path = malloc(path_alloc);
- if (!path)
- return;
-
- path[0] = '\0';
-
- for (h = parent->data, p = host, end = host + sizeof host;
- h && h != &db_root && p < end; h = h->parent) {
- int written = snprintf(p, end - p, "%s%s", h->part,
- (h->parent && h->parent->parent) ? "." : "");
- if (written < 0) {
- free(path);
- return;
- }
- p += written;
- }
-
- urldb_count_urls(&parent->data->paths, expiry, &path_count);
-
- if (path_count > 0) {
- fprintf(fp, "%s\n%i\n", host, path_count);
-
- urldb_write_paths(&parent->data->paths, host, fp,
- &path, &path_alloc, &path_used, expiry);
- }
-
- free(path);
-
- urldb_save_search_tree(parent->right, fp);
-}
-
-/**
- * Count number of URLs associated with a host
- *
- * \param root Root of path data tree
- * \param expiry Expiry time for URLs
- * \param count Pointer to count
- */
-void urldb_count_urls(const struct path_data *root, time_t expiry,
- unsigned int *count)
-{
- const struct path_data *p = root;
-
- do {
- if (p->children != NULL) {
- /* Drill down into children */
- p = p->children;
- } else {
- /* No more children, increment count if required */
- if (p->persistent || ((p->urld.last_visit > expiry) &&
- (p->urld.visits > 0)))
- (*count)++;
-
- /* Now, find next node to process. */
- while (p != root) {
- if (p->next != NULL) {
- /* Have a sibling, process that */
- p = p->next;
- break;
- }
-
- /* Ascend tree */
- p = p->parent;
- }
- }
- } while (p != root);
-}
/**
* Write paths associated with a host
@@ -667,7 +266,7 @@ void urldb_count_urls(const struct path_data *root, time_t expiry,
* \param path_used Used size of path
* \param expiry Expiry time of URLs
*/
-void urldb_write_paths(const struct path_data *parent, const char *host,
+static void urldb_write_paths(const struct path_data *parent, const char *host,
FILE *fp, char **path, int *path_alloc, int *path_used,
time_t expiry)
{
@@ -706,7 +305,7 @@ void urldb_write_paths(const struct path_data *parent, const char *host,
} else {
/* leaf node */
if (p->persistent ||((p->urld.last_visit > expiry) &&
- (p->urld.visits > 0))) {
+ (p->urld.visits > 0))) {
fprintf(fp, "%s\n", lwc_string_data(p->scheme));
if (p->port)
@@ -719,8 +318,8 @@ void urldb_write_paths(const struct path_data *parent, const char *host,
/** \todo handle fragments? */
fprintf(fp, "%i\n%i\n%i\n", p->urld.visits,
- (int)p->urld.last_visit,
- (int)p->urld.type);
+ (int)p->urld.last_visit,
+ (int)p->urld.type);
fprintf(fp, "\n");
@@ -730,8 +329,8 @@ void urldb_write_paths(const struct path_data *parent, const char *host,
for (i = 0; s[i] != '\0'; i++)
if (s[i] < 32)
s[i] = ' ';
- for (--i; ((i > 0) && (s[i] == ' '));
- i--)
+ for (--i; ((i > 0) && (s[i] == ' '));
+ i--)
s[i] = '\0';
fprintf(fp, "%s\n", p->urld.title);
} else
@@ -740,8 +339,8 @@ void urldb_write_paths(const struct path_data *parent, const char *host,
/* Now, find next node to process. */
while (p != parent) {
- int seglen = p->segment != NULL
- ? strlen(p->segment) : 0;
+ int seglen = p->segment != NULL
+ ? strlen(p->segment) : 0;
/* Remove our segment from the path */
*path_used -= seglen;
@@ -764,522 +363,319 @@ void urldb_write_paths(const struct path_data *parent, const char *host,
} while (p != parent);
}
-/**
- * Set the cross-session persistence of the entry for an URL
- *
- * \param url Absolute URL to persist
- * \param persist True to persist, false otherwise
- */
-void urldb_set_url_persistence(nsurl *url, bool persist)
-{
- struct path_data *p;
-
- assert(url);
-
- p = urldb_find_url(url);
- if (!p)
- return;
-
- p->persistent = persist;
-}
-
-/**
- * Insert an URL into the database
- *
- * \param url Absolute URL to insert
- * \return true on success, false otherwise
- */
-bool urldb_add_url(nsurl *url)
-{
- struct host_part *h;
- struct path_data *p;
- lwc_string *scheme;
- lwc_string *port;
- lwc_string *host;
- lwc_string *fragment;
- const char *host_str;
- char *path_query = NULL;
- size_t len;
- bool match;
- unsigned int port_int;
-
- assert(url);
-
- if (url_bloom == NULL)
- url_bloom = bloom_create(BLOOM_SIZE);
-
- if (url_bloom != NULL) {
- uint32_t hash = nsurl_hash(url);
- bloom_insert_hash(url_bloom, hash);
- }
-
- /* Copy and merge path/query strings */
- if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &path_query, &len) !=
- NSERROR_OK) {
- return false;
- }
- assert(path_query != NULL);
-
- scheme = nsurl_get_component(url, NSURL_SCHEME);
- if (scheme == NULL) {
- free(path_query);
- return false;
- }
-
- host = nsurl_get_component(url, NSURL_HOST);
- if (host != NULL) {
- host_str = lwc_string_data(host);
- lwc_string_unref(host);
-
- } else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) ==
- lwc_error_ok && match == true) {
- host_str = "localhost";
-
- } else {
- lwc_string_unref(scheme);
- free(path_query);
- return false;
- }
-
- fragment = nsurl_get_component(url, NSURL_FRAGMENT);
-
- port = nsurl_get_component(url, NSURL_PORT);
- if (port != NULL) {
- port_int = atoi(lwc_string_data(port));
- lwc_string_unref(port);
- } else {
- port_int = 0;
- }
-
- /* Get host entry */
- h = urldb_add_host(host_str);
-
- /* Get path entry */
- p = (h != NULL) ? urldb_add_path(scheme, port_int, h, path_query,
- fragment, url) : NULL;
-
- lwc_string_unref(scheme);
- if (fragment != NULL)
- lwc_string_unref(fragment);
-
- return (p != NULL);
-}
/**
- * Set an URL's title string, replacing any existing one
+ * Count number of URLs associated with a host
*
- * \param url The URL to look for
- * \param title The title string to use (copied)
+ * \param root Root of path data tree
+ * \param expiry Expiry time for URLs
+ * \param count Pointer to count
*/
-void urldb_set_url_title(nsurl *url, const char *title)
+static void urldb_count_urls(const struct path_data *root, time_t expiry,
+ unsigned int *count)
{
- struct path_data *p;
- char *temp;
-
- assert(url && title);
+ const struct path_data *p = root;
- p = urldb_find_url(url);
- if (!p)
- return;
+ do {
+ if (p->children != NULL) {
+ /* Drill down into children */
+ p = p->children;
+ } else {
+ /* No more children, increment count if required */
+ if (p->persistent || ((p->urld.last_visit > expiry) &&
+ (p->urld.visits > 0))) {
+ (*count)++;
+ }
- temp = strdup(title);
- if (!temp)
- return;
+ /* Now, find next node to process. */
+ while (p != root) {
+ if (p->next != NULL) {
+ /* Have a sibling, process that */
+ p = p->next;
+ break;
+ }
- free(p->urld.title);
- p->urld.title = temp;
+ /* Ascend tree */
+ p = p->parent;
+ }
+ }
+ } while (p != root);
}
-/**
- * Set an URL's content type
- *
- * \param url The URL to look for
- * \param type The type to set
- */
-void urldb_set_url_content_type(nsurl *url, content_type type)
-{
- struct path_data *p;
-
- assert(url);
-
- p = urldb_find_url(url);
- if (!p)
- return;
-
- p->urld.type = type;
-}
/**
- * Update an URL's visit data
+ * Save a search (sub)tree
*
- * \param url The URL to update
+ * \param parent Root of (sub)tree to save
+ * \param fp File to write to
*/
-void urldb_update_url_visit_data(nsurl *url)
+static void urldb_save_search_tree(struct search_node *parent, FILE *fp)
{
- struct path_data *p;
+ char host[256];
+ const struct host_part *h;
+ unsigned int path_count = 0;
+ char *path, *p, *end;
+ int path_alloc = 64, path_used = 1;
+ time_t expiry;
- assert(url);
+ expiry = time(NULL) - ((60 * 60 * 24) * nsoption_int(expire_url));
- p = urldb_find_url(url);
- if (!p)
+ if (parent == &empty)
return;
- p->urld.last_visit = time(NULL);
- p->urld.visits++;
-}
-
-/**
- * Reset an URL's visit statistics
- *
- * \param url The URL to reset
- */
-void urldb_reset_url_visit_data(nsurl *url)
-{
- struct path_data *p;
-
- assert(url);
+ urldb_save_search_tree(parent->left, fp);
- p = urldb_find_url(url);
- if (!p)
+ path = malloc(path_alloc);
+ if (!path)
return;
- p->urld.last_visit = (time_t)0;
- p->urld.visits = 0;
-}
+ path[0] = '\0';
+ for (h = parent->data, p = host, end = host + sizeof host;
+ h && h != &db_root && p < end; h = h->parent) {
+ int written = snprintf(p, end - p, "%s%s", h->part,
+ (h->parent && h->parent->parent) ? "." : "");
+ if (written < 0) {
+ free(path);
+ return;
+ }
+ p += written;
+ }
-/**
- * Find data for an URL.
- *
- * \param url Absolute URL to look for
- * \return Pointer to result struct, or NULL
- */
-const struct url_data *urldb_get_url_data(nsurl *url)
-{
- struct path_data *p;
- struct url_internal_data *u;
+ urldb_count_urls(&parent->data->paths, expiry, &path_count);
- assert(url);
+ if (path_count > 0) {
+ fprintf(fp, "%s\n%i\n", host, path_count);
- p = urldb_find_url(url);
- if (!p)
- return NULL;
+ urldb_write_paths(&parent->data->paths, host, fp,
+ &path, &path_alloc, &path_used, expiry);
+ }
- u = &p->urld;
+ free(path);
- return (const struct url_data *) u;
+ urldb_save_search_tree(parent->right, fp);
}
-/**
- * Extract an URL from the db
- *
- * \param url URL to extract
- * \return Pointer to database's copy of URL or NULL if not found
- */
-nsurl *urldb_get_url(nsurl *url)
-{
- struct path_data *p;
-
- assert(url);
-
- p = urldb_find_url(url);
- if (!p)
- return NULL;
-
- return p->url;
-}
/**
- * Look up authentication details in database
+ * Path data iterator (internal)
*
- * \param url Absolute URL to search for
- * \param realm When non-NULL, it is realm which can be used to determine
- * the protection space when that's not been done before for given URL.
- * \return Pointer to authentication details, or NULL if not found
+ * \param parent Root of subtree to iterate over
+ * \param url_callback Callback for each URL entry; if NULL, cookies are iterated instead
+ * \param cookie_callback Callback for each cookie; only used when url_callback is NULL
+ * \return true to continue, false otherwise
*/
-const char *urldb_get_auth_details(nsurl *url, const char *realm)
+static bool urldb_iterate_entries_path(const struct path_data *parent,
+ bool (*url_callback)(nsurl *url, const struct url_data *data),
+ bool (*cookie_callback)(const struct cookie_data *data))
{
- struct path_data *p, *p_cur, *p_top;
+ const struct path_data *p = parent;
+ const struct cookie_data *c;
- assert(url);
+ do {
+ if (p->children != NULL) {
+ /* Drill down into children */
+ p = p->children;
+ } else {
+ /* All leaf nodes in the path tree should have an URL or
+ * cookies attached to them. If this is not the case, it
+ * indicates that there's a bug in the file loader/URL
+ * insertion code. Therefore, assert this here. */
+ assert(url_callback || cookie_callback);
- /* add to the db, so our lookup will work */
- urldb_add_url(url);
+ /** \todo handle fragments? */
+ if (url_callback) {
+ const struct url_internal_data *u = &p->urld;
- p = urldb_find_url(url);
- if (!p)
- return NULL;
+ assert(p->url);
- /* Check for any auth details attached to the path_data node or any of
- * its parents. */
- for (p_cur = p; p_cur != NULL; p_top = p_cur, p_cur = p_cur->parent) {
- if (p_cur->prot_space) {
- return p_cur->prot_space->auth;
- }
- }
+ if (!url_callback(p->url,
+ (const struct url_data *) u))
+ return false;
+ } else {
+ c = (const struct cookie_data *)p->cookies;
+ for (; c != NULL; c = c->next) {
+ if (!cookie_callback(c))
+ return false;
+ }
+ }
- /* Only when we have a realm (and canonical root of given URL), we can
- * uniquely locate the protection space. */
- if (realm != NULL) {
- const struct host_part *h = (const struct host_part *)p_top;
- const struct prot_space_data *space;
- bool match;
+ /* Now, find next node to process. */
+ while (p != parent) {
+ if (p->next != NULL) {
+ /* Have a sibling, process that */
+ p = p->next;
+ break;
+ }
- /* Search for a possible matching protection space. */
- for (space = h->prot_space; space != NULL;
- space = space->next) {
- if (!strcmp(space->realm, realm) &&
- lwc_string_isequal(space->scheme,
- p->scheme, &match) ==
- lwc_error_ok &&
- match == true &&
- space->port == p->port) {
- p->prot_space = space;
- return p->prot_space->auth;
+ /* Ascend tree */
+ p = p->parent;
}
}
- }
+ } while (p != parent);
- return NULL;
+ return true;
}
-/**
- * Retrieve certificate verification permissions from database
- *
- * \param url Absolute URL to search for
- * \return true to permit connections to hosts with invalid certificates,
- * false otherwise.
- */
-bool urldb_get_cert_permissions(nsurl *url)
-{
- struct path_data *p;
- const struct host_part *h;
-
- assert(url);
-
- p = urldb_find_url(url);
- if (!p)
- return false;
-
- for (; p && p->parent; p = p->parent)
- /* do nothing */;
- assert(p);
-
- h = (const struct host_part *)p;
-
- return h->permit_invalid_certs;
-}
/**
- * Set authentication data for an URL
+ * Check whether a host string is an IP address.
*
- * \param url The URL to consider
- * \param realm The authentication realm
- * \param auth The authentication details (in form username:password)
+ * This call detects IPv4 addresses (full dotted-quad form only,
+ * decimal or hexadecimal notations) and IPv6 addresses (including
+ * those containing embedded IPv4 addresses.)
+ *
+ * \param host a hostname terminated by '\0'
+ * \return true if the hostname is an IP address, false otherwise
*/
-void urldb_set_auth_details(nsurl *url, const char *realm,
- const char *auth)
+static bool urldb__host_is_ip_address(const char *host)
{
- struct path_data *p, *pi;
- struct host_part *h;
- struct prot_space_data *space, *space_alloc;
- char *realm_alloc, *auth_alloc;
- bool match;
-
- assert(url && realm && auth);
-
- /* add url, in case it's missing */
- urldb_add_url(url);
-
- p = urldb_find_url(url);
-
- if (!p)
- return;
-
- /* Search for host_part */
- for (pi = p; pi->parent != NULL; pi = pi->parent)
- ;
- h = (struct host_part *)pi;
-
- /* Search if given URL belongs to a protection space we already know of. */
- for (space = h->prot_space; space; space = space->next) {
- if (!strcmp(space->realm, realm) &&
- lwc_string_isequal(space->scheme, p->scheme,
- &match) == lwc_error_ok &&
- match == true &&
- space->port == p->port)
- break;
- }
+ struct in_addr ipv4;
+ size_t host_len = strlen(host);
+ const char *sane_host;
+ const char *slash;
+#ifndef NO_IPV6
+ struct in6_addr ipv6;
+ char ipv6_addr[64];
+#endif
+ /** @todo FIXME Some parts of urldb.c confuse hosts with "prefixes",
+ * so we can sometimes be erroneously passed more than
+ * just a host. Sometimes we may be passed trailing slashes, or even
+ * whole path segments. A specific criminal in this class is
+ * urldb_iterate_partial, which takes a prefix to search for, but
+ * passes that prefix to functions that expect only hosts.
+ *
+ * For the time being, we will accept such calls; we check if there
+ * is a / in the host parameter, and if there is, we take a copy and
+ * replace the / with a \0. This is not a permanent solution; we
+ * should search through NetSurf and find all the callers that are
+ * in error and fix them. When doing this task, it might be wise
+ * to replace the hideousness below with code that doesn't have to do
+ * this, and add assert(strchr(host, '/') == NULL); somewhere.
+ * -- rjek - 2010-11-04
+ */
- if (space != NULL) {
- /* Overrule existing auth. */
- free(space->auth);
- space->auth = strdup(auth);
+ slash = strchr(host, '/');
+ if (slash == NULL) {
+ sane_host = host;
} else {
- /* Create a new protection space. */
- space = space_alloc = malloc(sizeof(struct prot_space_data));
- realm_alloc = strdup(realm);
- auth_alloc = strdup(auth);
-
- if (!space_alloc || !realm_alloc || !auth_alloc) {
- free(space_alloc);
- free(realm_alloc);
- free(auth_alloc);
- return;
- }
-
- space->scheme = lwc_string_ref(p->scheme);
- space->port = p->port;
- space->realm = realm_alloc;
- space->auth = auth_alloc;
- space->next = h->prot_space;
- h->prot_space = space;
+ char *c = strdup(host);
+ c[slash - host] = '\0';
+ sane_host = c;
+ host_len = slash - host - 1;
+ LOG(("WARNING: called with non-host '%s'", host));
}
- p->prot_space = space;
-}
-
-/**
- * Set certificate verification permissions
- *
- * \param url URL to consider
- * \param permit Set to true to allow invalid certificates
- */
-void urldb_set_cert_permissions(nsurl *url, bool permit)
-{
- struct path_data *p;
- struct host_part *h;
-
- assert(url);
-
- /* add url, in case it's missing */
- urldb_add_url(url);
-
- p = urldb_find_url(url);
- if (!p)
- return;
+ if (strspn(sane_host, "0123456789abcdefABCDEF[].:") < host_len)
+ goto out_false;
- for (; p && p->parent; p = p->parent)
- /* do nothing */;
- assert(p);
+ if (inet_aton(sane_host, &ipv4) != 0) {
+ /* This can only be a sane IPv4 address if it contains 3 dots.
+ * Helpfully, inet_aton is happy to treat "a", "a.b", "a.b.c",
+ * and "a.b.c.d" as valid IPv4 address strings where we only
+ * support the full, dotted-quad, form.
+ */
+ int num_dots = 0;
+ size_t index;
- h = (struct host_part *)p;
+ for (index = 0; index < host_len; index++) {
+ if (sane_host[index] == '.')
+ num_dots++;
+ }
- h->permit_invalid_certs = permit;
-}
+ if (num_dots == 3)
+ goto out_true;
+ else
+ goto out_false;
+ }
-/**
- * Set thumbnail for url, replacing any existing thumbnail
- *
- * \param url Absolute URL to consider
- * \param bitmap Opaque pointer to thumbnail data, or NULL to invalidate
- */
-void urldb_set_thumbnail(nsurl *url, struct bitmap *bitmap)
-{
- struct path_data *p;
+#ifndef NO_IPV6
+ if (sane_host[0] != '[' || sane_host[host_len] != ']')
+ goto out_false;
- assert(url);
+ strncpy(ipv6_addr, sane_host + 1, sizeof(ipv6_addr));
+ ipv6_addr[sizeof(ipv6_addr) - 1] = '\0';
- p = urldb_find_url(url);
- if (!p)
- return;
+ if (inet_pton(AF_INET6, ipv6_addr, &ipv6) == 1)
+ goto out_true;
+#endif
- if (p->thumb && p->thumb != bitmap)
- bitmap_destroy(p->thumb);
+out_false:
+ if (slash != NULL) free((void *)sane_host);
+ return false;
- p->thumb = bitmap;
+out_true:
+ if (slash != NULL) free((void *)sane_host);
+ return true;
}
-/**
- * Retrieve thumbnail data for given URL
- *
- * \param url Absolute URL to search for
- * \return Pointer to thumbnail data, or NULL if not found.
- */
-struct bitmap *urldb_get_thumbnail(nsurl *url)
-{
- struct path_data *p;
-
- assert(url);
-
- p = urldb_find_url(url);
- if (!p)
- return NULL;
-
- return p->thumb;
-}
/**
- * Iterate over entries in the database which match the given prefix
+ * Compare host_part with prefix
*
- * \param prefix Prefix to match
- * \param callback Callback function
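+ * \param a Host part to compare (must be non-NULL and not the database root)
+ * \param b Prefix string to compare against (must be non-NULL)
+ * \return 0 if match, non-zero otherwise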
*/
-void urldb_iterate_partial(const char *prefix,
- bool (*callback)(nsurl *url,
- const struct url_data *data))
+static int urldb_search_match_prefix(const struct host_part *a, const char *b)
{
- char host[256];
- char buf[260]; /* max domain + "www." */
- const char *slash, *scheme_sep;
- struct search_node *tree;
- const struct host_part *h;
-
- assert(prefix && callback);
-
- /* strip scheme */
- scheme_sep = strstr(prefix, "://");
- if (scheme_sep)
- prefix = scheme_sep + 3;
+ const char *end, *dot;
+ int plen, ret;
- slash = strchr(prefix, '/');
- tree = urldb_get_search_tree(prefix);
+ assert(a && a != &db_root && b);
- if (slash) {
- /* if there's a slash in the input, then we can
- * assume that we're looking for a path */
- snprintf(host, sizeof host, "%.*s",
- (int) (slash - prefix), prefix);
+ if (urldb__host_is_ip_address(b)) {
+ /* IP address */
+ return strncasecmp(a->part, b, strlen(b));
+ }
- h = urldb_search_find(tree, host);
- if (!h) {
- int len = slash - prefix;
+ end = b + strlen(b) + 1;
- if (len <= 3 || strncasecmp(host, "www.", 4) != 0) {
- snprintf(buf, sizeof buf, "www.%s", host);
- h = urldb_search_find(
- search_trees[ST_DN + 'w' - 'a'],
- buf);
- if (!h)
- return;
- } else
- return;
+ while (b < end && a && a != &db_root) {
+ dot = strchr(b, '.');
+ if (!dot) {
+ /* last segment */
+ dot = end - 1;
}
- if (h->paths.children) {
- /* Have paths, iterate them */
- urldb_iterate_partial_path(&h->paths, slash + 1,
- callback);
+ /* Compare strings (length limited) */
+ if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
+ /* didn't match => return difference */
+ return ret;
+
+ /* The strings matched */
+ if (dot < end - 1) {
+ /* Consider segment lengths only in the case
+ * where the prefix contains segments */
+ plen = strlen(a->part);
+ if (plen > dot - b)
+ /* len(a) > len(b) */
+ return 1;
+ else if (plen < dot - b)
+ /* len(a) < len(b) */
+ return -1;
}
- } else {
- int len = strlen(prefix);
+ b = dot + 1;
+ a = a->parent;
+ }
- /* looking for hosts */
- if (!urldb_iterate_partial_host(tree, prefix, callback))
- return;
+ /* If we get here then either:
+ * a) The path lengths differ
+ * or b) The hosts are identical
+ */
+ if (a && a != &db_root && b >= end)
+ /* len(a) > len(b) => prefix matches */
+ return 0;
+ else if ((!a || a == &db_root) && b < end)
+ /* len(a) < len(b) => prefix does not match */
+ return -1;
- if (len <= 3 || strncasecmp(prefix, "www.", 4) != 0) {
- /* now look for www.prefix */
- snprintf(buf, sizeof buf, "www.%s", prefix);
- if(!urldb_iterate_partial_host(
- search_trees[ST_DN + 'w' - 'a'],
- buf, callback))
- return;
- }
- }
+ /* Identical */
+ return 0;
}
+
/**
* Partial host iterator (internal)
*
@@ -1288,7 +684,9 @@ void urldb_iterate_partial(const char *prefix,
* \param callback Callback function
* \return true to continue, false otherwise
*/
-bool urldb_iterate_partial_host(struct search_node *root, const char *prefix,
+static bool
+urldb_iterate_partial_host(struct search_node *root,
+ const char *prefix,
bool (*callback)(nsurl *url, const struct url_data *data))
{
int c;
@@ -1330,6 +728,7 @@ bool urldb_iterate_partial_host(struct search_node *root, const char *prefix,
return true;
}
+
/**
* Partial path iterator (internal)
*
@@ -1338,14 +737,14 @@ bool urldb_iterate_partial_host(struct search_node *root, const char *prefix,
* \param callback Callback function
* \return true to continue, false otherwise
*/
-bool urldb_iterate_partial_path(const struct path_data *parent,
+static bool urldb_iterate_partial_path(const struct path_data *parent,
const char *prefix, bool (*callback)(nsurl *url,
const struct url_data *data))
{
const struct path_data *p = parent->children;
const char *slash, *end = prefix + strlen(prefix);
- /*
+ /*
* Given: http://www.example.org/a/b/c/d//e
* and assuming a path tree:
* .
@@ -1387,13 +786,13 @@ bool urldb_iterate_partial_path(const struct path_data *parent,
prefix++;
continue;
}
-
+
if (strncasecmp(p->segment, prefix, slash - prefix) == 0) {
/* prefix matches so far */
if (slash == end) {
/* we've run out of prefix, so all
* paths below this one match */
- if (!urldb_iterate_entries_path(p, callback,
+ if (!urldb_iterate_entries_path(p, callback,
NULL))
return false;
@@ -1414,42 +813,6 @@ bool urldb_iterate_partial_path(const struct path_data *parent,
return true;
}
-/**
- * Iterate over all entries in database
- *
- * \param callback Function to callback for each entry
- */
-void urldb_iterate_entries(bool (*callback)(nsurl *url,
- const struct url_data *data))
-{
- int i;
-
- assert(callback);
-
- for (i = 0; i < NUM_SEARCH_TREES; i++) {
- if (!urldb_iterate_entries_host(search_trees[i],
- callback, NULL))
- break;
- }
-}
-
-/**
- * Iterate over all cookies in database
- *
- * \param callback Function to callback for each entry
- */
-void urldb_iterate_cookies(bool (*callback)(const struct cookie_data *data))
-{
- int i;
-
- assert(callback);
-
- for (i = 0; i < NUM_SEARCH_TREES; i++) {
- if (!urldb_iterate_entries_host(search_trees[i],
- NULL, callback))
- break;
- }
-}
/**
* Host data iterator (internal)
@@ -1459,7 +822,7 @@ void urldb_iterate_cookies(bool (*callback)(const struct cookie_data *data))
* \param cookie_callback Callback function
* \return true to continue, false otherwise
*/
-bool urldb_iterate_entries_host(struct search_node *parent,
+static bool urldb_iterate_entries_host(struct search_node *parent,
bool (*url_callback)(nsurl *url,
const struct url_data *data),
bool (*cookie_callback)(const struct cookie_data *data))
@@ -1487,65 +850,6 @@ bool urldb_iterate_entries_host(struct search_node *parent,
return true;
}
-/**
- * Path data iterator (internal)
- *
- * \param parent Root of subtree to iterate over
- * \param url_callback Callback function
- * \param cookie_callback Callback function
- * \return true to continue, false otherwise
- */
-bool urldb_iterate_entries_path(const struct path_data *parent,
- bool (*url_callback)(nsurl *url,
- const struct url_data *data),
- bool (*cookie_callback)(const struct cookie_data *data))
-{
- const struct path_data *p = parent;
- const struct cookie_data *c;
-
- do {
- if (p->children != NULL) {
- /* Drill down into children */
- p = p->children;
- } else {
- /* All leaf nodes in the path tree should have an URL or
- * cookies attached to them. If this is not the case, it
- * indicates that there's a bug in the file loader/URL
- * insertion code. Therefore, assert this here. */
- assert(url_callback || cookie_callback);
-
- /** \todo handle fragments? */
- if (url_callback) {
- const struct url_internal_data *u = &p->urld;
-
- assert(p->url);
-
- if (!url_callback(p->url,
- (const struct url_data *) u))
- return false;
- } else {
- c = (const struct cookie_data *)p->cookies;
- for (; c != NULL; c = c->next)
- if (!cookie_callback(c))
- return false;
- }
-
- /* Now, find next node to process. */
- while (p != parent) {
- if (p->next != NULL) {
- /* Have a sibling, process that */
- p = p->next;
- break;
- }
-
- /* Ascend tree */
- p = p->parent;
- }
- }
- } while (p != parent);
-
- return true;
-}
/**
* Add a host node to the tree
@@ -1554,7 +858,7 @@ bool urldb_iterate_entries_path(const struct path_data *parent,
* \param parent Parent node to add to
* \return Pointer to added node, or NULL on memory exhaustion
*/
-struct host_part *urldb_add_host_node(const char *part,
+static struct host_part *urldb_add_host_node(const char *part,
struct host_part *parent)
{
struct host_part *d;
@@ -1582,186 +886,58 @@ struct host_part *urldb_add_host_node(const char *part,
/**
- * Check whether a host string is an IP address.
- *
- * This call detects IPv4 addresses (all of dotted-quad or subsets,
- * decimal or hexadecimal notations) and IPv6 addresses (including
- * those containing embedded IPv4 addresses.)
- *
- * \param host a hostname terminated by '\0'
- * \return true if the hostname is an IP address, false otherwise
+ * Fragment comparator callback for qsort
*/
-static bool urldb__host_is_ip_address(const char *host)
+static int urldb_add_path_fragment_cmp(const void *a, const void *b)
{
- struct in_addr ipv4;
- size_t host_len = strlen(host);
- const char *sane_host;
- const char *slash;
-#ifndef NO_IPV6
- struct in6_addr ipv6;
- char ipv6_addr[64];
-#endif
- /** @todo FIXME Some parts of urldb.c make confusions between hosts
- * and "prefixes", we can sometimes be erroneously passed more than
- * just a host. Sometimes we may be passed trailing slashes, or even
- * whole path segments. A specific criminal in this class is
- * urldb_iterate_partial, which takes a prefix to search for, but
- * passes that prefix to functions that expect only hosts.
- *
- * For the time being, we will accept such calls; we check if there
- * is a / in the host parameter, and if there is, we take a copy and
- * replace the / with a \0. This is not a permanent solution; we
- * should search through NetSurf and find all the callers that are
- * in error and fix them. When doing this task, it might be wise
- * to replace the hideousness below with code that doesn't have to do
- * this, and add assert(strchr(host, '/') == NULL); somewhere.
- * -- rjek - 2010-11-04
- */
-
- slash = strchr(host, '/');
- if (slash == NULL) {
- sane_host = host;
- } else {
- char *c = strdup(host);
- c[slash - host] = '\0';
- sane_host = c;
- host_len = slash - host - 1;
- LOG(("WARNING: called with non-host '%s'", host));
- }
-
- if (strspn(sane_host, "0123456789abcdefABCDEF[].:") < host_len)
- goto out_false;
-
- if (inet_aton(sane_host, &ipv4) != 0) {
- /* This can only be a sane IPv4 address if it contains 3 dots.
- * Helpfully, inet_aton is happy to treat "a", "a.b", "a.b.c",
- * and "a.b.c.d" as valid IPv4 address strings where we only
- * support the full, dotted-quad, form.
- */
- int num_dots = 0;
- size_t index;
-
- for (index = 0; index < host_len; index++) {
- if (sane_host[index] == '.')
- num_dots++;
- }
-
- if (num_dots == 3)
- goto out_true;
- else
- goto out_false;
- }
-
-#ifndef NO_IPV6
- if (sane_host[0] != '[' || sane_host[host_len] != ']')
- goto out_false;
-
- strncpy(ipv6_addr, sane_host + 1, sizeof(ipv6_addr));
- ipv6_addr[sizeof(ipv6_addr) - 1] = '\0';
-
- if (inet_pton(AF_INET6, ipv6_addr, &ipv6) == 1)
- goto out_true;
-#endif
-
-out_false:
- if (slash != NULL) free((void *)sane_host);
- return false;
-
-out_true:
- if (slash != NULL) free((void *)sane_host);
- return true;
+ return strcasecmp(*((const char **) a), *((const char **) b));
}
/**
- * Add a host to the database, creating any intermediate entries
+ * Add a fragment to a path segment
*
- * \param host Hostname to add
- * \return Pointer to leaf node, or NULL on memory exhaustion
+ * \param segment Path segment to add to
+ * \param fragment Fragment to add (copied), or NULL
+ * \return segment or NULL on memory exhaustion
*/
-struct host_part *urldb_add_host(const char *host)
+static struct path_data *
+urldb_add_path_fragment(struct path_data *segment, lwc_string *fragment)
{
- struct host_part *d = (struct host_part *) &db_root, *e;
- struct search_node *s;
- char buf[256]; /* 256 bytes is sufficient - domain names are
- * limited to 255 chars. */
- char *part;
-
- assert(host);
-
- if (urldb__host_is_ip_address(host)) {
- /* Host is an IP, so simply add as TLD */
+ char **temp;
- /* Check for existing entry */
- for (e = d->children; e; e = e->next)
- if (strcasecmp(host, e->part) == 0)
- /* found => return it */
- return e;
+ assert(segment);
- d = urldb_add_host_node(host, d);
+ /* If no fragment, this function is a NOP
+ * This may seem strange, but it makes the rest
+ * of the code cleaner */
+ if (!fragment)
+ return segment;
- s = urldb_search_insert(search_trees[ST_IP], d);
- if (!s) {
- /* failed */
- d = NULL;
- } else {
- search_trees[ST_IP] = s;
- }
+ temp = realloc(segment->fragment,
+ (segment->frag_cnt + 1) * sizeof(char *));
+ if (!temp)
+ return NULL;
- return d;
+ segment->fragment = temp;
+ segment->fragment[segment->frag_cnt] =
+ strdup(lwc_string_data(fragment));
+ if (!segment->fragment[segment->frag_cnt]) {
+ /* Don't free temp - it's now our buffer */
+ return NULL;
}
- /* Copy host string, so we can corrupt it */
- strncpy(buf, host, sizeof buf);
- buf[sizeof buf - 1] = '\0';
-
- /* Process FQDN segments backwards */
- do {
- part = strrchr(buf, '.');
- if (!part) {
- /* last segment */
- /* Check for existing entry */
- for (e = d->children; e; e = e->next)
- if (strcasecmp(buf, e->part) == 0)
- break;
-
- if (e) {
- d = e;
- } else {
- d = urldb_add_host_node(buf, d);
- }
-
- /* And insert into search tree */
- if (d) {
- struct search_node **r;
-
- r = urldb_get_search_tree_direct(buf);
- s = urldb_search_insert(*r, d);
- if (!s) {
- /* failed */
- d = NULL;
- } else {
- *r = s;
- }
- }
- break;
- }
-
- /* Check for existing entry */
- for (e = d->children; e; e = e->next)
- if (strcasecmp(part + 1, e->part) == 0)
- break;
-
- d = e ? e : urldb_add_host_node(part + 1, d);
- if (!d)
- break;
+ segment->frag_cnt++;
- *part = '\0';
- } while (1);
+ /* We want fragments in alphabetical order, so sort them
+ * It may prove better to insert in alphabetical order instead */
+ qsort(segment->fragment, segment->frag_cnt, sizeof (char *),
+ urldb_add_path_fragment_cmp);
- return d;
+ return segment;
}
+
/**
* Add a path node to the tree
*
@@ -1772,7 +948,8 @@ struct host_part *urldb_add_host(const char *host)
* \param parent Parent node to add to
* \return Pointer to added node, or NULL on memory exhaustion
*/
-struct path_data *urldb_add_path_node(lwc_string *scheme, unsigned int port,
+static struct path_data *
+urldb_add_path_node(lwc_string *scheme, unsigned int port,
const char *segment, lwc_string *fragment,
struct path_data *parent)
{
@@ -1804,9 +981,10 @@ struct path_data *urldb_add_path_node(lwc_string *scheme, unsigned int port,
}
}
- for (e = parent->children; e; e = e->next)
+ for (e = parent->children; e; e = e->next) {
if (strcmp(e->segment, d->segment) > 0)
break;
+ }
if (e) {
d->prev = e->prev;
@@ -1830,143 +1008,193 @@ struct path_data *urldb_add_path_node(lwc_string *scheme, unsigned int port,
return d;
}
+
/**
- * Add a path to the database, creating any intermediate entries
+ * Get the search tree for a particular host
*
- * \param scheme URL scheme associated with path
- * \param port Port number on host associated with path
- * \param host Host tree node to attach to
- * \param path_query Absolute path plus query to add (freed)
- * \param fragment URL fragment, or NULL
- * \param url URL (fragment ignored)
- * \return Pointer to leaf node, or NULL on memory exhaustion
+ * \param host the host to lookup
+ * \return the corresponding search tree
*/
-struct path_data *urldb_add_path(lwc_string *scheme, unsigned int port,
- const struct host_part *host, char *path_query,
- lwc_string *fragment, nsurl *url)
+static struct search_node **urldb_get_search_tree_direct(const char *host)
{
- struct path_data *d, *e;
- char *buf = path_query;
- char *segment, *slash;
- bool match;
+ assert(host);
- assert(scheme && host && url);
+ if (urldb__host_is_ip_address(host))
+ return &search_trees[ST_IP];
+ else if (isalpha(*host))
+ return &search_trees[ST_DN + tolower(*host) - 'a'];
+ return &search_trees[ST_EE];
+}
- d = (struct path_data *) &host->paths;
- /* skip leading '/' */
- segment = buf;
- if (*segment == '/')
- segment++;
+/**
+ * Get the search tree for a particular host
+ *
+ * \param host the host to lookup
+ * \return the corresponding search tree
+ */
+static struct search_node *urldb_get_search_tree(const char *host)
+{
+ return *urldb_get_search_tree_direct(host);
+}
- /* Process path segments */
- do {
- slash = strchr(segment, '/');
- if (!slash) {
- /* last segment */
- /* look for existing entry */
- for (e = d->children; e; e = e->next)
- if (strcmp(segment, e->segment) == 0 &&
- lwc_string_isequal(scheme,
- e->scheme, &match) ==
- lwc_error_ok &&
- match == true &&
- e->port == port)
- break;
- d = e ? urldb_add_path_fragment(e, fragment) :
- urldb_add_path_node(scheme, port,
- segment, fragment, d);
- break;
- }
+/**
+ * Compare host_part with a string
+ *
+ * \param a
+ * \param b
+ * \return 0 if match, non-zero, otherwise
+ */
+static int urldb_search_match_string(const struct host_part *a, const char *b)
+{
+ const char *end, *dot;
+ int plen, ret;
- *slash = '\0';
+ assert(a && a != &db_root && b);
- /* look for existing entry */
- for (e = d->children; e; e = e->next)
- if (strcmp(segment, e->segment) == 0 &&
- lwc_string_isequal(scheme, e->scheme,
- &match) == lwc_error_ok &&
- match == true &&
- e->port == port)
- break;
+ if (urldb__host_is_ip_address(b)) {
+ /* IP address */
+ return strcasecmp(a->part, b);
+ }
- d = e ? e : urldb_add_path_node(scheme, port, segment, NULL, d);
- if (!d)
- break;
+ end = b + strlen(b) + 1;
- segment = slash + 1;
- } while (1);
+ while (b < end && a && a != &db_root) {
+ dot = strchr(b, '.');
+ if (!dot) {
+ /* last segment */
+ dot = end - 1;
+ }
- free(path_query);
+ /* Compare strings (length limited) */
+ if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
+ /* didn't match => return difference */
+ return ret;
- if (d && !d->url) {
- /* Insert defragmented URL */
- if (nsurl_defragment(url, &d->url) != NSERROR_OK)
- return NULL;
+ /* The strings matched, now check that the lengths do, too */
+ plen = strlen(a->part);
+
+ if (plen > dot - b)
+ /* len(a) > len(b) */
+ return 1;
+ else if (plen < dot - b)
+ /* len(a) < len(b) */
+ return -1;
+
+ b = dot + 1;
+ a = a->parent;
}
- return d;
+ /* If we get here then either:
+ * a) The path lengths differ
+ * or b) The hosts are identical
+ */
+ if (a && a != &db_root && b >= end)
+ /* len(a) > len(b) */
+ return 1;
+ else if ((!a || a == &db_root) && b < end)
+ /* len(a) < len(b) */
+ return -1;
+
+ /* Identical */
+ return 0;
}
+
/**
- * Fragment comparator callback for qsort
+ * Find a node in a search tree
+ *
+ * \param root Tree to look in
+ * \param host Host to find
+ * \return Pointer to host tree node, or NULL if not found
*/
-int urldb_add_path_fragment_cmp(const void *a, const void *b)
+static const struct host_part *
+urldb_search_find(struct search_node *root, const char *host)
{
- return strcasecmp(*((const char **) a), *((const char **) b));
+ int c;
+
+ assert(root && host);
+
+ if (root == &empty) {
+ return NULL;
+ }
+
+ c = urldb_search_match_string(root->data, host);
+
+ if (c > 0)
+ return urldb_search_find(root->left, host);
+ else if (c < 0)
+ return urldb_search_find(root->right, host);
+ else
+ return root->data;
}
+
/**
- * Add a fragment to a path segment
+ * Match a path string
*
- * \param segment Path segment to add to
- * \param fragment Fragment to add (copied), or NULL
- * \return segment or NULL on memory exhaustion
+ * \param parent Path (sub)tree to look in
+ * \param path The path to search for
+ * \param scheme The URL scheme associated with the path
+ * \param port The port associated with the path
+ * \return Pointer to path data or NULL if not found.
*/
-struct path_data *urldb_add_path_fragment(struct path_data *segment,
- lwc_string *fragment)
+static struct path_data *urldb_match_path(const struct path_data *parent,
+ const char *path, lwc_string *scheme, unsigned short port)
{
- char **temp;
+ const struct path_data *p;
+ const char *slash;
+ bool match;
- assert(segment);
+ assert(parent != NULL);
+ assert(parent->segment == NULL);
- /* If no fragment, this function is a NOP
- * This may seem strange, but it makes the rest
- * of the code cleaner */
- if (!fragment)
- return segment;
+ if (path[0] != '/') {
+ LOG(("path is %s", path));
+ }
- temp = realloc(segment->fragment,
- (segment->frag_cnt + 1) * sizeof(char *));
- if (!temp)
- return NULL;
+ assert(path[0] == '/');
- segment->fragment = temp;
- segment->fragment[segment->frag_cnt] =
- strdup(lwc_string_data(fragment));
- if (!segment->fragment[segment->frag_cnt]) {
- /* Don't free temp - it's now our buffer */
- return NULL;
- }
+ /* Start with children, as parent has no segment */
+ p = parent->children;
- segment->frag_cnt++;
+ while (p != NULL) {
+ slash = strchr(path + 1, '/');
+ if (!slash)
+ slash = path + strlen(path);
- /* We want fragments in alphabetical order, so sort them
- * It may prove better to insert in alphabetical order instead */
- qsort(segment->fragment, segment->frag_cnt, sizeof (char *),
- urldb_add_path_fragment_cmp);
+ if (strncmp(p->segment, path + 1, slash - path - 1) == 0 &&
+ lwc_string_isequal(p->scheme, scheme, &match) ==
+ lwc_error_ok &&
+ match == true &&
+ p->port == port) {
+ if (*slash == '\0') {
+ /* Complete match */
+ return (struct path_data *) p;
+ }
- return segment;
+ /* Match so far, go down tree */
+ p = p->children;
+
+ path = slash;
+ } else {
+ /* No match, try next sibling */
+ p = p->next;
+ }
+ }
+
+ return NULL;
}
+
/**
* Find an URL in the database
*
* \param url Absolute URL to find
* \return Pointer to path data, or NULL if not found
*/
-struct path_data *urldb_find_url(nsurl *url)
+static struct path_data *urldb_find_url(nsurl *url)
{
const struct host_part *h;
struct path_data *p;
@@ -1979,7 +1207,7 @@ struct path_data *urldb_find_url(nsurl *url)
bool match;
assert(url);
-
+
if (url_bloom != NULL) {
if (bloom_search_hash(url_bloom,
nsurl_hash(url)) == false) {
@@ -2036,102 +1264,49 @@ struct path_data *urldb_find_url(nsurl *url)
return p;
}
+
/**
- * Match a path string
+ * Dump URL database paths to stderr
*
- * \param parent Path (sub)tree to look in
- * \param path The path to search for
- * \param scheme The URL scheme associated with the path
- * \param port The port associated with the path
- * \return Pointer to path data or NULL if not found.
+ * \param parent Parent node of tree to dump
*/
-struct path_data *urldb_match_path(const struct path_data *parent,
- const char *path, lwc_string *scheme, unsigned short port)
+static void urldb_dump_paths(struct path_data *parent)
{
- const struct path_data *p;
- const char *slash;
- bool match;
-
- assert(parent != NULL);
- assert(parent->segment == NULL);
- assert(path[0] == '/');
+ const struct path_data *p = parent;
+ unsigned int i;
- /* Start with children, as parent has no segment */
- p = parent->children;
+ do {
+ if (p->segment != NULL) {
+ LOG(("\t%s : %u", lwc_string_data(p->scheme), p->port));
- while (p != NULL) {
- slash = strchr(path + 1, '/');
- if (!slash)
- slash = path + strlen(path);
+ LOG(("\t\t'%s'", p->segment));
- if (strncmp(p->segment, path + 1, slash - path - 1) == 0 &&
- lwc_string_isequal(p->scheme, scheme, &match) ==
- lwc_error_ok &&
- match == true &&
- p->port == port) {
- if (*slash == '\0') {
- /* Complete match */
- return (struct path_data *) p;
- }
+ for (i = 0; i != p->frag_cnt; i++)
+ LOG(("\t\t\t#%s", p->fragment[i]));
+ }
- /* Match so far, go down tree */
+ if (p->children != NULL) {
p = p->children;
-
- path = slash;
} else {
- /* No match, try next sibling */
- p = p->next;
- }
- }
-
- return NULL;
-}
-
-/**
- * Get the search tree for a particular host
- *
- * \param host the host to lookup
- * \return the corresponding search tree
- */
-struct search_node **urldb_get_search_tree_direct(const char *host) {
- assert(host);
-
- if (urldb__host_is_ip_address(host))
- return &search_trees[ST_IP];
- else if (isalpha(*host))
- return &search_trees[ST_DN + tolower(*host) - 'a'];
- return &search_trees[ST_EE];
-}
+ while (p != parent) {
+ if (p->next != NULL) {
+ p = p->next;
+ break;
+ }
-/**
- * Get the search tree for a particular host
- *
- * \param host the host to lookup
- * \return the corresponding search tree
- */
-struct search_node *urldb_get_search_tree(const char *host) {
- return *urldb_get_search_tree_direct(host);
+ p = p->parent;
+ }
+ }
+ } while (p != parent);
}
-/**
- * Dump URL database to stderr
- */
-void urldb_dump(void)
-{
- int i;
-
- urldb_dump_hosts(&db_root);
-
- for (i = 0; i != NUM_SEARCH_TREES; i++)
- urldb_dump_search(search_trees[i], 0);
-}
/**
* Dump URL database hosts to stderr
*
* \param parent Parent node of tree to dump
*/
-void urldb_dump_hosts(struct host_part *parent)
+static void urldb_dump_hosts(struct host_part *parent)
{
struct host_part *h;
@@ -2150,40 +1325,6 @@ void urldb_dump_hosts(struct host_part *parent)
urldb_dump_hosts(h);
}
-/**
- * Dump URL database paths to stderr
- *
- * \param parent Parent node of tree to dump
- */
-void urldb_dump_paths(struct path_data *parent)
-{
- const struct path_data *p = parent;
- unsigned int i;
-
- do {
- if (p->segment != NULL) {
- LOG(("\t%s : %u", lwc_string_data(p->scheme), p->port));
-
- LOG(("\t\t'%s'", p->segment));
-
- for (i = 0; i != p->frag_cnt; i++)
- LOG(("\t\t\t#%s", p->fragment[i]));
- }
-
- if (p->children != NULL) {
- p = p->children;
- } else {
- while (p != parent) {
- if (p->next != NULL) {
- p = p->next;
- break;
- }
-
- p = p->parent;
- }
- }
- } while (p != parent);
-}
/**
* Dump search tree
@@ -2191,7 +1332,7 @@ void urldb_dump_paths(struct path_data *parent)
* \param parent Parent node of tree to dump
* \param depth Tree depth
*/
-void urldb_dump_search(struct search_node *parent, int depth)
+static void urldb_dump_search(struct search_node *parent, int depth)
{
const struct host_part *h;
int i;
@@ -2217,96 +1358,6 @@ void urldb_dump_search(struct search_node *parent, int depth)
urldb_dump_search(parent->right, depth + 1);
}
-/**
- * Insert a node into the search tree
- *
- * \param root Root of tree to insert into
- * \param data User data to insert
- * \return Pointer to updated root, or NULL if failed
- */
-struct search_node *urldb_search_insert(struct search_node *root,
- const struct host_part *data)
-{
- struct search_node *n;
-
- assert(root && data);
-
- n = malloc(sizeof(struct search_node));
- if (!n)
- return NULL;
-
- n->level = 1;
- n->data = data;
- n->left = n->right = &empty;
-
- root = urldb_search_insert_internal(root, n);
-
- return root;
-}
-
-/**
- * Insert node into search tree
- *
- * \param root Root of (sub)tree to insert into
- * \param n Node to insert
- * \return Pointer to updated root
- */
-struct search_node *urldb_search_insert_internal(struct search_node *root,
- struct search_node *n)
-{
- assert(root && n);
-
- if (root == &empty) {
- root = n;
- } else {
- int c = urldb_search_match_host(root->data, n->data);
-
- if (c > 0) {
- root->left = urldb_search_insert_internal(
- root->left, n);
- } else if (c < 0) {
- root->right = urldb_search_insert_internal(
- root->right, n);
- } else {
- /* exact match */
- free(n);
- return root;
- }
-
- root = urldb_search_skew(root);
- root = urldb_search_split(root);
- }
-
- return root;
-}
-
-/**
- * Find a node in a search tree
- *
- * \param root Tree to look in
- * \param host Host to find
- * \return Pointer to host tree node, or NULL if not found
- */
-const struct host_part *urldb_search_find(struct search_node *root,
- const char *host)
-{
- int c;
-
- assert(root && host);
-
- if (root == &empty) {
- return NULL;
- }
-
- c = urldb_search_match_string(root->data, host);
-
- if (c > 0)
- return urldb_search_find(root->left, host);
- else if (c < 0)
- return urldb_search_find(root->right, host);
- else
- return root->data;
-}
/**
* Compare a pair of host_parts
@@ -2315,8 +1366,8 @@ const struct host_part *urldb_search_find(struct search_node *root,
* \param b
* \return 0 if match, non-zero, otherwise
*/
-int urldb_search_match_host(const struct host_part *a,
- const struct host_part *b)
+static int
+urldb_search_match_host(const struct host_part *a, const struct host_part *b)
{
int ret;
@@ -2344,134 +1395,6 @@ int urldb_search_match_host(const struct host_part *a,
return 0;
}
-/**
- * Compare host_part with a string
- *
- * \param a
- * \param b
- * \return 0 if match, non-zero, otherwise
- */
-int urldb_search_match_string(const struct host_part *a,
- const char *b)
-{
- const char *end, *dot;
- int plen, ret;
-
- assert(a && a != &db_root && b);
-
- if (urldb__host_is_ip_address(b)) {
- /* IP address */
- return strcasecmp(a->part, b);
- }
-
- end = b + strlen(b) + 1;
-
- while (b < end && a && a != &db_root) {
- dot = strchr(b, '.');
- if (!dot) {
- /* last segment */
- dot = end - 1;
- }
-
- /* Compare strings (length limited) */
- if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
- /* didn't match => return difference */
- return ret;
-
- /* The strings matched, now check that the lengths do, too */
- plen = strlen(a->part);
-
- if (plen > dot - b)
- /* len(a) > len(b) */
- return 1;
- else if (plen < dot - b)
- /* len(a) < len(b) */
- return -1;
-
- b = dot + 1;
- a = a->parent;
- }
-
- /* If we get here then either:
- * a) The path lengths differ
- * or b) The hosts are identical
- */
- if (a && a != &db_root && b >= end)
- /* len(a) > len(b) */
- return 1;
- else if ((!a || a == &db_root) && b < end)
- /* len(a) < len(b) */
- return -1;
-
- /* Identical */
- return 0;
-}
-
-/**
- * Compare host_part with prefix
- *
- * \param a
- * \param b
- * \return 0 if match, non-zero, otherwise
- */
-int urldb_search_match_prefix(const struct host_part *a,
- const char *b)
-{
- const char *end, *dot;
- int plen, ret;
-
- assert(a && a != &db_root && b);
-
- if (urldb__host_is_ip_address(b)) {
- /* IP address */
- return strncasecmp(a->part, b, strlen(b));
- }
-
- end = b + strlen(b) + 1;
-
- while (b < end && a && a != &db_root) {
- dot = strchr(b, '.');
- if (!dot) {
- /* last segment */
- dot = end - 1;
- }
-
- /* Compare strings (length limited) */
- if ((ret = strncasecmp(a->part, b, dot - b)) != 0)
- /* didn't match => return difference */
- return ret;
-
- /* The strings matched */
- if (dot < end - 1) {
- /* Consider segment lengths only in the case
- * where the prefix contains segments */
- plen = strlen(a->part);
- if (plen > dot - b)
- /* len(a) > len(b) */
- return 1;
- else if (plen < dot - b)
- /* len(a) < len(b) */
- return -1;
- }
-
- b = dot + 1;
- a = a->parent;
- }
-
- /* If we get here then either:
- * a) The path lengths differ
- * or b) The hosts are identical
- */
- if (a && a != &db_root && b >= end)
- /* len(a) > len(b) => prefix matches */
- return 0;
- else if ((!a || a == &db_root) && b < end)
- /* len(a) < len(b) => prefix does not match */
- return -1;
-
- /* Identical */
- return 0;
-}
/**
* Rotate a subtree right
@@ -2479,7 +1402,7 @@ int urldb_search_match_prefix(const struct host_part *a,
* \param root Root of subtree to rotate
* \return new root of subtree
*/
-struct search_node *urldb_search_skew(struct search_node *root)
+static struct search_node *urldb_search_skew(struct search_node *root)
{
struct search_node *temp;
@@ -2495,13 +1418,14 @@ struct search_node *urldb_search_skew(struct search_node *root)
return root;
}
+
/**
* Rotate a node left, increasing the parent's level
*
* \param root Root of subtree to rotate
* \return New root of subtree
*/
-struct search_node *urldb_search_split(struct search_node *root)
+static struct search_node *urldb_search_split(struct search_node *root)
{
struct search_node *temp;
@@ -2519,538 +1443,198 @@ struct search_node *urldb_search_split(struct search_node *root)
return root;
}
+
/**
- * Retrieve cookies for an URL
+ * Insert node into search tree
*
- * \param url URL being fetched
- * \param include_http_only Whether to include HTTP(S) only cookies.
- * \return Cookies string for libcurl (on heap), or NULL on error/no cookies
+ * \param root Root of (sub)tree to insert into
+ * \param n Node to insert
+ * \return Pointer to updated root
*/
-char *urldb_get_cookie(nsurl *url, bool include_http_only)
+static struct search_node *
+urldb_search_insert_internal(struct search_node *root, struct search_node *n)
{
- const struct path_data *p, *q;
- const struct host_part *h;
- lwc_string *path_lwc;
- struct cookie_internal_data *c;
- int count = 0, version = COOKIE_RFC2965;
- struct cookie_internal_data **matched_cookies;
- int matched_cookies_size = 20;
- int ret_alloc = 4096, ret_used = 1;
- const char *path;
- char *ret;
- lwc_string *scheme;
- time_t now;
- int i;
- bool match;
-
- assert(url != NULL);
-
- /* The URL must exist in the db in order to find relevant cookies, since
- * we search up the tree from the URL node, and cookies from further
- * up also apply. */
- urldb_add_url(url);
-
- p = urldb_find_url(url);
- if (!p)
- return NULL;
-
- scheme = p->scheme;
-
- matched_cookies = malloc(matched_cookies_size *
- sizeof(struct cookie_internal_data *));
- if (!matched_cookies)
- return NULL;
-
-#define GROW_MATCHED_COOKIES \
- do { \
- if (count == matched_cookies_size) { \
- struct cookie_internal_data **temp; \
- temp = realloc(matched_cookies, \
- (matched_cookies_size + 20) * \
- sizeof(struct cookie_internal_data *)); \
- \
- if (temp == NULL) { \
- free(ret); \
- free(matched_cookies); \
- return NULL; \
- } \
- \
- matched_cookies = temp; \
- matched_cookies_size += 20; \
- } \
- } while(0)
-
- ret = malloc(ret_alloc);
- if (!ret) {
- free(matched_cookies);
- return NULL;
- }
-
- ret[0] = '\0';
-
- path_lwc = nsurl_get_component(url, NSURL_PATH);
- if (path_lwc == NULL) {
- free(ret);
- free(matched_cookies);
- return NULL;
- }
- path = lwc_string_data(path_lwc);
- lwc_string_unref(path_lwc);
-
- now = time(NULL);
-
- if (*(p->segment) != '\0') {
- /* Match exact path, unless directory, when prefix matching
- * will handle this case for us. */
- for (q = p->parent->children; q; q = q->next) {
- if (strcmp(q->segment, p->segment))
- continue;
-
- /* Consider all cookies associated with
- * this exact path */
- for (c = q->cookies; c; c = c->next) {
- if (c->expires != -1 && c->expires < now)
- /* cookie has expired => ignore */
- continue;
-
- if (c->secure && lwc_string_isequal(
- q->scheme,
- corestring_lwc_https,
- &match) &&
- match == false)
- /* secure cookie for insecure host.
- * ignore */
- continue;
-
- if (c->http_only && !include_http_only)
- /* Ignore HttpOnly */
- continue;
-
- matched_cookies[count++] = c;
-
- GROW_MATCHED_COOKIES;
-
- if (c->version < (unsigned int)version)
- version = c->version;
-
- c->last_used = now;
-
- cookie_manager_add((struct cookie_data *)c);
- }
- }
- }
-
- /* Now consider cookies whose paths prefix-match ours */
- for (p = p->parent; p; p = p->parent) {
- /* Find directory's path entry(ies) */
- /* There are potentially multiple due to differing schemes */
- for (q = p->children; q; q = q->next) {
- if (*(q->segment) != '\0')
- continue;
-
- for (c = q->cookies; c; c = c->next) {
- if (c->expires != -1 && c->expires < now)
- /* cookie has expired => ignore */
- continue;
-
- if (c->secure && lwc_string_isequal(
- q->scheme,
- corestring_lwc_https,
- &match) &&
- match == false)
- /* Secure cookie for insecure server
- * => ignore */
- continue;
-
- matched_cookies[count++] = c;
-
- GROW_MATCHED_COOKIES;
-
- if (c->version < (unsigned int) version)
- version = c->version;
-
- c->last_used = now;
-
- cookie_manager_add((struct cookie_data *)c);
- }
- }
-
- if (!p->parent) {
- /* No parent, so bail here. This can't go in
- * the loop exit condition as we also want to
- * process the top-level node.
- *
- * If p->parent is NULL then p->cookies are
- * the domain cookies and thus we don't even
- * try matching against them.
- */
- break;
- }
-
- /* Consider p itself - may be the result of Path=/foo */
- for (c = p->cookies; c; c = c->next) {
- if (c->expires != -1 && c->expires < now)
- /* cookie has expired => ignore */
- continue;
-
- /* Ensure cookie path is a prefix of the resource */
- if (strncmp(c->path, path, strlen(c->path)) != 0)
- /* paths don't match => ignore */
- continue;
-
- if (c->secure && lwc_string_isequal(p->scheme,
- corestring_lwc_https,
- &match) &&
- match == false)
- /* Secure cookie for insecure server
- * => ignore */
- continue;
-
- matched_cookies[count++] = c;
-
- GROW_MATCHED_COOKIES;
-
- if (c->version < (unsigned int) version)
- version = c->version;
+ assert(root && n);
- c->last_used = now;
+ if (root == &empty) {
+ root = n;
+ } else {
+ int c = urldb_search_match_host(root->data, n->data);
- cookie_manager_add((struct cookie_data *)c);
+ if (c > 0) {
+ root->left = urldb_search_insert_internal(
+ root->left, n);
+ } else if (c < 0) {
+ root->right = urldb_search_insert_internal(
+ root->right, n);
+ } else {
+ /* exact match */
+ free(n);
+ return root;
}
+ root = urldb_search_skew(root);
+ root = urldb_search_split(root);
}
- /* Finally consider domain cookies for hosts which domain match ours */
- for (h = (const struct host_part *)p; h && h != &db_root;
- h = h->parent) {
- for (c = h->paths.cookies; c; c = c->next) {
- if (c->expires != -1 && c->expires < now)
- /* cookie has expired => ignore */
- continue;
-
- /* Ensure cookie path is a prefix of the resource */
- if (strncmp(c->path, path, strlen(c->path)) != 0)
- /* paths don't match => ignore */
- continue;
-
- if (c->secure && lwc_string_isequal(scheme,
- corestring_lwc_https,
- &match) &&
- match == false)
- /* secure cookie for insecure host. ignore */
- continue;
-
- matched_cookies[count++] = c;
+ return root;
+}
- GROW_MATCHED_COOKIES;
- if (c->version < (unsigned int)version)
- version = c->version;
-
- c->last_used = now;
+/**
+ * Insert a node into the search tree
+ *
+ * \param root Root of tree to insert into
+ * \param data User data to insert
+ * \return Pointer to updated root, or NULL if failed
+ */
+static struct search_node *
+urldb_search_insert(struct search_node *root, const struct host_part *data)
+{
+ struct search_node *n;
- cookie_manager_add((struct cookie_data *)c);
- }
- }
+ assert(root && data);
- if (count == 0) {
- /* No cookies found */
- free(ret);
- free(matched_cookies);
+ n = malloc(sizeof(struct search_node));
+ if (!n)
return NULL;
- }
-
- /* and build output string */
- if (version > COOKIE_NETSCAPE) {
- sprintf(ret, "$Version=%d", version);
- ret_used = strlen(ret) + 1;
- }
-
- for (i = 0; i < count; i++) {
- if (!urldb_concat_cookie(matched_cookies[i], version,
- &ret_used, &ret_alloc, &ret)) {
- free(ret);
- free(matched_cookies);
- return NULL;
- }
- }
-
- if (version == COOKIE_NETSCAPE) {
- /* Old-style cookies => no version & skip "; " */
- memmove(ret, ret + 2, ret_used - 2);
- ret_used -= 2;
- }
- /* Now, shrink the output buffer to the required size */
- {
- char *temp = realloc(ret, ret_used);
- if (!temp) {
- free(ret);
- free(matched_cookies);
- return NULL;
- }
-
- ret = temp;
- }
-
- free(matched_cookies);
+ n->level = 1;
+ n->data = data;
+ n->left = n->right = &empty;
- return ret;
+ root = urldb_search_insert_internal(root, n);
-#undef GROW_MATCHED_COOKIES
+ return root;
}
+
/**
- * Parse Set-Cookie header and insert cookie(s) into database
+ * Parse a cookie avpair
*
- * \param header Header to parse, with Set-Cookie: stripped
- * \param url URL being fetched
- * \param referer Referring resource, or 0 for verifiable transaction
- * \return true on success, false otherwise
+ * \param c Cookie struct to populate
+ * \param n Name component
+ * \param v Value component
+ * \param was_quoted Whether ::v was quoted in the input
+ * \return true on success, false on memory exhaustion
*/
-bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer)
+static bool urldb_parse_avpair(struct cookie_internal_data *c, char *n,
+ char *v, bool was_quoted)
{
- const char *cur = header, *end;
- lwc_string *path, *host, *scheme;
- nsurl *urlt;
- bool match;
-
- assert(url && header);
+ int vlen;
- /* Get defragmented URL, as 'urlt' */
- if (nsurl_defragment(url, &urlt) != NSERROR_OK)
- return NULL;
+ assert(c && n && v);
- scheme = nsurl_get_component(url, NSURL_SCHEME);
- if (scheme == NULL) {
- nsurl_unref(urlt);
- return false;
+ /* Strip whitespace from start of name */
+ for (; *n; n++) {
+ if (*n != ' ' && *n != '\t')
+ break;
}
- path = nsurl_get_component(url, NSURL_PATH);
- if (path == NULL) {
- lwc_string_unref(scheme);
- nsurl_unref(urlt);
- return false;
+ /* Strip whitespace from end of name */
+ for (vlen = strlen(n); vlen; vlen--) {
+ if (n[vlen] == ' ' || n[vlen] == '\t')
+ n[vlen] = '\0';
+ else
+ break;
}
- host = nsurl_get_component(url, NSURL_HOST);
- if (host == NULL) {
- lwc_string_unref(path);
- lwc_string_unref(scheme);
- nsurl_unref(urlt);
- return false;
+ /* Strip whitespace from start of value */
+ for (; *v; v++) {
+ if (*v != ' ' && *v != '\t')
+ break;
}
- if (referer) {
- lwc_string *rhost;
-
- /* Ensure that url's host name domain matches
- * referer's (4.3.5) */
- rhost = nsurl_get_component(referer, NSURL_HOST);
- if (rhost == NULL) {
- goto error;
- }
-
- /* Domain match host names */
- if (lwc_string_isequal(host, rhost, &match) == lwc_error_ok &&
- match == false) {
- const char *hptr;
- const char *rptr;
- const char *dot;
- const char *host_data = lwc_string_data(host);
- const char *rhost_data = lwc_string_data(rhost);
-
- /* Ensure neither host nor rhost are IP addresses */
- if (urldb__host_is_ip_address(host_data) ||
- urldb__host_is_ip_address(rhost_data)) {
- /* IP address, so no partial match */
- lwc_string_unref(rhost);
- goto error;
- }
-
- /* Not exact match, so try the following:
- *
- * 1) Find the longest common suffix of host and rhost
- * (may be all of host/rhost)
- * 2) Discard characters from the start of the suffix
- * until the suffix starts with a dot
- * (prevents foobar.com matching bar.com)
- * 3) Ensure the suffix is non-empty and contains
- * embedded dots (to avoid permitting .com as a
- * suffix)
- *
- * Note that the above in no way resembles the
- * domain matching algorithm found in RFC2109.
- * It does, however, model the real world rather
- * more accurately.
- */
-
- /** \todo In future, we should consult a TLD service
- * instead of just looking for embedded dots.
- */
-
- hptr = host_data + lwc_string_length(host) - 1;
- rptr = rhost_data + lwc_string_length(rhost) - 1;
-
- /* 1 */
- while (hptr >= host_data && rptr >= rhost_data) {
- if (*hptr != *rptr)
- break;
- hptr--;
- rptr--;
- }
- /* Ensure we end up pointing at the start of the
- * common suffix. The above loop will exit pointing
- * to the byte before the start of the suffix. */
- hptr++;
-
- /* 2 */
- while (*hptr != '\0' && *hptr != '.')
- hptr++;
-
- /* 3 */
- if (*hptr == '\0' ||
- (dot = strchr(hptr + 1, '.')) == NULL ||
- *(dot + 1) == '\0') {
- lwc_string_unref(rhost);
- goto error;
- }
- }
-
- lwc_string_unref(rhost);
+ /* Strip whitespace from end of value */
+ for (vlen = strlen(v); vlen; vlen--) {
+ if (v[vlen] == ' ' || v[vlen] == '\t')
+ v[vlen] = '\0';
+ else
+ break;
}
- end = cur + strlen(cur) - 2 /* Trailing CRLF */;
-
- do {
- struct cookie_internal_data *c;
- char *dot;
- size_t len;
-
- c = urldb_parse_cookie(url, &cur);
- if (!c) {
- /* failed => stop parsing */
- goto error;
- }
-
- /* validate cookie */
-
- /* 4.2.2:i Cookie must have NAME and VALUE */
- if (!c->name || !c->value) {
- urldb_free_cookie(c);
- goto error;
- }
-
- /* 4.3.2:i Cookie path must be a prefix of URL path */
- len = strlen(c->path);
- if (len > lwc_string_length(path) ||
- strncmp(c->path, lwc_string_data(path),
- len) != 0) {
- urldb_free_cookie(c);
- goto error;
- }
-
- /* 4.3.2:ii Cookie domain must contain embedded dots */
- dot = strchr(c->domain + 1, '.');
- if (!dot || *(dot + 1) == '\0') {
- /* no embedded dots */
- urldb_free_cookie(c);
- goto error;
+ if (!c->comment && strcasecmp(n, "Comment") == 0) {
+ c->comment = strdup(v);
+ if (!c->comment)
+ return false;
+ } else if (!c->domain && strcasecmp(n, "Domain") == 0) {
+ if (v[0] == '.') {
+ /* Domain must start with a dot */
+ c->domain_from_set = true;
+ c->domain = strdup(v);
+ if (!c->domain)
+ return false;
}
+ } else if (strcasecmp(n, "Max-Age") == 0) {
+ int temp = atoi(v);
+ if (temp == 0)
+ /* Special case - 0 means delete */
+ c->expires = 0;
+ else
+ c->expires = time(NULL) + temp;
+ } else if (!c->path && strcasecmp(n, "Path") == 0) {
+ c->path_from_set = true;
+ c->path = strdup(v);
+ if (!c->path)
+ return false;
+ } else if (strcasecmp(n, "Version") == 0) {
+ c->version = atoi(v);
+ } else if (strcasecmp(n, "Expires") == 0) {
+ char *datenoday;
+ time_t expires;
- /* Domain match fetch host with cookie domain */
- if (strcasecmp(lwc_string_data(host), c->domain) != 0) {
- int hlen, dlen;
- char *domain = c->domain;
-
- /* c->domain must be a domain cookie here because:
- * c->domain is either:
- * + specified in the header as a domain cookie
- * (non-domain cookies in the header are ignored
- * by urldb_parse_cookie / urldb_parse_avpair)
- * + defaulted to the URL's host part
- * (by urldb_parse_cookie if no valid domain was
- * specified in the header)
- *
- * The latter will pass the strcasecmp above, which
- * leaves the former (i.e. a domain cookie)
- */
- assert(c->domain[0] == '.');
-
- /* 4.3.2:iii */
- if (urldb__host_is_ip_address(lwc_string_data(host))) {
- /* IP address, so no partial match */
- urldb_free_cookie(c);
- goto error;
- }
-
- hlen = lwc_string_length(host);
- dlen = strlen(c->domain);
-
- if (hlen <= dlen && hlen != dlen - 1) {
- /* Partial match not possible */
- urldb_free_cookie(c);
- goto error;
- }
-
- if (hlen == dlen - 1) {
- /* Relax matching to allow
- * host a.com to match .a.com */
- domain++;
- dlen--;
- }
-
- if (strcasecmp(lwc_string_data(host) + (hlen - dlen),
- domain)) {
- urldb_free_cookie(c);
- goto error;
- }
+ /* Strip dayname from date (these are hugely
+ * variable and liable to break the parser.
+ * They also serve no useful purpose) */
+ for (datenoday = v; *datenoday && !isdigit(*datenoday);
+ datenoday++)
+ ; /* do nothing */
- /* 4.3.2:iv Ensure H contains no dots
- *
- * If you believe the spec, H should contain no
- * dots in _any_ cookie. Unfortunately, however,
- * reality differs in that many sites send domain
- * cookies of the form .foo.com from hosts such
- * as bar.bat.foo.com and then expect domain
- * matching to work. Thus we have to do what they
- * expect, regardless of any potential security
- * implications.
- *
- * This is what code conforming to the spec would
- * look like:
- *
- * for (int i = 0; i < (hlen - dlen); i++) {
- * if (host[i] == '.') {
- * urldb_free_cookie(c);
- * goto error;
- * }
- * }
- */
+ expires = curl_getdate(datenoday, NULL);
+ if (expires == -1) {
+ /* assume we have an unrepresentable
+ * date => force it to the maximum
+ * possible value of a 32bit time_t
+ * (this may break in 2038. We'll
+ * deal with that once we come to
+ * it) */
+ expires = (time_t)0x7fffffff;
}
-
- /* Now insert into database */
- if (!urldb_insert_cookie(c, scheme, urlt))
- goto error;
- } while (cur < end);
-
- lwc_string_unref(host);
- lwc_string_unref(path);
- lwc_string_unref(scheme);
- nsurl_unref(urlt);
+ c->expires = expires;
+ } else if (strcasecmp(n, "Secure") == 0) {
+ c->secure = true;
+ } else if (strcasecmp(n, "HttpOnly") == 0) {
+ c->http_only = true;
+ } else if (!c->name) {
+ c->name = strdup(n);
+ c->value = strdup(v);
+ c->value_was_quoted = was_quoted;
+ if (!c->name || !c->value)
+ return false;
+ }
return true;
+}
-error:
- lwc_string_unref(host);
- lwc_string_unref(path);
- lwc_string_unref(scheme);
- nsurl_unref(urlt);
- return false;
+/**
+ * Free a cookie
+ *
+ * \param c The cookie to free
+ */
+static void urldb_free_cookie(struct cookie_internal_data *c)
+{
+ assert(c);
+
+ free(c->comment);
+ free(c->domain);
+ free(c->path);
+ free(c->name);
+ free(c->value);
+ free(c);
}
+
/**
* Parse a cookie
*
@@ -3058,8 +1642,8 @@ error:
* \param cookie Pointer to cookie string (updated on exit)
* \return Pointer to cookie structure (on heap, caller frees) or NULL
*/
-struct cookie_internal_data *urldb_parse_cookie(nsurl *url,
- const char **cookie)
+static struct cookie_internal_data *
+urldb_parse_cookie(nsurl *url, const char **cookie)
{
struct cookie_internal_data *c;
const char *cur;
@@ -3091,34 +1675,12 @@ struct cookie_internal_data *urldb_parse_cookie(nsurl *url,
/* Match Firefox 2.0.0.11 */
value[0] = '\0';
-#if 0
- /* This is what IE6/7 & Safari 3 do */
- /* Opera 9.25 discards the entire cookie */
-
- /* Shuffle value up by 1 */
- memmove(value + 1, value,
- min(v - value, sizeof(value) - 2));
- v++;
- /* And insert " character at the start */
- value[0] = '"';
-
- /* Now, run forwards through the value
- * looking for a semicolon. If one exists,
- * terminate the value at this point. */
- for (char *s = value; s < v; s++) {
- if (*s == ';') {
- *s = '\0';
- v = s;
- break;
- }
- }
-#endif
}
break;
} else if (*cur == '\r') {
/* Spurious linefeed */
- continue;
+ continue;
} else if (*cur == '\n') {
/* Spurious newline */
continue;
@@ -3131,7 +1693,7 @@ struct cookie_internal_data *urldb_parse_cookie(nsurl *url,
} else {
had_value_data = true;
- /* Value is taken verbatim if first non-space
+ /* Value is taken verbatim if first non-space
* character is not a " */
if (*cur != '"') {
value_verbatim = true;
@@ -3157,7 +1719,7 @@ struct cookie_internal_data *urldb_parse_cookie(nsurl *url,
}
if (!quoted && (was_quoted || *cur == ';')) {
- /* Semicolon or after quoted value
+ /* Semicolon or after quoted value
* => end of current avpair */
/* NUL-terminate tokens */
@@ -3316,112 +1878,6 @@ struct cookie_internal_data *urldb_parse_cookie(nsurl *url,
return c;
}
-/**
- * Parse a cookie avpair
- *
- * \param c Cookie struct to populate
- * \param n Name component
- * \param v Value component
- * \param was_quoted Whether ::v was quoted in the input
- * \return true on success, false on memory exhaustion
- */
-bool urldb_parse_avpair(struct cookie_internal_data *c, char *n, char *v,
- bool was_quoted)
-{
- int vlen;
-
- assert(c && n && v);
-
- /* Strip whitespace from start of name */
- for (; *n; n++) {
- if (*n != ' ' && *n != '\t')
- break;
- }
-
- /* Strip whitespace from end of name */
- for (vlen = strlen(n); vlen; vlen--) {
- if (n[vlen] == ' ' || n[vlen] == '\t')
- n[vlen] = '\0';
- else
- break;
- }
-
- /* Strip whitespace from start of value */
- for (; *v; v++) {
- if (*v != ' ' && *v != '\t')
- break;
- }
-
- /* Strip whitespace from end of value */
- for (vlen = strlen(v); vlen; vlen--) {
- if (v[vlen] == ' ' || v[vlen] == '\t')
- v[vlen] = '\0';
- else
- break;
- }
-
- if (!c->comment && strcasecmp(n, "Comment") == 0) {
- c->comment = strdup(v);
- if (!c->comment)
- return false;
- } else if (!c->domain && strcasecmp(n, "Domain") == 0) {
- if (v[0] == '.') {
- /* Domain must start with a dot */
- c->domain_from_set = true;
- c->domain = strdup(v);
- if (!c->domain)
- return false;
- }
- } else if (strcasecmp(n, "Max-Age") == 0) {
- int temp = atoi(v);
- if (temp == 0)
- /* Special case - 0 means delete */
- c->expires = 0;
- else
- c->expires = time(NULL) + temp;
- } else if (!c->path && strcasecmp(n, "Path") == 0) {
- c->path_from_set = true;
- c->path = strdup(v);
- if (!c->path)
- return false;
- } else if (strcasecmp(n, "Version") == 0) {
- c->version = atoi(v);
- } else if (strcasecmp(n, "Expires") == 0) {
- char *datenoday;
- time_t expires;
-
- /* Strip dayname from date (these are hugely
- * variable and liable to break the parser.
- * They also serve no useful purpose) */
- for (datenoday = v; *datenoday && !isdigit(*datenoday);
- datenoday++)
- ; /* do nothing */
-
- expires = curl_getdate(datenoday, NULL);
- if (expires == -1) {
- /* assume we have an unrepresentable
- * date => force it to the maximum
- * possible value of a 32bit time_t
- * (this may break in 2038. We'll
- * deal with that once we come to
- * it) */
- expires = (time_t)0x7fffffff;
- }
- c->expires = expires;
- } else if (strcasecmp(n, "Secure") == 0) {
- c->secure = true;
- } else if (strcasecmp(n, "HttpOnly") == 0) {
- c->http_only = true;
- } else if (!c->name) {
- c->name = strdup(n);
- c->value = strdup(v);
- c->value_was_quoted = was_quoted;
- if (!c->name || !c->value)
- return false;
- }
-
- return true;
-}
/**
* Insert a cookie into the database
@@ -3431,8 +1887,8 @@ bool urldb_parse_avpair(struct cookie_internal_data *c, char *n, char *v,
* \param url URL (sans fragment) associated with cookie
* \return true on success, false on memory exhaustion (c will be freed)
*/
-bool urldb_insert_cookie(struct cookie_internal_data *c, lwc_string *scheme,
- nsurl *url)
+static bool urldb_insert_cookie(struct cookie_internal_data *c,
+ lwc_string *scheme, nsurl *url)
{
struct cookie_internal_data *d;
const struct host_part *h;
@@ -3537,22 +1993,6 @@ bool urldb_insert_cookie(struct cookie_internal_data *c, lwc_string *scheme,
return true;
}
-/**
- * Free a cookie
- *
- * \param c The cookie to free
- */
-void urldb_free_cookie(struct cookie_internal_data *c)
-{
- assert(c);
-
- free(c->comment);
- free(c->domain);
- free(c->path);
- free(c->name);
- free(c->value);
- free(c);
-}
/**
* Concatenate a cookie into the provided buffer
@@ -3564,16 +2004,16 @@ void urldb_free_cookie(struct cookie_internal_data *c)
* \param buf Pointer to Pointer to buffer (updated)
* \return true on success, false on memory exhaustion
*/
-bool urldb_concat_cookie(struct cookie_internal_data *c, int version,
+static bool urldb_concat_cookie(struct cookie_internal_data *c, int version,
int *used, int *alloc, char **buf)
{
/* Combined (A)BNF for the Cookie: request header:
- *
+ *
* CHAR = <any US-ASCII character (octets 0 - 127)>
* CTL = <any US-ASCII control character
* (octets 0 - 31) and DEL (127)>
* CR = <US-ASCII CR, carriage return (13)>
- * LF = <US-ASCII LF, linefeed (10)>
+ * LF = <US-ASCII LF, linefeed (10)>
* SP = <US-ASCII SP, space (32)>
* HT = <US-ASCII HT, horizontal-tab (9)>
* <"> = <US-ASCII double-quote mark (34)>
@@ -3610,22 +2050,22 @@ bool urldb_concat_cookie(struct cookie_internal_data *c, int version,
*
* A note on quoted-string handling:
* The cookie data stored in the db is verbatim (i.e. sans enclosing
- * <">, if any, and with all quoted-pairs intact) thus all that we
+ * <">, if any, and with all quoted-pairs intact) thus all that we
* need to do here is ensure that value strings which were quoted
- * in Set-Cookie or which include any of the separators are quoted
+ * in Set-Cookie or which include any of the separators are quoted
* before use.
*
* A note on cookie-value separation:
- * We use semicolons for all separators, including between
+ * We use semicolons for all separators, including between
* cookie-values. This simplifies things and is backwards compatible.
- */
+ */
const char * const separators = "()<>@,;:\\\"/[]?={} \t";
int max_len;
assert(c && used && alloc && buf && *buf);
- /* "; " cookie-value
+ /* "; " cookie-value
* We allow for the possibility that values are quoted
*/
max_len = 2 + strlen(c->name) + 1 + strlen(c->value) + 2 +
@@ -3663,7 +2103,7 @@ bool urldb_concat_cookie(struct cookie_internal_data *c, int version,
*used += strlen(c->value);
}
- /* We don't send path/domain information -- that's what the
+ /* We don't send path/domain information -- that's what the
* Netscape spec suggests we should do, anyway. */
} else {
/* RFC2109 or RFC2965 cookie */
@@ -3713,11 +2153,1567 @@ bool urldb_concat_cookie(struct cookie_internal_data *c, int version,
return true;
}
+
+/**
+ * Delete a single cookie from a path subtree.
+ *
+ * Walks the path tree rooted at \a parent (the node itself first, then its
+ * descendants, children before siblings) and removes the first cookie whose
+ * domain, path and name all compare equal (strcmp) to the values given.
+ * The matching cookie is unlinked from its node's cookie list and freed
+ * with urldb_free_cookie(); the walk stops after that one removal.
+ *
+ * \param domain Cookie domain to match
+ * \param path Cookie path to match
+ * \param name Cookie name to match
+ * \param parent Root of the path subtree to search (must not be NULL)
+ */
+static void urldb_delete_cookie_paths(const char *domain, const char *path,
+ const char *name, struct path_data *parent)
+{
+ struct cookie_internal_data *c;
+ struct path_data *p = parent;
+
+ assert(parent);
+
+ do {
+ for (c = p->cookies; c; c = c->next) {
+ if (strcmp(c->domain, domain) == 0 &&
+ strcmp(c->path, path) == 0 &&
+ strcmp(c->name, name) == 0) {
+ if (c->prev) /* detach from predecessor, or from the list head */
+ c->prev->next = c->next;
+ else
+ p->cookies = c->next;
+
+ if (c->next) /* detach from successor, or from the list tail */
+ c->next->prev = c->prev;
+ else
+ p->cookies_end = c->prev;
+
+ urldb_free_cookie(c);
+
+ return;
+ }
+ }
+
+ if (p->children) {
+ p = p->children;
+ } else { /* no children: take the next sibling, climbing until one exists */
+ while (p != parent) {
+ if (p->next != NULL) {
+ p = p->next;
+ break;
+ }
+
+ p = p->parent;
+ }
+ }
+ } while (p != parent);
+}
+
+
+/**
+ * Delete a cookie from a host subtree and its associated paths.
+ *
+ * Applies urldb_delete_cookie_paths() to the path tree of \a parent and
+ * then recurses into every child host, so each host node in the subtree
+ * has at most one matching cookie removed.
+ *
+ * \param domain Cookie domain to match
+ * \param path Cookie path to match
+ * \param name Cookie name to match
+ * \param parent Root of the host subtree to process (must not be NULL)
+ */
+static void urldb_delete_cookie_hosts(const char *domain, const char *path,
+ const char *name, struct host_part *parent)
+{
+ struct host_part *h;
+ assert(parent);
+
+ urldb_delete_cookie_paths(domain, path, name, &parent->paths);
+
+ for (h = parent->children; h; h = h->next)
+ urldb_delete_cookie_hosts(domain, path, name, h);
+}
+
+
+/**
+ * Save a path subtree's cookies
+ *
+ * Walks the path tree rooted at \a parent and writes one line per
+ * persistent cookie to \a fp. Each line holds sixteen tab-separated
+ * fields, in this order: version, domain, domain_from_set, path,
+ * path_from_set, secure, http_only, expires, last_used, no_destroy,
+ * name, value, value_was_quoted, scheme, url, comment.
+ *
+ * Cookies whose expiry is -1 or lies before the current time are not
+ * written. The scheme and url fields are emitted as "unused" when the
+ * owning path node has none, and a missing comment is written as an
+ * empty string. Expiry and last-used times are written as int.
+ *
+ * \param fp File pointer to write to
+ * \param parent Parent path
+ */
+static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent)
+{
+ struct path_data *p = parent;
+ time_t now = time(NULL);
+
+ assert(fp && parent);
+
+ do {
+ if (p->cookies != NULL) {
+ struct cookie_internal_data *c;
+
+ for (c = p->cookies; c != NULL; c = c->next) {
+ if (c->expires == -1 || c->expires < now)
+ /* Skip expired & session cookies */
+ continue;
+
+ fprintf(fp,
+ "%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t"
+ "%s\t%s\t%d\t%s\t%s\t%s\n",
+ c->version, c->domain,
+ c->domain_from_set, c->path,
+ c->path_from_set, c->secure,
+ c->http_only,
+ (int)c->expires, (int)c->last_used,
+ c->no_destroy, c->name, c->value,
+ c->value_was_quoted,
+ p->scheme ? lwc_string_data(p->scheme) :
+ "unused",
+ p->url ? nsurl_access(p->url) :
+ "unused",
+ c->comment ? c->comment : "");
+ }
+ }
+
+ if (p->children != NULL) {
+ p = p->children;
+ } else { /* no children: take the next sibling, climbing until one exists */
+ while (p != parent) {
+ if (p->next != NULL) {
+ p = p->next;
+ break;
+ }
+
+ p = p->parent;
+ }
+ }
+ } while (p != parent);
+}
+
+
+/**
+ * Save a host subtree's cookies
+ *
+ * Writes the cookies held in the path tree of \a parent using
+ * urldb_save_cookie_paths(), then recurses into each child host in
+ * list order.
+ *
+ * \param fp File pointer to write to
+ * \param parent Parent host
+ */
+static void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent)
+{
+ struct host_part *h;
+ assert(fp && parent);
+
+ urldb_save_cookie_paths(fp, &parent->paths);
+
+ for (h = parent->children; h; h = h->next)
+ urldb_save_cookie_hosts(fp, h);
+}
+
+
+/**
+ * Destroy a cookie node
+ *
+ * Frees the cookie's name, value, comment, domain and path strings and
+ * then the cookie structure itself. The prev/next list links are not
+ * touched, so the caller is responsible for any unlinking.
+ *
+ * \param c Cookie to destroy
+ */
+static void urldb_destroy_cookie(struct cookie_internal_data *c)
+{
+ free(c->name);
+ free(c->value);
+ free(c->comment);
+ free(c->domain);
+ free(c->path);
+
+ free(c);
+}
+
+
+/**
+ * Destroy the contents of a path node
+ *
+ * Releases everything the node owns: the URL and scheme references (when
+ * set), the segment string, each of the frag_cnt fragment strings and the
+ * fragment array, the thumbnail bitmap (when set), the title, and every
+ * cookie on the node's cookie list. Child and sibling nodes are not
+ * visited.
+ *
+ * \param node Node to destroy contents of (does not destroy node)
+ */
+static void urldb_destroy_path_node_content(struct path_data *node)
+{
+ struct cookie_internal_data *a, *b;
+ unsigned int i;
+
+ if (node->url != NULL)
+ nsurl_unref(node->url);
+
+ if (node->scheme != NULL)
+ lwc_string_unref(node->scheme);
+
+ free(node->segment);
+ for (i = 0; i < node->frag_cnt; i++)
+ free(node->fragment[i]);
+ free(node->fragment);
+
+ if (node->thumb)
+ bitmap_destroy(node->thumb);
+
+ free(node->urld.title);
+
+ for (a = node->cookies; a; a = b) {
+ b = a->next; /* saved first: the cookie is freed by the call below */
+ urldb_destroy_cookie(a);
+ }
+}
+
+
+/**
+ * Destroy protection space data
+ *
+ * Drops the reference on the scheme string, frees the realm and auth
+ * strings, then frees the structure itself. The entry's list links are
+ * not touched.
+ *
+ * NOTE(review): the scheme is unreffed unconditionally -- presumably it
+ * is never NULL for a stored protection space; confirm against the code
+ * that creates these entries.
+ *
+ * \param space Protection space to destroy
+ */
+static void urldb_destroy_prot_space(struct prot_space_data *space)
+{
+ lwc_string_unref(space->scheme);
+ free(space->realm);
+ free(space->auth);
+
+ free(space);
+}
+
+
+/**
+ * Destroy a path tree
+ *
+ * Iteratively (without recursion) destroys every node in the tree: the
+ * walk descends to a node without children, destroys its contents and
+ * frees it, then moves to the next sibling or climbs to the parent,
+ * which is destroyed in turn once it has no further sibling to visit
+ * first. The root node itself is also destroyed and freed, so \a root
+ * must not be used after this call. Siblings of \a root are not
+ * visited.
+ *
+ * \param root Root node of tree to destroy
+ */
+static void urldb_destroy_path_tree(struct path_data *root)
+{
+ struct path_data *p = root;
+
+ do {
+ if (p->children != NULL) {
+ p = p->children;
+ } else {
+ struct path_data *q = p; /* node whose destruction is pending */
+
+ while (p != root) {
+ if (p->next != NULL) {
+ p = p->next;
+ break;
+ }
+
+ p = p->parent; /* step up before q is freed */
+
+ urldb_destroy_path_node_content(q);
+ free(q);
+
+ q = p;
+ }
+
+ urldb_destroy_path_node_content(q);
+ free(q);
+ }
+ } while (p != root);
+}
+
+
/**
- * Load a cookie file into the database
+ * Destroy a host tree
*
- * \param filename File to load
+ * \param root Root node of tree to destroy
*/
+static void urldb_destroy_host_tree(struct host_part *root)
+{
+ struct host_part *a, *b;
+ struct path_data *p, *q;
+ struct prot_space_data *s, *t;
+
+ /* Destroy children first; the next pointer is saved before each
+ * recursive call because that call frees the child node */
+ for (a = root->children; a; a = b) {
+ b = a->next;
+ urldb_destroy_host_tree(a);
+ }
+
+ /* Now clean up paths: every child of the embedded root path node
+ * heads a tree that urldb_destroy_path_tree() destroys and frees */
+ for (p = root->paths.children; p; p = q) {
+ q = p->next;
+ urldb_destroy_path_tree(p);
+ }
+
+ /* Root path: only its contents are destroyed here, because the node
+ * is a member of the host_part that is freed below */
+ urldb_destroy_path_node_content(&root->paths);
+
+ /* Protection space data */
+ for (s = root->prot_space; s; s = t) {
+ t = s->next;
+ urldb_destroy_prot_space(s);
+ }
+
+ /* And ourselves */
+ free(root->part);
+ free(root);
+}
+
+
+/**
+ * Destroy a search tree
+ *
+ * Recursively frees the left and right subtrees and then the node
+ * itself. A child pointer equal to &empty marks an absent subtree and is
+ * never followed or freed. Only the search nodes are freed; the host
+ * data each node points at is left alone.
+ *
+ * \param root Root node of tree to destroy (must not be &empty, since
+ * it is passed to free())
+ */
+static void urldb_destroy_search_tree(struct search_node *root)
+{
+ /* Destroy children */
+ if (root->left != &empty)
+ urldb_destroy_search_tree(root->left);
+ if (root->right != &empty)
+ urldb_destroy_search_tree(root->right);
+
+ /* And destroy ourselves */
+ free(root);
+}
+
+
+/*************** External interface ***************/
+
+
+/* exported interface documented in content/urldb.h
+ *
+ * Frees, in order: every search tree that is not the empty sentinel,
+ * every host tree hanging off db_root, and the URL bloom filter if one
+ * was created.
+ *
+ * NOTE(review): search_trees[], db_root.children and url_bloom are not
+ * reset here after what they point at has been freed -- confirm that
+ * nothing uses the database again after this call.
+ */
+void urldb_destroy(void)
+{
+ struct host_part *a, *b;
+ int i;
+
+ /* Clean up search trees */
+ for (i = 0; i < NUM_SEARCH_TREES; i++) {
+ if (search_trees[i] != &empty)
+ urldb_destroy_search_tree(search_trees[i]);
+ }
+
+ /* And database */
+ for (a = db_root.children; a; a = b) {
+ b = a->next; /* saved first: the host node is freed by the call below */
+ urldb_destroy_host_tree(a);
+ }
+
+ /* And the bloom filter */
+ if (url_bloom != NULL)
+ bloom_destroy(url_bloom);
+}
+
+
+/* exported interface documented in content/urldb.h */
+/*
+ * Load a URL database file.
+ *
+ * File layout as read by this function: a version line, then for each
+ * host a hostname line, a URL-count line and eight lines per URL
+ * (scheme, port, path, visits, last visit, content type, one field that
+ * is read and ignored, title).
+ *
+ * Returns NSERROR_NOT_FOUND if the file cannot be opened,
+ * NSERROR_NEED_DATA if it is empty, NSERROR_INVALID for an unsupported
+ * version, NSERROR_NOMEM if an entry could not be inserted, and
+ * NSERROR_OK otherwise (including when the file ends mid-record).
+ */
+nserror urldb_load(const char *filename)
+{
+#define MAXIMUM_URL_LENGTH 4096
+	char s[MAXIMUM_URL_LENGTH];
+	char host[256];
+	struct host_part *h;
+	int urls;
+	int i;
+	int version;
+	size_t length;
+	FILE *fp;
+
+	assert(filename);
+
+	LOG(("Loading URL file %s", filename));
+
+	if (url_bloom == NULL)
+		url_bloom = bloom_create(BLOOM_SIZE);
+
+	fp = fopen(filename, "r");
+	if (!fp) {
+		LOG(("Failed to open file '%s' for reading", filename));
+		return NSERROR_NOT_FOUND;
+	}
+
+	if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) {
+		fclose(fp);
+		return NSERROR_NEED_DATA;
+	}
+
+	version = atoi(s);
+	if (version < MIN_URL_FILE_VERSION) {
+		LOG(("Unsupported URL file version."));
+		fclose(fp);
+		return NSERROR_INVALID;
+	}
+	if (version > URL_FILE_VERSION) {
+		LOG(("Unknown URL file version."));
+		fclose(fp);
+		return NSERROR_INVALID;
+	}
+
+	while (fgets(host, sizeof host, fp)) {
+		/* get the hostname; strcspn() only removes a newline that is
+		 * really there, so an unterminated final line keeps its last
+		 * character and an empty buffer is never indexed at -1 */
+		length = strcspn(host, "\n");
+		host[length] = '\0';
+
+		/* skip data that has ended up with a host of '' */
+		if (length == 0) {
+			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+				break;
+			urls = atoi(s);
+			/* Eight fields/url */
+			for (i = 0; i < (8 * urls); i++) {
+				if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+					break;
+			}
+			continue;
+		}
+
+		/* read number of URLs */
+		if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+			break;
+		urls = atoi(s);
+
+		/* no URLs => try next host */
+		if (urls == 0) {
+			LOG(("No URLs for '%s'", host));
+			continue;
+		}
+
+		h = urldb_add_host(host);
+		if (!h) {
+			LOG(("Failed adding host: '%s'", host));
+			fclose(fp);
+			return NSERROR_NOMEM;
+		}
+
+		/* load the non-corrupt data */
+		for (i = 0; i < urls; i++) {
+			struct path_data *p = NULL;
+			char scheme[64], ports[10];
+			char url[64 + 3 + 256 + 6 + 4096 + 1];
+			unsigned int port;
+			bool is_file = false;
+			nsurl *nsurl;
+			lwc_string *scheme_lwc, *fragment_lwc;
+			char *path_query;
+			size_t len;
+
+			if (!fgets(scheme, sizeof scheme, fp))
+				break;
+			scheme[strcspn(scheme, "\n")] = '\0';
+
+			if (!fgets(ports, sizeof ports, fp))
+				break;
+			ports[strcspn(ports, "\n")] = '\0';
+			port = atoi(ports);
+
+			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+				break;
+			s[strcspn(s, "\n")] = '\0';
+
+			if (!strcasecmp(host, "localhost") &&
+					!strcasecmp(scheme, "file"))
+				is_file = true;
+
+			snprintf(url, sizeof url, "%s://%s%s%s%s",
+				scheme,
+				/* file URLs have no host */
+				(is_file ? "" : host),
+				(port ? ":" : ""),
+				(port ? ports : ""),
+				s);
+
+			/* TODO: store URLs in pre-parsed state, and make
+			 *       a nsurl_load to generate the nsurl more
+			 *       swiftly.
+			 *       Need a nsurl_save too.
+			 */
+			if (nsurl_create(url, &nsurl) != NSERROR_OK) {
+				LOG(("Failed inserting '%s'", url));
+				fclose(fp);
+				return NSERROR_NOMEM;
+			}
+
+			if (url_bloom != NULL) {
+				uint32_t hash = nsurl_hash(nsurl);
+				bloom_insert_hash(url_bloom, hash);
+			}
+
+			/* Copy and merge path/query strings */
+			if (nsurl_get(nsurl, NSURL_PATH | NSURL_QUERY,
+					&path_query, &len) != NSERROR_OK) {
+				LOG(("Failed inserting '%s'", url));
+				/* drop our reference before bailing out */
+				nsurl_unref(nsurl);
+				fclose(fp);
+				return NSERROR_NOMEM;
+			}
+
+			scheme_lwc = nsurl_get_component(nsurl, NSURL_SCHEME);
+			fragment_lwc = nsurl_get_component(nsurl,
+					NSURL_FRAGMENT);
+			/* NOTE(review): path_query is handed to
+			 * urldb_add_path() and never freed here, exactly as
+			 * before; presumably the callee owns it - confirm. */
+			p = urldb_add_path(scheme_lwc, port, h, path_query,
+					fragment_lwc, nsurl);
+
+			/* Release our references whether or not the insert
+			 * worked, so the failure path no longer leaks them */
+			nsurl_unref(nsurl);
+			lwc_string_unref(scheme_lwc);
+			if (fragment_lwc != NULL)
+				lwc_string_unref(fragment_lwc);
+
+			if (!p) {
+				LOG(("Failed inserting '%s'", url));
+				fclose(fp);
+				return NSERROR_NOMEM;
+			}
+
+			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+				break;
+			p->urld.visits = (unsigned int)atoi(s);
+
+			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+				break;
+			p->urld.last_visit = (time_t)atoi(s);
+
+			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+				break;
+			p->urld.type = (content_type)atoi(s);
+
+			/* seventh field: read to stay in step, value unused */
+			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+				break;
+
+			if (!fgets(s, MAXIMUM_URL_LENGTH, fp))
+				break;
+			length = strcspn(s, "\n");
+			if (length > 0) {
+				char *title;
+
+				s[length] = '\0';
+				title = malloc(length + 1);
+				if (title != NULL) {
+					memcpy(title, s, length + 1);
+					/* entry may already carry a title if
+					 * the URL was known beforehand */
+					free(p->urld.title);
+					p->urld.title = title;
+				}
+			}
+		}
+	}
+
+	fclose(fp);
+	LOG(("Successfully loaded URL file"));
+#undef MAXIMUM_URL_LENGTH
+
+	return NSERROR_OK;
+}
+
+/* exported interface documented in content/urldb.h */
+/*
+ * Write the URL database to filename: a version line followed by the
+ * contents of every search tree.
+ *
+ * Returns NSERROR_SAVE_FAILED if the file cannot be opened or if any
+ * write or the final close reports an error; NSERROR_OK otherwise.
+ */
+nserror urldb_save(const char *filename)
+{
+	FILE *fp;
+	int i;
+	bool failed;
+
+	assert(filename);
+
+	fp = fopen(filename, "w");
+	if (!fp) {
+		LOG(("Failed to open file '%s' for writing", filename));
+		return NSERROR_SAVE_FAILED;
+	}
+
+	/* file format version number */
+	fprintf(fp, "%d\n", URL_FILE_VERSION);
+
+	for (i = 0; i != NUM_SEARCH_TREES; i++) {
+		urldb_save_search_tree(search_trees[i], fp);
+	}
+
+	/* The stream error flag is sticky, so one check covers every write
+	 * above; fclose() can additionally fail while flushing. */
+	failed = (ferror(fp) != 0);
+	if (fclose(fp) != 0)
+		failed = true;
+
+	if (failed) {
+		LOG(("Failed writing file '%s'", filename));
+		return NSERROR_SAVE_FAILED;
+	}
+
+	return NSERROR_OK;
+}
+
+
+/* exported interface documented in content/urldb.h */
+/* Record the persistence flag on the entry for url; URLs that are not in
+ * the database are left alone. */
+void urldb_set_url_persistence(nsurl *url, bool persist)
+{
+	struct path_data *entry;
+
+	assert(url);
+
+	entry = urldb_find_url(url);
+	if (entry != NULL) {
+		entry->persistent = persist;
+	}
+}
+
+
+/* exported interface documented in content/urldb.h */
+/*
+ * Insert url into the database (and into the bloom filter, when one is
+ * available).
+ *
+ * URLs without a host are only accepted when their scheme is "file", in
+ * which case they are filed under "localhost".
+ *
+ * Returns true if a path entry exists for the URL afterwards, false on
+ * any failure.
+ */
+bool urldb_add_url(nsurl *url)
+{
+	struct host_part *h;
+	struct path_data *p = NULL;
+	lwc_string *scheme;
+	lwc_string *port;
+	lwc_string *host;
+	lwc_string *fragment;
+	const char *host_str;
+	char *path_query = NULL;
+	size_t len;
+	bool match;
+	unsigned int port_int = 0;
+
+	assert(url);
+
+	if (url_bloom == NULL)
+		url_bloom = bloom_create(BLOOM_SIZE);
+
+	if (url_bloom != NULL) {
+		uint32_t hash = nsurl_hash(url);
+		bloom_insert_hash(url_bloom, hash);
+	}
+
+	/* Copy and merge path/query strings */
+	if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &path_query, &len) !=
+			NSERROR_OK) {
+		return false;
+	}
+	assert(path_query != NULL);
+
+	scheme = nsurl_get_component(url, NSURL_SCHEME);
+	if (scheme == NULL) {
+		free(path_query);
+		return false;
+	}
+
+	host = nsurl_get_component(url, NSURL_HOST);
+	if (host != NULL) {
+		/* keep our reference until host_str has been consumed */
+		host_str = lwc_string_data(host);
+
+	} else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) ==
+			lwc_error_ok && match == true) {
+		host_str = "localhost";
+
+	} else {
+		lwc_string_unref(scheme);
+		free(path_query);
+		return false;
+	}
+
+	fragment = nsurl_get_component(url, NSURL_FRAGMENT);
+
+	port = nsurl_get_component(url, NSURL_PORT);
+	if (port != NULL) {
+		port_int = atoi(lwc_string_data(port));
+		lwc_string_unref(port);
+	}
+
+	/* Get host entry */
+	h = urldb_add_host(host_str);
+
+	if (host != NULL)
+		lwc_string_unref(host);
+
+	/* Get path entry */
+	if (h != NULL) {
+		/* NOTE(review): path_query is not freed after this call,
+		 * as before; presumably urldb_add_path() owns it - confirm */
+		p = urldb_add_path(scheme, port_int, h, path_query,
+				fragment, url);
+	} else {
+		/* urldb_add_path() never saw the buffer, so release it here */
+		free(path_query);
+	}
+
+	lwc_string_unref(scheme);
+	if (fragment != NULL)
+		lwc_string_unref(fragment);
+
+	return (p != NULL);
+}
+
+
+/* exported interface documented in content/urldb.h */
+/* Replace the stored title of url with a private copy of title. Nothing
+ * changes if the URL is unknown or the copy cannot be allocated. */
+void urldb_set_url_title(nsurl *url, const char *title)
+{
+	struct path_data *entry;
+	char *copy;
+
+	assert(url && title);
+
+	entry = urldb_find_url(url);
+	if (entry == NULL)
+		return;
+
+	/* duplicate first so the old title survives an allocation failure */
+	copy = strdup(title);
+	if (copy != NULL) {
+		free(entry->urld.title);
+		entry->urld.title = copy;
+	}
+}
+
+
+/* exported interface documented in content/urldb.h */
+/* Store type as the content type of url, if the URL is in the database. */
+void urldb_set_url_content_type(nsurl *url, content_type type)
+{
+	struct path_data *entry;
+
+	assert(url);
+
+	entry = urldb_find_url(url);
+	if (entry != NULL) {
+		entry->urld.type = type;
+	}
+}
+
+
+/* exported interface documented in content/urldb.h */
+/* Count one more visit to url and stamp it with the current time.
+ * Unknown URLs are ignored. */
+void urldb_update_url_visit_data(nsurl *url)
+{
+	struct path_data *entry;
+
+	assert(url);
+
+	entry = urldb_find_url(url);
+	if (entry != NULL) {
+		entry->urld.last_visit = time(NULL);
+		entry->urld.visits += 1;
+	}
+}
+
+
+/* exported interface documented in content/urldb.h */
+/* Zero the visit count and last-visit time of url. Unknown URLs are
+ * ignored. */
+void urldb_reset_url_visit_data(nsurl *url)
+{
+	struct path_data *entry;
+
+	assert(url);
+
+	entry = urldb_find_url(url);
+	if (entry != NULL) {
+		entry->urld.visits = 0;
+		entry->urld.last_visit = (time_t)0;
+	}
+}
+
+
+/* exported interface documented in content/urldb.h */
+/* Return a read-only view of the data stored for url, or NULL when the
+ * URL is not in the database. The internal record is exposed through a
+ * cast to the public struct url_data type. */
+const struct url_data *urldb_get_url_data(nsurl *url)
+{
+	struct path_data *entry;
+
+	assert(url);
+
+	entry = urldb_find_url(url);
+	if (entry == NULL)
+		return NULL;
+
+	return (const struct url_data *) &entry->urld;
+}
+
+
+/* exported interface documented in content/urldb.h */
+/* Return the nsurl held by the database entry matching url, or NULL when
+ * there is no such entry. No additional reference is taken here. */
+nsurl *urldb_get_url(nsurl *url)
+{
+	struct path_data *entry;
+
+	assert(url);
+
+	entry = urldb_find_url(url);
+
+	return (entry != NULL) ? entry->url : NULL;
+}
+
+
+/* exported interface documented in content/urldb.h */
+/*
+ * Remember the credentials auth for the protection space identified by
+ * the scheme and port of url together with realm.
+ *
+ * The URL is added to the database if necessary. An existing matching
+ * protection space has its credentials replaced; otherwise a new one is
+ * created and prepended to the host's list. On allocation failure the
+ * database is left as it was.
+ */
+void urldb_set_auth_details(nsurl *url, const char *realm, const char *auth)
+{
+	struct path_data *p, *pi;
+	struct host_part *h;
+	struct prot_space_data *space, *space_alloc;
+	char *realm_alloc, *auth_alloc;
+	bool match;
+
+	assert(url && realm && auth);
+
+	/* add url, in case it's missing */
+	urldb_add_url(url);
+
+	p = urldb_find_url(url);
+
+	if (!p)
+		return;
+
+	/* Search for host_part: climb to the topmost path node, which the
+	 * code then reinterprets as the enclosing host_part */
+	for (pi = p; pi->parent != NULL; pi = pi->parent)
+		;
+	h = (struct host_part *)pi;
+
+	/* Search if given URL belongs to a protection space we already know of. */
+	for (space = h->prot_space; space; space = space->next) {
+		if (!strcmp(space->realm, realm) &&
+				lwc_string_isequal(space->scheme, p->scheme,
+						&match) == lwc_error_ok &&
+				match == true &&
+				space->port == p->port)
+			break;
+	}
+
+	if (space != NULL) {
+		/* Overrule existing auth. Copy before freeing so a failed
+		 * allocation cannot leave the space with NULL credentials. */
+		auth_alloc = strdup(auth);
+		if (auth_alloc == NULL)
+			return;
+
+		free(space->auth);
+		space->auth = auth_alloc;
+	} else {
+		/* Create a new protection space. */
+		space = space_alloc = malloc(sizeof(struct prot_space_data));
+		realm_alloc = strdup(realm);
+		auth_alloc = strdup(auth);
+
+		if (!space_alloc || !realm_alloc || !auth_alloc) {
+			free(space_alloc);
+			free(realm_alloc);
+			free(auth_alloc);
+			return;
+		}
+
+		space->scheme = lwc_string_ref(p->scheme);
+		space->port = p->port;
+		space->realm = realm_alloc;
+		space->auth = auth_alloc;
+		space->next = h->prot_space;
+		h->prot_space = space;
+	}
+
+	p->prot_space = space;
+}
+
+
+/* exported interface documented in content/urldb.h */
+/*
+ * Look up stored credentials for url.
+ *
+ * The URL is first added to the database so the lookup has a node to
+ * start from. Credentials attached to that node or any ancestor win.
+ * Failing that, and only when realm is given, the host's protection
+ * spaces are searched for one matching realm, scheme and port; a hit is
+ * cached on the URL's node. Returns NULL when nothing matches.
+ */
+const char *urldb_get_auth_details(nsurl *url, const char *realm)
+{
+	struct path_data *p, *p_cur, *p_top;
+	const struct host_part *h;
+	const struct prot_space_data *space;
+	bool match;
+
+	assert(url);
+
+	/* add to the db, so our lookup will work */
+	urldb_add_url(url);
+
+	p = urldb_find_url(url);
+	if (p == NULL)
+		return NULL;
+
+	/* Walk from the node up to the root, remembering the last node
+	 * visited; the first protection space found is the answer. */
+	p_cur = p;
+	do {
+		if (p_cur->prot_space) {
+			return p_cur->prot_space->auth;
+		}
+		p_top = p_cur;
+		p_cur = p_cur->parent;
+	} while (p_cur != NULL);
+
+	/* Without a realm the protection space cannot be identified */
+	if (realm == NULL)
+		return NULL;
+
+	/* The topmost path node is reinterpreted as its host_part */
+	h = (const struct host_part *)p_top;
+
+	for (space = h->prot_space; space != NULL; space = space->next) {
+		if (strcmp(space->realm, realm) != 0)
+			continue;
+
+		if (lwc_string_isequal(space->scheme, p->scheme, &match) !=
+				lwc_error_ok || match != true)
+			continue;
+
+		if (space->port != p->port)
+			continue;
+
+		p->prot_space = space;
+		return p->prot_space->auth;
+	}
+
+	return NULL;
+}
+
+
+/* exported interface documented in content/urldb.h */
+/* Record, on the host that url belongs to, whether invalid certificates
+ * are permitted. The URL is added to the database first if needed. */
+void urldb_set_cert_permissions(nsurl *url, bool permit)
+{
+	struct path_data *node;
+
+	assert(url);
+
+	/* add url, in case it's missing */
+	urldb_add_url(url);
+
+	node = urldb_find_url(url);
+	if (node == NULL)
+		return;
+
+	/* climb to the topmost path node */
+	while (node->parent != NULL) {
+		node = node->parent;
+	}
+
+	/* the topmost node is reinterpreted as its host_part */
+	((struct host_part *)node)->permit_invalid_certs = permit;
+}
+
+
+/* exported interface documented in content/urldb.h */
+/* Report whether invalid certificates are permitted for the host of url.
+ * URLs that are not in the database yield false. */
+bool urldb_get_cert_permissions(nsurl *url)
+{
+	struct path_data *node;
+
+	assert(url);
+
+	node = urldb_find_url(url);
+	if (node == NULL)
+		return false;
+
+	/* climb to the topmost path node */
+	while (node->parent != NULL) {
+		node = node->parent;
+	}
+
+	/* the topmost node is reinterpreted as its host_part */
+	return ((const struct host_part *)node)->permit_invalid_certs;
+}
+
+
+/* exported interface documented in content/urldb.h */
+/* Attach bitmap as the thumbnail of url. A previously stored, different
+ * bitmap is destroyed; unknown URLs are ignored. */
+void urldb_set_thumbnail(nsurl *url, struct bitmap *bitmap)
+{
+	struct path_data *entry;
+	struct bitmap *previous;
+
+	assert(url);
+
+	entry = urldb_find_url(url);
+	if (entry == NULL)
+		return;
+
+	previous = entry->thumb;
+	if (previous != NULL && previous != bitmap) {
+		bitmap_destroy(previous);
+	}
+
+	entry->thumb = bitmap;
+}
+
+
+/* exported interface documented in content/urldb.h */
+/* Return the thumbnail stored for url, or NULL if the URL is unknown
+ * (the stored value itself may also be NULL). */
+struct bitmap *urldb_get_thumbnail(nsurl *url)
+{
+	struct path_data *entry;
+
+	assert(url);
+
+	entry = urldb_find_url(url);
+
+	return (entry != NULL) ? entry->thumb : NULL;
+}
+
+
+/* exported interface documented in content/urldb.h */
+/*
+ * Invoke callback for database entries matching prefix.
+ *
+ * Any "scheme://" lead-in is dropped. A prefix containing '/' is treated
+ * as host plus partial path; otherwise it is a partial host name. In
+ * both cases, when the text does not already begin with "www." the same
+ * search is retried with "www." prepended.
+ */
+void urldb_iterate_partial(const char *prefix,
+		bool (*callback)(nsurl *url,
+		const struct url_data *data))
+{
+	char host[256];
+	char buf[260]; /* max domain + "www." */
+	const char *slash, *scheme_sep;
+	struct search_node *tree;
+	const struct host_part *h;
+	int len;
+
+	assert(prefix && callback);
+
+	/* strip scheme */
+	scheme_sep = strstr(prefix, "://");
+	if (scheme_sep != NULL)
+		prefix = scheme_sep + 3;
+
+	slash = strchr(prefix, '/');
+	tree = urldb_get_search_tree(prefix);
+
+	if (slash == NULL) {
+		/* no slash: looking for hosts */
+		len = strlen(prefix);
+
+		if (!urldb_iterate_partial_host(tree, prefix, callback))
+			return;
+
+		if (len > 3 && strncasecmp(prefix, "www.", 4) == 0)
+			return;
+
+		/* now look for www.prefix */
+		snprintf(buf, sizeof buf, "www.%s", prefix);
+		urldb_iterate_partial_host(search_trees[ST_DN + 'w' - 'a'],
+				buf, callback);
+		return;
+	}
+
+	/* a slash in the input means we are looking for a path on one
+	 * particular host: isolate the host name first */
+	len = slash - prefix;
+	snprintf(host, sizeof host, "%.*s", (int) (slash - prefix), prefix);
+
+	h = urldb_search_find(tree, host);
+	if (h == NULL) {
+		if (len > 3 && strncasecmp(host, "www.", 4) == 0)
+			return;
+
+		/* retry as www.host */
+		snprintf(buf, sizeof buf, "www.%s", host);
+		h = urldb_search_find(search_trees[ST_DN + 'w' - 'a'], buf);
+		if (h == NULL)
+			return;
+	}
+
+	if (h->paths.children) {
+		/* Have paths, iterate them */
+		urldb_iterate_partial_path(&h->paths, slash + 1, callback);
+	}
+}
+
+
+/* exported interface documented in content/urldb.h */
+/* Visit the URL entries of every search tree in turn, stopping as soon
+ * as the per-tree iteration reports false. */
+void urldb_iterate_entries(bool (*callback)(nsurl *url,
+		const struct url_data *data))
+{
+	int tree = 0;
+
+	assert(callback);
+
+	while (tree < NUM_SEARCH_TREES &&
+			urldb_iterate_entries_host(search_trees[tree],
+					callback, NULL)) {
+		tree++;
+	}
+}
+
+
+/* exported interface documented in content/urldb.h */
+/* Visit the cookies of every search tree in turn, stopping as soon as
+ * the per-tree iteration reports false. */
+void urldb_iterate_cookies(bool (*callback)(const struct cookie_data *data))
+{
+	int tree = 0;
+
+	assert(callback);
+
+	while (tree < NUM_SEARCH_TREES &&
+			urldb_iterate_entries_host(search_trees[tree],
+					NULL, callback)) {
+		tree++;
+	}
+}
+
+
+/* exported interface documented in content/urldb.h */
+/*
+ * Parse a Set-Cookie header received for url and store the cookies it
+ * contains.
+ *
+ * When referer is given, the host of url must equal or domain-match the
+ * referer's host. Each parsed cookie must carry a name and value, have a
+ * path that prefixes the URL path, and a domain with embedded dots that
+ * matches the URL host. Processing stops at the first cookie that fails.
+ *
+ * Returns true if every cookie in the header was stored, false otherwise.
+ */
+bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer)
+{
+	const char *cur = header, *end;
+	lwc_string *path, *host, *scheme;
+	nsurl *urlt;
+	bool match;
+	size_t header_len;
+
+	assert(url && header);
+
+	/* Get defragmented URL, as 'urlt' */
+	if (nsurl_defragment(url, &urlt) != NSERROR_OK)
+		return false;
+
+	scheme = nsurl_get_component(url, NSURL_SCHEME);
+	if (scheme == NULL) {
+		nsurl_unref(urlt);
+		return false;
+	}
+
+	path = nsurl_get_component(url, NSURL_PATH);
+	if (path == NULL) {
+		lwc_string_unref(scheme);
+		nsurl_unref(urlt);
+		return false;
+	}
+
+	host = nsurl_get_component(url, NSURL_HOST);
+	if (host == NULL) {
+		lwc_string_unref(path);
+		lwc_string_unref(scheme);
+		nsurl_unref(urlt);
+		return false;
+	}
+
+	if (referer) {
+		lwc_string *rhost;
+
+		/* Ensure that url's host name domain matches
+		 * referer's (4.3.5) */
+		rhost = nsurl_get_component(referer, NSURL_HOST);
+		if (rhost == NULL) {
+			goto error;
+		}
+
+		/* Domain match host names */
+		if (lwc_string_isequal(host, rhost, &match) == lwc_error_ok &&
+				match == false) {
+			const char *hptr;
+			const char *rptr;
+			const char *dot;
+			const char *host_data = lwc_string_data(host);
+			const char *rhost_data = lwc_string_data(rhost);
+
+			/* Ensure neither host nor rhost are IP addresses */
+			if (urldb__host_is_ip_address(host_data) ||
+					urldb__host_is_ip_address(rhost_data)) {
+				/* IP address, so no partial match */
+				lwc_string_unref(rhost);
+				goto error;
+			}
+
+			/* Not exact match, so try the following:
+			 *
+			 * 1) Find the longest common suffix of host and rhost
+			 *    (may be all of host/rhost)
+			 * 2) Discard characters from the start of the suffix
+			 *    until the suffix starts with a dot
+			 *    (prevents foobar.com matching bar.com)
+			 * 3) Ensure the suffix is non-empty and contains
+			 *    embedded dots (to avoid permitting .com as a
+			 *    suffix)
+			 *
+			 * Note that the above in no way resembles the
+			 * domain matching algorithm found in RFC2109.
+			 * It does, however, model the real world rather
+			 * more accurately.
+			 */
+
+			/** \todo In future, we should consult a TLD service
+			 * instead of just looking for embedded dots.
+			 */
+
+			hptr = host_data + lwc_string_length(host) - 1;
+			rptr = rhost_data + lwc_string_length(rhost) - 1;
+
+			/* 1 */
+			while (hptr >= host_data && rptr >= rhost_data) {
+				if (*hptr != *rptr)
+					break;
+				hptr--;
+				rptr--;
+			}
+			/* Ensure we end up pointing at the start of the
+			 * common suffix. The above loop will exit pointing
+			 * to the byte before the start of the suffix. */
+			hptr++;
+
+			/* 2 */
+			while (*hptr != '\0' && *hptr != '.')
+				hptr++;
+
+			/* 3 */
+			if (*hptr == '\0' ||
+					(dot = strchr(hptr + 1, '.')) == NULL ||
+					*(dot + 1) == '\0') {
+				lwc_string_unref(rhost);
+				goto error;
+			}
+		}
+
+		lwc_string_unref(rhost);
+	}
+
+	/* Stop two bytes short of the terminator (trailing CRLF). Clamp so
+	 * that a header shorter than two bytes never yields a pointer in
+	 * front of the buffer. */
+	header_len = strlen(cur);
+	end = cur + (header_len >= 2 ? header_len - 2 : 0);
+
+	do {
+		struct cookie_internal_data *c;
+		char *dot;
+		size_t len;
+
+		c = urldb_parse_cookie(url, &cur);
+		if (!c) {
+			/* failed => stop parsing */
+			goto error;
+		}
+
+		/* validate cookie */
+
+		/* 4.2.2:i Cookie must have NAME and VALUE */
+		if (!c->name || !c->value) {
+			urldb_free_cookie(c);
+			goto error;
+		}
+
+		/* 4.3.2:i Cookie path must be a prefix of URL path */
+		len = strlen(c->path);
+		if (len > lwc_string_length(path) ||
+				strncmp(c->path, lwc_string_data(path),
+					len) != 0) {
+			urldb_free_cookie(c);
+			goto error;
+		}
+
+		/* 4.3.2:ii Cookie domain must contain embedded dots */
+		dot = strchr(c->domain + 1, '.');
+		if (!dot || *(dot + 1) == '\0') {
+			/* no embedded dots */
+			urldb_free_cookie(c);
+			goto error;
+		}
+
+		/* Domain match fetch host with cookie domain */
+		if (strcasecmp(lwc_string_data(host), c->domain) != 0) {
+			int hlen, dlen;
+			char *domain = c->domain;
+
+			/* c->domain must be a domain cookie here because:
+			 * c->domain is either:
+			 *   + specified in the header as a domain cookie
+			 *     (non-domain cookies in the header are ignored
+			 *      by urldb_parse_cookie / urldb_parse_avpair)
+			 *   + defaulted to the URL's host part
+			 *     (by urldb_parse_cookie if no valid domain was
+			 *      specified in the header)
+			 *
+			 * The latter will pass the strcasecmp above, which
+			 * leaves the former (i.e. a domain cookie)
+			 */
+			assert(c->domain[0] == '.');
+
+			/* 4.3.2:iii */
+			if (urldb__host_is_ip_address(lwc_string_data(host))) {
+				/* IP address, so no partial match */
+				urldb_free_cookie(c);
+				goto error;
+			}
+
+			hlen = lwc_string_length(host);
+			dlen = strlen(c->domain);
+
+			if (hlen <= dlen && hlen != dlen - 1) {
+				/* Partial match not possible */
+				urldb_free_cookie(c);
+				goto error;
+			}
+
+			if (hlen == dlen - 1) {
+				/* Relax matching to allow
+				 * host a.com to match .a.com */
+				domain++;
+				dlen--;
+			}
+
+			if (strcasecmp(lwc_string_data(host) + (hlen - dlen),
+					domain)) {
+				urldb_free_cookie(c);
+				goto error;
+			}
+
+			/* 4.3.2:iv Ensure H contains no dots
+			 *
+			 * If you believe the spec, H should contain no
+			 * dots in _any_ cookie. Unfortunately, however,
+			 * reality differs in that many sites send domain
+			 * cookies of the form .foo.com from hosts such
+			 * as bar.bat.foo.com and then expect domain
+			 * matching to work. Thus we have to do what they
+			 * expect, regardless of any potential security
+			 * implications.
+			 *
+			 * This is what code conforming to the spec would
+			 * look like:
+			 *
+			 * for (int i = 0; i < (hlen - dlen); i++) {
+			 *	if (host[i] == '.') {
+			 *		urldb_free_cookie(c);
+			 *		goto error;
+			 *	}
+			 * }
+			 */
+		}
+
+		/* Now insert into database */
+		if (!urldb_insert_cookie(c, scheme, urlt))
+			goto error;
+	} while (cur < end);
+
+	lwc_string_unref(host);
+	lwc_string_unref(path);
+	lwc_string_unref(scheme);
+	nsurl_unref(urlt);
+
+	return true;
+
+error:
+	lwc_string_unref(host);
+	lwc_string_unref(path);
+	lwc_string_unref(scheme);
+	nsurl_unref(urlt);
+
+	return false;
+}
+
+
+/**
+ * Decide whether a stored cookie may be offered for a request.
+ *
+ * \param c       Cookie to examine
+ * \param scheme  Scheme to test secure cookies against
+ * \param now     Current time
+ * \return false if the cookie has expired (an expiry of -1 never
+ *         expires here) or if it is marked secure and scheme is not
+ *         https; true otherwise
+ */
+static bool urldb__cookie_is_sendable(const struct cookie_internal_data *c,
+		lwc_string *scheme, time_t now)
+{
+	bool match;
+
+	if (c->expires != -1 && c->expires < now)
+		/* cookie has expired => ignore */
+		return false;
+
+	/* lwc_string_isequal() reports success as lwc_error_ok, so the
+	 * result has to be compared against it explicitly; testing the
+	 * return value for truth would never reject anything. */
+	if (c->secure && lwc_string_isequal(scheme, corestring_lwc_https,
+			&match) == lwc_error_ok && match == false)
+		/* secure cookie for insecure host => ignore */
+		return false;
+
+	return true;
+}
+
+
+/* exported interface documented in content/urldb.h */
+/*
+ * Build the cookie string to send with a request for url.
+ *
+ * The URL is added to the database so that the tree can be walked
+ * upwards from its node. Cookies are collected from the exact path, from
+ * directory entries and path nodes above it, and from the host and its
+ * parent hosts. Every cookie collected has its last_used time updated
+ * and is passed to cookie_manager_add().
+ *
+ * Returns a heap-allocated string, or NULL if there are no matching
+ * cookies or on allocation failure.
+ */
+char *urldb_get_cookie(nsurl *url, bool include_http_only)
+{
+	const struct path_data *p, *q;
+	const struct host_part *h;
+	lwc_string *path_lwc;
+	struct cookie_internal_data *c;
+	/* version starts at the highest value and is lowered to the
+	 * smallest version among the matched cookies */
+	int count = 0, version = COOKIE_RFC2965;
+	struct cookie_internal_data **matched_cookies;
+	int matched_cookies_size = 20;
+	int ret_alloc = 4096, ret_used = 1;
+	const char *path;
+	char *ret;
+	lwc_string *scheme;
+	time_t now;
+	int i;
+
+	assert(url != NULL);
+
+	/* The URL must exist in the db in order to find relevant cookies, since
+	 * we search up the tree from the URL node, and cookies from further
+	 * up also apply. */
+	urldb_add_url(url);
+
+	p = urldb_find_url(url);
+	if (!p)
+		return NULL;
+
+	scheme = p->scheme;
+
+	matched_cookies = malloc(matched_cookies_size *
+			sizeof(struct cookie_internal_data *));
+	if (!matched_cookies)
+		return NULL;
+
+/* Append cookie c to matched_cookies (growing the array by 20 slots when
+ * it becomes full), track the lowest cookie version and mark the cookie
+ * as used. Returns NULL from the enclosing function if growing fails. */
+#define RECORD_MATCHED_COOKIE(c)					\
+	do {								\
+		matched_cookies[count++] = (c);				\
+									\
+		if (count == matched_cookies_size) {			\
+			struct cookie_internal_data **temp;		\
+			temp = realloc(matched_cookies,			\
+				(matched_cookies_size + 20) *		\
+				sizeof(struct cookie_internal_data *));	\
+									\
+			if (temp == NULL) {				\
+				free(ret);				\
+				free(matched_cookies);			\
+				return NULL;				\
+			}						\
+									\
+			matched_cookies = temp;				\
+			matched_cookies_size += 20;			\
+		}							\
+									\
+		if ((c)->version < (unsigned int)version)		\
+			version = (c)->version;				\
+									\
+		(c)->last_used = now;					\
+									\
+		cookie_manager_add((struct cookie_data *)(c));		\
+	} while(0)
+
+	ret = malloc(ret_alloc);
+	if (!ret) {
+		free(matched_cookies);
+		return NULL;
+	}
+
+	ret[0] = '\0';
+
+	path_lwc = nsurl_get_component(url, NSURL_PATH);
+	if (path_lwc == NULL) {
+		free(ret);
+		free(matched_cookies);
+		return NULL;
+	}
+	/* NOTE(review): path is read after our reference is dropped; this
+	 * relies on url itself keeping the component alive - confirm. */
+	path = lwc_string_data(path_lwc);
+	lwc_string_unref(path_lwc);
+
+	now = time(NULL);
+
+	if (*(p->segment) != '\0') {
+		/* Match exact path, unless directory, when prefix matching
+		 * will handle this case for us. */
+		for (q = p->parent->children; q; q = q->next) {
+			if (strcmp(q->segment, p->segment))
+				continue;
+
+			/* Consider all cookies associated with
+			 * this exact path */
+			for (c = q->cookies; c; c = c->next) {
+				if (!urldb__cookie_is_sendable(c, q->scheme,
+						now))
+					continue;
+
+				if (c->http_only && !include_http_only)
+					/* Ignore HttpOnly */
+					continue;
+
+				RECORD_MATCHED_COOKIE(c);
+			}
+		}
+	}
+
+	/* NOTE(review): only the exact-path loop above honours
+	 * include_http_only; the loops below never did. Left unchanged -
+	 * confirm whether HttpOnly cookies should be filtered there too. */
+
+	/* Now consider cookies whose paths prefix-match ours */
+	for (p = p->parent; p; p = p->parent) {
+		/* Find directory's path entry(ies) */
+		/* There are potentially multiple due to differing schemes */
+		for (q = p->children; q; q = q->next) {
+			if (*(q->segment) != '\0')
+				continue;
+
+			for (c = q->cookies; c; c = c->next) {
+				if (!urldb__cookie_is_sendable(c, q->scheme,
+						now))
+					continue;
+
+				RECORD_MATCHED_COOKIE(c);
+			}
+		}
+
+		if (!p->parent) {
+			/* No parent, so bail here. This can't go in
+			 * the loop exit condition as we also want to
+			 * process the top-level node.
+			 *
+			 * If p->parent is NULL then p->cookies are
+			 * the domain cookies and thus we don't even
+			 * try matching against them.
+			 */
+			break;
+		}
+
+		/* Consider p itself - may be the result of Path=/foo */
+		for (c = p->cookies; c; c = c->next) {
+			/* Ensure cookie path is a prefix of the resource */
+			if (strncmp(c->path, path, strlen(c->path)) != 0)
+				/* paths don't match => ignore */
+				continue;
+
+			if (!urldb__cookie_is_sendable(c, p->scheme, now))
+				continue;
+
+			RECORD_MATCHED_COOKIE(c);
+		}
+
+	}
+
+	/* Finally consider domain cookies for hosts which domain match ours */
+	for (h = (const struct host_part *)p; h && h != &db_root;
+			h = h->parent) {
+		for (c = h->paths.cookies; c; c = c->next) {
+			/* Ensure cookie path is a prefix of the resource */
+			if (strncmp(c->path, path, strlen(c->path)) != 0)
+				/* paths don't match => ignore */
+				continue;
+
+			if (!urldb__cookie_is_sendable(c, scheme, now))
+				continue;
+
+			RECORD_MATCHED_COOKIE(c);
+		}
+	}
+
+	if (count == 0) {
+		/* No cookies found */
+		free(ret);
+		free(matched_cookies);
+		return NULL;
+	}
+
+	/* and build output string */
+	if (version > COOKIE_NETSCAPE) {
+		sprintf(ret, "$Version=%d", version);
+		ret_used = strlen(ret) + 1;
+	}
+
+	for (i = 0; i < count; i++) {
+		if (!urldb_concat_cookie(matched_cookies[i], version,
+				&ret_used, &ret_alloc, &ret)) {
+			free(ret);
+			free(matched_cookies);
+			return NULL;
+		}
+	}
+
+	if (version == COOKIE_NETSCAPE) {
+		/* Old-style cookies => no version & skip "; " */
+		memmove(ret, ret + 2, ret_used - 2);
+		ret_used -= 2;
+	}
+
+	/* Now, shrink the output buffer to the required size. If the
+	 * shrink fails the original buffer is still valid, so keep it
+	 * rather than throwing the result away. */
+	{
+		char *temp = realloc(ret, ret_used);
+		if (temp != NULL)
+			ret = temp;
+	}
+
+	free(matched_cookies);
+
+	return ret;
+
+#undef RECORD_MATCHED_COOKIE
+}
+
+
+/* exported interface documented in content/urldb.h */
+/* Entry point for cookie deletion: hands domain, path and name to
+ * urldb_delete_cookie_hosts(), starting from the root of the host tree. */
+void urldb_delete_cookie(const char *domain, const char *path,
+		const char *name)
+{
+	struct host_part *root = &db_root;
+
+	urldb_delete_cookie_hosts(domain, path, name, root);
+}
+
+
+/* exported interface documented in content/urldb.h */
void urldb_load_cookies(const char *filename)
{
FILE *fp;
@@ -3770,7 +3766,7 @@ void urldb_load_cookies(const char *filename)
if (strncasecmp(s, "Version:", 8) == 0) {
FIND_T; SKIP_T; loaded_cookie_file_version = atoi(p);
- if (loaded_cookie_file_version <
+ if (loaded_cookie_file_version <
MIN_COOKIE_FILE_VERSION) {
LOG(("Unsupported Cookie file version"));
break;
@@ -3882,84 +3878,12 @@ void urldb_load_cookies(const char *filename)
fclose(fp);
}
-/**
- * Delete a cookie
- *
- * \param domain The cookie's domain
- * \param path The cookie's path
- * \param name The cookie's name
- */
-void urldb_delete_cookie(const char *domain, const char *path,
- const char *name)
-{
- urldb_delete_cookie_hosts(domain, path, name, &db_root);
-}
-
-void urldb_delete_cookie_hosts(const char *domain, const char *path,
- const char *name, struct host_part *parent)
-{
- struct host_part *h;
- assert(parent);
-
- urldb_delete_cookie_paths(domain, path, name, &parent->paths);
-
- for (h = parent->children; h; h = h->next)
- urldb_delete_cookie_hosts(domain, path, name, h);
-}
-
-void urldb_delete_cookie_paths(const char *domain, const char *path,
- const char *name, struct path_data *parent)
-{
- struct cookie_internal_data *c;
- struct path_data *p = parent;
-
- assert(parent);
-
- do {
- for (c = p->cookies; c; c = c->next) {
- if (strcmp(c->domain, domain) == 0 &&
- strcmp(c->path, path) == 0 &&
- strcmp(c->name, name) == 0) {
- if (c->prev)
- c->prev->next = c->next;
- else
- p->cookies = c->next;
-
- if (c->next)
- c->next->prev = c->prev;
- else
- p->cookies_end = c->prev;
-
- urldb_free_cookie(c);
-
- return;
- }
- }
-
- if (p->children) {
- p = p->children;
- } else {
- while (p != parent) {
- if (p->next != NULL) {
- p = p->next;
- break;
- }
-
- p = p->parent;
- }
- }
- } while(p != parent);
-}
-/**
- * Save persistent cookies to file
- *
- * \param filename Path to save to
- */
+/* exported interface documented in content/urldb.h */
void urldb_save_cookies(const char *filename)
{
FILE *fp;
- int cookie_file_version = max(loaded_cookie_file_version,
+ int cookie_file_version = max(loaded_cookie_file_version,
COOKIE_FILE_VERSION);
assert(filename);
@@ -3988,253 +3912,173 @@ void urldb_save_cookies(const char *filename)
fclose(fp);
}
-/**
- * Save a host subtree's cookies
- *
- * \param fp File pointer to write to
- * \param parent Parent host
- */
-void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent)
+
+/* exported interface documented in content/urldb.h */
+void urldb_dump(void)
{
- struct host_part *h;
- assert(fp && parent);
+ int i;
- urldb_save_cookie_paths(fp, &parent->paths);
+ urldb_dump_hosts(&db_root);
- for (h = parent->children; h; h = h->next)
- urldb_save_cookie_hosts(fp, h);
+ for (i = 0; i != NUM_SEARCH_TREES; i++)
+ urldb_dump_search(search_trees[i], 0);
}
-/**
- * Save a path subtree's cookies
- *
- * \param fp File pointer to write to
- * \param parent Parent path
- */
-void urldb_save_cookie_paths(FILE *fp, struct path_data *parent)
+
+/* exported interface documented in content/urldb.h */
+struct host_part *urldb_add_host(const char *host)
{
- struct path_data *p = parent;
- time_t now = time(NULL);
+ struct host_part *d = (struct host_part *) &db_root, *e;
+ struct search_node *s;
+ char buf[256]; /* 256 bytes is sufficient - domain names are
+ * limited to 255 chars. */
+ char *part;
- assert(fp && parent);
+ assert(host);
- do {
- if (p->cookies != NULL) {
- struct cookie_internal_data *c;
+ if (urldb__host_is_ip_address(host)) {
+ /* Host is an IP, so simply add as TLD */
- for (c = p->cookies; c != NULL; c = c->next) {
- if (c->expires == -1 || c->expires < now)
- /* Skip expired & session cookies */
- continue;
+ /* Check for existing entry */
+ for (e = d->children; e; e = e->next)
+ if (strcasecmp(host, e->part) == 0)
+ /* found => return it */
+ return e;
- fprintf(fp,
- "%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t"
- "%s\t%s\t%d\t%s\t%s\t%s\n",
- c->version, c->domain,
- c->domain_from_set, c->path,
- c->path_from_set, c->secure,
- c->http_only,
- (int)c->expires, (int)c->last_used,
- c->no_destroy, c->name, c->value,
- c->value_was_quoted,
- p->scheme ? lwc_string_data(p->scheme) :
- "unused",
- p->url ? nsurl_access(p->url) :
- "unused",
- c->comment ? c->comment : "");
- }
- }
+ d = urldb_add_host_node(host, d);
- if (p->children != NULL) {
- p = p->children;
+ s = urldb_search_insert(search_trees[ST_IP], d);
+ if (!s) {
+ /* failed */
+ d = NULL;
} else {
- while (p != parent) {
- if (p->next != NULL) {
- p = p->next;
- break;
- }
-
- p = p->parent;
- }
+ search_trees[ST_IP] = s;
}
- } while (p != parent);
-}
-
-
-/**
- * Destroy urldb
- */
-void urldb_destroy(void)
-{
- struct host_part *a, *b;
- int i;
-
- /* Clean up search trees */
- for (i = 0; i < NUM_SEARCH_TREES; i++) {
- if (search_trees[i] != &empty)
- urldb_destroy_search_tree(search_trees[i]);
- }
-
- /* And database */
- for (a = db_root.children; a; a = b) {
- b = a->next;
- urldb_destroy_host_tree(a);
- }
-
- /* And the bloom filter */
- if (url_bloom != NULL)
- bloom_destroy(url_bloom);
-}
-
-/**
- * Destroy a host tree
- *
- * \param root Root node of tree to destroy
- */
-void urldb_destroy_host_tree(struct host_part *root)
-{
- struct host_part *a, *b;
- struct path_data *p, *q;
- struct prot_space_data *s, *t;
-
- /* Destroy children */
- for (a = root->children; a; a = b) {
- b = a->next;
- urldb_destroy_host_tree(a);
- }
- /* Now clean up paths */
- for (p = root->paths.children; p; p = q) {
- q = p->next;
- urldb_destroy_path_tree(p);
+ return d;
}
- /* Root path */
- urldb_destroy_path_node_content(&root->paths);
-
- /* Protection space data */
- for (s = root->prot_space; s; s = t) {
- t = s->next;
- urldb_destroy_prot_space(s);
- }
+ /* Copy host string, so we can corrupt it */
+ strncpy(buf, host, sizeof buf);
+ buf[sizeof buf - 1] = '\0';
- /* And ourselves */
- free(root->part);
- free(root);
-}
+ /* Process FQDN segments backwards */
+ do {
+ part = strrchr(buf, '.');
+ if (!part) {
+ /* last segment */
+ /* Check for existing entry */
+ for (e = d->children; e; e = e->next)
+ if (strcasecmp(buf, e->part) == 0)
+ break;
-/**
- * Destroy a path tree
- *
- * \param root Root node of tree to destroy
- */
-void urldb_destroy_path_tree(struct path_data *root)
-{
- struct path_data *p = root;
+ if (e) {
+ d = e;
+ } else {
+ d = urldb_add_host_node(buf, d);
+ }
- do {
- if (p->children != NULL) {
- p = p->children;
- } else {
- struct path_data *q = p;
+ /* And insert into search tree */
+ if (d) {
+ struct search_node **r;
- while (p != root) {
- if (p->next != NULL) {
- p = p->next;
- break;
+ r = urldb_get_search_tree_direct(buf);
+ s = urldb_search_insert(*r, d);
+ if (!s) {
+ /* failed */
+ d = NULL;
+ } else {
+ *r = s;
}
+ }
+ break;
+ }
- p = p->parent;
+ /* Check for existing entry */
+ for (e = d->children; e; e = e->next)
+ if (strcasecmp(part + 1, e->part) == 0)
+ break;
- urldb_destroy_path_node_content(q);
- free(q);
+ d = e ? e : urldb_add_host_node(part + 1, d);
+ if (!d)
+ break;
- q = p;
- }
+ *part = '\0';
+ } while (1);
- urldb_destroy_path_node_content(q);
- free(q);
- }
- } while (p != root);
+ return d;
}
-/**
- * Destroy the contents of a path node
- *
- * \param node Node to destroy contents of (does not destroy node)
- */
-void urldb_destroy_path_node_content(struct path_data *node)
-{
- struct cookie_internal_data *a, *b;
- unsigned int i;
- if (node->url != NULL)
- nsurl_unref(node->url);
+/* exported interface documented in content/urldb.h */
+struct path_data *
+urldb_add_path(lwc_string *scheme,
+ unsigned int port,
+ const struct host_part *host,
+ char *path_query,
+ lwc_string *fragment,
+ nsurl *url)
+{
+ struct path_data *d, *e;
+ char *buf = path_query;
+ char *segment, *slash;
+ bool match;
- if (node->scheme != NULL)
- lwc_string_unref(node->scheme);
+ assert(scheme && host && url);
- free(node->segment);
- for (i = 0; i < node->frag_cnt; i++)
- free(node->fragment[i]);
- free(node->fragment);
+ d = (struct path_data *) &host->paths;
- if (node->thumb)
- bitmap_destroy(node->thumb);
+ /* skip leading '/' */
+ segment = buf;
+ if (*segment == '/')
+ segment++;
- free(node->urld.title);
+ /* Process path segments */
+ do {
+ slash = strchr(segment, '/');
+ if (!slash) {
+ /* last segment */
+ /* look for existing entry */
+ for (e = d->children; e; e = e->next)
+ if (strcmp(segment, e->segment) == 0 &&
+ lwc_string_isequal(scheme,
+ e->scheme, &match) ==
+ lwc_error_ok &&
+ match == true &&
+ e->port == port)
+ break;
- for (a = node->cookies; a; a = b) {
- b = a->next;
- urldb_destroy_cookie(a);
- }
-}
+ d = e ? urldb_add_path_fragment(e, fragment) :
+ urldb_add_path_node(scheme, port,
+ segment, fragment, d);
+ break;
+ }
-/**
- * Destroy a cookie node
- *
- * \param c Cookie to destroy
- */
-void urldb_destroy_cookie(struct cookie_internal_data *c)
-{
- free(c->name);
- free(c->value);
- free(c->comment);
- free(c->domain);
- free(c->path);
+ *slash = '\0';
- free(c);
-}
+ /* look for existing entry */
+ for (e = d->children; e; e = e->next)
+ if (strcmp(segment, e->segment) == 0 &&
+ lwc_string_isequal(scheme, e->scheme,
+ &match) == lwc_error_ok &&
+ match == true &&
+ e->port == port)
+ break;
-/**
- * Destroy protection space data
- *
- * \param space Protection space to destroy
- */
-void urldb_destroy_prot_space(struct prot_space_data *space)
-{
- lwc_string_unref(space->scheme);
- free(space->realm);
- free(space->auth);
+ d = e ? e : urldb_add_path_node(scheme, port, segment, NULL, d);
+ if (!d)
+ break;
- free(space);
-}
+ segment = slash + 1;
+ } while (1);
+ free(path_query);
-/**
- * Destroy a search tree
- *
- * \param root Root node of tree to destroy
- */
-void urldb_destroy_search_tree(struct search_node *root)
-{
- /* Destroy children */
- if (root->left != &empty)
- urldb_destroy_search_tree(root->left);
- if (root->right != &empty)
- urldb_destroy_search_tree(root->right);
+ if (d && !d->url) {
+ /* Insert defragmented URL */
+ if (nsurl_defragment(url, &d->url) != NSERROR_OK)
+ return NULL;
+ }
- /* And destroy ourselves */
- free(root);
+ return d;
}
-