From cf7abb4a0ad6a6de3acf3215ca6d31fdebbf4708 Mon Sep 17 00:00:00 2001 From: Vincent Sanders Date: Sat, 1 Nov 2014 23:27:29 +0000 Subject: restructure urldb source remove forward declarations and restructure. exported functions are also now documented in the urldb.h header. --- content/urldb.c | 5796 +++++++++++++++++++++++++++---------------------------- content/urldb.h | 216 ++- 2 files changed, 3028 insertions(+), 2984 deletions(-) diff --git a/content/urldb.c b/content/urldb.c index bf873c62e..8af6ae150 100644 --- a/content/urldb.c +++ b/content/urldb.c @@ -17,8 +17,9 @@ * along with this program. If not, see . */ -/** \file - * Unified URL information database (implementation) +/** + * \file + * Unified URL information database implementation * * URLs are stored in a tree-based structure as follows: * @@ -81,8 +82,8 @@ * simpler implementation. Entries in this tree comprise pointers to the * leaf nodes of the host tree described above. * - * REALLY IMPORTANT NOTE: urldb expects all URLs to be normalised. Use of - * non-normalised URLs with urldb will result in undefined behaviour and + * REALLY IMPORTANT NOTE: urldb expects all URLs to be normalised. Use of + * non-normalised URLs with urldb will result in undefined behaviour and * potential crashes. 
*/ @@ -217,94 +218,6 @@ struct search_node { struct search_node *right; /**< Right subtree */ }; -/* Destruction */ -static void urldb_destroy_host_tree(struct host_part *root); -static void urldb_destroy_path_tree(struct path_data *root); -static void urldb_destroy_path_node_content(struct path_data *node); -static void urldb_destroy_cookie(struct cookie_internal_data *c); -static void urldb_destroy_prot_space(struct prot_space_data *space); -static void urldb_destroy_search_tree(struct search_node *root); - -/* Saving */ -static void urldb_save_search_tree(struct search_node *root, FILE *fp); -static void urldb_count_urls(const struct path_data *root, time_t expiry, - unsigned int *count); -static void urldb_write_paths(const struct path_data *parent, - const char *host, FILE *fp, char **path, int *path_alloc, - int *path_used, time_t expiry); - -/* Iteration */ -static bool urldb_iterate_partial_host(struct search_node *root, - const char *prefix, bool (*callback)(nsurl *url, - const struct url_data *data)); -static bool urldb_iterate_partial_path(const struct path_data *parent, - const char *prefix, bool (*callback)(nsurl *url, - const struct url_data *data)); -static bool urldb_iterate_entries_host(struct search_node *parent, - bool (*url_callback)(nsurl *url, - const struct url_data *data), - bool (*cookie_callback)(const struct cookie_data *data)); -static bool urldb_iterate_entries_path(const struct path_data *parent, - bool (*url_callback)(nsurl *url, - const struct url_data *data), - bool (*cookie_callback)(const struct cookie_data *data)); - -/* Insertion */ -static struct host_part *urldb_add_host_node(const char *part, - struct host_part *parent); -static struct path_data *urldb_add_path_node(lwc_string *scheme, - unsigned int port, const char *segment, lwc_string *fragment, - struct path_data *parent); -static int urldb_add_path_fragment_cmp(const void *a, const void *b); -static struct path_data *urldb_add_path_fragment(struct path_data *segment, - 
lwc_string *fragment); - -/* Lookup */ -static struct path_data *urldb_find_url(nsurl *url); -static struct path_data *urldb_match_path(const struct path_data *parent, - const char *path, lwc_string *scheme, unsigned short port); -static struct search_node **urldb_get_search_tree_direct(const char *host); -static struct search_node *urldb_get_search_tree(const char *host); - -/* Dump */ -static void urldb_dump_hosts(struct host_part *parent); -static void urldb_dump_paths(struct path_data *parent); -static void urldb_dump_search(struct search_node *parent, int depth); - -/* Search tree */ -static struct search_node *urldb_search_insert(struct search_node *root, - const struct host_part *data); -static struct search_node *urldb_search_insert_internal( - struct search_node *root, struct search_node *n); -/* for urldb_search_remove, see r5531 which removed it */ -static const struct host_part *urldb_search_find(struct search_node *root, - const char *host); -static struct search_node *urldb_search_skew(struct search_node *root); -static struct search_node *urldb_search_split(struct search_node *root); -static int urldb_search_match_host(const struct host_part *a, - const struct host_part *b); -static int urldb_search_match_string(const struct host_part *a, - const char *b); -static int urldb_search_match_prefix(const struct host_part *a, - const char *b); - -/* Cookies */ -static struct cookie_internal_data *urldb_parse_cookie(nsurl *url, - const char **cookie); -static bool urldb_parse_avpair(struct cookie_internal_data *c, char *n, - char *v, bool was_quoted); -static bool urldb_insert_cookie(struct cookie_internal_data *c, - lwc_string *scheme, nsurl *url); -static void urldb_free_cookie(struct cookie_internal_data *c); -static bool urldb_concat_cookie(struct cookie_internal_data *c, int version, - int *used, int *alloc, char **buf); -static void urldb_delete_cookie_hosts(const char *domain, const char *path, - const char *name, struct host_part *parent); -static 
void urldb_delete_cookie_paths(const char *domain, const char *path, - const char *name, struct path_data *parent); -static void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent); -static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent); - /** Root database handle */ static struct host_part db_root; @@ -325,6 +238,8 @@ static struct search_node *search_trees[NUM_SEARCH_TREES] = { #define COOKIE_FILE_VERSION 102 static int loaded_cookie_file_version; #define MIN_URL_FILE_VERSION 106 + +/** URL database file version */ #define URL_FILE_VERSION 106 /* Bloom filter used for short-circuting the false case of "is this @@ -338,241 +253,163 @@ static int loaded_cookie_file_version; static struct bloom_filter *url_bloom; #define BLOOM_SIZE (1024 * 32) + + /** - * Import an URL database from file, replacing any existing database + * Write paths associated with a host * - * \param filename Name of file containing data + * \param parent Root of (sub)tree to write + * \param host Current host name + * \param fp File to write to + * \param path Current path string + * \param path_alloc Allocated size of path + * \param path_used Used size of path + * \param expiry Expiry time of URLs */ -nserror urldb_load(const char *filename) +static void urldb_write_paths(const struct path_data *parent, const char *host, + FILE *fp, char **path, int *path_alloc, int *path_used, + time_t expiry) { -#define MAXIMUM_URL_LENGTH 4096 - char s[MAXIMUM_URL_LENGTH]; - char host[256]; - struct host_part *h; - int urls; + const struct path_data *p = parent; int i; - int version; - int length; - FILE *fp; - - assert(filename); - - LOG(("Loading URL file %s", filename)); - - if (url_bloom == NULL) - url_bloom = bloom_create(BLOOM_SIZE); - - fp = fopen(filename, "r"); - if (!fp) { - LOG(("Failed to open file '%s' for reading", filename)); - return NSERROR_NOT_FOUND; - } - - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) { - fclose(fp); - return NSERROR_NEED_DATA; - } - - version = 
atoi(s); - if (version < MIN_URL_FILE_VERSION) { - LOG(("Unsupported URL file version.")); - fclose(fp); - return NSERROR_INVALID; - } - if (version > URL_FILE_VERSION) { - LOG(("Unknown URL file version.")); - fclose(fp); - return NSERROR_INVALID; - } - while (fgets(host, sizeof host, fp)) { - /* get the hostname */ - length = strlen(host) - 1; - host[length] = '\0'; + do { + int seglen = p->segment != NULL ? strlen(p->segment) : 0; + int len = *path_used + seglen + 1; - /* skip data that has ended up with a host of '' */ - if (length == 0) { - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - urls = atoi(s); - /* Eight fields/url */ - for (i = 0; i < (8 * urls); i++) { - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - } - continue; + if (*path_alloc < len) { + char *temp = realloc(*path, + (len > 64) ? len : *path_alloc + 64); + if (!temp) + return; + *path = temp; + *path_alloc = (len > 64) ? len : *path_alloc + 64; } - /* read number of URLs */ - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - urls = atoi(s); - - /* no URLs => try next host */ - if (urls == 0) { - LOG(("No URLs for '%s'", host)); - continue; - } + if (p->segment != NULL) + memcpy(*path + *path_used - 1, p->segment, seglen); - h = urldb_add_host(host); - if (!h) { - LOG(("Failed adding host: '%s'", host)); - fclose(fp); - return NSERROR_NOMEM; + if (p->children != NULL) { + (*path)[*path_used + seglen - 1] = '/'; + (*path)[*path_used + seglen] = '\0'; + } else { + (*path)[*path_used + seglen - 1] = '\0'; + len -= 1; } - /* load the non-corrupt data */ - for (i = 0; i < urls; i++) { - struct path_data *p = NULL; - char scheme[64], ports[10]; - char url[64 + 3 + 256 + 6 + 4096 + 1]; - unsigned int port; - bool is_file = false; - nsurl *nsurl; - lwc_string *scheme_lwc, *fragment_lwc; - char *path_query; - size_t len; + *path_used = len; - if (!fgets(scheme, sizeof scheme, fp)) - break; - length = strlen(scheme) - 1; - scheme[length] = '\0'; + if (p->children != NULL) { + /* Drill down into 
children */ + p = p->children; + } else { + /* leaf node */ + if (p->persistent ||((p->urld.last_visit > expiry) && + (p->urld.visits > 0))) { + fprintf(fp, "%s\n", lwc_string_data(p->scheme)); - if (!fgets(ports, sizeof ports, fp)) - break; - length = strlen(ports) - 1; - ports[length] = '\0'; - port = atoi(ports); + if (p->port) + fprintf(fp,"%d\n", p->port); + else + fprintf(fp, "\n"); - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - length = strlen(s) - 1; - s[length] = '\0'; + fprintf(fp, "%s\n", *path); - if (!strcasecmp(host, "localhost") && - !strcasecmp(scheme, "file")) - is_file = true; + /** \todo handle fragments? */ - snprintf(url, sizeof url, "%s://%s%s%s%s", - scheme, - /* file URLs have no host */ - (is_file ? "" : host), - (port ? ":" : ""), - (port ? ports : ""), - s); + fprintf(fp, "%i\n%i\n%i\n", p->urld.visits, + (int)p->urld.last_visit, + (int)p->urld.type); - /* TODO: store URLs in pre-parsed state, and make - * a nsurl_load to generate the nsurl more - * swiftly. - * Need a nsurl_save too. 
- */ - if (nsurl_create(url, &nsurl) != NSERROR_OK) { - LOG(("Failed inserting '%s'", url)); - fclose(fp); - return NSERROR_NOMEM; - } - - if (url_bloom != NULL) { - uint32_t hash = nsurl_hash(nsurl); - bloom_insert_hash(url_bloom, hash); - } + fprintf(fp, "\n"); - /* Copy and merge path/query strings */ - if (nsurl_get(nsurl, NSURL_PATH | NSURL_QUERY, - &path_query, &len) != NSERROR_OK) { - LOG(("Failed inserting '%s'", url)); - fclose(fp); - return NSERROR_NOMEM; - } + if (p->urld.title) { + uint8_t *s = (uint8_t *) p->urld.title; - scheme_lwc = nsurl_get_component(nsurl, NSURL_SCHEME); - fragment_lwc = nsurl_get_component(nsurl, - NSURL_FRAGMENT); - p = urldb_add_path(scheme_lwc, port, h, path_query, - fragment_lwc, nsurl); - if (!p) { - LOG(("Failed inserting '%s'", url)); - fclose(fp); - return NSERROR_NOMEM; + for (i = 0; s[i] != '\0'; i++) + if (s[i] < 32) + s[i] = ' '; + for (--i; ((i > 0) && (s[i] == ' ')); + i--) + s[i] = '\0'; + fprintf(fp, "%s\n", p->urld.title); + } else + fprintf(fp, "\n"); } - nsurl_unref(nsurl); - lwc_string_unref(scheme_lwc); - if (fragment_lwc != NULL) - lwc_string_unref(fragment_lwc); - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - if (p) - p->urld.visits = (unsigned int)atoi(s); - - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - if (p) - p->urld.last_visit = (time_t)atoi(s); + /* Now, find next node to process. */ + while (p != parent) { + int seglen = p->segment != NULL + ? 
strlen(p->segment) : 0; - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - if (p) - p->urld.type = (content_type)atoi(s); + /* Remove our segment from the path */ + *path_used -= seglen; + (*path)[*path_used - 1] = '\0'; - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; + if (p->next != NULL) { + /* Have a sibling, process that */ + p = p->next; + break; + } + /* Going up, so remove '/' */ + *path_used -= 1; + (*path)[*path_used - 1] = '\0'; - if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) - break; - length = strlen(s) - 1; - if (p && length > 0) { - s[length] = '\0'; - p->urld.title = malloc(length + 1); - if (p->urld.title) - memcpy(p->urld.title, s, length + 1); + /* Ascend tree */ + p = p->parent; } } - } - - fclose(fp); - LOG(("Successfully loaded URL file")); -#undef MAXIMUM_URL_LENGTH - - return NSERROR_OK; + } while (p != parent); } + /** - * Export the current database to file + * Count number of URLs associated with a host * - * \param filename Name of file to export to + * \param root Root of path data tree + * \param expiry Expiry time for URLs + * \param count Pointer to count */ -void urldb_save(const char *filename) +static void urldb_count_urls(const struct path_data *root, time_t expiry, + unsigned int *count) { - FILE *fp; - int i; - - assert(filename); - - fp = fopen(filename, "w"); - if (!fp) { - LOG(("Failed to open file '%s' for writing", filename)); - return; - } + const struct path_data *p = root; - /* file format version number */ - fprintf(fp, "%d\n", URL_FILE_VERSION); + do { + if (p->children != NULL) { + /* Drill down into children */ + p = p->children; + } else { + /* No more children, increment count if required */ + if (p->persistent || ((p->urld.last_visit > expiry) && + (p->urld.visits > 0))) { + (*count)++; + } - for (i = 0; i != NUM_SEARCH_TREES; i++) { - urldb_save_search_tree(search_trees[i], fp); - } + /* Now, find next node to process. 
*/ + while (p != root) { + if (p->next != NULL) { + /* Have a sibling, process that */ + p = p->next; + break; + } - fclose(fp); + /* Ascend tree */ + p = p->parent; + } + } + } while (p != root); } + /** * Save a search (sub)tree * * \param root Root of (sub)tree to save * \param fp File to write to */ -void urldb_save_search_tree(struct search_node *parent, FILE *fp) +static void urldb_save_search_tree(struct search_node *parent, FILE *fp) { char host[256]; const struct host_part *h; @@ -619,30 +456,52 @@ void urldb_save_search_tree(struct search_node *parent, FILE *fp) urldb_save_search_tree(parent->right, fp); } + /** - * Count number of URLs associated with a host + * Path data iterator (internal) * - * \param root Root of path data tree - * \param expiry Expiry time for URLs - * \param count Pointer to count + * \param parent Root of subtree to iterate over + * \param url_callback Callback function + * \param cookie_callback Callback function + * \return true to continue, false otherwise */ -void urldb_count_urls(const struct path_data *root, time_t expiry, - unsigned int *count) +static bool urldb_iterate_entries_path(const struct path_data *parent, + bool (*url_callback)(nsurl *url, const struct url_data *data), + bool (*cookie_callback)(const struct cookie_data *data)) { - const struct path_data *p = root; + const struct path_data *p = parent; + const struct cookie_data *c; do { if (p->children != NULL) { /* Drill down into children */ p = p->children; } else { - /* No more children, increment count if required */ - if (p->persistent || ((p->urld.last_visit > expiry) && - (p->urld.visits > 0))) - (*count)++; + /* All leaf nodes in the path tree should have an URL or + * cookies attached to them. If this is not the case, it + * indicates that there's a bug in the file loader/URL + * insertion code. Therefore, assert this here. */ + assert(url_callback || cookie_callback); + + /** \todo handle fragments? 
*/ + if (url_callback) { + const struct url_internal_data *u = &p->urld; + + assert(p->url); + + if (!url_callback(p->url, + (const struct url_data *) u)) + return false; + } else { + c = (const struct cookie_data *)p->cookies; + for (; c != NULL; c = c->next) { + if (!cookie_callback(c)) + return false; + } + } /* Now, find next node to process. */ - while (p != root) { + while (p != parent) { if (p->next != NULL) { /* Have a sibling, process that */ p = p->next; @@ -653,1521 +512,1750 @@ void urldb_count_urls(const struct path_data *root, time_t expiry, p = p->parent; } } - } while (p != root); + } while (p != parent); + + return true; } + /** - * Write paths associated with a host + * Check whether a host string is an IP address. * - * \param parent Root of (sub)tree to write - * \param host Current host name - * \param fp File to write to - * \param path Current path string - * \param path_alloc Allocated size of path - * \param path_used Used size of path - * \param expiry Expiry time of URLs + * This call detects IPv4 addresses (all of dotted-quad or subsets, + * decimal or hexadecimal notations) and IPv6 addresses (including + * those containing embedded IPv4 addresses.) + * + * \param host a hostname terminated by '\0' + * \return true if the hostname is an IP address, false otherwise */ -void urldb_write_paths(const struct path_data *parent, const char *host, - FILE *fp, char **path, int *path_alloc, int *path_used, - time_t expiry) +static bool urldb__host_is_ip_address(const char *host) { - const struct path_data *p = parent; - int i; + struct in_addr ipv4; + size_t host_len = strlen(host); + const char *sane_host; + const char *slash; +#ifndef NO_IPV6 + struct in6_addr ipv6; + char ipv6_addr[64]; +#endif + /** @todo FIXME Some parts of urldb.c make confusions between hosts + * and "prefixes", we can sometimes be erroneously passed more than + * just a host. Sometimes we may be passed trailing slashes, or even + * whole path segments. 
A specific criminal in this class is + * urldb_iterate_partial, which takes a prefix to search for, but + * passes that prefix to functions that expect only hosts. + * + * For the time being, we will accept such calls; we check if there + * is a / in the host parameter, and if there is, we take a copy and + * replace the / with a \0. This is not a permanent solution; we + * should search through NetSurf and find all the callers that are + * in error and fix them. When doing this task, it might be wise + * to replace the hideousness below with code that doesn't have to do + * this, and add assert(strchr(host, '/') == NULL); somewhere. + * -- rjek - 2010-11-04 + */ - do { - int seglen = p->segment != NULL ? strlen(p->segment) : 0; - int len = *path_used + seglen + 1; + slash = strchr(host, '/'); + if (slash == NULL) { + sane_host = host; + } else { + char *c = strdup(host); + c[slash - host] = '\0'; + sane_host = c; + host_len = slash - host - 1; + LOG(("WARNING: called with non-host '%s'", host)); + } - if (*path_alloc < len) { - char *temp = realloc(*path, - (len > 64) ? len : *path_alloc + 64); - if (!temp) - return; - *path = temp; - *path_alloc = (len > 64) ? len : *path_alloc + 64; - } + if (strspn(sane_host, "0123456789abcdefABCDEF[].:") < host_len) + goto out_false; - if (p->segment != NULL) - memcpy(*path + *path_used - 1, p->segment, seglen); + if (inet_aton(sane_host, &ipv4) != 0) { + /* This can only be a sane IPv4 address if it contains 3 dots. + * Helpfully, inet_aton is happy to treat "a", "a.b", "a.b.c", + * and "a.b.c.d" as valid IPv4 address strings where we only + * support the full, dotted-quad, form. 
+ */ + int num_dots = 0; + size_t index; - if (p->children != NULL) { - (*path)[*path_used + seglen - 1] = '/'; - (*path)[*path_used + seglen] = '\0'; - } else { - (*path)[*path_used + seglen - 1] = '\0'; - len -= 1; + for (index = 0; index < host_len; index++) { + if (sane_host[index] == '.') + num_dots++; } - *path_used = len; + if (num_dots == 3) + goto out_true; + else + goto out_false; + } - if (p->children != NULL) { - /* Drill down into children */ - p = p->children; - } else { - /* leaf node */ - if (p->persistent ||((p->urld.last_visit > expiry) && - (p->urld.visits > 0))) { - fprintf(fp, "%s\n", lwc_string_data(p->scheme)); +#ifndef NO_IPV6 + if (sane_host[0] != '[' || sane_host[host_len] != ']') + goto out_false; - if (p->port) - fprintf(fp,"%d\n", p->port); - else - fprintf(fp, "\n"); + strncpy(ipv6_addr, sane_host + 1, sizeof(ipv6_addr)); + ipv6_addr[sizeof(ipv6_addr) - 1] = '\0'; - fprintf(fp, "%s\n", *path); + if (inet_pton(AF_INET6, ipv6_addr, &ipv6) == 1) + goto out_true; +#endif - /** \todo handle fragments? */ +out_false: + if (slash != NULL) free((void *)sane_host); + return false; - fprintf(fp, "%i\n%i\n%i\n", p->urld.visits, - (int)p->urld.last_visit, - (int)p->urld.type); +out_true: + if (slash != NULL) free((void *)sane_host); + return true; +} - fprintf(fp, "\n"); - if (p->urld.title) { - uint8_t *s = (uint8_t *) p->urld.title; +/** + * Compare host_part with prefix + * + * \param a + * \param b + * \return 0 if match, non-zero, otherwise + */ +static int urldb_search_match_prefix(const struct host_part *a, const char *b) +{ + const char *end, *dot; + int plen, ret; - for (i = 0; s[i] != '\0'; i++) - if (s[i] < 32) - s[i] = ' '; - for (--i; ((i > 0) && (s[i] == ' ')); - i--) - s[i] = '\0'; - fprintf(fp, "%s\n", p->urld.title); - } else - fprintf(fp, "\n"); - } + assert(a && a != &db_root && b); - /* Now, find next node to process. */ - while (p != parent) { - int seglen = p->segment != NULL - ? 
strlen(p->segment) : 0; + if (urldb__host_is_ip_address(b)) { + /* IP address */ + return strncasecmp(a->part, b, strlen(b)); + } - /* Remove our segment from the path */ - *path_used -= seglen; - (*path)[*path_used - 1] = '\0'; + end = b + strlen(b) + 1; - if (p->next != NULL) { - /* Have a sibling, process that */ - p = p->next; - break; - } + while (b < end && a && a != &db_root) { + dot = strchr(b, '.'); + if (!dot) { + /* last segment */ + dot = end - 1; + } - /* Going up, so remove '/' */ - *path_used -= 1; - (*path)[*path_used - 1] = '\0'; + /* Compare strings (length limited) */ + if ((ret = strncasecmp(a->part, b, dot - b)) != 0) + /* didn't match => return difference */ + return ret; - /* Ascend tree */ - p = p->parent; - } + /* The strings matched */ + if (dot < end - 1) { + /* Consider segment lengths only in the case + * where the prefix contains segments */ + plen = strlen(a->part); + if (plen > dot - b) + /* len(a) > len(b) */ + return 1; + else if (plen < dot - b) + /* len(a) < len(b) */ + return -1; } - } while (p != parent); -} - -/** - * Set the cross-session persistence of the entry for an URL - * - * \param url Absolute URL to persist - * \param persist True to persist, false otherwise - */ -void urldb_set_url_persistence(nsurl *url, bool persist) -{ - struct path_data *p; - assert(url); + b = dot + 1; + a = a->parent; + } - p = urldb_find_url(url); - if (!p) - return; + /* If we get here then either: + * a) The path lengths differ + * or b) The hosts are identical + */ + if (a && a != &db_root && b >= end) + /* len(a) > len(b) => prefix matches */ + return 0; + else if ((!a || a == &db_root) && b < end) + /* len(a) < len(b) => prefix does not match */ + return -1; - p->persistent = persist; + /* Identical */ + return 0; } + /** - * Insert an URL into the database + * Partial host iterator (internal) * - * \param url Absolute URL to insert - * \return true on success, false otherwise + * \param root Root of (sub)tree to traverse + * \param 
prefix Prefix to match + * \param callback Callback function + * \return true to continue, false otherwise */ -bool urldb_add_url(nsurl *url) +static bool +urldb_iterate_partial_host(struct search_node *root, + const char *prefix, + bool (*callback)(nsurl *url, const struct url_data *data)) { - struct host_part *h; - struct path_data *p; - lwc_string *scheme; - lwc_string *port; - lwc_string *host; - lwc_string *fragment; - const char *host_str; - char *path_query = NULL; - size_t len; - bool match; - unsigned int port_int; - - assert(url); - - if (url_bloom == NULL) - url_bloom = bloom_create(BLOOM_SIZE); - - if (url_bloom != NULL) { - uint32_t hash = nsurl_hash(url); - bloom_insert_hash(url_bloom, hash); - } - - /* Copy and merge path/query strings */ - if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &path_query, &len) != - NSERROR_OK) { - return false; - } - assert(path_query != NULL); + int c; - scheme = nsurl_get_component(url, NSURL_SCHEME); - if (scheme == NULL) { - free(path_query); - return false; - } + assert(root && prefix && callback); - host = nsurl_get_component(url, NSURL_HOST); - if (host != NULL) { - host_str = lwc_string_data(host); - lwc_string_unref(host); + if (root == &empty) + return true; - } else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) == - lwc_error_ok && match == true) { - host_str = "localhost"; + c = urldb_search_match_prefix(root->data, prefix); - } else { - lwc_string_unref(scheme); - free(path_query); - return false; - } + if (c > 0) + /* No match => look in left subtree */ + return urldb_iterate_partial_host(root->left, prefix, + callback); + else if (c < 0) + /* No match => look in right subtree */ + return urldb_iterate_partial_host(root->right, prefix, + callback); + else { + /* Match => iterate over l/r subtrees & process this node */ + if (!urldb_iterate_partial_host(root->left, prefix, + callback)) + return false; - fragment = nsurl_get_component(url, NSURL_FRAGMENT); + if (root->data->paths.children) { + /* 
and extract all paths attached to this host */ + if (!urldb_iterate_entries_path(&root->data->paths, + callback, NULL)) { + return false; + } + } - port = nsurl_get_component(url, NSURL_PORT); - if (port != NULL) { - port_int = atoi(lwc_string_data(port)); - lwc_string_unref(port); - } else { - port_int = 0; + if (!urldb_iterate_partial_host(root->right, prefix, + callback)) + return false; } - /* Get host entry */ - h = urldb_add_host(host_str); - - /* Get path entry */ - p = (h != NULL) ? urldb_add_path(scheme, port_int, h, path_query, - fragment, url) : NULL; - - lwc_string_unref(scheme); - if (fragment != NULL) - lwc_string_unref(fragment); - - return (p != NULL); + return true; } + /** - * Set an URL's title string, replacing any existing one + * Partial path iterator (internal) * - * \param url The URL to look for - * \param title The title string to use (copied) + * \param parent Root of (sub)tree to traverse + * \param prefix Prefix to match + * \param callback Callback function + * \return true to continue, false otherwise */ -void urldb_set_url_title(nsurl *url, const char *title) +static bool urldb_iterate_partial_path(const struct path_data *parent, + const char *prefix, bool (*callback)(nsurl *url, + const struct url_data *data)) { - struct path_data *p; - char *temp; - - assert(url && title); + const struct path_data *p = parent->children; + const char *slash, *end = prefix + strlen(prefix); - p = urldb_find_url(url); - if (!p) - return; + /* + * Given: http://www.example.org/a/b/c/d//e + * and assuming a path tree: + * . + * / \ + * a1 b1 + * / \ + * a2 b2 + * /|\ + * a b c + * 3 3 | + * d + * | + * e + * / \ + * f g + * + * Prefix will be: p will be: + * + * a/b/c/d//e a1 + * b/c/d//e a2 + * b/c/d//e b3 + * c/d//e a3 + * c/d//e b3 + * c/d//e c + * d//e d + * /e e (skip /) + * e e + * + * I.E. we perform a breadth-first search of the tree. 
+ */ - temp = strdup(title); - if (!temp) - return; + do { + slash = strchr(prefix, '/'); + if (!slash) + slash = end; - free(p->urld.title); - p->urld.title = temp; -} + if (slash == prefix && *prefix == '/') { + /* Ignore "//" */ + prefix++; + continue; + } -/** - * Set an URL's content type - * - * \param url The URL to look for - * \param type The type to set - */ -void urldb_set_url_content_type(nsurl *url, content_type type) -{ - struct path_data *p; + if (strncasecmp(p->segment, prefix, slash - prefix) == 0) { + /* prefix matches so far */ + if (slash == end) { + /* we've run out of prefix, so all + * paths below this one match */ + if (!urldb_iterate_entries_path(p, callback, + NULL)) + return false; - assert(url); + /* Progress to next sibling */ + p = p->next; + } else { + /* Skip over this segment */ + prefix = slash + 1; - p = urldb_find_url(url); - if (!p) - return; + p = p->children; + } + } else { + /* Doesn't match this segment, try next sibling */ + p = p->next; + } + } while (p != NULL); - p->urld.type = type; + return true; } + /** - * Update an URL's visit data + * Host data iterator (internal) * - * \param url The URL to update + * \param parent Root of subtree to iterate over + * \param url_callback Callback function + * \param cookie_callback Callback function + * \return true to continue, false otherwise */ -void urldb_update_url_visit_data(nsurl *url) +static bool urldb_iterate_entries_host(struct search_node *parent, + bool (*url_callback)(nsurl *url, + const struct url_data *data), + bool (*cookie_callback)(const struct cookie_data *data)) { - struct path_data *p; + if (parent == &empty) + return true; - assert(url); + if (!urldb_iterate_entries_host(parent->left, + url_callback, cookie_callback)) + return false; - p = urldb_find_url(url); - if (!p) - return; + if ((parent->data->paths.children) || ((cookie_callback) && + (parent->data->paths.cookies))) { + /* We have paths (or domain cookies), so iterate them */ + if 
(!urldb_iterate_entries_path(&parent->data->paths, + url_callback, cookie_callback)) { + return false; + } + } - p->urld.last_visit = time(NULL); - p->urld.visits++; + if (!urldb_iterate_entries_host(parent->right, + url_callback, cookie_callback)) + return false; + + return true; } + /** - * Reset an URL's visit statistics + * Add a host node to the tree * - * \param url The URL to reset + * \param part Host segment to add (or whole IP address) (copied) + * \param parent Parent node to add to + * \return Pointer to added node, or NULL on memory exhaustion */ -void urldb_reset_url_visit_data(nsurl *url) +static struct host_part *urldb_add_host_node(const char *part, + struct host_part *parent) { - struct path_data *p; + struct host_part *d; - assert(url); + assert(part && parent); - p = urldb_find_url(url); - if (!p) - return; + d = calloc(1, sizeof(struct host_part)); + if (!d) + return NULL; - p->urld.last_visit = (time_t)0; - p->urld.visits = 0; + d->part = strdup(part); + if (!d->part) { + free(d); + return NULL; + } + + d->next = parent->children; + if (parent->children) + parent->children->prev = d; + d->parent = parent; + parent->children = d; + + return d; } /** - * Find data for an URL. 
- * - * \param url Absolute URL to look for - * \return Pointer to result struct, or NULL + * Fragment comparator callback for qsort */ -const struct url_data *urldb_get_url_data(nsurl *url) +static int urldb_add_path_fragment_cmp(const void *a, const void *b) { - struct path_data *p; - struct url_internal_data *u; - - assert(url); - - p = urldb_find_url(url); - if (!p) - return NULL; - - u = &p->urld; - - return (const struct url_data *) u; + return strcasecmp(*((const char **) a), *((const char **) b)); } + /** - * Extract an URL from the db + * Add a fragment to a path segment * - * \param url URL to extract - * \return Pointer to database's copy of URL or NULL if not found + * \param segment Path segment to add to + * \param fragment Fragment to add (copied), or NULL + * \return segment or NULL on memory exhaustion */ -nsurl *urldb_get_url(nsurl *url) +static struct path_data * +urldb_add_path_fragment(struct path_data *segment, lwc_string *fragment) { - struct path_data *p; + char **temp; - assert(url); + assert(segment); - p = urldb_find_url(url); - if (!p) + /* If no fragment, this function is a NOP + * This may seem strange, but it makes the rest + * of the code cleaner */ + if (!fragment) + return segment; + + temp = realloc(segment->fragment, + (segment->frag_cnt + 1) * sizeof(char *)); + if (!temp) return NULL; - return p->url; + segment->fragment = temp; + segment->fragment[segment->frag_cnt] = + strdup(lwc_string_data(fragment)); + if (!segment->fragment[segment->frag_cnt]) { + /* Don't free temp - it's now our buffer */ + return NULL; + } + + segment->frag_cnt++; + + /* We want fragments in alphabetical order, so sort them + * It may prove better to insert in alphabetical order instead */ + qsort(segment->fragment, segment->frag_cnt, sizeof (char *), + urldb_add_path_fragment_cmp); + + return segment; } + /** - * Look up authentication details in database + * Add a path node to the tree * - * \param url Absolute URL to search for - * \param realm When 
non-NULL, it is realm which can be used to determine - * the protection space when that's not been done before for given URL. - * \return Pointer to authentication details, or NULL if not found + * \param scheme URL scheme associated with path (copied) + * \param port Port number on host associated with path + * \param segment Path segment to add (copied) + * \param fragment URL fragment (copied), or NULL + * \param parent Parent node to add to + * \return Pointer to added node, or NULL on memory exhaustion */ -const char *urldb_get_auth_details(nsurl *url, const char *realm) +static struct path_data * +urldb_add_path_node(lwc_string *scheme, unsigned int port, + const char *segment, lwc_string *fragment, + struct path_data *parent) { - struct path_data *p, *p_cur, *p_top; + struct path_data *d, *e; - assert(url); + assert(scheme && segment && parent); - /* add to the db, so our lookup will work */ - urldb_add_url(url); + d = calloc(1, sizeof(struct path_data)); + if (!d) + return NULL; - p = urldb_find_url(url); - if (!p) + d->scheme = lwc_string_ref(scheme); + + d->port = port; + + d->segment = strdup(segment); + if (!d->segment) { + lwc_string_unref(d->scheme); + free(d); return NULL; + } - /* Check for any auth details attached to the path_data node or any of - * its parents. */ - for (p_cur = p; p_cur != NULL; p_top = p_cur, p_cur = p_cur->parent) { - if (p_cur->prot_space) { - return p_cur->prot_space->auth; + if (fragment) { + if (!urldb_add_path_fragment(d, fragment)) { + free(d->segment); + lwc_string_unref(d->scheme); + free(d); + return NULL; } } - /* Only when we have a realm (and canonical root of given URL), we can - * uniquely locate the protection space. */ - if (realm != NULL) { - const struct host_part *h = (const struct host_part *)p_top; - const struct prot_space_data *space; - bool match; + for (e = parent->children; e; e = e->next) { + if (strcmp(e->segment, d->segment) > 0) + break; + } - /* Search for a possible matching protection space. 
*/ - for (space = h->prot_space; space != NULL; - space = space->next) { - if (!strcmp(space->realm, realm) && - lwc_string_isequal(space->scheme, - p->scheme, &match) == - lwc_error_ok && - match == true && - space->port == p->port) { - p->prot_space = space; - return p->prot_space->auth; - } - } + if (e) { + d->prev = e->prev; + d->next = e; + if (e->prev) + e->prev->next = d; + else + parent->children = d; + e->prev = d; + } else if (!parent->children) { + d->prev = d->next = NULL; + parent->children = parent->last = d; + } else { + d->next = NULL; + d->prev = parent->last; + parent->last->next = d; + parent->last = d; } + d->parent = parent; - return NULL; + return d; } + /** - * Retrieve certificate verification permissions from database + * Get the search tree for a particular host * - * \param url Absolute URL to search for - * \return true to permit connections to hosts with invalid certificates, - * false otherwise. + * \param host the host to lookup + * \return the corresponding search tree */ -bool urldb_get_cert_permissions(nsurl *url) +static struct search_node **urldb_get_search_tree_direct(const char *host) { - struct path_data *p; - const struct host_part *h; - - assert(url); - - p = urldb_find_url(url); - if (!p) - return false; + assert(host); - for (; p && p->parent; p = p->parent) - /* do nothing */; - assert(p); + if (urldb__host_is_ip_address(host)) + return &search_trees[ST_IP]; + else if (isalpha(*host)) + return &search_trees[ST_DN + tolower(*host) - 'a']; + return &search_trees[ST_EE]; +} - h = (const struct host_part *)p; - return h->permit_invalid_certs; +/** + * Get the search tree for a particular host + * + * \param host the host to lookup + * \return the corresponding search tree + */ +static struct search_node *urldb_get_search_tree(const char *host) +{ + return *urldb_get_search_tree_direct(host); } + /** - * Set authentication data for an URL + * Compare host_part with a string * - * \param url The URL to consider - * \param realm 
The authentication realm - * \param auth The authentication details (in form username:password) + * \param a + * \param b + * \return 0 if match, non-zero, otherwise */ -void urldb_set_auth_details(nsurl *url, const char *realm, - const char *auth) +static int urldb_search_match_string(const struct host_part *a, const char *b) { - struct path_data *p, *pi; - struct host_part *h; - struct prot_space_data *space, *space_alloc; - char *realm_alloc, *auth_alloc; - bool match; + const char *end, *dot; + int plen, ret; - assert(url && realm && auth); + assert(a && a != &db_root && b); - /* add url, in case it's missing */ - urldb_add_url(url); + if (urldb__host_is_ip_address(b)) { + /* IP address */ + return strcasecmp(a->part, b); + } - p = urldb_find_url(url); + end = b + strlen(b) + 1; - if (!p) - return; + while (b < end && a && a != &db_root) { + dot = strchr(b, '.'); + if (!dot) { + /* last segment */ + dot = end - 1; + } - /* Search for host_part */ - for (pi = p; pi->parent != NULL; pi = pi->parent) - ; - h = (struct host_part *)pi; + /* Compare strings (length limited) */ + if ((ret = strncasecmp(a->part, b, dot - b)) != 0) + /* didn't match => return difference */ + return ret; - /* Search if given URL belongs to a protection space we already know of. */ - for (space = h->prot_space; space; space = space->next) { - if (!strcmp(space->realm, realm) && - lwc_string_isequal(space->scheme, p->scheme, - &match) == lwc_error_ok && - match == true && - space->port == p->port) - break; - } - - if (space != NULL) { - /* Overrule existing auth. */ - free(space->auth); - space->auth = strdup(auth); - } else { - /* Create a new protection space. 
*/ - space = space_alloc = malloc(sizeof(struct prot_space_data)); - realm_alloc = strdup(realm); - auth_alloc = strdup(auth); + /* The strings matched, now check that the lengths do, too */ + plen = strlen(a->part); - if (!space_alloc || !realm_alloc || !auth_alloc) { - free(space_alloc); - free(realm_alloc); - free(auth_alloc); - return; - } + if (plen > dot - b) + /* len(a) > len(b) */ + return 1; + else if (plen < dot - b) + /* len(a) < len(b) */ + return -1; - space->scheme = lwc_string_ref(p->scheme); - space->port = p->port; - space->realm = realm_alloc; - space->auth = auth_alloc; - space->next = h->prot_space; - h->prot_space = space; + b = dot + 1; + a = a->parent; } - p->prot_space = space; + /* If we get here then either: + * a) The path lengths differ + * or b) The hosts are identical + */ + if (a && a != &db_root && b >= end) + /* len(a) > len(b) */ + return 1; + else if ((!a || a == &db_root) && b < end) + /* len(a) < len(b) */ + return -1; + + /* Identical */ + return 0; } + /** - * Set certificate verification permissions + * Find a node in a search tree * - * \param url URL to consider - * \param permit Set to true to allow invalid certificates + * \param root Tree to look in + * \param host Host to find + * \return Pointer to host tree node, or NULL if not found */ -void urldb_set_cert_permissions(nsurl *url, bool permit) +static const struct host_part * +urldb_search_find(struct search_node *root, const char *host) { - struct path_data *p; - struct host_part *h; - - assert(url); - - /* add url, in case it's missing */ - urldb_add_url(url); + int c; - p = urldb_find_url(url); - if (!p) - return; + assert(root && host); - for (; p && p->parent; p = p->parent) - /* do nothing */; - assert(p); + if (root == &empty) { + return NULL; + } - h = (struct host_part *)p; + c = urldb_search_match_string(root->data, host); - h->permit_invalid_certs = permit; + if (c > 0) + return urldb_search_find(root->left, host); + else if (c < 0) + return 
urldb_search_find(root->right, host); + else + return root->data; } + /** - * Set thumbnail for url, replacing any existing thumbnail + * Match a path string * - * \param url Absolute URL to consider - * \param bitmap Opaque pointer to thumbnail data, or NULL to invalidate + * \param parent Path (sub)tree to look in + * \param path The path to search for + * \param scheme The URL scheme associated with the path + * \param port The port associated with the path + * \return Pointer to path data or NULL if not found. */ -void urldb_set_thumbnail(nsurl *url, struct bitmap *bitmap) +static struct path_data *urldb_match_path(const struct path_data *parent, + const char *path, lwc_string *scheme, unsigned short port) { - struct path_data *p; + const struct path_data *p; + const char *slash; + bool match; - assert(url); + assert(parent != NULL); + assert(parent->segment == NULL); - p = urldb_find_url(url); - if (!p) - return; + if (path[0] != '/') { + LOG(("path is %s", path)); + } - if (p->thumb && p->thumb != bitmap) - bitmap_destroy(p->thumb); + assert(path[0] == '/'); - p->thumb = bitmap; -} + /* Start with children, as parent has no segment */ + p = parent->children; -/** - * Retrieve thumbnail data for given URL - * - * \param url Absolute URL to search for - * \return Pointer to thumbnail data, or NULL if not found. 
- */ -struct bitmap *urldb_get_thumbnail(nsurl *url) -{ - struct path_data *p; + while (p != NULL) { + slash = strchr(path + 1, '/'); + if (!slash) + slash = path + strlen(path); - assert(url); + if (strncmp(p->segment, path + 1, slash - path - 1) == 0 && + lwc_string_isequal(p->scheme, scheme, &match) == + lwc_error_ok && + match == true && + p->port == port) { + if (*slash == '\0') { + /* Complete match */ + return (struct path_data *) p; + } - p = urldb_find_url(url); - if (!p) - return NULL; + /* Match so far, go down tree */ + p = p->children; - return p->thumb; + path = slash; + } else { + /* No match, try next sibling */ + p = p->next; + } + } + + return NULL; } + /** - * Iterate over entries in the database which match the given prefix + * Find an URL in the database * - * \param prefix Prefix to match - * \param callback Callback function + * \param url Absolute URL to find + * \return Pointer to path data, or NULL if not found */ -void urldb_iterate_partial(const char *prefix, - bool (*callback)(nsurl *url, - const struct url_data *data)) +static struct path_data *urldb_find_url(nsurl *url) { - char host[256]; - char buf[260]; /* max domain + "www." 
*/ - const char *slash, *scheme_sep; - struct search_node *tree; const struct host_part *h; + struct path_data *p; + struct search_node *tree; + char *plq; + const char *host_str; + lwc_string *scheme, *host, *port; + size_t len = 0; + unsigned int port_int; + bool match; - assert(prefix && callback); + assert(url); - /* strip scheme */ - scheme_sep = strstr(prefix, "://"); - if (scheme_sep) - prefix = scheme_sep + 3; + if (url_bloom != NULL) { + if (bloom_search_hash(url_bloom, + nsurl_hash(url)) == false) { + return NULL; + } + } - slash = strchr(prefix, '/'); - tree = urldb_get_search_tree(prefix); + scheme = nsurl_get_component(url, NSURL_SCHEME); + if (scheme == NULL) + return NULL; - if (slash) { - /* if there's a slash in the input, then we can - * assume that we're looking for a path */ - snprintf(host, sizeof host, "%.*s", - (int) (slash - prefix), prefix); + host = nsurl_get_component(url, NSURL_HOST); + if (host != NULL) { + host_str = lwc_string_data(host); + lwc_string_unref(host); - h = urldb_search_find(tree, host); - if (!h) { - int len = slash - prefix; + } else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) == + lwc_error_ok && match == true) { + host_str = "localhost"; - if (len <= 3 || strncasecmp(host, "www.", 4) != 0) { - snprintf(buf, sizeof buf, "www.%s", host); - h = urldb_search_find( - search_trees[ST_DN + 'w' - 'a'], - buf); - if (!h) - return; - } else - return; - } + } else { + lwc_string_unref(scheme); + return NULL; + } - if (h->paths.children) { - /* Have paths, iterate them */ - urldb_iterate_partial_path(&h->paths, slash + 1, - callback); - } + tree = urldb_get_search_tree(host_str); + h = urldb_search_find(tree, host_str); + if (!h) { + lwc_string_unref(scheme); + return NULL; + } + + /* generate plq (path, leaf, query) */ + if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &plq, &len) != + NSERROR_OK) { + lwc_string_unref(scheme); + return NULL; + } + /* Get port */ + port = nsurl_get_component(url, NSURL_PORT); + if 
(port != NULL) { + port_int = atoi(lwc_string_data(port)); + lwc_string_unref(port); } else { - int len = strlen(prefix); + port_int = 0; + } - /* looking for hosts */ - if (!urldb_iterate_partial_host(tree, prefix, callback)) - return; + p = urldb_match_path(&h->paths, plq, scheme, port_int); - if (len <= 3 || strncasecmp(prefix, "www.", 4) != 0) { - /* now look for www.prefix */ - snprintf(buf, sizeof buf, "www.%s", prefix); - if(!urldb_iterate_partial_host( - search_trees[ST_DN + 'w' - 'a'], - buf, callback)) - return; - } - } + free(plq); + lwc_string_unref(scheme); + + return p; } + /** - * Partial host iterator (internal) + * Dump URL database paths to stderr * - * \param root Root of (sub)tree to traverse - * \param prefix Prefix to match - * \param callback Callback function - * \return true to continue, false otherwise + * \param parent Parent node of tree to dump */ -bool urldb_iterate_partial_host(struct search_node *root, const char *prefix, - bool (*callback)(nsurl *url, const struct url_data *data)) +static void urldb_dump_paths(struct path_data *parent) { - int c; + const struct path_data *p = parent; + unsigned int i; - assert(root && prefix && callback); + do { + if (p->segment != NULL) { + LOG(("\t%s : %u", lwc_string_data(p->scheme), p->port)); - if (root == &empty) - return true; + LOG(("\t\t'%s'", p->segment)); - c = urldb_search_match_prefix(root->data, prefix); + for (i = 0; i != p->frag_cnt; i++) + LOG(("\t\t\t#%s", p->fragment[i])); + } - if (c > 0) - /* No match => look in left subtree */ - return urldb_iterate_partial_host(root->left, prefix, - callback); - else if (c < 0) - /* No match => look in right subtree */ - return urldb_iterate_partial_host(root->right, prefix, - callback); - else { - /* Match => iterate over l/r subtrees & process this node */ - if (!urldb_iterate_partial_host(root->left, prefix, - callback)) - return false; + if (p->children != NULL) { + p = p->children; + } else { + while (p != parent) { + if (p->next != NULL) 
{ + p = p->next; + break; + } - if (root->data->paths.children) { - /* and extract all paths attached to this host */ - if (!urldb_iterate_entries_path(&root->data->paths, - callback, NULL)) { - return false; + p = p->parent; } } - - if (!urldb_iterate_partial_host(root->right, prefix, - callback)) - return false; - } - - return true; + } while (p != parent); } + /** - * Partial path iterator (internal) + * Dump URL database hosts to stderr * - * \param parent Root of (sub)tree to traverse - * \param prefix Prefix to match - * \param callback Callback function - * \return true to continue, false otherwise + * \param parent Parent node of tree to dump */ -bool urldb_iterate_partial_path(const struct path_data *parent, - const char *prefix, bool (*callback)(nsurl *url, - const struct url_data *data)) +static void urldb_dump_hosts(struct host_part *parent) { - const struct path_data *p = parent->children; - const char *slash, *end = prefix + strlen(prefix); - - /* - * Given: http://www.example.org/a/b/c/d//e - * and assuming a path tree: - * . - * / \ - * a1 b1 - * / \ - * a2 b2 - * /|\ - * a b c - * 3 3 | - * d - * | - * e - * / \ - * f g - * - * Prefix will be: p will be: - * - * a/b/c/d//e a1 - * b/c/d//e a2 - * b/c/d//e b3 - * c/d//e a3 - * c/d//e b3 - * c/d//e c - * d//e d - * /e e (skip /) - * e e - * - * I.E. we perform a breadth-first search of the tree. 
- */ - - do { - slash = strchr(prefix, '/'); - if (!slash) - slash = end; + struct host_part *h; - if (slash == prefix && *prefix == '/') { - /* Ignore "//" */ - prefix++; - continue; - } - - if (strncasecmp(p->segment, prefix, slash - prefix) == 0) { - /* prefix matches so far */ - if (slash == end) { - /* we've run out of prefix, so all - * paths below this one match */ - if (!urldb_iterate_entries_path(p, callback, - NULL)) - return false; + if (parent->part) { + LOG(("%s", parent->part)); - /* Progress to next sibling */ - p = p->next; - } else { - /* Skip over this segment */ - prefix = slash + 1; + LOG(("\t%s invalid SSL certs", + parent->permit_invalid_certs ? "Permits" : "Denies")); + } - p = p->children; - } - } else { - /* Doesn't match this segment, try next sibling */ - p = p->next; - } - } while (p != NULL); + /* Dump path data */ + urldb_dump_paths(&parent->paths); - return true; + /* and recurse */ + for (h = parent->children; h; h = h->next) + urldb_dump_hosts(h); } + /** - * Iterate over all entries in database + * Dump search tree * - * \param callback Function to callback for each entry + * \param parent Parent node of tree to dump + * \param depth Tree depth */ -void urldb_iterate_entries(bool (*callback)(nsurl *url, - const struct url_data *data)) +static void urldb_dump_search(struct search_node *parent, int depth) { + const struct host_part *h; int i; - assert(callback); + if (parent == &empty) + return; - for (i = 0; i < NUM_SEARCH_TREES; i++) { - if (!urldb_iterate_entries_host(search_trees[i], - callback, NULL)) - break; + urldb_dump_search(parent->left, depth + 1); + + for (i = 0; i != depth; i++) + fputc(' ', stderr); + + for (h = parent->data; h; h = h->parent) { + if (h->part) + fprintf(stderr, "%s", h->part); + + if (h->parent && h->parent->parent) + fputc('.', stderr); } + + fputc('\n', stderr); + + urldb_dump_search(parent->right, depth + 1); } + /** - * Iterate over all cookies in database + * Compare a pair of host_parts * - * 
\param callback Function to callback for each entry + * \param a + * \param b + * \return 0 if match, non-zero, otherwise */ -void urldb_iterate_cookies(bool (*callback)(const struct cookie_data *data)) +static int +urldb_search_match_host(const struct host_part *a, const struct host_part *b) { - int i; + int ret; - assert(callback); + assert(a && b); - for (i = 0; i < NUM_SEARCH_TREES; i++) { - if (!urldb_iterate_entries_host(search_trees[i], - NULL, callback)) - break; - } + /* traverse up tree to root, comparing parts as we go. */ + for (; a && a != &db_root && b && b != &db_root; + a = a->parent, b = b->parent) + if ((ret = strcasecmp(a->part, b->part)) != 0) + /* They differ => return the difference here */ + return ret; + + /* If we get here then either: + * a) The path lengths differ + * or b) The hosts are identical + */ + if (a && a != &db_root && (!b || b == &db_root)) + /* len(a) > len(b) */ + return 1; + else if ((!a || a == &db_root) && b && b != &db_root) + /* len(a) < len(b) */ + return -1; + + /* identical */ + return 0; } + /** - * Host data iterator (internal) + * Rotate a subtree right * - * \param parent Root of subtree to iterate over - * \param url_callback Callback function - * \param cookie_callback Callback function - * \return true to continue, false otherwise + * \param root Root of subtree to rotate + * \return new root of subtree */ -bool urldb_iterate_entries_host(struct search_node *parent, - bool (*url_callback)(nsurl *url, - const struct url_data *data), - bool (*cookie_callback)(const struct cookie_data *data)) +static struct search_node *urldb_search_skew(struct search_node *root) { - if (parent == &empty) - return true; + struct search_node *temp; - if (!urldb_iterate_entries_host(parent->left, - url_callback, cookie_callback)) - return false; + assert(root); - if ((parent->data->paths.children) || ((cookie_callback) && - (parent->data->paths.cookies))) { - /* We have paths (or domain cookies), so iterate them */ - if 
(!urldb_iterate_entries_path(&parent->data->paths, - url_callback, cookie_callback)) { - return false; - } + if (root->left->level == root->level) { + temp = root->left; + root->left = temp->right; + temp->right = root; + root = temp; } - if (!urldb_iterate_entries_host(parent->right, - url_callback, cookie_callback)) - return false; - - return true; + return root; } + /** - * Path data iterator (internal) + * Rotate a node left, increasing the parent's level * - * \param parent Root of subtree to iterate over - * \param url_callback Callback function - * \param cookie_callback Callback function - * \return true to continue, false otherwise + * \param root Root of subtree to rotate + * \return New root of subtree */ -bool urldb_iterate_entries_path(const struct path_data *parent, - bool (*url_callback)(nsurl *url, - const struct url_data *data), - bool (*cookie_callback)(const struct cookie_data *data)) +static struct search_node *urldb_search_split(struct search_node *root) { - const struct path_data *p = parent; - const struct cookie_data *c; - - do { - if (p->children != NULL) { - /* Drill down into children */ - p = p->children; - } else { - /* All leaf nodes in the path tree should have an URL or - * cookies attached to them. If this is not the case, it - * indicates that there's a bug in the file loader/URL - * insertion code. Therefore, assert this here. */ - assert(url_callback || cookie_callback); - - /** \todo handle fragments? */ - if (url_callback) { - const struct url_internal_data *u = &p->urld; - - assert(p->url); + struct search_node *temp; - if (!url_callback(p->url, - (const struct url_data *) u)) - return false; - } else { - c = (const struct cookie_data *)p->cookies; - for (; c != NULL; c = c->next) - if (!cookie_callback(c)) - return false; - } + assert(root); - /* Now, find next node to process. 
*/ - while (p != parent) { - if (p->next != NULL) { - /* Have a sibling, process that */ - p = p->next; - break; - } + if (root->right->right->level == root->level) { + temp = root->right; + root->right = temp->left; + temp->left = root; + root = temp; - /* Ascend tree */ - p = p->parent; - } - } - } while (p != parent); + root->level++; + } - return true; + return root; } + /** - * Add a host node to the tree + * Insert node into search tree * - * \param part Host segment to add (or whole IP address) (copied) - * \param parent Parent node to add to - * \return Pointer to added node, or NULL on memory exhaustion + * \param root Root of (sub)tree to insert into + * \param n Node to insert + * \return Pointer to updated root */ -struct host_part *urldb_add_host_node(const char *part, - struct host_part *parent) +static struct search_node * +urldb_search_insert_internal(struct search_node *root, struct search_node *n) { - struct host_part *d; + assert(root && n); - assert(part && parent); + if (root == &empty) { + root = n; + } else { + int c = urldb_search_match_host(root->data, n->data); - d = calloc(1, sizeof(struct host_part)); - if (!d) - return NULL; + if (c > 0) { + root->left = urldb_search_insert_internal( + root->left, n); + } else if (c < 0) { + root->right = urldb_search_insert_internal( + root->right, n); + } else { + /* exact match */ + free(n); + return root; + } - d->part = strdup(part); - if (!d->part) { - free(d); - return NULL; + root = urldb_search_skew(root); + root = urldb_search_split(root); } - d->next = parent->children; - if (parent->children) - parent->children->prev = d; - d->parent = parent; - parent->children = d; - - return d; + return root; } /** - * Check whether a host string is an IP address. - * - * This call detects IPv4 addresses (all of dotted-quad or subsets, - * decimal or hexadecimal notations) and IPv6 addresses (including - * those containing embedded IPv4 addresses.) 
+ * Insert a node into the search tree * - * \param host a hostname terminated by '\0' - * \return true if the hostname is an IP address, false otherwise + * \param root Root of tree to insert into + * \param data User data to insert + * \return Pointer to updated root, or NULL if failed */ -static bool urldb__host_is_ip_address(const char *host) +static struct search_node * +urldb_search_insert(struct search_node *root, const struct host_part *data) { - struct in_addr ipv4; - size_t host_len = strlen(host); - const char *sane_host; - const char *slash; -#ifndef NO_IPV6 - struct in6_addr ipv6; - char ipv6_addr[64]; -#endif - /** @todo FIXME Some parts of urldb.c make confusions between hosts - * and "prefixes", we can sometimes be erroneously passed more than - * just a host. Sometimes we may be passed trailing slashes, or even - * whole path segments. A specific criminal in this class is - * urldb_iterate_partial, which takes a prefix to search for, but - * passes that prefix to functions that expect only hosts. - * - * For the time being, we will accept such calls; we check if there - * is a / in the host parameter, and if there is, we take a copy and - * replace the / with a \0. This is not a permanent solution; we - * should search through NetSurf and find all the callers that are - * in error and fix them. When doing this task, it might be wise - * to replace the hideousness below with code that doesn't have to do - * this, and add assert(strchr(host, '/') == NULL); somewhere. - * -- rjek - 2010-11-04 - */ - - slash = strchr(host, '/'); - if (slash == NULL) { - sane_host = host; - } else { - char *c = strdup(host); - c[slash - host] = '\0'; - sane_host = c; - host_len = slash - host - 1; - LOG(("WARNING: called with non-host '%s'", host)); - } - - if (strspn(sane_host, "0123456789abcdefABCDEF[].:") < host_len) - goto out_false; - - if (inet_aton(sane_host, &ipv4) != 0) { - /* This can only be a sane IPv4 address if it contains 3 dots. 
- * Helpfully, inet_aton is happy to treat "a", "a.b", "a.b.c", - * and "a.b.c.d" as valid IPv4 address strings where we only - * support the full, dotted-quad, form. - */ - int num_dots = 0; - size_t index; - - for (index = 0; index < host_len; index++) { - if (sane_host[index] == '.') - num_dots++; - } - - if (num_dots == 3) - goto out_true; - else - goto out_false; - } + struct search_node *n; -#ifndef NO_IPV6 - if (sane_host[0] != '[' || sane_host[host_len] != ']') - goto out_false; + assert(root && data); - strncpy(ipv6_addr, sane_host + 1, sizeof(ipv6_addr)); - ipv6_addr[sizeof(ipv6_addr) - 1] = '\0'; + n = malloc(sizeof(struct search_node)); + if (!n) + return NULL; - if (inet_pton(AF_INET6, ipv6_addr, &ipv6) == 1) - goto out_true; -#endif + n->level = 1; + n->data = data; + n->left = n->right = ∅ -out_false: - if (slash != NULL) free((void *)sane_host); - return false; + root = urldb_search_insert_internal(root, n); -out_true: - if (slash != NULL) free((void *)sane_host); - return true; + return root; } /** - * Add a host to the database, creating any intermediate entries + * Parse a cookie avpair * - * \param host Hostname to add - * \return Pointer to leaf node, or NULL on memory exhaustion + * \param c Cookie struct to populate + * \param n Name component + * \param v Value component + * \param was_quoted Whether ::v was quoted in the input + * \return true on success, false on memory exhaustion */ -struct host_part *urldb_add_host(const char *host) +static bool urldb_parse_avpair(struct cookie_internal_data *c, char *n, + char *v, bool was_quoted) { - struct host_part *d = (struct host_part *) &db_root, *e; - struct search_node *s; - char buf[256]; /* 256 bytes is sufficient - domain names are - * limited to 255 chars. 
*/ - char *part; - - assert(host); - - if (urldb__host_is_ip_address(host)) { - /* Host is an IP, so simply add as TLD */ - - /* Check for existing entry */ - for (e = d->children; e; e = e->next) - if (strcasecmp(host, e->part) == 0) - /* found => return it */ - return e; + int vlen; - d = urldb_add_host_node(host, d); + assert(c && n && v); - s = urldb_search_insert(search_trees[ST_IP], d); - if (!s) { - /* failed */ - d = NULL; - } else { - search_trees[ST_IP] = s; - } + /* Strip whitespace from start of name */ + for (; *n; n++) { + if (*n != ' ' && *n != '\t') + break; + } - return d; + /* Strip whitespace from end of name */ + for (vlen = strlen(n); vlen; vlen--) { + if (n[vlen] == ' ' || n[vlen] == '\t') + n[vlen] = '\0'; + else + break; } - /* Copy host string, so we can corrupt it */ - strncpy(buf, host, sizeof buf); - buf[sizeof buf - 1] = '\0'; + /* Strip whitespace from start of value */ + for (; *v; v++) { + if (*v != ' ' && *v != '\t') + break; + } - /* Process FQDN segments backwards */ - do { - part = strrchr(buf, '.'); - if (!part) { - /* last segment */ - /* Check for existing entry */ - for (e = d->children; e; e = e->next) - if (strcasecmp(buf, e->part) == 0) - break; + /* Strip whitespace from end of value */ + for (vlen = strlen(v); vlen; vlen--) { + if (v[vlen] == ' ' || v[vlen] == '\t') + v[vlen] = '\0'; + else + break; + } - if (e) { - d = e; - } else { - d = urldb_add_host_node(buf, d); - } + if (!c->comment && strcasecmp(n, "Comment") == 0) { + c->comment = strdup(v); + if (!c->comment) + return false; + } else if (!c->domain && strcasecmp(n, "Domain") == 0) { + if (v[0] == '.') { + /* Domain must start with a dot */ + c->domain_from_set = true; + c->domain = strdup(v); + if (!c->domain) + return false; + } + } else if (strcasecmp(n, "Max-Age") == 0) { + int temp = atoi(v); + if (temp == 0) + /* Special case - 0 means delete */ + c->expires = 0; + else + c->expires = time(NULL) + temp; + } else if (!c->path && strcasecmp(n, "Path") == 0) 
{ + c->path_from_set = true; + c->path = strdup(v); + if (!c->path) + return false; + } else if (strcasecmp(n, "Version") == 0) { + c->version = atoi(v); + } else if (strcasecmp(n, "Expires") == 0) { + char *datenoday; + time_t expires; - /* And insert into search tree */ - if (d) { - struct search_node **r; + /* Strip dayname from date (these are hugely + * variable and liable to break the parser. + * They also serve no useful purpose) */ + for (datenoday = v; *datenoday && !isdigit(*datenoday); + datenoday++) + ; /* do nothing */ - r = urldb_get_search_tree_direct(buf); - s = urldb_search_insert(*r, d); - if (!s) { - /* failed */ - d = NULL; - } else { - *r = s; - } - } - break; + expires = curl_getdate(datenoday, NULL); + if (expires == -1) { + /* assume we have an unrepresentable + * date => force it to the maximum + * possible value of a 32bit time_t + * (this may break in 2038. We'll + * deal with that once we come to + * it) */ + expires = (time_t)0x7fffffff; } + c->expires = expires; + } else if (strcasecmp(n, "Secure") == 0) { + c->secure = true; + } else if (strcasecmp(n, "HttpOnly") == 0) { + c->http_only = true; + } else if (!c->name) { + c->name = strdup(n); + c->value = strdup(v); + c->value_was_quoted = was_quoted; + if (!c->name || !c->value) + return false; + } - /* Check for existing entry */ - for (e = d->children; e; e = e->next) - if (strcasecmp(part + 1, e->part) == 0) - break; + return true; +} - d = e ? 
e : urldb_add_host_node(part + 1, d); - if (!d) - break; - *part = '\0'; - } while (1); +/** + * Free a cookie + * + * \param c The cookie to free + */ +static void urldb_free_cookie(struct cookie_internal_data *c) +{ + assert(c); - return d; + free(c->comment); + free(c->domain); + free(c->path); + free(c->name); + free(c->value); + free(c); } + /** - * Add a path node to the tree + * Parse a cookie * - * \param scheme URL scheme associated with path (copied) - * \param port Port number on host associated with path - * \param segment Path segment to add (copied) - * \param fragment URL fragment (copied), or NULL - * \param parent Parent node to add to - * \return Pointer to added node, or NULL on memory exhaustion + * \param url URL being fetched + * \param cookie Pointer to cookie string (updated on exit) + * \return Pointer to cookie structure (on heap, caller frees) or NULL */ -struct path_data *urldb_add_path_node(lwc_string *scheme, unsigned int port, - const char *segment, lwc_string *fragment, - struct path_data *parent) +static struct cookie_internal_data * +urldb_parse_cookie(nsurl *url, const char **cookie) { - struct path_data *d, *e; + struct cookie_internal_data *c; + const char *cur; + char name[1024], value[4096]; + char *n = name, *v = value; + bool in_value = false; + bool had_value_data = false; + bool value_verbatim = false; + bool quoted = false; + bool was_quoted = false; - assert(scheme && segment && parent); + assert(url && cookie && *cookie); - d = calloc(1, sizeof(struct path_data)); - if (!d) + c = calloc(1, sizeof(struct cookie_internal_data)); + if (c == NULL) return NULL; - d->scheme = lwc_string_ref(scheme); + c->expires = -1; - d->port = port; + name[0] = '\0'; + value[0] = '\0'; - d->segment = strdup(segment); - if (!d->segment) { - lwc_string_unref(d->scheme); - free(d); - return NULL; - } + for (cur = *cookie; *cur; cur++) { + if (*cur == '\r' && *(cur + 1) == '\n') { + /* End of header */ + if (quoted) { + /* Unmatched quote 
encountered */ - if (fragment) { - if (!urldb_add_path_fragment(d, fragment)) { - free(d->segment); - lwc_string_unref(d->scheme); - free(d); - return NULL; - } - } + /* Match Firefox 2.0.0.11 */ + value[0] = '\0'; - for (e = parent->children; e; e = e->next) - if (strcmp(e->segment, d->segment) > 0) - break; + } - if (e) { - d->prev = e->prev; - d->next = e; - if (e->prev) - e->prev->next = d; - else - parent->children = d; - e->prev = d; - } else if (!parent->children) { - d->prev = d->next = NULL; - parent->children = parent->last = d; - } else { - d->next = NULL; - d->prev = parent->last; - parent->last->next = d; - parent->last = d; - } - d->parent = parent; + break; + } else if (*cur == '\r') { + /* Spurious linefeed */ + continue; + } else if (*cur == '\n') { + /* Spurious newline */ + continue; + } - return d; -} + if (in_value && !had_value_data) { + if (*cur == ' ' || *cur == '\t') { + /* Strip leading whitespace from value */ + continue; + } else { + had_value_data = true; -/** - * Add a path to the database, creating any intermediate entries - * - * \param scheme URL scheme associated with path - * \param port Port number on host associated with path - * \param host Host tree node to attach to - * \param path_query Absolute path plus query to add (freed) - * \param fragment URL fragment, or NULL - * \param url URL (fragment ignored) - * \return Pointer to leaf node, or NULL on memory exhaustion - */ -struct path_data *urldb_add_path(lwc_string *scheme, unsigned int port, - const struct host_part *host, char *path_query, - lwc_string *fragment, nsurl *url) -{ - struct path_data *d, *e; - char *buf = path_query; - char *segment, *slash; - bool match; + /* Value is taken verbatim if first non-space + * character is not a " */ + if (*cur != '"') { + value_verbatim = true; + } + } + } - assert(scheme && host && url); + if (in_value && !value_verbatim && (*cur == '"')) { + /* Only non-verbatim values may be quoted */ + if (cur == *cookie || *(cur - 1) != 
'\\') { + /* Only unescaped quotes count */ + was_quoted = quoted; + quoted = !quoted; - d = (struct path_data *) &host->paths; + continue; + } + } - /* skip leading '/' */ - segment = buf; - if (*segment == '/') - segment++; + if (!quoted && !in_value && *cur == '=') { + /* First equals => attr-value separator */ + in_value = true; + continue; + } - /* Process path segments */ - do { - slash = strchr(segment, '/'); - if (!slash) { - /* last segment */ - /* look for existing entry */ - for (e = d->children; e; e = e->next) - if (strcmp(segment, e->segment) == 0 && - lwc_string_isequal(scheme, - e->scheme, &match) == - lwc_error_ok && - match == true && - e->port == port) - break; + if (!quoted && (was_quoted || *cur == ';')) { + /* Semicolon or after quoted value + * => end of current avpair */ - d = e ? urldb_add_path_fragment(e, fragment) : - urldb_add_path_node(scheme, port, - segment, fragment, d); - break; - } + /* NUL-terminate tokens */ + *n = '\0'; + *v = '\0'; - *slash = '\0'; + if (!urldb_parse_avpair(c, name, value, was_quoted)) { + /* Memory exhausted */ + urldb_free_cookie(c); + return NULL; + } - /* look for existing entry */ - for (e = d->children; e; e = e->next) - if (strcmp(segment, e->segment) == 0 && - lwc_string_isequal(scheme, e->scheme, - &match) == lwc_error_ok && - match == true && - e->port == port) + /* And reset to start */ + n = name; + v = value; + in_value = false; + had_value_data = false; + value_verbatim = false; + was_quoted = false; + + /* Now, if the current input is anything other than a + * semicolon, we must be sure to reprocess it */ + if (*cur != ';') { + cur--; + } + + continue; + } + + /* And now handle commas. 
These are a pain as they may mean + * any of the following: + * + * + End of cookie + * + Day separator in Expires avpair + * + (Invalid) comma in unquoted value + * + * Therefore, in order to handle all 3 cases (2 and 3 are + * identical, the difference being that 2 is in the spec and + * 3 isn't), we need to determine where the comma actually + * lies. We use the following heuristic: + * + * Given a comma at the current input position, find the + * immediately following semicolon (or end of input if none + * found). Then, consider the input characters between + * these two positions. If any of these characters is an + * '=', we must assume that the comma signified the end of + * the current cookie. + * + * This holds as the first avpair of any cookie must be + * NAME=VALUE, so the '=' is guaranteed to appear in the + * case where the comma marks the end of a cookie. + * + * This will fail, however, in the case where '=' appears in + * the value of the current avpair after the comma or the + * subsequent cookie does not start with NAME=VALUE. Neither + * of these is particularly likely and if they do occur, the + * website is more broken than we can be bothered to handle. + */ + if (!quoted && *cur == ',') { + /* Find semi-colon, if any */ + const char *p; + const char *semi = strchr(cur + 1, ';'); + if (!semi) + semi = cur + strlen(cur) - 2 /* CRLF */; + + /* Look for equals sign between comma and semi */ + for (p = cur + 1; p < semi; p++) + if (*p == '=') + break; + + if (p == semi) { + /* none found => comma internal to value */ + /* do nothing */ + } else { + /* found one => comma marks end of cookie */ + cur++; break; + } + } - d = e ? e : urldb_add_path_node(scheme, port, segment, NULL, d); - if (!d) - break; + /* Accumulate into buffers, always leaving space for a NUL */ + /** \todo is silently truncating overlong names/values wise? 
*/ + if (!in_value) { + if (n < name + (sizeof(name) - 1)) + *n++ = *cur; + } else { + if (v < value + (sizeof(value) - 1)) + *v++ = *cur; + } + } - segment = slash + 1; - } while (1); + /* Parse final avpair */ + *n = '\0'; + *v = '\0'; - free(path_query); + if (!urldb_parse_avpair(c, name, value, was_quoted)) { + /* Memory exhausted */ + urldb_free_cookie(c); + return NULL; + } - if (d && !d->url) { - /* Insert defragmented URL */ - if (nsurl_defragment(url, &d->url) != NSERROR_OK) + /* Now fix-up default values */ + if (c->domain == NULL) { + lwc_string *host = nsurl_get_component(url, NSURL_HOST); + if (host == NULL) { + urldb_free_cookie(c); return NULL; + } + c->domain = strdup(lwc_string_data(host)); + lwc_string_unref(host); } - return d; -} - -/** - * Fragment comparator callback for qsort - */ -int urldb_add_path_fragment_cmp(const void *a, const void *b) -{ - return strcasecmp(*((const char **) a), *((const char **) b)); -} + if (c->path == NULL) { + const char *path_data; + char *path, *slash; + lwc_string *path_lwc; -/** - * Add a fragment to a path segment - * - * \param segment Path segment to add to - * \param fragment Fragment to add (copied), or NULL - * \return segment or NULL on memory exhaustion - */ -struct path_data *urldb_add_path_fragment(struct path_data *segment, - lwc_string *fragment) -{ - char **temp; + path_lwc = nsurl_get_component(url, NSURL_PATH); + if (path_lwc == NULL) { + urldb_free_cookie(c); + return NULL; + } + path_data = lwc_string_data(path_lwc); - assert(segment); + /* Strip leafname and trailing slash (4.3.1) */ + slash = strrchr(path_data, '/'); + if (slash != NULL) { + /* Special case: retain first slash in path */ + if (slash == path_data) + slash++; - /* If no fragment, this function is a NOP - * This may seem strange, but it makes the rest - * of the code cleaner */ - if (!fragment) - return segment; + slash = strndup(path_data, slash - path_data); + if (slash == NULL) { + lwc_string_unref(path_lwc); + 
urldb_free_cookie(c); + return NULL; + } - temp = realloc(segment->fragment, - (segment->frag_cnt + 1) * sizeof(char *)); - if (!temp) - return NULL; + path = slash; + lwc_string_unref(path_lwc); + } else { + path = strdup(lwc_string_data(path_lwc)); + lwc_string_unref(path_lwc); + if (path == NULL) { + urldb_free_cookie(c); + return NULL; + } + } - segment->fragment = temp; - segment->fragment[segment->frag_cnt] = - strdup(lwc_string_data(fragment)); - if (!segment->fragment[segment->frag_cnt]) { - /* Don't free temp - it's now our buffer */ - return NULL; + c->path = path; } - segment->frag_cnt++; - - /* We want fragments in alphabetical order, so sort them - * It may prove better to insert in alphabetical order instead */ - qsort(segment->fragment, segment->frag_cnt, sizeof (char *), - urldb_add_path_fragment_cmp); + /* Write back current position */ + *cookie = cur; - return segment; + return c; } + /** - * Find an URL in the database + * Insert a cookie into the database * - * \param url Absolute URL to find - * \return Pointer to path data, or NULL if not found + * \param c The cookie to insert + * \param scheme URL scheme associated with cookie path + * \param url URL (sans fragment) associated with cookie + * \return true on success, false on memory exhaustion (c will be freed) */ -struct path_data *urldb_find_url(nsurl *url) +static bool urldb_insert_cookie(struct cookie_internal_data *c, + lwc_string *scheme, nsurl *url) { + struct cookie_internal_data *d; const struct host_part *h; struct path_data *p; - struct search_node *tree; - char *plq; - const char *host_str; - lwc_string *scheme, *host, *port; - size_t len = 0; - unsigned int port_int; - bool match; + time_t now = time(NULL); - assert(url); - - if (url_bloom != NULL) { - if (bloom_search_hash(url_bloom, - nsurl_hash(url)) == false) { - return NULL; + assert(c); + + if (c->domain[0] == '.') { + h = urldb_search_find( + urldb_get_search_tree(&(c->domain[1])), + c->domain + 1); + if (!h) { + h = 
urldb_add_host(c->domain + 1); + if (!h) { + urldb_free_cookie(c); + return false; + } } - } - scheme = nsurl_get_component(url, NSURL_SCHEME); - if (scheme == NULL) - return NULL; + p = (struct path_data *) &h->paths; + } else { + /* Need to have a URL and scheme, if it's not a domain cookie */ + assert(url != NULL); + assert(scheme != NULL); - host = nsurl_get_component(url, NSURL_HOST); - if (host != NULL) { - host_str = lwc_string_data(host); - lwc_string_unref(host); + h = urldb_search_find( + urldb_get_search_tree(c->domain), + c->domain); - } else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) == - lwc_error_ok && match == true) { - host_str = "localhost"; + if (!h) { + h = urldb_add_host(c->domain); + if (!h) { + urldb_free_cookie(c); + return false; + } + } - } else { - lwc_string_unref(scheme); - return NULL; + /* find path */ + p = urldb_add_path(scheme, 0, h, + strdup(c->path), NULL, url); + if (!p) { + urldb_free_cookie(c); + return false; + } } - tree = urldb_get_search_tree(host_str); - h = urldb_search_find(tree, host_str); - if (!h) { - lwc_string_unref(scheme); - return NULL; + /* add cookie */ + for (d = p->cookies; d; d = d->next) { + if (!strcmp(d->domain, c->domain) && + !strcmp(d->path, c->path) && + !strcmp(d->name, c->name)) + break; } - /* generate plq (path, leaf, query) */ - if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &plq, &len) != - NSERROR_OK) { - lwc_string_unref(scheme); - return NULL; - } + if (d) { + if (c->expires != -1 && c->expires < now) { + /* remove cookie */ + if (d->next) + d->next->prev = d->prev; + else + p->cookies_end = d->prev; + if (d->prev) + d->prev->next = d->next; + else + p->cookies = d->next; - /* Get port */ - port = nsurl_get_component(url, NSURL_PORT); - if (port != NULL) { - port_int = atoi(lwc_string_data(port)); - lwc_string_unref(port); - } else { - port_int = 0; - } + cookie_manager_remove((struct cookie_data *)d); - p = urldb_match_path(&h->paths, plq, scheme, port_int); + 
urldb_free_cookie(d); + urldb_free_cookie(c); + } else { + /* replace d with c */ + c->prev = d->prev; + c->next = d->next; + if (c->next) + c->next->prev = c; + else + p->cookies_end = c; + if (c->prev) + c->prev->next = c; + else + p->cookies = c; - free(plq); - lwc_string_unref(scheme); + cookie_manager_remove((struct cookie_data *)d); + urldb_free_cookie(d); - return p; + cookie_manager_add((struct cookie_data *)c); + } + } else { + c->prev = p->cookies_end; + c->next = NULL; + if (p->cookies_end) + p->cookies_end->next = c; + else + p->cookies = c; + p->cookies_end = c; + + cookie_manager_add((struct cookie_data *)c); + } + + return true; } + /** - * Match a path string + * Concatenate a cookie into the provided buffer * - * \param parent Path (sub)tree to look in - * \param path The path to search for - * \param scheme The URL scheme associated with the path - * \param port The port associated with the path - * \return Pointer to path data or NULL if not found. + * \param c Cookie to concatenate + * \param version The version of the cookie string to output + * \param used Pointer to amount of buffer used (updated) + * \param alloc Pointer to allocated size of buffer (updated) + * \param buf Pointer to Pointer to buffer (updated) + * \return true on success, false on memory exhaustion */ -struct path_data *urldb_match_path(const struct path_data *parent, - const char *path, lwc_string *scheme, unsigned short port) +static bool urldb_concat_cookie(struct cookie_internal_data *c, int version, + int *used, int *alloc, char **buf) { - const struct path_data *p; - const char *slash; - bool match; + /* Combined (A)BNF for the Cookie: request header: + * + * CHAR = + * CTL = + * CR = + * LF = + * SP = + * HT = + * <"> = + * + * CRLF = CR LF + * + * LWS = [CRLF] 1*( SP | HT ) + * + * TEXT = + * + * token = 1* + * separators = "(" | ")" | "<" | ">" | "@" + * | "," | ";" | ":" | "\" | <"> + * | "/" | "[" | "]" | "?" 
| "=" + * | "{" | "}" | SP | HT + * + * quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) + * qdtext = > + * quoted-pair = "\" CHAR + * + * attr = token + * value = word + * word = token | quoted-string + * + * cookie = "Cookie:" cookie-version + * 1*((";" | ",") cookie-value) + * cookie-value = NAME "=" VALUE [";" path] [";" domain] + * cookie-version = "$Version" "=" value + * NAME = attr + * VALUE = value + * path = "$Path" "=" value + * domain = "$Domain" "=" value + * + * A note on quoted-string handling: + * The cookie data stored in the db is verbatim (i.e. sans enclosing + * <">, if any, and with all quoted-pairs intact) thus all that we + * need to do here is ensure that value strings which were quoted + * in Set-Cookie or which include any of the separators are quoted + * before use. + * + * A note on cookie-value separation: + * We use semicolons for all separators, including between + * cookie-values. This simplifies things and is backwards compatible. + */ + const char * const separators = "()<>@,;:\\\"/[]?={} \t"; - assert(parent != NULL); - assert(parent->segment == NULL); - assert(path[0] == '/'); + int max_len; - /* Start with children, as parent has no segment */ - p = parent->children; + assert(c && used && alloc && buf && *buf); - while (p != NULL) { - slash = strchr(path + 1, '/'); - if (!slash) - slash = path + strlen(path); + /* "; " cookie-value + * We allow for the possibility that values are quoted + */ + max_len = 2 + strlen(c->name) + 1 + strlen(c->value) + 2 + + (c->path_from_set ? + 8 + strlen(c->path) + 2 : 0) + + (c->domain_from_set ? 
+ 10 + strlen(c->domain) + 2 : 0); - if (strncmp(p->segment, path + 1, slash - path - 1) == 0 && - lwc_string_isequal(p->scheme, scheme, &match) == - lwc_error_ok && - match == true && - p->port == port) { - if (*slash == '\0') { - /* Complete match */ - return (struct path_data *) p; - } + if (*used + max_len >= *alloc) { + char *temp = realloc(*buf, *alloc + 4096); + if (!temp) { + return false; + } + *buf = temp; + *alloc += 4096; + } - /* Match so far, go down tree */ - p = p->children; + if (version == COOKIE_NETSCAPE) { + /* Original Netscape cookie */ + sprintf(*buf + *used - 1, "; %s=", c->name); + *used += 2 + strlen(c->name) + 1; - path = slash; + /* The Netscape spec doesn't mention quoting of cookie values. + * RFC 2109 $10.1.3 indicates that values must not be quoted. + * + * However, other browsers preserve quoting, so we should, too + */ + if (c->value_was_quoted) { + sprintf(*buf + *used - 1, "\"%s\"", c->value); + *used += 1 + strlen(c->value) + 1; } else { - /* No match, try next sibling */ - p = p->next; + /** \todo should we %XX-encode [;HT,SP] ? */ + /** \todo Should we strip escaping backslashes? */ + sprintf(*buf + *used - 1, "%s", c->value); + *used += strlen(c->value); + } + + /* We don't send path/domain information -- that's what the + * Netscape spec suggests we should do, anyway. 
*/ + } else { + /* RFC2109 or RFC2965 cookie */ + sprintf(*buf + *used - 1, "; %s=", c->name); + *used += 2 + strlen(c->name) + 1; + + /* Value needs quoting if it contains any separator or if + * it needs preserving from the Set-Cookie header */ + if (c->value_was_quoted || + strpbrk(c->value, separators) != NULL) { + sprintf(*buf + *used - 1, "\"%s\"", c->value); + *used += 1 + strlen(c->value) + 1; + } else { + sprintf(*buf + *used - 1, "%s", c->value); + *used += strlen(c->value); + } + + if (c->path_from_set) { + /* Path, quoted if necessary */ + sprintf(*buf + *used - 1, "; $Path="); + *used += 8; + + if (strpbrk(c->path, separators) != NULL) { + sprintf(*buf + *used - 1, "\"%s\"", c->path); + *used += 1 + strlen(c->path) + 1; + } else { + sprintf(*buf + *used - 1, "%s", c->path); + *used += strlen(c->path); + } + } + + if (c->domain_from_set) { + /* Domain, quoted if necessary */ + sprintf(*buf + *used - 1, "; $Domain="); + *used += 10; + + if (strpbrk(c->domain, separators) != NULL) { + sprintf(*buf + *used - 1, "\"%s\"", c->domain); + *used += 1 + strlen(c->domain) + 1; + } else { + sprintf(*buf + *used - 1, "%s", c->domain); + *used += strlen(c->domain); + } } } - return NULL; + return true; } + /** - * Get the search tree for a particular host - * - * \param host the host to lookup - * \return the corresponding search tree + * deletes paths from a cookie. 
*/ -struct search_node **urldb_get_search_tree_direct(const char *host) { - assert(host); +static void urldb_delete_cookie_paths(const char *domain, const char *path, + const char *name, struct path_data *parent) +{ + struct cookie_internal_data *c; + struct path_data *p = parent; - if (urldb__host_is_ip_address(host)) - return &search_trees[ST_IP]; - else if (isalpha(*host)) - return &search_trees[ST_DN + tolower(*host) - 'a']; - return &search_trees[ST_EE]; -} + assert(parent); -/** - * Get the search tree for a particular host - * - * \param host the host to lookup - * \return the corresponding search tree - */ -struct search_node *urldb_get_search_tree(const char *host) { - return *urldb_get_search_tree_direct(host); -} + do { + for (c = p->cookies; c; c = c->next) { + if (strcmp(c->domain, domain) == 0 && + strcmp(c->path, path) == 0 && + strcmp(c->name, name) == 0) { + if (c->prev) + c->prev->next = c->next; + else + p->cookies = c->next; -/** - * Dump URL database to stderr - */ -void urldb_dump(void) -{ - int i; + if (c->next) + c->next->prev = c->prev; + else + p->cookies_end = c->prev; - urldb_dump_hosts(&db_root); + urldb_free_cookie(c); - for (i = 0; i != NUM_SEARCH_TREES; i++) - urldb_dump_search(search_trees[i], 0); + return; + } + } + + if (p->children) { + p = p->children; + } else { + while (p != parent) { + if (p->next != NULL) { + p = p->next; + break; + } + + p = p->parent; + } + } + } while (p != parent); } + /** - * Dump URL database hosts to stderr - * - * \param parent Parent node of tree to dump + * Deletes cookie hosts and their assoicated paths */ -void urldb_dump_hosts(struct host_part *parent) +static void urldb_delete_cookie_hosts(const char *domain, const char *path, + const char *name, struct host_part *parent) { struct host_part *h; + assert(parent); - if (parent->part) { - LOG(("%s", parent->part)); - - LOG(("\t%s invalid SSL certs", - parent->permit_invalid_certs ? 
"Permits" : "Denies")); - } - - /* Dump path data */ - urldb_dump_paths(&parent->paths); + urldb_delete_cookie_paths(domain, path, name, &parent->paths); - /* and recurse */ for (h = parent->children; h; h = h->next) - urldb_dump_hosts(h); + urldb_delete_cookie_hosts(domain, path, name, h); } + /** - * Dump URL database paths to stderr + * Save a path subtree's cookies * - * \param parent Parent node of tree to dump + * \param fp File pointer to write to + * \param parent Parent path */ -void urldb_dump_paths(struct path_data *parent) +static void urldb_save_cookie_paths(FILE *fp, struct path_data *parent) { - const struct path_data *p = parent; - unsigned int i; + struct path_data *p = parent; + time_t now = time(NULL); + + assert(fp && parent); do { - if (p->segment != NULL) { - LOG(("\t%s : %u", lwc_string_data(p->scheme), p->port)); + if (p->cookies != NULL) { + struct cookie_internal_data *c; - LOG(("\t\t'%s'", p->segment)); + for (c = p->cookies; c != NULL; c = c->next) { + if (c->expires == -1 || c->expires < now) + /* Skip expired & session cookies */ + continue; - for (i = 0; i != p->frag_cnt; i++) - LOG(("\t\t\t#%s", p->fragment[i])); + fprintf(fp, + "%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t" + "%s\t%s\t%d\t%s\t%s\t%s\n", + c->version, c->domain, + c->domain_from_set, c->path, + c->path_from_set, c->secure, + c->http_only, + (int)c->expires, (int)c->last_used, + c->no_destroy, c->name, c->value, + c->value_was_quoted, + p->scheme ? lwc_string_data(p->scheme) : + "unused", + p->url ? nsurl_access(p->url) : + "unused", + c->comment ? 
c->comment : ""); + } } if (p->children != NULL) { @@ -2185,1539 +2273,1447 @@ void urldb_dump_paths(struct path_data *parent) } while (p != parent); } + /** - * Dump search tree + * Save a host subtree's cookies * - * \param parent Parent node of tree to dump - * \param depth Tree depth + * \param fp File pointer to write to + * \param parent Parent host */ -void urldb_dump_search(struct search_node *parent, int depth) +static void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent) { - const struct host_part *h; - int i; - - if (parent == &empty) - return; - - urldb_dump_search(parent->left, depth + 1); - - for (i = 0; i != depth; i++) - fputc(' ', stderr); - - for (h = parent->data; h; h = h->parent) { - if (h->part) - fprintf(stderr, "%s", h->part); - - if (h->parent && h->parent->parent) - fputc('.', stderr); - } + struct host_part *h; + assert(fp && parent); - fputc('\n', stderr); + urldb_save_cookie_paths(fp, &parent->paths); - urldb_dump_search(parent->right, depth + 1); + for (h = parent->children; h; h = h->next) + urldb_save_cookie_hosts(fp, h); } + /** - * Insert a node into the search tree + * Destroy a cookie node * - * \param root Root of tree to insert into - * \param data User data to insert - * \return Pointer to updated root, or NULL if failed + * \param c Cookie to destroy */ -struct search_node *urldb_search_insert(struct search_node *root, - const struct host_part *data) +static void urldb_destroy_cookie(struct cookie_internal_data *c) { - struct search_node *n; - - assert(root && data); - - n = malloc(sizeof(struct search_node)); - if (!n) - return NULL; - - n->level = 1; - n->data = data; - n->left = n->right = ∅ - - root = urldb_search_insert_internal(root, n); + free(c->name); + free(c->value); + free(c->comment); + free(c->domain); + free(c->path); - return root; + free(c); } + /** - * Insert node into search tree + * Destroy the contents of a path node * - * \param root Root of (sub)tree to insert into - * \param n Node to insert 
- * \return Pointer to updated root + * \param node Node to destroy contents of (does not destroy node) */ -struct search_node *urldb_search_insert_internal(struct search_node *root, - struct search_node *n) +static void urldb_destroy_path_node_content(struct path_data *node) { - assert(root && n); + struct cookie_internal_data *a, *b; + unsigned int i; - if (root == &empty) { - root = n; - } else { - int c = urldb_search_match_host(root->data, n->data); + if (node->url != NULL) + nsurl_unref(node->url); - if (c > 0) { - root->left = urldb_search_insert_internal( - root->left, n); - } else if (c < 0) { - root->right = urldb_search_insert_internal( - root->right, n); - } else { - /* exact match */ - free(n); - return root; - } + if (node->scheme != NULL) + lwc_string_unref(node->scheme); - root = urldb_search_skew(root); - root = urldb_search_split(root); - } + free(node->segment); + for (i = 0; i < node->frag_cnt; i++) + free(node->fragment[i]); + free(node->fragment); - return root; + if (node->thumb) + bitmap_destroy(node->thumb); + + free(node->urld.title); + + for (a = node->cookies; a; a = b) { + b = a->next; + urldb_destroy_cookie(a); + } } + /** - * Find a node in a search tree + * Destroy protection space data * - * \param root Tree to look in - * \param host Host to find - * \return Pointer to host tree node, or NULL if not found + * \param space Protection space to destroy */ -const struct host_part *urldb_search_find(struct search_node *root, - const char *host) +static void urldb_destroy_prot_space(struct prot_space_data *space) { - int c; - - assert(root && host); - - if (root == &empty) { - return NULL; - } - - c = urldb_search_match_string(root->data, host); + lwc_string_unref(space->scheme); + free(space->realm); + free(space->auth); - if (c > 0) - return urldb_search_find(root->left, host); - else if (c < 0) - return urldb_search_find(root->right, host); - else - return root->data; + free(space); } + /** - * Compare a pair of host_parts + * Destroy 
a path tree * - * \param a - * \param b - * \return 0 if match, non-zero, otherwise + * \param root Root node of tree to destroy */ -int urldb_search_match_host(const struct host_part *a, - const struct host_part *b) +static void urldb_destroy_path_tree(struct path_data *root) { - int ret; + struct path_data *p = root; - assert(a && b); + do { + if (p->children != NULL) { + p = p->children; + } else { + struct path_data *q = p; - /* traverse up tree to root, comparing parts as we go. */ - for (; a && a != &db_root && b && b != &db_root; - a = a->parent, b = b->parent) - if ((ret = strcasecmp(a->part, b->part)) != 0) - /* They differ => return the difference here */ - return ret; - - /* If we get here then either: - * a) The path lengths differ - * or b) The hosts are identical - */ - if (a && a != &db_root && (!b || b == &db_root)) - /* len(a) > len(b) */ - return 1; - else if ((!a || a == &db_root) && b && b != &db_root) - /* len(a) < len(b) */ - return -1; - - /* identical */ - return 0; -} - -/** - * Compare host_part with a string - * - * \param a - * \param b - * \return 0 if match, non-zero, otherwise - */ -int urldb_search_match_string(const struct host_part *a, - const char *b) -{ - const char *end, *dot; - int plen, ret; + while (p != root) { + if (p->next != NULL) { + p = p->next; + break; + } - assert(a && a != &db_root && b); + p = p->parent; - if (urldb__host_is_ip_address(b)) { - /* IP address */ - return strcasecmp(a->part, b); - } + urldb_destroy_path_node_content(q); + free(q); - end = b + strlen(b) + 1; + q = p; + } - while (b < end && a && a != &db_root) { - dot = strchr(b, '.'); - if (!dot) { - /* last segment */ - dot = end - 1; + urldb_destroy_path_node_content(q); + free(q); } - - /* Compare strings (length limited) */ - if ((ret = strncasecmp(a->part, b, dot - b)) != 0) - /* didn't match => return difference */ - return ret; - - /* The strings matched, now check that the lengths do, too */ - plen = strlen(a->part); - - if (plen > dot - b) - 
/* len(a) > len(b) */ - return 1; - else if (plen < dot - b) - /* len(a) < len(b) */ - return -1; - - b = dot + 1; - a = a->parent; - } - - /* If we get here then either: - * a) The path lengths differ - * or b) The hosts are identical - */ - if (a && a != &db_root && b >= end) - /* len(a) > len(b) */ - return 1; - else if ((!a || a == &db_root) && b < end) - /* len(a) < len(b) */ - return -1; - - /* Identical */ - return 0; + } while (p != root); } + /** - * Compare host_part with prefix + * Destroy a host tree * - * \param a - * \param b - * \return 0 if match, non-zero, otherwise + * \param root Root node of tree to destroy */ -int urldb_search_match_prefix(const struct host_part *a, - const char *b) +static void urldb_destroy_host_tree(struct host_part *root) { - const char *end, *dot; - int plen, ret; - - assert(a && a != &db_root && b); + struct host_part *a, *b; + struct path_data *p, *q; + struct prot_space_data *s, *t; - if (urldb__host_is_ip_address(b)) { - /* IP address */ - return strncasecmp(a->part, b, strlen(b)); + /* Destroy children */ + for (a = root->children; a; a = b) { + b = a->next; + urldb_destroy_host_tree(a); } - end = b + strlen(b) + 1; - - while (b < end && a && a != &db_root) { - dot = strchr(b, '.'); - if (!dot) { - /* last segment */ - dot = end - 1; - } - - /* Compare strings (length limited) */ - if ((ret = strncasecmp(a->part, b, dot - b)) != 0) - /* didn't match => return difference */ - return ret; + /* Now clean up paths */ + for (p = root->paths.children; p; p = q) { + q = p->next; + urldb_destroy_path_tree(p); + } - /* The strings matched */ - if (dot < end - 1) { - /* Consider segment lengths only in the case - * where the prefix contains segments */ - plen = strlen(a->part); - if (plen > dot - b) - /* len(a) > len(b) */ - return 1; - else if (plen < dot - b) - /* len(a) < len(b) */ - return -1; - } + /* Root path */ + urldb_destroy_path_node_content(&root->paths); - b = dot + 1; - a = a->parent; + /* Proctection space data 
*/ + for (s = root->prot_space; s; s = t) { + t = s->next; + urldb_destroy_prot_space(s); } - /* If we get here then either: - * a) The path lengths differ - * or b) The hosts are identical - */ - if (a && a != &db_root && b >= end) - /* len(a) > len(b) => prefix matches */ - return 0; - else if ((!a || a == &db_root) && b < end) - /* len(a) < len(b) => prefix does not match */ - return -1; - - /* Identical */ - return 0; + /* And ourselves */ + free(root->part); + free(root); } + /** - * Rotate a subtree right + * Destroy a search tree * - * \param root Root of subtree to rotate - * \return new root of subtree + * \param root Root node of tree to destroy */ -struct search_node *urldb_search_skew(struct search_node *root) +static void urldb_destroy_search_tree(struct search_node *root) { - struct search_node *temp; + /* Destroy children */ + if (root->left != &empty) + urldb_destroy_search_tree(root->left); + if (root->right != &empty) + urldb_destroy_search_tree(root->right); - assert(root); + /* And destroy ourselves */ + free(root); +} - if (root->left->level == root->level) { - temp = root->left; - root->left = temp->right; - temp->right = root; - root = temp; - } - return root; -} +/*************** External interface ***************/ -/** - * Rotate a node left, increasing the parent's level - * - * \param root Root of subtree to rotate - * \return New root of subtree - */ -struct search_node *urldb_search_split(struct search_node *root) -{ - struct search_node *temp; - assert(root); +/* exported interface documented in content/urldb.h */ +void urldb_destroy(void) +{ + struct host_part *a, *b; + int i; - if (root->right->right->level == root->level) { - temp = root->right; - root->right = temp->left; - temp->left = root; - root = temp; + /* Clean up search trees */ + for (i = 0; i < NUM_SEARCH_TREES; i++) { + if (search_trees[i] != &empty) + urldb_destroy_search_tree(search_trees[i]); + } - root->level++; + /* And database */ + for (a = db_root.children; a; a 
= b) { + b = a->next; + urldb_destroy_host_tree(a); } - return root; + /* And the bloom filter */ + if (url_bloom != NULL) + bloom_destroy(url_bloom); } -/** - * Retrieve cookies for an URL - * - * \param url URL being fetched - * \param include_http_only Whether to include HTTP(S) only cookies. - * \return Cookies string for libcurl (on heap), or NULL on error/no cookies - */ -char *urldb_get_cookie(nsurl *url, bool include_http_only) + +/* exported interface documented in content/urldb.h */ +nserror urldb_load(const char *filename) { - const struct path_data *p, *q; - const struct host_part *h; - lwc_string *path_lwc; - struct cookie_internal_data *c; - int count = 0, version = COOKIE_RFC2965; - struct cookie_internal_data **matched_cookies; - int matched_cookies_size = 20; - int ret_alloc = 4096, ret_used = 1; - const char *path; - char *ret; - lwc_string *scheme; - time_t now; +#define MAXIMUM_URL_LENGTH 4096 + char s[MAXIMUM_URL_LENGTH]; + char host[256]; + struct host_part *h; + int urls; int i; - bool match; + int version; + int length; + FILE *fp; - assert(url != NULL); + assert(filename); - /* The URL must exist in the db in order to find relevant cookies, since - * we search up the tree from the URL node, and cookies from further - * up also apply. 
*/ - urldb_add_url(url); + LOG(("Loading URL file %s", filename)); - p = urldb_find_url(url); - if (!p) - return NULL; + if (url_bloom == NULL) + url_bloom = bloom_create(BLOOM_SIZE); - scheme = p->scheme; + fp = fopen(filename, "r"); + if (!fp) { + LOG(("Failed to open file '%s' for reading", filename)); + return NSERROR_NOT_FOUND; + } - matched_cookies = malloc(matched_cookies_size * - sizeof(struct cookie_internal_data *)); - if (!matched_cookies) - return NULL; - -#define GROW_MATCHED_COOKIES \ - do { \ - if (count == matched_cookies_size) { \ - struct cookie_internal_data **temp; \ - temp = realloc(matched_cookies, \ - (matched_cookies_size + 20) * \ - sizeof(struct cookie_internal_data *)); \ - \ - if (temp == NULL) { \ - free(ret); \ - free(matched_cookies); \ - return NULL; \ - } \ - \ - matched_cookies = temp; \ - matched_cookies_size += 20; \ - } \ - } while(0) + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) { + fclose(fp); + return NSERROR_NEED_DATA; + } - ret = malloc(ret_alloc); - if (!ret) { - free(matched_cookies); - return NULL; + version = atoi(s); + if (version < MIN_URL_FILE_VERSION) { + LOG(("Unsupported URL file version.")); + fclose(fp); + return NSERROR_INVALID; + } + if (version > URL_FILE_VERSION) { + LOG(("Unknown URL file version.")); + fclose(fp); + return NSERROR_INVALID; } - ret[0] = '\0'; + while (fgets(host, sizeof host, fp)) { + /* get the hostname */ + length = strlen(host) - 1; + host[length] = '\0'; - path_lwc = nsurl_get_component(url, NSURL_PATH); - if (path_lwc == NULL) { - free(ret); - free(matched_cookies); - return NULL; - } - path = lwc_string_data(path_lwc); - lwc_string_unref(path_lwc); + /* skip data that has ended up with a host of '' */ + if (length == 0) { + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + urls = atoi(s); + /* Eight fields/url */ + for (i = 0; i < (8 * urls); i++) { + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + } + continue; + } - now = time(NULL); + /* read number of URLs */ + if (!fgets(s, 
MAXIMUM_URL_LENGTH, fp)) + break; + urls = atoi(s); - if (*(p->segment) != '\0') { - /* Match exact path, unless directory, when prefix matching - * will handle this case for us. */ - for (q = p->parent->children; q; q = q->next) { - if (strcmp(q->segment, p->segment)) - continue; + /* no URLs => try next host */ + if (urls == 0) { + LOG(("No URLs for '%s'", host)); + continue; + } - /* Consider all cookies associated with - * this exact path */ - for (c = q->cookies; c; c = c->next) { - if (c->expires != -1 && c->expires < now) - /* cookie has expired => ignore */ - continue; + h = urldb_add_host(host); + if (!h) { + LOG(("Failed adding host: '%s'", host)); + fclose(fp); + return NSERROR_NOMEM; + } - if (c->secure && lwc_string_isequal( - q->scheme, - corestring_lwc_https, - &match) && - match == false) - /* secure cookie for insecure host. - * ignore */ - continue; + /* load the non-corrupt data */ + for (i = 0; i < urls; i++) { + struct path_data *p = NULL; + char scheme[64], ports[10]; + char url[64 + 3 + 256 + 6 + 4096 + 1]; + unsigned int port; + bool is_file = false; + nsurl *nsurl; + lwc_string *scheme_lwc, *fragment_lwc; + char *path_query; + size_t len; - if (c->http_only && !include_http_only) - /* Ignore HttpOnly */ - continue; + if (!fgets(scheme, sizeof scheme, fp)) + break; + length = strlen(scheme) - 1; + scheme[length] = '\0'; - matched_cookies[count++] = c; + if (!fgets(ports, sizeof ports, fp)) + break; + length = strlen(ports) - 1; + ports[length] = '\0'; + port = atoi(ports); - GROW_MATCHED_COOKIES; + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + length = strlen(s) - 1; + s[length] = '\0'; - if (c->version < (unsigned int)version) - version = c->version; + if (!strcasecmp(host, "localhost") && + !strcasecmp(scheme, "file")) + is_file = true; - c->last_used = now; + snprintf(url, sizeof url, "%s://%s%s%s%s", + scheme, + /* file URLs have no host */ + (is_file ? "" : host), + (port ? ":" : ""), + (port ? 
ports : ""), + s); - cookie_manager_add((struct cookie_data *)c); + /* TODO: store URLs in pre-parsed state, and make + * a nsurl_load to generate the nsurl more + * swiftly. + * Need a nsurl_save too. + */ + if (nsurl_create(url, &nsurl) != NSERROR_OK) { + LOG(("Failed inserting '%s'", url)); + fclose(fp); + return NSERROR_NOMEM; } - } - } - /* Now consider cookies whose paths prefix-match ours */ - for (p = p->parent; p; p = p->parent) { - /* Find directory's path entry(ies) */ - /* There are potentially multiple due to differing schemes */ - for (q = p->children; q; q = q->next) { - if (*(q->segment) != '\0') - continue; + if (url_bloom != NULL) { + uint32_t hash = nsurl_hash(nsurl); + bloom_insert_hash(url_bloom, hash); + } - for (c = q->cookies; c; c = c->next) { - if (c->expires != -1 && c->expires < now) - /* cookie has expired => ignore */ - continue; + /* Copy and merge path/query strings */ + if (nsurl_get(nsurl, NSURL_PATH | NSURL_QUERY, + &path_query, &len) != NSERROR_OK) { + LOG(("Failed inserting '%s'", url)); + fclose(fp); + return NSERROR_NOMEM; + } - if (c->secure && lwc_string_isequal( - q->scheme, - corestring_lwc_https, - &match) && - match == false) - /* Secure cookie for insecure server - * => ignore */ - continue; + scheme_lwc = nsurl_get_component(nsurl, NSURL_SCHEME); + fragment_lwc = nsurl_get_component(nsurl, + NSURL_FRAGMENT); + p = urldb_add_path(scheme_lwc, port, h, path_query, + fragment_lwc, nsurl); + if (!p) { + LOG(("Failed inserting '%s'", url)); + fclose(fp); + return NSERROR_NOMEM; + } + nsurl_unref(nsurl); + lwc_string_unref(scheme_lwc); + if (fragment_lwc != NULL) + lwc_string_unref(fragment_lwc); - matched_cookies[count++] = c; + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + if (p) + p->urld.visits = (unsigned int)atoi(s); - GROW_MATCHED_COOKIES; + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + if (p) + p->urld.last_visit = (time_t)atoi(s); - if (c->version < (unsigned int) version) - version = c->version; + if 
(!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + if (p) + p->urld.type = (content_type)atoi(s); - c->last_used = now; + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; - cookie_manager_add((struct cookie_data *)c); + + if (!fgets(s, MAXIMUM_URL_LENGTH, fp)) + break; + length = strlen(s) - 1; + if (p && length > 0) { + s[length] = '\0'; + p->urld.title = malloc(length + 1); + if (p->urld.title) + memcpy(p->urld.title, s, length + 1); } } + } - if (!p->parent) { - /* No parent, so bail here. This can't go in - * the loop exit condition as we also want to - * process the top-level node. - * - * If p->parent is NULL then p->cookies are - * the domain cookies and thus we don't even - * try matching against them. - */ - break; - } + fclose(fp); + LOG(("Successfully loaded URL file")); +#undef MAXIMUM_URL_LENGTH - /* Consider p itself - may be the result of Path=/foo */ - for (c = p->cookies; c; c = c->next) { - if (c->expires != -1 && c->expires < now) - /* cookie has expired => ignore */ - continue; + return NSERROR_OK; +} - /* Ensure cookie path is a prefix of the resource */ - if (strncmp(c->path, path, strlen(c->path)) != 0) - /* paths don't match => ignore */ - continue; +/* exported interface documented in content/urldb.h */ +nserror urldb_save(const char *filename) +{ + FILE *fp; + int i; - if (c->secure && lwc_string_isequal(p->scheme, - corestring_lwc_https, - &match) && - match == false) - /* Secure cookie for insecure server - * => ignore */ - continue; + assert(filename); - matched_cookies[count++] = c; + fp = fopen(filename, "w"); + if (!fp) { + LOG(("Failed to open file '%s' for writing", filename)); + return NSERROR_SAVE_FAILED; + } - GROW_MATCHED_COOKIES; + /* file format version number */ + fprintf(fp, "%d\n", URL_FILE_VERSION); - if (c->version < (unsigned int) version) - version = c->version; + for (i = 0; i != NUM_SEARCH_TREES; i++) { + urldb_save_search_tree(search_trees[i], fp); + } - c->last_used = now; + fclose(fp); - cookie_manager_add((struct 
cookie_data *)c); - } + return NSERROR_OK; +} - } - /* Finally consider domain cookies for hosts which domain match ours */ - for (h = (const struct host_part *)p; h && h != &db_root; - h = h->parent) { - for (c = h->paths.cookies; c; c = c->next) { - if (c->expires != -1 && c->expires < now) - /* cookie has expired => ignore */ - continue; +/* exported interface documented in content/urldb.h */ +void urldb_set_url_persistence(nsurl *url, bool persist) +{ + struct path_data *p; - /* Ensure cookie path is a prefix of the resource */ - if (strncmp(c->path, path, strlen(c->path)) != 0) - /* paths don't match => ignore */ - continue; + assert(url); - if (c->secure && lwc_string_isequal(scheme, - corestring_lwc_https, - &match) && - match == false) - /* secure cookie for insecure host. ignore */ - continue; + p = urldb_find_url(url); + if (!p) + return; - matched_cookies[count++] = c; + p->persistent = persist; +} - GROW_MATCHED_COOKIES; - if (c->version < (unsigned int)version) - version = c->version; +/* exported interface documented in content/urldb.h */ +bool urldb_add_url(nsurl *url) +{ + struct host_part *h; + struct path_data *p; + lwc_string *scheme; + lwc_string *port; + lwc_string *host; + lwc_string *fragment; + const char *host_str; + char *path_query = NULL; + size_t len; + bool match; + unsigned int port_int; - c->last_used = now; + assert(url); - cookie_manager_add((struct cookie_data *)c); - } - } + if (url_bloom == NULL) + url_bloom = bloom_create(BLOOM_SIZE); - if (count == 0) { - /* No cookies found */ - free(ret); - free(matched_cookies); - return NULL; + if (url_bloom != NULL) { + uint32_t hash = nsurl_hash(url); + bloom_insert_hash(url_bloom, hash); } - /* and build output string */ - if (version > COOKIE_NETSCAPE) { - sprintf(ret, "$Version=%d", version); - ret_used = strlen(ret) + 1; + /* Copy and merge path/query strings */ + if (nsurl_get(url, NSURL_PATH | NSURL_QUERY, &path_query, &len) != + NSERROR_OK) { + return false; } + assert(path_query 
!= NULL); - for (i = 0; i < count; i++) { - if (!urldb_concat_cookie(matched_cookies[i], version, - &ret_used, &ret_alloc, &ret)) { - free(ret); - free(matched_cookies); - return NULL; - } + scheme = nsurl_get_component(url, NSURL_SCHEME); + if (scheme == NULL) { + free(path_query); + return false; } - if (version == COOKIE_NETSCAPE) { - /* Old-style cookies => no version & skip "; " */ - memmove(ret, ret + 2, ret_used - 2); - ret_used -= 2; + host = nsurl_get_component(url, NSURL_HOST); + if (host != NULL) { + host_str = lwc_string_data(host); + lwc_string_unref(host); + + } else if (lwc_string_isequal(scheme, corestring_lwc_file, &match) == + lwc_error_ok && match == true) { + host_str = "localhost"; + + } else { + lwc_string_unref(scheme); + free(path_query); + return false; } - /* Now, shrink the output buffer to the required size */ - { - char *temp = realloc(ret, ret_used); - if (!temp) { - free(ret); - free(matched_cookies); - return NULL; - } + fragment = nsurl_get_component(url, NSURL_FRAGMENT); - ret = temp; + port = nsurl_get_component(url, NSURL_PORT); + if (port != NULL) { + port_int = atoi(lwc_string_data(port)); + lwc_string_unref(port); + } else { + port_int = 0; } - free(matched_cookies); + /* Get host entry */ + h = urldb_add_host(host_str); - return ret; + /* Get path entry */ + p = (h != NULL) ? 
urldb_add_path(scheme, port_int, h, path_query, + fragment, url) : NULL; -#undef GROW_MATCHED_COOKIES + lwc_string_unref(scheme); + if (fragment != NULL) + lwc_string_unref(fragment); + + return (p != NULL); } -/** - * Parse Set-Cookie header and insert cookie(s) into database - * - * \param header Header to parse, with Set-Cookie: stripped - * \param url URL being fetched - * \param referer Referring resource, or 0 for verifiable transaction - * \return true on success, false otherwise - */ -bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer) + +/* exported interface documented in content/urldb.h */ +void urldb_set_url_title(nsurl *url, const char *title) { - const char *cur = header, *end; - lwc_string *path, *host, *scheme; - nsurl *urlt; - bool match; + struct path_data *p; + char *temp; - assert(url && header); + assert(url && title); - /* Get defragmented URL, as 'urlt' */ - if (nsurl_defragment(url, &urlt) != NSERROR_OK) - return NULL; + p = urldb_find_url(url); + if (!p) + return; - scheme = nsurl_get_component(url, NSURL_SCHEME); - if (scheme == NULL) { - nsurl_unref(urlt); - return false; - } + temp = strdup(title); + if (!temp) + return; - path = nsurl_get_component(url, NSURL_PATH); - if (path == NULL) { - lwc_string_unref(scheme); - nsurl_unref(urlt); - return false; - } + free(p->urld.title); + p->urld.title = temp; +} - host = nsurl_get_component(url, NSURL_HOST); - if (host == NULL) { - lwc_string_unref(path); - lwc_string_unref(scheme); - nsurl_unref(urlt); - return false; - } - if (referer) { - lwc_string *rhost; +/* exported interface documented in content/urldb.h */ +void urldb_set_url_content_type(nsurl *url, content_type type) +{ + struct path_data *p; - /* Ensure that url's host name domain matches - * referer's (4.3.5) */ - rhost = nsurl_get_component(referer, NSURL_HOST); - if (rhost == NULL) { - goto error; - } + assert(url); - /* Domain match host names */ - if (lwc_string_isequal(host, rhost, &match) == lwc_error_ok && 
- match == false) { - const char *hptr; - const char *rptr; - const char *dot; - const char *host_data = lwc_string_data(host); - const char *rhost_data = lwc_string_data(rhost); + p = urldb_find_url(url); + if (!p) + return; - /* Ensure neither host nor rhost are IP addresses */ - if (urldb__host_is_ip_address(host_data) || - urldb__host_is_ip_address(rhost_data)) { - /* IP address, so no partial match */ - lwc_string_unref(rhost); - goto error; - } + p->urld.type = type; +} - /* Not exact match, so try the following: - * - * 1) Find the longest common suffix of host and rhost - * (may be all of host/rhost) - * 2) Discard characters from the start of the suffix - * until the suffix starts with a dot - * (prevents foobar.com matching bar.com) - * 3) Ensure the suffix is non-empty and contains - * embedded dots (to avoid permitting .com as a - * suffix) - * - * Note that the above in no way resembles the - * domain matching algorithm found in RFC2109. - * It does, however, model the real world rather - * more accurately. - */ - /** \todo In future, we should consult a TLD service - * instead of just looking for embedded dots. - */ +/* exported interface documented in content/urldb.h */ +void urldb_update_url_visit_data(nsurl *url) +{ + struct path_data *p; - hptr = host_data + lwc_string_length(host) - 1; - rptr = rhost_data + lwc_string_length(rhost) - 1; + assert(url); - /* 1 */ - while (hptr >= host_data && rptr >= rhost_data) { - if (*hptr != *rptr) - break; - hptr--; - rptr--; - } - /* Ensure we end up pointing at the start of the - * common suffix. The above loop will exit pointing - * to the byte before the start of the suffix. 
*/ - hptr++; + p = urldb_find_url(url); + if (!p) + return; - /* 2 */ - while (*hptr != '\0' && *hptr != '.') - hptr++; + p->urld.last_visit = time(NULL); + p->urld.visits++; +} - /* 3 */ - if (*hptr == '\0' || - (dot = strchr(hptr + 1, '.')) == NULL || - *(dot + 1) == '\0') { - lwc_string_unref(rhost); - goto error; - } - } - lwc_string_unref(rhost); - } +/* exported interface documented in content/urldb.h */ +void urldb_reset_url_visit_data(nsurl *url) +{ + struct path_data *p; - end = cur + strlen(cur) - 2 /* Trailing CRLF */; + assert(url); - do { - struct cookie_internal_data *c; - char *dot; - size_t len; + p = urldb_find_url(url); + if (!p) + return; - c = urldb_parse_cookie(url, &cur); - if (!c) { - /* failed => stop parsing */ - goto error; - } + p->urld.last_visit = (time_t)0; + p->urld.visits = 0; +} - /* validate cookie */ - /* 4.2.2:i Cookie must have NAME and VALUE */ - if (!c->name || !c->value) { - urldb_free_cookie(c); - goto error; - } +/* exported interface documented in content/urldb.h */ +const struct url_data *urldb_get_url_data(nsurl *url) +{ + struct path_data *p; + struct url_internal_data *u; - /* 4.3.2:i Cookie path must be a prefix of URL path */ - len = strlen(c->path); - if (len > lwc_string_length(path) || - strncmp(c->path, lwc_string_data(path), - len) != 0) { - urldb_free_cookie(c); - goto error; - } + assert(url); - /* 4.3.2:ii Cookie domain must contain embedded dots */ - dot = strchr(c->domain + 1, '.'); - if (!dot || *(dot + 1) == '\0') { - /* no embedded dots */ - urldb_free_cookie(c); - goto error; - } + p = urldb_find_url(url); + if (!p) + return NULL; - /* Domain match fetch host with cookie domain */ - if (strcasecmp(lwc_string_data(host), c->domain) != 0) { - int hlen, dlen; - char *domain = c->domain; + u = &p->urld; - /* c->domain must be a domain cookie here because: - * c->domain is either: - * + specified in the header as a domain cookie - * (non-domain cookies in the header are ignored - * by urldb_parse_cookie / 
urldb_parse_avpair) - * + defaulted to the URL's host part - * (by urldb_parse_cookie if no valid domain was - * specified in the header) - * - * The latter will pass the strcasecmp above, which - * leaves the former (i.e. a domain cookie) - */ - assert(c->domain[0] == '.'); + return (const struct url_data *) u; +} - /* 4.3.2:iii */ - if (urldb__host_is_ip_address(lwc_string_data(host))) { - /* IP address, so no partial match */ - urldb_free_cookie(c); - goto error; - } - hlen = lwc_string_length(host); - dlen = strlen(c->domain); +/* exported interface documented in content/urldb.h */ +nsurl *urldb_get_url(nsurl *url) +{ + struct path_data *p; - if (hlen <= dlen && hlen != dlen - 1) { - /* Partial match not possible */ - urldb_free_cookie(c); - goto error; - } + assert(url); - if (hlen == dlen - 1) { - /* Relax matching to allow - * host a.com to match .a.com */ - domain++; - dlen--; - } + p = urldb_find_url(url); + if (!p) + return NULL; - if (strcasecmp(lwc_string_data(host) + (hlen - dlen), - domain)) { - urldb_free_cookie(c); - goto error; - } + return p->url; +} - /* 4.3.2:iv Ensure H contains no dots - * - * If you believe the spec, H should contain no - * dots in _any_ cookie. Unfortunately, however, - * reality differs in that many sites send domain - * cookies of the form .foo.com from hosts such - * as bar.bat.foo.com and then expect domain - * matching to work. Thus we have to do what they - * expect, regardless of any potential security - * implications. 
- * - * This is what code conforming to the spec would - * look like: - * - * for (int i = 0; i < (hlen - dlen); i++) { - * if (host[i] == '.') { - * urldb_free_cookie(c); - * goto error; - * } - * } - */ - } - /* Now insert into database */ - if (!urldb_insert_cookie(c, scheme, urlt)) - goto error; - } while (cur < end); +/* exported interface documented in content/urldb.h */ +void urldb_set_auth_details(nsurl *url, const char *realm, const char *auth) +{ + struct path_data *p, *pi; + struct host_part *h; + struct prot_space_data *space, *space_alloc; + char *realm_alloc, *auth_alloc; + bool match; - lwc_string_unref(host); - lwc_string_unref(path); - lwc_string_unref(scheme); - nsurl_unref(urlt); + assert(url && realm && auth); - return true; + /* add url, in case it's missing */ + urldb_add_url(url); -error: - lwc_string_unref(host); - lwc_string_unref(path); - lwc_string_unref(scheme); - nsurl_unref(urlt); + p = urldb_find_url(url); - return false; -} + if (!p) + return; -/** - * Parse a cookie - * - * \param url URL being fetched - * \param cookie Pointer to cookie string (updated on exit) - * \return Pointer to cookie structure (on heap, caller frees) or NULL - */ -struct cookie_internal_data *urldb_parse_cookie(nsurl *url, - const char **cookie) -{ - struct cookie_internal_data *c; - const char *cur; - char name[1024], value[4096]; - char *n = name, *v = value; - bool in_value = false; - bool had_value_data = false; - bool value_verbatim = false; - bool quoted = false; - bool was_quoted = false; + /* Search for host_part */ + for (pi = p; pi->parent != NULL; pi = pi->parent) + ; + h = (struct host_part *)pi; - assert(url && cookie && *cookie); + /* Search if given URL belongs to a protection space we already know of. 
*/ + for (space = h->prot_space; space; space = space->next) { + if (!strcmp(space->realm, realm) && + lwc_string_isequal(space->scheme, p->scheme, + &match) == lwc_error_ok && + match == true && + space->port == p->port) + break; + } - c = calloc(1, sizeof(struct cookie_internal_data)); - if (c == NULL) - return NULL; + if (space != NULL) { + /* Overrule existing auth. */ + free(space->auth); + space->auth = strdup(auth); + } else { + /* Create a new protection space. */ + space = space_alloc = malloc(sizeof(struct prot_space_data)); + realm_alloc = strdup(realm); + auth_alloc = strdup(auth); - c->expires = -1; + if (!space_alloc || !realm_alloc || !auth_alloc) { + free(space_alloc); + free(realm_alloc); + free(auth_alloc); + return; + } - name[0] = '\0'; - value[0] = '\0'; + space->scheme = lwc_string_ref(p->scheme); + space->port = p->port; + space->realm = realm_alloc; + space->auth = auth_alloc; + space->next = h->prot_space; + h->prot_space = space; + } - for (cur = *cookie; *cur; cur++) { - if (*cur == '\r' && *(cur + 1) == '\n') { - /* End of header */ - if (quoted) { - /* Unmatched quote encountered */ + p->prot_space = space; +} - /* Match Firefox 2.0.0.11 */ - value[0] = '\0'; -#if 0 - /* This is what IE6/7 & Safari 3 do */ - /* Opera 9.25 discards the entire cookie */ - - /* Shuffle value up by 1 */ - memmove(value + 1, value, - min(v - value, sizeof(value) - 2)); - v++; - /* And insert " character at the start */ - value[0] = '"'; - - /* Now, run forwards through the value - * looking for a semicolon. If one exists, - * terminate the value at this point. 
*/ - for (char *s = value; s < v; s++) { - if (*s == ';') { - *s = '\0'; - v = s; - break; - } - } -#endif - } +/* exported interface documented in content/urldb.h */ +const char *urldb_get_auth_details(nsurl *url, const char *realm) +{ + struct path_data *p, *p_cur, *p_top; - break; - } else if (*cur == '\r') { - /* Spurious linefeed */ - continue; - } else if (*cur == '\n') { - /* Spurious newline */ - continue; - } + assert(url); - if (in_value && !had_value_data) { - if (*cur == ' ' || *cur == '\t') { - /* Strip leading whitespace from value */ - continue; - } else { - had_value_data = true; + /* add to the db, so our lookup will work */ + urldb_add_url(url); - /* Value is taken verbatim if first non-space - * character is not a " */ - if (*cur != '"') { - value_verbatim = true; - } - } + p = urldb_find_url(url); + if (!p) + return NULL; + + /* Check for any auth details attached to the path_data node or any of + * its parents. + */ + for (p_cur = p; p_cur != NULL; p_top = p_cur, p_cur = p_cur->parent) { + if (p_cur->prot_space) { + return p_cur->prot_space->auth; } + } - if (in_value && !value_verbatim && (*cur == '"')) { - /* Only non-verbatim values may be quoted */ - if (cur == *cookie || *(cur - 1) != '\\') { - /* Only unescaped quotes count */ - was_quoted = quoted; - quoted = !quoted; + /* Only when we have a realm (and canonical root of given URL), we can + * uniquely locate the protection space. + */ + if (realm != NULL) { + const struct host_part *h = (const struct host_part *)p_top; + const struct prot_space_data *space; + bool match; - continue; + /* Search for a possible matching protection space. 
*/ + for (space = h->prot_space; space != NULL; + space = space->next) { + if (!strcmp(space->realm, realm) && + lwc_string_isequal(space->scheme, + p->scheme, &match) == + lwc_error_ok && + match == true && + space->port == p->port) { + p->prot_space = space; + return p->prot_space->auth; } } + } - if (!quoted && !in_value && *cur == '=') { - /* First equals => attr-value separator */ - in_value = true; - continue; - } + return NULL; +} - if (!quoted && (was_quoted || *cur == ';')) { - /* Semicolon or after quoted value - * => end of current avpair */ - /* NUL-terminate tokens */ - *n = '\0'; - *v = '\0'; +/* exported interface documented in content/urldb.h */ +void urldb_set_cert_permissions(nsurl *url, bool permit) +{ + struct path_data *p; + struct host_part *h; - if (!urldb_parse_avpair(c, name, value, was_quoted)) { - /* Memory exhausted */ - urldb_free_cookie(c); - return NULL; - } + assert(url); - /* And reset to start */ - n = name; - v = value; - in_value = false; - had_value_data = false; - value_verbatim = false; - was_quoted = false; + /* add url, in case it's missing */ + urldb_add_url(url); - /* Now, if the current input is anything other than a - * semicolon, we must be sure to reprocess it */ - if (*cur != ';') { - cur--; - } + p = urldb_find_url(url); + if (!p) + return; - continue; - } + for (; p && p->parent; p = p->parent) + /* do nothing */; + assert(p); - /* And now handle commas. These are a pain as they may mean - * any of the following: - * - * + End of cookie - * + Day separator in Expires avpair - * + (Invalid) comma in unquoted value - * - * Therefore, in order to handle all 3 cases (2 and 3 are - * identical, the difference being that 2 is in the spec and - * 3 isn't), we need to determine where the comma actually - * lies. We use the following heuristic: - * - * Given a comma at the current input position, find the - * immediately following semicolon (or end of input if none - * found). 
Then, consider the input characters between - * these two positions. If any of these characters is an - * '=', we must assume that the comma signified the end of - * the current cookie. - * - * This holds as the first avpair of any cookie must be - * NAME=VALUE, so the '=' is guaranteed to appear in the - * case where the comma marks the end of a cookie. - * - * This will fail, however, in the case where '=' appears in - * the value of the current avpair after the comma or the - * subsequent cookie does not start with NAME=VALUE. Neither - * of these is particularly likely and if they do occur, the - * website is more broken than we can be bothered to handle. - */ - if (!quoted && *cur == ',') { - /* Find semi-colon, if any */ - const char *p; - const char *semi = strchr(cur + 1, ';'); - if (!semi) - semi = cur + strlen(cur) - 2 /* CRLF */; + h = (struct host_part *)p; - /* Look for equals sign between comma and semi */ - for (p = cur + 1; p < semi; p++) - if (*p == '=') - break; + h->permit_invalid_certs = permit; +} - if (p == semi) { - /* none found => comma internal to value */ - /* do nothing */ - } else { - /* found one => comma marks end of cookie */ - cur++; - break; - } - } - /* Accumulate into buffers, always leaving space for a NUL */ - /** \todo is silently truncating overlong names/values wise? 
*/ - if (!in_value) { - if (n < name + (sizeof(name) - 1)) - *n++ = *cur; - } else { - if (v < value + (sizeof(value) - 1)) - *v++ = *cur; - } - } +/* exported interface documented in content/urldb.h */ +bool urldb_get_cert_permissions(nsurl *url) +{ + struct path_data *p; + const struct host_part *h; - /* Parse final avpair */ - *n = '\0'; - *v = '\0'; + assert(url); - if (!urldb_parse_avpair(c, name, value, was_quoted)) { - /* Memory exhausted */ - urldb_free_cookie(c); - return NULL; - } + p = urldb_find_url(url); + if (!p) + return false; - /* Now fix-up default values */ - if (c->domain == NULL) { - lwc_string *host = nsurl_get_component(url, NSURL_HOST); - if (host == NULL) { - urldb_free_cookie(c); - return NULL; - } - c->domain = strdup(lwc_string_data(host)); - lwc_string_unref(host); - } + for (; p && p->parent; p = p->parent) + /* do nothing */; + assert(p); - if (c->path == NULL) { - const char *path_data; - char *path, *slash; - lwc_string *path_lwc; + h = (const struct host_part *)p; - path_lwc = nsurl_get_component(url, NSURL_PATH); - if (path_lwc == NULL) { - urldb_free_cookie(c); - return NULL; - } - path_data = lwc_string_data(path_lwc); + return h->permit_invalid_certs; +} - /* Strip leafname and trailing slash (4.3.1) */ - slash = strrchr(path_data, '/'); - if (slash != NULL) { - /* Special case: retain first slash in path */ - if (slash == path_data) - slash++; - slash = strndup(path_data, slash - path_data); - if (slash == NULL) { - lwc_string_unref(path_lwc); - urldb_free_cookie(c); - return NULL; - } +/* exported interface documented in content/urldb.h */ +void urldb_set_thumbnail(nsurl *url, struct bitmap *bitmap) +{ + struct path_data *p; - path = slash; - lwc_string_unref(path_lwc); - } else { - path = strdup(lwc_string_data(path_lwc)); - lwc_string_unref(path_lwc); - if (path == NULL) { - urldb_free_cookie(c); - return NULL; - } - } + assert(url); - c->path = path; - } + p = urldb_find_url(url); + if (!p) + return; - /* Write back 
current position */ - *cookie = cur; + if (p->thumb && p->thumb != bitmap) + bitmap_destroy(p->thumb); - return c; + p->thumb = bitmap; } -/** - * Parse a cookie avpair - * - * \param c Cookie struct to populate - * \param n Name component - * \param v Value component - * \param was_quoted Whether ::v was quoted in the input - * \return true on success, false on memory exhaustion - */ -bool urldb_parse_avpair(struct cookie_internal_data *c, char *n, char *v, - bool was_quoted) + +/* exported interface documented in content/urldb.h */ +struct bitmap *urldb_get_thumbnail(nsurl *url) { - int vlen; + struct path_data *p; - assert(c && n && v); + assert(url); - /* Strip whitespace from start of name */ - for (; *n; n++) { - if (*n != ' ' && *n != '\t') - break; - } + p = urldb_find_url(url); + if (!p) + return NULL; - /* Strip whitespace from end of name */ - for (vlen = strlen(n); vlen; vlen--) { - if (n[vlen] == ' ' || n[vlen] == '\t') - n[vlen] = '\0'; - else - break; - } + return p->thumb; +} - /* Strip whitespace from start of value */ - for (; *v; v++) { - if (*v != ' ' && *v != '\t') - break; - } - /* Strip whitespace from end of value */ - for (vlen = strlen(v); vlen; vlen--) { - if (v[vlen] == ' ' || v[vlen] == '\t') - v[vlen] = '\0'; - else - break; - } +/* exported interface documented in content/urldb.h */ +void urldb_iterate_partial(const char *prefix, + bool (*callback)(nsurl *url, + const struct url_data *data)) +{ + char host[256]; + char buf[260]; /* max domain + "www." 
*/ + const char *slash, *scheme_sep; + struct search_node *tree; + const struct host_part *h; - if (!c->comment && strcasecmp(n, "Comment") == 0) { - c->comment = strdup(v); - if (!c->comment) - return false; - } else if (!c->domain && strcasecmp(n, "Domain") == 0) { - if (v[0] == '.') { - /* Domain must start with a dot */ - c->domain_from_set = true; - c->domain = strdup(v); - if (!c->domain) - return false; - } - } else if (strcasecmp(n, "Max-Age") == 0) { - int temp = atoi(v); - if (temp == 0) - /* Special case - 0 means delete */ - c->expires = 0; - else - c->expires = time(NULL) + temp; - } else if (!c->path && strcasecmp(n, "Path") == 0) { - c->path_from_set = true; - c->path = strdup(v); - if (!c->path) - return false; - } else if (strcasecmp(n, "Version") == 0) { - c->version = atoi(v); - } else if (strcasecmp(n, "Expires") == 0) { - char *datenoday; - time_t expires; + assert(prefix && callback); - /* Strip dayname from date (these are hugely - * variable and liable to break the parser. - * They also serve no useful purpose) */ - for (datenoday = v; *datenoday && !isdigit(*datenoday); - datenoday++) - ; /* do nothing */ + /* strip scheme */ + scheme_sep = strstr(prefix, "://"); + if (scheme_sep) + prefix = scheme_sep + 3; - expires = curl_getdate(datenoday, NULL); - if (expires == -1) { - /* assume we have an unrepresentable - * date => force it to the maximum - * possible value of a 32bit time_t - * (this may break in 2038. 
We'll - * deal with that once we come to - * it) */ - expires = (time_t)0x7fffffff; - } - c->expires = expires; - } else if (strcasecmp(n, "Secure") == 0) { - c->secure = true; - } else if (strcasecmp(n, "HttpOnly") == 0) { - c->http_only = true; - } else if (!c->name) { - c->name = strdup(n); - c->value = strdup(v); - c->value_was_quoted = was_quoted; - if (!c->name || !c->value) - return false; - } + slash = strchr(prefix, '/'); + tree = urldb_get_search_tree(prefix); - return true; -} + if (slash) { + /* if there's a slash in the input, then we can + * assume that we're looking for a path */ + snprintf(host, sizeof host, "%.*s", + (int) (slash - prefix), prefix); -/** - * Insert a cookie into the database - * - * \param c The cookie to insert - * \param scheme URL scheme associated with cookie path - * \param url URL (sans fragment) associated with cookie - * \return true on success, false on memory exhaustion (c will be freed) - */ -bool urldb_insert_cookie(struct cookie_internal_data *c, lwc_string *scheme, - nsurl *url) -{ - struct cookie_internal_data *d; - const struct host_part *h; - struct path_data *p; - time_t now = time(NULL); + h = urldb_search_find(tree, host); + if (!h) { + int len = slash - prefix; - assert(c); + if (len <= 3 || strncasecmp(host, "www.", 4) != 0) { + snprintf(buf, sizeof buf, "www.%s", host); + h = urldb_search_find( + search_trees[ST_DN + 'w' - 'a'], + buf); + if (!h) + return; + } else + return; + } - if (c->domain[0] == '.') { - h = urldb_search_find( - urldb_get_search_tree(&(c->domain[1])), - c->domain + 1); - if (!h) { - h = urldb_add_host(c->domain + 1); - if (!h) { - urldb_free_cookie(c); - return false; - } + if (h->paths.children) { + /* Have paths, iterate them */ + urldb_iterate_partial_path(&h->paths, slash + 1, + callback); } - p = (struct path_data *) &h->paths; } else { - /* Need to have a URL and scheme, if it's not a domain cookie */ - assert(url != NULL); - assert(scheme != NULL); - - h = urldb_search_find( - 
urldb_get_search_tree(c->domain), - c->domain); + int len = strlen(prefix); - if (!h) { - h = urldb_add_host(c->domain); - if (!h) { - urldb_free_cookie(c); - return false; - } - } + /* looking for hosts */ + if (!urldb_iterate_partial_host(tree, prefix, callback)) + return; - /* find path */ - p = urldb_add_path(scheme, 0, h, - strdup(c->path), NULL, url); - if (!p) { - urldb_free_cookie(c); - return false; + if (len <= 3 || strncasecmp(prefix, "www.", 4) != 0) { + /* now look for www.prefix */ + snprintf(buf, sizeof buf, "www.%s", prefix); + if(!urldb_iterate_partial_host( + search_trees[ST_DN + 'w' - 'a'], + buf, callback)) + return; } } +} - /* add cookie */ - for (d = p->cookies; d; d = d->next) { - if (!strcmp(d->domain, c->domain) && - !strcmp(d->path, c->path) && - !strcmp(d->name, c->name)) - break; - } - if (d) { - if (c->expires != -1 && c->expires < now) { - /* remove cookie */ - if (d->next) - d->next->prev = d->prev; - else - p->cookies_end = d->prev; - if (d->prev) - d->prev->next = d->next; - else - p->cookies = d->next; +/* exported interface documented in content/urldb.h */ +void urldb_iterate_entries(bool (*callback)(nsurl *url, + const struct url_data *data)) +{ + int i; - cookie_manager_remove((struct cookie_data *)d); + assert(callback); - urldb_free_cookie(d); - urldb_free_cookie(c); - } else { - /* replace d with c */ - c->prev = d->prev; - c->next = d->next; - if (c->next) - c->next->prev = c; - else - p->cookies_end = c; - if (c->prev) - c->prev->next = c; - else - p->cookies = c; + for (i = 0; i < NUM_SEARCH_TREES; i++) { + if (!urldb_iterate_entries_host(search_trees[i], + callback, NULL)) + break; + } +} - cookie_manager_remove((struct cookie_data *)d); - urldb_free_cookie(d); - cookie_manager_add((struct cookie_data *)c); - } - } else { - c->prev = p->cookies_end; - c->next = NULL; - if (p->cookies_end) - p->cookies_end->next = c; - else - p->cookies = c; - p->cookies_end = c; +/* exported interface documented in content/urldb.h */ 
+void urldb_iterate_cookies(bool (*callback)(const struct cookie_data *data)) +{ + int i; - cookie_manager_add((struct cookie_data *)c); - } + assert(callback); - return true; + for (i = 0; i < NUM_SEARCH_TREES; i++) { + if (!urldb_iterate_entries_host(search_trees[i], + NULL, callback)) + break; + } } -/** - * Free a cookie - * - * \param c The cookie to free - */ -void urldb_free_cookie(struct cookie_internal_data *c) + +/* exported interface documented in content/urldb.h */ +bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer) { - assert(c); + const char *cur = header, *end; + lwc_string *path, *host, *scheme; + nsurl *urlt; + bool match; - free(c->comment); - free(c->domain); - free(c->path); - free(c->name); - free(c->value); - free(c); -} + assert(url && header); -/** - * Concatenate a cookie into the provided buffer - * - * \param c Cookie to concatenate - * \param version The version of the cookie string to output - * \param used Pointer to amount of buffer used (updated) - * \param alloc Pointer to allocated size of buffer (updated) - * \param buf Pointer to Pointer to buffer (updated) - * \return true on success, false on memory exhaustion - */ -bool urldb_concat_cookie(struct cookie_internal_data *c, int version, - int *used, int *alloc, char **buf) + /* Get defragmented URL, as 'urlt' */ + if (nsurl_defragment(url, &urlt) != NSERROR_OK) + return NULL; + + scheme = nsurl_get_component(url, NSURL_SCHEME); + if (scheme == NULL) { + nsurl_unref(urlt); + return false; + } + + path = nsurl_get_component(url, NSURL_PATH); + if (path == NULL) { + lwc_string_unref(scheme); + nsurl_unref(urlt); + return false; + } + + host = nsurl_get_component(url, NSURL_HOST); + if (host == NULL) { + lwc_string_unref(path); + lwc_string_unref(scheme); + nsurl_unref(urlt); + return false; + } + + if (referer) { + lwc_string *rhost; + + /* Ensure that url's host name domain matches + * referer's (4.3.5) */ + rhost = nsurl_get_component(referer, NSURL_HOST); + if 
(rhost == NULL) { + goto error; + } + + /* Domain match host names */ + if (lwc_string_isequal(host, rhost, &match) == lwc_error_ok && + match == false) { + const char *hptr; + const char *rptr; + const char *dot; + const char *host_data = lwc_string_data(host); + const char *rhost_data = lwc_string_data(rhost); + + /* Ensure neither host nor rhost are IP addresses */ + if (urldb__host_is_ip_address(host_data) || + urldb__host_is_ip_address(rhost_data)) { + /* IP address, so no partial match */ + lwc_string_unref(rhost); + goto error; + } + + /* Not exact match, so try the following: + * + * 1) Find the longest common suffix of host and rhost + * (may be all of host/rhost) + * 2) Discard characters from the start of the suffix + * until the suffix starts with a dot + * (prevents foobar.com matching bar.com) + * 3) Ensure the suffix is non-empty and contains + * embedded dots (to avoid permitting .com as a + * suffix) + * + * Note that the above in no way resembles the + * domain matching algorithm found in RFC2109. + * It does, however, model the real world rather + * more accurately. + */ + + /** \todo In future, we should consult a TLD service + * instead of just looking for embedded dots. + */ + + hptr = host_data + lwc_string_length(host) - 1; + rptr = rhost_data + lwc_string_length(rhost) - 1; + + /* 1 */ + while (hptr >= host_data && rptr >= rhost_data) { + if (*hptr != *rptr) + break; + hptr--; + rptr--; + } + /* Ensure we end up pointing at the start of the + * common suffix. The above loop will exit pointing + * to the byte before the start of the suffix. 
*/ + hptr++; + + /* 2 */ + while (*hptr != '\0' && *hptr != '.') + hptr++; + + /* 3 */ + if (*hptr == '\0' || + (dot = strchr(hptr + 1, '.')) == NULL || + *(dot + 1) == '\0') { + lwc_string_unref(rhost); + goto error; + } + } + + lwc_string_unref(rhost); + } + + end = cur + strlen(cur) - 2 /* Trailing CRLF */; + + do { + struct cookie_internal_data *c; + char *dot; + size_t len; + + c = urldb_parse_cookie(url, &cur); + if (!c) { + /* failed => stop parsing */ + goto error; + } + + /* validate cookie */ + + /* 4.2.2:i Cookie must have NAME and VALUE */ + if (!c->name || !c->value) { + urldb_free_cookie(c); + goto error; + } + + /* 4.3.2:i Cookie path must be a prefix of URL path */ + len = strlen(c->path); + if (len > lwc_string_length(path) || + strncmp(c->path, lwc_string_data(path), + len) != 0) { + urldb_free_cookie(c); + goto error; + } + + /* 4.3.2:ii Cookie domain must contain embedded dots */ + dot = strchr(c->domain + 1, '.'); + if (!dot || *(dot + 1) == '\0') { + /* no embedded dots */ + urldb_free_cookie(c); + goto error; + } + + /* Domain match fetch host with cookie domain */ + if (strcasecmp(lwc_string_data(host), c->domain) != 0) { + int hlen, dlen; + char *domain = c->domain; + + /* c->domain must be a domain cookie here because: + * c->domain is either: + * + specified in the header as a domain cookie + * (non-domain cookies in the header are ignored + * by urldb_parse_cookie / urldb_parse_avpair) + * + defaulted to the URL's host part + * (by urldb_parse_cookie if no valid domain was + * specified in the header) + * + * The latter will pass the strcasecmp above, which + * leaves the former (i.e. 
a domain cookie) + */ + assert(c->domain[0] == '.'); + + /* 4.3.2:iii */ + if (urldb__host_is_ip_address(lwc_string_data(host))) { + /* IP address, so no partial match */ + urldb_free_cookie(c); + goto error; + } + + hlen = lwc_string_length(host); + dlen = strlen(c->domain); + + if (hlen <= dlen && hlen != dlen - 1) { + /* Partial match not possible */ + urldb_free_cookie(c); + goto error; + } + + if (hlen == dlen - 1) { + /* Relax matching to allow + * host a.com to match .a.com */ + domain++; + dlen--; + } + + if (strcasecmp(lwc_string_data(host) + (hlen - dlen), + domain)) { + urldb_free_cookie(c); + goto error; + } + + /* 4.3.2:iv Ensure H contains no dots + * + * If you believe the spec, H should contain no + * dots in _any_ cookie. Unfortunately, however, + * reality differs in that many sites send domain + * cookies of the form .foo.com from hosts such + * as bar.bat.foo.com and then expect domain + * matching to work. Thus we have to do what they + * expect, regardless of any potential security + * implications. 
+ * + * This is what code conforming to the spec would + * look like: + * + * for (int i = 0; i < (hlen - dlen); i++) { + * if (host[i] == '.') { + * urldb_free_cookie(c); + * goto error; + * } + * } + */ + } + + /* Now insert into database */ + if (!urldb_insert_cookie(c, scheme, urlt)) + goto error; + } while (cur < end); + + lwc_string_unref(host); + lwc_string_unref(path); + lwc_string_unref(scheme); + nsurl_unref(urlt); + + return true; + +error: + lwc_string_unref(host); + lwc_string_unref(path); + lwc_string_unref(scheme); + nsurl_unref(urlt); + + return false; +} + + +/* exported interface documented in content/urldb.h */ +char *urldb_get_cookie(nsurl *url, bool include_http_only) { - /* Combined (A)BNF for the Cookie: request header: - * - * CHAR = - * CTL = - * CR = - * LF = - * SP = - * HT = - * <"> = - * - * CRLF = CR LF - * - * LWS = [CRLF] 1*( SP | HT ) - * - * TEXT = - * - * token = 1* - * separators = "(" | ")" | "<" | ">" | "@" - * | "," | ";" | ":" | "\" | <"> - * | "/" | "[" | "]" | "?" | "=" - * | "{" | "}" | SP | HT - * - * quoted-string = ( <"> *(qdtext | quoted-pair ) <"> ) - * qdtext = > - * quoted-pair = "\" CHAR - * - * attr = token - * value = word - * word = token | quoted-string - * - * cookie = "Cookie:" cookie-version - * 1*((";" | ",") cookie-value) - * cookie-value = NAME "=" VALUE [";" path] [";" domain] - * cookie-version = "$Version" "=" value - * NAME = attr - * VALUE = value - * path = "$Path" "=" value - * domain = "$Domain" "=" value - * - * A note on quoted-string handling: - * The cookie data stored in the db is verbatim (i.e. sans enclosing - * <">, if any, and with all quoted-pairs intact) thus all that we - * need to do here is ensure that value strings which were quoted - * in Set-Cookie or which include any of the separators are quoted - * before use. - * - * A note on cookie-value separation: - * We use semicolons for all separators, including between - * cookie-values. 
This simplifies things and is backwards compatible. - */ - const char * const separators = "()<>@,;:\\\"/[]?={} \t"; + const struct path_data *p, *q; + const struct host_part *h; + lwc_string *path_lwc; + struct cookie_internal_data *c; + int count = 0, version = COOKIE_RFC2965; + struct cookie_internal_data **matched_cookies; + int matched_cookies_size = 20; + int ret_alloc = 4096, ret_used = 1; + const char *path; + char *ret; + lwc_string *scheme; + time_t now; + int i; + bool match; + + assert(url != NULL); + + /* The URL must exist in the db in order to find relevant cookies, since + * we search up the tree from the URL node, and cookies from further + * up also apply. */ + urldb_add_url(url); + + p = urldb_find_url(url); + if (!p) + return NULL; + + scheme = p->scheme; + + matched_cookies = malloc(matched_cookies_size * + sizeof(struct cookie_internal_data *)); + if (!matched_cookies) + return NULL; + +#define GROW_MATCHED_COOKIES \ + do { \ + if (count == matched_cookies_size) { \ + struct cookie_internal_data **temp; \ + temp = realloc(matched_cookies, \ + (matched_cookies_size + 20) * \ + sizeof(struct cookie_internal_data *)); \ + \ + if (temp == NULL) { \ + free(ret); \ + free(matched_cookies); \ + return NULL; \ + } \ + \ + matched_cookies = temp; \ + matched_cookies_size += 20; \ + } \ + } while(0) + + ret = malloc(ret_alloc); + if (!ret) { + free(matched_cookies); + return NULL; + } + + ret[0] = '\0'; + + path_lwc = nsurl_get_component(url, NSURL_PATH); + if (path_lwc == NULL) { + free(ret); + free(matched_cookies); + return NULL; + } + path = lwc_string_data(path_lwc); + lwc_string_unref(path_lwc); + + now = time(NULL); + + if (*(p->segment) != '\0') { + /* Match exact path, unless directory, when prefix matching + * will handle this case for us. 
*/ + for (q = p->parent->children; q; q = q->next) { + if (strcmp(q->segment, p->segment)) + continue; + + /* Consider all cookies associated with + * this exact path */ + for (c = q->cookies; c; c = c->next) { + if (c->expires != -1 && c->expires < now) + /* cookie has expired => ignore */ + continue; + + if (c->secure && lwc_string_isequal( + q->scheme, + corestring_lwc_https, + &match) && + match == false) + /* secure cookie for insecure host. + * ignore */ + continue; + + if (c->http_only && !include_http_only) + /* Ignore HttpOnly */ + continue; + + matched_cookies[count++] = c; + + GROW_MATCHED_COOKIES; + + if (c->version < (unsigned int)version) + version = c->version; + + c->last_used = now; + + cookie_manager_add((struct cookie_data *)c); + } + } + } + + /* Now consider cookies whose paths prefix-match ours */ + for (p = p->parent; p; p = p->parent) { + /* Find directory's path entry(ies) */ + /* There are potentially multiple due to differing schemes */ + for (q = p->children; q; q = q->next) { + if (*(q->segment) != '\0') + continue; + + for (c = q->cookies; c; c = c->next) { + if (c->expires != -1 && c->expires < now) + /* cookie has expired => ignore */ + continue; + + if (c->secure && lwc_string_isequal( + q->scheme, + corestring_lwc_https, + &match) && + match == false) + /* Secure cookie for insecure server + * => ignore */ + continue; + + matched_cookies[count++] = c; + + GROW_MATCHED_COOKIES; + + if (c->version < (unsigned int) version) + version = c->version; + + c->last_used = now; + + cookie_manager_add((struct cookie_data *)c); + } + } + + if (!p->parent) { + /* No parent, so bail here. This can't go in + * the loop exit condition as we also want to + * process the top-level node. + * + * If p->parent is NULL then p->cookies are + * the domain cookies and thus we don't even + * try matching against them. 
+ */ + break; + } + + /* Consider p itself - may be the result of Path=/foo */ + for (c = p->cookies; c; c = c->next) { + if (c->expires != -1 && c->expires < now) + /* cookie has expired => ignore */ + continue; + + /* Ensure cookie path is a prefix of the resource */ + if (strncmp(c->path, path, strlen(c->path)) != 0) + /* paths don't match => ignore */ + continue; + + if (c->secure && lwc_string_isequal(p->scheme, + corestring_lwc_https, + &match) && + match == false) + /* Secure cookie for insecure server + * => ignore */ + continue; + + matched_cookies[count++] = c; + + GROW_MATCHED_COOKIES; + + if (c->version < (unsigned int) version) + version = c->version; + + c->last_used = now; + + cookie_manager_add((struct cookie_data *)c); + } + + } + + /* Finally consider domain cookies for hosts which domain match ours */ + for (h = (const struct host_part *)p; h && h != &db_root; + h = h->parent) { + for (c = h->paths.cookies; c; c = c->next) { + if (c->expires != -1 && c->expires < now) + /* cookie has expired => ignore */ + continue; + + /* Ensure cookie path is a prefix of the resource */ + if (strncmp(c->path, path, strlen(c->path)) != 0) + /* paths don't match => ignore */ + continue; + + if (c->secure && lwc_string_isequal(scheme, + corestring_lwc_https, + &match) && + match == false) + /* secure cookie for insecure host. ignore */ + continue; - int max_len; + matched_cookies[count++] = c; - assert(c && used && alloc && buf && *buf); + GROW_MATCHED_COOKIES; - /* "; " cookie-value - * We allow for the possibility that values are quoted - */ - max_len = 2 + strlen(c->name) + 1 + strlen(c->value) + 2 + - (c->path_from_set ? - 8 + strlen(c->path) + 2 : 0) + - (c->domain_from_set ? 
- 10 + strlen(c->domain) + 2 : 0); + if (c->version < (unsigned int)version) + version = c->version; - if (*used + max_len >= *alloc) { - char *temp = realloc(*buf, *alloc + 4096); - if (!temp) { - return false; + c->last_used = now; + + cookie_manager_add((struct cookie_data *)c); } - *buf = temp; - *alloc += 4096; } - if (version == COOKIE_NETSCAPE) { - /* Original Netscape cookie */ - sprintf(*buf + *used - 1, "; %s=", c->name); - *used += 2 + strlen(c->name) + 1; + if (count == 0) { + /* No cookies found */ + free(ret); + free(matched_cookies); + return NULL; + } - /* The Netscape spec doesn't mention quoting of cookie values. - * RFC 2109 $10.1.3 indicates that values must not be quoted. - * - * However, other browsers preserve quoting, so we should, too - */ - if (c->value_was_quoted) { - sprintf(*buf + *used - 1, "\"%s\"", c->value); - *used += 1 + strlen(c->value) + 1; - } else { - /** \todo should we %XX-encode [;HT,SP] ? */ - /** \todo Should we strip escaping backslashes? */ - sprintf(*buf + *used - 1, "%s", c->value); - *used += strlen(c->value); + /* and build output string */ + if (version > COOKIE_NETSCAPE) { + sprintf(ret, "$Version=%d", version); + ret_used = strlen(ret) + 1; + } + + for (i = 0; i < count; i++) { + if (!urldb_concat_cookie(matched_cookies[i], version, + &ret_used, &ret_alloc, &ret)) { + free(ret); + free(matched_cookies); + return NULL; } + } - /* We don't send path/domain information -- that's what the - * Netscape spec suggests we should do, anyway. 
*/ - } else { - /* RFC2109 or RFC2965 cookie */ - sprintf(*buf + *used - 1, "; %s=", c->name); - *used += 2 + strlen(c->name) + 1; + if (version == COOKIE_NETSCAPE) { + /* Old-style cookies => no version & skip "; " */ + memmove(ret, ret + 2, ret_used - 2); + ret_used -= 2; + } - /* Value needs quoting if it contains any separator or if - * it needs preserving from the Set-Cookie header */ - if (c->value_was_quoted || - strpbrk(c->value, separators) != NULL) { - sprintf(*buf + *used - 1, "\"%s\"", c->value); - *used += 1 + strlen(c->value) + 1; - } else { - sprintf(*buf + *used - 1, "%s", c->value); - *used += strlen(c->value); + /* Now, shrink the output buffer to the required size */ + { + char *temp = realloc(ret, ret_used); + if (!temp) { + free(ret); + free(matched_cookies); + return NULL; } - if (c->path_from_set) { - /* Path, quoted if necessary */ - sprintf(*buf + *used - 1, "; $Path="); - *used += 8; + ret = temp; + } - if (strpbrk(c->path, separators) != NULL) { - sprintf(*buf + *used - 1, "\"%s\"", c->path); - *used += 1 + strlen(c->path) + 1; - } else { - sprintf(*buf + *used - 1, "%s", c->path); - *used += strlen(c->path); - } - } + free(matched_cookies); - if (c->domain_from_set) { - /* Domain, quoted if necessary */ - sprintf(*buf + *used - 1, "; $Domain="); - *used += 10; + return ret; - if (strpbrk(c->domain, separators) != NULL) { - sprintf(*buf + *used - 1, "\"%s\"", c->domain); - *used += 1 + strlen(c->domain) + 1; - } else { - sprintf(*buf + *used - 1, "%s", c->domain); - *used += strlen(c->domain); - } - } - } +#undef GROW_MATCHED_COOKIES +} - return true; + +/* exported interface documented in content/urldb.h */ +void urldb_delete_cookie(const char *domain, const char *path, + const char *name) +{ + urldb_delete_cookie_hosts(domain, path, name, &db_root); } -/** - * Load a cookie file into the database - * - * \param filename File to load - */ + +/* exported interface documented in content/urldb.h */ void urldb_load_cookies(const char 
*filename) { FILE *fp; @@ -3770,7 +3766,7 @@ void urldb_load_cookies(const char *filename) if (strncasecmp(s, "Version:", 8) == 0) { FIND_T; SKIP_T; loaded_cookie_file_version = atoi(p); - if (loaded_cookie_file_version < + if (loaded_cookie_file_version < MIN_COOKIE_FILE_VERSION) { LOG(("Unsupported Cookie file version")); break; @@ -3882,84 +3878,12 @@ void urldb_load_cookies(const char *filename) fclose(fp); } -/** - * Delete a cookie - * - * \param domain The cookie's domain - * \param path The cookie's path - * \param name The cookie's name - */ -void urldb_delete_cookie(const char *domain, const char *path, - const char *name) -{ - urldb_delete_cookie_hosts(domain, path, name, &db_root); -} - -void urldb_delete_cookie_hosts(const char *domain, const char *path, - const char *name, struct host_part *parent) -{ - struct host_part *h; - assert(parent); - - urldb_delete_cookie_paths(domain, path, name, &parent->paths); - - for (h = parent->children; h; h = h->next) - urldb_delete_cookie_hosts(domain, path, name, h); -} - -void urldb_delete_cookie_paths(const char *domain, const char *path, - const char *name, struct path_data *parent) -{ - struct cookie_internal_data *c; - struct path_data *p = parent; - - assert(parent); - - do { - for (c = p->cookies; c; c = c->next) { - if (strcmp(c->domain, domain) == 0 && - strcmp(c->path, path) == 0 && - strcmp(c->name, name) == 0) { - if (c->prev) - c->prev->next = c->next; - else - p->cookies = c->next; - - if (c->next) - c->next->prev = c->prev; - else - p->cookies_end = c->prev; - - urldb_free_cookie(c); - - return; - } - } - - if (p->children) { - p = p->children; - } else { - while (p != parent) { - if (p->next != NULL) { - p = p->next; - break; - } - - p = p->parent; - } - } - } while(p != parent); -} -/** - * Save persistent cookies to file - * - * \param filename Path to save to - */ +/* exported interface documented in content/urldb.h */ void urldb_save_cookies(const char *filename) { FILE *fp; - int 
cookie_file_version = max(loaded_cookie_file_version, + int cookie_file_version = max(loaded_cookie_file_version, COOKIE_FILE_VERSION); assert(filename); @@ -3985,256 +3909,176 @@ void urldb_save_cookies(const char *filename) urldb_save_cookie_hosts(fp, &db_root); - fclose(fp); -} - -/** - * Save a host subtree's cookies - * - * \param fp File pointer to write to - * \param parent Parent host - */ -void urldb_save_cookie_hosts(FILE *fp, struct host_part *parent) -{ - struct host_part *h; - assert(fp && parent); - - urldb_save_cookie_paths(fp, &parent->paths); - - for (h = parent->children; h; h = h->next) - urldb_save_cookie_hosts(fp, h); -} - -/** - * Save a path subtree's cookies - * - * \param fp File pointer to write to - * \param parent Parent path - */ -void urldb_save_cookie_paths(FILE *fp, struct path_data *parent) -{ - struct path_data *p = parent; - time_t now = time(NULL); - - assert(fp && parent); - - do { - if (p->cookies != NULL) { - struct cookie_internal_data *c; - - for (c = p->cookies; c != NULL; c = c->next) { - if (c->expires == -1 || c->expires < now) - /* Skip expired & session cookies */ - continue; - - fprintf(fp, - "%d\t%s\t%d\t%s\t%d\t%d\t%d\t%d\t%d\t%d\t" - "%s\t%s\t%d\t%s\t%s\t%s\n", - c->version, c->domain, - c->domain_from_set, c->path, - c->path_from_set, c->secure, - c->http_only, - (int)c->expires, (int)c->last_used, - c->no_destroy, c->name, c->value, - c->value_was_quoted, - p->scheme ? lwc_string_data(p->scheme) : - "unused", - p->url ? nsurl_access(p->url) : - "unused", - c->comment ? 
c->comment : ""); - } - } - - if (p->children != NULL) { - p = p->children; - } else { - while (p != parent) { - if (p->next != NULL) { - p = p->next; - break; - } - - p = p->parent; - } - } - } while (p != parent); + fclose(fp); } -/** - * Destroy urldb - */ -void urldb_destroy(void) +/* exported interface documented in content/urldb.h */ +void urldb_dump(void) { - struct host_part *a, *b; int i; - /* Clean up search trees */ - for (i = 0; i < NUM_SEARCH_TREES; i++) { - if (search_trees[i] != &empty) - urldb_destroy_search_tree(search_trees[i]); - } + urldb_dump_hosts(&db_root); - /* And database */ - for (a = db_root.children; a; a = b) { - b = a->next; - urldb_destroy_host_tree(a); - } - - /* And the bloom filter */ - if (url_bloom != NULL) - bloom_destroy(url_bloom); + for (i = 0; i != NUM_SEARCH_TREES; i++) + urldb_dump_search(search_trees[i], 0); } -/** - * Destroy a host tree - * - * \param root Root node of tree to destroy - */ -void urldb_destroy_host_tree(struct host_part *root) + +/* exported interface documented in content/urldb.h */ +struct host_part *urldb_add_host(const char *host) { - struct host_part *a, *b; - struct path_data *p, *q; - struct prot_space_data *s, *t; + struct host_part *d = (struct host_part *) &db_root, *e; + struct search_node *s; + char buf[256]; /* 256 bytes is sufficient - domain names are + * limited to 255 chars. 
*/ + char *part; - /* Destroy children */ - for (a = root->children; a; a = b) { - b = a->next; - urldb_destroy_host_tree(a); - } + assert(host); - /* Now clean up paths */ - for (p = root->paths.children; p; p = q) { - q = p->next; - urldb_destroy_path_tree(p); - } + if (urldb__host_is_ip_address(host)) { + /* Host is an IP, so simply add as TLD */ - /* Root path */ - urldb_destroy_path_node_content(&root->paths); + /* Check for existing entry */ + for (e = d->children; e; e = e->next) + if (strcasecmp(host, e->part) == 0) + /* found => return it */ + return e; - /* Proctection space data */ - for (s = root->prot_space; s; s = t) { - t = s->next; - urldb_destroy_prot_space(s); - } + d = urldb_add_host_node(host, d); - /* And ourselves */ - free(root->part); - free(root); -} + s = urldb_search_insert(search_trees[ST_IP], d); + if (!s) { + /* failed */ + d = NULL; + } else { + search_trees[ST_IP] = s; + } -/** - * Destroy a path tree - * - * \param root Root node of tree to destroy - */ -void urldb_destroy_path_tree(struct path_data *root) -{ - struct path_data *p = root; + return d; + } - do { - if (p->children != NULL) { - p = p->children; - } else { - struct path_data *q = p; + /* Copy host string, so we can corrupt it */ + strncpy(buf, host, sizeof buf); + buf[sizeof buf - 1] = '\0'; - while (p != root) { - if (p->next != NULL) { - p = p->next; + /* Process FQDN segments backwards */ + do { + part = strrchr(buf, '.'); + if (!part) { + /* last segment */ + /* Check for existing entry */ + for (e = d->children; e; e = e->next) + if (strcasecmp(buf, e->part) == 0) break; - } - p = p->parent; + if (e) { + d = e; + } else { + d = urldb_add_host_node(buf, d); + } - urldb_destroy_path_node_content(q); - free(q); + /* And insert into search tree */ + if (d) { + struct search_node **r; - q = p; + r = urldb_get_search_tree_direct(buf); + s = urldb_search_insert(*r, d); + if (!s) { + /* failed */ + d = NULL; + } else { + *r = s; + } } - - 
urldb_destroy_path_node_content(q); - free(q); + break; } - } while (p != root); + + /* Check for existing entry */ + for (e = d->children; e; e = e->next) + if (strcasecmp(part + 1, e->part) == 0) + break; + + d = e ? e : urldb_add_host_node(part + 1, d); + if (!d) + break; + + *part = '\0'; + } while (1); + + return d; } -/** - * Destroy the contents of a path node - * - * \param node Node to destroy contents of (does not destroy node) - */ -void urldb_destroy_path_node_content(struct path_data *node) -{ - struct cookie_internal_data *a, *b; - unsigned int i; - if (node->url != NULL) - nsurl_unref(node->url); +/* exported interface documented in content/urldb.h */ +struct path_data * +urldb_add_path(lwc_string *scheme, + unsigned int port, + const struct host_part *host, + char *path_query, + lwc_string *fragment, + nsurl *url) +{ + struct path_data *d, *e; + char *buf = path_query; + char *segment, *slash; + bool match; - if (node->scheme != NULL) - lwc_string_unref(node->scheme); + assert(scheme && host && url); - free(node->segment); - for (i = 0; i < node->frag_cnt; i++) - free(node->fragment[i]); - free(node->fragment); + d = (struct path_data *) &host->paths; - if (node->thumb) - bitmap_destroy(node->thumb); + /* skip leading '/' */ + segment = buf; + if (*segment == '/') + segment++; - free(node->urld.title); + /* Process path segments */ + do { + slash = strchr(segment, '/'); + if (!slash) { + /* last segment */ + /* look for existing entry */ + for (e = d->children; e; e = e->next) + if (strcmp(segment, e->segment) == 0 && + lwc_string_isequal(scheme, + e->scheme, &match) == + lwc_error_ok && + match == true && + e->port == port) + break; - for (a = node->cookies; a; a = b) { - b = a->next; - urldb_destroy_cookie(a); - } -} + d = e ? 
urldb_add_path_fragment(e, fragment) : + urldb_add_path_node(scheme, port, + segment, fragment, d); + break; + } -/** - * Destroy a cookie node - * - * \param c Cookie to destroy - */ -void urldb_destroy_cookie(struct cookie_internal_data *c) -{ - free(c->name); - free(c->value); - free(c->comment); - free(c->domain); - free(c->path); + *slash = '\0'; - free(c); -} + /* look for existing entry */ + for (e = d->children; e; e = e->next) + if (strcmp(segment, e->segment) == 0 && + lwc_string_isequal(scheme, e->scheme, + &match) == lwc_error_ok && + match == true && + e->port == port) + break; -/** - * Destroy protection space data - * - * \param space Protection space to destroy - */ -void urldb_destroy_prot_space(struct prot_space_data *space) -{ - lwc_string_unref(space->scheme); - free(space->realm); - free(space->auth); + d = e ? e : urldb_add_path_node(scheme, port, segment, NULL, d); + if (!d) + break; - free(space); -} + segment = slash + 1; + } while (1); + free(path_query); -/** - * Destroy a search tree - * - * \param root Root node of tree to destroy - */ -void urldb_destroy_search_tree(struct search_node *root) -{ - /* Destroy children */ - if (root->left != &empty) - urldb_destroy_search_tree(root->left); - if (root->right != &empty) - urldb_destroy_search_tree(root->right); + if (d && !d->url) { + /* Insert defragmented URL */ + if (nsurl_defragment(url, &d->url) != NSERROR_OK) + return NULL; + } - /* And destroy ourselves */ - free(root); + return d; } - diff --git a/content/urldb.h b/content/urldb.h index c0fece24e..d7ca8b0f8 100644 --- a/content/urldb.h +++ b/content/urldb.h @@ -64,62 +64,262 @@ struct cookie_data { struct bitmap; -/* Destruction */ +/** + * Destroy urldb + */ void urldb_destroy(void); + /* Persistence support */ + +/** + * Import an URL database from file, replacing any existing database + * + * \param filename Name of file containing data + */ nserror urldb_load(const char *filename); -void urldb_save(const char *filename); + +/** 
+ * Export the current database to file + * + * \param filename Name of file to export to + */ +nserror urldb_save(const char *filename); + +/** + * Set the cross-session persistence of the entry for an URL + * + * \param url Absolute URL to persist + * \param persist True to persist, false otherwise + */ void urldb_set_url_persistence(nsurl *url, bool persist); + /* URL insertion */ + +/** + * Insert an URL into the database + * + * \param url Absolute URL to insert + * \return true on success, false otherwise + */ bool urldb_add_url(nsurl *url); /* URL data modification / lookup */ + +/** + * Set an URL's title string, replacing any existing one + * + * \param url The URL to look for + * \param title The title string to use (copied) + */ void urldb_set_url_title(nsurl *url, const char *title); + +/** + * Set an URL's content type + * + * \param url The URL to look for + * \param type The type to set + */ void urldb_set_url_content_type(nsurl *url, content_type type); + +/** + * Update an URL's visit data + * + * \param url The URL to update + */ void urldb_update_url_visit_data(nsurl *url); + +/** + * Reset an URL's visit statistics + * + * \param url The URL to reset + */ void urldb_reset_url_visit_data(nsurl *url); + +/** + * Find data for an URL. 
+ * + * \param url Absolute URL to look for + * \return Pointer to result struct, or NULL + */ const struct url_data *urldb_get_url_data(nsurl *url); + +/** + * Extract an URL from the db + * + * \param url URL to extract + * \return Pointer to database's copy of URL or NULL if not found + */ nsurl *urldb_get_url(nsurl *url); + /* Authentication modification / lookup */ -void urldb_set_auth_details(nsurl *url, const char *realm, - const char *auth); + +/** + * Set authentication data for an URL + * + * \param url The URL to consider + * \param realm The authentication realm + * \param auth The authentication details (in form username:password) + */ +void urldb_set_auth_details(nsurl *url, const char *realm, const char *auth); + +/** + * Look up authentication details in database + * + * \param url Absolute URL to search for + * \param realm When non-NULL, it is realm which can be used to determine + * the protection space when that's not been done before for given URL. + * \return Pointer to authentication details, or NULL if not found + */ const char *urldb_get_auth_details(nsurl *url, const char *realm); + /* SSL certificate permissions */ + +/** + * Set certificate verification permissions + * + * \param url URL to consider + * \param permit Set to true to allow invalid certificates + */ void urldb_set_cert_permissions(nsurl *url, bool permit); + +/** + * Retrieve certificate verification permissions from database + * + * \param url Absolute URL to search for + * \return true to permit connections to hosts with invalid certificates, + * false otherwise. 
+ */ bool urldb_get_cert_permissions(nsurl *url); + /* Thumbnail handling */ + +/** + * Set thumbnail for url, replacing any existing thumbnail + * + * \param url Absolute URL to consider + * \param bitmap Opaque pointer to thumbnail data, or NULL to invalidate + */ void urldb_set_thumbnail(nsurl *url, struct bitmap *bitmap); + +/** + * Retrieve thumbnail data for given URL + * + * \param url Absolute URL to search for + * \return Pointer to thumbnail data, or NULL if not found. + */ struct bitmap *urldb_get_thumbnail(nsurl *url); + /* URL completion */ + +/** + * Iterate over entries in the database which match the given prefix + * + * \param prefix Prefix to match + * \param callback Callback function + */ void urldb_iterate_partial(const char *prefix, - bool (*callback)(nsurl *url, - const struct url_data *data)); + bool (*callback)(nsurl *url, const struct url_data *data)); + /* Iteration */ + +/** + * Iterate over all entries in database + * + * \param callback Function to call back for each entry + */ void urldb_iterate_entries(bool (*callback)(nsurl *url, const struct url_data *data)); + +/** + * Iterate over all cookies in database + * + * \param callback Function to call back for each entry + */ void urldb_iterate_cookies(bool (*callback)(const struct cookie_data *cookie)); -/* Debug */ -void urldb_dump(void); /* Cookies */ + +/** + * Parse Set-Cookie header and insert cookie(s) into database + * + * \param header Header to parse, with Set-Cookie: stripped + * \param url URL being fetched + * \param referer Referring resource, or NULL for verifiable transaction + * \return true on success, false otherwise + */ bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer); + +/** + * Retrieve cookies for an URL + * + * \param url URL being fetched + * \param include_http_only Whether to include HTTP(S) only cookies. 
+ * \return Cookies string for libcurl (on heap), or NULL on error/no cookies + */ char *urldb_get_cookie(nsurl *url, bool include_http_only); + +/** + * Delete a cookie + * + * \param domain The cookie's domain + * \param path The cookie's path + * \param name The cookie's name + */ void urldb_delete_cookie(const char *domain, const char *path, const char *name); + +/** + * Load a cookie file into the database + * + * \param filename File to load + */ void urldb_load_cookies(const char *filename); + +/** + * Save persistent cookies to file + * + * \param filename Path to save to + */ void urldb_save_cookies(const char *filename); +/* Debug */ + +/** + * Dump URL database to stderr + */ +void urldb_dump(void); + + /* test harness only */ + +/** + * Add a host to the database, creating any intermediate entries + * + * \param host Hostname to add + * \return Pointer to leaf node, or NULL on memory exhaustion + */ struct host_part *urldb_add_host(const char *host); + +/** + * Add a path to the database, creating any intermediate entries + * + * \param scheme URL scheme associated with path + * \param port Port number on host associated with path + * \param host Host tree node to attach to + * \param path_query Absolute path plus query to add (freed) + * \param fragment URL fragment, or NULL + * \param url URL (fragment ignored) + * \return Pointer to leaf node, or NULL on memory exhaustion + */ struct path_data *urldb_add_path(lwc_string *scheme, unsigned int port, const struct host_part *host, char *path_query, lwc_string *fragment, nsurl *url); -- cgit v1.2.3