diff options
Diffstat (limited to 'content')
30 files changed, 4124 insertions, 1658 deletions
diff --git a/content/Makefile b/content/Makefile index 557e6c787..ab257eaea 100644 --- a/content/Makefile +++ b/content/Makefile @@ -1,6 +1,11 @@ # Content sources S_CONTENT := content.c content_factory.c dirlist.c fetch.c hlcache.c \ - llcache.c mimesniff.c urldb.c + llcache.c mimesniff.c urldb.c no_backing_store.c -S_CONTENT := $(addprefix content/,$(S_CONTENT))
\ No newline at end of file +# Make filesystem backing store available +ifeq ($(NETSURF_FS_BACKING_STORE),YES) + S_CONTENT += fs_backing_store.c +endif + +S_CONTENT := $(addprefix content/,$(S_CONTENT)) diff --git a/content/backing_store.h b/content/backing_store.h new file mode 100644 index 000000000..849e11aeb --- /dev/null +++ b/content/backing_store.h @@ -0,0 +1,100 @@ +/* + * Copyright 2014 Vincent Sanders <vince@netsurf-browser.org> + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** \file + * Low-level source data cache backing store interface + */ + +#ifndef NETSURF_CONTENT_LLCACHE_PRIVATE_H_ +#define NETSURF_CONTENT_LLCACHE_PRIVATE_H_ + +#include "content/llcache.h" + +/** storage control flags */ +enum backing_store_flags { + BACKING_STORE_NONE = 0, /**< no special processing */ + BACKING_STORE_META = 1, /**< data is metadata */ + BACKING_STORE_MMAP = 2, /**< when data is retrieved this indicates the + * returned buffer may be memory mapped, + * flag must be cleared if the storage is + * allocated and is not memory mapped. + */ +}; + +/** low level cache backing store operation table + * + * The low level cache (source objects) has the capability to make + * objects and their metadata (headers etc) persistent by writing to a + * backing store using these operations. + */ +struct gui_llcache_table { + /** + * Initialise the backing store. 
+ * + * @param parameters to configure backing store. + * @return NSERROR_OK on success or error code on failure. + */ + nserror (*initialise)(const struct llcache_store_parameters *parameters); + + /** + * Finalise the backing store. + * + * @return NSERROR_OK on success or error code on failure. + */ + nserror (*finalise)(void); + + /** + * Place an object in the backing store. + * + * @param url The url is used as the unique primary key for the data. + * @param flags The flags to control how the object is stored. + * @param data The object's data. + * @param datalen The length of the \a data. + * @return NSERROR_OK on success or error code on failure. + */ + nserror (*store)(struct nsurl *url, enum backing_store_flags flags, + const uint8_t *data, const size_t datalen); + + /** + * Retrieve an object from the backing store. + * + * @param url The url is used as the unique primary key for the data. + * @param flags The flags to control how the object is retrieved. + * @param data The object's data. + * @param datalen The length of the \a data retrieved. + * @return NSERROR_OK on success or error code on failure. + */ + nserror (*fetch)(struct nsurl *url, enum backing_store_flags *flags, + uint8_t **data, size_t *datalen); + + /** + * Invalidate a source object from the backing store. + * + * The entry (if present in the backing store) must no longer + * be returned as a result to the fetch or meta operations. + * + * @param url The url is used as the unique primary key to invalidate. + * @return NSERROR_OK on success or error code on failure. 
+ */ + nserror (*invalidate)(struct nsurl *url); +}; + +extern struct gui_llcache_table* null_llcache_table; +extern struct gui_llcache_table* filesystem_llcache_table; + +#endif diff --git a/content/content.c b/content/content.c index 1c667f12d..4641571bd 100644 --- a/content/content.c +++ b/content/content.c @@ -31,15 +31,13 @@ #include <string.h> #include <strings.h> #include <time.h> + #include "utils/config.h" #include "content/content_protected.h" #include "content/hlcache.h" -#include "css/css.h" #include "image/bitmap.h" #include "desktop/browser.h" #include "utils/nsoption.h" -#include "render/html.h" -#include "render/textplain.h" #include "utils/http.h" #include "utils/log.h" @@ -840,15 +838,15 @@ bool content_drop_file_at_point(struct hlcache_handle *h, } -void content_search(struct hlcache_handle *h, - struct gui_search_callbacks *gui_callbacks, void *gui_data, +void content_search(struct hlcache_handle *h, void *context, search_flags_t flags, const char *string) { struct content *c = hlcache_handle_get_content(h); assert(c != 0); - if (c->handler->search != NULL) - c->handler->search(c, gui_callbacks, gui_data, flags, string); + if (c->handler->search != NULL) { + c->handler->search(c, context, flags, string); + } } @@ -857,18 +855,22 @@ void content_search_clear(struct hlcache_handle *h) struct content *c = hlcache_handle_get_content(h); assert(c != 0); - if (c->handler->search_clear != NULL) + if (c->handler->search_clear != NULL) { c->handler->search_clear(c); + } } - -void content_debug_dump(struct hlcache_handle *h, FILE *f) +/* exported interface documented in content/content.h */ +nserror content_debug_dump(struct hlcache_handle *h, FILE *f, enum content_debug op) { struct content *c = hlcache_handle_get_content(h); assert(c != 0); - if (c->handler->debug_dump != NULL) - c->handler->debug_dump(c, f); + if (c->handler->debug_dump == NULL) { + return NSERROR_NOT_IMPLEMENTED; + } + + return c->handler->debug_dump(c, f, op); } diff --git 
a/content/content.h b/content/content.h index 467fa6055..752370034 100644 --- a/content/content.h +++ b/content/content.h @@ -83,9 +83,16 @@ typedef enum { CONTENT_MSG_POINTER, /**< Wants a specific mouse pointer set */ CONTENT_MSG_SELECTION, /**< A selection made or cleared */ CONTENT_MSG_CARET, /**< Caret movement / hiding */ - CONTENT_MSG_DRAG /**< A drag started or ended */ + CONTENT_MSG_DRAG, /**< A drag started or ended */ + CONTENT_MSG_GADGETCLICK/**< A gadget has been clicked on (mainly for file) */ } content_msg; +/** Debugging dump operations */ +enum content_debug { + CONTENT_DEBUG_RENDER, /** Debug the contents rendering. */ + CONTENT_DEBUG_DOM /** Debug the contents Document Object. */ +}; + /** RFC5988 metadata link */ struct content_rfc5988_link { struct content_rfc5988_link *next; /**< next rfc5988_link in list */ @@ -190,6 +197,10 @@ union content_msg_data { } type; const struct rect *rect; } drag; + /** CONTENT_MSG_GADGETCLICK - User clicked on a form gadget */ + struct { + struct form_control *gadget; + } gadget_click; }; /** parameters to content redraw */ @@ -261,11 +272,20 @@ bool content_scroll_at_point(struct hlcache_handle *h, int x, int y, int scrx, int scry); bool content_drop_file_at_point(struct hlcache_handle *h, int x, int y, char *file); -void content_search(struct hlcache_handle *h, - struct gui_search_callbacks *gui_callbacks, void *gui_data, + +void content_search(struct hlcache_handle *h, void *context, search_flags_t flags, const char *string); void content_search_clear(struct hlcache_handle *h); -void content_debug_dump(struct hlcache_handle *h, FILE *f); + +/** + * Dump debug information to file. + * + * \param h content handle to debug. + * \param f File to write output to. + * \param op Debug operation type. 
+ */ +nserror content_debug_dump(struct hlcache_handle *h, FILE *f, enum content_debug op); + struct content_rfc5988_link *content_find_rfc5988_link(struct hlcache_handle *c, lwc_string *rel); diff --git a/content/content_protected.h b/content/content_protected.h index 57ce35775..7d51e1b8c 100644 --- a/content/content_protected.h +++ b/content/content_protected.h @@ -73,12 +73,10 @@ struct content_handler { int scrx, int scry); bool (*drop_file_at_point)(struct content *c, int x, int y, char *file); - void (*search)(struct content *c, - struct gui_search_callbacks *gui_callbacks, - void *gui_data, search_flags_t flags, - const char *string); + void (*search)(struct content *c, void *context, search_flags_t flags, + const char *string); void (*search_clear)(struct content *c); - void (*debug_dump)(struct content *c, FILE *f); + nserror (*debug_dump)(struct content *c, FILE *f, enum content_debug op); nserror (*clone)(const struct content *old, struct content **newc); bool (*matches_quirks)(const struct content *c, bool quirks); content_type (*type)(void); diff --git a/content/content_type.h b/content/content_type.h index 6488692fe..9f8c2f307 100644 --- a/content/content_type.h +++ b/content/content_type.h @@ -25,9 +25,6 @@ #ifndef _NETSURF_DESKTOP_CONTENT_TYPE_H_ #define _NETSURF_DESKTOP_CONTENT_TYPE_H_ -#include "utils/config.h" - - /** The type of a content. 
*/ typedef enum { CONTENT_NONE = 0x00, diff --git a/content/dirlist.c b/content/dirlist.c index 433e21026..1d7a67e1f 100644 --- a/content/dirlist.c +++ b/content/dirlist.c @@ -24,10 +24,11 @@ #include <string.h> #include <stdio.h> #include <stdlib.h> -#include "content/dirlist.h" + +#include "utils/nsurl.h" #include "utils/messages.h" -static const char footer[] = "</div>\n</body>\n</html>\n"; +#include "content/dirlist.h" static int dirlist_filesize_calculate(unsigned long *bytesize); static int dirlist_filesize_value(unsigned long bytesize); @@ -264,7 +265,7 @@ bool dirlist_generate_headings(char *buffer, int buffer_length) * dirlist_generate_bottom() */ -bool dirlist_generate_row(bool even, bool directory, char *url, char *name, +bool dirlist_generate_row(bool even, bool directory, nsurl *url, char *name, const char *mimetype, long long size, char *date, char *time, char *buffer, int buffer_length) { @@ -290,7 +291,7 @@ bool dirlist_generate_row(bool even, bool directory, char *url, char *name, "\t<span class=\"date\">%s</span>\n" "\t<span class=\"time\">%s</span>\n" "</a>\n", - url, even ? "even" : "odd", + nsurl_access(url), even ? "even" : "odd", directory ? 
"dir" : "file", name, mimetype, size_string, unit, date, time); if (error < 0 || error >= buffer_length) diff --git a/content/dirlist.h b/content/dirlist.h index bf90ec6d4..687f50688 100644 --- a/content/dirlist.h +++ b/content/dirlist.h @@ -39,7 +39,7 @@ bool dirlist_generate_title(const char *title, char *buffer, int buffer_length); bool dirlist_generate_parent_link(const char *parent, char *buffer, int buffer_length); bool dirlist_generate_headings(char *buffer, int buffer_length); -bool dirlist_generate_row(bool even, bool directory, char *url, char *name, +bool dirlist_generate_row(bool even, bool directory, nsurl *url, char *name, const char *mimetype, long long size, char *date, char *time, char *buffer, int buffer_length); bool dirlist_generate_bottom(char *buffer, int buffer_length); diff --git a/content/fetch.c b/content/fetch.c index 1ff925ae3..9a0796294 100644 --- a/content/fetch.c +++ b/content/fetch.c @@ -19,12 +19,15 @@ */ /** \file - * Fetching of data from a URL (implementation). + * Implementation of fetching of data from a URL. + * + * The implementation is the fetch factory and the generic operations + * around the fetcher specific methods. * * Active fetches are held in the circular linked list ::fetch_ring. There may * be at most ::option_max_fetchers_per_host active requests per Host: header. * There may be at most ::option_max_fetchers active requests overall. Inactive - * fetchers are stored in the ::queue_ring waiting for use. + * fetches are stored in the ::queue_ring waiting for use. 
*/ #include <assert.h> @@ -33,18 +36,13 @@ #include <string.h> #include <strings.h> #include <time.h> - #include <libwapcaplet/libwapcaplet.h> +#include <curl/curl.h> #include "utils/config.h" -#include "content/fetch.h" -#include "content/fetchers/resource.h" -#include "content/fetchers/about.h" -#include "content/fetchers/curl.h" -#include "content/fetchers/data.h" -#include "content/fetchers/file.h" -#include "content/urldb.h" #include "desktop/netsurf.h" +#include "desktop/gui_factory.h" +#include "utils/corestrings.h" #include "utils/nsoption.h" #include "utils/log.h" #include "utils/messages.h" @@ -52,27 +50,50 @@ #include "utils/utils.h" #include "utils/ring.h" +#include "content/fetch.h" +#include "content/fetchers.h" +#include "content/fetchers/resource.h" +#include "content/fetchers/about.h" +#include "content/fetchers/curl.h" +#include "content/fetchers/data.h" +#include "content/fetchers/file.h" +#include "javascript/fetcher.h" +#include "content/urldb.h" + /* Define this to turn on verbose fetch logging */ #undef DEBUG_FETCH_VERBOSE -bool fetch_active; /**< Fetches in progress, please call fetch_poll(). */ +/** Verbose fetcher logging */ +#ifdef DEBUG_FETCH_VERBOSE +#define FETCH_LOG(x) LOG(x) +#else +#define FETCH_LOG(x) +#endif + +/** The maximum number of fetchers that can be added */ +#define MAX_FETCHERS 10 + +/** The time in ms between polling the fetchers. + * + * \todo The schedule timeout should be profiled to see if there is a + * better value or even if it needs to be dynamic. + */ +#define SCHEDULE_TIME 10 + +/** The fdset timeout in ms */ +#define FDSET_TIMEOUT 1000 -/** Information about a fetcher for a given scheme. */ +/** + * Information about a fetcher for a given scheme. + */ typedef struct scheme_fetcher_s { - lwc_string *scheme_name; /**< The scheme. */ - fetcher_can_fetch can_fetch; /**< Ensure an URL can be fetched. */ - fetcher_setup_fetch setup_fetch; /**< Set up a fetch. 
*/ - fetcher_start_fetch start_fetch; /**< Start a fetch. */ - fetcher_abort_fetch abort_fetch; /**< Abort a fetch. */ - fetcher_free_fetch free_fetch; /**< Free a fetch. */ - fetcher_poll_fetcher poll_fetcher; /**< Poll this fetcher. */ - fetcher_finalise finaliser; /**< Clean up this fetcher. */ - int refcount; /**< When zero, clean up the fetcher. */ - struct scheme_fetcher_s *next_fetcher; /**< Next fetcher in the list. */ - struct scheme_fetcher_s *prev_fetcher; /**< Prev fetcher in the list. */ + lwc_string *scheme; /**< The scheme. */ + + struct fetcher_operation_table ops; /**< The fetchers operations. */ + int refcount; /**< When zero the fetcher is no longer in use. */ } scheme_fetcher; -static scheme_fetcher *fetchers = NULL; +static scheme_fetcher fetchers[MAX_FETCHERS]; /** Information for a single fetch. */ struct fetch { @@ -84,176 +105,351 @@ struct fetch { void *p; /**< Private data for callback. */ lwc_string *host; /**< Host part of URL, interned */ long http_code; /**< HTTP response code, or 0. */ - scheme_fetcher *ops; /**< Fetcher operations for this fetch, - NULL if not set. */ + int fetcherd; /**< Fetcher descriptor for this fetch */ void *fetcher_handle; /**< The handle for the fetcher. */ bool fetch_is_active; /**< This fetch is active. */ struct fetch *r_prev; /**< Previous active fetch in ::fetch_ring. */ struct fetch *r_next; /**< Next active fetch in ::fetch_ring. */ }; -static struct fetch *fetch_ring = 0; /**< Ring of active fetches. */ -static struct fetch *queue_ring = 0; /**< Ring of queued fetches */ +static struct fetch *fetch_ring = NULL; /**< Ring of active fetches. 
*/ +static struct fetch *queue_ring = NULL; /**< Ring of queued fetches */ -#define fetch_ref_fetcher(F) F->refcount++ -static void fetch_unref_fetcher(scheme_fetcher *fetcher); -static void fetch_dispatch_jobs(void); -static bool fetch_choose_and_dispatch(void); -static bool fetch_dispatch_job(struct fetch *fetch); +/****************************************************************************** + * fetch internals * + ******************************************************************************/ -/* Static lwc_strings */ -static lwc_string *fetch_http_lwc; -static lwc_string *fetch_https_lwc; +static inline void fetch_ref_fetcher(int fetcherd) +{ + fetchers[fetcherd].refcount++; +} +static inline void fetch_unref_fetcher(int fetcherd) +{ + fetchers[fetcherd].refcount--; + if (fetchers[fetcherd].refcount == 0) { + fetchers[fetcherd].ops.finalise(fetchers[fetcherd].scheme); + lwc_string_unref(fetchers[fetcherd].scheme); + } +} /** - * Initialise the fetcher. + * Find a suitable fetcher for a scheme. 
*/ - -void fetch_init(void) +static int get_fetcher_for_scheme(lwc_string *scheme) { - fetch_curl_register(); - fetch_data_register(); - fetch_file_register(); - fetch_resource_register(); - fetch_about_register(); - fetch_active = false; + int fetcherd; + bool match; - if (lwc_intern_string("http", SLEN("http"), &fetch_http_lwc) != - lwc_error_ok) { - die("Failed to initialise the fetch module " - "(couldn't intern \"http\")."); + for (fetcherd = 0; fetcherd < MAX_FETCHERS; fetcherd++) { + if ((fetchers[fetcherd].refcount > 0) && + (lwc_string_isequal(fetchers[fetcherd].scheme, + scheme, &match) == lwc_error_ok) && + (match == true)) { + return fetcherd; + } } + return -1; +} + +/** + * Dispatch a single job + */ +static bool fetch_dispatch_job(struct fetch *fetch) +{ + RING_REMOVE(queue_ring, fetch); + FETCH_LOG(("Attempting to start fetch %p, fetcher %p, url %s", fetch, + fetch->fetcher_handle, nsurl_access(fetch->url))); - if (lwc_intern_string("https", SLEN("https"), &fetch_https_lwc) != - lwc_error_ok) { - die("Failed to initialise the fetch module " - "(couldn't intern \"https\")."); + if (!fetchers[fetch->fetcherd].ops.start(fetch->fetcher_handle)) { + RING_INSERT(queue_ring, fetch); /* Put it back on the end of the queue */ + return false; + } else { + RING_INSERT(fetch_ring, fetch); + fetch->fetch_is_active = true; + return true; } } +/** + * Choose and dispatch a single job. Return false if we failed to dispatch + * anything. + * + * We don't check the overall dispatch size here because we're not called unless + * there is room in the fetch queue for us. 
+ */ +static bool fetch_choose_and_dispatch(void) +{ + bool same_host; + struct fetch *queueitem; + queueitem = queue_ring; + do { + /* We can dispatch the selected item if there is room in the + * fetch ring + */ + int countbyhost; + RING_COUNTBYLWCHOST(struct fetch, fetch_ring, countbyhost, + queueitem->host); + if (countbyhost < nsoption_int(max_fetchers_per_host)) { + /* We can dispatch this item in theory */ + return fetch_dispatch_job(queueitem); + } + /* skip over other items with the same host */ + same_host = true; + while (same_host == true && queueitem->r_next != queue_ring) { + if (lwc_string_isequal(queueitem->host, + queueitem->r_next->host, &same_host) == + lwc_error_ok && same_host == true) { + queueitem = queueitem->r_next; + } + } + queueitem = queueitem->r_next; + } while (queueitem != queue_ring); + return false; +} + +static void dump_rings(void) +{ +#ifdef DEBUG_FETCH_VERBOSE + struct fetch *q; + struct fetch *f; + + q = queue_ring; + if (q) { + do { + LOG(("queue_ring: %s", nsurl_access(q->url))); + q = q->r_next; + } while (q != queue_ring); + } + f = fetch_ring; + if (f) { + do { + LOG(("fetch_ring: %s", nsurl_access(f->url))); + f = f->r_next; + } while (f != fetch_ring); + } +#endif +} /** - * Clean up for quit. + * Dispatch as many jobs as we have room to dispatch. * - * Must be called before exiting. + * @return true if there are active fetchers that require polling else false. 
*/ +static bool fetch_dispatch_jobs(void) +{ + int all_active; + int all_queued; + + RING_GETSIZE(struct fetch, queue_ring, all_queued); + RING_GETSIZE(struct fetch, fetch_ring, all_active); + + FETCH_LOG(("queue_ring %i, fetch_ring %i", all_queued, all_active)); + dump_rings(); -void fetch_quit(void) + while ((all_queued != 0) && + (all_active < nsoption_int(max_fetchers)) && + fetch_choose_and_dispatch()) { + all_queued--; + all_active++; + FETCH_LOG(("%d queued, %d fetching", + all_queued, all_active)); + } + + FETCH_LOG(("Fetch ring is now %d elements.", all_active)); + FETCH_LOG(("Queue ring is now %d elements.", all_queued)); + + return (all_active > 0); +} + +static void fetcher_poll(void *unused) { - while (fetchers != NULL) { - if (fetchers->refcount != 1) { - LOG(("Fetcher for scheme %s still active?!", - lwc_string_data(fetchers->scheme_name))); - /* We shouldn't do this, but... */ - fetchers->refcount = 1; + int fetcherd; + + if (fetch_dispatch_jobs()) { + FETCH_LOG(("Polling fetchers")); + for (fetcherd = 0; fetcherd < MAX_FETCHERS; fetcherd++) { + if (fetchers[fetcherd].refcount > 0) { + /* fetcher present */ + fetchers[fetcherd].ops.poll(fetchers[fetcherd].scheme); + } } - fetch_unref_fetcher(fetchers); - } - lwc_string_unref(fetch_http_lwc); - lwc_string_unref(fetch_https_lwc); + /* schedule active fetchers to run again in 10ms */ + guit->browser->schedule(SCHEDULE_TIME, fetcher_poll, NULL); + } } +/****************************************************************************** + * Public API * + ******************************************************************************/ -bool fetch_add_fetcher(lwc_string *scheme, - fetcher_initialise initialiser, - fetcher_can_fetch can_fetch, - fetcher_setup_fetch setup_fetch, - fetcher_start_fetch start_fetch, - fetcher_abort_fetch abort_fetch, - fetcher_free_fetch free_fetch, - fetcher_poll_fetcher poll_fetcher, - fetcher_finalise finaliser) +/* exported interface documented in content/fetch.h */ +nserror 
fetcher_init(void) { - scheme_fetcher *new_fetcher; - if (!initialiser(scheme)) - return false; - new_fetcher = malloc(sizeof(scheme_fetcher)); - if (new_fetcher == NULL) { - finaliser(scheme); - return false; - } - new_fetcher->scheme_name = scheme; - new_fetcher->refcount = 0; - new_fetcher->can_fetch = can_fetch; - new_fetcher->setup_fetch = setup_fetch; - new_fetcher->start_fetch = start_fetch; - new_fetcher->abort_fetch = abort_fetch; - new_fetcher->free_fetch = free_fetch; - new_fetcher->poll_fetcher = poll_fetcher; - new_fetcher->finaliser = finaliser; - new_fetcher->next_fetcher = fetchers; - fetchers = new_fetcher; - fetch_ref_fetcher(new_fetcher); - return true; + fetch_curl_register(); + fetch_data_register(); + fetch_file_register(); + fetch_resource_register(); + fetch_about_register(); + fetch_javascript_register(); + + return NSERROR_OK; } +/* exported interface documented in content/fetchers.h */ +void fetcher_quit(void) +{ + int fetcherd; /* fetcher index */ + for (fetcherd = 0; fetcherd < MAX_FETCHERS; fetcherd++) { + if (fetchers[fetcherd].refcount > 1) { + /* fetcher still has reference at quit. This + * should not happen as the fetch should have + * been aborted in llcache shutdown. + * + * This appears to be normal behaviour if a + * curl operation is still in progress at exit + * as the abort waits for curl to complete. + * + * We could make the user wait for curl to + * complete but we are exiting anyway so thats + * unhelpful. Instead we just log it and force + * the reference count to allow the fetcher to + * be stopped. 
+ */ + LOG(("Fetcher for scheme %s still has %d active users at quit.", + lwc_string_data(fetchers[fetcherd].scheme), + fetchers[fetcherd].refcount)); + + fetchers[fetcherd].refcount = 1; + } + if (fetchers[fetcherd].refcount == 1) { -void fetch_unref_fetcher(scheme_fetcher *fetcher) + fetch_unref_fetcher(fetcherd); + } + } +} + +/* exported interface documented in content/fetchers.h */ +nserror +fetcher_add(lwc_string *scheme, const struct fetcher_operation_table *ops) { - if (--fetcher->refcount == 0) { - fetcher->finaliser(fetcher->scheme_name); - lwc_string_unref(fetcher->scheme_name); - if (fetcher == fetchers) { - fetchers = fetcher->next_fetcher; - if (fetchers) - fetchers->prev_fetcher = NULL; - } else { - fetcher->prev_fetcher->next_fetcher = - fetcher->next_fetcher; - if (fetcher->next_fetcher != NULL) - fetcher->next_fetcher->prev_fetcher = - fetcher->prev_fetcher; + int fetcherd; + + /* find unused fetcher descriptor */ + for (fetcherd = 0; fetcherd < MAX_FETCHERS; fetcherd++) { + if (fetchers[fetcherd].refcount == 0) { + break; } - free(fetcher); } + if (fetcherd == MAX_FETCHERS) { + return NSERROR_INIT_FAILED; + } + + if (!ops->initialise(scheme)) { + return NSERROR_INIT_FAILED; + } + + fetchers[fetcherd].scheme = scheme; + fetchers[fetcherd].ops = *ops; + + fetch_ref_fetcher(fetcherd); + + return NSERROR_OK; } +/* exported interface documented in content/fetch.h */ +nserror fetcher_fdset(fd_set *read_fd_set, + fd_set *write_fd_set, + fd_set *except_fd_set, + int *maxfd_out) +{ + CURLMcode code; + int maxfd; + int fetcherd; /* fetcher index */ + + if (!fetch_dispatch_jobs()) { + FETCH_LOG(("No jobs")); + *maxfd_out = -1; + return NSERROR_OK; + } -/** - * Start fetching data for the given URL. - * - * The function returns immediately. The fetch may be queued for later - * processing. - * - * A pointer to an opaque struct fetch is returned, which can be passed to - * fetch_abort() to abort the fetch at any time. 
Returns 0 if memory is - * exhausted (or some other fatal error occurred). - * - * The caller must supply a callback function which is called when anything - * interesting happens. The callback function is first called with msg - * FETCH_HEADER, with the header in data, then one or more times - * with FETCH_DATA with some data for the url, and finally with - * FETCH_FINISHED. Alternatively, FETCH_ERROR indicates an error occurred: - * data contains an error message. FETCH_REDIRECT may replace the FETCH_HEADER, - * FETCH_DATA, FETCH_FINISHED sequence if the server sends a replacement URL. - * - */ + FETCH_LOG(("Polling fetchers")); -struct fetch * fetch_start(nsurl *url, nsurl *referer, - fetch_callback callback, - void *p, bool only_2xx, const char *post_urlenc, - const struct fetch_multipart_data *post_multipart, - bool verifiable, bool downgrade_tls, - const char *headers[]) + for (fetcherd = 0; fetcherd < MAX_FETCHERS; fetcherd++) { + if (fetchers[fetcherd].refcount > 0) { + /* fetcher present */ + fetchers[fetcherd].ops.poll(fetchers[fetcherd].scheme); + } + } + + FD_ZERO(read_fd_set); + FD_ZERO(write_fd_set); + FD_ZERO(except_fd_set); + code = curl_multi_fdset(fetch_curl_multi, + read_fd_set, + write_fd_set, + except_fd_set, + &maxfd); + assert(code == CURLM_OK); + + if (maxfd >= 0) { + /* change the scheduled poll to happen is a 1000ms as + * we assume fetching an fdset means the fetchers will + * be run by the client waking up on data available on + * the fd and re-calling fetcher_fdset() if this does + * not happen the fetch polling will continue as + * usual. + */ + /** @note adjusting the schedule time is only done for + * curl currently. This is because as it is assumed to + * be the only fetcher that can possibly have fd to + * select on. All the other fetchers continue to need + * polling frequently. 
+ */ + guit->browser->schedule(FDSET_TIMEOUT, fetcher_poll, NULL); + } + + *maxfd_out = maxfd; + + return NSERROR_OK; +} + +/* exported interface documented in content/fetch.h */ +struct fetch * +fetch_start(nsurl *url, + nsurl *referer, + fetch_callback callback, + void *p, + bool only_2xx, + const char *post_urlenc, + const struct fetch_multipart_data *post_multipart, + bool verifiable, + bool downgrade_tls, + const char *headers[]) { struct fetch *fetch; - scheme_fetcher *fetcher = fetchers; lwc_string *scheme; bool match; fetch = malloc(sizeof (*fetch)); - if (fetch == NULL) + if (fetch == NULL) { return NULL; + } /* The URL we're fetching must have a scheme */ scheme = nsurl_get_component(url, NSURL_SCHEME); assert(scheme != NULL); -#ifdef DEBUG_FETCH_VERBOSE - LOG(("fetch %p, url '%s'", fetch, nsurl_access(url))); -#endif + /* try and obtain a fetcher for this scheme */ + fetch->fetcherd = get_fetcher_for_scheme(scheme); + if (fetch->fetcherd == -1) { + lwc_string_unref(scheme); + free(fetch); + return NULL; + } + + FETCH_LOG(("fetch %p, url '%s'", fetch, nsurl_access(url))); /* construct a new fetch structure */ fetch->callback = callback; @@ -266,10 +462,9 @@ struct fetch * fetch_start(nsurl *url, nsurl *referer, fetch->referer = NULL; fetch->send_referer = false; fetch->fetcher_handle = NULL; - fetch->ops = NULL; fetch->fetch_is_active = false; fetch->host = nsurl_get_component(url, NSURL_HOST); - + if (referer != NULL) { lwc_string *ref_scheme; fetch->referer = nsurl_ref(referer); @@ -279,7 +474,7 @@ struct fetch * fetch_start(nsurl *url, nsurl *referer, /* Determine whether to send the Referer header */ if (nsoption_bool(send_referer) && ref_scheme != NULL) { - /* User permits us to send the header + /* User permits us to send the header * Only send it if: * 1) The fetch and referer schemes match * or 2) The fetch is https and the referer is http @@ -292,15 +487,15 @@ struct fetch * fetch_start(nsurl *url, nsurl *referer, bool match1; bool match2; if 
(lwc_string_isequal(scheme, ref_scheme, - &match) != lwc_error_ok) { + &match) != lwc_error_ok) { match = false; } - if (lwc_string_isequal(scheme, fetch_https_lwc, - &match1) != lwc_error_ok) { + if (lwc_string_isequal(scheme, corestring_lwc_https, + &match1) != lwc_error_ok) { match1 = false; } - if (lwc_string_isequal(ref_scheme, fetch_http_lwc, - &match2) != lwc_error_ok) { + if (lwc_string_isequal(ref_scheme, corestring_lwc_http, + &match2) != lwc_error_ok) { match2= false; } if (match == true || (match1 == true && match2 == true)) @@ -310,261 +505,93 @@ struct fetch * fetch_start(nsurl *url, nsurl *referer, lwc_string_unref(ref_scheme); } - /* Pick the scheme ops */ - while (fetcher) { - if ((lwc_string_isequal(fetcher->scheme_name, scheme, - &match) == lwc_error_ok) && (match == true)) { - fetch->ops = fetcher; - break; - } - fetcher = fetcher->next_fetcher; - } - - if (fetch->ops == NULL) - goto failed; - - /* Got a scheme fetcher, try and set up the fetch */ - fetch->fetcher_handle = fetch->ops->setup_fetch(fetch, url, - only_2xx, downgrade_tls, - post_urlenc, post_multipart, - headers); - - if (fetch->fetcher_handle == NULL) - goto failed; - - /* Rah, got it, so ref the fetcher. */ - fetch_ref_fetcher(fetch->ops); - /* these aren't needed past here */ lwc_string_unref(scheme); - /* Dump us in the queue and ask the queue to run. 
*/ - RING_INSERT(queue_ring, fetch); - fetch_dispatch_jobs(); - - return fetch; - -failed: - lwc_string_unref(scheme); - - if (fetch->host != NULL) - lwc_string_unref(fetch->host); - if (fetch->url != NULL) - nsurl_unref(fetch->url); - if (fetch->referer != NULL) - nsurl_unref(fetch->referer); - - free(fetch); - - return NULL; -} + /* try and set up the fetch */ + fetch->fetcher_handle = fetchers[fetch->fetcherd].ops.setup(fetch, url, + only_2xx, downgrade_tls, + post_urlenc, post_multipart, + headers); + if (fetch->fetcher_handle == NULL) { + if (fetch->host != NULL) + lwc_string_unref(fetch->host); -/** - * Dispatch as many jobs as we have room to dispatch. - */ -void fetch_dispatch_jobs(void) -{ - int all_active, all_queued; -#ifdef DEBUG_FETCH_VERBOSE - struct fetch *q; - struct fetch *f; -#endif - - if (!queue_ring) - return; /* Nothing to do, the queue is empty */ - RING_GETSIZE(struct fetch, queue_ring, all_queued); - RING_GETSIZE(struct fetch, fetch_ring, all_active); + if (fetch->url != NULL) + nsurl_unref(fetch->url); -#ifdef DEBUG_FETCH_VERBOSE - LOG(("queue_ring %i, fetch_ring %i", all_queued, all_active)); + if (fetch->referer != NULL) + nsurl_unref(fetch->referer); - q = queue_ring; - if (q) { - do { - LOG(("queue_ring: %s", q->url)); - q = q->r_next; - } while (q != queue_ring); - } - f = fetch_ring; - if (f) { - do { - LOG(("fetch_ring: %s", f->url)); - f = f->r_next; - } while (f != fetch_ring); - } -#endif + free(fetch); - while ( all_queued && all_active < nsoption_int(max_fetchers) ) { - /*LOG(("%d queued, %d fetching", all_queued, all_active));*/ - if (fetch_choose_and_dispatch()) { - all_queued--; - all_active++; - } else { - /* Either a dispatch failed or we ran out. Just stop */ - break; - } + return NULL; } - fetch_active = (all_active > 0); -#ifdef DEBUG_FETCH_VERBOSE - LOG(("Fetch ring is now %d elements.", all_active)); - LOG(("Queue ring is now %d elements.", all_queued)); -#endif -} + /* Rah, got it, so ref the fetcher. 
*/ + fetch_ref_fetcher(fetch->fetcherd); -/** - * Choose and dispatch a single job. Return false if we failed to dispatch - * anything. - * - * We don't check the overall dispatch size here because we're not called unless - * there is room in the fetch queue for us. - */ -bool fetch_choose_and_dispatch(void) -{ - bool same_host; - struct fetch *queueitem; - queueitem = queue_ring; - do { - /* We can dispatch the selected item if there is room in the - * fetch ring - */ - int countbyhost; - RING_COUNTBYLWCHOST(struct fetch, fetch_ring, countbyhost, - queueitem->host); - if (countbyhost < nsoption_int(max_fetchers_per_host)) { - /* We can dispatch this item in theory */ - return fetch_dispatch_job(queueitem); - } - /* skip over other items with the same host */ - same_host = true; - while (same_host == true && queueitem->r_next != queue_ring) { - if (lwc_string_isequal(queueitem->host, - queueitem->r_next->host, &same_host) == - lwc_error_ok && same_host == true) { - queueitem = queueitem->r_next; - } - } - queueitem = queueitem->r_next; - } while (queueitem != queue_ring); - return false; -} - + /* Dump new fetch in the queue. */ + RING_INSERT(queue_ring, fetch); -/** - * Dispatch a single job - */ -bool fetch_dispatch_job(struct fetch *fetch) -{ - RING_REMOVE(queue_ring, fetch); -#ifdef DEBUG_FETCH_VERBOSE - LOG(("Attempting to start fetch %p, fetcher %p, url %s", fetch, - fetch->fetcher_handle, nsurl_access(fetch->url))); -#endif - if (!fetch->ops->start_fetch(fetch->fetcher_handle)) { - RING_INSERT(queue_ring, fetch); /* Put it back on the end of the queue */ - return false; - } else { - RING_INSERT(fetch_ring, fetch); - fetch->fetch_is_active = true; - return true; + /* Ask the queue to run. */ + if (fetch_dispatch_jobs()) { + FETCH_LOG(("scheduling poll")); + /* schedule active fetchers to run again in 10ms */ + guit->browser->schedule(10, fetcher_poll, NULL); } -} + return fetch; +} -/** - * Abort a fetch. 
- */ - +/* exported interface documented in content/fetch.h */ void fetch_abort(struct fetch *f) { assert(f); -#ifdef DEBUG_FETCH_VERBOSE - LOG(("fetch %p, fetcher %p, url '%s'", f, f->fetcher_handle, - nsurl_access(f->url))); -#endif - f->ops->abort_fetch(f->fetcher_handle); + FETCH_LOG(("fetch %p, fetcher %p, url '%s'", f, f->fetcher_handle, + nsurl_access(f->url))); + fetchers[f->fetcherd].ops.abort(f->fetcher_handle); } - -/** - * Free a fetch structure and associated resources. - */ - +/* exported interface documented in content/fetch.h */ void fetch_free(struct fetch *f) { -#ifdef DEBUG_FETCH_VERBOSE - LOG(("Freeing fetch %p, fetcher %p", f, f->fetcher_handle)); -#endif - f->ops->free_fetch(f->fetcher_handle); - fetch_unref_fetcher(f->ops); - nsurl_unref(f->url); - if (f->referer != NULL) - nsurl_unref(f->referer); - if (f->host != NULL) - lwc_string_unref(f->host); - free(f); -} + FETCH_LOG(("Freeing fetch %p, fetcher %p", f, f->fetcher_handle)); + fetchers[f->fetcherd].ops.free(f->fetcher_handle); -/** - * Do some work on current fetches. - * - * Must be called regularly to make progress on fetches. - */ + fetch_unref_fetcher(f->fetcherd); -void fetch_poll(void) -{ - scheme_fetcher *fetcher = fetchers; - scheme_fetcher *next_fetcher; - - fetch_dispatch_jobs(); - - if (!fetch_active) - return; /* No point polling, there's no fetch active. */ - while (fetcher != NULL) { - next_fetcher = fetcher->next_fetcher; - if (fetcher->poll_fetcher != NULL) { - /* LOG(("Polling fetcher for %s", - lwc_string_data(fetcher->scheme_name))); */ - fetcher->poll_fetcher(fetcher->scheme_name); - } - fetcher = next_fetcher; + nsurl_unref(f->url); + if (f->referer != NULL) { + nsurl_unref(f->referer); } + if (f->host != NULL) { + lwc_string_unref(f->host); + } + free(f); } -/** - * Check if a URL's scheme can be fetched. 
- * - * \param url URL to check - * \return true if the scheme is supported - */ +/* exported interface documented in content/fetch.h */ bool fetch_can_fetch(const nsurl *url) { - scheme_fetcher *fetcher = fetchers; - bool match; lwc_string *scheme = nsurl_get_component(url, NSURL_SCHEME); + int fetcherd; - while (fetcher != NULL) { - if (lwc_string_isequal(fetcher->scheme_name, scheme, &match) == lwc_error_ok && match == true) { - break; - } + fetcherd = get_fetcher_for_scheme(scheme); + lwc_string_unref(scheme); - fetcher = fetcher->next_fetcher; + if (fetcherd == -1) { + return false; } - lwc_string_unref(scheme); - - return fetcher == NULL ? false : fetcher->can_fetch(url); + return fetchers[fetcherd].ops.acceptable(url); } - -/** - * Change the callback function for a fetch. - */ - +/* exported interface documented in content/fetch.h */ void fetch_change_callback(struct fetch *fetch, fetch_callback callback, void *p) @@ -574,22 +601,13 @@ void fetch_change_callback(struct fetch *fetch, fetch->p = p; } - -/** - * Get the HTTP response code. - */ - +/* exported interface documented in content/fetch.h */ long fetch_http_code(struct fetch *fetch) { return fetch->http_code; } -/** - * Determine if a fetch was verifiable - * - * \param fetch Fetch to consider - * \return Verifiable status of fetch - */ +/* exported interface documented in content/fetch.h */ bool fetch_get_verifiable(struct fetch *fetch) { assert(fetch); @@ -597,14 +615,9 @@ bool fetch_get_verifiable(struct fetch *fetch) return fetch->verifiable; } -/** - * Clone a linked list of fetch_multipart_data. 
- * - * \param list List to clone - * \return Pointer to head of cloned list, or NULL on failure - */ -struct fetch_multipart_data *fetch_multipart_data_clone( - const struct fetch_multipart_data *list) +/* exported interface documented in content/fetch.h */ +struct fetch_multipart_data * +fetch_multipart_data_clone(const struct fetch_multipart_data *list) { struct fetch_multipart_data *clone, *last = NULL; struct fetch_multipart_data *result = NULL; @@ -639,6 +652,21 @@ struct fetch_multipart_data *fetch_multipart_data_clone( return NULL; } + if (clone->file) { + clone->rawfile = strdup(list->rawfile); + if (clone->rawfile == NULL) { + free(clone->value); + free(clone->name); + free(clone); + if (result != NULL) + fetch_multipart_data_destroy(result); + + return NULL; + } + } else { + clone->rawfile = NULL; + } + clone->next = NULL; if (result == NULL) @@ -652,11 +680,7 @@ struct fetch_multipart_data *fetch_multipart_data_clone( return result; } -/** - * Free a linked list of fetch_multipart_data. 
- * - * \param list Pointer to head of list to free - */ +/* exported interface documented in content/fetch.h */ void fetch_multipart_data_destroy(struct fetch_multipart_data *list) { struct fetch_multipart_data *next; @@ -665,10 +689,15 @@ void fetch_multipart_data_destroy(struct fetch_multipart_data *list) next = list->next; free(list->name); free(list->value); + if (list->file) { + FETCH_LOG(("Freeing rawfile: %s", list->rawfile)); + free(list->rawfile); + } free(list); } } +/* exported interface documented in content/fetch.h */ void fetch_send_callback(const fetch_msg *msg, struct fetch *fetch) { @@ -676,42 +705,42 @@ fetch_send_callback(const fetch_msg *msg, struct fetch *fetch) } +/* exported interface documented in content/fetch.h */ void fetch_remove_from_queues(struct fetch *fetch) { - int all_active, all_queued; + FETCH_LOG(("Fetch %p, fetcher %p can be freed", + fetch, fetch->fetcher_handle)); /* Go ahead and free the fetch properly now */ -#ifdef DEBUG_FETCH_VERBOSE - LOG(("Fetch %p, fetcher %p can be freed", fetch, fetch->fetcher_handle)); -#endif - if (fetch->fetch_is_active) { RING_REMOVE(fetch_ring, fetch); } else { RING_REMOVE(queue_ring, fetch); } +#ifdef DEBUG_FETCH_VERBOSE + int all_active; + int all_queued; + RING_GETSIZE(struct fetch, fetch_ring, all_active); RING_GETSIZE(struct fetch, queue_ring, all_queued); - fetch_active = (all_active > 0); - -#ifdef DEBUG_FETCH_VERBOSE LOG(("Fetch ring is now %d elements.", all_active)); + LOG(("Queue ring is now %d elements.", all_queued)); #endif } -void -fetch_set_http_code(struct fetch *fetch, long http_code) +/* exported interface documented in content/fetch.h */ +void fetch_set_http_code(struct fetch *fetch, long http_code) { -#ifdef DEBUG_FETCH_VERBOSE - LOG(("Setting HTTP code to %ld", http_code)); -#endif + FETCH_LOG(("Setting HTTP code to %ld", http_code)); + fetch->http_code = http_code; } +/* exported interface documented in content/fetch.h */ const char *fetch_get_referer_to_send(struct fetch 
*fetch) { if (fetch->send_referer) @@ -719,8 +748,8 @@ const char *fetch_get_referer_to_send(struct fetch *fetch) return NULL; } -void -fetch_set_cookie(struct fetch *fetch, const char *data) +/* exported interface documented in content/fetch.h */ +void fetch_set_cookie(struct fetch *fetch, const char *data) { assert(fetch && data); @@ -743,4 +772,3 @@ fetch_set_cookie(struct fetch *fetch, const char *data) urldb_set_cookie(data, fetch->url, fetch->referer); } } - diff --git a/content/fetch.h b/content/fetch.h index d23b3cd4b..37539ef2b 100644 --- a/content/fetch.h +++ b/content/fetch.h @@ -25,8 +25,6 @@ #include <stdbool.h> -#include <libwapcaplet/libwapcaplet.h> - #include "utils/config.h" #include "utils/nsurl.h" @@ -79,6 +77,7 @@ struct fetch_multipart_data { bool file; /**< Item is a file */ char *name; /**< Name of item */ char *value; /**< Item value */ + char *rawfile; /**< Raw filename if file is true */ struct fetch_multipart_data *next; /**< Next in linked list */ }; @@ -94,76 +93,110 @@ struct ssl_cert_info { int cert_type; /**< Certificate type */ }; -extern bool fetch_active; - typedef void (*fetch_callback)(const fetch_msg *msg, void *p); - -void fetch_init(void); -struct fetch * fetch_start(nsurl *url, nsurl *referer, - fetch_callback callback, - void *p, bool only_2xx, const char *post_urlenc, - const struct fetch_multipart_data *post_multipart, - bool verifiable, bool downgrade_tls, - const char *headers[]); +/** + * Start fetching data for the given URL. + * + * The function returns immediately. The fetch may be queued for later + * processing. + * + * A pointer to an opaque struct fetch is returned, which can be passed to + * fetch_abort() to abort the fetch at any time. Returns NULL if memory is + * exhausted (or some other fatal error occurred). + * + * The caller must supply a callback function which is called when anything + * interesting happens. 
The callback function is first called with msg + * FETCH_HEADER, with the header in data, then one or more times + * with FETCH_DATA with some data for the url, and finally with + * FETCH_FINISHED. Alternatively, FETCH_ERROR indicates an error occurred: + * data contains an error message. FETCH_REDIRECT may replace the FETCH_HEADER, + * FETCH_DATA, FETCH_FINISHED sequence if the server sends a replacement URL. + * + */ +struct fetch *fetch_start(nsurl *url, nsurl *referer, + fetch_callback callback, + void *p, bool only_2xx, const char *post_urlenc, + const struct fetch_multipart_data *post_multipart, + bool verifiable, bool downgrade_tls, + const char *headers[]); + +/** + * Abort a fetch. + */ void fetch_abort(struct fetch *f); -void fetch_poll(void); -void fetch_quit(void); -const char *fetch_filetype(const char *unix_path); -char *fetch_mimetype(const char *ro_path); + + +/** + * Check if a URL's scheme can be fetched. + * + * \param url URL to check + * \return true if the scheme is supported + */ bool fetch_can_fetch(const nsurl *url); -void fetch_change_callback(struct fetch *fetch, - fetch_callback callback, - void *p); + +/** + * Change the callback function for a fetch. + */ +void fetch_change_callback(struct fetch *fetch, fetch_callback callback, void *p); + +/** + * Get the HTTP response code. + */ long fetch_http_code(struct fetch *fetch); + +/** + * Determine if a fetch was verifiable + * + * \param fetch Fetch to consider + * \return Verifiable status of fetch + */ bool fetch_get_verifiable(struct fetch *fetch); +/** + * Free a linked list of fetch_multipart_data. 
+ * + * \param list Pointer to head of list to free + */ void fetch_multipart_data_destroy(struct fetch_multipart_data *list); -struct fetch_multipart_data *fetch_multipart_data_clone( - const struct fetch_multipart_data *list); - -/* API for fetchers themselves */ - -typedef bool (*fetcher_initialise)(lwc_string *scheme); -typedef bool (*fetcher_can_fetch)(const nsurl *url); -typedef void *(*fetcher_setup_fetch)(struct fetch *parent_fetch, nsurl *url, - bool only_2xx, bool downgrade_tls, const char *post_urlenc, - const struct fetch_multipart_data *post_multipart, - const char **headers); -typedef bool (*fetcher_start_fetch)(void *fetch); -typedef void (*fetcher_abort_fetch)(void *fetch); -typedef void (*fetcher_free_fetch)(void *fetch); -typedef void (*fetcher_poll_fetcher)(lwc_string *scheme); -typedef void (*fetcher_finalise)(lwc_string *scheme); - -/** Register a fetcher for a scheme + +/** + * Clone a linked list of fetch_multipart_data. * - * \param scheme scheme fetcher is for (caller relinquishes ownership) - * \param initialiser fetcher initialiser - * \param can_fetch fetcher can fetch function - * \param setup_fetch fetcher fetch setup function - * \param start_fetch fetcher fetch start function - * \param abort_fetch fetcher fetch abort function - * \param free_fetch fetcher fetch free function - * \param poll_fetcher fetcher poll function - * \param finaliser fetcher finaliser - * \return true iff success - */ -bool fetch_add_fetcher(lwc_string *scheme, - fetcher_initialise initialiser, - fetcher_can_fetch can_fetch, - fetcher_setup_fetch setup_fetch, - fetcher_start_fetch start_fetch, - fetcher_abort_fetch abort_fetch, - fetcher_free_fetch free_fetch, - fetcher_poll_fetcher poll_fetcher, - fetcher_finalise finaliser); + * \param list List to clone + * \return Pointer to head of cloned list, or NULL on failure + */ +struct fetch_multipart_data *fetch_multipart_data_clone(const struct fetch_multipart_data *list); +/** + * send message to fetch + */ 
void fetch_send_callback(const fetch_msg *msg, struct fetch *fetch); + +/** + * remove a queued fetch + */ void fetch_remove_from_queues(struct fetch *fetch); + +/** + * Free a fetch structure and associated resources. + */ void fetch_free(struct fetch *f); + +/** + * set the http code of a fetch + */ void fetch_set_http_code(struct fetch *fetch, long http_code); + +/** + * get the referer from the fetch + */ const char *fetch_get_referer_to_send(struct fetch *fetch); + +/** + * set cookie data on a fetch + */ void fetch_set_cookie(struct fetch *fetch, const char *data); + #endif diff --git a/content/fetchers.h b/content/fetchers.h new file mode 100644 index 000000000..95034bbff --- /dev/null +++ b/content/fetchers.h @@ -0,0 +1,152 @@ +/* + * Copyright 2014 Vincent Sanders <vince@netsurf-browser.org> + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** + * \file content/fetchers.h + * + * Interface for fetchers factory. + */ + +#ifndef _NETSURF_DESKTOP_FETCHERS_H_ +#define _NETSURF_DESKTOP_FETCHERS_H_ + +#include "utils/config.h" +#include <libwapcaplet/libwapcaplet.h> + +struct nsurl; +struct fetch_multipart_data; +struct fetch; + +/** + * Fetcher operations API + * + * These are the operations a fetcher must implement. + * + * Each fetcher is called once for initialisation and finalisation. 
+ * The poll entry point will be called to allow all active fetches to progress. + * The flow of a fetch operation is: + * URL is checked for acceptability. + * setup with all applicable data. + * start is called before the first poll + * after completion or abort it is freed + * + */ +struct fetcher_operation_table { + /** + * The initialiser for the fetcher. + * + * Called once to initialise the fetcher. + */ + bool (*initialise)(lwc_string *scheme); + + /** + * Can this fetcher accept a url. + * + * \param url the URL to check + * \return true if the fetcher can handle the url else false. + */ + bool (*acceptable)(const struct nsurl *url); + + /** + * Setup a fetch + */ + void *(*setup)(struct fetch *parent_fetch, struct nsurl *url, + bool only_2xx, bool downgrade_tls, const char *post_urlenc, + const struct fetch_multipart_data *post_multipart, + const char **headers); + + /** + * start a fetch. + */ + bool (*start)(void *fetch); + + /** + * abort a fetch. + */ + void (*abort)(void *fetch); + + /** + * free a fetch allocated through the setup method. + */ + void (*free)(void *fetch); + + /** + * poll a fetcher to let it make progress. + */ + void (*poll)(lwc_string *scheme); + + /** + * Finalise the fetcher. + */ + void (*finalise)(lwc_string *scheme); +}; + + +/** + * Register a fetcher for a scheme + * + * \param scheme The scheme fetcher is for (caller relinquishes ownership) + * \param ops The operations for the fetcher. + * \return NSERROR_OK or appropriate error code. + */ +nserror fetcher_add(lwc_string *scheme, const struct fetcher_operation_table *ops); + + +/** + * Initialise all registered fetchers. + * + * \return NSERROR_OK or error code + */ +nserror fetcher_init(void); + + +/** + * Clean up for quit. + * + * Must be called before exiting. + */ +void fetcher_quit(void); + + +/** + * Get the set of file descriptors the fetchers are currently using. + * + * This obtains the file descriptors the fetch system is using to + * obtain data. 
It will cause the fetchers to make progress, if + * possible, potentially completing fetches before requiring activity + * on file descriptors. + * + * If a set of descriptors is returned (maxfd is not -1) the caller is + * expected to wait on them (with select etc.) and continue to obtain + * the fdset with this call. This will switch the fetchers from polled + * mode to waiting for network activity which is much more efficient. + * + * \note If the caller does not subsequently obtain the fdset again + * the fetchers will fall back to the less efficient polled + * operation. The fallback to polled operation will only occur after + * a timeout which introduces additional delay. + * + * \param[out] read_fd_set The fd set for read. + * \param[out] write_fd_set The fd set for write. + * \param[out] except_fd_set The fd set for exceptions. + * \param[out] maxfd The highest fd number in the set or -1 if no fd available. + * \return NSERROR_OK on success or appropriate error code. + */ +nserror fetcher_fdset(fd_set *read_fd_set, fd_set *write_fd_set, fd_set *except_fd_set, int *maxfd); + +#endif diff --git a/content/fetchers/Makefile b/content/fetchers/Makefile index 06e38e4c8..855154232 100644 --- a/content/fetchers/Makefile +++ b/content/fetchers/Makefile @@ -5,4 +5,4 @@ S_FETCHERS := curl.c data.c file.c about.c resource.c S_FETCHERS := $(addprefix content/fetchers/,$(S_FETCHERS)) # The following files depend on the testament -content/fetchers/about.c: testament utils/testament.h +content/fetchers/about.c: testament $(OBJROOT)/testament.h diff --git a/content/fetchers/about.c b/content/fetchers/about.c index cac8b2b01..8c4d29a91 100644 --- a/content/fetchers/about.c +++ b/content/fetchers/about.c @@ -41,19 +41,20 @@ #include <libwapcaplet/libwapcaplet.h> +#include "testament.h" + #include "utils/config.h" -#include "content/dirlist.h" #include "content/fetch.h" +#include "content/fetchers.h" #include "content/fetchers/about.h" #include "content/urldb.h" #include 
"desktop/netsurf.h" #include "utils/nsoption.h" +#include "utils/corestrings.h" #include "utils/log.h" #include "utils/messages.h" -#include "utils/url.h" #include "utils/utils.h" #include "utils/ring.h" -#include "utils/testament.h" #include "image/image_cache.h" struct fetch_about_context; @@ -489,8 +490,8 @@ static bool fetch_about_testament_handler(struct fetch_about_context *ctx) slen = snprintf(buffer, sizeof buffer, - "Built by %s (%s) from %s at revision %s\n\n", - GECOS, USERNAME, WT_BRANCHPATH, WT_REVID); + "Built by %s (%s) from %s at revision %s on %s\n\n", + GECOS, USERNAME, WT_BRANCHPATH, WT_REVID, WT_COMPILEDATE); msg.data.header_or_data.len = slen; if (fetch_about_send_callback(&msg, ctx)) @@ -837,23 +838,19 @@ static void fetch_about_poll(lwc_string *scheme) } while ( (c = next) != ring && ring != NULL); } -void fetch_about_register(void) +nserror fetch_about_register(void) { - lwc_string *scheme; - - if (lwc_intern_string("about", SLEN("about"), - &scheme) != lwc_error_ok) { - die("Failed to initialise the fetch module " - "(couldn't intern \"about\")."); - } - - fetch_add_fetcher(scheme, - fetch_about_initialise, - fetch_about_can_fetch, - fetch_about_setup, - fetch_about_start, - fetch_about_abort, - fetch_about_free, - fetch_about_poll, - fetch_about_finalise); + lwc_string *scheme = lwc_string_ref(corestring_lwc_about); + const struct fetcher_operation_table fetcher_ops = { + .initialise = fetch_about_initialise, + .acceptable = fetch_about_can_fetch, + .setup = fetch_about_setup, + .start = fetch_about_start, + .abort = fetch_about_abort, + .free = fetch_about_free, + .poll = fetch_about_poll, + .finalise = fetch_about_finalise + }; + + return fetcher_add(scheme, &fetcher_ops); } diff --git a/content/fetchers/about.h b/content/fetchers/about.h index f22be6a5d..9544971a6 100644 --- a/content/fetchers/about.h +++ b/content/fetchers/about.h @@ -23,6 +23,6 @@ #ifndef NETSURF_CONTENT_FETCHERS_FETCH_ABOUT_H #define 
NETSURF_CONTENT_FETCHERS_FETCH_ABOUT_H -void fetch_about_register(void); +nserror fetch_about_register(void); #endif diff --git a/content/fetchers/curl.c b/content/fetchers/curl.c index 1dfc44631..51b0f9974 100644 --- a/content/fetchers/curl.c +++ b/content/fetchers/curl.c @@ -19,7 +19,7 @@ */ /** \file - * Fetching of data from a URL (implementation). + * Fetching of data from an URL (implementation). * * This implementation uses libcurl's 'multi' interface. * @@ -36,32 +36,27 @@ #include <strings.h> #include <time.h> #include <sys/stat.h> +#include <openssl/ssl.h> #include <libwapcaplet/libwapcaplet.h> #include "utils/config.h" -#include <openssl/ssl.h> -#include "content/fetch.h" -#include "content/fetchers/curl.h" -#include "content/urldb.h" #include "desktop/netsurf.h" +#include "desktop/gui_factory.h" +#include "utils/corestrings.h" #include "utils/nsoption.h" #include "utils/log.h" #include "utils/messages.h" -#include "utils/schedule.h" #include "utils/utils.h" #include "utils/ring.h" #include "utils/useragent.h" +#include "utils/file.h" -/* BIG FAT WARNING: This is here because curl doesn't give you an FD to - * poll on, until it has processed a bit of the handle. So we need schedules - * in order to make this work. 
- */ -#include <desktop/browser.h> +#include "content/fetch.h" +#include "content/fetchers.h" +#include "content/fetchers/curl.h" +#include "content/urldb.h" -/* uncomment this to use scheduler based calling -#define FETCHER_CURLL_SCHEDULED 1 -*/ /** SSL certificate info */ struct cert_info { @@ -163,6 +158,16 @@ void fetch_curl_register(void) curl_version_info_data *data; int i; lwc_string *scheme; + const struct fetcher_operation_table fetcher_ops = { + .initialise = fetch_curl_initialise, + .acceptable = fetch_curl_can_fetch, + .setup = fetch_curl_setup, + .start = fetch_curl_start, + .abort = fetch_curl_abort, + .free = fetch_curl_free, + .poll = fetch_curl_poll, + .finalise = fetch_curl_finalise + }; LOG(("curl_version %s", curl_version())); @@ -176,6 +181,25 @@ void fetch_curl_register(void) die("Failed to initialise the fetch module " "(curl_multi_init failed)."); +#if LIBCURL_VERSION_NUM >= 0x071e00 + /* We've been built against 7.30.0 or later: configure caching */ + { + CURLMcode mcode; + int maxconnects = nsoption_int(max_fetchers) + + nsoption_int(max_cached_fetch_handles); + +#undef SETOPT +#define SETOPT(option, value) \ + mcode = curl_multi_setopt(fetch_curl_multi, option, value); \ + if (mcode != CURLM_OK) \ + goto curl_multi_setopt_failed; + + SETOPT(CURLMOPT_MAXCONNECTS, maxconnects); + SETOPT(CURLMOPT_MAX_TOTAL_CONNECTIONS, maxconnects); + SETOPT(CURLMOPT_MAX_HOST_CONNECTIONS, nsoption_int(max_fetchers_per_host)); + } +#endif + /* Create a curl easy handle with the options that are common to all fetches. 
*/ fetch_blank_curl = curl_easy_init(); @@ -235,37 +259,17 @@ void fetch_curl_register(void) for (i = 0; data->protocols[i]; i++) { if (strcmp(data->protocols[i], "http") == 0) { - if (lwc_intern_string("http", SLEN("http"), - &scheme) != lwc_error_ok) { - die("Failed to initialise the fetch module " - "(couldn't intern \"http\")."); - } + scheme = lwc_string_ref(corestring_lwc_http); } else if (strcmp(data->protocols[i], "https") == 0) { - if (lwc_intern_string("https", SLEN("https"), - &scheme) != lwc_error_ok) { - die("Failed to initialise the fetch module " - "(couldn't intern \"https\")."); - } + scheme = lwc_string_ref(corestring_lwc_https); } else { /* Ignore non-http(s) protocols */ continue; } - if (!fetch_add_fetcher(scheme, - fetch_curl_initialise, - fetch_curl_can_fetch, - fetch_curl_setup, - fetch_curl_start, - fetch_curl_abort, - fetch_curl_free, -#ifdef FETCHER_CURLL_SCHEDULED - NULL, -#else - fetch_curl_poll, -#endif - fetch_curl_finalise)) { + if (fetcher_add(scheme, &fetcher_ops) != NSERROR_OK) { LOG(("Unable to register cURL fetcher for %s", data->protocols[i])); } @@ -275,6 +279,12 @@ void fetch_curl_register(void) curl_easy_setopt_failed: die("Failed to initialise the fetch module " "(curl_easy_setopt failed)."); + +#if LIBCURL_VERSION_NUM >= 0x071e00 +curl_multi_setopt_failed: + die("Failed to initialise the fetch module " + "(curl_multi_setopt failed)."); +#endif } @@ -490,9 +500,7 @@ bool fetch_curl_initiate_fetch(struct curl_fetch_info *fetch, CURL *handle) /* add to the global curl multi handle */ codem = curl_multi_add_handle(fetch_curl_multi, fetch->curl_handle); assert(codem == CURLM_OK || codem == CURLM_CALL_MULTI_PERFORM); - - schedule(1, (schedule_callback_fn)fetch_curl_poll, NULL); - + return true; } @@ -524,6 +532,11 @@ CURL *fetch_curl_get_handle(lwc_string *host) void fetch_curl_cache_handle(CURL *handle, lwc_string *host) { +#if LIBCURL_VERSION_NUM >= 0x071e00 + /* 7.30.0 or later has its own connection caching; suppress ours */ 
+ curl_easy_cleanup(handle); + return; +#else struct cache_handle *h = 0; int c; RING_FINDBYLWCHOST(curl_handle_ring, h, host); @@ -561,6 +574,7 @@ void fetch_curl_cache_handle(CURL *handle, lwc_string *host) h->handle = handle; h->host = lwc_string_ref(host); RING_INSERT(curl_handle_ring, h); +#endif } @@ -687,16 +701,14 @@ fetch_curl_sslctxfun(CURL *curl_handle, void *_sslctx, void *parm) parm); if (f->downgrade_tls) { + /* Disable TLS 1.1/1.2 if the server can't cope with them */ #ifdef SSL_OP_NO_TLSv1_1 - /* Disable TLS1.1, if the server can't cope with it */ options |= SSL_OP_NO_TLSv1_1; #endif - } - #ifdef SSL_OP_NO_TLSv1_2 - /* Disable TLS1.2, as it causes some servers to stall. */ - options |= SSL_OP_NO_TLSv1_2; + options |= SSL_OP_NO_TLSv1_2; #endif + } SSL_CTX_set_options(sslctx, options); @@ -817,12 +829,6 @@ void fetch_curl_poll(lwc_string *scheme_ignored) } curl_msg = curl_multi_info_read(fetch_curl_multi, &queue); } - -#ifdef FETCHER_CURLL_SCHEDULED - if (running != 0) { - schedule(1, (schedule_callback_fn)fetch_curl_poll, fetch_curl_poll); - } -#endif } @@ -915,10 +921,12 @@ void fetch_curl_done(CURL *curl_handle, CURLcode result) BIO_get_mem_ptr(mem, &buf); (void) BIO_set_close(mem, BIO_NOCLOSE); BIO_free(mem); - snprintf(ssl_certs[i].not_before, - min(sizeof ssl_certs[i].not_before, - (unsigned) buf->length + 1), - "%s", buf->data); + memcpy(ssl_certs[i].not_before, + buf->data, + min(sizeof(ssl_certs[i].not_before) - 1, + (unsigned)buf->length)); + ssl_certs[i].not_before[min(sizeof(ssl_certs[i].not_before) - 1, + (unsigned)buf->length)] = 0; BUF_MEM_free(buf); mem = BIO_new(BIO_s_mem()); @@ -927,10 +935,13 @@ void fetch_curl_done(CURL *curl_handle, CURLcode result) BIO_get_mem_ptr(mem, &buf); (void) BIO_set_close(mem, BIO_NOCLOSE); BIO_free(mem); - snprintf(ssl_certs[i].not_after, - min(sizeof ssl_certs[i].not_after, - (unsigned) buf->length + 1), - "%s", buf->data); + memcpy(ssl_certs[i].not_after, + buf->data, + 
min(sizeof(ssl_certs[i].not_after) - 1, + (unsigned)buf->length)); + ssl_certs[i].not_after[min(sizeof(ssl_certs[i].not_after) - 1, + (unsigned)buf->length)] = 0; + BUF_MEM_free(buf); ssl_certs[i].sig_type = @@ -946,24 +957,30 @@ void fetch_curl_done(CURL *curl_handle, CURLcode result) BIO_get_mem_ptr(mem, &buf); (void) BIO_set_close(mem, BIO_NOCLOSE); BIO_free(mem); - snprintf(ssl_certs[i].issuer, - min(sizeof ssl_certs[i].issuer, - (unsigned) buf->length + 1), - "%s", buf->data); + memcpy(ssl_certs[i].issuer, + buf->data, + min(sizeof(ssl_certs[i].issuer) - 1, + (unsigned) buf->length)); + ssl_certs[i].issuer[min(sizeof(ssl_certs[i].issuer) - 1, + (unsigned) buf->length)] = 0; BUF_MEM_free(buf); mem = BIO_new(BIO_s_mem()); X509_NAME_print_ex(mem, X509_get_subject_name(certs[i].cert), - 0, XN_FLAG_SEP_CPLUS_SPC | - XN_FLAG_DN_REV | XN_FLAG_FN_NONE); + 0, + XN_FLAG_SEP_CPLUS_SPC | + XN_FLAG_DN_REV | + XN_FLAG_FN_NONE); BIO_get_mem_ptr(mem, &buf); (void) BIO_set_close(mem, BIO_NOCLOSE); BIO_free(mem); - snprintf(ssl_certs[i].subject, - min(sizeof ssl_certs[i].subject, - (unsigned) buf->length + 1), - "%s", buf->data); + memcpy(ssl_certs[i].subject, + buf->data, + min(sizeof(ssl_certs[i].subject) - 1, + (unsigned)buf->length)); + ssl_certs[i].subject[min(sizeof(ssl_certs[i].subject) - 1, + (unsigned) buf->length)] = 0; BUF_MEM_free(buf); ssl_certs[i].cert_type = @@ -1263,15 +1280,15 @@ fetch_curl_post_convert(const struct fetch_multipart_data *control) { struct curl_httppost *post = 0, *last = 0; CURLFORMcode code; + nserror ret; for (; control; control = control->next) { if (control->file) { - char *leafname = 0; - - leafname = filename_from_path(control->value); - - if (leafname == NULL) + char *leafname = NULL; + ret = guit->file->basename(control->value, &leafname, NULL); + if (ret != NSERROR_OK) { continue; + } /* We have to special case filenames of "", so curl * a) actually attempts the fetch and @@ -1298,10 +1315,10 @@ fetch_curl_post_convert(const struct 
fetch_multipart_data *control) LOG(("curl_formadd: %d (%s)", code, control->name)); } else { - char *mimetype = fetch_mimetype(control->value); + char *mimetype = guit->fetch->mimetype(control->value); code = curl_formadd(&post, &last, CURLFORM_COPYNAME, control->name, - CURLFORM_FILE, control->value, + CURLFORM_FILE, control->rawfile, CURLFORM_FILENAME, leafname, CURLFORM_CONTENTTYPE, (mimetype != 0 ? mimetype : "text/plain"), diff --git a/content/fetchers/data.c b/content/fetchers/data.c index fbaa24780..94ba63827 100644 --- a/content/fetchers/data.c +++ b/content/fetchers/data.c @@ -31,13 +31,14 @@ #include "utils/config.h" #include "content/fetch.h" +#include "content/fetchers.h" #include "content/fetchers/data.h" #include "content/urldb.h" #include "desktop/netsurf.h" +#include "utils/corestrings.h" #include "utils/nsoption.h" #include "utils/log.h" #include "utils/messages.h" -#include "utils/url.h" #include "utils/utils.h" #include "utils/ring.h" #include "utils/base64.h" @@ -324,22 +325,19 @@ static void fetch_data_poll(lwc_string *scheme) } while ( (c = next) != ring && ring != NULL); } -void fetch_data_register(void) +nserror fetch_data_register(void) { - lwc_string *scheme; - - if (lwc_intern_string("data", SLEN("data"), &scheme) != lwc_error_ok) { - die("Failed to initialise the fetch module " - "(couldn't intern \"data\")."); - } - - fetch_add_fetcher(scheme, - fetch_data_initialise, - fetch_data_can_fetch, - fetch_data_setup, - fetch_data_start, - fetch_data_abort, - fetch_data_free, - fetch_data_poll, - fetch_data_finalise); + lwc_string *scheme = lwc_string_ref(corestring_lwc_data); + const struct fetcher_operation_table fetcher_ops = { + .initialise = fetch_data_initialise, + .acceptable = fetch_data_can_fetch, + .setup = fetch_data_setup, + .start = fetch_data_start, + .abort = fetch_data_abort, + .free = fetch_data_free, + .poll = fetch_data_poll, + .finalise = fetch_data_finalise + }; + + return fetcher_add(scheme, &fetcher_ops); } diff --git 
a/content/fetchers/data.h b/content/fetchers/data.h index 76f02cb3b..f6017e07a 100644 --- a/content/fetchers/data.h +++ b/content/fetchers/data.h @@ -23,6 +23,6 @@ #ifndef NETSURF_CONTENT_FETCHERS_FETCH_DATA_H #define NETSURF_CONTENT_FETCHERS_FETCH_DATA_H -void fetch_data_register(void); +nserror fetch_data_register(void); #endif diff --git a/content/fetchers/file.c b/content/fetchers/file.c index c574c2160..f08be6288 100644 --- a/content/fetchers/file.c +++ b/content/fetchers/file.c @@ -18,6 +18,8 @@ /* file: URL handling. Based on the data fetcher by Rob Kendrick */ +#include "utils/config.h" + #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> @@ -34,26 +36,28 @@ #include <limits.h> #include <stdarg.h> -#include "utils/config.h" - #ifdef HAVE_MMAP #include <sys/mman.h> #endif #include <libwapcaplet/libwapcaplet.h> -#include "content/dirlist.h" -#include "content/fetch.h" -#include "content/fetchers/file.h" -#include "content/urldb.h" #include "desktop/netsurf.h" +#include "desktop/gui_factory.h" +#include "utils/corestrings.h" #include "utils/nsoption.h" #include "utils/errors.h" #include "utils/log.h" #include "utils/messages.h" -#include "utils/url.h" #include "utils/utils.h" #include "utils/ring.h" +#include "utils/file.h" + +#include "content/dirlist.h" +#include "content/fetch.h" +#include "content/fetchers.h" +#include "content/urldb.h" +#include "content/fetchers/file.h" /* Maximum size of read buffer */ #define FETCH_FILE_MAX_BUF_SIZE (1024 * 1024) @@ -135,13 +139,14 @@ fetch_file_setup(struct fetch *fetchh, { struct fetch_file_context *ctx; int i; + nserror ret; ctx = calloc(1, sizeof(*ctx)); if (ctx == NULL) return NULL; - ctx->path = url_to_path(nsurl_access(url)); - if (ctx->path == NULL) { + ret = guit->file->nsurl_to_path(url, &ctx->path); + if (ret != NSERROR_OK) { free(ctx); return NULL; } @@ -304,7 +309,7 @@ static void fetch_file_process_plain(struct fetch_file_context *ctx, /* content type */ if (fetch_file_send_header(ctx, 
"Content-Type: %s", - fetch_filetype(ctx->path))) + guit->fetch->filetype(ctx->path))) goto fetch_file_process_aborted; /* content length */ @@ -384,7 +389,7 @@ fetch_file_process_aborted: /* content type */ if (fetch_file_send_header(ctx, "Content-Type: %s", - fetch_filetype(ctx->path))) + guit->fetch->filetype(ctx->path))) goto fetch_file_process_aborted; /* content length */ @@ -487,6 +492,98 @@ static char *gen_nice_title(char *path) return title; } +/** + * generate an output row of the directory listing. + * + * @param ent current directory entry. + */ +static nserror +process_dir_ent(struct fetch_file_context *ctx, + struct dirent *ent, + bool even, + char *buffer, + size_t buffer_len) +{ + nserror ret; + char *urlpath = NULL; /* buffer for leaf entry path */ + struct stat ent_stat; /* stat result of leaf entry */ + char datebuf[64]; /* buffer for date text */ + char timebuf[64]; /* buffer for time text */ + nsurl *url; + + /* skip hidden files */ + if (ent->d_name[0] == '.') { + return NSERROR_BAD_PARAMETER; + } + + ret = netsurf_mkpath(&urlpath, NULL, 2, ctx->path, ent->d_name); + if (ret != NSERROR_OK) { + return ret; + } + + if (stat(urlpath, &ent_stat) != 0) { + ent_stat.st_mode = 0; + datebuf[0] = 0; + timebuf[0] = 0; + } else { + /* Get date in output format */ + if (strftime((char *)&datebuf, sizeof datebuf, "%a %d %b %Y", + localtime(&ent_stat.st_mtime)) == 0) { + datebuf[0] = '-'; + datebuf[1] = 0; + } + + /* Get time in output format */ + if (strftime((char *)&timebuf, sizeof timebuf, "%H:%M", + localtime(&ent_stat.st_mtime)) == 0) { + timebuf[0] = '-'; + timebuf[1] = 0; + } + } + + ret = guit->file->path_to_nsurl(urlpath, &url); + if (ret != NSERROR_OK) { + free(urlpath); + return ret; + } + + if (S_ISREG(ent_stat.st_mode)) { + /* regular file */ + dirlist_generate_row(even, + false, + url, + ent->d_name, + guit->fetch->filetype(urlpath), + ent_stat.st_size, + datebuf, timebuf, + buffer, buffer_len); + } else if (S_ISDIR(ent_stat.st_mode)) { + /* 
directory */ + dirlist_generate_row(even, + true, + url, + ent->d_name, + messages_get("FileDirectory"), + -1, + datebuf, timebuf, + buffer, buffer_len); + } else { + /* something else */ + dirlist_generate_row(even, + false, + url, + ent->d_name, + "", + -1, + datebuf, timebuf, + buffer, buffer_len); + } + + nsurl_unref(url); + free(urlpath); + + return NSERROR_OK; +} static void fetch_file_process_dir(struct fetch_file_context *ctx, struct stat *fdstat) @@ -497,13 +594,7 @@ static void fetch_file_process_dir(struct fetch_file_context *ctx, char *title; /* pretty printed title */ nserror err; /* result from url routines */ nsurl *up; /* url of parent */ - char *path; /* url for list entries */ - struct stat ent_stat; /* stat result of leaf entry */ - char datebuf[64]; /* buffer for date text */ - char timebuf[64]; /* buffer for time text */ - char urlpath[PATH_MAX]; /* buffer for leaf entry path */ - struct dirent *ent; /* current directory entry */ struct dirent **listing = NULL; /* directory entry listing */ int i; /* directory entry index */ int n; /* number of directory entries */ @@ -568,78 +659,17 @@ static void fetch_file_process_dir(struct fetch_file_context *ctx, goto fetch_file_process_dir_aborted; for (i = 0; i < n; i++) { - ent = listing[i]; - - if (ent->d_name[0] == '.') - continue; - - strncpy(urlpath, ctx->path, sizeof urlpath); - if (path_add_part(urlpath, sizeof urlpath, - ent->d_name) == false) - continue; - - if (stat(urlpath, &ent_stat) != 0) { - ent_stat.st_mode = 0; - datebuf[0] = 0; - timebuf[0] = 0; - } else { - /* Get date in output format */ - if (strftime((char *)&datebuf, sizeof datebuf, - "%a %d %b %Y", - localtime(&ent_stat.st_mtime)) == 0) { - strncpy(datebuf, "-", sizeof datebuf); - } - /* Get time in output format */ - if (strftime((char *)&timebuf, sizeof timebuf, - "%H:%M", - localtime(&ent_stat.st_mtime)) == 0) { - strncpy(timebuf, "-", sizeof timebuf); - } - } + err = process_dir_ent(ctx, listing[i], even, buffer, + 
sizeof(buffer)); - if((path = path_to_url(urlpath)) == NULL) - continue; + if (err == NSERROR_OK) { + msg.data.header_or_data.len = strlen(buffer); + if (fetch_file_send_callback(&msg, ctx)) + goto fetch_file_process_dir_aborted; - if (S_ISREG(ent_stat.st_mode)) { - /* regular file */ - dirlist_generate_row(even, - false, - path, - ent->d_name, - fetch_filetype(urlpath), - ent_stat.st_size, - datebuf, timebuf, - buffer, sizeof(buffer)); - } else if (S_ISDIR(ent_stat.st_mode)) { - /* directory */ - dirlist_generate_row(even, - true, - path, - ent->d_name, - messages_get("FileDirectory"), - -1, - datebuf, timebuf, - buffer, sizeof(buffer)); - } else { - /* something else */ - dirlist_generate_row(even, - false, - path, - ent->d_name, - "", - -1, - datebuf, timebuf, - buffer, sizeof(buffer)); + even = !even; } - - free(path); - - msg.data.header_or_data.len = strlen(buffer); - if (fetch_file_send_callback(&msg, ctx)) - goto fetch_file_process_dir_aborted; - - even = !even; } /* directory listing bottom */ @@ -731,22 +761,19 @@ static void fetch_file_poll(lwc_string *scheme) } while ( (c = next) != ring && ring != NULL); } -void fetch_file_register(void) +nserror fetch_file_register(void) { - lwc_string *scheme; - - if (lwc_intern_string("file", SLEN("file"), &scheme) != lwc_error_ok) { - die("Failed to initialise the fetch module " - "(couldn't intern \"file\")."); - } - - fetch_add_fetcher(scheme, - fetch_file_initialise, - fetch_file_can_fetch, - fetch_file_setup, - fetch_file_start, - fetch_file_abort, - fetch_file_free, - fetch_file_poll, - fetch_file_finalise); + lwc_string *scheme = lwc_string_ref(corestring_lwc_file); + const struct fetcher_operation_table fetcher_ops = { + .initialise = fetch_file_initialise, + .acceptable = fetch_file_can_fetch, + .setup = fetch_file_setup, + .start = fetch_file_start, + .abort = fetch_file_abort, + .free = fetch_file_free, + .poll = fetch_file_poll, + .finalise = fetch_file_finalise + }; + + return fetcher_add(scheme, 
&fetcher_ops); } diff --git a/content/fetchers/file.h b/content/fetchers/file.h index d1621b9ba..b3c39db9f 100644 --- a/content/fetchers/file.h +++ b/content/fetchers/file.h @@ -23,6 +23,6 @@ #ifndef NETSURF_CONTENT_FETCHERS_FETCH_FILE_H #define NETSURF_CONTENT_FETCHERS_FETCH_FILE_H -void fetch_file_register(void); +nserror fetch_file_register(void); #endif diff --git a/content/fetchers/resource.c b/content/fetchers/resource.c index 0119c6b3b..18e302140 100644 --- a/content/fetchers/resource.c +++ b/content/fetchers/resource.c @@ -37,17 +37,19 @@ #include <libwapcaplet/libwapcaplet.h> #include "utils/config.h" -#include "content/dirlist.h" -#include "content/fetch.h" -#include "content/fetchers/resource.h" -#include "content/urldb.h" -#include "desktop/gui.h" +#include "utils/errors.h" +#include "utils/corestrings.h" #include "utils/nsoption.h" #include "utils/log.h" #include "utils/messages.h" -#include "utils/url.h" #include "utils/utils.h" #include "utils/ring.h" +#include "desktop/gui_factory.h" + +#include "content/fetch.h" +#include "content/fetchers.h" +#include "content/fetchers/resource.h" +#include "content/urldb.h" struct fetch_resource_context; @@ -81,6 +83,7 @@ static const char *fetch_resource_paths[] = { "licence.html", "welcome.html", "favicon.ico", + "default.ico", "netsurf.png", "icons/arrow-l.png", "icons/content.png", @@ -206,7 +209,7 @@ static bool fetch_resource_initialise(lwc_string *scheme) } } - e->url = gui_get_resource_url(fetch_resource_paths[i]); + e->url = guit->fetch->get_resource_url(fetch_resource_paths[i]); if (e->url == NULL) { lwc_string_unref(e->path); } else { @@ -353,23 +356,19 @@ static void fetch_resource_poll(lwc_string *scheme) } while ( (c = next) != ring && ring != NULL); } -void fetch_resource_register(void) +nserror fetch_resource_register(void) { - lwc_string *scheme; - - if (lwc_intern_string("resource", SLEN("resource"), - &scheme) != lwc_error_ok) { - die("Failed to initialise the fetch module " - "(couldn't intern 
\"resource\")."); - } - - fetch_add_fetcher(scheme, - fetch_resource_initialise, - fetch_resource_can_fetch, - fetch_resource_setup, - fetch_resource_start, - fetch_resource_abort, - fetch_resource_free, - fetch_resource_poll, - fetch_resource_finalise); + lwc_string *scheme = lwc_string_ref(corestring_lwc_resource); + const struct fetcher_operation_table fetcher_ops = { + .initialise = fetch_resource_initialise, + .acceptable = fetch_resource_can_fetch, + .setup = fetch_resource_setup, + .start = fetch_resource_start, + .abort = fetch_resource_abort, + .free = fetch_resource_free, + .poll = fetch_resource_poll, + .finalise = fetch_resource_finalise + }; + + return fetcher_add(scheme, &fetcher_ops); } diff --git a/content/fetchers/resource.h b/content/fetchers/resource.h index 79d8e37c4..cf4d6edac 100644 --- a/content/fetchers/resource.h +++ b/content/fetchers/resource.h @@ -35,6 +35,6 @@ * * should only be called from the fetch initialise */ -void fetch_resource_register(void); +nserror fetch_resource_register(void); #endif diff --git a/content/fs_backing_store.c b/content/fs_backing_store.c new file mode 100644 index 000000000..fde17ed62 --- /dev/null +++ b/content/fs_backing_store.c @@ -0,0 +1,1256 @@ +/* + * Copyright 2014 Vincent Sanders <vince@netsurf-browser.org> + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ */
+
+/** \file
+ * Low-level resource cache persistent storage implementation.
+ *
+ * file based backing store.
+ *
+ * \todo Consider improving eviction sorting to include objects size
+ *       and remaining lifetime and other cost metrics.
+ *
+ * \todo make backing store have a more efficient small object storage.
+ *
+ */
+
+#include <unistd.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <time.h>
+#include <stdlib.h>
+
+#include "utils/filepath.h"
+#include "utils/file.h"
+#include "utils/nsurl.h"
+#include "utils/log.h"
+#include "utils/utils.h"
+#include "utils/messages.h"
+#include "desktop/gui.h"
+
+#include "content/backing_store.h"
+
+/** Default number of bits of the ident to use in index hash */
+#define DEFAULT_IDENT_SIZE 20
+
+/** Default number of bits to use for an entry index. */
+#define DEFAULT_ENTRY_SIZE 16
+
+/** Backing store file format version */
+#define CONTROL_VERSION 110
+
+/**
+ * Get address from ident
+ *
+ * The address is the low state->ident_bits bits of the ident. The mask
+ * is built from an unsigned constant so the shift cannot overflow a
+ * signed int, and both macro arguments are parenthesised so any
+ * expression may be passed.
+ */
+#define BS_ADDRESS(ident, state) ((ident) & ((1U << (state)->ident_bits) - 1))
+
+/**
+ * Lookup store entry index from ident
+ *
+ * Expands to an lvalue so it can be assigned to as well as read. An
+ * index of 0 is treated by the users in this file as "no entry".
+ */
+#define BS_ENTRY_INDEX(ident, state) ((state)->addrmap[(ident) & ((1U << (state)->ident_bits) - 1)])
+
+/**
+ * Get store entry from ident.
+ *
+ * Expands to the struct store_entry lvalue the address map refers to.
+ */
+#define BS_ENTRY(ident, state) ((state)->entries[(state)->addrmap[(ident) & ((1U << (state)->ident_bits) - 1)]])
+
+enum store_entry_flags {
+	STORE_ENTRY_FLAG_NONE = 0,
+};
+
+/**
+ * The type used to store index values referring to store entries. Care
+ * must be taken with this type as it is used to build address to
+ * entry mapping so changing the size will have large impacts on
+ * memory usage.
+ */
+typedef uint16_t entry_index_t;
+
+/**
+ * The type used as a binary identifier for each entry derived from
+ * the url. A larger identifier will have fewer collisions but
+ * requires proportionately more storage.
+ */
+typedef uint32_t entry_ident_t;
+
+/**
+ * Backing store object index entry.
+ *
+ * @note Order is important to avoid structure packing overhead.
+ */
+struct store_entry {
+	int64_t last_used; /**< unix time the entry was last used */
+	entry_ident_t ident; /**< entry identifier */
+	uint32_t data_alloc; /**< currently allocated size of data on disc */
+	uint32_t meta_alloc; /**< currently allocated size of metadata on disc */
+	uint16_t use_count; /**< number of times this entry has been accessed */
+	uint16_t flags; /**< entry flags (unused) */
+	uint16_t data_block; /**< small object data block entry (unused) */
+	uint16_t meta_block; /**< small object meta block entry (unused) */
+};
+
+/**
+ * Parameters controlling the backing store.
+ */
+struct store_state {
+	char *path; /**< The path to the backing store */
+	size_t limit; /**< The backing store upper bound target size */
+	size_t hysteresis; /**< The hysteresis around the target size */
+
+	unsigned int ident_bits; /**< log2 number of bits to use for address. */
+
+	struct store_entry *entries; /**< store entries. */
+	unsigned int entry_bits; /**< log2 number of bits in entry index. */
+	unsigned int last_entry; /**< index of the next entry to allocate, one past the last entry in use. */
+
+	/** flag set when the entries have been changed and not yet
+	 * been made persistent.
+	 */
+	bool entries_dirty;
+
+	/** URL identifier to entry index mapping.
+	 *
+	 * This is an open coded index on the entries ident field and
+	 * provides a computationally inexpensive way to go from the
+	 * url to an entry. A stored index of 0 means no entry.
+	 */
+	entry_index_t *addrmap;
+
+	uint64_t total_alloc; /**< total size of all allocated storage. */
+
+	size_t hit_count; /**< number of cache hits */
+	uint64_t hit_size; /**< size of storage served */
+	size_t miss_count; /**< number of cache misses */
+
+};
+
+/**
+ * Global storage state.
+ *
+ * @todo Investigate if there is a way to have a context rather than
+ * use a global.
+ */
+struct store_state *storestate;
+
+
+
+/**
+ * Remove a backing store entry from the entry table.
+ *
+ * This finds the store entry associated with the given identifier and
+ * removes it from the table. The removed entry is returned but is
+ * only valid until the next set_store_entry call.
+ *
+ * The table is kept free of gaps: unless the removed entry is already
+ * the last one it is swapped with the last entry and the address map
+ * is updated for the entry that moved. The entries are marked dirty so
+ * the removal is made persistent by the next write of the entries.
+ *
+ * @param state The store state to use.
+ * @param ident The entry identifier used as the unique key to search
+ *              entries for.
+ * @param bse Pointer used to return value.
+ * @return NSERROR_OK and bse updated on success or NSERROR_NOT_FOUND
+ *         if no entry corresponds to the identifier.
+ */
+static nserror
+remove_store_entry(struct store_state *state,
+		   entry_ident_t ident,
+		   struct store_entry **bse)
+{
+	entry_index_t sei; /* store entry index */
+
+	sei = BS_ENTRY_INDEX(ident, state);
+	if (sei == 0) {
+		LOG(("ident 0x%08x not in index", ident));
+		return NSERROR_NOT_FOUND;
+	}
+
+	if (state->entries[sei].ident != ident) {
+		/* entry ident did not match */
+		LOG(("ident 0x%08x did not match entry index %d", ident, sei));
+		return NSERROR_NOT_FOUND;
+	}
+
+	/* sei is entry to be removed, we swap it to the end of the
+	 * table so there are no gaps and the returned entry is held
+	 * in storage with reasonable lifetime.
+	 */
+
+	/* remove entry from map */
+	BS_ENTRY_INDEX(ident, state) = 0;
+
+	/* global allocation accounting */
+	state->total_alloc -= state->entries[sei].data_alloc;
+	state->total_alloc -= state->entries[sei].meta_alloc;
+
+	state->last_entry--;
+
+	if (sei == state->last_entry) {
+		/* the removed entry was the last one, how convenient */
+		*bse = &state->entries[sei];
+	} else {
+		/* need to swap entries */
+		struct store_entry tent;
+
+		tent = state->entries[sei];
+		state->entries[sei] = state->entries[state->last_entry];
+		state->entries[state->last_entry] = tent;
+
+		/* update map for moved entry */
+		BS_ENTRY_INDEX(state->entries[sei].ident, state) = sei;
+
+		*bse = &state->entries[state->last_entry];
+	}
+
+	/* the table no longer matches what was last written to disc */
+	state->entries_dirty = true;
+
+	return NSERROR_OK;
+}
+
+
+/**
+ * Generate a filename for an object.
+ *
+ * this generates the filename for an object on disc.
It is necessary
+ * for this to generate a filename which conforms to the limitations
+ * of all the filesystems the cache can be placed upon.
+ *
+ * From http://en.wikipedia.org/wiki/Comparison_of_file_systems#Limits
+ * the relevant subset is:
+ *   - path elements no longer than 8 characters
+ *   - acceptable characters are A-Z, 0-9
+ *   - short total path lengths (255 or less)
+ *
+ * The short total path lengths mean the encoding must represent as
+ * much data as possible in the least number of characters.
+ *
+ * To achieve all these goals we use RFC4648 base32 encoding which packs
+ * 5bits into each character of the filename.
+ *
+ * @note Version 1.00 of the cache implementation used base64 to
+ * encode this, however that did not meet the requirement for only
+ * using uppercase characters.
+ *
+ * @param state The store state to use.
+ * @param ident The identifier to use.
+ * @param flags The backing store flags; when BACKING_STORE_META is set
+ *              the "m" path element is used, otherwise "d".
+ * @return The filename string or NULL on allocation error.
+ */
+static char *
+store_fname(struct store_state *state,
+	    entry_ident_t ident,
+	    enum backing_store_flags flags)
+{
+	char *fname = NULL;
+	uint8_t b32u_i[8]; /* base32 encoded ident */
+	uint8_t b32u_d[6][2]; /* base32 ident as separate single character components */
+	const char *dat;
+
+	/* RFC4648 base32 encoding table */
+	static const uint8_t encoding_table[] = {
+		'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
+		'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P',
+		'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X',
+		'Y', 'Z', '2', '3', '4', '5', '6', '7'
+	};
+
+	/* base32 encode ident, least significant five bits first */
+	b32u_i[0] = b32u_d[0][0] = encoding_table[(ident      ) & 0x1f];
+	b32u_i[1] = b32u_d[1][0] = encoding_table[(ident >>  5) & 0x1f];
+	b32u_i[2] = b32u_d[2][0] = encoding_table[(ident >> 10) & 0x1f];
+	b32u_i[3] = b32u_d[3][0] = encoding_table[(ident >> 15) & 0x1f];
+	b32u_i[4] = b32u_d[4][0] = encoding_table[(ident >> 20) & 0x1f];
+	b32u_i[5] = b32u_d[5][0] = encoding_table[(ident >> 25) & 0x1f];
+	b32u_i[6] = encoding_table[(ident >> 30) & 0x1f];
+	/* null terminate strings */
+	b32u_i[7] = b32u_d[0][1] = b32u_d[1][1] = b32u_d[2][1] =
+		b32u_d[3][1] = b32u_d[4][1] = b32u_d[5][1] = 0;
+
+	if ((flags & BACKING_STORE_META) != 0) {
+		dat = "m"; /* metadata */
+	} else {
+		dat = "d"; /* data */
+	}
+
+	/* number of chars with usefully encoded data in base 32
+	 *
+	 * Each case passes the store path, the data/metadata element,
+	 * one single character element fewer than the number of
+	 * characters and finally the complete encoded ident.
+	 *
+	 * NOTE(review): the netsurf_mkpath result is not checked here;
+	 * presumably fname is left NULL on failure - confirm.
+	 */
+	switch(((state->ident_bits + 4) / 5)) {
+	case 1:
+		netsurf_mkpath(&fname, NULL, 3, state->path, dat,
+			       b32u_i);
+		break;
+
+	case 2:
+		netsurf_mkpath(&fname, NULL, 4, state->path, dat,
+			       b32u_d[0],
+			       b32u_i);
+		break;
+
+	case 3:
+		netsurf_mkpath(&fname, NULL, 5, state->path, dat,
+			       b32u_d[0], b32u_d[1],
+			       b32u_i);
+		break;
+
+	case 4:
+		netsurf_mkpath(&fname, NULL, 6, state->path, dat,
+			       b32u_d[0], b32u_d[1], b32u_d[2],
+			       b32u_i);
+		break;
+
+	case 5:
+		netsurf_mkpath(&fname, NULL, 7, state->path, dat,
+			       b32u_d[0], b32u_d[1], b32u_d[2], b32u_d[3],
+			       b32u_i);
+		break;
+
+	case 6:
+		netsurf_mkpath(&fname, NULL, 8, state->path, dat,
+			       b32u_d[0], b32u_d[1], b32u_d[2], b32u_d[3],
+			       b32u_d[4],
+			       b32u_i);
+		break;
+
+	case 7:
+		netsurf_mkpath(&fname, NULL, 9, state->path, dat,
+			       b32u_d[0], b32u_d[1], b32u_d[2], b32u_d[3],
+			       b32u_d[4], b32u_d[5],
+			       b32u_i);
+		break;
+
+	default:
+		assert("Invalid path depth in store_fname()" == NULL);
+	}
+
+	return fname;
+}
+
+
+/**
+ * Remove the entry and files associated with an identifier.
+ *
+ * @param state The store state to use.
+ * @param ident The identifier to use.
+ * @return NSERROR_OK on success or error code on failure.
+ */
+static nserror
+unlink_ident(struct store_state *state, entry_ident_t ident)
+{
+	/* on disc objects belonging to an entry, in removal order */
+	static const enum backing_store_flags kinds[] = {
+		BACKING_STORE_META,
+		BACKING_STORE_NONE
+	};
+	struct store_entry *removed;
+	unsigned int kind;
+	nserror res;
+
+	/* drop the entry from the table first, the ident is the key */
+	res = remove_store_entry(state, ident, &removed);
+	if (res != NSERROR_OK) {
+		return res;
+	}
+
+	/* then remove the metadata and data files in turn */
+	for (kind = 0; kind < (sizeof(kinds) / sizeof(kinds[0])); kind++) {
+		char *path;
+
+		path = store_fname(state, removed->ident, kinds[kind]);
+		if (path == NULL) {
+			return NSERROR_NOMEM;
+		}
+		unlink(path);
+		free(path);
+	}
+
+	return NSERROR_OK;
+}
+
+
+/**
+ * Quick sort comparison.
+ *
+ * Orders two entry identifiers by the use count of the entries they
+ * refer to and, when those are equal, by the time of last use. The
+ * entries are looked up through the global storestate.
+ */
+static int compar(const void *va, const void *vb)
+{
+	const entry_ident_t *ida = va;
+	const entry_ident_t *idb = vb;
+	const struct store_entry *lhs = &BS_ENTRY(*ida, storestate);
+	const struct store_entry *rhs = &BS_ENTRY(*idb, storestate);
+
+	/* least used sorts first */
+	if (lhs->use_count != rhs->use_count) {
+		return (lhs->use_count < rhs->use_count) ? -1 : 1;
+	}
+
+	/* equally used, least recently used sorts first */
+	if (lhs->last_used != rhs->last_used) {
+		return (lhs->last_used < rhs->last_used) ? -1 : 1;
+	}
+
+	/* indistinguishable */
+	return 0;
+}
+
+
+/**
+ * Evict entries from backing store as per configuration.
+ *
+ * Entries are evicted to ensure the cache remains within the
+ * configured limits on size and number of entries.
+ *
+ * The approach is to check if the cache limits have been exceeded and
+ * if so build and sort list of entries to evict. The list is sorted
+ * by use count and then by age, so oldest object with least number of uses
+ * get evicted first.
+ *
+ * @param state The store state to use.
+ * @return NSERROR_OK on success or error code on failure.
+ */
+static nserror store_evict(struct store_state *state)
+{
+	entry_ident_t *elist; /* sorted list of entry identifiers */
+	unsigned int ent;
+	unsigned int ent_count;
+	unsigned int evicted = 0; /* number of entries actually removed */
+	size_t removed; /* size of removed entries */
+	nserror ret = NSERROR_OK;
+
+	/* check if the cache has exceeded configured limit */
+	if ((state->total_alloc < state->limit) &&
+	    (state->last_entry < (1U << state->entry_bits))) {
+		/* cache within limits */
+		return NSERROR_OK;
+	}
+
+	/* entry 0 is the empty sentinel so with one or fewer entries
+	 * there is nothing which could be evicted (and no list to
+	 * allocate).
+	 */
+	if (state->last_entry <= 1) {
+		return NSERROR_OK;
+	}
+
+	/* the sizes are wider than int so are converted explicitly to
+	 * match the format specifiers.
+	 */
+	LOG(("Evicting entries to reduce %llu by %llu",
+	     (unsigned long long)state->total_alloc,
+	     (unsigned long long)state->hysteresis));
+
+	/* allocate storage for the list */
+	elist = malloc(sizeof(entry_ident_t) * state->last_entry);
+	if (elist == NULL) {
+		return NSERROR_NOMEM;
+	}
+
+	/* sort the list avoiding entry 0 which is the empty sentinel */
+	for (ent = 1; ent < state->last_entry; ent++) {
+		elist[ent - 1] = state->entries[ent].ident;
+	}
+	ent_count = ent - 1; /* important to keep this as the entry count will change when entries are removed */
+	qsort(elist, ent_count, sizeof(entry_ident_t), compar);
+
+	/* evict entries in listed order */
+	removed = 0;
+	for (ent = 0; ent < ent_count; ent++) {
+		/* the sizes must be read before the entry is removed
+		 * as the ident no longer maps to it afterwards.
+		 */
+		size_t entsize = BS_ENTRY(elist[ent], state).data_alloc;
+		entsize += BS_ENTRY(elist[ent], state).meta_alloc;
+
+		ret = unlink_ident(state, elist[ent]);
+		if (ret != NSERROR_OK) {
+			break;
+		}
+
+		/* only storage which was really released is counted */
+		removed += entsize;
+		evicted++;
+
+		if (removed > state->hysteresis) {
+			break;
+		}
+	}
+
+	free(elist);
+
+	LOG(("removed %llu in %u entries",
+	     (unsigned long long)removed, evicted));
+
+	return ret;
+}
+
+
+/**
+ * Lookup a backing store entry in the entry table from a url.
+ *
+ * This finds the store entry associated with the given
+ * key. Additionally if an entry is found it updates the usage data
+ * about the entry.
+ *
+ * @param state The store state to use.
+ * @param url The value used as the unique key to search entries for.
+ * @param bse Pointer used to return value.
+ * @return NSERROR_OK and bse updated on success or NSERROR_NOT_FOUND
+ *         if no entry corresponds to the url.
+ */
+static nserror
+get_store_entry(struct store_state *state, nsurl *url, struct store_entry **bse)
+{
+	entry_ident_t ident;
+	unsigned int sei; /* store entry index */
+
+	LOG(("url:%s", nsurl_access(url)));
+
+	/* use the url hash as the entry identifier */
+	ident = nsurl_hash(url);
+
+	sei = BS_ENTRY_INDEX(ident, state);
+
+	if (sei == 0) {
+		return NSERROR_NOT_FOUND;
+	}
+
+	if (state->entries[sei].ident != ident) {
+		/* entry ident did not match, a different ident occupies this address */
+		LOG(("ident did not match entry"));
+		return NSERROR_NOT_FOUND;
+	}
+
+	*bse = &state->entries[sei];
+
+	state->entries[sei].last_used = time(NULL);
+	state->entries[sei].use_count++;
+
+	state->entries_dirty = true;
+
+	return NSERROR_OK;
+}
+
+
+/**
+ * Set a backing store entry in the entry table from a url.
+ *
+ * This creates a backing store entry in the entry table for a url, or
+ * reuses the entry the address map already holds for the url's
+ * address. Eviction is run first and the allocation accounting is
+ * updated for the kind of object (data or metadata) being stored.
+ *
+ * @param state The store state to use.
+ * @param url The value used as the unique key to search entries for.
+ * @param flags The backing store flags; BACKING_STORE_META selects the
+ *              metadata allocation, otherwise the data allocation.
+ * @param data The object data (not accessed by this function).
+ * @param datalen The size of the object, recorded as its allocation.
+ * @param bse Pointer used to return value.
+ * @return NSERROR_OK and \a bse updated on success or the error code
+ *         from the eviction if that failed.
+ */
+static nserror
+set_store_entry(struct store_state *state,
+		nsurl *url,
+		enum backing_store_flags flags,
+		const uint8_t *data,
+		const size_t datalen,
+		struct store_entry **bse)
+{
+	entry_ident_t ident;
+	entry_index_t sei; /* store entry index */
+	struct store_entry *se;
+	nserror ret;
+	bool isrep; /* is the store replacing an existing entry or not */
+
+	LOG(("url:%s", nsurl_access(url)));
+
+	/* evict entries as required and ensure there is at least one
+	 * new entry available.
+	 */
+	ret = store_evict(state);
+	if (ret != NSERROR_OK) {
+		return ret;
+	}
+
+	/* use the url hash as the entry identifier */
+	ident = nsurl_hash(url);
+
+	sei = BS_ENTRY_INDEX(ident, state);
+
+	/** @todo Should this deal with cache eviction? */
+
+	if (sei == 0) {
+		/* allocating the next available entry */
+		sei = state->last_entry;
+		state->last_entry++;
+		BS_ENTRY_INDEX(ident, state) = sei;
+		isrep = false;
+	} else {
+		/* updating or replacing existing entry */
+		/** @todo should we be checking the entry ident
+		 * matches the url. Thats a collision in the address
+		 * mapping right? and is it important?
+		 */
+		isrep = true;
+	}
+
+	se = &state->entries[sei];
+
+	se->ident = ident;
+	se->flags = STORE_ENTRY_FLAG_NONE;
+	se->use_count = 1;
+	se->last_used = time(NULL);
+
+	/* account for allocation: a replaced object has its previous
+	 * size of the same kind removed from the total while a new
+	 * entry has the size of the other kind cleared.
+	 */
+	if ((flags & BACKING_STORE_META) != 0) {
+		if (isrep) {
+			state->total_alloc -= se->meta_alloc;
+		} else {
+			se->data_alloc = 0;
+		}
+		se->meta_alloc = datalen;
+	} else {
+		if (isrep) {
+			state->total_alloc -= se->data_alloc;
+		} else {
+			se->meta_alloc = 0;
+		}
+		se->data_alloc = datalen;
+	}
+	state->total_alloc += datalen;
+
+	state->entries_dirty = true;
+
+	*bse = se;
+
+	return NSERROR_OK;
+}
+
+
+
+
+/**
+ * Open a file using a store ident.
+ *
+ * @param state The store state to use.
+ * @param ident The identifier of the file to open.
+ * @param flags The backing store flags.
+ * @param openflags The flags used with the open call.
+ * @return An fd from the open call or -1 on error.
+ */
+static int
+store_open(struct store_state *state,
+	   uint32_t ident,
+	   enum backing_store_flags flags,
+	   int openflags)
+{
+	char *fname;
+	nserror ret;
+	int fd;
+
+	fname = store_fname(state, ident, flags);
+	if (fname == NULL) {
+		LOG(("filename error"));
+		return -1;
+	}
+
+	/* The directories leading to the file are only created when
+	 * the open is allowed to create the file. Without O_CREAT the
+	 * open can only succeed if the file, and so its directories,
+	 * already exist; creating them would merely litter the store
+	 * with empty directories on every miss.
+	 */
+	if ((openflags & O_CREAT) != 0) {
+		/* ensure path to file is usable */
+		ret = netsurf_mkdir_all(fname);
+		if (ret != NSERROR_OK) {
+			LOG(("file path \"%s\" could not be created", fname));
+			free(fname);
+			return -1;
+		}
+	}
+
+	LOG(("opening %s", fname));
+	fd = open(fname, openflags, S_IRUSR | S_IWUSR);
+
+	free(fname);
+
+	return fd;
+}
+
+/**
+ * Construct address ident to filesystem entry map
+ *
+ * To allow a filesystem entry to be found from its identifier we
+ * construct a mapping index. This is a hash map from the entries URL
+ * (its unique key) to filesystem entry.
+ *
+ * As the entire entry list must be iterated over to construct the map
+ * we also compute the total storage in use.
+ *
+ * @param state The backing store global state.
+ * @return NSERROR_OK on success or NSERROR_NOMEM if the map storage
+ *         could not be allocated.
+ */
+static nserror
+build_entrymap(struct store_state *state)
+{
+	unsigned int eloop;
+
+	/* the byte count is a size_t so is converted explicitly to
+	 * match the format specifier.
+	 */
+	LOG(("Allocating %llu bytes for max of %d buckets",
+	     (unsigned long long)((1 << state->ident_bits) * sizeof(entry_index_t)),
+	     1 << state->ident_bits));
+
+	state->addrmap = calloc(1 << state->ident_bits, sizeof(entry_index_t));
+	if (state->addrmap == NULL) {
+		return NSERROR_NOMEM;
+	}
+
+	state->total_alloc = 0;
+
+	/* entry 0 is the empty sentinel and is never mapped */
+	for (eloop = 1; eloop < state->last_entry; eloop++) {
+		/*
+		LOG(("entry:%d ident:0x%08x used:%d",
+		     eloop,
+		     BS_ADDRESS(state->entries[eloop].ident, state),
+		     state->entries[eloop].use_count));
+		*/
+
+		/* update the address map to point at the entry */
+		BS_ENTRY_INDEX(state->entries[eloop].ident, state) = eloop;
+
+		/* account for the storage space, the sizes are added
+		 * separately so their sum is never formed in 32 bits
+		 */
+		state->total_alloc += state->entries[eloop].data_alloc;
+		state->total_alloc += state->entries[eloop].meta_alloc;
+	}
+
+	return NSERROR_OK;
+}
+
+/**
+ * Write filesystem entries to file.
+ *
+ * Nothing is written if the entries have not changed since they were
+ * last written. A write which does not store every entry is treated
+ * as a failure and leaves the entries marked as changed so a later
+ * call tries again.
+ *
+ * @todo consider atomic replace using rename.
+ *
+ * @param state The backing store state to read the entries from.
+ * @return NSERROR_OK on success or error code on failure.
+ */
+static nserror write_entries(struct store_state *state)
+{
+	int fd;
+	char *fname = NULL;
+	ssize_t written;
+	size_t entries_size; /* number of bytes which must be written */
+	nserror ret;
+
+	if (state->entries_dirty == false) {
+		/* entries have not been updated since last write */
+		return NSERROR_OK;
+	}
+
+	ret = netsurf_mkpath(&fname, NULL, 2, state->path, "entries");
+	if (ret != NSERROR_OK) {
+		return ret;
+	}
+
+	fd = open(fname, O_RDWR | O_CREAT | O_TRUNC, S_IRUSR | S_IWUSR);
+	free(fname);
+	if (fd == -1) {
+		return NSERROR_SAVE_FAILED;
+	}
+
+	entries_size = state->last_entry * sizeof(struct store_entry);
+
+	written = write(fd, state->entries, entries_size);
+	close(fd);
+	if ((written < 0) || ((size_t)written != entries_size)) {
+		/* error or short write, the file does not hold the
+		 * complete table
+		 */
+		/** @todo Delete the file? */
+		return NSERROR_SAVE_FAILED;
+	}
+
+	/* the file now matches the table held in memory */
+	state->entries_dirty = false;
+
+	return NSERROR_OK;
+}
+
+/**
+ * Read description entries into memory.
+ *
+ * @param state The backing store state to put the loaded entries in.
+ * @return NSERROR_OK on success or error code on failure.
+ */
+static nserror
+read_entries(struct store_state *state)
+{
+	int fd;
+	ssize_t rd;
+	size_t entries_size;
+	char *fname = NULL;
+	nserror ret;
+
+	ret = netsurf_mkpath(&fname, NULL, 2, state->path, "entries");
+	if (ret != NSERROR_OK) {
+		return ret;
+	}
+
+	entries_size = (1 << state->entry_bits) * sizeof(struct store_entry);
+
+	/* the byte count is a size_t so is converted explicitly to
+	 * match the format specifier.
+	 */
+	LOG(("Allocating %llu bytes for max of %d entries",
+	     (unsigned long long)entries_size, 1 << state->entry_bits));
+
+	state->entries = calloc(1, entries_size);
+	if (state->entries == NULL) {
+		free(fname);
+		return NSERROR_NOMEM;
+	}
+
+	/* Entry 0 is the empty sentinel so unless at least one whole
+	 * entry is loaded the next entry to allocate is index 1. This
+	 * covers a missing file as well as an empty, unreadable or
+	 * truncated one.
+	 */
+	state->last_entry = 1;
+
+	fd = open(fname, O_RDWR);
+	free(fname);
+	if (fd != -1) {
+		rd = read(fd, state->entries, entries_size);
+		close(fd);
+		if ((rd > 0) &&
+		    ((size_t)rd >= sizeof(struct store_entry))) {
+			state->last_entry = rd / sizeof(struct store_entry);
+			LOG(("Read %u entries", state->last_entry));
+		}
+	}
+	/* when nothing was loaded the entries could be rebuilt from fs */
+
+	return NSERROR_OK;
+}
+
+/**
+ * Write the cache tag file.
+ *
+ * @param state The cache state.
+ * @return NSERROR_OK on success or error code on failure.
+ */
+static nserror
+write_cache_tag(struct store_state *state)
+{
+	FILE *fcachetag;
+	nserror ret;
+	char *fname = NULL;
+
+	ret = netsurf_mkpath(&fname, NULL, 2, state->path, "CACHEDIR.TAG");
+	if (ret != NSERROR_OK) {
+		return ret;
+	}
+
+	fcachetag = fopen(fname, "wb");
+
+	free(fname);
+
+	if (fcachetag == NULL) {
+		return NSERROR_NOT_FOUND;
+	}
+
+	fprintf(fcachetag,
+		"Signature: 8a477f597d28d172789f06886806bc55\n"
+		"# This file is a cache directory tag created by NetSurf.\n"
+		"# For information about cache directory tags, see:\n"
+		"#	http://www.brynosaurus.com/cachedir/\n");
+
+	fclose(fcachetag);
+
+	return NSERROR_OK;
+}
+
+/**
+ * Write the control file for the current state.
+ *
+ * @param state The state to write to the control file.
+ * @return NSERROR_OK on success or error code on failure.
+ */ +static nserror +write_control(struct store_state *state) +{ + FILE *fcontrol; + nserror ret; + char *fname = NULL; + + ret = netsurf_mkpath(&fname, NULL, 2, state->path, "control"); + if (ret != NSERROR_OK) { + return ret; + } + + LOG(("writing control file \"%s\"", fname)); + + ret = netsurf_mkdir_all(fname); + if (ret != NSERROR_OK) { + free(fname); + return ret; + } + + fcontrol = fopen(fname, "wb"); + + free(fname); + + if (fcontrol == NULL) { + return NSERROR_NOT_FOUND; + } + + fprintf(fcontrol, "%u%c", CONTROL_VERSION, 0); + fprintf(fcontrol, "%u%c", state->entry_bits, 0); + fprintf(fcontrol, "%u%c", state->ident_bits, 0); + fprintf(fcontrol, "%u%c", state->last_entry, 0); + + fclose(fcontrol); + + return NSERROR_OK; +} + + +/** + * Read and parse the control file. + * + * @param state The state to read from the control file. + * @return NSERROR_OK on sucess or error code on faliure. + */ +static nserror +read_control(struct store_state *state) +{ + nserror ret; + FILE *fcontrol; + unsigned int ctrlversion; + unsigned int addrbits; + unsigned int entrybits; + char *fname = NULL; + + ret = netsurf_mkpath(&fname, NULL, 2, state->path, "control"); + if (ret != NSERROR_OK) { + return ret; + } + + LOG(("opening control file \"%s\"", fname)); + + fcontrol = fopen(fname, "rb"); + + free(fname); + + if (fcontrol == NULL) { + /* unable to open control file */ + if (errno == ENOENT) { + return NSERROR_NOT_FOUND; + } else { + return NSERROR_INIT_FAILED; + } + } + + /* read control and setup new state */ + + /* first line is version */ + if (fscanf(fcontrol, "%u", &ctrlversion) != 1) { + goto control_error; + } + + if (ctrlversion != CONTROL_VERSION) { + goto control_error; + } + + if (fgetc(fcontrol) != 0) { + goto control_error; + } + + /* second line is log2 max number of entries */ + if (fscanf(fcontrol, "%u", &entrybits) != 1) { + goto control_error; + } + if (fgetc(fcontrol) != 0) { + goto control_error; + } + + /* second line is log2 size of address hash */ 
+ if (fscanf(fcontrol, "%u", &addrbits) != 1) { + goto control_error; + } + if (fgetc(fcontrol) != 0) { + goto control_error; + } + + fclose(fcontrol); + + state->entry_bits = entrybits; + state->ident_bits = addrbits; + + return NSERROR_OK; + +control_error: /* problem with the control file */ + + fclose(fcontrol); + + return NSERROR_INIT_FAILED; +} + + + + +/* Functions exported in the backing store table */ + +/** + * Initialise the backing store. + * + * @param parameters to configure backing store. + * @return NSERROR_OK on success or error code on faliure. + */ +static nserror +initialise(const struct llcache_store_parameters *parameters) +{ + struct store_state *newstate; + nserror ret; + + /* check backing store is not already initialised */ + if (storestate != NULL) { + return NSERROR_INIT_FAILED; + } + + /* if we are not allowed any space simply give up on init */ + if (parameters->limit == 0) { + return NSERROR_OK; + } + + /* if the path to the cache directory is not set do not init */ + if (parameters->path == NULL) { + return NSERROR_OK; + } + + /* allocate new store state and set defaults */ + newstate = calloc(1, sizeof(struct store_state)); + if (newstate == NULL) { + return NSERROR_NOMEM; + } + + newstate->path = strdup(parameters->path); + newstate->limit = parameters->limit; + newstate->hysteresis = parameters->hysteresis; + + if (parameters->address_size == 0) { + newstate->ident_bits = DEFAULT_IDENT_SIZE; + } else { + newstate->ident_bits = parameters->address_size; + } + + if (parameters->entry_size == 0) { + newstate->entry_bits = DEFAULT_ENTRY_SIZE; + } else { + newstate->entry_bits = parameters->entry_size; + } + + ret = read_control(newstate); + if (ret != NSERROR_OK) { + LOG(("read control failed %s", messages_get_errorcode(ret))); + ret = write_control(newstate); + if (ret == NSERROR_OK) { + write_cache_tag(newstate); + } + } + if (ret != NSERROR_OK) { + /* that went well obviously */ + free(newstate->path); + free(newstate); + return 
ret; + } + + /* ensure the maximum number of entries can be represented in + * the type available to store it. + */ + if (newstate->entry_bits > (8 * sizeof(entry_index_t))) { + newstate->entry_bits = (8 * sizeof(entry_index_t)); + } + + /* read filesystem entries */ + ret = read_entries(newstate); + if (ret != NSERROR_OK) { + /* that went well obviously */ + free(newstate->path); + free(newstate); + return ret; + } + + /* build entry hash map */ + ret = build_entrymap(newstate); + if (ret != NSERROR_OK) { + /* that obviously went well */ + free(newstate->path); + free(newstate); + return ret; + } + + storestate = newstate; + + LOG(("FS backing store init successful")); + + LOG(("path:%s limit:%d hyst:%d addr:%d entries:%d", newstate->path, newstate->limit, newstate->hysteresis, newstate->ident_bits, newstate->entry_bits)); + LOG(("Using %d/%d", newstate->total_alloc, newstate->limit)); + + return NSERROR_OK; +} + + +/** + * Finalise the backing store. + * + * @return NSERROR_OK on success. + */ +static nserror +finalise(void) +{ + if (storestate != NULL) { + write_entries(storestate); + + /* avoid division by zero */ + if (storestate->miss_count == 0) { + storestate->miss_count = 1; + } + LOG(("hits:%d misses:%d hit ratio:%d returned:%d bytes", + storestate->hit_count, storestate->miss_count, + storestate->hit_count / storestate->miss_count, + storestate->hit_size)); + + free(storestate->path); + free(storestate); + storestate = NULL; + } + return NSERROR_OK; +} + + +/** + * Place an object in the backing store. + * + * @param url The url is used as the unique primary key for the data. + * @param flags The flags to control how the object is stored. + * @param data The objects source data. + * @param datalen The length of the \a data. + * @return NSERROR_OK on success or error code on faliure. 
+ */ +static nserror +store(nsurl *url, + enum backing_store_flags flags, + const uint8_t *data, + const size_t datalen) +{ + nserror ret; + struct store_entry *bse; + ssize_t written; + int fd; + + /* check backing store is initialised */ + if (storestate == NULL) { + return NSERROR_INIT_FAILED; + } + + /* set the store entry up */ + ret = set_store_entry(storestate, url, flags, data, datalen, &bse); + if (ret != NSERROR_OK) { + LOG(("store entry setting failed")); + return ret; + } + + fd = store_open(storestate, bse->ident, flags, O_CREAT | O_WRONLY); + if (fd < 0) { + perror(""); + LOG(("Open failed %d",fd)); + return NSERROR_SAVE_FAILED; + } + + + LOG(("Writing %d bytes from %p", datalen, data)); + written = write(fd, data, datalen); + + close(fd); + if (written < 0 || (size_t) written < datalen) { + /** @todo Delete the file? */ + return NSERROR_SAVE_FAILED; + } + + return NSERROR_OK; +} + +/** + * Retrive an object from the backing store. + * + * @param url The url is used as the unique primary key for the data. + * @param flags The flags to control how the object is stored. + * @param data The objects data. + * @param datalen The length of the \a data retrieved. + * @return NSERROR_OK on success or error code on faliure. + */ +static nserror +fetch(nsurl *url, + enum backing_store_flags *flags, + uint8_t **data_out, + size_t *datalen_out) +{ + nserror ret; + struct store_entry *bse; + uint8_t *data; + size_t datalen; + int fd; + ssize_t rd; + + /* check backing store is initialised */ + if (storestate == NULL) { + return NSERROR_INIT_FAILED; + } + + ret = get_store_entry(storestate, url, &bse); + if (ret != NSERROR_OK) { + LOG(("entry not found")); + storestate->miss_count++; + return ret; + } + storestate->hit_count++; + + LOG(("retriving cache file for url:%s", nsurl_access(url))); + + fd = store_open(storestate, bse->ident, *flags, O_RDONLY); + if (fd < 0) { + LOG(("Open failed")); + /** @todo should this invalidate the entry? 
*/ + return NSERROR_NOT_FOUND; + } + + data = *data_out; + datalen = *datalen_out; + + /* need to deal with buffers */ + if (data == NULL) { + if (datalen == 0) { + /* caller did not know the files length */ + if (((*flags) & BACKING_STORE_META) != 0) { + datalen = bse->meta_alloc; + } else { + datalen = bse->data_alloc; + } + } + + data = malloc(datalen); + if (data == NULL) { + close(fd); + return NSERROR_NOMEM; + } + } + + /** @todo should this check datalen is sufficient */ + + LOG(("Reading %d bytes into %p from file", datalen, data)); + + /** @todo this read should be an a loop */ + rd = read(fd, data, datalen); + if (rd <= 0) { + LOG(("read returned %d", rd)); + close(fd); + if ((*data_out) == NULL) { + free(data); + } + return NSERROR_NOT_FOUND; + } + + close(fd); + + storestate->hit_size += datalen; + + *data_out = data; + *datalen_out = datalen; + + return NSERROR_OK; +} + + +/** + * Invalidate a source object from the backing store. + * + * The entry (if present in the backing store) must no longer + * be returned as a result to the fetch or meta operations. + * + * @param url The url is used as the unique primary key to invalidate. + * @return NSERROR_OK on success or error code on faliure. 
+ */ +static nserror +invalidate(nsurl *url) +{ + /* check backing store is initialised */ + if (storestate == NULL) { + return NSERROR_INIT_FAILED; + } + + LOG(("url:%s", nsurl_access(url))); + + return unlink_ident(storestate, nsurl_hash(url)); +} + + +static struct gui_llcache_table llcache_table = { + .initialise = initialise, + .finalise = finalise, + .store = store, + .fetch = fetch, + .invalidate = invalidate, +}; + +struct gui_llcache_table *filesystem_llcache_table = &llcache_table; diff --git a/content/hlcache.c b/content/hlcache.c index e8b2836db..0d7996524 100644 --- a/content/hlcache.c +++ b/content/hlcache.c @@ -24,17 +24,17 @@ #include <stdlib.h> #include <string.h> -#include "content/content.h" -#include "content/hlcache.h" -#include "content/mimesniff.h" +#include "desktop/gui_factory.h" #include "utils/http.h" #include "utils/log.h" #include "utils/messages.h" #include "utils/ring.h" -#include "utils/schedule.h" -#include "utils/url.h" #include "utils/utils.h" +#include "content/content.h" +#include "content/mimesniff.h" +#include "content/hlcache.h" + typedef struct hlcache_entry hlcache_entry; typedef struct hlcache_retrieval_ctx hlcache_retrieval_ctx; @@ -94,23 +94,427 @@ struct hlcache_s { static struct hlcache_s *hlcache = NULL; -static void hlcache_clean(void *ignored); +/****************************************************************************** + * High-level cache internals * + ******************************************************************************/ -static nserror hlcache_llcache_callback(llcache_handle *handle, - const llcache_event *event, void *pw); -static nserror hlcache_migrate_ctx(hlcache_retrieval_ctx *ctx, - lwc_string *effective_type); + +/** + * Attempt to clean the cache + */ +static void hlcache_clean(void *ignored) +{ + hlcache_entry *entry, *next; + + for (entry = hlcache->content_list; entry != NULL; entry = next) { + next = entry->next; + + if (entry->content == NULL) + continue; + + if 
(content__get_status(entry->content) == + CONTENT_STATUS_LOADING) + continue; + + if (content_count_users(entry->content) != 0) + continue; + + /** \todo This is over-zealous: all unused contents + * will be immediately destroyed. Ideally, we want to + * purge all unused contents that are using stale + * source data, and enough fresh contents such that + * the cache fits in the configured cache size limit. + */ + + /* Remove entry from cache */ + if (entry->prev == NULL) + hlcache->content_list = entry->next; + else + entry->prev->next = entry->next; + + if (entry->next != NULL) + entry->next->prev = entry->prev; + + /* Destroy content */ + content_destroy(entry->content); + + /* Destroy entry */ + free(entry); + } + + /* Attempt to clean the llcache */ + llcache_clean(false); + + /* Re-schedule ourselves */ + guit->browser->schedule(hlcache->params.bg_clean_time, hlcache_clean, NULL); +} + +/** + * Determine if the specified MIME type is acceptable + * + * \param mime_type MIME type to consider + * \param accepted_types Array of acceptable types, or NULL for any + * \param computed_type Pointer to location to receive computed type of object + * \return True if the type is acceptable, false otherwise + */ static bool hlcache_type_is_acceptable(lwc_string *mime_type, - content_type accepted_types, content_type *computed_type); + content_type accepted_types, content_type *computed_type) +{ + content_type type; + + type = content_factory_type_from_mime_type(mime_type); + + *computed_type = type; + + return ((accepted_types & type) != 0); +} + +/** + * Veneer between content callback API and hlcache callback API + * + * \param c Content to emit message for + * \param msg Message to emit + * \param data Data for message + * \param pw Pointer to private data (hlcache_handle) + */ +static void hlcache_content_callback(struct content *c, content_msg msg, + union content_msg_data data, void *pw) +{ + hlcache_handle *handle = pw; + hlcache_event event; + nserror error = 
NSERROR_OK; + + event.type = msg; + event.data = data; + + if (handle->cb != NULL) + error = handle->cb(handle, &event, handle->pw); + + if (error != NSERROR_OK) + LOG(("Error in callback: %d", error)); +} + +/** + * Find a content for the high-level cache handle + * + * \param ctx High-level cache retrieval context + * \param effective_type Effective MIME type of content + * \return NSERROR_OK on success, + * NSERROR_NEED_DATA on success where data is needed, + * appropriate error otherwise + * + * \pre handle::state == HLCACHE_HANDLE_NEW + * \pre Headers must have been received for associated low-level handle + * \post Low-level handle is either released, or associated with new content + * \post High-level handle is registered with content + */ static nserror hlcache_find_content(hlcache_retrieval_ctx *ctx, - lwc_string *effective_type); -static void hlcache_content_callback(struct content *c, - content_msg msg, union content_msg_data data, void *pw); + lwc_string *effective_type) +{ + hlcache_entry *entry; + hlcache_event event; + nserror error = NSERROR_OK; + + /* Search list of cached contents for a suitable one */ + for (entry = hlcache->content_list; entry != NULL; entry = entry->next) { + hlcache_handle entry_handle = { entry, NULL, NULL }; + const llcache_handle *entry_llcache; + + if (entry->content == NULL) + continue; + + /* Ignore contents in the error state */ + if (content_get_status(&entry_handle) == CONTENT_STATUS_ERROR) + continue; + + /* Ensure that content is shareable */ + if (content_is_shareable(entry->content) == false) + continue; + + /* Ensure that quirks mode is acceptable */ + if (content_matches_quirks(entry->content, + ctx->child.quirks) == false) + continue; + + /* Ensure that content uses same low-level object as + * low-level handle */ + entry_llcache = content_get_llcache_handle(entry->content); + + if (llcache_handle_references_same_object(entry_llcache, + ctx->llcache)) + break; + } + + if (entry == NULL) { + /* No existing 
entry, so need to create one */ + entry = malloc(sizeof(hlcache_entry)); + if (entry == NULL) + return NSERROR_NOMEM; + + /* Create content using llhandle */ + entry->content = content_factory_create_content(ctx->llcache, + ctx->child.charset, ctx->child.quirks, + effective_type); + if (entry->content == NULL) { + free(entry); + return NSERROR_NOMEM; + } + + /* Insert into cache */ + entry->prev = NULL; + entry->next = hlcache->content_list; + if (hlcache->content_list != NULL) + hlcache->content_list->prev = entry; + hlcache->content_list = entry; + + /* Signal to caller that we created a content */ + error = NSERROR_NEED_DATA; + + hlcache->miss_count++; + } else { + /* Found a suitable content: no longer need low-level handle */ + llcache_handle_release(ctx->llcache); + hlcache->hit_count++; + } + + /* Associate handle with content */ + if (content_add_user(entry->content, + hlcache_content_callback, ctx->handle) == false) + return NSERROR_NOMEM; + + /* Associate cache entry with handle */ + ctx->handle->entry = entry; + + /* Catch handle up with state of content */ + if (ctx->handle->cb != NULL) { + content_status status = content_get_status(ctx->handle); + + if (status == CONTENT_STATUS_LOADING) { + event.type = CONTENT_MSG_LOADING; + ctx->handle->cb(ctx->handle, &event, ctx->handle->pw); + } else if (status == CONTENT_STATUS_READY) { + event.type = CONTENT_MSG_LOADING; + ctx->handle->cb(ctx->handle, &event, ctx->handle->pw); + + if (ctx->handle->cb != NULL) { + event.type = CONTENT_MSG_READY; + ctx->handle->cb(ctx->handle, &event, + ctx->handle->pw); + } + } else if (status == CONTENT_STATUS_DONE) { + event.type = CONTENT_MSG_LOADING; + ctx->handle->cb(ctx->handle, &event, ctx->handle->pw); + + if (ctx->handle->cb != NULL) { + event.type = CONTENT_MSG_READY; + ctx->handle->cb(ctx->handle, &event, + ctx->handle->pw); + } + + if (ctx->handle->cb != NULL) { + event.type = CONTENT_MSG_DONE; + ctx->handle->cb(ctx->handle, &event, + ctx->handle->pw); + } + } + } + + 
return error; +} + +/** + * Migrate a retrieval context into its final destination content + * + * \param ctx Context to migrate + * \param effective_type The effective MIME type of the content, or NULL + * \return NSERROR_OK on success, + * NSERROR_NEED_DATA on success where data is needed, + * appropriate error otherwise + */ +static nserror hlcache_migrate_ctx(hlcache_retrieval_ctx *ctx, + lwc_string *effective_type) +{ + content_type type = CONTENT_NONE; + nserror error = NSERROR_OK; + + ctx->migrate_target = true; + + if ((effective_type != NULL) && + hlcache_type_is_acceptable(effective_type, + ctx->accepted_types, + &type)) { + error = hlcache_find_content(ctx, effective_type); + if (error != NSERROR_OK && error != NSERROR_NEED_DATA) { + if (ctx->handle->cb != NULL) { + hlcache_event hlevent; + + hlevent.type = CONTENT_MSG_ERROR; + hlevent.data.error = messages_get("MiscError"); + + ctx->handle->cb(ctx->handle, &hlevent, + ctx->handle->pw); + } + + llcache_handle_abort(ctx->llcache); + llcache_handle_release(ctx->llcache); + } + } else if (type == CONTENT_NONE && + (ctx->flags & HLCACHE_RETRIEVE_MAY_DOWNLOAD)) { + /* Unknown type, and we can download, so convert */ + llcache_handle_force_stream(ctx->llcache); + + if (ctx->handle->cb != NULL) { + hlcache_event hlevent; + + hlevent.type = CONTENT_MSG_DOWNLOAD; + hlevent.data.download = ctx->llcache; + + ctx->handle->cb(ctx->handle, &hlevent, + ctx->handle->pw); + } + + /* Ensure caller knows we need data */ + error = NSERROR_NEED_DATA; + } else { + /* Unacceptable type: report error */ + if (ctx->handle->cb != NULL) { + hlcache_event hlevent; + + hlevent.type = CONTENT_MSG_ERROR; + hlevent.data.error = messages_get("UnacceptableType"); + + ctx->handle->cb(ctx->handle, &hlevent, + ctx->handle->pw); + } + + llcache_handle_abort(ctx->llcache); + llcache_handle_release(ctx->llcache); + } + + ctx->migrate_target = false; + + /* No longer require retrieval context */ + RING_REMOVE(hlcache->retrieval_ctx_ring, ctx); 
+ free((char *) ctx->child.charset); + free(ctx); + + return error; +} + +/** + * Handler for low-level cache events + * + * \param handle Handle for which event is issued + * \param event Event data + * \param pw Pointer to client-specific data + * \return NSERROR_OK on success, appropriate error otherwise + */ +static nserror hlcache_llcache_callback(llcache_handle *handle, + const llcache_event *event, void *pw) +{ + hlcache_retrieval_ctx *ctx = pw; + lwc_string *effective_type = NULL; + nserror error; + + assert(ctx->llcache == handle); + + switch (event->type) { + case LLCACHE_EVENT_HAD_HEADERS: + error = mimesniff_compute_effective_type(handle, NULL, 0, + ctx->flags & HLCACHE_RETRIEVE_SNIFF_TYPE, + ctx->accepted_types == CONTENT_IMAGE, + &effective_type); + if (error == NSERROR_OK || error == NSERROR_NOT_FOUND) { + /* If the sniffer was successful or failed to find + * a Content-Type header when sniffing was + * prohibited, we must migrate the retrieval context. */ + error = hlcache_migrate_ctx(ctx, effective_type); + + if (effective_type != NULL) + lwc_string_unref(effective_type); + } + + /* No need to report that we need data: + * we'll get some anyway if there is any */ + if (error == NSERROR_NEED_DATA) + error = NSERROR_OK; + + return error; + + break; + case LLCACHE_EVENT_HAD_DATA: + error = mimesniff_compute_effective_type(handle, + event->data.data.buf, event->data.data.len, + ctx->flags & HLCACHE_RETRIEVE_SNIFF_TYPE, + ctx->accepted_types == CONTENT_IMAGE, + &effective_type); + if (error != NSERROR_OK) { + assert(0 && "MIME sniff failed with data"); + } + + error = hlcache_migrate_ctx(ctx, effective_type); + + lwc_string_unref(effective_type); + + return error; + + break; + case LLCACHE_EVENT_DONE: + /* DONE event before we could determine the effective MIME type. 
+ */ + error = mimesniff_compute_effective_type(handle, + NULL, 0, false, false, &effective_type); + if (error == NSERROR_OK) { + error = hlcache_migrate_ctx(ctx, effective_type); + + lwc_string_unref(effective_type); + + return error; + } + + if (ctx->handle->cb != NULL) { + hlcache_event hlevent; + + hlevent.type = CONTENT_MSG_ERROR; + hlevent.data.error = messages_get("BadType"); + + ctx->handle->cb(ctx->handle, &hlevent, ctx->handle->pw); + } + break; + case LLCACHE_EVENT_ERROR: + if (ctx->handle->cb != NULL) { + hlcache_event hlevent; + + hlevent.type = CONTENT_MSG_ERROR; + hlevent.data.error = event->data.msg; + + ctx->handle->cb(ctx->handle, &hlevent, ctx->handle->pw); + } + break; + case LLCACHE_EVENT_PROGRESS: + break; + case LLCACHE_EVENT_REDIRECT: + if (ctx->handle->cb != NULL) { + hlcache_event hlevent; + + hlevent.type = CONTENT_MSG_REDIRECT; + hlevent.data.redirect.from = event->data.redirect.from; + hlevent.data.redirect.to = event->data.redirect.to; + + ctx->handle->cb(ctx->handle, &hlevent, ctx->handle->pw); + } + break; + } + + return NSERROR_OK; +} + /****************************************************************************** * Public API * ******************************************************************************/ + nserror hlcache_initialise(const struct hlcache_parameters *hlcache_parameters) { @@ -121,9 +525,7 @@ hlcache_initialise(const struct hlcache_parameters *hlcache_parameters) return NSERROR_NOMEM; } - ret = llcache_initialise(hlcache_parameters->cb, - hlcache_parameters->cb_ctx, - hlcache_parameters->limit); + ret = llcache_initialise(&hlcache_parameters->llcache); if (ret != NSERROR_OK) { free(hlcache); hlcache = NULL; @@ -133,7 +535,7 @@ hlcache_initialise(const struct hlcache_parameters *hlcache_parameters) hlcache->params = *hlcache_parameters; /* Schedule the cache cleanup */ - schedule(hlcache->params.bg_clean_time / 10, hlcache_clean, NULL); + guit->browser->schedule(hlcache->params.bg_clean_time, hlcache_clean, NULL); 
return NSERROR_OK; } @@ -142,7 +544,7 @@ hlcache_initialise(const struct hlcache_parameters *hlcache_parameters) void hlcache_stop(void) { /* Remove the hlcache_clean schedule */ - schedule_remove(hlcache_clean, NULL); + guit->browser->schedule(-1, hlcache_clean, NULL); } /* See hlcache.h for documentation */ @@ -220,15 +622,6 @@ void hlcache_finalise(void) } /* See hlcache.h for documentation */ -nserror hlcache_poll(void) -{ - - llcache_poll(); - - return NSERROR_OK; -} - -/* See hlcache.h for documentation */ nserror hlcache_handle_retrieve(nsurl *url, uint32_t flags, nsurl *referer, llcache_post_data *post, hlcache_handle_callback cb, void *pw, @@ -295,7 +688,7 @@ nserror hlcache_handle_release(hlcache_handle *handle) RING_ITERATE_START(struct hlcache_retrieval_ctx, hlcache->retrieval_ctx_ring, ictx) { - if (ictx->handle == handle && + if (ictx->handle == handle && ictx->migrate_target == false) { /* This is the nascent context for us, * so abort the fetch */ @@ -347,7 +740,7 @@ nserror hlcache_handle_abort(hlcache_handle *handle) RING_ITERATE_START(struct hlcache_retrieval_ctx, hlcache->retrieval_ctx_ring, ictx) { - if (ictx->handle == handle && + if (ictx->handle == handle && ictx->migrate_target == false) { /* This is the nascent context for us, * so abort the fetch */ @@ -448,416 +841,3 @@ nsurl *hlcache_handle_get_url(const hlcache_handle *handle) return result; } - -/****************************************************************************** - * High-level cache internals * - ******************************************************************************/ - -/** - * Attempt to clean the cache - */ -void hlcache_clean(void *ignored) -{ - hlcache_entry *entry, *next; - - for (entry = hlcache->content_list; entry != NULL; entry = next) { - next = entry->next; - - if (entry->content == NULL) - continue; - - if (content__get_status(entry->content) == - CONTENT_STATUS_LOADING) - continue; - - if (content_count_users(entry->content) != 0) - continue; - - /** 
\todo This is over-zealous: all unused contents - * will be immediately destroyed. Ideally, we want to - * purge all unused contents that are using stale - * source data, and enough fresh contents such that - * the cache fits in the configured cache size limit. - */ - - /* Remove entry from cache */ - if (entry->prev == NULL) - hlcache->content_list = entry->next; - else - entry->prev->next = entry->next; - - if (entry->next != NULL) - entry->next->prev = entry->prev; - - /* Destroy content */ - content_destroy(entry->content); - - /* Destroy entry */ - free(entry); - } - - /* Attempt to clean the llcache */ - llcache_clean(); - - /* Re-schedule ourselves */ - schedule(hlcache->params.bg_clean_time / 10, hlcache_clean, NULL); -} - -/** - * Handler for low-level cache events - * - * \param handle Handle for which event is issued - * \param event Event data - * \param pw Pointer to client-specific data - * \return NSERROR_OK on success, appropriate error otherwise - */ -nserror hlcache_llcache_callback(llcache_handle *handle, - const llcache_event *event, void *pw) -{ - hlcache_retrieval_ctx *ctx = pw; - lwc_string *effective_type = NULL; - nserror error; - - assert(ctx->llcache == handle); - - switch (event->type) { - case LLCACHE_EVENT_HAD_HEADERS: - error = mimesniff_compute_effective_type(handle, NULL, 0, - ctx->flags & HLCACHE_RETRIEVE_SNIFF_TYPE, - ctx->accepted_types == CONTENT_IMAGE, - &effective_type); - if (error == NSERROR_OK || error == NSERROR_NOT_FOUND) { - /* If the sniffer was successful or failed to find - * a Content-Type header when sniffing was - * prohibited, we must migrate the retrieval context. 
*/ - error = hlcache_migrate_ctx(ctx, effective_type); - - if (effective_type != NULL) - lwc_string_unref(effective_type); - } - - /* No need to report that we need data: - * we'll get some anyway if there is any */ - if (error == NSERROR_NEED_DATA) - error = NSERROR_OK; - - return error; - - break; - case LLCACHE_EVENT_HAD_DATA: - error = mimesniff_compute_effective_type(handle, - event->data.data.buf, event->data.data.len, - ctx->flags & HLCACHE_RETRIEVE_SNIFF_TYPE, - ctx->accepted_types == CONTENT_IMAGE, - &effective_type); - if (error != NSERROR_OK) { - assert(0 && "MIME sniff failed with data"); - } - - error = hlcache_migrate_ctx(ctx, effective_type); - - lwc_string_unref(effective_type); - - return error; - - break; - case LLCACHE_EVENT_DONE: - /* DONE event before we could determine the effective MIME type. - */ - error = mimesniff_compute_effective_type(handle, - NULL, 0, false, false, &effective_type); - if (error == NSERROR_OK) { - error = hlcache_migrate_ctx(ctx, effective_type); - - lwc_string_unref(effective_type); - - return error; - } - - if (ctx->handle->cb != NULL) { - hlcache_event hlevent; - - hlevent.type = CONTENT_MSG_ERROR; - hlevent.data.error = messages_get("BadType"); - - ctx->handle->cb(ctx->handle, &hlevent, ctx->handle->pw); - } - break; - case LLCACHE_EVENT_ERROR: - if (ctx->handle->cb != NULL) { - hlcache_event hlevent; - - hlevent.type = CONTENT_MSG_ERROR; - hlevent.data.error = event->data.msg; - - ctx->handle->cb(ctx->handle, &hlevent, ctx->handle->pw); - } - break; - case LLCACHE_EVENT_PROGRESS: - break; - case LLCACHE_EVENT_REDIRECT: - if (ctx->handle->cb != NULL) { - hlcache_event hlevent; - - hlevent.type = CONTENT_MSG_REDIRECT; - hlevent.data.redirect.from = event->data.redirect.from; - hlevent.data.redirect.to = event->data.redirect.to; - - ctx->handle->cb(ctx->handle, &hlevent, ctx->handle->pw); - } - break; - } - - return NSERROR_OK; -} - -/** - * Migrate a retrieval context into its final destination content - * - * \param 
ctx Context to migrate - * \param effective_type The effective MIME type of the content, or NULL - * \return NSERROR_OK on success, - * NSERROR_NEED_DATA on success where data is needed, - * appropriate error otherwise - */ -nserror hlcache_migrate_ctx(hlcache_retrieval_ctx *ctx, - lwc_string *effective_type) -{ - content_type type = CONTENT_NONE; - nserror error = NSERROR_OK; - - ctx->migrate_target = true; - - if (effective_type != NULL && - hlcache_type_is_acceptable(effective_type, - ctx->accepted_types, &type)) { - error = hlcache_find_content(ctx, effective_type); - if (error != NSERROR_OK && error != NSERROR_NEED_DATA) { - if (ctx->handle->cb != NULL) { - hlcache_event hlevent; - - hlevent.type = CONTENT_MSG_ERROR; - hlevent.data.error = messages_get("MiscError"); - - ctx->handle->cb(ctx->handle, &hlevent, - ctx->handle->pw); - } - - llcache_handle_abort(ctx->llcache); - llcache_handle_release(ctx->llcache); - } - } else if (type == CONTENT_NONE && - (ctx->flags & HLCACHE_RETRIEVE_MAY_DOWNLOAD)) { - /* Unknown type, and we can download, so convert */ - llcache_handle_force_stream(ctx->llcache); - - if (ctx->handle->cb != NULL) { - hlcache_event hlevent; - - hlevent.type = CONTENT_MSG_DOWNLOAD; - hlevent.data.download = ctx->llcache; - - ctx->handle->cb(ctx->handle, &hlevent, - ctx->handle->pw); - } - - /* Ensure caller knows we need data */ - error = NSERROR_NEED_DATA; - } else { - /* Unacceptable type: report error */ - if (ctx->handle->cb != NULL) { - hlcache_event hlevent; - - hlevent.type = CONTENT_MSG_ERROR; - hlevent.data.error = messages_get("UnacceptableType"); - - ctx->handle->cb(ctx->handle, &hlevent, - ctx->handle->pw); - } - - llcache_handle_abort(ctx->llcache); - llcache_handle_release(ctx->llcache); - } - - ctx->migrate_target = false; - - /* No longer require retrieval context */ - RING_REMOVE(hlcache->retrieval_ctx_ring, ctx); - free((char *) ctx->child.charset); - free(ctx); - - return error; -} - -/** - * Determine if the specified MIME 
type is acceptable - * - * \param mime_type MIME type to consider - * \param accepted_types Array of acceptable types, or NULL for any - * \param computed_type Pointer to location to receive computed type of object - * \return True if the type is acceptable, false otherwise - */ -bool hlcache_type_is_acceptable(lwc_string *mime_type, - content_type accepted_types, content_type *computed_type) -{ - content_type type; - - type = content_factory_type_from_mime_type(mime_type); - - *computed_type = type; - - return ((accepted_types & type) != 0); -} - -/** - * Find a content for the high-level cache handle - * - * \param ctx High-level cache retrieval context - * \param effective_type Effective MIME type of content - * \return NSERROR_OK on success, - * NSERROR_NEED_DATA on success where data is needed, - * appropriate error otherwise - * - * \pre handle::state == HLCACHE_HANDLE_NEW - * \pre Headers must have been received for associated low-level handle - * \post Low-level handle is either released, or associated with new content - * \post High-level handle is registered with content - */ -nserror hlcache_find_content(hlcache_retrieval_ctx *ctx, - lwc_string *effective_type) -{ - hlcache_entry *entry; - hlcache_event event; - nserror error = NSERROR_OK; - - /* Search list of cached contents for a suitable one */ - for (entry = hlcache->content_list; entry != NULL; entry = entry->next) { - hlcache_handle entry_handle = { entry, NULL, NULL }; - const llcache_handle *entry_llcache; - - if (entry->content == NULL) - continue; - - /* Ignore contents in the error state */ - if (content_get_status(&entry_handle) == CONTENT_STATUS_ERROR) - continue; - - /* Ensure that content is shareable */ - if (content_is_shareable(entry->content) == false) - continue; - - /* Ensure that quirks mode is acceptable */ - if (content_matches_quirks(entry->content, - ctx->child.quirks) == false) - continue; - - /* Ensure that content uses same low-level object as - * low-level handle */ - 
entry_llcache = content_get_llcache_handle(entry->content); - - if (llcache_handle_references_same_object(entry_llcache, - ctx->llcache)) - break; - } - - if (entry == NULL) { - /* No existing entry, so need to create one */ - entry = malloc(sizeof(hlcache_entry)); - if (entry == NULL) - return NSERROR_NOMEM; - - /* Create content using llhandle */ - entry->content = content_factory_create_content(ctx->llcache, - ctx->child.charset, ctx->child.quirks, - effective_type); - if (entry->content == NULL) { - free(entry); - return NSERROR_NOMEM; - } - - /* Insert into cache */ - entry->prev = NULL; - entry->next = hlcache->content_list; - if (hlcache->content_list != NULL) - hlcache->content_list->prev = entry; - hlcache->content_list = entry; - - /* Signal to caller that we created a content */ - error = NSERROR_NEED_DATA; - - hlcache->miss_count++; - } else { - /* Found a suitable content: no longer need low-level handle */ - llcache_handle_release(ctx->llcache); - hlcache->hit_count++; - } - - /* Associate handle with content */ - if (content_add_user(entry->content, - hlcache_content_callback, ctx->handle) == false) - return NSERROR_NOMEM; - - /* Associate cache entry with handle */ - ctx->handle->entry = entry; - - /* Catch handle up with state of content */ - if (ctx->handle->cb != NULL) { - content_status status = content_get_status(ctx->handle); - - if (status == CONTENT_STATUS_LOADING) { - event.type = CONTENT_MSG_LOADING; - ctx->handle->cb(ctx->handle, &event, ctx->handle->pw); - } else if (status == CONTENT_STATUS_READY) { - event.type = CONTENT_MSG_LOADING; - ctx->handle->cb(ctx->handle, &event, ctx->handle->pw); - - if (ctx->handle->cb != NULL) { - event.type = CONTENT_MSG_READY; - ctx->handle->cb(ctx->handle, &event, - ctx->handle->pw); - } - } else if (status == CONTENT_STATUS_DONE) { - event.type = CONTENT_MSG_LOADING; - ctx->handle->cb(ctx->handle, &event, ctx->handle->pw); - - if (ctx->handle->cb != NULL) { - event.type = CONTENT_MSG_READY; - 
ctx->handle->cb(ctx->handle, &event, - ctx->handle->pw); - } - - if (ctx->handle->cb != NULL) { - event.type = CONTENT_MSG_DONE; - ctx->handle->cb(ctx->handle, &event, - ctx->handle->pw); - } - } - } - - return error; -} - -/** - * Veneer between content callback API and hlcache callback API - * - * \param c Content to emit message for - * \param msg Message to emit - * \param data Data for message - * \param pw Pointer to private data (hlcache_handle) - */ -void hlcache_content_callback(struct content *c, content_msg msg, - union content_msg_data data, void *pw) -{ - hlcache_handle *handle = pw; - hlcache_event event; - nserror error = NSERROR_OK; - - event.type = msg; - event.data = data; - - if (handle->cb != NULL) - error = handle->cb(handle, &event, handle->pw); - - if (error != NSERROR_OK) - LOG(("Error in callback: %d", error)); -} diff --git a/content/hlcache.h b/content/hlcache.h index 41f1ed6f4..e0bf4161d 100644 --- a/content/hlcache.h +++ b/content/hlcache.h @@ -23,11 +23,12 @@ #ifndef NETSURF_CONTENT_HLCACHE_H_ #define NETSURF_CONTENT_HLCACHE_H_ -#include "content/content.h" -#include "content/llcache.h" #include "utils/errors.h" #include "utils/nsurl.h" +#include "content/content.h" +#include "content/llcache.h" + /** High-level cache handle */ typedef struct hlcache_handle hlcache_handle; @@ -44,18 +45,10 @@ typedef struct { } hlcache_event; struct hlcache_parameters { - llcache_query_callback cb; /**< Query handler for llcache */ - void *cb_ctx; /**< Pointer to llcache query handler data */ - /** How frequently the background cache clean process is run (ms) */ unsigned int bg_clean_time; - /** The target upper bound for the cache size */ - size_t limit; - - /** The hysteresis allowed round the target size */ - size_t hysteresis; - + struct llcache_parameters llcache; }; /** @@ -67,13 +60,13 @@ struct hlcache_parameters { * \return NSERROR_OK on success, appropriate error otherwise. 
*/ typedef nserror (*hlcache_handle_callback)(hlcache_handle *handle, - const hlcache_event *event, void *pw); + const hlcache_event *event, void *pw); /** Flags for high-level cache object retrieval */ enum hlcache_retrieve_flag { - /* Note: low-level cache retrieval flags occupy the bottom 16 bits of - * the flags word. High-level cache flags occupy the top 16 bits. - * To avoid confusion, high-level flags are allocated from bit 31 down. + /* Note: low-level cache retrieval flags occupy the bottom 16 bits of + * the flags word. High-level cache flags occupy the top 16 bits. + * To avoid confusion, high-level flags are allocated from bit 31 down. */ /** It's permitted to convert this request into a download */ HLCACHE_RETRIEVE_MAY_DOWNLOAD = (1 << 31), @@ -84,7 +77,7 @@ enum hlcache_retrieve_flag { /** * Initialise the high-level cache, preparing the llcache also. * - * \param hlcache_parameters Settings to initialise cache with + * \param hlcache_parameters Settings to initialise cache with * \return NSERROR_OK on success, appropriate error otherwise. */ nserror hlcache_initialise(const struct hlcache_parameters *hlcache_parameters); @@ -101,14 +94,6 @@ void hlcache_stop(void); void hlcache_finalise(void); /** - * Drive the low-level cache poll loop, and attempt to clean the cache. - * No guarantee is made about what, if any, cache cleaning will occur. 
- * - * \return NSERROR_OK - */ -nserror hlcache_poll(void); - -/** * Retrieve a high-level cache handle for an object * * \param url URL of the object to retrieve handle for @@ -133,7 +118,7 @@ nserror hlcache_poll(void); nserror hlcache_handle_retrieve(nsurl *url, uint32_t flags, nsurl *referer, llcache_post_data *post, hlcache_handle_callback cb, void *pw, - hlcache_child_context *child, + hlcache_child_context *child, content_type accepted_types, hlcache_handle **result); /** @@ -169,13 +154,13 @@ nserror hlcache_handle_replace_callback(hlcache_handle *handle, * \param handle Cache handle to dereference * \return Pointer to content object, or NULL if there is none * - * \todo This may not be correct. Ideally, the client should never need to - * directly access a content object. It may, therefore, be better to provide a - * bunch of veneers here that take a hlcache_handle and invoke the + * \todo This may not be correct. Ideally, the client should never need to + * directly access a content object. It may, therefore, be better to provide a + * bunch of veneers here that take a hlcache_handle and invoke the * corresponding content_ API. If there's no content object associated with the - * hlcache_handle (e.g. because the source data is still being fetched, so it - * doesn't exist yet), then these veneers would behave as a NOP. The important - * thing being that the client need not care about this possibility and can + * hlcache_handle (e.g. because the source data is still being fetched, so it + * doesn't exist yet), then these veneers would behave as a NOP. The important + * thing being that the client need not care about this possibility and can * just call the functions with impugnity. 
*/ struct content *hlcache_handle_get_content(const hlcache_handle *handle); diff --git a/content/llcache.c b/content/llcache.c index f2e519f49..ad4d6d1f7 100644 --- a/content/llcache.c +++ b/content/llcache.c @@ -17,25 +17,50 @@ */ /** \file - * Low-level resource cache (implementation) + * Low-level resource cache implementation + * + * This is the implementation of the low level cache. This cache + * stores source objects in memory and may use a persistant backing + * store to extend their lifetime. + * + * \todo fix writeout conditions and ordering. + * + * \todo support mmaped retrieve + * + * \todo instrument and (auto)tune + * */ #include <stdlib.h> #include <string.h> -#include <time.h> - #include <curl/curl.h> -#include "content/fetch.h" -#include "content/llcache.h" -#include "content/urldb.h" +#include "utils/config.h" + +#include "utils/corestrings.h" #include "utils/log.h" #include "utils/messages.h" #include "utils/nsurl.h" #include "utils/utils.h" +#include "utils/time.h" +#include "desktop/gui_factory.h" + +#include "content/fetch.h" +#include "content/backing_store.h" +#include "content/urldb.h" /** Define to enable tracing of llcache operations. 
*/ #undef LLCACHE_TRACE +//#define LLCACHE_TRACE 1 + +#ifdef LLCACHE_TRACE +#define LLCACHE_LOG(x) LOG(x) +#else +#define LLCACHE_LOG(x) +#endif + +#define LLCACHE_MIN_DISC_LIFETIME 3600 +#define LLCACHE_MAX_DISC_BANDWIDTH (512*1024) /** State of a low-level cache object fetch */ typedef enum { @@ -74,7 +99,7 @@ typedef struct llcache_object_user { typedef struct { uint32_t flags; /**< Fetch flags */ nsurl *referer; /**< Referring URL, or NULL if none */ - llcache_post_data *post; /**< POST data, or NULL for GET */ + llcache_post_data *post; /**< POST data, or NULL for GET */ struct fetch *fetch; /**< Fetch handle for this object */ @@ -89,19 +114,23 @@ typedef struct { bool outstanding_query; /**< Waiting for a query response */ } llcache_fetch_ctx; +/** validation control */ typedef enum { LLCACHE_VALIDATE_FRESH, /**< Only revalidate if not fresh */ LLCACHE_VALIDATE_ALWAYS, /**< Always revalidate */ LLCACHE_VALIDATE_ONCE /**< Revalidate once only */ } llcache_validate; +/** cache control value for invalid age */ +#define INVALID_AGE -1 + /** Cache control data */ typedef struct { time_t req_time; /**< Time of request */ time_t res_time; /**< Time of response */ + time_t fin_time; /**< Time of request completion */ time_t date; /**< Date: response header */ time_t expires; /**< Expires: response header */ -#define INVALID_AGE -1 int age; /**< Age: response header */ int max_age; /**< Max-Age Cache-control parameter */ llcache_validate no_cache; /**< No-Cache Cache-control parameter */ @@ -115,32 +144,49 @@ typedef struct { char *value; /**< Header value */ } llcache_header; +/** Current status of objects data */ +typedef enum { + LLCACHE_STATE_RAM = 0, /**< source data is stored in RAM only */ + LLCACHE_STATE_MMAP, /**< source data is mmaped (implies on disc too) */ + LLCACHE_STATE_DISC, /**< source data is stored on disc */ +} llcache_store_state; + /** Low-level cache object */ /** \todo Consider whether a list is a sane container */ struct llcache_object { - 
llcache_object *prev; /**< Previous in list */ - llcache_object *next; /**< Next in list */ + llcache_object *prev; /**< Previous in list */ + llcache_object *next; /**< Next in list */ + + nsurl *url; /**< Post-redirect URL for object */ - nsurl *url; /**< Post-redirect URL for object */ - bool has_query; /**< URL has a query segment */ - /** \todo We need a generic dynamic buffer object */ - uint8_t *source_data; /**< Source data for object */ - size_t source_len; /**< Byte length of source data */ - size_t source_alloc; /**< Allocated size of source buffer */ + uint8_t *source_data; /**< Source data for object */ + size_t source_len; /**< Byte length of source data */ + size_t source_alloc; /**< Allocated size of source buffer */ + + llcache_store_state store_state; /**< where the data for the object is stored */ - llcache_object_user *users; /**< List of users */ + llcache_object_user *users; /**< List of users */ - llcache_fetch_ctx fetch; /**< Fetch context for object */ + llcache_fetch_ctx fetch; /**< Fetch context for object */ - llcache_cache_control cache; /**< Cache control data for object */ - llcache_object *candidate; /**< Object to use, if fetch determines - * that it is still fresh */ - uint32_t candidate_count; /**< Count of objects this is a - * candidate for */ + llcache_cache_control cache; /**< Cache control data for object */ + llcache_object *candidate; /**< Object to use, if fetch determines + * that it is still fresh + */ + uint32_t candidate_count; /**< Count of objects this is a + * candidate for + */ - llcache_header *headers; /**< Fetch headers */ - size_t num_headers; /**< Number of fetch headers */ + llcache_header *headers; /**< Fetch headers */ + size_t num_headers; /**< Number of fetch headers */ + + /* Instrumentation. These elemnts are strictly for information + * to improve the cache performance and to provide performace + * metrics. The values are non-authorative and must not be used to + * determine object lifetime etc. 
+ */ + time_t last_used; /**< time the last user was removed from the object */ }; struct llcache_s { @@ -156,19 +202,28 @@ struct llcache_s { /** Head of the low-level uncached object list */ llcache_object *uncached_objects; + /** The target upper bound for the RAM cache size */ uint32_t limit; + + /** The minimum lifetime to consider sending objects to + * backing store. + */ + int minimum_lifetime; + + /** The maximum bandwidth to allow the backing store to use. */ + size_t bandwidth; + + /** Whether or not our users are caught up */ + bool all_caught_up; }; /** low level cache state */ static struct llcache_s *llcache = NULL; -/* Static lwc_strings */ -static lwc_string *llcache_file_lwc; -static lwc_string *llcache_about_lwc; -static lwc_string *llcache_resource_lwc; - /* forward referenced callback function */ static void llcache_fetch_callback(const fetch_msg *msg, void *p); +/* forward referenced catch up function */ +static void llcache_users_not_caught_up(void); /****************************************************************************** @@ -204,9 +259,7 @@ static nserror llcache_object_user_new(llcache_handle_callback cb, void *pw, u->handle = h; -#ifdef LLCACHE_TRACE - LOG(("Created user %p (%p, %p, %p)", u, h, (void *) cb, pw)); -#endif + LLCACHE_LOG(("Created user %p (%p, %p, %p)", u, h, (void *) cb, pw)); *user = u; @@ -223,13 +276,11 @@ static nserror llcache_object_user_new(llcache_handle_callback cb, void *pw, */ static nserror llcache_object_user_destroy(llcache_object_user *user) { -#ifdef LLCACHE_TRACE - LOG(("Destroyed user %p", user)); -#endif - + LLCACHE_LOG(("Destroyed user %p", user)); + assert(user->next == NULL); assert(user->prev == NULL); - + if (user->handle != NULL) free(user->handle); @@ -245,7 +296,7 @@ static nserror llcache_object_user_destroy(llcache_object_user *user) * \param user User to remove * \return NSERROR_OK. 
*/ -static nserror llcache_object_remove_user(llcache_object *object, +static nserror llcache_object_remove_user(llcache_object *object, llcache_object_user *user) { assert(user != NULL); @@ -253,7 +304,7 @@ static nserror llcache_object_remove_user(llcache_object *object, assert(object->users != NULL); assert(user->handle == NULL || user->handle->object == object); assert((user->prev != NULL) || (object->users == user)); - + if (user == object->users) object->users = user->next; else @@ -261,12 +312,15 @@ static nserror llcache_object_remove_user(llcache_object *object, if (user->next != NULL) user->next->prev = user->prev; - + user->next = user->prev = NULL; - -#ifdef LLCACHE_TRACE - LOG(("Removing user %p from %p", user, object)); -#endif + + /* record the time the last user was removed from the object */ + if (object->users == NULL) { + object->last_used = time(NULL); + } + + LLCACHE_LOG(("Removing user %p from %p", user, object)); return NSERROR_OK; } @@ -283,7 +337,7 @@ static nserror llcache_send_event_to_users(llcache_object *object, { nserror error = NSERROR_OK; llcache_object_user *user, *next_user; - + user = object->users; while (user != NULL) { user->iterator_target = true; @@ -305,7 +359,7 @@ static nserror llcache_send_event_to_users(llcache_object *object, user = next_user; } - + return error; } @@ -322,9 +376,7 @@ static nserror llcache_object_new(nsurl *url, llcache_object **result) if (obj == NULL) return NSERROR_NOMEM; -#ifdef LLCACHE_TRACE - LOG(("Created object %p (%s)", obj, nsurl_access(url))); -#endif + LLCACHE_LOG(("Created object %p (%s)", obj, nsurl_access(url))); obj->url = nsurl_ref(url); @@ -340,7 +392,7 @@ static nserror llcache_object_new(nsurl *url, llcache_object **result) * \param clone Pointer to location to receive clone * \return NSERROR_OK on success, appropriate error otherwise */ -static nserror llcache_post_data_clone(const llcache_post_data *orig, +static nserror llcache_post_data_clone(const llcache_post_data *orig, 
llcache_post_data **clone) { llcache_post_data *post_clone; @@ -383,17 +435,31 @@ static nserror llcache_post_data_clone(const llcache_post_data *orig, * \param value Pointer to location to receive header value * \return NSERROR_OK on success, appropriate error otherwise */ -static nserror llcache_fetch_split_header(const uint8_t *data, size_t len, +static nserror llcache_fetch_split_header(const uint8_t *data, size_t len, char **name, char **value) { char *n, *v; const uint8_t *colon; + /* Strip leading whitespace from name */ + while (data[0] == ' ' || data[0] == '\t' || + data[0] == '\r' || data[0] == '\n') { + data++; + } + /* Find colon */ colon = (const uint8_t *) strchr((const char *) data, ':'); if (colon == NULL) { /* Failed, assume a key with no value */ - n = strdup((const char *) data); + colon = data + strlen((const char *)data); + + /* Strip trailing whitespace from name */ + while ((colon > data) && + (colon[-1] == ' ' || colon[-1] == '\t' || + colon[-1] == '\r' || colon[-1] == '\n')) { + colon--; + } + n = strndup((const char *) data, colon - data); if (n == NULL) return NSERROR_NOMEM; @@ -405,15 +471,9 @@ static nserror llcache_fetch_split_header(const uint8_t *data, size_t len, } else { /* Split header into name & value */ - /* Strip leading whitespace from name */ - while (data[0] == ' ' || data[0] == '\t' || - data[0] == '\r' || data[0] == '\n') { - data++; - } - /* Strip trailing whitespace from name */ - while (colon > data && (colon[-1] == ' ' || - colon[-1] == '\t' || colon[-1] == '\r' || + while (colon > data && (colon[-1] == ' ' || + colon[-1] == '\t' || colon[-1] == '\r' || colon[-1] == '\n')) colon--; @@ -429,12 +489,12 @@ static nserror llcache_fetch_split_header(const uint8_t *data, size_t len, /* Skip over colon and any subsequent whitespace */ do { colon++; - } while (*colon == ' ' || *colon == '\t' || + } while (*colon == ' ' || *colon == '\t' || *colon == '\r' || *colon == '\n'); /* Strip trailing whitespace from value */ - while 
(len > 0 && (data[len - 1] == ' ' || - data[len - 1] == '\t' || + while (len > 0 && (data[len - 1] == ' ' || + data[len - 1] == '\t' || data[len - 1] == '\r' || data[len - 1] == '\n')) { len--; @@ -463,11 +523,11 @@ static nserror llcache_fetch_split_header(const uint8_t *data, size_t len, * \param value Pointer to location to receive header value * \return NSERROR_OK on success, appropriate error otherwise * - * \note This function also has the side-effect of updating + * \note This function also has the side-effect of updating * the cache control data for the object if an interesting * header is encountered */ -static nserror llcache_fetch_parse_header(llcache_object *object, +static nserror llcache_fetch_parse_header(llcache_object *object, const uint8_t *data, size_t len, char **name, char **value) { nserror error; @@ -503,13 +563,13 @@ static nserror llcache_fetch_parse_header(llcache_object *object, while (*comma != '\0' && *comma != ',') comma++; - if (8 < comma - start && (strncasecmp(start, - "no-cache", 8) == 0 || + if (8 < comma - start && (strncasecmp(start, + "no-cache", 8) == 0 || strncasecmp(start, "no-store", 8) == 0)) /* When we get a disk cache we should * distinguish between these two */ object->cache.no_cache = LLCACHE_VALIDATE_ALWAYS; - else if (7 < comma - start && + else if (7 < comma - start && strncasecmp(start, "max-age", 7) == 0) { /* Find '=' */ while (start < comma && *start != '=') @@ -551,7 +611,7 @@ static nserror llcache_fetch_parse_header(llcache_object *object, #undef SKIP_ST - return NSERROR_OK; + return NSERROR_OK; } /* Destroy headers */ @@ -585,7 +645,7 @@ static inline void llcache_invalidate_cache_control_data(llcache_object *object) * \param len Byte length of header * \return NSERROR_OK on success, appropriate error otherwise */ -static nserror llcache_fetch_process_header(llcache_object *object, +static nserror llcache_fetch_process_header(llcache_object *object, const uint8_t *data, size_t len) { nserror error; @@ 
-593,14 +653,14 @@ static nserror llcache_fetch_process_header(llcache_object *object, llcache_header *temp; /* The headers for multiple HTTP responses may be delivered to us if - * the fetch layer receives a 401 response for which it has + * the fetch layer receives a 401 response for which it has * authentication credentials. This will result in a silent re-request * after which we'll receive the actual response headers for the * object we want to fetch (assuming that the credentials were correct * of course) * - * Therefore, if the header is an HTTP response start marker, then we - * must discard any headers we've read so far, reset the cache data + * Therefore, if the header is an HTTP response start marker, then we + * must discard any headers we've read so far, reset the cache data * that we might have computed, and start again. */ /** \todo Properly parse the response line */ @@ -620,8 +680,15 @@ static nserror llcache_fetch_process_header(llcache_object *object, return error; } + /* deal with empty header */ + if (name[0] == 0) { + free(name); + free(value); + return NSERROR_OK; + } + /* Append header data to the object's headers array */ - temp = realloc(object->headers, (object->num_headers + 1) * + temp = realloc(object->headers, (object->num_headers + 1) * sizeof(llcache_header)); if (temp == NULL) { free(name); @@ -646,7 +713,7 @@ static nserror llcache_fetch_process_header(llcache_object *object, * \return NSERROR_OK on success, appropriate error otherwise * * \pre The fetch parameters in object->fetch must be populated - */ + */ static nserror llcache_object_refetch(llcache_object *object) { const char *urlenc = NULL; @@ -667,7 +734,7 @@ static nserror llcache_object_refetch(llcache_object *object) return NSERROR_NOMEM; if (object->cache.etag != NULL) { - const size_t len = SLEN("If-None-Match: ") + + const size_t len = SLEN("If-None-Match: ") + strlen(object->cache.etag) + 1; headers[header_idx] = malloc(len); @@ -703,13 +770,12 @@ static nserror 
llcache_object_refetch(llcache_object *object) /* Reset cache control data */ llcache_invalidate_cache_control_data(object); object->cache.req_time = time(NULL); + object->cache.fin_time = object->cache.req_time; /* Reset fetch state */ object->fetch.state = LLCACHE_FETCH_INIT; -#ifdef LLCACHE_TRACE - LOG(("Refetching %p", object)); -#endif + LLCACHE_LOG(("Refetching %p", object)); /* Kick off fetch */ object->fetch.fetch = fetch_start(object->url, object->fetch.referer, @@ -743,7 +809,7 @@ static nserror llcache_object_refetch(llcache_object *object) * \return NSERROR_OK on success, appropriate error otherwise * * \pre object::url must contain the URL to fetch - * \pre If there is a freshness validation candidate, + * \pre If there is a freshness validation candidate, * object::candidate and object::cache must be filled in * \pre There must not be a fetch in progress for \a object */ @@ -755,9 +821,7 @@ static nserror llcache_object_fetch(llcache_object *object, uint32_t flags, nsurl *referer_clone = NULL; llcache_post_data *post_clone = NULL; -#ifdef LLCACHE_TRACE - LOG(("Starting fetch for %p", object)); -#endif + LLCACHE_LOG(("Starting fetch for %p", object)); if (post != NULL) { error = llcache_post_data_clone(post, &post_clone); @@ -790,9 +854,7 @@ static nserror llcache_object_destroy(llcache_object *object) { size_t i; -#ifdef LLCACHE_TRACE - LOG(("Destroying object %p", object)); -#endif + LLCACHE_LOG(("Destroying object %p", object)); nsurl_unref(object->url); free(object->source_data); @@ -876,9 +938,7 @@ llcache_object_rfc2616_remaining_lifetime(const llcache_cache_control *cd) else freshness_lifetime = 0; -#ifdef LLCACHE_TRACE - LOG(("%d:%d", freshness_lifetime, current_age)); -#endif + /* LLCACHE_LOG(("%d:%d", freshness_lifetime, current_age)); */ if ((cd->no_cache == LLCACHE_VALIDATE_FRESH) && (freshness_lifetime > current_age)) { @@ -907,17 +967,15 @@ static bool llcache_object_is_fresh(const llcache_object *object) remaining_lifetime = 
llcache_object_rfc2616_remaining_lifetime(cd); -#ifdef LLCACHE_TRACE - LOG(("%p: (%d > 0 || %d != %d)", object, + LLCACHE_LOG(("%p: (%d > 0 || %d != %d)", object, remaining_lifetime, object->fetch.state, LLCACHE_FETCH_COMPLETE)); -#endif /* The object is fresh if: * - it was not forbidden from being returned from the cache * unvalidated. * - * - it has remaining lifetime or still being fetched. + * - it has remaining lifetime or still being fetched. */ return ((cd->no_cache == LLCACHE_VALIDATE_FRESH) && ((remaining_lifetime > 0) || @@ -959,6 +1017,7 @@ static nserror llcache_object_clone_cache_data(llcache_object *source, destination->cache.req_time = source->cache.req_time; destination->cache.res_time = source->cache.res_time; + destination->cache.fin_time = source->cache.fin_time; if (source->cache.date != 0) destination->cache.date = source->cache.date; @@ -974,7 +1033,7 @@ static nserror llcache_object_clone_cache_data(llcache_object *source, if (source->cache.no_cache != LLCACHE_VALIDATE_FRESH) destination->cache.no_cache = source->cache.no_cache; - + if (source->cache.last_modified != 0) destination->cache.last_modified = source->cache.last_modified; @@ -982,6 +1041,377 @@ static nserror llcache_object_clone_cache_data(llcache_object *source, } /** + * Remove a low-level cache object from a cache list + * + * \param object Object to remove + * \param list List to remove from + * \return NSERROR_OK + */ +static nserror +llcache_object_remove_from_list(llcache_object *object, llcache_object **list) +{ + if (object == *list) + *list = object->next; + else + object->prev->next = object->next; + + if (object->next != NULL) + object->next->prev = object->prev; + + return NSERROR_OK; +} + +/** + * Retrieve source data for an object from persistant store if necessary. + * + * If an objects source data has been placed in the persistant store + * and the in memory copy freed this will attempt to retrive the + * source data. 
+ * + * @param object the object to operate on. + * @return apropriate error code. + */ +static nserror llcache_persist_retrieve(llcache_object *object) +{ + enum backing_store_flags flags = BACKING_STORE_NONE; + + /* ensure the source data is present if necessary */ + if ((object->source_data != NULL) || + (object->store_state != LLCACHE_STATE_DISC)) { + /* source data does not require retriving from + * persistant store. + */ + return NSERROR_OK; + } + + /* Source data for the object may be in the persiatant store */ + return guit->llcache->fetch(object->url, + &flags, + &object->source_data, + &object->source_len); +} + +/** + * Generate a serialised version of an objects metadata + * + * metadata includes object headers + */ +static nserror +llcache_serialise_metadata(llcache_object *object, + uint8_t **data_out, + size_t *datasize_out) +{ + size_t allocsize; + int datasize; + uint8_t *data; + char *op; + unsigned int hloop; + int use; + + allocsize = 10 + 1; /* object length */ + + allocsize += 10 + 1; /* request time */ + + allocsize += 10 + 1; /* response time */ + + allocsize += 10 + 1; /* completion time */ + + allocsize += 10 + 1; /* space for number of header entries */ + + allocsize += nsurl_length(object->url) + 1; + + for (hloop = 0 ; hloop < object->num_headers ; hloop++) { + allocsize += strlen(object->headers[hloop].name) + 1; + allocsize += strlen(object->headers[hloop].value) + 1; + } + + data = malloc(allocsize); + if (data == NULL) { + return NSERROR_NOMEM; + } + + op = (char *)data; + datasize = allocsize; + + /* the url, used for checking for collisions */ + use = snprintf(op, datasize, "%s", nsurl_access(object->url)); + if (use < 0) { + goto operror; + } + use++; /* does not count the null */ + if (use > datasize) { + goto overflow; + } + op += use; + datasize -= use; + + /* object size */ + use = snprintf(op, datasize, "%zu", object->source_len); + if (use < 0) { + goto operror; + } + use++; /* does not count the null */ + if (use > 
datasize) + goto overflow; + op += use; + datasize -= use; + + /* Time of request */ + use = nsc_sntimet(op, datasize, &object->cache.req_time); + if (use == 0) + goto overflow; + use++; /* does not count the null */ + op += use; + datasize -= use; + + /* Time of response */ + use = nsc_sntimet(op, datasize, &object->cache.res_time); + if (use == 0) + goto overflow; + use++; /* does not count the null */ + op += use; + datasize -= use; + + /* Time of completion */ + use = nsc_sntimet(op, datasize, &object->cache.fin_time); + if (use == 0) + goto overflow; + use++; /* does not count the null */ + op += use; + datasize -= use; + + /* number of headers */ + use = snprintf(op, datasize, "%zu", object->num_headers); + if (use < 0) { + goto operror; + } + use++; /* does not count the null */ + if (use > datasize) + goto overflow; + op += use; + datasize -= use; + + /* headers */ + for (hloop = 0 ; hloop < object->num_headers ; hloop++) { + use = snprintf(op, datasize, + "%s:%s", + object->headers[hloop].name, + object->headers[hloop].value); + if (use < 0) { + goto operror; + } + use++; /* does not count the null */ + if (use > datasize) + goto overflow; + op += use; + datasize -= use; + } + + LLCACHE_LOG(("Filled buffer with %d spare", datasize)); + + *data_out = data; + *datasize_out = allocsize - datasize; + + return NSERROR_OK; + +overflow: + /* somehow we overflowed the buffer - hth? */ + LOG(("Overflowed metadata buffer")); + free(data); + return NSERROR_INVALID; + +operror: + /* output error */ + LOG(("Output error")); + free(data); + return NSERROR_INVALID; +} + +/** + * un-serialise an objects metadata. 
+ */ +static nserror +llcache_process_metadata(llcache_object *object) +{ + nserror res; + uint8_t *metadata = NULL; + size_t metadatalen = 0; + nsurl *metadataurl; + unsigned int line; + uint8_t *end; + char *ln; + int lnsize; + size_t num_headers; + size_t hloop; + enum backing_store_flags flags = BACKING_STORE_META; + + LOG(("Retriving metadata")); + + /* attempt to retrieve object metadata from the backing store */ + res = guit->llcache->fetch(object->url, + &flags, + &metadata, + &metadatalen); + if (res != NSERROR_OK) { + return res; + } + + end = metadata + metadatalen; + + LOG(("Processing retrived data")); + + /* metadata line 1 is the url the metadata referrs to */ + line = 1; + ln = (char *)metadata; + lnsize = strlen(ln); + + if (lnsize < 7) + goto format_error; + + res = nsurl_create(ln, &metadataurl); + if (res != NSERROR_OK) { + free(metadata); + return res; + } + + if (nsurl_compare(object->url, metadataurl, NSURL_COMPLETE) != true) { + /* backing store returned the wrong object for the + * request. This may occour if the backing store had + * a collision in its stoage method. We cope with this + * by simply skipping caching of this object. 
+ */ + + LOG(("Got metadata for %s instead of %s", + nsurl_access(metadataurl), + nsurl_access(object->url))); + + nsurl_unref(metadataurl); + + free(metadata); + + return NSERROR_BAD_URL; + } + nsurl_unref(metadataurl); + + + /* metadata line 2 is the objects length */ + line = 2; + ln += lnsize + 1; + lnsize = strlen(ln); + + if ((lnsize < 1) || + (sscanf(ln, "%zu", &object->source_len) != 1)) + goto format_error; + object->source_alloc = metadatalen; + + /* metadata line 3 is the time of request */ + line = 3; + ln += lnsize + 1; + lnsize = strlen(ln); + + if (nsc_snptimet(ln, lnsize, &object->cache.req_time) != NSERROR_OK) + goto format_error; + + /* metadata line 4 is the time of response */ + line = 4; + ln += lnsize + 1; + lnsize = strlen(ln); + + if (nsc_snptimet(ln, lnsize, &object->cache.res_time) != NSERROR_OK) + goto format_error; + + /* metadata line 5 is the time of request completion */ + line = 5; + ln += lnsize + 1; + lnsize = strlen(ln); + + if (nsc_snptimet(ln, lnsize, &object->cache.fin_time) != NSERROR_OK) + goto format_error; + + /* metadata line 6 is the number of headers */ + line = 6; + ln += lnsize + 1; + lnsize = strlen(ln); + + if ((lnsize < 1) || + (sscanf(ln, "%zu", &num_headers) != 1)) + goto format_error; + + + /* read headers */ + for (hloop = 0 ; hloop < num_headers; hloop++) { + line++; + ln += lnsize + 1; + lnsize = strlen(ln); + + res = llcache_fetch_process_header(object, (uint8_t *)ln, lnsize); + if (res != NSERROR_OK) { + free(metadata); + return res; + } + } + + free(metadata); + + /* object stored in backing store */ + object->store_state = LLCACHE_STATE_DISC; + + return NSERROR_OK; + +format_error: + LOG(("metadata error on line %d\n", line)); + free(metadata); + return NSERROR_INVALID; + +} + +/** + * attempt to retrieve an object from persistant storage. 
+ */ +static nserror +llcache_object_fetch_persistant(llcache_object *object, + uint32_t flags, + nsurl *referer, + const llcache_post_data *post, + uint32_t redirect_count) +{ + nserror error; + nsurl *referer_clone = NULL; + llcache_post_data *post_clone = NULL; + + object->cache.req_time = time(NULL); + object->cache.fin_time = object->cache.req_time; + + /* retrieve and process metadata */ + error = llcache_process_metadata(object); + if (error != NSERROR_OK) { + return error; + } + + /* entry came out of cache - need to setup object state */ + if (post != NULL) { + error = llcache_post_data_clone(post, &post_clone); + if (error != NSERROR_OK) + return error; + } + + if (referer != NULL) { + referer_clone = nsurl_ref(referer); + } + + object->fetch.flags = flags; + object->fetch.referer = referer_clone; + object->fetch.post = post_clone; + object->fetch.redirect_count = redirect_count; + + /* fetch is "finished" */ + object->fetch.state = LLCACHE_FETCH_COMPLETE; + object->fetch.fetch = NULL; + + return NSERROR_OK; +} + +/** * Retrieve a potentially cached object * * \param url URL of object to retrieve @@ -992,96 +1422,158 @@ static nserror llcache_object_clone_cache_data(llcache_object *source, * \param result Pointer to location to recieve retrieved object * \return NSERROR_OK on success, appropriate error otherwise */ -static nserror llcache_object_retrieve_from_cache(nsurl *url, uint32_t flags, - nsurl *referer, const llcache_post_data *post, - uint32_t redirect_count, llcache_object **result) +static nserror +llcache_object_retrieve_from_cache(nsurl *url, + uint32_t flags, + nsurl *referer, + const llcache_post_data *post, + uint32_t redirect_count, + llcache_object **result) { nserror error; llcache_object *obj, *newest = NULL; -#ifdef LLCACHE_TRACE - LOG(("Searching cache for %s (%x %s %p)", url, flags, referer, post)); -#endif + LLCACHE_LOG(("Searching cache for %s flags:%x referer:%s post:%p", + nsurl_access(url), flags, 
referer==NULL?"":nsurl_access(referer), post)); /* Search for the most recently fetched matching object */ for (obj = llcache->cached_objects; obj != NULL; obj = obj->next) { - if ((newest == NULL || - obj->cache.req_time > newest->cache.req_time) && - nsurl_compare(obj->url, url, - NSURL_COMPLETE) == true) { + if ((newest == NULL || + obj->cache.req_time > newest->cache.req_time) && + nsurl_compare(obj->url, url, + NSURL_COMPLETE) == true) { newest = obj; } } - if (newest != NULL && llcache_object_is_fresh(newest)) { - /* Found a suitable object, and it's still fresh, so use it */ - obj = newest; + /* No viable object found in cache create one and attempt to + * pull from persistant store. + */ + if (newest == NULL) { + LLCACHE_LOG(("No viable object found in cache")); -#ifdef LLCACHE_TRACE - LOG(("Found fresh %p", obj)); -#endif + error = llcache_object_new(url, &obj); + if (error != NSERROR_OK) + return error; + + /* attempt to retrieve object from persistant store */ + error = llcache_object_fetch_persistant(obj, flags, referer, post, redirect_count); + if (error == NSERROR_OK) { + LLCACHE_LOG(("retrived object from persistant store")); + + /* set object from persistant store as newest */ + newest = obj; + + /* Add new object to cached object list */ + llcache_object_add_to_list(obj, &llcache->cached_objects); + + } + /* else no object found and unretrivable from cache, + * fall through to start fetch + */ + } + + if ((newest != NULL) && (llcache_object_is_fresh(newest))) { + /* Found a suitable object, and it's still fresh */ + LLCACHE_LOG(("Found fresh %p", newest)); /* The client needs to catch up with the object's state. * This will occur the next time that llcache_poll is called. 
*/ - } else if (newest != NULL) { - /* Found a candidate object but it needs freshness validation */ - /* Create a new object */ - error = llcache_object_new(url, &obj); - if (error != NSERROR_OK) - return error; + /* ensure the source data is present */ + error = llcache_persist_retrieve(newest); + if (error == NSERROR_OK) { + /* source data was sucessfully retrived from + * persistant store + */ + *result = newest; -#ifdef LLCACHE_TRACE - LOG(("Found candidate %p (%p)", obj, newest)); -#endif + return NSERROR_OK; + } - /* Clone candidate's cache data */ - error = llcache_object_clone_cache_data(newest, obj, true); - if (error != NSERROR_OK) { - llcache_object_destroy(obj); - return error; - } + /* retrival of source data from persistant store + * failed, destroy cache object and fall though to + * cache miss to re-retch + */ + LLCACHE_LOG(("Persistant retrival failed for %p", newest)); - /* Record candidate, so we can fall back if it is still fresh */ - newest->candidate_count++; - obj->candidate = newest; + llcache_object_remove_from_list(newest, &llcache->cached_objects); + llcache_object_destroy(newest); - /* Attempt to kick-off fetch */ - error = llcache_object_fetch(obj, flags, referer, post, - redirect_count); + error = llcache_object_new(url, &obj); if (error != NSERROR_OK) { - newest->candidate_count--; - llcache_object_destroy(obj); return error; } + } else if (newest != NULL) { + /* Found a candidate object but it needs freshness validation */ - /* Add new object to cache */ - llcache_object_add_to_list(obj, &llcache->cached_objects); - } else { - /* No object found; create a new one */ - /* Create new object */ - error = llcache_object_new(url, &obj); - if (error != NSERROR_OK) - return error; + /* ensure the source data is present */ + error = llcache_persist_retrieve(newest); + if (error == NSERROR_OK) { -#ifdef LLCACHE_TRACE - LOG(("Not found %p", obj)); -#endif + /* Create a new object */ + error = llcache_object_new(url, &obj); + if (error != 
NSERROR_OK) + return error; - /* Attempt to kick-off fetch */ - error = llcache_object_fetch(obj, flags, referer, post, - redirect_count); + LLCACHE_LOG(("Found candidate %p (%p)", obj, newest)); + + /* Clone candidate's cache data */ + error = llcache_object_clone_cache_data(newest, obj, true); + if (error != NSERROR_OK) { + llcache_object_destroy(obj); + return error; + } + + /* Record candidate, so we can fall back if it is still fresh */ + newest->candidate_count++; + obj->candidate = newest; + + /* Attempt to kick-off fetch */ + error = llcache_object_fetch(obj, flags, referer, post, + redirect_count); + if (error != NSERROR_OK) { + newest->candidate_count--; + llcache_object_destroy(obj); + return error; + } + + /* Add new object to cache */ + llcache_object_add_to_list(obj, &llcache->cached_objects); + + *result = obj; + + return NSERROR_OK; + } + + LLCACHE_LOG(("Persistant retrival failed for %p", newest)); + + /* retrival of source data from persistant store + * failed, destroy cache object and fall though to + * cache miss to re-retch + */ + llcache_object_remove_from_list(newest, + &llcache->cached_objects); + llcache_object_destroy(newest); + + error = llcache_object_new(url, &obj); if (error != NSERROR_OK) { - llcache_object_destroy(obj); return error; } + } - /* Add new object to cache */ - llcache_object_add_to_list(obj, &llcache->cached_objects); + /* Attempt to kick-off fetch */ + error = llcache_object_fetch(obj, flags, referer, post, redirect_count); + if (error != NSERROR_OK) { + llcache_object_destroy(obj); + return error; } + /* Add new object to cache */ + llcache_object_add_to_list(obj, &llcache->cached_objects); + *result = obj; return NSERROR_OK; @@ -1104,33 +1596,46 @@ static nserror llcache_object_retrieve(nsurl *url, uint32_t flags, { nserror error; llcache_object *obj; - bool has_query; nsurl *defragmented_url; + bool uncachable = false; -#ifdef LLCACHE_TRACE - LOG(("Retrieve %s (%x, %s, %p)", url, flags, referer, post)); -#endif - - 
/** - * Caching Rules: - * - * 1) Forced fetches are never cached - * 2) POST requests are never cached - */ + LLCACHE_LOG(("Retrieve %s (%x, %s, %p)", nsurl_access(url), flags, + referer==NULL?"":nsurl_access(referer), post)); - /* Look for a query segment */ - has_query = nsurl_has_component(url, NSURL_QUERY); /* Get rid of any url fragment */ - if (nsurl_has_component(url, NSURL_FRAGMENT)) { - error = nsurl_defragment(url, &defragmented_url); - if (error != NSERROR_OK) - return error; + error = nsurl_defragment(url, &defragmented_url); + if (error != NSERROR_OK) + return error; + + /* determine if content is cachable */ + if ((flags & LLCACHE_RETRIEVE_FORCE_FETCH) != 0) { + /* Forced fetches are never cached */ + uncachable = true; + } else if (post != NULL) { + /* POST requests are never cached */ + uncachable = true; } else { - defragmented_url = nsurl_ref(url); + /* only http and https schemes are cached */ + lwc_string *scheme; + bool match; + + scheme = nsurl_get_component(defragmented_url, NSURL_SCHEME); + + if (lwc_string_caseless_isequal(scheme, corestring_lwc_http, + &match) == lwc_error_ok && + (match == false)) { + if (lwc_string_caseless_isequal(scheme, corestring_lwc_https, + &match) == lwc_error_ok && + (match == false)) { + uncachable = true; + } + } + lwc_string_unref(scheme); } - if (flags & LLCACHE_RETRIEVE_FORCE_FETCH || post != NULL) { + + if (uncachable) { /* Create new object */ error = llcache_object_new(defragmented_url, &obj); if (error != NSERROR_OK) { @@ -1139,7 +1644,7 @@ static nserror llcache_object_retrieve(nsurl *url, uint32_t flags, } /* Attempt to kick-off fetch */ - error = llcache_object_fetch(obj, flags, referer, post, + error = llcache_object_fetch(obj, flags, referer, post, redirect_count); if (error != NSERROR_OK) { llcache_object_destroy(obj); @@ -1159,17 +1664,13 @@ static nserror llcache_object_retrieve(nsurl *url, uint32_t flags, /* Returned object is already in the cached list */ } - - obj->has_query = has_query; 
-#ifdef LLCACHE_TRACE - LOG(("Retrieved %p", obj)); -#endif - + LLCACHE_LOG(("Retrieved %p", obj)); + *result = obj; - + nsurl_unref(defragmented_url); - + return NSERROR_OK; } @@ -1196,9 +1697,7 @@ static nserror llcache_object_add_user(llcache_object *object, object->users->prev = user; object->users = user; -#ifdef LLCACHE_TRACE - LOG(("Adding user %p to %p", user, object)); -#endif + LLCACHE_LOG(("Adding user %p to %p", user, object)); return NSERROR_OK; } @@ -1229,13 +1728,13 @@ static nserror llcache_fetch_redirect(llcache_object *object, const char *target /* Abort fetch for this object */ fetch_abort(object->fetch.fetch); object->fetch.fetch = NULL; - + /* Invalidate the cache control data */ llcache_invalidate_cache_control_data(object); /* And mark it complete */ object->fetch.state = LLCACHE_FETCH_COMPLETE; - + /* Forcibly stop redirecting if we've followed too many redirects */ #define REDIRECT_LIMIT 10 if (object->fetch.redirect_count > REDIRECT_LIMIT) { @@ -1243,7 +1742,7 @@ static nserror llcache_fetch_redirect(llcache_object *object, const char *target event.type = LLCACHE_EVENT_ERROR; event.data.msg = messages_get("BadRedirect"); - + return llcache_send_event_to_users(object, &event); } #undef REDIRECT_LIMIT @@ -1272,16 +1771,16 @@ static nserror llcache_fetch_redirect(llcache_object *object, const char *target scheme = nsurl_get_component(url, NSURL_SCHEME); /* resource: and about: are allowed to redirect anywhere */ - if ((lwc_string_isequal(object_scheme, llcache_resource_lwc, + if ((lwc_string_isequal(object_scheme, corestring_lwc_resource, &match) == lwc_error_ok && match == false) && - (lwc_string_isequal(object_scheme, llcache_about_lwc, + (lwc_string_isequal(object_scheme, corestring_lwc_about, &match) == lwc_error_ok && match == false)) { /* file, about and resource are not valid redirect targets */ - if ((lwc_string_isequal(object_scheme, llcache_file_lwc, + if ((lwc_string_isequal(object_scheme, corestring_lwc_file, &match) == 
lwc_error_ok && match == true) || - (lwc_string_isequal(object_scheme, llcache_about_lwc, + (lwc_string_isequal(object_scheme, corestring_lwc_about, &match) == lwc_error_ok && match == true) || - (lwc_string_isequal(object_scheme, llcache_resource_lwc, + (lwc_string_isequal(object_scheme, corestring_lwc_resource, &match) == lwc_error_ok && match == true)) { lwc_string_unref(object_scheme); lwc_string_unref(scheme); @@ -1310,7 +1809,7 @@ static nserror llcache_fetch_redirect(llcache_object *object, const char *target /* Attempt to fetch target URL */ error = llcache_object_retrieve(url, object->fetch.flags, - object->fetch.referer, post, + object->fetch.referer, post, object->fetch.redirect_count + 1, &dest); /* No longer require url */ @@ -1330,7 +1829,7 @@ static nserror llcache_fetch_redirect(llcache_object *object, const char *target /* Dest is now our object */ *replacement = dest; - return NSERROR_OK; + return NSERROR_OK; } /** @@ -1377,14 +1876,14 @@ static nserror llcache_fetch_notmodified(llcache_object *object, object->candidate->candidate_count--; /* Clone our cache control data into the candidate */ - llcache_object_clone_cache_data(object, object->candidate, + llcache_object_clone_cache_data(object, object->candidate, false); /* Bring candidate's cache data up to date */ llcache_object_cache_update(object->candidate); /* Revert no-cache to normal, if required */ - if (object->candidate->cache.no_cache == + if (object->candidate->cache.no_cache == LLCACHE_VALIDATE_ONCE) { - object->candidate->cache.no_cache = + object->candidate->cache.no_cache = LLCACHE_VALIDATE_FRESH; } @@ -1419,9 +1918,43 @@ static nserror llcache_fetch_notmodified(llcache_object *object, * \param len Byte length of data * \return NSERROR_OK on success, appropriate error otherwise. 
*/ -static nserror llcache_fetch_process_data(llcache_object *object, const uint8_t *data, - size_t len) +static nserror +llcache_fetch_process_data(llcache_object *object, + const uint8_t *data, + size_t len) { + if (object->fetch.state != LLCACHE_FETCH_DATA) { + /* On entry into this state, check if we need to + * invalidate the cache control data. We are guaranteed + * to have received all response headers. + * + * There are two cases in which we want to suppress + * cacheing of an object: + * + * 1) The HTTP response code is not 200 or 203 + * 2) The request URI had a query string and the + * response headers did not provide an explicit + * object expiration time. + */ + long http_code = fetch_http_code(object->fetch.fetch); + + if ((http_code != 200 && http_code != 203) || + (nsurl_has_component(object->url, NSURL_QUERY) && + (object->cache.max_age == INVALID_AGE && + object->cache.expires == 0))) { + /* Invalidate cache control data */ + llcache_invalidate_cache_control_data(object); + } + + /* Release candidate, if any */ + if (object->candidate != NULL) { + object->candidate->candidate_count--; + object->candidate = NULL; + } + + object->fetch.state = LLCACHE_FETCH_DATA; + } + /* Resize source buffer if it's too small */ if (object->source_len + len >= object->source_alloc) { const size_t new_len = object->source_len + len + 64 * 1024; @@ -1468,7 +2001,7 @@ static nserror llcache_query_handle_response(bool proceed, void *cbpw) event.type = LLCACHE_EVENT_ERROR; /** \todo More appropriate error message */ event.data.msg = messages_get("FetchFailed"); - + return llcache_send_event_to_users(object, &event); } @@ -1515,7 +2048,7 @@ static nserror llcache_fetch_auth(llcache_object *object, const char *realm) object->fetch.outstanding_query = true; - error = llcache->query_cb(&query, llcache->query_cb_pw, + error = llcache->query_cb(&query, llcache->query_cb_pw, llcache_query_handle_response, object); } else { llcache_event event; @@ -1527,7 +2060,7 @@ static 
nserror llcache_fetch_auth(llcache_object *object, const char *realm) event.type = LLCACHE_EVENT_ERROR; /** \todo More appropriate error message */ event.data.msg = messages_get("FetchFailed"); - + error = llcache_send_event_to_users(object, &event); } } else { @@ -1582,7 +2115,7 @@ static nserror llcache_fetch_cert_error(llcache_object *object, event.type = LLCACHE_EVENT_ERROR; /** \todo More appropriate error message */ event.data.msg = messages_get("FetchFailed"); - + error = llcache_send_event_to_users(object, &event); } @@ -1616,7 +2149,7 @@ static nserror llcache_fetch_ssl_error(llcache_object *object) event.type = LLCACHE_EVENT_ERROR; /** \todo More appropriate error message */ event.data.msg = messages_get("FetchFailed"); - + error = llcache_send_event_to_users(object, &event); } else { /* Flag that we've tried to downgrade, so that if the @@ -1629,6 +2162,150 @@ static nserror llcache_fetch_ssl_error(llcache_object *object) } /** + * construct a sorted list of objects available for writeout operation + * + * The list contains fresh cacheable objects held in RAM with no + * pending fetches. Any objects with a remaining lifetime less than + * the configured minimum lifetime are simply not considered, they will + * become stale before pushing to backing store is worth the cost. + * + * \todo calculate useful cost metrics to improve sorting. 
+ * + */ +static nserror +build_candidate_list(struct llcache_object ***lst_out, int *lst_len_out) +{ + llcache_object *object, *next; + struct llcache_object **lst; + int lst_len = 0; + int remaining_lifetime; + +#define MAX_PERSIST_PER_RUN 512 + + lst = calloc(MAX_PERSIST_PER_RUN, sizeof(struct llcache_object *)); + if (lst == NULL) + return NSERROR_NOMEM; + + for (object = llcache->cached_objects; object != NULL; object = next) { + next = object->next; + + remaining_lifetime = llcache_object_rfc2616_remaining_lifetime(&object->cache); + + /* cacehable objects with no pending fetches, not + * already on disc and with sufficient lifetime to + * make disc cache worthwile + */ + if ((object->candidate_count == 0) && + (object->fetch.fetch == NULL) && + (object->fetch.outstanding_query == false) && + (object->store_state == LLCACHE_STATE_RAM) && + (remaining_lifetime > llcache->minimum_lifetime)) { + lst[lst_len] = object; + lst_len++; + if (lst_len == MAX_PERSIST_PER_RUN) + break; + } + } + + if (lst_len == 0) { + free(lst); + return NSERROR_NOT_FOUND; + } + + /** \todo sort list here */ + + *lst_len_out = lst_len; + *lst_out = lst; + +#undef MAX_PERSIST_PER_RUN + + return NSERROR_OK; +} + +static nserror +write_backing_store(struct llcache_object *object, size_t *written_out) +{ + nserror ret; + uint8_t *metadata; + size_t metadatasize; + + /* put object data in backing store */ + ret = guit->llcache->store(object->url, + BACKING_STORE_NONE, + object->source_data, + object->source_len); + if (ret != NSERROR_OK) { + /* unable to put source data in backing store */ + return ret; + } + + ret = llcache_serialise_metadata(object, &metadata, &metadatasize); + if (ret != NSERROR_OK) { + /* There has been a metadata serialisation error. Ensure the + * already written data object is invalidated. 
+ */ + guit->llcache->invalidate(object->url); + return ret; + } + + ret = guit->llcache->store(object->url, + BACKING_STORE_META, + metadata, + metadatasize); + free(metadata); + if (ret != NSERROR_OK) { + /* There has been an error putting the metadata in the + * backing store. Ensure the data object is invalidated. + */ + guit->llcache->invalidate(object->url); + return ret; + } + object->store_state = LLCACHE_STATE_DISC; + + *written_out = object->source_len + metadatasize; + + return NSERROR_OK; +} + +/** + * possibly write objects data to backing store. + */ +static void llcache_persist(void *p) +{ + nserror ret; + size_t size_written; + size_t total_written = 0; + struct llcache_object **lst; + int lst_count; + int idx; + + ret = build_candidate_list(&lst, &lst_count); + if (ret == NSERROR_OK) { + /* obtained a candidate list, make each object + * persistant in turn + */ + for (idx = 0; idx < lst_count; idx++) { + ret = write_backing_store(lst[idx], &size_written); + if (ret != NSERROR_OK) { + break; + } + total_written += size_written; + + if (total_written > llcache->bandwidth) { + /* The bandwidth limit has been reached. 
+ * Writeout scheduled for the remaining objects + */ + guit->browser->schedule(1000, llcache_persist, NULL); + break; + } + } + + free(lst); + } +} + + +/** * Handler for fetch events * * \param msg Fetch event @@ -1640,17 +2317,15 @@ static void llcache_fetch_callback(const fetch_msg *msg, void *p) llcache_object *object = p; llcache_event event; -#ifdef LLCACHE_TRACE - LOG(("Fetch event %d for %p", msg->type, object)); -#endif + LLCACHE_LOG(("Fetch event %d for %p", msg->type, object)); switch (msg->type) { case FETCH_HEADER: /* Received a fetch header */ object->fetch.state = LLCACHE_FETCH_HEADERS; - error = llcache_fetch_process_header(object, - msg->data.header_or_data.buf, + error = llcache_fetch_process_header(object, + msg->data.header_or_data.buf, msg->data.header_or_data.len); break; @@ -1664,7 +2339,7 @@ static void llcache_fetch_callback(const fetch_msg *msg, void *p) object->candidate = NULL; } - error = llcache_fetch_redirect(object, + error = llcache_fetch_redirect(object, msg->data.redirect, &object); break; case FETCH_NOTMODIFIED: @@ -1675,39 +2350,7 @@ static void llcache_fetch_callback(const fetch_msg *msg, void *p) /* Normal 2xx state machine */ case FETCH_DATA: /* Received some data */ - if (object->fetch.state != LLCACHE_FETCH_DATA) { - /* On entry into this state, check if we need to - * invalidate the cache control data. We are guaranteed - * to have received all response headers. - * - * There are two cases in which we want to suppress - * cacheing of an object: - * - * 1) The HTTP response code is not 200 or 203 - * 2) The request URI had a query string and the - * response headers did not provide an explicit - * object expiration time. 
- */ - long http_code = fetch_http_code(object->fetch.fetch); - - if ((http_code != 200 && http_code != 203) || - (object->has_query && - (object->cache.max_age == INVALID_AGE && - object->cache.expires == 0))) { - /* Invalidate cache control data */ - llcache_invalidate_cache_control_data(object); - } - - /* Release candidate, if any */ - if (object->candidate != NULL) { - object->candidate->candidate_count--; - object->candidate = NULL; - } - } - - object->fetch.state = LLCACHE_FETCH_DATA; - - error = llcache_fetch_process_data(object, + error = llcache_fetch_process_data(object, msg->data.header_or_data.buf, msg->data.header_or_data.len); break; @@ -1720,7 +2363,7 @@ static void llcache_fetch_callback(const fetch_msg *msg, void *p) object->fetch.fetch = NULL; /* Shrink source buffer to required size */ - temp = realloc(object->source_data, + temp = realloc(object->source_data, object->source_len); /* If source_len is 0, then temp may be NULL */ if (temp != NULL || object->source_len == 0) { @@ -1729,6 +2372,11 @@ static void llcache_fetch_callback(const fetch_msg *msg, void *p) } llcache_object_cache_update(object); + + /* record when the fetch finished */ + object->cache.fin_time = time(NULL); + + guit->browser->schedule(5000, llcache_persist, NULL); } break; @@ -1752,9 +2400,9 @@ static void llcache_fetch_callback(const fetch_msg *msg, void *p) event.type = LLCACHE_EVENT_ERROR; event.data.msg = msg->data.error; - + error = llcache_send_event_to_users(object, &event); - + break; case FETCH_PROGRESS: /* Progress update */ @@ -1762,7 +2410,7 @@ static void llcache_fetch_callback(const fetch_msg *msg, void *p) event.data.msg = msg->data.progress; error = llcache_send_event_to_users(object, &event); - + break; /* Events requiring action */ @@ -1786,8 +2434,8 @@ static void llcache_fetch_callback(const fetch_msg *msg, void *p) object->candidate = NULL; } - error = llcache_fetch_cert_error(object, - msg->data.cert_err.certs, + error = llcache_fetch_cert_error(object, 
+ msg->data.cert_err.certs, msg->data.cert_err.num_certs); break; case FETCH_SSL_ERR: @@ -1805,6 +2453,14 @@ static void llcache_fetch_callback(const fetch_msg *msg, void *p) /* Deal with any errors reported by event handlers */ if (error != NSERROR_OK) { + if (error == NSERROR_NOMEM) { + /* attempt to purge the cache to free some + * memory. will not help this fetch, but may + * allow the UI to report errors etc. + */ + llcache_clean(true); + } + if (object->fetch.fetch != NULL) { fetch_abort(object->fetch.fetch); object->fetch.fetch = NULL; @@ -1814,8 +2470,10 @@ static void llcache_fetch_callback(const fetch_msg *msg, void *p) object->fetch.state = LLCACHE_FETCH_COMPLETE; } - return; } + + /* There may be users which are not caught up so schedule ourselves */ + llcache_users_not_caught_up(); } /** @@ -1838,26 +2496,6 @@ static llcache_object_user *llcache_object_find_user(const llcache_handle *handl return user; } -/** - * Remove a low-level cache object from a cache list - * - * \param object Object to remove - * \param list List to remove from - * \return NSERROR_OK - */ -static nserror llcache_object_remove_from_list(llcache_object *object, - llcache_object **list) -{ - if (object == *list) - *list = object->next; - else - object->prev->next = object->next; - - if (object->next != NULL) - object->next->prev = object->prev; - - return NSERROR_OK; -} /** * Determine if a low-level cache object resides in a given list @@ -1896,7 +2534,7 @@ static nserror llcache_object_notify_users(llcache_object *object) #endif /** - * State transitions and event emission for users. + * State transitions and event emission for users. * Rows: user state. Cols: object state. 
* * User\Obj INIT HEADERS DATA COMPLETE @@ -1928,8 +2566,8 @@ static nserror llcache_object_notify_users(llcache_object *object) user->iterator_target = true; /* A note on the computation of next_user: - * - * Within this loop, we may make a number of calls to + * + * Within this loop, we may make a number of calls to * client code. Our contract with clients is that they * can do whatever they like from within their callback * handlers. This is so that we limit the pain of @@ -1940,7 +2578,7 @@ static nserror llcache_object_notify_users(llcache_object *object) * user list. In the common case, the user they attempt * to remove is the current iteration target, and we * already protect against that causing problems here. - * However, no such protection exists if the client + * However, no such protection exists if the client * attempts to remove other users from this object's * user list. * @@ -1961,13 +2599,13 @@ static nserror llcache_object_notify_users(llcache_object *object) emitted_notify = true; } - LOG(("User %p state: %d Object state: %d", + LOG(("User %p state: %d Object state: %d", user, handle->state, objstate)); } #endif /* User: INIT, Obj: HEADERS, DATA, COMPLETE => User->HEADERS */ - if (handle->state == LLCACHE_FETCH_INIT && + if (handle->state == LLCACHE_FETCH_INIT && objstate > LLCACHE_FETCH_INIT) { handle->state = LLCACHE_FETCH_HEADERS; } @@ -1995,10 +2633,11 @@ static nserror llcache_object_notify_users(llcache_object *object) /* User requested replay */ handle->state = LLCACHE_FETCH_HEADERS; - /* Continue with the next user -- we'll + /* Continue with the next user -- we'll * reemit the event next time round */ user->iterator_target = false; next_user = user->next; + llcache_users_not_caught_up(); continue; } else if (error != NSERROR_OK) { user->iterator_target = false; @@ -2014,15 +2653,15 @@ static nserror llcache_object_notify_users(llcache_object *object) /* Construct HAD_DATA event */ event.type = LLCACHE_EVENT_HAD_DATA; - 
event.data.data.buf = + event.data.data.buf = object->source_data + handle->bytes; - event.data.data.len = + event.data.data.len = object->source_len - handle->bytes; /* Update record of last byte emitted */ - if (object->fetch.flags & + if (object->fetch.flags & LLCACHE_RETRIEVE_STREAM_DATA) { - /* Streaming, so reset to zero to + /* Streaming, so reset to zero to * minimise amount of cached source data. * Additionally, we don't support replay * when streaming. */ @@ -2048,10 +2687,11 @@ static nserror llcache_object_notify_users(llcache_object *object) /* User requested replay */ handle->bytes = orig_handle_read; - /* Continue with the next user -- we'll + /* Continue with the next user -- we'll * reemit the data next time round */ user->iterator_target = false; next_user = user->next; + llcache_users_not_caught_up(); continue; } else if (error != NSERROR_OK) { user->iterator_target = false; @@ -2081,10 +2721,11 @@ static nserror llcache_object_notify_users(llcache_object *object) /* User requested replay */ handle->state = LLCACHE_FETCH_DATA; - /* Continue with the next user -- we'll + /* Continue with the next user -- we'll * reemit the event next time round */ user->iterator_target = false; next_user = user->next; + llcache_users_not_caught_up(); continue; } else if (error != NSERROR_OK) { user->iterator_target = false; @@ -2115,42 +2756,40 @@ static nserror llcache_object_notify_users(llcache_object *object) * \param snapshot Pointer to receive snapshot of \a object * \return NSERROR_OK on success, appropriate error otherwise */ -static nserror llcache_object_snapshot(llcache_object *object, - llcache_object **snapshot) +static nserror +llcache_object_snapshot(llcache_object *object, llcache_object **snapshot) { llcache_object *newobj; nserror error; - + error = llcache_object_new(object->url, &newobj); - + if (error != NSERROR_OK) return error; - - newobj->has_query = object->has_query; newobj->source_alloc = newobj->source_len = object->source_len; - + if 
(object->source_len > 0) { newobj->source_data = malloc(newobj->source_alloc); if (newobj->source_data == NULL) { llcache_object_destroy(newobj); return NSERROR_NOMEM; } - memcpy(newobj->source_data, object->source_data, + memcpy(newobj->source_data, object->source_data, newobj->source_len); } - + if (object->num_headers > 0) { - newobj->headers = calloc(sizeof(llcache_header), + newobj->headers = calloc(sizeof(llcache_header), object->num_headers); if (newobj->headers == NULL) { llcache_object_destroy(newobj); return NSERROR_NOMEM; } while (newobj->num_headers < object->num_headers) { - llcache_header *nh = + llcache_header *nh = &(newobj->headers[newobj->num_headers]); - llcache_header *oh = + llcache_header *oh = &(object->headers[newobj->num_headers]); newobj->num_headers += 1; nh->name = strdup(oh->name); @@ -2161,62 +2800,97 @@ static nserror llcache_object_snapshot(llcache_object *object, } } } - + newobj->fetch.state = LLCACHE_FETCH_COMPLETE; - + *snapshot = newobj; - + return NSERROR_OK; } +/** + * total ram usage of object + */ +static inline uint32_t +total_object_size(llcache_object *object) +{ + uint32_t tot; + size_t hdrc; + + tot = sizeof(*object); + tot += nsurl_length(object->url); + + if (object->source_data != NULL) { + tot += object->source_len; + } + + tot += sizeof(llcache_header) * object->num_headers; + + for (hdrc = 0; hdrc < object->num_headers; hdrc++) { + if (object->headers[hdrc].name != NULL) { + tot += strlen(object->headers[hdrc].name); + } + if (object->headers[hdrc].value != NULL) { + tot += strlen(object->headers[hdrc].value); + } + } + + return tot; +} /****************************************************************************** * Public API * ******************************************************************************/ -/** +/* * Attempt to clean the cache + * + * The memory cache cleaning discards objects in order of increasing value. 
+ * + * Exported interface documented in llcache.h */ -/* Exported interface documented in llcache.h */ -void llcache_clean(void) +void llcache_clean(bool purge) { llcache_object *object, *next; uint32_t llcache_size = 0; int remaining_lifetime; + uint32_t limit; -#ifdef LLCACHE_TRACE - LOG(("Attempting cache clean")); -#endif + LLCACHE_LOG(("Attempting cache clean")); - /* Candidates for cleaning are (in order of priority): - * - * 1) Uncacheable objects with no users - * 2) Stale cacheable objects with no users or pending fetches - * 3) Fresh cacheable objects with no users or pending fetches - */ + /* If the cache is being purged set the size limit to zero. */ + if (purge) { + limit = 0; + } else { + limit = llcache->limit; + } - /* 1) Uncacheable objects with no users or fetches */ - for (object = llcache->uncached_objects; object != NULL; object = next) { + /* Uncacheable objects with no users or fetches */ + for (object = llcache->uncached_objects; + object != NULL; + object = next) { next = object->next; /* The candidate count of uncacheable objects is always 0 */ - if ((object->users == NULL) && + if ((object->users == NULL) && (object->candidate_count == 0) && (object->fetch.fetch == NULL) && (object->fetch.outstanding_query == false)) { -#ifdef LLCACHE_TRACE - LOG(("Found victim %p", object)); -#endif - llcache_object_remove_from_list(object, + LLCACHE_LOG(("Discarding uncachable object with no users (%p) %s", object, nsurl_access(object->url))); + + llcache_object_remove_from_list(object, &llcache->uncached_objects); llcache_object_destroy(object); } else { - llcache_size += object->source_len + sizeof(*object); + llcache_size += total_object_size(object); } } - /* 2) Stale cacheable objects with no users or pending fetches */ - for (object = llcache->cached_objects; object != NULL; object = next) { + + /* Stale cacheable objects with no users or pending fetches */ + for (object = llcache->cached_objects; + object != NULL; + object = next) { next = 
object->next; remaining_lifetime = llcache_object_rfc2616_remaining_lifetime(&object->cache); @@ -2224,85 +2898,139 @@ void llcache_clean(void) if ((object->users == NULL) && (object->candidate_count == 0) && (object->fetch.fetch == NULL) && - (object->fetch.outstanding_query == false)) { + (object->fetch.outstanding_query == false) && + (remaining_lifetime <= 0)) { + /* object is stale */ + LLCACHE_LOG(("discarding stale cacheable object with no users or pending fetches (%p) %s", object, nsurl_access(object->url))); - if (remaining_lifetime > 0) { - /* object is fresh */ - llcache_size += object->source_len + sizeof(*object); - } else { - /* object is not fresh */ -#ifdef LLCACHE_TRACE - LOG(("Found stale cacheable object (%p) with no users or pending fetches", object)); -#endif llcache_object_remove_from_list(object, &llcache->cached_objects); + + if (object->store_state == LLCACHE_STATE_DISC) { + guit->llcache->invalidate(object->url); + } + llcache_object_destroy(object); - } + } else { - llcache_size += object->source_len + sizeof(*object); + /* object has users so account for the storage */ + llcache_size += total_object_size(object); } } - /* 3) Fresh cacheable objects with no users or pending - * fetches, only if the cache exceeds the configured size. 
+ /* if the cache limit is exceeded try to make some objects + * persistent so their RAM can be reclaimed in the next + * step */ - if (llcache->limit < llcache_size) { - for (object = llcache->cached_objects; object != NULL; - object = next) { - next = object->next; - - if ((object->users == NULL) && - (object->candidate_count == 0) && - (object->fetch.fetch == NULL) && - (object->fetch.outstanding_query == false)) { -#ifdef LLCACHE_TRACE - LOG(("Found victim %p", object)); -#endif - llcache_size -= - object->source_len + sizeof(*object); + if (limit < llcache_size) { + llcache_persist(NULL); + } - llcache_object_remove_from_list(object, + /* Source data of fresh cacheable objects with no users, no + * pending fetches and pushed to persistent store while the + * cache exceeds the configured size. + */ + for (object = llcache->cached_objects; + ((limit < llcache_size) && (object != NULL)); + object = next) { + next = object->next; + if ((object->users == NULL) && + (object->candidate_count == 0) && + (object->fetch.fetch == NULL) && + (object->fetch.outstanding_query == false) && + (object->store_state == LLCACHE_STATE_DISC)) { + free(object->source_data); + object->source_data = NULL; + + llcache_size -= object->source_len; + + LLCACHE_LOG(("Freeing source data for %p len:%d", + object, + object->source_len)); + } + } + + /* Fresh cacheable objects with no users, no pending fetches + * and pushed to persistent store while the cache exceeds + * the configured size. Effectively just the object metadata.
+ */ + for (object = llcache->cached_objects; + ((limit < llcache_size) && (object != NULL)); + object = next) { + next = object->next; + if ((object->users == NULL) && + (object->candidate_count == 0) && + (object->fetch.fetch == NULL) && + (object->fetch.outstanding_query == false) && + (object->store_state == LLCACHE_STATE_DISC) && + (object->source_data == NULL)) { + LLCACHE_LOG(("discarding backed object len:%d age:%d (%p) %s", + object->source_len, + time(NULL) - object->last_used, + object, + nsurl_access(object->url))); + + llcache_size -= total_object_size(object); + + llcache_object_remove_from_list(object, &llcache->cached_objects); - llcache_object_destroy(object); - } + llcache_object_destroy(object); + } } -#ifdef LLCACHE_TRACE - LOG(("Size: %u", llcache_size)); -#endif + /* Fresh cacheable objects with no users or pending fetches + * while the cache exceeds the configured size. These are the + * most valuable objects as replacing them is a full network + * fetch + */ + for (object = llcache->cached_objects; + ((limit < llcache_size) && (object != NULL)); + object = next) { + next = object->next; + + if ((object->users == NULL) && + (object->candidate_count == 0) && + (object->fetch.fetch == NULL) && + (object->fetch.outstanding_query == false) && + (object->store_state == LLCACHE_STATE_RAM)) { + LLCACHE_LOG(("discarding fresh object len:%d age:%d (%p) %s", + object->source_len, + time(NULL) - object->last_used, + object, + nsurl_access(object->url))); + + llcache_size -= object->source_len + sizeof(*object); + + llcache_object_remove_from_list(object, + &llcache->cached_objects); + llcache_object_destroy(object); + } + } + LLCACHE_LOG(("Size: %u", llcache_size)); } /* See llcache.h for documentation */ -nserror -llcache_initialise(llcache_query_callback cb, void *pw, uint32_t llcache_limit) +nserror +llcache_initialise(const struct llcache_parameters *prm) { llcache = calloc(1, sizeof(struct llcache_s)); if (llcache == NULL) { return 
NSERROR_NOMEM; }
- llcache->query_cb = cb; - llcache->query_cb_pw = pw; - llcache->limit = llcache_limit; + llcache->query_cb = prm->cb; + llcache->query_cb_pw = prm->cb_ctx; + llcache->limit = prm->limit; + llcache->minimum_lifetime = prm->minimum_lifetime; + llcache->bandwidth = prm->bandwidth; + llcache->all_caught_up = true; - /* Create static scheme strings */ - if (lwc_intern_string("file", SLEN("file"), - &llcache_file_lwc) != lwc_error_ok) - return NSERROR_NOMEM; - - if (lwc_intern_string("about", SLEN("about"), - &llcache_about_lwc) != lwc_error_ok) - return NSERROR_NOMEM; - - if (lwc_intern_string("resource", SLEN("resource"), - &llcache_resource_lwc) != lwc_error_ok) - return NSERROR_NOMEM; + LOG(("llcache initialising with a limit of %d bytes", llcache->limit)); - LOG(("llcache initialised with a limit of %d bytes", llcache_limit)); - - return NSERROR_OK; + /* backing store initialisation */ + return guit->llcache->initialise(&prm->store); } /* See llcache.h for documentation */ @@ -2347,29 +3075,35 @@ void llcache_finalise(void) } /* Fetch system has already been destroyed */ - object->fetch.fetch = NULL; + object->fetch.fetch = NULL; llcache_object_destroy(object); } - /* Unref static scheme lwc strings */ - lwc_string_unref(llcache_file_lwc); - lwc_string_unref(llcache_about_lwc); - lwc_string_unref(llcache_resource_lwc); + /* backing store finalisation */ + guit->llcache->finalise(); free(llcache); llcache = NULL; } -/* See llcache.h for documentation */ -nserror llcache_poll(void) +/** + * Catch up the cache users with state changes from fetchers. + * + * \param ignored We ignore this because all our state comes from llcache. + */ +static void llcache_catch_up_all_users(void *ignored) { llcache_object *object; - - fetch_poll(); - + + /* Assume after this we'll be all caught up. 
If any user of a handle + * defers then we'll end up set not caught up and we'll + * reschedule at that point via llcache_users_not_caught_up() + */ + llcache->all_caught_up = true; + /* Catch new users up with state of objects */ - for (object = llcache->cached_objects; object != NULL; + for (object = llcache->cached_objects; object != NULL; object = object->next) { llcache_object_notify_users(object); } @@ -2378,10 +3112,21 @@ nserror llcache_poll(void) object = object->next) { llcache_object_notify_users(object); } +} - return NSERROR_OK; +/** + * Ask for ::llcache_catch_up_all_users to be scheduled ASAP to pump the + * user state machines. + */ +static void llcache_users_not_caught_up() +{ + if (llcache->all_caught_up) { + llcache->all_caught_up = false; + guit->browser->schedule(0, llcache_catch_up_all_users, NULL); + } } + /* See llcache.h for documentation */ nserror llcache_handle_retrieve(nsurl *url, uint32_t flags, nsurl *referer, const llcache_post_data *post, @@ -2414,6 +3159,9 @@ nserror llcache_handle_retrieve(nsurl *url, uint32_t flags, *result = user->handle; + /* Users exist which are now not caught up! 
*/ + llcache_users_not_caught_up(); + return NSERROR_OK; } @@ -2437,7 +3185,7 @@ nserror llcache_handle_release(llcache_handle *handle) assert(user != NULL); if (user->iterator_target) { - /* Can't remove / delete user object if it's + /* Can't remove / delete user object if it's * the target of an iterator */ user->queued_for_delete = true; } else { @@ -2447,8 +3195,8 @@ nserror llcache_handle_release(llcache_handle *handle) error = llcache_object_user_destroy(user); } } - - return error; + + return error; } /* See llcache.h for documentation */ @@ -2456,14 +3204,14 @@ nserror llcache_handle_clone(llcache_handle *handle, llcache_handle **result) { nserror error; llcache_object_user *newuser; - + error = llcache_object_user_new(handle->cb, handle->pw, &newuser); if (error == NSERROR_OK) { llcache_object_add_user(handle->object, newuser); newuser->handle->state = handle->state; *result = newuser->handle; } - + return error; } @@ -2474,13 +3222,13 @@ nserror llcache_handle_abort(llcache_handle *handle) llcache_object *object = handle->object, *newobject; nserror error = NSERROR_OK; bool all_alone = true; - + /* Determine if we are the only user */ if (user->prev != NULL) all_alone = false; if (user->next != NULL) all_alone = false; - + if (all_alone == false) { /* We must snapshot this object */ error = llcache_object_snapshot(object, &newobject); @@ -2490,7 +3238,7 @@ nserror llcache_handle_abort(llcache_handle *handle) /* Move across to the new object */ if (user->iterator_target) { /* User is current iterator target, clone it */ - llcache_object_user *newuser = + llcache_object_user *newuser = calloc(1, sizeof(llcache_object_user)); if (newuser == NULL) { llcache_object_destroy(newobject); @@ -2509,9 +3257,9 @@ nserror llcache_handle_abort(llcache_handle *handle) llcache_object_remove_user(object, user); llcache_object_add_user(newobject, user); } - + /* Add new object to uncached list */ - llcache_object_add_to_list(newobject, + 
llcache_object_add_to_list(newobject, &llcache->uncached_objects); } else { /* We're the only user, so abort any fetch in progress */ @@ -2519,13 +3267,13 @@ nserror llcache_handle_abort(llcache_handle *handle) fetch_abort(object->fetch.fetch); object->fetch.fetch = NULL; } - + object->fetch.state = LLCACHE_FETCH_COMPLETE; - + /* Invalidate cache control data */ llcache_invalidate_cache_control_data(object); } - + return error; } @@ -2541,7 +3289,7 @@ nserror llcache_handle_force_stream(llcache_handle *handle) /* Forcibly uncache this object */ if (llcache_object_in_list(object, llcache->cached_objects)) { - llcache_object_remove_from_list(object, + llcache_object_remove_from_list(object, &llcache->cached_objects); llcache_object_add_to_list(object, &llcache->uncached_objects); } @@ -2554,8 +3302,8 @@ nserror llcache_handle_force_stream(llcache_handle *handle) /* See llcache.h for documentation */ nserror llcache_handle_invalidate_cache_data(llcache_handle *handle) { - if (handle->object != NULL && handle->object->fetch.fetch == NULL && - handle->object->cache.no_cache == + if (handle->object != NULL && handle->object->fetch.fetch == NULL && + handle->object->cache.no_cache == LLCACHE_VALIDATE_FRESH) { handle->object->cache.no_cache = LLCACHE_VALIDATE_ONCE; } @@ -2579,7 +3327,7 @@ const uint8_t *llcache_handle_get_source_data(const llcache_handle *handle, } /* See llcache.h for documentation */ -const char *llcache_handle_get_header(const llcache_handle *handle, +const char *llcache_handle_get_header(const llcache_handle *handle, const char *key) { const llcache_object *object = handle->object; @@ -2598,9 +3346,8 @@ const char *llcache_handle_get_header(const llcache_handle *handle, } /* See llcache.h for documentation */ -bool llcache_handle_references_same_object(const llcache_handle *a, +bool llcache_handle_references_same_object(const llcache_handle *a, const llcache_handle *b) { return a->object == b->object; } - diff --git a/content/llcache.h 
b/content/llcache.h index 3d8232cae..d4ed5f095 100644 --- a/content/llcache.h +++ b/content/llcache.h @@ -76,7 +76,7 @@ typedef struct { } data; /**< Event data */ } llcache_event; -/** +/** * Client callback for low-level cache events * * \param handle Handle for which event is issued @@ -84,18 +84,18 @@ typedef struct { * \param pw Pointer to client-specific data * \return NSERROR_OK on success, appropriate error otherwise. */ -typedef nserror (*llcache_handle_callback)(llcache_handle *handle, +typedef nserror (*llcache_handle_callback)(llcache_handle *handle, const llcache_event *event, void *pw); /** Flags for low-level cache object retrieval */ enum llcache_retrieve_flag { /* Note: We're permitted a maximum of 16 flags which must reside in the - * bottom 16 bits of the flags word. See hlcache.h for further details. + * bottom 16 bits of the flags word. See hlcache.h for further details. */ /** Force a new fetch */ - LLCACHE_RETRIEVE_FORCE_FETCH = (1 << 0), + LLCACHE_RETRIEVE_FORCE_FETCH = (1 << 0), /** Requested URL was verified */ - LLCACHE_RETRIEVE_VERIFIABLE = (1 << 1), + LLCACHE_RETRIEVE_VERIFIABLE = (1 << 1), /**< No error pages */ LLCACHE_RETRIEVE_NO_ERROR_PAGES = (1 << 2), /**< Stream data (implies that object is not cacheable) */ @@ -149,21 +149,89 @@ typedef nserror (*llcache_query_response)(bool proceed, void *cbpw); * \param cbpw Opaque value to pass into \a cb * \return NSERROR_OK on success, appropriate error otherwise * - * \note This callback should return immediately. Once a suitable answer to - * the query has been obtained, the provided response callback should be + * \note This callback should return immediately. Once a suitable answer to + * the query has been obtained, the provided response callback should be * called. This is intended to be an entirely asynchronous process. 
*/ typedef nserror (*llcache_query_callback)(const llcache_query *query, void *pw, llcache_query_response cb, void *cbpw); /** + * Parameters to configure the low level cache backing store. + */ +struct llcache_store_parameters { + const char *path; /**< The path to the backing store */ + + size_t limit; /**< The backing store upper bound target size */ + size_t hysteresis; /**< The hysteresis around the target size */ + + /** log2 of the default maximum number of entries the cache + * can track. + * + * If unset this defaults to 16 (65536 entries). The cache + * control file takes precedence so cache data remains + * portable between builds with differing defaults. + */ + unsigned int entry_size; + + /** log2 of the default number of entries in the mapping between + * the url and cache entries. + * + * @note This is exposing an internal implementation detail of + * the filesystem based default backing store implementation. + * However it is likely any backing store implementation will + * need some way to map url to cache entries so it is a + * generally useful configuration value. + * + * Too small a value will cause unnecessary collisions and + * cache misses and larger values cause proportionally larger + * amounts of memory to be used. + * + * The "birthday paradox" means that the hash will experience + * a collision in every 2^(address_size/2) urls the cache + * stores. + * + * A value of 20 means one object stored in every 1024 will + * cause a collision and a cache miss while using two megabytes + * of storage. + * + * If unset this defaults to 20 (1048576 entries using two + * megabytes). The cache control file takes precedence so cache + * data remains portable between builds with differing + * defaults. + */ + unsigned int address_size; +}; + +/** + * Parameters to configure the low level cache.
+ */ +struct llcache_parameters { + llcache_query_callback cb; /**< Query handler for llcache */ + void *cb_ctx; /**< Pointer to llcache query handler data */ + + size_t limit; /**< The target upper bound for the RAM cache size */ + size_t hysteresis; /**< The hysteresis around the target size */ + + int minimum_lifetime; /**< The minimum lifetime to consider + * sending objects to backing store. + */ + + size_t bandwidth; /**< The maximum bandwidth to allow the + * backing store to use. + */ + + struct llcache_store_parameters store; +}; + +/** * Initialise the low-level cache * * \param cb Query handler * \param pw Pointer to query handler data * \return NSERROR_OK on success, appropriate error otherwise. */ -nserror llcache_initialise(llcache_query_callback cb, void *pw, uint32_t llcache_limit); +nserror llcache_initialise(const struct llcache_parameters *parameters); /** * Finalise the low-level cache @@ -171,18 +239,15 @@ nserror llcache_initialise(llcache_query_callback cb, void *pw, uint32_t llcache void llcache_finalise(void); /** - * Cause the low-level cache to emit any pending notifications. + * Cause the low-level cache to attempt to perform cleanup. * - * \return NSERROR_OK on success, appropriate error otherwise. - */ -nserror llcache_poll(void); - -/** - * Cause the low-level cache to attempt to perform cleanup. No - * guarantees are made as to whether or not cleanups will take + * No guarantees are made as to whether or not cleanups will take * place and what, if any, space savings will be made. + * + * \param purge Any objects held in the cache that are safely removable will + * be freed regardless of the configured size limits. 
*/ -void llcache_clean(void); +void llcache_clean(bool purge); /** * Retrieve a handle for a low-level cache object @@ -280,12 +345,12 @@ const uint8_t *llcache_handle_get_source_data(const llcache_handle *handle, * \return Header value, or NULL if header does not exist * * \todo Make the key an enumeration, to avoid needless string comparisons - * \todo Forcing the client to parse the header value seems wrong. - * Better would be to return the actual value part and an array of + * \todo Forcing the client to parse the header value seems wrong. + * Better would be to return the actual value part and an array of * key-value pairs for any additional parameters. * \todo Deal with multiple headers of the same key (e.g. Set-Cookie) */ -const char *llcache_handle_get_header(const llcache_handle *handle, +const char *llcache_handle_get_header(const llcache_handle *handle, const char *key); /** @@ -295,7 +360,7 @@ const char *llcache_handle_get_header(const llcache_handle *handle, * \param b Second handle * \return True if handles reference the same object, false otherwise */ -bool llcache_handle_references_same_object(const llcache_handle *a, +bool llcache_handle_references_same_object(const llcache_handle *a, const llcache_handle *b); #endif diff --git a/content/mimesniff.c b/content/mimesniff.c index 5b2c14fb5..e8ebf8770 100644 --- a/content/mimesniff.c +++ b/content/mimesniff.c @@ -155,7 +155,7 @@ static bool mimesniff__has_binary_octets(const uint8_t *data, size_t len) static nserror mimesniff__match_mp4(const uint8_t *data, size_t len, lwc_string **effective_type) { - size_t box_size, i; + uint32_t box_size, i; /* ISO/IEC 14496-12:2008 $4.3 says (effectively): * @@ -204,7 +204,9 @@ static nserror mimesniff__match_mp4(const uint8_t *data, size_t len, /* Search each compatible brand in the box for "mp4" */ for (i = 16; i <= box_size - 4; i += 4) { - if (data[i] == 'm' && data[i+1] == 'p' && data[i+2] == '4') { + if (data[i] == 'm' && + data[i+1] == 'p' && + data[i+2] 
== '4') { *effective_type = lwc_string_ref(video_mp4); return NSERROR_OK; } diff --git a/content/no_backing_store.c b/content/no_backing_store.c new file mode 100644 index 000000000..192101522 --- /dev/null +++ b/content/no_backing_store.c @@ -0,0 +1,68 @@ +/* + * Copyright 2014 Vincent Sanders <vince@netsurf-browser.org> + * + * This file is part of NetSurf, http://www.netsurf-browser.org/ + * + * NetSurf is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; version 2 of the License. + * + * NetSurf is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. + */ + +/** \file + * Low-level resource cache null persistent storage implementation.
+ */ + +#include "utils/nsurl.h" + +#include "content/backing_store.h" + + +/* default to disabled backing store */ +static nserror initialise(const struct llcache_store_parameters *parameters) +{ + return NSERROR_OK; +} + +static nserror finalise(void) +{ + return NSERROR_OK; +} + +static nserror store(nsurl *url, + enum backing_store_flags flags, + const uint8_t *data, + const size_t datalen) +{ + return NSERROR_SAVE_FAILED; +} + +static nserror fetch(nsurl *url, + enum backing_store_flags *flags, + uint8_t **data_out, + size_t *datalen_out) +{ + return NSERROR_NOT_FOUND; +} + +static nserror invalidate(nsurl *url) +{ + return NSERROR_NOT_FOUND; +} + +static struct gui_llcache_table llcache_table = { + .initialise = initialise, + .finalise = finalise, + .store = store, + .fetch = fetch, + .invalidate = invalidate, +}; + +struct gui_llcache_table *null_llcache_table = &llcache_table; diff --git a/content/urldb.c b/content/urldb.c index f55a1c291..c74dae36d 100644 --- a/content/urldb.c +++ b/content/urldb.c @@ -104,7 +104,6 @@ #include "utils/nsoption.h" #include "utils/log.h" #include "utils/corestrings.h" -#include "utils/filename.h" #include "utils/url.h" #include "utils/utils.h" #include "utils/bloom.h" @@ -1806,14 +1805,9 @@ struct path_data *urldb_add_path(lwc_string *scheme, unsigned int port, free(path_query); if (d && !d->url) { - /* Insert URL */ - if (nsurl_has_component(url, NSURL_FRAGMENT)) { - nserror err = nsurl_defragment(url, &d->url); - if (err != NSERROR_OK) - return NULL; - } else { - d->url = nsurl_ref(url); - } + /* Insert defragmented URL */ + if (nsurl_defragment(url, &d->url) != NSERROR_OK) + return NULL; } return d; @@ -2728,12 +2722,8 @@ bool urldb_set_cookie(const char *header, nsurl *url, nsurl *referer) assert(url && header); /* Get defragmented URL, as 'urlt' */ - if (nsurl_has_component(url, NSURL_FRAGMENT)) { - if (nsurl_defragment(url, &urlt) != NSERROR_OK) - return NULL; - } else { - urlt = nsurl_ref(url); - } + if 
(nsurl_defragment(url, &urlt) != NSERROR_OK) + return NULL; scheme = nsurl_get_component(url, NSURL_SCHEME); if (scheme == NULL) { diff --git a/content/urldb.h b/content/urldb.h index 6a2946515..d60043089 100644 --- a/content/urldb.h +++ b/content/urldb.h @@ -25,9 +25,8 @@ #include <stdbool.h> #include <time.h> -#include "content/content.h" -#include "content/content_type.h" #include "utils/nsurl.h" +#include "content/content_type.h" typedef enum { COOKIE_NETSCAPE = 0, |