From 58c28f9c1ab86da14f15cee44ae936c74d812a5f Mon Sep 17 00:00:00 2001 From: James Bursa Date: Thu, 17 Apr 2003 21:35:02 +0000 Subject: [project @ 2003-04-17 21:35:02 by bursa] Max one fetch from each host at once, fix multiple fetches of same url. svn path=/import/netsurf/; revision=127 --- content/content.c | 5 ++- content/fetch.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++----- content/fetch.h | 25 ++++++++++- content/fetchcache.c | 114 ++++++++++++++++++++++++++++++++++++++++------- 4 files changed, 238 insertions(+), 29 deletions(-) diff --git a/content/content.c b/content/content.c index 5a9e2956b..70933a97b 100644 --- a/content/content.c +++ b/content/content.c @@ -1,5 +1,5 @@ /** - * $Id: content.c,v 1.7 2003/04/15 17:53:00 bursa Exp $ + * $Id: content.c,v 1.8 2003/04/17 21:35:02 bursa Exp $ */ #include @@ -121,7 +121,8 @@ void content_revive(struct content *c, unsigned long width, unsigned long height { assert(c != 0); assert(c->type < CONTENT_OTHER); - assert(c->status == CONTENT_DONE); + if (c->status != CONTENT_DONE) + return; c->available_width = width; handler_map[c->type].revive(c, width, height); } diff --git a/content/fetch.c b/content/fetch.c index 642010fb3..93a0c124d 100644 --- a/content/fetch.c +++ b/content/fetch.c @@ -1,11 +1,22 @@ /** - * $Id: fetch.c,v 1.5 2003/04/15 17:53:00 bursa Exp $ + * $Id: fetch.c,v 1.6 2003/04/17 21:35:02 bursa Exp $ + * + * This module handles fetching of data from any url. + * + * Implementation: + * This implementation uses libcurl's 'multi' interface. + * + * Active fetches are held in the linked list fetch_list. There may be at most + * one fetch from each host. Any further fetches are queued until the previous + * one ends. */ #include #include +#include #include #include "curl/curl.h" +#include "libxml/uri.h" #include "netsurf/content/fetch.h" #include "netsurf/utils/utils.h" #include "netsurf/utils/log.h" @@ -19,13 +30,19 @@ struct fetch int in_callback : 1; int aborting : 1; char *url; + char *referer; char error_buffer[CURL_ERROR_SIZE]; void *p; struct curl_slist *headers; + char *host; + struct fetch *queue; + struct fetch *prev; + struct fetch *next; }; static const char * const user_agent = "NetSurf"; static CURLM * curl_multi; +static struct fetch *fetch_list = 0; static size_t fetch_curl_data(void * data, size_t size, size_t nmemb, struct fetch *f); static size_t fetch_curl_header(void *data, size_t size, size_t nmemb, struct fetch *f); @@ -75,19 +92,52 @@ void fetch_quit(void) struct fetch * fetch_start(char *url, char *referer, void (*callback)(fetch_msg msg, void *p, char *data, unsigned long size), void *p) { - struct fetch* fetch = (struct fetch*) xcalloc(1, sizeof(struct fetch)); + struct fetch *fetch = xcalloc(1, sizeof(*fetch)), *host_fetch; CURLcode code; CURLMcode codem; + xmlURI *uri; LOG(("fetch %p, url '%s'", fetch, url)); - - fetch->start_time = time(&fetch->start_time); + + uri = xmlParseURI(url); + assert(uri != 0); + + /* construct a new fetch structure */ + fetch->start_time = time(0); fetch->callback = callback; fetch->had_headers = 0; fetch->in_callback = 0; fetch->aborting = 0; fetch->url = xstrdup(url); + fetch->referer = 0; + if (referer != 0) + fetch->referer = xstrdup(referer); fetch->p = p; + fetch->headers = 0; + fetch->host = xstrdup(uri->server); + fetch->queue = 0; + fetch->prev = 0; + fetch->next = 0; + + xmlFreeURI(uri); + + /* look for a fetch from the same host */ + for (host_fetch = fetch_list; + host_fetch != 0 && strcasecmp(host_fetch->host, fetch->host) != 0; + host_fetch = host_fetch->next) + ; + if (host_fetch != 0) { + /* fetch from this host in progress: queue the new fetch */ + LOG(("queueing")); + fetch->queue = host_fetch->queue; + host_fetch->queue = fetch; + return fetch; + } + + fetch->next = fetch_list; + if (fetch_list != 0) + fetch_list->prev = fetch; + fetch_list = fetch; /* create the curl easy handle */ fetch->curl_handle = curl_easy_init(); @@ -147,14 +197,60 @@ void fetch_abort(struct fetch *f) f->aborting = 1; return; } - - /* remove from curl */ + + /* remove from list of fetches */ + if (f->prev == 0) + fetch_list = f->next; + else + f->prev->next = f->next; + if (f->next != 0) + f->next->prev = f->prev; + + /* remove from curl multi handle */ codem = curl_multi_remove_handle(curl_multi, f->curl_handle); assert(codem == CURLM_OK); - curl_easy_cleanup(f->curl_handle); - curl_slist_free_all(f->headers); + + if (f->queue != 0) { + /* start a queued fetch for this host, reusing the handle for this host */ + struct fetch *fetch = f->queue; + CURLcode code; + CURLMcode codem; + + LOG(("starting queued %p '%s'", fetch, fetch->url)); + + fetch->prev = 0; + fetch->next = fetch_list; + if (fetch_list != 0) + fetch_list->prev = fetch; + fetch_list = fetch; + + fetch->curl_handle = f->curl_handle; + code = curl_easy_setopt(fetch->curl_handle, CURLOPT_URL, fetch->url); + assert(code == CURLE_OK); + code = curl_easy_setopt(fetch->curl_handle, CURLOPT_PRIVATE, fetch); + assert(code == CURLE_OK); + code = curl_easy_setopt(fetch->curl_handle, CURLOPT_ERRORBUFFER, fetch->error_buffer); + assert(code == CURLE_OK); + code = curl_easy_setopt(fetch->curl_handle, CURLOPT_WRITEDATA, fetch); + assert(code == CURLE_OK); + /* TODO: remove referer header if fetch->referer == 0 */ + if (fetch->referer != 0) { + code = curl_easy_setopt(fetch->curl_handle, CURLOPT_REFERER, fetch->referer); + assert(code == CURLE_OK); + } + + /* add to the global curl multi handle */ + codem = curl_multi_add_handle(curl_multi, fetch->curl_handle); + assert(codem == CURLM_OK || codem == CURLM_CALL_MULTI_PERFORM); + + } else { + curl_easy_cleanup(f->curl_handle); + curl_slist_free_all(f->headers); + } xfree(f->url); + xfree(f->host); + xfree(f->referer); xfree(f); } @@ -169,9 +265,10 @@ void fetch_poll(void) { CURLcode code; CURLMcode codem; - int running, queue; + int running, queue, finished; CURLMsg * curl_msg; struct fetch *f; + void *p; /* do any possible work on the current fetches */ do { @@ -191,8 +288,10 @@ void fetch_poll(void) LOG(("CURLMSG_DONE, result %i", curl_msg->data.result)); /* inform the caller that the fetch is done */ + finished = 0; + p = f->p; if (curl_msg->data.result == CURLE_OK && f->had_headers) - f->callback(FETCH_FINISHED, f->p, 0, 0); + finished = 1; else if (curl_msg->data.result == CURLE_OK) f->callback(FETCH_ERROR, f->p, "No data received", 0); else if (curl_msg->data.result != CURLE_WRITE_ERROR) @@ -201,6 +300,10 @@ void fetch_poll(void) /* clean up fetch */ fetch_abort(f); + /* postponed until after abort so that queue fetches are started */ + if (finished) + f->callback(FETCH_FINISHED, p, 0, 0); + break; default: diff --git a/content/fetch.h b/content/fetch.h index ab3029b23..92c269a02 100644 --- a/content/fetch.h +++ b/content/fetch.h @@ -1,5 +1,28 @@ /** - * $Id: fetch.h,v 1.2 2003/03/15 15:53:20 bursa Exp $ + * $Id: fetch.h,v 1.3 2003/04/17 21:35:02 bursa Exp $ + * + * This module handles fetching of data from any url. + * + * Usage: + * + * fetch_init() must be called once before any other function. fetch_quit() + * must be called before exiting. + * + * fetch_start() will begin fetching a url. The function returns immediately. + * A pointer to an opaque struct fetch is returned, which can be passed to + * fetch_abort() to abort the fetch at any time. The caller must supply a + * callback function which is called when anything interesting happens. The + * callback function is first called with msg = FETCH_TYPE, with the + * Content-Type header in data, then one or more times with FETCH_DATA with + * some data for the url, and finally with FETCH_FINISHED. Alternatively, + * FETCH_ERROR indicates an error occurred: data contains an error message. + * Some private data can be passed as the last parameter to fetch_start, and + * callbacks will contain this. + * + * fetch_poll() must be called regularly to make progress on fetches. + * + * fetch_filetype() is used internally to determine the mime type of local + * files. It is platform specific, and implemented elsewhere. */ #ifndef _NETSURF_DESKTOP_FETCH_H_ diff --git a/content/fetchcache.c b/content/fetchcache.c index 8aa81f66f..34105c71d 100644 --- a/content/fetchcache.c +++ b/content/fetchcache.c @@ -1,5 +1,5 @@ /** - * $Id: fetchcache.c,v 1.7 2003/04/09 21:57:09 bursa Exp $ + * $Id: fetchcache.c,v 1.8 2003/04/17 21:35:02 bursa Exp $ */ #include @@ -20,8 +20,13 @@ struct fetchcache { unsigned long width, height; unsigned long size; content_type allowed; + struct fetchcache *next; + struct fetchcache *prev; + struct fetchcache *next_request; + int active; }; +static struct fetchcache *fetchcache_list = 0; static void fetchcache_free(struct fetchcache *fc); static void fetchcache_callback(fetchcache_msg msg, void *p, char *data, unsigned long size); @@ -33,7 +38,7 @@ void fetchcache(const char *url, char *referer, void *p, unsigned long width, unsigned long height, content_type allowed) { struct content *c; - struct fetchcache *fc; + struct fetchcache *fc, *fc_url; c = cache_get(url); if (c != 0) { @@ -59,7 +64,30 @@ void fetchcache(const char *url, char *referer, fc->height = height; fc->size = 0; fc->allowed = allowed; - fc->f = fetch_start(fc->url, referer, fetchcache_callback, fc); + fc->next = 0; + fc->prev = 0; + fc->next_request = 0; + fc->active = 1; + + /* check if we're already fetching this url */ + for (fc_url = fetchcache_list; + fc_url != 0 && strcmp(fc_url->url, url) != 0; + fc_url = fc_url->next) + ; + if (fc_url != 0) { + /* already fetching: add ourselves to list of requestors */ + LOG(("already fetching")); + fc->next_request = fc_url->next_request; + fc_url->next_request = fc; + + } else { + /* not fetching yet */ + if (fetchcache_list != 0) + fetchcache_list->prev = fc; + fc->next = fetchcache_list; + fetchcache_list = fc; + fc->f = fetch_start(fc->url, referer, fetchcache_callback, fc); + } } @@ -67,16 +95,24 @@ void fetchcache_free(struct fetchcache *fc) { free(fc->url); free(fc); + if (fc->prev == 0) + fetchcache_list = fc->next; + else + fc->prev->next = fc->next; + if (fc->next != 0) + fc->next->prev = fc->prev; } void fetchcache_callback(fetch_msg msg, void *p, char *data, unsigned long size) { - struct fetchcache *fc = p; + struct fetchcache *fc = p, *fc_url; content_type type; char *mime_type; char *semic; char status[40]; + int active = 0; + switch (msg) { case FETCH_TYPE: mime_type = strdup(data); @@ -84,46 +120,90 @@ void fetchcache_callback(fetch_msg msg, void *p, char *data, unsigned long size) *semic = 0; /* remove "; charset=..." */ type = content_lookup(mime_type); LOG(("FETCH_TYPE, type %u", type)); - if ((1 << type) & fc->allowed) { + + /* check if each request allows this type */ + for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request) { + if (!fc_url->active) + continue; + if ((1 << type) & fc_url->allowed) { + active++; + } else { + fc_url->active = 0; + fc_url->callback(FETCHCACHE_BADTYPE, 0, + fc_url->p, mime_type); + } + } + if (active != 0) { + /* someone is still interested */ fc->c = content_create(type, fc->url); fc->c->status_callback = status_callback; fc->c->status_p = fc; } else { + /* no request allows the type */ fetch_abort(fc->f); - fc->callback(FETCHCACHE_BADTYPE, 0, fc->p, mime_type); - free(fc); + for (; fc != 0; fc = fc_url) { + fc_url = fc->next_request; + fetchcache_free(fc); + } } + free(mime_type); break; + case FETCH_DATA: LOG(("FETCH_DATA")); assert(fc->c != 0); fc->size += size; sprintf(status, "Received %lu bytes", fc->size); - fc->callback(FETCHCACHE_STATUS, fc->c, fc->p, status); + for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request) + if (fc_url->active) + fc_url->callback(FETCHCACHE_STATUS, fc->c, + fc_url->p, status); content_process_data(fc->c, data, size); break; + case FETCH_FINISHED: LOG(("FETCH_FINISHED")); assert(fc->c != 0); sprintf(status, "Converting %lu bytes", fc->size); - fc->callback(FETCHCACHE_STATUS, fc->c, fc->p, status); + for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request) + if (fc_url->active) + fc_url->callback(FETCHCACHE_STATUS, fc->c, + fc_url->p, status); + if (content_convert(fc->c, fc->width, fc->height) == 0) { cache_put(fc->c); - fc->callback(FETCHCACHE_OK, fc->c, fc->p, 0); + for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request) + if (fc_url->active) + fc_url->callback(FETCHCACHE_OK, cache_get(fc->url), + fc_url->p, 0); + cache_free(fc->c); } else { content_destroy(fc->c); - fc->callback(FETCHCACHE_ERROR, 0, fc->p, "Conversion failed"); + for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request) + if (fc_url->active) + fc_url->callback(FETCHCACHE_ERROR, 0, + fc_url->p, "Conversion failed"); + } + for (; fc != 0; fc = fc_url) { + fc_url = fc->next_request; + fetchcache_free(fc); } - fetchcache_free(fc); break; + case FETCH_ERROR: LOG(("FETCH_ERROR, '%s'", data)); if (fc->c != 0) content_destroy(fc->c); - fc->callback(FETCHCACHE_ERROR, 0, fc->p, data); - fetchcache_free(fc); + for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request) + if (fc_url->active) + fc->callback(FETCHCACHE_ERROR, 0, fc_url->p, data); + for (; fc != 0; fc = fc_url) { + fc_url = fc->next_request; + fetchcache_free(fc); + } break; + default: assert(0); } @@ -132,8 +212,10 @@ void fetchcache_callback(fetch_msg msg, void *p, char *data, unsigned long size) void status_callback(void *p, const char *status) { - struct fetchcache *fc = p; - fc->callback(FETCHCACHE_STATUS, fc->c, fc->p, status); + struct fetchcache *fc = p, *fc_url; + for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request) + if (fc_url->active) + fc_url->callback(FETCHCACHE_STATUS, fc->c, fc_url->p, status); } -- cgit v1.2.3