From 58c28f9c1ab86da14f15cee44ae936c74d812a5f Mon Sep 17 00:00:00 2001
From: James Bursa <james@netsurf-browser.org>
Date: Thu, 17 Apr 2003 21:35:02 +0000
Subject: [project @ 2003-04-17 21:35:02 by bursa] Max one fetch from each host
 at once, fix multiple fetches of same url.

svn path=/import/netsurf/; revision=127
---
 content/content.c    |   5 ++-
 content/fetch.c      | 123 ++++++++++++++++++++++++++++++++++++++++++++++-----
 content/fetch.h      |  25 ++++++++++-
 content/fetchcache.c | 114 ++++++++++++++++++++++++++++++++++++++++-------
 4 files changed, 238 insertions(+), 29 deletions(-)

diff --git a/content/content.c b/content/content.c
index 5a9e2956b..70933a97b 100644
--- a/content/content.c
+++ b/content/content.c
@@ -1,5 +1,5 @@
 /**
- * $Id: content.c,v 1.7 2003/04/15 17:53:00 bursa Exp $
+ * $Id: content.c,v 1.8 2003/04/17 21:35:02 bursa Exp $
  */
 
 #include <assert.h>
@@ -121,7 +121,8 @@ void content_revive(struct content *c, unsigned long width, unsigned long height
 {
 	assert(c != 0);
 	assert(c->type < CONTENT_OTHER);
-	assert(c->status == CONTENT_DONE);
+	if (c->status != CONTENT_DONE)
+		return;
 	c->available_width = width;
 	handler_map[c->type].revive(c, width, height);
 }
diff --git a/content/fetch.c b/content/fetch.c
index 642010fb3..93a0c124d 100644
--- a/content/fetch.c
+++ b/content/fetch.c
@@ -1,11 +1,22 @@
 /**
- * $Id: fetch.c,v 1.5 2003/04/15 17:53:00 bursa Exp $
+ * $Id: fetch.c,v 1.6 2003/04/17 21:35:02 bursa Exp $
+ *
+ * This module handles fetching of data from any url.
+ *
+ * Implementation:
+ * This implementation uses libcurl's 'multi' interface.
+ *
+ * Active fetches are held in the linked list fetch_list. There may be at most
+ * one fetch from each host. Any further fetches are queued until the previous
+ * one ends.
  */
 
 #include <assert.h>
 #include <string.h>
+#include <strings.h>
 #include <time.h>
 #include "curl/curl.h"
+#include "libxml/uri.h"
 #include "netsurf/content/fetch.h"
 #include "netsurf/utils/utils.h"
 #include "netsurf/utils/log.h"
@@ -19,13 +30,19 @@ struct fetch
 	int in_callback : 1;
 	int aborting : 1;
 	char *url;
+	char *referer;
 	char error_buffer[CURL_ERROR_SIZE];
 	void *p;
 	struct curl_slist *headers;
+	char *host;
+	struct fetch *queue;
+	struct fetch *prev;
+	struct fetch *next;
 };
 
 static const char * const user_agent = "NetSurf";
 static CURLM * curl_multi;
+static struct fetch *fetch_list = 0;
 
 static size_t fetch_curl_data(void * data, size_t size, size_t nmemb, struct fetch *f);
 static size_t fetch_curl_header(void *data, size_t size, size_t nmemb, struct fetch *f);
@@ -75,19 +92,52 @@ void fetch_quit(void)
 struct fetch * fetch_start(char *url, char *referer,
                  void (*callback)(fetch_msg msg, void *p, char *data, unsigned long size), void *p)
 {
-	struct fetch* fetch = (struct fetch*) xcalloc(1, sizeof(struct fetch));
+	struct fetch *fetch = xcalloc(1, sizeof(*fetch)), *host_fetch;
 	CURLcode code;
 	CURLMcode codem;
+	xmlURI *uri;
 
 	LOG(("fetch %p, url '%s'", fetch, url));
-  
-	fetch->start_time = time(&fetch->start_time);
+
+	uri = xmlParseURI(url);
+	assert(uri != 0);
+
+	/* construct a new fetch structure; host is "" when the url has no server part */
+	fetch->start_time = time(0);
 	fetch->callback = callback;
 	fetch->had_headers = 0;
 	fetch->in_callback = 0;
 	fetch->aborting = 0;
 	fetch->url = xstrdup(url);
+	fetch->referer = 0;
+	if (referer != 0)
+		fetch->referer = xstrdup(referer);
 	fetch->p = p;
+	fetch->headers = 0;
+	fetch->host = xstrdup(uri->server != 0 ? uri->server : "");
+	fetch->queue = 0;
+	fetch->prev = 0;
+	fetch->next = 0;
+
+	xmlFreeURI(uri);
+
+	/* look for a fetch from the same host */
+	for (host_fetch = fetch_list;
+			host_fetch != 0 && strcasecmp(host_fetch->host, fetch->host) != 0;
+			host_fetch = host_fetch->next)
+		;
+	if (host_fetch != 0) {
+		/* fetch from this host in progress: queue the new fetch */
+		LOG(("queueing"));
+		fetch->queue = host_fetch->queue;
+		host_fetch->queue = fetch;
+		return fetch;
+	}
+
+	fetch->next = fetch_list;
+	if (fetch_list != 0)
+		fetch_list->prev = fetch;
+	fetch_list = fetch;
 
 	/* create the curl easy handle */
 	fetch->curl_handle = curl_easy_init();
@@ -147,14 +197,60 @@ void fetch_abort(struct fetch *f)
 		f->aborting = 1;
 		return;
 	}
-  
-	/* remove from curl */
+
+	/* remove from list of fetches */
+	if (f->prev == 0)
+		fetch_list = f->next;
+	else
+		f->prev->next = f->next;
+	if (f->next != 0)
+		f->next->prev = f->prev;
+
+	/* remove from curl multi handle */
 	codem = curl_multi_remove_handle(curl_multi, f->curl_handle);
 	assert(codem == CURLM_OK);
-	curl_easy_cleanup(f->curl_handle);
-	curl_slist_free_all(f->headers);
+
+	if (f->queue != 0) {
+		/* start a queued fetch for this host, reusing the handle for this host */
+		struct fetch *fetch = f->queue;
+		CURLcode code;
+		CURLMcode codem;
+
+		LOG(("starting queued %p '%s'", fetch, fetch->url));
+
+		fetch->prev = 0;
+		fetch->next = fetch_list;
+		if (fetch_list != 0)
+			fetch_list->prev = fetch;
+		fetch_list = fetch;
+
+		fetch->curl_handle = f->curl_handle;
+		code = curl_easy_setopt(fetch->curl_handle, CURLOPT_URL, fetch->url);
+		assert(code == CURLE_OK);
+		code = curl_easy_setopt(fetch->curl_handle, CURLOPT_PRIVATE, fetch);
+		assert(code == CURLE_OK);
+		code = curl_easy_setopt(fetch->curl_handle, CURLOPT_ERRORBUFFER, fetch->error_buffer);
+		assert(code == CURLE_OK);
+		code = curl_easy_setopt(fetch->curl_handle, CURLOPT_WRITEDATA, fetch);
+		assert(code == CURLE_OK);
+		/* TODO: remove referer header if fetch->referer == 0 */
+		if (fetch->referer != 0) {
+			code = curl_easy_setopt(fetch->curl_handle, CURLOPT_REFERER, fetch->referer);
+			assert(code == CURLE_OK);
+		}
+
+		/* add to the global curl multi handle */
+		codem = curl_multi_add_handle(curl_multi, fetch->curl_handle);
+		assert(codem == CURLM_OK || codem == CURLM_CALL_MULTI_PERFORM);
+
+	} else {
+		curl_easy_cleanup(f->curl_handle);
+		curl_slist_free_all(f->headers);
+	}
 
 	xfree(f->url);
+	xfree(f->host);
+	xfree(f->referer);
 	xfree(f);
 }
 
@@ -169,9 +265,10 @@ void fetch_poll(void)
 {
 	CURLcode code;
 	CURLMcode codem;
-	int running, queue;
+	int running, queue, finished;
 	CURLMsg * curl_msg;
 	struct fetch *f;
+	void *p; void (*callback)(fetch_msg, void *, char *, unsigned long);
 
 	/* do any possible work on the current fetches */
 	do {
@@ -191,8 +288,10 @@ void fetch_poll(void)
 				LOG(("CURLMSG_DONE, result %i", curl_msg->data.result));
 
 				/* inform the caller that the fetch is done */
+				finished = 0;
+				p = f->p; callback = f->callback;	/* saved now: fetch_abort() frees f */
 				if (curl_msg->data.result == CURLE_OK && f->had_headers)
-					f->callback(FETCH_FINISHED, f->p, 0, 0);
+					finished = 1;
 				else if (curl_msg->data.result == CURLE_OK)
 					f->callback(FETCH_ERROR, f->p, "No data received", 0);
 				else if (curl_msg->data.result != CURLE_WRITE_ERROR)
@@ -201,6 +300,10 @@ void fetch_poll(void)
 				/* clean up fetch */
 				fetch_abort(f);
 
+				/* postponed until after abort so that queued fetches are started; f is freed by now */
+				if (finished)
+					callback(FETCH_FINISHED, p, 0, 0);
+
 				break;
 
 			default:
diff --git a/content/fetch.h b/content/fetch.h
index ab3029b23..92c269a02 100644
--- a/content/fetch.h
+++ b/content/fetch.h
@@ -1,5 +1,28 @@
 /**
- * $Id: fetch.h,v 1.2 2003/03/15 15:53:20 bursa Exp $
+ * $Id: fetch.h,v 1.3 2003/04/17 21:35:02 bursa Exp $
+ *
+ * This module handles fetching of data from any url.
+ *
+ * Usage:
+ *
+ * fetch_init() must be called once before any other function. fetch_quit()
+ * must be called before exiting.
+ *
+ * fetch_start() will begin fetching a url. The function returns immediately.
+ * A pointer to an opaque struct fetch is returned, which can be passed to
+ * fetch_abort() to abort the fetch at any time. The caller must supply a
+ * callback function which is called when anything interesting happens. The
+ * callback function is first called with msg = FETCH_TYPE, with the
+ * Content-Type header in data, then one or more times with FETCH_DATA with
+ * some data for the url, and finally with FETCH_FINISHED. Alternatively,
+ * FETCH_ERROR indicates an error occurred: data contains an error message.
+ * Some private data can be passed as the last parameter to fetch_start, and
+ * callbacks will contain this.
+ *
+ * fetch_poll() must be called regularly to make progress on fetches.
+ *
+ * fetch_filetype() is used internally to determine the mime type of local
+ * files. It is platform specific, and implemented elsewhere.
  */
 
 #ifndef _NETSURF_DESKTOP_FETCH_H_
diff --git a/content/fetchcache.c b/content/fetchcache.c
index 8aa81f66f..34105c71d 100644
--- a/content/fetchcache.c
+++ b/content/fetchcache.c
@@ -1,5 +1,5 @@
 /**
- * $Id: fetchcache.c,v 1.7 2003/04/09 21:57:09 bursa Exp $
+ * $Id: fetchcache.c,v 1.8 2003/04/17 21:35:02 bursa Exp $
  */
 
 #include <assert.h>
@@ -20,8 +20,13 @@ struct fetchcache {
 	unsigned long width, height;
 	unsigned long size;
 	content_type allowed;
+	struct fetchcache *next;
+	struct fetchcache *prev;
+	struct fetchcache *next_request;
+	int active;
 };
 
+static struct fetchcache *fetchcache_list = 0;
 
 static void fetchcache_free(struct fetchcache *fc);
 static void fetchcache_callback(fetchcache_msg msg, void *p, char *data, unsigned long size);
@@ -33,7 +38,7 @@ void fetchcache(const char *url, char *referer,
 		void *p, unsigned long width, unsigned long height, content_type allowed)
 {
 	struct content *c;
-	struct fetchcache *fc;
+	struct fetchcache *fc, *fc_url;
 
 	c = cache_get(url);
 	if (c != 0) {
@@ -59,7 +64,30 @@ void fetchcache(const char *url, char *referer,
 	fc->height = height;
 	fc->size = 0;
 	fc->allowed = allowed;
-	fc->f = fetch_start(fc->url, referer, fetchcache_callback, fc);
+	fc->next = 0;
+	fc->prev = 0;
+	fc->next_request = 0;
+	fc->active = 1;
+
+	/* check if we're already fetching this url */
+	for (fc_url = fetchcache_list;
+			fc_url != 0 && strcmp(fc_url->url, url) != 0;
+			fc_url = fc_url->next)
+		;
+	if (fc_url != 0) {
+		/* already fetching: add ourselves to list of requestors */
+		LOG(("already fetching"));
+		fc->next_request = fc_url->next_request;
+		fc_url->next_request = fc;
+	
+	} else {
+		/* not fetching yet */
+		if (fetchcache_list != 0)
+			fetchcache_list->prev = fc;
+		fc->next = fetchcache_list;
+		fetchcache_list = fc;
+		fc->f = fetch_start(fc->url, referer, fetchcache_callback, fc);
+	}
 }
 
 
@@ -67,16 +95,24 @@ void fetchcache_free(struct fetchcache *fc)
 {
+	if (fetchcache_list == fc)	/* unlink before free(): fc must not be read once released */
+		fetchcache_list = fc->next;
+	else if (fc->prev != 0)	/* requests queued via next_request are not in the list */
+		fc->prev->next = fc->next;
+	if (fc->next != 0)
+		fc->next->prev = fc->prev;
 	free(fc->url);
 	free(fc);
 }
 
 
 void fetchcache_callback(fetch_msg msg, void *p, char *data, unsigned long size)
 {
-	struct fetchcache *fc = p;
+	struct fetchcache *fc = p, *fc_url;
 	content_type type;
 	char *mime_type;
 	char *semic;
 	char status[40];
+	int active = 0;
+
 	switch (msg) {
 		case FETCH_TYPE:
 			mime_type = strdup(data);
@@ -84,46 +120,90 @@ void fetchcache_callback(fetch_msg msg, void *p, char *data, unsigned long size)
 				*semic = 0;	/* remove "; charset=..." */
 			type = content_lookup(mime_type);
 			LOG(("FETCH_TYPE, type %u", type));
-			if ((1 << type) & fc->allowed) {
+
+			/* check if each request allows this type */
+			for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request) {
+				if (!fc_url->active)
+					continue;
+				if ((1 << type) & fc_url->allowed) {
+					active++;
+				} else {
+					fc_url->active = 0;
+					fc_url->callback(FETCHCACHE_BADTYPE, 0,
+							fc_url->p, mime_type);
+				}
+			}
+			if (active != 0) {
+				/* someone is still interested */
 				fc->c = content_create(type, fc->url);
 				fc->c->status_callback = status_callback;
 				fc->c->status_p = fc;
 			} else {
+				/* no request allows the type */
 				fetch_abort(fc->f);
-				fc->callback(FETCHCACHE_BADTYPE, 0, fc->p, mime_type);
-				free(fc);
+				for (; fc != 0; fc = fc_url) {
+					fc_url = fc->next_request;
+					fetchcache_free(fc);
+				}
 			}
+
 			free(mime_type);
 			break;
+
 		case FETCH_DATA:
 			LOG(("FETCH_DATA"));
 			assert(fc->c != 0);
 			fc->size += size;
 			sprintf(status, "Received %lu bytes", fc->size);
-			fc->callback(FETCHCACHE_STATUS, fc->c, fc->p, status);
+			for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request)
+				if (fc_url->active)
+					fc_url->callback(FETCHCACHE_STATUS, fc->c,
+							fc_url->p, status);
 			content_process_data(fc->c, data, size);
 			break;
+
 		case FETCH_FINISHED:
 			LOG(("FETCH_FINISHED"));
 			assert(fc->c != 0);
 			sprintf(status, "Converting %lu bytes", fc->size);
-			fc->callback(FETCHCACHE_STATUS, fc->c, fc->p, status);
+			for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request)
+				if (fc_url->active)
+					fc_url->callback(FETCHCACHE_STATUS, fc->c,
+							fc_url->p, status);
+
 			if (content_convert(fc->c, fc->width, fc->height) == 0) {
 				cache_put(fc->c);
-				fc->callback(FETCHCACHE_OK, fc->c, fc->p, 0);
+				for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request)
+					if (fc_url->active)
+						fc_url->callback(FETCHCACHE_OK, cache_get(fc->url),
+								fc_url->p, 0);
+				cache_free(fc->c);
 			} else {
 				content_destroy(fc->c);
-				fc->callback(FETCHCACHE_ERROR, 0, fc->p, "Conversion failed");
+				for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request)
+					if (fc_url->active)
+						fc_url->callback(FETCHCACHE_ERROR, 0,
+								fc_url->p, "Conversion failed");
+			}
+			for (; fc != 0; fc = fc_url) {
+				fc_url = fc->next_request;
+				fetchcache_free(fc);
 			}
-			fetchcache_free(fc);
 			break;
+
 		case FETCH_ERROR:
 			LOG(("FETCH_ERROR, '%s'", data));
 			if (fc->c != 0)
 				content_destroy(fc->c);
-			fc->callback(FETCHCACHE_ERROR, 0, fc->p, data);
-			fetchcache_free(fc);
+			for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request)
+				if (fc_url->active)	/* notify each requester through its own callback */
+					fc_url->callback(FETCHCACHE_ERROR, 0, fc_url->p, data);
+			for (; fc != 0; fc = fc_url) {
+				fc_url = fc->next_request;
+				fetchcache_free(fc);
+			}
 			break;
+
 		default:
 			assert(0);
 	}
@@ -132,8 +212,10 @@ void fetchcache_callback(fetch_msg msg, void *p, char *data, unsigned long size)
 
 void status_callback(void *p, const char *status)
 {
-	struct fetchcache *fc = p;
-	fc->callback(FETCHCACHE_STATUS, fc->c, fc->p, status);
+	struct fetchcache *fc = p, *fc_url;
+	for (fc_url = fc; fc_url != 0; fc_url = fc_url->next_request)
+		if (fc_url->active)
+			fc_url->callback(FETCHCACHE_STATUS, fc->c, fc_url->p, status);
 }
 
 
-- 
cgit v1.2.3