From 1dd7e97eb3f2cd317ef64fa9c054aacba6499215 Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Sun, 10 Jun 2007 17:46:44 +0000 Subject: Merge scheme switcher branch in. svn path=/trunk/netsurf/; revision=3330 --- content/fetch.c | 1552 ++++++----------------------------------- content/fetch.h | 46 +- content/fetchers/fetch_curl.c | 1368 ++++++++++++++++++++++++++++++++++++ content/fetchers/fetch_curl.h | 22 + 4 files changed, 1625 insertions(+), 1363 deletions(-) create mode 100644 content/fetchers/fetch_curl.c create mode 100644 content/fetchers/fetch_curl.h (limited to 'content') diff --git a/content/fetch.c b/content/fetch.c index 0462e5900..38fc2c841 100644 --- a/content/fetch.c +++ b/content/fetch.c @@ -2,7 +2,7 @@ * This file is part of NetSurf, http://netsurf-browser.org/ * Licensed under the GNU General Public License, * http://www.opensource.org/licenses/gpl-license - * Copyright 2006 Daniel Silverstone + * Copyright 2006,2007 Daniel Silverstone * Copyright 2007 James Bursa * Copyright 2003 Phil Mellor */ @@ -10,13 +10,10 @@ /** \file * Fetching of data from a URL (implementation). * - * This implementation uses libcurl's 'multi' interface. - * * Active fetches are held in the circular linked list ::fetch_ring. There may * be at most ::option_max_fetchers_per_host active requests per Host: header. * There may be at most ::option_max_fetchers active requests overall. Inactive - * fetchers are stored in the ::queue_ring and there are at most - * ::option_max_cached_fetch_handles kept in there at any one time. + * fetchers are stored in the ::queue_ring waiting for use. */ #include @@ -27,16 +24,15 @@ #include #include #include -#include #ifdef riscos #include #endif -#include #include "utils/config.h" #ifdef WITH_SSL #include #endif #include "content/fetch.h" +#include "content/fetchers/fetch_curl.h" #include "content/urldb.h" #include "desktop/netsurf.h" #include "desktop/options.h" @@ -46,171 +42,106 @@ #include "utils/messages.h" #include "utils/url.h" #include "utils/utils.h" - +#include "utils/ring.h" bool fetch_active; /**< Fetches in progress, please call fetch_poll(). */ -#ifdef WITH_SSL -/** SSL certificate info */ -struct cert_info { - X509 *cert; /**< Pointer to certificate */ - long err; /**< OpenSSL error code */ -}; -#endif +/** Information about a fetcher for a given scheme. */ +typedef struct scheme_fetcher_s { + char *scheme_name; /**< The scheme. */ + fetcher_setup_fetch setup_fetch; /**< Set up a fetch. */ + fetcher_start_fetch start_fetch; /**< Start a fetch. */ + fetcher_abort_fetch abort_fetch; /**< Abort a fetch. */ + fetcher_free_fetch free_fetch; /**< Free a fetch. */ + fetcher_poll_fetcher poll_fetcher; /**< Poll this fetcher. */ + fetcher_finalise finaliser; /**< Clean up this fetcher. */ + int refcount; /**< When zero, clean up the fetcher. */ + struct scheme_fetcher_s *next_fetcher; /**< Next fetcher in the list. */ + struct scheme_fetcher_s *prev_fetcher; /**< Prev fetcher in the list. */ +} scheme_fetcher; + +static scheme_fetcher *fetchers = NULL; /** Information for a single fetch. */ struct fetch { - CURL * curl_handle; /**< cURL handle if being fetched, or 0. */ - void (*callback)(fetch_msg msg, void *p, const void *data, - unsigned long size); - /**< Callback function. */ - bool had_headers; /**< Headers have been processed. */ + fetch_callback callback;/**< Callback function. */ bool abort; /**< Abort requested. */ bool stopped; /**< Download stopped on purpose. */ - bool only_2xx; /**< Only HTTP 2xx responses acceptable. */ - bool verifiable; /**< Transaction is verifiable */ - char *url; /**< URL. */ - char *referer; /**< URL for Referer header. */ + char *url; /**< URL. */ + char *referer; /**< Referer URL. */ bool send_referer; /**< Valid to send the referer */ - char *parent_fetch_url; /**< URL of parent fetch (not necessarily - * the same as the referer) */ void *p; /**< Private data for callback. */ - struct curl_slist *headers; /**< List of request headers. */ char *host; /**< Host part of URL. */ - char *location; /**< Response Location header, or 0. */ - unsigned long content_length; /**< Response Content-Length, or 0. */ long http_code; /**< HTTP response code, or 0. */ - char *cookie_string; /**< Cookie string for this fetch */ - char *realm; /**< HTTP Auth Realm */ - char *post_urlenc; /**< Url encoded POST string, or 0. */ - struct curl_httppost *post_multipart; /**< Multipart post data, or 0. */ - struct cache_data cachedata; /**< Cache control data */ - time_t last_modified; /**< If-Modified-Since time */ - time_t file_etag; /**< ETag for local objects */ -#ifdef WITH_SSL -#define MAX_CERTS 10 - struct cert_info cert_data[MAX_CERTS]; /**< HTTPS certificate data */ -#endif + scheme_fetcher *ops; /**< Fetcher operations for this fetch. NULL if not set. */ + void *fetcher_handle; /**< The handle for the fetcher. */ + bool fetch_is_active; /**< This fetch is active. */ struct fetch *r_prev; /**< Previous active fetch in ::fetch_ring. */ struct fetch *r_next; /**< Next active fetch in ::fetch_ring. */ }; -struct cache_handle { - CURL *handle; /**< The cached cURL handle */ - char *host; /**< The host for which this handle is cached */ - - struct cache_handle *r_prev; /**< Previous cached handle in ring. */ - struct cache_handle *r_next; /**< Next cached handle in ring. */ -}; - -CURLM *fetch_curl_multi; /**< Global cURL multi handle. */ -/** Curl handle with default options set; not used for transfers. */ -static CURL *fetch_blank_curl; static struct fetch *fetch_ring = 0; /**< Ring of active fetches. */ static struct fetch *queue_ring = 0; /**< Ring of queued fetches */ -static struct cache_handle *handle_ring = 0; /**< Ring of cached handles */ - -static char fetch_error_buffer[CURL_ERROR_SIZE]; /**< Error buffer for cURL. */ -static char fetch_progress_buffer[256]; /**< Progress buffer for cURL */ -static char fetch_proxy_userpwd[100]; /**< Proxy authentication details. */ -static char fetch_user_agent[100] = "NetSurf"; -static void fetch_init_user_agent(void); -static CURLcode fetch_set_options(struct fetch *f); -#ifdef WITH_SSL -static CURLcode fetch_sslctxfun(CURL *curl_handle, SSL_CTX *sslctx, void *p); -#endif static void fetch_free(struct fetch *f); -static void fetch_stop(struct fetch *f); -static void fetch_done(CURL *curl_handle, CURLcode result); -static int fetch_curl_progress(void *clientp, double dltotal, double dlnow, - double ultotal, double ulnow); -static size_t fetch_curl_data(void *data, size_t size, size_t nmemb, - struct fetch *f); -static size_t fetch_curl_header(char *data, size_t size, size_t nmemb, - struct fetch *f); -static bool fetch_process_headers(struct fetch *f); -static struct curl_httppost *fetch_post_convert( - struct form_successful_control *control); -#ifdef WITH_SSL -static int fetch_verify_callback(int preverify_ok, X509_STORE_CTX *x509_ctx); -static int fetch_cert_verify_callback(X509_STORE_CTX *x509_ctx, void *parm); -#endif - -/** Insert the given item into the specified ring. - * Assumes that the element is zeroed as appropriate. - */ -#define RING_INSERT(ring,element) \ - /*LOG(("RING_INSERT(%s, %p(%s))", #ring, element, element->host));*/ \ - if (ring) { \ - element->r_next = ring; \ - element->r_prev = ring->r_prev; \ - ring->r_prev = element; \ - element->r_prev->r_next = element; \ - } else \ - ring = element->r_prev = element->r_next = element - -/** Remove the given element from the specified ring. - * Will zero the element as needed - */ -#define RING_REMOVE(ring, element) \ - /*LOG(("RING_REMOVE(%s, %p(%s)", #ring, element, element->host));*/ \ - if (element->r_next != element ) { \ - /* Not the only thing in the ring */ \ - element->r_next->r_prev = element->r_prev; \ - element->r_prev->r_next = element->r_next; \ - if (ring == element) ring = element->r_next; \ - } else { \ - /* Only thing in the ring */ \ - ring = 0; \ - } \ - element->r_next = element->r_prev = 0 - -/** Find the element (by hostname) in the given ring, leave it in the - * provided element variable - */ -#define RING_FINDBYHOST(ring, element, hostname) \ - /*LOG(("RING_FINDBYHOST(%s, %s)", #ring, hostname));*/ \ - if (ring) { \ - bool found = false; \ - element = ring; \ - do { \ - if (strcasecmp(element->host, hostname) == 0) { \ - found = true; \ - break; \ - } \ - element = element->r_next; \ - } while (element != ring); \ - if (!found) element = 0; \ - } else element = 0 +static void fetch_dispatch_jobs(void); -/** Measure the size of a ring and put it in the supplied variable */ -#define RING_GETSIZE(ringtype, ring, sizevar) \ - /*LOG(("RING_GETSIZE(%s)", #ring));*/ \ - if (ring) { \ - ringtype *p = ring; \ - sizevar = 0; \ - do { \ - sizevar++; \ - p = p->r_next; \ - } while (p != ring); \ - } else sizevar = 0 +#define fetch_ref_fetcher(F) F->refcount++; -/** Count the number of elements in the ring which match the provided hostname */ -#define RING_COUNTBYHOST(ringtype, ring, sizevar, hostname) \ - /*LOG(("RING_COUNTBYHOST(%s, %s)", #ring, hostname));*/ \ - if (ring) { \ - ringtype *p = ring; \ - sizevar = 0; \ - do { \ - if (strcasecmp(p->host, hostname) == 0) \ - sizevar++; \ - p = p->r_next; \ - } while (p != ring); \ - } else sizevar = 0 +static void fetch_unref_fetcher(scheme_fetcher *fetcher) +{ + if (--fetcher->refcount == 0) { + fetcher->finaliser(fetcher->scheme_name); + free(fetcher->scheme_name); + if (fetcher == fetchers) { + fetchers = fetcher->next_fetcher; + if (fetchers) + fetchers->prev_fetcher = NULL; + } else { + fetcher->prev_fetcher->next_fetcher = fetcher->next_fetcher; + if (fetcher->next_fetcher != NULL) + fetcher->next_fetcher->prev_fetcher = fetcher->prev_fetcher; + } + free(fetcher); + } +} -static void fetch_cache_handle(CURL *handle, char *hostname); -static void fetch_dispatch_jobs(void); +bool +fetch_add_fetcher(const char *scheme, + fetcher_initialise initialiser, + fetcher_setup_fetch setup_fetch, + fetcher_start_fetch start_fetch, + fetcher_abort_fetch abort_fetch, + fetcher_free_fetch free_fetch, + fetcher_poll_fetcher poll_fetcher, + fetcher_finalise finaliser) +{ + scheme_fetcher *new_fetcher; + if (!initialiser(scheme)) + return false; + new_fetcher = malloc(sizeof(scheme_fetcher)); + if (new_fetcher == NULL) { + finaliser(scheme); + return false; + } + new_fetcher->scheme_name = strdup(scheme); + if (new_fetcher->scheme_name == NULL) { + free(new_fetcher); + finaliser(scheme); + return false; + } + new_fetcher->refcount = 0; + new_fetcher->setup_fetch = setup_fetch; + new_fetcher->start_fetch = start_fetch; + new_fetcher->abort_fetch = abort_fetch; + new_fetcher->free_fetch = free_fetch; + new_fetcher->poll_fetcher = poll_fetcher; + new_fetcher->finaliser = finaliser; + new_fetcher->next_fetcher = fetchers; + fetchers = new_fetcher; + fetch_ref_fetcher(new_fetcher); + return true; +} /** * Initialise the fetcher. @@ -220,86 +151,8 @@ static void fetch_dispatch_jobs(void); void fetch_init(void) { - CURLcode code; - - LOG(("curl_version %s", curl_version())); - - code = curl_global_init(CURL_GLOBAL_ALL); - if (code != CURLE_OK) - die("Failed to initialise the fetch module " - "(curl_global_init failed)."); - - fetch_curl_multi = curl_multi_init(); - if (!fetch_curl_multi) - die("Failed to initialise the fetch module " - "(curl_multi_init failed)."); - - fetch_init_user_agent(); - - /* Create a curl easy handle with the options that are common to all - fetches. */ - fetch_blank_curl = curl_easy_init(); - if (!fetch_blank_curl) - die("Failed to initialise the fetch module " - "(curl_easy_init failed)."); - -#define SETOPT(option, value) \ - code = curl_easy_setopt(fetch_blank_curl, option, value); \ - if (code != CURLE_OK) \ - goto curl_easy_setopt_failed; - - if (verbose_log) { - SETOPT(CURLOPT_VERBOSE, 1); - } else { - SETOPT(CURLOPT_VERBOSE, 0); - } - SETOPT(CURLOPT_ERRORBUFFER, fetch_error_buffer); - SETOPT(CURLOPT_WRITEFUNCTION, fetch_curl_data); - SETOPT(CURLOPT_HEADERFUNCTION, fetch_curl_header); - SETOPT(CURLOPT_PROGRESSFUNCTION, fetch_curl_progress); - SETOPT(CURLOPT_NOPROGRESS, 0); - SETOPT(CURLOPT_USERAGENT, fetch_user_agent); - SETOPT(CURLOPT_ENCODING, "gzip"); - SETOPT(CURLOPT_LOW_SPEED_LIMIT, 1L); - SETOPT(CURLOPT_LOW_SPEED_TIME, 180L); - SETOPT(CURLOPT_NOSIGNAL, 1L); - SETOPT(CURLOPT_CONNECTTIMEOUT, 30L); - - if (option_ca_bundle) - SETOPT(CURLOPT_CAINFO, option_ca_bundle); - - return; - -curl_easy_setopt_failed: - die("Failed to initialise the fetch module " - "(curl_easy_setopt failed)."); -} - - -/** - * Fill fetch_user_agent with a string suitable for use as a user agent in - * HTTP requests. - */ - -void fetch_init_user_agent(void) -{ - struct utsname un; - - if (uname(&un) != 0) { - LOG(("uname: %i %s", errno, strerror(errno))); - die("Failed to initialise the fetch module " - "(uname failed)."); - } - - snprintf(fetch_user_agent, sizeof fetch_user_agent, - "NetSurf/%d.%d (%s; %s)", - netsurf_version_major, - netsurf_version_minor, - un.sysname, - un.machine); - fetch_user_agent[sizeof fetch_user_agent - 1] = 0; - - LOG(("fetch_user_agent \"%s\"", fetch_user_agent)); + register_curl_fetchers(); + fetch_active = false; } @@ -311,15 +164,14 @@ void fetch_init_user_agent(void) void fetch_quit(void) { - CURLMcode codem; - - curl_easy_cleanup(fetch_blank_curl); - - codem = curl_multi_cleanup(fetch_curl_multi); - if (codem != CURLM_OK) - LOG(("curl_multi_cleanup failed: ignoring")); - - curl_global_cleanup(); + while (fetchers != NULL) { + if (fetchers->refcount != 1) { + LOG(("Fetcher for scheme %s still active?!", fetchers->scheme_name)); + /* We shouldn't do this, but... */ + fetchers->refcount = 1; + } + fetch_unref_fetcher(fetchers); + } } @@ -341,23 +193,19 @@ void fetch_quit(void) * data contains an error message. FETCH_REDIRECT may replace the FETCH_TYPE, * FETCH_DATA, FETCH_FINISHED sequence if the server sends a replacement URL. * - * Some private data can be passed as the last parameter to fetch_start, and - * callbacks will contain this. */ struct fetch * fetch_start(const char *url, const char *referer, - void (*callback)(fetch_msg msg, void *p, const void *data, - unsigned long size), - void *p, bool only_2xx, const char *post_urlenc, - struct form_successful_control *post_multipart, - bool verifiable, const char *parent_url, char *headers[]) + fetch_callback callback, + void *p, bool only_2xx, const char *post_urlenc, + struct form_successful_control *post_multipart, + bool verifiable, const char *parent_url, char *headers[]) { char *host; struct fetch *fetch; - struct curl_slist *slist; url_func_result res; char *ref1 = 0, *ref2 = 0; - int i; + scheme_fetcher *fetcher = fetchers; fetch = malloc(sizeof (*fetch)); if (!fetch) @@ -395,120 +243,67 @@ struct fetch * fetch_start(const char *url, const char *referer, LOG(("fetch %p, url '%s'", fetch, url)); /* construct a new fetch structure */ - fetch->curl_handle = 0; fetch->callback = callback; - fetch->had_headers = false; fetch->abort = false; fetch->stopped = false; - fetch->only_2xx = only_2xx; - fetch->verifiable = verifiable; fetch->url = strdup(url); - fetch->referer = 0; - fetch->send_referer = false; - /* only send the referer if the schemes match */ - if (referer) { - fetch->referer = strdup(referer); - if (option_send_referer && ref1 && ref2 && - strcasecmp(ref1, ref2) == 0) - fetch->send_referer = true; - } - fetch->parent_fetch_url = parent_url ? strdup(parent_url) : 0; fetch->p = p; - fetch->headers = 0; fetch->host = host; - fetch->location = 0; - fetch->content_length = 0; fetch->http_code = 0; - fetch->cookie_string = 0; - fetch->realm = 0; - fetch->post_urlenc = 0; - fetch->post_multipart = 0; - if (post_urlenc) - fetch->post_urlenc = strdup(post_urlenc); - else if (post_multipart) - fetch->post_multipart = fetch_post_convert(post_multipart); - fetch->cachedata.req_time = time(0); - fetch->cachedata.res_time = 0; - fetch->cachedata.date = 0; - fetch->cachedata.expires = 0; - fetch->cachedata.age = INVALID_AGE; - fetch->cachedata.max_age = INVALID_AGE; - fetch->cachedata.no_cache = false; - fetch->cachedata.etag = 0; - fetch->cachedata.last_modified = 0; - fetch->last_modified = 0; - fetch->file_etag = 0; -#ifdef WITH_SSL - memset(fetch->cert_data, 0, sizeof(fetch->cert_data)); -#endif fetch->r_prev = 0; fetch->r_next = 0; + fetch->referer = 0; + fetch->ops = 0; + fetch->fetch_is_active = false; + + if (referer != NULL) { + fetch->referer = strdup(referer); + if (fetch->referer == NULL) + goto failed; + if (option_send_referer && ref1 && ref2 && + strcasecmp(ref1, ref2) == 0) + fetch->send_referer = true; + } - if (!fetch->url || (referer && !fetch->referer) || - (parent_url && !fetch->parent_fetch_url) || - (post_urlenc && !fetch->post_urlenc) || - (post_multipart && !fetch->post_multipart)) + if (!fetch->url) goto failed; + /* Pick the scheme ops */ + while (fetcher) { + if (strcmp(fetcher->scheme_name, ref1) == 0) { + fetch->ops = fetcher; + break; + } + fetcher = fetcher->next_fetcher; + } + + if (fetch->ops == NULL) + goto failed; + + /* Got a scheme fetcher, try and set up the fetch */ + fetch->fetcher_handle = + fetch->ops->setup_fetch(fetch, url, only_2xx, post_urlenc, + post_multipart, verifiable, parent_url, + (const char **)headers); + + if (fetch->fetcher_handle == NULL) + goto failed; + + /* Rah, got it, so ref the fetcher. */ + fetch_ref_fetcher(fetch->ops); + /* these aren't needed past here */ if (ref1) { free(ref1); ref1 = 0; } - if (ref2) { - free(ref2); - ref2 = 0; - } - -#define APPEND(list, value) \ - slist = curl_slist_append(list, value); \ - if (!slist) \ - goto failed; \ - list = slist; - - /* remove curl default headers */ - APPEND(fetch->headers, "Accept:"); - APPEND(fetch->headers, "Pragma:"); - - /* when doing a POST libcurl sends Expect: 100-continue" by default - * which fails with lighttpd, so disable it (see bug 1429054) */ - APPEND(fetch->headers, "Expect:"); - - if (option_accept_language) { - char s[80]; - snprintf(s, sizeof s, "Accept-Language: %s, *;q=0.1", - option_accept_language); - s[sizeof s - 1] = 0; - APPEND(fetch->headers, s); - } - - if (option_accept_charset) { - char s[80]; - snprintf(s, sizeof s, "Accept-Charset: %s, *;q=0.1", - option_accept_charset); - s[sizeof s - 1] = 0; - APPEND(fetch->headers, s); - } - - /* And add any headers specified by the caller */ - for (i = 0; headers[i]; i++) { - if (strncasecmp(headers[i], "If-Modified-Since:", 18) == 0) { - char *d = headers[i] + 18; - for (; *d && (*d == ' ' || *d == '\t'); d++) - /* do nothing */; - fetch->last_modified = curl_getdate(d, NULL); - } - else if (strncasecmp(headers[i], "If-None-Match:", 14) == 0) { - char *d = headers[i] + 14; - for (; *d && (*d == ' ' || *d == '\t' || *d == '"'); - d++) - /* do nothing */; - fetch->file_etag = atoi(d); - } - APPEND(fetch->headers, headers[i]); - } - - /* Dump us in the queue and ask the queue to run. */ + + if (ref2) { + free(ref2); + ref2 = 0; + } + + /* Dump us in the queue and ask the queue to run. */ RING_INSERT(queue_ring, fetch); fetch_dispatch_jobs(); return fetch; @@ -517,78 +312,27 @@ failed: free(host); if (ref1) free(ref1); - if (ref2) - free(ref2); free(fetch->url); - free(fetch->parent_fetch_url); - free(fetch->referer); - free(fetch->post_urlenc); - if (fetch->post_multipart) - curl_formfree(fetch->post_multipart); - curl_slist_free_all(fetch->headers); - free(fetch); + if (fetch->referer) + free(fetch->referer); + free(fetch); return 0; } -/** - * Initiate a fetch from the queue. - * - * Called with a fetch structure and a CURL handle to be used to fetch the content. - * - * This will return whether or not the fetch was successfully initiated. - */ -static bool fetch_initiate_fetch(struct fetch *fetch, CURL *handle) -{ - CURLcode code; - CURLMcode codem; - - fetch->curl_handle = handle; - - /* Initialise the handle */ - code = fetch_set_options(fetch); - if (code != CURLE_OK) { - fetch->curl_handle = 0; - return false; - } - - /* add to the global curl multi handle */ - codem = curl_multi_add_handle(fetch_curl_multi, fetch->curl_handle); - assert(codem == CURLM_OK || codem == CURLM_CALL_MULTI_PERFORM); - - fetch_active = true; - return true; -} - -/** - * Find a CURL handle to use to dispatch a job - */ -static CURL *fetch_get_handle(char *host) -{ - struct cache_handle *h; - CURL *ret; - RING_FINDBYHOST(handle_ring, h, host); - if (h) { - ret = h->handle; - free(h->host); - RING_REMOVE(handle_ring, h); - free(h); - } else { - ret = curl_easy_duphandle(fetch_blank_curl); - } - return ret; -} - /** * Dispatch a single job */ static bool fetch_dispatch_job(struct fetch *fetch) { RING_REMOVE(queue_ring, fetch); - if (!fetch_initiate_fetch(fetch, fetch_get_handle(fetch->host))) { + LOG(("Attempting to start fetch %p, fetcher %p, url %s", fetch, + fetch->fetcher_handle, fetch->url)); + if (!fetch->ops->start_fetch(fetch->fetcher_handle)) { RING_INSERT(queue_ring, fetch); /* Put it back on the end of the queue */ return false; } else { RING_INSERT(fetch_ring, fetch); + fetch->fetch_is_active = true; return true; } } @@ -639,152 +383,9 @@ static void fetch_dispatch_jobs(void) break; } } + fetch_active = (all_active > 0); } -/** - * Cache a CURL handle for the provided host (if wanted) - * - */ -static void fetch_cache_handle(CURL *handle, char *host) -{ - struct cache_handle *h = 0; - int c; - RING_FINDBYHOST(handle_ring, h, host); - if (h) { - /* Already have a handle cached for this hostname */ - curl_easy_cleanup(handle); - return; - } - /* We do not have a handle cached, first up determine if the cache is full */ - RING_GETSIZE(struct cache_handle, handle_ring, c); - if (c >= option_max_cached_fetch_handles) { - /* Cache is full, so, we rotate the ring by one and replace the - * oldest handle with this one. We do this without freeing/allocating - * memory (except the hostname) and without removing the entry from the - * ring and then re-inserting it, in order to be as efficient as we can. - */ - h = handle_ring; - handle_ring = h->r_next; - curl_easy_cleanup(h->handle); - h->handle = handle; - free(h->host); - h->host = strdup(host); - return; - } - /* The table isn't full yet, so make a shiny new handle to add to the ring */ - h = (struct cache_handle*)malloc(sizeof(struct cache_handle)); - h->handle = handle; - h->host = strdup(host); - RING_INSERT(handle_ring, h); -} - -/** - * Set options specific for a fetch. - */ - -CURLcode fetch_set_options(struct fetch *f) -{ - CURLcode code; - const char *auth; - -#undef SETOPT -#define SETOPT(option, value) { \ - code = curl_easy_setopt(f->curl_handle, option, value); \ - if (code != CURLE_OK) \ - return code; \ - } - - SETOPT(CURLOPT_URL, f->url); - SETOPT(CURLOPT_PRIVATE, f); - SETOPT(CURLOPT_WRITEDATA, f); - SETOPT(CURLOPT_WRITEHEADER, f); - SETOPT(CURLOPT_PROGRESSDATA, f); - SETOPT(CURLOPT_REFERER, f->send_referer ? f->referer : 0); - SETOPT(CURLOPT_HTTPHEADER, f->headers); - if (f->post_urlenc) { - SETOPT(CURLOPT_HTTPPOST, 0); - SETOPT(CURLOPT_HTTPGET, 0L); - SETOPT(CURLOPT_POSTFIELDS, f->post_urlenc); - } else if (f->post_multipart) { - SETOPT(CURLOPT_POSTFIELDS, 0); - SETOPT(CURLOPT_HTTPGET, 0L); - SETOPT(CURLOPT_HTTPPOST, f->post_multipart); - } else { - SETOPT(CURLOPT_POSTFIELDS, 0); - SETOPT(CURLOPT_HTTPPOST, 0); - SETOPT(CURLOPT_HTTPGET, 1L); - } - - f->cookie_string = urldb_get_cookie(f->url); - if (f->cookie_string) { - SETOPT(CURLOPT_COOKIE, f->cookie_string); - } else { - SETOPT(CURLOPT_COOKIE, 0); - } - -#ifdef WITH_AUTH - if ((auth = urldb_get_auth_details(f->url)) != NULL) { - SETOPT(CURLOPT_HTTPAUTH, CURLAUTH_ANY); - SETOPT(CURLOPT_USERPWD, auth); - } else { -#endif - SETOPT(CURLOPT_USERPWD, 0); -#ifdef WITH_AUTH - } -#endif - if (option_http_proxy && option_http_proxy_host) { - SETOPT(CURLOPT_PROXY, option_http_proxy_host); - SETOPT(CURLOPT_PROXYPORT, (long) option_http_proxy_port); - if (option_http_proxy_auth != OPTION_HTTP_PROXY_AUTH_NONE) { - SETOPT(CURLOPT_PROXYAUTH, - option_http_proxy_auth == - OPTION_HTTP_PROXY_AUTH_BASIC ? - (long) CURLAUTH_BASIC : - (long) CURLAUTH_NTLM); - snprintf(fetch_proxy_userpwd, - sizeof fetch_proxy_userpwd, - "%s:%s", - option_http_proxy_auth_user, - option_http_proxy_auth_pass); - SETOPT(CURLOPT_PROXYUSERPWD, fetch_proxy_userpwd); - } - } - -#ifdef WITH_SSL - if (urldb_get_cert_permissions(f->url)) { - /* Disable certificate verification */ - SETOPT(CURLOPT_SSL_VERIFYPEER, 0L); - SETOPT(CURLOPT_SSL_VERIFYHOST, 0L); - SETOPT(CURLOPT_SSL_CTX_FUNCTION, 0); - SETOPT(CURLOPT_SSL_CTX_DATA, 0); - } else { - /* do verification */ - SETOPT(CURLOPT_SSL_VERIFYPEER, 1L); - SETOPT(CURLOPT_SSL_VERIFYHOST, 2L); - SETOPT(CURLOPT_SSL_CTX_FUNCTION, fetch_sslctxfun); - SETOPT(CURLOPT_SSL_CTX_DATA, f); - } -#endif - - return CURLE_OK; -} - - -#ifdef WITH_SSL -/** - * cURL SSL setup callback - */ - -CURLcode fetch_sslctxfun(CURL *curl_handle, SSL_CTX *sslctx, void *parm) -{ - SSL_CTX_set_verify(sslctx, SSL_VERIFY_PEER, fetch_verify_callback); - SSL_CTX_set_cert_verify_callback(sslctx, fetch_cert_verify_callback, - parm); - return CURLE_OK; -} -#endif - - /** * Abort a fetch. */ @@ -792,49 +393,8 @@ CURLcode fetch_sslctxfun(CURL *curl_handle, SSL_CTX *sslctx, void *parm) void fetch_abort(struct fetch *f) { assert(f); - LOG(("fetch %p, url '%s'", f, f->url)); - if (f->curl_handle) { - f->abort = true; - } else { - RING_REMOVE(queue_ring, f); - fetch_free(f); - } -} - - -/** - * Clean up the provided fetch object and free it. - * - * Will prod the queue afterwards to allow pending requests to be initiated. - */ - -void fetch_stop(struct fetch *f) -{ - CURLMcode codem; - - assert(f); - LOG(("fetch %p, url '%s'", f, f->url)); - - if (f->curl_handle) { - /* remove from curl multi handle */ - codem = curl_multi_remove_handle(fetch_curl_multi, - f->curl_handle); - assert(codem == CURLM_OK); - /* Put this curl handle into the cache if wanted. */ - fetch_cache_handle(f->curl_handle, f->host); - f->curl_handle = 0; - /* Remove this from the active set of fetches (if it's still there) */ - RING_REMOVE(fetch_ring, f); - } else { - /* Remove this from the queued set of fetches (if it's still there) */ - RING_REMOVE(queue_ring, f); - } - - fetch_free(f); - if (!fetch_ring && !queue_ring) - fetch_active = false; - else if (queue_ring) - fetch_dispatch_jobs(); + LOG(("fetch %p, fetcher %p, url '%s'", f, f->fetcher_handle, f->url)); + f->ops->abort_fetch(f->fetcher_handle); } @@ -844,34 +404,13 @@ void fetch_stop(struct fetch *f) void fetch_free(struct fetch *f) { -#ifdef WITH_SSL - int i; -#endif - - if (f->curl_handle) - curl_easy_cleanup(f->curl_handle); + LOG(("Freeing fetch %p, fetcher %p", f, f->fetcher_handle)); + f->ops->free_fetch(f->fetcher_handle); + fetch_unref_fetcher(f->ops); free(f->url); free(f->host); - free(f->parent_fetch_url); - free(f->referer); - free(f->location); - free(f->cookie_string); - free(f->realm); - if (f->headers) - curl_slist_free_all(f->headers); - free(f->post_urlenc); - if (f->post_multipart) - curl_formfree(f->post_multipart); - free(f->cachedata.etag); - -#ifdef WITH_SSL - for (i = 0; i < MAX_CERTS && f->cert_data[i].cert; i++) { - f->cert_data[i].cert->references--; - if (f->cert_data[i].cert->references == 0) - X509_free(f->cert_data[i].cert); - } -#endif - + if (f->referer) + free(f->referer); free(f); } @@ -884,639 +423,14 @@ void fetch_free(struct fetch *f) void fetch_poll(void) { - int running, queue; - CURLMcode codem; - CURLMsg *curl_msg; - - /* do any possible work on the current fetches */ - do { - codem = curl_multi_perform(fetch_curl_multi, &running); - assert(codem == CURLM_OK || codem == CURLM_CALL_MULTI_PERFORM); - } while (codem == CURLM_CALL_MULTI_PERFORM); - - /* process curl results */ - curl_msg = curl_multi_info_read(fetch_curl_multi, &queue); - while (curl_msg) { - switch (curl_msg->msg) { - case CURLMSG_DONE: - fetch_done(curl_msg->easy_handle, - curl_msg->data.result); - break; - default: - break; - } - curl_msg = curl_multi_info_read(fetch_curl_multi, &queue); - } -} - - -/** - * Handle a completed fetch (CURLMSG_DONE from curl_multi_info_read()). - * - * \param curl_handle curl easy handle of fetch - */ - -void fetch_done(CURL *curl_handle, CURLcode result) -{ - bool finished = false; - bool error = false; -#ifdef WITH_SSL - bool cert = false; -#endif - bool abort; - struct fetch *f; - void *p; - void (*callback)(fetch_msg msg, void *p, const void *data, - unsigned long size); - CURLcode code; - struct cache_data cachedata; -#ifdef WITH_SSL - struct cert_info certs[MAX_CERTS]; - memset(certs, 0, sizeof(certs)); -#endif - - /* find the structure associated with this fetch */ - code = curl_easy_getinfo(curl_handle, CURLINFO_PRIVATE, &f); - assert(code == CURLE_OK); - - abort = f->abort; - callback = f->callback; - p = f->p; - - if (!abort && result == CURLE_OK) { - /* fetch completed normally */ - if (f->stopped || - (!f->had_headers && - fetch_process_headers(f))) - ; /* redirect with no body or similar */ - else - finished = true; - } else if (result == CURLE_PARTIAL_FILE) { - /* CURLE_PARTIAL_FILE occurs if the received body of a - * response is smaller than that specified in the - * Content-Length header. */ - if (!f->had_headers && fetch_process_headers(f)) - ; /* redirect with partial body, or similar */ - else - error = true; - } else if (result == CURLE_WRITE_ERROR && f->stopped) - /* CURLE_WRITE_ERROR occurs when fetch_curl_data - * returns 0, which we use to abort intentionally */ - ; -#ifdef WITH_SSL - else if (result == CURLE_SSL_PEER_CERTIFICATE || - result == CURLE_SSL_CACERT) { - memcpy(certs, f->cert_data, sizeof(certs)); - memset(f->cert_data, 0, sizeof(f->cert_data)); - cert = true; - } -#endif - else - error = true; - - /* If finished, acquire cache info to pass to callback */ - if (finished) { - memcpy(&cachedata, &f->cachedata, sizeof(struct cache_data)); - f->cachedata.etag = 0; - } - - /* clean up fetch and start any queued fetch for this host */ - fetch_stop(f); - - /* postponed until after stop so that queue fetches are started */ - if (abort) - ; /* fetch was aborted: no callback */ - else if (finished) { - callback(FETCH_FINISHED, p, &cachedata, 0); - free(cachedata.etag); - } -#ifdef WITH_SSL - else if (cert) { - int i; - BIO *mem; - BUF_MEM *buf; - struct ssl_cert_info ssl_certs[MAX_CERTS]; - - for (i = 0; i < MAX_CERTS && certs[i].cert; i++) { - ssl_certs[i].version = - X509_get_version(certs[i].cert); - - mem = BIO_new(BIO_s_mem()); - ASN1_TIME_print(mem, - X509_get_notBefore(certs[i].cert)); - BIO_get_mem_ptr(mem, &buf); - (void) BIO_set_close(mem, BIO_NOCLOSE); - BIO_free(mem); - snprintf(ssl_certs[i].not_before, - min(sizeof ssl_certs[i].not_before, - (unsigned) buf->length + 1), - "%s", buf->data); - BUF_MEM_free(buf); - - mem = BIO_new(BIO_s_mem()); - ASN1_TIME_print(mem, - X509_get_notAfter(certs[i].cert)); - BIO_get_mem_ptr(mem, &buf); - (void) BIO_set_close(mem, BIO_NOCLOSE); - BIO_free(mem); - snprintf(ssl_certs[i].not_after, - min(sizeof ssl_certs[i].not_after, - (unsigned) buf->length + 1), - "%s", buf->data); - BUF_MEM_free(buf); - - ssl_certs[i].sig_type = - X509_get_signature_type(certs[i].cert); - ssl_certs[i].serial = - ASN1_INTEGER_get( - X509_get_serialNumber(certs[i].cert)); - mem = BIO_new(BIO_s_mem()); - X509_NAME_print_ex(mem, - X509_get_issuer_name(certs[i].cert), - 0, XN_FLAG_SEP_CPLUS_SPC | - XN_FLAG_DN_REV | XN_FLAG_FN_NONE); - BIO_get_mem_ptr(mem, &buf); - (void) BIO_set_close(mem, BIO_NOCLOSE); - BIO_free(mem); - snprintf(ssl_certs[i].issuer, - min(sizeof ssl_certs[i].issuer, - (unsigned) buf->length + 1), - "%s", buf->data); - BUF_MEM_free(buf); - - mem = BIO_new(BIO_s_mem()); - X509_NAME_print_ex(mem, - X509_get_subject_name(certs[i].cert), - 0, XN_FLAG_SEP_CPLUS_SPC | - XN_FLAG_DN_REV | XN_FLAG_FN_NONE); - BIO_get_mem_ptr(mem, &buf); - (void) BIO_set_close(mem, BIO_NOCLOSE); - BIO_free(mem); - snprintf(ssl_certs[i].subject, - min(sizeof ssl_certs[i].subject, - (unsigned) buf->length + 1), - "%s", buf->data); - BUF_MEM_free(buf); - - ssl_certs[i].cert_type = - X509_certificate_type(certs[i].cert, - X509_get_pubkey(certs[i].cert)); - - /* and clean up */ - certs[i].cert->references--; - if (certs[i].cert->references == 0) - X509_free(certs[i].cert); - } - - callback(FETCH_CERT_ERR, p, &ssl_certs, i); - - } -#endif - else if (error) - callback(FETCH_ERROR, p, fetch_error_buffer, 0); -} - - -/** - * Callback function for fetch progress. - */ - -int fetch_curl_progress(void *clientp, double dltotal, double dlnow, - double ultotal, double ulnow) -{ - struct fetch *f = (struct fetch *) clientp; - double percent; - - if (f->abort) - return 0; - - if (dltotal > 0) { - percent = dlnow * 100.0f / dltotal; - snprintf(fetch_progress_buffer, 255, - messages_get("Progress"), - human_friendly_bytesize(dlnow), - human_friendly_bytesize(dltotal)); - f->callback(FETCH_PROGRESS, f->p, fetch_progress_buffer, - (unsigned long) percent); - } else { - snprintf(fetch_progress_buffer, 255, - messages_get("ProgressU"), - human_friendly_bytesize(dlnow)); - f->callback(FETCH_PROGRESS, f->p, fetch_progress_buffer, 0); - } - - return 0; -} - - -/** - * Callback function for cURL. - */ - -size_t fetch_curl_data(void *data, size_t size, size_t nmemb, - struct fetch *f) -{ - CURLcode code; - - /* ensure we only have to get this information once */ - if (!f->http_code) - { - code = curl_easy_getinfo(f->curl_handle, CURLINFO_HTTP_CODE, - &f->http_code); - assert(code == CURLE_OK); - } - - /* ignore body if this is a 401 reply by skipping it and reset - the HTTP response code to enable follow up fetches */ - if (f->http_code == 401) - { - f->http_code = 0; - return size * nmemb; - } - - /*LOG(("fetch %p, size %lu", f, size * nmemb));*/ - - if (f->abort || (!f->had_headers && fetch_process_headers(f))) { - f->stopped = true; - return 0; - } - - /* send data to the caller */ - /*LOG(("FETCH_DATA"));*/ - f->callback(FETCH_DATA, f->p, data, size * nmemb); - - if (f->abort) { - f->stopped = true; - return 0; - } - - return size * nmemb; -} - - -/** - * Callback function for headers. - */ - -size_t fetch_curl_header(char *data, size_t size, size_t nmemb, - struct fetch *f) -{ - int i; - size *= nmemb; - -#define SKIP_ST(o) for (i = (o); i < (int) size && (data[i] == ' ' || data[i] == '\t'); i++) - - /* Set fetch response time if not already set */ - if (f->cachedata.res_time == 0) - f->cachedata.res_time = time(0); - - if (12 < size && strncasecmp(data, "Location:", 9) == 0) { - /* extract Location header */ - free(f->location); - f->location = malloc(size); - if (!f->location) { - LOG(("malloc failed")); - return size; - } - SKIP_ST(9); - strncpy(f->location, data + i, size - i); - f->location[size - i] = '\0'; - for (i = size - i - 1; i >= 0 && - (f->location[i] == ' ' || - f->location[i] == '\t' || - f->location[i] == '\r' || - f->location[i] == '\n'); i--) - f->location[i] = '\0'; - } else if (15 < size && strncasecmp(data, "Content-Length:", 15) == 0) { - /* extract Content-Length header */ - SKIP_ST(15); - if (i < (int)size && '0' <= data[i] && data[i] <= '9') - f->content_length = atol(data + i); -#ifdef WITH_AUTH - } else if (17 < size && strncasecmp(data, "WWW-Authenticate:", 17) == 0) { - /* extract the first Realm from WWW-Authenticate header */ - free(f->realm); - f->realm = malloc(size); - if (!f->realm) { - LOG(("malloc failed")); - return size; - } - SKIP_ST(17); - - while (i < (int) size - 5 && - strncasecmp(data + i, "realm", 5)) - i++; - while (i < (int) size - 1 && data[++i] != '"') - /* */; - i++; - - if (i < (int) size) { - strncpy(f->realm, data + i, size - i); - f->realm[size - i] = '\0'; - for (i = size - i - 1; i >= 0 && - (f->realm[i] == ' ' || - f->realm[i] == '"' || - f->realm[i] == '\t' || - f->realm[i] == '\r' || - f->realm[i] == '\n'); --i) - f->realm[i] = '\0'; - } -#endif - } else if (5 < size && strncasecmp(data, "Date:", 5) == 0) { - /* extract Date header */ - SKIP_ST(5); - if (i < (int) size) - f->cachedata.date = curl_getdate(&data[i], NULL); - } else if (4 < size && strncasecmp(data, "Age:", 4) == 0) { - /* extract Age header */ - SKIP_ST(4); - if (i < (int) size && '0' <= data[i] && data[i] <= '9') - f->cachedata.age = atoi(data + i); - } else if (8 < size && strncasecmp(data, "Expires:", 8) == 0) { - /* extract Expires header */ - SKIP_ST(8); - if (i < (int) size) - f->cachedata.expires = curl_getdate(&data[i], NULL); - } else if (14 < size && strncasecmp(data, "Cache-Control:", 14) == 0) { - /* extract and parse Cache-Control header */ - int comma; - SKIP_ST(14); - - while (i < (int) size) { - for (comma = i; comma < (int) size; comma++) - if (data[comma] == ',') - break; - - SKIP_ST(i); - - if (8 < comma - i && (strncasecmp(data + i, "no-cache", 8) == 0 || strncasecmp(data + i, "no-store", 8) == 0)) - /* When we get a disk cache we should - * distinguish between these two */ - f->cachedata.no_cache = true; - else if (7 < comma - i && strncasecmp(data + i, "max-age", 7) == 0) { - for (; i < comma; i++) - if (data[i] == '=') - break; - SKIP_ST(i+1); - if (i < comma) - f->cachedata.max_age = - atoi(data + i); - } - - i = comma + 1; - } - } else if (5 < size && strncasecmp(data, "ETag:", 5) == 0) { - /* extract ETag header */ - free(f->cachedata.etag); - f->cachedata.etag = malloc(size); - if (!f->cachedata.etag) { - LOG(("malloc failed")); - return size; - } - SKIP_ST(5); - strncpy(f->cachedata.etag, data + i, size - i); - f->cachedata.etag[size - i] = '\0'; - for (i = size - i - 1; i >= 0 && - (f->cachedata.etag[i] == ' ' || - f->cachedata.etag[i] == '\t' || - f->cachedata.etag[i] == '\r' || - f->cachedata.etag[i] == '\n'); --i) - f->cachedata.etag[i] = '\0'; - } else if (14 < size && strncasecmp(data, "Last-Modified:", 14) == 0) { - /* extract Last-Modified header */ - SKIP_ST(14); - if (i < (int) size) { - f->cachedata.last_modified = - curl_getdate(&data[i], NULL); - } - } else if (11 < size && strncasecmp(data, "Set-Cookie:", 11) == 0) { - /* extract Set-Cookie header */ - SKIP_ST(11); - - /* If the fetch is unverifiable and there's no parent fetch - * url, err on the side of caution and do not set the - cookie */ - - if (f->verifiable || f->parent_fetch_url) { - /* If the transaction's verifiable, we don't require - * that the request uri and the parent domain match, - * so don't pass in the parent in this case. */ - urldb_set_cookie(&data[i], f->url, - f->verifiable ? 0 - : f->parent_fetch_url); - } - } - - return size; -#undef SKIP_ST -} - - -/** - * Find the status code and content type and inform the caller. - * - * Return true if the fetch is being aborted. - */ - -bool fetch_process_headers(struct fetch *f) -{ - long http_code; - const char *type; - CURLcode code; - struct stat s; - char *url_path = 0; - - f->had_headers = true; - - /* Set fetch response time if not already set */ - if (f->cachedata.res_time == 0) - f->cachedata.res_time = time(0); - - if (!f->http_code) - { - code = curl_easy_getinfo(f->curl_handle, CURLINFO_HTTP_CODE, - &f->http_code); - assert(code == CURLE_OK); - } - http_code = f->http_code; - LOG(("HTTP status code %li", http_code)); - - if (http_code == 304 && !f->post_urlenc && !f->post_multipart) { - /* Not Modified && GET request */ - f->callback(FETCH_NOTMODIFIED, f->p, - (const char *)&f->cachedata, 0); - return true; - } - - /* handle HTTP redirects (3xx response codes) */ - if (300 <= http_code && http_code < 400 && f->location != 0) { - LOG(("FETCH_REDIRECT, '%s'", f->location)); - f->callback(FETCH_REDIRECT, f->p, f->location, 0); - return true; - } - - /* handle HTTP 401 (Authentication errors) */ -#ifdef WITH_AUTH - if (http_code == 401) { - f->callback(FETCH_AUTH, f->p, f->realm,0); - return true; - } -#endif - - /* handle HTTP errors (non 2xx response codes) */ - if (f->only_2xx && strncmp(f->url, "http", 4) == 0 && - (http_code < 200 || 299 < http_code)) { - f->callback(FETCH_ERROR, f->p, messages_get("Not2xx"), 0); - return true; - } - - /* find MIME type from headers or filetype for local files */ - code = curl_easy_getinfo(f->curl_handle, CURLINFO_CONTENT_TYPE, &type); - assert(code == CURLE_OK); - - if (strncmp(f->url, "file:///", 8) == 0) - url_path = curl_unescape(f->url + 7, - (int) strlen(f->url) - 7); - - if (url_path && stat(url_path, &s) == 0) { - /* file: URL and file exists */ - /* create etag */ - free(f->cachedata.etag); - f->cachedata.etag = malloc(13); - if (f->cachedata.etag) - sprintf(f->cachedata.etag, - "\"%10d\"", (int)s.st_mtime); - - /* don't set last modified time so as to ensure that local - * files are revalidated at all times. */ - - /* If performed a conditional request and unmodified ... */ - if (f->last_modified && f->file_etag && - f->last_modified > s.st_mtime && - f->file_etag == s.st_mtime) { - f->callback(FETCH_NOTMODIFIED, f->p, - (const char *)&f->cachedata, 0); - curl_free(url_path); - return true; - } - } - - if (type == 0) { - type = "text/plain"; - if (url_path) { - type = fetch_filetype(url_path); - } - } - - curl_free(url_path); - - LOG(("FETCH_TYPE, '%s'", type)); - f->callback(FETCH_TYPE, f->p, type, f->content_length); - if (f->abort) - return true; - - return false; -} - - -/** - * Convert a list of struct ::form_successful_control to a list of - * struct curl_httppost for libcurl. - */ -struct curl_httppost *fetch_post_convert(struct form_successful_control *control) -{ - struct curl_httppost *post = 0, *last = 0; - CURLFORMcode code; - - for (; control; control = control->next) { - if (control->file) { - char *leafname = 0; -#ifdef riscos - char *temp; - int leaflen; - - temp = strrchr(control->value, '.'); - if (!temp) - temp = control->value; /* already leafname */ - else - temp += 1; - - leaflen = strlen(temp); - - leafname = malloc(leaflen + 1); - if (!leafname) { - LOG(("malloc failed")); - continue; - } - memcpy(leafname, temp, leaflen + 1); - - /* and s/\//\./g */ - for (temp = leafname; *temp; temp++) - if (*temp == '/') - *temp = '.'; -#else - leafname = strrchr(control->value, '/') ; - if (!leafname) - leafname = control->value; - else - leafname += 1; -#endif - /* We have to special case filenames of "", so curl - * a) actually attempts the fetch and - * b) doesn't attempt to open the file "" - */ - if (control->value[0] == '\0') { - /* dummy buffer - needs to be static so - * pointer's still valid when we go out - * of scope (not that libcurl should be - * attempting to access it, of course). */ - static char buf; - - code = curl_formadd(&post, &last, - CURLFORM_COPYNAME, control->name, - CURLFORM_BUFFER, control->value, - /* needed, as basename("") == "." */ - CURLFORM_FILENAME, "", - CURLFORM_BUFFERPTR, &buf, - CURLFORM_BUFFERLENGTH, 0, - CURLFORM_CONTENTTYPE, - "application/octet-stream", - CURLFORM_END); - if (code != CURL_FORMADD_OK) - LOG(("curl_formadd: %d (%s)", - code, control->name)); - } else { - char *mimetype = fetch_mimetype(control->value); - code = curl_formadd(&post, &last, - CURLFORM_COPYNAME, control->name, - CURLFORM_FILE, control->value, - CURLFORM_FILENAME, leafname, - CURLFORM_CONTENTTYPE, - (mimetype != 0 ? mimetype : "text/plain"), - CURLFORM_END); - if (code != CURL_FORMADD_OK) - LOG(("curl_formadd: %d (%s=%s)", - code, control->name, - control->value)); - free(mimetype); - } -#ifdef riscos - free(leafname); -#endif - } - else { - code = curl_formadd(&post, &last, - CURLFORM_COPYNAME, control->name, - CURLFORM_COPYCONTENTS, control->value, - CURLFORM_END); - if (code != CURL_FORMADD_OK) - LOG(("curl_formadd: %d (%s=%s)", code, - control->name, - control->value)); - } - } - - return post; + scheme_fetcher *fetcher = fetchers; + if (!fetch_active) + return; /* No point polling, there's no fetch active. */ + while (fetcher != NULL) { + LOG(("Polling fetcher for %s", fetcher->scheme_name)); + fetcher->poll_fetcher(fetcher->scheme_name); + fetcher = fetcher->next_fetcher; + } } @@ -1529,22 +443,21 @@ struct curl_httppost *fetch_post_convert(struct form_successful_control *control bool fetch_can_fetch(const char *url) { - unsigned int i; const char *semi; size_t len; - curl_version_info_data *data; - + scheme_fetcher *fetcher = fetchers; + if ((semi = strchr(url, ':')) == NULL) return false; len = semi - url; - - data = curl_version_info(CURLVERSION_NOW); - - for (i = 0; data->protocols[i]; i++) - if (strlen(data->protocols[i]) == len && - strncasecmp(url, data->protocols[i], len) == 0) - return true; - + + while (fetcher != NULL) { + if (strlen(fetcher->scheme_name) == len && + strncmp(fetcher->scheme_name, url, len) == 0) + return true; + fetcher = fetcher->next_fetcher; + } + return false; } @@ -1554,9 +467,8 @@ bool fetch_can_fetch(const char *url) */ void fetch_change_callback(struct fetch *fetch, - void (*callback)(fetch_msg msg, void *p, const void *data, - unsigned long size), - void *p) + fetch_callback callback, + void *p) { assert(fetch); fetch->callback = callback; @@ -1585,105 +497,39 @@ const char *fetch_get_referer(struct fetch *fetch) return fetch->referer; } - -#ifdef WITH_SSL -/** - * OpenSSL Certificate verification callback - * Stores certificate details in fetch struct. - */ - -int fetch_verify_callback(int preverify_ok, X509_STORE_CTX *x509_ctx) +void +fetch_send_callback(fetch_msg msg, struct fetch *fetch, void *data, unsigned long size) { - X509 *cert = X509_STORE_CTX_get_current_cert(x509_ctx); - int depth = X509_STORE_CTX_get_error_depth(x509_ctx); - int err = X509_STORE_CTX_get_error(x509_ctx); - struct fetch *f = X509_STORE_CTX_get_app_data(x509_ctx); - - /* save the certificate by incrementing the reference count and - * keeping a pointer */ - if (depth < MAX_CERTS && !f->cert_data[depth].cert) { - f->cert_data[depth].cert = cert; - f->cert_data[depth].err = err; - cert->references++; - } - - return preverify_ok; + LOG(("Fetcher sending callback. Fetch %p, fetcher %p data %p size %d", + fetch, fetch->fetcher_handle, data, size)); + fetch->callback(msg, fetch->p, data, size); } - -/** - * OpenSSL certificate chain verification callback - * Verifies certificate chain, setting up context for fetch_verify_callback - */ - -int fetch_cert_verify_callback(X509_STORE_CTX *x509_ctx, void *parm) +void +fetch_can_be_freed(struct fetch *fetch) { - int ok; - - /* Store fetch struct in context for verify callback */ - ok = X509_STORE_CTX_set_app_data(x509_ctx, parm); - - /* and verify the certificate chain */ - if (ok) - ok = X509_verify_cert(x509_ctx); - - return ok; + /* Go ahead and free the fetch properly now */ + LOG(("Fetch %p, fetcher %p can be freed", fetch, fetch->fetcher_handle)); + if (fetch->fetch_is_active) { + RING_REMOVE(fetch_ring, fetch); + } else { + RING_REMOVE(queue_ring, fetch); + } + fetch_active = (fetch_ring != NULL); + fetch_free(fetch); } -#endif - - -/** - * testing framework - */ - -#ifdef TEST -#include - -struct test {char *url; struct fetch *f;}; -void callback(fetch_msg msg, struct test *t, char *data, unsigned long size) +void +fetch_set_http_code(struct fetch *fetch, long http_code) { - printf("%s: ", t->url); - switch (msg) { - case FETCH_TYPE: - printf("FETCH_TYPE '%s'", data); - break; - case FETCH_DATA: - printf("FETCH_DATA %lu", size); - break; - case FETCH_FINISHED: - printf("FETCH_FINISHED"); - break; - case FETCH_ERROR: - printf("FETCH_ERROR '%s'", data); - break; - default: - assert(0); - } - printf("\n"); + LOG(("Setting HTTP code to %d", http_code)); + fetch->http_code = http_code; } -struct test test[] = { - {"http://127.0.0.1/", 0}, - {"http://netsurf.strcprstskrzkrk.co.uk/", 0}, - {"http://www.oxfordstudent.com/", 0}, - {"http://www.google.co.uk/", 0}, - {"http://news.bbc.co.uk/", 0}, - {"http://doesnt.exist/", 0}, - {"blah://blah", 0}, -}; - -int main(void) +const char * +fetch_get_referer_to_send(struct fetch *fetch) { - int i; - fetch_init(); - for (i = 0; i != sizeof(test) / sizeof(test[0]); i++) - test[i].f = fetch_start(test[i].url, 0, callback, &test[i]); - while (1) { - fetch_poll(); - sleep(1); - } - return 0; + if (fetch->send_referer) + return fetch->referer; + return NULL; } -#endif - diff --git a/content/fetch.h b/content/fetch.h index bf4446830..348171d2c 100644 --- a/content/fetch.h +++ b/content/fetch.h @@ -14,7 +14,6 @@ #include #include -#include #include "utils/config.h" typedef enum { @@ -64,15 +63,17 @@ struct ssl_cert_info { #endif extern bool fetch_active; -extern CURLM *fetch_curl_multi; + +typedef void (*fetch_callback)(fetch_msg msg, void *p, const void *data, + unsigned long size); + void fetch_init(void); struct fetch * fetch_start(const char *url, const char *referer, - void (*callback)(fetch_msg msg, void *p, const void *data, - unsigned long size), - void *p, bool only_2xx, const char *post_urlenc, - struct form_successful_control *post_multipart, - bool verifiable, const char *parent_url, char *headers[]); + fetch_callback callback, + void *p, bool only_2xx, const char *post_urlenc, + struct form_successful_control *post_multipart, + bool verifiable, const char *parent_url, char *headers[]); void fetch_abort(struct fetch *f); void fetch_poll(void); void fetch_quit(void); @@ -80,10 +81,35 @@ const char *fetch_filetype(const char *unix_path); char *fetch_mimetype(const char *ro_path); bool fetch_can_fetch(const char *url); void fetch_change_callback(struct fetch *fetch, - void (*callback)(fetch_msg msg, void *p, const void *data, - unsigned long size), - void *p); + fetch_callback callback, + void *p); long fetch_http_code(struct fetch *fetch); const char *fetch_get_referer(struct fetch *fetch); +/* API for fetchers themselves */ + +typedef bool (*fetcher_initialise)(const char *); +typedef void* (*fetcher_setup_fetch)(struct fetch *, const char *, + bool, const char *, + struct form_successful_control *, bool, + const char *, const char **); +typedef bool (*fetcher_start_fetch)(void *); +typedef void (*fetcher_abort_fetch)(void *); +typedef void (*fetcher_free_fetch)(void *); +typedef void (*fetcher_poll_fetcher)(const char *); +typedef void (*fetcher_finalise)(const char *); + +bool fetch_add_fetcher(const char *scheme, + fetcher_initialise initialiser, + fetcher_setup_fetch setup_fetch, + fetcher_start_fetch start_fetch, + fetcher_abort_fetch abort_fetch, + fetcher_free_fetch free_fetch, + fetcher_poll_fetcher poll_fetcher, + fetcher_finalise finaliser); + +void fetch_send_callback(fetch_msg msg, struct fetch *fetch, void *data, unsigned long size); +void fetch_can_be_freed(struct fetch *fetch); +void fetch_set_http_code(struct fetch *fetch, long http_code); +const char *fetch_get_referer_to_send(struct fetch *fetch); #endif diff --git a/content/fetchers/fetch_curl.c b/content/fetchers/fetch_curl.c new file mode 100644 index 000000000..4103e18f1 --- /dev/null +++ b/content/fetchers/fetch_curl.c @@ -0,0 +1,1368 @@ +/* + * This file is part of NetSurf, http://netsurf-browser.org/ + * Licensed under the GNU General Public License, + * http://www.opensource.org/licenses/gpl-license + * Copyright 2006 Daniel Silverstone + * Copyright 2007 James Bursa + * Copyright 2003 Phil Mellor + */ + +/** \file + * Fetching of data from a URL (implementation). + * + * This implementation uses libcurl's 'multi' interface. + * + * + * The CURL handles are cached in the cache_ring. There are at most + * ::option_max_cached_fetch_handles in this ring. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef riscos +#include +#endif +#include "utils/config.h" +#ifdef WITH_SSL +#include +#endif +#include "content/fetch.h" +#include "content/fetchers/fetch_curl.h" +#include "content/urldb.h" +#include "desktop/netsurf.h" +#include "desktop/options.h" +#include "render/form.h" +#undef NDEBUG +#include "utils/log.h" +#include "utils/messages.h" +#include "utils/url.h" +#include "utils/utils.h" +#include "utils/ring.h" +#include "utils/useragent.h" + +#ifdef WITH_SSL +/** SSL certificate info */ +struct cert_info { + X509 *cert; /**< Pointer to certificate */ + long err; /**< OpenSSL error code */ +}; +#endif + +/** Information for a single fetch. */ +struct curl_fetch_info { + struct fetch *fetch_handle; /**< The fetch handle we're parented by. */ + CURL * curl_handle; /**< cURL handle if being fetched, or 0. */ + bool had_headers; /**< Headers have been processed. */ + bool abort; /**< Abort requested. */ + bool stopped; /**< Download stopped on purpose. */ + bool only_2xx; /**< Only HTTP 2xx responses acceptable. */ + bool verifiable; /**< Transaction is verifiable */ + char *url; /**< URL of this fetch. */ + char *host; /**< The hostname of this fetch. */ + char *parent_fetch_url; /**< URL of parent fetch (not necessarily + * the same as the referer) */ + struct curl_slist *headers; /**< List of request headers. */ + char *location; /**< Response Location header, or 0. */ + unsigned long content_length; /**< Response Content-Length, or 0. */ + char *cookie_string; /**< Cookie string for this fetch */ + char *realm; /**< HTTP Auth Realm */ + char *post_urlenc; /**< Url encoded POST string, or 0. */ + unsigned long http_code; /**< HTTP result code from cURL. */ + struct curl_httppost *post_multipart; /**< Multipart post data, or 0. */ + struct cache_data cachedata; /**< Cache control data */ + time_t last_modified; /**< If-Modified-Since time */ + time_t file_etag; /**< ETag for local objects */ +#ifdef WITH_SSL +#define MAX_CERTS 10 + struct cert_info cert_data[MAX_CERTS]; /**< HTTPS certificate data */ +#endif +}; + +struct cache_handle { + CURL *handle; /**< The cached cURL handle */ + char *host; /**< The host for which this handle is cached */ + + struct cache_handle *r_prev; /**< Previous cached handle in ring. */ + struct cache_handle *r_next; /**< Next cached handle in ring. */ +}; + +CURLM *fetch_curl_multi; /**< Global cURL multi handle. */ +/** Curl handle with default options set; not used for transfers. */ +static CURL *fetch_blank_curl; +static struct cache_handle *curl_handle_ring = 0; /**< Ring of cached handles */ + +static char fetch_error_buffer[CURL_ERROR_SIZE]; /**< Error buffer for cURL. */ +static char fetch_progress_buffer[256]; /**< Progress buffer for cURL */ +static char fetch_proxy_userpwd[100]; /**< Proxy authentication details. */ + +static CURLcode fetch_curl_set_options(struct curl_fetch_info *f); +#ifdef WITH_SSL +static CURLcode fetch_curl_sslctxfun(CURL *curl_handle, SSL_CTX *sslctx, void *p); +#endif +static void fetch_curl_free(void *f); +static void fetch_curl_stop(struct curl_fetch_info *f); +static void fetch_curl_done(CURL *curl_handle, CURLcode result); +static int fetch_curl_progress(void *clientp, double dltotal, double dlnow, + double ultotal, double ulnow); +static size_t fetch_curl_data(void *data, size_t size, size_t nmemb, + struct curl_fetch_info *f); +static size_t fetch_curl_header(char *data, size_t size, size_t nmemb, + struct curl_fetch_info *f); +static bool fetch_curl_process_headers(struct curl_fetch_info *f); +static struct curl_httppost *fetch_curl_post_convert( + struct form_successful_control *control); +#ifdef WITH_SSL +static int fetch_curl_verify_callback(int preverify_ok, X509_STORE_CTX *x509_ctx); +static int fetch_curl_cert_verify_callback(X509_STORE_CTX *x509_ctx, void *parm); +#endif + +static void fetch_curl_cache_handle(CURL *handle, char *hostname); + +/** + * Start fetching data for the given URL. + * + * The function returns immediately. The fetch may be queued for later + * processing. + * + * A pointer to an opaque struct curl_fetch_info is returned, which can be passed to + * fetch_abort() to abort the fetch at any time. Returns 0 if memory is + * exhausted (or some other fatal error occurred). + * + * The caller must supply a callback function which is called when anything + * interesting happens. The callback function is first called with msg + * FETCH_TYPE, with the Content-Type header in data, then one or more times + * with FETCH_DATA with some data for the url, and finally with + * FETCH_FINISHED. Alternatively, FETCH_ERROR indicates an error occurred: + * data contains an error message. FETCH_REDIRECT may replace the FETCH_TYPE, + * FETCH_DATA, FETCH_FINISHED sequence if the server sends a replacement URL. + * + * Some private data can be passed as the last parameter to fetch_start, and + * callbacks will contain this. + */ + +static void * +fetch_curl_setup(struct fetch *parent_fetch, const char *url, + bool only_2xx, const char *post_urlenc, + struct form_successful_control *post_multipart, + bool verifiable, const char *parent_url, const char **headers) +{ + char *host; + struct curl_fetch_info *fetch; + struct curl_slist *slist; + url_func_result res; + int i; + + fetch = malloc(sizeof (*fetch)); + if (!fetch) + return 0; + + fetch->fetch_handle = parent_fetch; + + res = url_host(url, &host); + if (res != URL_FUNC_OK) { + /* we only fail memory exhaustion */ + if (res == URL_FUNC_NOMEM) + goto failed; + + host = strdup(""); + if (!host) + goto failed; + } + + LOG(("fetch %p, url '%s'", fetch, url)); + + /* construct a new fetch structure */ + fetch->curl_handle = 0; + fetch->had_headers = false; + fetch->abort = false; + fetch->stopped = false; + fetch->only_2xx = only_2xx; + fetch->verifiable = verifiable; + fetch->url = strdup(url); + fetch->parent_fetch_url = parent_url ? strdup(parent_url) : 0; + fetch->headers = 0; + fetch->host = host; + fetch->location = 0; + fetch->content_length = 0; + fetch->http_code = 0; + fetch->cookie_string = 0; + fetch->realm = 0; + fetch->post_urlenc = 0; + fetch->post_multipart = 0; + if (post_urlenc) + fetch->post_urlenc = strdup(post_urlenc); + else if (post_multipart) + fetch->post_multipart = fetch_curl_post_convert(post_multipart); + fetch->cachedata.req_time = time(0); + fetch->cachedata.res_time = 0; + fetch->cachedata.date = 0; + fetch->cachedata.expires = 0; + fetch->cachedata.age = INVALID_AGE; + fetch->cachedata.max_age = INVALID_AGE; + fetch->cachedata.no_cache = false; + fetch->cachedata.etag = 0; + fetch->cachedata.last_modified = 0; + fetch->last_modified = 0; + fetch->file_etag = 0; + fetch->http_code = 0; +#ifdef WITH_SSL + memset(fetch->cert_data, 0, sizeof(fetch->cert_data)); +#endif + + if (!fetch->url || + (parent_url && !fetch->parent_fetch_url) || + (post_urlenc && !fetch->post_urlenc) || + (post_multipart && !fetch->post_multipart)) + goto failed; + +#define APPEND(list, value) \ + slist = curl_slist_append(list, value); \ + if (!slist) \ + goto failed; \ + list = slist; + + /* remove curl default headers */ + APPEND(fetch->headers, "Accept:"); + APPEND(fetch->headers, "Pragma:"); + + /* when doing a POST libcurl sends Expect: 100-continue" by default + * which fails with lighttpd, so disable it (see bug 1429054) */ + APPEND(fetch->headers, "Expect:"); + + if (option_accept_language) { + char s[80]; + snprintf(s, sizeof s, "Accept-Language: %s, *;q=0.1", + option_accept_language); + s[sizeof s - 1] = 0; + APPEND(fetch->headers, s); + } + + if (option_accept_charset) { + char s[80]; + snprintf(s, sizeof s, "Accept-Charset: %s, *;q=0.1", + option_accept_charset); + s[sizeof s - 1] = 0; + APPEND(fetch->headers, s); + } + + /* And add any headers specified by the caller */ + for (i = 0; headers[i]; i++) { + if (strncasecmp(headers[i], "If-Modified-Since:", 18) == 0) { + char *d = headers[i] + 18; + for (; *d && (*d == ' ' || *d == '\t'); d++) + /* do nothing */; + fetch->last_modified = curl_getdate(d, NULL); + } + else if (strncasecmp(headers[i], "If-None-Match:", 14) == 0) { + char *d = headers[i] + 14; + for (; *d && (*d == ' ' || *d == '\t' || *d == '"'); + d++) + /* do nothing */; + fetch->file_etag = atoi(d); + } + APPEND(fetch->headers, headers[i]); + } + + return fetch; + +failed: + free(host); + free(fetch->url); + free(fetch->parent_fetch_url); + free(fetch->post_urlenc); + if (fetch->post_multipart) + curl_formfree(fetch->post_multipart); + curl_slist_free_all(fetch->headers); + free(fetch); + return 0; +} + +/** + * Initiate a fetch from the queue. + * + * Called with a fetch structure and a CURL handle to be used to fetch the content. + * + * This will return whether or not the fetch was successfully initiated. + */ +static bool fetch_curl_initiate_fetch(struct curl_fetch_info *fetch, CURL *handle) +{ + CURLcode code; + CURLMcode codem; + + fetch->curl_handle = handle; + + /* Initialise the handle */ + code = fetch_curl_set_options(fetch); + if (code != CURLE_OK) { + fetch->curl_handle = 0; + return false; + } + + /* add to the global curl multi handle */ + codem = curl_multi_add_handle(fetch_curl_multi, fetch->curl_handle); + assert(codem == CURLM_OK || codem == CURLM_CALL_MULTI_PERFORM); + + return true; +} + +/** + * Find a CURL handle to use to dispatch a job + */ +static CURL *fetch_curl_get_handle(char *host) +{ + struct cache_handle *h; + CURL *ret; + RING_FINDBYHOST(curl_handle_ring, h, host); + if (h) { + ret = h->handle; + free(h->host); + RING_REMOVE(curl_handle_ring, h); + free(h); + } else { + ret = curl_easy_duphandle(fetch_blank_curl); + } + return ret; +} + +/** + * Dispatch a single job + */ +static bool fetch_curl_start(void *vfetch) +{ + struct curl_fetch_info *fetch = (struct curl_fetch_info*)vfetch; + return fetch_curl_initiate_fetch(fetch, fetch_curl_get_handle(fetch->host)); +} + +/** + * Cache a CURL handle for the provided host (if wanted) + * + */ +static void fetch_curl_cache_handle(CURL *handle, char *host) +{ + struct cache_handle *h = 0; + int c; + RING_FINDBYHOST(curl_handle_ring, h, host); + if (h) { + /* Already have a handle cached for this hostname */ + curl_easy_cleanup(handle); + return; + } + /* We do not have a handle cached, first up determine if the cache is full */ + RING_GETSIZE(struct cache_handle, curl_handle_ring, c); + if (c >= option_max_cached_fetch_handles) { + /* Cache is full, so, we rotate the ring by one and replace the + * oldest handle with this one. We do this without freeing/allocating + * memory (except the hostname) and without removing the entry from the + * ring and then re-inserting it, in order to be as efficient as we can. + */ + h = curl_handle_ring; + curl_handle_ring = h->r_next; + curl_easy_cleanup(h->handle); + h->handle = handle; + free(h->host); + h->host = strdup(host); + return; + } + /* The table isn't full yet, so make a shiny new handle to add to the ring */ + h = (struct cache_handle*)malloc(sizeof(struct cache_handle)); + h->handle = handle; + h->host = strdup(host); + RING_INSERT(curl_handle_ring, h); +} + +/** + * Set options specific for a fetch. + */ + +static CURLcode +fetch_curl_set_options(struct curl_fetch_info *f) +{ + CURLcode code; + const char *auth; + +#undef SETOPT +#define SETOPT(option, value) { \ + code = curl_easy_setopt(f->curl_handle, option, value); \ + if (code != CURLE_OK) \ + return code; \ + } + + SETOPT(CURLOPT_URL, f->url); + SETOPT(CURLOPT_PRIVATE, f); + SETOPT(CURLOPT_WRITEDATA, f); + SETOPT(CURLOPT_WRITEHEADER, f); + SETOPT(CURLOPT_PROGRESSDATA, f); + SETOPT(CURLOPT_REFERER, fetch_get_referer_to_send(f->fetch_handle)); + SETOPT(CURLOPT_HTTPHEADER, f->headers); + if (f->post_urlenc) { + SETOPT(CURLOPT_HTTPPOST, 0); + SETOPT(CURLOPT_HTTPGET, 0L); + SETOPT(CURLOPT_POSTFIELDS, f->post_urlenc); + } else if (f->post_multipart) { + SETOPT(CURLOPT_POSTFIELDS, 0); + SETOPT(CURLOPT_HTTPGET, 0L); + SETOPT(CURLOPT_HTTPPOST, f->post_multipart); + } else { + SETOPT(CURLOPT_POSTFIELDS, 0); + SETOPT(CURLOPT_HTTPPOST, 0); + SETOPT(CURLOPT_HTTPGET, 1L); + } + + f->cookie_string = urldb_get_cookie(f->url); + if (f->cookie_string) { + SETOPT(CURLOPT_COOKIE, f->cookie_string); + } else { + SETOPT(CURLOPT_COOKIE, 0); + } + +#ifdef WITH_AUTH + if ((auth = urldb_get_auth_details(f->url)) != NULL) { + SETOPT(CURLOPT_HTTPAUTH, CURLAUTH_ANY); + SETOPT(CURLOPT_USERPWD, auth); + } else { +#endif + SETOPT(CURLOPT_USERPWD, 0); +#ifdef WITH_AUTH + } +#endif + if (option_http_proxy && option_http_proxy_host) { + SETOPT(CURLOPT_PROXY, option_http_proxy_host); + SETOPT(CURLOPT_PROXYPORT, (long) option_http_proxy_port); + if (option_http_proxy_auth != OPTION_HTTP_PROXY_AUTH_NONE) { + SETOPT(CURLOPT_PROXYAUTH, + option_http_proxy_auth == + OPTION_HTTP_PROXY_AUTH_BASIC ? + (long) CURLAUTH_BASIC : + (long) CURLAUTH_NTLM); + snprintf(fetch_proxy_userpwd, + sizeof fetch_proxy_userpwd, + "%s:%s", + option_http_proxy_auth_user, + option_http_proxy_auth_pass); + SETOPT(CURLOPT_PROXYUSERPWD, fetch_proxy_userpwd); + } + } + +#ifdef WITH_SSL + if (urldb_get_cert_permissions(f->url)) { + /* Disable certificate verification */ + SETOPT(CURLOPT_SSL_VERIFYPEER, 0L); + SETOPT(CURLOPT_SSL_VERIFYHOST, 0L); + SETOPT(CURLOPT_SSL_CTX_FUNCTION, 0); + SETOPT(CURLOPT_SSL_CTX_DATA, 0); + } else { + /* do verification */ + SETOPT(CURLOPT_SSL_VERIFYPEER, 1L); + SETOPT(CURLOPT_SSL_VERIFYHOST, 2L); + SETOPT(CURLOPT_SSL_CTX_FUNCTION, fetch_curl_sslctxfun); + SETOPT(CURLOPT_SSL_CTX_DATA, f); + } +#endif + + return CURLE_OK; +} + + +#ifdef WITH_SSL +/** + * cURL SSL setup callback + */ + +static CURLcode +fetch_curl_sslctxfun(CURL *curl_handle, SSL_CTX *sslctx, void *parm) +{ + SSL_CTX_set_verify(sslctx, SSL_VERIFY_PEER, fetch_curl_verify_callback); + SSL_CTX_set_cert_verify_callback(sslctx, fetch_curl_cert_verify_callback, + parm); + return CURLE_OK; +} +#endif + + +/** + * Abort a fetch. + */ + +static void fetch_curl_abort(void *vf) +{ + struct curl_fetch_info *f = (struct curl_fetch_info *)vf; + assert(f); + LOG(("fetch %p, url '%s'", f, f->url)); + if (f->curl_handle) { + f->abort = true; + } else { + fetch_can_be_freed(f->fetch_handle); + } +} + + +/** + * Clean up the provided fetch object and free it. + * + * Will prod the queue afterwards to allow pending requests to be initiated. + */ + +static void fetch_curl_stop(struct curl_fetch_info *f) +{ + CURLMcode codem; + + assert(f); + LOG(("fetch %p, url '%s'", f, f->url)); + + if (f->curl_handle) { + /* remove from curl multi handle */ + codem = curl_multi_remove_handle(fetch_curl_multi, + f->curl_handle); + assert(codem == CURLM_OK); + /* Put this curl handle into the cache if wanted. */ + fetch_curl_cache_handle(f->curl_handle, f->host); + f->curl_handle = 0; + } + + fetch_can_be_freed(f->fetch_handle); +} + + +/** + * Free a fetch structure and associated resources. + */ + +static void fetch_curl_free(void *vf) +{ + struct curl_fetch_info *f = (struct curl_fetch_info *)vf; +#ifdef WITH_SSL + int i; +#endif + + if (f->curl_handle) + curl_easy_cleanup(f->curl_handle); + free(f->url); + free(f->host); + free(f->parent_fetch_url); + free(f->location); + free(f->cookie_string); + free(f->realm); + if (f->headers) + curl_slist_free_all(f->headers); + free(f->post_urlenc); + if (f->post_multipart) + curl_formfree(f->post_multipart); + free(f->cachedata.etag); + +#ifdef WITH_SSL + for (i = 0; i < MAX_CERTS && f->cert_data[i].cert; i++) { + f->cert_data[i].cert->references--; + if (f->cert_data[i].cert->references == 0) + X509_free(f->cert_data[i].cert); + } +#endif + + free(f); +} + + +/** + * Do some work on current fetches. + * + * Must be called regularly to make progress on fetches. + */ + +static void fetch_curl_poll(const char *scheme_ignored) +{ + int running, queue; + CURLMcode codem; + CURLMsg *curl_msg; + + /* do any possible work on the current fetches */ + do { + codem = curl_multi_perform(fetch_curl_multi, &running); + assert(codem == CURLM_OK || codem == CURLM_CALL_MULTI_PERFORM); + } while (codem == CURLM_CALL_MULTI_PERFORM); + + /* process curl results */ + curl_msg = curl_multi_info_read(fetch_curl_multi, &queue); + while (curl_msg) { + switch (curl_msg->msg) { + case CURLMSG_DONE: + fetch_curl_done(curl_msg->easy_handle, + curl_msg->data.result); + break; + default: + break; + } + curl_msg = curl_multi_info_read(fetch_curl_multi, &queue); + } +} + + +/** + * Handle a completed fetch (CURLMSG_DONE from curl_multi_info_read()). + * + * \param curl_handle curl easy handle of fetch + */ + +static void fetch_curl_done(CURL *curl_handle, CURLcode result) +{ + bool finished = false; + bool error = false; +#ifdef WITH_SSL + bool cert = false; +#endif + bool abort; + struct curl_fetch_info *f; + CURLcode code; + struct cache_data cachedata; +#ifdef WITH_SSL + struct cert_info certs[MAX_CERTS]; + memset(certs, 0, sizeof(certs)); +#endif + + /* find the structure associated with this fetch */ + code = curl_easy_getinfo(curl_handle, CURLINFO_PRIVATE, &f); + assert(code == CURLE_OK); + + abort = f->abort; + + if (!abort && result == CURLE_OK) { + /* fetch completed normally */ + if (f->stopped || + (!f->had_headers && + fetch_curl_process_headers(f))) + ; /* redirect with no body or similar */ + else + finished = true; + } else if (result == CURLE_PARTIAL_FILE) { + /* CURLE_PARTIAL_FILE occurs if the received body of a + * response is smaller than that specified in the + * Content-Length header. */ + if (!f->had_headers && fetch_curl_process_headers(f)) + ; /* redirect with partial body, or similar */ + else + error = true; + } else if (result == CURLE_WRITE_ERROR && f->stopped) + /* CURLE_WRITE_ERROR occurs when fetch_curl_data + * returns 0, which we use to abort intentionally */ + ; +#ifdef WITH_SSL + else if (result == CURLE_SSL_PEER_CERTIFICATE || + result == CURLE_SSL_CACERT) { + memcpy(certs, f->cert_data, sizeof(certs)); + memset(f->cert_data, 0, sizeof(f->cert_data)); + cert = true; + } +#endif + else + error = true; + + /* If finished, acquire cache info to pass to callback */ + if (finished) { + memcpy(&cachedata, &f->cachedata, sizeof(struct cache_data)); + f->cachedata.etag = 0; + } + + /* postponed until after stop so that queue fetches are started */ + if (abort) + ; /* fetch was aborted: no callback */ + else if (finished) { + fetch_send_callback(FETCH_FINISHED, f->fetch_handle, &cachedata, 0); + free(cachedata.etag); + } +#ifdef WITH_SSL + else if (cert) { + int i; + BIO *mem; + BUF_MEM *buf; + struct ssl_cert_info ssl_certs[MAX_CERTS]; + + for (i = 0; i < MAX_CERTS && certs[i].cert; i++) { + ssl_certs[i].version = + X509_get_version(certs[i].cert); + + mem = BIO_new(BIO_s_mem()); + ASN1_TIME_print(mem, + X509_get_notBefore(certs[i].cert)); + BIO_get_mem_ptr(mem, &buf); + (void) BIO_set_close(mem, BIO_NOCLOSE); + BIO_free(mem); + snprintf(ssl_certs[i].not_before, + min(sizeof ssl_certs[i].not_before, + (unsigned) buf->length + 1), + "%s", buf->data); + BUF_MEM_free(buf); + + mem = BIO_new(BIO_s_mem()); + ASN1_TIME_print(mem, + X509_get_notAfter(certs[i].cert)); + BIO_get_mem_ptr(mem, &buf); + (void) BIO_set_close(mem, BIO_NOCLOSE); + BIO_free(mem); + snprintf(ssl_certs[i].not_after, + min(sizeof ssl_certs[i].not_after, + (unsigned) buf->length + 1), + "%s", buf->data); + BUF_MEM_free(buf); + + ssl_certs[i].sig_type = + X509_get_signature_type(certs[i].cert); + ssl_certs[i].serial = + ASN1_INTEGER_get( + X509_get_serialNumber(certs[i].cert)); + mem = BIO_new(BIO_s_mem()); + X509_NAME_print_ex(mem, + X509_get_issuer_name(certs[i].cert), + 0, XN_FLAG_SEP_CPLUS_SPC | + XN_FLAG_DN_REV | XN_FLAG_FN_NONE); + BIO_get_mem_ptr(mem, &buf); + (void) BIO_set_close(mem, BIO_NOCLOSE); + BIO_free(mem); + snprintf(ssl_certs[i].issuer, + min(sizeof ssl_certs[i].issuer, + (unsigned) buf->length + 1), + "%s", buf->data); + BUF_MEM_free(buf); + + mem = BIO_new(BIO_s_mem()); + X509_NAME_print_ex(mem, + X509_get_subject_name(certs[i].cert), + 0, XN_FLAG_SEP_CPLUS_SPC | + XN_FLAG_DN_REV | XN_FLAG_FN_NONE); + BIO_get_mem_ptr(mem, &buf); + (void) BIO_set_close(mem, BIO_NOCLOSE); + BIO_free(mem); + snprintf(ssl_certs[i].subject, + min(sizeof ssl_certs[i].subject, + (unsigned) buf->length + 1), + "%s", buf->data); + BUF_MEM_free(buf); + + ssl_certs[i].cert_type = + X509_certificate_type(certs[i].cert, + X509_get_pubkey(certs[i].cert)); + + /* and clean up */ + certs[i].cert->references--; + if (certs[i].cert->references == 0) + X509_free(certs[i].cert); + } + + fetch_send_callback(FETCH_CERT_ERR, f->fetch_handle, &ssl_certs, i); + + } +#endif + else if (error) + fetch_send_callback(FETCH_ERROR, f->fetch_handle, fetch_error_buffer, 0); + + fetch_curl_stop(f); +} + + +/** + * Callback function for fetch progress. + */ + +int fetch_curl_progress(void *clientp, double dltotal, double dlnow, + double ultotal, double ulnow) +{ + struct curl_fetch_info *f = (struct curl_fetch_info *) clientp; + double percent; + + if (f->abort) + return 0; + + if (dltotal > 0) { + percent = dlnow * 100.0f / dltotal; + snprintf(fetch_progress_buffer, 255, + messages_get("Progress"), + human_friendly_bytesize(dlnow), + human_friendly_bytesize(dltotal)); + fetch_send_callback(FETCH_PROGRESS, f->fetch_handle, + fetch_progress_buffer, + (unsigned long) percent); + } else { + snprintf(fetch_progress_buffer, 255, + messages_get("ProgressU"), + human_friendly_bytesize(dlnow)); + fetch_send_callback(FETCH_PROGRESS, f->fetch_handle, + fetch_progress_buffer, 0); + } + + return 0; +} + + +/** + * Callback function for cURL. + */ + +size_t fetch_curl_data(void *data, size_t size, size_t nmemb, + struct curl_fetch_info *f) +{ + CURLcode code; + + /* ensure we only have to get this information once */ + if (!f->http_code) + { + code = curl_easy_getinfo(f->curl_handle, CURLINFO_HTTP_CODE, + &f->http_code); + fetch_set_http_code(f->fetch_handle, f->http_code); + assert(code == CURLE_OK); + } + + /* ignore body if this is a 401 reply by skipping it and reset + the HTTP response code to enable follow up fetches */ + if (f->http_code == 401) + { + f->http_code = 0; + return size * nmemb; + } + + /*LOG(("fetch %p, size %lu", f, size * nmemb));*/ + + if (f->abort || (!f->had_headers && fetch_curl_process_headers(f))) { + f->stopped = true; + return 0; + } + + /* send data to the caller */ + /*LOG(("FETCH_DATA"));*/ + fetch_send_callback(FETCH_DATA, f->fetch_handle, data, size * nmemb); + + if (f->abort) { + f->stopped = true; + return 0; + } + + return size * nmemb; +} + + +/** + * Callback function for headers. + */ + +size_t fetch_curl_header(char *data, size_t size, size_t nmemb, + struct curl_fetch_info *f) +{ + int i; + size *= nmemb; + +#define SKIP_ST(o) for (i = (o); i < (int) size && (data[i] == ' ' || data[i] == '\t'); i++) + + /* Set fetch response time if not already set */ + if (f->cachedata.res_time == 0) + f->cachedata.res_time = time(0); + + if (12 < size && strncasecmp(data, "Location:", 9) == 0) { + /* extract Location header */ + free(f->location); + f->location = malloc(size); + if (!f->location) { + LOG(("malloc failed")); + return size; + } + SKIP_ST(9); + strncpy(f->location, data + i, size - i); + f->location[size - i] = '\0'; + for (i = size - i - 1; i >= 0 && + (f->location[i] == ' ' || + f->location[i] == '\t' || + f->location[i] == '\r' || + f->location[i] == '\n'); i--) + f->location[i] = '\0'; + } else if (15 < size && strncasecmp(data, "Content-Length:", 15) == 0) { + /* extract Content-Length header */ + SKIP_ST(15); + if (i < (int)size && '0' <= data[i] && data[i] <= '9') + f->content_length = atol(data + i); +#ifdef WITH_AUTH + } else if (17 < size && strncasecmp(data, "WWW-Authenticate:", 17) == 0) { + /* extract the first Realm from WWW-Authenticate header */ + free(f->realm); + f->realm = malloc(size); + if (!f->realm) { + LOG(("malloc failed")); + return size; + } + SKIP_ST(17); + + while (i < (int) size - 5 && + strncasecmp(data + i, "realm", 5)) + i++; + while (i < (int) size - 1 && data[++i] != '"') + /* */; + i++; + + if (i < (int) size) { + strncpy(f->realm, data + i, size - i); + f->realm[size - i] = '\0'; + for (i = size - i - 1; i >= 0 && + (f->realm[i] == ' ' || + f->realm[i] == '"' || + f->realm[i] == '\t' || + f->realm[i] == '\r' || + f->realm[i] == '\n'); --i) + f->realm[i] = '\0'; + } +#endif + } else if (5 < size && strncasecmp(data, "Date:", 5) == 0) { + /* extract Date header */ + SKIP_ST(5); + if (i < (int) size) + f->cachedata.date = curl_getdate(&data[i], NULL); + } else if (4 < size && strncasecmp(data, "Age:", 4) == 0) { + /* extract Age header */ + SKIP_ST(4); + if (i < (int) size && '0' <= data[i] && data[i] <= '9') + f->cachedata.age = atoi(data + i); + } else if (8 < size && strncasecmp(data, "Expires:", 8) == 0) { + /* extract Expires header */ + SKIP_ST(8); + if (i < (int) size) + f->cachedata.expires = curl_getdate(&data[i], NULL); + } else if (14 < size && strncasecmp(data, "Cache-Control:", 14) == 0) { + /* extract and parse Cache-Control header */ + int comma; + SKIP_ST(14); + + while (i < (int) size) { + for (comma = i; comma < (int) size; comma++) + if (data[comma] == ',') + break; + + SKIP_ST(i); + + if (8 < comma - i && (strncasecmp(data + i, "no-cache", 8) == 0 || strncasecmp(data + i, "no-store", 8) == 0)) + /* When we get a disk cache we should + * distinguish between these two */ + f->cachedata.no_cache = true; + else if (7 < comma - i && strncasecmp(data + i, "max-age", 7) == 0) { + for (; i < comma; i++) + if (data[i] == '=') + break; + SKIP_ST(i+1); + if (i < comma) + f->cachedata.max_age = + atoi(data + i); + } + + i = comma + 1; + } + } else if (5 < size && strncasecmp(data, "ETag:", 5) == 0) { + /* extract ETag header */ + free(f->cachedata.etag); + f->cachedata.etag = malloc(size); + if (!f->cachedata.etag) { + LOG(("malloc failed")); + return size; + } + SKIP_ST(5); + strncpy(f->cachedata.etag, data + i, size - i); + f->cachedata.etag[size - i] = '\0'; + for (i = size - i - 1; i >= 0 && + (f->cachedata.etag[i] == ' ' || + f->cachedata.etag[i] == '\t' || + f->cachedata.etag[i] == '\r' || + f->cachedata.etag[i] == '\n'); --i) + f->cachedata.etag[i] = '\0'; + } else if (14 < size && strncasecmp(data, "Last-Modified:", 14) == 0) { + /* extract Last-Modified header */ + SKIP_ST(14); + if (i < (int) size) { + f->cachedata.last_modified = + curl_getdate(&data[i], NULL); + } + } else if (11 < size && strncasecmp(data, "Set-Cookie:", 11) == 0) { + /* extract Set-Cookie header */ + SKIP_ST(11); + + /* If the fetch is unverifiable and there's no parent fetch + * url, err on the side of caution and do not set the + cookie */ + + if (f->verifiable || f->parent_fetch_url) { + /* If the transaction's verifiable, we don't require + * that the request uri and the parent domain match, + * so don't pass in the parent in this case. */ + urldb_set_cookie(&data[i], f->url, + f->verifiable ? 0 + : f->parent_fetch_url); + } + } + + return size; +#undef SKIP_ST +} + + +/** + * Find the status code and content type and inform the caller. + * + * Return true if the fetch is being aborted. + */ + +bool fetch_curl_process_headers(struct curl_fetch_info *f) +{ + long http_code; + const char *type; + CURLcode code; + struct stat s; + char *url_path = 0; + + f->had_headers = true; + + /* Set fetch response time if not already set */ + if (f->cachedata.res_time == 0) + f->cachedata.res_time = time(0); + + if (!f->http_code) + { + code = curl_easy_getinfo(f->curl_handle, CURLINFO_HTTP_CODE, + &f->http_code); + fetch_set_http_code(f->fetch_handle, f->http_code); + assert(code == CURLE_OK); + } + http_code = f->http_code; + LOG(("HTTP status code %li", http_code)); + + if (http_code == 304 && !f->post_urlenc && !f->post_multipart) { + /* Not Modified && GET request */ + fetch_send_callback(FETCH_NOTMODIFIED, f->fetch_handle, + (const char *)&f->cachedata, 0); + return true; + } + + /* handle HTTP redirects (3xx response codes) */ + if (300 <= http_code && http_code < 400 && f->location != 0) { + LOG(("FETCH_REDIRECT, '%s'", f->location)); + fetch_send_callback(FETCH_REDIRECT, f->fetch_handle, f->location, 0); + return true; + } + + /* handle HTTP 401 (Authentication errors) */ +#ifdef WITH_AUTH + if (http_code == 401) { + fetch_send_callback(FETCH_AUTH, f->fetch_handle, f->realm,0); + return true; + } +#endif + + /* handle HTTP errors (non 2xx response codes) */ + if (f->only_2xx && strncmp(f->url, "http", 4) == 0 && + (http_code < 200 || 299 < http_code)) { + fetch_send_callback(FETCH_ERROR, f->fetch_handle, + messages_get("Not2xx"), 0); + return true; + } + + /* find MIME type from headers or filetype for local files */ + code = curl_easy_getinfo(f->curl_handle, CURLINFO_CONTENT_TYPE, &type); + assert(code == CURLE_OK); + + if (strncmp(f->url, "file:///", 8) == 0) + url_path = curl_unescape(f->url + 7, + (int) strlen(f->url) - 7); + + if (url_path && stat(url_path, &s) == 0) { + /* file: URL and file exists */ + /* create etag */ + free(f->cachedata.etag); + f->cachedata.etag = malloc(13); + if (f->cachedata.etag) + sprintf(f->cachedata.etag, + "\"%10d\"", (int)s.st_mtime); + + /* don't set last modified time so as to ensure that local + * files are revalidated at all times. */ + + /* If performed a conditional request and unmodified ... */ + if (f->last_modified && f->file_etag && + f->last_modified > s.st_mtime && + f->file_etag == s.st_mtime) { + fetch_send_callback(FETCH_NOTMODIFIED, f->fetch_handle, + (const char *)&f->cachedata, 0); + curl_free(url_path); + return true; + } + } + + if (type == 0) { + type = "text/plain"; + if (url_path) { + type = fetch_filetype(url_path); + } + } + + curl_free(url_path); + + LOG(("FETCH_TYPE, '%s'", type)); + fetch_send_callback(FETCH_TYPE, f->fetch_handle, type, f->content_length); + if (f->abort) + return true; + + return false; +} + + +/** + * Convert a list of struct ::form_successful_control to a list of + * struct curl_httppost for libcurl. + */ +struct curl_httppost * +fetch_curl_post_convert(struct form_successful_control *control) +{ + struct curl_httppost *post = 0, *last = 0; + CURLFORMcode code; + + for (; control; control = control->next) { + if (control->file) { + char *leafname = 0; +#ifdef riscos + char *temp; + int leaflen; + + temp = strrchr(control->value, '.'); + if (!temp) + temp = control->value; /* already leafname */ + else + temp += 1; + + leaflen = strlen(temp); + + leafname = malloc(leaflen + 1); + if (!leafname) { + LOG(("malloc failed")); + continue; + } + memcpy(leafname, temp, leaflen + 1); + + /* and s/\//\./g */ + for (temp = leafname; *temp; temp++) + if (*temp == '/') + *temp = '.'; +#else + leafname = strrchr(control->value, '/') ; + if (!leafname) + leafname = control->value; + else + leafname += 1; +#endif + /* We have to special case filenames of "", so curl + * a) actually attempts the fetch and + * b) doesn't attempt to open the file "" + */ + if (control->value[0] == '\0') { + /* dummy buffer - needs to be static so + * pointer's still valid when we go out + * of scope (not that libcurl should be + * attempting to access it, of course). */ + static char buf; + + code = curl_formadd(&post, &last, + CURLFORM_COPYNAME, control->name, + CURLFORM_BUFFER, control->value, + /* needed, as basename("") == "." */ + CURLFORM_FILENAME, "", + CURLFORM_BUFFERPTR, &buf, + CURLFORM_BUFFERLENGTH, 0, + CURLFORM_CONTENTTYPE, + "application/octet-stream", + CURLFORM_END); + if (code != CURL_FORMADD_OK) + LOG(("curl_formadd: %d (%s)", + code, control->name)); + } else { + char *mimetype = fetch_mimetype(control->value); + code = curl_formadd(&post, &last, + CURLFORM_COPYNAME, control->name, + CURLFORM_FILE, control->value, + CURLFORM_FILENAME, leafname, + CURLFORM_CONTENTTYPE, + (mimetype != 0 ? mimetype : "text/plain"), + CURLFORM_END); + if (code != CURL_FORMADD_OK) + LOG(("curl_formadd: %d (%s=%s)", + code, control->name, + control->value)); + free(mimetype); + } +#ifdef riscos + free(leafname); +#endif + } + else { + code = curl_formadd(&post, &last, + CURLFORM_COPYNAME, control->name, + CURLFORM_COPYCONTENTS, control->value, + CURLFORM_END); + if (code != CURL_FORMADD_OK) + LOG(("curl_formadd: %d (%s=%s)", code, + control->name, + control->value)); + } + } + + return post; +} + + +#ifdef WITH_SSL +/** + * OpenSSL Certificate verification callback + * Stores certificate details in fetch struct. + */ + +int fetch_curl_verify_callback(int preverify_ok, X509_STORE_CTX *x509_ctx) +{ + X509 *cert = X509_STORE_CTX_get_current_cert(x509_ctx); + int depth = X509_STORE_CTX_get_error_depth(x509_ctx); + int err = X509_STORE_CTX_get_error(x509_ctx); + struct curl_fetch_info *f = X509_STORE_CTX_get_app_data(x509_ctx); + + /* save the certificate by incrementing the reference count and + * keeping a pointer */ + if (depth < MAX_CERTS && !f->cert_data[depth].cert) { + f->cert_data[depth].cert = cert; + f->cert_data[depth].err = err; + cert->references++; + } + + return preverify_ok; +} + + +/** + * OpenSSL certificate chain verification callback + * Verifies certificate chain, setting up context for fetch_curl_verify_callback + */ + +int fetch_curl_cert_verify_callback(X509_STORE_CTX *x509_ctx, void *parm) +{ + int ok; + + /* Store fetch struct in context for verify callback */ + ok = X509_STORE_CTX_set_app_data(x509_ctx, parm); + + /* and verify the certificate chain */ + if (ok) + ok = X509_verify_cert(x509_ctx); + + return ok; +} +#endif + +static int curl_fetchers_registered = 0; +/** Initialise a cURL fetcher */ +static bool +fetch_curl_initialise(const char *scheme) +{ + LOG(("Initialise cURL fetcher for %s", scheme)); + curl_fetchers_registered++; + return true; /* Always succeeds */ +} + +/** Finalise a cURL fetcher */ +static void +fetch_curl_finalise(const char *scheme) +{ + curl_fetchers_registered--; + LOG(("Finalise cURL fetcher %s", scheme)); + if (curl_fetchers_registered == 0) { + /* All the fetchers have been finalised. */ + LOG(("All cURL fetchers finalised, closing down cURL")); + CURLMcode codem; + + curl_easy_cleanup(fetch_blank_curl); + + codem = curl_multi_cleanup(fetch_curl_multi); + if (codem != CURLM_OK) + LOG(("curl_multi_cleanup failed: ignoring")); + + curl_global_cleanup(); + } +} + +/** + * Initialise the fetcher. + * + * Must be called once before any other function. + */ + +void register_curl_fetchers(void) +{ + CURLcode code; + curl_version_info_data *data; + int i; + + LOG(("curl_version %s", curl_version())); + + code = curl_global_init(CURL_GLOBAL_ALL); + if (code != CURLE_OK) + die("Failed to initialise the fetch module " + "(curl_global_init failed)."); + + fetch_curl_multi = curl_multi_init(); + if (!fetch_curl_multi) + die("Failed to initialise the fetch module " + "(curl_multi_init failed)."); + + /* Create a curl easy handle with the options that are common to all + fetches. */ + fetch_blank_curl = curl_easy_init(); + if (!fetch_blank_curl) + die("Failed to initialise the fetch module " + "(curl_easy_init failed)."); + +#undef SETOPT +#define SETOPT(option, value) \ + code = curl_easy_setopt(fetch_blank_curl, option, value); \ + if (code != CURLE_OK) \ + goto curl_easy_setopt_failed; + + if (verbose_log) { + SETOPT(CURLOPT_VERBOSE, 1); + } else { + SETOPT(CURLOPT_VERBOSE, 0); + } + SETOPT(CURLOPT_ERRORBUFFER, fetch_error_buffer); + SETOPT(CURLOPT_WRITEFUNCTION, fetch_curl_data); + SETOPT(CURLOPT_HEADERFUNCTION, fetch_curl_header); + SETOPT(CURLOPT_PROGRESSFUNCTION, fetch_curl_progress); + SETOPT(CURLOPT_NOPROGRESS, 0); + SETOPT(CURLOPT_USERAGENT, user_agent_string()); + SETOPT(CURLOPT_ENCODING, "gzip"); + SETOPT(CURLOPT_LOW_SPEED_LIMIT, 1L); + SETOPT(CURLOPT_LOW_SPEED_TIME, 180L); + SETOPT(CURLOPT_NOSIGNAL, 1L); + SETOPT(CURLOPT_CONNECTTIMEOUT, 30L); + + if (option_ca_bundle) + SETOPT(CURLOPT_CAINFO, option_ca_bundle); + + /* cURL initialised okay, register the fetchers */ + + data = curl_version_info(CURLVERSION_NOW); + + for (i = 0; data->protocols[i]; i++) + if (!fetch_add_fetcher(data->protocols[i], + fetch_curl_initialise, + fetch_curl_setup, + fetch_curl_start, + fetch_curl_abort, + fetch_curl_free, + fetch_curl_poll, + fetch_curl_finalise)) { + LOG(("Unable to register cURL fetcher for %s", data->protocols[i])); + } + return; + +curl_easy_setopt_failed: + die("Failed to initialise the fetch module " + "(curl_easy_setopt failed)."); +} diff --git a/content/fetchers/fetch_curl.h b/content/fetchers/fetch_curl.h new file mode 100644 index 000000000..6dcba8914 --- /dev/null +++ b/content/fetchers/fetch_curl.h @@ -0,0 +1,22 @@ +/* + * This file is part of NetSurf, http://netsurf-browser.org/ + * Licensed under the GNU General Public License, + * http://www.opensource.org/licenses/gpl-license + * Copyright 2007 Daniel Silverstone + */ + +/** \file + * Fetching of data from a URL (Registration). + */ + +#ifndef NETSURF_CONTENT_FETCHERS_FETCH_CURL_H +#define NETSURF_CONTENT_FETCHERS_FETCH_CURL_H + +#include + +void register_curl_fetchers(void); + +/** Global cURL multi handle. */ +extern CURLM *fetch_curl_multi; + +#endif -- cgit v1.2.3