From ae992eab4d58f35465976ddb7603f37573370b84 Mon Sep 17 00:00:00 2001 From: James Bursa Date: Fri, 30 May 2008 04:11:16 +0000 Subject: Add new fetch callback FETCH_HEADER for headers and move as much header parsing as possible from fetch_curl.c to fetchcache.c. This simplifies fetch_curl.c and will make it possible to store response headers in future. svn path=/trunk/netsurf/; revision=4226 --- content/fetch.h | 1 + content/fetchcache.c | 106 ++++++++++++++++++++++++++++++++++++-- content/fetchers/fetch_curl.c | 115 ++++-------------------------------------- 3 files changed, 113 insertions(+), 109 deletions(-) diff --git a/content/fetch.h b/content/fetch.h index da846c607..4306e65da 100644 --- a/content/fetch.h +++ b/content/fetch.h @@ -30,6 +30,7 @@ typedef enum { FETCH_TYPE, FETCH_PROGRESS, + FETCH_HEADER, FETCH_DATA, FETCH_FINISHED, FETCH_ERROR, diff --git a/content/fetchcache.c b/content/fetchcache.c index d7e180b89..adfc9fac8 100644 --- a/content/fetchcache.c +++ b/content/fetchcache.c @@ -31,6 +31,7 @@ #include #include #include +#include <curl/curl.h> /* for curl_getdate() */ #include "utils/config.h" #include "content/content.h" #include "content/fetchcache.h" @@ -47,6 +48,8 @@ static regex_t re_content_type; static void fetchcache_callback(fetch_msg msg, void *p, const void *data, unsigned long size); static char *fetchcache_parse_type(const char *s, char **params[]); +static void fetchcache_parse_header(struct content *c, const char *data, + size_t size); static void fetchcache_error_page(struct content *c, const char *error); static void fetchcache_cache_update(struct content *c, const struct cache_data *data); @@ -445,6 +448,12 @@ void fetchcache_callback(fetch_msg msg, void *p, const void *data, content_broadcast(c, CONTENT_MSG_STATUS, msg_data); break; + case FETCH_HEADER: + LOG(("FETCH_HEADER \"%.*s\"", + (int) size, (char *) data)); + fetchcache_parse_header(c, data, size); + break; + case FETCH_DATA: if (!content_process_data(c, data, size)) {
fetch_abort(c->fetch); @@ -453,8 +462,6 @@ void fetchcache_callback(fetch_msg msg, void *p, const void *data, break; case FETCH_FINISHED: - fetchcache_cache_update(c, - (const struct cache_data *)data); c->fetch = 0; content_set_status(c, messages_get("Converting"), c->source_size); @@ -604,6 +611,96 @@ no_memory: } +/** + * Parse an HTTP response header. + * + * See RFC 2616 4.2. + */ + +void fetchcache_parse_header(struct content *c, const char *data, + size_t size) +{ + size_t i; + +#define SKIP_ST(o) for (i = (o); i < size && (data[i] == ' ' || data[i] == '\t'); i++) + + /* Set fetch response time if not already set */ + if (c->cache_data->res_time == 0) + c->cache_data->res_time = time(0); + + if (5 < size && strncasecmp(data, "Date:", 5) == 0) { + /* extract Date header */ + SKIP_ST(5); + if (i < size) + c->cache_data->date = curl_getdate(&data[i], NULL); + } else if (4 < size && strncasecmp(data, "Age:", 4) == 0) { + /* extract Age header */ + SKIP_ST(4); + if (i < size && '0' <= data[i] && data[i] <= '9') + c->cache_data->age = atoi(data + i); + } else if (8 < size && strncasecmp(data, "Expires:", 8) == 0) { + /* extract Expires header */ + SKIP_ST(8); + if (i < size) + c->cache_data->expires = curl_getdate(&data[i], NULL); + } else if (14 < size && strncasecmp(data, "Cache-Control:", 14) == 0) { + /* extract and parse Cache-Control header */ + size_t comma; + SKIP_ST(14); + + while (i < size) { + for (comma = i; comma < size; comma++) + if (data[comma] == ',') + break; + + SKIP_ST(i); + + if (8 < comma - i && (strncasecmp(data + i, "no-cache", 8) == 0 || strncasecmp(data + i, "no-store", 8) == 0)) + /* When we get a disk cache we should + * distinguish between these two */ + c->cache_data->no_cache = true; + else if (7 < comma - i && strncasecmp(data + i, "max-age", 7) == 0) { + for (; i < comma; i++) + if (data[i] == '=') + break; + SKIP_ST(i+1); + if (i < comma) + c->cache_data->max_age = + atoi(data + i); + } + + i = comma + 1; + } + } else if (5 < size 
&& strncasecmp(data, "ETag:", 5) == 0) { + /* extract ETag header */ + free(c->cache_data->etag); + c->cache_data->etag = talloc_array(c, char, size); + if (!c->cache_data->etag) { + LOG(("malloc failed")); + return; + } + SKIP_ST(5); + strncpy(c->cache_data->etag, data + i, size - i); + c->cache_data->etag[size - i] = '\0'; + for (i = size - i - 1; i >= 0 && + (c->cache_data->etag[i] == ' ' || + c->cache_data->etag[i] == '\t' || + c->cache_data->etag[i] == '\r' || + c->cache_data->etag[i] == '\n'); --i) + c->cache_data->etag[i] = '\0'; + } else if (14 < size && strncasecmp(data, "Last-Modified:", 14) == 0) { + /* extract Last-Modified header */ + SKIP_ST(14); + if (i < size) { + c->cache_data->last_modified = + curl_getdate(&data[i], NULL); + } + } + + return; +} + + /** * Generate an error page. * @@ -682,7 +779,7 @@ void fetchcache_notmodified(struct content *c, const void *data) struct content *fb; union content_msg_data msg_data; - assert(c && data); + assert(c); assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN); /* Look for cached content */ @@ -748,8 +845,7 @@ void fetchcache_notmodified(struct content *c, const void *data) c->status = CONTENT_STATUS_ERROR; /* and update fallback's cache control data */ - fetchcache_cache_update(fb, - (const struct cache_data *)data); + fetchcache_cache_update(fb, c->cache_data); } else { /* No cached content, so unconditionally refetch */ diff --git a/content/fetchers/fetch_curl.c b/content/fetchers/fetch_curl.c index 68f35282e..7c2ca5c8d 100644 --- a/content/fetchers/fetch_curl.c +++ b/content/fetchers/fetch_curl.c @@ -24,7 +24,7 @@ * This implementation uses libcurl's 'multi' interface. * * - * The CURL handles are cached in the cache_ring. There are at most + * The CURL handles are cached in the curl_handle_ring. There are at most * ::option_max_cached_fetch_handles in this ring. */ @@ -81,7 +81,6 @@ struct curl_fetch_info { char *post_urlenc; /**< Url encoded POST string, or 0. 
*/ unsigned long http_code; /**< HTTP result code from cURL. */ struct curl_httppost *post_multipart; /**< Multipart post data, or 0. */ - struct cache_data cachedata; /**< Cache control data */ time_t last_modified; /**< If-Modified-Since time */ time_t file_etag; /**< ETag for local objects */ #ifdef WITH_SSL @@ -340,15 +339,6 @@ void * fetch_curl_setup(struct fetch *parent_fetch, const char *url, fetch->post_urlenc = strdup(post_urlenc); else if (post_multipart) fetch->post_multipart = fetch_curl_post_convert(post_multipart); - fetch->cachedata.req_time = time(0); - fetch->cachedata.res_time = 0; - fetch->cachedata.date = 0; - fetch->cachedata.expires = 0; - fetch->cachedata.age = INVALID_AGE; - fetch->cachedata.max_age = INVALID_AGE; - fetch->cachedata.no_cache = false; - fetch->cachedata.etag = 0; - fetch->cachedata.last_modified = 0; fetch->last_modified = 0; fetch->file_etag = 0; fetch->http_code = 0; @@ -700,7 +690,6 @@ void fetch_curl_free(void *vf) free(f->post_urlenc); if (f->post_multipart) curl_formfree(f->post_multipart); - free(f->cachedata.etag); #ifdef WITH_SSL for (i = 0; i < MAX_CERTS && f->cert_data[i].cert; i++) { @@ -764,7 +753,6 @@ void fetch_curl_done(CURL *curl_handle, CURLcode result) bool abort; struct curl_fetch_info *f; CURLcode code; - struct cache_data cachedata; #ifdef WITH_SSL struct cert_info certs[MAX_CERTS]; memset(certs, 0, sizeof(certs)); @@ -810,19 +798,10 @@ void fetch_curl_done(CURL *curl_handle, CURLcode result) fetch_curl_stop(f); - /* If finished, acquire cache info to pass to callback */ - if (finished) { - memcpy(&cachedata, &f->cachedata, sizeof(struct cache_data)); - f->cachedata.etag = 0; - } - if (abort) ; /* fetch was aborted: no callback */ - else if (finished) { - fetch_send_callback(FETCH_FINISHED, f->fetch_handle, - &cachedata, 0); - free(cachedata.etag); - } + else if (finished) + fetch_send_callback(FETCH_FINISHED, f->fetch_handle, 0, 0); #ifdef WITH_SSL else if (cert) { int i; @@ -1009,6 +988,8 @@ size_t 
fetch_curl_data(void *data, size_t size, size_t nmemb, /** * Callback function for headers. + * + * See RFC 2616 4.2. */ size_t fetch_curl_header(char *data, size_t size, size_t nmemb, @@ -1017,11 +998,9 @@ size_t fetch_curl_header(char *data, size_t size, size_t nmemb, int i; size *= nmemb; -#define SKIP_ST(o) for (i = (o); i < (int) size && (data[i] == ' ' || data[i] == '\t'); i++) + fetch_send_callback(FETCH_HEADER, f->fetch_handle, data, size); - /* Set fetch response time if not already set */ - if (f->cachedata.res_time == 0) - f->cachedata.res_time = time(0); +#define SKIP_ST(o) for (i = (o); i < (int) size && (data[i] == ' ' || data[i] == '\t'); i++) if (12 < size && strncasecmp(data, "Location:", 9) == 0) { /* extract Location header */ @@ -1075,73 +1054,6 @@ size_t fetch_curl_header(char *data, size_t size, size_t nmemb, f->realm[i] = '\0'; } #endif - } else if (5 < size && strncasecmp(data, "Date:", 5) == 0) { - /* extract Date header */ - SKIP_ST(5); - if (i < (int) size) - f->cachedata.date = curl_getdate(&data[i], NULL); - } else if (4 < size && strncasecmp(data, "Age:", 4) == 0) { - /* extract Age header */ - SKIP_ST(4); - if (i < (int) size && '0' <= data[i] && data[i] <= '9') - f->cachedata.age = atoi(data + i); - } else if (8 < size && strncasecmp(data, "Expires:", 8) == 0) { - /* extract Expires header */ - SKIP_ST(8); - if (i < (int) size) - f->cachedata.expires = curl_getdate(&data[i], NULL); - } else if (14 < size && strncasecmp(data, "Cache-Control:", 14) == 0) { - /* extract and parse Cache-Control header */ - int comma; - SKIP_ST(14); - - while (i < (int) size) { - for (comma = i; comma < (int) size; comma++) - if (data[comma] == ',') - break; - - SKIP_ST(i); - - if (8 < comma - i && (strncasecmp(data + i, "no-cache", 8) == 0 || strncasecmp(data + i, "no-store", 8) == 0)) - /* When we get a disk cache we should - * distinguish between these two */ - f->cachedata.no_cache = true; - else if (7 < comma - i && strncasecmp(data + i, "max-age", 
7) == 0) { - for (; i < comma; i++) - if (data[i] == '=') - break; - SKIP_ST(i+1); - if (i < comma) - f->cachedata.max_age = - atoi(data + i); - } - - i = comma + 1; - } - } else if (5 < size && strncasecmp(data, "ETag:", 5) == 0) { - /* extract ETag header */ - free(f->cachedata.etag); - f->cachedata.etag = malloc(size); - if (!f->cachedata.etag) { - LOG(("malloc failed")); - return size; - } - SKIP_ST(5); - strncpy(f->cachedata.etag, data + i, size - i); - f->cachedata.etag[size - i] = '\0'; - for (i = size - i - 1; i >= 0 && - (f->cachedata.etag[i] == ' ' || - f->cachedata.etag[i] == '\t' || - f->cachedata.etag[i] == '\r' || - f->cachedata.etag[i] == '\n'); --i) - f->cachedata.etag[i] = '\0'; - } else if (14 < size && strncasecmp(data, "Last-Modified:", 14) == 0) { - /* extract Last-Modified header */ - SKIP_ST(14); - if (i < (int) size) { - f->cachedata.last_modified = - curl_getdate(&data[i], NULL); - } } else if (11 < size && strncasecmp(data, "Set-Cookie:", 11) == 0) { /* extract Set-Cookie header */ SKIP_ST(11); @@ -1170,10 +1082,6 @@ bool fetch_curl_process_headers(struct curl_fetch_info *f) f->had_headers = true; - /* Set fetch response time if not already set */ - if (f->cachedata.res_time == 0) - f->cachedata.res_time = time(0); - if (!f->http_code) { code = curl_easy_getinfo(f->curl_handle, CURLINFO_HTTP_CODE, @@ -1186,8 +1094,7 @@ bool fetch_curl_process_headers(struct curl_fetch_info *f) if (http_code == 304 && !f->post_urlenc && !f->post_multipart) { /* Not Modified && GET request */ - fetch_send_callback(FETCH_NOTMODIFIED, f->fetch_handle, - (const char *)&f->cachedata, 0); + fetch_send_callback(FETCH_NOTMODIFIED, f->fetch_handle, 0, 0); return true; } @@ -1225,11 +1132,11 @@ bool fetch_curl_process_headers(struct curl_fetch_info *f) if (url_path && stat(url_path, &s) == 0) { /* file: URL and file exists */ /* create etag */ - free(f->cachedata.etag); + /*free(f->cachedata.etag); f->cachedata.etag = malloc(13); if (f->cachedata.etag) 
sprintf(f->cachedata.etag, - "\"%10d\"", (int)s.st_mtime); + "\"%10d\"", (int)s.st_mtime);*/ /* don't set last modified time so as to ensure that local * files are revalidated at all times. */ @@ -1239,7 +1146,7 @@ bool fetch_curl_process_headers(struct curl_fetch_info *f) f->last_modified > s.st_mtime && f->file_etag == s.st_mtime) { fetch_send_callback(FETCH_NOTMODIFIED, f->fetch_handle, - (const char *)&f->cachedata, 0); + 0, 0); curl_free(url_path); return true; } -- cgit v1.2.3