From 05a4eceae270ac1f51c2d8e8ee9ca123b52e541f Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Sun, 28 Mar 2010 15:34:07 +0000 Subject: Remove fetchcache.[ch] svn path=/trunk/netsurf/; revision=10183 --- content/fetchcache.c | 1328 -------------------------------------------------- 1 file changed, 1328 deletions(-) delete mode 100644 content/fetchcache.c (limited to 'content/fetchcache.c') diff --git a/content/fetchcache.c b/content/fetchcache.c deleted file mode 100644 index 3a0b667f9..000000000 --- a/content/fetchcache.c +++ /dev/null @@ -1,1328 +0,0 @@ -/* - * Copyright 2005 James Bursa - * Copyright 2009 John-Mark Bell - * - * This file is part of NetSurf, http://www.netsurf-browser.org/ - * - * NetSurf is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; version 2 of the License. - * - * NetSurf is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program. If not, see . - */ - -/** \file - * High-level fetching, caching and conversion (implementation). - * - * The implementation checks the cache for the requested URL. If it is not - * present, a content is created and a fetch is initiated. As the status of the - * fetch changes and data is received, the content is updated appropriately. - */ - -#define _GNU_SOURCE /* for strndup */ -#include -#include -#include -#include -#include -#include -#include -#include /* for curl_getdate() */ -#include "utils/config.h" -#include "content/content.h" -#include "content/fetchcache.h" -#include "content/fetch.h" -#include "desktop/options.h" -#include "desktop/searchweb.h" -#include "content/urldb.h" -#include "utils/log.h" -#include "utils/messages.h" -#include "utils/talloc.h" -#include "utils/url.h" -#include "utils/utils.h" - - -static char error_page[1000]; -static regex_t re_content_type; -static void fetchcache_callback(fetch_msg msg, void *p, const void *data, - unsigned long size, fetch_error_code errorcode); -static char *fetchcache_parse_type(const char *s, char **params[]); -static void fetchcache_parse_header(struct content *c, const char *data, - size_t size); -static void fetchcache_error_page(struct content *c, const char *error, - fetch_error_code errorcode); -static void fetchcache_search_redirect(struct content *c, const char *error); -static void fetchcache_cache_update(struct content *c); -static void fetchcache_cache_clone(struct content *c, - const struct cache_data *data); -static void fetchcache_notmodified(struct content *c, const void *data); -static void fetchcache_redirect(struct content *c, const void *data, - unsigned long size); -static void fetchcache_redirect_common(struct content *c, bool verifiable, - const char *url, const char *referer, - struct content *parent); - -static void fetchcache_auth(struct content *c, const char *realm); - - -/** - * Retrieve a URL or prepare to fetch, convert, and cache it. - * - * The caller must supply a callback function which is called when anything - * interesting happens to the content which is returned. See content.h. - * - * \param url address to fetch - * \param callback function to call when anything interesting happens to - * the new content - * \param p1 user parameter for callback (may be a pointer or integer) - * \param p2 user parameter for callback (may be a pointer or integer) - * \param width available space - * \param height available space - * \param no_error_pages if an error occurs, send CONTENT_MSG_ERROR instead - * of generating an error page - * \param post_urlenc url encoded post data, or 0 if none - * \param post_multipart multipart post data, or 0 if none - * \param verifiable this transaction is verifiable - * \param download download, rather than render content - * \return a new content, or 0 on memory exhaustion - * - * On success, call fetchcache_go() to start work on the new content. - */ - -struct content * fetchcache(const char *url, - void (*callback)(content_msg msg, struct content *c, - intptr_t p1, intptr_t p2, union content_msg_data data), - intptr_t p1, intptr_t p2, - int width, int height, - bool no_error_pages, - char *post_urlenc, - struct fetch_multipart_data *post_multipart, - bool verifiable, - bool download) -{ - struct content *c; - char *url1; - char *hash, *query; - char *etag = 0; - time_t date = 0; - - if (strncasecmp(url, "file:///", 8) && - strncasecmp(url, "file:/", 6) == 0) { - /* Manipulate file URLs into correct format */ - int len = strlen(url) + 1; - - if (strncasecmp(url, "file://", SLEN("file://")) == 0) { - /* file://path */ - url1 = malloc(len + 1 /* + '/' */); - if (!url1) - return NULL; - - memcpy(url1, "file:///", SLEN("file:///")); - memcpy(url1 + SLEN("file:///"), - url + SLEN("file://"), - len - SLEN("file://")); - } else { - /* file:/... */ - url1 = malloc(len + 2 /* + "//" */); - if (!url1) - return NULL; - - memcpy(url1, "file:///", SLEN("file:///")); - memcpy(url1 + SLEN("file:///"), - url + SLEN("file:/"), - len - SLEN("file:/")); - } - } else { - /* simply duplicate the URL */ - if ((url1 = strdup(url)) == NULL) - return NULL; - } - - /* strip fragment identifier */ - if ((hash = strchr(url1, '#')) != NULL) - *hash = 0; - - /* look for query; we don't cache URLs with a query segment */ - query = strchr(url1, '?'); - - LOG(("url %s", url1)); - - if (!post_urlenc && !post_multipart && !download && !query) { - if ((c = content_get(url1)) != NULL) { - struct cache_data *cd = &c->cache_data; - int current_age, freshness_lifetime; - - /* Calculate staleness of cached content as per - * RFC 2616 13.2.3/13.2.4 */ - current_age = max(0, (cd->res_time - cd->date)); - current_age = max(current_age, - (cd->age == INVALID_AGE) ? 0 - : cd->age); - current_age += cd->res_time - cd->req_time + - time(0) - cd->res_time; - freshness_lifetime = - (cd->max_age != INVALID_AGE) ? cd->max_age : - (cd->expires != 0) ? cd->expires - cd->date : - (cd->last_modified != 0) ? - (time(0) - cd->last_modified) / 10 : - 0; - - if (freshness_lifetime > current_age || - cd->date == 0) { - /* Ok, either a fresh content or we're - * currently fetching the selected content - * (therefore it must be fresh) */ - free(url1); - if (!content_add_user(c, callback, p1, p2)) - return NULL; - else - return c; - } - - /* Ok. We have a cache entry, but it appears stale. - * Therefore, validate it. */ - if (cd->last_modified) - date = cd->last_modified; - else - date = c->cache_data.date; - etag = c->cache_data.etag; - } - } - - c = content_create(url1); - free(url1); - if (!c) - return NULL; - - /* Fill in cache validation fields (if present) */ - if (date) - c->cache_data.date = date; - if (etag) { - c->cache_data.etag = talloc_strdup(c, etag); - if (!c->cache_data.etag) - return NULL; - } - - if (!content_add_user(c, callback, p1, p2)) { - return NULL; - } - - if (!post_urlenc && !post_multipart && !download && !query) - c->fresh = true; - - c->width = width; - c->height = height; - c->no_error_pages = no_error_pages; - c->download = download; - - return c; -} - - -/** - * Start fetching and converting a content. - * - * \param content content to fetch, as returned by fetchcache() - * \param referer referring URL, or 0 - * \param callback function to call when anything interesting happens to - * the new content - * \param p1 user parameter for callback - * \param p2 user parameter for callback - * \param width available space - * \param height available space - * \param post_urlenc url encoded post data, or 0 if none - * \param post_multipart multipart post data, or 0 if none - * \param verifiable this transaction is verifiable - * \param parent Content which spawned this one, or NULL if none - * - * Errors will be sent back through the callback. - */ - -void fetchcache_go(struct content *content, const char *referer, - void (*callback)(content_msg msg, struct content *c, - intptr_t p1, intptr_t p2, union content_msg_data data), - intptr_t p1, intptr_t p2, - int width, int height, - char *post_urlenc, - struct fetch_multipart_data *post_multipart, - bool verifiable, struct content *parent) -{ - char error_message[500]; - union content_msg_data msg_data; - - LOG(("url %s, status %s", content->url, - content_status_name[content->status])); - - /* We may well have been asked to fetch an URL using a protocol - * that we can't support. Check for this here and, if we can't - * perform the fetch, notify the caller and exit */ - if (!fetch_can_fetch(content->url)) { - - /* The only case where this should fail is if we're a - * brand new content with no active fetch. If we're not, - * another content with the same URL somehow got through - * the fetch_can_fetch check. That should be impossible. - */ - assert(content->status == CONTENT_STATUS_TYPE_UNKNOWN && - !content->fetch); - - snprintf(error_message, sizeof error_message, - messages_get("InvalidURL"), - content->url); - - if (content->no_error_pages) { - /* Mark as in error so content is destroyed - * on cache clean */ - content->status = CONTENT_STATUS_ERROR; - msg_data.error = error_message; - callback(CONTENT_MSG_ERROR, - content, p1, p2, msg_data); - } else { - fetchcache_error_page(content, error_message, - FETCH_ERROR_NO_ERROR); - } - - return; - } - - if (content->status == CONTENT_STATUS_TYPE_UNKNOWN && - content->fetch) { - /* fetching, but not yet received any response: - * no action required */ - - } else if (content->status == CONTENT_STATUS_TYPE_UNKNOWN) { - /* brand new content: start fetch */ - char **headers; - int i = 0; - char *etag = content->cache_data.etag; - time_t date = content->cache_data.date; - - content->cache_data.req_time = time(NULL); - content->cache_data.res_time = 0; - content->cache_data.date = 0; - content->cache_data.expires = 0; - content->cache_data.age = INVALID_AGE; - content->cache_data.max_age = INVALID_AGE; - content->cache_data.no_cache = false; - content->cache_data.etag = 0; - content->cache_data.last_modified = 0; - - headers = malloc(3 * sizeof(char *)); - if (!headers) { - content->status = CONTENT_STATUS_ERROR; - msg_data.error = messages_get("NoMemory"); - callback(CONTENT_MSG_ERROR, content, p1, p2, - msg_data); - return; - } - if (etag) { - int len = SLEN("If-None-Match: ") + strlen(etag) + 1; - - headers[i] = malloc(len); - if (!headers[i]) { - free(headers); - content->status = CONTENT_STATUS_ERROR; - msg_data.error = messages_get("NoMemory"); - callback(CONTENT_MSG_ERROR, content, p1, p2, - msg_data); - return; - } - snprintf(headers[i++], len, "If-None-Match: %s", etag); - talloc_free(etag); - } - if (date) { - /* Maximum length of an RFC 1123 date is 29 bytes */ - int len = SLEN("If-Modified-Since: ") + 29 + 1; - - headers[i] = malloc(len); - if (!headers[i]) { - while (--i >= 0) { - free(headers[i]); - } - free(headers); - content->status = CONTENT_STATUS_ERROR; - msg_data.error = messages_get("NoMemory"); - callback(CONTENT_MSG_ERROR, content, p1, p2, - msg_data); - return; - } - snprintf(headers[i++], len, "If-Modified-Since: %s", - rfc1123_date(date)); - } - headers[i] = 0; - content->fetch = fetch_start(content->url, referer, - fetchcache_callback, content, - content->no_error_pages, - post_urlenc, post_multipart, verifiable, - parent, headers); - for (i = 0; headers[i]; i++) - free(headers[i]); - free(headers); - if (!content->fetch) { - LOG(("warning: fetch_start failed")); - snprintf(error_message, sizeof error_message, - messages_get("InvalidURL"), - content->url); - if (content->no_error_pages) { - content->status = CONTENT_STATUS_ERROR; - msg_data.error = error_message; - content_broadcast(content, CONTENT_MSG_ERROR, - msg_data); - } else { - fetchcache_error_page(content, error_message, - FETCH_ERROR_NO_ERROR); - } - } - - /* in these remaining cases, we have to 'catch up' with the content's - * status, ie. send the same messages as if the content was - * gradually getting to the current status from TYPE_UNKNOWN */ - } else if (content->status == CONTENT_STATUS_LOADING) { - callback(CONTENT_MSG_LOADING, content, p1, p2, msg_data); - - } else if (content->status == CONTENT_STATUS_READY) { - callback(CONTENT_MSG_LOADING, content, p1, p2, msg_data); - if (content_find_user(content, callback, p1, p2)) - callback(CONTENT_MSG_READY, content, p1, p2, msg_data); - - } else if (content->status == CONTENT_STATUS_DONE) { - callback(CONTENT_MSG_LOADING, content, p1, p2, msg_data); - if (content->available_width != width) - content_reformat(content, width, height); - if (content_find_user(content, callback, p1, p2)) - callback(CONTENT_MSG_READY, content, p1, p2, msg_data); - if (content_find_user(content, callback, p1, p2)) - callback(CONTENT_MSG_DONE, content, p1, p2, msg_data); - - } else if (content->status == CONTENT_STATUS_ERROR) { - /* shouldn't usually occur */ - msg_data.error = messages_get("MiscError"); - callback(CONTENT_MSG_ERROR, content, p1, p2, msg_data); - } -} - - -/** - * Callback function for fetch. - * - * This is called when the status of a fetch changes. - */ - -void fetchcache_callback(fetch_msg msg, void *p, const void *data, - unsigned long size, fetch_error_code errorcode) -{ - bool res; - struct content *c = p; - content_type type; - char *mime_type; - char **params; - struct content *parent; - unsigned int i; - union content_msg_data msg_data; - - switch (msg) { - case FETCH_TYPE: - c->total_size = size; - c->http_code = fetch_http_code(c->fetch); - mime_type = fetchcache_parse_type(data, ¶ms); - if (!mime_type) { - msg_data.error = messages_get("NoMemory"); - content_broadcast(c, CONTENT_MSG_ERROR, - msg_data); - fetch_abort(c->fetch); - c->fetch = 0; - return; - } - type = content_lookup(mime_type); - parent = fetch_get_parent(c->fetch); - res = content_set_type(c, - c->download ? CONTENT_OTHER : type, - mime_type, (const char **) params, - parent); - free(mime_type); - for (i = 0; params[i]; i++) - free(params[i]); - free(params); - if (!res) { - fetch_abort(c->fetch); - c->fetch = 0; - } - - if (c->cache_data.date || c->cache_data.etag) { - /* We've just made a conditional request - * that returned with something other - * than 304. Therefore, there's a stale - * content floating around in the cache. - * Hunt it down and mark it as stale, so - * it'll get cleaned when unused. We - * assume it's either READY or DONE -- - * anything else is of marginal staleness - * (or in error, which will cause it to - * be flushed from the cache, anyway) - */ - struct content *stale_content = - content_get_ready(c->url); - - if (stale_content) - stale_content->fresh = false; - } - break; - - case FETCH_PROGRESS: - if (size) - content_set_status(c, - messages_get("RecPercent"), - data, (unsigned int)size); - else - content_set_status(c, - messages_get("Received"), - data); - content_broadcast(c, CONTENT_MSG_STATUS, msg_data); - break; - - case FETCH_HEADER: - fetchcache_parse_header(c, data, size); - break; - - case FETCH_DATA: - if (!content_process_data(c, data, size)) { - fetch_abort(c->fetch); - c->fetch = 0; - } - break; - - case FETCH_FINISHED: - fetchcache_cache_update(c); - c->fetch = 0; - content_set_status(c, messages_get("Converting"), - c->source_size); - content_broadcast(c, CONTENT_MSG_STATUS, msg_data); - content_convert(c, c->width, c->height); - break; - - case FETCH_ERROR: - LOG(("FETCH_ERROR, '%s'", (const char *)data)); - c->fetch = 0; - if (c->no_error_pages) { - c->status = CONTENT_STATUS_ERROR; - msg_data.error = data; - content_broadcast(c, CONTENT_MSG_ERROR, - msg_data); - } else { - content_reset(c); - fetchcache_error_page(c, data, errorcode); - } - break; - - case FETCH_REDIRECT: - fetchcache_redirect(c, data, size); - break; - - case FETCH_NOTMODIFIED: - fetchcache_notmodified(c, data); - break; - - case FETCH_AUTH: - fetchcache_auth(c, data); - break; - - case FETCH_CERT_ERR: - c->fetch = 0; - /* set the status to ERROR so that the content is - * destroyed in content_clean() */ - c->status = CONTENT_STATUS_ERROR; - - msg_data.ssl.certs = data; - msg_data.ssl.num = size; - content_broadcast(c, CONTENT_MSG_SSL, msg_data); - break; - - default: - assert(0); - } -} - - -/** - * Initialise the fetchcache module. - */ - -void fetchcache_init(void) -{ - regcomp_wrapper(&re_content_type, - "^([-0-9a-zA-Z_.]+/[-0-9a-zA-Z_.+]+)[ \t]*" - "(;[ \t]*([-0-9a-zA-Z_.]+)=" - "([-0-9a-zA-Z_.]+|\"([^\"]|[\\].)*\")[ \t]*)*$", - REG_EXTENDED); -} - - -/** - * Parse a Content-Type header. - * - * \param s a Content-Type header - * \param params updated to point to an array of strings, ordered attribute, - * value, attribute, ..., 0 - * \return a new string containing the MIME-type, or 0 on memory exhaustion - */ - -#define MAX_ATTRS 10 - -char *fetchcache_parse_type(const char *s, char **params[]) -{ - char *type = 0; - unsigned int i; - int r; - regmatch_t pmatch[2 + MAX_ATTRS * 3]; - - *params = malloc((MAX_ATTRS * 2 + 2) * sizeof (*params)[0]); - if (!*params) - goto no_memory; - for (i = 0; i != MAX_ATTRS * 2 + 2; i++) - (*params)[i] = 0; - - r = regexec(&re_content_type, s, 2 + MAX_ATTRS * 3, pmatch, 0); - if (r) { - char *semi; - LOG(("failed to parse content-type '%s'", s)); - /* The mime type must be first, so only copy up to the - * first semicolon in the string. This allows us to have - * a better attempt at handling pages sent with broken - * Content-Type headers. Obviously, any truly broken - * Content-Type headers will be unaffected by this heuristic - */ - semi = strchr(s, ';'); - if (semi) - type = strndup(s, semi - s); - else - type = strdup(s); - if (!type) - goto no_memory; - return type; - } - - type = strndup(s + pmatch[1].rm_so, pmatch[1].rm_eo - pmatch[1].rm_so); - if (!type) { - free(*params); - return 0; - } - - /* parameters */ - for (i = 0; i != MAX_ATTRS && pmatch[2 + 3 * i].rm_so != -1; i++) { - (*params)[2 * i] = strndup(s + pmatch[2 + 3 * i + 1].rm_so, - pmatch[2 + 3 * i + 1].rm_eo - - pmatch[2 + 3 * i + 1].rm_so); - (*params)[2 * i + 1] = strndup(s + pmatch[2 + 3 * i + 2].rm_so, - pmatch[2 + 3 * i + 2].rm_eo - - pmatch[2 + 3 * i + 2].rm_so); - if (!(*params)[2 * i] || !(*params)[2 * i + 1]) - goto no_memory; - } - (*params)[2 * i] = 0; - - return type; - -no_memory: - if (*params != NULL) { - for (i = 0; i != MAX_ATTRS * 2 + 2; i++) - free((*params)[i]); - free(*params); - } - free(type); - - return 0; -} - - -/** - * Parse an HTTP response header. - * - * See RFC 2616 4.2. - */ - -void fetchcache_parse_header(struct content *c, const char *data, - size_t size) -{ - size_t i; - -#define SKIP_ST(o) for (i = (o); i < size && (data[i] == ' ' || data[i] == '\t'); i++) - - /* Set fetch response time if not already set */ - if (c->cache_data.res_time == 0) - c->cache_data.res_time = time(NULL); - - if (5 < size && strncasecmp(data, "Date:", 5) == 0) { - /* extract Date header */ - SKIP_ST(5); - if (i < size) - c->cache_data.date = curl_getdate(&data[i], NULL); - } else if (4 < size && strncasecmp(data, "Age:", 4) == 0) { - /* extract Age header */ - SKIP_ST(4); - if (i < size && '0' <= data[i] && data[i] <= '9') - c->cache_data.age = atoi(data + i); - } else if (8 < size && strncasecmp(data, "Expires:", 8) == 0) { - /* extract Expires header */ - SKIP_ST(8); - if (i < size) - c->cache_data.expires = curl_getdate(&data[i], NULL); - } else if (14 < size && strncasecmp(data, "Cache-Control:", 14) == 0) { - /* extract and parse Cache-Control header */ - size_t comma; - SKIP_ST(14); - - while (i < size) { - for (comma = i; comma < size; comma++) - if (data[comma] == ',') - break; - - SKIP_ST(i); - - if (8 < comma - i && (strncasecmp(data + i, "no-cache", 8) == 0 || strncasecmp(data + i, "no-store", 8) == 0)) - /* When we get a disk cache we should - * distinguish between these two */ - c->cache_data.no_cache = true; - else if (7 < comma - i && strncasecmp(data + i, "max-age", 7) == 0) { - for (; i < comma; i++) - if (data[i] == '=') - break; - SKIP_ST(i+1); - if (i < comma) - c->cache_data.max_age = - atoi(data + i); - } - - i = comma + 1; - } - } else if (5 < size && strncasecmp(data, "ETag:", 5) == 0) { - /* extract ETag header */ - talloc_free(c->cache_data.etag); - c->cache_data.etag = talloc_array(c, char, size); - if (!c->cache_data.etag) { - LOG(("malloc failed")); - return; - } - SKIP_ST(5); - strncpy(c->cache_data.etag, data + i, size - i); - c->cache_data.etag[size - i] = '\0'; - for (i = size - i - 1; ((int) i) >= 0 && - (c->cache_data.etag[i] == ' ' || - c->cache_data.etag[i] == '\t' || - c->cache_data.etag[i] == '\r' || - c->cache_data.etag[i] == '\n'); --i) - c->cache_data.etag[i] = '\0'; - } else if (14 < size && strncasecmp(data, "Last-Modified:", 14) == 0) { - /* extract Last-Modified header */ - SKIP_ST(14); - if (i < size) { - c->cache_data.last_modified = - curl_getdate(&data[i], NULL); - } - } - - return; -} - - -/** - * Generate an error page. Optionally redirect to web search provider - * \param c empty content to generate the page in - * \param error message to display - */ - -void fetchcache_error_page(struct content *c, const char *error, - fetch_error_code errorcode) -{ - const char *params[] = { 0 }; - int length; - char *host; - - if (option_search_url_bar) { - if (url_host(c->url, &host) != URL_FUNC_OK) { - warn_user(messages_get("NoMemory"), 0); - } else if ((strcasecmp(host, search_web_provider_host()) - != 0) && (errorcode == - FETCH_ERROR_COULDNT_RESOLVE_HOST)) { - fetchcache_search_redirect(c, error); - free(host); - return; - } else - free(host); - } - if ((length = snprintf(error_page, sizeof(error_page), - messages_get("ErrorPage"), error)) < 0) - length = 0; - if (!content_set_type(c, CONTENT_HTML, "text/html", params, NULL)) - return; - if (!content_process_data(c, error_page, length)) - return; - content_convert(c, c->width, c->height); - - /* Mark content as non-fresh, so it'll get cleaned from the - * cache at the earliest opportunity */ - c->fresh = false; -} - -void fetchcache_search_redirect(struct content *c, const char *error) -{ - char *redirurl, *temp; - - /* clear http:// plus trailing / from url, it is already escaped */ - temp = strdup(c->url + SLEN("http://")); - if (temp == NULL) { - warn_user(messages_get("NoMemory"), 0); - return; - } - temp[strlen(temp)-1] = '\0'; - redirurl = search_web_get_url(temp); - if (redirurl == NULL) { - warn_user(messages_get("NoMemory"), 0); - return; - } - - fetchcache_redirect_common(c, false, redirurl, NULL, c); - free(redirurl); - return; -} - -/** - * Update a content's cache state - * - * \param c The content - */ - -void fetchcache_cache_update(struct content *c) -{ - if (c->cache_data.date == 0) - c->cache_data.date = time(NULL); - - if (c->cache_data.no_cache) - c->fresh = false; -} - -/** - * Clone cache info into a content - * - * \param c The content - * \param data Cache data - */ - -void fetchcache_cache_clone(struct content *c, - const struct cache_data *data) -{ - assert(c && data); - - c->cache_data.req_time = data->req_time; - c->cache_data.res_time = data->res_time; - - if (data->date != 0) - c->cache_data.date = data->date; - - if (data->expires != 0) - c->cache_data.expires = data->expires; - - if (data->age != INVALID_AGE) - c->cache_data.age = data->age; - - if (data->max_age != INVALID_AGE) - c->cache_data.max_age = data->max_age; - - if (data->no_cache) - c->cache_data.no_cache = data->no_cache; - - if (data->etag) { - talloc_free(c->cache_data.etag); - c->cache_data.etag = talloc_strdup(c, data->etag); - } - - if (data->last_modified) - c->cache_data.last_modified = data->last_modified; -} - - -/** - * Not modified callback handler - */ - -void fetchcache_notmodified(struct content *c, const void *data) -{ - struct content *fb; - union content_msg_data msg_data; - - assert(c); - assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN); - - /* Look for cached content */ - fb = content_get_ready(c->url); - - if (fb) { - /* Found it */ - intptr_t p1, p2; - void (*callback)(content_msg msg, - struct content *c, intptr_t p1, - intptr_t p2, - union content_msg_data data); - - /* Now notify all users that we're changing content */ - while (c->user_list->next) { - p1 = c->user_list->next->p1; - p2 = c->user_list->next->p2; - callback = c->user_list->next->callback; - - if (!content_add_user(fb, callback, p1, p2)) { - c->type = CONTENT_UNKNOWN; - c->status = CONTENT_STATUS_ERROR; - msg_data.error = messages_get("NoMemory"); - content_broadcast(c, CONTENT_MSG_ERROR, - msg_data); - return; - } - - content_remove_user(c, callback, p1, p2); - - msg_data.new_url = NULL; - callback(CONTENT_MSG_NEWPTR, fb, p1, p2, msg_data); - - /* and catch user up with fallback's state */ - if (fb->status == CONTENT_STATUS_LOADING) { - callback(CONTENT_MSG_LOADING, - fb, p1, p2, msg_data); - } else if (fb->status == CONTENT_STATUS_READY) { - callback(CONTENT_MSG_LOADING, - fb, p1, p2, msg_data); - if (content_find_user(fb, callback, p1, p2)) - callback(CONTENT_MSG_READY, - fb, p1, p2, msg_data); - } else if (fb->status == CONTENT_STATUS_DONE) { - callback(CONTENT_MSG_LOADING, - fb, p1, p2, msg_data); - if (content_find_user(fb, callback, p1, p2)) - callback(CONTENT_MSG_READY, - fb, p1, p2, msg_data); - if (content_find_user(fb, callback, p1, p2)) - callback(CONTENT_MSG_DONE, - fb, p1, p2, msg_data); - } else if (fb->status == CONTENT_STATUS_ERROR) { - /* shouldn't usually occur */ - msg_data.error = messages_get("MiscError"); - callback(CONTENT_MSG_ERROR, fb, p1, p2, - msg_data); - } - } - - /* mark content invalid */ - c->fetch = 0; - c->status = CONTENT_STATUS_ERROR; - - /* clone our cache control data into the fallback */ - fetchcache_cache_clone(fb, &c->cache_data); - /* and update the fallback's cache state */ - fetchcache_cache_update(fb); - } - else { - /* No cached content, so unconditionally refetch */ - struct content_user *u; - const char *ref = fetch_get_referer(c->fetch); - struct content *parent = fetch_get_parent(c->fetch); - char *referer = NULL; - - if (ref) { - referer = strdup(ref); - if (!referer) { - c->type = CONTENT_UNKNOWN; - c->status = CONTENT_STATUS_ERROR; - msg_data.error = messages_get("NoMemory"); - content_broadcast(c, CONTENT_MSG_ERROR, - msg_data); - return; - } - } - - fetch_abort(c->fetch); - c->fetch = 0; - - c->cache_data.date = 0; - talloc_free(c->cache_data.etag); - c->cache_data.etag = 0; - - for (u = c->user_list->next; u; u = u->next) { - fetchcache_go(c, referer, u->callback, u->p1, u->p2, - c->width, c->height, 0, 0, - false, parent); - } - - free(referer); - } -} - -/** - * Redirect callback handler - */ - -void fetchcache_redirect(struct content *c, const void *data, - unsigned long size) -{ - char *url, *url1; - char *referer; - char *scheme; - long http_code; - const char *ref; - struct content *parent; - bool parent_was_verifiable; - union content_msg_data msg_data; - url_func_result result; - - /* Preconditions */ - assert(c && data); - assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN); - - /* Extract fetch details */ - http_code = fetch_http_code(c->fetch); - ref = fetch_get_referer(c->fetch); - parent = fetch_get_parent(c->fetch); - parent_was_verifiable = fetch_get_verifiable(c->fetch); - - /* Ensure a redirect happened */ - assert(300 <= http_code && http_code <= 399); - /* 304 is handled by fetch_notmodified() */ - assert(http_code != 304); - - /* Clone referer -- original is destroyed in fetch_abort() */ - referer = ref ? strdup(ref) : NULL; - - /* set the status to ERROR so that this content is - * destroyed in content_clean() */ - fetch_abort(c->fetch); - c->fetch = 0; - c->status = CONTENT_STATUS_ERROR; - - /* Ensure that referer cloning succeeded - * _must_ be after content invalidation */ - if (ref && !referer) { - LOG(("Failed cloning referer")); - - msg_data.error = messages_get("BadRedirect"); - content_broadcast(c, CONTENT_MSG_ERROR, msg_data); - - return; - } - - /** \todo 300, 305, 307 - * More specifically: - * + 300 needs to serve up the fetch body to the user - * + 305 needs to refetch using the proxy specified in ::data - * + 307 needs to refetch. - * - * If the original request method was either GET or HEAD, then follow - * redirect unconditionally. If the original request method was neither - * GET nor HEAD, then the user MUST be asked what to do. - * - * Note: - * For backwards compatibility, all 301, 302 and 303 redirects are - * followed unconditionally with a GET request to the new location. - */ - if (http_code != 301 && http_code != 302 && http_code != 303) { - LOG(("Unsupported redirect type %ld", http_code)); - - msg_data.error = messages_get("BadRedirect"); - content_broadcast(c, CONTENT_MSG_ERROR, msg_data); - - free(referer); - return; - } - - /* Forcibly stop redirecting if we've followed too many redirects */ -#define REDIRECT_LIMIT 10 - if (c->redirect_count > REDIRECT_LIMIT) { - LOG(("Too many nested redirects")); - - msg_data.error = messages_get("BadRedirect"); - content_broadcast(c, CONTENT_MSG_ERROR, msg_data); - - free(referer); - return; - } -#undef REDIRECT_LIMIT - - /* redirect URLs must be absolute by HTTP/1.1, but many - * sites send relative ones: treat them as relative to - * requested URL */ - result = url_join(data, c->url, &url1); - if (result != URL_FUNC_OK) { - msg_data.error = messages_get("BadRedirect"); - content_broadcast(c, CONTENT_MSG_ERROR, msg_data); - - free(referer); - return; - } - - /* Normalize redirect target -- this is vital as this URL may - * be inserted into the urldb, which expects normalized URLs */ - result = url_normalize(url1, &url); - if (result != URL_FUNC_OK) { - msg_data.error = messages_get("BadRedirect"); - content_broadcast(c, CONTENT_MSG_ERROR, msg_data); - - free(url1); - free(referer); - return; - } - - /* No longer need url1 */ - free(url1); - - /* Ensure that redirects to file:/// URLs are trapped */ - result = url_scheme(url, &scheme); - if (result != URL_FUNC_OK) { - msg_data.error = messages_get("BadRedirect"); - content_broadcast(c, CONTENT_MSG_ERROR, msg_data); - - free(url); - free(referer); - return; - } - - if (strcasecmp(scheme, "file") == 0) { - msg_data.error = messages_get("BadRedirect"); - content_broadcast(c, CONTENT_MSG_ERROR, msg_data); - - free(scheme); - free(url); - free(referer); - return; - } - - free(scheme); - fetchcache_redirect_common(c, parent_was_verifiable, url, referer, parent); - free(url); - free(referer); -} - -/** - * common logic from fetchcache_redirect() / fetchcache_search_redirect() - * \param c the content param from the original function - * \param verifiable parent_was_verifiable [false for search_redirect] - * \param url the url being considered; caller retains ownership - * \param referer referer [ / NULL particularly for search_redirect] - * \param parent parent content [ / c for search_redirect] - */ - -void fetchcache_redirect_common(struct content *c, bool verifiable, - const char *url, const char *referer, struct content *parent) -{ - union content_msg_data msg_data; - bool can_fetch; - /* check there's a fetch handler */ - can_fetch = fetch_can_fetch(url); - - /* Process users of this content */ - while (c->user_list->next) { - intptr_t p1, p2; - void (*callback)(content_msg msg, - struct content *c, intptr_t p1, - intptr_t p2, - union content_msg_data data); - struct content *replacement; - - p1 = c->user_list->next->p1; - p2 = c->user_list->next->p2; - callback = c->user_list->next->callback; - - /* If we can't fetch this url, attempt to launch it */ - if (!can_fetch) { - msg_data.launch_url = url; - callback(CONTENT_MSG_LAUNCH, c, p1, p2, msg_data); - } - - /* Remove user */ - content_remove_user(c, callback, p1, p2); - - if (can_fetch) { - /* Get replacement content -- HTTP GET request */ - - /* A note about fetch verifiability: according to - * both RFC2109 and 2965, redirects result in an - * unverifiable fetch and thus cookies must be handled - * differently. Unfortunately, however, other browsers - * do not adhere to this rule and just process cookies - * as per normal in this case. Websites have come to - * depend upon this "feature", so we must do something - * which approximates the appropriate behaviour. - * - * Therefore, a redirected fetch will preserve the - * verifiability of the origin fetch. Thus, fetches - * for embedded objects will remain unverifiable, - * as expected. - */ - replacement = fetchcache(url, callback, p1, p2, - c->width, c->height, c->no_error_pages, - NULL, NULL, verifiable, - c->download); - if (!replacement) { - msg_data.error = messages_get("BadRedirect"); - content_broadcast(c, CONTENT_MSG_ERROR, - msg_data); - return; - } - /* Set replacement's redirect count to 1 greater - * than ours */ - replacement->redirect_count = c->redirect_count + 1; - - /* Notify user that content has changed */ - msg_data.new_url = url; - callback(CONTENT_MSG_NEWPTR, replacement, - p1, p2, msg_data); - - /* Start fetching the replacement content */ - fetchcache_go(replacement, referer, callback, p1, p2, - c->width, c->height, NULL, NULL, - verifiable, parent); - } - } -} - -/** - * Authentication callback handler - */ - -void fetchcache_auth(struct content *c, const char *realm) -{ - char *referer; - const char *ref; - const char *auth; - struct content *parent; - bool parent_was_verifiable; - union content_msg_data msg_data; - char *headers = NULL; - - /* Preconditions */ - assert(c); - assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN); - /* Realm may be NULL iff there was no WWW-Authenticate header - * Use the content's URL as the realm in this case */ - if (realm == NULL) - realm = c->url; - - /* Extract fetch details */ - ref = fetch_get_referer(c->fetch); - parent = fetch_get_parent(c->fetch); - parent_was_verifiable = fetch_get_verifiable(c->fetch); - - /* Clone referer -- original is destroyed in fetch_abort() */ - referer = ref ? strdup(ref) : NULL; - - fetch_abort(c->fetch); - c->fetch = NULL; - - /* Ensure that referer cloning succeeded - * _must_ be after content invalidation */ - if (ref && !referer) { - LOG(("Failed cloning referer")); - - c->status = CONTENT_STATUS_ERROR; - msg_data.error = messages_get("BadRedirect"); - content_broadcast(c, CONTENT_MSG_ERROR, msg_data); - - return; - } - - /* Now, see if we've got some auth details */ - auth = urldb_get_auth_details(c->url, realm); - - if (auth == NULL || c->tried_with_auth) { - /* No authentication details or we tried what we had, so ask - * our client for them. */ - c->tried_with_auth = false; /* Allow retry. */ - - c->status = CONTENT_STATUS_ERROR; - msg_data.auth_realm = realm; - content_broadcast(c, CONTENT_MSG_AUTH, msg_data); - - free(referer); - - return; - } - /* Flag we're retry fetching with auth data. Will be used to detect - * wrong auth data so that we can ask our client for better auth. */ - c->tried_with_auth = true; - - /* We have authentication details. Fetch with them. */ - /** \todo all the useful things like headers, POST. */ - c->fetch = fetch_start(c->url, referer, - fetchcache_callback, c, - c->no_error_pages, - NULL, NULL, parent_was_verifiable, - parent, &headers); - if (c->fetch == NULL) { - char error_message[500]; - - LOG(("warning: fetch_start failed")); - snprintf(error_message, sizeof error_message, - messages_get("InvalidURL"), - c->url); - if (c->no_error_pages) { - c->status = CONTENT_STATUS_ERROR; - msg_data.error = error_message; - content_broadcast(c, CONTENT_MSG_ERROR, msg_data); - } else { - fetchcache_error_page(c, error_message, - FETCH_ERROR_URL); - } - } - - /* Clean up */ - free(referer); -} - -#ifdef TEST - -#include - -void callback(fetchcache_msg msg, struct content *c, void *p, char *error) -{ - switch (msg) { - case FETCHCACHE_OK: - LOG(("FETCHCACHE_OK, url '%s'", p)); - break; - case FETCHCACHE_BADTYPE: - LOG(("FETCHCACHE_BADTYPE, url '%s'", p)); - break; - case FETCHCACHE_ERROR: - LOG(("FETCHCACHE_ERROR, url '%s', error '%s'", p, error)); - break; - default: - assert(0); - } -} - -char *test[] = {"http://www.google.co.uk/", "http://www.ox.ac.uk/", "blah://blah/"}; - -int main(void) -{ - int i; - - cache_init(); - fetch_init(); - - for (i = 0; i != sizeof(test) / sizeof(test[0]); i++) - fetchcache(test[i], 0, callback, test[i], 800, 0); - for (i = 0; i != 5; i++) { - fetch_poll(); - sleep(1); - } - for (i = 0; i != sizeof(test) / sizeof(test[0]); i++) - fetchcache(test[i], 0, callback, test[i], 800, 0); - for (i = 0; i != 20; i++) { - fetch_poll(); - sleep(1); - } - return 0; -} - -#endif -- cgit v1.2.3