summaryrefslogtreecommitdiff
path: root/content/fetchcache.c
diff options
context:
space:
mode:
authorJohn Mark Bell <jmb@netsurf-browser.org>2008-01-28 01:35:00 +0000
committerJohn Mark Bell <jmb@netsurf-browser.org>2008-01-28 01:35:00 +0000
commit78d194cb77db00a530779aa2a1e8d2ef9707d229 (patch)
treeba4d25c396623825fcc020bf26cd757ca34f37ec /content/fetchcache.c
parent32fe1bd1bfcaa2c96cd407c3d1e20f2d4000bd0b (diff)
downloadnetsurf-78d194cb77db00a530779aa2a1e8d2ef9707d229.tar.gz
netsurf-78d194cb77db00a530779aa2a1e8d2ef9707d229.tar.bz2
Rework handling of HTTP redirects -- we now count the number of redirects followed for a given item and abort if a fixed limit is reached. This fixes sites which have pages that redirect to themselves.
Redirect handling is now transparent to clients of fetchcache. The new scheme works as follows: 1) Request content for URL (fetchcache()). 2) Start fetch of content (fetchcache_go()). 3) If no redirect occurs, continue through the LOADING, READY, DONE etc. states as before. If a redirect occurs, receive NEWPTR for each redirect that occurs, then continue through the LOADING, READY, DONE etc. states as before. The upshot of this is that redirects result in extra contents being created. It also means that, until LOADING has been received, the content (and thus the URL being fetched) may change. Therefore, fetchcache clients should expect to have to deal with transient data prior to LOADING occurring. As a necessary side-effect of this, the HTML object URLs and CSS @import URLs are no longer stored alongside the relevant contents. These URLs can be accessed by interrogating the url member of struct content anyway, so it was a rather redundant scheme before. svn path=/trunk/netsurf/; revision=3787
Diffstat (limited to 'content/fetchcache.c')
-rw-r--r--content/fetchcache.c175
1 files changed, 142 insertions, 33 deletions
diff --git a/content/fetchcache.c b/content/fetchcache.c
index 2e8a7216c..c0e34e3aa 100644
--- a/content/fetchcache.c
+++ b/content/fetchcache.c
@@ -51,6 +51,8 @@ static void fetchcache_error_page(struct content *c, const char *error);
static void fetchcache_cache_update(struct content *c,
const struct cache_data *data);
static void fetchcache_notmodified(struct content *c, const void *data);
+static void fetchcache_redirect(struct content *c, const void *data,
+ unsigned long size);
/**
@@ -380,11 +382,10 @@ void fetchcache_callback(fetch_msg msg, void *p, const void *data,
bool res;
struct content *c = p;
content_type type;
- char *mime_type, *url;
+ char *mime_type;
char **params;
unsigned int i;
union content_msg_data msg_data;
- url_func_result result;
switch (msg) {
case FETCH_TYPE:
@@ -457,37 +458,7 @@ void fetchcache_callback(fetch_msg msg, void *p, const void *data,
break;
case FETCH_REDIRECT:
- c->fetch = 0;
- /* redirect URLs must be absolute by HTTP/1.1, but many sites send
- * relative ones: treat them as relative to requested URL */
- result = url_join(data, c->url, &url);
- /* set the status to ERROR so that the content is
- * destroyed in content_clean() */
- c->status = CONTENT_STATUS_ERROR;
- if (result == URL_FUNC_OK) {
- bool same;
-
- result = url_compare(c->url, url, &same);
-
- /* check that we're not attempting to
- * redirect to the same URL */
- if (result != URL_FUNC_OK || same) {
- msg_data.error =
- messages_get("BadRedirect");
- content_broadcast(c,
- CONTENT_MSG_ERROR, msg_data);
- }
- else {
- msg_data.redirect = url;
- content_broadcast(c,
- CONTENT_MSG_REDIRECT,
- msg_data);
- }
- free(url);
- } else {
- msg_data.error = messages_get("BadRedirect");
- content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
- }
+ fetchcache_redirect(c, data, size);
break;
case FETCH_NOTMODIFIED:
@@ -790,6 +761,144 @@ void fetchcache_notmodified(struct content *c, const void *data)
}
}
+/**
+ * Redirect callback handler. Aborts c's fetch, sets c to CONTENT_STATUS_ERROR, then either broadcasts CONTENT_MSG_ERROR ("BadRedirect") on c or, for each entry after the head of c->user_list, removes that user, requests a replacement content for the joined URL via fetchcache(), sends it CONTENT_MSG_NEWPTR and starts the fetch with fetchcache_go(). \param c content whose fetch reported the redirect. \param data redirect target as supplied by the fetch; joined against c->url, so it may be relative. \param size not used in this function.
+ */
+
+void fetchcache_redirect(struct content *c, const void *data,
+ unsigned long size)
+{
+ char *url;
+ char *referer;
+ long http_code = fetch_http_code(c->fetch);
+ const char *ref = fetch_get_referer(c->fetch);
+ union content_msg_data msg_data;
+ url_func_result result;
+
+ /* Preconditions. NOTE(review): c->fetch is already dereferenced by the initialisers above, so a NULL c would fault before this assert is reached. */
+ assert(c && data);
+ assert(c->status == CONTENT_STATUS_TYPE_UNKNOWN);
+ /* Ensure a redirect happened */
+ assert(300 <= http_code && http_code <= 399);
+ /* 304 is handled separately (this file declares fetchcache_notmodified() for it) */
+ assert(http_code != 304);
+
+ /* Clone referer -- original is destroyed in fetch_abort() */
+ referer = ref ? strdup(ref) : NULL;
+
+ /* Abort the fetch and detach it from the content, then set the
+ * status to ERROR so that this content is destroyed in content_clean() */
+ fetch_abort(c->fetch);
+ c->fetch = 0;
+ c->status = CONTENT_STATUS_ERROR;
+
+ /* Ensure that referer cloning succeeded
+ * _must_ be after content invalidation. A NULL ref is not an error; only a failed strdup() of a non-NULL ref is. */
+ if (ref && !referer) {
+ LOG(("Failed cloning referer"));
+
+ msg_data.error = messages_get("BadRedirect");
+ content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+
+ return;
+ }
+
+ /** \todo 300, 305, 307
+ * More specifically:
+ * + 300 needs to serve up the fetch body to the user
+ * + 305 needs to refetch using the proxy specified in ::data
+ * + 307 needs to refetch.
+ *
+ * If the original request method was either GET or HEAD, then follow
+ * redirect unconditionally. If the original request method was neither
+ * GET nor HEAD, then the user MUST be asked what to do.
+ *
+ * Note:
+ * For backwards compatibility, all 301, 302 and 303 redirects are
+ * followed unconditionally with a GET request to the new location. Any other 3xx code is reported as "BadRedirect" by the check below.
+ */
+ if (http_code != 301 && http_code != 302 && http_code != 303) {
+ LOG(("Unsupported redirect type %ld", http_code));
+
+ msg_data.error = messages_get("BadRedirect");
+ content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+
+ free(referer);
+ return;
+ }
+
+ /* Forcibly stop redirecting if we've followed too many redirects. The comparison is '>', so a content whose redirect_count equals REDIRECT_LIMIT is still followed; the macro is scoped to this check. */
+#define REDIRECT_LIMIT 10
+ if (c->redirect_count > REDIRECT_LIMIT) {
+ LOG(("Too many nested redirects"));
+
+ msg_data.error = messages_get("BadRedirect");
+ content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+
+ free(referer);
+ return;
+ }
+#undef REDIRECT_LIMIT
+
+ /* redirect URLs must be absolute by HTTP/1.1, but many
+ * sites send relative ones: treat them as relative to
+ * requested URL. On success url is freed by this function on every later exit path. */
+ result = url_join(data, c->url, &url);
+
+ if (result != URL_FUNC_OK) {
+ msg_data.error = messages_get("BadRedirect");
+ content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+
+ free(referer);
+ return;
+ }
+
+ /* Process users of this content. Each pass handles c->user_list->next and relies on content_remove_user() unlinking that entry to make progress. NOTE(review): presumably user_list is a sentinel head node; confirm against struct content. */
+ while (c->user_list->next) {
+ intptr_t p1, p2;
+ void (*callback)(content_msg msg,
+ struct content *c, intptr_t p1,
+ intptr_t p2,
+ union content_msg_data data);
+ struct content *replacement;
+
+ p1 = c->user_list->next->p1;
+ p2 = c->user_list->next->p2;
+ callback = c->user_list->next->callback;
+
+ /* Remove user (its callback and p1/p2 were saved above so it can be attached to the replacement) */
+ content_remove_user(c, callback, p1, p2);
+
+ /* Get replacement content -- HTTP GET request. NOTE(review): on failure the error is broadcast on c, but the current user has already been removed from c above -- confirm whether that user is ever notified. */
+ replacement = fetchcache(url, callback, p1, p2,
+ c->width, c->height, c->no_error_pages,
+ NULL, NULL, false, c->download);
+ if (!replacement) {
+ msg_data.error = messages_get("BadRedirect");
+ content_broadcast(c, CONTENT_MSG_ERROR, msg_data);
+
+ free(url);
+ free(referer);
+ return;
+ }
+
+ /* Set replacement's redirect count to 1 greater than ours */
+ replacement->redirect_count = c->redirect_count + 1;
+
+ /* Notify user that content has changed. NOTE(review): no member of msg_data is assigned on this path before it is passed by value; confirm that NEWPTR handlers ignore it. */
+ callback(CONTENT_MSG_NEWPTR, replacement, p1, p2, msg_data);
+
+ /* Start fetching the replacement content; the final argument falls back to c->url when there was no referer */
+ fetchcache_go(replacement, referer, callback, p1, p2,
+ c->width, c->height, NULL, NULL,
+ false, referer ? referer : c->url);
+ }
+
+ /* Clean up (free(NULL) is a no-op, so a NULL referer is fine) */
+ free(url);
+ free(referer);
+}
+
#ifdef TEST
#include <unistd.h>