From a051b36ae73266e8d2a91bb8e86fcdad6ad01677 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Sat, 23 Apr 2005 22:26:05 +0000 Subject: [project @ 2005-04-23 22:26:05 by jmb] Make url_join match the spec - see http://www.ics.uci.edu/~fielding/url/test1.html for testcases. svn path=/import/netsurf/; revision=1682 --- utils/url.c | 33 +++++++++++++++++++++++++++++++-- 1 file changed, 31 insertions(+), 2 deletions(-) diff --git a/utils/url.c b/utils/url.c index 006b71f1e..1c587b86b 100644 --- a/utils/url.c +++ b/utils/url.c @@ -236,6 +236,7 @@ url_func_result url_join(const char *rel, const char *base, char **result) path = base + base_match[5].rm_so; path_len = base_match[5].rm_eo - base_match[5].rm_so; + /* 1) */ m = regexec(&url_re, rel, 10, rel_match, 0); if (m) { @@ -245,7 +246,6 @@ url_func_result url_join(const char *rel, const char *base, char **result) /* 2) */ /* base + "#s" = (current document)#s (see Appendix C.1) */ - /** \todo does (current document) include the query? */ if (rel_match[9].rm_so != -1) { fragment = rel + rel_match[9].rm_so; fragment_len = rel_match[9].rm_eo - rel_match[9].rm_so; @@ -254,6 +254,11 @@ url_func_result url_join(const char *rel, const char *base, char **result) rel_match[2].rm_so == -1 && rel_match[4].rm_so == -1 && rel_match[6].rm_so == -1) { + if (base_match[7].rm_so != -1) { + query = base + base_match[7].rm_so; + query_len = base_match[7].rm_eo - + base_match[7].rm_so; + } goto step7; } if (rel_match[7].rm_so != -1) { @@ -261,6 +266,14 @@ url_func_result url_join(const char *rel, const char *base, char **result) query_len = rel_match[7].rm_eo - rel_match[7].rm_so; } + /* base + "?y" = (base - query)?y + * e.g http://a/b/c/d;p?q + ?y = http://a/b/c/d;p?y */ + if (rel_match[5].rm_so == rel_match[5].rm_eo && + rel_match[2].rm_so == -1 && + rel_match[4].rm_so == -1 && + rel_match[6].rm_so != -1) + goto step7; + /* 3) */ if (rel_match[2].rm_so != -1) { scheme = rel + rel_match[2].rm_so; @@ -334,8 +347,24 @@ url_func_result url_join(const char *rel, const char *base, char **result) } else path_len -= up_match[1].rm_eo - up_match[1].rm_so + 3; } + + /* and strip any remaining ../ | ./ pairs */ + for (path = buf; path - buf < path_len; ) { + if (*path == '.' && path[1] == '.' && path[2] == '/') { + memmove(buf + (path - buf), path + 3, + ((buf + path_len) - path) - 3); + path_len -= 3; + } + else if (*path == '.' && path[1] == '/') { + memmove(buf + (path - buf), path + 2, + ((buf + path_len) - path) - 2); + path_len -= 2; + } + else + path++; + } buf[path_len] = 0; - path = buf; + path = buf; step7: /* 7) */ (*result) = malloc(scheme_len + 1 + 2 + authority_len + path_len + 1 + 1 + -- cgit v1.2.3