/* * Copyright 2011 Michael Drake * * This file is part of NetSurf, http://www.netsurf-browser.org/ * * NetSurf is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; version 2 of the License. * * NetSurf is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ /** * \file * NetSurf URL handling implementation. * * This is the common implementation of all URL handling within the * browser. This implementation is based upon RFC3986 although this has * been superceeded by https://url.spec.whatwg.org/ which is based on * actual contemporary implementations. * * Care must be taken with character encodings within this module as * the specifications work with specific ascii ranges and must not be * affected by locale. Hence the c library character type functions * are not used. */ #include #include #include #include #include #include "utils/ascii.h" #include "utils/corestrings.h" #include "utils/errors.h" #include "utils/idna.h" #include "utils/log.h" #include "utils/nsurl/private.h" #include "utils/nsurl.h" #include "utils/utils.h" /** * Compare two component values. * * Sets match to false if the components are not the same. * Does nothing if the components are the same, so ensure match is * preset to true. */ #define nsurl__component_compare(c1, c2, match) \ if (c1 && c2 && lwc_error_ok == \ lwc_string_isequal(c1, c2, match)) { \ /* do nothing */ \ } else if (c1 || c2) { \ *match = false; \ } /****************************************************************************** * NetSurf URL Public API * ******************************************************************************/ /* exported interface, documented in nsurl.h */ nsurl *nsurl_ref(nsurl *url) { assert(url != NULL); url->count++; return url; } /* exported interface, documented in nsurl.h */ void nsurl_unref(nsurl *url) { assert(url != NULL); assert(url->count > 0); if (--url->count > 0) return; #ifdef NSURL_DEBUG nsurl__dump(url); #endif /* Release lwc strings */ nsurl__components_destroy(&url->components); /* Free the NetSurf URL */ free(url); } /* exported interface, documented in nsurl.h */ bool nsurl_compare(const nsurl *url1, const nsurl *url2, nsurl_component parts) { bool match = true; assert(url1 != NULL); assert(url2 != NULL); /* Compare URL components */ /* Path, host and query first, since they're most likely to differ */ if (parts & NSURL_PATH) { nsurl__component_compare(url1->components.path, url2->components.path, &match); if (match == false) return false; } if (parts & NSURL_HOST) { nsurl__component_compare(url1->components.host, url2->components.host, &match); if (match == false) return false; } if (parts & NSURL_QUERY) { nsurl__component_compare(url1->components.query, url2->components.query, &match); if (match == false) return false; } if (parts & NSURL_SCHEME) { nsurl__component_compare(url1->components.scheme, url2->components.scheme, &match); if (match == false) return false; } if (parts & NSURL_USERNAME) { nsurl__component_compare(url1->components.username, url2->components.username, &match); if (match == false) return false; } if (parts & NSURL_PASSWORD) { nsurl__component_compare(url1->components.password, url2->components.password, &match); if (match == false) return false; } if (parts & NSURL_PORT) { nsurl__component_compare(url1->components.port, url2->components.port, &match); if (match == false) return false; } if (parts & NSURL_FRAGMENT) { nsurl__component_compare(url1->components.fragment, url2->components.fragment, &match); if (match == false) return false; } return true; } /* exported interface, documented in nsurl.h */ nserror nsurl_get(const nsurl *url, nsurl_component parts, char **url_s, size_t *url_l) { assert(url != NULL); return nsurl__components_to_string(&(url->components), parts, 0, url_s, url_l); } /* exported interface, documented in nsurl.h */ lwc_string *nsurl_get_component(const nsurl *url, nsurl_component part) { assert(url != NULL); switch (part) { case NSURL_SCHEME: return (url->components.scheme != NULL) ? lwc_string_ref(url->components.scheme) : NULL; case NSURL_USERNAME: return (url->components.username != NULL) ? lwc_string_ref(url->components.username) : NULL; case NSURL_PASSWORD: return (url->components.password != NULL) ? lwc_string_ref(url->components.password) : NULL; case NSURL_HOST: return (url->components.host != NULL) ? lwc_string_ref(url->components.host) : NULL; case NSURL_PORT: return (url->components.port != NULL) ? lwc_string_ref(url->components.port) : NULL; case NSURL_PATH: return (url->components.path != NULL) ? lwc_string_ref(url->components.path) : NULL; case NSURL_QUERY: return (url->components.query != NULL) ? lwc_string_ref(url->components.query) : NULL; case NSURL_FRAGMENT: return (url->components.fragment != NULL) ? lwc_string_ref(url->components.fragment) : NULL; default: LOG("Unsupported value passed to part param."); assert(0); } return NULL; } /* exported interface, documented in nsurl.h */ bool nsurl_has_component(const nsurl *url, nsurl_component part) { assert(url != NULL); switch (part) { case NSURL_SCHEME: if (url->components.scheme != NULL) return true; else return false; case NSURL_CREDENTIALS: /* Only username required for credentials section */ /* Fall through */ case NSURL_USERNAME: if (url->components.username != NULL) return true; else return false; case NSURL_PASSWORD: if (url->components.password != NULL) return true; else return false; case NSURL_HOST: if (url->components.host != NULL) return true; else return false; case NSURL_PORT: if (url->components.port != NULL) return true; else return false; case NSURL_PATH: if (url->components.path != NULL) return true; else return false; case NSURL_QUERY: if (url->components.query != NULL) return true; else return false; case NSURL_FRAGMENT: if (url->components.fragment != NULL) return true; else return false; default: LOG("Unsupported value passed to part param."); assert(0); } return false; } /* exported interface, documented in nsurl.h */ const char *nsurl_access(const nsurl *url) { assert(url != NULL); return url->string; } /* exported interface, documented in nsurl.h */ nserror nsurl_get_utf8(const nsurl *url, char **url_s, size_t *url_l) { nserror err; lwc_string *host; char *idna_host = NULL; size_t idna_host_len; char *scheme = NULL; size_t scheme_len; char *path = NULL; size_t path_len; assert(url != NULL); if (url->components.host == NULL) { return nsurl_get(url, NSURL_WITH_FRAGMENT, url_s, url_l); } host = url->components.host; err = idna_decode(lwc_string_data(host), lwc_string_length(host), &idna_host, &idna_host_len); if (err != NSERROR_OK) { goto cleanup; } err = nsurl_get(url, NSURL_SCHEME | NSURL_CREDENTIALS, &scheme, &scheme_len); if (err != NSERROR_OK) { goto cleanup; } err = nsurl_get(url, NSURL_PORT | NSURL_PATH | NSURL_QUERY | NSURL_FRAGMENT, &path, &path_len); if (err != NSERROR_OK) { goto cleanup; } *url_l = scheme_len + idna_host_len + path_len + 1; /* +1 for \0 */ *url_s = malloc(*url_l); if (*url_s == NULL) { err = NSERROR_NOMEM; goto cleanup; } snprintf(*url_s, *url_l, "%s%s%s", scheme, idna_host, path); err = NSERROR_OK; cleanup: free(idna_host); free(scheme); free(path); return err; } /* exported interface, documented in nsurl.h */ const char *nsurl_access_leaf(const nsurl *url) { size_t path_len; const char *path; const char *leaf; assert(url != NULL); if (url->components.path == NULL) return ""; path = lwc_string_data(url->components.path); path_len = lwc_string_length(url->components.path); if (path_len == 0) return ""; if (path_len == 1 && *path == '/') return "/"; leaf = path + path_len; do { leaf--; } while ((leaf != path) && (*leaf != '/')); if (*leaf == '/') leaf++; return leaf; } /* exported interface, documented in nsurl.h */ size_t nsurl_length(const nsurl *url) { assert(url != NULL); return url->length; } /* exported interface, documented in nsurl.h */ uint32_t nsurl_hash(const nsurl *url) { assert(url != NULL); return url->hash; } /* exported interface, documented in nsurl.h */ nserror nsurl_defragment(const nsurl *url, nsurl **no_frag) { size_t length; char *pos; assert(url != NULL); /* check for source url having no fragment already */ if (url->components.fragment == NULL) { *no_frag = (nsurl *)url; (*no_frag)->count++; return NSERROR_OK; } /* Find the change in length from url to new_url */ length = url->length; if (url->components.fragment != NULL) { length -= 1 + lwc_string_length(url->components.fragment); } /* Create NetSurf URL object */ *no_frag = malloc(sizeof(nsurl) + length + 1); /* Add 1 for \0 */ if (*no_frag == NULL) { return NSERROR_NOMEM; } /* Copy components */ (*no_frag)->components.scheme = nsurl__component_copy(url->components.scheme); (*no_frag)->components.username = nsurl__component_copy(url->components.username); (*no_frag)->components.password = nsurl__component_copy(url->components.password); (*no_frag)->components.host = nsurl__component_copy(url->components.host); (*no_frag)->components.port = nsurl__component_copy(url->components.port); (*no_frag)->components.path = nsurl__component_copy(url->components.path); (*no_frag)->components.query = nsurl__component_copy(url->components.query); (*no_frag)->components.fragment = NULL; (*no_frag)->components.scheme_type = url->components.scheme_type; (*no_frag)->length = length; /* Fill out the url string */ pos = (*no_frag)->string; memcpy(pos, url->string, length); pos += length; *pos = '\0'; /* Get the nsurl's hash */ nsurl__calc_hash(*no_frag); /* Give the URL a reference */ (*no_frag)->count = 1; return NSERROR_OK; } /* exported interface, documented in nsurl.h */ nserror nsurl_refragment(const nsurl *url, lwc_string *frag, nsurl **new_url) { int frag_len; int base_len; char *pos; size_t len; assert(url != NULL); assert(frag != NULL); /* Find the change in length from url to new_url */ base_len = url->length; if (url->components.fragment != NULL) { base_len -= 1 + lwc_string_length(url->components.fragment); } frag_len = lwc_string_length(frag); /* Set new_url's length */ len = base_len + 1 /* # */ + frag_len; /* Create NetSurf URL object */ *new_url = malloc(sizeof(nsurl) + len + 1); /* Add 1 for \0 */ if (*new_url == NULL) { return NSERROR_NOMEM; } (*new_url)->length = len; /* Set string */ pos = (*new_url)->string; memcpy(pos, url->string, base_len); pos += base_len; *pos = '#'; memcpy(++pos, lwc_string_data(frag), frag_len); pos += frag_len; *pos = '\0'; /* Copy components */ (*new_url)->components.scheme = nsurl__component_copy(url->components.scheme); (*new_url)->components.username = nsurl__component_copy(url->components.username); (*new_url)->components.password = nsurl__component_copy(url->components.password); (*new_url)->components.host = nsurl__component_copy(url->components.host); (*new_url)->components.port = nsurl__component_copy(url->components.port); (*new_url)->components.path = nsurl__component_copy(url->components.path); (*new_url)->components.query = nsurl__component_copy(url->components.query); (*new_url)->components.fragment = lwc_string_ref(frag); (*new_url)->components.scheme_type = url->components.scheme_type; /* Get the nsurl's hash */ nsurl__calc_hash(*new_url); /* Give the URL a reference */ (*new_url)->count = 1; return NSERROR_OK; } /* exported interface, documented in nsurl.h */ nserror nsurl_replace_query(const nsurl *url, const char *query, nsurl **new_url) { int query_len; /* Length of new query string, including '?' */ int frag_len = 0; /* Length of fragment, including '#' */ int base_len; /* Length of URL up to start of query */ char *pos; size_t len; lwc_string *lwc_query; assert(url != NULL); assert(query != NULL); assert(query[0] == '?'); /* Get the length of the new query */ query_len = strlen(query); /* Find the change in length from url to new_url */ base_len = url->length; if (url->components.query != NULL) { base_len -= lwc_string_length(url->components.query); } if (url->components.fragment != NULL) { frag_len = 1 + lwc_string_length(url->components.fragment); base_len -= frag_len; } /* Set new_url's length */ len = base_len + query_len + frag_len; /* Create NetSurf URL object */ *new_url = malloc(sizeof(nsurl) + len + 1); /* Add 1 for \0 */ if (*new_url == NULL) { return NSERROR_NOMEM; } if (lwc_intern_string(query, query_len, &lwc_query) != lwc_error_ok) { free(*new_url); return NSERROR_NOMEM; } (*new_url)->length = len; /* Set string */ pos = (*new_url)->string; memcpy(pos, url->string, base_len); pos += base_len; memcpy(pos, query, query_len); pos += query_len; if (url->components.fragment != NULL) { const char *frag = lwc_string_data(url->components.fragment); *pos = '#'; memcpy(++pos, frag, frag_len - 1); pos += frag_len - 1; } *pos = '\0'; /* Copy components */ (*new_url)->components.scheme = nsurl__component_copy(url->components.scheme); (*new_url)->components.username = nsurl__component_copy(url->components.username); (*new_url)->components.password = nsurl__component_copy(url->components.password); (*new_url)->components.host = nsurl__component_copy(url->components.host); (*new_url)->components.port = nsurl__component_copy(url->components.port); (*new_url)->components.path = nsurl__component_copy(url->components.path); (*new_url)->components.query = lwc_query; (*new_url)->components.fragment = nsurl__component_copy(url->components.fragment); (*new_url)->components.scheme_type = url->components.scheme_type; /* Get the nsurl's hash */ nsurl__calc_hash(*new_url); /* Give the URL a reference */ (*new_url)->count = 1; return NSERROR_OK; } /* exported interface documented in utils/nsurl.h */ nserror nsurl_nice(const nsurl *url, char **result, bool remove_extensions) { const char *data; size_t len; size_t pos; bool match; char *name; assert(url != NULL); *result = 0; /* extract the last component of the path, if possible */ if ((url->components.path != NULL) && (lwc_string_length(url->components.path) != 0) && (lwc_string_isequal(url->components.path, corestring_lwc_slash_, &match) == lwc_error_ok) && (match == false)) { bool first = true; bool keep_looking; /* Get hold of the string data we're examining */ data = lwc_string_data(url->components.path); len = lwc_string_length(url->components.path); pos = len; do { keep_looking = false; pos--; /* Find last '/' with stuff after it */ while (pos != 0) { if (data[pos] == '/' && pos < len - 1) { break; } pos--; } if (pos == 0) { break; } if (first) { if (strncasecmp("/default.", data + pos, SLEN("/default.")) == 0) { keep_looking = true; } else if (strncasecmp("/index.", data + pos, 6) == 0) { keep_looking = true; } first = false; } } while (keep_looking); if (data[pos] == '/') pos++; if (strncasecmp("default.", data + pos, 8) != 0 && strncasecmp("index.", data + pos, 6) != 0) { size_t end = pos; while (data[end] != '\0' && data[end] != '/') { end++; } if (end - pos != 0) { name = malloc(end - pos + 1); if (name == NULL) { return NSERROR_NOMEM; } memcpy(name, data + pos, end - pos); name[end - pos] = '\0'; if (remove_extensions) { /* strip any extenstion */ char *dot = strchr(name, '.'); if (dot && dot != name) { *dot = '\0'; } } *result = name; return NSERROR_OK; } } } if (url->components.host != NULL) { name = strdup(lwc_string_data(url->components.host)); for (pos = 0; name[pos] != '\0'; pos++) { if (name[pos] == '.') { name[pos] = '_'; } } *result = name; return NSERROR_OK; } return NSERROR_NOT_FOUND; } /* exported interface, documented in nsurl.h */ nserror nsurl_parent(const nsurl *url, nsurl **new_url) { lwc_string *lwc_path; size_t old_path_len, new_path_len; size_t len; const char* path = NULL; char *pos; assert(url != NULL); old_path_len = (url->components.path == NULL) ? 0 : lwc_string_length(url->components.path); /* Find new path length */ if (old_path_len == 0) { new_path_len = old_path_len; } else { path = lwc_string_data(url->components.path); new_path_len = old_path_len; if (old_path_len > 1) { /* Skip over any trailing / */ if (path[new_path_len - 1] == '/') new_path_len--; /* Work back to next / */ while (new_path_len > 0 && path[new_path_len - 1] != '/') new_path_len--; } } /* Find the length of new_url */ len = url->length; if (url->components.query != NULL) { len -= lwc_string_length(url->components.query); } if (url->components.fragment != NULL) { len -= 1; /* # */ len -= lwc_string_length(url->components.fragment); } len -= old_path_len - new_path_len; /* Create NetSurf URL object */ *new_url = malloc(sizeof(nsurl) + len + 1); /* Add 1 for \0 */ if (*new_url == NULL) { return NSERROR_NOMEM; } /* Make new path */ if (old_path_len == 0) { lwc_path = NULL; } else if (old_path_len == new_path_len) { lwc_path = lwc_string_ref(url->components.path); } else { if (lwc_intern_string(path, old_path_len - new_path_len, &lwc_path) != lwc_error_ok) { free(*new_url); return NSERROR_NOMEM; } } (*new_url)->length = len; /* Set string */ pos = (*new_url)->string; memcpy(pos, url->string, len); pos += len; *pos = '\0'; /* Copy components */ (*new_url)->components.scheme = nsurl__component_copy(url->components.scheme); (*new_url)->components.username = nsurl__component_copy(url->components.username); (*new_url)->components.password = nsurl__component_copy(url->components.password); (*new_url)->components.host = nsurl__component_copy(url->components.host); (*new_url)->components.port = nsurl__component_copy(url->components.port); (*new_url)->components.path = lwc_path; (*new_url)->components.query = NULL; (*new_url)->components.fragment = NULL; (*new_url)->components.scheme_type = url->components.scheme_type; /* Get the nsurl's hash */ nsurl__calc_hash(*new_url); /* Give the URL a reference */ (*new_url)->count = 1; return NSERROR_OK; }