summary refs log tree commit diff
path: root/utils
diff options
context:
space:
mode:
author James Bursa <james@netsurf-browser.org> 2004-03-27 23:18:52 +0000
committer James Bursa <james@netsurf-browser.org> 2004-03-27 23:18:52 +0000
commit d5a81883e8e250d4eb4fab0ee0cf509b96befe1d (patch)
tree 1f323f5529091c95c23ebbefee2a88884f820d5a /utils
parent 26f171382f471341f7472f76150064f7f2e7c68a (diff)
download netsurf-d5a81883e8e250d4eb4fab0ee0cf509b96befe1d.tar.gz
netsurf-d5a81883e8e250d4eb4fab0ee0cf509b96befe1d.tar.bz2
[project @ 2004-03-27 23:18:52 by bursa]
Implement url_nice() and make save boxes use it to choose the default filename. svn path=/import/netsurf/; revision=682
Diffstat (limited to 'utils')
-rw-r--r-- utils/url.c 103
-rw-r--r-- utils/url.h 1
2 files changed, 101 insertions, 3 deletions
diff --git a/utils/url.c b/utils/url.c
index c22144495..c1d2c5a48 100644
--- a/utils/url.c
+++ b/utils/url.c
@@ -9,6 +9,7 @@
* URL parsing and joining (implementation).
*/
+#include <assert.h>
#include <ctype.h>
#include <stdbool.h>
#include <stdlib.h>
@@ -20,7 +21,7 @@
#include "netsurf/utils/utils.h"
-regex_t url_re, url_up_re;
+regex_t url_re, url_up_re, url_nice_re;
/**
* Initialise URL routines.
@@ -36,6 +37,10 @@ void url_init(void)
regcomp_wrapper(&url_up_re,
"/(|[^/]|[.][^./]|[^./][.]|[^/][^/][^/]+)/[.][.](/|$)",
REG_EXTENDED);
+ regcomp_wrapper(&url_nice_re,
+ "^([^.]{0,4}[.])?([^.][^.][.])?([^/?&;.=]*)"
+ "(=[^/?&;.]*)?[/?&;.]",
+ REG_EXTENDED);
}
@@ -394,6 +399,95 @@ char *url_host(const char *url)
}
+/**
+ * Attempt to find a nice filename for a URL.
+ *
+ * The URL is scanned from its end with url_nice_re (compiled in url_init()),
+ * collecting short pieces which are joined with non-breaking spaces (0xa0).
+ * Every other non-alphanumeric character is replaced by '_'. The result is
+ * at most 16 characters long.
+ *
+ * If no piece is found, the result is instead the first 15 characters
+ * following the first ':' of the URL (or of the whole URL if it has no ':'),
+ * with every non-alphanumeric character replaced by '_'.
+ *
+ * \param url an absolute URL, must not be the empty string
+ * \returns filename allocated on heap (to be freed by the caller), or 0 on
+ *          memory exhaustion
+ */
+
+char *url_nice(const char *url)
+{
+	const char nbsp = (char) 0xa0;	/* separator between pieces */
+	size_t i, j, k = 0;
+	size_t len, piece_so, piece_len;
+	const char *colon;
+	char buf[40];
+	char *result;
+	char *rurl;
+	regmatch_t match[10];
+
+	result = malloc(40);
+	if (!result)
+		return 0;
+
+	len = strlen(url);
+	assert(len != 0);
+	rurl = malloc(len + 1);
+	if (!rurl) {
+		free(result);
+		return 0;
+	}
+
+	/* reverse url into rurl, so that matching starts at the end of
+	 * the URL */
+	for (i = 0; i != len; i++)
+		rurl[i] = url[len - 1 - i];
+	rurl[len] = 0;
+
+	/* prepare a fallback: always succeeds */
+	colon = strchr(url, ':');
+	if (colon)
+		url = colon + 1;
+	strncpy(result, url, 15);
+	result[15] = 0;
+	/* the cast is required: passing a negative char to isalnum() is
+	 * undefined behaviour */
+	for (i = 0; result[i]; i++)
+		if (!isalnum((unsigned char) result[i]))
+			result[i] = '_';
+
+	/* append nice pieces (still reversed) to buf; k is the used length */
+	j = 0;
+	do {
+		if (regexec(&url_nice_re, rurl + j, 10, match, 0))
+			break;
+
+		if (match[3].rm_so != match[3].rm_eo) {
+			piece_so = match[3].rm_so;
+			piece_len = match[3].rm_eo - match[3].rm_so;
+			if (15 < piece_len) {
+				/* keep the tail of the reversed match, which
+				 * is the start of the piece in the URL */
+				piece_so = match[3].rm_eo - 15;
+				piece_len = 15;
+			}
+			if (15 < k + piece_len)
+				break;
+			/* step over the separator stored after the
+			 * previous piece, making it part of the output */
+			if (k)
+				k++;
+			memcpy(buf + k, rurl + j + piece_so, piece_len);
+			k += piece_len;
+			buf[k] = nbsp;
+		}
+
+		j += match[0].rm_eo;
+	} while (j != len);
+
+	/* the reversed copy is not needed on either return path below */
+	free(rurl);
+
+	if (k == 0)
+		return result;
+
+	/* reverse back, replacing the fallback */
+	for (i = 0; i != k; i++)
+		result[i] = buf[k - 1 - i];
+	result[k] = 0;
+
+	for (i = 0; i != k; i++)
+		if (result[i] != nbsp && !isalnum((unsigned char) result[i]))
+			result[i] = '_';
+
+	return result;
+}
+
+
#ifdef TEST
@@ -411,12 +505,15 @@ int main(int argc, char *argv[])
s = url_host(argv[i]);
if (s)
printf("<== '%s'\n", s);*/
- if (1 != i) {
+/* if (1 != i) {
s = url_join(argv[i], argv[1]);
if (s)
printf("'%s' + '%s' \t= '%s'\n", argv[1],
argv[i], s);
- }
+ }*/
+ s = url_nice(argv[i]);
+ if (s)
+ printf("'%s'\n", s);
}
return 0;
}
diff --git a/utils/url.h b/utils/url.h
index f908e8f9a..96aa947f5 100644
--- a/utils/url.h
+++ b/utils/url.h
@@ -16,5 +16,6 @@ void url_init(void);
char *url_normalize(const char *url);
char *url_join(const char *rel, const char *base);
char *url_host(const char *url);
+char *url_nice(const char *url);
#endif