/* * This file is part of NetSurf, http://netsurf.sourceforge.net/ * Licensed under the GNU General Public License, * http://www.opensource.org/licenses/gpl-license * Copyright 2004 James Bursa * Copyright 2003 Phil Mellor * Copyright 2003 John M Bell * Copyright 2004 John Tytgat */ #include #include #include #include #include #include #include #include #include #include "libxml/encoding.h" #include "netsurf/utils/config.h" #define NDEBUG #include "netsurf/utils/log.h" #include "netsurf/utils/messages.h" #include "netsurf/utils/utils.h" char * strip(char * const s) { size_t i; for (i = strlen(s); i != 0 && (s[i - 1] == ' ' || s[i - 1] == '\n' || s[i - 1] == '\r' || s[i - 1] == '\t'); i--) ; s[i] = 0; return s + strspn(s, " \t\r\n"); } int whitespace(const char * str) { unsigned int i; for (i = 0; i < strlen(str); i++) if (!isspace(str[i])) return 0; return 1; } void * xcalloc(const size_t n, const size_t size) { void * p = calloc(n, size); if (p == 0) die("Out of memory in xcalloc()"); return p; } void * xrealloc(void * p, const size_t size) { p = realloc(p, size); if (p == 0) die("Out of memory in xrealloc()"); return p; } void xfree(void* p) { if (p == 0) fprintf(stderr, "Attempt to free NULL pointer\n"); else free(p); } char * xstrdup(const char * const s) { char *c; if (s == NULL) fprintf(stderr, "Attempt to strdup() NULL pointer\n"); c = malloc(((s == NULL) ? 0 : strlen(s)) + 1); if (c == NULL) die("Out of memory in xstrdup()"); strcpy(c, (s == NULL) ? "" : s); return c; } char * load(const char * const path) { FILE * fp = fopen(path, "rb"); char * buf; long size, read; if (fp == 0) die("Failed to open file"); if (fseek(fp, 0, SEEK_END) != 0) die("fseek() failed"); if ((size = ftell(fp)) == -1) die("ftell() failed"); buf = xcalloc((size_t) size, 1); if (fseek(fp, 0, SEEK_SET) != 0) die("fseek() failed"); read = fread(buf, 1, (size_t) size, fp); if (read < size) die("fread() failed"); fclose(fp); return buf; } char * squash_whitespace(const char * s) { char * c = malloc(strlen(s) + 1); int i = 0, j = 0; if (c == 0) die("Out of memory in squash_whitespace()"); do { if (s[i] == ' ' || s[i] == '\n' || s[i] == '\r' || s[i] == '\t') { c[j++] = ' '; while (s[i] == ' ' || s[i] == '\n' || s[i] == '\r' || s[i] == '\t') i++; } c[j++] = s[i++]; } while (s[i - 1] != 0); return c; } /** * Converts NUL terminated UTF-8 encoded string s containing zero or more * spaces (char 32) or TABs (char 9) to non-breaking spaces * (0xC2 + 0xA0 in UTF-8 encoding). * * Caller needs to free() result. Returns NULL in case of error. No * checking is done on validness of the UTF-8 input string. */ char *cnv_space2nbsp(const char *s) { const char *srcP; char *d, *d0; unsigned int numNBS; /* Convert space & TAB into non breaking space character (0xA0) */ for (numNBS = 0, srcP = (const char *)s; *srcP != '\0'; ++srcP) if (*srcP == ' ' || *srcP == '\t') ++numNBS; if ((d = (char *)malloc((srcP - s) + numNBS + 1)) == NULL) return NULL; for (d0 = d, srcP = (const char *)s; *srcP != '\0'; ++srcP) { if (*srcP == ' ' || *srcP == '\t') { *d0++ = 0xC2; *d0++ = 0xA0; } else *d0++ = *srcP; } *d0 = '\0'; return d; } /** * Convert local encoding to NUL terminated UTF-8 string. * Caller needs to free return value. * * \param s string in local machine encoding. NUL or length terminated (which comes first). * \param length maximum number of bytes to consider at s. * \return malloc()'ed NUL termined string in UTF-8 encoding. * * Based on RISCOS-LATIN1 code from libiconv. * \todo: we should use libiconv to support more local encodings instead * of only RISCOS-LATIN1. */ char *cnv_local_enc_str(const char *s, size_t length) { size_t l_out, l_in; const char *s_in; char *d, *d_out; static const unsigned int riscos1_2uni[32] = { /* 0x80 */ 0x221a, 0x0174, 0x0175, 0x0083, 0x2573, 0x0176, 0x0177, 0x0087, 0x21e6, 0x21e8, 0x21e9, 0x21e7, 0x2026, 0x2122, 0x2030, 0x2022, /* 0x90 */ 0x2018, 0x2019, 0x2039, 0x203a, 0x201c, 0x201d, 0x201e, 0x2013, 0x2014, 0x2212, 0x0152, 0x0153, 0x2020, 0x2021, 0xfb01, 0xfb02, }; /* We're counting on the fact that all riscos1_2uni[] values are * between 0x80 (incl) and 0x1000 (excl). */ for (s_in = s, l_in = length, l_out = 1; *s_in != '\0' && l_in != 0; ++s_in, --l_in) l_out += (*s_in >= 0x80 && *s_in < 0xA0) ? ((riscos1_2uni[*s_in - 0x80] < 0x800) ? 2 : 3) : 1; if ((d_out = (char *)malloc(l_out)) == NULL) return NULL; for (s_in = s, l_in = length, d = d_out; *s_in != '\0' && l_in != 0; ++s_in, --l_in) { unsigned int uc = (*s_in >= 0x80 && *s_in < 0xA0) ? riscos1_2uni[*s_in - 0x80] : *s_in; const int cnt = (uc < 0x80) ? 1 : (uc < 0x800) ? 2 : 3; switch (cnt) { case 3: d[2] = 0x80 | (uc & 0x3F); uc = (uc >> 6) | 0x800; /* fall through */ case 2: d[1] = 0x80 | (uc & 0x3F); uc = (uc >> 6) | 0xC0; /* fall through */ case 1: d[0] = uc; } d += cnt; } *d = '\0'; return d_out; } /** * Converts NUL terminated UTF-8 string to the machine local encoding. * Caller needs to free return value. */ char *cnv_str_local_enc(const char *s) { return cnv_strn_local_enc(s, strlen(s), NULL); } /** * Converts UTF-8 string of bytes to the machine local encoding. * Caller needs to free return value. * * When back_map is non-NULL, a ptr to a ptrdiff_t array is filled in which * needs to be free'd by the caller. The array contains per character * in the return string, a ptrdiff in the UTF-8 encoded string. * * \todo: we should use libiconv to support more local encodings instead * of only ISOLATIN1. */ char *cnv_strn_local_enc(const char *s, int length, const ptrdiff_t **back_mapPP) { /* Buffer at d & back_mapP can be overdimentioned but is certainly * big enough to carry the end result. */ char *d = xcalloc(length + 1, sizeof(char)); ptrdiff_t *back_mapP = (back_mapPP != NULL) ? xcalloc(length + 1, sizeof(ptrdiff_t)) : NULL; char *d0 = d; const char * const s0 = s; if (back_mapPP != NULL) *back_mapPP = back_mapP; while (length != 0) { int u, chars; chars = length; u = xmlGetUTF8Char(s, &chars); if (chars <= 0) { s += 1; length -= 1; continue; } if (back_mapP != NULL) *back_mapP++ = s - s0; s += chars; length -= chars; if (u == 0x09 || u == 0x0a || u == 0x0d || (0x20 <= u && u <= 0x7f) || (0xa0 <= u && u <= 0xff)) *d++ = u; else *d++ = '?'; } if (back_mapP != NULL) *back_mapP = s - s0; *d = 0; return d0; } /** * Check if a directory exists. */ bool is_dir(const char *path) { struct stat s; if (stat(path, &s)) return false; return S_ISDIR(s.st_mode) ? true : false; } /** * Compile a regular expression, handling errors. * * Parameters as for regcomp(), see man regex. */ void regcomp_wrapper(regex_t *preg, const char *regex, int cflags) { int r; r = regcomp(preg, regex, cflags); if (r) { char errbuf[200]; regerror(r, preg, errbuf, sizeof errbuf); fprintf(stderr, "Failed to compile regexp '%s'\n", regex); die(errbuf); } } /** * Remove expired cookies from the cookie jar. * libcurl /really/ should do this for us. * This gets called every time a window is closed or NetSurf is quit. */ #ifdef WITH_COOKIES void clean_cookiejar(void) { FILE *fp; int len; char *cookies = 0, *pos; char domain[256], flag[10], path[256], secure[10], exp[50], name[256], val[256]; long int expiry; fp = fopen(messages_get("cookiefile"), "r"); if (!fp) { LOG(("Failed to open cookie jar")); return; } /* read file length */ fseek(fp, 0, SEEK_END); len = ftell(fp); fseek(fp, 0, SEEK_SET); cookies = xcalloc((unsigned int)len, sizeof(char)); fread(cookies, (unsigned int)len, sizeof(char), fp); fclose(fp); if (remove(messages_get("cookiejar"))) { LOG(("Failed to remove old jar")); xfree(cookies); return; } fp = fopen(messages_get("cookiejar"), "w+"); if (!fp) { xfree(cookies); LOG(("Failed to create new jar")); return; } /* write header */ fputs("# Netscape HTTP Cookie File\n" "# http://www.netscape.com/newsref/std/cookie_spec.html\n" "# This file was generated by libcurl! Edit at your own risk.\n\n", fp); pos = cookies; while (pos != (cookies+len-1)) { if (*pos == '#') { for (; *pos != '\n'; pos++); pos += 1; continue; } sscanf(pos, "%s\t%s\t%s\t%s\t%s\t%s\t%s\n", domain, flag, path, secure, exp, name, val); pos += (strlen(domain) + strlen(flag) + strlen(path) + strlen(secure) + strlen(exp) + strlen(name) + strlen(val) + 7); sscanf(exp, "%ld", &expiry); if (time(NULL) < expiry) { /* cookie hasn't expired */ fprintf(fp, "%s\t%s\t%s\t%s\t%s\t%s\t%s\n", domain, flag, path, secure, exp, name, val); } } fclose(fp); xfree(cookies); } #endif /** We can have a fairly good estimate of how long the buffer needs to * be. The unsigned long can store a value representing a maximum size * of around 4 GB. Therefore the greatest space required is to * represent 1023MB. Currently that would be represented as "1023MB" so 12 * including a null terminator. * Ideally we would be able to know this value for sure, in the mean * time the following should suffice. **/ #define BYTESIZE_BUFFER_SIZE 20 /** * Does a simple conversion which assumes the user speaks English. The buffer * returned is one of three static ones so may change each time this call is * made. Don't store the buffer for later use. It's done this way for * convenience and to fight possible memory leaks, it is not necessarily pretty. **/ char *human_friendly_bytesize(unsigned long bsize) { static char buffer1[BYTESIZE_BUFFER_SIZE]; static char buffer2[BYTESIZE_BUFFER_SIZE]; static char buffer3[BYTESIZE_BUFFER_SIZE]; static char *curbuffer = buffer3; float bytesize = (float)bsize; if (curbuffer == buffer1) curbuffer = buffer2; else if (curbuffer == buffer2) curbuffer = buffer3; else curbuffer = buffer1; enum {bytes, kilobytes, megabytes, gigabytes} unit = bytes; static char units[][7] = {"Bytes", "kBytes", "MBytes", "GBytes"}; if (bytesize > 1024) { bytesize /= 1024; unit = kilobytes; } if (bytesize > 1024) { bytesize /= 1024; unit = megabytes; } if (bytesize > 1024) { bytesize /= 1024; unit = gigabytes; } sprintf(curbuffer, "%3.2f%s", bytesize, messages_get(units[unit])); return curbuffer; }