summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVincent Sanders <vince@kyllikki.org>2018-04-22 12:03:49 +0100
committerVincent Sanders <vince@kyllikki.org>2018-04-22 14:03:18 +0100
commitb94b96e272140f17a82ce0847e1634d081b5dc6c (patch)
tree16ed13af127ccfbcfcdf265573785f8e35d9cc9a
parentcfa5856eea7c0d840a19590baf1e66f6fee06b83 (diff)
downloadnetsurf-b94b96e272140f17a82ce0847e1634d081b5dc6c.tar.gz
netsurf-b94b96e272140f17a82ce0847e1634d081b5dc6c.tar.bz2
add hash table population from file or memory
-rw-r--r--utils/hashtable.c171
-rw-r--r--utils/hashtable.h53
-rw-r--r--utils/messages.c158
3 files changed, 225 insertions, 157 deletions
diff --git a/utils/hashtable.c b/utils/hashtable.c
index 3a1711da0..0de6f83d2 100644
--- a/utils/hashtable.c
+++ b/utils/hashtable.c
@@ -28,11 +28,15 @@
* it that has good coverage along side the other tests.
*/
+#include <stdint.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
-#include <stdbool.h>
-#include "utils/hashtable.h"
+#include <zlib.h>
+#include <errno.h>
+
#include "utils/log.h"
+#include "utils/hashtable.h"
struct hash_entry {
@@ -46,6 +50,8 @@ struct hash_table {
struct hash_entry **chain;
};
+/** size in bytes of the buffer a single line is held in for file or inline add */
+#define LINE_BUFFER_SIZE 512
/**
* Hash a string, returning a 32bit value. The hash algorithm used is
@@ -179,3 +185,164 @@ const char *hash_get(struct hash_table *ht, const char *key)
return NULL;
}
+
+
+/**
+ * Process one line of key/value input and add it to a hash table.
+ *
+ * Leading spaces and tabs are skipped. A line that is then empty or
+ * begins with '#' is ignored. Otherwise the line is split at the first
+ * ':' into a key and a value and the pair is added with hash_add().
+ *
+ * The line is modified in place: the separating ':' is overwritten
+ * with a NUL so that the key is a terminated string. The value is
+ * handed to hash_add() as a string as well, so the caller must have
+ * NUL terminated the line at \a ln + \a lnlen.
+ *
+ * \param hash The hash table to add the line to
+ * \param ln The line to process
+ * \param lnlen The length of \a ln, not counting the terminator
+ * \return NSERROR_OK if the pair was added or the line was ignored,
+ *         NSERROR_INVALID if the line has no ':' separator or the
+ *         pair could not be added
+ */
+static nserror
+process_line(struct hash_table *hash, uint8_t *ln, int lnlen)
+{
+	uint8_t *key; /* start of the key, after leading whitespace */
+	uint8_t *end; /* one past the last byte of the line */
+	uint8_t *colon; /* key/value separator */
+	uint8_t *value;
+
+	/* nothing to parse; also avoids forming a pointer before ln */
+	if (lnlen <= 0) {
+		return NSERROR_OK;
+	}
+
+	key = ln;
+	end = ln + lnlen;
+
+	/* skip leading whitespace */
+	while ((key < end) && ((*key == ' ') || (*key == '\t'))) {
+		key++;
+	}
+
+	/* empty or comment lines; the bound is tested first so a line
+	 * of only whitespace is recognised without reading ln[lnlen]
+	 */
+	if ((key == end) || (*key == 0) || (*key == '#')) {
+		return NSERROR_OK;
+	}
+
+	/* the first colon separates the key from the value */
+	colon = memchr(key, ':', (size_t)(end - key));
+	if (colon == NULL) {
+		/* no colon found */
+		return NSERROR_INVALID;
+	}
+
+	*colon = 0; /* terminate key */
+	value = colon + 1;
+
+	if (hash_add(hash, (char *)key, (char *)value) == false) {
+		/* log the key that was actually passed to hash_add() */
+		NSLOG(netsurf, INFO,
+		      "Unable to add %s:%s to hash table",
+		      (char *)key, (char *)value);
+		return NSERROR_INVALID;
+	}
+	return NSERROR_OK;
+}
+
+
+/*
+ * exported interface documented in utils/hashtable.h
+ *
+ * Reads the file line by line through the zlib gz* API and hands each
+ * line to process_line(), stopping at the first line that fails.
+ */
+nserror hash_add_file(struct hash_table *ht, const char *path)
+{
+	nserror res = NSERROR_OK;
+	char s[LINE_BUFFER_SIZE]; /* line buffer */
+	gzFile fp; /* compressed file handle */
+
+	if (path == NULL) {
+		return NSERROR_BAD_PARAMETER;
+	}
+
+	fp = gzopen(path, "r");
+	if (!fp) {
+		NSLOG(netsurf, INFO,
+		      "Unable to open file \"%.100s\": %s", path,
+		      strerror(errno));
+
+		return NSERROR_NOT_FOUND;
+	}
+
+	while (gzgets(fp, s, sizeof s)) {
+		size_t slen = strlen(s);
+
+		/* Remove the \n at the end only when there is one. The
+		 * last line of a file may lack it, as does a line that
+		 * did not fit the buffer; stripping unconditionally
+		 * would drop a real character, or write before the
+		 * buffer if the string were empty.
+		 */
+		if ((slen > 0) && (s[slen - 1] == '\n')) {
+			s[--slen] = 0;
+		}
+
+		/* slen is below sizeof s so it fits in an int */
+		res = process_line(ht, (uint8_t *)s, (int)slen);
+		if (res != NSERROR_OK) {
+			break;
+		}
+	}
+
+	gzclose(fp);
+
+	return res;
+}
+
+/*
+ * exported interface documented in utils/hashtable.h
+ *
+ * Inflates data through zlib into a fixed line buffer and hands each
+ * newline terminated line to process_line(). Bytes following the last
+ * newline stay in the buffer and are completed by the next inflate()
+ * call; bytes still unterminated when the stream ends are not
+ * processed.
+ */
+nserror hash_add_inline(struct hash_table *ht, const uint8_t *data, size_t size)
+{
+ nserror res;
+ int ret; /* zlib return value */
+ z_stream strm;
+ uint8_t s[LINE_BUFFER_SIZE]; /* line buffer */
+ size_t used = 0; /* number of bytes in buffer in use */
+ uint8_t *nl; /* position of the newline ending the current line */
+
+ strm.zalloc = Z_NULL; /* per the zlib manual, Z_NULL hooks select zlib's default allocator */
+ strm.zfree = Z_NULL;
+ strm.opaque = Z_NULL;
+
+ strm.next_in = (uint8_t *)data; /* cast discards the const qualifier of data */
+ strm.avail_in = size;
+
+ ret = inflateInit2(&strm, 32 + MAX_WBITS); /* per the zlib manual, adding 32 enables zlib/gzip header auto-detection */
+ if (ret != Z_OK) {
+ NSLOG(netsurf, INFO, "inflateInit returned %d", ret);
+ return NSERROR_INVALID;
+ }
+
+ do {
+ strm.next_out = s + used; /* append after any partial line carried over */
+ strm.avail_out = sizeof(s) - used;
+
+ ret = inflate(&strm, Z_NO_FLUSH);
+ if ((ret != Z_OK) && (ret != Z_STREAM_END)) {
+ break; /* reported as NSERROR_INVALID after the loop */
+ }
+
+ used = sizeof(s) - strm.avail_out; /* total bytes now held in s */
+ while (used > 0) {
+ /* find nl: scan only the bytes in use */
+ for (nl = &s[0]; nl < &s[used]; nl++) {
+ if (*nl == '\n') {
+ break;
+ }
+ }
+ if (nl == &s[used]) {
+ /* no nl found: keep the partial line and inflate more */
+ break;
+ }
+ /* found newline */
+ *nl = 0; /* null terminate line */
+ res = process_line(ht, &s[0], nl - &s[0]); /* length excludes the newline */
+ if (res != NSERROR_OK) {
+ inflateEnd(&strm); /* release zlib state before the early return */
+ return res;
+ }
+
+ /* move data down: drop the consumed line and its newline */
+ memmove(&s[0], nl + 1, used - ((nl + 1) - &s[0]) );
+ used -= ((nl +1) - &s[0]);
+ }
+ if (used == sizeof(s)) {
+ /* entire buffer used and no newline: discard what is buffered.
+ * NOTE(review): the rest of the overlong line will then be
+ * parsed as if it started a new line - confirm this is intended */
+ NSLOG(netsurf, INFO, "Overlength line");
+ used = 0;
+ }
+ } while (ret != Z_STREAM_END);
+
+ inflateEnd(&strm);
+
+ if (ret != Z_STREAM_END) {
+ NSLOG(netsurf, INFO, "inflate returned %d", ret);
+ return NSERROR_INVALID;
+ }
+ return NSERROR_OK;
+
+}
diff --git a/utils/hashtable.h b/utils/hashtable.h
index b0e7392c6..b1c0d5c41 100644
--- a/utils/hashtable.h
+++ b/utils/hashtable.h
@@ -29,8 +29,11 @@
struct hash_table;
/**
- * Create a new hash table, and return a context for it. The memory consumption
- * of a hash table is approximately 8 + (nchains * 12) bytes if it is empty.
+ * Create a new hash table
+ *
+ * Allocate a new hash table and return a context for it. The memory
+ * consumption of a hash table is approximately 8 + (nchains * 12)
+ * bytes if it is empty.
*
* \param chains Number of chains/buckets this hash table will have. This
* should be a prime number, and ideally a prime number just
@@ -41,18 +44,22 @@ struct hash_table;
struct hash_table *hash_create(unsigned int chains);
/**
- * Destroys a hash table, freeing all memory associated with it.
+ * Destroys a hash table
+ *
+ * Destroy a hash table freeing all memory associated with it.
*
* \param ht Hash table to destroy. After the function returns, this
- * will nolonger be valid.
+ * will no longer be valid.
*/
void hash_destroy(struct hash_table *ht);
/**
- * Adds a key/value pair to a hash table. If the key you're adding is already
- * in the hash table, it does not replace it, but it does take precedent over
- * it. The old key/value pair will be inaccessable but still in memory until
- * hash_destroy() is called on the hash table.
+ * Adds a key/value pair to a hash table.
+ *
+ * If the key you're adding is already in the hash table, it does not
+ * replace it, but it does take precedence over it. The old key/value
+ * pair will be inaccessible but still in memory until hash_destroy()
+ * is called on the hash table.
*
* \param ht The hash table context to add the key/value pair to.
* \param key The key to associate the value with. A copy is made.
@@ -71,4 +78,34 @@ bool hash_add(struct hash_table *ht, const char *key, const char *value);
*/
const char *hash_get(struct hash_table *ht, const char *key);
+/**
+ * Add key/value pairs to a hash table with data from a file
+ *
+ * The file should be formatted as a series of lines terminated with
+ * newline character. Each line should contain a key/value pair
+ * separated by a colon; the key ends at the first colon on the line.
+ * Leading spaces and tabs are skipped, and a line that is then empty
+ * or starts with a # character is ignored.
+ *
+ * The file may be optionally gzip compressed.
+ *
+ * Processing stops at the first line that cannot be added; pairs from
+ * earlier lines remain in the hash table.
+ *
+ * \param ht The hash table context to add the key/value pairs to.
+ * \param path Path to file with key/value pairs in.
+ * \return NSERROR_OK on success, NSERROR_BAD_PARAMETER if \a path is
+ *         NULL, NSERROR_NOT_FOUND if the file cannot be opened or
+ *         NSERROR_INVALID if a line has no colon or cannot be added
+ */
+nserror hash_add_file(struct hash_table *ht, const char *path);
+
+/**
+ * Add key/value pairs to a hash table with data from a memory buffer
+ *
+ * The data format is the same as in hash_add_file() but held in memory
+ *
+ * The data may optionally be gzip compressed.
+ * NOTE(review): the implementation always passes the data through zlib
+ * inflate with header auto-detection; confirm that data which is not
+ * compressed is really accepted.
+ *
+ * \param ht The hash table context to add the key/value pairs to.
+ * \param data Source of key/value pairs
+ * \param size length of \a data in bytes
+ * \return NSERROR_OK on success or NSERROR_INVALID if the data cannot
+ *         be inflated, a line has no colon or a pair cannot be added
+ */
+nserror hash_add_inline(struct hash_table *ht, const uint8_t *data, size_t size);
+
#endif
diff --git a/utils/messages.c b/utils/messages.c
index e2d45e9da..e1e61201f 100644
--- a/utils/messages.c
+++ b/utils/messages.c
@@ -45,66 +45,19 @@
/** The hash table used to store the standard Messages file for the old API */
static struct hash_table *messages_hash = NULL;
-/**
- * process a line of input.
- */
-static nserror
-message_process_line(struct hash_table *hash, uint8_t *ln, int lnlen)
-{
- uint8_t *value;
- uint8_t *colon;
-
- /* empty or comment lines */
- if (ln[0] == 0 || ln[0] == '#') {
- return NSERROR_OK;
- }
-
- /* find first colon as key/value separator */
- for (colon = ln; colon < (ln + lnlen); colon++) {
- if (*colon == ':') {
- break;
- }
- }
- if (colon == (ln + lnlen)) {
- /* no colon found */
- return NSERROR_INVALID;
- }
-
- *colon = 0; /* terminate key */
- value = colon + 1;
-
- if (hash_add(hash, (char *)ln, (char *)value) == false) {
- NSLOG(netsurf, INFO, "Unable to add %s:%s to hash table", ln,
- value);
- return NSERROR_INVALID;
- }
- return NSERROR_OK;
-}
/**
* Read keys and values from messages file.
*
* \param path pathname of messages file
- * \param ctx reference of hash table to merge with.
+ * \param ctx reference of hash table to merge with or NULL to create one.
* \return NSERROR_OK on sucess and ctx updated or error code on faliure.
*/
static nserror messages_load_ctx(const char *path, struct hash_table **ctx)
{
- char s[400]; /* line buffer */
- gzFile fp; /* compressed file handle */
struct hash_table *nctx; /* new context */
-
- assert(path != NULL);
-
- fp = gzopen(path, "r");
- if (!fp) {
- NSLOG(netsurf, INFO,
- "Unable to open messages file \"%.100s\": %s", path,
- strerror(errno));
-
- return NSERROR_NOT_FOUND;
- }
-
+ nserror res;
+
if (*ctx == NULL) {
nctx = hash_create(HASH_SIZE);
} else {
@@ -118,40 +71,16 @@ static nserror messages_load_ctx(const char *path, struct hash_table **ctx)
NSLOG(netsurf, INFO,
"Unable to create hash table for messages file %s",
path);
- gzclose(fp);
return NSERROR_NOMEM;
}
- while (gzgets(fp, s, sizeof s)) {
- char *colon, *value;
-
- if (s[0] == 0 || s[0] == '#')
- continue;
-
- s[strlen(s) - 1] = 0; /* remove \n at end */
- colon = strchr(s, ':');
- if (!colon)
- continue;
- *colon = 0; /* terminate key */
- value = colon + 1;
-
- if (hash_add(nctx, s, value) == false) {
- NSLOG(netsurf, INFO,
- "Unable to add %s:%s to hash table of %s", s,
- value, path);
- gzclose(fp);
- if (*ctx == NULL) {
- hash_destroy(nctx);
- }
- return NSERROR_INVALID;
- }
- }
-
- gzclose(fp);
- *ctx = nctx;
+ res = hash_add_file(nctx, path);
+ if (res == NSERROR_OK) {
+ *ctx = nctx;
+ }
- return NSERROR_OK;
+ return res;
}
@@ -203,30 +132,19 @@ static void messages_destroy_ctx(struct hash_table *ctx)
/* exported interface documented in messages.h */
nserror messages_add_from_file(const char *path)
{
- nserror err;
-
if (path == NULL) {
return NSERROR_BAD_PARAMETER;
}
NSLOG(netsurf, INFO, "Loading Messages from '%s'", path);
- err = messages_load_ctx(path, &messages_hash);
-
-
- return err;
+ return messages_load_ctx(path, &messages_hash);
}
/* exported interface documented in messages.h */
-nserror messages_add_from_inline(const uint8_t *data, size_t data_size)
+nserror messages_add_from_inline(const uint8_t *data, size_t size)
{
- z_stream strm;
- int ret;
- uint8_t s[512]; /* line buffer */
- size_t used = 0; /* number of bytes in buffer in use */
- uint8_t *nl;
-
/* ensure the hash table is initialised */
if (messages_hash == NULL) {
messages_hash = hash_create(HASH_SIZE);
@@ -235,61 +153,7 @@ nserror messages_add_from_inline(const uint8_t *data, size_t data_size)
NSLOG(netsurf, INFO, "Unable to create hash table");
return NSERROR_NOMEM;
}
-
- strm.zalloc = Z_NULL;
- strm.zfree = Z_NULL;
- strm.opaque = Z_NULL;
-
- strm.next_in = (uint8_t *)data;
- strm.avail_in = data_size;
-
- ret = inflateInit2(&strm, 32 + MAX_WBITS);
- if (ret != Z_OK) {
- NSLOG(netsurf, INFO, "inflateInit returned %d", ret);
- return NSERROR_INVALID;
- }
-
- do {
- strm.next_out = s + used;
- strm.avail_out = sizeof(s) - used;
-
- ret = inflate(&strm, Z_NO_FLUSH);
- if ((ret != Z_OK) && (ret != Z_STREAM_END)) {
- break;
- }
-
- used = sizeof(s) - strm.avail_out;
- while (used > 0) {
- /* find nl */
- for (nl = &s[0]; nl < &s[used]; nl++) {
- if (*nl == '\n') {
- break;
- }
- }
- if (nl == &s[used]) {
- /* no nl found */
- break;
- }
- /* found newline */
- *nl = 0; /* null terminate line */
- message_process_line(messages_hash, &s[0], nl - &s[0]);
- memmove(&s[0], nl + 1, used - ((nl + 1) - &s[0]) );
- used -= ((nl +1) - &s[0]);
- }
- if (used == sizeof(s)) {
- /* entire buffer used and no newline */
- NSLOG(netsurf, INFO, "Overlength line");
- used = 0;
- }
- } while (ret != Z_STREAM_END);
-
- inflateEnd(&strm);
-
- if (ret != Z_STREAM_END) {
- NSLOG(netsurf, INFO, "inflate returned %d", ret);
- return NSERROR_INVALID;
- }
- return NSERROR_OK;
+ return hash_add_inline(messages_hash, data, size);
}
/* exported interface documented in messages.h */