From ce2b034ae9dcad49d8c2721494830c3731a60ff8 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Mon, 1 Dec 2008 03:14:37 +0000 Subject: Chunked arrays: Pack length of entries into array as a prefix to the data. Limit maximum length of data items stored in hash/chunked array to 2^16-1. svn path=/trunk/libparserutils/; revision=5858 --- include/parserutils/utils/hash.h | 6 ++--- src/utils/chunkarray.c | 37 +++++++++++++++++-------- src/utils/chunkarray.h | 9 +++++-- src/utils/hash.c | 58 +++++++++++----------------------------- src/utils/utils.h | 4 +++ test/hash.c | 2 +- 6 files changed, 57 insertions(+), 59 deletions(-) diff --git a/include/parserutils/utils/hash.h b/include/parserutils/utils/hash.h index 129c6f6..92f0236 100644 --- a/include/parserutils/utils/hash.h +++ b/include/parserutils/utils/hash.h @@ -12,8 +12,8 @@ #include typedef struct parserutils_hash_entry { - size_t len; - const uint8_t *data; + uint16_t len; + const uint8_t data[]; } parserutils_hash_entry; struct parserutils_hash; @@ -24,7 +24,7 @@ parserutils_error parserutils_hash_create(parserutils_alloc alloc, void *pw, parserutils_error parserutils_hash_destroy(parserutils_hash *hash); parserutils_error parserutils_hash_insert(parserutils_hash *hash, - const uint8_t *data, size_t len, + const uint8_t *data, uint16_t len, const parserutils_hash_entry **inserted); #endif diff --git a/src/utils/chunkarray.c b/src/utils/chunkarray.c index 308f537..cd2acaa 100644 --- a/src/utils/chunkarray.c +++ b/src/utils/chunkarray.c @@ -9,6 +9,7 @@ #include #include "utils/chunkarray.h" +#include "utils/utils.h" typedef struct chunk chunk; @@ -99,25 +100,35 @@ parserutils_error parserutils_chunkarray_destroy(parserutils_chunkarray *array) * \return PARSERUTILS_OK on success, appropriate error otherwise */ parserutils_error parserutils_chunkarray_insert(parserutils_chunkarray *array, - const void *data, size_t len, const void **inserted) + const void *data, uint16_t len, + const parserutils_chunkarray_entry **inserted) { + parserutils_chunkarray_entry *entry; + size_t required_length; + if (array == NULL || data == NULL || inserted == NULL) return PARSERUTILS_BADPARM; + /* Calculate the required length. + * We require that each entry be an exact multiple of 4. */ + required_length = ALIGN(sizeof(parserutils_chunkarray_entry) + len); + /* If we're trying to insert data larger than CHUNK_SIZE, then it * gets its own chunk. */ - if (len > CHUNK_SIZE) { - chunk *c = array->alloc(0, sizeof(chunk) + len - CHUNK_SIZE, + if (required_length > CHUNK_SIZE) { + chunk *c = array->alloc(0, + sizeof(chunk) + required_length - CHUNK_SIZE, array->pw); if (c == NULL) return PARSERUTILS_NOMEM; /* Populate chunk */ - (*inserted) = c->data; - memcpy(c->data, data, len); - c->used = len; + entry = (parserutils_chunkarray_entry *) (void *) c->data; + memcpy(entry->data, data, len); + entry->length = len; + c->used = required_length; - /* And put it in the used list */ + /* And put it into the used list */ c->next = array->used_chunks; array->used_chunks = c; } else { @@ -126,7 +137,7 @@ parserutils_error parserutils_chunkarray_insert(parserutils_chunkarray *array, for (prev = NULL, c = array->free_chunks; c != NULL; prev = c, c = c->next) { - if (CHUNK_SIZE - c->used >= len) + if (CHUNK_SIZE - c->used >= required_length) break; } @@ -147,9 +158,11 @@ parserutils_error parserutils_chunkarray_insert(parserutils_chunkarray *array, } /* Populate chunk */ - (*inserted) = c->data + c->used; - memcpy(c->data + c->used, data, len); - c->used += len; + entry = (parserutils_chunkarray_entry *) + (void *) (c->data + c->used); + memcpy(entry->data, data, len); + entry->length = len; + c->used += required_length; /* If we've now filled the chunk, move it to the used list */ if (c->used == CHUNK_SIZE) { @@ -163,6 +176,8 @@ parserutils_error parserutils_chunkarray_insert(parserutils_chunkarray *array, } } + (*inserted) = entry; + return PARSERUTILS_OK; } diff --git a/src/utils/chunkarray.h b/src/utils/chunkarray.h index 4ee98cd..2ba0bff 100644 --- a/src/utils/chunkarray.h +++ b/src/utils/chunkarray.h @@ -11,6 +11,11 @@ #include #include +typedef struct parserutils_chunkarray_entry { + uint16_t length; + uint8_t data[]; +} parserutils_chunkarray_entry; + struct parserutils_chunkarray; typedef struct parserutils_chunkarray parserutils_chunkarray; @@ -19,8 +24,8 @@ parserutils_error parserutils_chunkarray_create(parserutils_alloc alloc, parserutils_error parserutils_chunkarray_destroy(parserutils_chunkarray *array); parserutils_error parserutils_chunkarray_insert(parserutils_chunkarray *array, - const void *data, size_t len, - const void **inserted); + const void *data, uint16_t len, + const parserutils_chunkarray_entry **inserted); #endif diff --git a/src/utils/hash.c b/src/utils/hash.c index 4a5a89c..aa01137 100644 --- a/src/utils/hash.c +++ b/src/utils/hash.c @@ -18,7 +18,6 @@ struct parserutils_hash { slot_table *slots; parserutils_chunkarray *data; - parserutils_chunkarray *entries; parserutils_alloc alloc; void *pw; @@ -29,7 +28,7 @@ struct slot_table { size_t n_slots; size_t n_used; - const parserutils_hash_entry *slots[]; + const parserutils_chunkarray_entry *slots[]; }; static inline uint32_t _hash(const uint8_t *data, size_t len); @@ -59,7 +58,7 @@ parserutils_error parserutils_hash_create(parserutils_alloc alloc, void *pw, return PARSERUTILS_NOMEM; h->slots = alloc(0, sizeof(slot_table) + - DEFAULT_SLOTS * sizeof(parserutils_hash_entry *), + DEFAULT_SLOTS * sizeof(parserutils_chunkarray_entry *), pw); if (h->slots == NULL) { alloc(h, 0, pw); @@ -67,7 +66,7 @@ parserutils_error parserutils_hash_create(parserutils_alloc alloc, void *pw, } memset(h->slots, 0, sizeof(slot_table) + - DEFAULT_SLOTS * sizeof(parserutils_hash_entry *)); + DEFAULT_SLOTS * sizeof(parserutils_chunkarray_entry *)); h->slots->n_slots = DEFAULT_SLOTS; error = parserutils_chunkarray_create(alloc, pw, &h->data); @@ -77,14 +76,6 @@ parserutils_error parserutils_hash_create(parserutils_alloc alloc, void *pw, return error; } - error = parserutils_chunkarray_create(alloc, pw, &h->entries); - if (error != PARSERUTILS_OK) { - alloc(h->data, 0, pw); - alloc(h->slots, 0, pw); - alloc(h, 0, pw); - return error; - } - h->alloc = alloc; h->pw = pw; @@ -106,8 +97,6 @@ parserutils_error parserutils_hash_destroy(parserutils_hash *hash) parserutils_chunkarray_destroy(hash->data); - parserutils_chunkarray_destroy(hash->entries); - hash->alloc(hash->slots, 0, hash->pw); hash->alloc(hash, 0, hash->pw); @@ -125,15 +114,13 @@ parserutils_error parserutils_hash_destroy(parserutils_hash *hash) * \return PARSERUTILS_OK on success, appropriate error otherwise */ parserutils_error parserutils_hash_insert(parserutils_hash *hash, - const uint8_t *data, size_t len, + const uint8_t *data, uint16_t len, const parserutils_hash_entry **inserted) { uint32_t index, mask; slot_table *slots; - const parserutils_hash_entry **entries; - const parserutils_hash_entry *entry; - const void *idata, *ientry; - parserutils_hash_entry new_entry; + const parserutils_chunkarray_entry **entries; + const parserutils_chunkarray_entry *entry; parserutils_error error; if (hash == NULL || data == NULL || inserted == NULL) @@ -149,8 +136,8 @@ parserutils_error parserutils_hash_insert(parserutils_hash *hash, /* Use linear probing to resolve collisions */ while ((entry = entries[index]) != NULL) { /* If this data is already in the hash, return it */ - if (_cmp(data, len, entry->data, entry->len) == 0) { - (*inserted) = entry; + if (_cmp(data, len, entry->data, entry->length) == 0) { + (*inserted) = (const parserutils_hash_entry *) entry; return PARSERUTILS_OK; } @@ -158,22 +145,12 @@ parserutils_error parserutils_hash_insert(parserutils_hash *hash, } /* The data isn't in the hash. Index identifies the slot to use */ - error = parserutils_chunkarray_insert(hash->data, data, len, &idata); + error = parserutils_chunkarray_insert(hash->data, data, len, &entry); if (error != PARSERUTILS_OK) return error; - new_entry.len = len; - new_entry.data = idata; - - error = parserutils_chunkarray_insert(hash->entries, - &new_entry, sizeof(parserutils_hash_entry), &ientry); - if (error != PARSERUTILS_OK) { - /* We effectively leak the inserted data, as chunkarray - * doesn't support deletion. */ - return error; - } - - (*inserted) = entries[index] = ientry; + entries[index] = entry; + (*inserted) = (const parserutils_hash_entry *) entry; /* If the table is 75% full, then increase its size */ if (++slots->n_used >= (slots->n_slots >> 1) + (slots->n_slots >> 2)) @@ -279,25 +256,25 @@ void grow_slots(parserutils_hash *hash) /* Create new slot table */ s = hash->alloc(0, sizeof(slot_table) + - new_size * sizeof(parserutils_hash_entry *), + new_size * sizeof(parserutils_chunkarray_entry *), hash->pw); if (s == NULL) return; memset(s, 0, sizeof(slot_table) + - new_size * sizeof(parserutils_hash_entry *)); + new_size * sizeof(parserutils_chunkarray_entry *)); s->n_slots = new_size; /* Now, populate it with the contents of the current one */ for (uint32_t i = 0; i < hash->slots->n_slots; i++) { - const parserutils_hash_entry *e = hash->slots->slots[i]; + const parserutils_chunkarray_entry *e = hash->slots->slots[i]; if (e == NULL) continue; /* Find new index */ uint32_t mask = s->n_slots - 1; - uint32_t index = _hash(e->data, e->len) & mask; + uint32_t index = _hash(e->data, e->length) & mask; /* Use linear probing to resolve collisions */ while (s->slots[index] != NULL) @@ -332,11 +309,8 @@ void parserutils_hash_dump(parserutils_hash *hash) printf("Data:\n"); parserutils_chunkarray_dump(hash->data); - printf("Entries:\n"); - parserutils_chunkarray_dump(hash->entries); - printf("Hash structures: %zu\n", sizeof(parserutils_hash) + sizeof(slot_table) + - hash->slots->n_slots * sizeof(parserutils_hash_entry *)); + hash->slots->n_slots * sizeof(parserutils_chunkarray_entry *)); } diff --git a/src/utils/utils.h b/src/utils/utils.h index 2a61c2d..7f4e1a6 100644 --- a/src/utils/utils.h +++ b/src/utils/utils.h @@ -29,4 +29,8 @@ #define N_ELEMENTS(s) (sizeof((s)) / sizeof((s)[0])) #endif +#ifndef ALIGN +#define ALIGN(val) (((val) + 3) & ~(3)) +#endif + #endif diff --git a/test/hash.c b/test/hash.c index 3eceeae..8504cbd 100644 --- a/test/hash.c +++ b/test/hash.c @@ -45,7 +45,7 @@ int main(int argc, char **argv) } } -// parserutils_hash_dump(hash); + parserutils_hash_dump(hash); parserutils_hash_destroy(hash); -- cgit v1.2.3