From be1ffaeca7cdb002d30817383aeb1f47fe9df79b Mon Sep 17 00:00:00 2001 From: Daniel Silverstone Date: Sat, 14 Feb 2009 14:58:59 +0000 Subject: Initial untested API and implementation svn path=/trunk/libwapcaplet/; revision=6495 --- include/libwapcaplet/libwapcaplet.h | 113 ++++++++++++++ src/libwapcaplet.c | 304 ++++++++++++++++++++++++++++++++++++ 2 files changed, 417 insertions(+) create mode 100644 include/libwapcaplet/libwapcaplet.h create mode 100644 src/libwapcaplet.c diff --git a/include/libwapcaplet/libwapcaplet.h b/include/libwapcaplet/libwapcaplet.h new file mode 100644 index 0000000..0294eec --- /dev/null +++ b/include/libwapcaplet/libwapcaplet.h @@ -0,0 +1,113 @@ +/* libwapcaplet.h + * + * String internment and management tools. + * + * Copyright 2009 The NetSurf Browser Project. + * Daniel Silverstone + */ + +#ifndef libwapcaplet_h_ +#define libwapcaplet_h_ + +#include + +/** + * Memory allocator type + */ +typedef void *(*lwc_allocator_fn)(void *ptr, size_t size, void *pw); + +/** + * A string internment context. + */ +typedef struct lwc_context_s lwc_context; + +/** + * An interned string. + */ +typedef struct lwc_string_s lwc_string; + +/** + * Error codes which libwapcaplet might return. + */ +typedef enum lwc_error_e { + lwc_error_ok = 0, + lwc_error_oom = 1, + lwc_error_range = 2, +} lwc_error; + +/** + * Create an internment context. + * + * This creates an internment context with a weak ref of one. + * + * You should reference the context with lwc_context_ref to convert + * that to a strong reference when you decide where to store it. + */ +extern lwc_error lwc_create_context(lwc_allocator_fn alloc, void *pw, + lwc_context **ret); + +/** + * Increment the reference count for an internment context. + */ +extern lwc_context *lwc_context_ref(lwc_context *ctx); + +/** + * Decrement the reference count for an internment context. + * + * @note If the reference count reaches zero, the context will be freed. + */ +extern void lwc_context_unref(lwc_context *ctx); + +/** + * Intern a string. + * + * If the string was already present, its reference count is incremented. + */ +extern lwc_error lwc_context_intern(lwc_context *ctx, + const char *s, size_t slen, + lwc_string **ret); + +/** + * Intern a substring. + */ +extern lwc_error lwc_context_intern_substring(lwc_context *ctx, + lwc_string *str, + size_t ssoffset, size_t sslen, + lwc_string **ret); + +/** + * Increment the reference count on an lwc_string. + * + * Use this if copying the string and intending both sides to retain + * ownership. + */ +extern lwc_string *lwc_context_string_ref(lwc_context *ctx, lwc_string *str); + +/** + * Release a reference on an lwc_string. + * + * If the reference count reaches zero then the string will be freed. + */ +extern void lwc_context_string_unref(lwc_context *ctx, lwc_string *str); + +/** + * Check if two interned strings are equal. + * + * @note The strings must be from the same intern context and that + * must be the context passed in. + */ +#define lwc_context_string_isequal(ctx, str1, str2, ret) \ + (*ret = (str1 == str2)), lwc_error_ok + +/** + * Check if two interned strings are case-insensitively equal. + * + * @note The strings must be from the same intern context and that + * must be the context passed in. + */ +extern lwc_error lwc_context_string_caseless_isequal(lwc_context *ctx, + lwc_string *str1, + lwc_string *str2, + bool *ret); + +#endif /* libwapcaplet_h_ */ diff --git a/src/libwapcaplet.c b/src/libwapcaplet.c new file mode 100644 index 0000000..1b5738b --- /dev/null +++ b/src/libwapcaplet.c @@ -0,0 +1,304 @@ +/* libwapcaplet.h + * + * String internment and management tools. + * + * Copyright 2009 The NetSurf Browser Project. + * Daniel Silverstone + */ + +#include + +#include "libwapcaplet/libwapcaplet.h" + +typedef uint32_t lwc_hash; +typedef uint32_t lwc_refcounter; + +static inline lwc_hash +lwc_calculate_hash(const char *str, size_t len) +{ + lwc_hash z = 0x811c9dc5; + + + while (len > 0) { + z *= 0x01000193; + z ^= *str++; + len--; + } + + return z; +} + +static inline char +dolower(const char c) +{ + if (c >= 'A' && c <= 'Z') + return c + 'a' - 'A'; + return c; +} + +static inline lwc_hash +lwc_calculate_lcase_hash(const char *str, size_t len) +{ + lwc_hash z = 0x811c9dc5; + + + while (len > 0) { + z *= 0x01000193; + z ^= dolower(*str++); + len--; + } + + return z; +} + +struct lwc_string_s { + lwc_string ** prevptr; + lwc_string * next; + size_t len; + lwc_hash hash; + lwc_refcounter refcnt; + lwc_string * insensitive; +}; + +#define STR_OF(str) ((char *)(str + 1)) +#define CSTR_OF(str) ((const char *)(str + 1)) + +#define NR_BUCKETS (4091) + +struct lwc_context_s { + lwc_allocator_fn alloc; + void * alloc_pw; + lwc_string * buckets[NR_BUCKETS]; + lwc_refcounter refcnt; + bool refweak; +}; + +lwc_error +lwc_create_context(lwc_allocator_fn alloc, void *pw, + lwc_context **ret) +{ + *ret = alloc(NULL, sizeof(lwc_context), pw); + + if (*ret == NULL) + return lwc_error_oom; + + memset(*ret, 0, sizeof(lwc_context)); + + (*ret)->alloc = alloc; + (*ret)->alloc_pw = pw; + (*ret)->refcnt = 1; + (*ret)->refweak = true; + + return lwc_error_ok; +} + +lwc_context * +lwc_context_ref(lwc_context *ctx) +{ + assert(ctx); + + if (ctx->refweak == true) + ctx->refweak = false; + else + ctx->refcnt++; + + return ctx; +} + +#define LWC_ALLOC(s) ctx->alloc(NULL, s, ctx->alloc_pw) +#define LWC_FREE(p) ctx->alloc(p, 0, ctx->alloc_pw) + +void +lwc_context_unref(lwc_context *ctx) +{ + int bucket_nr; + lwc_string *s, *s_next; + + assert(ctx); + + if (--(ctx->refcnt)) + return; + + for (bucket_nr = 0; bucket_nr < NR_BUCKETS; ++bucket_nr) { + s = ctx->buckets[bucket_nr]; + while (s != NULL) { + s_next = s->next; + LWC_FREE(s); + s = s_next; + } + } + + LWC_FREE(ctx); +} + +typedef lwc_hash (*lwc_hasher)(const char *, size_t); +typedef int (*lwc_strncmp)(const char *, const char *, size_t); +typedef void (*lwc_memcpy)(char *, const char *, size_t); + +static lwc_error +__lwc_context_intern(lwc_context *ctx, + const char *s, size_t slen, + lwc_string **ret, + lwc_hasher hasher, + lwc_strncmp compare, + lwc_memcpy copy) +{ + lwc_hash h; + lwc_hash bucket; + lwc_string *str; + + assert(ctx); + assert((s != NULL) || (slen == 0)); + assert(ret); + + h = hasher(s, slen); + bucket = h % NR_BUCKETS; + str = ctx->buckets[bucket]; + + while (str != NULL) { + if ((str->hash == h) && (str->len == slen)) { + if (compare(s, CSTR_OF(str), slen) == 0) { + str->refcnt++; + *ret = str; + return lwc_error_ok; + } + } + str = str->next; + } + + *ret = str = LWC_ALLOC(sizeof(lwc_string) + slen); + + if (str == NULL) + return lwc_error_oom; + + str->prevptr = &(ctx->buckets[bucket]); + str->next = ctx->buckets[bucket]; + ctx->buckets[bucket]->prevptr = &(str->next); + ctx->buckets[bucket] = str; + + str->len = slen; + str->hash = h; + str->refcnt = 1; + str->insensitive = NULL; + + copy(STR_OF(str), s, slen); + + return lwc_error_ok; +} + +lwc_error +lwc_context_intern(lwc_context *ctx, + const char *s, size_t slen, + lwc_string **ret) +{ + return __lwc_context_intern(ctx, s, slen, ret, + lwc_calculate_hash, + strncmp, memcpy); +} + +lwc_error +lwc_context_intern_substring(lwc_context *ctx, + lwc_string *str, + size_t ssoffset, size_t sslen, + lwc_string **ret) +{ + assert(ctx); + assert(str); + assert(ret); + + if (ssoffset >= str->len) + return lwc_error_range; + if ((ssoffset + sslen) > str->len) + return lwc_error_range; + + return lwc_context_intern(ctx, CSTR_OF(str) + ssoffset, sslen, ret); +} + +lwc_string * +lwc_context_string_ref(lwc_context *ctx, lwc_string *str) +{ + assert(ctx); + assert(str); + + str->refcnt++; + + return str; +} + +void +lwc_context_string_unref(lwc_context *ctx, lwc_string *str) +{ + assert(ctx); + assert(str); + + if (--(str->refcnt)) + return; + + *(str->prevptr) = str->next; + str->next->prevptr = str->prevptr; + + if (str->insensitive != NULL) + lwc_context_string_unref(ctx, str->insensitive); + + LWC_FREE(str); +} + +static int +lwc_lcase_strcmp(const char *s1, const char *s2, size_t n) +{ + while (n--) { + if (*s1++ != dolower(*s2++)) + return 1; + } + return 0; +} + +static void +lwc_lcase_memcpy(char *target, const char *source, size_t n) +{ + while (n--) { + *target++ = dolower(*source++); + } +} + +static lwc_error +lwc_context_intern_caseless(lwc_context *ctx, + lwc_string *str) +{ + assert(ctx); + assert(str); + assert(str->insensitive == NULL); + + return __lwc_context_intern(ctx, CSTR_OF(str), + str->len, &(str->insensitive), + lwc_calculate_lcase_hash, + lwc_lcase_strcmp, + lwc_lcase_memcpy); + +} + +lwc_error +lwc_context_string_caseless_isequal(lwc_context *ctx, + lwc_string *str1, + lwc_string *str2, + bool *ret) +{ + lwc_error err; + assert(ctx); + assert(str1); + assert(str2); + + if (str1->insensitive == NULL) { + err = lwc_context_intern_caseless(ctx, str1); + if (err != lwc_error_ok) + return err; + } + if (str2->insensitive == NULL) { + err = lwc_context_intern_caseless(ctx, str2); + if (err != lwc_error_ok) + return err; + } + + *ret = (str1->insensitive == str2->insensitive); + return lwc_error_ok; +} -- cgit v1.2.3