diff options
Diffstat (limited to 'src/core/string.c')
-rw-r--r-- | src/core/string.c | 358 |
1 files changed, 337 insertions, 21 deletions
diff --git a/src/core/string.c b/src/core/string.c index 806531a..e35f416 100644 --- a/src/core/string.c +++ b/src/core/string.c @@ -3,16 +3,17 @@ * Licensed under the MIT License, * http://www.opensource.org/licenses/mit-license.php * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org> + * Copyright 2009 Bo Yang <struggleyb.nku@gmail.com> */ +#include <assert.h> #include <ctype.h> #include <inttypes.h> #include <string.h> #include <parserutils/charset/utf8.h> -#include <dom/core/string.h> - +#include "core/string.h" #include "core/document.h" #include "utils/utils.h" @@ -26,6 +27,10 @@ struct dom_string { size_t len; /**< Byte length of string */ + lwc_string *intern; /**< The lwc_string of this string */ + + lwc_context *context; /**< The lwc_context for the lwc_string */ + dom_alloc alloc; /**< Memory (de)allocation function */ void *pw; /**< Client-specific data */ @@ -40,6 +45,8 @@ static struct dom_string empty_string = { .refcnt = 1 }; + + /** * Claim a reference on a DOM string * @@ -60,8 +67,15 @@ void dom_string_ref(struct dom_string *str) */ void dom_string_unref(struct dom_string *str) { + if (str == NULL) + return; + if (--str->refcnt == 0) { - if (str->alloc != NULL) { + if (str->intern != NULL) { + lwc_context_unref(str->context); + lwc_context_string_unref(str->context, str->intern); + str->alloc(str, 0, str->pw); + } else if (str->alloc != NULL) { str->alloc(str->ptr, 0, str->pw); str->alloc(str, 0, str->pw); } @@ -71,11 +85,11 @@ void dom_string_unref(struct dom_string *str) /** * Create a DOM string from a string of characters * - * \param alloc Memory (de)allocation function - * \param pw Pointer to client-specific private data - * \param ptr Pointer to string of characters - * \param len Length, in bytes, of string of characters - * \param str Pointer to location to receive result + * \param alloc Memory (de)allocation function + * \param pw Pointer to client-specific private data + * \param ptr Pointer to string of characters + * \param len Length, in bytes, of string of characters + * \param str Pointer to location to receive result * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion * * The returned string will already be referenced, so there is no need @@ -89,7 +103,7 @@ dom_exception dom_string_create(dom_alloc alloc, void *pw, { struct dom_string *ret; - if (ptr == NULL && len == 0) { + if (ptr == NULL || len == 0) { dom_string_ref(&empty_string); *str = &empty_string; @@ -114,6 +128,9 @@ dom_exception dom_string_create(dom_alloc alloc, void *pw, ret->alloc = alloc; ret->pw = pw; + ret->intern = NULL; + ret->context = NULL; + ret->refcnt = 1; *str = ret; @@ -122,6 +139,152 @@ dom_exception dom_string_create(dom_alloc alloc, void *pw, } /** + * Clone a dom_string if necessary. This method is used to create a new string + * with a new allocator, but if the allocator is the same with the paramter + * str, just ref the string. + * + * \param alloc The new allocator for this string + * \param pw The new pw for this string + * \param str The source dom_string + * \param ret The cloned dom_string + * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion + * + * @note: When both the alloc and pw are the same as the str's, we need no + * real clone, just ref the source string is ok. + */ +dom_exception dom_string_clone(dom_alloc alloc, void *pw, + struct dom_string *str, struct dom_string **ret) +{ + if (alloc == str->alloc && pw == str->pw) { + *ret = str; + dom_string_ref(str); + return DOM_NO_ERR; + } + + if (str->intern != NULL) { + return _dom_string_create_from_lwcstring(alloc, pw, + str->context, str->intern, ret); + } else { + return dom_string_create(alloc, pw, str->ptr, str->len, ret); + } +} + +/** + * Create a dom_string from a lwc_string + * + * \param ctx The lwc_context + * \param str The lwc_string + * \param ret The new dom_string + * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion + */ +dom_exception _dom_string_create_from_lwcstring(dom_alloc alloc, void *pw, + lwc_context *ctx, lwc_string *str, struct dom_string **ret) +{ + dom_string *r; + + if (str == NULL) { + *ret = NULL; + return DOM_NO_ERR; + } + + r = alloc(NULL, sizeof(struct dom_string), pw); + if (r == NULL) + return DOM_NO_MEM_ERR; + + if (str == NULL) { + *ret = &empty_string; + dom_string_ref(*ret); + return DOM_NO_ERR; + } + + r->context = ctx; + r->intern = str; + r->ptr = (uint8_t *)lwc_string_data(str); + r->len = lwc_string_length(str); + + r->alloc = alloc; + r->pw = pw; + + r->refcnt = 1; + + /* Ref the lwc_string */ + lwc_context_ref(ctx); + lwc_context_string_ref(ctx, str); + + *ret = r; + return DOM_NO_ERR; + +} + +/** + * Make the dom_string be interned in the lwc_context + * + * \param str The dom_string to be interned + * \param ctx The lwc_context to intern this dom_string + * \param lwcstr The result lwc_string + * \return DOM_NO_ERR on success, appropriate dom_exception on failure. + */ +dom_exception _dom_string_intern(struct dom_string *str, + struct lwc_context_s *ctx, struct lwc_string_s **lwcstr) +{ + lwc_string *ret; + lwc_error lerr; + + /* If this string is interned with the same context, do nothing */ + if (str->context != NULL && str->context == ctx) { + *lwcstr = str->intern; + return DOM_NO_ERR; + } + + lerr = lwc_context_intern(ctx, (const char *)str->ptr, str->len, &ret); + if (lerr != lwc_error_ok) { + return _dom_exception_from_lwc_error(lerr); + } + + if (str->context != NULL) { + lwc_context_unref(str->context); + lwc_context_string_unref(str->context, str->intern); + str->ptr = NULL; + } + + str->context = ctx; + str->intern = ret; + lwc_context_ref(ctx); + lwc_context_string_ref(ctx, ret); + + if (str->ptr != NULL) { + str->alloc(str->ptr, 0, str->pw); + } + + str->ptr = (uint8_t *) lwc_string_data(ret); + + *lwcstr = ret; + return DOM_NO_ERR; +} + +/** + * Get the internal lwc_string + * + * \param str The dom_string object + * \param ctx The lwc_context which intern this dom_string + * \param lwcstr The lwc_string of this dom-string + * \return DOM_NO_ERR on success, appropriate dom_exception on failure. + */ +dom_exception dom_string_get_intern(struct dom_string *str, + struct lwc_context_s **ctx, struct lwc_string_s **lwcstr) +{ + *ctx = str->context; + *lwcstr = str->intern; + + if (*ctx != NULL) + lwc_context_ref(*ctx); + if (*lwcstr != NULL) + lwc_context_string_ref(*ctx, *lwcstr); + + return DOM_NO_ERR; +} + +/** * Case sensitively compare two DOM strings * * \param s1 The first string to compare @@ -132,12 +295,26 @@ dom_exception dom_string_create(dom_alloc alloc, void *pw, */ int dom_string_cmp(struct dom_string *s1, struct dom_string *s2) { + bool ret; + if (s1 == NULL) s1 = &empty_string; if (s2 == NULL) s2 = &empty_string; + if (s1->context == s2->context && s1->context != NULL) { + assert(s1->intern != NULL); + assert(s2->intern != NULL); + lwc_context_string_isequal(s1->context, s1->intern, + s2->intern, &ret); + if (ret == true) { + return 0; + } else { + return -1; + } + } + if (s1->len != s2->len) return 1; @@ -164,6 +341,19 @@ int dom_string_icmp(struct dom_string *s1, struct dom_string *s2) if (s2 == NULL) s2 = &empty_string; + bool ret; + if (s1->context == s2->context && s1->context != NULL) { + assert(s1->intern != NULL); + assert(s2->intern != NULL); + lwc_context_string_caseless_isequal(s1->context, s1->intern, + s2->intern, &ret); + if (ret == true) { + return 0; + } else { + return -1; + } + } + d1 = s1->ptr; d2 = s2->ptr; l1 = s1->len; @@ -304,6 +494,56 @@ uint32_t dom_string_length(struct dom_string *str) return clen; } +/** + * Get the UCS4 character at position index + * + * \param index The position of the charater + * \param ch The UCS4 character + * \return DOM_NO_ERR on success, appropriate dom_exception on failure. + */ +dom_exception dom_string_at(struct dom_string *str, uint32_t index, + uint32_t *ch) +{ + const uint8_t *s; + size_t clen, slen; + uint32_t c, i; + parserutils_error err; + + if (str == NULL) + str = &empty_string; + + s = str->ptr; + slen = str->len; + + i = 0; + + while (slen > 0) { + err = parserutils_charset_utf8_char_byte_length(s, &clen); + if (err != PARSERUTILS_OK) { + return (uint32_t) -1; + } + + i++; + if (i == index + 1) + break; + + s += clen; + slen -= clen; + } + + if (i == index + 1) { + err = parserutils_charset_utf8_to_ucs4(s, slen, &c, &clen); + if (err != PARSERUTILS_OK) { + return (uint32_t) -1; + } + + *ch = c; + return DOM_NO_ERR; + } else { + return DOM_DOMSTRING_SIZE_ERR; + } +} + /** * Concatenate two dom strings * @@ -322,16 +562,33 @@ dom_exception dom_string_concat(struct dom_string *s1, struct dom_string *s2, struct dom_string **result) { struct dom_string *concat; + dom_alloc alloc; + void *pw; + + assert(s1 != NULL); + assert(s2 != NULL); + + if (s1->alloc != NULL) { + alloc = s1->alloc; + pw = s1->pw; + } else if (s2->alloc != NULL) { + alloc = s2->alloc; + pw = s2->pw; + } else { + /* s1 == s2 == empty_string */ + *result = &empty_string; + return DOM_NO_ERR; + } - concat = s1->alloc(NULL, sizeof(struct dom_string), s1->pw); + concat = alloc(NULL, sizeof(struct dom_string), pw); if (concat == NULL) { return DOM_NO_MEM_ERR; } - concat->ptr = s1->alloc(NULL, s1->len + s2->len, s1->pw); + concat->ptr = alloc(NULL, s1->len + s2->len, pw); if (concat->ptr == NULL) { - s1->alloc(concat, 0, s1->pw); + alloc(concat, 0, pw); return DOM_NO_MEM_ERR; } @@ -342,8 +599,10 @@ dom_exception dom_string_concat(struct dom_string *s1, struct dom_string *s2, concat->len = s1->len + s2->len; - concat->alloc = s1->alloc; - concat->pw = s1->pw; + concat->alloc = alloc; + concat->pw = pw; + concat->context = NULL; + concat->intern = NULL; concat->refcnt = 1; @@ -382,7 +641,7 @@ dom_exception dom_string_substr(struct dom_string *str, /* Calculate the byte index of the start */ while (i1 > 0) { - err = parserutils_charset_utf8_next(s, slen - b1, b1, &b1); + err = parserutils_charset_utf8_next(s, slen, b1, &b1); if (err != PARSERUTILS_OK) { return DOM_NO_MEM_ERR; } @@ -395,7 +654,7 @@ dom_exception dom_string_substr(struct dom_string *str, /* Calculate the byte index of the end */ while (i2 > 0) { - err = parserutils_charset_utf8_next(s, slen - b2, b2, &b2); + err = parserutils_charset_utf8_next(s, slen, b2, &b2); if (err != PARSERUTILS_OK) { return DOM_NO_MEM_ERR; } @@ -451,7 +710,7 @@ dom_exception dom_string_insert(struct dom_string *target, ins = tlen; } else { while (offset > 0) { - err = parserutils_charset_utf8_next(t, tlen - ins, + err = parserutils_charset_utf8_next(t, tlen, ins, &ins); if (err != PARSERUTILS_OK) { @@ -492,6 +751,8 @@ dom_exception dom_string_insert(struct dom_string *target, res->alloc = target->alloc; res->pw = target->pw; + res->intern = NULL; + res->context = NULL; res->refcnt = 1; @@ -526,6 +787,9 @@ dom_exception dom_string_replace(struct dom_string *target, uint32_t b1, b2; parserutils_error err; + if (source == NULL) + source = &empty_string; + t = target->ptr; tlen = target->len; s = source->ptr; @@ -538,7 +802,7 @@ dom_exception dom_string_replace(struct dom_string *target, /* Calculate the byte index of the start */ while (i1 > 0) { - err = parserutils_charset_utf8_next(s, slen - b1, b1, &b1); + err = parserutils_charset_utf8_next(s, slen, b1, &b1); if (err != PARSERUTILS_OK) { return DOM_NO_MEM_ERR; @@ -552,7 +816,7 @@ dom_exception dom_string_replace(struct dom_string *target, /* Calculate the byte index of the end */ while (i2 > 0) { - err = parserutils_charset_utf8_next(s, slen - b2, b2, &b2); + err = parserutils_charset_utf8_next(s, slen, b2, &b2); if (err != PARSERUTILS_OK) { return DOM_NO_MEM_ERR; @@ -594,6 +858,8 @@ dom_exception dom_string_replace(struct dom_string *target, res->alloc = target->alloc; res->pw = target->pw; + res->intern = NULL; + res->context = NULL; res->refcnt = 1; @@ -618,8 +884,13 @@ dom_exception dom_string_replace(struct dom_string *target, dom_exception dom_string_dup(struct dom_string *str, struct dom_string **result) { - return dom_string_create(str->alloc, str->pw, str->ptr, str->len, - result); + if (str->intern != NULL) { + return _dom_string_create_from_lwcstring(str->alloc, str->pw, + str->context, str->intern, result); + } else { + return dom_string_create(str->alloc, str->pw, str->ptr, + str->len, result); + } } /** @@ -645,3 +916,48 @@ uint32_t dom_string_hash(struct dom_string *str) return hash; } +/** + * Convert a lwc_error to a dom_exception + * + * \param err The input lwc_error + * \return the dom_exception + */ +dom_exception _dom_exception_from_lwc_error(lwc_error err) +{ + switch (err) { + case lwc_error_ok: + return DOM_NO_ERR; + case lwc_error_oom: + return DOM_NO_MEM_ERR; + case lwc_error_range: + return DOM_INDEX_SIZE_ERR; + } + assert ("Unknow lwc_error, can't convert to dom_exception"); + /* Suppress compile errors */ + return DOM_NO_ERR; +} + +/** + * Compare the raw data of two lwc_strings for equality when the two strings + * belong to different lwc_context + * + * \param s1 The first lwc_string + * \param s2 The second lwc_string + * \return 0 for equal, non-zero otherwise + */ +int _dom_lwc_string_compare_raw(struct lwc_string_s *s1, + struct lwc_string_s *s2) +{ + const char *rs1, *rs2; + size_t len; + + if (lwc_string_length(s1) != lwc_string_length(s2)) + return -1; + + len = lwc_string_length(s1); + rs1 = lwc_string_data(s1); + rs2 = lwc_string_data(s2); + + return memcmp(rs1, rs2, len); +} + |