diff options
author | John Mark Bell <jmb@netsurf-browser.org> | 2007-10-03 23:44:32 +0000 |
---|---|---|
committer | John Mark Bell <jmb@netsurf-browser.org> | 2007-10-03 23:44:32 +0000 |
commit | a376b80f718bceccecfee43ba291493fe1ff3fbb (patch) | |
tree | 96549872654851bdb23feafbae373060a81b10b5 /src/core/string.c | |
parent | 51ad846a92e651f9663305800aaa92943a79bb16 (diff) | |
download | libdom-a376b80f718bceccecfee43ba291493fe1ff3fbb.tar.gz libdom-a376b80f718bceccecfee43ba291493fe1ff3fbb.tar.bz2 |
Make the dom string class more useful.
Purge all trace of dom_string_get_data() from outside the dom string implementation.
Port affected code to new, more useful, APIs.
This also fixes the interned node name strings mentioned in the previous commit.
svn path=/trunk/dom/; revision=3621
Diffstat (limited to 'src/core/string.c')
-rw-r--r-- | src/core/string.c | 422 |
1 files changed, 379 insertions, 43 deletions
diff --git a/src/core/string.c b/src/core/string.c
index faa3c85..1e3817c 100644
--- a/src/core/string.c
+++ b/src/core/string.c
@@ -62,6 +62,9 @@ static struct dom_string empty_string = {
 	.refcnt = 1
 };
 
+static dom_exception __dom_string_get_data(struct dom_string *str,
+		const uint8_t **data, size_t *len);
+
 /**
  * Claim a reference on a DOM string
  *
@@ -278,45 +281,6 @@ dom_exception dom_string_create_from_ptr_no_doc(dom_alloc alloc, void *pw,
 }
 
 /**
- * Get a pointer to the string of characters within a DOM string
- *
- * \param str   Pointer to DOM string to retrieve pointer from
- * \param data  Pointer to location to receive data
- * \param len   Pointer to location to receive byte length of data
- * \return DOM_NO_ERR on success
- *
- * The caller must have previously claimed a reference on the DOM string.
- * The returned pointer must not be freed.
- */
-dom_exception dom_string_get_data(struct dom_string *str,
-		const uint8_t **data, size_t *len)
-{
-	/* Assume that a NULL str pointer indicates the empty string */
-	if (str == NULL)
-		str = &empty_string;
-
-	switch (str->type) {
-	case DOM_STRING_PTR:
-		*data = str->data.ptr;
-		break;
-	case DOM_STRING_CONST_PTR:
-		*data = str->data.cptr;
-		break;
-	case DOM_STRING_OFFSET:
-		*data = dom_document_get_base(str->ctx.doc) +
-				str->data.offset;
-		break;
-	case DOM_STRING_PTR_NODOC:
-		*data = str->data.ptr;
-		break;
-	}
-
-	*len = str->len;
-
-	return DOM_NO_ERR;
-}
-
-/**
  * Case sensitively compare two DOM strings
  *
  * \param s1  The first string to compare
@@ -332,11 +296,11 @@ int dom_string_cmp(struct dom_string *s1, struct dom_string *s2)
 	size_t l1, l2;
 	dom_exception err;
 
-	err = dom_string_get_data(s1, &d1, &l1);
+	err = __dom_string_get_data(s1, &d1, &l1);
 	if (err != DOM_NO_ERR)
 		return 1; /* arbitrary */
 
-	err = dom_string_get_data(s2, &d2, &l2);
+	err = __dom_string_get_data(s2, &d2, &l2);
 	if (err != DOM_NO_ERR)
 		return 1; /* arbitrary */
 
@@ -387,11 +351,11 @@ int dom_string_icmp(struct dom_string *s1, struct dom_string *s2)
 	size_t l1, l2;
 	dom_exception err;
 
-	err = dom_string_get_data(s1, &d1, &l1);
+	err = __dom_string_get_data(s1, &d1, &l1);
 	if (err != DOM_NO_ERR)
 		return 1; /* arbitrary */
 
-	err = dom_string_get_data(s2, &d2, &l2);
+	err = __dom_string_get_data(s2, &d2, &l2);
 	if (err != DOM_NO_ERR)
 		return 1; /* arbitrary */
 
@@ -427,3 +391,375 @@ int dom_string_icmp(struct dom_string *s1, struct dom_string *s2)
 	return (int)(l1 - l2);
 }
 
+/**
+ * Get the index of the first occurrence of a character in a dom string
+ *
+ * \param str  The string to search in
+ * \param chr  UCS4 value to look for
+ * \return Zero-based character index of the match, or -1 if none/on error
+ */
+uint32_t dom_string_index(struct dom_string *str, uint32_t chr)
+{
+	const uint8_t *s;
+	size_t clen, slen;
+	uint32_t c, index;
+	charset_error err;
+
+	__dom_string_get_data(str, &s, &slen);
+
+	index = 0;
+
+	while (slen > 0) {
+		if (str->charset == DOM_STRING_UTF8) {
+			err = _dom_utf8_to_ucs4(s, slen, &c, &clen);
+		} else {
+			err = _dom_utf16_to_ucs4(s, slen, &c, &clen);
+		}
+
+		if (err != CHARSET_OK) {
+			return (uint32_t) -1;
+		}
+
+		if (c == chr) {
+			return index;
+		}
+
+		s += clen;
+		slen -= clen;
+		index++;
+	}
+
+	return (uint32_t) -1;
+}
+
+/**
+ * Get the index of the last occurrence of a character in a dom string
+ *
+ * \param str  The string to search in
+ * \param chr  UCS4 value to look for
+ * \return Zero-based character index of the match, or -1 if none/on error
+ */
+uint32_t dom_string_rindex(struct dom_string *str, uint32_t chr)
+{
+	const uint8_t *s;
+	size_t clen, slen;
+	uint32_t c, index;
+	charset_error err;
+
+	__dom_string_get_data(str, &s, &slen);
+
+	index = dom_string_length(str);
+
+	while (slen > 0) {
+		if (str->charset == DOM_STRING_UTF8) {
+			err = _dom_utf8_prev(s, slen, &clen);
+			if (err == CHARSET_OK) {
+				err = _dom_utf8_to_ucs4(s + clen, slen - clen,
+						&c, &clen);
+			}
+		} else {
+			err = _dom_utf16_prev(s, slen, &clen);
+			if (err == CHARSET_OK) {
+				err = _dom_utf16_to_ucs4(s + clen, slen - clen,
+						&c, &clen);
+			}
+		}
+
+		if (err != CHARSET_OK) {
+			return (uint32_t) -1;
+		}
+		/* index counts unexamined characters; step back onto this one */
+		index--;
+		if (c == chr) {
+			return index;
+		}
+
+		slen -= clen;
+	}
+
+	return (uint32_t) -1;
+
+}
+
+/**
+ * Get the length, in characters, of a dom string
+ *
+ * \param str  The string to measure the length of
+ * \return The length of the string, in characters (0 on decoding error)
+ */
+uint32_t dom_string_length(struct dom_string *str)
+{
+	const uint8_t *s;
+	size_t slen;
+	uint32_t clen;
+	charset_error err;
+
+	__dom_string_get_data(str, &s, &slen);
+
+	if (str->charset == DOM_STRING_UTF8) {
+		err = _dom_utf8_length(s, slen, &clen);
+	} else {
+		err = _dom_utf16_length(s, slen, &clen);
+	}
+
+	if (err != CHARSET_OK) {
+		return 0;
+	}
+
+	return clen;
+}
+
+/**
+ * Concatenate two dom strings
+ *
+ * \param s1      The first string
+ * \param s2      The second string
+ * \param result  Pointer to location to receive result
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
+ *
+ * The returned string will be allocated using the allocation details
+ * stored in ::s1.
+ *
+ * The returned string will have its reference count increased. The client
+ * should dereference it once it has finished with it.
+ */
+dom_exception dom_string_concat(struct dom_string *s1, struct dom_string *s2,
+		struct dom_string **result)
+{
+	struct dom_string *concat;
+	const uint8_t *s;
+	size_t slen;
+
+	if (s1->type == DOM_STRING_PTR_NODOC) {
+		concat = s1->ctx.nodoc.alloc(NULL,
+			sizeof(struct dom_string), s1->ctx.nodoc.pw);
+	} else {
+		concat = dom_document_alloc(s1->ctx.doc,
+			NULL, sizeof(struct dom_string));
+	}
+
+	if (concat == NULL) {
+		return DOM_NO_MEM_ERR;
+	}
+
+	/** \todo support attempted concatenation of mismatched charsets */
+
+	if (s1->type == DOM_STRING_PTR_NODOC) {
+		concat->data.ptr = s1->ctx.nodoc.alloc(NULL,
+			s1->len + s2->len, s1->ctx.nodoc.pw);
+	} else {
+		concat->data.ptr = dom_document_alloc(s1->ctx.doc,
+			NULL, s1->len + s2->len);
+	}
+	if (concat->data.ptr == NULL) {
+		if (s1->type == DOM_STRING_PTR_NODOC) {
+			s1->ctx.nodoc.alloc(concat, 0, s1->ctx.nodoc.pw);
+		} else {
+			dom_document_alloc(s1->ctx.doc, concat, 0);
+		}
+		return DOM_NO_MEM_ERR;
+	}
+
+	concat->type = (s1->type == DOM_STRING_PTR_NODOC)
+		? DOM_STRING_PTR_NODOC : DOM_STRING_PTR;
+
+	concat->charset = s1->charset;
+
+	__dom_string_get_data(s1, &s, &slen);
+
+	memcpy(concat->data.ptr, s, slen);
+
+	__dom_string_get_data(s2, &s, &slen);
+
+	memcpy(concat->data.ptr + s1->len, s, slen);
+
+	concat->len = s1->len + s2->len;
+
+	if (concat->type == DOM_STRING_PTR_NODOC) {
+		concat->ctx.nodoc.alloc = s1->ctx.nodoc.alloc;
+		concat->ctx.nodoc.pw = s1->ctx.nodoc.pw;
+	} else {
+		concat->ctx.doc = s1->ctx.doc;
+	}
+
+	concat->refcnt = 1;
+
+	*result = concat;
+
+	return DOM_NO_ERR;
+}
+
+/**
+ * Extract a substring from a dom string
+ *
+ * \param str     The string to extract from
+ * \param i1      The character index of the start of the substring
+ * \param i2      The character index of the end of the substring (exclusive)
+ * \param result  Pointer to location to receive result
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
+ *
+ * The returned string will be allocated using the allocation details
+ * stored in ::str.
+ *
+ * The returned string will have its reference count increased. The client
+ * should dereference it once it has finished with it.
+ */
+dom_exception dom_string_substr(struct dom_string *str,
+		uint32_t i1, uint32_t i2, struct dom_string **result)
+{
+	const uint8_t *s;
+	size_t slen;
+	size_t b1, b2;
+	charset_error err;
+
+	__dom_string_get_data(str, &s, &slen);
+
+	/* Initialise the byte index of the start to 0 */
+	b1 = 0;
+	/* Make the end a character offset from the start */
+	i2 -= i1;
+
+	/* Calculate the byte index of the start */
+	while (i1 > 0) {
+		if (str->charset == DOM_STRING_UTF8) {
+			err = _dom_utf8_next(s, slen, b1, &b1);
+		} else {
+			err = _dom_utf16_next(s, slen, b1, &b1);
+		}
+
+		if (err != CHARSET_OK) {
+			return DOM_NO_MEM_ERR;
+		}
+
+		i1--;
+	}
+
+	/* Initialise the byte index of the end to that of the start */
+	b2 = b1;
+
+	/* Calculate the byte index of the end */
+	while (i2 > 0) {
+		if (str->charset == DOM_STRING_UTF8) {
+			err = _dom_utf8_next(s, slen, b2, &b2);
+		} else {
+			err = _dom_utf16_next(s, slen, b2, &b2);
+		}
+
+		if (err != CHARSET_OK) {
+			return DOM_NO_MEM_ERR;
+		}
+
+		i2--;
+	}
+
+	/* Create a string from the byte range [b1, b2) in either case */
+	return (str->type == DOM_STRING_PTR_NODOC)
+		? dom_string_create_from_ptr_no_doc(
+			str->ctx.nodoc.alloc,
+			str->ctx.nodoc.pw,
+			str->charset,
+			s + b1, b2 - b1, result)
+		: dom_string_create_from_ptr(str->ctx.doc,
+			s + b1, b2 - b1, result);
+}
+
+/**
+ * Duplicate a dom string
+ *
+ * \param str     The string to duplicate
+ * \param result  Pointer to location to receive result
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
+ *
+ * The returned string will be allocated using the allocation details
+ * stored in ::str.
+ *
+ * The returned string will have its reference count increased. The client
+ * should dereference it once it has finished with it.
+ */
+dom_exception dom_string_dup(struct dom_string *str,
+		struct dom_string **result)
+{
+	const uint8_t *s;
+	size_t slen;
+
+	__dom_string_get_data(str, &s, &slen);
+
+	return str->type == DOM_STRING_PTR_NODOC
+		? dom_string_create_from_ptr_no_doc(
+			str->ctx.nodoc.alloc,
+			str->ctx.nodoc.pw,
+			str->charset,
+			s, slen, result)
+		: dom_string_create_from_ptr(str->ctx.doc,
+			s, slen, result);
+}
+
+/**
+ * Calculate a hash value from a dom string
+ *
+ * \param str  The string to calculate a hash of
+ * \return The hash value associated with the string
+ */
+uint32_t dom_string_hash(struct dom_string *str)
+{
+	const uint8_t *s;
+	size_t slen;
+	uint32_t hash = 0x01000193;
+
+	__dom_string_get_data(str, &s, &slen);
+
+	while (slen > 0) {
+		hash *= 0x01000193;
+		hash ^= *s;
+
+		s++;
+		slen--;
+	}
+
+	return hash;
+}
+
+/*                                                                           */
+/*---------------------------------------------------------------------------*/
+/*                                                                           */
+
+/**
+ * Get a pointer to the string of characters within a DOM string
+ *
+ * \param str   Pointer to DOM string to retrieve pointer from
+ * \param data  Pointer to location to receive data
+ * \param len   Pointer to location to receive byte length of data
+ * \return DOM_NO_ERR on success
+ *
+ * The caller must have previously claimed a reference on the DOM string.
+ * The returned pointer must not be freed.
+ */
+dom_exception __dom_string_get_data(struct dom_string *str,
+		const uint8_t **data, size_t *len)
+{
+	/* Assume that a NULL str pointer indicates the empty string */
+	if (str == NULL)
+		str = &empty_string;
+
+	switch (str->type) {
+	case DOM_STRING_PTR:
+		*data = str->data.ptr;
+		break;
+	case DOM_STRING_CONST_PTR:
+		*data = str->data.cptr;
+		break;
+	case DOM_STRING_OFFSET:
+		*data = dom_document_get_base(str->ctx.doc) +
+				str->data.offset;
+		break;
+	case DOM_STRING_PTR_NODOC:
+		*data = str->data.ptr;
+		break;
+	}
+
+	*len = str->len;
+
+	return DOM_NO_ERR;
+}
+
+