From 3479055b4a609032a1775871cc685fd7dd33ab32 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Tue, 3 Mar 2009 18:08:01 +0000 Subject: Rationalise dom_string (some consideration is required as to what happens wrt interning -- lwc_strings should probably be used) Purge charset handling -- a) documents are always converted to utf-8 b) use parserutils for utf-8 handling Fix Hubbub binding to compile. svn path=/trunk/dom/; revision=6682 --- src/core/string.c | 632 +++++++++++------------------------------------------- 1 file changed, 124 insertions(+), 508 deletions(-) (limited to 'src/core/string.c') diff --git a/src/core/string.c b/src/core/string.c index 8ec44aa..2540e26 100644 --- a/src/core/string.c +++ b/src/core/string.c @@ -9,62 +9,37 @@ #include #include +#include + #include #include "core/document.h" #include "utils/utils.h" -#include "utils/utf8.h" -#include "utils/utf16.h" /** * A DOM string * - * DOM strings store either a pointer to allocated data, a pointer - * to constant data or an offset into a document buffer. - * - * They are reference counted so freeing is performed correctly. + * Strings are reference counted so destruction is performed correctly. */ struct dom_string { - enum { DOM_STRING_PTR, - DOM_STRING_CONST_PTR, - DOM_STRING_OFFSET, - DOM_STRING_PTR_NODOC - } type; /**< String type */ - - dom_string_charset charset; /**< Charset of string */ - - union { - uint8_t *ptr; - const uint8_t *cptr; - uint32_t offset; - } data; /**< Type-specific data */ + uint8_t *ptr; /**< Pointer to string data */ size_t len; /**< Byte length of string */ - union { - struct dom_document *doc; /**< Owning document */ - struct { - dom_alloc alloc; /**< Memory (de)allocation - * function */ - void *pw; /**< Client-specific data */ - } nodoc; - } ctx; /**< Allocation context */ + dom_alloc alloc; /**< Memory (de)allocation function */ + void *pw; /**< Client-specific data */ uint32_t refcnt; /**< Reference count */ }; static struct dom_string empty_string = { - .type = DOM_STRING_CONST_PTR, - .charset = DOM_STRING_UTF8, - .data.ptr = NULL, + .ptr = NULL, .len = 0, - .ctx.doc = NULL, + .alloc = NULL, + .pw = NULL, .refcnt = 1 }; -static dom_exception __dom_string_get_data(struct dom_string *str, - const uint8_t **data, size_t *len); - /** * Claim a reference on a DOM string * @@ -86,155 +61,18 @@ void dom_string_ref(struct dom_string *str) void dom_string_unref(struct dom_string *str) { if (--str->refcnt == 0) { - if (str->type == DOM_STRING_PTR_NODOC) { - str->ctx.nodoc.alloc(str->data.ptr, 0, - str->ctx.nodoc.pw); - - str->ctx.nodoc.alloc(str, 0, str->ctx.nodoc.pw); - } else { - if (str->type == DOM_STRING_PTR) { - dom_document_alloc(str->ctx.doc, - str->data.ptr, 0); - } - - dom_document_alloc(str->ctx.doc, str, 0); + if (str->alloc != NULL) { + str->alloc(str->ptr, 0, str->pw); + str->alloc(str, 0, str->pw); } } } -/** - * Create a DOM string from an offset into the document buffer - * - * \param doc The document in which the string resides - * \param off Offset from start of document buffer - * \param len Length, in bytes, of string - * \param str Pointer to location to receive pointer to new string - * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion - * - * The returned string will already be referenced, so there is no need - * to explicitly reference it. - */ -dom_exception dom_string_create_from_off(struct dom_document *doc, - uint32_t off, size_t len, struct dom_string **str) -{ - struct dom_string *ret; - - ret = dom_document_alloc(doc, NULL, sizeof(struct dom_string)); - if (ret == NULL) - return DOM_NO_MEM_ERR; - - ret->type = DOM_STRING_OFFSET; - - ret->charset = dom_document_get_charset(doc); - - ret->data.offset = off; - - ret->len = len; - - ret->ctx.doc = doc; - - ret->refcnt = 1; - - *str = ret; - - return DOM_NO_ERR; -} - /** * Create a DOM string from a string of characters * - * \param doc The document in which the string resides - * \param ptr Pointer to string of characters - * \param len Length, in bytes, of string of characters - * \param str Pointer to location to receive pointer to new string - * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion - * - * The returned string will already be referenced, so there is no need - * to explicitly reference it. - * - * The string of characters passed in will be copied for use by the - * returned DOM string. - */ -dom_exception dom_string_create_from_ptr(struct dom_document *doc, - const uint8_t *ptr, size_t len, struct dom_string **str) -{ - struct dom_string *ret; - - ret = dom_document_alloc(doc, NULL, sizeof(struct dom_string)); - if (ret == NULL) - return DOM_NO_MEM_ERR; - - ret->data.ptr = dom_document_alloc(doc, NULL, len); - if (ret->data.ptr == NULL) { - dom_document_alloc(doc, ret, 0); - return DOM_NO_MEM_ERR; - } - - ret->type = DOM_STRING_PTR; - - ret->charset = dom_document_get_charset(doc); - - memcpy(ret->data.ptr, ptr, len); - - ret->len = len; - - ret->ctx.doc = doc; - - ret->refcnt = 1; - - *str = ret; - - return DOM_NO_ERR; -} - -/** - * Create a DOM string from a constant string of characters - * - * \param doc The document in which the string resides - * \param ptr Pointer to string of characters - * \param len Length, in bytes, of string of characters - * \param str Pointer to location to receive pointer to new string - * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion - * - * The returned string will already be referenced, so there is no need - * to explicitly reference it. - * - * The string of characters passed in will _not_ be copied for use by the - * returned DOM string. - */ -dom_exception dom_string_create_from_const_ptr(struct dom_document *doc, - const uint8_t *ptr, size_t len, struct dom_string **str) -{ - struct dom_string *ret; - - ret = dom_document_alloc(doc, NULL, sizeof(struct dom_string)); - if (ret == NULL) - return DOM_NO_MEM_ERR; - - ret->type = DOM_STRING_CONST_PTR; - - ret->charset = dom_document_get_charset(doc); - - ret->data.cptr = ptr; - - ret->len = len; - - ret->ctx.doc = doc; - - ret->refcnt = 1; - - *str = ret; - - return DOM_NO_ERR; -} - -/** - * Create a DOM string from a string of characters that does not belong - * to a document - * * \param alloc Memory (de)allocation function * \param pw Pointer to client-specific private data - * \param charset The charset of the string * \param ptr Pointer to string of characters * \param len Length, in bytes, of string of characters * \param str Pointer to location to receive result @@ -243,12 +81,11 @@ dom_exception dom_string_create_from_const_ptr(struct dom_document *doc, * The returned string will already be referenced, so there is no need * to explicitly reference it. * - * The string of characters passed in will be copied for use by the + * The string of characters passed in will be copied for use by the * returned DOM string. */ -dom_exception dom_string_create_from_ptr_no_doc(dom_alloc alloc, void *pw, - dom_string_charset charset, const uint8_t *ptr, size_t len, - struct dom_string **str) +dom_exception dom_string_create(dom_alloc alloc, void *pw, + const uint8_t *ptr, size_t len, struct dom_string **str) { struct dom_string *ret; @@ -256,22 +93,18 @@ dom_exception dom_string_create_from_ptr_no_doc(dom_alloc alloc, void *pw, if (ret == NULL) return DOM_NO_MEM_ERR; - ret->data.ptr = alloc(NULL, len, pw); - if (ret->data.ptr == NULL) { + ret->ptr = alloc(NULL, len, pw); + if (ret->ptr == NULL) { alloc(ret, 0, pw); return DOM_NO_MEM_ERR; } - ret->type = DOM_STRING_PTR_NODOC; - - ret->charset = charset; - - memcpy(ret->data.ptr, ptr, len); + memcpy(ret->ptr, ptr, len); ret->len = len; - ret->ctx.nodoc.alloc = alloc; - ret->ctx.nodoc.pw = pw; + ret->alloc = alloc; + ret->pw = pw; ret->refcnt = 1; @@ -291,48 +124,16 @@ dom_exception dom_string_create_from_ptr_no_doc(dom_alloc alloc, void *pw, */ int dom_string_cmp(struct dom_string *s1, struct dom_string *s2) { - const uint8_t *d1 = NULL; - const uint8_t *d2 = NULL; - size_t l1, l2; - dom_exception err; - - err = __dom_string_get_data(s1, &d1, &l1); - if (err != DOM_NO_ERR) - return 1; /* arbitrary */ + if (s1 == NULL) + s1 = &empty_string; - err = __dom_string_get_data(s2, &d2, &l2); - if (err != DOM_NO_ERR) - return 1; /* arbitrary */ + if (s2 == NULL) + s2 = &empty_string; - while (l1 > 0 && l2 > 0) { - uint32_t c1, c2; - size_t cl1, cl2; - charset_error err; - - err = (s1->charset == DOM_STRING_UTF8) - ? _dom_utf8_to_ucs4(d1, l1, &c1, &cl1) - : _dom_utf16_to_ucs4(d1, l1, &c1, &cl1); - if (err != CHARSET_OK) { - } + if (s1->len != s2->len) + return 1; - err = (s2->charset == DOM_STRING_UTF8) - ? _dom_utf8_to_ucs4(d2, l2, &c2, &cl2) - : _dom_utf16_to_ucs4(d2, l2, &c2, &cl2); - if (err != CHARSET_OK) { - } - - if (c1 != c2) { - return (int)(c1 - c2); - } - - d1 += cl1; - d2 += cl2; - - l1 -= cl1; - l2 -= cl2; - } - - return (int)(l1 - l2); + return memcmp(s1->ptr, s2->ptr, s1->len); } /** @@ -349,31 +150,28 @@ int dom_string_icmp(struct dom_string *s1, struct dom_string *s2) const uint8_t *d1 = NULL; const uint8_t *d2 = NULL; size_t l1, l2; - dom_exception err; - err = __dom_string_get_data(s1, &d1, &l1); - if (err != DOM_NO_ERR) - return 1; /* arbitrary */ + if (s1 == NULL) + s1 = &empty_string; + if (s2 == NULL) + s2 = &empty_string; - err = __dom_string_get_data(s2, &d2, &l2); - if (err != DOM_NO_ERR) - return 1; /* arbitrary */ + d1 = s1->ptr; + d2 = s2->ptr; + l1 = s1->len; + l2 = s2->len; while (l1 > 0 && l2 > 0) { uint32_t c1, c2; size_t cl1, cl2; - charset_error err; + parserutils_error err; - err = (s1->charset == DOM_STRING_UTF8) - ? _dom_utf8_to_ucs4(d1, l1, &c1, &cl1) - : _dom_utf16_to_ucs4(d1, l1, &c1, &cl1); - if (err != CHARSET_OK) { + err = parserutils_charset_utf8_to_ucs4(d1, l1, &c1, &cl1); + if (err != PARSERUTILS_OK) { } - err = (s2->charset == DOM_STRING_UTF8) - ? _dom_utf8_to_ucs4(d2, l2, &c2, &cl2) - : _dom_utf16_to_ucs4(d2, l2, &c2, &cl2); - if (err != CHARSET_OK) { + err = parserutils_charset_utf8_to_ucs4(d2, l2, &c2, &cl2); + if (err != PARSERUTILS_OK) { } /** \todo improved lower-casing algorithm */ @@ -403,20 +201,19 @@ uint32_t dom_string_index(struct dom_string *str, uint32_t chr) const uint8_t *s; size_t clen, slen; uint32_t c, index; - charset_error err; + parserutils_error err; - __dom_string_get_data(str, &s, &slen); + if (str == NULL) + str = &empty_string; + + s = str->ptr; + slen = str->len; index = 0; while (slen > 0) { - if (str->charset == DOM_STRING_UTF8) { - err = _dom_utf8_to_ucs4(s, slen, &c, &clen); - } else { - err = _dom_utf16_to_ucs4(s, slen, &c, &clen); - } - - if (err != CHARSET_OK) { + err = parserutils_charset_utf8_to_ucs4(s, slen, &c, &clen); + if (err != PARSERUTILS_OK) { return (uint32_t) -1; } @@ -444,28 +241,25 @@ uint32_t dom_string_rindex(struct dom_string *str, uint32_t chr) const uint8_t *s; size_t clen, slen; uint32_t c, index; - charset_error err; + parserutils_error err; + + if (str == NULL) + str = &empty_string; - __dom_string_get_data(str, &s, &slen); + s = str->ptr; + slen = str->len; index = dom_string_length(str); while (slen > 0) { - if (str->charset == DOM_STRING_UTF8) { - err = _dom_utf8_prev(s, slen, &clen); - if (err == CHARSET_OK) { - err = _dom_utf8_to_ucs4(s + clen, slen - clen, - &c, &clen); - } - } else { - err = _dom_utf16_prev(s, slen, &clen); - if (err == CHARSET_OK) { - err = _dom_utf16_to_ucs4(s + clen, slen - clen, - &c, &clen); - } + err = parserutils_charset_utf8_prev(s, slen, + (uint32_t *) &clen); + if (err == PARSERUTILS_OK) { + err = parserutils_charset_utf8_to_ucs4(s + clen, + slen - clen, &c, &clen); } - if (err != CHARSET_OK) { + if (err != PARSERUTILS_OK) { return (uint32_t) -1; } @@ -478,7 +272,6 @@ uint32_t dom_string_rindex(struct dom_string *str, uint32_t chr) } return (uint32_t) -1; - } /** @@ -489,20 +282,14 @@ uint32_t dom_string_rindex(struct dom_string *str, uint32_t chr) */ uint32_t dom_string_length(struct dom_string *str) { - const uint8_t *s; - size_t slen; - uint32_t clen; - charset_error err; - - __dom_string_get_data(str, &s, &slen); + size_t clen; + parserutils_error err; - if (str->charset == DOM_STRING_UTF8) { - err = _dom_utf8_length(s, slen, &clen); - } else { - err = _dom_utf16_length(s, slen, &clen); - } + if (str == NULL) + str = &empty_string; - if (err != CHARSET_OK) { + err = parserutils_charset_utf8_length(str->ptr, str->len, &clen); + if (err != PARSERUTILS_OK) { return 0; } @@ -527,60 +314,28 @@ dom_exception dom_string_concat(struct dom_string *s1, struct dom_string *s2, struct dom_string **result) { struct dom_string *concat; - const uint8_t *s; - size_t slen; - if (s1->type == DOM_STRING_PTR_NODOC) { - concat = s1->ctx.nodoc.alloc(NULL, - sizeof(struct dom_string), s1->ctx.nodoc.pw); - } else { - concat = dom_document_alloc(s1->ctx.doc, - NULL, sizeof(struct dom_string)); - } + concat = s1->alloc(NULL, sizeof(struct dom_string), s1->pw); if (concat == NULL) { return DOM_NO_MEM_ERR; } - /** \todo support attempted concatenation of mismatched charsets */ + concat->ptr = s1->alloc(NULL, s1->len + s2->len, s1->pw); + if (concat->ptr == NULL) { + s1->alloc(concat, 0, s1->pw); - if (s1->type == DOM_STRING_PTR_NODOC) { - concat->data.ptr = s1->ctx.nodoc.alloc(NULL, - s1->len + s2->len, s1->ctx.nodoc.pw); - } else { - concat->data.ptr = dom_document_alloc(s1->ctx.doc, - NULL, s1->len + s2->len); - } - if (concat->data.ptr == NULL) { - if (s1->type == DOM_STRING_PTR_NODOC) { - s1->ctx.nodoc.alloc(concat, 0, s1->ctx.nodoc.pw); - } else { - dom_document_alloc(s1->ctx.doc, concat, 0); - } return DOM_NO_MEM_ERR; } - concat->type = (s1->type == DOM_STRING_PTR_NODOC) - ? DOM_STRING_PTR_NODOC : DOM_STRING_PTR; - - concat->charset = s1->charset; - - __dom_string_get_data(s1, &s, &slen); + memcpy(concat->ptr, s1->ptr, s1->len); - memcpy(concat->data.ptr, s, slen); - - __dom_string_get_data(s2, &s, &slen); - - memcpy(concat->data.ptr + s1->len, s, slen); + memcpy(concat->ptr + s1->len, s2->ptr, s2->len); concat->len = s1->len + s2->len; - if (concat->type == DOM_STRING_PTR_NODOC) { - concat->ctx.nodoc.alloc = s1->ctx.nodoc.alloc; - concat->ctx.nodoc.pw = s1->ctx.nodoc.pw; - } else { - concat->ctx.doc = s1->ctx.doc; - } + concat->alloc = s1->alloc; + concat->pw = s1->pw; concat->refcnt = 1; @@ -607,12 +362,10 @@ dom_exception dom_string_concat(struct dom_string *s1, struct dom_string *s2, dom_exception dom_string_substr(struct dom_string *str, uint32_t i1, uint32_t i2, struct dom_string **result) { - const uint8_t *s; - size_t slen; + const uint8_t *s = str->ptr; + size_t slen = str->len; size_t b1, b2; - charset_error err; - - __dom_string_get_data(str, &s, &slen); + parserutils_error err; /* Initialise the byte index of the start to 0 */ b1 = 0; @@ -621,13 +374,9 @@ dom_exception dom_string_substr(struct dom_string *str, /* Calculate the byte index of the start */ while (i1 > 0) { - if (str->charset == DOM_STRING_UTF8) { - err = _dom_utf8_next(s, slen - b1, b1, &b1); - } else { - err = _dom_utf16_next(s, slen - b1, b1, &b1); - } - - if (err != CHARSET_OK) { + err = parserutils_charset_utf8_next(s, slen - b1, b1, + (uint32_t *) &b1); + if (err != PARSERUTILS_OK) { return DOM_NO_MEM_ERR; } @@ -639,13 +388,10 @@ dom_exception dom_string_substr(struct dom_string *str, /* Calculate the byte index of the end */ while (i2 > 0) { - if (str->charset == DOM_STRING_UTF8) { - err = _dom_utf8_next(s, slen - b2, b2, &b2); - } else { - err = _dom_utf16_next(s, slen - b2, b2, &b2); - } + err = parserutils_charset_utf8_next(s, slen - b2, b2, + (uint32_t *) &b2); - if (err != CHARSET_OK) { + if (err != PARSERUTILS_OK) { return DOM_NO_MEM_ERR; } @@ -653,14 +399,7 @@ dom_exception dom_string_substr(struct dom_string *str, } /* Create a string from the specified byte range */ - return (str->type == DOM_STRING_PTR_NODOC) - ? dom_string_create_from_ptr_no_doc( - str->ctx.nodoc.alloc, - str->ctx.nodoc.pw, - str->charset, - s + b1, b2 - b1, result) - : dom_string_create_from_ptr(str->ctx.doc, - s + b1, b2 - b1, result); + return dom_string_create(str->alloc, str->pw, s + b1, b2 - b1, result); } /** @@ -688,11 +427,12 @@ dom_exception dom_string_insert(struct dom_string *target, const uint8_t *t, *s; uint32_t tlen, slen, clen; uint32_t ins = 0; - charset_error err; - - __dom_string_get_data(target, &t, &tlen); + parserutils_error err; - __dom_string_get_data(source, &s, &slen); + t = target->ptr; + tlen = target->len; + s = source->ptr; + slen = source->len; clen = dom_string_length(target); @@ -706,13 +446,10 @@ dom_exception dom_string_insert(struct dom_string *target, ins = tlen; } else { while (offset > 0) { - if (target->charset == DOM_STRING_UTF8) { - err = _dom_utf8_next(t, tlen - ins, ins, &ins); - } else { - err = _dom_utf16_next(t, tlen - ins, ins, &ins); - } + err = parserutils_charset_utf8_next(t, tlen - ins, + ins, &ins); - if (err != CHARSET_OK) { + if (err != PARSERUTILS_OK) { return DOM_NO_MEM_ERR; } @@ -721,65 +458,36 @@ dom_exception dom_string_insert(struct dom_string *target, } /* Allocate result string */ - if (target->type == DOM_STRING_PTR_NODOC) { - res = target->ctx.nodoc.alloc(NULL, sizeof(struct dom_string), - target->ctx.nodoc.pw); - } else { - res = dom_document_alloc(target->ctx.doc, - NULL, sizeof(struct dom_string)); - } - + res = target->alloc(NULL, sizeof(struct dom_string), target->pw); if (res == NULL) { return DOM_NO_MEM_ERR; } - /** \todo support insertion of a string from a different charset */ - /* Allocate data buffer for result contents */ - if (target->type == DOM_STRING_PTR_NODOC) { - res->data.ptr = target->ctx.nodoc.alloc(NULL, - tlen + slen, target->ctx.nodoc.pw); - } else { - res->data.ptr = dom_document_alloc(target->ctx.doc, - NULL, tlen + slen); - } - if (res->data.ptr == NULL) { - if (target->type == DOM_STRING_PTR_NODOC) { - target->ctx.nodoc.alloc(res, 0, target->ctx.nodoc.pw); - } else { - dom_document_alloc(target->ctx.doc, res, 0); - } + res->ptr = target->alloc(NULL, tlen + slen, target->pw); + if (res->ptr == NULL) { + target->alloc(res, 0, target->pw); return DOM_NO_MEM_ERR; } - /* Populate result members */ - res->type = (target->type == DOM_STRING_PTR_NODOC) - ? DOM_STRING_PTR_NODOC : DOM_STRING_PTR; - - res->charset = target->charset; - /* Copy initial portion of target, if any, into result */ if (ins > 0) { - memcpy(res->data.ptr, t, ins); + memcpy(res->ptr, t, ins); } /* Copy inserted data into result */ - memcpy(res->data.ptr + ins, s, slen); + memcpy(res->ptr + ins, s, slen); /* Copy remainder of target, if any, into result */ if (tlen - ins > 0) { - memcpy(res->data.ptr + ins + slen, t + ins, tlen - ins); + memcpy(res->ptr + ins + slen, t + ins, tlen - ins); } res->len = tlen + slen; - if (res->type == DOM_STRING_PTR_NODOC) { - res->ctx.nodoc.alloc = target->ctx.nodoc.alloc; - res->ctx.nodoc.pw = target->ctx.nodoc.pw; - } else { - res->ctx.doc = target->ctx.doc; - } - + res->alloc = target->alloc; + res->pw = target->pw; + res->refcnt = 1; *result = res; @@ -811,11 +519,12 @@ dom_exception dom_string_replace(struct dom_string *target, const uint8_t *t, *s; uint32_t tlen, slen; uint32_t b1, b2; - charset_error err; - - __dom_string_get_data(target, &t, &tlen); + parserutils_error err; - __dom_string_get_data(source, &s, &slen); + t = target->ptr; + tlen = target->len; + s = source->ptr; + slen = source->len; /* Initialise the byte index of the start to 0 */ b1 = 0; @@ -824,13 +533,9 @@ dom_exception dom_string_replace(struct dom_string *target, /* Calculate the byte index of the start */ while (i1 > 0) { - if (target->charset == DOM_STRING_UTF8) { - err = _dom_utf8_next(s, slen - b1, b1, &b1); - } else { - err = _dom_utf16_next(s, slen - b1, b1, &b1); - } + err = parserutils_charset_utf8_next(s, slen - b1, b1, &b1); - if (err != CHARSET_OK) { + if (err != PARSERUTILS_OK) { return DOM_NO_MEM_ERR; } @@ -842,13 +547,9 @@ dom_exception dom_string_replace(struct dom_string *target, /* Calculate the byte index of the end */ while (i2 > 0) { - if (target->charset == DOM_STRING_UTF8) { - err = _dom_utf8_next(s, slen - b2, b2, &b2); - } else { - err = _dom_utf16_next(s, slen - b2, b2, &b2); - } + err = parserutils_charset_utf8_next(s, slen - b2, b2, &b2); - if (err != CHARSET_OK) { + if (err != PARSERUTILS_OK) { return DOM_NO_MEM_ERR; } @@ -856,66 +557,38 @@ dom_exception dom_string_replace(struct dom_string *target, } /* Allocate result string */ - if (target->type == DOM_STRING_PTR_NODOC) { - res = target->ctx.nodoc.alloc(NULL, sizeof(struct dom_string), - target->ctx.nodoc.pw); - } else { - res = dom_document_alloc(target->ctx.doc, - NULL, sizeof(struct dom_string)); - } + res = target->alloc(NULL, sizeof(struct dom_string), target->pw); if (res == NULL) { return DOM_NO_MEM_ERR; } - /** \todo support insertion of a string from a different charset */ - /* Allocate data buffer for result contents */ - if (target->type == DOM_STRING_PTR_NODOC) { - res->data.ptr = target->ctx.nodoc.alloc(NULL, - tlen + slen - (b2 - b1), target->ctx.nodoc.pw); - } else { - res->data.ptr = dom_document_alloc(target->ctx.doc, - NULL, tlen + slen - (b2 - b1)); - } - if (res->data.ptr == NULL) { - if (target->type == DOM_STRING_PTR_NODOC) { - target->ctx.nodoc.alloc(res, 0, target->ctx.nodoc.pw); - } else { - dom_document_alloc(target->ctx.doc, res, 0); - } + res->ptr = target->alloc(NULL, tlen + slen - (b2 - b1), target->pw); + if (res->ptr == NULL) { + target->alloc(res, 0, target->pw); return DOM_NO_MEM_ERR; } - /* Populate result members */ - res->type = (target->type == DOM_STRING_PTR_NODOC) - ? DOM_STRING_PTR_NODOC : DOM_STRING_PTR; - - res->charset = target->charset; - /* Copy initial portion of target, if any, into result */ if (b1 > 0) { - memcpy(res->data.ptr, t, b1); + memcpy(res->ptr, t, b1); } /* Copy replacement data into result */ if (slen > 0) { - memcpy(res->data.ptr + b1, s, slen); + memcpy(res->ptr + b1, s, slen); } /* Copy remainder of target, if any, into result */ if (tlen - b2 > 0) { - memcpy(res->data.ptr + b1 + slen, t + b2, tlen - b2); + memcpy(res->ptr + b1 + slen, t + b2, tlen - b2); } res->len = tlen + slen - (b2 - b1); - if (res->type == DOM_STRING_PTR_NODOC) { - res->ctx.nodoc.alloc = target->ctx.nodoc.alloc; - res->ctx.nodoc.pw = target->ctx.nodoc.pw; - } else { - res->ctx.doc = target->ctx.doc; - } + res->alloc = target->alloc; + res->pw = target->pw; res->refcnt = 1; @@ -940,19 +613,8 @@ dom_exception dom_string_replace(struct dom_string *target, dom_exception dom_string_dup(struct dom_string *str, struct dom_string **result) { - const uint8_t *s; - size_t slen; - - __dom_string_get_data(str, &s, &slen); - - return str->type == DOM_STRING_PTR_NODOC - ? dom_string_create_from_ptr_no_doc( - str->ctx.nodoc.alloc, - str->ctx.nodoc.pw, - str->charset, - s, slen, result) - : dom_string_create_from_ptr(str->ctx.doc, - s, slen, result); + return dom_string_create(str->alloc, str->pw, str->ptr, str->len, + result); } /** @@ -963,12 +625,10 @@ dom_exception dom_string_dup(struct dom_string *str, */ uint32_t dom_string_hash(struct dom_string *str) { - const uint8_t *s; - size_t slen; + const uint8_t *s = str->ptr; + size_t slen = str->len; uint32_t hash = 0x01000193; - __dom_string_get_data(str, &s, &slen); - while (slen > 0) { hash *= 0x01000193; hash ^= *s; @@ -980,47 +640,3 @@ uint32_t dom_string_hash(struct dom_string *str) return hash; } -/* */ -/*---------------------------------------------------------------------------*/ -/* */ - -/** - * Get a pointer to the string of characters within a DOM string - * - * \param str Pointer to DOM string to retrieve pointer from - * \param data Pointer to location to receive data - * \param len Pointer to location to receive byte length of data - * \return DOM_NO_ERR on success - * - * The caller must have previously claimed a reference on the DOM string. - * The returned pointer must not be freed. - */ -dom_exception __dom_string_get_data(struct dom_string *str, - const uint8_t **data, size_t *len) -{ - /* Assume that a NULL str pointer indicates the empty string */ - if (str == NULL) - str = &empty_string; - - switch (str->type) { - case DOM_STRING_PTR: - *data = str->data.ptr; - break; - case DOM_STRING_CONST_PTR: - *data = str->data.cptr; - break; - case DOM_STRING_OFFSET: - *data = dom_document_get_base(str->ctx.doc) + - str->data.offset; - break; - case DOM_STRING_PTR_NODOC: - *data = str->data.ptr; - break; - } - - *len = str->len; - - return DOM_NO_ERR; -} - - -- cgit v1.2.3