summary | refs | log | tree | commit | diff
path: root/src/core/string.c
diff options
context:
space:
mode:
author: John Mark Bell <jmb@netsurf-browser.org> 2009-03-03 18:08:01 +0000
committer: John Mark Bell <jmb@netsurf-browser.org> 2009-03-03 18:08:01 +0000
commit: 3479055b4a609032a1775871cc685fd7dd33ab32 (patch)
tree: 9dc767860ebea940f1d936d14d69073b4e289c92 /src/core/string.c
parent: 3680a278da394bace852e3eff4701789d6f29bf6 (diff)
download: libdom-3479055b4a609032a1775871cc685fd7dd33ab32.tar.gz
libdom-3479055b4a609032a1775871cc685fd7dd33ab32.tar.bz2
Rationalise dom_string (some consideration is required as to what happens wrt interning -- lwc_strings should probably be used).
Purge charset handling: a) documents are always converted to UTF-8; b) use parserutils for UTF-8 handling. Fix Hubbub binding to compile. svn path=/trunk/dom/; revision=6682
Diffstat (limited to 'src/core/string.c')
-rw-r--r-- src/core/string.c 632
1 files changed, 124 insertions, 508 deletions
diff --git a/src/core/string.c b/src/core/string.c
index 8ec44aa..2540e26 100644
--- a/src/core/string.c
+++ b/src/core/string.c
@@ -9,62 +9,37 @@
#include <inttypes.h>
#include <string.h>
+#include <parserutils/charset/utf8.h>
+
#include <dom/core/string.h>
#include "core/document.h"
#include "utils/utils.h"
-#include "utils/utf8.h"
-#include "utils/utf16.h"
/**
* A DOM string
*
- * DOM strings store either a pointer to allocated data, a pointer
- * to constant data or an offset into a document buffer.
- *
- * They are reference counted so freeing is performed correctly.
+ * Strings are reference counted so destruction is performed correctly.
*/
struct dom_string {
- enum { DOM_STRING_PTR,
- DOM_STRING_CONST_PTR,
- DOM_STRING_OFFSET,
- DOM_STRING_PTR_NODOC
- } type; /**< String type */
-
- dom_string_charset charset; /**< Charset of string */
-
- union {
- uint8_t *ptr;
- const uint8_t *cptr;
- uint32_t offset;
- } data; /**< Type-specific data */
+ uint8_t *ptr; /**< Pointer to string data */
size_t len; /**< Byte length of string */
- union {
- struct dom_document *doc; /**< Owning document */
- struct {
- dom_alloc alloc; /**< Memory (de)allocation
- * function */
- void *pw; /**< Client-specific data */
- } nodoc;
- } ctx; /**< Allocation context */
+ dom_alloc alloc; /**< Memory (de)allocation function */
+ void *pw; /**< Client-specific data */
uint32_t refcnt; /**< Reference count */
};
static struct dom_string empty_string = {
- .type = DOM_STRING_CONST_PTR,
- .charset = DOM_STRING_UTF8,
- .data.ptr = NULL,
+ .ptr = NULL,
.len = 0,
- .ctx.doc = NULL,
+ .alloc = NULL,
+ .pw = NULL,
.refcnt = 1
};
-static dom_exception __dom_string_get_data(struct dom_string *str,
- const uint8_t **data, size_t *len);
-
/**
* Claim a reference on a DOM string
*
@@ -86,155 +61,18 @@ void dom_string_ref(struct dom_string *str)
void dom_string_unref(struct dom_string *str)
{
if (--str->refcnt == 0) {
- if (str->type == DOM_STRING_PTR_NODOC) {
- str->ctx.nodoc.alloc(str->data.ptr, 0,
- str->ctx.nodoc.pw);
-
- str->ctx.nodoc.alloc(str, 0, str->ctx.nodoc.pw);
- } else {
- if (str->type == DOM_STRING_PTR) {
- dom_document_alloc(str->ctx.doc,
- str->data.ptr, 0);
- }
-
- dom_document_alloc(str->ctx.doc, str, 0);
+ if (str->alloc != NULL) {
+ str->alloc(str->ptr, 0, str->pw);
+ str->alloc(str, 0, str->pw);
}
}
}
/**
- * Create a DOM string from an offset into the document buffer
- *
- * \param doc The document in which the string resides
- * \param off Offset from start of document buffer
- * \param len Length, in bytes, of string
- * \param str Pointer to location to receive pointer to new string
- * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
- *
- * The returned string will already be referenced, so there is no need
- * to explicitly reference it.
- */
-dom_exception dom_string_create_from_off(struct dom_document *doc,
- uint32_t off, size_t len, struct dom_string **str)
-{
- struct dom_string *ret;
-
- ret = dom_document_alloc(doc, NULL, sizeof(struct dom_string));
- if (ret == NULL)
- return DOM_NO_MEM_ERR;
-
- ret->type = DOM_STRING_OFFSET;
-
- ret->charset = dom_document_get_charset(doc);
-
- ret->data.offset = off;
-
- ret->len = len;
-
- ret->ctx.doc = doc;
-
- ret->refcnt = 1;
-
- *str = ret;
-
- return DOM_NO_ERR;
-}
-
-/**
* Create a DOM string from a string of characters
*
- * \param doc The document in which the string resides
- * \param ptr Pointer to string of characters
- * \param len Length, in bytes, of string of characters
- * \param str Pointer to location to receive pointer to new string
- * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
- *
- * The returned string will already be referenced, so there is no need
- * to explicitly reference it.
- *
- * The string of characters passed in will be copied for use by the
- * returned DOM string.
- */
-dom_exception dom_string_create_from_ptr(struct dom_document *doc,
- const uint8_t *ptr, size_t len, struct dom_string **str)
-{
- struct dom_string *ret;
-
- ret = dom_document_alloc(doc, NULL, sizeof(struct dom_string));
- if (ret == NULL)
- return DOM_NO_MEM_ERR;
-
- ret->data.ptr = dom_document_alloc(doc, NULL, len);
- if (ret->data.ptr == NULL) {
- dom_document_alloc(doc, ret, 0);
- return DOM_NO_MEM_ERR;
- }
-
- ret->type = DOM_STRING_PTR;
-
- ret->charset = dom_document_get_charset(doc);
-
- memcpy(ret->data.ptr, ptr, len);
-
- ret->len = len;
-
- ret->ctx.doc = doc;
-
- ret->refcnt = 1;
-
- *str = ret;
-
- return DOM_NO_ERR;
-}
-
-/**
- * Create a DOM string from a constant string of characters
- *
- * \param doc The document in which the string resides
- * \param ptr Pointer to string of characters
- * \param len Length, in bytes, of string of characters
- * \param str Pointer to location to receive pointer to new string
- * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
- *
- * The returned string will already be referenced, so there is no need
- * to explicitly reference it.
- *
- * The string of characters passed in will _not_ be copied for use by the
- * returned DOM string.
- */
-dom_exception dom_string_create_from_const_ptr(struct dom_document *doc,
- const uint8_t *ptr, size_t len, struct dom_string **str)
-{
- struct dom_string *ret;
-
- ret = dom_document_alloc(doc, NULL, sizeof(struct dom_string));
- if (ret == NULL)
- return DOM_NO_MEM_ERR;
-
- ret->type = DOM_STRING_CONST_PTR;
-
- ret->charset = dom_document_get_charset(doc);
-
- ret->data.cptr = ptr;
-
- ret->len = len;
-
- ret->ctx.doc = doc;
-
- ret->refcnt = 1;
-
- *str = ret;
-
- return DOM_NO_ERR;
-}
-
-/**
- * Create a DOM string from a string of characters that does not belong
- * to a document
- *
* \param alloc Memory (de)allocation function
* \param pw Pointer to client-specific private data
- * \param charset The charset of the string
* \param ptr Pointer to string of characters
* \param len Length, in bytes, of string of characters
* \param str Pointer to location to receive result
@@ -243,12 +81,11 @@ dom_exception dom_string_create_from_const_ptr(struct dom_document *doc,
* The returned string will already be referenced, so there is no need
* to explicitly reference it.
*
- * The string of characters passed in will be copied for use by the
+ * The string of characters passed in will be copied for use by the
* returned DOM string.
*/
-dom_exception dom_string_create_from_ptr_no_doc(dom_alloc alloc, void *pw,
- dom_string_charset charset, const uint8_t *ptr, size_t len,
- struct dom_string **str)
+dom_exception dom_string_create(dom_alloc alloc, void *pw,
+ const uint8_t *ptr, size_t len, struct dom_string **str)
{
struct dom_string *ret;
@@ -256,22 +93,18 @@ dom_exception dom_string_create_from_ptr_no_doc(dom_alloc alloc, void *pw,
if (ret == NULL)
return DOM_NO_MEM_ERR;
- ret->data.ptr = alloc(NULL, len, pw);
- if (ret->data.ptr == NULL) {
+ ret->ptr = alloc(NULL, len, pw);
+ if (ret->ptr == NULL) {
alloc(ret, 0, pw);
return DOM_NO_MEM_ERR;
}
- ret->type = DOM_STRING_PTR_NODOC;
-
- ret->charset = charset;
-
- memcpy(ret->data.ptr, ptr, len);
+ memcpy(ret->ptr, ptr, len);
ret->len = len;
- ret->ctx.nodoc.alloc = alloc;
- ret->ctx.nodoc.pw = pw;
+ ret->alloc = alloc;
+ ret->pw = pw;
ret->refcnt = 1;
@@ -291,48 +124,16 @@ dom_exception dom_string_create_from_ptr_no_doc(dom_alloc alloc, void *pw,
*/
int dom_string_cmp(struct dom_string *s1, struct dom_string *s2)
{
- const uint8_t *d1 = NULL;
- const uint8_t *d2 = NULL;
- size_t l1, l2;
- dom_exception err;
-
- err = __dom_string_get_data(s1, &d1, &l1);
- if (err != DOM_NO_ERR)
- return 1; /* arbitrary */
+ if (s1 == NULL)
+ s1 = &empty_string;
- err = __dom_string_get_data(s2, &d2, &l2);
- if (err != DOM_NO_ERR)
- return 1; /* arbitrary */
+ if (s2 == NULL)
+ s2 = &empty_string;
- while (l1 > 0 && l2 > 0) {
- uint32_t c1, c2;
- size_t cl1, cl2;
- charset_error err;
-
- err = (s1->charset == DOM_STRING_UTF8)
- ? _dom_utf8_to_ucs4(d1, l1, &c1, &cl1)
- : _dom_utf16_to_ucs4(d1, l1, &c1, &cl1);
- if (err != CHARSET_OK) {
- }
+ if (s1->len != s2->len)
+ return 1;
- err = (s2->charset == DOM_STRING_UTF8)
- ? _dom_utf8_to_ucs4(d2, l2, &c2, &cl2)
- : _dom_utf16_to_ucs4(d2, l2, &c2, &cl2);
- if (err != CHARSET_OK) {
- }
-
- if (c1 != c2) {
- return (int)(c1 - c2);
- }
-
- d1 += cl1;
- d2 += cl2;
-
- l1 -= cl1;
- l2 -= cl2;
- }
-
- return (int)(l1 - l2);
+ return memcmp(s1->ptr, s2->ptr, s1->len);
}
/**
@@ -349,31 +150,28 @@ int dom_string_icmp(struct dom_string *s1, struct dom_string *s2)
const uint8_t *d1 = NULL;
const uint8_t *d2 = NULL;
size_t l1, l2;
- dom_exception err;
- err = __dom_string_get_data(s1, &d1, &l1);
- if (err != DOM_NO_ERR)
- return 1; /* arbitrary */
+ if (s1 == NULL)
+ s1 = &empty_string;
+ if (s2 == NULL)
+ s2 = &empty_string;
- err = __dom_string_get_data(s2, &d2, &l2);
- if (err != DOM_NO_ERR)
- return 1; /* arbitrary */
+ d1 = s1->ptr;
+ d2 = s2->ptr;
+ l1 = s1->len;
+ l2 = s2->len;
while (l1 > 0 && l2 > 0) {
uint32_t c1, c2;
size_t cl1, cl2;
- charset_error err;
+ parserutils_error err;
- err = (s1->charset == DOM_STRING_UTF8)
- ? _dom_utf8_to_ucs4(d1, l1, &c1, &cl1)
- : _dom_utf16_to_ucs4(d1, l1, &c1, &cl1);
- if (err != CHARSET_OK) {
+ err = parserutils_charset_utf8_to_ucs4(d1, l1, &c1, &cl1);
+ if (err != PARSERUTILS_OK) {
}
- err = (s2->charset == DOM_STRING_UTF8)
- ? _dom_utf8_to_ucs4(d2, l2, &c2, &cl2)
- : _dom_utf16_to_ucs4(d2, l2, &c2, &cl2);
- if (err != CHARSET_OK) {
+ err = parserutils_charset_utf8_to_ucs4(d2, l2, &c2, &cl2);
+ if (err != PARSERUTILS_OK) {
}
/** \todo improved lower-casing algorithm */
@@ -403,20 +201,19 @@ uint32_t dom_string_index(struct dom_string *str, uint32_t chr)
const uint8_t *s;
size_t clen, slen;
uint32_t c, index;
- charset_error err;
+ parserutils_error err;
- __dom_string_get_data(str, &s, &slen);
+ if (str == NULL)
+ str = &empty_string;
+
+ s = str->ptr;
+ slen = str->len;
index = 0;
while (slen > 0) {
- if (str->charset == DOM_STRING_UTF8) {
- err = _dom_utf8_to_ucs4(s, slen, &c, &clen);
- } else {
- err = _dom_utf16_to_ucs4(s, slen, &c, &clen);
- }
-
- if (err != CHARSET_OK) {
+ err = parserutils_charset_utf8_to_ucs4(s, slen, &c, &clen);
+ if (err != PARSERUTILS_OK) {
return (uint32_t) -1;
}
@@ -444,28 +241,25 @@ uint32_t dom_string_rindex(struct dom_string *str, uint32_t chr)
const uint8_t *s;
size_t clen, slen;
uint32_t c, index;
- charset_error err;
+ parserutils_error err;
+
+ if (str == NULL)
+ str = &empty_string;
- __dom_string_get_data(str, &s, &slen);
+ s = str->ptr;
+ slen = str->len;
index = dom_string_length(str);
while (slen > 0) {
- if (str->charset == DOM_STRING_UTF8) {
- err = _dom_utf8_prev(s, slen, &clen);
- if (err == CHARSET_OK) {
- err = _dom_utf8_to_ucs4(s + clen, slen - clen,
- &c, &clen);
- }
- } else {
- err = _dom_utf16_prev(s, slen, &clen);
- if (err == CHARSET_OK) {
- err = _dom_utf16_to_ucs4(s + clen, slen - clen,
- &c, &clen);
- }
+ err = parserutils_charset_utf8_prev(s, slen,
+ (uint32_t *) &clen);
+ if (err == PARSERUTILS_OK) {
+ err = parserutils_charset_utf8_to_ucs4(s + clen,
+ slen - clen, &c, &clen);
}
- if (err != CHARSET_OK) {
+ if (err != PARSERUTILS_OK) {
return (uint32_t) -1;
}
@@ -478,7 +272,6 @@ uint32_t dom_string_rindex(struct dom_string *str, uint32_t chr)
}
return (uint32_t) -1;
-
}
/**
@@ -489,20 +282,14 @@ uint32_t dom_string_rindex(struct dom_string *str, uint32_t chr)
*/
uint32_t dom_string_length(struct dom_string *str)
{
- const uint8_t *s;
- size_t slen;
- uint32_t clen;
- charset_error err;
-
- __dom_string_get_data(str, &s, &slen);
+ size_t clen;
+ parserutils_error err;
- if (str->charset == DOM_STRING_UTF8) {
- err = _dom_utf8_length(s, slen, &clen);
- } else {
- err = _dom_utf16_length(s, slen, &clen);
- }
+ if (str == NULL)
+ str = &empty_string;
- if (err != CHARSET_OK) {
+ err = parserutils_charset_utf8_length(str->ptr, str->len, &clen);
+ if (err != PARSERUTILS_OK) {
return 0;
}
@@ -527,60 +314,28 @@ dom_exception dom_string_concat(struct dom_string *s1, struct dom_string *s2,
struct dom_string **result)
{
struct dom_string *concat;
- const uint8_t *s;
- size_t slen;
- if (s1->type == DOM_STRING_PTR_NODOC) {
- concat = s1->ctx.nodoc.alloc(NULL,
- sizeof(struct dom_string), s1->ctx.nodoc.pw);
- } else {
- concat = dom_document_alloc(s1->ctx.doc,
- NULL, sizeof(struct dom_string));
- }
+ concat = s1->alloc(NULL, sizeof(struct dom_string), s1->pw);
if (concat == NULL) {
return DOM_NO_MEM_ERR;
}
- /** \todo support attempted concatenation of mismatched charsets */
+ concat->ptr = s1->alloc(NULL, s1->len + s2->len, s1->pw);
+ if (concat->ptr == NULL) {
+ s1->alloc(concat, 0, s1->pw);
- if (s1->type == DOM_STRING_PTR_NODOC) {
- concat->data.ptr = s1->ctx.nodoc.alloc(NULL,
- s1->len + s2->len, s1->ctx.nodoc.pw);
- } else {
- concat->data.ptr = dom_document_alloc(s1->ctx.doc,
- NULL, s1->len + s2->len);
- }
- if (concat->data.ptr == NULL) {
- if (s1->type == DOM_STRING_PTR_NODOC) {
- s1->ctx.nodoc.alloc(concat, 0, s1->ctx.nodoc.pw);
- } else {
- dom_document_alloc(s1->ctx.doc, concat, 0);
- }
return DOM_NO_MEM_ERR;
}
- concat->type = (s1->type == DOM_STRING_PTR_NODOC)
- ? DOM_STRING_PTR_NODOC : DOM_STRING_PTR;
-
- concat->charset = s1->charset;
-
- __dom_string_get_data(s1, &s, &slen);
+ memcpy(concat->ptr, s1->ptr, s1->len);
- memcpy(concat->data.ptr, s, slen);
-
- __dom_string_get_data(s2, &s, &slen);
-
- memcpy(concat->data.ptr + s1->len, s, slen);
+ memcpy(concat->ptr + s1->len, s2->ptr, s2->len);
concat->len = s1->len + s2->len;
- if (concat->type == DOM_STRING_PTR_NODOC) {
- concat->ctx.nodoc.alloc = s1->ctx.nodoc.alloc;
- concat->ctx.nodoc.pw = s1->ctx.nodoc.pw;
- } else {
- concat->ctx.doc = s1->ctx.doc;
- }
+ concat->alloc = s1->alloc;
+ concat->pw = s1->pw;
concat->refcnt = 1;
@@ -607,12 +362,10 @@ dom_exception dom_string_concat(struct dom_string *s1, struct dom_string *s2,
dom_exception dom_string_substr(struct dom_string *str,
uint32_t i1, uint32_t i2, struct dom_string **result)
{
- const uint8_t *s;
- size_t slen;
+ const uint8_t *s = str->ptr;
+ size_t slen = str->len;
size_t b1, b2;
- charset_error err;
-
- __dom_string_get_data(str, &s, &slen);
+ parserutils_error err;
/* Initialise the byte index of the start to 0 */
b1 = 0;
@@ -621,13 +374,9 @@ dom_exception dom_string_substr(struct dom_string *str,
/* Calculate the byte index of the start */
while (i1 > 0) {
- if (str->charset == DOM_STRING_UTF8) {
- err = _dom_utf8_next(s, slen - b1, b1, &b1);
- } else {
- err = _dom_utf16_next(s, slen - b1, b1, &b1);
- }
-
- if (err != CHARSET_OK) {
+ err = parserutils_charset_utf8_next(s, slen - b1, b1,
+ (uint32_t *) &b1);
+ if (err != PARSERUTILS_OK) {
return DOM_NO_MEM_ERR;
}
@@ -639,13 +388,10 @@ dom_exception dom_string_substr(struct dom_string *str,
/* Calculate the byte index of the end */
while (i2 > 0) {
- if (str->charset == DOM_STRING_UTF8) {
- err = _dom_utf8_next(s, slen - b2, b2, &b2);
- } else {
- err = _dom_utf16_next(s, slen - b2, b2, &b2);
- }
+ err = parserutils_charset_utf8_next(s, slen - b2, b2,
+ (uint32_t *) &b2);
- if (err != CHARSET_OK) {
+ if (err != PARSERUTILS_OK) {
return DOM_NO_MEM_ERR;
}
@@ -653,14 +399,7 @@ dom_exception dom_string_substr(struct dom_string *str,
}
/* Create a string from the specified byte range */
- return (str->type == DOM_STRING_PTR_NODOC)
- ? dom_string_create_from_ptr_no_doc(
- str->ctx.nodoc.alloc,
- str->ctx.nodoc.pw,
- str->charset,
- s + b1, b2 - b1, result)
- : dom_string_create_from_ptr(str->ctx.doc,
- s + b1, b2 - b1, result);
+ return dom_string_create(str->alloc, str->pw, s + b1, b2 - b1, result);
}
/**
@@ -688,11 +427,12 @@ dom_exception dom_string_insert(struct dom_string *target,
const uint8_t *t, *s;
uint32_t tlen, slen, clen;
uint32_t ins = 0;
- charset_error err;
-
- __dom_string_get_data(target, &t, &tlen);
+ parserutils_error err;
- __dom_string_get_data(source, &s, &slen);
+ t = target->ptr;
+ tlen = target->len;
+ s = source->ptr;
+ slen = source->len;
clen = dom_string_length(target);
@@ -706,13 +446,10 @@ dom_exception dom_string_insert(struct dom_string *target,
ins = tlen;
} else {
while (offset > 0) {
- if (target->charset == DOM_STRING_UTF8) {
- err = _dom_utf8_next(t, tlen - ins, ins, &ins);
- } else {
- err = _dom_utf16_next(t, tlen - ins, ins, &ins);
- }
+ err = parserutils_charset_utf8_next(t, tlen - ins,
+ ins, &ins);
- if (err != CHARSET_OK) {
+ if (err != PARSERUTILS_OK) {
return DOM_NO_MEM_ERR;
}
@@ -721,65 +458,36 @@ dom_exception dom_string_insert(struct dom_string *target,
}
/* Allocate result string */
- if (target->type == DOM_STRING_PTR_NODOC) {
- res = target->ctx.nodoc.alloc(NULL, sizeof(struct dom_string),
- target->ctx.nodoc.pw);
- } else {
- res = dom_document_alloc(target->ctx.doc,
- NULL, sizeof(struct dom_string));
- }
-
+ res = target->alloc(NULL, sizeof(struct dom_string), target->pw);
if (res == NULL) {
return DOM_NO_MEM_ERR;
}
- /** \todo support insertion of a string from a different charset */
-
/* Allocate data buffer for result contents */
- if (target->type == DOM_STRING_PTR_NODOC) {
- res->data.ptr = target->ctx.nodoc.alloc(NULL,
- tlen + slen, target->ctx.nodoc.pw);
- } else {
- res->data.ptr = dom_document_alloc(target->ctx.doc,
- NULL, tlen + slen);
- }
- if (res->data.ptr == NULL) {
- if (target->type == DOM_STRING_PTR_NODOC) {
- target->ctx.nodoc.alloc(res, 0, target->ctx.nodoc.pw);
- } else {
- dom_document_alloc(target->ctx.doc, res, 0);
- }
+ res->ptr = target->alloc(NULL, tlen + slen, target->pw);
+ if (res->ptr == NULL) {
+ target->alloc(res, 0, target->pw);
return DOM_NO_MEM_ERR;
}
- /* Populate result members */
- res->type = (target->type == DOM_STRING_PTR_NODOC)
- ? DOM_STRING_PTR_NODOC : DOM_STRING_PTR;
-
- res->charset = target->charset;
-
/* Copy initial portion of target, if any, into result */
if (ins > 0) {
- memcpy(res->data.ptr, t, ins);
+ memcpy(res->ptr, t, ins);
}
/* Copy inserted data into result */
- memcpy(res->data.ptr + ins, s, slen);
+ memcpy(res->ptr + ins, s, slen);
/* Copy remainder of target, if any, into result */
if (tlen - ins > 0) {
- memcpy(res->data.ptr + ins + slen, t + ins, tlen - ins);
+ memcpy(res->ptr + ins + slen, t + ins, tlen - ins);
}
res->len = tlen + slen;
- if (res->type == DOM_STRING_PTR_NODOC) {
- res->ctx.nodoc.alloc = target->ctx.nodoc.alloc;
- res->ctx.nodoc.pw = target->ctx.nodoc.pw;
- } else {
- res->ctx.doc = target->ctx.doc;
- }
-
+ res->alloc = target->alloc;
+ res->pw = target->pw;
+
res->refcnt = 1;
*result = res;
@@ -811,11 +519,12 @@ dom_exception dom_string_replace(struct dom_string *target,
const uint8_t *t, *s;
uint32_t tlen, slen;
uint32_t b1, b2;
- charset_error err;
-
- __dom_string_get_data(target, &t, &tlen);
+ parserutils_error err;
- __dom_string_get_data(source, &s, &slen);
+ t = target->ptr;
+ tlen = target->len;
+ s = source->ptr;
+ slen = source->len;
/* Initialise the byte index of the start to 0 */
b1 = 0;
@@ -824,13 +533,9 @@ dom_exception dom_string_replace(struct dom_string *target,
/* Calculate the byte index of the start */
while (i1 > 0) {
- if (target->charset == DOM_STRING_UTF8) {
- err = _dom_utf8_next(s, slen - b1, b1, &b1);
- } else {
- err = _dom_utf16_next(s, slen - b1, b1, &b1);
- }
+ err = parserutils_charset_utf8_next(s, slen - b1, b1, &b1);
- if (err != CHARSET_OK) {
+ if (err != PARSERUTILS_OK) {
return DOM_NO_MEM_ERR;
}
@@ -842,13 +547,9 @@ dom_exception dom_string_replace(struct dom_string *target,
/* Calculate the byte index of the end */
while (i2 > 0) {
- if (target->charset == DOM_STRING_UTF8) {
- err = _dom_utf8_next(s, slen - b2, b2, &b2);
- } else {
- err = _dom_utf16_next(s, slen - b2, b2, &b2);
- }
+ err = parserutils_charset_utf8_next(s, slen - b2, b2, &b2);
- if (err != CHARSET_OK) {
+ if (err != PARSERUTILS_OK) {
return DOM_NO_MEM_ERR;
}
@@ -856,66 +557,38 @@ dom_exception dom_string_replace(struct dom_string *target,
}
/* Allocate result string */
- if (target->type == DOM_STRING_PTR_NODOC) {
- res = target->ctx.nodoc.alloc(NULL, sizeof(struct dom_string),
- target->ctx.nodoc.pw);
- } else {
- res = dom_document_alloc(target->ctx.doc,
- NULL, sizeof(struct dom_string));
- }
+ res = target->alloc(NULL, sizeof(struct dom_string), target->pw);
if (res == NULL) {
return DOM_NO_MEM_ERR;
}
- /** \todo support insertion of a string from a different charset */
-
/* Allocate data buffer for result contents */
- if (target->type == DOM_STRING_PTR_NODOC) {
- res->data.ptr = target->ctx.nodoc.alloc(NULL,
- tlen + slen - (b2 - b1), target->ctx.nodoc.pw);
- } else {
- res->data.ptr = dom_document_alloc(target->ctx.doc,
- NULL, tlen + slen - (b2 - b1));
- }
- if (res->data.ptr == NULL) {
- if (target->type == DOM_STRING_PTR_NODOC) {
- target->ctx.nodoc.alloc(res, 0, target->ctx.nodoc.pw);
- } else {
- dom_document_alloc(target->ctx.doc, res, 0);
- }
+ res->ptr = target->alloc(NULL, tlen + slen - (b2 - b1), target->pw);
+ if (res->ptr == NULL) {
+ target->alloc(res, 0, target->pw);
return DOM_NO_MEM_ERR;
}
- /* Populate result members */
- res->type = (target->type == DOM_STRING_PTR_NODOC)
- ? DOM_STRING_PTR_NODOC : DOM_STRING_PTR;
-
- res->charset = target->charset;
-
/* Copy initial portion of target, if any, into result */
if (b1 > 0) {
- memcpy(res->data.ptr, t, b1);
+ memcpy(res->ptr, t, b1);
}
/* Copy replacement data into result */
if (slen > 0) {
- memcpy(res->data.ptr + b1, s, slen);
+ memcpy(res->ptr + b1, s, slen);
}
/* Copy remainder of target, if any, into result */
if (tlen - b2 > 0) {
- memcpy(res->data.ptr + b1 + slen, t + b2, tlen - b2);
+ memcpy(res->ptr + b1 + slen, t + b2, tlen - b2);
}
res->len = tlen + slen - (b2 - b1);
- if (res->type == DOM_STRING_PTR_NODOC) {
- res->ctx.nodoc.alloc = target->ctx.nodoc.alloc;
- res->ctx.nodoc.pw = target->ctx.nodoc.pw;
- } else {
- res->ctx.doc = target->ctx.doc;
- }
+ res->alloc = target->alloc;
+ res->pw = target->pw;
res->refcnt = 1;
@@ -940,19 +613,8 @@ dom_exception dom_string_replace(struct dom_string *target,
dom_exception dom_string_dup(struct dom_string *str,
struct dom_string **result)
{
- const uint8_t *s;
- size_t slen;
-
- __dom_string_get_data(str, &s, &slen);
-
- return str->type == DOM_STRING_PTR_NODOC
- ? dom_string_create_from_ptr_no_doc(
- str->ctx.nodoc.alloc,
- str->ctx.nodoc.pw,
- str->charset,
- s, slen, result)
- : dom_string_create_from_ptr(str->ctx.doc,
- s, slen, result);
+ return dom_string_create(str->alloc, str->pw, str->ptr, str->len,
+ result);
}
/**
@@ -963,12 +625,10 @@ dom_exception dom_string_dup(struct dom_string *str,
*/
uint32_t dom_string_hash(struct dom_string *str)
{
- const uint8_t *s;
- size_t slen;
+ const uint8_t *s = str->ptr;
+ size_t slen = str->len;
uint32_t hash = 0x01000193;
- __dom_string_get_data(str, &s, &slen);
-
while (slen > 0) {
hash *= 0x01000193;
hash ^= *s;
@@ -980,47 +640,3 @@ uint32_t dom_string_hash(struct dom_string *str)
return hash;
}
-/* */
-/*---------------------------------------------------------------------------*/
-/* */
-
-/**
- * Get a pointer to the string of characters within a DOM string
- *
- * \param str Pointer to DOM string to retrieve pointer from
- * \param data Pointer to location to receive data
- * \param len Pointer to location to receive byte length of data
- * \return DOM_NO_ERR on success
- *
- * The caller must have previously claimed a reference on the DOM string.
- * The returned pointer must not be freed.
- */
-dom_exception __dom_string_get_data(struct dom_string *str,
- const uint8_t **data, size_t *len)
-{
- /* Assume that a NULL str pointer indicates the empty string */
- if (str == NULL)
- str = &empty_string;
-
- switch (str->type) {
- case DOM_STRING_PTR:
- *data = str->data.ptr;
- break;
- case DOM_STRING_CONST_PTR:
- *data = str->data.cptr;
- break;
- case DOM_STRING_OFFSET:
- *data = dom_document_get_base(str->ctx.doc) +
- str->data.offset;
- break;
- case DOM_STRING_PTR_NODOC:
- *data = str->data.ptr;
- break;
- }
-
- *len = str->len;
-
- return DOM_NO_ERR;
-}
-
-