summary refs log tree commit diff
path: root/src/core
diff options
context:
space:
mode:
author John Mark Bell <jmb@netsurf-browser.org> 2009-03-03 18:08:01 +0000
committer John Mark Bell <jmb@netsurf-browser.org> 2009-03-03 18:08:01 +0000
commit 702d96e703473dbe4481a42c472b4aae423a51d1 (patch)
tree 9dc767860ebea940f1d936d14d69073b4e289c92 /src/core
parent eeb651eadb47228ad41c21b80d75afc17c2924f8 (diff)
download libdom-702d96e703473dbe4481a42c472b4aae423a51d1.tar.gz
libdom-702d96e703473dbe4481a42c472b4aae423a51d1.tar.bz2
Rationalise dom_string (some consideration is required as to what happens wrt interning -- lwc_strings should probably be used)
Purge charset handling -- a) documents are always converted to utf-8 b) use parserutils for utf-8 handling
Fix Hubbub binding to compile.
svn path=/trunk/dom/; revision=6682
Diffstat (limited to 'src/core')
-rw-r--r-- src/core/attr.c 2
-rw-r--r-- src/core/document.c 113
-rw-r--r-- src/core/document.h 6
-rw-r--r-- src/core/implementation.c 4
-rw-r--r-- src/core/node.c 21
-rw-r--r-- src/core/string.c 632
6 files changed, 152 insertions, 626 deletions
diff --git a/src/core/attr.c b/src/core/attr.c
index a82f117..5a85ac0 100644
--- a/src/core/attr.c
+++ b/src/core/attr.c
@@ -180,7 +180,7 @@ dom_exception dom_attr_get_value(struct dom_attr *attr,
struct dom_string *value, *temp;
dom_exception err;
- err = dom_string_create_from_const_ptr(a->owner,
+ err = dom_document_create_string(a->owner,
(const uint8_t *) "", SLEN(""), &value);
if (err != DOM_NO_ERR) {
return err;
diff --git a/src/core/document.c b/src/core/document.c
index 3e06541..74283f9 100644
--- a/src/core/document.c
+++ b/src/core/document.c
@@ -56,8 +56,6 @@ struct dom_doc_nnm {
struct dom_document {
struct dom_node base; /**< Base node */
- dom_string_charset charset; /**< Charset of strings in document */
-
struct dom_implementation *impl; /**< Owning implementation */
struct dom_doc_nl *nodelists; /**< List of active nodelists */
@@ -73,7 +71,6 @@ struct dom_document {
/** Interned node name strings, indexed by node type */
/* Index 0 is unused */
static struct dom_string *__nodenames_utf8[DOM_NODE_TYPE_COUNT + 1];
-static struct dom_string *__nodenames_utf16[DOM_NODE_TYPE_COUNT + 1];
/**
* Initialise the document module
@@ -102,27 +99,6 @@ dom_exception _dom_document_initialise(dom_alloc alloc, void *pw)
{ "#document-fragment", 18 }, /* Document fragment */
{ NULL, 0 } /* Notation */
};
-
- /** \todo This assumes Little Endian */
- static struct {
- const char *name;
- size_t len;
- } names_utf16[DOM_NODE_TYPE_COUNT + 1] = {
- { NULL, 0 }, /* Unused */
- { NULL, 0 }, /* Element */
- { NULL, 0 }, /* Attr */
- { "#\0t\0e\0x\0t\0", 10 }, /* Text */
- { "#\0c\0d\0a\0t\0a\0-\0s\0e\0c\0t\0i\0o\0n\0", 28 }, /* CDATA section */
- { NULL, 0 }, /* Entity reference */
- { NULL, 0 }, /* Entity */
- { NULL, 0 }, /* Processing instruction */
- { "#\0c\0o\0m\0m\0e\0n\0t\0", 16 }, /* Comment */
- { "#\0d\0o\0c\0u\0m\0e\0n\0t\0", 18 }, /* Document */
- { NULL, 0 }, /* Document type */
- { "#\0d\0o\0c\0u\0m\0e\0n\0t\0-\0f\0r\0a\0g\0m\0e\0n\0t\0", 36 }, /* Document fragment */
- { NULL, 0 } /* Notation */
- };
-
dom_exception err;
/* Initialise interned node names */
@@ -130,13 +106,11 @@ dom_exception _dom_document_initialise(dom_alloc alloc, void *pw)
if (names_utf8[i].name == NULL) {
/* Nothing to intern; skip this entry */
__nodenames_utf8[i] = NULL;
- __nodenames_utf16[i] = NULL;
continue;
}
/* Make string */
- err = dom_string_create_from_ptr_no_doc(alloc, pw,
- DOM_STRING_UTF8,
+ err = dom_string_create(alloc, pw,
(const uint8_t *) names_utf8[i].name,
names_utf8[i].len, &__nodenames_utf8[i]);
if (err != DOM_NO_ERR) {
@@ -144,29 +118,10 @@ dom_exception _dom_document_initialise(dom_alloc alloc, void *pw)
for (int j = 0; j < i; j++) {
if (__nodenames_utf8[j] != NULL) {
dom_string_unref(__nodenames_utf8[j]);
- dom_string_unref(__nodenames_utf16[j]);
}
}
return err;
}
-
- err = dom_string_create_from_ptr_no_doc(alloc, pw,
- DOM_STRING_UTF16,
- (const uint8_t *) names_utf16[i].name,
- names_utf16[i].len, &__nodenames_utf16[i]);
- if (err != DOM_NO_ERR) {
- /* Failed, clean up strings we've created so far */
- for (int j = 0; j < i; j++) {
- if (__nodenames_utf8[j] != NULL) {
- dom_string_unref(__nodenames_utf8[j]);
- dom_string_unref(__nodenames_utf16[j]);
- }
- }
-
- dom_string_unref(__nodenames_utf8[i]);
-
- return err;
- }
}
return DOM_NO_ERR;
@@ -182,7 +137,6 @@ dom_exception _dom_document_finalise(void)
for (int i = 0; i <= DOM_NODE_TYPE_COUNT; i++) {
if (__nodenames_utf8[i] != NULL) {
dom_string_unref(__nodenames_utf8[i]);
- dom_string_unref(__nodenames_utf16[i]);
}
}
@@ -193,7 +147,6 @@ dom_exception _dom_document_finalise(void)
* Create a Document
*
* \param impl The DOM implementation owning the document
- * \param charset The charset used for strings in the document
* \param alloc Memory (de)allocation function
* \param pw Pointer to client-specific private data
* \param doc Pointer to location to receive created document
@@ -204,8 +157,7 @@ dom_exception _dom_document_finalise(void)
* The returned document will already be referenced.
*/
dom_exception dom_document_create(struct dom_implementation *impl,
- dom_string_charset charset, dom_alloc alloc, void *pw,
- struct dom_document **doc)
+ dom_alloc alloc, void *pw, struct dom_document **doc)
{
struct dom_document *d;
dom_exception err;
@@ -233,7 +185,6 @@ dom_exception dom_document_create(struct dom_implementation *impl,
}
/* Initialise remaining type-specific data */
- d->charset = charset;
if (impl != NULL)
dom_implementation_ref(impl);
d->impl = impl;
@@ -241,8 +192,7 @@ dom_exception dom_document_create(struct dom_implementation *impl,
d->nodelists = NULL;
d->maps = NULL;
- d->nodenames = (charset == DOM_STRING_UTF8) ? __nodenames_utf8
- : __nodenames_utf16;
+ d->nodenames = __nodenames_utf8;
*doc = d;
@@ -1047,55 +997,30 @@ dom_exception dom_document_rename_node(struct dom_document *doc,
return DOM_NOT_SUPPORTED_ERR;
}
-/* */
-/* ----------------------------------------------------------------------- */
-/* */
-
/**
- * Acquire a pointer to the base of the document buffer
- *
- * \param doc Document to retrieve pointer from
- * \return Pointer to document buffer
+ * Create a DOM string, using a document's allocation context
*
- * The document buffer is _not_ reference counted (as it is an implicit part
- * of the document). It is destroyed with the document, and thus after all
- * users have been destroyed.
- */
-const uint8_t *dom_document_get_base(struct dom_document *doc)
-{
- UNUSED(doc);
-
- return NULL;
-}
-
-/**
- * Set the document buffer pointer
+ * \param doc The document
+ * \param data Pointer to string data
+ * \param len Length, in bytes, of string
+ * \param result Pointer to location to receive result
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
*
- * \param doc Document to set buffer pointer of
- * \param buffer Pointer to buffer
- * \param buffer_len Length of buffer, in bytes
+ * The returned string will already be referenced, so there is no need
+ * to explicitly reference it.
*
- * By calling this, ownership of the buffer is transferred to the document.
- * It should be called once per document node.
+ * The string of characters passed in will be copied for use by the
+ * returned DOM string.
*/
-void dom_document_set_buffer(struct dom_document *doc, uint8_t *buffer,
- size_t buffer_len)
+dom_exception dom_document_create_string(struct dom_document *doc,
+ const uint8_t *data, size_t len, struct dom_string **result)
{
- UNUSED(doc);
- UNUSED(buffer);
- UNUSED(buffer_len);
+ return dom_string_create(doc->alloc, doc->pw, data, len, result);
}
-/**
- * Retrieve the character set used to encode strings in the document
- *
- * \param doc The document to get the charset of
- * \return The charset in use
- */
-dom_string_charset dom_document_get_charset(struct dom_document *doc)
-{
- return doc->charset;
-}
+/* */
+/* ----------------------------------------------------------------------- */
+/* */
/**
* (De)allocate memory with a document's context
diff --git a/src/core/document.h b/src/core/document.h
index 6982b74..c5c13ac 100644
--- a/src/core/document.h
+++ b/src/core/document.h
@@ -27,12 +27,6 @@ dom_exception _dom_document_finalise(void);
/* Destroy a document */
void dom_document_destroy(struct dom_document *doc);
-/* Get base of document buffer */
-const uint8_t *dom_document_get_base(struct dom_document *doc);
-
-/* Get the document character set */
-dom_string_charset dom_document_get_charset(struct dom_document *doc);
-
/* (De)allocate memory */
void *dom_document_alloc(struct dom_document *doc, void *ptr, size_t size);
diff --git a/src/core/implementation.c b/src/core/implementation.c
index 9738b7c..e37b27d 100644
--- a/src/core/implementation.c
+++ b/src/core/implementation.c
@@ -94,7 +94,6 @@ dom_exception dom_implementation_create_document_type(
* \param qname The qualified name of the document element
* \param doctype The type of document to create
* \param doc Pointer to location to receive result
- * \param charset The charset to use for strings in the document
* \param alloc Memory (de)allocation function
* \param pw Pointer to client-specific private data
* \return DOM_NO_ERR on success,
@@ -127,11 +126,10 @@ dom_exception dom_implementation_create_document(
struct dom_string *namespace, struct dom_string *qname,
struct dom_document_type *doctype,
struct dom_document **doc,
- dom_string_charset charset,
dom_alloc alloc, void *pw)
{
return impl->create_document(impl, namespace, qname, doctype, doc,
- charset, alloc, pw);
+ alloc, pw);
}
/**
diff --git a/src/core/node.c b/src/core/node.c
index 2284e4f..0eebfb0 100644
--- a/src/core/node.c
+++ b/src/core/node.c
@@ -306,15 +306,8 @@ dom_exception dom_node_get_node_name(struct dom_node *node,
struct dom_string *colon;
dom_exception err;
- /* ugh! */
- /** \todo Assumes little endian */
- err = dom_string_create_from_const_ptr(node->owner,
- (const uint8_t *) (
- (dom_document_get_charset(node->owner) ==
- DOM_STRING_UTF8) ? ":" : ":\0"),
- (dom_document_get_charset(node->owner) ==
- DOM_STRING_UTF8) ? 1 : 2,
- &colon);
+ err = dom_document_create_string(node->owner,
+ (const uint8_t *) ":", SLEN(":"), &colon);
if (err != DOM_NO_ERR) {
return err;
}
@@ -1639,7 +1632,7 @@ bool _dom_node_readonly(const struct dom_node *node)
* \param previous Previous node in sibling list, or NULL if none
* \param next Next node in sibling list, or NULL if none
*/
-inline void _dom_node_attach(struct dom_node *node, struct dom_node *parent,
+void _dom_node_attach(struct dom_node *node, struct dom_node *parent,
struct dom_node *previous, struct dom_node *next)
{
_dom_node_attach_range(node, node, parent, previous, next);
@@ -1650,7 +1643,7 @@ inline void _dom_node_attach(struct dom_node *node, struct dom_node *parent,
*
* \param node The node to detach
*/
-inline void _dom_node_detach(struct dom_node *node)
+void _dom_node_detach(struct dom_node *node)
{
_dom_node_detach_range(node, node);
}
@@ -1666,7 +1659,7 @@ inline void _dom_node_detach(struct dom_node *node)
*
* The range is assumed to be a linked list of sibling nodes.
*/
-inline void _dom_node_attach_range(struct dom_node *first,
+void _dom_node_attach_range(struct dom_node *first,
struct dom_node *last,
struct dom_node *parent,
struct dom_node *previous,
@@ -1697,7 +1690,7 @@ inline void _dom_node_attach_range(struct dom_node *first,
*
* The range is assumed to be a linked list of sibling nodes.
*/
-inline void _dom_node_detach_range(struct dom_node *first,
+void _dom_node_detach_range(struct dom_node *first,
struct dom_node *last)
{
if (first->previous != NULL)
@@ -1727,7 +1720,7 @@ inline void _dom_node_detach_range(struct dom_node *first,
* we want to perform any special replacement-related behaviour
* at a later date.
*/
-inline void _dom_node_replace(struct dom_node *old,
+void _dom_node_replace(struct dom_node *old,
struct dom_node *replacement)
{
struct dom_node *first, *last;
diff --git a/src/core/string.c b/src/core/string.c
index 8ec44aa..2540e26 100644
--- a/src/core/string.c
+++ b/src/core/string.c
@@ -9,62 +9,37 @@
#include <inttypes.h>
#include <string.h>
+#include <parserutils/charset/utf8.h>
+
#include <dom/core/string.h>
#include "core/document.h"
#include "utils/utils.h"
-#include "utils/utf8.h"
-#include "utils/utf16.h"
/**
* A DOM string
*
- * DOM strings store either a pointer to allocated data, a pointer
- * to constant data or an offset into a document buffer.
- *
- * They are reference counted so freeing is performed correctly.
+ * Strings are reference counted so destruction is performed correctly.
*/
struct dom_string {
- enum { DOM_STRING_PTR,
- DOM_STRING_CONST_PTR,
- DOM_STRING_OFFSET,
- DOM_STRING_PTR_NODOC
- } type; /**< String type */
-
- dom_string_charset charset; /**< Charset of string */
-
- union {
- uint8_t *ptr;
- const uint8_t *cptr;
- uint32_t offset;
- } data; /**< Type-specific data */
+ uint8_t *ptr; /**< Pointer to string data */
size_t len; /**< Byte length of string */
- union {
- struct dom_document *doc; /**< Owning document */
- struct {
- dom_alloc alloc; /**< Memory (de)allocation
- * function */
- void *pw; /**< Client-specific data */
- } nodoc;
- } ctx; /**< Allocation context */
+ dom_alloc alloc; /**< Memory (de)allocation function */
+ void *pw; /**< Client-specific data */
uint32_t refcnt; /**< Reference count */
};
static struct dom_string empty_string = {
- .type = DOM_STRING_CONST_PTR,
- .charset = DOM_STRING_UTF8,
- .data.ptr = NULL,
+ .ptr = NULL,
.len = 0,
- .ctx.doc = NULL,
+ .alloc = NULL,
+ .pw = NULL,
.refcnt = 1
};
-static dom_exception __dom_string_get_data(struct dom_string *str,
- const uint8_t **data, size_t *len);
-
/**
* Claim a reference on a DOM string
*
@@ -86,155 +61,18 @@ void dom_string_ref(struct dom_string *str)
void dom_string_unref(struct dom_string *str)
{
if (--str->refcnt == 0) {
- if (str->type == DOM_STRING_PTR_NODOC) {
- str->ctx.nodoc.alloc(str->data.ptr, 0,
- str->ctx.nodoc.pw);
-
- str->ctx.nodoc.alloc(str, 0, str->ctx.nodoc.pw);
- } else {
- if (str->type == DOM_STRING_PTR) {
- dom_document_alloc(str->ctx.doc,
- str->data.ptr, 0);
- }
-
- dom_document_alloc(str->ctx.doc, str, 0);
+ if (str->alloc != NULL) {
+ str->alloc(str->ptr, 0, str->pw);
+ str->alloc(str, 0, str->pw);
}
}
}
/**
- * Create a DOM string from an offset into the document buffer
- *
- * \param doc The document in which the string resides
- * \param off Offset from start of document buffer
- * \param len Length, in bytes, of string
- * \param str Pointer to location to receive pointer to new string
- * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
- *
- * The returned string will already be referenced, so there is no need
- * to explicitly reference it.
- */
-dom_exception dom_string_create_from_off(struct dom_document *doc,
- uint32_t off, size_t len, struct dom_string **str)
-{
- struct dom_string *ret;
-
- ret = dom_document_alloc(doc, NULL, sizeof(struct dom_string));
- if (ret == NULL)
- return DOM_NO_MEM_ERR;
-
- ret->type = DOM_STRING_OFFSET;
-
- ret->charset = dom_document_get_charset(doc);
-
- ret->data.offset = off;
-
- ret->len = len;
-
- ret->ctx.doc = doc;
-
- ret->refcnt = 1;
-
- *str = ret;
-
- return DOM_NO_ERR;
-}
-
-/**
* Create a DOM string from a string of characters
*
- * \param doc The document in which the string resides
- * \param ptr Pointer to string of characters
- * \param len Length, in bytes, of string of characters
- * \param str Pointer to location to receive pointer to new string
- * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
- *
- * The returned string will already be referenced, so there is no need
- * to explicitly reference it.
- *
- * The string of characters passed in will be copied for use by the
- * returned DOM string.
- */
-dom_exception dom_string_create_from_ptr(struct dom_document *doc,
- const uint8_t *ptr, size_t len, struct dom_string **str)
-{
- struct dom_string *ret;
-
- ret = dom_document_alloc(doc, NULL, sizeof(struct dom_string));
- if (ret == NULL)
- return DOM_NO_MEM_ERR;
-
- ret->data.ptr = dom_document_alloc(doc, NULL, len);
- if (ret->data.ptr == NULL) {
- dom_document_alloc(doc, ret, 0);
- return DOM_NO_MEM_ERR;
- }
-
- ret->type = DOM_STRING_PTR;
-
- ret->charset = dom_document_get_charset(doc);
-
- memcpy(ret->data.ptr, ptr, len);
-
- ret->len = len;
-
- ret->ctx.doc = doc;
-
- ret->refcnt = 1;
-
- *str = ret;
-
- return DOM_NO_ERR;
-}
-
-/**
- * Create a DOM string from a constant string of characters
- *
- * \param doc The document in which the string resides
- * \param ptr Pointer to string of characters
- * \param len Length, in bytes, of string of characters
- * \param str Pointer to location to receive pointer to new string
- * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
- *
- * The returned string will already be referenced, so there is no need
- * to explicitly reference it.
- *
- * The string of characters passed in will _not_ be copied for use by the
- * returned DOM string.
- */
-dom_exception dom_string_create_from_const_ptr(struct dom_document *doc,
- const uint8_t *ptr, size_t len, struct dom_string **str)
-{
- struct dom_string *ret;
-
- ret = dom_document_alloc(doc, NULL, sizeof(struct dom_string));
- if (ret == NULL)
- return DOM_NO_MEM_ERR;
-
- ret->type = DOM_STRING_CONST_PTR;
-
- ret->charset = dom_document_get_charset(doc);
-
- ret->data.cptr = ptr;
-
- ret->len = len;
-
- ret->ctx.doc = doc;
-
- ret->refcnt = 1;
-
- *str = ret;
-
- return DOM_NO_ERR;
-}
-
-/**
- * Create a DOM string from a string of characters that does not belong
- * to a document
- *
* \param alloc Memory (de)allocation function
* \param pw Pointer to client-specific private data
- * \param charset The charset of the string
* \param ptr Pointer to string of characters
* \param len Length, in bytes, of string of characters
* \param str Pointer to location to receive result
@@ -243,12 +81,11 @@ dom_exception dom_string_create_from_const_ptr(struct dom_document *doc,
* The returned string will already be referenced, so there is no need
* to explicitly reference it.
*
- * The string of characters passed in will be copied for use by the
+ * The string of characters passed in will be copied for use by the
* returned DOM string.
*/
-dom_exception dom_string_create_from_ptr_no_doc(dom_alloc alloc, void *pw,
- dom_string_charset charset, const uint8_t *ptr, size_t len,
- struct dom_string **str)
+dom_exception dom_string_create(dom_alloc alloc, void *pw,
+ const uint8_t *ptr, size_t len, struct dom_string **str)
{
struct dom_string *ret;
@@ -256,22 +93,18 @@ dom_exception dom_string_create_from_ptr_no_doc(dom_alloc alloc, void *pw,
if (ret == NULL)
return DOM_NO_MEM_ERR;
- ret->data.ptr = alloc(NULL, len, pw);
- if (ret->data.ptr == NULL) {
+ ret->ptr = alloc(NULL, len, pw);
+ if (ret->ptr == NULL) {
alloc(ret, 0, pw);
return DOM_NO_MEM_ERR;
}
- ret->type = DOM_STRING_PTR_NODOC;
-
- ret->charset = charset;
-
- memcpy(ret->data.ptr, ptr, len);
+ memcpy(ret->ptr, ptr, len);
ret->len = len;
- ret->ctx.nodoc.alloc = alloc;
- ret->ctx.nodoc.pw = pw;
+ ret->alloc = alloc;
+ ret->pw = pw;
ret->refcnt = 1;
@@ -291,48 +124,16 @@ dom_exception dom_string_create_from_ptr_no_doc(dom_alloc alloc, void *pw,
*/
int dom_string_cmp(struct dom_string *s1, struct dom_string *s2)
{
- const uint8_t *d1 = NULL;
- const uint8_t *d2 = NULL;
- size_t l1, l2;
- dom_exception err;
-
- err = __dom_string_get_data(s1, &d1, &l1);
- if (err != DOM_NO_ERR)
- return 1; /* arbitrary */
+ if (s1 == NULL)
+ s1 = &empty_string;
- err = __dom_string_get_data(s2, &d2, &l2);
- if (err != DOM_NO_ERR)
- return 1; /* arbitrary */
+ if (s2 == NULL)
+ s2 = &empty_string;
- while (l1 > 0 && l2 > 0) {
- uint32_t c1, c2;
- size_t cl1, cl2;
- charset_error err;
-
- err = (s1->charset == DOM_STRING_UTF8)
- ? _dom_utf8_to_ucs4(d1, l1, &c1, &cl1)
- : _dom_utf16_to_ucs4(d1, l1, &c1, &cl1);
- if (err != CHARSET_OK) {
- }
+ if (s1->len != s2->len)
+ return 1;
- err = (s2->charset == DOM_STRING_UTF8)
- ? _dom_utf8_to_ucs4(d2, l2, &c2, &cl2)
- : _dom_utf16_to_ucs4(d2, l2, &c2, &cl2);
- if (err != CHARSET_OK) {
- }
-
- if (c1 != c2) {
- return (int)(c1 - c2);
- }
-
- d1 += cl1;
- d2 += cl2;
-
- l1 -= cl1;
- l2 -= cl2;
- }
-
- return (int)(l1 - l2);
+ return memcmp(s1->ptr, s2->ptr, s1->len);
}
/**
@@ -349,31 +150,28 @@ int dom_string_icmp(struct dom_string *s1, struct dom_string *s2)
const uint8_t *d1 = NULL;
const uint8_t *d2 = NULL;
size_t l1, l2;
- dom_exception err;
- err = __dom_string_get_data(s1, &d1, &l1);
- if (err != DOM_NO_ERR)
- return 1; /* arbitrary */
+ if (s1 == NULL)
+ s1 = &empty_string;
+ if (s2 == NULL)
+ s2 = &empty_string;
- err = __dom_string_get_data(s2, &d2, &l2);
- if (err != DOM_NO_ERR)
- return 1; /* arbitrary */
+ d1 = s1->ptr;
+ d2 = s2->ptr;
+ l1 = s1->len;
+ l2 = s2->len;
while (l1 > 0 && l2 > 0) {
uint32_t c1, c2;
size_t cl1, cl2;
- charset_error err;
+ parserutils_error err;
- err = (s1->charset == DOM_STRING_UTF8)
- ? _dom_utf8_to_ucs4(d1, l1, &c1, &cl1)
- : _dom_utf16_to_ucs4(d1, l1, &c1, &cl1);
- if (err != CHARSET_OK) {
+ err = parserutils_charset_utf8_to_ucs4(d1, l1, &c1, &cl1);
+ if (err != PARSERUTILS_OK) {
}
- err = (s2->charset == DOM_STRING_UTF8)
- ? _dom_utf8_to_ucs4(d2, l2, &c2, &cl2)
- : _dom_utf16_to_ucs4(d2, l2, &c2, &cl2);
- if (err != CHARSET_OK) {
+ err = parserutils_charset_utf8_to_ucs4(d2, l2, &c2, &cl2);
+ if (err != PARSERUTILS_OK) {
}
/** \todo improved lower-casing algorithm */
@@ -403,20 +201,19 @@ uint32_t dom_string_index(struct dom_string *str, uint32_t chr)
const uint8_t *s;
size_t clen, slen;
uint32_t c, index;
- charset_error err;
+ parserutils_error err;
- __dom_string_get_data(str, &s, &slen);
+ if (str == NULL)
+ str = &empty_string;
+
+ s = str->ptr;
+ slen = str->len;
index = 0;
while (slen > 0) {
- if (str->charset == DOM_STRING_UTF8) {
- err = _dom_utf8_to_ucs4(s, slen, &c, &clen);
- } else {
- err = _dom_utf16_to_ucs4(s, slen, &c, &clen);
- }
-
- if (err != CHARSET_OK) {
+ err = parserutils_charset_utf8_to_ucs4(s, slen, &c, &clen);
+ if (err != PARSERUTILS_OK) {
return (uint32_t) -1;
}
@@ -444,28 +241,25 @@ uint32_t dom_string_rindex(struct dom_string *str, uint32_t chr)
const uint8_t *s;
size_t clen, slen;
uint32_t c, index;
- charset_error err;
+ parserutils_error err;
+
+ if (str == NULL)
+ str = &empty_string;
- __dom_string_get_data(str, &s, &slen);
+ s = str->ptr;
+ slen = str->len;
index = dom_string_length(str);
while (slen > 0) {
- if (str->charset == DOM_STRING_UTF8) {
- err = _dom_utf8_prev(s, slen, &clen);
- if (err == CHARSET_OK) {
- err = _dom_utf8_to_ucs4(s + clen, slen - clen,
- &c, &clen);
- }
- } else {
- err = _dom_utf16_prev(s, slen, &clen);
- if (err == CHARSET_OK) {
- err = _dom_utf16_to_ucs4(s + clen, slen - clen,
- &c, &clen);
- }
+ err = parserutils_charset_utf8_prev(s, slen,
+ (uint32_t *) &clen);
+ if (err == PARSERUTILS_OK) {
+ err = parserutils_charset_utf8_to_ucs4(s + clen,
+ slen - clen, &c, &clen);
}
- if (err != CHARSET_OK) {
+ if (err != PARSERUTILS_OK) {
return (uint32_t) -1;
}
@@ -478,7 +272,6 @@ uint32_t dom_string_rindex(struct dom_string *str, uint32_t chr)
}
return (uint32_t) -1;
-
}
/**
@@ -489,20 +282,14 @@ uint32_t dom_string_rindex(struct dom_string *str, uint32_t chr)
*/
uint32_t dom_string_length(struct dom_string *str)
{
- const uint8_t *s;
- size_t slen;
- uint32_t clen;
- charset_error err;
-
- __dom_string_get_data(str, &s, &slen);
+ size_t clen;
+ parserutils_error err;
- if (str->charset == DOM_STRING_UTF8) {
- err = _dom_utf8_length(s, slen, &clen);
- } else {
- err = _dom_utf16_length(s, slen, &clen);
- }
+ if (str == NULL)
+ str = &empty_string;
- if (err != CHARSET_OK) {
+ err = parserutils_charset_utf8_length(str->ptr, str->len, &clen);
+ if (err != PARSERUTILS_OK) {
return 0;
}
@@ -527,60 +314,28 @@ dom_exception dom_string_concat(struct dom_string *s1, struct dom_string *s2,
struct dom_string **result)
{
struct dom_string *concat;
- const uint8_t *s;
- size_t slen;
- if (s1->type == DOM_STRING_PTR_NODOC) {
- concat = s1->ctx.nodoc.alloc(NULL,
- sizeof(struct dom_string), s1->ctx.nodoc.pw);
- } else {
- concat = dom_document_alloc(s1->ctx.doc,
- NULL, sizeof(struct dom_string));
- }
+ concat = s1->alloc(NULL, sizeof(struct dom_string), s1->pw);
if (concat == NULL) {
return DOM_NO_MEM_ERR;
}
- /** \todo support attempted concatenation of mismatched charsets */
+ concat->ptr = s1->alloc(NULL, s1->len + s2->len, s1->pw);
+ if (concat->ptr == NULL) {
+ s1->alloc(concat, 0, s1->pw);
- if (s1->type == DOM_STRING_PTR_NODOC) {
- concat->data.ptr = s1->ctx.nodoc.alloc(NULL,
- s1->len + s2->len, s1->ctx.nodoc.pw);
- } else {
- concat->data.ptr = dom_document_alloc(s1->ctx.doc,
- NULL, s1->len + s2->len);
- }
- if (concat->data.ptr == NULL) {
- if (s1->type == DOM_STRING_PTR_NODOC) {
- s1->ctx.nodoc.alloc(concat, 0, s1->ctx.nodoc.pw);
- } else {
- dom_document_alloc(s1->ctx.doc, concat, 0);
- }
return DOM_NO_MEM_ERR;
}
- concat->type = (s1->type == DOM_STRING_PTR_NODOC)
- ? DOM_STRING_PTR_NODOC : DOM_STRING_PTR;
-
- concat->charset = s1->charset;
-
- __dom_string_get_data(s1, &s, &slen);
+ memcpy(concat->ptr, s1->ptr, s1->len);
- memcpy(concat->data.ptr, s, slen);
-
- __dom_string_get_data(s2, &s, &slen);
-
- memcpy(concat->data.ptr + s1->len, s, slen);
+ memcpy(concat->ptr + s1->len, s2->ptr, s2->len);
concat->len = s1->len + s2->len;
- if (concat->type == DOM_STRING_PTR_NODOC) {
- concat->ctx.nodoc.alloc = s1->ctx.nodoc.alloc;
- concat->ctx.nodoc.pw = s1->ctx.nodoc.pw;
- } else {
- concat->ctx.doc = s1->ctx.doc;
- }
+ concat->alloc = s1->alloc;
+ concat->pw = s1->pw;
concat->refcnt = 1;
@@ -607,12 +362,10 @@ dom_exception dom_string_concat(struct dom_string *s1, struct dom_string *s2,
dom_exception dom_string_substr(struct dom_string *str,
uint32_t i1, uint32_t i2, struct dom_string **result)
{
- const uint8_t *s;
- size_t slen;
+ const uint8_t *s = str->ptr;
+ size_t slen = str->len;
size_t b1, b2;
- charset_error err;
-
- __dom_string_get_data(str, &s, &slen);
+ parserutils_error err;
/* Initialise the byte index of the start to 0 */
b1 = 0;
@@ -621,13 +374,9 @@ dom_exception dom_string_substr(struct dom_string *str,
/* Calculate the byte index of the start */
while (i1 > 0) {
- if (str->charset == DOM_STRING_UTF8) {
- err = _dom_utf8_next(s, slen - b1, b1, &b1);
- } else {
- err = _dom_utf16_next(s, slen - b1, b1, &b1);
- }
-
- if (err != CHARSET_OK) {
+ err = parserutils_charset_utf8_next(s, slen - b1, b1,
+ (uint32_t *) &b1);
+ if (err != PARSERUTILS_OK) {
return DOM_NO_MEM_ERR;
}
@@ -639,13 +388,10 @@ dom_exception dom_string_substr(struct dom_string *str,
/* Calculate the byte index of the end */
while (i2 > 0) {
- if (str->charset == DOM_STRING_UTF8) {
- err = _dom_utf8_next(s, slen - b2, b2, &b2);
- } else {
- err = _dom_utf16_next(s, slen - b2, b2, &b2);
- }
+ err = parserutils_charset_utf8_next(s, slen - b2, b2,
+ (uint32_t *) &b2);
- if (err != CHARSET_OK) {
+ if (err != PARSERUTILS_OK) {
return DOM_NO_MEM_ERR;
}
@@ -653,14 +399,7 @@ dom_exception dom_string_substr(struct dom_string *str,
}
/* Create a string from the specified byte range */
- return (str->type == DOM_STRING_PTR_NODOC)
- ? dom_string_create_from_ptr_no_doc(
- str->ctx.nodoc.alloc,
- str->ctx.nodoc.pw,
- str->charset,
- s + b1, b2 - b1, result)
- : dom_string_create_from_ptr(str->ctx.doc,
- s + b1, b2 - b1, result);
+ return dom_string_create(str->alloc, str->pw, s + b1, b2 - b1, result);
}
/**
@@ -688,11 +427,12 @@ dom_exception dom_string_insert(struct dom_string *target,
const uint8_t *t, *s;
uint32_t tlen, slen, clen;
uint32_t ins = 0;
- charset_error err;
-
- __dom_string_get_data(target, &t, &tlen);
+ parserutils_error err;
- __dom_string_get_data(source, &s, &slen);
+ t = target->ptr;
+ tlen = target->len;
+ s = source->ptr;
+ slen = source->len;
clen = dom_string_length(target);
@@ -706,13 +446,10 @@ dom_exception dom_string_insert(struct dom_string *target,
ins = tlen;
} else {
while (offset > 0) {
- if (target->charset == DOM_STRING_UTF8) {
- err = _dom_utf8_next(t, tlen - ins, ins, &ins);
- } else {
- err = _dom_utf16_next(t, tlen - ins, ins, &ins);
- }
+ err = parserutils_charset_utf8_next(t, tlen - ins,
+ ins, &ins);
- if (err != CHARSET_OK) {
+ if (err != PARSERUTILS_OK) {
return DOM_NO_MEM_ERR;
}
@@ -721,65 +458,36 @@ dom_exception dom_string_insert(struct dom_string *target,
}
/* Allocate result string */
- if (target->type == DOM_STRING_PTR_NODOC) {
- res = target->ctx.nodoc.alloc(NULL, sizeof(struct dom_string),
- target->ctx.nodoc.pw);
- } else {
- res = dom_document_alloc(target->ctx.doc,
- NULL, sizeof(struct dom_string));
- }
-
+ res = target->alloc(NULL, sizeof(struct dom_string), target->pw);
if (res == NULL) {
return DOM_NO_MEM_ERR;
}
- /** \todo support insertion of a string from a different charset */
-
/* Allocate data buffer for result contents */
- if (target->type == DOM_STRING_PTR_NODOC) {
- res->data.ptr = target->ctx.nodoc.alloc(NULL,
- tlen + slen, target->ctx.nodoc.pw);
- } else {
- res->data.ptr = dom_document_alloc(target->ctx.doc,
- NULL, tlen + slen);
- }
- if (res->data.ptr == NULL) {
- if (target->type == DOM_STRING_PTR_NODOC) {
- target->ctx.nodoc.alloc(res, 0, target->ctx.nodoc.pw);
- } else {
- dom_document_alloc(target->ctx.doc, res, 0);
- }
+ res->ptr = target->alloc(NULL, tlen + slen, target->pw);
+ if (res->ptr == NULL) {
+ target->alloc(res, 0, target->pw);
return DOM_NO_MEM_ERR;
}
- /* Populate result members */
- res->type = (target->type == DOM_STRING_PTR_NODOC)
- ? DOM_STRING_PTR_NODOC : DOM_STRING_PTR;
-
- res->charset = target->charset;
-
/* Copy initial portion of target, if any, into result */
if (ins > 0) {
- memcpy(res->data.ptr, t, ins);
+ memcpy(res->ptr, t, ins);
}
/* Copy inserted data into result */
- memcpy(res->data.ptr + ins, s, slen);
+ memcpy(res->ptr + ins, s, slen);
/* Copy remainder of target, if any, into result */
if (tlen - ins > 0) {
- memcpy(res->data.ptr + ins + slen, t + ins, tlen - ins);
+ memcpy(res->ptr + ins + slen, t + ins, tlen - ins);
}
res->len = tlen + slen;
- if (res->type == DOM_STRING_PTR_NODOC) {
- res->ctx.nodoc.alloc = target->ctx.nodoc.alloc;
- res->ctx.nodoc.pw = target->ctx.nodoc.pw;
- } else {
- res->ctx.doc = target->ctx.doc;
- }
-
+ res->alloc = target->alloc;
+ res->pw = target->pw;
+
res->refcnt = 1;
*result = res;
@@ -811,11 +519,12 @@ dom_exception dom_string_replace(struct dom_string *target,
const uint8_t *t, *s;
uint32_t tlen, slen;
uint32_t b1, b2;
- charset_error err;
-
- __dom_string_get_data(target, &t, &tlen);
+ parserutils_error err;
- __dom_string_get_data(source, &s, &slen);
+ t = target->ptr;
+ tlen = target->len;
+ s = source->ptr;
+ slen = source->len;
/* Initialise the byte index of the start to 0 */
b1 = 0;
@@ -824,13 +533,9 @@ dom_exception dom_string_replace(struct dom_string *target,
/* Calculate the byte index of the start */
while (i1 > 0) {
- if (target->charset == DOM_STRING_UTF8) {
- err = _dom_utf8_next(s, slen - b1, b1, &b1);
- } else {
- err = _dom_utf16_next(s, slen - b1, b1, &b1);
- }
+ err = parserutils_charset_utf8_next(s, slen - b1, b1, &b1);
- if (err != CHARSET_OK) {
+ if (err != PARSERUTILS_OK) {
return DOM_NO_MEM_ERR;
}
@@ -842,13 +547,9 @@ dom_exception dom_string_replace(struct dom_string *target,
/* Calculate the byte index of the end */
while (i2 > 0) {
- if (target->charset == DOM_STRING_UTF8) {
- err = _dom_utf8_next(s, slen - b2, b2, &b2);
- } else {
- err = _dom_utf16_next(s, slen - b2, b2, &b2);
- }
+ err = parserutils_charset_utf8_next(s, slen - b2, b2, &b2);
- if (err != CHARSET_OK) {
+ if (err != PARSERUTILS_OK) {
return DOM_NO_MEM_ERR;
}
@@ -856,66 +557,38 @@ dom_exception dom_string_replace(struct dom_string *target,
}
/* Allocate result string */
- if (target->type == DOM_STRING_PTR_NODOC) {
- res = target->ctx.nodoc.alloc(NULL, sizeof(struct dom_string),
- target->ctx.nodoc.pw);
- } else {
- res = dom_document_alloc(target->ctx.doc,
- NULL, sizeof(struct dom_string));
- }
+ res = target->alloc(NULL, sizeof(struct dom_string), target->pw);
if (res == NULL) {
return DOM_NO_MEM_ERR;
}
- /** \todo support insertion of a string from a different charset */
-
/* Allocate data buffer for result contents */
- if (target->type == DOM_STRING_PTR_NODOC) {
- res->data.ptr = target->ctx.nodoc.alloc(NULL,
- tlen + slen - (b2 - b1), target->ctx.nodoc.pw);
- } else {
- res->data.ptr = dom_document_alloc(target->ctx.doc,
- NULL, tlen + slen - (b2 - b1));
- }
- if (res->data.ptr == NULL) {
- if (target->type == DOM_STRING_PTR_NODOC) {
- target->ctx.nodoc.alloc(res, 0, target->ctx.nodoc.pw);
- } else {
- dom_document_alloc(target->ctx.doc, res, 0);
- }
+ res->ptr = target->alloc(NULL, tlen + slen - (b2 - b1), target->pw);
+ if (res->ptr == NULL) {
+ target->alloc(res, 0, target->pw);
return DOM_NO_MEM_ERR;
}
- /* Populate result members */
- res->type = (target->type == DOM_STRING_PTR_NODOC)
- ? DOM_STRING_PTR_NODOC : DOM_STRING_PTR;
-
- res->charset = target->charset;
-
/* Copy initial portion of target, if any, into result */
if (b1 > 0) {
- memcpy(res->data.ptr, t, b1);
+ memcpy(res->ptr, t, b1);
}
/* Copy replacement data into result */
if (slen > 0) {
- memcpy(res->data.ptr + b1, s, slen);
+ memcpy(res->ptr + b1, s, slen);
}
/* Copy remainder of target, if any, into result */
if (tlen - b2 > 0) {
- memcpy(res->data.ptr + b1 + slen, t + b2, tlen - b2);
+ memcpy(res->ptr + b1 + slen, t + b2, tlen - b2);
}
res->len = tlen + slen - (b2 - b1);
- if (res->type == DOM_STRING_PTR_NODOC) {
- res->ctx.nodoc.alloc = target->ctx.nodoc.alloc;
- res->ctx.nodoc.pw = target->ctx.nodoc.pw;
- } else {
- res->ctx.doc = target->ctx.doc;
- }
+ res->alloc = target->alloc;
+ res->pw = target->pw;
res->refcnt = 1;
@@ -940,19 +613,8 @@ dom_exception dom_string_replace(struct dom_string *target,
dom_exception dom_string_dup(struct dom_string *str,
struct dom_string **result)
{
- const uint8_t *s;
- size_t slen;
-
- __dom_string_get_data(str, &s, &slen);
-
- return str->type == DOM_STRING_PTR_NODOC
- ? dom_string_create_from_ptr_no_doc(
- str->ctx.nodoc.alloc,
- str->ctx.nodoc.pw,
- str->charset,
- s, slen, result)
- : dom_string_create_from_ptr(str->ctx.doc,
- s, slen, result);
+ return dom_string_create(str->alloc, str->pw, str->ptr, str->len,
+ result);
}
/**
@@ -963,12 +625,10 @@ dom_exception dom_string_dup(struct dom_string *str,
*/
uint32_t dom_string_hash(struct dom_string *str)
{
- const uint8_t *s;
- size_t slen;
+ const uint8_t *s = str->ptr;
+ size_t slen = str->len;
uint32_t hash = 0x01000193;
- __dom_string_get_data(str, &s, &slen);
-
while (slen > 0) {
hash *= 0x01000193;
hash ^= *s;
@@ -980,47 +640,3 @@ uint32_t dom_string_hash(struct dom_string *str)
return hash;
}
-/* */
-/*---------------------------------------------------------------------------*/
-/* */
-
-/**
- * Get a pointer to the string of characters within a DOM string
- *
- * \param str Pointer to DOM string to retrieve pointer from
- * \param data Pointer to location to receive data
- * \param len Pointer to location to receive byte length of data
- * \return DOM_NO_ERR on success
- *
- * The caller must have previously claimed a reference on the DOM string.
- * The returned pointer must not be freed.
- */
-dom_exception __dom_string_get_data(struct dom_string *str,
- const uint8_t **data, size_t *len)
-{
- /* Assume that a NULL str pointer indicates the empty string */
- if (str == NULL)
- str = &empty_string;
-
- switch (str->type) {
- case DOM_STRING_PTR:
- *data = str->data.ptr;
- break;
- case DOM_STRING_CONST_PTR:
- *data = str->data.cptr;
- break;
- case DOM_STRING_OFFSET:
- *data = dom_document_get_base(str->ctx.doc) +
- str->data.offset;
- break;
- case DOM_STRING_PTR_NODOC:
- *data = str->data.ptr;
- break;
- }
-
- *len = str->len;
-
- return DOM_NO_ERR;
-}
-
-