Rationalise dom_string (some consideration is required as to what happens wrt interning -- lwc_strings should probably be used)

Purge charset handling: a) documents are always converted to UTF-8; b) use parserutils for UTF-8 handling. Fix Hubbub binding to compile. svn path=/trunk/dom/; revision=6682
author: John Mark Bell <jmb@netsurf-browser.org> 2009-03-03 18:08:01 +0000
committer: John Mark Bell <jmb@netsurf-browser.org> 2009-03-03 18:08:01 +0000
commit: 702d96e703473dbe4481a42c472b4aae423a51d1 (patch)
tree: 9dc767860ebea940f1d936d14d69073b4e289c92 /src/core
parent: eeb651eadb47228ad41c21b80d75afc17c2924f8 (diff)
download: libdom-702d96e703473dbe4481a42c472b4aae423a51d1.tar.gz
libdom-702d96e703473dbe4481a42c472b4aae423a51d1.tar.bz2
6 files changed, 152 insertions, 626 deletions
diff --git a/src/core/attr.c b/src/core/attr.c
index a82f117..5a85ac0 100644
--- a/src/core/attr.c
+++ b/src/core/attr.c
@@ -180,7 +180,7 @@ dom_exception dom_attr_get_value(struct dom_attr *attr,
 	struct dom_string *value, *temp;
 	dom_exception err;
 
-	err = dom_string_create_from_const_ptr(a->owner, 
+	err = dom_document_create_string(a->owner, 
 			(const uint8_t *) "", SLEN(""), &value);
 	if (err != DOM_NO_ERR) {
 		return err;
diff --git a/src/core/document.c b/src/core/document.c
index 3e06541..74283f9 100644
--- a/src/core/document.c
+++ b/src/core/document.c
@@ -56,8 +56,6 @@ struct dom_doc_nnm {
 struct dom_document {
 	struct dom_node base;		/**< Base node */
 
-	dom_string_charset charset;	/**< Charset of strings in document */
-
 	struct dom_implementation *impl;	/**< Owning implementation */
 
 	struct dom_doc_nl *nodelists;	/**< List of active nodelists */
@@ -73,7 +71,6 @@ struct dom_document {
 /** Interned node name strings, indexed by node type */
 /* Index 0 is unused */
 static struct dom_string *__nodenames_utf8[DOM_NODE_TYPE_COUNT + 1];
-static struct dom_string *__nodenames_utf16[DOM_NODE_TYPE_COUNT + 1];
 
 /**
  * Initialise the document module
@@ -102,27 +99,6 @@ dom_exception _dom_document_initialise(dom_alloc alloc, void *pw)
 		{ "#document-fragment",	18 },	/* Document fragment */
 		{ NULL,			0 }	/* Notation */
 	};
-
-	/** \todo This assumes Little Endian */
-	static struct {
-		const char *name;
-		size_t len;
-	} names_utf16[DOM_NODE_TYPE_COUNT + 1] = {
-		{ NULL,			0 },	/* Unused */
-		{ NULL,			0 },	/* Element */
-		{ NULL,			0 },	/* Attr */
-		{ "#\0t\0e\0x\0t\0",	10 },	/* Text */
-		{ "#\0c\0d\0a\0t\0a\0-\0s\0e\0c\0t\0i\0o\0n\0",	28 },	/* CDATA section */
-		{ NULL,			0 },	/* Entity reference */
-		{ NULL,			0 },	/* Entity */
-		{ NULL,			0 },	/* Processing instruction */
-		{ "#\0c\0o\0m\0m\0e\0n\0t\0",		16 },	/* Comment */
-		{ "#\0d\0o\0c\0u\0m\0e\0n\0t\0",		18 },	/* Document */
-		{ NULL,			0 },	/* Document type */
-		{ "#\0d\0o\0c\0u\0m\0e\0n\0t\0-\0f\0r\0a\0g\0m\0e\0n\0t\0",	36 },	/* Document fragment */
-		{ NULL,			0 }	/* Notation */
-	};
-
 	dom_exception err;
 
 	/* Initialise interned node names */
@@ -130,13 +106,11 @@ dom_exception _dom_document_initialise(dom_alloc alloc, void *pw)
 		if (names_utf8[i].name == NULL) {
 			/* Nothing to intern; skip this entry */
 			__nodenames_utf8[i] = NULL;
-			__nodenames_utf16[i] = NULL;
 			continue;
 		}
 
 		/* Make string */
-		err = dom_string_create_from_ptr_no_doc(alloc, pw,
-				DOM_STRING_UTF8,
+		err = dom_string_create(alloc, pw,
 				(const uint8_t *) names_utf8[i].name,
 				names_utf8[i].len, &__nodenames_utf8[i]);
 		if (err != DOM_NO_ERR) {
@@ -144,29 +118,10 @@ dom_exception _dom_document_initialise(dom_alloc alloc, void *pw)
 			for (int j = 0; j < i; j++) {
 				if (__nodenames_utf8[j] != NULL) {
 					dom_string_unref(__nodenames_utf8[j]);
-					dom_string_unref(__nodenames_utf16[j]);
 				}
 			}
 			return err;
 		}
-
-		err = dom_string_create_from_ptr_no_doc(alloc, pw,
-				DOM_STRING_UTF16,
-				(const uint8_t *) names_utf16[i].name,
-				names_utf16[i].len, &__nodenames_utf16[i]);
-		if (err != DOM_NO_ERR) {
-			/* Failed, clean up strings we've created so far */
-			for (int j = 0; j < i; j++) {
-				if (__nodenames_utf8[j] != NULL) {
-					dom_string_unref(__nodenames_utf8[j]);
-					dom_string_unref(__nodenames_utf16[j]);
-				}
-			}
-
-			dom_string_unref(__nodenames_utf8[i]);
-
-			return err;
-		}
 	}
 
 	return DOM_NO_ERR;
@@ -182,7 +137,6 @@ dom_exception _dom_document_finalise(void)
 	for (int i = 0; i <= DOM_NODE_TYPE_COUNT; i++) {
 		if (__nodenames_utf8[i] != NULL) {
 			dom_string_unref(__nodenames_utf8[i]);
-			dom_string_unref(__nodenames_utf16[i]);
 		}
 	}
 
@@ -193,7 +147,6 @@ dom_exception _dom_document_finalise(void)
  * Create a Document
  *
  * \param impl     The DOM implementation owning the document
- * \param charset  The charset used for strings in the document
  * \param alloc    Memory (de)allocation function
  * \param pw       Pointer to client-specific private data
  * \param doc      Pointer to location to receive created document
@@ -204,8 +157,7 @@ dom_exception _dom_document_finalise(void)
  * The returned document will already be referenced.
  */
 dom_exception dom_document_create(struct dom_implementation *impl,
-		dom_string_charset charset, dom_alloc alloc, void *pw, 
-		struct dom_document **doc)
+		dom_alloc alloc, void *pw, struct dom_document **doc)
 {
 	struct dom_document *d;
 	dom_exception err;
@@ -233,7 +185,6 @@ dom_exception dom_document_create(struct dom_implementation *impl,
 	}
 
 	/* Initialise remaining type-specific data */
-	d->charset = charset;
 	if (impl != NULL)
 		dom_implementation_ref(impl);
 	d->impl = impl;
@@ -241,8 +192,7 @@ dom_exception dom_document_create(struct dom_implementation *impl,
 	d->nodelists = NULL;
 	d->maps = NULL;
 
-	d->nodenames = (charset == DOM_STRING_UTF8) ? __nodenames_utf8 
-						    : __nodenames_utf16;
+	d->nodenames = __nodenames_utf8;
 
 	*doc = d;
 
@@ -1047,55 +997,30 @@ dom_exception dom_document_rename_node(struct dom_document *doc,
 	return DOM_NOT_SUPPORTED_ERR;
 }
 
-/*                                                                         */
-/* ----------------------------------------------------------------------- */
-/*                                                                         */
-
 /**
- * Acquire a pointer to the base of the document buffer
- *
- * \param doc  Document to retrieve pointer from
- * \return Pointer to document buffer
+ * Create a DOM string, using a document's allocation context
  *
- * The document buffer is _not_ reference counted (as it is an implicit part
- * of the document). It is destroyed with the document, and thus after all
- * users have been destroyed.
- */
-const uint8_t *dom_document_get_base(struct dom_document *doc)
-{
-	UNUSED(doc);
-
-	return NULL;
-}
-
-/**
- * Set the document buffer pointer
+ * \param doc     The document
+ * \param data    Pointer to string data
+ * \param len     Length, in bytes, of string
+ * \param result  Pointer to location to receive result
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
  *
- * \param doc         Document to set buffer pointer of
- * \param buffer      Pointer to buffer
- * \param buffer_len  Length of buffer, in bytes
+ * The returned string will already be referenced, so there is no need
+ * to explicitly reference it.
  *
- * By calling this, ownership of the buffer is transferred to the document.
- * It should be called once per document node.
+ * The string of characters passed in will be copied for use by the
+ * returned DOM string.
  */
-void dom_document_set_buffer(struct dom_document *doc, uint8_t *buffer,
-		size_t buffer_len)
+dom_exception dom_document_create_string(struct dom_document *doc,
+		const uint8_t *data, size_t len, struct dom_string **result)
 {
-	UNUSED(doc);
-	UNUSED(buffer);
-	UNUSED(buffer_len);
+	return dom_string_create(doc->alloc, doc->pw, data, len, result);
 }
 
-/**
- * Retrieve the character set used to encode strings in the document
- *
- * \param doc  The document to get the charset of
- * \return The charset in use
- */
-dom_string_charset dom_document_get_charset(struct dom_document *doc)
-{
-	return doc->charset;
-}
+/*                                                                         */
+/* ----------------------------------------------------------------------- */
+/*                                                                         */
 
 /**
  * (De)allocate memory with a document's context
diff --git a/src/core/document.h b/src/core/document.h
index 6982b74..c5c13ac 100644
--- a/src/core/document.h
+++ b/src/core/document.h
@@ -27,12 +27,6 @@ dom_exception _dom_document_finalise(void);
 /* Destroy a document */
 void dom_document_destroy(struct dom_document *doc);
 
-/* Get base of document buffer */
-const uint8_t *dom_document_get_base(struct dom_document *doc);
-
-/* Get the document character set */ 
-dom_string_charset dom_document_get_charset(struct dom_document *doc);
-
 /* (De)allocate memory */
 void *dom_document_alloc(struct dom_document *doc, void *ptr, size_t size);
 
diff --git a/src/core/implementation.c b/src/core/implementation.c
index 9738b7c..e37b27d 100644
--- a/src/core/implementation.c
+++ b/src/core/implementation.c
@@ -94,7 +94,6 @@ dom_exception dom_implementation_create_document_type(
  * \param qname      The qualified name of the document element
  * \param doctype    The type of document to create
  * \param doc        Pointer to location to receive result
- * \param charset    The charset to use for strings in the document
  * \param alloc      Memory (de)allocation function
  * \param pw         Pointer to client-specific private data
  * \return DOM_NO_ERR on success,
@@ -127,11 +126,10 @@ dom_exception dom_implementation_create_document(
 		struct dom_string *namespace, struct dom_string *qname,
 		struct dom_document_type *doctype,
 		struct dom_document **doc,
-		dom_string_charset charset,
 		dom_alloc alloc, void *pw)
 {
 	return impl->create_document(impl, namespace, qname, doctype, doc,
-			charset, alloc, pw);
+			alloc, pw);
 }
 
 /**
diff --git a/src/core/node.c b/src/core/node.c
index 2284e4f..0eebfb0 100644
--- a/src/core/node.c
+++ b/src/core/node.c
@@ -306,15 +306,8 @@ dom_exception dom_node_get_node_name(struct dom_node *node,
 		struct dom_string *colon;
 		dom_exception err;
 
-		/* ugh! */
-		/** \todo Assumes little endian */
-		err = dom_string_create_from_const_ptr(node->owner,	
-			(const uint8_t *) (
-				(dom_document_get_charset(node->owner) == 
-					DOM_STRING_UTF8) ? ":" : ":\0"),
-			(dom_document_get_charset(node->owner) == 
-					DOM_STRING_UTF8) ? 1 : 2,
-			&colon);
+		err = dom_document_create_string(node->owner, 
+				(const uint8_t *) ":", SLEN(":"), &colon);
 		if (err != DOM_NO_ERR) {
 			return err;
 		}
@@ -1639,7 +1632,7 @@ bool _dom_node_readonly(const struct dom_node *node)
  * \param previous  Previous node in sibling list, or NULL if none
  * \param next      Next node in sibling list, or NULL if none
  */
-inline void _dom_node_attach(struct dom_node *node, struct dom_node *parent, 
+void _dom_node_attach(struct dom_node *node, struct dom_node *parent, 
 		struct dom_node *previous, struct dom_node *next)
 {
 	_dom_node_attach_range(node, node, parent, previous, next);
@@ -1650,7 +1643,7 @@ inline void _dom_node_attach(struct dom_node *node, struct dom_node *parent,
  *
  * \param node  The node to detach
  */
-inline void _dom_node_detach(struct dom_node *node)
+void _dom_node_detach(struct dom_node *node)
 {
 	_dom_node_detach_range(node, node);
 }
@@ -1666,7 +1659,7 @@ inline void _dom_node_detach(struct dom_node *node)
  *
  * The range is assumed to be a linked list of sibling nodes.
  */
-inline void _dom_node_attach_range(struct dom_node *first, 
+void _dom_node_attach_range(struct dom_node *first, 
 		struct dom_node *last,
 		struct dom_node *parent, 
 		struct dom_node *previous, 
@@ -1697,7 +1690,7 @@ inline void _dom_node_attach_range(struct dom_node *first,
  *
  * The range is assumed to be a linked list of sibling nodes.
  */
-inline void _dom_node_detach_range(struct dom_node *first, 
+void _dom_node_detach_range(struct dom_node *first, 
 		struct dom_node *last)
 {
 	if (first->previous != NULL)
@@ -1727,7 +1720,7 @@ inline void _dom_node_detach_range(struct dom_node *first,
  * we want to perform any special replacement-related behaviour 
  * at a later date.
  */
-inline void _dom_node_replace(struct dom_node *old,
+void _dom_node_replace(struct dom_node *old,
 		struct dom_node *replacement)
 {
 	struct dom_node *first, *last;
diff --git a/src/core/string.c b/src/core/string.c
index 8ec44aa..2540e26 100644
--- a/src/core/string.c
+++ b/src/core/string.c
@@ -9,62 +9,37 @@
 #include <inttypes.h>
 #include <string.h>
 
+#include <parserutils/charset/utf8.h>
+
 #include <dom/core/string.h>
 
 #include "core/document.h"
 #include "utils/utils.h"
-#include "utils/utf8.h"
-#include "utils/utf16.h"
 
 /**
  * A DOM string
  *
- * DOM strings store either a pointer to allocated data, a pointer
- * to constant data or an offset into a document buffer.
- *
- * They are reference counted so freeing is performed correctly.
+ * Strings are reference counted so destruction is performed correctly.
  */
 struct dom_string {
-	enum { DOM_STRING_PTR,
-	       DOM_STRING_CONST_PTR,
-	       DOM_STRING_OFFSET,
-	       DOM_STRING_PTR_NODOC
-	} type;				/**< String type */
-
-	dom_string_charset charset;	/**< Charset of string */
-
-	union {
-		uint8_t *ptr;
-		const uint8_t *cptr;
-		uint32_t offset;
-	} data;				/**< Type-specific data */
+	uint8_t *ptr;			/**< Pointer to string data */
 
 	size_t len;			/**< Byte length of string */
 
-	union {
-		struct dom_document *doc;	/**< Owning document */
-		struct {
-			dom_alloc alloc;	/**< Memory (de)allocation
-						 * function */
-			void *pw;	/**< Client-specific data */
-		} nodoc;
-	} ctx;				/**< Allocation context */
+	dom_alloc alloc;		/**< Memory (de)allocation function */
+	void *pw;			/**< Client-specific data */
 
 	uint32_t refcnt;		/**< Reference count */
 };
 
 static struct dom_string empty_string = { 
-	.type = DOM_STRING_CONST_PTR,
-	.charset = DOM_STRING_UTF8,
-	.data.ptr = NULL,
+	.ptr = NULL,
 	.len = 0,
-	.ctx.doc = NULL,
+	.alloc = NULL,
+	.pw = NULL,
 	.refcnt = 1
 };
 
-static dom_exception __dom_string_get_data(struct dom_string *str,
-		const uint8_t **data, size_t *len);
-
 /**
  * Claim a reference on a DOM string
  *
@@ -86,155 +61,18 @@ void dom_string_ref(struct dom_string *str)
 void dom_string_unref(struct dom_string *str)
 {
 	if (--str->refcnt == 0) {
-		if (str->type == DOM_STRING_PTR_NODOC) {
-			str->ctx.nodoc.alloc(str->data.ptr, 0,
-					str->ctx.nodoc.pw);
-
-			str->ctx.nodoc.alloc(str, 0, str->ctx.nodoc.pw);
-		} else {
-			if (str->type == DOM_STRING_PTR) {
-				dom_document_alloc(str->ctx.doc,
-						str->data.ptr, 0);
-			}
-
-			dom_document_alloc(str->ctx.doc, str, 0);
+		if (str->alloc != NULL) {
+			str->alloc(str->ptr, 0, str->pw);
+			str->alloc(str, 0, str->pw);
 		}
 	}
 }
 
 /**
- * Create a DOM string from an offset into the document buffer
- *
- * \param doc  The document in which the string resides
- * \param off  Offset from start of document buffer
- * \param len  Length, in bytes, of string
- * \param str  Pointer to location to receive pointer to new string
- * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
- *
- * The returned string will already be referenced, so there is no need
- * to explicitly reference it.
- */
-dom_exception dom_string_create_from_off(struct dom_document *doc,
-		uint32_t off, size_t len, struct dom_string **str)
-{
-	struct dom_string *ret;
-
-	ret = dom_document_alloc(doc, NULL, sizeof(struct dom_string));
-	if (ret == NULL)
-		return DOM_NO_MEM_ERR;
-
-	ret->type = DOM_STRING_OFFSET;
-
-	ret->charset = dom_document_get_charset(doc);
-
-	ret->data.offset = off;
-
-	ret->len = len;
-
-	ret->ctx.doc = doc;
-
-	ret->refcnt = 1;
-
-	*str = ret;
-
-	return DOM_NO_ERR;
-}
-
-/**
  * Create a DOM string from a string of characters
  *
- * \param doc  The document in which the string resides
- * \param ptr  Pointer to string of characters
- * \param len  Length, in bytes, of string of characters
- * \param str  Pointer to location to receive pointer to new string
- * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
- *
- * The returned string will already be referenced, so there is no need
- * to explicitly reference it.
- *
- * The string of characters passed in will be copied for use by the
- * returned DOM string.
- */
-dom_exception dom_string_create_from_ptr(struct dom_document *doc,
-		const uint8_t *ptr, size_t len, struct dom_string **str)
-{
-	struct dom_string *ret;
-
-	ret = dom_document_alloc(doc, NULL, sizeof(struct dom_string));
-	if (ret == NULL)
-		return DOM_NO_MEM_ERR;
-
-	ret->data.ptr = dom_document_alloc(doc, NULL, len);
-	if (ret->data.ptr == NULL) {
-		dom_document_alloc(doc, ret, 0);
-		return DOM_NO_MEM_ERR;
-	}
-
-	ret->type = DOM_STRING_PTR;
-
-	ret->charset = dom_document_get_charset(doc);
-
-	memcpy(ret->data.ptr, ptr, len);
-
-	ret->len = len;
-
-	ret->ctx.doc = doc;
-
-	ret->refcnt = 1;
-
-	*str = ret;
-
-	return DOM_NO_ERR;
-}
-
-/**
- * Create a DOM string from a constant string of characters
- *
- * \param doc  The document in which the string resides
- * \param ptr  Pointer to string of characters
- * \param len  Length, in bytes, of string of characters
- * \param str  Pointer to location to receive pointer to new string
- * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
- *
- * The returned string will already be referenced, so there is no need
- * to explicitly reference it.
- *
- * The string of characters passed in will _not_ be copied for use by the
- * returned DOM string.
- */
-dom_exception dom_string_create_from_const_ptr(struct dom_document *doc,
-		const uint8_t *ptr, size_t len, struct dom_string **str)
-{
-	struct dom_string *ret;
-
-	ret = dom_document_alloc(doc, NULL, sizeof(struct dom_string));
-	if (ret == NULL)
-		return DOM_NO_MEM_ERR;
-
-	ret->type = DOM_STRING_CONST_PTR;
-
-	ret->charset = dom_document_get_charset(doc);
-
-	ret->data.cptr = ptr;
-
-	ret->len = len;
-
-	ret->ctx.doc = doc;
-
-	ret->refcnt = 1;
-
-	*str = ret;
-
-	return DOM_NO_ERR;
-}
-
-/**
- * Create a DOM string from a string of characters that does not belong
- * to a document
- *
  * \param alloc    Memory (de)allocation function
  * \param pw       Pointer to client-specific private data
- * \param charset  The charset of the string
  * \param ptr      Pointer to string of characters
  * \param len      Length, in bytes, of string of characters
  * \param str      Pointer to location to receive result
@@ -243,12 +81,11 @@ dom_exception dom_string_create_from_const_ptr(struct dom_document *doc,
  * The returned string will already be referenced, so there is no need
  * to explicitly reference it.
  *
- * The string of characters passed in will be copied for use by the
+ * The string of characters passed in will be copied for use by the 
  * returned DOM string.
  */
-dom_exception dom_string_create_from_ptr_no_doc(dom_alloc alloc, void *pw,
-		dom_string_charset charset, const uint8_t *ptr, size_t len, 
-		struct dom_string **str)
+dom_exception dom_string_create(dom_alloc alloc, void *pw,
+		const uint8_t *ptr, size_t len, struct dom_string **str)
 {
 	struct dom_string *ret;
 
@@ -256,22 +93,18 @@ dom_exception dom_string_create_from_ptr_no_doc(dom_alloc alloc, void *pw,
 	if (ret == NULL)
 		return DOM_NO_MEM_ERR;
 
-	ret->data.ptr = alloc(NULL, len, pw);
-	if (ret->data.ptr == NULL) {
+	ret->ptr = alloc(NULL, len, pw);
+	if (ret->ptr == NULL) {
 		alloc(ret, 0, pw);
 		return DOM_NO_MEM_ERR;
 	}
 
-	ret->type = DOM_STRING_PTR_NODOC;
-
-	ret->charset = charset;
-
-	memcpy(ret->data.ptr, ptr, len);
+	memcpy(ret->ptr, ptr, len);
 
 	ret->len = len;
 
-	ret->ctx.nodoc.alloc = alloc;
-	ret->ctx.nodoc.pw = pw;
+	ret->alloc = alloc;
+	ret->pw = pw;
 
 	ret->refcnt = 1;
 
@@ -291,48 +124,16 @@ dom_exception dom_string_create_from_ptr_no_doc(dom_alloc alloc, void *pw,
  */
 int dom_string_cmp(struct dom_string *s1, struct dom_string *s2)
 {
-	const uint8_t *d1 = NULL;
-	const uint8_t *d2 = NULL;
-	size_t l1, l2;
-	dom_exception err;
-
-	err = __dom_string_get_data(s1, &d1, &l1);
-	if (err != DOM_NO_ERR)
-		return 1; /* arbitrary */
+	if (s1 == NULL)
+		s1 = &empty_string;
 
-	err = __dom_string_get_data(s2, &d2, &l2);
-	if (err != DOM_NO_ERR)
-		return 1; /* arbitrary */
+	if (s2 == NULL)
+		s2 = &empty_string;
 
-	while (l1 > 0 && l2 > 0) {
-		uint32_t c1, c2;
-		size_t cl1, cl2;
-		charset_error err;
-
-		err = (s1->charset == DOM_STRING_UTF8) 
-				? _dom_utf8_to_ucs4(d1, l1, &c1, &cl1) 
-				: _dom_utf16_to_ucs4(d1, l1, &c1, &cl1);
-		if (err != CHARSET_OK) {
-		}
+	if (s1->len != s2->len)
+		return 1;
 
-		err = (s2->charset == DOM_STRING_UTF8)
-				? _dom_utf8_to_ucs4(d2, l2, &c2, &cl2)
-				: _dom_utf16_to_ucs4(d2, l2, &c2, &cl2);
-		if (err != CHARSET_OK) {
-		}
-
-		if (c1 != c2) {
-			return (int)(c1 - c2);
-		}
-
-		d1 += cl1;
-		d2 += cl2;
-
-		l1 -= cl1;
-		l2 -= cl2;
-	}
-
-	return (int)(l1 - l2);
+	return memcmp(s1->ptr, s2->ptr, s1->len);
 }
 
 /**
@@ -349,31 +150,28 @@ int dom_string_icmp(struct dom_string *s1, struct dom_string *s2)
 	const uint8_t *d1 = NULL;
 	const uint8_t *d2 = NULL;
 	size_t l1, l2;
-	dom_exception err;
 
-	err = __dom_string_get_data(s1, &d1, &l1);
-	if (err != DOM_NO_ERR)
-		return 1; /* arbitrary */
+	if (s1 == NULL)
+		s1 = &empty_string;
+	if (s2 == NULL)
+		s2 = &empty_string;
 
-	err = __dom_string_get_data(s2, &d2, &l2);
-	if (err != DOM_NO_ERR)
-		return 1; /* arbitrary */
+	d1 = s1->ptr;
+	d2 = s2->ptr;
+	l1 = s1->len;
+	l2 = s2->len;
 
 	while (l1 > 0 && l2 > 0) {
 		uint32_t c1, c2;
 		size_t cl1, cl2;
-		charset_error err;
+		parserutils_error err;
 
-		err = (s1->charset == DOM_STRING_UTF8) 
-				? _dom_utf8_to_ucs4(d1, l1, &c1, &cl1) 
-				: _dom_utf16_to_ucs4(d1, l1, &c1, &cl1);
-		if (err != CHARSET_OK) {
+		err = parserutils_charset_utf8_to_ucs4(d1, l1, &c1, &cl1); 
+		if (err != PARSERUTILS_OK) {
 		}
 
-		err = (s2->charset == DOM_STRING_UTF8)
-				? _dom_utf8_to_ucs4(d2, l2, &c2, &cl2)
-				: _dom_utf16_to_ucs4(d2, l2, &c2, &cl2);
-		if (err != CHARSET_OK) {
+		err = parserutils_charset_utf8_to_ucs4(d2, l2, &c2, &cl2);
+		if (err != PARSERUTILS_OK) {
 		}
 
 		/** \todo improved lower-casing algorithm */
@@ -403,20 +201,19 @@ uint32_t dom_string_index(struct dom_string *str, uint32_t chr)
 	const uint8_t *s;
 	size_t clen, slen;
 	uint32_t c, index;
-	charset_error err;
+	parserutils_error err;
 
-	__dom_string_get_data(str, &s, &slen);
+	if (str == NULL)
+		str = &empty_string;
+
+	s = str->ptr;
+	slen = str->len;
 
 	index = 0;
 
 	while (slen > 0) {
-		if (str->charset == DOM_STRING_UTF8) {
-			err = _dom_utf8_to_ucs4(s, slen, &c, &clen);
-		} else {
-			err = _dom_utf16_to_ucs4(s, slen, &c, &clen);
-		}
-
-		if (err != CHARSET_OK) {
+		err = parserutils_charset_utf8_to_ucs4(s, slen, &c, &clen);
+		if (err != PARSERUTILS_OK) {
 			return (uint32_t) -1;
 		}
 
@@ -444,28 +241,25 @@ uint32_t dom_string_rindex(struct dom_string *str, uint32_t chr)
 	const uint8_t *s;
 	size_t clen, slen;
 	uint32_t c, index;
-	charset_error err;
+	parserutils_error err;
+
+	if (str == NULL)
+		str = &empty_string;
 
-	__dom_string_get_data(str, &s, &slen);
+	s = str->ptr;
+	slen = str->len;
 
 	index = dom_string_length(str);
 
 	while (slen > 0) {
-		if (str->charset == DOM_STRING_UTF8) {
-			err = _dom_utf8_prev(s, slen, &clen);
-			if (err == CHARSET_OK) {
-				err = _dom_utf8_to_ucs4(s + clen, slen - clen, 
-						&c, &clen);
-			}
-		} else {
-			err = _dom_utf16_prev(s, slen, &clen);
-			if (err == CHARSET_OK) {
-				err = _dom_utf16_to_ucs4(s + clen, slen - clen,
-						&c, &clen);
-			}
+		err = parserutils_charset_utf8_prev(s, slen, 
+				(uint32_t *) &clen);
+		if (err == PARSERUTILS_OK) {
+			err = parserutils_charset_utf8_to_ucs4(s + clen, 
+					slen - clen, &c, &clen);
 		}
 
-		if (err != CHARSET_OK) {
+		if (err != PARSERUTILS_OK) {
 			return (uint32_t) -1;
 		}
 
@@ -478,7 +272,6 @@ uint32_t dom_string_rindex(struct dom_string *str, uint32_t chr)
 	}
 
 	return (uint32_t) -1;
-
 }
 
 /**
@@ -489,20 +282,14 @@ uint32_t dom_string_rindex(struct dom_string *str, uint32_t chr)
  */
 uint32_t dom_string_length(struct dom_string *str)
 {
-	const uint8_t *s;
-	size_t slen;
-	uint32_t clen;
-	charset_error err;
-
-	__dom_string_get_data(str, &s, &slen);
+	size_t clen;
+	parserutils_error err;
 
-	if (str->charset == DOM_STRING_UTF8) {
-		err = _dom_utf8_length(s, slen, &clen);
-	} else {
-		err = _dom_utf16_length(s, slen, &clen);
-	}
+	if (str == NULL)
+		str = &empty_string;
 
-	if (err != CHARSET_OK) {
+	err = parserutils_charset_utf8_length(str->ptr, str->len, &clen);
+	if (err != PARSERUTILS_OK) {
 		return 0;
 	}
 
@@ -527,60 +314,28 @@ dom_exception dom_string_concat(struct dom_string *s1, struct dom_string *s2,
 		struct dom_string **result)
 {
 	struct dom_string *concat;
-	const uint8_t *s;
-	size_t slen;
 
-	if (s1->type == DOM_STRING_PTR_NODOC) {
-		concat = s1->ctx.nodoc.alloc(NULL, 
-				sizeof(struct dom_string), s1->ctx.nodoc.pw);
-	} else {
-		concat = dom_document_alloc(s1->ctx.doc, 
-				NULL, sizeof(struct dom_string));
-	}
+	concat = s1->alloc(NULL, sizeof(struct dom_string), s1->pw);
 
 	if (concat == NULL) {
 		return DOM_NO_MEM_ERR;
 	}
 
-	/** \todo support attempted concatenation of mismatched charsets */
+	concat->ptr = s1->alloc(NULL, s1->len + s2->len, s1->pw);
+	if (concat->ptr == NULL) {
+		s1->alloc(concat, 0, s1->pw);
 
-	if (s1->type == DOM_STRING_PTR_NODOC) {
-		concat->data.ptr = s1->ctx.nodoc.alloc(NULL, 
-				s1->len + s2->len, s1->ctx.nodoc.pw);
-	} else {
-		concat->data.ptr = dom_document_alloc(s1->ctx.doc, 
-				NULL, s1->len + s2->len);
-	}
-	if (concat->data.ptr == NULL) {
-		if (s1->type == DOM_STRING_PTR_NODOC) {
-			s1->ctx.nodoc.alloc(concat, 0, s1->ctx.nodoc.pw);
-		} else {
-			dom_document_alloc(s1->ctx.doc, concat, 0);
-		}
 		return DOM_NO_MEM_ERR;
 	}
 
-	concat->type = (s1->type == DOM_STRING_PTR_NODOC) 
-			? DOM_STRING_PTR_NODOC : DOM_STRING_PTR;
-
-	concat->charset = s1->charset;
-
-	__dom_string_get_data(s1, &s, &slen);
+	memcpy(concat->ptr, s1->ptr, s1->len);
 
-	memcpy(concat->data.ptr, s, slen);
-
-	__dom_string_get_data(s2, &s, &slen);
-
-	memcpy(concat->data.ptr + s1->len, s, slen);
+	memcpy(concat->ptr + s1->len, s2->ptr, s2->len);
 
 	concat->len = s1->len + s2->len;
 
-	if (concat->type == DOM_STRING_PTR_NODOC) {
-		concat->ctx.nodoc.alloc = s1->ctx.nodoc.alloc;
-		concat->ctx.nodoc.pw = s1->ctx.nodoc.pw;
-	} else {
-		concat->ctx.doc = s1->ctx.doc;
-	}
+	concat->alloc = s1->alloc;
+	concat->pw = s1->pw;
 
 	concat->refcnt = 1;
 
@@ -607,12 +362,10 @@ dom_exception dom_string_concat(struct dom_string *s1, struct dom_string *s2,
 dom_exception dom_string_substr(struct dom_string *str, 
 		uint32_t i1, uint32_t i2, struct dom_string **result)
 {
-	const uint8_t *s;
-	size_t slen;
+	const uint8_t *s = str->ptr;
+	size_t slen = str->len;
 	size_t b1, b2;
-	charset_error err;
-
-	__dom_string_get_data(str, &s, &slen);
+	parserutils_error err;
 
 	/* Initialise the byte index of the start to 0 */
 	b1 = 0;
@@ -621,13 +374,9 @@ dom_exception dom_string_substr(struct dom_string *str,
 
 	/* Calculate the byte index of the start */
 	while (i1 > 0) {
-		if (str->charset == DOM_STRING_UTF8) {
-			err = _dom_utf8_next(s, slen - b1, b1, &b1);
-		} else {
-			err = _dom_utf16_next(s, slen - b1, b1, &b1);
-		}
-
-		if (err != CHARSET_OK) {
+		err = parserutils_charset_utf8_next(s, slen - b1, b1, 
+				(uint32_t *) &b1);
+		if (err != PARSERUTILS_OK) {
 			return DOM_NO_MEM_ERR;
 		}
 
@@ -639,13 +388,10 @@ dom_exception dom_string_substr(struct dom_string *str,
 
 	/* Calculate the byte index of the end */
 	while (i2 > 0) {
-		if (str->charset == DOM_STRING_UTF8) {
-			err = _dom_utf8_next(s, slen - b2, b2, &b2);
-		} else {
-			err = _dom_utf16_next(s, slen - b2, b2, &b2);
-		}
+		err = parserutils_charset_utf8_next(s, slen - b2, b2, 
+				(uint32_t *) &b2);
 
-		if (err != CHARSET_OK) {
+		if (err != PARSERUTILS_OK) {
 			return DOM_NO_MEM_ERR;
 		}
 
@@ -653,14 +399,7 @@ dom_exception dom_string_substr(struct dom_string *str,
 	}
 
 	/* Create a string from the specified byte range */
-	return (str->type == DOM_STRING_PTR_NODOC)
-			? dom_string_create_from_ptr_no_doc(
-					str->ctx.nodoc.alloc,
-					str->ctx.nodoc.pw,
-					str->charset, 
-					s + b1, b2 - b1, result)
-			: dom_string_create_from_ptr(str->ctx.doc,
-					s + b1, b2 - b1, result);
+	return dom_string_create(str->alloc, str->pw, s + b1, b2 - b1, result);
 }
 
 /**
@@ -688,11 +427,12 @@ dom_exception dom_string_insert(struct dom_string *target,
 	const uint8_t *t, *s;
 	uint32_t tlen, slen, clen;
 	uint32_t ins = 0;
-	charset_error err;
-
-	__dom_string_get_data(target, &t, &tlen);
+	parserutils_error err;
 
-	__dom_string_get_data(source, &s, &slen);
+	t = target->ptr;
+	tlen = target->len;
+	s = source->ptr;
+	slen = source->len;
 
 	clen = dom_string_length(target);
 
@@ -706,13 +446,10 @@ dom_exception dom_string_insert(struct dom_string *target,
 		ins = tlen;
 	} else {
 		while (offset > 0) {
-			if (target->charset == DOM_STRING_UTF8) {
-				err = _dom_utf8_next(t, tlen - ins, ins, &ins);
-			} else {
-				err = _dom_utf16_next(t, tlen - ins, ins, &ins);
-			}
+			err = parserutils_charset_utf8_next(t, tlen - ins, 
+					ins, &ins);
 
-			if (err != CHARSET_OK) {
+			if (err != PARSERUTILS_OK) {
 				return DOM_NO_MEM_ERR;
 			}
 
@@ -721,65 +458,36 @@ dom_exception dom_string_insert(struct dom_string *target,
 	}
 
 	/* Allocate result string */
-	if (target->type == DOM_STRING_PTR_NODOC) {
-		res = target->ctx.nodoc.alloc(NULL, sizeof(struct dom_string), 
-				target->ctx.nodoc.pw);
-	} else {
-		res = dom_document_alloc(target->ctx.doc, 
-				NULL, sizeof(struct dom_string));
-	}
-
+	res = target->alloc(NULL, sizeof(struct dom_string), target->pw);
 	if (res == NULL) {
 		return DOM_NO_MEM_ERR;
 	}
 
-	/** \todo support insertion of a string from a different charset  */
-
 	/* Allocate data buffer for result contents */
-	if (target->type == DOM_STRING_PTR_NODOC) {
-		res->data.ptr = target->ctx.nodoc.alloc(NULL, 
-				tlen + slen, target->ctx.nodoc.pw);
-	} else {
-		res->data.ptr = dom_document_alloc(target->ctx.doc, 
-				NULL, tlen + slen);
-	}
-	if (res->data.ptr == NULL) {
-		if (target->type == DOM_STRING_PTR_NODOC) {
-			target->ctx.nodoc.alloc(res, 0, target->ctx.nodoc.pw);
-		} else {
-			dom_document_alloc(target->ctx.doc, res, 0);
-		}
+	res->ptr = target->alloc(NULL, tlen + slen, target->pw);
+	if (res->ptr == NULL) {
+		target->alloc(res, 0, target->pw);
 		return DOM_NO_MEM_ERR;
 	}
 
-	/* Populate result members */
-	res->type = (target->type == DOM_STRING_PTR_NODOC) 
-			? DOM_STRING_PTR_NODOC : DOM_STRING_PTR;
-
-	res->charset = target->charset;
-
 	/* Copy initial portion of target, if any, into result */
 	if (ins > 0) {
-		memcpy(res->data.ptr, t, ins);
+		memcpy(res->ptr, t, ins);
 	}
 
 	/* Copy inserted data into result */
-	memcpy(res->data.ptr + ins, s, slen);
+	memcpy(res->ptr + ins, s, slen);
 
 	/* Copy remainder of target, if any, into result */
 	if (tlen - ins > 0) {
-		memcpy(res->data.ptr + ins + slen, t + ins, tlen - ins);
+		memcpy(res->ptr + ins + slen, t + ins, tlen - ins);
 	}
 
 	res->len = tlen + slen;
 
-	if (res->type == DOM_STRING_PTR_NODOC) {
-		res->ctx.nodoc.alloc = target->ctx.nodoc.alloc;
-		res->ctx.nodoc.pw = target->ctx.nodoc.pw;
-	} else {
-		res->ctx.doc = target->ctx.doc;
-	}
-
+	res->alloc = target->alloc;
+	res->pw = target->pw;
+	
 	res->refcnt = 1;
 
 	*result = res;
@@ -811,11 +519,12 @@ dom_exception dom_string_replace(struct dom_string *target,
 	const uint8_t *t, *s;
 	uint32_t tlen, slen;
 	uint32_t b1, b2;
-	charset_error err;
-
-	__dom_string_get_data(target, &t, &tlen);
+	parserutils_error err;
 
-	__dom_string_get_data(source, &s, &slen);
+	t = target->ptr;
+	tlen = target->len;
+	s = source->ptr;
+	slen = source->len;
 
 	/* Initialise the byte index of the start to 0 */
 	b1 = 0;
@@ -824,13 +533,9 @@ dom_exception dom_string_replace(struct dom_string *target,
 
 	/* Calculate the byte index of the start */
 	while (i1 > 0) {
-		if (target->charset == DOM_STRING_UTF8) {
-			err = _dom_utf8_next(s, slen - b1, b1, &b1);
-		} else {
-			err = _dom_utf16_next(s, slen - b1, b1, &b1);
-		}
+		err = parserutils_charset_utf8_next(s, slen - b1, b1, &b1);
 
-		if (err != CHARSET_OK) {
+		if (err != PARSERUTILS_OK) {
 			return DOM_NO_MEM_ERR;
 		}
 
@@ -842,13 +547,9 @@ dom_exception dom_string_replace(struct dom_string *target,
 
 	/* Calculate the byte index of the end */
 	while (i2 > 0) {
-		if (target->charset == DOM_STRING_UTF8) {
-			err = _dom_utf8_next(s, slen - b2, b2, &b2);
-		} else {
-			err = _dom_utf16_next(s, slen - b2, b2, &b2);
-		}
+		err = parserutils_charset_utf8_next(s, slen - b2, b2, &b2);
 
-		if (err != CHARSET_OK) {
+		if (err != PARSERUTILS_OK) {
 			return DOM_NO_MEM_ERR;
 		}
 
@@ -856,66 +557,38 @@ dom_exception dom_string_replace(struct dom_string *target,
 	}
 
 	/* Allocate result string */
-	if (target->type == DOM_STRING_PTR_NODOC) {
-		res = target->ctx.nodoc.alloc(NULL, sizeof(struct dom_string), 
-				target->ctx.nodoc.pw);
-	} else {
-		res = dom_document_alloc(target->ctx.doc, 
-				NULL, sizeof(struct dom_string));
-	}
+	res = target->alloc(NULL, sizeof(struct dom_string), target->pw);
 
 	if (res == NULL) {
 		return DOM_NO_MEM_ERR;
 	}
 
-	/** \todo support insertion of a string from a different charset  */
-
 	/* Allocate data buffer for result contents */
-	if (target->type == DOM_STRING_PTR_NODOC) {
-		res->data.ptr = target->ctx.nodoc.alloc(NULL, 
-				tlen + slen - (b2 - b1), target->ctx.nodoc.pw);
-	} else {
-		res->data.ptr = dom_document_alloc(target->ctx.doc, 
-				NULL, tlen + slen - (b2 - b1));
-	}
-	if (res->data.ptr == NULL) {
-		if (target->type == DOM_STRING_PTR_NODOC) {
-			target->ctx.nodoc.alloc(res, 0, target->ctx.nodoc.pw);
-		} else {
-			dom_document_alloc(target->ctx.doc, res, 0);
-		}
+	res->ptr = target->alloc(NULL, tlen + slen - (b2 - b1), target->pw);
+	if (res->ptr == NULL) {
+		target->alloc(res, 0, target->pw);
 		return DOM_NO_MEM_ERR;
 	}
 
-	/* Populate result members */
-	res->type = (target->type == DOM_STRING_PTR_NODOC) 
-			? DOM_STRING_PTR_NODOC : DOM_STRING_PTR;
-
-	res->charset = target->charset;
-
 	/* Copy initial portion of target, if any, into result */
 	if (b1 > 0) {
-		memcpy(res->data.ptr, t, b1);
+		memcpy(res->ptr, t, b1);
 	}
 
 	/* Copy replacement data into result */
 	if (slen > 0) {
-		memcpy(res->data.ptr + b1, s, slen);
+		memcpy(res->ptr + b1, s, slen);
 	}
 
 	/* Copy remainder of target, if any, into result */
 	if (tlen - b2 > 0) {
-		memcpy(res->data.ptr + b1 + slen, t + b2, tlen - b2);
+		memcpy(res->ptr + b1 + slen, t + b2, tlen - b2);
 	}
 
 	res->len = tlen + slen - (b2 - b1);
 
-	if (res->type == DOM_STRING_PTR_NODOC) {
-		res->ctx.nodoc.alloc = target->ctx.nodoc.alloc;
-		res->ctx.nodoc.pw = target->ctx.nodoc.pw;
-	} else {
-		res->ctx.doc = target->ctx.doc;
-	}
+	res->alloc = target->alloc;
+	res->pw = target->pw;
 
 	res->refcnt = 1;
 
@@ -940,19 +613,8 @@ dom_exception dom_string_replace(struct dom_string *target,
 dom_exception dom_string_dup(struct dom_string *str, 
 		struct dom_string **result)
 {
-	const uint8_t *s;
-	size_t slen;
-
-	__dom_string_get_data(str, &s, &slen);
-
-	return str->type == DOM_STRING_PTR_NODOC 
-			? dom_string_create_from_ptr_no_doc(
-				str->ctx.nodoc.alloc,
-				str->ctx.nodoc.pw,
-				str->charset,
-				s, slen, result) 
-			: dom_string_create_from_ptr(str->ctx.doc,
-					s, slen, result);
+	return dom_string_create(str->alloc, str->pw, str->ptr, str->len, 
+			result);
 }
 
 /**
@@ -963,12 +625,10 @@ dom_exception dom_string_dup(struct dom_string *str,
  */
 uint32_t dom_string_hash(struct dom_string *str)
 {
-	const uint8_t *s;
-	size_t slen;
+	const uint8_t *s = str->ptr;
+	size_t slen = str->len;
 	uint32_t hash = 0x01000193;
 
-	__dom_string_get_data(str, &s, &slen);
-
 	while (slen > 0) {
 		hash *= 0x01000193;
 		hash ^= *s;
@@ -980,47 +640,3 @@ uint32_t dom_string_hash(struct dom_string *str)
 	return hash;
 }
 
-/*                                                                           */
-/*---------------------------------------------------------------------------*/
-/*                                                                           */
-
-/**
- * Get a pointer to the string of characters within a DOM string
- *
- * \param str   Pointer to DOM string to retrieve pointer from
- * \param data  Pointer to location to receive data
- * \param len   Pointer to location to receive byte length of data
- * \return DOM_NO_ERR on success
- *
- * The caller must have previously claimed a reference on the DOM string.
- * The returned pointer must not be freed.
- */
-dom_exception __dom_string_get_data(struct dom_string *str,
-		const uint8_t **data, size_t *len)
-{
-	/* Assume that a NULL str pointer indicates the empty string */
-	if (str == NULL)
-		str = &empty_string;
-
-	switch (str->type) {
-	case DOM_STRING_PTR:
-		*data = str->data.ptr;
-		break;
-	case DOM_STRING_CONST_PTR:
-		*data = str->data.cptr;
-		break;
-	case DOM_STRING_OFFSET:
-		*data = dom_document_get_base(str->ctx.doc) +
-				str->data.offset;
-		break;
-	case DOM_STRING_PTR_NODOC:
-		*data = str->data.ptr;
-		break;
-	}
-
-	*len = str->len;
-
-	return DOM_NO_ERR;
-}
-
-
author	John Mark Bell <jmb@netsurf-browser.org>	2009-03-03 18:08:01 +0000
committer	John Mark Bell <jmb@netsurf-browser.org>	2009-03-03 18:08:01 +0000
commit	702d96e703473dbe4481a42c472b4aae423a51d1 (patch)
tree	9dc767860ebea940f1d936d14d69073b4e289c92 /src/core
parent	eeb651eadb47228ad41c21b80d75afc17c2924f8 (diff)
download	libdom-702d96e703473dbe4481a42c472b4aae423a51d1.tar.gz libdom-702d96e703473dbe4481a42c472b4aae423a51d1.tar.bz2