summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author John Mark Bell <jmb@netsurf-browser.org> 2007-10-03 23:44:32 +0000
committer John Mark Bell <jmb@netsurf-browser.org> 2007-10-03 23:44:32 +0000
commit f3c02943d778e9b00064bf0e103aaecb06ab5e01 (patch)
tree 96549872654851bdb23feafbae373060a81b10b5
parent f8897a9655ac97471d34f7a4071a721e2ee46560 (diff)
download libdom-f3c02943d778e9b00064bf0e103aaecb06ab5e01.tar.gz
libdom-f3c02943d778e9b00064bf0e103aaecb06ab5e01.tar.bz2
Make the dom string class more useful.
Purge all trace of dom_string_get_data() from outside the dom string implementation. Port affected code to new, more useful, APIs. This also fixes the interned node name strings mentioned in the previous commit. svn path=/trunk/dom/; revision=3621
-rw-r--r-- include/dom/core/string.h 26
-rw-r--r-- src/bootstrap/init_fini.c 25
-rw-r--r-- src/core/attr.c 104
-rw-r--r-- src/core/document.c 184
-rw-r--r-- src/core/document.h 5
-rw-r--r-- src/core/element.c 2
-rw-r--r-- src/core/node.c 48
-rw-r--r-- src/core/string.c 422
-rw-r--r-- src/utils/namespace.c 138
-rw-r--r-- src/utils/namespace.h 3
10 files changed, 675 insertions, 282 deletions
diff --git a/include/dom/core/string.h b/include/dom/core/string.h
index 935e2c2..584db00 100644
--- a/include/dom/core/string.h
+++ b/include/dom/core/string.h
@@ -42,14 +42,32 @@ dom_exception dom_string_create_from_ptr_no_doc(dom_alloc alloc, void *pw,
dom_string_charset charset, const uint8_t *ptr, size_t len,
struct dom_string **str);
-/* Get a pointer to the string of characters within a DOM string */
-dom_exception dom_string_get_data(struct dom_string *str,
- const uint8_t **data, size_t *len);
-
/* Case sensitively compare two DOM strings */
int dom_string_cmp(struct dom_string *s1, struct dom_string *s2);
/* Case insensitively compare two DOM strings */
int dom_string_icmp(struct dom_string *s1, struct dom_string *s2);
+/* Get the index of the first occurrence of a character in a dom string */
+uint32_t dom_string_index(struct dom_string *str, uint32_t chr);
+/* Get the index of the last occurrence of a character in a dom string */
+uint32_t dom_string_rindex(struct dom_string *str, uint32_t chr);
+
+/* Get the length, in characters, of a dom string */
+uint32_t dom_string_length(struct dom_string *str);
+
+/* Concatenate two dom strings */
+dom_exception dom_string_concat(struct dom_string *s1, struct dom_string *s2,
+ struct dom_string **result);
+
+/* Extract a substring from a dom string */
+dom_exception dom_string_substr(struct dom_string *str,
+ uint32_t i1, uint32_t i2, struct dom_string **result);
+
+/* Duplicate a dom string */
+dom_exception dom_string_dup(struct dom_string *str,
+ struct dom_string **result);
+
+/* Calculate a hash value from a dom string */
+uint32_t dom_string_hash(struct dom_string *str);
#endif
diff --git a/src/bootstrap/init_fini.c b/src/bootstrap/init_fini.c
index 001e798..a5a62a1 100644
--- a/src/bootstrap/init_fini.c
+++ b/src/bootstrap/init_fini.c
@@ -9,6 +9,7 @@
#include <dom/bootstrap/init_fini.h>
+#include "core/document.h"
#include "utils/namespace.h"
static bool __initialised;
@@ -31,13 +32,19 @@ dom_exception dom_initialise(dom_alloc alloc, void *pw)
return DOM_NO_ERR;
}
- err = _dom_namespace_initialise(alloc, pw);
+ err = _dom_document_initialise(alloc, pw);
+ if (err != DOM_NO_ERR) {
+ return err;
+ }
- if (err == DOM_NO_ERR) {
- __initialised = true;
+ err = _dom_namespace_initialise(alloc, pw);
+ if (err != DOM_NO_ERR) {
+ return err;
}
- return err;
+ __initialised = true;
+
+ return DOM_NO_ERR;
}
/**
@@ -57,9 +64,17 @@ dom_exception dom_finalise(void)
}
err = _dom_namespace_finalise();
+ if (err != DOM_NO_ERR) {
+ return err;
+ }
+
+ err = _dom_document_finalise();
+ if (err != DOM_NO_ERR) {
+ return err;
+ }
__initialised = false;
- return err;
+ return DOM_NO_ERR;
}
diff --git a/src/core/attr.c b/src/core/attr.c
index 232f7ba..a82f117 100644
--- a/src/core/attr.c
+++ b/src/core/attr.c
@@ -177,120 +177,62 @@ dom_exception dom_attr_get_value(struct dom_attr *attr,
{
struct dom_node *a = (struct dom_node *) attr;
struct dom_node *c;
- uint8_t *rep;
- size_t rep_len;
- size_t rep_alloc;
+ struct dom_string *value, *temp;
dom_exception err;
-#define CHUNK 128
-
- rep = dom_document_alloc(a->owner, NULL, CHUNK);
- if (rep == NULL)
- return DOM_NO_MEM_ERR;
-
- rep_len = 0;
- rep_alloc = CHUNK;
+ err = dom_string_create_from_const_ptr(a->owner,
+ (const uint8_t *) "", SLEN(""), &value);
+ if (err != DOM_NO_ERR) {
+ return err;
+ }
/* Traverse children, building a string representation as we go */
for (c = a->first_child; c != NULL; c = c->next) {
if (c->type == DOM_TEXT_NODE && c->value != NULL) {
- const uint8_t *data;
- size_t len;
-
- err = dom_string_get_data(c->value, &data, &len);
+ /* Append to existing value */
+ err = dom_string_concat(value, c->value, &temp);
if (err != DOM_NO_ERR) {
- dom_document_alloc(a->owner, rep, 0);
+ dom_string_unref(value);
return err;
}
- /* Extend buffer, if necessary */
- if (rep_len + len >= rep_alloc) {
- uint8_t *temp;
- size_t required = (rep_len + len) - rep_alloc;
-
- /* Round required up to a chunk boundary */
- required =
- (required + CHUNK - 1) & ~(CHUNK - 1);
-
- temp = dom_document_alloc(a->owner, rep,
- rep_alloc + required);
- if (temp == NULL) {
- dom_document_alloc(a->owner, rep, 0);
- return DOM_NO_MEM_ERR;
- }
-
- rep = temp;
- rep_alloc += required;
- }
-
- /* Copy text into buffer */
- memcpy(rep + rep_len, data, len);
+ /* Finished with previous value */
+ dom_string_unref(value);
- /* And fix up length information */
- rep_len += len;
+ /* Claim new value */
+ value = temp;
} else if (c->type == DOM_ENTITY_REFERENCE_NODE) {
struct dom_string *tr;
- const uint8_t *data;
- size_t len;
/* Get textual representation of entity */
err = dom_entity_reference_get_textual_representation(
(struct dom_entity_reference *) c,
&tr);
if (err != DOM_NO_ERR) {
- dom_document_alloc(a->owner, rep, 0);
+ dom_string_unref(value);
return err;
}
- err = dom_string_get_data(tr, &data, &len);
+ /* Append to existing value */
+ err = dom_string_concat(value, tr, &temp);
if (err != DOM_NO_ERR) {
dom_string_unref(tr);
- dom_document_alloc(a->owner, rep, 0);
+ dom_string_unref(value);
return err;
}
- /* Extend buffer, if necessary */
- if (rep_len + len >= rep_alloc) {
- uint8_t *temp;
- size_t required = (rep_len + len) - rep_alloc;
-
- /* Round required up to a chunk boundary */
- required =
- (required + CHUNK - 1) & ~(CHUNK - 1);
-
- temp = dom_document_alloc(a->owner, rep,
- rep_alloc + required);
- if (temp == NULL) {
- dom_document_alloc(a->owner, rep, 0);
- return DOM_NO_MEM_ERR;
- }
-
- rep = temp;
- rep_alloc += required;
- }
-
- /* Copy text into buffer */
- memcpy(rep + rep_len, data, len);
-
- /* And fix up length information */
- rep_len += len;
-
/* No longer need textual representation */
dom_string_unref(tr);
- }
- }
-#undef CHUNK
+ /* Finished with previous value */
+ dom_string_unref(value);
- /* Create DOMString */
- err = dom_string_create_from_ptr(a->owner, rep, rep_len, result);
- if (err != DOM_NO_ERR) {
- dom_document_alloc(a->owner, rep, 0);
- return err;
+ /* Claim new value */
+ value = temp;
+ }
}
- /* Cleanup */
- dom_document_alloc(a->owner, rep, 0);
+ *result = value;
return DOM_NO_ERR;
}
diff --git a/src/core/document.c b/src/core/document.c
index e188868..42d2686 100644
--- a/src/core/document.c
+++ b/src/core/document.c
@@ -64,14 +64,131 @@ struct dom_document {
struct dom_doc_nnm *maps; /**< List of active namednodemaps */
- /** Interned node name strings, indexed by node type */
- /* Index 0 is unused */
- struct dom_string *nodenames[DOM_NODE_TYPE_COUNT + 1];
+ struct dom_string **nodenames; /**< Interned nodenames */
dom_alloc alloc; /**< Memory (de)allocation function */
void *pw; /**< Pointer to client data */
};
+/** Interned node name strings, indexed by node type */
+/* Index 0 is unused */
+static struct dom_string *__nodenames_utf8[DOM_NODE_TYPE_COUNT + 1];
+static struct dom_string *__nodenames_utf16[DOM_NODE_TYPE_COUNT + 1];
+
+/**
+ * Initialise the document module
+ *
+ * Creates the interned node name strings twice over -- once from UTF-8
+ * bytes and once from UTF-16 bytes -- and stores them in the file-scope
+ * __nodenames_utf8 and __nodenames_utf16 tables. Slots whose entry in the
+ * name table is NULL are set to NULL in both tables.
+ *
+ * \param alloc Memory (de)allocation function
+ * \param pw Pointer to client-specific private data
+ * \return DOM_NO_ERR on success, otherwise the error returned by
+ *         dom_string_create_from_ptr_no_doc()
+ *
+ * On failure, every string created by this call is unreferenced before
+ * the error is returned.
+ * NOTE(review): the table slots are not reset to NULL after that cleanup,
+ * so they keep the released pointers -- confirm nothing reads the tables
+ * after a failed initialisation.
+ */
+dom_exception _dom_document_initialise(dom_alloc alloc, void *pw)
+{
+ /* UTF-8 spellings; len is the byte length of name */
+ static struct {
+ const char *name;
+ size_t len;
+ } names_utf8[DOM_NODE_TYPE_COUNT + 1] = {
+ { NULL, 0 }, /* Unused */
+ { NULL, 0 }, /* Element */
+ { NULL, 0 }, /* Attr */
+ { "#text", 5 }, /* Text */
+ { "#cdata-section", 14 }, /* CDATA section */
+ { NULL, 0 }, /* Entity reference */
+ { NULL, 0 }, /* Entity */
+ { NULL, 0 }, /* Processing instruction */
+ { "#comment", 8 }, /* Comment */
+ { "#document", 9 }, /* Document */
+ { NULL, 0 }, /* Document type */
+ { "#document-fragment", 18 }, /* Document fragment */
+ { NULL, 0 } /* Notation */
+ };
+
+ /* UTF-16 spellings of the same names, written out as raw bytes with the
+ * low byte first; len is again a byte count (twice the UTF-8 length) */
+ /** \todo This assumes Little Endian */
+ static struct {
+ const char *name;
+ size_t len;
+ } names_utf16[DOM_NODE_TYPE_COUNT + 1] = {
+ { NULL, 0 }, /* Unused */
+ { NULL, 0 }, /* Element */
+ { NULL, 0 }, /* Attr */
+ { "#\0t\0e\0x\0t\0", 10 }, /* Text */
+ { "#\0c\0d\0a\0t\0a\0-\0s\0e\0c\0t\0i\0o\0n\0", 28 }, /* CDATA section */
+ { NULL, 0 }, /* Entity reference */
+ { NULL, 0 }, /* Entity */
+ { NULL, 0 }, /* Processing instruction */
+ { "#\0c\0o\0m\0m\0e\0n\0t\0", 16 }, /* Comment */
+ { "#\0d\0o\0c\0u\0m\0e\0n\0t\0", 18 }, /* Document */
+ { NULL, 0 }, /* Document type */
+ { "#\0d\0o\0c\0u\0m\0e\0n\0t\0-\0f\0r\0a\0g\0m\0e\0n\0t\0", 36 }, /* Document fragment */
+ { NULL, 0 } /* Notation */
+ };
+
+ dom_exception err;
+
+ /* Initialise interned node names */
+ for (int i = 0; i <= DOM_NODE_TYPE_COUNT; i++) {
+ /* The UTF-8 table decides which slots are populated; the UTF-16
+ * table has NULL entries in the same positions */
+ if (names_utf8[i].name == NULL) {
+ /* Nothing to intern; skip this entry */
+ __nodenames_utf8[i] = NULL;
+ __nodenames_utf16[i] = NULL;
+ continue;
+ }
+
+ /* Make string */
+ err = dom_string_create_from_ptr_no_doc(alloc, pw,
+ DOM_STRING_UTF8,
+ (const uint8_t *) names_utf8[i].name,
+ names_utf8[i].len, &__nodenames_utf8[i]);
+ if (err != DOM_NO_ERR) {
+ /* Failed, clean up strings we've created so far */
+ /* Earlier slots are populated in pairs, so testing the
+ * UTF-8 slot covers the UTF-16 one as well */
+ for (int j = 0; j < i; j++) {
+ if (__nodenames_utf8[j] != NULL) {
+ dom_string_unref(__nodenames_utf8[j]);
+ dom_string_unref(__nodenames_utf16[j]);
+ }
+ }
+ return err;
+ }
+
+ err = dom_string_create_from_ptr_no_doc(alloc, pw,
+ DOM_STRING_UTF16,
+ (const uint8_t *) names_utf16[i].name,
+ names_utf16[i].len, &__nodenames_utf16[i]);
+ if (err != DOM_NO_ERR) {
+ /* Failed, clean up strings we've created so far */
+ for (int j = 0; j < i; j++) {
+ if (__nodenames_utf8[j] != NULL) {
+ dom_string_unref(__nodenames_utf8[j]);
+ dom_string_unref(__nodenames_utf16[j]);
+ }
+ }
+
+ /* The UTF-8 half of the current pair was already created */
+ dom_string_unref(__nodenames_utf8[i]);
+
+ return err;
+ }
+ }
+
+ return DOM_NO_ERR;
+}
+
+/**
+ * Finalise the document module
+ *
+ * Releases the reference held on every interned node name, in both the
+ * UTF-8 and the UTF-16 table.
+ * NOTE(review): the table slots are left holding the released pointers
+ * rather than being reset to NULL -- confirm nothing reads them after
+ * finalisation.
+ *
+ * \return DOM_NO_ERR.
+ */
+dom_exception _dom_document_finalise(void)
+{
+ for (int i = 0; i <= DOM_NODE_TYPE_COUNT; i++) {
+ /* Slots are populated in pairs by _dom_document_initialise(), so a
+ * non-NULL UTF-8 slot is taken to imply a UTF-16 one */
+ if (__nodenames_utf8[i] != NULL) {
+ dom_string_unref(__nodenames_utf8[i]);
+ dom_string_unref(__nodenames_utf16[i]);
+ }
+ }
+
+ return DOM_NO_ERR;
+}
+
/**
* Create a Document
*
@@ -90,21 +207,6 @@ dom_exception dom_document_create(struct dom_implementation *impl,
dom_string_charset charset, dom_alloc alloc, void *pw,
struct dom_document **doc)
{
- static const char *names[DOM_NODE_TYPE_COUNT + 1] = {
- NULL, /* Unused */
- NULL, /* Element */
- NULL, /* Attr */
- "#text", /* Text */
- "#cdata-section", /* CDATA section */
- NULL, /* Entity reference */
- NULL, /* Entity */
- NULL, /* Processing instruction */
- "#comment", /* Comment */
- "#document", /* Document */
- NULL, /* Document type */
- "#document-fragment", /* Document fragment */
- NULL /* Notation */
- };
struct dom_document *d;
dom_exception err;
@@ -114,34 +216,9 @@ dom_exception dom_document_create(struct dom_implementation *impl,
return DOM_NO_MEM_ERR;
/* Set up document allocation context - must be first */
- d->charset = charset;
d->alloc = alloc;
d->pw = pw;
- /* Initialise interned node names */
- for (int i = 0; i <= DOM_NODE_TYPE_COUNT; i++) {
- if (names[i] == NULL) {
- /* Nothing to intern; skip this entry */
- d->nodenames[i] = NULL;
- continue;
- }
-
- /* Make string */
- err = dom_string_create_from_const_ptr(d,
- (const uint8_t *) names[i],
- strlen(names[i]), &d->nodenames[i]);
- if (err != DOM_NO_ERR) {
- /* Failed, clean up strings we've created so far */
- for (int j = 0; j < i; j++) {
- if (d->nodenames[i] != NULL)
- dom_string_unref(d->nodenames[i]);
- }
- /* And destroy document */
- alloc(d, 0, pw);
- return err;
- }
- }
-
/* Initialise base class -- the Document has no parent, so
* destruction will be attempted as soon as its reference count
* reaches zero. Documents own themselves (this simplifies the
@@ -150,17 +227,13 @@ dom_exception dom_document_create(struct dom_implementation *impl,
err = dom_node_initialise(&d->base, d, DOM_DOCUMENT_NODE,
NULL, NULL, NULL, NULL);
if (err != DOM_NO_ERR) {
- /* Clean up interned strings */
- for (int i = 0; i <= DOM_NODE_TYPE_COUNT; i++) {
- if (d->nodenames[i] != NULL)
- dom_string_unref(d->nodenames[i]);
- }
- /* And document */
+ /* Clean up document */
alloc(d, 0, pw);
return err;
}
/* Initialise remaining type-specific data */
+ d->charset = charset;
if (impl != NULL)
dom_implementation_ref(impl);
d->impl = impl;
@@ -168,6 +241,9 @@ dom_exception dom_document_create(struct dom_implementation *impl,
d->nodelists = NULL;
d->maps = NULL;
+ d->nodenames = (charset == DOM_STRING_UTF8) ? __nodenames_utf8
+ : __nodenames_utf16;
+
*doc = d;
return DOM_NO_ERR;
@@ -224,12 +300,6 @@ void dom_document_destroy(struct dom_document *doc)
doc->nodelists = NULL;
doc->maps = NULL;
- /* Clean up interned strings */
- for (int i = 0; i <= DOM_NODE_TYPE_COUNT; i++) {
- if (doc->nodenames[i] != NULL)
- dom_string_unref(doc->nodenames[i]);
- }
-
/* Finalise base class */
dom_node_finalise(doc, &doc->base);
@@ -569,7 +639,7 @@ dom_exception dom_document_create_element_ns(struct dom_document *doc,
}
/* Divide QName into prefix/localname pair */
- err = _dom_namespace_split_qname(qname, doc, &prefix, &localname);
+ err = _dom_namespace_split_qname(qname, &prefix, &localname);
if (err != DOM_NO_ERR) {
return err;
}
@@ -630,7 +700,7 @@ dom_exception dom_document_create_attribute_ns(struct dom_document *doc,
}
/* Divide QName into prefix/localname pair */
- err = _dom_namespace_split_qname(qname, doc, &prefix, &localname);
+ err = _dom_namespace_split_qname(qname, &prefix, &localname);
if (err != DOM_NO_ERR) {
return err;
}
diff --git a/src/core/document.h b/src/core/document.h
index 5149f2e..6982b74 100644
--- a/src/core/document.h
+++ b/src/core/document.h
@@ -19,6 +19,11 @@ struct dom_namednodemap;
struct dom_node;
struct dom_nodelist;
+/* Initialise the document module */
+dom_exception _dom_document_initialise(dom_alloc alloc, void *pw);
+/* Finalise the document module */
+dom_exception _dom_document_finalise(void);
+
/* Destroy a document */
void dom_document_destroy(struct dom_document *doc);
diff --git a/src/core/element.c b/src/core/element.c
index 2e95a9f..37e3a7e 100644
--- a/src/core/element.c
+++ b/src/core/element.c
@@ -597,7 +597,7 @@ dom_exception dom_element_set_attribute_ns(struct dom_element *element,
}
/* Decompose QName */
- err = _dom_namespace_split_qname(qname, e->owner, &prefix, &localname);
+ err = _dom_namespace_split_qname(qname, &prefix, &localname);
if (err != DOM_NO_ERR) {
return err;
}
diff --git a/src/core/node.c b/src/core/node.c
index 8eff2ec..2284e4f 100644
--- a/src/core/node.c
+++ b/src/core/node.c
@@ -303,28 +303,43 @@ dom_exception dom_node_get_node_name(struct dom_node *node,
if ((node->type == DOM_ELEMENT_NODE ||
node->type == DOM_ATTRIBUTE_NODE) &&
node->prefix != NULL) {
- const uint8_t *prefix, *localname;
- size_t prefix_len, local_len;
+ struct dom_string *colon;
dom_exception err;
- dom_string_get_data(node->prefix, &prefix, &prefix_len);
-
- dom_string_get_data(node->name, &localname, &local_len);
+ /* ugh! */
+ /** \todo Assumes little endian */
+ err = dom_string_create_from_const_ptr(node->owner,
+ (const uint8_t *) (
+ (dom_document_get_charset(node->owner) ==
+ DOM_STRING_UTF8) ? ":" : ":\0"),
+ (dom_document_get_charset(node->owner) ==
+ DOM_STRING_UTF8) ? 1 : 2,
+ &colon);
+ if (err != DOM_NO_ERR) {
+ return err;
+ }
- uint8_t qname[prefix_len + 1 /* : */ + local_len + 1 /* \0 */];
+ /* Prefix + : */
+ err = dom_string_concat(node->prefix, colon, &node_name);
+ if (err != DOM_NO_ERR) {
+ dom_string_unref(colon);
+ return err;
+ }
- sprintf((char *) qname, "%.*s:%.*s",
- prefix_len, (const char *) prefix,
- local_len, (const char *) localname);
+ /* Finished with colon */
+ dom_string_unref(colon);
- /* Create the string */
- err = dom_string_create_from_ptr(node->owner, qname,
- prefix_len + 1 + local_len, &node_name);
+ /* Prefix + : + Localname */
+ err = dom_string_concat(node_name, node->name, &colon);
if (err != DOM_NO_ERR) {
+ dom_string_unref(node_name);
return err;
}
- /* QName is referenced on exit from constructor */
+ /* Finished with intermediate node name */
+ dom_string_unref(node_name);
+
+ node_name = colon;
} else {
dom_string_ref(node->name);
@@ -1128,13 +1143,8 @@ dom_exception dom_node_set_prefix(struct dom_node *node,
/* Set the prefix */
if (prefix != NULL) {
- const uint8_t *data;
- size_t len;
-
- dom_string_get_data(prefix, &data, &len);
-
/* Empty string is treated as NULL */
- if (len == 0) {
+ if (dom_string_length(prefix) == 0) {
node->prefix = NULL;
} else {
dom_string_ref(prefix);
diff --git a/src/core/string.c b/src/core/string.c
index faa3c85..1e3817c 100644
--- a/src/core/string.c
+++ b/src/core/string.c
@@ -62,6 +62,9 @@ static struct dom_string empty_string = {
.refcnt = 1
};
+static dom_exception __dom_string_get_data(struct dom_string *str,
+ const uint8_t **data, size_t *len);
+
/**
* Claim a reference on a DOM string
*
@@ -278,45 +281,6 @@ dom_exception dom_string_create_from_ptr_no_doc(dom_alloc alloc, void *pw,
}
/**
- * Get a pointer to the string of characters within a DOM string
- *
- * \param str Pointer to DOM string to retrieve pointer from
- * \param data Pointer to location to receive data
- * \param len Pointer to location to receive byte length of data
- * \return DOM_NO_ERR on success
- *
- * The caller must have previously claimed a reference on the DOM string.
- * The returned pointer must not be freed.
- */
-dom_exception dom_string_get_data(struct dom_string *str,
- const uint8_t **data, size_t *len)
-{
- /* Assume that a NULL str pointer indicates the empty string */
- if (str == NULL)
- str = &empty_string;
-
- switch (str->type) {
- case DOM_STRING_PTR:
- *data = str->data.ptr;
- break;
- case DOM_STRING_CONST_PTR:
- *data = str->data.cptr;
- break;
- case DOM_STRING_OFFSET:
- *data = dom_document_get_base(str->ctx.doc) +
- str->data.offset;
- break;
- case DOM_STRING_PTR_NODOC:
- *data = str->data.ptr;
- break;
- }
-
- *len = str->len;
-
- return DOM_NO_ERR;
-}
-
-/**
* Case sensitively compare two DOM strings
*
* \param s1 The first string to compare
@@ -332,11 +296,11 @@ int dom_string_cmp(struct dom_string *s1, struct dom_string *s2)
size_t l1, l2;
dom_exception err;
- err = dom_string_get_data(s1, &d1, &l1);
+ err = __dom_string_get_data(s1, &d1, &l1);
if (err != DOM_NO_ERR)
return 1; /* arbitrary */
- err = dom_string_get_data(s2, &d2, &l2);
+ err = __dom_string_get_data(s2, &d2, &l2);
if (err != DOM_NO_ERR)
return 1; /* arbitrary */
@@ -387,11 +351,11 @@ int dom_string_icmp(struct dom_string *s1, struct dom_string *s2)
size_t l1, l2;
dom_exception err;
- err = dom_string_get_data(s1, &d1, &l1);
+ err = __dom_string_get_data(s1, &d1, &l1);
if (err != DOM_NO_ERR)
return 1; /* arbitrary */
- err = dom_string_get_data(s2, &d2, &l2);
+ err = __dom_string_get_data(s2, &d2, &l2);
if (err != DOM_NO_ERR)
return 1; /* arbitrary */
@@ -427,3 +391,375 @@ int dom_string_icmp(struct dom_string *s1, struct dom_string *s2)
return (int)(l1 - l2);
}
+/**
+ * Get the index of the first occurrence of a character in a dom string
+ *
+ * The string is decoded one character at a time according to its
+ * charset, so the index counts characters rather than bytes.
+ *
+ * \param str The string to search in
+ * \param chr UCS4 value to look for
+ * \return Zero-based character index of the first occurrence of ::chr, or
+ *         (uint32_t) -1 if none is found or a character fails to decode
+ */
+uint32_t dom_string_index(struct dom_string *str, uint32_t chr)
+{
+ const uint8_t *s;
+ size_t clen, slen;
+ uint32_t c, index;
+ charset_error err;
+
+ /* Fetch the raw bytes and the byte length of the string */
+ __dom_string_get_data(str, &s, &slen);
+
+ index = 0;
+
+ while (slen > 0) {
+ /* Decode the character at s into c; clen is used below as the
+ * number of bytes that character occupied */
+ if (str->charset == DOM_STRING_UTF8) {
+ err = _dom_utf8_to_ucs4(s, slen, &c, &clen);
+ } else {
+ err = _dom_utf16_to_ucs4(s, slen, &c, &clen);
+ }
+
+ /* A decoding failure is reported the same way as "not found" */
+ if (err != CHARSET_OK) {
+ return (uint32_t) -1;
+ }
+
+ if (c == chr) {
+ return index;
+ }
+
+ /* Step over the character just examined */
+ s += clen;
+ slen -= clen;
+ index++;
+ }
+
+ return (uint32_t) -1;
+}
+
+/**
+ * Get the index of the last occurrence of a character in a dom string
+ *
+ * The string is scanned backwards, one decoded character at a time, so
+ * the index counts characters rather than bytes.
+ *
+ * \param str  The string to search in
+ * \param chr  UCS4 value to look for
+ * \return Zero-based character index of the last occurrence of ::chr, or
+ *         (uint32_t) -1 if none is found or a character fails to decode
+ */
+uint32_t dom_string_rindex(struct dom_string *str, uint32_t chr)
+{
+	const uint8_t *s;
+	size_t clen, slen;
+	uint32_t c, index;
+	charset_error err;
+
+	__dom_string_get_data(str, &s, &slen);
+
+	/* Nothing to search. Returning here also keeps a NULL ::str (which
+	 * the accessor swaps for its internal empty string) away from the
+	 * str->charset reads below.
+	 * NOTE(review): assumes that empty string has zero length -- confirm */
+	if (slen == 0) {
+		return (uint32_t) -1;
+	}
+
+	/* One past the index of the final character; it is stepped down
+	 * before each comparison so that it always names the character
+	 * being examined */
+	index = dom_string_length(str);
+
+	while (slen > 0) {
+		/* Locate the character that ends at byte offset slen, then
+		 * decode it. The decode call overwrites clen, which is used
+		 * below as the byte length of that character.
+		 * NOTE(review): presumes the *_prev() helpers yield the byte
+		 * offset at which the preceding character starts -- confirm */
+		if (str->charset == DOM_STRING_UTF8) {
+			err = _dom_utf8_prev(s, slen, &clen);
+			if (err == CHARSET_OK) {
+				err = _dom_utf8_to_ucs4(s + clen, slen - clen,
+						&c, &clen);
+			}
+		} else {
+			err = _dom_utf16_prev(s, slen, &clen);
+			if (err == CHARSET_OK) {
+				err = _dom_utf16_to_ucs4(s + clen, slen - clen,
+						&c, &clen);
+			}
+		}
+
+		/* A decoding failure is reported the same way as "not found" */
+		if (err != CHARSET_OK) {
+			return (uint32_t) -1;
+		}
+
+		/* c is the character at index - 1, not at index */
+		index--;
+
+		if (c == chr) {
+			return index;
+		}
+
+		/* Drop the examined character from the end of the range */
+		slen -= clen;
+	}
+
+	return (uint32_t) -1;
+}
+
+/**
+ * Get the length, in characters, of a dom string
+ *
+ * \param str  The string to measure the length of; NULL is treated as the
+ *             empty string
+ * \return The length of the string, in characters. Zero is also returned
+ *         if the string's data cannot be measured in its charset, so a
+ *         zero result does not distinguish "empty" from "invalid".
+ */
+uint32_t dom_string_length(struct dom_string *str)
+{
+	const uint8_t *s;
+	size_t slen;
+	uint32_t clen;
+	charset_error err;
+
+	/* The data accessor accepts NULL as the empty string, but
+	 * str->charset is read below, so NULL must be answered here */
+	if (str == NULL) {
+		return 0;
+	}
+
+	__dom_string_get_data(str, &s, &slen);
+
+	/* Count characters according to the string's own charset */
+	if (str->charset == DOM_STRING_UTF8) {
+		err = _dom_utf8_length(s, slen, &clen);
+	} else {
+		err = _dom_utf16_length(s, slen, &clen);
+	}
+
+	if (err != CHARSET_OK) {
+		return 0;
+	}
+
+	return clen;
+}
+
+/**
+ * Concatenate two dom strings
+ *
+ * \param s1      The first string. Must not be NULL: it supplies the
+ *                allocation context and the charset of the result
+ * \param s2      The second string; NULL is treated as the empty string
+ * \param result  Pointer to location to receive result
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
+ *
+ * The returned string will be allocated using the allocation details
+ * stored in ::s1.
+ *
+ * The returned string will have its reference count increased. The client
+ * should dereference it once it has finished with it.
+ */
+dom_exception dom_string_concat(struct dom_string *s1, struct dom_string *s2,
+		struct dom_string **result)
+{
+	struct dom_string *concat;
+	const uint8_t *d1, *d2;
+	size_t l1, l2;
+	/* Strings without a document carry their own allocator */
+	const int nodoc = (s1->type == DOM_STRING_PTR_NODOC);
+
+	/* Resolve both operands through the accessor rather than reading
+	 * their fields directly: it accepts a NULL ::s2 and reports the
+	 * byte length alongside the data pointer */
+	__dom_string_get_data(s1, &d1, &l1);
+	__dom_string_get_data(s2, &d2, &l2);
+
+	/* Allocate the string header */
+	if (nodoc) {
+		concat = s1->ctx.nodoc.alloc(NULL,
+				sizeof(struct dom_string), s1->ctx.nodoc.pw);
+	} else {
+		concat = dom_document_alloc(s1->ctx.doc,
+				NULL, sizeof(struct dom_string));
+	}
+
+	if (concat == NULL) {
+		return DOM_NO_MEM_ERR;
+	}
+
+	/** \todo support attempted concatenation of mismatched charsets */
+
+	/* Allocate the character buffer.
+	 * NOTE(review): when both operands are empty this requests zero
+	 * bytes; if the allocator answers that with NULL the call reports
+	 * DOM_NO_MEM_ERR -- confirm the allocator's zero-size behaviour */
+	if (nodoc) {
+		concat->data.ptr = s1->ctx.nodoc.alloc(NULL,
+				l1 + l2, s1->ctx.nodoc.pw);
+	} else {
+		concat->data.ptr = dom_document_alloc(s1->ctx.doc,
+				NULL, l1 + l2);
+	}
+	if (concat->data.ptr == NULL) {
+		/* Release the header again (a size of 0 frees) */
+		if (nodoc) {
+			s1->ctx.nodoc.alloc(concat, 0, s1->ctx.nodoc.pw);
+		} else {
+			dom_document_alloc(s1->ctx.doc, concat, 0);
+		}
+		return DOM_NO_MEM_ERR;
+	}
+
+	concat->type = nodoc ? DOM_STRING_PTR_NODOC : DOM_STRING_PTR;
+
+	concat->charset = s1->charset;
+
+	/* Copy s1's bytes, then s2's directly after them. Empty operands
+	 * are skipped so memcpy is never handed a pointer that may not
+	 * refer to any storage */
+	if (l1 > 0) {
+		memcpy(concat->data.ptr, d1, l1);
+	}
+
+	if (l2 > 0) {
+		memcpy(concat->data.ptr + l1, d2, l2);
+	}
+
+	concat->len = l1 + l2;
+
+	/* The result inherits s1's allocation context */
+	if (nodoc) {
+		concat->ctx.nodoc.alloc = s1->ctx.nodoc.alloc;
+		concat->ctx.nodoc.pw = s1->ctx.nodoc.pw;
+	} else {
+		concat->ctx.doc = s1->ctx.doc;
+	}
+
+	/* The caller owns the only reference */
+	concat->refcnt = 1;
+
+	*result = concat;
+
+	return DOM_NO_ERR;
+}
+
+/**
+ * Extract a substring from a dom string
+ *
+ * \param str     The string to extract from
+ * \param i1      The character index of the start of the substring
+ * \param i2      The character index of the end of the substring. The
+ *                character at this index is not included: the result
+ *                holds the i2 - i1 characters starting at ::i1
+ * \param result  Pointer to location to receive result
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion.
+ *         DOM_NO_MEM_ERR is also returned if stepping through the
+ *         string's characters fails.
+ *
+ * ::i2 must not be less than ::i1; the difference is computed in unsigned
+ * arithmetic and would wrap.
+ *
+ * The returned string will be allocated using the allocation details
+ * stored in ::str.
+ *
+ * The returned string will have its reference count increased. The client
+ * should dereference it once it has finished with it.
+ */
+dom_exception dom_string_substr(struct dom_string *str,
+		uint32_t i1, uint32_t i2, struct dom_string **result)
+{
+	const uint8_t *s;
+	size_t slen;
+	size_t b1, b2;
+	charset_error err;
+
+	__dom_string_get_data(str, &s, &slen);
+
+	/* Initialise the byte index of the start to 0 */
+	b1 = 0;
+	/* Make the end a character offset from the start */
+	i2 -= i1;
+
+	/* Calculate the byte index of the start */
+	while (i1 > 0) {
+		if (str->charset == DOM_STRING_UTF8) {
+			err = _dom_utf8_next(s, slen, b1, &b1);
+		} else {
+			err = _dom_utf16_next(s, slen, b1, &b1);
+		}
+
+		if (err != CHARSET_OK) {
+			return DOM_NO_MEM_ERR;
+		}
+
+		i1--;
+	}
+
+	/* Initialise the byte index of the end to that of the start */
+	b2 = b1;
+
+	/* Calculate the byte index of the end */
+	while (i2 > 0) {
+		if (str->charset == DOM_STRING_UTF8) {
+			err = _dom_utf8_next(s, slen, b2, &b2);
+		} else {
+			err = _dom_utf16_next(s, slen, b2, &b2);
+		}
+
+		if (err != CHARSET_OK) {
+			return DOM_NO_MEM_ERR;
+		}
+
+		i2--;
+	}
+
+	/* Create a string from the byte range [b1, b2), using whichever
+	 * constructor matches the source string's allocation context.
+	 * Both branches must pass the same length, b2 - b1. */
+	return (str->type == DOM_STRING_PTR_NODOC)
+			? dom_string_create_from_ptr_no_doc(
+				str->ctx.nodoc.alloc,
+				str->ctx.nodoc.pw,
+				str->charset,
+				s + b1, b2 - b1, result)
+			: dom_string_create_from_ptr(str->ctx.doc,
+				s + b1, b2 - b1, result);
+}
+
+/**
+ * Duplicate a dom string
+ *
+ * Builds a new string from the source string's bytes, using the
+ * constructor that matches the source's allocation context.
+ *
+ * \param str The string to duplicate
+ * \param result Pointer to location to receive result
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
+ *
+ * The returned string will be allocated using the allocation details
+ * stored in ::str.
+ *
+ * The returned string will have its reference count increased. The client
+ * should dereference it once it has finished with it.
+ */
+dom_exception dom_string_dup(struct dom_string *str,
+ struct dom_string **result)
+{
+ const uint8_t *s;
+ size_t slen;
+
+ /* Fetch the raw bytes and the byte length of the source */
+ __dom_string_get_data(str, &s, &slen);
+
+ /* str->type is read directly here, so unlike the accessor above this
+ * function does not accept a NULL ::str */
+ return str->type == DOM_STRING_PTR_NODOC
+ ? dom_string_create_from_ptr_no_doc(
+ str->ctx.nodoc.alloc,
+ str->ctx.nodoc.pw,
+ str->charset,
+ s, slen, result)
+ : dom_string_create_from_ptr(str->ctx.doc,
+ s, slen, result);
+}
+
+/**
+ * Calculate a hash value from a dom string
+ *
+ * The hash is computed over the string's raw bytes, whatever its
+ * charset: starting from 0x01000193, each byte multiplies the running
+ * value by 0x01000193 and is then XORed in.
+ *
+ * \param str The string to calculate a hash of
+ * \return The hash value associated with the string
+ */
+uint32_t dom_string_hash(struct dom_string *str)
+{
+ const uint8_t *s;
+ size_t slen;
+ uint32_t hash = 0x01000193;
+
+ __dom_string_get_data(str, &s, &slen);
+
+ /* Fold in one byte per iteration: multiply first, then XOR */
+ while (slen > 0) {
+ hash *= 0x01000193;
+ hash ^= *s;
+
+ s++;
+ slen--;
+ }
+
+ return hash;
+}
+
+/* */
+/*---------------------------------------------------------------------------*/
+/* */
+
+/**
+ * Get a pointer to the string of characters within a DOM string
+ *
+ * This is the file-local accessor (it is declared static near the top of
+ * the file) through which the other string functions reach a string's
+ * bytes, whichever storage form the string uses.
+ *
+ * \param str Pointer to DOM string to retrieve pointer from; NULL is
+ *            replaced by the file's internal empty string
+ * \param data Pointer to location to receive data
+ * \param len Pointer to location to receive byte length of data
+ * \return DOM_NO_ERR on success
+ *
+ * The caller must have previously claimed a reference on the DOM string.
+ * The returned pointer must not be freed.
+ */
+dom_exception __dom_string_get_data(struct dom_string *str,
+ const uint8_t **data, size_t *len)
+{
+ /* Assume that a NULL str pointer indicates the empty string */
+ if (str == NULL)
+ str = &empty_string;
+
+ /* Resolve the data pointer according to how the string is stored.
+ * There is no default case: a type matching none of the cases below
+ * leaves *data unwritten. */
+ switch (str->type) {
+ case DOM_STRING_PTR:
+ *data = str->data.ptr;
+ break;
+ case DOM_STRING_CONST_PTR:
+ *data = str->data.cptr;
+ break;
+ case DOM_STRING_OFFSET:
+ /* Stored as an offset from the owning document's base pointer */
+ *data = dom_document_get_base(str->ctx.doc) +
+ str->data.offset;
+ break;
+ case DOM_STRING_PTR_NODOC:
+ *data = str->data.ptr;
+ break;
+ }
+
+ *len = str->len;
+
+ return DOM_NO_ERR;
+}
+
+
diff --git a/src/utils/namespace.c b/src/utils/namespace.c
index 8a53e45..9c0d214 100644
--- a/src/utils/namespace.c
+++ b/src/utils/namespace.c
@@ -12,10 +12,14 @@
#include "utils/namespace.h"
#include "utils/utils.h"
-/** XML namespace URI */
+/** XML prefix */
static struct dom_string *xml;
-/** XMLNS namespace URI */
+/** XML namespace URI */
+static struct dom_string *xml_ns;
+/** XMLNS prefix */
static struct dom_string *xmlns;
+/** XMLNS namespace URI */
+static struct dom_string *xmlns_ns;
/**
* Initialise the namespace component
@@ -29,11 +33,27 @@ dom_exception _dom_namespace_initialise(dom_alloc alloc, void *pw)
dom_exception err;
err = dom_string_create_from_ptr_no_doc(alloc, pw,
+ DOM_STRING_UTF8, (const uint8_t *) "xml", SLEN("xml"), &xml);
+ if (err != DOM_NO_ERR) {
+ return err;
+ }
+
+ err = dom_string_create_from_ptr_no_doc(alloc, pw,
DOM_STRING_UTF8,
(const uint8_t *) "http://www.w3.org/XML/1998/namespace",
SLEN("http://www.w3.org/XML/1998/namespace"),
- &xml);
+ &xml_ns);
if (err != DOM_NO_ERR) {
+ dom_string_unref(xml);
+ return err;
+ }
+
+ err = dom_string_create_from_ptr_no_doc(alloc, pw,
+ DOM_STRING_UTF8,
+ (const uint8_t *) "xmlns", SLEN("xmlns"), &xmlns);
+ if (err != DOM_NO_ERR) {
+ dom_string_unref(xml_ns);
+ dom_string_unref(xml);
return err;
}
@@ -41,8 +61,10 @@ dom_exception _dom_namespace_initialise(dom_alloc alloc, void *pw)
DOM_STRING_UTF8,
(const uint8_t *) "http://www.w3.org/2000/xmlns",
SLEN("http://www.w3.org/2000/xmlns"),
- &xmlns);
+ &xmlns_ns);
if (err != DOM_NO_ERR) {
+ dom_string_unref(xmlns);
+ dom_string_unref(xml_ns);
dom_string_unref(xml);
return err;
}
@@ -57,7 +79,9 @@ dom_exception _dom_namespace_initialise(dom_alloc alloc, void *pw)
*/
dom_exception _dom_namespace_finalise(void)
{
+ dom_string_unref(xmlns_ns);
dom_string_unref(xmlns);
+ dom_string_unref(xml_ns);
dom_string_unref(xml);
return DOM_NO_ERR;
@@ -86,62 +110,59 @@ dom_exception _dom_namespace_finalise(void)
dom_exception _dom_namespace_validate_qname(struct dom_string *qname,
struct dom_string *namespace)
{
- const uint8_t *qname_data, *c;
- size_t qname_len;
-
- dom_string_get_data(qname, &qname_data, &qname_len);
+ uint32_t colon;
/** \todo search qname for invalid characters */
/** \todo ensure qname is not malformed */
/* Find colon */
- /** \todo assumes ASCII-compatible encoding */
- for (c = qname_data; c != qname_data + qname_len; c++) {
- if (*c == (const uint8_t) ':') {
- break;
- }
- }
+ colon = dom_string_index(qname, ':');
- if (c == qname_data + qname_len) {
+ if (colon == (uint32_t) -1) {
/* No prefix */
/* If namespace URI is for xmlns, ensure qname == "xmlns" */
if (namespace != NULL &&
- dom_string_cmp(namespace, xmlns) == 0 &&
- (qname_len != SLEN("xmlns") ||
- strncmp((const char *) qname_data, "xmlns",
- SLEN("xmlns")) != 0)) {
+ dom_string_cmp(namespace, xmlns_ns) == 0 &&
+ dom_string_cmp(qname, xmlns) != 0) {
return DOM_NAMESPACE_ERR;
}
} else {
/* Prefix */
+ struct dom_string *prefix;
+ dom_exception err;
+
/* Ensure there is a namespace URI */
if (namespace == NULL) {
return DOM_NAMESPACE_ERR;
}
+ err = dom_string_substr(qname, 0, colon - 1, &prefix);
+ if (err != DOM_NO_ERR) {
+ return err;
+ }
+
/* Test for invalid XML namespace */
- if (c - qname_data == SLEN("xml") &&
- strncmp((const char *) qname_data, "xml",
- SLEN("xml")) == 0 &&
- dom_string_cmp(namespace, xml) != 0) {
+ if (dom_string_cmp(prefix, xml) == 0 &&
+ dom_string_cmp(namespace, xml_ns) != 0) {
+ dom_string_unref(prefix);
return DOM_NAMESPACE_ERR;
}
/* Test for invalid xmlns namespace */
- if (c - qname_data == SLEN("xmlns") &&
- strncmp((const char *) qname_data, "xmlns",
- SLEN("xmlns")) == 0 &&
- dom_string_cmp(namespace, xmlns) != 0) {
+ if (dom_string_cmp(prefix, xmlns) == 0 &&
+ dom_string_cmp(namespace, xmlns_ns) != 0) {
+ dom_string_unref(prefix);
return DOM_NAMESPACE_ERR;
}
/* Test for presence of xmlns namespace with non xmlns prefix */
- if (dom_string_cmp(namespace, xmlns) == 0 &&
- (c - qname_data != SLEN("xmlns") ||
- strncmp((const char *) qname_data, "xmlns",
- SLEN("xmlns")) != 0)) {
+ if (dom_string_cmp(namespace, xmlns_ns) == 0 &&
+ dom_string_cmp(prefix, xmlns) != 0) {
+ dom_string_unref(prefix);
return DOM_NAMESPACE_ERR;
}
+
+ dom_string_unref(prefix);
}
return DOM_NO_ERR;
@@ -151,7 +172,6 @@ dom_exception _dom_namespace_validate_qname(struct dom_string *qname,
* Split a QName into a namespace prefix and localname string
*
* \param qname The qname to split
- * \param doc The document context to create the prefix/localname in
* \param prefix Pointer to location to receive prefix
* \param localname Pointer to location to receive localname
* \return DOM_NO_ERR on success.
@@ -162,59 +182,37 @@ dom_exception _dom_namespace_validate_qname(struct dom_string *qname,
* them once finished.
*/
dom_exception _dom_namespace_split_qname(struct dom_string *qname,
- struct dom_document *doc, struct dom_string **prefix,
- struct dom_string **localname)
+ struct dom_string **prefix, struct dom_string **localname)
{
- const uint8_t *qname_data, *c, *local_data;
- size_t qname_len;
- size_t local_len;
- size_t prefix_len;
- struct dom_string *p = NULL;
- struct dom_string *l;
+ uint32_t colon;
dom_exception err;
- dom_string_get_data(qname, &qname_data, &qname_len);
-
/* Find colon, if any */
- /** \todo assumes ASCII-compatible encoding */
- for (c = qname_data; c != qname_data + qname_len; c++) {
- if (*c == (const uint8_t) ':')
- break;
- }
+ colon = dom_string_index(qname, ':');
- if (c == qname_data + qname_len) {
+ if (colon == (uint32_t) -1) {
/* None found => no prefix */
- local_data = qname_data;
- local_len = qname_len;
- prefix_len = 0;
+ *prefix = NULL;
+ err = dom_string_dup(qname, localname);
+ if (err != DOM_NO_ERR) {
+ return err;
+ }
} else {
/* Found one => prefix */
- local_data = ++c;
- local_len = qname_len - (c - qname_data);
- prefix_len = (c - qname_data - 1 /* ':' */);
- }
-
- /* Create prefix, if one exists */
- if (prefix_len > 0) {
- err = dom_string_create_from_ptr(doc, qname_data,
- prefix_len, &p);
+ err = dom_string_substr(qname, 0, colon /* exclusive */, prefix);
if (err != DOM_NO_ERR) {
return err;
}
- }
- /* Create localname */
- err = dom_string_create_from_ptr(doc, local_data, local_len, &l);
- if (err != DOM_NO_ERR) {
- if (p != NULL) {
- dom_string_unref(p);
+ err = dom_string_substr(qname, colon + 1,
+ dom_string_length(qname), localname);
+ if (err != DOM_NO_ERR) {
+ dom_string_unref(*prefix);
+ *prefix = NULL;
+ return err;
}
- return err;
}
- *prefix = p;
- *localname = l;
-
return DOM_NO_ERR;
}
diff --git a/src/utils/namespace.h b/src/utils/namespace.h
index 0bc5093..ec69035 100644
--- a/src/utils/namespace.h
+++ b/src/utils/namespace.h
@@ -26,8 +26,7 @@ dom_exception _dom_namespace_validate_qname(struct dom_string *qname,
/* Split a QName into a namespace prefix and localname string */
dom_exception _dom_namespace_split_qname(struct dom_string *qname,
- struct dom_document *doc, struct dom_string **prefix,
- struct dom_string **localname);
+ struct dom_string **prefix, struct dom_string **localname);
#endif