From f3c02943d778e9b00064bf0e103aaecb06ab5e01 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Wed, 3 Oct 2007 23:44:32 +0000 Subject: Make the dom string class more useful. Purge all trace of dom_string_get_data() from outside the dom string implementation. Port affected code to new, more useful, APIs. This also fixes the interned node name strings mentioned in the previous commit. svn path=/trunk/dom/; revision=3621 --- src/core/attr.c | 104 +++---------- src/core/document.c | 184 ++++++++++++++++------- src/core/document.h | 5 + src/core/element.c | 2 +- src/core/node.c | 48 +++--- src/core/string.c | 422 ++++++++++++++++++++++++++++++++++++++++++++++------ 6 files changed, 564 insertions(+), 201 deletions(-) (limited to 'src/core') diff --git a/src/core/attr.c b/src/core/attr.c index 232f7ba..a82f117 100644 --- a/src/core/attr.c +++ b/src/core/attr.c @@ -177,120 +177,62 @@ dom_exception dom_attr_get_value(struct dom_attr *attr, { struct dom_node *a = (struct dom_node *) attr; struct dom_node *c; - uint8_t *rep; - size_t rep_len; - size_t rep_alloc; + struct dom_string *value, *temp; dom_exception err; -#define CHUNK 128 - - rep = dom_document_alloc(a->owner, NULL, CHUNK); - if (rep == NULL) - return DOM_NO_MEM_ERR; - - rep_len = 0; - rep_alloc = CHUNK; + err = dom_string_create_from_const_ptr(a->owner, + (const uint8_t *) "", SLEN(""), &value); + if (err != DOM_NO_ERR) { + return err; + } /* Traverse children, building a string representation as we go */ for (c = a->first_child; c != NULL; c = c->next) { if (c->type == DOM_TEXT_NODE && c->value != NULL) { - const uint8_t *data; - size_t len; - - err = dom_string_get_data(c->value, &data, &len); + /* Append to existing value */ + err = dom_string_concat(value, c->value, &temp); if (err != DOM_NO_ERR) { - dom_document_alloc(a->owner, rep, 0); + dom_string_unref(value); return err; } - /* Extend buffer, if necessary */ - if (rep_len + len >= rep_alloc) { - uint8_t *temp; - size_t required = (rep_len + len) - 
rep_alloc; - - /* Round required up to a chunk boundary */ - required = - (required + CHUNK - 1) & ~(CHUNK - 1); - - temp = dom_document_alloc(a->owner, rep, - rep_alloc + required); - if (temp == NULL) { - dom_document_alloc(a->owner, rep, 0); - return DOM_NO_MEM_ERR; - } - - rep = temp; - rep_alloc += required; - } - - /* Copy text into buffer */ - memcpy(rep + rep_len, data, len); + /* Finished with previous value */ + dom_string_unref(value); - /* And fix up length information */ - rep_len += len; + /* Claim new value */ + value = temp; } else if (c->type == DOM_ENTITY_REFERENCE_NODE) { struct dom_string *tr; - const uint8_t *data; - size_t len; /* Get textual representation of entity */ err = dom_entity_reference_get_textual_representation( (struct dom_entity_reference *) c, &tr); if (err != DOM_NO_ERR) { - dom_document_alloc(a->owner, rep, 0); + dom_string_unref(value); return err; } - err = dom_string_get_data(tr, &data, &len); + /* Append to existing value */ + err = dom_string_concat(value, tr, &temp); if (err != DOM_NO_ERR) { dom_string_unref(tr); - dom_document_alloc(a->owner, rep, 0); + dom_string_unref(value); return err; } - /* Extend buffer, if necessary */ - if (rep_len + len >= rep_alloc) { - uint8_t *temp; - size_t required = (rep_len + len) - rep_alloc; - - /* Round required up to a chunk boundary */ - required = - (required + CHUNK - 1) & ~(CHUNK - 1); - - temp = dom_document_alloc(a->owner, rep, - rep_alloc + required); - if (temp == NULL) { - dom_document_alloc(a->owner, rep, 0); - return DOM_NO_MEM_ERR; - } - - rep = temp; - rep_alloc += required; - } - - /* Copy text into buffer */ - memcpy(rep + rep_len, data, len); - - /* And fix up length information */ - rep_len += len; - /* No longer need textual representation */ dom_string_unref(tr); - } - } -#undef CHUNK + /* Finished with previous value */ + dom_string_unref(value); - /* Create DOMString */ - err = dom_string_create_from_ptr(a->owner, rep, rep_len, result); - if (err != DOM_NO_ERR) 
{ - dom_document_alloc(a->owner, rep, 0); - return err; + /* Claim new value */ + value = temp; + } } - /* Cleanup */ - dom_document_alloc(a->owner, rep, 0); + *result = value; return DOM_NO_ERR; } diff --git a/src/core/document.c b/src/core/document.c index e188868..42d2686 100644 --- a/src/core/document.c +++ b/src/core/document.c @@ -64,14 +64,131 @@ struct dom_document { struct dom_doc_nnm *maps; /**< List of active namednodemaps */ - /** Interned node name strings, indexed by node type */ - /* Index 0 is unused */ - struct dom_string *nodenames[DOM_NODE_TYPE_COUNT + 1]; + struct dom_string **nodenames; /**< Interned nodenames */ dom_alloc alloc; /**< Memory (de)allocation function */ void *pw; /**< Pointer to client data */ }; +/** Interned node name strings, indexed by node type */ +/* Index 0 is unused */ +static struct dom_string *__nodenames_utf8[DOM_NODE_TYPE_COUNT + 1]; +static struct dom_string *__nodenames_utf16[DOM_NODE_TYPE_COUNT + 1]; + +/** + * Initialise the document module + * + * \param alloc Memory (de)allocation function + * \param pw Pointer to client-specific private data + * \return DOM_NO_ERR on success + */ +dom_exception _dom_document_initialise(dom_alloc alloc, void *pw) +{ + static struct { + const char *name; + size_t len; + } names_utf8[DOM_NODE_TYPE_COUNT + 1] = { + { NULL, 0 }, /* Unused */ + { NULL, 0 }, /* Element */ + { NULL, 0 }, /* Attr */ + { "#text", 5 }, /* Text */ + { "#cdata-section", 14 }, /* CDATA section */ + { NULL, 0 }, /* Entity reference */ + { NULL, 0 }, /* Entity */ + { NULL, 0 }, /* Processing instruction */ + { "#comment", 8 }, /* Comment */ + { "#document", 9 }, /* Document */ + { NULL, 0 }, /* Document type */ + { "#document-fragment", 18 }, /* Document fragment */ + { NULL, 0 } /* Notation */ + }; + + /** \todo This assumes Little Endian */ + static struct { + const char *name; + size_t len; + } names_utf16[DOM_NODE_TYPE_COUNT + 1] = { + { NULL, 0 }, /* Unused */ + { NULL, 0 }, /* Element */ + { NULL, 0 }, 
/* Attr */ + { "#\0t\0e\0x\0t\0", 10 }, /* Text */ + { "#\0c\0d\0a\0t\0a\0-\0s\0e\0c\0t\0i\0o\0n\0", 28 }, /* CDATA section */ + { NULL, 0 }, /* Entity reference */ + { NULL, 0 }, /* Entity */ + { NULL, 0 }, /* Processing instruction */ + { "#\0c\0o\0m\0m\0e\0n\0t\0", 16 }, /* Comment */ + { "#\0d\0o\0c\0u\0m\0e\0n\0t\0", 18 }, /* Document */ + { NULL, 0 }, /* Document type */ + { "#\0d\0o\0c\0u\0m\0e\0n\0t\0-\0f\0r\0a\0g\0m\0e\0n\0t\0", 36 }, /* Document fragment */ + { NULL, 0 } /* Notation */ + }; + + dom_exception err; + + /* Initialise interned node names */ + for (int i = 0; i <= DOM_NODE_TYPE_COUNT; i++) { + if (names_utf8[i].name == NULL) { + /* Nothing to intern; skip this entry */ + __nodenames_utf8[i] = NULL; + __nodenames_utf16[i] = NULL; + continue; + } + + /* Make string */ + err = dom_string_create_from_ptr_no_doc(alloc, pw, + DOM_STRING_UTF8, + (const uint8_t *) names_utf8[i].name, + names_utf8[i].len, &__nodenames_utf8[i]); + if (err != DOM_NO_ERR) { + /* Failed, clean up strings we've created so far */ + for (int j = 0; j < i; j++) { + if (__nodenames_utf8[j] != NULL) { + dom_string_unref(__nodenames_utf8[j]); + dom_string_unref(__nodenames_utf16[j]); + } + } + return err; + } + + err = dom_string_create_from_ptr_no_doc(alloc, pw, + DOM_STRING_UTF16, + (const uint8_t *) names_utf16[i].name, + names_utf16[i].len, &__nodenames_utf16[i]); + if (err != DOM_NO_ERR) { + /* Failed, clean up strings we've created so far */ + for (int j = 0; j < i; j++) { + if (__nodenames_utf8[j] != NULL) { + dom_string_unref(__nodenames_utf8[j]); + dom_string_unref(__nodenames_utf16[j]); + } + } + + dom_string_unref(__nodenames_utf8[i]); + + return err; + } + } + + return DOM_NO_ERR; +} + +/** + * Finalise the document module + * + * \return DOM_NO_ERR. 
+ */ +dom_exception _dom_document_finalise(void) +{ + for (int i = 0; i <= DOM_NODE_TYPE_COUNT; i++) { + if (__nodenames_utf8[i] != NULL) { + dom_string_unref(__nodenames_utf8[i]); + dom_string_unref(__nodenames_utf16[i]); + } + } + + return DOM_NO_ERR; +} + /** * Create a Document * @@ -90,21 +207,6 @@ dom_exception dom_document_create(struct dom_implementation *impl, dom_string_charset charset, dom_alloc alloc, void *pw, struct dom_document **doc) { - static const char *names[DOM_NODE_TYPE_COUNT + 1] = { - NULL, /* Unused */ - NULL, /* Element */ - NULL, /* Attr */ - "#text", /* Text */ - "#cdata-section", /* CDATA section */ - NULL, /* Entity reference */ - NULL, /* Entity */ - NULL, /* Processing instruction */ - "#comment", /* Comment */ - "#document", /* Document */ - NULL, /* Document type */ - "#document-fragment", /* Document fragment */ - NULL /* Notation */ - }; struct dom_document *d; dom_exception err; @@ -114,34 +216,9 @@ dom_exception dom_document_create(struct dom_implementation *impl, return DOM_NO_MEM_ERR; /* Set up document allocation context - must be first */ - d->charset = charset; d->alloc = alloc; d->pw = pw; - /* Initialise interned node names */ - for (int i = 0; i <= DOM_NODE_TYPE_COUNT; i++) { - if (names[i] == NULL) { - /* Nothing to intern; skip this entry */ - d->nodenames[i] = NULL; - continue; - } - - /* Make string */ - err = dom_string_create_from_const_ptr(d, - (const uint8_t *) names[i], - strlen(names[i]), &d->nodenames[i]); - if (err != DOM_NO_ERR) { - /* Failed, clean up strings we've created so far */ - for (int j = 0; j < i; j++) { - if (d->nodenames[i] != NULL) - dom_string_unref(d->nodenames[i]); - } - /* And destroy document */ - alloc(d, 0, pw); - return err; - } - } - /* Initialise base class -- the Document has no parent, so * destruction will be attempted as soon as its reference count * reaches zero. 
Documents own themselves (this simplifies the @@ -150,17 +227,13 @@ dom_exception dom_document_create(struct dom_implementation *impl, err = dom_node_initialise(&d->base, d, DOM_DOCUMENT_NODE, NULL, NULL, NULL, NULL); if (err != DOM_NO_ERR) { - /* Clean up interned strings */ - for (int i = 0; i <= DOM_NODE_TYPE_COUNT; i++) { - if (d->nodenames[i] != NULL) - dom_string_unref(d->nodenames[i]); - } - /* And document */ + /* Clean up document */ alloc(d, 0, pw); return err; } /* Initialise remaining type-specific data */ + d->charset = charset; if (impl != NULL) dom_implementation_ref(impl); d->impl = impl; @@ -168,6 +241,9 @@ dom_exception dom_document_create(struct dom_implementation *impl, d->nodelists = NULL; d->maps = NULL; + d->nodenames = (charset == DOM_STRING_UTF8) ? __nodenames_utf8 + : __nodenames_utf16; + *doc = d; return DOM_NO_ERR; @@ -224,12 +300,6 @@ void dom_document_destroy(struct dom_document *doc) doc->nodelists = NULL; doc->maps = NULL; - /* Clean up interned strings */ - for (int i = 0; i <= DOM_NODE_TYPE_COUNT; i++) { - if (doc->nodenames[i] != NULL) - dom_string_unref(doc->nodenames[i]); - } - /* Finalise base class */ dom_node_finalise(doc, &doc->base); @@ -569,7 +639,7 @@ dom_exception dom_document_create_element_ns(struct dom_document *doc, } /* Divide QName into prefix/localname pair */ - err = _dom_namespace_split_qname(qname, doc, &prefix, &localname); + err = _dom_namespace_split_qname(qname, &prefix, &localname); if (err != DOM_NO_ERR) { return err; } @@ -630,7 +700,7 @@ dom_exception dom_document_create_attribute_ns(struct dom_document *doc, } /* Divide QName into prefix/localname pair */ - err = _dom_namespace_split_qname(qname, doc, &prefix, &localname); + err = _dom_namespace_split_qname(qname, &prefix, &localname); if (err != DOM_NO_ERR) { return err; } diff --git a/src/core/document.h b/src/core/document.h index 5149f2e..6982b74 100644 --- a/src/core/document.h +++ b/src/core/document.h @@ -19,6 +19,11 @@ struct dom_namednodemap; 
struct dom_node; struct dom_nodelist; +/* Initialise the document module */ +dom_exception _dom_document_initialise(dom_alloc alloc, void *pw); +/* Finalise the document module */ +dom_exception _dom_document_finalise(void); + /* Destroy a document */ void dom_document_destroy(struct dom_document *doc); diff --git a/src/core/element.c b/src/core/element.c index 2e95a9f..37e3a7e 100644 --- a/src/core/element.c +++ b/src/core/element.c @@ -597,7 +597,7 @@ dom_exception dom_element_set_attribute_ns(struct dom_element *element, } /* Decompose QName */ - err = _dom_namespace_split_qname(qname, e->owner, &prefix, &localname); + err = _dom_namespace_split_qname(qname, &prefix, &localname); if (err != DOM_NO_ERR) { return err; } diff --git a/src/core/node.c b/src/core/node.c index 8eff2ec..2284e4f 100644 --- a/src/core/node.c +++ b/src/core/node.c @@ -303,28 +303,43 @@ dom_exception dom_node_get_node_name(struct dom_node *node, if ((node->type == DOM_ELEMENT_NODE || node->type == DOM_ATTRIBUTE_NODE) && node->prefix != NULL) { - const uint8_t *prefix, *localname; - size_t prefix_len, local_len; + struct dom_string *colon; dom_exception err; - dom_string_get_data(node->prefix, &prefix, &prefix_len); - - dom_string_get_data(node->name, &localname, &local_len); + /* ugh! */ + /** \todo Assumes little endian */ + err = dom_string_create_from_const_ptr(node->owner, + (const uint8_t *) ( + (dom_document_get_charset(node->owner) == + DOM_STRING_UTF8) ? ":" : ":\0"), + (dom_document_get_charset(node->owner) == + DOM_STRING_UTF8) ? 
1 : 2, + &colon); + if (err != DOM_NO_ERR) { + return err; + } - uint8_t qname[prefix_len + 1 /* : */ + local_len + 1 /* \0 */]; + /* Prefix + : */ + err = dom_string_concat(node->prefix, colon, &node_name); + if (err != DOM_NO_ERR) { + dom_string_unref(colon); + return err; + } - sprintf((char *) qname, "%.*s:%.*s", - prefix_len, (const char *) prefix, - local_len, (const char *) localname); + /* Finished with colon */ + dom_string_unref(colon); - /* Create the string */ - err = dom_string_create_from_ptr(node->owner, qname, - prefix_len + 1 + local_len, &node_name); + /* Prefix + : + Localname */ + err = dom_string_concat(node_name, node->name, &colon); if (err != DOM_NO_ERR) { + dom_string_unref(node_name); return err; } - /* QName is referenced on exit from constructor */ + /* Finished with intermediate node name */ + dom_string_unref(node_name); + + node_name = colon; } else { dom_string_ref(node->name); @@ -1128,13 +1143,8 @@ dom_exception dom_node_set_prefix(struct dom_node *node, /* Set the prefix */ if (prefix != NULL) { - const uint8_t *data; - size_t len; - - dom_string_get_data(prefix, &data, &len); - /* Empty string is treated as NULL */ - if (len == 0) { + if (dom_string_length(prefix) == 0) { node->prefix = NULL; } else { dom_string_ref(prefix); diff --git a/src/core/string.c b/src/core/string.c index faa3c85..1e3817c 100644 --- a/src/core/string.c +++ b/src/core/string.c @@ -62,6 +62,9 @@ static struct dom_string empty_string = { .refcnt = 1 }; +static dom_exception __dom_string_get_data(struct dom_string *str, + const uint8_t **data, size_t *len); + /** * Claim a reference on a DOM string * @@ -277,45 +280,6 @@ dom_exception dom_string_create_from_ptr_no_doc(dom_alloc alloc, void *pw, return DOM_NO_ERR; } -/** - * Get a pointer to the string of characters within a DOM string - * - * \param str Pointer to DOM string to retrieve pointer from - * \param data Pointer to location to receive data - * \param len Pointer to location to receive byte length 
of data - * \return DOM_NO_ERR on success - * - * The caller must have previously claimed a reference on the DOM string. - * The returned pointer must not be freed. - */ -dom_exception dom_string_get_data(struct dom_string *str, - const uint8_t **data, size_t *len) -{ - /* Assume that a NULL str pointer indicates the empty string */ - if (str == NULL) - str = &empty_string; - - switch (str->type) { - case DOM_STRING_PTR: - *data = str->data.ptr; - break; - case DOM_STRING_CONST_PTR: - *data = str->data.cptr; - break; - case DOM_STRING_OFFSET: - *data = dom_document_get_base(str->ctx.doc) + - str->data.offset; - break; - case DOM_STRING_PTR_NODOC: - *data = str->data.ptr; - break; - } - - *len = str->len; - - return DOM_NO_ERR; -} - /** * Case sensitively compare two DOM strings * @@ -332,11 +296,11 @@ int dom_string_cmp(struct dom_string *s1, struct dom_string *s2) size_t l1, l2; dom_exception err; - err = dom_string_get_data(s1, &d1, &l1); + err = __dom_string_get_data(s1, &d1, &l1); if (err != DOM_NO_ERR) return 1; /* arbitrary */ - err = dom_string_get_data(s2, &d2, &l2); + err = __dom_string_get_data(s2, &d2, &l2); if (err != DOM_NO_ERR) return 1; /* arbitrary */ @@ -387,11 +351,11 @@ int dom_string_icmp(struct dom_string *s1, struct dom_string *s2) size_t l1, l2; dom_exception err; - err = dom_string_get_data(s1, &d1, &l1); + err = __dom_string_get_data(s1, &d1, &l1); if (err != DOM_NO_ERR) return 1; /* arbitrary */ - err = dom_string_get_data(s2, &d2, &l2); + err = __dom_string_get_data(s2, &d2, &l2); if (err != DOM_NO_ERR) return 1; /* arbitrary */ @@ -427,3 +391,375 @@ int dom_string_icmp(struct dom_string *s1, struct dom_string *s2) return (int)(l1 - l2); } +/** + * Get the index of the first occurrence of a character in a dom string + * + * \param str The string to search in + * \param chr UCS4 value to look for + * \return Character index of found character, or -1 if none found + */ +uint32_t dom_string_index(struct dom_string *str, uint32_t chr) +{ + 
const uint8_t *s; + size_t clen, slen; + uint32_t c, index; + charset_error err; + + __dom_string_get_data(str, &s, &slen); + + index = 0; + + while (slen > 0) { + if (str->charset == DOM_STRING_UTF8) { + err = _dom_utf8_to_ucs4(s, slen, &c, &clen); + } else { + err = _dom_utf16_to_ucs4(s, slen, &c, &clen); + } + + if (err != CHARSET_OK) { + return (uint32_t) -1; + } + + if (c == chr) { + return index; + } + + s += clen; + slen -= clen; + index++; + } + + return (uint32_t) -1; +} + +/** + * Get the index of the last occurrence of a character in a dom string + * + * \param str The string to search in + * \param chr UCS4 value to look for + * \return Character index of found character, or -1 if none found + */ +uint32_t dom_string_rindex(struct dom_string *str, uint32_t chr) +{ + const uint8_t *s; + size_t clen, slen; + uint32_t c, index; + charset_error err; + + __dom_string_get_data(str, &s, &slen); + + index = dom_string_length(str); + + while (slen > 0) { + if (str->charset == DOM_STRING_UTF8) { + err = _dom_utf8_prev(s, slen, &clen); + if (err == CHARSET_OK) { + err = _dom_utf8_to_ucs4(s + clen, slen - clen, + &c, &clen); + } + } else { + err = _dom_utf16_prev(s, slen, &clen); + if (err == CHARSET_OK) { + err = _dom_utf16_to_ucs4(s + clen, slen - clen, + &c, &clen); + } + } + + if (err != CHARSET_OK) { + return (uint32_t) -1; + } + + if (c == chr) { + /* index counts the characters still to be examined (including this one), so the zero-based position of c is index - 1 */ + return index - 1; + } + + slen -= clen; + index--; + } + + return (uint32_t) -1; + +} + +/** + * Get the length, in characters, of a dom string + * + * \param str The string to measure the length of + * \return The length of the string, in characters + */ +uint32_t dom_string_length(struct dom_string *str) +{ + const uint8_t *s; + size_t slen; + uint32_t clen; + charset_error err; + + __dom_string_get_data(str, &s, &slen); + + if (str->charset == DOM_STRING_UTF8) { + err = _dom_utf8_length(s, slen, &clen); + } else { + err = _dom_utf16_length(s, slen, &clen); + } + + if (err != CHARSET_OK) { + return 0; + } + + return 
clen; +} + +/** + * Concatenate two dom strings + * + * \param s1 The first string + * \param s2 The second string + * \param result Pointer to location to receive result + * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion + * + * The returned string will be allocated using the allocation details + * stored in ::s1. + * + * The returned string will have its reference count increased. The client + * should dereference it once it has finished with it. + */ +dom_exception dom_string_concat(struct dom_string *s1, struct dom_string *s2, + struct dom_string **result) +{ + struct dom_string *concat; + const uint8_t *s; + size_t slen; + + if (s1->type == DOM_STRING_PTR_NODOC) { + concat = s1->ctx.nodoc.alloc(NULL, + sizeof(struct dom_string), s1->ctx.nodoc.pw); + } else { + concat = dom_document_alloc(s1->ctx.doc, + NULL, sizeof(struct dom_string)); + } + + if (concat == NULL) { + return DOM_NO_MEM_ERR; + } + + /** \todo support attempted concatenation of mismatched charsets */ + + if (s1->type == DOM_STRING_PTR_NODOC) { + concat->data.ptr = s1->ctx.nodoc.alloc(NULL, + s1->len + s2->len, s1->ctx.nodoc.pw); + } else { + concat->data.ptr = dom_document_alloc(s1->ctx.doc, + NULL, s1->len + s2->len); + } + if (concat->data.ptr == NULL) { + if (s1->type == DOM_STRING_PTR_NODOC) { + s1->ctx.nodoc.alloc(concat, 0, s1->ctx.nodoc.pw); + } else { + dom_document_alloc(s1->ctx.doc, concat, 0); + } + return DOM_NO_MEM_ERR; + } + + concat->type = (s1->type == DOM_STRING_PTR_NODOC) + ? 
DOM_STRING_PTR_NODOC : DOM_STRING_PTR; + + concat->charset = s1->charset; + + __dom_string_get_data(s1, &s, &slen); + + memcpy(concat->data.ptr, s, slen); + + __dom_string_get_data(s2, &s, &slen); + + memcpy(concat->data.ptr + s1->len, s, slen); + + concat->len = s1->len + s2->len; + + if (concat->type == DOM_STRING_PTR_NODOC) { + concat->ctx.nodoc.alloc = s1->ctx.nodoc.alloc; + concat->ctx.nodoc.pw = s1->ctx.nodoc.pw; + } else { + concat->ctx.doc = s1->ctx.doc; + } + + concat->refcnt = 1; + + *result = concat; + + return DOM_NO_ERR; +} + +/** + * Extract a substring from a dom string + * + * \param str The string to extract from + * \param i1 The character index of the start of the substring + * \param i2 The character index of the end of the substring + * \param result Pointer to location to receive result + * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion + * + * The returned string will be allocated using the allocation details + * stored in ::str. + * + * The returned string will have its reference count increased. The client + * should dereference it once it has finished with it. 
+ */ +dom_exception dom_string_substr(struct dom_string *str, + uint32_t i1, uint32_t i2, struct dom_string **result) +{ + const uint8_t *s; + size_t slen; + size_t b1, b2; + charset_error err; + + __dom_string_get_data(str, &s, &slen); + + /* Initialise the byte index of the start to 0 */ + b1 = 0; + /* Make the end a character offset from the start */ + i2 -= i1; + + /* Calculate the byte index of the start */ + while (i1 > 0) { + if (str->charset == DOM_STRING_UTF8) { + err = _dom_utf8_next(s, slen, b1, &b1); + } else { + err = _dom_utf16_next(s, slen, b1, &b1); + } + + if (err != CHARSET_OK) { + return DOM_NO_MEM_ERR; + } + + i1--; + } + + /* Initialise the byte index of the end to that of the start */ + b2 = b1; + + /* Calculate the byte index of the end */ + while (i2 > 0) { + if (str->charset == DOM_STRING_UTF8) { + err = _dom_utf8_next(s, slen, b2, &b2); + } else { + err = _dom_utf16_next(s, slen, b2, &b2); + } + + if (err != CHARSET_OK) { + return DOM_NO_MEM_ERR; + } + + i2--; + } + + /* Create a string from the specified byte range */ + return (str->type == DOM_STRING_PTR_NODOC) + ? dom_string_create_from_ptr_no_doc( + str->ctx.nodoc.alloc, + str->ctx.nodoc.pw, + str->charset, + s + b1, b2 - b1, result) + : dom_string_create_from_ptr(str->ctx.doc, + s + b1, b2 - b1, result); +} + +/** + * Duplicate a dom string + * + * \param str The string to duplicate + * \param result Pointer to location to receive result + * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion + * + * The returned string will be allocated using the allocation details + * stored in ::str. + * + * The returned string will have its reference count increased. The client + * should dereference it once it has finished with it. + */ +dom_exception dom_string_dup(struct dom_string *str, + struct dom_string **result) +{ + const uint8_t *s; + size_t slen; + + __dom_string_get_data(str, &s, &slen); + + return str->type == DOM_STRING_PTR_NODOC + ? 
dom_string_create_from_ptr_no_doc( + str->ctx.nodoc.alloc, + str->ctx.nodoc.pw, + str->charset, + s, slen, result) + : dom_string_create_from_ptr(str->ctx.doc, + s, slen, result); +} + +/** + * Calculate a hash value from a dom string + * + * \param str The string to calculate a hash of + * \return The hash value associated with the string + */ +uint32_t dom_string_hash(struct dom_string *str) +{ + const uint8_t *s; + size_t slen; + uint32_t hash = 0x01000193; + + __dom_string_get_data(str, &s, &slen); + + while (slen > 0) { + hash *= 0x01000193; + hash ^= *s; + + s++; + slen--; + } + + return hash; +} + +/* */ +/*---------------------------------------------------------------------------*/ +/* */ + +/** + * Get a pointer to the string of characters within a DOM string + * + * \param str Pointer to DOM string to retrieve pointer from + * \param data Pointer to location to receive data + * \param len Pointer to location to receive byte length of data + * \return DOM_NO_ERR on success + * + * The caller must have previously claimed a reference on the DOM string. + * The returned pointer must not be freed. + */ +dom_exception __dom_string_get_data(struct dom_string *str, + const uint8_t **data, size_t *len) +{ + /* Assume that a NULL str pointer indicates the empty string */ + if (str == NULL) + str = &empty_string; + + switch (str->type) { + case DOM_STRING_PTR: + *data = str->data.ptr; + break; + case DOM_STRING_CONST_PTR: + *data = str->data.cptr; + break; + case DOM_STRING_OFFSET: + *data = dom_document_get_base(str->ctx.doc) + + str->data.offset; + break; + case DOM_STRING_PTR_NODOC: + *data = str->data.ptr; + break; + } + + *len = str->len; + + return DOM_NO_ERR; +} + + -- cgit v1.2.3