From d4d0364f52a658469fea937e87c6317ec21644c0 Mon Sep 17 00:00:00 2001
From: John Mark Bell
Date: Sat, 3 Nov 2007 21:18:02 +0000
Subject: Add, and implement, dom_string_insert() and dom_string_replace() API

Fix dom_string_substr() to have correct length limits when calling _dom_utf{8,16}_next()
Fix dom_string_substr() to calculate correct length of output string when creating from ptr
Implement dom_characterdata_get_length()
Implement dom_characterdata_substring_data()
Implement dom_characterdata_append_data()
Implement dom_characterdata_insert_data()
Implement dom_characterdata_delete_data()
Implement dom_characterdata_replace_data()

svn path=/trunk/dom/; revision=3641
---
Review amendments to the diff below (per-hunk line counts are unchanged):
 - dom_string_replace(): i1/i2 index the target string, so the byte offsets
   b1/b2 are now found by walking the target buffer (t, tlen) instead of
   the replacement buffer (s, slen).
 - dom_characterdata_{substring,delete,replace}_data(): the end index is
   derived from "count >= len - offset", so a very large count can no
   longer wrap offset + count and produce end < offset.
 - dom_characterdata_insert_data(): offset == length is accepted (insert at
   end of data); a node with no value still yields DOM_INDEX_SIZE_ERR,
   exactly as before.

 include/dom/core/string.h |  10 ++
 src/core/characterdata.c  | 162 +++++++++++++++++++++++----
 src/core/string.c         | 271 +++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 414 insertions(+), 29 deletions(-)

diff --git a/include/dom/core/string.h b/include/dom/core/string.h
index 584db00..e3dfa30 100644
--- a/include/dom/core/string.h
+++ b/include/dom/core/string.h
@@ -63,6 +63,16 @@ dom_exception dom_string_concat(struct dom_string *s1, struct dom_string *s2,
 dom_exception dom_string_substr(struct dom_string *str,
 		uint32_t i1, uint32_t i2, struct dom_string **result);
 
+/* Insert data into a dom string at the given location */
+dom_exception dom_string_insert(struct dom_string *target,
+		struct dom_string *source, uint32_t offset,
+		struct dom_string **result);
+
+/* Replace a section of a dom string */
+dom_exception dom_string_replace(struct dom_string *target,
+		struct dom_string *source, uint32_t i1, uint32_t i2,
+		struct dom_string **result);
+
 /* Duplicate a dom string */
 dom_exception dom_string_dup(struct dom_string *str,
 		struct dom_string **result);
diff --git a/src/core/characterdata.c b/src/core/characterdata.c
index 88aaff2..317dfd2 100644
--- a/src/core/characterdata.c
+++ b/src/core/characterdata.c
@@ -115,10 +115,15 @@ dom_exception dom_characterdata_set_data(struct dom_characterdata *cdata,
 dom_exception dom_characterdata_get_length(struct dom_characterdata *cdata,
 		unsigned long *length)
 {
-	UNUSED(cdata);
-	UNUSED(length);
+	struct dom_node *c = (struct dom_node *) cdata;
+
+	if (c->value != NULL) {
+		*length = dom_string_length(c->value);
+	} else {
+		*length = 0;
+	}
 
-	return DOM_NOT_SUPPORTED_ERR;
+	return DOM_NO_ERR;
 }
 
 /**
@@ -143,12 +148,22 @@ dom_exception dom_characterdata_substring_data(
 		struct dom_characterdata *cdata, unsigned long offset,
 		unsigned long count, struct dom_string **data)
 {
-	UNUSED(cdata);
-	UNUSED(offset);
-	UNUSED(count);
-	UNUSED(data);
+	struct dom_node *c = (struct dom_node *) cdata;
+	uint32_t len, end;
+
+	if (c->value != NULL) {
+		len = dom_string_length(c->value);
+	} else {
+		len = 0;
+	}
+
+	if (offset >= len) {
+		return DOM_INDEX_SIZE_ERR;
+	}
 
-	return DOM_NOT_SUPPORTED_ERR;
+	end = (count >= len - offset) ? len : offset + count;
+
+	return dom_string_substr(c->value, offset, end, data);
 }
 
 /**
@@ -162,10 +177,28 @@ dom_exception dom_characterdata_substring_data(
 dom_exception dom_characterdata_append_data(struct dom_characterdata *cdata,
 		struct dom_string *data)
 {
-	UNUSED(cdata);
-	UNUSED(data);
+	struct dom_node *c = (struct dom_node *) cdata;
+	struct dom_string *temp;
+	dom_exception err;
+
+	if (_dom_node_readonly(c)) {
+		return DOM_NO_MODIFICATION_ALLOWED_ERR;
+	}
+
+	err = dom_string_insert(c->value, data,
+			c->value != NULL ? dom_string_length(c->value) : 0,
+			&temp);
+	if (err != DOM_NO_ERR) {
+		return err;
+	}
 
-	return DOM_NOT_SUPPORTED_ERR;
+	if (c->value != NULL) {
+		dom_string_unref(c->value);
+	}
+
+	c->value = temp;
+
+	return DOM_NO_ERR;
 }
 
 /**
@@ -182,11 +215,37 @@ dom_exception dom_characterdata_append_data(struct dom_characterdata *cdata,
 dom_exception dom_characterdata_insert_data(struct dom_characterdata *cdata,
 		unsigned long offset, struct dom_string *data)
 {
-	UNUSED(cdata);
-	UNUSED(offset);
-	UNUSED(data);
+	struct dom_node *c = (struct dom_node *) cdata;
+	struct dom_string *temp;
+	uint32_t len;
+	dom_exception err;
+
+	if (_dom_node_readonly(c)) {
+		return DOM_NO_MODIFICATION_ALLOWED_ERR;
+	}
+
+	if (c->value != NULL) {
+		len = dom_string_length(c->value);
+	} else {
+		len = 0;
+	}
+
+	if (c->value == NULL || offset > len) {
+		return DOM_INDEX_SIZE_ERR;
+	}
+
+	err = dom_string_insert(c->value, data, offset, &temp);
+	if (err != DOM_NO_ERR) {
+		return err;
+	}
+
+	if (c->value != NULL) {
+		dom_string_unref(c->value);
+	}
 
-	return DOM_NOT_SUPPORTED_ERR;
+	c->value = temp;
+
+	return DOM_NO_ERR;
 }
 
 /**
@@ -203,11 +262,39 @@ dom_exception dom_characterdata_insert_data(struct dom_characterdata *cdata,
 dom_exception dom_characterdata_delete_data(struct dom_characterdata *cdata,
 		unsigned long offset, unsigned long count)
 {
-	UNUSED(cdata);
-	UNUSED(offset);
-	UNUSED(count);
+	struct dom_node *c = (struct dom_node *) cdata;
+	struct dom_string *temp;
+	uint32_t len, end;
+	dom_exception err;
+
+	if (_dom_node_readonly(c)) {
+		return DOM_NO_MODIFICATION_ALLOWED_ERR;
+	}
+
+	if (c->value != NULL) {
+		len = dom_string_length(c->value);
+	} else {
+		len = 0;
+	}
+
+	if (offset >= len) {
+		return DOM_INDEX_SIZE_ERR;
+	}
+
+	end = (count >= len - offset) ? len : offset + count;
+
+	err = dom_string_replace(c->value, NULL, offset, end, &temp);
+	if (err != DOM_NO_ERR) {
+		return err;
+	}
+
+	if (c->value != NULL) {
+		dom_string_unref(c->value);
+	}
+
+	c->value = temp;
 
-	return DOM_NOT_SUPPORTED_ERR;
+	return DOM_NO_ERR;
 }
 
 /**
@@ -226,11 +313,38 @@ dom_exception dom_characterdata_replace_data(struct dom_characterdata *cdata,
 		unsigned long offset, unsigned long count,
 		struct dom_string *data)
 {
-	UNUSED(cdata);
-	UNUSED(offset);
-	UNUSED(count);
-	UNUSED(data);
+	struct dom_node *c = (struct dom_node *) cdata;
+	struct dom_string *temp;
+	uint32_t len, end;
+	dom_exception err;
+
+	if (_dom_node_readonly(c)) {
+		return DOM_NO_MODIFICATION_ALLOWED_ERR;
+	}
+
+	if (c->value != NULL) {
+		len = dom_string_length(c->value);
+	} else {
+		len = 0;
+	}
+
+	if (offset >= len) {
+		return DOM_INDEX_SIZE_ERR;
+	}
+
+	end = (count >= len - offset) ? len : offset + count;
+
+	err = dom_string_replace(c->value, data, offset, end, &temp);
+	if (err != DOM_NO_ERR) {
+		return err;
+	}
+
+	if (c->value != NULL) {
+		dom_string_unref(c->value);
+	}
 
-	return DOM_NOT_SUPPORTED_ERR;
+	c->value = temp;
+
+	return DOM_NO_ERR;
 }
 
diff --git a/src/core/string.c b/src/core/string.c
index 1e3817c..8ec44aa 100644
--- a/src/core/string.c
+++ b/src/core/string.c
@@ -622,9 +622,9 @@ dom_exception dom_string_substr(struct dom_string *str,
 	/* Calculate the byte index of the start */
 	while (i1 > 0) {
 		if (str->charset == DOM_STRING_UTF8) {
-			err = _dom_utf8_next(s, slen, b1, &b1);
+			err = _dom_utf8_next(s, slen - b1, b1, &b1);
 		} else {
-			err = _dom_utf16_next(s, slen, b1, &b1);
+			err = _dom_utf16_next(s, slen - b1, b1, &b1);
 		}
 
 		if (err != CHARSET_OK) {
@@ -640,9 +640,9 @@ dom_exception dom_string_substr(struct dom_string *str,
 	/* Calculate the byte index of the end */
 	while (i2 > 0) {
 		if (str->charset == DOM_STRING_UTF8) {
-			err = _dom_utf8_next(s, slen, b2, &b2);
+			err = _dom_utf8_next(s, slen - b2, b2, &b2);
 		} else {
-			err = _dom_utf16_next(s, slen, b2, &b2);
+			err = _dom_utf16_next(s, slen - b2, b2, &b2);
 		}
 
 		if (err != CHARSET_OK) {
@@ -660,7 +660,268 @@ dom_exception dom_string_substr(struct dom_string *str,
 				str->charset,
 				s + b1, b2 - b1, result)
 		: dom_string_create_from_ptr(str->ctx.doc,
-				s + b1, b2 - b2, result);
+				s + b1, b2 - b1, result);
+}
+
+/**
+ * Insert data into a dom string at the given location
+ *
+ * \param target  Pointer to string to insert into
+ * \param source  Pointer to string to insert
+ * \param offset  Character offset of location to insert at
+ * \param result  Pointer to location to receive result
+ * \return DOM_NO_ERR          on success,
+ *         DOM_NO_MEM_ERR      on memory exhaustion,
+ *         DOM_INDEX_SIZE_ERR  if ::offset > len(::target).
+ *
+ * The returned string will be allocated using the allocation details
+ * stored in ::target.
+ *
+ * The returned string will have its reference count increased. The client
+ * should dereference it once it has finished with it.
+ */
+dom_exception dom_string_insert(struct dom_string *target,
+		struct dom_string *source, uint32_t offset,
+		struct dom_string **result)
+{
+	struct dom_string *res;
+	const uint8_t *t, *s;
+	uint32_t tlen, slen, clen;
+	uint32_t ins = 0;
+	charset_error err;
+
+	__dom_string_get_data(target, &t, &tlen);
+
+	__dom_string_get_data(source, &s, &slen);
+
+	clen = dom_string_length(target);
+
+	if (offset > clen)
+		return DOM_INDEX_SIZE_ERR;
+
+	/* Calculate the byte index of the insertion point */
+	if (offset == clen) {
+		/* Optimisation for append */
+		offset = 0;
+		ins = tlen;
+	} else {
+		while (offset > 0) {
+			if (target->charset == DOM_STRING_UTF8) {
+				err = _dom_utf8_next(t, tlen - ins, ins, &ins);
+			} else {
+				err = _dom_utf16_next(t, tlen - ins, ins, &ins);
+			}
+
+			if (err != CHARSET_OK) {
+				return DOM_NO_MEM_ERR;
+			}
+
+			offset--;
+		}
+	}
+
+	/* Allocate result string */
+	if (target->type == DOM_STRING_PTR_NODOC) {
+		res = target->ctx.nodoc.alloc(NULL, sizeof(struct dom_string),
+				target->ctx.nodoc.pw);
+	} else {
+		res = dom_document_alloc(target->ctx.doc,
+				NULL, sizeof(struct dom_string));
+	}
+
+	if (res == NULL) {
+		return DOM_NO_MEM_ERR;
+	}
+
+	/** \todo support insertion of a string from a different charset */
+
+	/* Allocate data buffer for result contents */
+	if (target->type == DOM_STRING_PTR_NODOC) {
+		res->data.ptr = target->ctx.nodoc.alloc(NULL,
+				tlen + slen, target->ctx.nodoc.pw);
+	} else {
+		res->data.ptr = dom_document_alloc(target->ctx.doc,
+				NULL, tlen + slen);
+	}
+	if (res->data.ptr == NULL) {
+		if (target->type == DOM_STRING_PTR_NODOC) {
+			target->ctx.nodoc.alloc(res, 0, target->ctx.nodoc.pw);
+		} else {
+			dom_document_alloc(target->ctx.doc, res, 0);
+		}
+		return DOM_NO_MEM_ERR;
+	}
+
+	/* Populate result members */
+	res->type = (target->type == DOM_STRING_PTR_NODOC)
+			? DOM_STRING_PTR_NODOC : DOM_STRING_PTR;
+
+	res->charset = target->charset;
+
+	/* Copy initial portion of target, if any, into result */
+	if (ins > 0) {
+		memcpy(res->data.ptr, t, ins);
+	}
+
+	/* Copy inserted data into result */
+	memcpy(res->data.ptr + ins, s, slen);
+
+	/* Copy remainder of target, if any, into result */
+	if (tlen - ins > 0) {
+		memcpy(res->data.ptr + ins + slen, t + ins, tlen - ins);
+	}
+
+	res->len = tlen + slen;
+
+	if (res->type == DOM_STRING_PTR_NODOC) {
+		res->ctx.nodoc.alloc = target->ctx.nodoc.alloc;
+		res->ctx.nodoc.pw = target->ctx.nodoc.pw;
+	} else {
+		res->ctx.doc = target->ctx.doc;
+	}
+
+	res->refcnt = 1;
+
+	*result = res;
+
+	return DOM_NO_ERR;
+}
+
+/**
+ * Replace a section of a dom string
+ *
+ * \param target  Pointer to string of which to replace a section
+ * \param source  Pointer to replacement string
+ * \param i1      Character index of start of region to replace
+ * \param i2      Character index of end of region to replace
+ * \param result  Pointer to location to receive result
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion.
+ *
+ * The returned string will be allocated using the allocation details
+ * stored in ::target.
+ *
+ * The returned string will have its reference count increased. The client
+ * should dereference it once it has finished with it.
+ */
+dom_exception dom_string_replace(struct dom_string *target,
+		struct dom_string *source, uint32_t i1, uint32_t i2,
+		struct dom_string **result)
+{
+	struct dom_string *res;
+	const uint8_t *t, *s;
+	uint32_t tlen, slen;
+	uint32_t b1, b2;
+	charset_error err;
+
+	__dom_string_get_data(target, &t, &tlen);
+
+	__dom_string_get_data(source, &s, &slen);
+
+	/* Initialise the byte index of the start to 0 */
+	b1 = 0;
+	/* Make the end a character offset from the start */
+	i2 -= i1;
+
+	/* Calculate the byte index of the start within the target */
+	while (i1 > 0) {
+		if (target->charset == DOM_STRING_UTF8) {
+			err = _dom_utf8_next(t, tlen - b1, b1, &b1);
+		} else {
+			err = _dom_utf16_next(t, tlen - b1, b1, &b1);
+		}
+
+		if (err != CHARSET_OK) {
+			return DOM_NO_MEM_ERR;
+		}
+
+		i1--;
+	}
+
+	/* Initialise the byte index of the end to that of the start */
+	b2 = b1;
+
+	/* Calculate the byte index of the end within the target */
+	while (i2 > 0) {
+		if (target->charset == DOM_STRING_UTF8) {
+			err = _dom_utf8_next(t, tlen - b2, b2, &b2);
+		} else {
+			err = _dom_utf16_next(t, tlen - b2, b2, &b2);
+		}
+
+		if (err != CHARSET_OK) {
+			return DOM_NO_MEM_ERR;
+		}
+
+		i2--;
+	}
+
+	/* Allocate result string */
+	if (target->type == DOM_STRING_PTR_NODOC) {
+		res = target->ctx.nodoc.alloc(NULL, sizeof(struct dom_string),
+				target->ctx.nodoc.pw);
+	} else {
+		res = dom_document_alloc(target->ctx.doc,
+				NULL, sizeof(struct dom_string));
+	}
+
+	if (res == NULL) {
+		return DOM_NO_MEM_ERR;
+	}
+
+	/** \todo support insertion of a string from a different charset */
+
+	/* Allocate data buffer for result contents */
+	if (target->type == DOM_STRING_PTR_NODOC) {
+		res->data.ptr = target->ctx.nodoc.alloc(NULL,
+			tlen + slen - (b2 - b1), target->ctx.nodoc.pw);
+	} else {
+		res->data.ptr = dom_document_alloc(target->ctx.doc,
+			NULL, tlen + slen - (b2 - b1));
+	}
+	if (res->data.ptr == NULL) {
+		if (target->type == DOM_STRING_PTR_NODOC) {
+			target->ctx.nodoc.alloc(res, 0, target->ctx.nodoc.pw);
+		} else {
+			dom_document_alloc(target->ctx.doc, res, 0);
+		}
+		return DOM_NO_MEM_ERR;
+	}
+
+	/* Populate result members */
+	res->type = (target->type == DOM_STRING_PTR_NODOC)
+			? DOM_STRING_PTR_NODOC : DOM_STRING_PTR;
+
+	res->charset = target->charset;
+
+	/* Copy initial portion of target, if any, into result */
+	if (b1 > 0) {
+		memcpy(res->data.ptr, t, b1);
+	}
+
+	/* Copy replacement data into result */
+	if (slen > 0) {
+		memcpy(res->data.ptr + b1, s, slen);
+	}
+
+	/* Copy remainder of target, if any, into result */
+	if (tlen - b2 > 0) {
+		memcpy(res->data.ptr + b1 + slen, t + b2, tlen - b2);
+	}
+
+	res->len = tlen + slen - (b2 - b1);
+
+	if (res->type == DOM_STRING_PTR_NODOC) {
+		res->ctx.nodoc.alloc = target->ctx.nodoc.alloc;
+		res->ctx.nodoc.pw = target->ctx.nodoc.pw;
+	} else {
+		res->ctx.doc = target->ctx.doc;
+	}
+
+	res->refcnt = 1;
+
+	*result = res;
+
+	return DOM_NO_ERR;
 }
 
 /**
-- 
cgit v1.2.3