summaryrefslogtreecommitdiff
path: root/src/core/string.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/string.c')
-rw-r--r--src/core/string.c358
1 files changed, 337 insertions, 21 deletions
diff --git a/src/core/string.c b/src/core/string.c
index 806531a..e35f416 100644
--- a/src/core/string.c
+++ b/src/core/string.c
@@ -3,16 +3,17 @@
* Licensed under the MIT License,
* http://www.opensource.org/licenses/mit-license.php
* Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ * Copyright 2009 Bo Yang <struggleyb.nku@gmail.com>
*/
+#include <assert.h>
#include <ctype.h>
#include <inttypes.h>
#include <string.h>
#include <parserutils/charset/utf8.h>
-#include <dom/core/string.h>
-
+#include "core/string.h"
#include "core/document.h"
#include "utils/utils.h"
@@ -26,6 +27,10 @@ struct dom_string {
size_t len; /**< Byte length of string */
+ lwc_string *intern; /**< The lwc_string of this string */
+
+ lwc_context *context; /**< The lwc_context for the lwc_string */
+
dom_alloc alloc; /**< Memory (de)allocation function */
void *pw; /**< Client-specific data */
@@ -40,6 +45,8 @@ static struct dom_string empty_string = {
.refcnt = 1
};
+
+
/**
* Claim a reference on a DOM string
*
@@ -60,8 +67,15 @@ void dom_string_ref(struct dom_string *str)
*/
void dom_string_unref(struct dom_string *str)
{
+ if (str == NULL)
+ return;
+
if (--str->refcnt == 0) {
- if (str->alloc != NULL) {
+ if (str->intern != NULL) {
+ lwc_context_unref(str->context);
+ lwc_context_string_unref(str->context, str->intern);
+ str->alloc(str, 0, str->pw);
+ } else if (str->alloc != NULL) {
str->alloc(str->ptr, 0, str->pw);
str->alloc(str, 0, str->pw);
}
@@ -71,11 +85,11 @@ void dom_string_unref(struct dom_string *str)
/**
* Create a DOM string from a string of characters
*
- * \param alloc Memory (de)allocation function
- * \param pw Pointer to client-specific private data
- * \param ptr Pointer to string of characters
- * \param len Length, in bytes, of string of characters
- * \param str Pointer to location to receive result
+ * \param alloc Memory (de)allocation function
+ * \param pw Pointer to client-specific private data
+ * \param ptr Pointer to string of characters
+ * \param len Length, in bytes, of string of characters
+ * \param str Pointer to location to receive result
* \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
*
* The returned string will already be referenced, so there is no need
@@ -89,7 +103,7 @@ dom_exception dom_string_create(dom_alloc alloc, void *pw,
{
struct dom_string *ret;
- if (ptr == NULL && len == 0) {
+ if (ptr == NULL || len == 0) {
dom_string_ref(&empty_string);
*str = &empty_string;
@@ -114,6 +128,9 @@ dom_exception dom_string_create(dom_alloc alloc, void *pw,
ret->alloc = alloc;
ret->pw = pw;
+ ret->intern = NULL;
+ ret->context = NULL;
+
ret->refcnt = 1;
*str = ret;
@@ -122,6 +139,152 @@ dom_exception dom_string_create(dom_alloc alloc, void *pw,
}
/**
+ * Clone a dom_string if necessary. This method is used to create a new string
+ * with a new allocator, but if the allocator is the same with the paramter
+ * str, just ref the string.
+ *
+ * \param alloc The new allocator for this string
+ * \param pw The new pw for this string
+ * \param str The source dom_string
+ * \param ret The cloned dom_string
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
+ *
+ * @note: When both the alloc and pw are the same as the str's, we need no
+ * real clone, just ref the source string is ok.
+ */
+dom_exception dom_string_clone(dom_alloc alloc, void *pw,
+ struct dom_string *str, struct dom_string **ret)
+{
+ if (alloc == str->alloc && pw == str->pw) {
+ *ret = str;
+ dom_string_ref(str);
+ return DOM_NO_ERR;
+ }
+
+ if (str->intern != NULL) {
+ return _dom_string_create_from_lwcstring(alloc, pw,
+ str->context, str->intern, ret);
+ } else {
+ return dom_string_create(alloc, pw, str->ptr, str->len, ret);
+ }
+}
+
+/**
+ * Create a dom_string from a lwc_string
+ *
+ * \param ctx The lwc_context
+ * \param str The lwc_string
+ * \param ret The new dom_string
+ * \return DOM_NO_ERR on success, DOM_NO_MEM_ERR on memory exhaustion
+ */
+dom_exception _dom_string_create_from_lwcstring(dom_alloc alloc, void *pw,
+ lwc_context *ctx, lwc_string *str, struct dom_string **ret)
+{
+ dom_string *r;
+
+ if (str == NULL) {
+ *ret = NULL;
+ return DOM_NO_ERR;
+ }
+
+ r = alloc(NULL, sizeof(struct dom_string), pw);
+ if (r == NULL)
+ return DOM_NO_MEM_ERR;
+
+ if (str == NULL) {
+ *ret = &empty_string;
+ dom_string_ref(*ret);
+ return DOM_NO_ERR;
+ }
+
+ r->context = ctx;
+ r->intern = str;
+ r->ptr = (uint8_t *)lwc_string_data(str);
+ r->len = lwc_string_length(str);
+
+ r->alloc = alloc;
+ r->pw = pw;
+
+ r->refcnt = 1;
+
+ /* Ref the lwc_string */
+ lwc_context_ref(ctx);
+ lwc_context_string_ref(ctx, str);
+
+ *ret = r;
+ return DOM_NO_ERR;
+
+}
+
+/**
+ * Make the dom_string be interned in the lwc_context
+ *
+ * \param str The dom_string to be interned
+ * \param ctx The lwc_context to intern this dom_string
+ * \param lwcstr The result lwc_string
+ * \return DOM_NO_ERR on success, appropriate dom_exception on failure.
+ */
+dom_exception _dom_string_intern(struct dom_string *str,
+ struct lwc_context_s *ctx, struct lwc_string_s **lwcstr)
+{
+ lwc_string *ret;
+ lwc_error lerr;
+
+ /* If this string is interned with the same context, do nothing */
+ if (str->context != NULL && str->context == ctx) {
+ *lwcstr = str->intern;
+ return DOM_NO_ERR;
+ }
+
+ lerr = lwc_context_intern(ctx, (const char *)str->ptr, str->len, &ret);
+ if (lerr != lwc_error_ok) {
+ return _dom_exception_from_lwc_error(lerr);
+ }
+
+ if (str->context != NULL) {
+ lwc_context_unref(str->context);
+ lwc_context_string_unref(str->context, str->intern);
+ str->ptr = NULL;
+ }
+
+ str->context = ctx;
+ str->intern = ret;
+ lwc_context_ref(ctx);
+ lwc_context_string_ref(ctx, ret);
+
+ if (str->ptr != NULL) {
+ str->alloc(str->ptr, 0, str->pw);
+ }
+
+ str->ptr = (uint8_t *) lwc_string_data(ret);
+
+ *lwcstr = ret;
+ return DOM_NO_ERR;
+}
+
+/**
+ * Get the internal lwc_string
+ *
+ * \param str The dom_string object
+ * \param ctx The lwc_context which intern this dom_string
+ * \param lwcstr The lwc_string of this dom-string
+ * \return DOM_NO_ERR on success, appropriate dom_exception on failure.
+ */
+dom_exception dom_string_get_intern(struct dom_string *str,
+ struct lwc_context_s **ctx, struct lwc_string_s **lwcstr)
+{
+ *ctx = str->context;
+ *lwcstr = str->intern;
+
+ if (*ctx != NULL)
+ lwc_context_ref(*ctx);
+ if (*lwcstr != NULL)
+ lwc_context_string_ref(*ctx, *lwcstr);
+
+ return DOM_NO_ERR;
+}
+
+/**
* Case sensitively compare two DOM strings
*
* \param s1 The first string to compare
@@ -132,12 +295,26 @@ dom_exception dom_string_create(dom_alloc alloc, void *pw,
*/
int dom_string_cmp(struct dom_string *s1, struct dom_string *s2)
{
+ bool ret;
+
if (s1 == NULL)
s1 = &empty_string;
if (s2 == NULL)
s2 = &empty_string;
+ if (s1->context == s2->context && s1->context != NULL) {
+ assert(s1->intern != NULL);
+ assert(s2->intern != NULL);
+ lwc_context_string_isequal(s1->context, s1->intern,
+ s2->intern, &ret);
+ if (ret == true) {
+ return 0;
+ } else {
+ return -1;
+ }
+ }
+
if (s1->len != s2->len)
return 1;
@@ -164,6 +341,19 @@ int dom_string_icmp(struct dom_string *s1, struct dom_string *s2)
if (s2 == NULL)
s2 = &empty_string;
+ bool ret;
+ if (s1->context == s2->context && s1->context != NULL) {
+ assert(s1->intern != NULL);
+ assert(s2->intern != NULL);
+ lwc_context_string_caseless_isequal(s1->context, s1->intern,
+ s2->intern, &ret);
+ if (ret == true) {
+ return 0;
+ } else {
+ return -1;
+ }
+ }
+
d1 = s1->ptr;
d2 = s2->ptr;
l1 = s1->len;
@@ -304,6 +494,56 @@ uint32_t dom_string_length(struct dom_string *str)
return clen;
}
+/**
+ * Get the UCS4 character at position index
+ *
+ * \param index The position of the charater
+ * \param ch The UCS4 character
+ * \return DOM_NO_ERR on success, appropriate dom_exception on failure.
+ */
+dom_exception dom_string_at(struct dom_string *str, uint32_t index,
+ uint32_t *ch)
+{
+ const uint8_t *s;
+ size_t clen, slen;
+ uint32_t c, i;
+ parserutils_error err;
+
+ if (str == NULL)
+ str = &empty_string;
+
+ s = str->ptr;
+ slen = str->len;
+
+ i = 0;
+
+ while (slen > 0) {
+ err = parserutils_charset_utf8_char_byte_length(s, &clen);
+ if (err != PARSERUTILS_OK) {
+ return (uint32_t) -1;
+ }
+
+ i++;
+ if (i == index + 1)
+ break;
+
+ s += clen;
+ slen -= clen;
+ }
+
+ if (i == index + 1) {
+ err = parserutils_charset_utf8_to_ucs4(s, slen, &c, &clen);
+ if (err != PARSERUTILS_OK) {
+ return (uint32_t) -1;
+ }
+
+ *ch = c;
+ return DOM_NO_ERR;
+ } else {
+ return DOM_DOMSTRING_SIZE_ERR;
+ }
+}
+
/**
* Concatenate two dom strings
*
@@ -322,16 +562,33 @@ dom_exception dom_string_concat(struct dom_string *s1, struct dom_string *s2,
struct dom_string **result)
{
struct dom_string *concat;
+ dom_alloc alloc;
+ void *pw;
+
+ assert(s1 != NULL);
+ assert(s2 != NULL);
+
+ if (s1->alloc != NULL) {
+ alloc = s1->alloc;
+ pw = s1->pw;
+ } else if (s2->alloc != NULL) {
+ alloc = s2->alloc;
+ pw = s2->pw;
+ } else {
+ /* s1 == s2 == empty_string */
+ *result = &empty_string;
+ return DOM_NO_ERR;
+ }
- concat = s1->alloc(NULL, sizeof(struct dom_string), s1->pw);
+ concat = alloc(NULL, sizeof(struct dom_string), pw);
if (concat == NULL) {
return DOM_NO_MEM_ERR;
}
- concat->ptr = s1->alloc(NULL, s1->len + s2->len, s1->pw);
+ concat->ptr = alloc(NULL, s1->len + s2->len, pw);
if (concat->ptr == NULL) {
- s1->alloc(concat, 0, s1->pw);
+ alloc(concat, 0, pw);
return DOM_NO_MEM_ERR;
}
@@ -342,8 +599,10 @@ dom_exception dom_string_concat(struct dom_string *s1, struct dom_string *s2,
concat->len = s1->len + s2->len;
- concat->alloc = s1->alloc;
- concat->pw = s1->pw;
+ concat->alloc = alloc;
+ concat->pw = pw;
+ concat->context = NULL;
+ concat->intern = NULL;
concat->refcnt = 1;
@@ -382,7 +641,7 @@ dom_exception dom_string_substr(struct dom_string *str,
/* Calculate the byte index of the start */
while (i1 > 0) {
- err = parserutils_charset_utf8_next(s, slen - b1, b1, &b1);
+ err = parserutils_charset_utf8_next(s, slen, b1, &b1);
if (err != PARSERUTILS_OK) {
return DOM_NO_MEM_ERR;
}
@@ -395,7 +654,7 @@ dom_exception dom_string_substr(struct dom_string *str,
/* Calculate the byte index of the end */
while (i2 > 0) {
- err = parserutils_charset_utf8_next(s, slen - b2, b2, &b2);
+ err = parserutils_charset_utf8_next(s, slen, b2, &b2);
if (err != PARSERUTILS_OK) {
return DOM_NO_MEM_ERR;
}
@@ -451,7 +710,7 @@ dom_exception dom_string_insert(struct dom_string *target,
ins = tlen;
} else {
while (offset > 0) {
- err = parserutils_charset_utf8_next(t, tlen - ins,
+ err = parserutils_charset_utf8_next(t, tlen,
ins, &ins);
if (err != PARSERUTILS_OK) {
@@ -492,6 +751,8 @@ dom_exception dom_string_insert(struct dom_string *target,
res->alloc = target->alloc;
res->pw = target->pw;
+ res->intern = NULL;
+ res->context = NULL;
res->refcnt = 1;
@@ -526,6 +787,9 @@ dom_exception dom_string_replace(struct dom_string *target,
uint32_t b1, b2;
parserutils_error err;
+ if (source == NULL)
+ source = &empty_string;
+
t = target->ptr;
tlen = target->len;
s = source->ptr;
@@ -538,7 +802,7 @@ dom_exception dom_string_replace(struct dom_string *target,
/* Calculate the byte index of the start */
while (i1 > 0) {
- err = parserutils_charset_utf8_next(s, slen - b1, b1, &b1);
+ err = parserutils_charset_utf8_next(s, slen, b1, &b1);
if (err != PARSERUTILS_OK) {
return DOM_NO_MEM_ERR;
@@ -552,7 +816,7 @@ dom_exception dom_string_replace(struct dom_string *target,
/* Calculate the byte index of the end */
while (i2 > 0) {
- err = parserutils_charset_utf8_next(s, slen - b2, b2, &b2);
+ err = parserutils_charset_utf8_next(s, slen, b2, &b2);
if (err != PARSERUTILS_OK) {
return DOM_NO_MEM_ERR;
@@ -594,6 +858,8 @@ dom_exception dom_string_replace(struct dom_string *target,
res->alloc = target->alloc;
res->pw = target->pw;
+ res->intern = NULL;
+ res->context = NULL;
res->refcnt = 1;
@@ -618,8 +884,13 @@ dom_exception dom_string_replace(struct dom_string *target,
dom_exception dom_string_dup(struct dom_string *str,
struct dom_string **result)
{
- return dom_string_create(str->alloc, str->pw, str->ptr, str->len,
- result);
+ if (str->intern != NULL) {
+ return _dom_string_create_from_lwcstring(str->alloc, str->pw,
+ str->context, str->intern, result);
+ } else {
+ return dom_string_create(str->alloc, str->pw, str->ptr,
+ str->len, result);
+ }
}
/**
@@ -645,3 +916,48 @@ uint32_t dom_string_hash(struct dom_string *str)
return hash;
}
+/**
+ * Convert a lwc_error to a dom_exception
+ *
+ * \param err The input lwc_error
+ * \return the dom_exception
+ */
+dom_exception _dom_exception_from_lwc_error(lwc_error err)
+{
+ switch (err) {
+ case lwc_error_ok:
+ return DOM_NO_ERR;
+ case lwc_error_oom:
+ return DOM_NO_MEM_ERR;
+ case lwc_error_range:
+ return DOM_INDEX_SIZE_ERR;
+ }
+ assert ("Unknow lwc_error, can't convert to dom_exception");
+ /* Suppress compile errors */
+ return DOM_NO_ERR;
+}
+
+/**
+ * Compare the raw data of two lwc_strings for equality when the two strings
+ * belong to different lwc_context
+ *
+ * \param s1 The first lwc_string
+ * \param s2 The second lwc_string
+ * \return 0 for equal, non-zero otherwise
+ */
+int _dom_lwc_string_compare_raw(struct lwc_string_s *s1,
+ struct lwc_string_s *s2)
+{
+ const char *rs1, *rs2;
+ size_t len;
+
+ if (lwc_string_length(s1) != lwc_string_length(s2))
+ return -1;
+
+ len = lwc_string_length(s1);
+ rs1 = lwc_string_data(s1);
+ rs2 = lwc_string_data(s2);
+
+ return memcmp(rs1, rs2, len);
+}
+