From e0e38d906c8974bb22a0368a9709af9590362927 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Sun, 30 Sep 2007 21:10:50 +0000 Subject: DOM Strings are now capable of containing either UTF-8 or UTF-16 encoded data. The charset used for strings within a document is specified at document creation time. Whilst it is possible to mix charsets within a document, it's not recommended. Things that need fixing: + dom_string_get_data() doesn't return the charset. Better would be to permit the client to request a charset for the data to be returned in. + Interned node name strings will break if the document is UTF-16 (dom_document_create()). In fact, these could quite happily be globals, rather than allocating a set for each document. + Other uses of DOM string constructors need checking for sanity. + DOM Strings need to gain more utility APIs (such as getting the character length of a string, string concatenation, etc.). svn path=/trunk/dom/; revision=3614 --- include/dom/core/implementation.h | 3 ++- include/dom/core/string.h | 8 +++++++- 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/dom/core') diff --git a/include/dom/core/implementation.h b/include/dom/core/implementation.h index 3f42ab7..a51493f 100644 --- a/include/dom/core/implementation.h +++ b/include/dom/core/implementation.h @@ -12,11 +12,11 @@ #include <dom/core/exceptions.h> #include <dom/functypes.h> +#include <dom/core/string.h> struct dom_document; struct dom_document_type; struct dom_implementation; -struct dom_string; void dom_implementation_ref(struct dom_implementation *impl); void dom_implementation_unref(struct dom_implementation *impl); @@ -37,6 +37,7 @@ dom_exception dom_implementation_create_document( struct dom_string *namespace, struct dom_string *qname, struct dom_document_type *doctype, struct dom_document **doc, + dom_string_charset charset, dom_alloc alloc, void *pw); dom_exception dom_implementation_get_feature( diff --git a/include/dom/core/string.h b/include/dom/core/string.h index c9ffd97..935e2c2 100644 --- 
a/include/dom/core/string.h +++ b/include/dom/core/string.h @@ -17,6 +17,11 @@ struct dom_document; struct dom_string; +typedef enum { + DOM_STRING_UTF8, + DOM_STRING_UTF16 +} dom_string_charset; + /* Claim a reference on a DOM string */ void dom_string_ref(struct dom_string *str); /* Release a reference on a DOM string */ @@ -34,7 +39,8 @@ dom_exception dom_string_create_from_const_ptr(struct dom_document *doc, /* Create a DOM string from a string of characters that does not belong * to a document */ dom_exception dom_string_create_from_ptr_no_doc(dom_alloc alloc, void *pw, - const uint8_t *ptr, size_t len, struct dom_string **str); + dom_string_charset charset, const uint8_t *ptr, size_t len, + struct dom_string **str); /* Get a pointer to the string of characters within a DOM string */ dom_exception dom_string_get_data(struct dom_string *str, -- cgit v1.2.3