summary | refs | log | tree | commit | diff
path: root/src/utils/utf16.h
diff options
context:
space:
mode:
author: John Mark Bell <jmb@netsurf-browser.org> 2007-09-30 21:10:50 +0000
committer: John Mark Bell <jmb@netsurf-browser.org> 2007-09-30 21:10:50 +0000
commit: 613f88393960853513873756933bd23b93543a33 (patch)
tree: f7f4c1acff769e9a7f6cd0f1c037ba2c28a66593 /src/utils/utf16.h
parent: 5eba18a08b9821fa10cc03af8dbd7d1c10653b13 (diff)
download: libdom-613f88393960853513873756933bd23b93543a33.tar.gz
libdom-613f88393960853513873756933bd23b93543a33.tar.bz2
DOM Strings are now capable of containing either UTF-8 or UTF-16 encoded data.
The charset used for strings within a document is specified at document creation time. Whilst it is possible to mix charsets within a document, it's not recommended.

Things that need fixing:
 + dom_string_get_data() doesn't return the charset. Better would be to permit the client to request a charset for the data to be returned in.
 + Interned node name strings will break if the document is UTF-16 (dom_document_create()). In fact, these could quite happily be globals, rather than allocating a set for each document.
 + Other uses of dom string constructors need checking for sanity.
 + DOM Strings need to gain more utility APIs (such as getting the character length of a string, string concatenation, etc.).

svn path=/trunk/dom/; revision=3614
Diffstat (limited to 'src/utils/utf16.h')
-rw-r--r-- src/utils/utf16.h 38
1 files changed, 38 insertions, 0 deletions
diff --git a/src/utils/utf16.h b/src/utils/utf16.h
new file mode 100644
index 0000000..7b9e15f
--- /dev/null
+++ b/src/utils/utf16.h
@@ -0,0 +1,38 @@
+/*
+ * This file is part of libdom.
+ * Licensed under the MIT License,
+ * http://www.opensource.org/licenses/mit-license.php
+ * Copyright 2007 John-Mark Bell <jmb@netsurf-browser.org>
+ */
+
+/** \file
+ * UTF-16 manipulation functions (interface).
+ */
+
+#ifndef dom_utils_utf16_h_
+#define dom_utils_utf16_h_
+
+#include <inttypes.h>
+
+#include "utils/charset_errors.h"
+
+/**
+ * Convert a UTF-16 sequence to a UCS-4 code point (per the function name).
+ *
+ * Declared without `inline`: this header supplies no definition, and C99
+ * 6.7.4 requires an inline function with external linkage to be defined in
+ * the same translation unit that declares it inline.
+ *
+ * NOTE(review): parameter roles are inferred from their names only;
+ * confirm against the implementation.
+ *
+ * \param s     Input bytes (presumably the UTF-16 data to decode)
+ * \param len   Presumably the length of the data at s
+ * \param ucs4  Output pointer (presumably receives the decoded code point)
+ * \param clen  Output pointer (presumably receives the length consumed)
+ * \return A charset_error status code
+ */
+charset_error _dom_utf16_to_ucs4(const uint8_t *s, size_t len,
+		uint32_t *ucs4, size_t *clen);
+/**
+ * Convert a UCS-4 code point to UTF-16 (per the function name).
+ *
+ * Declared without `inline`: this header supplies no definition, and C99
+ * 6.7.4 requires an inline function with external linkage to be defined in
+ * the same translation unit that declares it inline.
+ *
+ * NOTE(review): parameter roles are inferred from their names only;
+ * confirm against the implementation (in particular, whether len is
+ * in/out and what buffer size s must provide).
+ *
+ * \param ucs4  The code point to encode
+ * \param s     Output buffer (presumably receives the UTF-16 bytes)
+ * \param len   Length pointer (presumably receives the bytes written)
+ * \return A charset_error status code
+ */
+charset_error _dom_utf16_from_ucs4(uint32_t ucs4, uint8_t *s,
+		size_t *len);
+
+/**
+ * Compute the length of a UTF-16 string (per the function name).
+ *
+ * Declared without `inline`: this header supplies no definition, and C99
+ * 6.7.4 requires an inline function with external linkage to be defined in
+ * the same translation unit that declares it inline.
+ *
+ * NOTE(review): parameter roles and units (bytes vs. characters) are
+ * inferred from names only; confirm against the implementation.
+ *
+ * \param s    The UTF-16 data to measure
+ * \param max  Presumably an upper bound on how much of s to examine
+ * \param len  Output pointer (presumably receives the computed length)
+ * \return A charset_error status code
+ */
+charset_error _dom_utf16_length(const uint8_t *s, size_t max,
+		size_t *len);
+/**
+ * Compute the byte length of a single UTF-16 character (per the function
+ * name).
+ *
+ * Declared without `inline`: this header supplies no definition, and C99
+ * 6.7.4 requires an inline function with external linkage to be defined in
+ * the same translation unit that declares it inline.
+ *
+ * NOTE(review): parameter roles are inferred from their names only;
+ * confirm against the implementation. No input length is passed, so the
+ * caller presumably must guarantee enough readable bytes at s.
+ *
+ * \param s    Pointer to the character to measure
+ * \param len  Output pointer (presumably receives the byte length)
+ * \return A charset_error status code
+ */
+charset_error _dom_utf16_char_byte_length(const uint8_t *s,
+		size_t *len);
+
+/**
+ * Step backwards to the previous character in a UTF-16 string (per the
+ * function name).
+ *
+ * Declared without `inline`: this header supplies no definition, and C99
+ * 6.7.4 requires an inline function with external linkage to be defined in
+ * the same translation unit that declares it inline.
+ *
+ * NOTE(review): parameter roles are inferred from their names only;
+ * confirm against the implementation (including behaviour at offset 0).
+ *
+ * \param s        The UTF-16 data
+ * \param off      Presumably the current offset into s
+ * \param prevoff  Output pointer (presumably receives the previous
+ *                 character's offset)
+ * \return A charset_error status code
+ */
+charset_error _dom_utf16_prev(const uint8_t *s, uint32_t off,
+		uint32_t *prevoff);
+/**
+ * Step forwards to the next character in a UTF-16 string (per the function
+ * name).
+ *
+ * Declared without `inline`: this header supplies no definition, and C99
+ * 6.7.4 requires an inline function with external linkage to be defined in
+ * the same translation unit that declares it inline.
+ *
+ * NOTE(review): parameter roles are inferred from their names only;
+ * confirm against the implementation (including behaviour at end of data).
+ *
+ * \param s        The UTF-16 data
+ * \param len      Presumably the length of the data at s
+ * \param off      Presumably the current offset into s
+ * \param nextoff  Output pointer (presumably receives the next
+ *                 character's offset)
+ * \return A charset_error status code
+ */
+charset_error _dom_utf16_next(const uint8_t *s, uint32_t len,
+		uint32_t off, uint32_t *nextoff);
+
+/**
+ * Step forwards to the next character in a UTF-16 string; the "paranoid"
+ * suffix suggests stricter input checking than _dom_utf16_next, which
+ * takes the same parameter list.
+ *
+ * Declared without `inline`: this header supplies no definition, and C99
+ * 6.7.4 requires an inline function with external linkage to be defined in
+ * the same translation unit that declares it inline.
+ *
+ * NOTE(review): parameter roles and the exact extra validation performed
+ * are inferred from names only; confirm against the implementation.
+ *
+ * \param s        The UTF-16 data
+ * \param len      Presumably the length of the data at s
+ * \param off      Presumably the current offset into s
+ * \param nextoff  Output pointer (presumably receives the next
+ *                 character's offset)
+ * \return A charset_error status code
+ */
+charset_error _dom_utf16_next_paranoid(const uint8_t *s,
+		uint32_t len, uint32_t off, uint32_t *nextoff);
+
+#endif
+