summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Vincent Sanders <vince@kyllikki.org> 2014-11-14 13:39:47 (GMT)
committer Vincent Sanders <vince@kyllikki.org> 2014-11-14 13:39:47 (GMT)
commit 6b0e4847ddb1d62dffd5d4de4a1240b3fa3afd8a (patch)
tree ebddb89ed063e1e8927ed9b837762f574d34fbf4
parent c203e4dcb680ec3bdccf5fdf7a496549442c56de (diff)
download libutf8proc-6b0e4847ddb1d62dffd5d4de4a1240b3fa3afd8a.tar.gz
libutf8proc-6b0e4847ddb1d62dffd5d4de4a1240b3fa3afd8a.tar.bz2
Update with API extension from the NetSurf version
-rw-r--r-- README 11
-rw-r--r-- include/libutf8proc/utf8proc.h 8
-rw-r--r-- src/utf8proc.c 11
3 files changed, 24 insertions, 6 deletions
diff --git a/README b/README
index ac68401..f032eb2 100644
--- a/README
+++ b/README
@@ -6,8 +6,11 @@ convenience library for NetSurf. Previously this library was simply
copied into the NetSurf sources.
This takes the unicode 5 capable version 1.1.6 of the library and
-converts it to the NetSurf build system. No C source code has been
-changed from upstream and all the Makefiles are licenced as per the
-utf8proc source.
+converts it to the NetSurf build system. An additional API has been added
+with a normalisation function but there are no data changes from
+upstream.
-[1] http://www.public-software-group.org/utf8proc \ No newline at end of file
+All the Makefiles and changes are licenced as per the utf8proc
+source using the MIT "expat" licence.
+
+[1] http://www.public-software-group.org/utf8proc
diff --git a/include/libutf8proc/utf8proc.h b/include/libutf8proc/utf8proc.h
index 24a891b..c074779 100644
--- a/include/libutf8proc/utf8proc.h
+++ b/include/libutf8proc/utf8proc.h
@@ -81,6 +81,8 @@ extern "C" {
#define SSIZE_MAX ((size_t)SIZE_MAX/2)
#endif
+#define UTF8PROC_CCC_VIRAMA 9
+
#define UTF8PROC_NULLTERM (1<<0)
#define UTF8PROC_STABLE (1<<1)
#define UTF8PROC_COMPAT (1<<2)
@@ -326,6 +328,12 @@ ssize_t utf8proc_decompose(
* buffer size is returned.
*/
+ssize_t utf8proc_normalise(int32_t *buffer, ssize_t length, int options);
+/*
+ * Normalises in place the unicode characters given by 'buffer' and 'length'
+ * and returns the new length; utf8proc_reencode calls this as its first step.
+ */
+
ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options);
/*
* Reencodes the sequence of unicode characters given by the pointer
diff --git a/src/utf8proc.c b/src/utf8proc.c
index ef2d433..3e0b25f 100644
--- a/src/utf8proc.c
+++ b/src/utf8proc.c
@@ -387,7 +387,7 @@ ssize_t utf8proc_decompose(
if (decomp_result < 0) return decomp_result;
wpos += decomp_result;
/* prohibiting integer overflows due to too long strings: */
- if (wpos < 0 || wpos > SSIZE_MAX/sizeof(int32_t)/2)
+ if (wpos < 0 || wpos > (ssize_t)(SSIZE_MAX/sizeof(int32_t)/2))
return UTF8PROC_ERROR_OVERFLOW;
}
}
@@ -413,7 +413,7 @@ ssize_t utf8proc_decompose(
return wpos;
}
-ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options) {
+ssize_t utf8proc_normalise(int32_t *buffer, ssize_t length, int options) {
/* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored
ASSERT: 'buffer' has one spare byte of free space at the end! */
if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) {
@@ -516,6 +516,13 @@ ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options) {
}
length = wpos;
}
+ return length;
+}
+
+ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options) {
+ length = utf8proc_normalise(buffer, length, options);
+ if (length < 0) return length;
+
{
ssize_t rpos, wpos = 0;
int32_t uc;