summary | refs | log | tree | commit | diff
path: root/mojibake.h
diff options
context:
space:
mode:
author: Steven G. Johnson <stevenj@mit.edu> 2014-12-12 16:27:49 -0500
committer: Steven G. Johnson <stevenj@mit.edu> 2014-12-12 16:30:31 -0500
commit: 397a1eabea5d7bca2f5f9831ac9431b5b85017fc (patch)
tree: cb113b817ce4cd76594b1fa2db827b66b7909148 /mojibake.h
parent: 539d2cc2024f494b1e3292d4730bdc96390e1361 (diff)
download: libutf8proc-397a1eabea5d7bca2f5f9831ac9431b5b85017fc.tar.gz
download: libutf8proc-397a1eabea5d7bca2f5f9831ac9431b5b85017fc.tar.bz2
update graphemes for Unicode 7, add utf8proc_grapheme_break function
Diffstat (limited to 'mojibake.h')
-rw-r--r-- mojibake.h 27
1 files changed, 24 insertions, 3 deletions
diff --git a/mojibake.h b/mojibake.h
index fea87a5..d65d0c5 100644
--- a/mojibake.h
+++ b/mojibake.h
@@ -170,17 +170,17 @@ typedef struct utf8proc_property_struct {
utf8proc_propval_t bidi_class;
utf8proc_propval_t decomp_type;
const int32_t *decomp_mapping;
- unsigned bidi_mirrored:1;
+ const int32_t *casefold_mapping;
int32_t uppercase_mapping;
int32_t lowercase_mapping;
int32_t titlecase_mapping;
int32_t comb1st_index;
int32_t comb2nd_index;
+ unsigned bidi_mirrored:1;
unsigned comp_exclusion:1;
unsigned ignorable:1;
unsigned control_boundary:1;
- unsigned extend:1;
- const int32_t *casefold_mapping;
+ unsigned boundclass:4;
} utf8proc_property_t;
#define UTF8PROC_CATEGORY_LU 1
@@ -253,6 +253,21 @@ typedef struct utf8proc_property_struct {
#define UTF8PROC_DECOMP_TYPE_FRACTION 15
#define UTF8PROC_DECOMP_TYPE_COMPAT 16
+/* values for boundclass property: */
+#define UTF8PROC_BOUNDCLASS_START 0
+#define UTF8PROC_BOUNDCLASS_OTHER 1
+#define UTF8PROC_BOUNDCLASS_CR 2
+#define UTF8PROC_BOUNDCLASS_LF 3
+#define UTF8PROC_BOUNDCLASS_CONTROL 4
+#define UTF8PROC_BOUNDCLASS_EXTEND 5
+#define UTF8PROC_BOUNDCLASS_L 6
+#define UTF8PROC_BOUNDCLASS_V 7
+#define UTF8PROC_BOUNDCLASS_T 8
+#define UTF8PROC_BOUNDCLASS_LV 9
+#define UTF8PROC_BOUNDCLASS_LVT 10
+#define UTF8PROC_BOUNDCLASS_REGIONAL_INDICATOR 11
+#define UTF8PROC_BOUNDCLASS_SPACINGMARK 12
+
DLLEXPORT extern const int8_t utf8proc_utf8class[256];
DLLEXPORT const char *utf8proc_version(void);
@@ -367,6 +382,12 @@ DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, int options
* crash!
*/
+DLLEXPORT bool utf8proc_grapheme_break(int32_t c1, int32_t c2);
+/*
+ * Given a pair of consecutive codepoints (c1,c2), return whether a grapheme break is
+ * permitted between them (as defined by the extended grapheme clusters in UAX#29).
+ */
+
DLLEXPORT ssize_t utf8proc_map(
const uint8_t *str, ssize_t strlen, uint8_t **dstptr, int options
);