From 539d2cc2024f494b1e3292d4730bdc96390e1361 Mon Sep 17 00:00:00 2001
From: "Steven G. Johnson" <stevenj@alum.mit.edu>
Date: Sun, 7 Dec 2014 22:25:31 -0500
Subject: grapheme test for UAX#29

---
 tests.h | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 53 insertions(+)
 create mode 100644 tests.h

(limited to 'tests.h')

diff --git a/tests.h b/tests.h
new file mode 100644
index 0000000..e3c1cf5
--- /dev/null
+++ b/tests.h
@@ -0,0 +1,53 @@
+/* Common functions and includes for our test programs. */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <ctype.h>
+#include <string.h>
+#include <stdarg.h>
+
+#include "mojibake.h"
+
+size_t lineno = 0;
+
+void check(int cond, const char *format, ...)
+{
+     if (!cond) {
+          va_list args;
+          fprintf(stderr, "line %zd: ", lineno);
+          va_start(args, format);
+          vfprintf(stderr, format, args);
+          va_end(args);
+          fprintf(stderr, "\n");
+          exit(1);
+     }
+}
+
+size_t skipspaces(const char *buf, size_t i)
+{
+    while (isspace(buf[i])) ++i;
+    return i;
+}
+
+/* if buf points to a sequence of codepoints encoded as hexadecimal strings,
+   separated by whitespace, and terminated by any character not in
+   [0-9a-fA-F] or whitespace, then stores the corresponding utf8 string
+   in dest, returning the number of bytes read from buf */
+size_t encode(char *dest, const char *buf)
+{
+     size_t i = 0, j, d = 0;
+     do {
+          int c;
+          i = skipspaces(buf, i);
+          for (j=i; buf[j] && strchr("0123456789abcdef", tolower(buf[j])); ++j)
+               ; /* find end of hex input */
+          if (j == i) { /* no codepoint found */
+               dest[d] = 0; /* NUL-terminate destination string */
+               return i + 1;
+          }
+          check(sscanf(buf + i, "%x", &c) == 1, "invalid hex input %s", buf+i);
+          i = j; /* skip to char after hex input */
+          d += utf8proc_encode_char(c, (uint8_t *) (dest + d));
+     } while (1);
+}
+
-- 
cgit v1.2.3