summaryrefslogtreecommitdiff
path: root/utils/utils.c
diff options
context:
space:
mode:
author John Tytgat <joty@netsurf-browser.org> 2004-07-19 20:29:47 +0000
committer John Tytgat <joty@netsurf-browser.org> 2004-07-19 20:29:47 +0000
commit f94da4813992a06f829080629b7b17b17cb741c5 (patch)
tree d353299c2de18996481ab2fe2caa28c79f7d3330 /utils/utils.c
parent a3925b4ffc4f3fa3a053975fa11b9a29981d11af (diff)
download netsurf-f94da4813992a06f829080629b7b17b17cb741c5.tar.gz
netsurf-f94da4813992a06f829080629b7b17b17cb741c5.tar.bz2
[project @ 2004-07-19 20:29:47 by joty]
Added cnv_local_enc_str() : to convert string in local machine encoding into UTF-8 NUL terminated string. svn path=/import/netsurf/; revision=1116
Diffstat (limited to 'utils/utils.c')
-rw-r--r-- utils/utils.c 66
1 files changed, 64 insertions, 2 deletions
diff --git a/utils/utils.c b/utils/utils.c
index 8d028d5e0..aa2494595 100644
--- a/utils/utils.c
+++ b/utils/utils.c
@@ -149,6 +149,66 @@ char *cnv_space2nbsp(const char *s)
}
/**
 * Map one RISCOS-LATIN1 byte to its Unicode code point.
 *
 * Bytes 0x00..0x7F and 0xA0..0xFF map to the code point of the same value
 * (as in ISO-8859-1); bytes 0x80..0x9F go through the RISC OS specific table.
 */
static unsigned int cnv_riscos1_to_ucs(unsigned char c)
{
	/* Every entry lies between 0x80 (incl) and 0x10000 (excl), so each
	 * one needs either two or three bytes once encoded as UTF-8.
	 */
	static const unsigned int riscos1_2uni[32] = {
		/* 0x80 */
		0x221a, 0x0174, 0x0175, 0x0083, 0x2573, 0x0176, 0x0177, 0x0087,
		0x21e6, 0x21e8, 0x21e9, 0x21e7, 0x2026, 0x2122, 0x2030, 0x2022,
		/* 0x90 */
		0x2018, 0x2019, 0x2039, 0x203a, 0x201c, 0x201d, 0x201e, 0x2013,
		0x2014, 0x2212, 0x0152, 0x0153, 0x2020, 0x2021, 0xfb01, 0xfb02,
	};

	return (c >= 0x80 && c < 0xA0) ? riscos1_2uni[c - 0x80] : c;
}

/**
 * Convert local encoding to NUL terminated UTF-8 string.
 * Caller needs to free return value.
 *
 * \param s string in local machine encoding.  Conversion stops at the first
 *          NUL byte or after <length> bytes, whichever comes first.
 * \param length maximum number of bytes to consider at s.
 * \return malloc()'ed NUL terminated string in UTF-8 encoding, or NULL when
 *         the allocation fails.
 *
 * Based on RISCOS-LATIN1 code from libiconv.
 * \todo: we should use libiconv to support more local encodings instead
 * of only RISCOS-LATIN1.
 */
char *cnv_local_enc_str(const char *s, size_t length)
{
	/* Work on unsigned bytes: plain char may be signed, in which case
	 * bytes >= 0x80 would compare as negative values.
	 */
	const unsigned char *in = (const unsigned char *)s;
	size_t n_in, l_out, i;
	unsigned char *d;
	char *d_out;

	/* Pass 1: find how many input bytes are converted and how many
	 * output bytes that takes (starting at 1 for the terminating NUL).
	 * The length bound is tested before the byte is read so that we
	 * never touch in[length].
	 */
	l_out = 1;
	for (n_in = 0; n_in < length && in[n_in] != '\0'; ++n_in) {
		const unsigned int uc = cnv_riscos1_to_ucs(in[n_in]);
		l_out += (uc < 0x80) ? 1 : (uc < 0x800) ? 2 : 3;
	}

	d_out = malloc(l_out);
	if (d_out == NULL)
		return NULL;

	/* Pass 2: emit the UTF-8 sequence for each code point. */
	d = (unsigned char *)d_out;
	for (i = 0; i < n_in; ++i) {
		const unsigned int uc = cnv_riscos1_to_ucs(in[i]);

		if (uc < 0x80) {
			*d++ = (unsigned char)uc;
		} else if (uc < 0x800) {
			*d++ = (unsigned char)(0xC0 | (uc >> 6));
			*d++ = (unsigned char)(0x80 | (uc & 0x3F));
		} else {
			*d++ = (unsigned char)(0xE0 | (uc >> 12));
			*d++ = (unsigned char)(0x80 | ((uc >> 6) & 0x3F));
			*d++ = (unsigned char)(0x80 | (uc & 0x3F));
		}
	}
	*d = '\0';

	return d_out;
}
+
+
+/**
* Converts NUL terminated UTF-8 string <s> to the machine local encoding.
* Caller needs to free return value.
*/
@@ -157,6 +217,7 @@ char *cnv_str_local_enc(const char *s)
return cnv_strn_local_enc(s, strlen(s), NULL);
}
+
/**
* Converts UTF-8 string <s> of <length> bytes to the machine local encoding.
* Caller needs to free return value.
@@ -165,7 +226,8 @@ return cnv_strn_local_enc(s, strlen(s), NULL);
* needs to be free'd by the caller. The array contains per character
* in the return string, a ptrdiff in the <s> UTF-8 encoded string.
*
- * \todo more work is needed here. Only Latin1 is done here.
+ * \todo: we should use libiconv to support more local encodings instead
+ * of only ISOLATIN1.
*/
char *cnv_strn_local_enc(const char *s, int length, const ptrdiff_t **back_mapPP)
{
@@ -232,10 +294,10 @@ bool is_dir(const char *path)
void regcomp_wrapper(regex_t *preg, const char *regex, int cflags)
{
- char errbuf[200];
int r;
r = regcomp(preg, regex, cflags);
if (r) {
+ char errbuf[200];
regerror(r, preg, errbuf, sizeof errbuf);
fprintf(stderr, "Failed to compile regexp '%s'\n", regex);
die(errbuf);