summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorVincent Sanders <vince@kyllikki.org>2022-11-26 14:44:08 +0000
committerVincent Sanders <vince@kyllikki.org>2022-11-26 15:21:16 +0000
commit1d82ef411a65095f218c15441fb804715e59f0eb (patch)
tree5d456b371af051f11c742b24a1138496b6baae38
parent6780766fb7c27145415baa2c40251e386b3e894a (diff)
downloadnetsurf-1d82ef411a65095f218c15441fb804715e59f0eb.tar.gz
netsurf-1d82ef411a65095f218c15441fb804715e59f0eb.tar.bz2
consolodate duplicated conversion descriptor cache code
-rw-r--r--utils/utf8.c119
1 files changed, 61 insertions, 58 deletions
diff --git a/utils/utf8.c b/utils/utf8.c
index 84cacd78c..7aa7d935b 100644
--- a/utils/utf8.c
+++ b/utils/utf8.c
@@ -44,7 +44,7 @@ uint32_t utf8_to_ucs4(const char *s_in, size_t l)
parserutils_error perror;
perror = parserutils_charset_utf8_to_ucs4((const uint8_t *) s_in, l,
- &ucs4, &len);
+ &ucs4, &len);
if (perror != PARSERUTILS_OK)
ucs4 = 0xfffd;
@@ -106,7 +106,7 @@ size_t utf8_char_byte_length(const char *s)
parserutils_error perror;
perror = parserutils_charset_utf8_char_byte_length((const uint8_t *) s,
- &len);
+ &len);
assert(perror == PARSERUTILS_OK);
return len;
@@ -131,7 +131,7 @@ size_t utf8_next(const char *s, size_t l, size_t o)
parserutils_error perror;
perror = parserutils_charset_utf8_next((const uint8_t *) s, l, o,
- &next);
+ &next);
assert(perror == PARSERUTILS_OK);
return next;
@@ -151,6 +151,47 @@ static inline void utf8_clear_cd_cache(void)
last_cd.cd = 0;
}
+/**
+ * obtain a cached conversion descriptor
+ *
+ * either return the cached conversion descriptor or create one if required
+ */
+static nserror
+get_cached_cd(const char *enc_from, const char *enc_to, iconv_t *cd_out)
+{
+ iconv_t cd;
+ /* we cache the last used conversion descriptor,
+ * so check if we're trying to use it here */
+ if (strncasecmp(last_cd.from, enc_from, sizeof(last_cd.from)) == 0 &&
+ strncasecmp(last_cd.to, enc_to, sizeof(last_cd.to)) == 0 &&
+ last_cd.cd != 0) {
+ *cd_out = last_cd.cd;
+ return NSERROR_OK;
+ }
+
+ /* no match, so create a new cd */
+ cd = iconv_open(enc_to, enc_from);
+ if (cd == (iconv_t) -1) {
+ if (errno == EINVAL) {
+ return NSERROR_BAD_ENCODING;
+ }
+ /* default to no memory */
+ return NSERROR_NOMEM;
+ }
+
+ /* close the last cd - we don't care if this fails */
+ if (last_cd.cd) {
+ iconv_close(last_cd.cd);
+ }
+
+ /* and safely copy the to/from/cd data into last_cd */
+ snprintf(last_cd.from, sizeof(last_cd.from), enc_from);
+ snprintf(last_cd.to, sizeof(last_cd.to), "%s", enc_to);
+ *cd_out = last_cd.cd = cd;
+
+ return NSERROR_OK;
+}
+
/* exported interface documented in utils/utf8.h */
nserror utf8_finalise(void)
{
@@ -187,6 +228,7 @@ utf8_convert(const char *string,
iconv_t cd;
char *temp, *out, *in, *result;
size_t result_len;
+ nserror res;
assert(string && from && to && result_out);
@@ -215,29 +257,9 @@ utf8_convert(const char *string,
in = (char *)string;
- /* we cache the last used conversion descriptor,
- * so check if we're trying to use it here */
- if (strncasecmp(last_cd.from, from, sizeof(last_cd.from)) == 0 &&
- strncasecmp(last_cd.to, to, sizeof(last_cd.to)) == 0) {
- cd = last_cd.cd;
- } else {
- /* no match, so create a new cd */
- cd = iconv_open(to, from);
- if (cd == (iconv_t)-1) {
- if (errno == EINVAL)
- return NSERROR_BAD_ENCODING;
- /* default to no memory */
- return NSERROR_NOMEM;
- }
-
- /* close the last cd - we don't care if this fails */
- if (last_cd.cd)
- iconv_close(last_cd.cd);
-
- /* and copy the to/from/cd data into last_cd */
- snprintf(last_cd.from, sizeof(last_cd.from), "%s", from);
- snprintf(last_cd.to, sizeof(last_cd.to), "%s", to);
- last_cd.cd = cd;
+ res = get_cached_cd(from, to, &cd);
+ if (res != NSERROR_OK) {
+ return res;
}
/* Worst case = ASCII -> UCS4, so allocate an output buffer
@@ -289,14 +311,14 @@ utf8_convert(const char *string,
/* exported interface documented in utils/utf8.h */
nserror utf8_to_enc(const char *string, const char *encname,
- size_t len, char **result)
+ size_t len, char **result)
{
return utf8_convert(string, len, "UTF-8", encname, result, NULL);
}
/* exported interface documented in utils/utf8.h */
nserror utf8_from_enc(const char *string, const char *encname,
- size_t len, char **result, size_t *result_len)
+ size_t len, char **result, size_t *result_len)
{
return utf8_convert(string, len, encname, "UTF-8", result, result_len);
}
@@ -327,7 +349,7 @@ utf8_convert_html_chunk(iconv_t cd,
esclen = snprintf(escape, sizeof(escape), "&#x%06x;", ucs4);
pescape = escape;
ret = iconv(cd, (void *) &pescape, &esclen,
- (void *) out, outlen);
+ (void *) out, outlen);
if (ret == (size_t) -1)
return NSERROR_NOMEM;
@@ -339,6 +361,8 @@ utf8_convert_html_chunk(iconv_t cd,
return NSERROR_OK;
}
+
+
/* exported interface documented in utils/utf8.h */
nserror
utf8_to_html(const char *string, const char *encname, size_t len, char **result)
@@ -349,35 +373,14 @@ utf8_to_html(const char *string, const char *encname, size_t len, char **result)
size_t off, prev_off, inlen, outlen, origoutlen, esclen;
nserror ret;
char *pescape, escape[11];
+ nserror res;
if (len == 0)
len = strlen(string);
- /* we cache the last used conversion descriptor,
- * so check if we're trying to use it here */
- if (strncasecmp(last_cd.from, "UTF-8", sizeof(last_cd.from)) == 0 &&
- strncasecmp(last_cd.to, encname,
- sizeof(last_cd.to)) == 0 &&
- last_cd.cd != 0) {
- cd = last_cd.cd;
- } else {
- /* no match, so create a new cd */
- cd = iconv_open(encname, "UTF-8");
- if (cd == (iconv_t) -1) {
- if (errno == EINVAL)
- return NSERROR_BAD_ENCODING;
- /* default to no memory */
- return NSERROR_NOMEM;
- }
-
- /* close the last cd - we don't care if this fails */
- if (last_cd.cd)
- iconv_close(last_cd.cd);
-
- /* and safely copy the to/from/cd data into last_cd */
- snprintf(last_cd.from, sizeof(last_cd.from), "UTF-8");
- snprintf(last_cd.to, sizeof(last_cd.to), "%s", encname);
- last_cd.cd = cd;
+ res = get_cached_cd("UTF-8", encname, &cd);
+ if (res != NSERROR_OK) {
+ return res;
}
/* Worst case is ASCII -> UCS4, with all characters escaped:
@@ -397,13 +400,13 @@ utf8_to_html(const char *string, const char *encname, size_t len, char **result)
while (off < len) {
/* Must escape '&', '<', and '>' */
if (string[off] == '&' || string[off] == '<' ||
- string[off] == '>') {
+ string[off] == '>') {
if (off - prev_off > 0) {
/* Emit chunk */
in = string + prev_off;
inlen = off - prev_off;
ret = utf8_convert_html_chunk(cd, in, inlen,
- &out, &outlen);
+ &out, &outlen);
if (ret != NSERROR_OK) {
free(origout);
iconv_close(cd);
@@ -414,10 +417,10 @@ utf8_to_html(const char *string, const char *encname, size_t len, char **result)
/* Emit mandatory escape */
esclen = snprintf(escape, sizeof(escape),
- "&#x%06x;", string[off]);
+ "&#x%06x;", string[off]);
pescape = escape;
ret = utf8_convert_html_chunk(cd, pescape, esclen,
- &out, &outlen);
+ &out, &outlen);
if (ret != NSERROR_OK) {
free(origout);
iconv_close(cd);