From 805d6586d64ba4e3f748cca3c319559aad70d15f Mon Sep 17 00:00:00 2001
From: John Mark Bell
Date: Mon, 13 Sep 2010 22:32:52 +0000
Subject: Replace all instances of NUL with U+FFFD

svn path=/trunk/netsurf/; revision=10771
---
 render/textplain.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

(limited to 'render/textplain.c')

diff --git a/render/textplain.c b/render/textplain.c
index 13ccd2660..920368634 100644
--- a/render/textplain.c
+++ b/render/textplain.c
@@ -163,13 +163,23 @@ no_memory:
 bool textplain_drain_input(struct content *c, parserutils_inputstream *stream,
 		parserutils_error terminator)
 {
+	static const uint8_t *u_fffd = (const uint8_t *) "\xef\xbf\xbd";
 	const uint8_t *ch;
-	size_t chlen;
+	size_t chlen, outlen;
 
 	/** \todo Optimise: stop invoking memcpy for each character */
 	while (parserutils_inputstream_peek(stream, 0, &ch, &chlen) !=
 			terminator) {
-		if (c->data.textplain.utf8_data_size + chlen >=
+
+		/* Replace all instances of NUL with U+FFFD */
+		if (chlen == 1 && *ch == 0) {
+			ch = u_fffd;
+			outlen = 3;
+		} else {
+			outlen = chlen;
+		}
+
+		if (c->data.textplain.utf8_data_size + outlen >=
 				c->data.textplain.utf8_data_allocated) {
 			size_t allocated = CHUNK +
 					c->data.textplain.utf8_data_allocated;
@@ -184,8 +194,8 @@ bool textplain_drain_input(struct content *c, parserutils_inputstream *stream,
 		}
 
 		memcpy(c->data.textplain.utf8_data +
-				c->data.textplain.utf8_data_size, ch, chlen);
-		c->data.textplain.utf8_data_size += chlen;
+				c->data.textplain.utf8_data_size, ch, outlen);
+		c->data.textplain.utf8_data_size += outlen;
 
 		parserutils_inputstream_advance(stream, chlen);
 	}
-- 
cgit v1.2.3