From 2ef742b2bbe323e50001bece2116734ec2b01ee0 Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Mon, 7 Apr 2008 01:56:17 +0000 Subject: Fix recalculation of used document buffer length after resizing when the last attempt to write into the buffer failed mid-way through a multibyte sequence. Add regression test for this. Include regression tests in the testsuite index. svn path=/trunk/hubbub/; revision=4075 --- src/input/utf16_stream.c | 17 +++----- src/input/utf8_stream.c | 17 +++----- test/INDEX | 5 +++ test/Makefile | 2 +- test/regression/stream-nomem.c | 88 ++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 106 insertions(+), 23 deletions(-) create mode 100644 test/regression/stream-nomem.c diff --git a/src/input/utf16_stream.c b/src/input/utf16_stream.c index 1d19a7e..e69f124 100644 --- a/src/input/utf16_stream.c +++ b/src/input/utf16_stream.c @@ -195,20 +195,18 @@ hubbub_error hubbub_utf16stream_append(hubbub_inputstream *stream, moved = (temp != stream->buffer); stream->buffer = temp; - stream->buffer_len += stream->buffer_alloc - - stream->buffer_len - space; + stream->buffer_len = stream->buffer_alloc - space; stream->buffer_alloc += BUFFER_CHUNK; base = stream->buffer + stream->buffer_len; - space = stream->buffer_alloc - stream->buffer_len; + space += BUFFER_CHUNK; if (moved) hubbub_inputstream_buffer_moved(stream); } /* And fix up buffer length */ - stream->buffer_len += stream->buffer_alloc - - stream->buffer_len - space; + stream->buffer_len = stream->buffer_alloc - space; stream->had_eof = true; } else { @@ -246,21 +244,18 @@ hubbub_error hubbub_utf16stream_append(hubbub_inputstream *stream, moved = (temp != stream->buffer); stream->buffer = temp; - stream->buffer_len += stream->buffer_alloc - - stream->buffer_len - space; + stream->buffer_len = stream->buffer_alloc - space; stream->buffer_alloc += BUFFER_CHUNK; base = stream->buffer + stream->buffer_len; - space = stream->buffer_alloc - stream->buffer_len - - space; + space += BUFFER_CHUNK; 
if (moved) hubbub_inputstream_buffer_moved(stream); } /* And fix up buffer length */ - stream->buffer_len += stream->buffer_alloc - - stream->buffer_len - space; + stream->buffer_len = stream->buffer_alloc - space; } return HUBBUB_OK; diff --git a/src/input/utf8_stream.c b/src/input/utf8_stream.c index 5d08993..3de142b 100644 --- a/src/input/utf8_stream.c +++ b/src/input/utf8_stream.c @@ -195,20 +195,18 @@ hubbub_error hubbub_utf8stream_append(hubbub_inputstream *stream, moved = (temp != stream->buffer); stream->buffer = temp; - stream->buffer_len += stream->buffer_alloc - - stream->buffer_len - space; + stream->buffer_len = stream->buffer_alloc - space; stream->buffer_alloc += BUFFER_CHUNK; base = stream->buffer + stream->buffer_len; - space = stream->buffer_alloc - stream->buffer_len; + space += BUFFER_CHUNK; if (moved) hubbub_inputstream_buffer_moved(stream); } /* And fix up buffer length */ - stream->buffer_len += stream->buffer_alloc - - stream->buffer_len - space; + stream->buffer_len = stream->buffer_alloc - space; stream->had_eof = true; } else { @@ -246,21 +244,18 @@ hubbub_error hubbub_utf8stream_append(hubbub_inputstream *stream, moved = (temp != stream->buffer); stream->buffer = temp; - stream->buffer_len += stream->buffer_alloc - - stream->buffer_len - space; + stream->buffer_len = stream->buffer_alloc - space; stream->buffer_alloc += BUFFER_CHUNK; base = stream->buffer + stream->buffer_len; - space = stream->buffer_alloc - stream->buffer_len - - space; + space += BUFFER_CHUNK; if (moved) hubbub_inputstream_buffer_moved(stream); } /* And fix up buffer length */ - stream->buffer_len += stream->buffer_alloc - - stream->buffer_len - space; + stream->buffer_len = stream->buffer_alloc - space; } return HUBBUB_OK; diff --git a/test/INDEX b/test/INDEX index fc45511..73b8039 100644 --- a/test/INDEX +++ b/test/INDEX @@ -15,3 +15,8 @@ parser-utf16 Public parser API (utf-16 internally) html tokeniser HTML tokeniser html tokeniser2 HTML tokeniser (again) 
tokeniser2 tree Treebuilding API html + +# Regression tests +regression/cscodec-segv Segfault in charset codecs +regression/filter-segv Segfault in input filtering +regression/stream-nomem Inputstream buffer expansion diff --git a/test/Makefile b/test/Makefile index 6df42d7..675b043 100644 --- a/test/Makefile +++ b/test/Makefile @@ -34,7 +34,7 @@ DEBUG = OBJS = aliases cscodec csdetect dict entities filter hubbub \ inputstream parser parser-utf16 tokeniser tokeniser2 \ tree -OBJS += regression/cscodec-segv regression/filter-segv +OBJS += regression/cscodec-segv regression/filter-segv regression/stream-nomem .PHONY: clean debug export release setup test diff --git a/test/regression/stream-nomem.c b/test/regression/stream-nomem.c new file mode 100644 index 0000000..7233ac7 --- /dev/null +++ b/test/regression/stream-nomem.c @@ -0,0 +1,88 @@ +#include +#include + +#include + +#include "utils/utils.h" + +#include "input/inputstream.h" + +#include "testutils.h" + +static void *myrealloc(void *ptr, size_t len, void *pw) +{ + UNUSED(pw); + + return realloc(ptr, len); +} + +int main(int argc, char **argv) +{ + hubbub_inputstream *stream; + + /* This is specially calculated so that the inputstream is forced to + * reallocate (it assumes that the inputstream's buffer chunk size + * is 4k) */ +#define BUFFER_SIZE (4096 + 4) + uint8_t input_buffer[BUFFER_SIZE]; + uint8_t *buffer; + size_t buflen; + uint32_t c; + + if (argc != 2) { + printf("Usage: %s \n", argv[0]); + return 1; + } + + /* Populate the buffer with something sane */ + memset(input_buffer, 'a', BUFFER_SIZE); + /* Now, set up our test data */ + input_buffer[BUFFER_SIZE - 1] = '5'; + input_buffer[BUFFER_SIZE - 2] = '4'; + input_buffer[BUFFER_SIZE - 3] = '\xbd'; + input_buffer[BUFFER_SIZE - 4] = '\xbf'; + /* This byte will occupy the 4095th byte in the buffer and + * thus cause the entirety of U+FFFD to be buffered until after + * the buffer has been enlarged */ + input_buffer[BUFFER_SIZE - 5] = '\xef'; + 
input_buffer[BUFFER_SIZE - 6] = '3'; + input_buffer[BUFFER_SIZE - 7] = '2'; + input_buffer[BUFFER_SIZE - 8] = '1'; + + assert(hubbub_initialise(argv[1], myrealloc, NULL) == HUBBUB_OK); + + stream = hubbub_inputstream_create("UTF-8", "UTF-8", myrealloc, NULL); + assert(stream != NULL); + + assert(hubbub_inputstream_append(stream, input_buffer, BUFFER_SIZE) == + HUBBUB_OK); + + assert(hubbub_inputstream_append(stream, NULL, 0) == HUBBUB_OK); + + while ((c = hubbub_inputstream_peek(stream)) != HUBBUB_INPUTSTREAM_EOF) + hubbub_inputstream_advance(stream); + + assert(hubbub_inputstream_claim_buffer(stream, &buffer, &buflen) == + HUBBUB_OK); + + assert(buflen == BUFFER_SIZE); + + printf("Buffer: '%.*s'\n", 8, buffer + (BUFFER_SIZE - 8)); + + assert( buffer[BUFFER_SIZE - 6] == '3' && + buffer[BUFFER_SIZE - 5] == (uint8_t) '\xef' && + buffer[BUFFER_SIZE - 4] == (uint8_t) '\xbf' && + buffer[BUFFER_SIZE - 3] == (uint8_t) '\xbd' && + buffer[BUFFER_SIZE - 2] == '4'); + + free(buffer); + + hubbub_inputstream_destroy(stream); + + assert(hubbub_finalise(myrealloc, NULL) == HUBBUB_OK); + + printf("PASS\n"); + + return 0; +} + -- cgit v1.2.3