From 2777a04ed2ba4fd36138b991d66a32a283361f7e Mon Sep 17 00:00:00 2001 From: John Mark Bell Date: Thu, 1 May 2008 16:34:46 +0000 Subject: Import parser construction utility library svn path=/trunk/libparserutils/; revision=4111 --- test/filter.c | 357 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 357 insertions(+) create mode 100644 test/filter.c (limited to 'test/filter.c') diff --git a/test/filter.c b/test/filter.c new file mode 100644 index 0000000..ff4d1e7 --- /dev/null +++ b/test/filter.c @@ -0,0 +1,357 @@ +#include +#include +#include +#include + +#include + +#include "utils/utils.h" + +#include "input/filter.h" + +#include "testutils.h" + +static void *myrealloc(void *ptr, size_t len, void *pw) +{ + UNUSED(pw); + + return realloc(ptr, len); +} + +int main(int argc, char **argv) +{ + parserutils_filter_optparams params; + parserutils_filter *input; + uint8_t inbuf[64], outbuf[64]; + size_t inlen, outlen; + const uint8_t *in = inbuf; + uint8_t *out = outbuf; + + if (argc != 2) { + printf("Usage: %s \n", argv[0]); + return 1; + } + + /* Initialise library */ + assert(parserutils_initialise(argv[1], myrealloc, NULL) == + PARSERUTILS_OK); + + /* Create input filter */ + input = parserutils_filter_create("UTF-8", myrealloc, NULL); + assert(input); + + /* Convert filter to UTF-8 encoding */ + params.encoding.name = "UTF-8"; + assert(parserutils_filter_setopt(input, PARSERUTILS_FILTER_SET_ENCODING, + (parserutils_filter_optparams *) ¶ms) == + PARSERUTILS_OK); + + + /* Simple case - valid input & output buffer large enough */ + in = inbuf; + out = outbuf; + strcpy((char *) inbuf, "hell\xc2\xa0o!"); + inlen = strlen((const char *) inbuf); + outbuf[0] = '\0'; + outlen = 64; + + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_OK); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + assert(parserutils_filter_reset(input) == PARSERUTILS_OK); + + assert(memcmp(outbuf, "hell\xc2\xa0o!", + SLEN("hell\xc2\xa0o!")) == 0); + + + /* Too small an output buffer; no encoding edge cases */ + in = inbuf; + out = outbuf; + strcpy((char *) inbuf, "hello!"); + inlen = strlen((const char *) inbuf); + outbuf[0] = '\0'; + outlen = 5; + + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_NOMEM); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + outlen = 64 - 5 + outlen; + + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_OK); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + assert(parserutils_filter_reset(input) == PARSERUTILS_OK); + + assert(memcmp(outbuf, "hello!", + SLEN("hello!")) == 0); + + + /* Illegal input sequence; output buffer large enough */ + in = inbuf; + out = outbuf; + strcpy((char *) inbuf, "hell\x96o!"); + inlen = strlen((const char *) inbuf); + outbuf[0] = '\0'; + outlen = 64; + + /* Input does loose decoding, converting to U+FFFD if illegal + * input is encountered */ + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_OK); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + assert(parserutils_filter_reset(input) == PARSERUTILS_OK); + + assert(memcmp(outbuf, "hell\xef\xbf\xbdo!", + SLEN("hell\xef\xbf\xbdo!")) == 0); + + + /* Input ends mid-sequence */ + in = inbuf; + out = outbuf; + strcpy((char *) inbuf, "hell\xc2\xa0o!"); + inlen = strlen((const char *) inbuf) - 3; + outbuf[0] = '\0'; + outlen = 64; + + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_OK); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + inlen += 3; + + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_OK); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + assert(parserutils_filter_reset(input) == PARSERUTILS_OK); + + assert(memcmp(outbuf, "hell\xc2\xa0o!", + SLEN("hell\xc2\xa0o!")) == 0); + + + /* Input ends mid-sequence, but second attempt has too small a + * buffer, but large enough to write out the incomplete character. */ + in = inbuf; + out = outbuf; + strcpy((char *) inbuf, "hell\xc2\xa0o!"); + inlen = strlen((const char *) inbuf) - 3; + outbuf[0] = '\0'; + outlen = 64; + + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_OK); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + inlen += 3; + outlen = 3; + + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_NOMEM); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + outlen = 64 - 7; + + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_OK); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + assert(parserutils_filter_reset(input) == PARSERUTILS_OK); + + assert(memcmp(outbuf, "hell\xc2\xa0o!", + SLEN("hell\xc2\xa0o!")) == 0); + + + /* Input ends mid-sequence, but second attempt has too small a + * buffer, not large enough to write out the incomplete character. */ + in = inbuf; + out = outbuf; + strcpy((char *) inbuf, "hell\xc2\xa0o!"); + inlen = strlen((const char *) inbuf) - 3; + outbuf[0] = '\0'; + outlen = 64; + + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_OK); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + inlen += 3; + outlen = 1; + + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_NOMEM); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + outlen = 60; + + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_OK); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + assert(parserutils_filter_reset(input) == PARSERUTILS_OK); + + assert(memcmp(outbuf, "hell\xc2\xa0o!", + SLEN("hell\xc2\xa0o!")) == 0); + + + /* Input ends mid-sequence, but second attempt contains + * invalid character */ + in = inbuf; + out = outbuf; + strcpy((char *) inbuf, "hell\xc2\xc2o!"); + inlen = strlen((const char *) inbuf) - 3; + outbuf[0] = '\0'; + outlen = 64; + + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_OK); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + inlen += 3; + + /* Input does loose decoding, converting to U+FFFD if illegal + * input is encountered */ + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_OK); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + assert(parserutils_filter_reset(input) == PARSERUTILS_OK); + + assert(memcmp(outbuf, "hell\xef\xbf\xbd\xef\xbf\xbdo!", + SLEN("hell\xef\xbf\xbd\xef\xbf\xbdo!")) == 0); + + + /* Input ends mid-sequence, but second attempt contains another + * incomplete character */ + in = inbuf; + out = outbuf; + strcpy((char *) inbuf, "hell\xc2\xa0\xc2\xa1o!"); + inlen = strlen((const char *) inbuf) - 5; + outbuf[0] = '\0'; + outlen = 64; + + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_OK); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + inlen += 2; + + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_OK); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + inlen += 3; + + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_OK); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + assert(parserutils_filter_reset(input) == PARSERUTILS_OK); + + assert(memcmp(outbuf, "hell\xc2\xa0\xc2\xa1o!", + SLEN("hell\xc2\xa0\xc2\xa1o!")) == 0); + + + /* Input ends mid-sequence, but second attempt contains insufficient + * data to complete the incomplete character */ + in = inbuf; + out = outbuf; + strcpy((char *) inbuf, "hell\xe2\x80\xa2o!"); + inlen = strlen((const char *) inbuf) - 4; + outbuf[0] = '\0'; + outlen = 64; + + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_OK); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + inlen += 1; + + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_OK); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + inlen += 3; + + assert(parserutils_filter_process_chunk(input, &in, &inlen, + &out, &outlen) == PARSERUTILS_OK); + + printf("'%.*s' %d '%.*s' %d\n", (int) inlen, in, (int) inlen, + (int) (out - ((uint8_t *) outbuf)), + outbuf, (int) outlen); + + assert(parserutils_filter_reset(input) == PARSERUTILS_OK); + + assert(memcmp(outbuf, "hell\xe2\x80\xa2o!", + SLEN("hell\xe2\x80\xa2o!")) == 0); + + + /* Clean up */ + parserutils_filter_destroy(input); + + assert(parserutils_finalise(myrealloc, NULL) == PARSERUTILS_OK); + + printf("PASS\n"); + + return 0; +} -- cgit v1.2.3