From a7c8ab91210e368b8b2f23e1d79d42ad1b364560 Mon Sep 17 00:00:00 2001
From: John Mark Bell
Date: Thu, 1 May 2008 20:40:34 +0000
Subject: Testdriver for automated lexer tests. This needs a little more work to verify that the data contained within emitted tokens is as expected. Very trivial testdata to exercise this.

svn path=/trunk/libcss/; revision=4115
---
 test/INDEX               |   1 +
 test/Makefile            |   2 +-
 test/data/lex/INDEX      |   5 +
 test/data/lex/tests1.dat |   8 ++
 test/lex-auto.c          | 348 +++++++++++++++++++++++++++++++++++++++++++++++
 test/testutils.h         |  27 ++++
 6 files changed, 390 insertions(+), 1 deletion(-)
 create mode 100644 test/data/lex/INDEX
 create mode 100644 test/data/lex/tests1.dat
 create mode 100644 test/lex-auto.c

diff --git a/test/INDEX b/test/INDEX
index e1896ef..b35be5c 100644
--- a/test/INDEX
+++ b/test/INDEX
@@ -5,6 +5,7 @@
 libcss		Library initialisation/finalisation
 csdetect	Character set detection			csdetect
 lex		Lexing					css
+lex-auto	Automated lexer tests			lex
 
 # Regression tests
 
diff --git a/test/Makefile b/test/Makefile
index e904397..e2a97c6 100644
--- a/test/Makefile
+++ b/test/Makefile
@@ -35,7 +35,7 @@ d := $(DIR)
 CFLAGS := $(CFLAGS) -I$(TOP)/src/ -I$(d)
 
 # Tests
-TESTS_$(d) := csdetect lex libcss
+TESTS_$(d) := csdetect lex lex-auto libcss
 TESTS_$(d) := $(TESTS_$(d))
 
 # Items for top-level makefile to use
diff --git a/test/data/lex/INDEX b/test/data/lex/INDEX
new file mode 100644
index 0000000..f6008ae
--- /dev/null
+++ b/test/data/lex/INDEX
@@ -0,0 +1,5 @@
+# Index file for automated lexer tests
+#
+# Test			Description
+
+tests1.dat		Basic tests
diff --git a/test/data/lex/tests1.dat b/test/data/lex/tests1.dat
new file mode 100644
index 0000000..3b6b149
--- /dev/null
+++ b/test/data/lex/tests1.dat
@@ -0,0 +1,8 @@
+#data
+body
+#expected
+IDENT:body
+S
+EOF
+#reset
+
diff --git a/test/lex-auto.c b/test/lex-auto.c
new file mode 100644
index 0000000..196836b
--- /dev/null
+++ b/test/lex-auto.c
@@ -0,0 +1,348 @@
+#include <inttypes.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+
+#include <parserutils/input/inputstream.h>
+
+#include <libcss/libcss.h>
+
+#include "charset/detect.h"
+#include "lex/lex.h"
+#include "utils/utils.h"
+
+#include "testutils.h"
+
+/** Number of expected-token slots allocated on first use */
+#define EXP_INITIAL_SLOTS 8
+
+/** A single expected token, as read from the testcase file */
+typedef struct exp_entry {
+	css_token_type type;
+} exp_entry;
+
+/** State carried between successive calls to handle_line() */
+typedef struct line_ctx {
+	size_t buflen;		/**< Allocated size of buf, in bytes */
+	size_t bufused;		/**< Bytes of input data held in buf */
+	uint8_t *buf;		/**< Input data for current testcase */
+
+	size_t explen;		/**< Allocated size of exp, in entries */
+	size_t expused;		/**< Number of entries used in exp */
+	exp_entry *exp;		/**< Expected tokens for current testcase */
+
+	bool indata;		/**< Within a #data section */
+	bool inexp;		/**< Within an #expected section */
+} line_ctx;
+
+static bool handle_line(const char *data, size_t datalen, void *pw);
+static void parse_expected(line_ctx *ctx, const char *data, size_t len);
+static const char *string_from_type(css_token_type type);
+static css_token_type string_to_type(const char *data, size_t len);
+static void run_test(const uint8_t *data, size_t len,
+		exp_entry *exp, size_t explen);
+
+/**
+ * Allocator callback handed to the libraries under test
+ *
+ * \param ptr  Existing allocation to resize, or NULL
+ * \param len  Requested size, in bytes
+ * \param pw   Client data (unused)
+ * \return Result of realloc(ptr, len)
+ */
+static void *myrealloc(void *ptr, size_t len, void *pw)
+{
+	UNUSED(pw);
+
+	return realloc(ptr, len);
+}
+
+/**
+ * Testdriver entry point
+ *
+ * \param argc  Argument count; must be 3
+ * \param argv  argv[1] is handed to css_initialise, argv[2] names the
+ *              testcase file
+ * \return 0 on success, 1 on usage error or allocation failure
+ */
+int main(int argc, char **argv)
+{
+	line_ctx ctx;
+
+	if (argc != 3) {
+		printf("Usage: %s <aliases_file> <filename>\n", argv[0]);
+		return 1;
+	}
+
+	assert(css_initialise(argv[1], myrealloc, NULL) == CSS_OK);
+
+	ctx.buflen = parse_filesize(argv[2]);
+	if (ctx.buflen == 0)
+		return 1;
+
+	ctx.buf = malloc(ctx.buflen);
+	if (ctx.buf == NULL) {
+		printf("Failed allocating %u bytes\n",
+				(unsigned int) ctx.buflen);
+		return 1;
+	}
+
+	ctx.buf[0] = '\0';
+	ctx.bufused = 0;
+	ctx.explen = 0;
+	ctx.expused = 0;
+	ctx.exp = NULL;
+	ctx.indata = false;
+	ctx.inexp = false;
+
+	assert(parse_testfile(argv[2], handle_line, &ctx) == true);
+
+	/* and run final test */
+	if (ctx.bufused > 0)
+		run_test(ctx.buf, ctx.bufused, ctx.exp, ctx.expused);
+
+	free(ctx.exp);
+	free(ctx.buf);
+
+	assert(css_finalise(myrealloc, NULL) == CSS_OK);
+
+	printf("PASS\n");
+
+	return 0;
+}
+
+/**
+ * Process one line of the testcase file
+ *
+ * A line starting with '#' selects the section that follows; if it is seen
+ * while within #expected, the testcase gathered so far is run first. Any
+ * other line is appended to the input buffer (within #data) or parsed as
+ * an expected token (within #expected).
+ *
+ * \param data     Line data
+ * \param datalen  Length of line, in bytes
+ * \param pw       Pointer to line_ctx
+ * \return true, always
+ */
+bool handle_line(const char *data, size_t datalen, void *pw)
+{
+	line_ctx *ctx = (line_ctx *) pw;
+
+	/* An empty line carries nothing; bailing out here also keeps
+	 * data[0] and data[datalen - 1] below within bounds */
+	if (datalen == 0)
+		return true;
+
+	if (data[0] == '#') {
+		if (ctx->inexp) {
+			/* This marks end of testcase, so run it */
+
+			run_test(ctx->buf, ctx->bufused,
+					ctx->exp, ctx->expused);
+
+			ctx->buf[0] = '\0';
+			ctx->bufused = 0;
+
+			ctx->expused = 0;
+		}
+
+		ctx->indata = (strncasecmp(data+1, "data", 4) == 0);
+		ctx->inexp = (strncasecmp(data+1, "expected", 8) == 0);
+	} else {
+		if (ctx->indata) {
+			memcpy(ctx->buf + ctx->bufused, data, datalen);
+			ctx->bufused += datalen;
+		}
+		if (ctx->inexp) {
+			if (data[datalen - 1] == '\n')
+				datalen -= 1;
+
+			parse_expected(ctx, data, datalen);
+		}
+	}
+
+	return true;
+}
+
+/**
+ * Parse one line of an #expected section and append the token it names
+ * to the list of expected tokens
+ *
+ * \param ctx   Line context owning the expected token list
+ * \param data  Line data; the token type runs up to the first ':' or,
+ *              failing that, to the end of the line
+ * \param len   Length of line, in bytes
+ */
+void parse_expected(line_ctx *ctx, const char *data, size_t len)
+{
+	const char *colon = parse_strnchr(data, len, ':');
+	if (colon == NULL)
+		colon = data + len;
+
+	css_token_type type = string_to_type(data, colon - data);
+
+	/** \todo expected token data */
+
+	/* Append to list of expected tokens */
+	if (ctx->expused == ctx->explen) {
+		/* The list starts out empty, so doubling alone would never
+		 * grow it: seed the capacity on first use */
+		size_t num = (ctx->explen == 0) ? EXP_INITIAL_SLOTS :
+				ctx->explen * 2;
+
+		exp_entry *temp = realloc(ctx->exp, num * sizeof(exp_entry));
+		if (temp == NULL)
+			assert(0 && "No memory for expected tokens");
+
+		ctx->exp = temp;
+		ctx->explen = num;
+	}
+
+	ctx->exp[ctx->expused].type = type;
+	ctx->expused++;
+}
+
+/**
+ * Convert a token type to its name
+ *
+ * \param type  Token type
+ * \return Name of token type, or "UNKNOWN" if type is out of range
+ */
+const char *string_from_type(css_token_type type)
+{
+	static const char * const names[] =
+	{
+		"IDENT", "ATKEYWORD", "STRING", "HASH", "NUMBER",
+		"PERCENTAGE", "DIMENSION", "URI", "UNICODE-RANGE", "CDO",
+		"CDC", "S", "COMMENT", "FUNCTION", "INCLUDES",
+		"DASHMATCH", "PREFIXMATCH", "SUFFIXMATCH", "SUBSTRINGMATCH",
+		"CHAR", "EOF"
+	};
+
+	/* Guard the table lookup against values outside the table */
+	if ((size_t) type >= sizeof(names) / sizeof(names[0]))
+		return "UNKNOWN";
+
+	return names[type];
+}
+
+/**
+ * Convert a token type name to a token type
+ *
+ * \param data  Name of token type (compared case-insensitively)
+ * \param len   Length of name, in bytes
+ * \return Corresponding token type; CSS_TOKEN_EOF if name is unrecognised
+ */
+css_token_type string_to_type(const char *data, size_t len)
+{
+	if (len == 5 && strncasecmp(data, "IDENT", len) == 0)
+		return CSS_TOKEN_IDENT;
+	else if (len == 9 && strncasecmp(data, "ATKEYWORD", len) == 0)
+		return CSS_TOKEN_ATKEYWORD;
+	else if (len == 6 && strncasecmp(data, "STRING", len) == 0)
+		return CSS_TOKEN_STRING;
+	else if (len == 4 && strncasecmp(data, "HASH", len) == 0)
+		return CSS_TOKEN_HASH;
+	else if (len == 6 && strncasecmp(data, "NUMBER", len) == 0)
+		return CSS_TOKEN_NUMBER;
+	else if (len == 10 && strncasecmp(data, "PERCENTAGE", len) == 0)
+		return CSS_TOKEN_PERCENTAGE;
+	else if (len == 9 && strncasecmp(data, "DIMENSION", len) == 0)
+		return CSS_TOKEN_DIMENSION;
+	else if (len == 3 && strncasecmp(data, "URI", len) == 0)
+		return CSS_TOKEN_URI;
+	else if (len == 13 && strncasecmp(data, "UNICODE-RANGE", len) == 0)
+		return CSS_TOKEN_UNICODE_RANGE;
+	else if (len == 3 && strncasecmp(data, "CDO", len) == 0)
+		return CSS_TOKEN_CDO;
+	else if (len == 3 && strncasecmp(data, "CDC", len) == 0)
+		return CSS_TOKEN_CDC;
+	else if (len == 1 && strncasecmp(data, "S", len) == 0)
+		return CSS_TOKEN_S;
+	else if (len == 7 && strncasecmp(data, "COMMENT", len) == 0)
+		return CSS_TOKEN_COMMENT;
+	else if (len == 8 && strncasecmp(data, "FUNCTION", len) == 0)
+		return CSS_TOKEN_FUNCTION;
+	else if (len == 8 && strncasecmp(data, "INCLUDES", len) == 0)
+		return CSS_TOKEN_INCLUDES;
+	else if (len == 9 && strncasecmp(data, "DASHMATCH", len) == 0)
+		return CSS_TOKEN_DASHMATCH;
+	else if (len == 11 && strncasecmp(data, "PREFIXMATCH", len) == 0)
+		return CSS_TOKEN_PREFIXMATCH;
+	else if (len == 11 && strncasecmp(data, "SUFFIXMATCH", len) == 0)
+		return CSS_TOKEN_SUFFIXMATCH;
+	else if (len == 14 && strncasecmp(data, "SUBSTRINGMATCH", len) == 0)
+		return CSS_TOKEN_SUBSTRINGMATCH;
+	else if (len == 4 && strncasecmp(data, "CHAR", len) == 0)
+		return CSS_TOKEN_CHAR;
+	else
+		return CSS_TOKEN_EOF;
+}
+
+/**
+ * Lex a chunk of input and compare the types of the tokens emitted with
+ * those expected
+ *
+ * \param data    Input data
+ * \param len     Length of input data, in bytes
+ * \param exp     Expected tokens
+ * \param explen  Number of entries in exp
+ */
+void run_test(const uint8_t *data, size_t len, exp_entry *exp, size_t explen)
+{
+	parserutils_inputstream *input;
+	css_lexer *lexer;
+	css_error error;
+	const css_token *tok;
+	size_t e;
+	static int testnum;
+
+	input = parserutils_inputstream_create("UTF-8", CSS_CHARSET_DICTATED,
+			css_charset_extract, myrealloc, NULL);
+	assert(input != NULL);
+
+	lexer = css_lexer_create(input, myrealloc, NULL);
+	assert(lexer != NULL);
+
+	assert(parserutils_inputstream_append(input, data, len) ==
+			PARSERUTILS_OK);
+
+	assert(parserutils_inputstream_append(input, NULL, 0) ==
+			PARSERUTILS_OK);
+
+	e = 0;
+	testnum++;
+
+	while ((error = css_lexer_get_token(lexer, &tok)) == CSS_OK) {
+		/* Never read beyond the end of the expected token list */
+		if (e >= explen) {
+			printf("%d: Got token %s, Expected nothing [%d, %d]\n",
+				testnum, string_from_type(tok->type),
+				tok->line, tok->col);
+			assert(0);
+		}
+
+		if (tok->type != exp[e].type) {
+			printf("%d: Got token %s, Expected %s [%d, %d]\n",
+				testnum, string_from_type(tok->type),
+				string_from_type(exp[e].type),
+				tok->line, tok->col);
+			assert(0);
+		}
+
+		e++;
+
+		if (tok->type == CSS_TOKEN_EOF)
+			break;
+	}
+
+	assert(e == explen);
+
+	css_lexer_destroy(lexer);
+
+	parserutils_inputstream_destroy(input);
+
+	printf("Test %d: PASS\n", testnum);
+}
diff --git a/test/testutils.h b/test/testutils.h
index c91c5b8..584915b 100644
--- a/test/testutils.h
+++ b/test/testutils.h
@@ -33,6 +33,7 @@ void __assert2(const char *expr, const char *function,
 typedef bool (*line_func)(const char *data, size_t datalen, void *pw);
 
 static size_t parse_strlen(const char *str, size_t limit);
+char *parse_strnchr(const char *str, size_t len, int chr);
 bool parse_testfile(const char *filename, line_func callback, void *pw);
 size_t parse_filesize(const char *filename);
 
@@ -94,6 +95,32 @@ size_t parse_strlen(const char *str, size_t limit)
 	return len;
 }
 
+/**
+ * Length-limited strchr
+ *
+ * \param str String to search in
+ * \param len Length of string
+ * \param chr Character to search for
+ * \return Pointer to character in string, or NULL if not found
+ */
+char *parse_strnchr(const char *str, size_t len, int chr)
+{
+	size_t i;
+
+	if (str == NULL)
+		return NULL;
+
+	for (i = 0; i < len; i++) {
+		if (str[i] == chr)
+			break;
+	}
+
+	if (i == len)
+		return NULL;
+
+	return (char *) str + i;
+}
+
 /**
  * Read the size of a file
  *
-- 
cgit v1.2.3