/*
 * Retrieved from a cgit tree view (summary | refs | log | tree | commit | diff)
 * path: root/test/csdetect.c
 * blob: 3f24792cc5bd45ce01f25b6d40bccfeef5e3d413 (plain)
 */
#include <inttypes.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <parserutils/charset/mibenum.h>

#include <hubbub/hubbub.h>

#include "charset/detect.h"
#include "utils/utils.h"

#include "testutils.h"

/* State shared between main() and the handle_line() callback while a
 * test file is being parsed. */
typedef struct line_ctx {
	/* Capacity of buf in bytes; main() sets it from parse_filesize() */
	size_t buflen;
	/* Number of bytes of test data currently accumulated in buf */
	size_t bufused;
	/* Heap buffer (allocated in main()) collecting the current
	 * testcase's data lines */
	uint8_t *buf;
	/* Expected encoding name for the current testcase, NUL-terminated;
	 * passed to run_test() as the expected charset */
	char enc[64];
	/* True while the most recent '#' line started with "data" */
	bool indata;
	/* True while the most recent '#' line started with "encoding" */
	bool inenc;
} line_ctx;

/* Per-line callback handed to parse_testfile(); pw points at a line_ctx */
static bool handle_line(const char *data, size_t datalen, void *pw);
/* Detect the charset of data[0..len) and assert it matches expected */
static void run_test(const uint8_t *data, size_t len, char *expected);

/**
 * Test driver entry point.
 *
 * Expects exactly one argument: the path of the test file. The file is
 * fed line by line to handle_line() through parse_testfile(); the last
 * testcase in the file has no following '#' line to trigger it, so it
 * is run here once parsing has finished.
 *
 * \param argc  Argument count; must be 2
 * \param argv  Argument vector; argv[1] is the test file
 * \return 0 after printing "PASS"; 1 on bad usage, when parse_filesize()
 *         reports 0, or when the data buffer cannot be allocated
 */
int main(int argc, char **argv)
{
	line_ctx ctx;
	size_t final_len;

	if (argc != 2) {
		printf("Usage: %s <filename>\n", argv[0]);
		return 1;
	}

	/* Start outside any section with nothing collected yet */
	ctx.indata = false;
	ctx.inenc = false;
	ctx.bufused = 0;
	ctx.enc[0] = '\0';

	/* The data buffer is sized from the file size reported by
	 * parse_filesize() */
	ctx.buflen = parse_filesize(argv[1]);
	if (ctx.buflen == 0)
		return 1;

	ctx.buf = malloc(ctx.buflen);
	if (!ctx.buf) {
		printf("Failed allocating %u bytes\n",
				(unsigned int) ctx.buflen);
		return 1;
	}
	ctx.buf[0] = '\0';

	assert(parse_testfile(argv[1], handle_line, &ctx) == true);

	/* Run the trailing testcase, minus the newline that ends its data */
	final_len = ctx.bufused;
	if (final_len > 0 && ctx.buf[final_len - 1] == '\n')
		final_len--;

	run_test(ctx.buf, final_len, ctx.enc);

	free(ctx.buf);

	printf("PASS\n");

	return 0;
}

bool handle_line(const char *data, size_t datalen, void *pw)
{
	line_ctx *ctx = (line_ctx *) pw;

	if (data[0] == '#') {
		if (ctx->inenc) {
			/* This marks end of testcase, so run it */

			if (ctx->buf[ctx->bufused - 1] == '\n')
				ctx->bufused -= 1;

			run_test(ctx->buf, ctx->bufused, ctx->enc);

			ctx->buf[0] = '\0';
			ctx->enc[0] = '\0';
			ctx->bufused = 0;
		}

		ctx->indata = (strncasecmp(data+1, "data", 4) == 0);
		ctx->inenc  = (strncasecmp(data+1, "encoding", 8) == 0);
	} else {
		if (ctx->indata) {
			memcpy(ctx->buf + ctx->bufused, data, datalen);
			ctx->bufused += datalen;
		}
		if (ctx->inenc) {
			strcpy(ctx->enc, data);
			if (ctx->enc[strlen(ctx->enc) - 1] == '\n')
				ctx->enc[strlen(ctx->enc) - 1] = '\0';
		}
	}

	return true;
}

/**
 * Run a single charset detection testcase.
 *
 * Passes the data to hubbub_charset_extract(), prints a numbered line
 * with the detected and expected charsets, and asserts that detection
 * succeeded, produced a non-zero MIB enum, and that this enum equals the
 * one parserutils reports for the expected name.
 *
 * \param data      Document bytes to examine
 * \param len       Number of bytes in data
 * \param expected  NUL-terminated name of the charset that should be found
 */
void run_test(const uint8_t *data, size_t len, char *expected)
{
	/* Counts testcases across calls, for the report line */
	static int tests_run;
	hubbub_charset_source source = HUBBUB_CHARSET_UNKNOWN;
	uint16_t mibenum = 0;
	int expected_mib;

	assert(hubbub_charset_extract(data, len,
			&mibenum, &source) == HUBBUB_OK);

	assert(mibenum != 0);

	tests_run += 1;
	expected_mib = parserutils_charset_mibenum_from_name(
			expected, strlen(expected));

	printf("%d: Detected charset %s (%d) Source %d Expected %s (%d)\n",
			tests_run,
			parserutils_charset_mibenum_to_name(mibenum),
			mibenum, source, expected, expected_mib);

	assert(mibenum == parserutils_charset_mibenum_from_name(
			expected, strlen(expected)));
}