summaryrefslogtreecommitdiff
path: root/css/scanner.l
blob: 36347b3a84a1e090463bce5b71723a561702d3fb (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
/*
 * This file is part of NetSurf, http://netsurf.sourceforge.net/
 * Licensed under the GNU General Public License,
 *                http://www.opensource.org/licenses/gpl-license
 * Copyright 2004 James Bursa <bursa@users.sourceforge.net>
 */

/** \file
 * CSS tokeniser using re2c.
 *
 * see CSS2 Specification, chapter 4
 * http://www.w3.org/TR/REC-CSS2/syndata.html,
 * and errata
 * http://www.w3.org/Style/css2-updates/REC-CSS2-19980512-errata
 */

#include <stdbool.h>
#define CSS_INTERNALS
#include "netsurf/css/css.h"
#include "netsurf/css/parser.h"

/*
 * Interface macros referenced by the scanner code that re2c generates
 * inside css_tokenise(). They bind the generated code to that function's
 * parameters and locals.
 */

/* Type of a single input character. */
#define YYCTYPE unsigned char
/* Current scan position; expands to the caller's pointer, so advancing it
 * updates *buffer directly. */
#define YYCURSOR (*buffer)
/* One past the last available input character. */
#define YYLIMIT end
/* Saved position used by the generated code when it has to backtrack. */
#define YYMARKER marker
/* Invoked by the generated code when it needs n more characters than are
 * available. No refill is attempted: the enclosing function returns 0. */
#define YYFILL(n) { return 0; }


/**
 * Identify a CSS source token.
 *
 * Whitespace, CSS comments and the "<!--" and "-->" delimiters are not
 * returned as tokens: after matching one of them the scanner restarts at the
 * current position and token_text is moved on accordingly.
 *
 * \param  buffer      source to tokenise, updated to new position
 * \param  end         end of source
 * \param  token_text  updated to start of recognized token
 * \return  token number, or 0 if the scanner required more input than is
 *          available before end (see YYFILL)
 */

int css_tokenise(unsigned char **buffer, unsigned char *end,
		unsigned char **token_text)
{
	unsigned char *marker;

start:
	/* (re)start of a token: also reached after each skipped token */
	*token_text = YYCURSOR;

	/* Note on "num" below: in re2c alternation binds more loosely than
	 * concatenation, so the optional sign has to be applied to a
	 * parenthesised group. Otherwise it would cover integers only and
	 * input such as -1.5 would be split into two tokens (-1 and .5). */

/*!re2c
nonascii	=	[\200-\377];
unicode		=	"\\" [0-9a-f]+ [ \n\r\t\f]?;
escape		=	unicode | "\\" [ -~\200-\377];
nmchar		=	[-a-zA-Z0-9_] | nonascii | escape;
nmstart		=	[a-zA-Z_] | nonascii | escape;
ident		=	nmstart nmchar*;
name		=	nmchar+;
num		=	[+-]? ([0-9]+ | [0-9]* "." [0-9]+);
nl		=	"\n" | "\r\n" | "\r" | "\f";
string1		=	"\"" ([\t !#$%&(-~] | "\\" nl | "'" | nonascii | escape)* "\"";
string2		=	"'"  ([\t !#$%&(-~] | "\\" nl | "\""| nonascii | escape)* "'";
string		=	string1 | string2;
w		=	[ \t\r\n\f]*;
any		=	[\000-\377];

ident		{ return IDENT; }
"@" ident	{ return ATKEYWORD; }
string		{ return STRING; }
"#" name	{ return HASH; }
num		{ return NUMBER; }
num "%"		{ return PERCENTAGE; }
num ident	{ return DIMENSION; }
"url(" w string w ")" | "url(" w ([!#$%&*-~]|nonascii|escape)* w ")"
		{ return URI; }
"U+" [0-9A-F?]+ ("-" [0-9A-F]+ )?
		{ return UNICODE_RANGE; }
"<!--"		{ goto start; /* ignore CDO */ }
"-->"		{ goto start; /* ignore CDC */ }
";"		{ return SEMI; }
"{"		{ return LBRACE; }
"}"		{ return RBRACE; }
"("		{ return LPAREN; }
")"		{ return RPAREN; }
"["		{ return LBRAC; }
"]"		{ return RBRAC; }
[ \t\r\n\f]+	{ goto start; /* ignore whitespace */ }
"/*" (any\[*])* "*"+ ((any\[/]) (any\[*])* "*"+)* "/"
		{ goto start; /* ignore comments */ }
ident "("	{ return FUNCTION; }
"="		{ return EQUALS; }
"~="		{ return INCLUDES; }
"|="		{ return DASHMATCH; }
":"		{ return COLON; }
","		{ return COMMA; }
"+"		{ return PLUS; }
">"		{ return GT; }
"."		{ return DOT; }
"*"		{ return ASTERISK; }
any		{ return DELIM; }
*/

}