From d7a4adf48149e0d6d1ae0cc79c1a6d73aa02dd3f Mon Sep 17 00:00:00 2001 From: James Bursa Date: Sat, 1 May 2004 17:48:38 +0000 Subject: [project @ 2004-05-01 17:48:38 by bursa] CSS parsing improvements: new tokeniser using re2c, improve memory-exhaustion behaviour, plug leaks, reduce memory usage, clean up code, add source documentation. svn path=/import/netsurf/; revision=806 --- css/scanner.l | 145 +++++++++++++++++++++++++++++++++------------------------- 1 file changed, 82 insertions(+), 63 deletions(-) (limited to 'css/scanner.l') diff --git a/css/scanner.l b/css/scanner.l index 1f8cc1d92..36347b3a8 100644 --- a/css/scanner.l +++ b/css/scanner.l @@ -2,78 +2,97 @@ * This file is part of NetSurf, http://netsurf.sourceforge.net/ * Licensed under the GNU General Public License, * http://www.opensource.org/licenses/gpl-license - * Copyright 2003 James Bursa + * Copyright 2004 James Bursa */ -%{ -#include "parser.h" -%} +/** \file + * CSS tokeniser using re2c. + * + * see CSS2 Specification, chapter 4 + * http://www.w3.org/TR/REC-CSS2/syndata.html, + * and errata + * http://www.w3.org/Style/css2-updates/REC-CSS2-19980512-errata + */ + +#include +#define CSS_INTERNALS +#include "netsurf/css/css.h" +#include "netsurf/css/parser.h" + +#define YYCTYPE unsigned char +#define YYCURSOR (*buffer) +#define YYLIMIT end +#define YYMARKER marker +#define YYFILL(n) { return 0; } -%option 8bit -%option batch -%option case-insensitive -%option header-file="scanner.h" -%option outfile="scanner.c" -%option prefix="css_" -%option reentrant -%option never-interactive -%option noyywrap -%option yylineno -/* see CSS2 Specification, chapter 4 - http://www.w3.org/TR/REC-CSS2/syndata.html, - and errata - http://www.w3.org/Style/css2-updates/REC-CSS2-19980512-errata */ +/** + * Identify a CSS source token. 
+ * + * \param buffer source to tokenise, updated to new position + * \param end end of source + * \param token_text updated to start of recognized token + * \return token number + */ + +int css_tokenise(unsigned char **buffer, unsigned char *end, + unsigned char **token_text) +{ + unsigned char *marker; -ident {nmstart}{nmchar}* -name {nmchar}+ -nmstart [a-zA-Z_]|{nonascii}|{escape} -nonascii [\200-\377] -unicode \\[0-9a-f]{1,6}[ \n\r\t\f]? -escape {unicode}|\\[ -~\200-\377] -nmchar [-a-zA-Z0-9_]|{nonascii}|{escape} -num [+-]?[0-9]+|[0-9]*\.[0-9]+ -string {string1}|{string2} -string1 \"([\t !#$%&(-~]|\\{nl}|\'|{nonascii}|{escape})*\" -string2 \'([\t !#$%&(-~]|\\{nl}|\"|{nonascii}|{escape})*\' -nl \n|\r\n|\r|\f -w [ \t\r\n\f]* +start: + *token_text = YYCURSOR; -%% +/*!re2c +nonascii = [\200-\377]; +unicode = "\\" [0-9a-f]+ [ \n\r\t\f]?; +escape = unicode | "\\" [ -~\200-\377]; +nmchar = [-a-zA-Z0-9_] | nonascii | escape; +nmstart = [a-zA-Z_] | nonascii | escape; +ident = nmstart nmchar*; +name = nmchar+; +num = [+-]? [0-9]+ | [0-9]* "." [0-9]+; +nl = "\n" | "\r\n" | "\r" | "\f"; +string1 = "\"" ([\t !#$%&(-~] | "\\" nl | "'" | nonascii | escape)* "\""; +string2 = "'" ([\t !#$%&(-~] | "\\" nl | "\""| nonascii | escape)* "'"; +string = string1 | string2; +w = [ \t\r\n\f]*; +any = [\000-\377]; -{ident} { return IDENT; } -@{ident} { return ATKEYWORD; } -{string} { return STRING; } -#{name} { return HASH; } -{num} { return NUMBER; } -{num}% { return PERCENTAGE; } -{num}{ident} { return DIMENSION; } -url\({w}{string}{w}\)|url\({w}([!#$%&*-~]|{nonascii}|{escape})*{w}\) { - return URI; } -U\+[0-9A-F?]{1,6}(-[0-9A-F]{1,6})? 
{ - return UNICODE_RANGE; } -"<!--" /* ignore CDO */ -"-->" /* ignore CDC */ -; { return SEMI; } -\{ { return LBRACE; } -\} { return RBRACE; } -\( { return LPAREN; } -\) { return RPAREN; } -\[ { return LBRAC; } -\] { return RBRAC; } -[ \t\r\n\f]+ /* ignore whitespace */ -\/\*[^*]*\*+([^/][^*]*\*+)*\/ /* ignore comments */ -{ident}\( { return FUNCTION; } -= { return EQUALS; } -~= { return INCLUDES; } +ident { return IDENT; } +"@" ident { return ATKEYWORD; } +string { return STRING; } +"#" name { return HASH; } +num { return NUMBER; } +num "%" { return PERCENTAGE; } +num ident { return DIMENSION; } +"url(" w string w ")" | "url(" w ([!#$%&*-~]|nonascii|escape)* w ")" + { return URI; } +"U+" [0-9A-F?]+ ("-" [0-9A-F]+ )? + { return UNICODE_RANGE; } +"<!--" { goto start; /* ignore CDO */ } +"-->" { goto start; /* ignore CDC */ } +";" { return SEMI; } +"{" { return LBRACE; } +"}" { return RBRACE; } +"(" { return LPAREN; } +")" { return RPAREN; } +"[" { return LBRAC; } +"]" { return RBRAC; } +[ \t\r\n\f]+ { goto start; /* ignore whitespace */ } +"/*" (any\[*])* "*"+ ((any\[/]) (any\[*])* "*"+)* "/" + { goto start; /* ignore comments */ } +ident "(" { return FUNCTION; } +"=" { return EQUALS; } +"~=" { return INCLUDES; } "|=" { return DASHMATCH; } -: { return COLON; } -, { return COMMA; } +":" { return COLON; } +"," { return COMMA; } "+" { return PLUS; } -> { return GT; } +">" { return GT; } "." { return DOT; } "*" { return ASTERISK; } -. { return DELIM; } - -%% +any { return DELIM; } +*/ +} -- cgit v1.2.3