1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
#!/usr/bin/python
#
# Copyright 2007 Google Inc. All Rights Reserved.
"""CSS Lexical Grammar rules.
CSS lexical grammar from http://www.w3.org/TR/CSS21/grammar.html
"""
# Module authors.
__author__ = [
    'elsigh@google.com (Lindsey Simon)',
    'msamuel@google.com (Mike Samuel)',
]

# Public symbols: the pattern fragments picked up by a star import.
# NOTE(review): KEYWORD, UNIT, URL_CHARS and URL_SPECIAL_CHARS are defined in
# this module but are not listed here -- confirm that is intentional.
__all__ = [
    "NEWLINE",
    "HEX",
    "NON_ASCII",
    "UNICODE",
    "ESCAPE",
    "NMSTART",
    "NMCHAR",
    "STRING1",
    "STRING2",
    "IDENT",
    "NAME",
    "HASH",
    "NUM",
    "STRING",
    "URL",
    "SPACE",
    "WHITESPACE",
    "COMMENT",
    "QUANTITY",
    "PUNC",
]
# The comments below are mostly copied verbatim from the grammar.
#
# At-rule keywords:
#   "@import"  {return IMPORT_SYM;}
#   "@page"    {return PAGE_SYM;}
#   "@media"   {return MEDIA_SYM;}
#   "@charset" {return CHARSET_SYM;}
# An "@" followed by exactly one of the four names, wrapped in a
# non-capturing group so the fragment can be embedded in larger patterns.
KEYWORD = r'(?:\@(?:' + '|'.join(('import', 'page', 'media', 'charset')) + r'))'
# nl \n|\r\n|\r|\f ; a newline
# Wrapped in a non-capturing group so the alternation stays intact when this
# fragment is concatenated with other pattern text (e.g. r'\\' + NEWLINE);
# a bare top-level alternation would otherwise absorb its neighbours into
# the first or last branch.  The group adds no capture index.
NEWLINE = r'(?:\n|\r\n|\r|\f)'
# h [0-9a-f] ; a hexadecimal digit
# Lower-case digits only; upper-case hex digits match only when the caller
# compiles the pattern with re.IGNORECASE.
HEX = r'[0-9a-f]'
# nonascii [\200-\377]
# Written as the complement of the ASCII range instead of the grammar's
# literal \200-\377.  On byte strings both forms match exactly the bytes
# 0x80-0xFF, but on text (unicode) strings the literal range stops at U+00FF
# and would reject every higher code point.
NON_ASCII = r'[^\0-\177]'
# unicode \\{h}{1,6}(\r\n|[ \t\r\n\f])?
# A backslash, one to six hex digits, then at most one trailing whitespace
# character (a CRLF pair counts as one).
UNICODE = r'(?:(?:\\' + HEX + r'{1,6})(?:\r\n|[ \t\r\n\f])?)'
# escape {unicode}|\\[^\r\n\f0-9a-f]
# Either a hex escape as above, or a backslash followed by any single
# character that is neither a newline character nor a hex digit.
ESCAPE = r'(?:' + UNICODE + r'|\\[^\r\n\f0-9a-f])'
# nmstart [_a-z]|{nonascii}|{escape}
# A character allowed at the start of a name.
NMSTART = r'(?:[_a-z]|' + NON_ASCII + r'|' + ESCAPE + r')'
# nmchar [_a-z0-9-]|{nonascii}|{escape}
# A character allowed after the start of a name (adds digits and "-").
NMCHAR = r'(?:[_a-z0-9-]|' + NON_ASCII + r'|' + ESCAPE + r')'
# ident -?{nmstart}{nmchar}*
# Not wrapped in a group: wrap it before applying a quantifier to the whole
# identifier.
IDENT = r'-?' + NMSTART + NMCHAR + '*'
# name {nmchar}+
NAME = NMCHAR + r'+'
# hash
# A "#" immediately followed by a name.
HASH = r'#' + NAME
# string1 \"([^\n\r\f\\"]|\\{nl}|{escape})*\" ; "string"
# Simplified relative to the grammar: any character other than the closing
# quote or a backslash is accepted (raw newlines included), and a backslash
# escapes whichever single character follows it.  [\s\S] is used rather than
# "." so that an escaped newline (the grammar's \\{nl}) is accepted even when
# the pattern is compiled without re.DOTALL.
STRING1 = r'"(?:[^\"\\]|\\[\s\S])*"'
# string2 \'([^\n\r\f\\']|\\{nl}|{escape})*\' ; 'string'
# Single-quoted counterpart of STRING1, with the same simplifications.
STRING2 = r"'(?:[^\'\\]|\\[\s\S])*'"
# string {string1}|{string2}
# Either quoting style, wrapped in a non-capturing group.
STRING = '(?:' + STRING1 + r'|' + STRING2 + ')'
# num [0-9]+|[0-9]*"."[0-9]+
# The fractional form is listed first so that "1.5" is consumed whole
# instead of stopping after the integer part.
NUM = r'(?:%s|%s)' % (r'[0-9]*\.[0-9]+', r'[0-9]+')
# s [ \t\r\n\f]
# Exactly one whitespace character.
SPACE = r'[ \t\r\n\f]'
# w {s}*
# Zero or more whitespace characters, as a non-capturing group.
WHITESPACE = r'(?:%s*)' % SPACE
# url special chars
# "!", "#", "$", "%", "&", plus the character range "*" through "~"
# (the "-" between them forms a range, it is not a literal hyphen).
URL_SPECIAL_CHARS = r'[!#$%&*-~]'
# url chars ({url_special_chars}|{nonascii}|{escape})*
# Zero or more characters permitted in an unquoted URL body.
URL_CHARS = r'(?:' + r'|'.join((URL_SPECIAL_CHARS, NON_ASCII, ESCAPE)) + r')*'
# url
# "url(" + optional whitespace + (quoted string | unquoted URL characters)
# + optional whitespace + ")".  The body is the pattern's capturing group.
URL = (r'url\(' + WHITESPACE
       + r'(' + STRING + r'|' + URL_CHARS + r')'
       + WHITESPACE + r'\)')
# comments
# see http://www.w3.org/TR/CSS21/grammar.html
# A /* ... */ comment, ending at the first "*/".  The parenthesised part is
# a capturing group, exactly as in the grammar's expression.
COMMENT = (
    r'/\*'                  # opening "/*"
    r'[^*]*\*+'             # text up to and including a run of "*"
    r'([^/*][^*]*\*+)*'     # run was not followed by "/": keep scanning
    r'/'                    # the "/" that closes the comment
)
# Unit suffixes and the token each one produces in the grammar:
#   {E}{M}        {return EMS;}
#   {E}{X}        {return EXS;}
#   {P}{X}        {return LENGTH;}
#   {C}{M}        {return LENGTH;}
#   {M}{M}        {return LENGTH;}
#   {I}{N}        {return LENGTH;}
#   {P}{T}        {return LENGTH;}
#   {P}{C}        {return LENGTH;}
#   {D}{E}{G}     {return ANGLE;}
#   {R}{A}{D}     {return ANGLE;}
#   {G}{R}{A}{D}  {return ANGLE;}
#   {M}{S}        {return TIME;}
#   {S}           {return TIME;}
#   {H}{Z}        {return FREQ;}
#   {K}{H}{Z}     {return FREQ;}
#   %             {return PERCENTAGE;}
# Spelled in lower case here; the alternatives are tried in the order listed.
UNIT = r'(?:' + '|'.join((
    'em', 'ex',
    'px', 'cm', 'mm', 'in', 'pt', 'pc',
    'deg', 'rad', 'grad',
    'ms', 's',
    'hz', 'khz',
    '%',
)) + r')'
# {num}{UNIT|IDENT} {return NUMBER;}
# A number, optionally followed by either (optional whitespace + a known
# unit) or an identifier.  The unit branch is tried before the identifier
# branch.
QUANTITY = NUM + r'(?:' + WHITESPACE + UNIT + r'|' + IDENT + r')?'
# Punctuation tokens:
#   "<!--"  {return CDO;}
#   "-->"   {return CDC;}
#   "~="    {return INCLUDES;}
#   "|="    {return DASHMATCH;}
#   {w}"{"  {return LBRACE;}
#   {w}"+"  {return PLUS;}
#   {w}">"  {return GREATER;}
#   {w}","  {return COMMA;}
# The multi-character tokens come first, then a character class covering
# the single characters "{", "+", ">", ",", ":" and ";".  The leading {w}
# from the grammar is not part of this pattern.
PUNC = '|'.join((
    r'<!--',
    r'-->',
    r'~=',
    r'\|=',
    r'[\{\+>,:;]',
))
|