css_lexer/kind.rs
1use core::fmt;
2
3use crate::KindSet;
4
5/// Kind represents the token "Type", categorised mostly by the token types within the CSS Syntax spec.
6///
7/// Importantly, `Kind` is represented as `u8` and must only use the 5 low bits, because the upper 3 bits get used to
8/// house details about each kind, that a token would be interested in learning about.
9///
10/// Maintaining parity with the spec makes it easier to reason about logic around the parser, despite it being possible to
11/// group a bunch of these tokens into a single "delimiter" token. These Delim kinds, however, set the upper bit which
12/// means they cannot be inserted directly into a token. Instead a token.
// NOTE(review): the comment this replaces ended in a truncated sentence ("Instead a token."). How the single-character
// kinds are carried by a token is not visible in this file, so that part is left out rather than guessed at.
/// `Kind` represents the token "type", categorised mostly by the token types within the CSS Syntax spec.
///
/// Importantly, `Kind` is represented as `u8`, and a kind that is stored directly in a token must only use the 5 low
/// bits, because the upper 3 bits get used to house details about each kind that a token would be interested in
/// learning about.
///
/// Two bits of the discriminant act as flags:
///
/// - `0b01_0000` is set on every `Bad*` kind (see [Kind::is_bad]).
/// - `0b10_0000` is set on every single-character kind ([Kind::Colon], [Kind::Semicolon], ... and their `Bad*`
///   counterparts).
///
/// Maintaining parity with the spec makes it easier to reason about logic around the parser, despite it being possible
/// to group a bunch of these tokens into a single "delimiter" token. These single-character kinds, however, set the
/// upper (sixth) bit, which means they cannot be inserted directly into a token.
#[derive(Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Kind {
    // Trivias (mask as 0b0_00XX)
    /// Represents the [<eof-token>][1] defined in CSS. While CSS stipulates that this token is never produced by a
    /// tokenizer, this [Lexer][crate::Lexer] _will_ produce [<eof-token>s][1] if the underlying source has been
    /// fully consumed.
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#typedef-eof-token
    Eof = 0b0000,

    /// Represents the [<whitespace-token>][1] defined in CSS.
    ///
    /// ```md
    /// <newline>
    /// │├──╮─ "\n" ───╭──┤│
    ///     ├─ "\r\n" ─┤
    ///     ├─ "\r" ───┤
    ///     ╰─ "\f" ───╯
    ///
    /// <whitespace>
    /// │├──╮─ " " ───────╭──┤│
    ///     ├─ "\t" ──────┤
    ///     ╰─ <newline> ─╯
    ///
    /// <whitespace-token>
    /// │├─╭─ <whitespace> ─╮─┤│
    ///    ╰────────────────╯
    /// ```
    ///
    /// While CSS stipulates that this token represents collapsed whitespace, it is possible for [Lexer][crate::Lexer]
    /// to produce multiple consecutive [Kind::Whitespace] tokens if the
    /// [Feature::SeparateWhitespace][crate::Feature::SeparateWhitespace] runtime feature is enabled. In this case,
    /// `<whitespace-token>` becomes:
    ///
    /// ```md
    /// <whitespace-token>
    /// │├──╮─╭─ " " ───────╮─╭──┤│
    ///     │ ╰─────────────╯ │
    ///     ├─╭─ "\t" ──────╮─┤
    ///     │ ╰─────────────╯ │
    ///     ╰─╭─ <newline> ─╮─╯
    ///       ╰─────────────╯
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#whitespace-token-diagram
    #[default]
    Whitespace = 0b0001,

    /// Represents the [<comment>][1] defined in CSS. While CSS stipulates comment tokens are not produced during
    /// tokenization, they are for this [Lexer][crate::Lexer] as they're needed in order to preserve them.
    ///
    /// ```md
    /// <comment>
    ///           ╭──────────────────────────────────────────╮
    /// │├─ "/*" ─╯-╭─ (anything but "*" followed by "/") ─╮─╰─ "*/" ─┤│
    ///             ╰──────────────────────────────────────╯
    /// ```
    ///
    /// It is possible for [Lexer][crate::Lexer] to produce [Kind::Comment] tokens that begin `//` if the
    /// [Feature::SingleLineComments][crate::Feature::SingleLineComments] runtime feature is enabled. In this mode,
    /// `<comment>` becomes:
    ///
    /// ```md
    /// <comment>
    ///              ╭──────────────────────────────────────────╮
    /// │├──╮─ "/*" ─╯-╭─ (anything but "*" followed by "/") ─╮─╰─ "*/" ─╭─┤│
    ///     │          ╰──────────────────────────────────────╯          │
    ///     │              ╭───────────────────────────╮                 │
    ///     ╰─ "//" ───────╯-╭─ (anything but "\n") ─╮─╰─ "\n" ──────────╯
    ///                      ╰───────────────────────╯
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#comment-diagram
    Comment = 0b0010,

    /// Represents both the [<cdc-token>][1] and [<cdo-token>][2]s defined in CSS. While CSS separates these tokens,
    /// they're only useful representations at the top-level stylesheet; anywhere else they represent a parse error,
    /// and it's a little pointless to define two token types for what amounts to a parse error.
    ///
    /// ```md
    /// <cdo-token>
    /// │├─ "<!--" ─┤│
    ///
    /// <cdc-token>
    /// │├─ "-->" ─┤│
    ///
    /// <cdc-or-cdo-token> (Not part of the CSS specification)
    /// │├──╮─ <cdo-token> ─╭──┤│
    ///     ╰─ <cdc-token> ─╯
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#CDC-token-diagram
    /// [2]: https://drafts.csswg.org/css-syntax/#CDO-token-diagram
    CdcOrCdo = 0b0011,

    // Numerics (mask as 0b0_010X)
    /// Represents the [<number-token>][1].
    ///
    /// ```md
    ///
    /// <number-token>
    ///    ╭─ "+" ─╮
    /// │├─├───────┤─╮─╭─ [digit] ─╮─ "." ─╭─ [digit] ─╮──╭───╮──────────────────────────────────╭──┤│
    ///    ╰─ "-" ─╯ │ ╰───────────╯       ╰───────────╯  │   │         ╭─ "+" ─╮                │
    ///              ├──────────╭─ [digit] ─╮─────────────┤   ├─ "e" ─╭─├───────┤──╭─ [digit] ─╮─╯
    ///              │          ╰───────────╯             │   ╰─ "E" ─╯ ╰─ "-" ─╯  ╰───────────╯
    ///              ╰──── "." ─╭─ [digit] ─╮─────────────╯
    ///                         ╰───────────╯
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#number-token-diagram
    Number = 0b0100,

    /// Represents the [<dimension-token>][1].
    ///
    /// Here we deviate from the spec slightly, which has both [<dimension-token>][1] and [<percentage-token>][2].
    /// `<percentage-token>` represents a dimension with a `%` symbol, but having this as a separate token results in
    /// more work in the parser for little gain in the Lexer. So instead this lexer does not have a
    /// `<percentage-token>` and instead folds the grammar for it inside of `<dimension-token>`.
    ///
    /// ```md
    ///
    /// <newline>
    /// │├──╮─ "\n" ───╭──┤│
    ///     ├─ "\r\n" ─┤
    ///     ├─ "\r" ───┤
    ///     ╰─ "\f" ───╯
    ///
    /// <whitespace>
    /// │├──╮─ " " ───────╭──┤│
    ///     ├─ "\t" ──────┤
    ///     ╰─ <newline> ─╯
    ///
    /// <hexdigit>
    /// │├─ [ 0-9, A-F, a-f ] ─┤│
    ///
    ///
    /// <escape>
    /// │├─ "\" ─╮───── [not <newline> or <hexdigit>] ───╭─┤│
    ///          ╰─╭── <hexdigit> ─╮──╮────────────────╭─╯
    ///            ╰─ (1-6 times) ─╯  ╰─ <whitespace> ─╯
    ///
    /// <ident-token>
    ///    ╭───────────────── "--" ─────────────────────╮  ╭───────────────────────────────────────────╮
    /// │├─╯─╮───────╭─╮─ [a-z, A-Z, "_", non-ASCII] ─╭─╰──╯─╭─╮─ [a-z, A-Z, 0-9, "_", non-ASCII] ─╭─╮─╰──┤│
    ///      ╰─ "-" ─╯ ╰──────── <escape> ────────────╯      │ ╰──────────── <escape> ─────────────╯ │
    ///                                                      ╰───────────────────────────────────────╯
    ///
    /// <number-token>
    ///    ╭─ "+" ─╮
    /// │├─├───────┤─╮─╭─ [digit] ─╮─ "." ─╭─ [digit] ─╮──╭───╮──────────────────────────────────╭──┤│
    ///    ╰─ "-" ─╯ │ ╰───────────╯       ╰───────────╯  │   │         ╭─ "+" ─╮                │
    ///              ├──────────╭─ [digit] ─╮─────────────┤   ├─ "e" ─╭─├───────┤──╭─ [digit] ─╮─╯
    ///              │          ╰───────────╯             │   ╰─ "E" ─╯ ╰─ "-" ─╯  ╰───────────╯
    ///              ╰──── "." ─╭─ [digit] ─╮─────────────╯
    ///                         ╰───────────╯
    ///
    /// <dimension-token>
    /// │├─ <number-token> ─ <ident-token> ─┤│
    ///
    /// ```
    ///
    /// ```md
    ///
    /// <dimension-token> // Refined for this lexer, not true to the standard.
    /// │├─ <number-token> ─╮─ <ident-token> ─╭──┤│
    ///                     ╰────── "%" ──────╯
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#dimension-token-diagram
    /// [2]: https://drafts.csswg.org/css-syntax/#percentage-token-diagram
    Dimension = 0b0101,

    // Errors (mask as 0b1_XXXX)
    /// Represents the [<bad-string-token>][1]. This token is a failure to fully lex the [<string-token>][2].
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#typedef-bad-string-token
    /// [2]: https://drafts.csswg.org/css-syntax/#typedef-string-token
    BadString = 0b1_1100,

    /// Represents the [<bad-url-token>][1]. This token is a failure to fully lex the [<url-token>][2].
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#typedef-bad-url-token
    /// [2]: https://drafts.csswg.org/css-syntax/#typedef-url-token
    BadUrl = 0b1_1101,

    // The remaining `0b1_XXXX` kinds mirror a regular kind with the bad bit (`0b1_0000`) added to its discriminant.
    /// This kind, like the other `Bad*` kinds that follow it, is a non-standard Bad kind that is never emitted by the
    /// Lexer, but can be used by Parsers to denote a token that is either:
    ///  - a Token that was unexpected in this position.
    ///  - a Token that was inserted to recover the parser to a known state.
    BadWhitespace = 0b1_0001,
    /// Non-standard bad counterpart of [Kind::Comment]; see [Kind::BadWhitespace].
    BadComment = 0b1_0010,
    /// Non-standard bad counterpart of [Kind::CdcOrCdo]; see [Kind::BadWhitespace].
    BadCdcOrCdo = 0b1_0011,
    /// Non-standard bad counterpart of [Kind::Number]; see [Kind::BadWhitespace].
    BadNumber = 0b1_0100,
    /// Non-standard bad counterpart of [Kind::Dimension]; see [Kind::BadWhitespace].
    BadDimension = 0b1_0101,
    /// Non-standard bad counterpart of [Kind::Ident]; see [Kind::BadWhitespace].
    BadIdent = 0b1_1000,
    /// Non-standard bad counterpart of [Kind::Function]; see [Kind::BadWhitespace].
    BadFunction = 0b1_1001,
    /// Non-standard bad counterpart of [Kind::AtKeyword]; see [Kind::BadWhitespace].
    BadAtKeyword = 0b1_1010,
    /// Non-standard bad counterpart of [Kind::Hash]; see [Kind::BadWhitespace].
    BadHash = 0b1_1011,
    /// Non-standard bad counterpart of [Kind::Delim]; see [Kind::BadWhitespace].
    BadDelim = 0b1_1111,

    // Variable length Ident-like Tokens (mask: 0b0_1XXX)
    /// Represents the [<ident-token>][1].
    ///
    /// ```md
    ///
    /// <newline>
    /// │├──╮─ "\n" ───╭──┤│
    ///     ├─ "\r\n" ─┤
    ///     ├─ "\r" ───┤
    ///     ╰─ "\f" ───╯
    ///
    /// <whitespace>
    /// │├──╮─ " " ───────╭──┤│
    ///     ├─ "\t" ──────┤
    ///     ╰─ <newline> ─╯
    ///
    /// <hexdigit>
    /// │├─ [ 0-9, A-F, a-f ] ─┤│
    ///
    ///
    /// <escape>
    /// │├─ "\" ─╮───── [not <newline> or <hexdigit>] ───╭─┤│
    ///          ╰─╭── <hexdigit> ─╮──╮────────────────╭─╯
    ///            ╰─ (1-6 times) ─╯  ╰─ <whitespace> ─╯
    ///
    /// <ident-token>
    ///    ╭───────────────── "--" ─────────────────────╮  ╭───────────────────────────────────────────╮
    /// │├─╯─╮───────╭─╮─ [a-z, A-Z, "_", non-ASCII] ─╭─╰──╯─╭─╮─ [a-z, A-Z, 0-9, "_", non-ASCII] ─╭─╮─╰──┤│
    ///      ╰─ "-" ─╯ ╰──────── <escape> ────────────╯      │ ╰──────────── <escape> ─────────────╯ │
    ///                                                      ╰───────────────────────────────────────╯
    ///
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#ident-token-diagram
    Ident = 0b1000,

    /// Represents the [<function-token>][1].
    ///
    /// ```md
    ///
    /// <newline>
    /// │├──╮─ "\n" ───╭──┤│
    ///     ├─ "\r\n" ─┤
    ///     ├─ "\r" ───┤
    ///     ╰─ "\f" ───╯
    ///
    /// <whitespace>
    /// │├──╮─ " " ───────╭──┤│
    ///     ├─ "\t" ──────┤
    ///     ╰─ <newline> ─╯
    ///
    /// <hexdigit>
    /// │├─ [ 0-9, A-F, a-f ] ─┤│
    ///
    ///
    /// <escape>
    /// │├─ "\" ─╮───── [not <newline> or <hexdigit>] ───╭─┤│
    ///          ╰─╭── <hexdigit> ─╮──╮────────────────╭─╯
    ///            ╰─ (1-6 times) ─╯  ╰─ <whitespace> ─╯
    ///
    /// <ident-token>
    ///    ╭───────────────── "--" ─────────────────────╮  ╭───────────────────────────────────────────╮
    /// │├─╯─╮───────╭─╮─ [a-z, A-Z, "_", non-ASCII] ─╭─╰──╯─╭─╮─ [a-z, A-Z, 0-9, "_", non-ASCII] ─╭─╮─╰──┤│
    ///      ╰─ "-" ─╯ ╰──────── <escape> ────────────╯      │ ╰──────────── <escape> ─────────────╯ │
    ///                                                      ╰───────────────────────────────────────╯
    ///
    /// <function-token>
    /// │├─ <ident-token> ─ "(" ─┤│
    ///
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#function-token-diagram
    Function = 0b1001,

    /// Represents the [<at-keyword-token>][1].
    ///
    /// ```md
    ///
    /// <newline>
    /// │├──╮─ "\n" ───╭──┤│
    ///     ├─ "\r\n" ─┤
    ///     ├─ "\r" ───┤
    ///     ╰─ "\f" ───╯
    ///
    /// <whitespace>
    /// │├──╮─ " " ───────╭──┤│
    ///     ├─ "\t" ──────┤
    ///     ╰─ <newline> ─╯
    ///
    /// <hexdigit>
    /// │├─ [ 0-9, A-F, a-f ] ─┤│
    ///
    ///
    /// <escape>
    /// │├─ "\" ─╮───── [not <newline> or <hexdigit>] ───╭─┤│
    ///          ╰─╭── <hexdigit> ─╮──╮────────────────╭─╯
    ///            ╰─ (1-6 times) ─╯  ╰─ <whitespace> ─╯
    ///
    /// <ident-token>
    ///    ╭───────────────── "--" ─────────────────────╮  ╭───────────────────────────────────────────╮
    /// │├─╯─╮───────╭─╮─ [a-z, A-Z, "_", non-ASCII] ─╭─╰──╯─╭─╮─ [a-z, A-Z, 0-9, "_", non-ASCII] ─╭─╮─╰──┤│
    ///      ╰─ "-" ─╯ ╰──────── <escape> ────────────╯      │ ╰──────────── <escape> ─────────────╯ │
    ///                                                      ╰───────────────────────────────────────╯
    ///
    /// <at-keyword-token>
    /// │├─ "@" ─ <ident-token> ─┤│
    ///
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#at-keyword-token-diagram
    AtKeyword = 0b1010,

    /// Represents the [<hash-token>][1].
    ///
    /// ```md
    ///
    /// <newline>
    /// │├──╮─ "\n" ───╭──┤│
    ///     ├─ "\r\n" ─┤
    ///     ├─ "\r" ───┤
    ///     ╰─ "\f" ───╯
    ///
    /// <whitespace>
    /// │├──╮─ " " ───────╭──┤│
    ///     ├─ "\t" ──────┤
    ///     ╰─ <newline> ─╯
    ///
    /// <hexdigit>
    /// │├─ [ 0-9, A-F, a-f ] ─┤│
    ///
    ///
    /// <escape>
    /// │├─ "\" ─╮───── [not <newline> or <hexdigit>] ───╭─┤│
    ///          ╰─╭── <hexdigit> ─╮──╮────────────────╭─╯
    ///            ╰─ (1-6 times) ─╯  ╰─ <whitespace> ─╯
    ///
    /// <hash-token>
    /// │├─ "#" ──╭─╮─ [a-z, A-Z, 0-9, "_", "-", non-ASCII] ─╭─╮─┤│
    ///           │ ╰─────────────── <escape> ───────────────╯ │
    ///           ╰────────────────────────────────────────────╯
    ///
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#hash-token-diagram
    Hash = 0b1011,

    /// Represents the [<string-token>][1].
    ///
    /// ```md
    ///
    /// <newline>
    /// │├──╮─ "\n" ───╭──┤│
    ///     ├─ "\r\n" ─┤
    ///     ├─ "\r" ───┤
    ///     ╰─ "\f" ───╯
    ///
    /// <escape>
    /// │├─ "\" ─╮───── [not <newline> or <hexdigit>] ───╭─┤│
    ///          ╰─╭── <hexdigit> ─╮──╮────────────────╭─╯
    ///            ╰─ (1-6 times) ─╯  ╰─ <whitespace> ─╯
    ///
    /// <string-token>
    ///            ╭───────────────────────────────────╮
    /// │├─╮─ """ ─╯─╭─╮─ [not """, "\", newline] ─╭─╮─╰── """ ─╭─┤│
    ///    │         │ ├──────── <escape> ─────────┤ │          │
    ///    │         │ ╰───── "\" ─ <newline> ─────╯ │          │
    ///    │         ╰───────────────────────────────╯          │
    ///    │       ╭───────────────────────────────────╮        │
    ///    ╰─ "'" ─╯─╭─╮─ [not "'", "\", newline] ─╭─╮─╰── "'" ─╯
    ///              │ ├──────── <escape> ─────────┤ │
    ///              │ ╰───── "\" ─ <newline> ─────╯ │
    ///              ╰───────────────────────────────╯
    ///
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#string-token-diagram
    String = 0b1100,

    /// Represents the [<url-token>][1].
    ///
    /// ```md
    ///
    /// <newline>
    /// │├──╮─ "\n" ───╭──┤│
    ///     ├─ "\r\n" ─┤
    ///     ├─ "\r" ───┤
    ///     ╰─ "\f" ───╯
    ///
    /// <whitespace>
    /// │├──╮─ " " ───────╭──┤│
    ///     ├─ "\t" ──────┤
    ///     ╰─ <newline> ─╯
    ///
    /// <whitespace-token>
    /// │├─╭─ <whitespace> ─╮─┤│
    ///    ╰────────────────╯
    ///
    /// <ws*>
    ///    ╭──────────────────────────╮
    /// │├─╯─╭─ <whitespace-token> ─╮─╰─┤│
    ///      ╰──────────────────────╯
    ///
    /// <hexdigit>
    /// │├─ [ 0-9, A-F, a-f ] ─┤│
    ///
    ///
    /// <escape>
    /// │├─ "\" ─╮───── [not <newline> or <hexdigit>] ───╭─┤│
    ///          ╰─╭── <hexdigit> ─╮──╮────────────────╭─╯
    ///            ╰─ (1-6 times) ─╯  ╰─ <whitespace> ─╯
    ///
    /// <ident-token>
    ///    ╭───────────────── "--" ─────────────────────╮  ╭───────────────────────────────────────────╮
    /// │├─╯─╮───────╭─╮─ [a-z, A-Z, "_", non-ASCII] ─╭─╰──╯─╭─╮─ [a-z, A-Z, 0-9, "_", non-ASCII] ─╭─╮─╰──┤│
    ///      ╰─ "-" ─╯ ╰──────── <escape> ────────────╯      │ ╰──────────── <escape> ─────────────╯ │
    ///                                                      ╰───────────────────────────────────────╯
    ///
    /// <url-token>
    ///                                        ╭───────────────────────────────────────────────────────────────────╮
    /// │├─ <ident-token "url"> ─ "(" ─ <ws*> ─╯─╭─╮─ [not """ "'" "(" ")" "\" <whitespace> or non-printable] ─╭─╮─╰─ <ws*> ─ ")" ─┤│
    ///                                          │ ╰──────────────────────── <escape> ─────────────────────────╯ │
    ///                                          ╰───────────────────────────────────────────────────────────────╯
    ///
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#url-token-diagram
    Url = 0b1101,

    /// Represents the [<unicode-range-token>][1]. This token is only produced when the
    /// [Feature::UnicodeRange][crate::Feature::UnicodeRange] feature is enabled.
    ///
    /// ```md
    ///
    /// <hexdigit>
    /// │├─ [ 0-9, A-F, a-f ] ─┤│
    ///
    /// <unicode-range-token>
    /// │├─╮─ 'U' ─╭─ '+' ─╮──────────────────╭── <hexdigit> ─╮──────────────────╭─┤│
    ///    ╰─ 'u' ─╯       │                  ╰─ (1-6 times) ─╯                  │
    ///                    │ ╭───────────────────╮                               │
    ///                    ├─╯─╭── <hexdigit> ─╮─╰─╭───────────── ? ───────────╮─┤
    ///                    │   ╰─ (1-5 times) ─╯   ╰─ (1 to (6 digits) times) ─╯ │
    ///                    │                                                     │
    ///                    ╰────╭── <hexdigit> ─╮── '-' ──╭── <hexdigit> ─╮──────╯
    ///                         ╰─ (1-5 times) ─╯         ╰─ (1-5 times) ─╯
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#unicode-range-token-diagram
    UnicodeRange = 0b1110,

    /// Represents the [<delim-token>][1]. The `<delim-token>` has a value composed of a single code point.
    ///
    /// ```md
    /// <delim-token>
    /// │├─ [codepoint] ─┤│
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#typedef-delim-token
    Delim = 0b1111,

    // Single character Tokens (mask 0b10_XXXX)
    /// Represents the [<colon-token>][1].
    ///
    /// ```md
    /// <colon-token>
    /// │├─ ":" ─┤│
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#typedef-colon-token
    Colon = 0b10_0001,

    /// Represents the [<semicolon-token>][1].
    ///
    /// ```md
    /// <semicolon-token>
    /// │├─ ";" ─┤│
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#typedef-semicolon-token
    Semicolon = 0b10_0010,

    /// Represents the [<comma-token>][1].
    ///
    /// ```md
    /// <comma-token>
    /// │├─ "," ─┤│
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#typedef-comma-token
    Comma = 0b10_0011,

    /// Represents the [<\[-token>][1].
    ///
    /// ```md
    /// <[-token>
    /// │├─ "[" ─┤│
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#typedef-open-square
    LeftSquare = 0b10_0100,

    /// Represents the [<\]-token>][1].
    ///
    /// ```md
    /// <]-token>
    /// │├─ "]" ─┤│
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#typedef-close-square
    RightSquare = 0b10_0101,

    /// Represents the [<(-token>][1].
    ///
    /// ```md
    /// <(-token>
    /// │├─ "(" ─┤│
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#typedef-open-paren
    LeftParen = 0b10_0110,

    /// Represents the [<)-token>][1].
    ///
    /// ```md
    /// <)-token>
    /// │├─ ")" ─┤│
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#typedef-close-paren
    RightParen = 0b10_0111,

    /// Represents the [<{-token>][1].
    ///
    /// ```md
    /// <{-token>
    /// │├─ "{" ─┤│
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#typedef-open-curly
    LeftCurly = 0b10_1000,

    /// Represents the [<}-token>][1].
    ///
    /// ```md
    /// <}-token>
    /// │├─ "}" ─┤│
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax/#typedef-close-curly
    RightCurly = 0b10_1001,

    // Bad single character Tokens (mask 0b11_XXXX): a single-character kind with the bad bit (`0b01_0000`) added.
    /// This kind, like the other `Bad*` kinds that follow it, is a non-standard Bad kind that is never emitted by the
    /// Lexer, but can be used by Parsers to denote a token that is either:
    ///  - a Token that was unexpected in this position.
    ///  - a Token that was inserted to recover the parser to a known state.
    BadColon = 0b11_0001,
    /// Non-standard bad counterpart of [Kind::Semicolon]; see [Kind::BadColon].
    BadSemicolon = 0b11_0010,
    /// Non-standard bad counterpart of [Kind::Comma]; see [Kind::BadColon].
    BadComma = 0b11_0011,
    /// Non-standard bad counterpart of [Kind::LeftSquare]; see [Kind::BadColon].
    BadLeftSquare = 0b11_0100,
    /// Non-standard bad counterpart of [Kind::RightSquare]; see [Kind::BadColon].
    BadRightSquare = 0b11_0101,
    /// Non-standard bad counterpart of [Kind::LeftParen]; see [Kind::BadColon].
    BadLeftParen = 0b11_0110,
    /// Non-standard bad counterpart of [Kind::RightParen]; see [Kind::BadColon].
    BadRightParen = 0b11_0111,
    /// Non-standard bad counterpart of [Kind::LeftCurly]; see [Kind::BadColon].
    BadLeftCurly = 0b11_1000,
    /// Non-standard bad counterpart of [Kind::RightCurly]; see [Kind::BadColon].
    BadRightCurly = 0b11_1001,
}

impl Kind {
    /// Discriminant bit that is set on every `Bad*` kind, both the `0b01_XXXX` group and the single-character
    /// `0b11_XXXX` group, and on no other kind.
    const BAD_BIT: u8 = 0b01_0000;

    /// Rebuilds a `Kind` from raw discriminant bits.
    ///
    /// Every discriminant declared on the enum maps back to its own variant. Any other bit pattern, including the
    /// reserved slots noted below and anything above 6 bits, yields [Kind::Eof].
    pub(crate) const fn from_bits(bits: u8) -> Self {
        match bits {
            0b0001 => Self::Whitespace,
            0b0010 => Self::Comment,
            0b0011 => Self::CdcOrCdo,
            0b0100 => Self::Number,
            0b0101 => Self::Dimension,
            // 0b0110 => Reserved
            // 0b0111 => Reserved
            0b1000 => Self::Ident,
            0b1001 => Self::Function,
            0b1010 => Self::AtKeyword,
            0b1011 => Self::Hash,
            0b1100 => Self::String,
            0b1101 => Self::Url,
            0b1110 => Self::UnicodeRange,
            0b1111 => Self::Delim,

            // Error tokens are represented in 5 bits.
            0b1_0001 => Self::BadWhitespace,
            0b1_0010 => Self::BadComment,
            0b1_0011 => Self::BadCdcOrCdo,
            0b1_0100 => Self::BadNumber,
            0b1_0101 => Self::BadDimension,
            // 0b1_0110 => Reserved
            // 0b1_0111 => Reserved
            0b1_1000 => Self::BadIdent,
            0b1_1001 => Self::BadFunction,
            0b1_1010 => Self::BadAtKeyword,
            0b1_1011 => Self::BadHash,
            0b1_1100 => Self::BadString,
            0b1_1101 => Self::BadUrl,
            // NOTE(review): 0b1_1110 is not the discriminant of any variant (there is no `BadUnicodeRange`), yet it
            // decodes to `UnicodeRange` instead of falling through to `Eof`. Kept as-is to preserve behaviour;
            // confirm whether this is intentional.
            0b1_1110 => Self::UnicodeRange,
            0b1_1111 => Self::BadDelim,

            // Single character delimiters are represented in 6 bits.
            0b10_0001 => Self::Colon,
            0b10_0010 => Self::Semicolon,
            0b10_0011 => Self::Comma,
            0b10_0100 => Self::LeftSquare,
            0b10_0101 => Self::RightSquare,
            0b10_0110 => Self::LeftParen,
            0b10_0111 => Self::RightParen,
            0b10_1000 => Self::LeftCurly,
            0b10_1001 => Self::RightCurly,

            // Bad single character delimiters: the 6-bit pattern with the bad bit added.
            0b11_0001 => Self::BadColon,
            0b11_0010 => Self::BadSemicolon,
            0b11_0011 => Self::BadComma,
            0b11_0100 => Self::BadLeftSquare,
            0b11_0101 => Self::BadRightSquare,
            0b11_0110 => Self::BadLeftParen,
            0b11_0111 => Self::BadRightParen,
            0b11_1000 => Self::BadLeftCurly,
            0b11_1001 => Self::BadRightCurly,

            // 0b0000 and every unassigned pattern.
            _ => Self::Eof,
        }
    }

    /// Returns the variant's name exactly as written in the enum (e.g. `"BadColon"`).
    ///
    /// The result is a string literal, so it is returned as `&'static str` and can outlive the borrow of `self`.
    #[doc(hidden)]
    pub const fn as_str(&self) -> &'static str {
        match *self {
            Kind::Eof => "Eof",
            Kind::Whitespace => "Whitespace",
            Kind::Comment => "Comment",
            Kind::CdcOrCdo => "CdcOrCdo",
            Kind::Number => "Number",
            Kind::Dimension => "Dimension",
            Kind::Ident => "Ident",
            Kind::Function => "Function",
            Kind::AtKeyword => "AtKeyword",
            Kind::Hash => "Hash",
            Kind::String => "String",
            Kind::Url => "Url",
            Kind::UnicodeRange => "UnicodeRange",
            Kind::Delim => "Delim",

            Kind::BadWhitespace => "BadWhitespace",
            Kind::BadComment => "BadComment",
            Kind::BadCdcOrCdo => "BadCdcOrCdo",
            Kind::BadNumber => "BadNumber",
            Kind::BadDimension => "BadDimension",
            Kind::BadIdent => "BadIdent",
            Kind::BadFunction => "BadFunction",
            Kind::BadAtKeyword => "BadAtKeyword",
            Kind::BadHash => "BadHash",
            Kind::BadString => "BadString",
            Kind::BadUrl => "BadUrl",
            Kind::BadDelim => "BadDelim",

            Kind::Colon => "Colon",
            Kind::Semicolon => "Semicolon",
            Kind::Comma => "Comma",
            Kind::LeftSquare => "LeftSquare",
            Kind::RightSquare => "RightSquare",
            Kind::LeftParen => "LeftParen",
            Kind::RightParen => "RightParen",
            Kind::LeftCurly => "LeftCurly",
            Kind::RightCurly => "RightCurly",

            Kind::BadColon => "BadColon",
            Kind::BadSemicolon => "BadSemicolon",
            Kind::BadComma => "BadComma",
            Kind::BadLeftSquare => "BadLeftSquare",
            Kind::BadRightSquare => "BadRightSquare",
            Kind::BadLeftParen => "BadLeftParen",
            Kind::BadRightParen => "BadRightParen",
            Kind::BadLeftCurly => "BadLeftCurly",
            Kind::BadRightCurly => "BadRightCurly",
        }
    }

    /// Returns `true` for every `Bad*` kind and `false` for all others.
    ///
    /// Only the bad bit (`0b01_0000`) is inspected. The previous check also required the single-character bit
    /// (`0b10_0000`) to be clear, which made `BadColon` through `BadRightCurly` (`0b11_XXXX`) report `false`.
    pub const fn is_bad(&self) -> bool {
        (*self as u8) & Self::BAD_BIT != 0
    }
}
696
697impl fmt::Debug for Kind {
698 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
699 write!(f, "Kind::{}", self.as_str())
700 }
701}
702
703impl fmt::Display for Kind {
704 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
705 write!(f, "Kind::{}", self.as_str())
706 }
707}
708
709impl PartialEq<KindSet> for Kind {
710 fn eq(&self, other: &KindSet) -> bool {
711 other.contains_bits(*self as u8)
712 }
713}
714
#[test]
fn test_from_bits() {
    // Every kind must survive a round trip through its raw discriminant bits.
    let kinds = [
        Kind::Eof,
        Kind::Whitespace,
        Kind::Comment,
        Kind::CdcOrCdo,
        Kind::Number,
        Kind::Dimension,
        Kind::Ident,
        Kind::Function,
        Kind::AtKeyword,
        Kind::Hash,
        Kind::String,
        Kind::Url,
        Kind::UnicodeRange,
        Kind::Delim,
        // Bad counterparts of the multi-character kinds.
        Kind::BadWhitespace,
        Kind::BadComment,
        Kind::BadCdcOrCdo,
        Kind::BadNumber,
        Kind::BadDimension,
        Kind::BadIdent,
        Kind::BadFunction,
        Kind::BadAtKeyword,
        Kind::BadHash,
        Kind::BadString,
        Kind::BadUrl,
        Kind::BadDelim,
        // Single-character kinds.
        Kind::Colon,
        Kind::Semicolon,
        Kind::Comma,
        Kind::LeftSquare,
        Kind::RightSquare,
        Kind::LeftParen,
        Kind::RightParen,
        Kind::LeftCurly,
        Kind::RightCurly,
        // Bad counterparts of the single-character kinds.
        Kind::BadColon,
        Kind::BadSemicolon,
        Kind::BadComma,
        Kind::BadLeftSquare,
        Kind::BadRightSquare,
        Kind::BadLeftParen,
        Kind::BadRightParen,
        Kind::BadLeftCurly,
        Kind::BadRightCurly,
    ];
    for &kind in &kinds {
        assert_eq!(Kind::from_bits(kind as u8), kind);
    }
}
765
#[test]
fn size_test() {
    // `Kind` is expected to occupy exactly one byte.
    let kind_size = ::std::mem::size_of::<Kind>();
    assert_eq!(kind_size, 1);
}