css_lexer/
token.rs

1use crate::{
2	AssociatedWhitespaceRules, CommentStyle, Cursor, Kind, KindSet, PairWise, QuoteStyle, SourceOffset, Whitespace,
3	constants::SINGLE_CHAR_KINDS,
4};
5use std::char::REPLACEMENT_CHARACTER;
6
7/// An abstract representation of the chunk of the source text, retaining certain "facts" about the source.
8///
9/// # Design
10///
11/// The [Token] type is an immutable packing of two [u32s][u32] that represents a unit in the source text, but without
12/// the associated offset data that points to its position in the source text. This is important because it means that
13/// equivalent [Tokens][Token] are equal even in different parts of the document. For the most part a [Token] doesn't
14/// represent data that can be put into a text file because it lacks the underlying character data. It is lossy. For
15/// example a [Token] with [Kind::Ident] just represents _an_ ident, but it doesn't retain what the keyword is.
16/// Storing raw-character data would require either storing tokens on the heap (and therefore they couldn't be [Sized])
17/// or by keeping a reference to `&'a str` which means larger token sizes and lifetime tracking. By _not_ storing
18/// character data we can keep [Token] [Sized] and keep it to `size_of` `8`, avoiding the heap, avoiding
19/// references/lifetimes, and keeping [Token] entirely in the stack. For a lot of tokens this is _fine_ because the
20/// underlying character data isn't that useful past a certain point.
21///
22/// A [Token] retains certain "facts" about the underlying unit of text, though. For example it retains the [Kind], how
23/// many characters the token consumed, and various other pieces of information, depending on the [Kind]. In some
24/// cases, it's entirely possible to represent the full token, including character data, into the available bits (for
25/// example [Kind::Delim] stores its [char], [Kind::Number] stores its [f32]). Taking the time in the tokenizer to
26/// gather these facts and values can keep cache-lines hot, which speeds up subsequent checks in the parser.
27///
28/// If you're familiar with "red green" syntax trees such as [Swiftlang's libsyntax][1], or [Rust-Analyzer's Rowan][2]
29/// or [Roslyn][3] this might be a little familiar in some concepts. However [Token] does not represent a tree, and
30/// relies on resorting back to the string data to find out keyword values.
31///
32/// [1]: https://gh.io/AAtdqpg
33/// [2]: https://gh.io/AAtf8pt
34/// [3]: https://gh.io/AAtab90
35///
36/// This representation of facts, kind, length, or other metadata can be quite complex - so here's a
37/// full breakdown:
38///
39/// # Anatomy of Token
40///
41/// A [Token] is a struct of `(u32, u32)`. The second u32 is _usually_ the token length (hence keeping them separate).
42/// The first [u32], however, is split into 3 (sometimes 5) parts. The two u32s can be thought of like so:
43///
44/// ```md
45///   |------|------|--------------------------|---------------------------------|
46///   | TF   | K    | VD                       | Value                           |
47/// 0b| 0000 | 0000 | 000000000000000000000000 | 0000000000000000000000000000000 |
48///   |------|------|--------------------------|---------------------------------|
49///   | 4--- | 4--- | 24---------------------- | 32----------------------------- |
50/// ```
51///
52/// ## TF = Type Flags (or "Token Facts")
53///
54/// This represents a bit-mask in the upper-most 3 bits. The flags are general purpose and change meaning depending on
55/// the Token's [Kind]. Each flag generally maps to a method so it's not necessary to remember the contents of this
56/// table, but it can serve as a useful reference. Note that not all methods return a [bool], so footnotes have been
57/// added to explain these further.
58///
59/// | Kind::              | Flag   | Description                 | Method                                   |
60/// |---------------------|--------|-----------------------------|------------------------------------------|
61/// | [Kind::Number]      | `0001` | Error/Recovery token        | [Token::is_bad()][^bad]                  |
62/// |                     | `0010` | Floating Point              | [Token::is_float()]                      |
63/// |                     | `0100` | Has a "Sign" (-/+)          | [Token::has_sign()]                      |
64/// |                     | `1000` | Sign is required            | [Token::sign_is_required()]              |
65/// | [Kind::Dimension]   | `0001` | Error/Recovery token        | [Token::is_bad()][^bad]                  |
66/// |                     | `0010` | Floating Point              | [Token::is_float()]                      |
67/// |                     | `0100` | Has a "Sign" (-/+)          | [Token::has_sign()]                      |
68/// |                     | `1000` | Unit is a known dimension   | [Token::atom_bits()][^dimension]         |
69/// | [Kind::String]      | `0001` | Error/Recovery token        | [Token::is_bad()][^bad]                  |
70/// |                     | `0010` | Uses Double Quotes          | [Token::quote_style()][^quotes]          |
71/// |                     | `0100` | Has a closing quote         | [Token::has_close_quote()]               |
72/// |                     | `1000` | Contains escape characters  | [Token::contains_escape_chars()]         |
73/// | [Kind::Ident]       | `0001` | Error/Recovery token        | [Token::is_bad()][^bad]                  |
74/// |                     | `0010` | Contains non-lower-ASCII    | [Token::is_lower_case()]                 |
75/// |                     | `0100` | Is a "Dashed Ident"         | [Token::is_dashed_ident()]               |
76/// |                     | `1000` | Contains escape characters  | [Token::contains_escape_chars()]         |
77/// | [Kind::Function]    | `0001` | Error/Recovery token        | [Token::is_bad()][^bad]                  |
78/// |                     | `0010` | Contains non-lower-ASCII    | [Token::is_lower_case()]                 |
79/// |                     | `0100` | Is a "Dashed Ident"         | [Token::is_dashed_ident()]               |
80/// |                     | `1000` | Contains escape characters  | [Token::contains_escape_chars()]         |
81/// | [Kind::AtKeyword]   | `0001` | Error/Recovery token        | [Token::is_bad()][^bad]                  |
82/// |                     | `0010` | Contains non-lower-ASCII    | [Token::is_lower_case()]                 |
83/// |                     | `0100` | Is a "Dashed Ident"         | [Token::is_dashed_ident()]               |
84/// |                     | `1000` | Contains escape characters  | [Token::contains_escape_chars()]         |
85/// | [Kind::Hash]        | `0001` | Error/Recovery token        | [Token::is_bad()][^bad]                  |
86/// |                     | `0010` | Contains non-lower-ASCII    | [Token::is_lower_case()]                 |
87/// |                     | `0100` | First character is ASCII    | [Token::hash_is_id_like()]               |
88/// |                     | `1000` | Contains escape characters  | [Token::contains_escape_chars()]         |
89/// | [Kind::Url]         | `0001` | Error/Recovery token        | [Token::is_bad()][^bad]                  |
90/// |                     | `0010` | Has a closing paren )       | [Token::url_has_closing_paren()]         |
91/// |                     | `0100` | Contains whitespace after ( | [Token::url_has_leading_space()]         |
92/// |                     | `1000` | Contains escape characters  | [Token::contains_escape_chars()]         |
93/// | [Kind::UnicodeRange]| `0001` | Error/Recovery token        | [Token::is_bad()][^bad]                  |
94/// |                     | `0010` | (Reserved)                  | --                                       |
95/// |                     | `0100` | (Reserved)                  | --                                       |
96/// |                     | `1000` | (Reserved)                  | --                                       |
97/// | [Kind::CdcOrCdo]    | `0001` | Error/Recovery token        | [Token::is_bad()][^bad]                  |
98/// |                     | `0010` | Is CDC (`000` would be CDO) | [Token::is_cdc()]                        |
99/// |                     | `0100` | (Reserved)                  | --                                       |
100/// |                     | `1000` | (Reserved)                  | --                                       |
101/// | [Kind::Whitespace]  | `0001` | Error/Recovery token        | [Token::is_bad()][^bad]                  |
102/// |                     | `???0` | Whitespace style            | [Token::whitespace_style()][^whitespace] |
103/// | [Kind::Delim]       | `0001` | Error/Recovery token        | [Token::is_bad()][^bad]                  |
104/// |                     | `???0` | Associate whitespace rules  | [Token::associated_whitespace()][^delim] |
105/// | [Kind::Comment]     | `0001` | Error/Recovery token        | [Token::is_bad()][^bad]                  |
106/// |                     | `???0` | (Special)                   | [Token::comment_style()][^comments]      |
107///
108/// [^bad]: All tokens use the 4th bit to denote if this token is an "Error/Recovery Token". These tokens are not going
109/// to be emitted by the lexer (except in the case of BadString & BadUrl), but a Parser can set this flag on a token to
110/// help differentiate between tokens emitted by the lexer and tokens that were either emitted by the lexer but in an
111/// unexpected position, or tokens _constructed_ by the parser in order to aid in recovering the Parser into a state to
112/// resume.
113/// [^quotes]: Strings do not have a [bool] returning method for whether or not the quote is using double or single
114/// quotes, instead the [Token::quote_style()] method will return the [QuoteStyle] enum for better readability.
115/// [^whitespace]: Whitespace tokens do not have a [bool] returning method, instead [Token::whitespace_style()] will
116/// return the [Whitespace] enum for improved readability.
117/// [^comments]: Rather than using the 3 bits as a bit-mask, Comment tokens use the data to store the [CommentStyle]
118/// enum, which is capable of representing 8 discrete comment styles.
119/// [^delim]: Delims can be used in interesting ways inside of CSS syntax. At higher levels CSS is _sometimes_
120/// whitespace sensitive, for example the whitespace inside of a CSS selector _sometimes_ represents the descendant
121/// combinator, meanwhile delimiters inside calc() are sensitive to whitespace collapse (`calc(1px + 1px)` is valid
122/// while `calc(1px+1px)` is a parse error). Further to this, introducing whitespace (say through a formatter) might
123/// break in interesting ways due to some combinations of Delims & Idents - for example Pseudo Classes like `:hover`,
124/// or CSS like languages such as SASS using `$var` style syntax. While `:hover` and `$var` are comprised of two tokens
125/// they're considered one conceptual unit. Having a way to express these relationships at the token level can be useful
126/// for other low level machinery such as formatters/minifiers, rather than introducing complex state at higher levels.
127/// For these reasons, Delim tokens have the ability to express their whitespace association. The lexer will always
128/// produce a token with empty whitespace rules, but parsers can replace this token with a more complex set of rules.
129///
130/// ## K = Kind Bits
131///
132/// The `K` value - upper-most bits 4-9 stores the 5-bit [Kind].
133///
134/// ## VD = Value Data
135///
136/// The `VD` value - the lower-most 24-bits - stores data depending on the [Token] [Kind]. For most kinds this data is
137/// reserved (just 0s). The value data cannot be interrogated manually, but it packs in additional data about the
138/// underlying string to make the string easier to parse without doing the same lookups that the tokenizer already had
139/// to - such as determining lengths of the various parts of the token, or packing values so that consulting the string
140/// can be avoided (which keeps cache-lines hot).
141///
142/// Below describes the special kinds which use the Value Data to store yet more information about the token...
143///
144/// ### Value Data for [Kind::Ident], [Kind::Function], [Kind::AtKeyword]
145///
146/// If the [Kind] is [Kind::Ident], [Kind::Function], or [Kind::AtKeyword] then Value Data represents the Ident's "Atom
147/// Data". When lexing one of these tokens the Lexer will pass the string slice to [DynAtomSet][crate::DynAtomSet] and
148/// set these bits accordingly. This allows implementations to provide a [DynAtomSet][crate::DynAtomSet] of interned
149/// strings to improve performance of string comparisons. The `ATOM_DYNAMIC_BIT` can be used to dynamically intern
150/// strings during runtime (this behaviour is abstracted by [DynAtomRegistry][crate::DynAtomRegistry]). This 24-bits
151/// allows for ~16MM unique strings, but with the `ATOM_DYNAMIC_BIT` this becomes ~8MM static atoms and ~8MM dynamic
152/// atoms (very unlikely CSS will ever reach even 10k predefined keywords, and most CSS files will have less than 1000
153/// unique strings).
154///
155/// ### Value Data for [Kind::Number]
156///
157/// If the [Kind] is [Kind::Number], Value Data represents the length of that number (this means the parser is
158/// restricted from representing numbers longer than 16,777,216 characters which is probably an acceptable limit). Note
159/// that this does not affect the _value_ of a number, just the characters in a string. Numbers in CSS are [f32]. The
160/// vast majority of [f32s][f32] can be represented in 16MM characters, but it's possible to author a document that
161/// contains a set of numeric characters longer than 16MM code points. These scenarios are considered [undefined
162/// behaviour][4].
163///
164/// [4]: https://en.wikipedia.org/wiki/Undefined_behavior
165///
166/// ### Value Data for [Kind::Hash]
167///
168/// If the [Kind] is [Kind::Hash], Value Data represents the length of that hash (this means the parser is restricted
169/// from representing IDs and hex codes longer than 16,777,216 characters which is probably an acceptable limit). Note
170/// that this restriction means that ID selectors have a much tighter limit than other tokens, such as strings or
171/// idents, but it's very unlikely to see a 16million character ID in CSS (String, maybe).
172///
173/// ### Value Data for [Kind::Url]
174///
175/// If the [Kind] is [Kind::Url], Value Data represents the "leading length" and "trailing length" of the URL. This
176/// means the value data is split into two 12 bit numbers:
177///
178/// ```md
179/// |--------------|--------------|
180/// | LL           | TL           |
181/// | 000000000000 | 000000000000 |
182/// |--------------|--------------|
183/// | 12---------- | 12---------- |
184/// ```
185///
186/// The "leading" length represents the `url(` part of the token. Typically this will be `4`, however it's possible
187/// (for legacy compatibility reasons within CSS) to add whitespace between the opening parenthesis and the URL value.
188/// It's also possible to escape the `url` ident portion. This means `\75\52\6c(   ` is also a valid leading section of
189/// a URL ident (which has a character length of 13), as is `\000075 \000052 \00006c (   ` (28 characters). 12 bits
190/// allows for a maximum character length of 4,096. It is not possible to represent a URL token's leading section using
191/// 4,096 characters so there is some headroom (wasted bytes) here.
192///
193/// The "trailing" length represents the `)` part of the token. Typically this will be `1`, however it's possible to
194/// add any number of whitespace characters between the end of the URL and the closing parenthesis. If a CSS document
195/// contains more than 4095 whitespace characters then this is considered [undefined behaviour][4].
196///
197/// ### Value Data for [Kind::Dimension]
198///
199/// If K is a Dimension, then this represents both the number of characters in the numeric portion of the dimension
200/// and the length of the ident portion of the dimension... or the dimension unit itself (more on that below). This
201/// means the value data is split into two 12 bit numbers:
202///
203/// ```md
204/// |--------------|--------------|
205/// | NL           | DUL          |
206/// | 000000000000 | 000000000000 |
207/// |--------------|--------------|
208/// | 12---------- | 12---------- |
209///
210/// |--------------|-------| --------|
211/// | NL           | KDUL  | KNOWN   |
212/// | 000000000000 | 00000 | 0000000 |
213/// |--------------|-------| --------|
214/// | 12---------- | 5---- | 7------ |
215/// ```
216///
217/// The NL portion - the numeric length - represents the length of characters the number contains. This means the
218/// numeric portion of a dimension can only be 4,096 characters long. This is dramatically shorter than the 16MM
219/// allowed for numbers but it's still also incredibly generous such that it's highly unlikely to ever be hit unless
220/// someone is intentionally trying to break the parser. The [Lexer][super::Lexer] encountering a dimension with a
221/// numeric portion longer than 4,096 characters is considered [undefined behaviour][4].
222///
223/// The DUL portion (if `TF & 1000 == 0`) will represent the length of characters the ident portion of the dimension
224/// (aka the dimension unit) contains. This means the ident portion of a dimension can only be 4,096 characters long.
225/// For practical purposes CSS has a fixed set of dimensions - the longest of which (at the time of writing) are 5
226/// characters long (e.g. `svmax`). Through the use of escaping shenanigans it's possible to create a valid CSS
227/// dimension longer than 5 characters though (every ident can be made 8 times longer by using escape characters, e.g.
228/// `1svmax` at 6 characters can be instead written as `1\000073 \000076 \00006d \000061 \000078` at 40 characters). In
229/// addition to these factors, it's worth pointing out that there is scope for further dimensions and some [proposals
230/// for "custom" dimensions][5], and lastly this library is designed for CSS _and CSS-alike_ languages, which may
231/// invent their own dimension units. In other words being too restrictive on dimension ident length could be costly
232/// in the future, therefore 4,096 characters seems like a reasonable, if generous, trade-off.
233///
234/// There's a giant caveat here though. If `TF & 1000 != 0`, then the dimension is considered "known" and DUL will be
235/// encoded differently. Instead of just containing the dimension unit length, which requires consulting the underlying
236/// `&str` to get the actual dimension, it will be used to store an Atom - but only the first 7 bits (the KNOWN
237/// portion), which for an Atom must be a Dimension atom (an assumption made on anything that implements
238/// [AtomSet][crate::AtomSet] is that all dimension units should be stored in the byte values of 1-127, so that they
239/// can be encoded in this space). Dimension units _can_ be escape encoded, and so the underlying character data may
240/// differ from the unescaped unit length, as such 5-bit KDUL portion represents character data length, in other words
241/// `KNOWN.len()` may not always equal `KDUL`.
242///
243/// [5]: https://github.com/w3c/csswg-drafts/issues/7379
244///
245/// ## Value
246///
247/// The `Value` portion of [Token] represents the length of the token for most token kinds. However, for some tokens
248/// their length is already packed into the first u32. So it would make more sense to use this u32 to store more
249/// interesting data.
250///
251/// ## Value for [Kind::Delim] and single character tokens
252///
253/// [Kind::Delim] and single-character tokens (i.e. [Kind::Colon]->[Kind::RightCurly]) typically have a length of `1`
254/// ([Kind::Delim] can have a varied length for surrogate pairs). Instead of storing the length and wasting a whole
255/// [u32], this region stores the [char]. Calling [Token::char()] will return an [Option] which will always be [Some]
256/// for [Kind::Delim] and single-character tokens.
257///
258/// ## Value for [Kind::Hash]
259///
260/// The length of a hash is stored in its `VD` portion, leaving 32bits to storing other data. It just so happens that
261/// a 8-character hex code (#ffaabbcc) fits nicely inside of 32-bits. During tokenization we can eagerly parse the hex
262/// code and stuff it here, so it can be more easily reasoned about in upstream code (rather than
263/// reading the character data).
264///
265/// ## Value for [Kind::Number] and [Kind::Dimension]
266///
267/// As these tokens store their length data in the `VD` portion, this [u32] instead stores the _value_ of the number,
268/// stored as [f32::to_bits()].
269///
270/// ## Value data for other tokens.
271///
272/// In all other cases, this represents the length of the token as utf-8 bytes. This means the maximum token length is
273/// 4,294,967,295 bytes, aka ~4GB. This sounds very long but also CSS can host very large image data and browsers will
274/// accommodate very large URLs. [An mdn article on Data URLs][6] claims that Firefox supports 32mb Data URLs, Chrome
275/// supports over 512mb, and Safari over 2gb. The reality is that if someone has such a large data URL in their CSS
276/// they probably should split it out, but we have a whole 32 bits to store the length so we may as well use it...
277///
278/// [6]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs#common_problems
// Field `.0` packs the flag bits, kind bits and 24 bits of value data; field `.1` holds either the token length or,
// for some kinds, an inline value (number bits, hex value, or char), as laid out in the doc comment above.
279#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
280pub struct Token(u32, u32);
281
282impl Default for Token {
283	fn default() -> Self {
284		Self((Kind::Whitespace as u32) << 24, 0)
285	}
286}
287
/// Selects the top 8 bits (bits 24-31) of a [Token]'s first word, where the flag and kind bits are packed.
const KIND_MASK: u32 = 0xFF00_0000;
/// Selects the low 24 bits (bits 0-23) of a [Token]'s first word, where the value data is packed.
const LENGTH_MASK: u32 = 0x00FF_FFFF;
/// Clears the low 12 bits of a word; applied to a length that has already been shifted left by 12 so that it
/// occupies the upper half of the 24-bit value data.
const HALF_LENGTH_MASK: u32 = 0xFFFF_F000;

/// The bit position used to distinguish between static and dynamic atoms.
/// - Static atoms have this bit = 0 (values 0 to 8,388,607)
/// - Dynamic atoms have this bit = 1 (values 8,388,608 to 16,777,215)
///
/// Together the two ranges cover the full 24-bit space available in token data.
#[allow(dead_code)] // Used in dyn_atom_registry module
pub(crate) const ATOM_DYNAMIC_BIT: u32 = 23;
299
300impl Token {
301	/// Represents an empty token.
302	pub const EMPTY: Token = Token::new_whitespace(Whitespace::none(), 0);
303
304	/// Represents an EOF token.
305	pub const EOF: Token = Token(0b0, 0);
306
307	/// Represents a CDO (`<!--`) token.
308	pub const CDO: Token = Token(((Kind::CdcOrCdo as u32) << 24) & KIND_MASK, 4);
309
310	/// Represents a CDC (`-->`) token.
311	pub const CDC: Token = Token((((Kind::CdcOrCdo as u32) | 0b001_00000) << 24) & KIND_MASK, 3);
312
313	/// Represents a single ' ' space token.
314	pub const SPACE: Token = Token::new_whitespace(Whitespace::Space, 1);
315
316	/// Represents a single Tab token.
317	pub const TAB: Token = Token::new_whitespace(Whitespace::Tab, 1);
318
319	/// Represents a single `\n` token.
320	pub const NEWLINE: Token = Token::new_whitespace(Whitespace::Newline, 1);
321
322	/// Represents the Number `0`. This is not equal to other representations of zero, such as `00`, `0e0`, `0.0` and so
323	/// on.
324	pub const NUMBER_ZERO: Token = Token((((Kind::Number as u32) | 0b100_00000) << 24) & KIND_MASK, 1);
325
326	/// Represents the `:` token.
327	pub const COLON: Token = Token::new_delim(':');
328
329	/// Represents the `;` token.
330	pub const SEMICOLON: Token = Token::new_delim(';');
331
332	/// Represents the `,` token.
333	pub const COMMA: Token = Token::new_delim(',');
334
335	/// Represents the `[` token.
336	pub const LEFT_SQUARE: Token = Token::new_delim('[');
337
338	/// Represents the `]` token.
339	pub const RIGHT_SQUARE: Token = Token::new_delim(']');
340
341	/// Represents the `(` token.
342	pub const LEFT_PAREN: Token = Token::new_delim('(');
343
344	/// Represents the `)` token.
345	pub const RIGHT_PAREN: Token = Token::new_delim(')');
346
347	/// Represents the `{` token.
348	pub const LEFT_CURLY: Token = Token::new_delim('{');
349
350	/// Represents the `}` token.
351	pub const RIGHT_CURLY: Token = Token::new_delim('}');
352
353	/// Represents a `!` [Kind::Delim] token.
354	pub const BANG: Token = Token::new_delim('!');
355
356	/// Represents a `#` [Kind::Delim] token.
357	pub const HASH: Token = Token::new_delim('#');
358
359	/// Represents a `$` [Kind::Delim] token.
360	pub const DOLLAR: Token = Token::new_delim('$');
361
362	/// Represents a `%` [Kind::Delim] token - not to be confused with the `%` dimension.
363	pub const PERCENT: Token = Token::new_delim('%');
364
365	/// Represents a `&` [Kind::Delim] token.
366	pub const AMPERSAND: Token = Token::new_delim('&');
367
368	/// Represents a `*` [Kind::Delim] token.
369	pub const ASTERISK: Token = Token::new_delim('*');
370
371	/// Represents a `+` [Kind::Delim] token.
372	pub const PLUS: Token = Token::new_delim('+');
373
374	/// Represents a `-` [Kind::Delim] token.
375	pub const DASH: Token = Token::new_delim('-');
376
377	/// Represents a `.` [Kind::Delim] token.
378	pub const PERIOD: Token = Token::new_delim('.');
379
380	/// Represents a `/` [Kind::Delim] token.
381	pub const SLASH: Token = Token::new_delim('/');
382
383	/// Represents a `<` [Kind::Delim] token.
384	pub const LESS_THAN: Token = Token::new_delim('<');
385
386	/// Represents a `=` [Kind::Delim] token.
387	pub const EQUALS: Token = Token::new_delim('=');
388
389	/// Represents a `>` [Kind::Delim] token.
390	pub const GREATER_THAN: Token = Token::new_delim('>');
391
392	/// Represents a `?` [Kind::Delim] token.
393	pub const QUESTION: Token = Token::new_delim('?');
394
395	/// Represents a `@` [Kind::Delim] token. Not to be confused with the @keyword token.
396	pub const AT: Token = Token::new_delim('@');
397
398	/// Represents a `\\` [Kind::Delim] token.
399	pub const BACKSLASH: Token = Token::new_delim('\\');
400
401	/// Represents a `^` [Kind::Delim] token.
402	pub const CARET: Token = Token::new_delim('^');
403
404	/// Represents a `_` [Kind::Delim] token.
405	pub const UNDERSCORE: Token = Token::new_delim('_');
406
407	/// Represents a `\`` [Kind::Delim] token.
408	pub const BACKTICK: Token = Token::new_delim('\'');
409
410	/// Represents a `|` [Kind::Delim] token.
411	pub const PIPE: Token = Token::new_delim('|');
412
413	/// Represents a `~` [Kind::Delim] token.
414	pub const TILDE: Token = Token::new_delim('~');
415
416	/// Represents a replacement character [Kind::Delim] token.
417	pub const REPLACEMENT_CHARACTER: Token = Token::new_delim(REPLACEMENT_CHARACTER);
418
419	/// Creates a "Dummy" token with no additional data, just the [Kind].
420	#[inline]
421	pub const fn dummy(kind: Kind) -> Self {
422		Self((kind as u32) << 24, 0).with_bad_flag()
423	}
424
425	/// Creates a "Dummy" token with no additional data, just [Kind::Ident].
426	#[inline]
427	pub const fn dummy_ident() -> Self {
428		Self((Kind::Ident as u32) << 24, 0).with_bad_flag()
429	}
430
431	/// Creates a [Kind::Whitesapce] token.
432	#[inline]
433	pub(crate) const fn new_whitespace(style: Whitespace, len: u32) -> Self {
434		let flags: u32 = Kind::Whitespace as u32 | ((style.to_bits() as u32) << 5);
435		Self((flags << 24) & KIND_MASK, len)
436	}
437
438	/// Creates a [Kind::Comment] token.
439	#[inline]
440	pub(crate) fn new_comment(style: CommentStyle, len: u32) -> Self {
441		let flags: u32 = Kind::Comment as u32 | ((style as u32) << 5);
442		Self((flags << 24) & KIND_MASK, len)
443	}
444
445	/// Creates a [Kind::Number] token.
446	#[inline]
447	pub(crate) fn new_number(is_float: bool, has_sign: bool, len: u32, value: f32) -> Self {
448		let flags: u32 = Kind::Number as u32 | ((is_float as u32) << 5) | ((has_sign as u32) << 6);
449		Self((flags << 24) & KIND_MASK | (len & LENGTH_MASK), value.to_bits())
450	}
451
452	/// Creates a new [Kind::Dimension] token.
453	#[inline]
454	pub(crate) fn new_dimension(
455		is_float: bool,
456		has_sign: bool,
457		num_len: u32,
458		unit_len: u32,
459		value: f32,
460		atom: u8,
461	) -> Self {
462		debug_assert!(num_len <= 4097);
463		let num_len = (num_len << 12) & HALF_LENGTH_MASK;
464		let is_known_unit = if unit_len < 32 { ((atom != 0) as u32) << 7 } else { 0 };
465		let unit_len = if is_known_unit == 0 { unit_len } else { unit_len << 7 | (atom as u32 & 0b1111111) };
466		let flags: u32 = Kind::Dimension as u32 | is_known_unit | ((is_float as u32) << 5) | ((has_sign as u32) << 6);
467		Self(((flags << 24) & KIND_MASK) | ((num_len | unit_len) & LENGTH_MASK), value.to_bits())
468	}
469
470	/// Creates a new [Kind::BadString] token. Bad Strings are like String tokens but during lexing they failed to fully tokenize
471	/// into a proper string token, usually due to containing newline characters.
472	#[inline]
473	pub(crate) fn new_bad_string(len: u32) -> Self {
474		Self(((Kind::BadString as u32) << 24) & KIND_MASK, len)
475	}
476
477	/// Creates a new [Kind::BadUrl] token. Bad URLs are like URL tokens but during lexing they failed to fully tokenize into a
478	/// proper URL token, usually due to containing newline characters.
479	#[inline]
480	pub(crate) fn new_bad_url(len: u32) -> Self {
481		Self(((Kind::BadUrl as u32) << 24) & KIND_MASK, len)
482	}
483
484	/// Creates a new [Kind::Ident] token.
485	#[inline]
486	pub(crate) fn new_ident(
487		contains_non_lower_ascii: bool,
488		dashed: bool,
489		contains_escape: bool,
490		atom: u32,
491		len: u32,
492	) -> Self {
493		let flags: u32 = Kind::Ident as u32
494			| ((contains_non_lower_ascii as u32) << 5)
495			| ((dashed as u32) << 6)
496			| ((contains_escape as u32) << 7);
497		debug_assert!(atom & LENGTH_MASK == atom);
498		Self((flags << 24) & KIND_MASK | atom, len)
499	}
500
501	/// Creates a new [Kind::Function] token.
502	#[inline]
503	pub(crate) fn new_function(
504		contains_non_lower_ascii: bool,
505		dashed: bool,
506		contains_escape: bool,
507		atom: u32,
508		len: u32,
509	) -> Self {
510		let flags: u32 = Kind::Function as u32
511			| ((contains_non_lower_ascii as u32) << 5)
512			| ((dashed as u32) << 6)
513			| ((contains_escape as u32) << 7);
514		debug_assert!(atom & LENGTH_MASK == atom);
515		Self((flags << 24) & KIND_MASK | atom, len)
516	}
517
518	/// Creates a new [Kind::AtKeyword] token.
519	#[inline]
520	pub(crate) fn new_atkeyword(
521		contains_non_lower_ascii: bool,
522		dashed: bool,
523		contains_escape: bool,
524		atom: u32,
525		len: u32,
526	) -> Self {
527		let flags: u32 = Kind::AtKeyword as u32
528			| ((contains_non_lower_ascii as u32) << 5)
529			| ((dashed as u32) << 6)
530			| ((contains_escape as u32) << 7);
531		debug_assert!(atom & LENGTH_MASK == atom);
532		Self((flags << 24) & KIND_MASK | atom, len)
533	}
534
535	/// Creates a new [Kind::Hash] token.
536	#[inline]
537	pub(crate) fn new_hash(
538		contains_non_lower_ascii: bool,
539		first_is_ascii: bool,
540		contains_escape: bool,
541		len: u32,
542		hex_value: u32,
543	) -> Self {
544		let flags: u32 = Kind::Hash as u32
545			| ((contains_non_lower_ascii as u32) << 5)
546			| ((first_is_ascii as u32) << 6)
547			| ((contains_escape as u32) << 7);
548		debug_assert!(len < (1 << 24));
549		Self((flags << 24) & KIND_MASK | (len & LENGTH_MASK), hex_value)
550	}
551
552	/// Creates a new [Kind::String] token.
553	#[inline]
554	pub(crate) fn new_string(quotes: QuoteStyle, has_close_quote: bool, contains_escape: bool, len: u32) -> Self {
555		debug_assert!(quotes != QuoteStyle::None);
556		let quotes = if quotes == QuoteStyle::Double { 0b001_00000 } else { 0b0 };
557		let flags: u32 =
558			Kind::String as u32 | quotes | ((has_close_quote as u32) << 6) | ((contains_escape as u32) << 7);
559		Self((flags << 24) & KIND_MASK, len)
560	}
561
562	/// Creates a new [Kind::Url] token.
563	#[inline]
564	pub(crate) fn new_url(
565		ends_with_paren: bool,
566		contains_whitespace_after_open_paren: bool,
567		contains_escape: bool,
568		leading_length: u32,
569		trailing_length: u32,
570		len: u32,
571	) -> Self {
572		let leading_length = (leading_length << 12) & HALF_LENGTH_MASK;
573		let flags: u32 = Kind::Url as u32
574			| ((ends_with_paren as u32) << 5)
575			| ((contains_whitespace_after_open_paren as u32) << 6)
576			| ((contains_escape as u32) << 7);
577		Self((flags << 24) & KIND_MASK | ((leading_length | trailing_length) & LENGTH_MASK), len)
578	}
579
580	/// Creates a new [Kind::UnicodeRange] token.
581	#[inline]
582	pub(crate) fn new_unicode_range(start: u32, end: u32, len: u32) -> Self {
583		debug_assert!(start <= 0xFFFFFF);
584		debug_assert!(end <= 0xFFFFFF);
585		debug_assert!(len <= 255);
586		let flags: u32 = Kind::UnicodeRange as u32;
587		Self((flags << 24) & KIND_MASK | (start & LENGTH_MASK), (len << 24) | (end & LENGTH_MASK))
588	}
589
590	/// If the [Token] is [Kind::UnicodeRange], returns the start value of the range.
591	/// This value can be up to 0xFFFFFF (6 hex digits).
592	///
593	/// Asserts: The token is [Kind::UnicodeRange].
594	#[inline]
595	pub const fn unicode_range_start(&self) -> u32 {
596		debug_assert!(self.kind_bits() == Kind::UnicodeRange as u8);
597		self.0 & LENGTH_MASK
598	}
599
600	/// If the [Token] is [Kind::UnicodeRange], returns the end value of the range.
601	/// This value can be up to 0xFFFFFF (6 hex digits).
602	///
603	/// Asserts: The token is [Kind::UnicodeRange].
604	#[inline]
605	pub const fn unicode_range_end(&self) -> u32 {
606		debug_assert!(self.kind_bits() == Kind::UnicodeRange as u8);
607		self.1 & LENGTH_MASK
608	}
609
610	/// Creates a new [Kind::Delim] token.
611	#[inline]
612	pub(crate) const fn new_delim(char: char) -> Self {
613		let flags: u32 = Kind::Delim as u32;
614		Self((flags << 24) & KIND_MASK, char as u32)
615	}
616
617	/// Creates a new [Kind::Delim] token with associated whitespace.
618	#[inline]
619	pub(crate) const fn new_delim_with_associated_whitespace(char: char, rules: AssociatedWhitespaceRules) -> Self {
620		let flags: u32 = Kind::Delim as u32 | ((rules.to_bits() as u32) << 5);
621		Self((flags << 24) & KIND_MASK, char as u32)
622	}
623
624	/// \[private\]
625	/// Creates a new Token with an interned string.
626	#[inline]
627	pub fn new_interned(kind: Kind, bits: u32, len: u32) -> Token {
628		debug_assert!(kind == KindSet::IDENT_LIKE);
629		debug_assert!(bits & LENGTH_MASK == bits);
630		debug_assert!(len > 0);
631		Self(((kind as u32) << 24) & KIND_MASK | (bits & LENGTH_MASK), len + ((kind != Kind::Ident) as u32))
632	}
633
634	/// Returns the raw bits representing the [Kind].
635	#[inline(always)]
636	pub(crate) const fn kind_bits(&self) -> u8 {
637		(self.0 >> 24 & 0b1_1111) as u8
638	}
639
640	/// Returns the [Kind].
641	#[inline]
642	pub const fn kind(&self) -> Kind {
643		let kind_bits = if self.kind_bits() & 0b1111 == Kind::Delim as u8 {
644			let c = self.char().unwrap() as usize;
645			if c < 127 { SINGLE_CHAR_KINDS[c] as u8 } else { Kind::Delim as u8 }
646		} else {
647			self.kind_bits()
648		};
649		Kind::from_bits(if self.is_bad() { kind_bits | 0b1_0000 } else { kind_bits })
650	}
651
652	/// Check if the TF upper-most bit is set.
653	#[inline(always)]
654	const fn first_flag(&self) -> bool {
655		self.0 >> 31 == 1
656	}
657
658	/// Check if the TF second-upper-most bit is set.
659	#[inline(always)]
660	const fn second_flag(&self) -> bool {
661		self.0 >> 30 & 0b1 == 1
662	}
663
664	/// Check if the TF third-upper-most bit is set.
665	#[inline(always)]
666	const fn third_flag(&self) -> bool {
667		self.0 >> 29 & 0b1 == 1
668	}
669
670	/// Check if the [Kind] is "Ident Like", i.e. it is [Kind::Ident], [Kind::AtKeyword], [Kind::Function], [Kind::Hash].
671	#[inline(always)]
672	pub(crate) const fn is_ident_like(&self) -> bool {
673		self.kind_bits() & 0b1100 == 0b1000
674	}
675
676	/// Check if the [Kind] is "Delim Like", i.e. it is [Kind::Delim], [Kind::Colon], [Kind::Semicolon], [Kind::Comma],
677	/// [Kind::LeftSquare], [Kind::RightSquare], [Kind::LeftParen], [Kind::RightParen], [Kind::LeftCurly],
678	/// [Kind::RightCurly].
679	#[inline(always)]
680	pub(crate) const fn is_delim_like(&self) -> bool {
681		self.kind_bits() & 0b1111 == Kind::Delim as u8
682	}
683
684	/// The only token with an empty length is EOF, but this method is available for symmetry with `len()`.
685	#[inline]
686	pub const fn is_empty(&self) -> bool {
687		self.kind_bits() == Kind::Eof as u8
688	}
689
690	/// Returns the amount of characters (utf-8 code points) this Token represents in the underlying source text.
691	#[inline]
692	pub const fn len(&self) -> u32 {
693		if self.kind_bits() == Kind::Eof as u8 {
694			0
695		} else if self.is_delim_like() {
696			debug_assert!(matches!(
697				self.kind(),
698				Kind::Delim
699					| Kind::Colon | Kind::Semicolon
700					| Kind::Comma | Kind::LeftSquare
701					| Kind::RightSquare
702					| Kind::LeftParen
703					| Kind::RightParen
704					| Kind::LeftCurly
705					| Kind::RightCurly
706			));
707			self.char().unwrap().len_utf8() as u32
708		} else if self.kind_bits() & 0b1111 == Kind::Number as u8 {
709			self.numeric_len()
710		} else if self.kind_bits() & 0b1111 == Kind::Dimension as u8 {
711			if self.first_flag() {
712				self.numeric_len() + (self.0 >> 7 & 0b11111)
713			} else {
714				((self.0 & LENGTH_MASK) >> 12) + (self.0 & !HALF_LENGTH_MASK)
715			}
716		} else if self.kind_bits() & 0b1111 == Kind::Hash as u8 {
717			self.0 & LENGTH_MASK
718		} else if self.kind_bits() == Kind::UnicodeRange as u8 {
719			self.1 >> 24
720		} else {
721			self.1
722		}
723	}
724
725	/// If the [Kind] is "Delim Like" (i.e. it is [Kind::Delim], [Kind::Colon], [Kind::Semicolon], [Kind::Comma],
726	/// [Kind::LeftSquare], [Kind::RightSquare], [Kind::LeftParen], [Kind::RightParen], [Kind::LeftCurly],
727	/// [Kind::RightCurly]) then this will return a [Some] with a [char] representing the value.
728	/// For non-delim-like tokens this will return [None].
729	pub const fn char(&self) -> Option<char> {
730		if self.is_delim_like() {
731			return char::from_u32(self.1);
732		}
733		None
734	}
735
736	/// The [Token] is a [Kind::Dimension] or [Kind::Number] and is an integer - i.e. it has no `.`.
737	#[inline]
738	pub const fn is_int(&self) -> bool {
739		self.kind_bits() & 0b1110 == 0b0100 && !self.third_flag()
740	}
741
742	/// The [Token] is a [Kind::Dimension] or [Kind::Number] and is a float - i.e. it has decimal places. This will be
743	/// `true` even if the decimal places are 0. e.g. `0.0`.
744	#[inline]
745	pub const fn is_float(&self) -> bool {
746		self.kind_bits() & 0b1100 == 0b0100 && self.third_flag()
747	}
748
749	/// The [Token] is a [Kind::Dimension] or [Kind::Number] and the underlying character data included a `-` or `+`
750	/// character. Note that a positive number may not necessarily have a sign, e.g. `3` will return false, while `+3`
751	/// will return `true`.
752	#[inline]
753	pub const fn has_sign(&self) -> bool {
754		self.kind_bits() & 0b1100 == 0b0100 && self.second_flag()
755	}
756
757	/// The [Token] is a [Kind::Number] and the `+` sign is semantically required and should be preserved during
758	/// minification. This is used for numbers in `an+b` syntax (e.g., `:nth-child(+5)`) where the `+` sign
759	/// distinguishes the value from other syntactic forms.
760	///
761	/// Asserts: the `kind()` is [Kind::Number].
762	#[inline]
763	pub const fn sign_is_required(&self) -> bool {
764		debug_assert!(self.kind_bits() == Kind::Number as u8);
765		self.first_flag()
766	}
767
768	/// Returns a new [Token] with the `sign_is_required` flag set. This indicates that the `+` sign
769	/// should be preserved during minification (e.g., for `an+b` syntax).
770	///
771	/// Asserts: the `kind()` is [Kind::Number].
772	#[inline]
773	pub const fn with_sign_required(self) -> Token {
774		debug_assert!(self.kind_bits() == Kind::Number as u8);
775		Token(self.0 | (1 << 31), self.1)
776	}
777
778	/// If the [Token] is a [Kind::Dimension] or [Kind::Number] then this returns the amount of characters used to
779	/// represent this number in the underlying source text. Numbers may be inefficiently encoded in the source text,
780	/// e.g. `0.0000`.
781	///
782	/// Asserts: the `kind()` is [Kind::Dimension] or [Kind::Number].
783	#[inline]
784	pub const fn numeric_len(&self) -> u32 {
785		debug_assert!(matches!(self.kind(), Kind::Number | Kind::Dimension | Kind::BadNumber | Kind::BadDimension));
786		if self.kind_bits() & 0b1111 == Kind::Dimension as u8 {
787			(self.0 & LENGTH_MASK) >> 12
788		} else {
789			self.0 & LENGTH_MASK
790		}
791	}
792
793	/// If the [Token] is a [Kind::Dimension] or [Kind::Number] then this returns the [f32] representation of the number's
794	/// value.
795	///
796	/// Asserts: the `kind()` is [Kind::Dimension] or [Kind::Number].
797	#[inline]
798	pub fn value(&self) -> f32 {
799		debug_assert!(matches!(self.kind(), Kind::Number | Kind::Dimension));
800		f32::from_bits(self.1)
801	}
802
803	/// Returns the [Whitespace].
804	///
805	/// If the [Token] is not a [Kind::Whitespace] this will return [Whitespace::none()].
806	#[inline]
807	pub fn whitespace_style(&self) -> Whitespace {
808		if self.kind_bits() == Kind::Whitespace as u8 {
809			Whitespace::from_bits((self.0 >> 29) as u8)
810		} else {
811			Whitespace::none()
812		}
813	}
814
815	/// Returns the [AssociatedWhitespaceRules].
816	///
817	/// If the [Kind] is not "Delim Like" (i.e. it is not [Kind::Delim], [Kind::Colon], [Kind::Semicolon], [Kind::Comma],
818	/// [Kind::LeftSquare], [Kind::RightSquare], [Kind::LeftParen], [Kind::RightParen], [Kind::LeftCurly],
819	/// [Kind::RightCurly]) then this will always return `AssociatedWhitespaceRules::none()`.
820	#[inline]
821	pub fn associated_whitespace(&self) -> AssociatedWhitespaceRules {
822		if self.is_delim_like() {
823			AssociatedWhitespaceRules::from_bits((self.0 >> 29) as u8)
824		} else {
825			AssociatedWhitespaceRules::none()
826		}
827	}
828
829	/// Returns a new [Token] with the [AssociatedWhitespaceRules] set to the given [AssociatedWhitespaceRules],
830	/// if possible.
831	///
832	/// If the [Kind] is not "Delim Like" (i.e. it is not [Kind::Delim], [Kind::Colon], [Kind::Semicolon], [Kind::Comma],
833	/// [Kind::LeftSquare], [Kind::RightSquare], [Kind::LeftParen], [Kind::RightParen], [Kind::LeftCurly],
834	/// [Kind::RightCurly]) then this will return the same [Token].
835	/// If the [AssociatedWhitespaceRules] is different it will return a new [Token].
836	#[inline]
837	pub fn with_associated_whitespace(&self, rules: AssociatedWhitespaceRules) -> Token {
838		if !self.is_delim_like() {
839			return *self;
840		}
841		Token::new_delim_with_associated_whitespace(self.char().unwrap(), rules)
842	}
843
844	/// Returns the [CommentStyle].
845	///
846	/// If the [Token] is not a [Kind::Comment] this will return [None].
847	#[inline]
848	pub fn comment_style(&self) -> Option<CommentStyle> {
849		if self.kind_bits() == Kind::Comment as u8 { CommentStyle::from_bits((self.0 >> 29) as u8) } else { None }
850	}
851
852	/// Returns the [QuoteStyle].
853	///
854	/// If the [Token] is not a [Kind::String] this will return [QuoteStyle::None].
855	#[inline]
856	pub fn quote_style(&self) -> QuoteStyle {
857		if self.kind_bits() == Kind::String as u8 {
858			if self.third_flag() {
859				return QuoteStyle::Double;
860			} else {
861				return QuoteStyle::Single;
862			}
863		}
864		QuoteStyle::None
865	}
866
867	/// Returns a new [Token] with the [QuoteStyle] set to the given [QuoteStyle], if possible.
868	///
869	/// If the [Token] is not a [Kind::String], or the [QuoteStyle] is already the given [QuoteStyle] this will return the same [Token].
870	/// If the [QuoteStyle] is different it will return a new [Token].
871	/// [QuoteStyle] must not be [QuoteStyle::None]
872	#[inline]
873	pub fn with_quotes(&self, quote_style: QuoteStyle) -> Token {
874		debug_assert!(quote_style != QuoteStyle::None);
875		if self.kind_bits() != Kind::String as u8 || quote_style == self.quote_style() {
876			return *self;
877		}
878		Token::new_string(quote_style, self.has_close_quote(), self.contains_escape_chars(), self.len())
879	}
880
881	/// If the [Token] is a [Kind::String] this checks if the string ended in a close quote.
882	/// It is possible to have a valid String token that does not end in a close quote, by eliding the quote at the end of
883	/// a file.
884	///
885	/// Asserts: The [Kind] is [Kind::String].
886	#[inline]
887	pub const fn has_close_quote(&self) -> bool {
888		debug_assert!(self.kind_bits() == Kind::String as u8);
889		self.second_flag()
890	}
891
892	/// Checks if it is possible for the [Token] to contain escape characters. Numbers, for example, cannot. Idents can.
893	#[inline]
894	pub const fn can_escape(&self) -> bool {
895		self.kind_bits() == Kind::String as u8
896			|| self.kind_bits() == Kind::Url as u8
897			|| self.kind_bits() == Kind::Dimension as u8
898			|| self.is_ident_like()
899	}
900
901	/// If the [Token] can escape, checks if the underlying source text contained escape characters.
902	///
903	/// Asserts: The token can escape ([Token::can_escape()]).
904	#[inline]
905	pub const fn contains_escape_chars(&self) -> bool {
906		if self.kind_bits() == Kind::Dimension as u8 {
907			// Always assume Dimension contains escape because we have other fast paths to handle dimension units
908			return true;
909		}
910		self.can_escape() && self.first_flag()
911	}
912
913	/// If the [Token] is Ident like, checks if the first two code points are HYPHEN-MINUS (`-`).
914	///
915	/// Asserts: The token is "ident like", i.e. it is [Kind::Ident], [Kind::AtKeyword], [Kind::Function], [Kind::Hash].
916	#[inline]
917	pub const fn is_dashed_ident(&self) -> bool {
918		debug_assert!(self.is_ident_like());
919		self.second_flag()
920	}
921
922	/// Checks if the [Token] is Ident like and none of the characters are ASCII upper-case.
923	#[inline]
924	pub const fn is_lower_case(&self) -> bool {
925		self.is_ident_like() && !self.third_flag()
926	}
927
928	#[inline]
929	pub fn atom_bits(&self) -> u32 {
930		if self.kind_bits() & 0b1111 == Kind::Dimension as u8 && self.first_flag() {
931			self.0 & 0b111_1111
932		} else if self.is_ident_like() && self.kind_bits() & 0b1111 != Kind::Hash as u8 {
933			self.0 & LENGTH_MASK
934		} else {
935			0
936		}
937	}
938
939	/// Checks if the [Token] is Trivia-like, that is [Kind::Comment], [Kind::Whitespace], [Kind::Eof]
940	#[inline]
941	pub const fn is_trivia(&self) -> bool {
942		self.kind_bits() & 0b000011 == self.kind_bits()
943	}
944
945	/// If the [Token] is [Kind::Url], checks if there are leading Whitespace characters before the inner value.
946	///
947	/// Asserts: The token is [Kind::Url].
948	#[inline]
949	pub const fn url_has_leading_space(&self) -> bool {
950		debug_assert!(self.kind_bits() == Kind::Url as u8);
951		self.second_flag()
952	}
953
954	/// If the [Token] is [Kind::Url], checks if the closing parenthesis is present.
955	///
956	/// Asserts: The token is [Kind::Url].
957	#[inline]
958	pub const fn url_has_closing_paren(&self) -> bool {
959		debug_assert!(self.kind_bits() == Kind::Url as u8);
960		self.third_flag()
961	}
962
963	/// If the [Token] is [Kind::Hash], checks if the Hash is "ID-like" (i.e its first character is ASCII).
964	///
965	/// Asserts: The token is [Kind::Hash].
966	#[inline]
967	pub const fn hash_is_id_like(&self) -> bool {
968		debug_assert!(self.kind_bits() == Kind::Hash as u8);
969		self.second_flag()
970	}
971
972	/// Checks if the [Token] is [Kind::BadString] or [Kind::BadUrl], or the "bad flag" has been set.
973	#[inline]
974	pub const fn is_bad(&self) -> bool {
975		self.kind_bits() & 0b1_0000 == 0b1_0000
976	}
977
978	/// Returns a new token with the bad/recovery flag set.
979	/// This is used by the parser to mark tokens as problematic during error recovery.
980	#[inline]
981	pub const fn with_bad_flag(&self) -> Self {
982		Self(self.0 | 1 << 28, self.1)
983	}
984
985	/// Checks if the [Token] is [Kind::CdcOrCdo] and is the CDC variant of that token.
986	#[inline]
987	pub const fn is_cdc(&self) -> bool {
988		self.kind_bits() == (Kind::CdcOrCdo as u8) && self.third_flag()
989	}
990
991	/// Some tokens may have a "leading" part:
992	///  - [Kind::AtKeyword] always starts with a `@`,
993	///  - [Kind::Hash] with a `#`.
994	///  - [Kind::String] with a `"` or `'`.
995	///  - [Kind::Comment] with a leading `/*` (or `//`).
996	///  - [Kind::Dimension] has a leading numeric portion.
997	///  - [Kind::Url] has the leading `url(` ident (which may vary in exact representation).
998	///
999	/// This function returns the length of that, irrespective of the [Kind]. For other kinds not listed, this will return
1000	/// `0`, but for the above kinds it will calculate the leading length. This is useful for parsing out the underlying
1001	/// data which is likely to be of greater use.
1002	pub fn leading_len(&self) -> u32 {
1003		match self.kind() {
1004			Kind::AtKeyword | Kind::Hash | Kind::String | Kind::BadAtKeyword | Kind::BadHash | Kind::BadString => 1,
1005			Kind::Dimension | Kind::BadDimension => self.numeric_len(),
1006			Kind::Comment | Kind::BadComment => 2,
1007			Kind::Url | Kind::BadUrl => (self.0 & LENGTH_MASK) >> 12,
1008			_ => 0,
1009		}
1010	}
1011
1012	/// Some tokens may have a "trailing" part:
1013	///  - [Kind::Function] will always have an opening `(`.
1014	///  - [Kind::String] may have a closing `"` or `'`.
1015	///  - [Kind::Comment] may have a closing `*/`
1016	///  - [Kind::Url] may have a clsoing `)`.
1017	///
1018	/// This function returns the length of that, irrespective of the [Kind]. For other kinds not listed, this will return
1019	/// `0`, but for the above kinds it will calculate the leading length. This is useful for parsing out the underlying
1020	/// data which is likely to be of greater use.
1021	pub fn trailing_len(&self) -> u32 {
1022		match self.kind() {
1023			Kind::Function | Kind::BadFunction => 1,
1024			Kind::String | Kind::BadString => self.has_close_quote() as u32,
1025			Kind::Comment | Kind::BadComment if self.comment_style().unwrap().is_block() => 2,
1026			Kind::Url | Kind::BadUrl => self.0 & !HALF_LENGTH_MASK,
1027			_ => 0,
1028		}
1029	}
1030
1031	/// Certain kinds have a [PairWise] equivalent:
1032	///  - [Kind::LeftParen] has [Kind::RightParen]
1033	///  - [Kind::LeftCurly] has [Kind::RightCurly]
1034	///  - [Kind::LeftSquare] has [Kind::RightSquare]
1035	///
1036	/// This function returns the [PairWise] enum, if the [Token] is one of the above listed [Kinds][Kind]. For any other
1037	/// [Kind] this returns [None].
1038	#[inline]
1039	pub fn to_pairwise(&self) -> Option<PairWise> {
1040		PairWise::from_token(self)
1041	}
1042
1043	/// A convenience function for `Cursor::new(offset, token)`.
1044	#[inline(always)]
1045	pub fn with_cursor(self, offset: SourceOffset) -> Cursor {
1046		Cursor::new(offset, self)
1047	}
1048
1049	/// If the [Kind] is [Kind::Hash] then this token may have had the opportunity to be parsed as a `<hex-value>` (e.g.
1050	/// `#fff`). When this happens the character data is parsed during tokenization into a u32 which stores the
1051	/// RR,GG,BB,AA values.
1052	#[inline(always)]
1053	pub fn hex_value(self) -> u32 {
1054		if self == Kind::Hash { self.1 } else { 0 }
1055	}
1056
1057	/// If this [Token] is preceded by the [Token] `other` then a separating token (e.g. a comment) will need to be
1058	/// inserted between these the two tokens during serialization, in order for them to be able to be re-tokenized as
1059	/// the same tokens. For example an Ident ("a") adjacent to an Ident ("b"), if serialized without whitespace, would
1060	/// create a single Ident ("ab"). The rules for estbalishing whether or not these tokens needs whitespace are quite
1061	/// simple and are effectively [defined in the serialization section of the spec][1]. To reproduce the table:
1062	///
1063	/// [1]: https://drafts.csswg.org/css-syntax/#serialization
1064	///
1065	/// |            | ident | function | url | bad url | - | number | percentage | dimension | CDC | ( | * | % |
1066	/// |:-----------|:-----:|:--------:|:---:|:-------:|:-:|:------:|:----------:|:---------:|:---:|:-:|:-:|:-:|
1067	/// | ident      |   ✗   |    ✗     |  ✗  |    ✗    | ✗ |    ✗   |      ✗     |     ✗     |  ✗  | ✗ |   |   |
1068	/// | at-keyword |   ✗   |    ✗     |  ✗  |    ✗    | ✗ |    ✗   |      ✗     |     ✗     |  ✗  |   |   |   |
1069	/// | hash       |   ✗   |    ✗     |  ✗  |    ✗    | ✗ |    ✗   |      ✗     |     ✗     |  ✗  |   |   |   |
1070	/// | dimension  |   ✗   |    ✗     |  ✗  |    ✗    | ✗ |    ✗   |      ✗     |     ✗     |  ✗  |   |   |   |
1071	/// | #          |   ✗   |    ✗     |  ✗  |    ✗    | ✗ |    ✗   |      ✗     |     ✗     |  ✗  |   |   |   |
1072	/// | \-         |   ✗   |    ✗     |  ✗  |    ✗    | ✗ |    ✗   |      ✗     |     ✗     |  ✗  |   |   |   |
1073	/// | number     |   ✗   |    ✗     |  ✗  |    ✗    |   |    ✗   |      ✗     |     ✗     |  ✗  |   |   | ✗ |
1074	/// | @          |   ✗   |    ✗     |  ✗  |    ✗    | ✗ |        |            |           |  ✗  |   |   |   |
1075	/// | .          |       |          |     |         |   |    ✗   |      ✗     |     ✗     |     |   |   |   |
1076	/// | +          |       |          |     |         |   |    ✗   |      ✗     |     ✗     |     |   |   |   |
1077	/// | /          |       |          |     |         |   |        |            |           |     |   | ✗ |   |
1078	///
1079	/// The one exception not in this table is that two consecutive `/` characters should also be separated by spaces in
1080	/// order to avoid abmiguities with CSS-alike languages that treat two consecutive `/` characters as a single line
1081	/// comment.
1082	///
1083	/// # Example
1084	///
1085	/// ```
1086	/// use css_lexer::*;
1087	/// let mut lexer = Lexer::new(&EmptyAtomSet::ATOMS, "10 %");
1088	/// let first = lexer.advance();
1089	/// let _ = lexer.advance(); // Whitespace
1090	/// let second = lexer.advance();
1091	/// assert!(first.needs_separator_for(second));
1092	/// ```
1093	pub fn needs_separator_for(&self, second: Token) -> bool {
1094		if second == AssociatedWhitespaceRules::EnforceBefore && *self != Kind::Whitespace
1095			|| *self == AssociatedWhitespaceRules::EnforceAfter && second != Kind::Whitespace
1096		{
1097			// We need whitespace after, unless the next token is actually whitespace.
1098			return true;
1099		}
1100		if *self == AssociatedWhitespaceRules::BanAfter {
1101			return false;
1102		}
1103		match self.kind() {
1104			Kind::Ident => {
1105				(matches!(second.kind(), Kind::Number | Kind::Dimension) &&
1106					// numbers with a `-` need separating, but with `+` they do not.
1107					(!second.has_sign() || second.value() < 0.0))
1108					|| matches!(second.kind(), Kind::Ident | Kind::Function | Kind::Url | Kind::BadUrl)
1109					|| matches!(second.char(), Some('(' | '-'))
1110					|| second.is_cdc()
1111			}
1112			Kind::AtKeyword | Kind::Hash | Kind::Dimension => {
1113				(matches!(second.kind(), Kind::Number | Kind::Dimension) &&
1114					// numbers with a `-` need separating, but with `+` they do not.
1115					(!second.has_sign() || second.value() < 0.0))
1116					|| matches!(second.kind(), Kind::Ident | Kind::Function | Kind::Url | Kind::BadUrl)
1117					|| matches!(second.char(), Some('-'))
1118					|| second.is_cdc()
1119			}
1120			Kind::Number => {
1121				matches!(
1122					second.kind(),
1123					Kind::Ident | Kind::Function | Kind::Url | Kind::BadUrl | Kind::Number | Kind::Dimension
1124				) || matches!(second.char(), Some('%'))
1125					|| second.is_cdc()
1126			}
1127			_ => match self.char() {
1128				Some('#') => {
1129					matches!(
1130						second.kind(),
1131						Kind::Ident | Kind::Function | Kind::Url | Kind::BadUrl | Kind::Number | Kind::Dimension
1132					) || matches!(second.char(), Some('-'))
1133						|| second.is_cdc()
1134				}
1135				Some('-') => {
1136					matches!(
1137						second.kind(),
1138						Kind::Ident | Kind::Function | Kind::Url | Kind::BadUrl | Kind::Number | Kind::Dimension
1139					) || matches!(second.char(), Some('-'))
1140						|| second.is_cdc()
1141				}
1142				Some('@') => {
1143					matches!(second.kind(), Kind::Ident | Kind::Function | Kind::Url | Kind::BadUrl)
1144						|| matches!(second.char(), Some('-'))
1145						|| second.is_cdc()
1146				}
1147				Some('.') => matches!(second.kind(), Kind::Number | Kind::Dimension),
1148				Some('+') => matches!(second.kind(), Kind::Number | Kind::Dimension),
1149				Some('/') => matches!(second.char(), Some('*' | '/')),
1150				_ => false,
1151			},
1152		}
1153	}
1154
1155	pub fn to_bits(&self) -> u64 {
1156		(self.0 as u64) << 32 | self.1 as u64
1157	}
1158}
1159
1160impl core::fmt::Debug for Token {
1161	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1162		let mut d = f.debug_struct(format!("Token::{}", self.kind().as_str()).as_str());
1163		match self.kind() {
1164			Kind::Eof => &mut d,
1165			Kind::Number => d.field("value", &self.value()).field("len", &self.numeric_len()),
1166			Kind::Dimension => {
1167				d.field("value", &self.value()).field("len", &self.numeric_len()).field("dimension_len", &self.len())
1168			}
1169			_ if self.is_delim_like() => {
1170				d.field("char", &self.char().unwrap()).field("len", &self.len());
1171				if !self.associated_whitespace().is_none() {
1172					d.field("associated_whitespace", &self.associated_whitespace());
1173				}
1174				&mut d
1175			}
1176			Kind::String => d
1177				.field("quote_style", &if self.first_flag() { "Double" } else { "Single" })
1178				.field("has_close_quote", &self.second_flag())
1179				.field("contains_escape_chars", &self.third_flag())
1180				.field("len", &self.len()),
1181			Kind::Ident | Kind::Function | Kind::AtKeyword => d
1182				.field("is_lower_case", &self.first_flag())
1183				.field("is_dashed_ident", &self.second_flag())
1184				.field("contains_escape_chars", &self.third_flag())
1185				.field("len", &self.len()),
1186			Kind::Hash => d
1187				.field("is_lower_case", &self.first_flag())
1188				.field("hash_is_id_like", &self.second_flag())
1189				.field("contains_escape_chars", &self.third_flag())
1190				.field("len", &self.len()),
1191			Kind::Url => d
1192				.field("url_has_closing_paren", &self.first_flag())
1193				.field("url_has_leading_space", &self.second_flag())
1194				.field("contains_escape_chars", &self.third_flag())
1195				.field("len", &self.len()),
1196			Kind::UnicodeRange => d
1197				.field("start", &format_args!("U+{:X}", self.unicode_range_start()))
1198				.field("end", &format_args!("U+{:X}", self.unicode_range_end()))
1199				.field("len", &self.len()),
1200			Kind::CdcOrCdo => d.field("is_cdc", &self.first_flag()).field("len", &self.len()),
1201			Kind::Whitespace => d.field("contains", &self.whitespace_style()).field("len", &self.len()),
1202			_ => d
1203				.field("flag_0", &self.first_flag())
1204				.field("flag_1", &self.second_flag())
1205				.field("flag_2", &self.third_flag())
1206				.field("len", &self.len()),
1207		}
1208		.finish()
1209	}
1210}
1211
1212impl std::fmt::Display for Token {
1213	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1214		match self.kind() {
1215			Kind::Delim => write!(f, "Delim({})", self.char().unwrap()),
1216			k => write!(f, "{}", k.as_str()),
1217		}
1218	}
1219}
1220
#[cfg(feature = "serde")]
impl serde::ser::Serialize for Token {
	/// Serializes the token as a `Token` struct with its `kind` name and `len`. A token equal to `Token::EMPTY` is
	/// serialized as `none`.
	fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
	where
		S: serde::ser::Serializer,
	{
		use serde::ser::SerializeStruct;
		if *self == Self::EMPTY {
			return serializer.serialize_none();
		}
		// The field count must match the number of `serialize_field` calls below; some formats rely on it.
		let mut state = serializer.serialize_struct("Token", 2)?;
		state.serialize_field("kind", self.kind().as_str())?;
		state.serialize_field("len", &self.len())?;
		state.end()
	}
}
1237
1238impl From<Token> for Kind {
1239	fn from(token: Token) -> Self {
1240		token.kind()
1241	}
1242}
1243
1244impl PartialEq<Kind> for Token {
1245	fn eq(&self, other: &Kind) -> bool {
1246		self.kind() == *other
1247	}
1248}
1249
1250impl From<Token> for KindSet {
1251	fn from(token: Token) -> Self {
1252		KindSet::new(&[token.kind()])
1253	}
1254}
1255
1256impl PartialEq<KindSet> for Token {
1257	fn eq(&self, other: &KindSet) -> bool {
1258		other.contains(self.kind())
1259	}
1260}
1261
1262impl From<Token> for QuoteStyle {
1263	fn from(token: Token) -> Self {
1264		token.quote_style()
1265	}
1266}
1267
1268impl PartialEq<QuoteStyle> for Token {
1269	fn eq(&self, other: &QuoteStyle) -> bool {
1270		&self.quote_style() == other
1271	}
1272}
1273
1274impl From<Token> for Whitespace {
1275	fn from(token: Token) -> Self {
1276		token.whitespace_style()
1277	}
1278}
1279
1280impl PartialEq<Whitespace> for Token {
1281	fn eq(&self, other: &Whitespace) -> bool {
1282		self.whitespace_style().intersects(*other)
1283	}
1284}
1285
1286impl PartialEq<AssociatedWhitespaceRules> for Token {
1287	fn eq(&self, other: &AssociatedWhitespaceRules) -> bool {
1288		self.associated_whitespace().intersects(*other)
1289	}
1290}
1291
1292impl PartialEq<CommentStyle> for Token {
1293	fn eq(&self, other: &CommentStyle) -> bool {
1294		self.comment_style().map(|style| &style == other).unwrap_or(false)
1295	}
1296}
1297
1298impl PartialEq<PairWise> for Token {
1299	fn eq(&self, other: &PairWise) -> bool {
1300		self.to_pairwise().map(|style| &style == other).unwrap_or(false)
1301	}
1302}
1303
1304impl PartialEq<char> for Token {
1305	fn eq(&self, other: &char) -> bool {
1306		self.char().map(|char| char == *other).unwrap_or(false)
1307	}
1308}
1309
// Guards the design goal that a Token stays exactly 8 bytes.
#[test]
fn size_test() {
	let token_size = ::std::mem::size_of::<Token>();
	assert_eq!(token_size, 8);
}
1314
// Whitespace tokens report Kind::Whitespace, match any style they contain, and keep their length.
#[test]
fn test_new_whitespace() {
	let presets =
		[(Token::SPACE, Whitespace::Space), (Token::TAB, Whitespace::Tab), (Token::NEWLINE, Whitespace::Newline)];
	for (token, style) in presets {
		assert_eq!(token, Kind::Whitespace);
		assert_eq!(token, style);
	}

	let spaces = Token::new_whitespace(Whitespace::Space, 4);
	assert_eq!(spaces, Kind::Whitespace);
	assert_eq!(spaces.len(), 4);

	// Comparison with a Whitespace is an intersection test, so a combined style matches each of its parts.
	assert_eq!(Token::new_whitespace(Whitespace::Space | Whitespace::Newline, 4), Whitespace::Space);
	assert_eq!(Token::new_whitespace(Whitespace::Tab | Whitespace::Space, 4), Whitespace::Tab);

	let newlines = Token::new_whitespace(Whitespace::Newline, 4);
	assert_eq!(newlines, Whitespace::Newline);
	assert_eq!(newlines.len(), 4);
}
1330
// Comment tokens report Kind::Comment and round-trip their CommentStyle.
#[test]
fn test_new_comment() {
	let block = Token::new_comment(CommentStyle::Block, 4);
	assert_eq!(block, Kind::Comment);
	assert_eq!(block, CommentStyle::Block);

	let single = Token::new_comment(CommentStyle::Single, 4);
	assert_eq!(single, CommentStyle::Single);
}
1337
// Number tokens keep their value and length, and expose the sign and float flags.
#[test]
fn test_new_number() {
	let unsigned = Token::new_number(false, false, 3, 4.2);
	assert_eq!(unsigned, Kind::Number);
	assert_eq!(unsigned.value(), 4.2);
	assert_eq!(unsigned.len(), 3);
	assert!(!unsigned.has_sign());

	let signed = Token::new_number(false, true, 9, 4.2);
	assert_eq!(signed, Kind::Number);
	assert_eq!(signed.value(), 4.2);
	assert_eq!(signed.len(), 9);

	assert!(Token::new_number(false, true, 3, 4.2).has_sign());
	assert!(!Token::new_number(false, true, 3, 4.0).is_float());
	assert!(Token::new_number(true, false, 3, 4.2).is_float());
}
1351
// String tokens round-trip quote style, close-quote flag, escape flag and length.
#[test]
fn test_new_string() {
	let single = Token::new_string(QuoteStyle::Single, false, false, 4);
	assert_eq!(single, Kind::String);
	assert_eq!(single, QuoteStyle::Single);
	assert!(!single.has_close_quote());
	assert!(!single.contains_escape_chars());
	assert_eq!(single.len(), 4);

	let double_open = Token::new_string(QuoteStyle::Double, false, false, 4);
	assert_eq!(double_open, Kind::String);
	assert_eq!(double_open, QuoteStyle::Double);

	let double_closed = Token::new_string(QuoteStyle::Double, true, false, 4);
	assert!(double_closed.has_close_quote());
	assert!(!double_closed.contains_escape_chars());
	assert_eq!(Token::new_string(QuoteStyle::Double, true, false, 5).len(), 5);

	// The escape flag is independent of the close-quote flag.
	for has_close_quote in [true, false] {
		assert!(Token::new_string(QuoteStyle::Double, has_close_quote, true, 4).contains_escape_chars());
	}
}
1367
/// Hash tokens report [Kind::Hash] and round-trip their escape flag, case flag, length and hex value.
#[test]
fn test_new_hash() {
	let plain = Token::new_hash(false, false, false, 4, 0);
	assert_eq!(plain, Kind::Hash);
	assert!(!plain.contains_escape_chars());

	// Setting the third flag marks the token as containing escapes; it is still lower case.
	let escaped = Token::new_hash(false, false, true, 4, 0);
	assert!(escaped.contains_escape_chars());
	assert!(escaped.is_lower_case());

	// Setting the first flag makes the token report as not lower case.
	let not_lower = Token::new_hash(true, false, false, 4, 0);
	assert!(!not_lower.is_lower_case());
	assert_eq!(not_lower.len(), 4);
	assert_eq!(not_lower.hex_value(), 0);

	// The final argument is returned verbatim by `hex_value`.
	let with_hex = Token::new_hash(true, false, false, 4, 18);
	assert_eq!(with_hex.hex_value(), 18);
}
1379
/// Building a string token with [QuoteStyle::None] is expected to panic.
#[test]
#[should_panic]
fn test_new_string_with_quotes_none() {
	// The returned token is irrelevant; only the panic during construction matters.
	let _ = Token::new_string(QuoteStyle::None, false, true, 4);
}
1385
/// Delim tokens report [Kind::Delim], compare equal to the [char] they wrap, and report the expected length for
/// one-, three- and four-byte characters.
#[test]
fn test_new_delim() {
	// Each case is checked exactly once; the previous version repeated the final length assertion three times.
	for (ch, expected_len) in [('>', 1), ('.', 1), ('ℝ', 3), ('💣', 4)] {
		let token = Token::new_delim(ch);
		assert_eq!(token, Kind::Delim);
		assert_eq!(token, ch);
		assert_eq!(token.len(), expected_len);
	}
}
1403
/// Tokens produced by `with_associated_whitespace` compare equal to the [AssociatedWhitespaceRules] they were given.
#[test]
fn with_associated_whitespace() {
	let enforce_both = Token::new_delim('>')
		.with_associated_whitespace(AssociatedWhitespaceRules::EnforceBefore | AssociatedWhitespaceRules::EnforceAfter);
	// The expected value must name both rules. It previously read `EnforceBefore | EnforceBefore`, which collapses
	// to `EnforceBefore` alone and never checked that `EnforceAfter` was retained.
	assert_eq!(enforce_both, AssociatedWhitespaceRules::EnforceBefore | AssociatedWhitespaceRules::EnforceAfter);

	let ban_after = Token::new_delim('>').with_associated_whitespace(AssociatedWhitespaceRules::BanAfter);
	assert_eq!(ban_after, AssociatedWhitespaceRules::BanAfter);
}
1417
/// `with_quotes` swaps the quote style; the result equals a token built directly with the new style and the same
/// remaining arguments.
#[test]
fn test_with_quotes() {
	// Single -> Double on an unterminated, escape-free string.
	let single_to_double = Token::new_string(QuoteStyle::Single, false, false, 4).with_quotes(QuoteStyle::Double);
	let built_double = Token::new_string(QuoteStyle::Double, false, false, 4);
	assert_eq!(single_to_double, built_double);

	// Double -> Single on a terminated string containing escapes.
	let double_to_single = Token::new_string(QuoteStyle::Double, true, true, 8).with_quotes(QuoteStyle::Single);
	let built_single = Token::new_string(QuoteStyle::Single, true, true, 8);
	assert_eq!(double_to_single, built_single);
}
1429
/// Re-quoting a single-quoted string token with [QuoteStyle::None] is expected to panic.
#[test]
#[should_panic]
fn test_with_quotes_none() {
	Token::new_string(QuoteStyle::Single, false, true, 4).with_quotes(QuoteStyle::None);
}

/// Re-quoting a double-quoted string token with [QuoteStyle::None] is expected to panic.
///
/// This lives in its own test because a `#[should_panic]` test stops at the first panic: when both calls shared one
/// function body, the double-quoted call was never reached and so was never actually checked.
#[test]
#[should_panic]
fn test_with_quotes_none_double() {
	Token::new_string(QuoteStyle::Double, false, true, 4).with_quotes(QuoteStyle::None);
}
1436
/// Dimension tokens report [Kind::Dimension], round-trip their value, and expose both the numeric length and the
/// total length.
#[test]
fn test_new_dimension() {
	// Three numeric characters followed by a three character unit: total length six.
	let short = Token::new_dimension(false, false, 3, 3, 999.0, 0);
	assert_eq!(short, Kind::Dimension);
	assert_eq!(short.value(), 999.0);
	assert_eq!(short.numeric_len(), 3);
	assert_eq!(short.len(), 6);
	assert!(!short.is_float());
	assert!(!short.has_sign());

	// Five numeric characters followed by a two character unit: total length seven.
	let long = Token::new_dimension(false, false, 5, 2, 8191.0, 0);
	assert_eq!(long, Kind::Dimension);
	assert_eq!(long.value(), 8191.0);
	assert_eq!(long.numeric_len(), 5);
	assert_eq!(long.len(), 7);
	assert!(!long.is_float());
	assert!(!long.has_sign());

	// Every integer value in -8191..8191 must survive the round trip through the token unchanged.
	(-8191..8191).map(|i| i as f32).for_each(|value| {
		let token = Token::new_dimension(false, false, 9, 3, value, 0);
		assert_eq!(token.value(), value);
	});
}
1462
/// `with_bad_flag` yields a token whose kind is the `Bad*` counterpart while the length, numeric length and atom
/// bits are preserved, and the token it was called on is left untouched.
#[test]
fn test_bad_bits() {
	let token = Token::new_dimension(false, false, 5, 2, 8191.0, 42);
	assert!(!token.is_bad());
	assert_eq!(token.kind(), Kind::Dimension);
	assert_eq!(Kind::from_bits(token.kind_bits()), Kind::Dimension);
	assert_eq!(token.len(), 7);
	assert_eq!(token.numeric_len(), 5);

	let bad_token = token.with_bad_flag();
	assert!(bad_token.is_bad());
	assert_eq!(bad_token.kind(), Kind::BadDimension);
	assert_eq!(Kind::from_bits(bad_token.kind_bits()), Kind::BadDimension);
	// Setting the bad flag must not disturb the other packed fields.
	assert_eq!(bad_token.len(), 7);
	assert_eq!(bad_token.numeric_len(), 5);
	assert_eq!(bad_token.atom_bits(), 42);

	let token = Token::new_delim('(');
	assert!(!token.is_bad());
	let bad_token = token.with_bad_flag();
	// The token `with_bad_flag` was called on stays good; only the returned token is flagged.
	assert!(!token.is_bad());
	assert!(bad_token.is_bad());
	assert_eq!(bad_token.kind(), Kind::BadLeftParen);

	let token = Token::new_delim('[');
	assert_eq!(token, Kind::LeftSquare);
	assert_eq!(token.with_bad_flag().kind(), Kind::BadLeftSquare);
}