css_lexer/
token.rs

1use crate::{
2	AssociatedWhitespaceRules, CommentStyle, Cursor, DimensionUnit, Kind, KindSet, PairWise, QuoteStyle, SourceOffset,
3	Whitespace,
4};
5use std::char::REPLACEMENT_CHARACTER;
6
7/// An abstract representation of the chunk of the source text, retaining certain "facts" about the source.
8///
9/// # Design
10///
11/// The [Token] type is an immutable packing of two [u32s][u32] that represents a unit in the source text, but without
12/// the associated offset data that points to its position in the source text. This is important because it means that
13/// equivalent [Tokens][Token] are equal even in different parts of the document. For the most part a [Token] doesn't
14/// represent data that can be put into a text file because it lacks the underlying character data. It is lossy. For
/// example a [Token] with [Kind::Ident] just represents _an_ ident, but it doesn't retain what the keyword is.
16/// Storing raw-character data would require either storing tokens on the heap (and therefore they couldn't be [Sized])
17/// or by keeping a reference to `&'a str` which means larger token sizes and lifetime tracking. By _not_ storing
18/// character data we can keep [Token] [Sized] and keep it to `size_of` `8`, avoiding the heap, avoiding
19/// references/lifetimes, and keeping [Token] entirely in the stack. For a lot of tokens this is _fine_ because the
20/// underlying character data isn't that useful past a certain point.
21///
22/// A [Token] retains certain "facts" about the underlying unit of text, though. For example it retains the [Kind], how
23/// many characters the token consumed, and various other pieces of information, depending on the [Kind]. In some
24/// cases, it's entirely possible to represent the full token, including character data, into the available bits (for
25/// example [Kind::Delim] stores its [char], [Kind::Number] stores its [f32]). Taking the time in the tokenizer to
26/// gather these facts and values can keep cache-lines hot, which speeds up subsequent checks in the parser.
27///
28/// If you're familiar with "red green" syntax trees such as [Swiftlang's libsyntax][1], or [Rust-Analyzer's Rowan][2]
29/// or [Roslyn][3] this might be a little familiar in some concepts. However [Token] does not represent a tree, and
30/// relies on resorting back to the string data to find out keyword values.
31///
32/// [1]: https://gh.io/AAtdqpg
33/// [2]: https://gh.io/AAtf8pt
34/// [3]: https://gh.io/AAtab90
35///
36/// This representation of facts, kind, length, or other metadata can be quite complex - so here's a
37/// full breakdown:
38///
39/// # Anatomy of Token
40///
41/// A [Token] is a struct of `(u32, u32)`. The second u32 is _usually_ the token length (hence keeping them separate).
42/// The first [u32], however, is split into 3 (sometimes 5) parts. The two u32s can be thought of like so:
43///
44/// ```md
///   |-----|-------|--------------------------|----------------------------------|
///   | TF  | K     | VD                       | Value                            |
/// 0b| 000 | 00000 | 000000000000000000000000 | 00000000000000000000000000000000 |
///   |-----|-------|--------------------------|----------------------------------|
///   | 3-- | 5---- | 24---------------------- | 32------------------------------ |
50/// ```
51///
52/// ## TF = Type Flags (or "Token Facts")
53///
54/// This represents a bit-mask in the upper-most 3 bits. The flags are general purpose and change meaning depending on
/// the Token's [Kind]. Each flag generally maps to a method so it's not necessary to remember the contents of this
56/// table, but it can serve as a useful reference. Note that not all methods return a [bool], so footnotes have been
57/// added to explain these further.
58///
59/// | Kind::             | Flag  | Description                 | Method                                   |
60/// |--------------------|-------|-----------------------------|------------------------------------------|
61/// | [Kind::Number]     | `001` | Floating Point              | [Token::is_float()]                      |
62/// |                    | `010` | Has a "Sign" (-/+)          | [Token::has_sign()]                      |
63/// |                    | `100` | (Reserved)                  | --                                       |
64/// | [Kind::Dimension]  | `001` | Floating Point              | [Token::is_float()]                      |
65/// |                    | `010` | Has a "Sign" (-/+)          | [Token::has_sign()]                      |
66/// |                    | `100` | Unit is a known dimension   | [Token::dimension_unit()][^dimension]    |
67/// | [Kind::String]     | `001` | Uses Double Quotes          | [Token::quote_style()][^quotes]          |
68/// |                    | `010` | Has a closing quote         | [Token::has_close_quote()]               |
69/// |                    | `100` | Contains escape characters  | [Token::contains_escape_chars()]         |
70/// | [Kind::Ident]      | `001` | Contains non-lower-ASCII    | [Token::is_lower_case()]                 |
71/// |                    | `010` | Is a "Dashed Ident"         | [Token::is_dashed_ident()]               |
72/// |                    | `100` | Contains escape characters  | [Token::contains_escape_chars()]         |
73/// | [Kind::Function]   | `001` | Contains non-lower-ASCII    | [Token::is_lower_case()]                 |
74/// |                    | `010` | Is a "Dashed Ident"         | [Token::is_dashed_ident()]               |
75/// |                    | `100` | Contains escape characters  | [Token::contains_escape_chars()]         |
76/// | [Kind::AtKeyword]  | `001` | Contains non-lower-ASCII    | [Token::is_lower_case()]                 |
77/// |                    | `010` | Is a "Dashed Ident"         | [Token::is_dashed_ident()]               |
78/// |                    | `100` | Contains escape characters  | [Token::contains_escape_chars()]         |
79/// | [Kind::Hash]       | `001` | Contains non-lower-ASCII    | [Token::is_lower_case()]                 |
80/// |                    | `010` | First character is ASCII    | [Token::hash_is_id_like()]               |
81/// |                    | `100` | Contains escape characters  | [Token::contains_escape_chars()]         |
82/// | [Kind::Url]        | `001` | Has a closing paren )       | [Token::url_has_closing_paren()]         |
83/// |                    | `010` | Contains whitespace after ( | [Token::url_has_leading_space()]         |
84/// |                    | `100` | Contains escape characters  | [Token::contains_escape_chars()]         |
/// | [Kind::CdcOrCdo]   | `001` | Is CDC (`000` would be CDO) | [Token::is_cdc()]                        |
86/// |                    | `010` | (Reserved)                  | --                                       |
87/// |                    | `100` | (Reserved)                  | --                                       |
88/// | [Kind::Whitespace] | `---` | Whitespace style            | [Token::whitespace_style()][^whitespace] |
89/// | [Kind::Delim]      | `---` | Associate whitespace rules  | [Token::associated_whitespace()][^delim] |
90/// | [Kind::Comment]    | `---` | (Special)                   | [Token::comment_style()][^comments]      |
91///
92/// [^dimension]: Dimensions do not have a [bool] returning method for whether or not the dimension is known, instead
93/// [Token::dimension_unit()] `==` [DimensionUnit::Unknown] can be consulted.
94/// [^quotes]: Strings do not have a [bool] returning method for whether or not the quote is using double or single
/// quotes, instead the [Token::quote_style()] method will return the [QuoteStyle] enum for better readability.
/// [^whitespace]: Whitespace tokens do not have a [bool] returning method, instead [Token::whitespace_style()] will
97/// return the [Whitespace] enum for improved readability.
98/// [^comments]: Rather than using the 3 bits as a bit-mask, Comment tokens use the data to store the [CommentStyle]
99/// enum, which is capable of representing 8 discrete comment styles.
100/// [^delim]: Delims can be used in interesting ways inside of CSS syntax. At higher levels CSS is _sometimes_
101/// whitespace sensitive, for example the whitespace inside of a CSS selector _sometimes_ represents the descendant
102/// combinator, meanwhile delimiters inside calc() are sensitive to whitespace collapse (`calc(1px + 1px)` is valid
103/// while `calc(1px+1px)` is a parse error). Further to this, introducing whitespace (say through a formatter) might
104/// break in interesting ways due to some combinations of Delims & Idents - for example Pseudo Classes like `:hover`,
105/// or CSS like languages such as SASS using `$var` style syntax. While `:hover` and `$var` are comprised of two tokens
106/// they're considered one conceptual unit. Having a way to express these relationships at the token level can be useful
107/// for other low level machinery such as formatters/minifiers, rather than introducing complex state at higher levels.
108/// For these reasons, Delim tokens have the ability to express their whitespace association. The lexer will always
109/// produce a token with empty whitespace rules, but parsers can replace this token with a more complex set of rules.
110///
111/// ## K = Kind Bits
112///
/// The `K` value - upper-most bits 4-8 - stores the 5-bit [Kind].
114///
115/// ## VD = Value Data
116///
117/// The `VD` value - the lower-most 24-bits - stores data depending on the [Token] [Kind]. For most kinds this data is
118/// reserved (just 0s). The value data cannot be interrogated manually, but it packs in additional data about the
119/// underlying string to make the string easier to parse without doing the same lookups that the tokenizer already had
120/// to - such as determining lengths of the various parts of the token, or packing values so that consulting the string
121/// can be avoided (which keeps cache-lines hot).
122///
123/// Below describes the special kinds which use the Value Data to store yet more information about the token...
124///
125/// ### Value Data for [Kind::Number]
126///
127/// If the [Kind] is [Kind::Number], Value Data represents the length of that number (this means the parser is
128/// restricted from representing numbers longer than 16,777,216 characters which is probably an acceptable limit). Note
129/// that this does not affect the _value_ of a number, just the characters in a string. Numbers in CSS are [f32]. The
130/// vast majority of [f32s][f32] can be represented in 16MM characters, but it's possible to author a document that
131/// contains a set of numeric characters longer than 16MM code points. These scenarios are considered [undefined
/// behaviour][4].
133///
134/// [4]: https://en.wikipedia.org/wiki/Undefined_behavior
135///
136/// ### Value Data for [Kind::Hash]
137///
138/// If the [Kind] is [Kind::Hash], Value Data represents the length of that hash (this means the parser is restricted
139/// from representing IDs and hex codes longer than 16,777,216 characters which is probably an acceptable limit). Note
/// that this restriction means that ID selectors have a much tighter limit than other tokens, such as strings or
141/// idents, but it's very unlikely to see a 16million character ID in CSS (String, maybe).
142///
143/// ### Value Data for [Kind::Url]
144///
145/// If the [Kind] is [Kind::Url], Value Data represents the "leading length" and "trailing length" of the URL. This
146/// means the value data is split into two 12 bit numbers:
147///
148/// ```md
149/// |--------------|--------------|
150/// | LL           | TL           |
151/// | 000000000000 | 000000000000 |
152/// |--------------|--------------|
153/// | 12---------- | 12---------- |
154/// ```
155///
156/// The "leading" length represents the `url(` part of the token. Typically this will be `4`, however it's possible
157/// (for legacy compatibility reasons within CSS) to add whitespace between the opening parenthesis and the URL value.
158/// It's also possible to escape the `url` ident portion. This means `\75\52\6c(   ` is also a valid leading section of
159/// a URL ident (which has a character length of 13), as is `\000075 \000052 \00006c (   ` (28 characters). 12 bits
160/// allows for a maximum character length of 4,096. It is not possible to represent a URL token's leading section using
161/// 4,096 characters so there is some headroom (wasted bytes) here.
162///
163/// The "trailing" length represents the `)` part of the token. Typically this will be `1`, however it's possible to
164/// add any number of whitespace characters between the end of the URL and the closing parenthesis. If a CSS document
165/// contains more than 4095 whitespace characters then this is considered [undefined behaviour][4].
166///
167/// ### Value Data for [Kind::Dimension]
168///
169/// If K is a Dimension, then this represents both the number of characters in the numeric portion of the dimension
170/// and the length of the ident portion of the dimension... or the dimension unit itself (more on that below). This
171/// means the value data is split into two 12 bit numbers:
172///
173/// ```md
174/// |--------------|--------------|
175/// | NL           | DUL          |
176/// | 000000000000 | 000000000000 |
177/// |--------------|--------------|
178/// | 12---------- | 12---------- |
179/// ```
180///
181/// The NL portion - the numeric length - represents the length of characters the number contains. This means the
182/// numeric portion of a dimension can only be 4,096 characters long. This is dramatically shorter than the 16MM
183/// allowed for numbers but it's still also incredibly generous such that it's highly unlikely to ever be hit unless
184/// someone is intentionally trying to break the parser. The [Lexer][super::Lexer] encountering a dimension with a
185/// numeric portion longer than 4,096 characters is considered [undefined behaviour][4].
186///
187/// The DUL portion (if `TF & 100 == 0`) will represent the length of characters the ident portion of the dimension
188/// (aka the dimension unit) contains. This means the ident portion of a dimension can only be 4,096 characters long.
189/// For practical purposes CSS has a fixed set of dimensions - the longest of which (at the time of writing) are 5
190/// characters long (e.g. `svmax`). Through the use of escaping shenanigans it's possible to create a valid CSS
191/// dimension longer than 5 characters though (every ident can be made 8 times longer by using escape characters, e.g.
192/// `1svmax` at 6 characters can be instead written as `1\000073 \000076 \00006d \000061 \000078` at 40 characters). In
193/// addition to these factors, it's worth pointing out that there is scope for further dimensions and some [proposals
194/// for "custom" dimensions][5], and lastly this library is designed for CSS _and CSS-alike_ languages, which may
195/// invent their own dimension units. In other words being too restrictive on dimension ident length could be costly
196/// in the future, therefore 4,096 characters seems like a reasonable, if generous, trade-off.
197///
198/// There's a giant caveat here though, and a carve out for parsing CSS as it exists today. If `TF & 100 != 0`, then
199/// the dimension is considered "known" and DUL will be encoded differently. Instead of being the dimension unit
200/// length, which requires consulting the underlying `&str` to get the actual dimension, it will be used to store the
201/// [DimensionUnit] - an enum of known CSS dimensions. In this mode [Token::dimension_unit()] will return a valid
202/// [DimensionUnit] (excluding [DimensionUnit::Unknown]). When it comes to reasoning about dimensions from the
203/// outside, this won't make a significant difference but it does provide a nice performance boost in parser
204/// implementations without slowing down the [Lexer][super::Lexer] by any significant amount. However, if a dimension
205/// unit is escaped in any way it will _not_ be represented as a known [DimensionUnit], due to the variability in the
206/// length encoding which would otherwise be lost if using the enum variant.
207///
208/// [5]: https://github.com/w3c/csswg-drafts/issues/7379
209///
210/// ## Value
211///
212/// The `Value` portion of [Token] represents the length of the token for most token kinds. However, for some tokens
213/// their length is already packed into the first u32. So it would make more sense to use this u32 to store more
214/// interesting data.
215///
216/// ## Value for [Kind::Delim] and single character tokens
217///
218/// [Kind::Delim] and single-character tokens (i.e. [Kind::Colon]->[Kind::RightCurly]) typically have a length of `1`
219/// ([Kind::Delim] can have a varied length for surrogate pairs). Instead of storing the length and wasting a whole
220/// [u32], this region stores the [char]. Calling [Token::char()] will return an [Option] which will always be [Some]
221/// for [Kind::Delim] and single-character tokens.
222///
223/// ## Value for [Kind::Hash]
224///
/// The length of a hash is stored in its `VD` portion, leaving 32 bits to store other data. It just so happens that
/// an 8-character hex code (#ffaabbcc) fits nicely inside of 32-bits. During tokenization we can eagerly parse the hex
227/// code and stuff it here, so it can be more easily reasoned about in upstream code (rather than
228/// reading the character data).
229///
230/// ## Value for [Kind::Number] and [Kind::Dimension]
231///
232/// As these tokens store their length data in the `VD` portion, this [u32] instead stores the _value_ of the number,
233/// stored as [f32::to_bits()].
234///
235/// ## Value data for other tokens.
236///
/// In all other cases, this represents the length of the token as utf-8 bytes. This means the maximum token length
/// is 4,294,967,295 bytes, aka ~4GB. This sounds very long but also CSS can host very large image data and browsers
/// will accommodate very large URLs. [An MDN article on Data URLs][6] claims that Firefox supports 32mb Data URLs, Chrome
240/// supports over 512mb, and Safari over 2gb. The reality is that if someone has such a large data URL in their CSS
241/// they probably should split it out, but we have a whole 32 bits to store the length so we may as well use it...
242///
243/// [6]: https://developer.mozilla.org/en-US/docs/Web/HTTP/Basics_of_HTTP/Data_URLs#common_problems
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
// Field `0` packs the type flags, kind bits and value data; field `1` is the "Value" word described above.
pub struct Token(u32, u32);
246
247impl Default for Token {
248	fn default() -> Self {
249		Self((Kind::Whitespace as u32) << 24, 0)
250	}
251}
252
/// Selects the upper 8 bits of the first `u32`: the 3 type-flag bits followed by the 5 kind bits.
const KIND_MASK: u32 = 0xFF00_0000;
/// Selects the lower 24 bits of the first `u32`: the "Value Data" region.
const LENGTH_MASK: u32 = !KIND_MASK;
/// Clears the lowest 12 bits. Combined with `LENGTH_MASK` it isolates the upper 12-bit half of the value data, and
/// its complement isolates the lower 12-bit half.
const HALF_LENGTH_MASK: u32 = u32::MAX << 12;
256
257impl Token {
258	/// Represents an empty token.
259	pub const EMPTY: Token = Token::new_whitespace(Whitespace::none(), 0);
260
261	/// Represents an EOF token.
262	pub const EOF: Token = Token(0b0, 0);
263
264	/// Represents a CDO (`<!--`) token.
265	pub const CDO: Token = Token(((Kind::CdcOrCdo as u32) << 24) & KIND_MASK, 4);
266
267	/// Represents a CDC (`-->`) token.
268	pub const CDC: Token = Token((((Kind::CdcOrCdo as u32) | 0b001_00000) << 24) & KIND_MASK, 3);
269
270	/// Represents a single ' ' space token.
271	pub const SPACE: Token = Token::new_whitespace(Whitespace::Space, 1);
272
273	/// Represents a single Tab token.
274	pub const TAB: Token = Token::new_whitespace(Whitespace::Tab, 1);
275
276	/// Represents a single `\n` token.
277	pub const NEWLINE: Token = Token::new_whitespace(Whitespace::Newline, 1);
278
279	/// Represents the Number `0`. This is not equal to other representations of zero, such as `00`, `0e0`, `0.0` and so
280	/// on.
281	pub const NUMBER_ZERO: Token = Token((((Kind::Number as u32) | 0b100_00000) << 24) & KIND_MASK, 1);
282
283	/// Represents the `:` token.
284	pub const COLON: Token = Token::new_delim_kind(Kind::Colon, ':');
285
286	/// Represents the `;` token.
287	pub const SEMICOLON: Token = Token::new_delim_kind(Kind::Semicolon, ';');
288
289	/// Represents the `,` token.
290	pub const COMMA: Token = Token::new_delim_kind(Kind::Comma, ',');
291
292	/// Represents the `[` token.
293	pub const LEFT_SQUARE: Token = Token::new_delim_kind(Kind::LeftSquare, '[');
294
295	/// Represents the `]` token.
296	pub const RIGHT_SQUARE: Token = Token::new_delim_kind(Kind::RightSquare, ']');
297
298	/// Represents the `(` token.
299	pub const LEFT_PAREN: Token = Token::new_delim_kind(Kind::LeftParen, '(');
300
301	/// Represents the `)` token.
302	pub const RIGHT_PAREN: Token = Token::new_delim_kind(Kind::RightParen, ')');
303
304	/// Represents the `{` token.
305	pub const LEFT_CURLY: Token = Token::new_delim_kind(Kind::LeftCurly, '{');
306
307	/// Represents the `}` token.
308	pub const RIGHT_CURLY: Token = Token::new_delim_kind(Kind::RightCurly, '}');
309
310	/// Represents a `!` [Kind::Delim] token.
311	pub const BANG: Token = Token::new_delim('!');
312
313	/// Represents a `#` [Kind::Delim] token.
314	pub const HASH: Token = Token::new_delim('#');
315
316	/// Represents a `$` [Kind::Delim] token.
317	pub const DOLLAR: Token = Token::new_delim('$');
318
319	/// Represents a `%` [Kind::Delim] token - not to be confused with the `%` dimension.
320	pub const PERCENT: Token = Token::new_delim('%');
321
322	/// Represents a `&` [Kind::Delim] token.
323	pub const AMPERSAND: Token = Token::new_delim('&');
324
325	/// Represents a `*` [Kind::Delim] token.
326	pub const ASTERISK: Token = Token::new_delim('*');
327
328	/// Represents a `+` [Kind::Delim] token.
329	pub const PLUS: Token = Token::new_delim('+');
330
331	/// Represents a `-` [Kind::Delim] token.
332	pub const DASH: Token = Token::new_delim('-');
333
334	/// Represents a `.` [Kind::Delim] token.
335	pub const PERIOD: Token = Token::new_delim('.');
336
337	/// Represents a `/` [Kind::Delim] token.
338	pub const SLASH: Token = Token::new_delim('/');
339
340	/// Represents a `<` [Kind::Delim] token.
341	pub const LESS_THAN: Token = Token::new_delim('<');
342
343	/// Represents a `=` [Kind::Delim] token.
344	pub const EQUALS: Token = Token::new_delim('=');
345
346	/// Represents a `>` [Kind::Delim] token.
347	pub const GREATER_THAN: Token = Token::new_delim('>');
348
349	/// Represents a `?` [Kind::Delim] token.
350	pub const QUESTION: Token = Token::new_delim('?');
351
352	/// Represents a `@` [Kind::Delim] token. Not to be confused with the @keyword token.
353	pub const AT: Token = Token::new_delim('@');
354
355	/// Represents a `\\` [Kind::Delim] token.
356	pub const BACKSLASH: Token = Token::new_delim('\\');
357
358	/// Represents a `^` [Kind::Delim] token.
359	pub const CARET: Token = Token::new_delim('^');
360
361	/// Represents a `_` [Kind::Delim] token.
362	pub const UNDERSCORE: Token = Token::new_delim('_');
363
364	/// Represents a `\`` [Kind::Delim] token.
365	pub const BACKTICK: Token = Token::new_delim('\'');
366
367	/// Represents a `|` [Kind::Delim] token.
368	pub const PIPE: Token = Token::new_delim('|');
369
370	/// Represents a `~` [Kind::Delim] token.
371	pub const TILDE: Token = Token::new_delim('~');
372
373	/// Represents a replacement character [Kind::Delim] token.
374	pub const REPLACEMENT_CHARACTER: Token = Token::new_delim(REPLACEMENT_CHARACTER);
375
376	/// Creates a "Dummy" token with no additional data, just the [Kind].
377	#[inline]
378	pub const fn dummy(kind: Kind) -> Self {
379		Self((kind as u32) << 24, 0)
380	}
381
382	/// Creates a "Dummy" token with no additional data, just [Kind::Ident].
383	#[inline]
384	pub const fn dummy_ident() -> Self {
385		Self((Kind::Ident as u32) << 24, 0)
386	}
387
388	/// Creates a [Kind::Whitesapce] token.
389	#[inline]
390	pub(crate) const fn new_whitespace(style: Whitespace, len: u32) -> Self {
391		let flags: u32 = Kind::Whitespace as u32 | ((style.to_bits() as u32) << 5);
392		Self((flags << 24) & KIND_MASK, len)
393	}
394
395	/// Creates a [Kind::Comment] token.
396	#[inline]
397	pub(crate) fn new_comment(style: CommentStyle, len: u32) -> Self {
398		let flags: u32 = Kind::Comment as u32 | ((style as u32) << 5);
399		Self((flags << 24) & KIND_MASK, len)
400	}
401
402	/// Creates a [Kind::Number] token.
403	#[inline]
404	pub(crate) fn new_number(is_float: bool, has_sign: bool, len: u32, value: f32) -> Self {
405		let flags: u32 = Kind::Number as u32 | ((is_float as u32) << 5) | ((has_sign as u32) << 6);
406		Self((flags << 24) & KIND_MASK | (len & LENGTH_MASK), value.to_bits())
407	}
408
409	/// Creates a new [Kind::Dimension] token.
410	#[inline]
411	pub(crate) fn new_dimension(
412		is_float: bool,
413		has_sign: bool,
414		num_len: u32,
415		unit_len: u32,
416		value: f32,
417		unit: DimensionUnit,
418	) -> Self {
419		debug_assert!(num_len <= 4097);
420		let num_len = (num_len << 12) & HALF_LENGTH_MASK;
421		let (is_known_unit, known_or_len) =
422			if unit == DimensionUnit::Unknown { (0, unit_len) } else { (0b100_00000, unit as u32) };
423		let flags: u32 = Kind::Dimension as u32 | is_known_unit | ((is_float as u32) << 5) | ((has_sign as u32) << 6);
424		Self(((flags << 24) & KIND_MASK) | ((num_len | known_or_len) & LENGTH_MASK), value.to_bits())
425	}
426
427	/// Creates a new [Kind::BadString] token. Bad Strings are like String tokens but during lexing they failed to fully tokenize
428	/// into a proper string token, usually due to containing newline characters.
429	#[inline]
430	pub(crate) fn new_bad_string(len: u32) -> Self {
431		Self(((Kind::BadString as u32) << 24) & KIND_MASK, len)
432	}
433
434	/// Creates a new [Kind::BadUrl] token. Bad URLs are like URL tokens but during lexing they failed to fully tokenize into a
435	/// proper URL token, usually due to containing newline characters.
436	#[inline]
437	pub(crate) fn new_bad_url(len: u32) -> Self {
438		Self(((Kind::BadUrl as u32) << 24) & KIND_MASK, len)
439	}
440
441	/// Creates a new [Kind::Ident] token.
442	#[inline]
443	pub(crate) fn new_ident(contains_non_lower_ascii: bool, dashed: bool, contains_escape: bool, len: u32) -> Self {
444		let flags: u32 = Kind::Ident as u32
445			| ((contains_non_lower_ascii as u32) << 5)
446			| ((dashed as u32) << 6)
447			| ((contains_escape as u32) << 7);
448		Self((flags << 24) & KIND_MASK, len)
449	}
450
451	/// Creates a new [Kind::Function] token.
452	#[inline]
453	pub(crate) fn new_function(contains_non_lower_ascii: bool, dashed: bool, contains_escape: bool, len: u32) -> Self {
454		let flags: u32 = Kind::Function as u32
455			| ((contains_non_lower_ascii as u32) << 5)
456			| ((dashed as u32) << 6)
457			| ((contains_escape as u32) << 7);
458		Self((flags << 24) & KIND_MASK, len)
459	}
460
461	/// Creates a new [Kind::AtKeyword] token.
462	#[inline]
463	pub(crate) fn new_atkeyword(contains_non_lower_ascii: bool, dashed: bool, contains_escape: bool, len: u32) -> Self {
464		let flags: u32 = Kind::AtKeyword as u32
465			| ((contains_non_lower_ascii as u32) << 5)
466			| ((dashed as u32) << 6)
467			| ((contains_escape as u32) << 7);
468		Self((flags << 24) & KIND_MASK, len)
469	}
470
471	/// Creates a new [Kind::Hash] token.
472	#[inline]
473	pub(crate) fn new_hash(
474		contains_non_lower_ascii: bool,
475		first_is_ascii: bool,
476		contains_escape: bool,
477		len: u32,
478		hex_value: u32,
479	) -> Self {
480		let flags: u32 = Kind::Hash as u32
481			| ((contains_non_lower_ascii as u32) << 5)
482			| ((first_is_ascii as u32) << 6)
483			| ((contains_escape as u32) << 7);
484		debug_assert!(len < (1 << 24));
485		Self((flags << 24) & KIND_MASK | (len & LENGTH_MASK), hex_value)
486	}
487
488	/// Creates a new [Kind::String] token.
489	#[inline]
490	pub(crate) fn new_string(quotes: QuoteStyle, has_close_quote: bool, contains_escape: bool, len: u32) -> Self {
491		debug_assert!(quotes != QuoteStyle::None);
492		let quotes = if quotes == QuoteStyle::Double { 0b001_00000 } else { 0b0 };
493		let flags: u32 =
494			Kind::String as u32 | quotes | ((has_close_quote as u32) << 6) | ((contains_escape as u32) << 7);
495		Self((flags << 24) & KIND_MASK, len)
496	}
497
498	/// Creates a new [Kind::Url] token.
499	#[inline]
500	pub(crate) fn new_url(
501		ends_with_paren: bool,
502		contains_whitespace_after_open_paren: bool,
503		contains_escape: bool,
504		leading_length: u32,
505		trailing_length: u32,
506		len: u32,
507	) -> Self {
508		let leading_length = (leading_length << 12) & HALF_LENGTH_MASK;
509		let flags: u32 = Kind::Url as u32
510			| ((ends_with_paren as u32) << 5)
511			| ((contains_whitespace_after_open_paren as u32) << 6)
512			| ((contains_escape as u32) << 7);
513		Self((flags << 24) & KIND_MASK | ((leading_length | trailing_length) & LENGTH_MASK), len)
514	}
515
516	/// Creates a new [Kind::Delim] token.
517	#[inline]
518	pub(crate) const fn new_delim(char: char) -> Self {
519		let flags: u32 = Kind::Delim as u32;
520		Self((flags << 24) & KIND_MASK, char as u32)
521	}
522
523	/// Creates a new [Kind::Delim] token.
524	#[inline]
525	pub(crate) const fn new_delim_kind(kind: Kind, char: char) -> Self {
526		let flags: u32 = kind as u32;
527		Self((flags << 24) & KIND_MASK, char as u32)
528	}
529
530	/// Creates a new [Kind::Delim] token with associated whitespace.
531	#[inline]
532	pub(crate) const fn new_delim_with_associated_whitespace(char: char, rules: AssociatedWhitespaceRules) -> Self {
533		let flags: u32 = Kind::Delim as u32 | ((rules.to_bits() as u32) << 5);
534		Self((flags << 24) & KIND_MASK, char as u32)
535	}
536
537	/// Returns the raw bits representing the [Kind].
538	#[inline(always)]
539	pub(crate) const fn kind_bits(&self) -> u8 {
540		(self.0 >> 24 & 0b11111) as u8
541	}
542
543	/// Returns the [Kind].
544	#[inline]
545	pub const fn kind(&self) -> Kind {
546		Kind::from_bits(self.kind_bits())
547	}
548
549	/// Check if the TF upper-most bit is set.
550	#[inline(always)]
551	const fn first_bit_is_set(&self) -> bool {
552		self.0 >> 31 == 1
553	}
554
555	/// Check if the TF second-upper-most bit is set.
556	#[inline(always)]
557	const fn second_bit_is_set(&self) -> bool {
558		self.0 >> 30 & 0b1 == 1
559	}
560
561	/// Check if the TF third-upper-most bit is set.
562	#[inline(always)]
563	const fn third_bit_is_set(&self) -> bool {
564		self.0 >> 29 & 0b1 == 1
565	}
566
567	/// Check if the [Kind] is "Ident Like", i.e. it is [Kind::Ident], [Kind::AtKeyword], [Kind::Function], [Kind::Hash].
568	#[inline(always)]
569	pub(crate) const fn is_ident_like(&self) -> bool {
570		self.kind_bits() & 0b11000 == 0b01000 && self.kind_bits() != Kind::String as u8
571	}
572
573	/// Check if the [Kind] is "Delim Like", i.e. it is [Kind::Delim], [Kind::Colon], [Kind::Semicolon], [Kind::Comma],
574	/// [Kind::LeftSquare], [Kind::RightSquare], [Kind::LeftParen], [Kind::RightParen], [Kind::LeftCurly],
575	/// [Kind::RightCurly].
576	#[inline(always)]
577	pub(crate) const fn is_delim_like(&self) -> bool {
578		self.kind_bits() & 0b10000 == 0b10000
579	}
580
581	/// The only token with an empty length is EOF, but this method is available for symmetry with `len()`.
582	#[inline]
583	pub const fn is_empty(&self) -> bool {
584		self.kind_bits() == Kind::Eof as u8
585	}
586
587	/// Returns the amount of characters (utf-8 code points) this Token represents in the underlying source text.
588	#[inline]
589	pub const fn len(&self) -> u32 {
590		if self.kind_bits() == Kind::Eof as u8 {
591			0
592		} else if self.is_delim_like() {
593			debug_assert!(matches!(
594				self.kind(),
595				Kind::Delim
596					| Kind::Colon | Kind::Semicolon
597					| Kind::Comma | Kind::LeftSquare
598					| Kind::RightSquare
599					| Kind::LeftParen
600					| Kind::RightParen
601					| Kind::LeftCurly
602					| Kind::RightCurly
603			));
604			self.char().unwrap().len_utf8() as u32
605		} else if self.kind_bits() == Kind::Number as u8 {
606			self.numeric_len()
607		} else if self.kind_bits() == Kind::Dimension as u8 {
608			if self.first_bit_is_set() {
609				self.numeric_len() + self.dimension_unit().len()
610			} else {
611				((self.0 & LENGTH_MASK) >> 12) + (self.0 & !HALF_LENGTH_MASK)
612			}
613		} else if self.kind_bits() == Kind::Hash as u8 {
614			self.0 & LENGTH_MASK
615		} else {
616			self.1
617		}
618	}
619
620	/// If the [Kind] is "Delim Like" (i.e. it is [Kind::Delim], [Kind::Colon], [Kind::Semicolon], [Kind::Comma],
621	/// [Kind::LeftSquare], [Kind::RightSquare], [Kind::LeftParen], [Kind::RightParen], [Kind::LeftCurly],
622	/// [Kind::RightCurly]) then this will return a [Some] with a [char] representing the value.
623	/// For non-delim-like tokens this will return [None].
624	pub const fn char(&self) -> Option<char> {
625		if self.is_delim_like() {
626			return char::from_u32(self.1);
627		}
628		None
629	}
630
631	/// The [Token] is a [Kind::Dimension] or [Kind::Number] and is an integer - i.e. it has no `.`.
632	#[inline]
633	pub const fn is_int(&self) -> bool {
634		self.kind_bits() & 0b11100 == 0b00100 && !self.third_bit_is_set()
635	}
636
637	/// The [Token] is a [Kind::Dimension] or [Kind::Number] and is a float - i.e. it has decimal places. This will be
638	/// `true` even if the decimal places are 0. e.g. `0.0`.
639	#[inline]
640	pub const fn is_float(&self) -> bool {
641		self.kind_bits() & 0b11100 == 0b00100 && self.third_bit_is_set()
642	}
643
644	/// The [Token] is a [Kind::Dimension] or [Kind::Number] and the underlying character data included a `-` or `+`
645	/// character. Note that a positive number may not necessarily have a sign, e.g. `3` will return false, while `+3`
646	/// will return `true`.
647	#[inline]
648	pub const fn has_sign(&self) -> bool {
649		self.kind_bits() & 0b11100 == 0b00100 && self.second_bit_is_set()
650	}
651
652	/// If the [Token] is a [Kind::Dimension] or [Kind::Number] then this returns the amount of characters used to
653	/// represent this number in the underlying source text. Numbers may be inefficiently encoded in the source text,
654	/// e.g. `0.0000`.
655	///
656	/// Asserts: the `kind()` is [Kind::Dimension] or [Kind::Number].
657	#[inline]
658	pub const fn numeric_len(&self) -> u32 {
659		debug_assert!(matches!(self.kind(), Kind::Number | Kind::Dimension));
660		if self.kind_bits() == Kind::Dimension as u8 {
661			(self.0 & LENGTH_MASK) >> 12
662		} else if self.first_bit_is_set() {
663			(self.0 & LENGTH_MASK) >> 16
664		} else {
665			self.0 & LENGTH_MASK
666		}
667	}
668
669	/// If the [Token] is a [Kind::Dimension] or [Kind::Number] then this returns the [f32] representation of the number's
670	/// value.
671	///
672	/// Asserts: the `kind()` is [Kind::Dimension] or [Kind::Number].
673	#[inline]
674	pub fn value(&self) -> f32 {
675		debug_assert!(matches!(self.kind(), Kind::Number | Kind::Dimension));
676		f32::from_bits(self.1)
677	}
678
679	/// Returns the [Whitespace].
680	///
681	/// If the [Token] is not a [Kind::Whitespace] this will return [Whitespace::none()].
682	#[inline]
683	pub fn whitespace_style(&self) -> Whitespace {
684		if self.kind_bits() == Kind::Whitespace as u8 {
685			Whitespace::from_bits((self.0 >> 29) as u8)
686		} else {
687			Whitespace::none()
688		}
689	}
690
691	/// Returns the [AssociatedWhitespaceRules].
692	///
693	/// If the [Kind] is not "Delim Like" (i.e. it is not [Kind::Delim], [Kind::Colon], [Kind::Semicolon], [Kind::Comma],
694	/// [Kind::LeftSquare], [Kind::RightSquare], [Kind::LeftParen], [Kind::RightParen], [Kind::LeftCurly],
695	/// [Kind::RightCurly]) then this will always return `AssociatedWhitespaceRules::none()`.
696	#[inline]
697	pub fn associated_whitespace(&self) -> AssociatedWhitespaceRules {
698		if self.is_delim_like() {
699			AssociatedWhitespaceRules::from_bits((self.0 >> 29) as u8)
700		} else {
701			AssociatedWhitespaceRules::none()
702		}
703	}
704
705	/// Returns a new [Token] with the [AssociatedWhitespaceRules] set to the given [AssociatedWhitespaceRules],
706	/// if possible.
707	///
708	/// If the [Kind] is not "Delim Like" (i.e. it is not [Kind::Delim], [Kind::Colon], [Kind::Semicolon], [Kind::Comma],
709	/// [Kind::LeftSquare], [Kind::RightSquare], [Kind::LeftParen], [Kind::RightParen], [Kind::LeftCurly],
710	/// [Kind::RightCurly]) then this will return the same [Token].
711	/// If the [AssociatedWhitespaceRules] is different it will return a new [Token].
712	#[inline]
713	pub fn with_associated_whitespace(&self, rules: AssociatedWhitespaceRules) -> Token {
714		if !self.is_delim_like() {
715			return *self;
716		}
717		Token::new_delim_with_associated_whitespace(self.char().unwrap(), rules)
718	}
719
720	/// Returns the [CommentStyle].
721	///
722	/// If the [Token] is not a [Kind::Comment] this will return [None].
723	#[inline]
724	pub fn comment_style(&self) -> Option<CommentStyle> {
725		if self.kind_bits() == Kind::Comment as u8 { CommentStyle::from_bits((self.0 >> 29) as u8) } else { None }
726	}
727
728	/// Returns the [DimensionUnit].
729	///
730	/// If the [Token] is not a [Kind::Dimension] this will return [DimensionUnit::Unknown].
731	/// If the [Token] _is_ a [Kind::Dimension], but the dimension unit is custom (e.g. dashed), has escape characters,
732	/// or is not a recognised CSS Dimension, this will return [DimensionUnit::Unknown].
733	#[inline]
734	pub const fn dimension_unit(&self) -> DimensionUnit {
735		if !self.first_bit_is_set() || self.kind_bits() != Kind::Dimension as u8 {
736			DimensionUnit::Unknown
737		} else {
738			DimensionUnit::from_u8((self.0 & !HALF_LENGTH_MASK) as u8)
739		}
740	}
741
742	/// Returns the [QuoteStyle].
743	///
744	/// If the [Token] is not a [Kind::String] this will return [QuoteStyle::None].
745	#[inline]
746	pub fn quote_style(&self) -> QuoteStyle {
747		if self.kind_bits() == Kind::String as u8 {
748			if self.third_bit_is_set() {
749				return QuoteStyle::Double;
750			} else {
751				return QuoteStyle::Single;
752			}
753		}
754		QuoteStyle::None
755	}
756
757	/// Returns a new [Token] with the [QuoteStyle] set to the given [QuoteStyle], if possible.
758	///
759	/// If the [Token] is not a [Kind::String], or the [QuoteStyle] is already the given [QuoteStyle] this will return the same [Token].
760	/// If the [QuoteStyle] is different it will return a new [Token].
761	/// [QuoteStyle] must not be [QuoteStyle::None]
762	#[inline]
763	pub fn with_quotes(&self, quote_style: QuoteStyle) -> Token {
764		debug_assert!(quote_style != QuoteStyle::None);
765		if self.kind_bits() != Kind::String as u8 || quote_style == self.quote_style() {
766			return *self;
767		}
768		Token::new_string(quote_style, self.has_close_quote(), self.contains_escape_chars(), self.len())
769	}
770
771	/// If the [Token] is a [Kind::String] this checks if the string ended in a close quote.
772	/// It is possible to have a valid String token that does not end in a close quote, by eliding the quote at the end of
773	/// a file.
774	///
775	/// Asserts: The [Kind] is [Kind::String].
776	#[inline]
777	pub const fn has_close_quote(&self) -> bool {
778		debug_assert!(self.kind_bits() == Kind::String as u8);
779		self.second_bit_is_set()
780	}
781
782	/// Checks if it is possible for the [Token] to contain escape characters. Numbers, for example, cannot. Idents can.
783	#[inline]
784	pub const fn can_escape(&self) -> bool {
785		self.kind_bits() == Kind::String as u8 || self.kind_bits() == Kind::Dimension as u8 || self.is_ident_like()
786	}
787
788	/// If the [Token] can escape, checks if the underlying source text contained escape characters.
789	///
790	/// Asserts: The token can escape ([Token::can_escape()]).
791	#[inline]
792	pub const fn contains_escape_chars(&self) -> bool {
793		if self.kind_bits() == Kind::Dimension as u8 {
794			return !self.first_bit_is_set();
795		}
796		self.can_escape() && self.first_bit_is_set()
797	}
798
799	/// If the [Token] is Ident like, checks if the first two code points are HYPHEN-MINUS (`-`).
800	///
801	/// Asserts: The token is "ident like", i.e. it is [Kind::Ident], [Kind::AtKeyword], [Kind::Function], [Kind::Hash].
802	#[inline]
803	pub const fn is_dashed_ident(&self) -> bool {
804		debug_assert!(self.is_ident_like());
805		self.second_bit_is_set()
806	}
807
808	/// Checks if the [Token] is Ident like and none of the characters are ASCII upper-case.
809	#[inline]
810	pub const fn is_lower_case(&self) -> bool {
811		self.is_ident_like() && !self.third_bit_is_set()
812	}
813
814	/// Checks if the [Token] is Trivia-like, that is [Kind::Comment], [Kind::Whitespace], [Kind::Eof]
815	#[inline]
816	pub const fn is_trivia(&self) -> bool {
817		self.kind_bits() & 0b000011 == self.kind_bits()
818	}
819
820	/// If the [Token] is [Kind::Url], checks if there are leading Whitespace characters before the inner value.
821	///
822	/// Asserts: The token is [Kind::Url].
823	#[inline]
824	pub const fn url_has_leading_space(&self) -> bool {
825		debug_assert!(self.kind_bits() == Kind::Url as u8);
826		self.second_bit_is_set()
827	}
828
829	/// If the [Token] is [Kind::Url], checks if the closing parenthesis is present.
830	///
831	/// Asserts: The token is [Kind::Url].
832	#[inline]
833	pub const fn url_has_closing_paren(&self) -> bool {
834		debug_assert!(self.kind_bits() == Kind::Url as u8);
835		self.third_bit_is_set()
836	}
837
838	/// If the [Token] is [Kind::Hash], checks if the Hash is "ID-like" (i.e its first character is ASCII).
839	///
840	/// Asserts: The token is [Kind::Hash].
841	#[inline]
842	pub const fn hash_is_id_like(&self) -> bool {
843		debug_assert!(self.kind_bits() == Kind::Hash as u8);
844		self.second_bit_is_set()
845	}
846
847	/// Checks if the [Token] is [Kind::BadString] or [Kind::BadUrl].
848	#[inline]
849	pub const fn is_bad(&self) -> bool {
850		(self.kind_bits() | 0b00001) & 0b11001 == 1
851	}
852
853	/// Checks if the [Token] is [Kind::CdcOrCdo] and is the CDC variant of that token.
854	#[inline]
855	pub const fn is_cdc(&self) -> bool {
856		self.kind_bits() == (Kind::CdcOrCdo as u8) && self.third_bit_is_set()
857	}
858
859	/// Some tokens may have a "leading" part:
860	///  - [Kind::AtKeyword] always starts with a `@`,
861	///  - [Kind::Hash] with a `#`.
862	///  - [Kind::String] with a `"` or `'`.
863	///  - [Kind::Comment] with a leading `/*` (or `//`).
864	///  - [Kind::Dimension] has a leading numeric portion.
865	///  - [Kind::Url] has the leading `url(` ident (which may vary in exact representation).
866	///
867	/// This function returns the length of that, irrespective of the [Kind]. For other kinds not listed, this will return
868	/// `0`, but for the above kinds it will calculate the leading length. This is useful for parsing out the underlying
869	/// data which is likely to be of greater use.
870	pub fn leading_len(&self) -> u32 {
871		match self.kind() {
872			Kind::AtKeyword | Kind::Hash | Kind::String => 1,
873			Kind::Dimension => self.numeric_len(),
874			Kind::Comment => 2,
875			Kind::Url => (self.0 & LENGTH_MASK) >> 12,
876			_ => 0,
877		}
878	}
879
880	/// Some tokens may have a "trailing" part:
881	///  - [Kind::Function] will always have an opening `(`.
882	///  - [Kind::String] may have a closing `"` or `'`.
883	///  - [Kind::Comment] may have a closing `*/`
884	///  - [Kind::Url] may have a clsoing `)`.
885	///
886	/// This function returns the length of that, irrespective of the [Kind]. For other kinds not listed, this will return
887	/// `0`, but for the above kinds it will calculate the leading length. This is useful for parsing out the underlying
888	/// data which is likely to be of greater use.
889	pub fn trailing_len(&self) -> u32 {
890		match self.kind() {
891			Kind::Function => 1,
892			Kind::String => self.has_close_quote() as u32,
893			Kind::Comment if self.comment_style().unwrap().is_block() => 2,
894			Kind::Url => self.0 & !HALF_LENGTH_MASK,
895			_ => 0,
896		}
897	}
898
899	/// Certain kinds have a [PairWise] equivalent:
900	///  - [Kind::LeftParen] has [Kind::RightParen]
901	///  - [Kind::LeftCurly] has [Kind::RightCurly]
902	///  - [Kind::LeftSquare] has [Kind::RightSquare]
903	///
904	/// This function returns the [PairWise] enum, if the [Token] is one of the above listed [Kinds][Kind]. For any other
905	/// [Kind] this returns [None].
906	#[inline]
907	pub fn to_pairwise(&self) -> Option<PairWise> {
908		PairWise::from_token(self)
909	}
910
911	/// A convenience function for `Cursor::new(offset, token)`.
912	#[inline(always)]
913	pub fn with_cursor(self, offset: SourceOffset) -> Cursor {
914		Cursor::new(offset, self)
915	}
916
917	/// If the [Kind] is [Kind::Hash] then this token may have had the opportunity to be parsed as a `<hex-value>` (e.g.
918	/// `#fff`). When this happens the character data is parsed during tokenization into a u32 which stores the
919	/// RR,GG,BB,AA values.
920	#[inline(always)]
921	pub fn hex_value(self) -> u32 {
922		if self == Kind::Hash { self.1 } else { 0 }
923	}
924
925	/// If this [Token] is preceded by the [Token] `other` then a separating token (e.g. a comment) will need to be
926	/// inserted between these the two tokens during serialization, in order for them to be able to be re-tokenized as
927	/// the same tokens. For example an Ident ("a") adjacent to an Ident ("b"), if serialized without whitespace, would
928	/// create a single Ident ("ab"). The rules for estbalishing whether or not these tokens needs whitespace are quite
929	/// simple and are effectively [defined in the serialization section of the spec][1]. To reproduce the table:
930	///
931	/// [1]: https://drafts.csswg.org/css-syntax/#serialization
932	///
933	/// |            | ident | function | url | bad url | - | number | percentage | dimension | CDC | ( | * | % |
934	/// |:-----------|:-----:|:--------:|:---:|:-------:|:-:|:------:|:----------:|:---------:|:---:|:-:|:-:|:-:|
935	/// | ident      |   ✗   |    ✗     |  ✗  |    ✗    | ✗ |    ✗   |      ✗     |     ✗     |  ✗  | ✗ |   |   |
936	/// | at-keyword |   ✗   |    ✗     |  ✗  |    ✗    | ✗ |    ✗   |      ✗     |     ✗     |  ✗  |   |   |   |
937	/// | hash       |   ✗   |    ✗     |  ✗  |    ✗    | ✗ |    ✗   |      ✗     |     ✗     |  ✗  |   |   |   |
938	/// | dimension  |   ✗   |    ✗     |  ✗  |    ✗    | ✗ |    ✗   |      ✗     |     ✗     |  ✗  |   |   |   |
939	/// | #          |   ✗   |    ✗     |  ✗  |    ✗    | ✗ |    ✗   |      ✗     |     ✗     |  ✗  |   |   |   |
940	/// | \-         |   ✗   |    ✗     |  ✗  |    ✗    | ✗ |    ✗   |      ✗     |     ✗     |  ✗  |   |   |   |
941	/// | number     |   ✗   |    ✗     |  ✗  |    ✗    |   |    ✗   |      ✗     |     ✗     |  ✗  |   |   | ✗ |
942	/// | @          |   ✗   |    ✗     |  ✗  |    ✗    | ✗ |        |            |           |  ✗  |   |   |   |
943	/// | .          |       |          |     |         |   |    ✗   |      ✗     |     ✗     |     |   |   |   |
944	/// | +          |       |          |     |         |   |    ✗   |      ✗     |     ✗     |     |   |   |   |
945	/// | /          |       |          |     |         |   |        |            |           |     |   | ✗ |   |
946	///
947	/// The one exception not in this table is that two consecutive `/` characters should also be separated by spaces in
948	/// order to avoid abmiguities with CSS-alike languages that treat two consecutive `/` characters as a single line
949	/// comment.
950	///
951	/// # Example
952	///
953	/// ```
954	/// use css_lexer::*;
955	/// let mut lexer = Lexer::new("10 %");
956	/// let first = lexer.advance();
957	/// let _ = lexer.advance(); // Whitespace
958	/// let second = lexer.advance();
959	/// assert!(first.needs_separator_for(second));
960	/// ```
961	pub fn needs_separator_for(&self, second: Token) -> bool {
962		if second == AssociatedWhitespaceRules::EnforceBefore && *self != Kind::Whitespace
963			|| *self == AssociatedWhitespaceRules::EnforceAfter && second != Kind::Whitespace
964		{
965			// We need whitespace after, unless the next token is actually whitespace.
966			return true;
967		}
968		if *self == AssociatedWhitespaceRules::BanAfter {
969			return false;
970		}
971		match self.kind() {
972			Kind::Ident => {
973				(matches!(second.kind(), Kind::Number | Kind::Dimension) &&
974					// numbers with a `-` need separating, but with `+` they do not.
975					(!second.has_sign() || second.value() < 0.0))
976					|| matches!(second.kind(), Kind::Ident | Kind::Function | Kind::Url | Kind::BadUrl)
977					|| matches!(second.char(), Some('(' | '-'))
978					|| second.is_cdc()
979			}
980			Kind::AtKeyword | Kind::Hash | Kind::Dimension => {
981				(matches!(second.kind(), Kind::Number | Kind::Dimension) &&
982					// numbers with a `-` need separating, but with `+` they do not.
983					(!second.has_sign() || second.value() < 0.0))
984					|| matches!(second.kind(), Kind::Ident | Kind::Function | Kind::Url | Kind::BadUrl)
985					|| matches!(second.char(), Some('-'))
986					|| second.is_cdc()
987			}
988			Kind::Number => {
989				matches!(
990					second.kind(),
991					Kind::Ident | Kind::Function | Kind::Url | Kind::BadUrl | Kind::Number | Kind::Dimension
992				) || matches!(second.char(), Some('%'))
993					|| second.is_cdc()
994			}
995			_ => match self.char() {
996				Some('#') => {
997					matches!(
998						second.kind(),
999						Kind::Ident | Kind::Function | Kind::Url | Kind::BadUrl | Kind::Number | Kind::Dimension
1000					) || matches!(second.char(), Some('-'))
1001						|| second.is_cdc()
1002				}
1003				Some('-') => {
1004					matches!(
1005						second.kind(),
1006						Kind::Ident | Kind::Function | Kind::Url | Kind::BadUrl | Kind::Number | Kind::Dimension
1007					) || matches!(second.char(), Some('-'))
1008						|| second.is_cdc()
1009				}
1010				Some('@') => {
1011					matches!(second.kind(), Kind::Ident | Kind::Function | Kind::Url | Kind::BadUrl)
1012						|| matches!(second.char(), Some('-'))
1013						|| second.is_cdc()
1014				}
1015				Some('.') => matches!(second.kind(), Kind::Number | Kind::Dimension),
1016				Some('+') => matches!(second.kind(), Kind::Number | Kind::Dimension),
1017				Some('/') => matches!(second.char(), Some('*' | '/')),
1018				_ => false,
1019			},
1020		}
1021	}
1022}
1023
1024impl core::fmt::Debug for Token {
1025	fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
1026		let mut d = f.debug_struct(format!("Token::{}", self.kind().as_str()).as_str());
1027		match self.kind() {
1028			Kind::Eof => &mut d,
1029			Kind::Number => d.field("value", &self.value()).field("len", &self.numeric_len()),
1030			Kind::Dimension => d
1031				.field("value", &self.value())
1032				.field("len", &self.numeric_len())
1033				.field("dimension", &self.dimension_unit())
1034				.field("dimension_len", &self.len()),
1035			_ if self.is_delim_like() => d
1036				.field("char", &self.char().unwrap())
1037				.field("len", &self.len())
1038				.field("associated_whitespace", &self.associated_whitespace()),
1039			Kind::String => d
1040				.field("quote_style", &if self.first_bit_is_set() { "Double" } else { "Single" })
1041				.field("has_close_quote", &self.second_bit_is_set())
1042				.field("contains_escape_chars", &self.third_bit_is_set())
1043				.field("len", &self.len()),
1044			Kind::Ident | Kind::Function | Kind::AtKeyword => d
1045				.field("is_lower_case", &self.first_bit_is_set())
1046				.field("is_dashed_ident", &self.second_bit_is_set())
1047				.field("contains_escape_chars", &self.third_bit_is_set())
1048				.field("len", &self.len()),
1049			Kind::Hash => d
1050				.field("is_lower_case", &self.first_bit_is_set())
1051				.field("hash_is_id_like", &self.second_bit_is_set())
1052				.field("contains_escape_chars", &self.third_bit_is_set())
1053				.field("len", &self.len()),
1054			Kind::Url => d
1055				.field("url_has_closing_paren", &self.first_bit_is_set())
1056				.field("url_has_leading_space", &self.second_bit_is_set())
1057				.field("contains_escape_chars", &self.third_bit_is_set())
1058				.field("len", &self.len()),
1059			Kind::CdcOrCdo => d.field("is_cdc", &self.first_bit_is_set()).field("len", &self.len()),
1060			Kind::Whitespace => d.field("contains", &self.whitespace_style()).field("len", &self.len()),
1061			_ => d
1062				.field("flag_0", &self.first_bit_is_set())
1063				.field("flag_1", &self.second_bit_is_set())
1064				.field("flag_2", &self.third_bit_is_set())
1065				.field("len", &self.len()),
1066		}
1067		.finish()
1068	}
1069}
1070
1071impl std::fmt::Display for Token {
1072	fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
1073		match self.kind() {
1074			Kind::Delim => write!(f, "Delim({})", self.char().unwrap()),
1075			k => write!(f, "{}", k.as_str()),
1076		}
1077	}
1078}
1079
/// Serializes a [Token] as a struct with its `kind` name and `len`, plus `unit` for a [Kind::Dimension].
/// The empty token ([Token::EMPTY]) serializes as `none`.
#[cfg(feature = "serde")]
impl serde::ser::Serialize for Token {
	fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
	where
		S: serde::ser::Serializer,
	{
		use serde::ser::SerializeStruct;
		if *self == Self::EMPTY {
			return serializer.serialize_none();
		}
		let is_dimension = self.kind_bits() == Kind::Dimension as u8;
		// The declared field count has to match the number of fields actually written: formats that rely on the
		// length hint would otherwise be told about a third field that only dimensions emit.
		let field_count = if is_dimension { 3 } else { 2 };
		let mut state = serializer.serialize_struct("Token", field_count)?;
		state.serialize_field("kind", self.kind().as_str())?;
		state.serialize_field("len", &self.len())?;
		if is_dimension {
			state.serialize_field("unit", &self.dimension_unit())?;
		}
		state.end()
	}
}
1099
1100impl From<Token> for Kind {
1101	fn from(token: Token) -> Self {
1102		token.kind()
1103	}
1104}
1105
1106impl PartialEq<Kind> for Token {
1107	fn eq(&self, other: &Kind) -> bool {
1108		self.kind_bits() == *other as u8
1109	}
1110}
1111
1112impl From<Token> for KindSet {
1113	fn from(token: Token) -> Self {
1114		KindSet::new(&[token.kind()])
1115	}
1116}
1117
1118impl PartialEq<KindSet> for Token {
1119	fn eq(&self, other: &KindSet) -> bool {
1120		other.contains_bits(self.kind_bits())
1121	}
1122}
1123
1124impl From<Token> for QuoteStyle {
1125	fn from(token: Token) -> Self {
1126		token.quote_style()
1127	}
1128}
1129
1130impl PartialEq<QuoteStyle> for Token {
1131	fn eq(&self, other: &QuoteStyle) -> bool {
1132		&self.quote_style() == other
1133	}
1134}
1135
1136impl From<Token> for Whitespace {
1137	fn from(token: Token) -> Self {
1138		token.whitespace_style()
1139	}
1140}
1141
1142impl PartialEq<Whitespace> for Token {
1143	fn eq(&self, other: &Whitespace) -> bool {
1144		self.whitespace_style().intersects(*other)
1145	}
1146}
1147
1148impl PartialEq<AssociatedWhitespaceRules> for Token {
1149	fn eq(&self, other: &AssociatedWhitespaceRules) -> bool {
1150		self.associated_whitespace().intersects(*other)
1151	}
1152}
1153
1154impl PartialEq<CommentStyle> for Token {
1155	fn eq(&self, other: &CommentStyle) -> bool {
1156		self.comment_style().map(|style| &style == other).unwrap_or(false)
1157	}
1158}
1159
1160impl PartialEq<PairWise> for Token {
1161	fn eq(&self, other: &PairWise) -> bool {
1162		self.to_pairwise().map(|style| &style == other).unwrap_or(false)
1163	}
1164}
1165
1166impl PartialEq<char> for Token {
1167	fn eq(&self, other: &char) -> bool {
1168		self.char().map(|char| char == *other).unwrap_or(false)
1169	}
1170}
1171
1172impl From<Token> for DimensionUnit {
1173	fn from(token: Token) -> Self {
1174		token.dimension_unit()
1175	}
1176}
1177
1178impl PartialEq<DimensionUnit> for Token {
1179	fn eq(&self, other: &DimensionUnit) -> bool {
1180		self.dimension_unit() == *other
1181	}
1182}
1183
#[test]
fn size_test() {
	// A Token must stay packed into exactly 8 bytes.
	let token_size = ::std::mem::size_of::<Token>();
	assert_eq!(token_size, 8);
}
1188
#[test]
fn test_new_whitespace() {
	// The predefined constants are whitespace tokens carrying their own style.
	for (token, style) in
		[(Token::SPACE, Whitespace::Space), (Token::TAB, Whitespace::Tab), (Token::NEWLINE, Whitespace::Newline)]
	{
		assert_eq!(token, Kind::Whitespace);
		assert_eq!(token, style);
	}

	// Constructed tokens keep their kind, (combined) style and length.
	let spaces = Token::new_whitespace(Whitespace::Space, 4);
	assert_eq!(spaces, Kind::Whitespace);
	assert_eq!(Token::new_whitespace(Whitespace::Space | Whitespace::Newline, 4), Whitespace::Space);
	assert_eq!(spaces.len(), 4);
	assert_eq!(Token::new_whitespace(Whitespace::Tab | Whitespace::Space, 4), Whitespace::Tab);
	let newlines = Token::new_whitespace(Whitespace::Newline, 4);
	assert_eq!(newlines, Whitespace::Newline);
	assert_eq!(newlines.len(), 4);
}
1204
#[test]
fn test_new_comment() {
	// A comment token reports its kind and the style it was built with.
	let block = Token::new_comment(CommentStyle::Block, 4);
	assert_eq!(block, Kind::Comment);
	assert_eq!(block, CommentStyle::Block);
	let single = Token::new_comment(CommentStyle::Single, 4);
	assert_eq!(single, CommentStyle::Single);
}
1211
#[test]
fn test_new_number() {
	// Kind, value and length survive the packing, for short and longer lengths alike.
	let short = Token::new_number(false, false, 3, 4.2);
	assert_eq!(short, Kind::Number);
	assert_eq!(short.value(), 4.2);
	assert_eq!(short.len(), 3);
	let long_signed = Token::new_number(false, true, 9, 4.2);
	assert_eq!(long_signed, Kind::Number);
	assert_eq!(long_signed.value(), 4.2);
	assert_eq!(long_signed.len(), 9);

	// The sign and float flags are independent of one another.
	assert!(!short.has_sign());
	assert!(Token::new_number(false, true, 3, 4.2).has_sign());
	assert!(!Token::new_number(false, true, 3, 4.0).is_float());
	assert!(Token::new_number(true, false, 3, 4.2).is_float());
}
1225
#[test]
fn test_new_string() {
	// Single-quoted, unterminated, no escapes.
	let single = Token::new_string(QuoteStyle::Single, false, false, 4);
	assert_eq!(single, Kind::String);
	assert_eq!(single, QuoteStyle::Single);
	assert!(!single.has_close_quote());
	assert!(!single.contains_escape_chars());
	assert_eq!(single.len(), 4);

	// Double-quoted, unterminated, no escapes.
	let double = Token::new_string(QuoteStyle::Double, false, false, 4);
	assert_eq!(double, Kind::String);
	assert_eq!(double, QuoteStyle::Double);

	// Double-quoted and terminated.
	let closed = Token::new_string(QuoteStyle::Double, true, false, 4);
	assert!(closed.has_close_quote());
	assert!(!closed.contains_escape_chars());
	assert_eq!(Token::new_string(QuoteStyle::Double, true, false, 5).len(), 5);

	// The escape flag is reported regardless of the close-quote flag.
	assert!(Token::new_string(QuoteStyle::Double, true, true, 4).contains_escape_chars());
	assert!(Token::new_string(QuoteStyle::Double, false, true, 4).contains_escape_chars());
}
1241
#[test]
fn test_new_hash() {
	let plain = Token::new_hash(false, false, false, 4, 0);
	assert_eq!(plain, Kind::Hash);
	assert!(!plain.contains_escape_chars());

	let escaped = Token::new_hash(false, false, true, 4, 0);
	assert!(escaped.contains_escape_chars());
	assert!(escaped.is_lower_case());

	let upper = Token::new_hash(true, false, false, 4, 0);
	assert!(!upper.is_lower_case());
	assert_eq!(upper.len(), 4);
	assert_eq!(upper.hex_value(), 0);

	// The hex value is carried independently of the flags and length.
	assert_eq!(Token::new_hash(true, false, false, 4, 18).hex_value(), 18);
}
1253
#[test]
#[should_panic]
fn test_new_string_with_quotes_none() {
	// Building a string token with `QuoteStyle::None` is expected to panic.
	let _ = Token::new_string(QuoteStyle::None, false, true, 4);
}
1259
#[test]
fn test_new_delim() {
	// Each case pairs a character with its expected length: 1 for ASCII, 3 and 4 for the multi-byte characters.
	for (ch, expected_len) in [('>', 1), ('.', 1), ('ℝ', 3), ('💣', 4)] {
		let token = Token::new_delim(ch);
		assert_eq!(token, Kind::Delim);
		assert_eq!(token, ch);
		assert_eq!(token.len(), expected_len);
	}
}
1277
#[test]
fn with_associated_whitespace() {
	// The rules handed to `with_associated_whitespace` are the ones the resulting token should compare equal to.
	// The expected value previously repeated `EnforceBefore` on both sides of the `|`.
	assert_eq!(
		Token::new_delim('>').with_associated_whitespace(
			AssociatedWhitespaceRules::EnforceBefore | AssociatedWhitespaceRules::EnforceAfter
		),
		AssociatedWhitespaceRules::EnforceBefore | AssociatedWhitespaceRules::EnforceAfter
	);
}
1287
#[test]
fn test_with_quotes() {
	// Switching the quote style keeps every other fact of the string token.
	let single_to_double = Token::new_string(QuoteStyle::Single, false, false, 4).with_quotes(QuoteStyle::Double);
	assert_eq!(single_to_double, Token::new_string(QuoteStyle::Double, false, false, 4));

	let double_to_single = Token::new_string(QuoteStyle::Double, true, true, 8).with_quotes(QuoteStyle::Single);
	assert_eq!(double_to_single, Token::new_string(QuoteStyle::Single, true, true, 8));
}
1299
// Each case lives in its own `#[should_panic]` test: once the first call panics, nothing after it in the same test
// body runs, so a second call in the same function would never be exercised.
#[test]
#[should_panic]
fn test_with_quotes_none() {
	Token::new_string(QuoteStyle::Single, false, true, 4).with_quotes(QuoteStyle::None);
}

#[test]
#[should_panic]
fn test_with_quotes_none_double() {
	Token::new_string(QuoteStyle::Double, false, true, 4).with_quotes(QuoteStyle::None);
}
1306
#[test]
fn test_new_dimension() {
	// A 3-character number with a 3-character unit.
	let rad = Token::new_dimension(false, false, 3, 3, 999.0, DimensionUnit::Rad);
	assert_eq!(rad, Kind::Dimension);
	assert_eq!(rad.value(), 999.0);
	assert_eq!(rad.dimension_unit(), DimensionUnit::Rad);
	assert_eq!(rad.numeric_len(), 3);
	assert_eq!(rad.len(), 6);
	assert!(!rad.is_float());
	assert!(!rad.has_sign());

	// A 5-character number with a 2-character unit.
	let px = Token::new_dimension(false, false, 5, 2, 8191.0, DimensionUnit::Px);
	assert_eq!(px, Kind::Dimension);
	assert_eq!(px.value(), 8191.0);
	assert_eq!(px.dimension_unit(), DimensionUnit::Px);
	assert_eq!(px.numeric_len(), 5);
	assert_eq!(px.len(), 7);
	assert!(!px.is_float());
	assert!(!px.has_sign());

	// Every whole value in the range must round-trip through the packed representation.
	for whole in -8191i32..8191 {
		let expected = whole as f32;
		let token = Token::new_dimension(false, false, 9, 3, expected, DimensionUnit::Rem);
		assert_eq!(token.value(), expected);
	}
}