css_lexer/
feature.rs

1use bitmask_enum::bitmask;
2
3/// A set of runtime feature flags which can be enabled individually or in combination, which will change the way
4/// individual [Lexer][crate::Lexer] produces [Tokens][crate::Token].
5///
6/// To build multiple features, use the bitwise OR operator.
7///
8/// # Example
9///
10/// ```
11/// use css_lexer::*;
12/// let features = Feature::SingleLineComments | Feature::SeparateWhitespace;
13/// let mut lexer = Lexer::new_with_features(&EmptyAtomSet::ATOMS, "// foo", features);
14/// ```
15#[bitmask(u8)]
16#[bitmask_config(vec_debug)]
17#[derive(Default)]
18pub enum Feature {
19	/// With this flag enabled the [Lexer][crate::Lexer] will produce [Tokens][crate::Token] with
20	/// [Kind::Comment][crate::Kind::Comment] when it encounters two consecutative SOLIDUS characters (`//`), the
21	/// [Token][crate::Token] will have a length up to the next newline (`\n`) character. The contents between the two
22	/// SOLIDUS (`//`) characters and the `\n` will be consumed by this token, so no tokens will be produced for the
23	/// contents of the comment.
24	///
25	/// If this flag is not enabled, encountering something that looks like a single line commet will produce two
26	/// [Kind::Delim][crate::Kind::Delim] tokens for the two SOLIDUS (`//`) characters, and any number of other tokens
27	/// depending on the contents of the comment, per the CSS specification.
28	///
29	/// A comment with two leading SOLIDUS characters is not valid CSS syntax, but might be considered valid syntax in
30	/// other CSS-alike languages [for example SASS][1].
31	///
32	/// [1]: https://sass-lang.com/documentation/syntax/comments/
33	///
34	/// With this feature turned off comments are tokenized per the CSS specification:
35	///
36	/// ```md
37	/// <comment>
38	///            ╭──────────────────────────────────────────╮
39	///  │├─ "/*" ─╯-╭─ (anything but "*" followed by "/") ─╮─╰─ "*/" ─┤│
40	///              ╰──────────────────────────────────────╯
41	/// ```
42	///
43	/// With this feature turned on comments are tokenized with the additional grammar:
44	///
45	/// ```md
46	/// <comment>
47	///               ╭──────────────────────────────────────────╮
48	///  │├──╮─ "/*" ─╯-╭─ (anything but "*" followed by "/") ─╮─╰─ "*/" ─╭─┤│
49	///      │          ╰──────────────────────────────────────╯          │
50	///      │              ╭───────────────────────────╮                 │
51	///      ├─ "//" ───────╯-╭─ (anything but "\n") ─╮─╰─ "\n" ──────────╯
52	///      ╰─               ╰───────────────────────╯
53	/// ```
54	///
55	/// # Example
56	///
57	/// ```
58	/// use css_lexer::*;
59	/// let mut lexer = Lexer::new(&EmptyAtomSet::ATOMS, "// foo");
60	/// assert_eq!(lexer.advance(), Kind::Delim); // The first `/`
61	/// assert_eq!(lexer.advance(), Kind::Delim); // The second `/`
62	/// assert_eq!(lexer.advance(), Kind::Whitespace);
63	/// assert_eq!(lexer.advance(), Kind::Ident); // The "foo" in the comment
64	///
65	/// let mut lexer = Lexer::new_with_features(&EmptyAtomSet::ATOMS, "// foo", Feature::SingleLineComments);
66	/// let token = lexer.advance();
67	/// assert_eq!(token, Kind::Comment); // The whole comment "// foo"
68	/// assert_eq!(token, CommentStyle::Single);
69	/// ```
70	SingleLineComments = 0b0001,
71
72	/// The CSS Spec mentions that whitespace tokens should be [combined into a single Whitespace token][1]. This means
73	/// a single whitespace token can contain a cominbation of newlines, tabs, and space characters. This is often fine
74	/// as whitespace is generally ignored during parsing, however for certain IDE features it might be important to
75	/// tokenize discrete whitespace [Tokens][crate::Token], each with their own discrete whitespace. Enabling this flag
76	/// will enforce that the [Lexer][crate::Lexer] outputs these discrete tokens. In other words with this feature
77	/// enabled, multiple contiguous whitespace tokens may be returned from subsequent calls to
78	/// [Lexer::advance()][crate::Lexer::advance()], but with this feature off this will never be the case (as whitespace
79	/// is collapsed into a single [Token][crate::Token]).
80	///
81	/// With this feature turned off whitespace-tokens are tokenized per the CSS specification:
82	///
83	/// ```md
84	/// <newline>
85	///  │├──╮─ "\n" ───╭──┤│
86	///      ├─ "\r\n" ─┤
87	///      ├─ "\r" ───┤
88	///      ╰─ "\f" ───╯
89	///
90	/// <whitespace>
91	///  │├──╮─ " " ───────╭──┤│
92	///      ├─ "\t" ──────┤
93	///      ╰─ <newline> ─╯
94	///
95	/// <whitespace-token>
96	///  │├─╭─ <whitespace> ─╮─┤│
97	///     ╰────────────────╯
98	/// ```
99	///
100	/// With this feature turned on whitespace-tokens are tokenized with the additional grammar:
101	///
102	/// ```md
103	/// <whitespace-token>
104	///  │├──╮─╭─ " " ───────╮─╭──┤│
105	///      │ ╰─────────────╯ │
106	///      ├─╭─ "\t" ──────╮─┤
107	///      │ ╰─────────────╯ │
108	///      ╰─╭─ <newline> ─╮─╯
109	///        ╰─────────────╯
110	/// ```
111	///
112	/// [1]: https://drafts.csswg.org/css-syntax-3/#whitespace-diagram
113	///
114	/// # Example
115	///
116	/// ```
117	/// use css_lexer::*;
118	/// let mut lexer = Lexer::new(&EmptyAtomSet::ATOMS, "\n\thello world");
119	/// {
120	///		// This token will be collapsed Whitespace.
121	///		let token = lexer.advance();
122	///		assert_eq!(token, Kind::Whitespace);
123	///		// The Whitespace is comprised of many bits:
124	///		assert_eq!(token, Whitespace::Newline | Whitespace::Tab);
125	/// }
126	///	assert_eq!(lexer.advance(), Kind::Ident);
127	/// {
128	///		let token = lexer.advance();
129	///		assert_eq!(token, Kind::Whitespace);
130	///		assert_eq!(token, Whitespace::Space);
131	/// }
132	///	assert_eq!(lexer.advance(), Kind::Ident);
133	///
134	/// let mut lexer = Lexer::new_with_features(&EmptyAtomSet::ATOMS, "\n\thello world", Feature::SeparateWhitespace);
135	/// {
136	///		// This token will be discrete Whitespace, just the `\n`.
137	///		let token = lexer.advance();
138	///		assert_eq!(token, Kind::Whitespace);
139	///		// The Whitespace is comprised of a single bit:
140	///		assert_eq!(token, Whitespace::Newline);
141	/// }
142	/// {
143	///		// This token will be discrete Whitespace, just the `\t`.
144	///		let token = lexer.advance();
145	///		assert_eq!(token, Kind::Whitespace);
146	///		// The Whitespace is comprised of a single bit:
147	///		assert_eq!(token, Whitespace::Tab);
148	/// }
149	///	assert_eq!(lexer.advance(), Kind::Ident);
150	/// {
151	///		let token = lexer.advance();
152	///		assert_eq!(token, Kind::Whitespace);
153	///		assert_eq!(token, Whitespace::Space);
154	/// }
155	///	assert_eq!(lexer.advance(), Kind::Ident);
156	/// ```
157	SeparateWhitespace = 0b0010,
158}
159
/// Guards the memory footprint of `Feature`: the whole flag set must fit in exactly one byte.
#[test]
fn size_test() {
	let footprint_in_bytes = std::mem::size_of::<Feature>();
	assert_eq!(footprint_in_bytes, 1);
}
css_lexer/feature.rs

css_lexer/
feature.rs