css_lexer/feature.rs
1use bitmask_enum::bitmask;
2
/// A set of runtime feature flags which can be enabled individually or in combination, and which change the way an
/// individual [Lexer][crate::Lexer] produces [Tokens][crate::Token].
///
/// Each flag occupies its own bit of the underlying `u8`. To combine multiple features, use the bitwise OR operator.
///
/// # Example
///
/// ```
/// use css_lexer::*;
/// let features = Feature::SingleLineComments | Feature::SeparateWhitespace;
/// let mut lexer = Lexer::new_with_features("// foo", features);
/// ```
#[bitmask(u8)]
#[bitmask_config(vec_debug)]
#[derive(Default)]
pub enum Feature {
    /// With this flag enabled the [Lexer][crate::Lexer] will produce [Tokens][crate::Token] with
    /// [Kind::Comment][crate::Kind::Comment] when it encounters two consecutive SOLIDUS characters (`//`). The
    /// [Token][crate::Token] will have a length up to the next newline (`\n`) character. The contents between the two
    /// SOLIDUS (`//`) characters and the `\n` will be consumed by this token, so no tokens will be produced for the
    /// contents of the comment.
    ///
    /// If this flag is not enabled, encountering something that looks like a single line comment will produce two
    /// [Kind::Delim][crate::Kind::Delim] tokens for the two SOLIDUS (`//`) characters, and any number of other tokens
    /// depending on the contents of the comment, per the CSS specification.
    ///
    /// A comment with two leading SOLIDUS characters is not valid CSS syntax, but might be considered valid syntax in
    /// other CSS-alike languages, [for example SASS][1].
    ///
    /// [1]: https://sass-lang.com/documentation/syntax/comments/
    ///
    /// With this feature turned off comments are tokenized per the CSS specification:
    ///
    /// ```md
    /// <comment>
    ///           ╭──────────────────────────────────────────╮
    /// │├─ "/*" ─╯-╭─ (anything but "*" followed by "/") ─╮─╰─ "*/" ─┤│
    ///             ╰──────────────────────────────────────╯
    /// ```
    ///
    /// With this feature turned on comments are tokenized with the additional grammar:
    ///
    /// ```md
    /// <comment>
    ///              ╭──────────────────────────────────────────╮
    /// │├──╮─ "/*" ─╯-╭─ (anything but "*" followed by "/") ─╮─╰─ "*/" ─╭─┤│
    ///     │          ╰──────────────────────────────────────╯          │
    ///     │              ╭───────────────────────────╮                 │
    ///     ├─ "//" ───────╯-╭─ (anything but "\n") ─╮─╰─ "\n" ──────────╯
    ///     ╰─               ╰───────────────────────╯
    /// ```
    ///
    /// # Example
    ///
    /// ```
    /// use css_lexer::*;
    /// let mut lexer = Lexer::new("// foo");
    /// assert_eq!(lexer.advance(), Kind::Delim); // The first `/`
    /// assert_eq!(lexer.advance(), Kind::Delim); // The second `/`
    /// assert_eq!(lexer.advance(), Kind::Whitespace);
    /// assert_eq!(lexer.advance(), Kind::Ident); // The "foo" in the comment
    ///
    /// lexer = Lexer::new_with_features("// foo", Feature::SingleLineComments);
    /// let token = lexer.advance();
    /// assert_eq!(token, Kind::Comment); // The whole comment "// foo"
    /// assert_eq!(token, CommentStyle::Single);
    /// ```
    // Lowest bit of the mask.
    SingleLineComments = 0b0001,

    /// The CSS Spec mentions that whitespace tokens should be [combined into a single Whitespace token][1]. This means
    /// a single whitespace token can contain a combination of newlines, tabs, and space characters. This is often fine
    /// as whitespace is generally ignored during parsing, however for certain IDE features it might be important to
    /// tokenize discrete whitespace [Tokens][crate::Token], each with their own discrete whitespace. Enabling this flag
    /// will enforce that the [Lexer][crate::Lexer] outputs these discrete tokens. In other words, with this feature
    /// enabled, multiple contiguous whitespace tokens may be returned from subsequent calls to
    /// [Lexer::advance()][crate::Lexer::advance()], but with this feature off this will never be the case (as whitespace
    /// is collapsed into a single [Token][crate::Token]).
    ///
    /// With this feature turned off whitespace-tokens are tokenized per the CSS specification:
    ///
    /// ```md
    /// <newline>
    /// │├──╮─ "\n" ───╭──┤│
    ///     ├─ "\r\n" ─┤
    ///     ├─ "\r" ───┤
    ///     ╰─ "\f" ───╯
    ///
    /// <whitespace>
    /// │├──╮─ " " ───────╭──┤│
    ///     ├─ "\t" ──────┤
    ///     ╰─ <newline> ─╯
    ///
    /// <whitespace-token>
    /// │├─╭─ <whitespace> ─╮─┤│
    ///    ╰────────────────╯
    /// ```
    ///
    /// With this feature turned on whitespace-tokens are tokenized with the additional grammar:
    ///
    /// ```md
    /// <whitespace-token>
    /// │├──╮─╭─ " " ───────╮─╭──┤│
    ///     │ ╰─────────────╯ │
    ///     ├─╭─ "\t" ──────╮─┤
    ///     │ ╰─────────────╯ │
    ///     ╰─╭─ <newline> ─╮─╯
    ///       ╰─────────────╯
    /// ```
    ///
    /// [1]: https://drafts.csswg.org/css-syntax-3/#whitespace-diagram
    ///
    /// # Example
    ///
    /// ```
    /// use css_lexer::*;
    /// let mut lexer = Lexer::new("\n\thello world");
    /// {
    /// // This token will be collapsed Whitespace.
    /// let token = lexer.advance();
    /// assert_eq!(token, Kind::Whitespace);
    /// // The Whitespace is comprised of many bits:
    /// assert_eq!(token, Whitespace::Newline | Whitespace::Tab);
    /// }
    /// assert_eq!(lexer.advance(), Kind::Ident);
    /// {
    /// let token = lexer.advance();
    /// assert_eq!(token, Kind::Whitespace);
    /// assert_eq!(token, Whitespace::Space);
    /// }
    /// assert_eq!(lexer.advance(), Kind::Ident);
    ///
    /// lexer = Lexer::new_with_features("\n\thello world", Feature::SeparateWhitespace);
    /// {
    /// // This token will be discrete Whitespace, just the `\n`.
    /// let token = lexer.advance();
    /// assert_eq!(token, Kind::Whitespace);
    /// // The Whitespace is comprised of a single bit:
    /// assert_eq!(token, Whitespace::Newline);
    /// }
    /// {
    /// // This token will be discrete Whitespace, just the `\t`.
    /// let token = lexer.advance();
    /// assert_eq!(token, Kind::Whitespace);
    /// // The Whitespace is comprised of a single bit:
    /// assert_eq!(token, Whitespace::Tab);
    /// }
    /// assert_eq!(lexer.advance(), Kind::Ident);
    /// {
    /// let token = lexer.advance();
    /// assert_eq!(token, Kind::Whitespace);
    /// assert_eq!(token, Whitespace::Space);
    /// }
    /// assert_eq!(lexer.advance(), Kind::Ident);
    /// ```
    // Second bit of the mask; distinct from `SingleLineComments` so the two can be OR-ed together.
    SeparateWhitespace = 0b0010,
}
159
#[test]
fn size_test() {
    // `Feature` is declared with `#[bitmask(u8)]`; this pins its in-memory width to a single byte so that an
    // accidental change of the backing integer type is caught by the test suite.
    let feature_width = ::std::mem::size_of::<Feature>();
    assert_eq!(feature_width, 1);
}