1use crate::{
2 Cursor, Diagnostic, Feature, Kind, KindSet, ParserCheckpoint, ParserReturn, Result, SourceOffset, ToCursors,
3 traits::{Parse, Peek},
4};
5use bitmask_enum::bitmask;
6use bumpalo::{Bump, collections::Vec};
7use css_lexer::{AtomSet, DynAtomSet, SourceCursor};
8use std::mem;
9
/// Number of cursors held in the parser's fixed-size lookahead `buffer`.
const BUFFER_LEN: usize = 12;
/// Once `buffer_index` reaches this position, `Parser::next` shifts the unread cursors to the front of
/// the buffer and tops the buffer up from the cursor iterator.
const BUFFER_REFILL_INDEX: usize = BUFFER_LEN - 5;
19
/// A buffered parser over an iterator of [`Cursor`]s.
///
/// The parser keeps a small fixed-size window of upcoming cursors (`buffer`) so that peeking does not
/// have to clone the iterator in the common case, and it records skipped cursors ("trivia") as it goes.
#[derive(Debug)]
pub struct Parser<'a, I: Iterator<Item = Cursor> + Clone> {
    /// The complete source text; used to slice cursor text and to position the synthetic EOF cursor.
    pub(crate) source_text: &'a str,

    /// Source of cursors. It is cloned for lookahead beyond the buffer and for checkpoints.
    pub(crate) cursor_iter: I,

    /// Feature flags, set through `with_features` and queried through `enabled`.
    #[allow(dead_code)]
    pub(crate) features: Feature,

    /// Diagnostics collected so far; handed to the `ParserReturn` by `parse_entirely`.
    pub(crate) errors: Vec<'a, Diagnostic>,

    /// Skipped cursors, each group paired with the cursor that followed it.
    pub(crate) trivia: Vec<'a, (Vec<'a, Cursor>, Cursor)>,

    /// Current context flags; see `is` and `set_state`.
    pub(crate) state: State,

    /// Arena used for every allocation the parser makes.
    pub(crate) bump: &'a Bump,

    /// Kinds that `next` and `peek_n` pass over.
    skip: KindSet,

    /// Kinds tested by `next_is_stop`.
    stop: KindSet,

    /// Window of upcoming cursors; slots past the end of input hold EOF cursors.
    buffer: [Cursor; BUFFER_LEN],
    /// Index into `buffer` of the next cursor that has not been consumed.
    buffer_index: usize,

    /// Debug builds only: the last non-EOF cursor returned by the iterator path of `next`, used to
    /// assert that the same cursor is not produced twice in a row.
    #[cfg(debug_assertions)]
    pub(crate) last_cursor: Option<Cursor>,
}
47
/// Bit flags describing the parser's current context. Stored in `Parser::state`, tested with
/// `Parser::is` and swapped with `Parser::set_state`.
#[bitmask(u8)]
#[bitmask_config(vec_debug)]
#[derive(Default)]
pub enum State {
    /// NOTE(review): presumably set while parsing inside a nested rule — confirm against the callers.
    Nested = 0b0000_0001,
    /// NOTE(review): presumably tells selector parsing to reject relative selectors — confirm against
    /// the callers.
    DisallowRelativeSelector = 0b0000_0010,
}
56
57#[inline]
58fn eof_cursor(len: usize) -> Cursor {
59 let eof_offset = css_lexer::SourceOffset(len as u32);
60 Cursor::new(eof_offset, css_lexer::Token::EOF)
61}
62
63impl<'a, I> Parser<'a, I>
64where
65 I: Iterator<Item = Cursor> + Clone,
66{
67 pub fn new(bump: &'a Bump, source_text: &'a str, mut cursor_iter: I) -> Self {
69 let eof_cursor = eof_cursor(source_text.len());
70 let mut buffer = [eof_cursor; BUFFER_LEN];
71 buffer.fill_with(|| cursor_iter.next().unwrap_or(eof_cursor));
72
73 Self {
74 source_text,
75 cursor_iter,
76 features: Feature::none(),
77 errors: Vec::new_in(bump),
78 trivia: Vec::new_in(bump),
79 state: State::none(),
80 skip: KindSet::TRIVIA,
81 stop: KindSet::NONE,
82 buffer,
83 buffer_index: 0,
84 bump,
85 #[cfg(debug_assertions)]
86 last_cursor: None,
87 }
88 }
89
90 pub fn with_features(mut self, features: Feature) -> Self {
91 self.features = features;
92 self
93 }
94
95 fn fill_buffer(&mut self, from: usize) {
96 self.buffer.copy_within(from..BUFFER_LEN, 0);
98 let eof = eof_cursor(self.source_text.len());
100 for i in BUFFER_LEN - from..BUFFER_LEN {
101 self.buffer[i] = self.cursor_iter.next().unwrap_or(eof);
102 }
103 self.buffer_index = 0;
104 }
105
106 #[inline]
107 pub fn bump(&self) -> &'a Bump {
108 self.bump
109 }
110
111 #[inline]
112 pub fn enabled(&self, other: Feature) -> bool {
113 self.features.contains(other)
114 }
115
116 #[inline]
117 pub fn is(&self, state: State) -> bool {
118 self.state.contains(state)
119 }
120
121 #[inline]
122 pub fn set_state(&mut self, state: State) -> State {
123 let old = self.state;
124 self.state = state;
125 old
126 }
127
128 #[inline]
129 pub fn set_skip(&mut self, skip: KindSet) -> KindSet {
130 let old = self.skip;
131 self.skip = skip;
132 old
133 }
134
135 #[inline]
136 pub fn set_stop(&mut self, stop: KindSet) -> KindSet {
137 let old = self.stop;
138 self.stop = stop;
139 old
140 }
141
142 pub fn parse_entirely<T: Parse<'a> + ToCursors>(&mut self) -> ParserReturn<'a, T> {
143 let output = match T::parse(self) {
144 Ok(output) => Some(output),
145 Err(error) => {
146 self.errors.push(error);
147 None
148 }
149 };
150 let remaining_non_trivia = !self.at_end() && self.peek_n(1) != Kind::Eof;
151 let at_end = self.peek_n_with_skip(1, KindSet::NONE) == Kind::Eof;
152
153 if !at_end {
154 let start = self.peek_n_with_skip(1, KindSet::NONE);
155 let mut end;
156 loop {
157 end = self.next();
158 if end == Kind::Eof {
159 break;
160 }
161 }
162 if remaining_non_trivia {
163 self.errors.push(Diagnostic::new(start, Diagnostic::expected_end).with_end_cursor(end));
164 }
165 }
166 let errors = mem::replace(&mut self.errors, Vec::new_in(self.bump));
167 let trivia = mem::replace(&mut self.trivia, Vec::new_in(self.bump));
168 ParserReturn::new(output, self.source_text, errors, trivia)
169 }
170
171 pub fn parse<T: Parse<'a>>(&mut self) -> Result<T> {
172 T::parse(self)
173 }
174
175 pub fn peek<T: Peek<'a>>(&self) -> bool {
176 T::peek(self, self.peek_n(1))
177 }
178
179 pub fn parse_if_peek<T: Peek<'a> + Parse<'a>>(&mut self) -> Result<Option<T>> {
180 if T::peek(self, self.peek_n(1)) { T::parse(self).map(Some) } else { Ok(None) }
181 }
182
183 pub fn try_parse<T: Parse<'a>>(&mut self) -> Result<T> {
184 T::try_parse(self)
185 }
186
187 pub fn try_parse_if_peek<T: Peek<'a> + Parse<'a>>(&mut self) -> Result<Option<T>> {
188 if T::peek(self, self.peek_n(1)) { T::try_parse(self).map(Some) } else { Ok(None) }
189 }
190
191 pub fn equals_atom(&self, c: Cursor, atom: &'static dyn DynAtomSet) -> bool {
192 let mut cursor_bits = c.atom_bits();
193 if cursor_bits == 0 {
194 let source_cursor = self.to_source_cursor(c);
195 cursor_bits = atom.str_to_bits(&source_cursor.parse(self.bump));
196 }
197 cursor_bits == atom.bits()
198 }
199
200 pub fn to_atom<A: AtomSet + PartialEq>(&self, c: Cursor) -> A {
201 let bits = c.atom_bits();
202 if bits == 0 {
203 let source_cursor = self.to_source_cursor(c);
204 return A::from_str(&source_cursor.parse(self.bump));
205 }
206 #[cfg(debug_assertions)]
207 {
208 let source_cursor = self.to_source_cursor(c);
209 if !(c == Kind::Ident && c.token().is_dashed_ident()) {
210 debug_assert!(
211 A::from_bits(bits) == A::from_str(&source_cursor.parse(self.bump)),
212 "{:?} -> {:?} != {:?} ({:?})",
213 c,
214 A::from_bits(bits),
215 A::from_str(&source_cursor.parse(self.bump)),
216 source_cursor.parse(self.bump)
217 );
218 }
219 }
220 A::from_bits(bits)
221 }
222
223 #[inline(always)]
224 pub fn offset(&self) -> SourceOffset {
225 self.buffer[self.buffer_index].offset()
226 }
227
228 #[inline(always)]
229 pub fn at_end(&self) -> bool {
230 self.buffer[self.buffer_index] == Kind::Eof
231 }
232
233 pub fn rewind(&mut self, checkpoint: ParserCheckpoint<I>) {
234 let ParserCheckpoint { iter, errors_pos, trivia_pos, buffer, buffer_index, skip, stop, state, .. } = checkpoint;
235
236 self.cursor_iter = iter;
237
238 self.errors.truncate(errors_pos as usize);
239 self.trivia.truncate(trivia_pos as usize);
240
241 self.buffer = buffer;
242 self.buffer_index = buffer_index;
243
244 self.skip = skip;
245 self.stop = stop;
246 self.state = state;
247
248 #[cfg(debug_assertions)]
249 {
250 self.last_cursor = None;
251 }
252 }
253
254 #[inline]
255 pub fn checkpoint(&self) -> ParserCheckpoint<I> {
256 ParserCheckpoint {
257 cursor: self.buffer[self.buffer_index],
258 errors_pos: self.errors.len() as u8,
259 trivia_pos: self.trivia.len() as u16,
260 iter: self.cursor_iter.clone(),
261 buffer: self.buffer,
262 buffer_index: self.buffer_index,
263 skip: self.skip,
264 stop: self.stop,
265 state: self.state,
266 }
267 }
268
269 #[inline]
270 pub fn next_is_stop(&self) -> bool {
271 for c in &self.buffer[self.buffer_index..BUFFER_LEN] {
272 if c != self.skip {
273 return c == self.stop;
274 }
275 }
276
277 let mut iter = self.cursor_iter.clone();
278 loop {
279 let Some(cursor) = iter.next() else {
280 return false;
281 };
282 if cursor != self.skip {
283 return cursor == self.stop;
284 }
285 }
286 }
287
288 #[inline]
289 pub(crate) fn peek_n_with_skip(&self, n: u8, skip: KindSet) -> Cursor {
290 let mut remaining = n;
291
292 for c in &self.buffer[self.buffer_index..BUFFER_LEN] {
293 if c == Kind::Eof {
294 return *c;
295 }
296 if c != skip {
297 remaining -= 1;
298 if remaining == 0 {
299 return *c;
300 }
301 }
302 }
303
304 let mut iter = self.cursor_iter.clone();
305 loop {
306 let Some(cursor) = iter.next() else {
307 return eof_cursor(self.source_text.len());
308 };
309 if cursor == Kind::Eof {
310 return cursor;
311 }
312 if cursor != skip {
313 remaining -= 1;
314 if remaining == 0 {
315 return cursor;
316 }
317 }
318 }
319 }
320
321 #[inline]
322 pub fn peek_n(&self, n: u8) -> Cursor {
323 self.peek_n_with_skip(n, self.skip)
324 }
325
326 pub fn to_source_cursor(&self, cursor: Cursor) -> SourceCursor<'a> {
327 SourceCursor::from(cursor, cursor.str_slice(self.source_text))
328 }
329
330 pub fn consume_trivia(&mut self) -> Vec<'a, Cursor> {
331 let mut trivia = Vec::new_in(self.bump);
332 for i in self.buffer_index..BUFFER_LEN {
333 let c = self.buffer[i];
334 if c == Kind::Eof {
335 return trivia;
336 } else if c == self.skip {
337 trivia.push(c)
338 } else {
339 self.fill_buffer(i);
340 return trivia;
341 }
342 }
343
344 loop {
345 let Some(c) = self.cursor_iter.next() else {
346 return trivia;
347 };
348 if c == Kind::Eof {
349 return trivia;
350 } else if c == self.skip {
351 trivia.push(c)
352 } else {
353 let eof = eof_cursor(self.source_text.len());
354 self.buffer[0] = c;
355 for i in 1..BUFFER_LEN {
356 self.buffer[i] = self.cursor_iter.next().unwrap_or(eof);
357 }
358 self.buffer_index = 0;
359 return trivia;
360 }
361 }
362 }
363
364 pub fn consume_trivia_as_leading(&mut self) {
368 let trivia = self.consume_trivia();
369 if !trivia.is_empty() {
370 let next = self.peek_n(1);
372 self.trivia.push((trivia, next));
373 }
374 }
375
376 #[allow(clippy::should_implement_trait)]
377 pub fn next(&mut self) -> Cursor {
378 let mut pending_trivia = Vec::new_in(self.bump);
380
381 if self.buffer_index >= BUFFER_REFILL_INDEX {
382 self.fill_buffer(self.buffer_index);
383 }
384
385 for i in self.buffer_index..BUFFER_LEN {
386 let c = self.buffer[i];
387 if c == Kind::Eof {
388 self.buffer_index = i + 1;
389 if !pending_trivia.is_empty() {
391 self.trivia.push((pending_trivia.clone(), c));
392 }
393 return c;
394 } else if c == self.skip {
395 pending_trivia.push(c);
396 self.buffer_index = i + 1;
397 } else {
398 self.buffer_index = i + 1;
399 if !pending_trivia.is_empty() {
401 self.trivia.push((pending_trivia.clone(), c));
402 }
403 return c;
404 }
405 }
406
407 let c;
408 loop {
409 let Some(cursor) = self.cursor_iter.next() else {
410 let eof_cursor = eof_cursor(self.source_text.len());
411 if !pending_trivia.is_empty() {
412 self.trivia.push((pending_trivia.clone(), eof_cursor));
413 }
414 return eof_cursor;
415 };
416 if cursor == Kind::Eof || cursor != self.skip {
417 c = cursor;
418 break;
419 }
420 pending_trivia.push(cursor);
421 }
422
423 if !pending_trivia.is_empty() {
425 self.trivia.push((pending_trivia.clone(), c));
426 }
427
428 #[cfg(debug_assertions)]
429 if let Some(last_cursor) = self.last_cursor {
430 debug_assert!(last_cursor != c, "Detected a next loop, {c:?} was fetched twice");
431 }
432 #[cfg(debug_assertions)]
433 if c == Kind::Eof {
434 self.last_cursor = None;
435 } else {
436 self.last_cursor = Some(c);
437 }
438
439 c
440 }
441}
442
/// Walks 22 whitespace-separated numbers with the default skip set, checking that `peek_n` and `next`
/// agree across buffer refills and that a `checkpoint`/`rewind` round trip replays the same cursors.
#[test]
fn peek_and_next() {
    let source = "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21";
    let bump = bumpalo::Bump::default();
    let lexer = css_lexer::Lexer::new(&css_lexer::EmptyAtomSet::ATOMS, &source);
    let mut p = Parser::new(&bump, &source, lexer);

    let expect_number = |cursor: Cursor, value: i32| {
        assert_eq!(cursor.token(), Kind::Number);
        assert_eq!(cursor.token().value(), value as f32);
    };

    assert!(!p.at_end());
    assert_eq!(p.offset(), 0);

    // The first pass is rewound, so the second pass must see exactly the same cursors.
    for pass in 0..2 {
        let checkpoint = p.checkpoint();
        for i in 0..20 {
            expect_number(p.peek_n(1), i);
            expect_number(p.peek_n(2), i + 1);
            expect_number(p.peek_n(3), i + 2);
            let consumed = p.next();
            assert_eq!(consumed.token().value(), i as f32);
            expect_number(p.peek_n(1), i + 1);
        }
        if pass == 0 {
            p.rewind(checkpoint)
        }
    }

    expect_number(p.next(), 20);
    expect_number(p.next(), 21);
    let last = p.next();
    assert_eq!(last.token(), Kind::Eof);
}
482
/// Same walk as `peek_and_next`, but with only comments skipped, so every whitespace cursor is
/// visible to `peek_n` and `next`.
#[test]
fn peek_and_next_with_whitsespace() {
    let source = "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21";
    let bump = bumpalo::Bump::default();
    let lexer = css_lexer::Lexer::new(&css_lexer::EmptyAtomSet::ATOMS, &source);
    let mut p = Parser::new(&bump, &source, lexer);
    p.set_skip(KindSet::COMMENTS);

    let expect_number = |cursor: Cursor, value: i32| {
        assert_eq!(cursor.token(), Kind::Number);
        assert_eq!(cursor.token().value(), value as f32);
    };
    let expect_whitespace = |cursor: Cursor| {
        assert_eq!(cursor.token(), Kind::Whitespace);
    };

    assert!(!p.at_end());
    assert_eq!(p.offset(), 0);

    // The first pass is rewound, so the second pass must see exactly the same cursors.
    for pass in 0..2 {
        let checkpoint = p.checkpoint();
        for i in 0..20 {
            expect_number(p.peek_n(1), i);
            expect_whitespace(p.peek_n(2));
            expect_number(p.peek_n(3), i + 1);
            expect_whitespace(p.peek_n(4));
            expect_number(p.peek_n(5), i + 2);
            let consumed = p.next();
            assert_eq!(consumed.token().value(), i as f32);
            expect_whitespace(p.peek_n(1));
            expect_number(p.peek_n(2), i + 1);
            // Step over the whitespace cursor that separates the numbers.
            p.next();
        }
        if pass == 0 {
            p.rewind(checkpoint);
        }
    }

    expect_number(p.next(), 20);
    expect_whitespace(p.next());
    expect_number(p.next(), 21);
    let last = p.next();
    assert_eq!(last.token(), Kind::Eof);
}