1use crate::{
2 Cursor, Diagnostic, Feature, Kind, KindSet, ParserCheckpoint, ParserReturn, Result, SourceOffset, ToCursors,
3 traits::{Parse, Peek},
4};
5use bitmask_enum::bitmask;
6use bumpalo::{Bump, collections::Vec};
7use css_lexer::{AtomSet, DynAtomSet, SourceCursor};
8use std::mem;
9
10const BUFFER_LEN: usize = 12;
15const BUFFER_REFILL_INDEX: usize = BUFFER_LEN - 5;
19
20#[derive(Debug)]
21pub struct Parser<'a, I: Iterator<Item = Cursor> + Clone> {
22 pub(crate) source_text: &'a str,
23
24 pub(crate) cursor_iter: I,
25
26 #[allow(dead_code)]
27 pub(crate) features: Feature,
28
29 pub(crate) errors: Vec<'a, Diagnostic>,
30
31 pub(crate) trivia: Vec<'a, (Vec<'a, Cursor>, Cursor)>,
32
33 pub(crate) state: State,
34
35 pub(crate) bump: &'a Bump,
36
37 skip: KindSet,
38
39 stop: KindSet,
40
41 buffer: [Cursor; BUFFER_LEN],
42 buffer_index: usize,
43
44 #[cfg(debug_assertions)]
45 pub(crate) last_cursor: Option<Cursor>,
46}
47
48#[bitmask(u8)]
49#[bitmask_config(vec_debug)]
50#[derive(Default)]
51pub enum State {
52 Nested = 0b0000_0001,
53 DisallowRelativeSelector = 0b0000_0010,
55}
56
57#[inline]
58fn eof_cursor(len: usize) -> Cursor {
59 let eof_offset = css_lexer::SourceOffset(len as u32);
60 Cursor::new(eof_offset, css_lexer::Token::EOF)
61}
62
63impl<'a, I> Parser<'a, I>
64where
65 I: Iterator<Item = Cursor> + Clone,
66{
67 pub fn new(bump: &'a Bump, source_text: &'a str, mut cursor_iter: I) -> Self {
69 let eof_cursor = eof_cursor(source_text.len());
70 let mut buffer = [eof_cursor; BUFFER_LEN];
71 buffer.fill_with(|| cursor_iter.next().unwrap_or(eof_cursor));
72
73 Self {
74 source_text,
75 cursor_iter,
76 features: Feature::none(),
77 errors: Vec::new_in(bump),
78 trivia: Vec::new_in(bump),
79 state: State::none(),
80 skip: KindSet::TRIVIA,
81 stop: KindSet::NONE,
82 buffer,
83 buffer_index: 0,
84 bump,
85 #[cfg(debug_assertions)]
86 last_cursor: None,
87 }
88 }
89
90 pub fn with_features(mut self, features: Feature) -> Self {
91 self.features = features;
92 self
93 }
94
95 fn fill_buffer(&mut self, from: usize) {
96 self.buffer.copy_within(from..BUFFER_LEN, 0);
98 let eof = eof_cursor(self.source_text.len());
100 for i in BUFFER_LEN - from..BUFFER_LEN {
101 self.buffer[i] = self.cursor_iter.next().unwrap_or(eof);
102 }
103 self.buffer_index = 0;
104 }
105
106 #[inline]
107 pub fn bump(&self) -> &'a Bump {
108 self.bump
109 }
110
111 #[inline]
112 pub fn enabled(&self, other: Feature) -> bool {
113 self.features.contains(other)
114 }
115
116 #[inline]
117 pub fn is(&self, state: State) -> bool {
118 self.state.contains(state)
119 }
120
121 #[inline]
122 pub fn set_state(&mut self, state: State) -> State {
123 let old = self.state;
124 self.state = state;
125 old
126 }
127
128 #[inline]
129 pub fn set_skip(&mut self, skip: KindSet) -> KindSet {
130 let old = self.skip;
131 self.skip = skip;
132 old
133 }
134
135 #[inline]
136 pub fn set_stop(&mut self, stop: KindSet) -> KindSet {
137 let old = self.stop;
138 self.stop = stop;
139 old
140 }
141
142 pub fn parse_entirely<T: Parse<'a> + ToCursors>(&mut self) -> ParserReturn<'a, T> {
143 let output = match T::parse(self) {
144 Ok(output) => Some(output),
145 Err(error) => {
146 self.errors.push(error);
147 None
148 }
149 };
150 let remaining_non_trivia = !self.at_end() && self.peek_n(1) != Kind::Eof;
151 let at_end = self.peek_n_with_skip(1, KindSet::NONE) == Kind::Eof;
152
153 if !at_end {
154 let start = self.peek_n_with_skip(1, KindSet::NONE);
155 let mut end;
156 loop {
157 end = self.next();
158 if end == Kind::Eof {
159 break;
160 }
161 }
162 if remaining_non_trivia {
163 self.errors.push(Diagnostic::new(start, Diagnostic::expected_end).with_end_cursor(end));
164 }
165 }
166 let errors = mem::replace(&mut self.errors, Vec::new_in(self.bump));
167 let trivia = mem::replace(&mut self.trivia, Vec::new_in(self.bump));
168 ParserReturn::new(output, self.source_text, errors, trivia)
169 }
170
171 pub fn parse<T: Parse<'a>>(&mut self) -> Result<T> {
172 T::parse(self)
173 }
174
175 pub fn peek<T: Peek<'a>>(&self) -> bool {
176 T::peek(self, self.peek_n(1))
177 }
178
179 pub fn parse_if_peek<T: Peek<'a> + Parse<'a>>(&mut self) -> Result<Option<T>> {
180 if T::peek(self, self.peek_n(1)) { T::parse(self).map(Some) } else { Ok(None) }
181 }
182
183 pub fn try_parse<T: Parse<'a>>(&mut self) -> Result<T> {
184 T::try_parse(self)
185 }
186
187 pub fn try_parse_if_peek<T: Peek<'a> + Parse<'a>>(&mut self) -> Result<Option<T>> {
188 if T::peek(self, self.peek_n(1)) { T::try_parse(self).map(Some) } else { Ok(None) }
189 }
190
191 pub fn equals_atom(&self, c: Cursor, atom: &'static dyn DynAtomSet) -> bool {
192 let mut cursor_bits = c.atom_bits();
193 if cursor_bits == 0 {
194 if c != KindSet::ATOM_LIKE {
195 return false;
196 }
197 let source_cursor = self.to_source_cursor(c);
198 cursor_bits = atom.str_to_bits(&source_cursor.parse(self.bump));
199 }
200 cursor_bits == atom.bits()
201 }
202
203 pub fn to_atom<A: AtomSet + PartialEq>(&self, c: Cursor) -> A {
204 let bits = c.atom_bits();
205 if bits == 0 {
206 if c != KindSet::ATOM_LIKE {
207 return A::from_bits(0);
208 }
209 let source_cursor = self.to_source_cursor(c);
210 return A::from_str(&source_cursor.parse(self.bump));
211 }
212 #[cfg(debug_assertions)]
213 if c == KindSet::ATOM_LIKE && !((c == Kind::Ident || c == Kind::Function) && c.token().is_dashed_ident()) {
214 let source_cursor = self.to_source_cursor(c);
215 debug_assert!(
216 A::from_bits(bits) == A::from_str(&source_cursor.parse(self.bump)),
217 "{:?} -> {:?} != {:?} ({:?})",
218 c,
219 A::from_bits(bits),
220 A::from_str(&source_cursor.parse(self.bump)),
221 source_cursor.parse(self.bump)
222 );
223 }
224 A::from_bits(bits)
225 }
226
227 #[inline(always)]
228 pub fn offset(&self) -> SourceOffset {
229 self.buffer[self.buffer_index].offset()
230 }
231
232 #[inline(always)]
233 pub fn at_end(&self) -> bool {
234 self.buffer[self.buffer_index] == Kind::Eof
235 }
236
237 pub fn rewind(&mut self, checkpoint: ParserCheckpoint<I>) {
238 let ParserCheckpoint { iter, errors_pos, trivia_pos, buffer, buffer_index, skip, stop, state, .. } = checkpoint;
239
240 self.cursor_iter = iter;
241
242 self.errors.truncate(errors_pos as usize);
243 self.trivia.truncate(trivia_pos as usize);
244
245 self.buffer = buffer;
246 self.buffer_index = buffer_index;
247
248 self.skip = skip;
249 self.stop = stop;
250 self.state = state;
251
252 #[cfg(debug_assertions)]
253 {
254 self.last_cursor = None;
255 }
256 }
257
258 #[inline]
259 pub fn checkpoint(&self) -> ParserCheckpoint<I> {
260 ParserCheckpoint {
261 cursor: self.buffer[self.buffer_index],
262 errors_pos: self.errors.len() as u8,
263 trivia_pos: self.trivia.len() as u16,
264 iter: self.cursor_iter.clone(),
265 buffer: self.buffer,
266 buffer_index: self.buffer_index,
267 skip: self.skip,
268 stop: self.stop,
269 state: self.state,
270 }
271 }
272
273 #[inline]
274 pub fn next_is_stop(&self) -> bool {
275 for c in &self.buffer[self.buffer_index..BUFFER_LEN] {
276 if c != self.skip {
277 return c == self.stop;
278 }
279 }
280
281 let mut iter = self.cursor_iter.clone();
282 loop {
283 let Some(cursor) = iter.next() else {
284 return false;
285 };
286 if cursor != self.skip {
287 return cursor == self.stop;
288 }
289 }
290 }
291
292 #[inline]
293 pub(crate) fn peek_n_with_skip(&self, n: u8, skip: KindSet) -> Cursor {
294 let mut remaining = n;
295
296 for c in &self.buffer[self.buffer_index..BUFFER_LEN] {
297 if c == Kind::Eof {
298 return *c;
299 }
300 if c != skip {
301 remaining -= 1;
302 if remaining == 0 {
303 return *c;
304 }
305 }
306 }
307
308 let mut iter = self.cursor_iter.clone();
309 loop {
310 let Some(cursor) = iter.next() else {
311 return eof_cursor(self.source_text.len());
312 };
313 if cursor == Kind::Eof {
314 return cursor;
315 }
316 if cursor != skip {
317 remaining -= 1;
318 if remaining == 0 {
319 return cursor;
320 }
321 }
322 }
323 }
324
325 #[inline]
326 pub fn peek_n(&self, n: u8) -> Cursor {
327 self.peek_n_with_skip(n, self.skip)
328 }
329
330 #[inline]
331 pub fn peek_n_including_whitespace(&self, n: u8) -> Cursor {
332 self.peek_n_with_skip(n, self.skip.remove(Kind::Whitespace))
333 }
334
335 pub fn to_source_cursor(&self, cursor: Cursor) -> SourceCursor<'a> {
336 SourceCursor::from(cursor, cursor.str_slice(self.source_text))
337 }
338
339 pub fn consume_trivia(&mut self) -> Vec<'a, Cursor> {
340 let mut trivia = Vec::new_in(self.bump);
341 for i in self.buffer_index..BUFFER_LEN {
342 let c = self.buffer[i];
343 if c == Kind::Eof {
344 self.buffer_index = i;
345 return trivia;
346 } else if c == self.skip {
347 trivia.push(c)
348 } else {
349 self.buffer_index = i;
350 self.fill_buffer(i);
351 return trivia;
352 }
353 }
354
355 loop {
356 let Some(c) = self.cursor_iter.next() else {
357 return trivia;
358 };
359 if c == Kind::Eof {
360 return trivia;
361 } else if c == self.skip {
362 trivia.push(c)
363 } else {
364 let eof = eof_cursor(self.source_text.len());
365 self.buffer[0] = c;
366 for i in 1..BUFFER_LEN {
367 self.buffer[i] = self.cursor_iter.next().unwrap_or(eof);
368 }
369 self.buffer_index = 0;
370 return trivia;
371 }
372 }
373 }
374
375 pub fn consume_trivia_as_leading(&mut self) {
379 let trivia = self.consume_trivia();
380 if !trivia.is_empty() {
381 let next = self.peek_n(1);
383 self.trivia.push((trivia, next));
384 }
385 }
386
387 #[allow(clippy::should_implement_trait)]
388 pub fn next(&mut self) -> Cursor {
389 let mut pending_trivia = Vec::new_in(self.bump);
391
392 loop {
393 if self.buffer_index >= BUFFER_REFILL_INDEX {
394 self.fill_buffer(self.buffer_index);
395 }
396
397 for i in self.buffer_index..BUFFER_LEN {
398 let c = self.buffer[i];
399 if c == Kind::Eof {
400 self.buffer_index = i;
401 if !pending_trivia.is_empty() {
403 self.trivia.push((pending_trivia.clone(), c));
404 }
405 #[cfg(debug_assertions)]
406 {
407 self.last_cursor = None;
408 }
409 return c;
410 } else if c == self.skip {
411 pending_trivia.push(c);
412 } else {
413 self.buffer_index = i + 1;
414 if self.buffer_index >= BUFFER_REFILL_INDEX {
415 self.fill_buffer(self.buffer_index);
416 }
417 if !pending_trivia.is_empty() {
419 self.trivia.push((pending_trivia.clone(), c));
420 }
421 #[cfg(debug_assertions)]
422 {
423 if let Some(last_cursor) = self.last_cursor {
424 debug_assert!(last_cursor != c, "Detected a next loop, {c:?} was fetched twice");
425 }
426 self.last_cursor = Some(c);
427 }
428 return c;
429 }
430 }
431
432 self.fill_buffer(BUFFER_LEN);
434 }
435 }
436}
437
438#[test]
439fn test_filling_buffer_with_skip_tokens() {
440 let str = "/*x*//*x*//*x*//*x*//*x*//*x*//*x*//*x*//*x*//*x*//*x*/a";
441 let bump = bumpalo::Bump::default();
442 let lexer = css_lexer::Lexer::new(&css_lexer::EmptyAtomSet::ATOMS, str);
443 let mut p = Parser::new(&bump, str, lexer);
444 let c = p.next();
445 assert_eq!(c.token(), Kind::Ident);
446 let _ = p.at_end();
448 let _ = p.offset();
449}
450
451#[test]
452fn peek_and_next() {
453 let str = "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21";
454 let bump = bumpalo::Bump::default();
455 let lexer = css_lexer::Lexer::new(&css_lexer::EmptyAtomSet::ATOMS, &str);
456 let mut p = Parser::new(&bump, &str, lexer);
457 assert_eq!(p.at_end(), false);
458 assert_eq!(p.offset(), 0);
459 for n in 0..=1 {
460 let c = p.checkpoint();
461 for i in 0..=19 {
462 let c = p.peek_n(1);
463 assert_eq!(c.token(), Kind::Number);
464 assert_eq!(c.token().value(), i as f32);
465 let c = p.peek_n(2);
466 assert_eq!(c.token(), Kind::Number);
467 assert_eq!(c.token().value(), (i + 1) as f32);
468 let c = p.peek_n(3);
469 assert_eq!(c.token(), Kind::Number);
470 assert_eq!(c.token().value(), (i + 2) as f32);
471 let c = p.next();
472 assert_eq!(c.token().value(), i as f32);
473 let c = p.peek_n(1);
474 assert_eq!(c.token(), Kind::Number);
475 assert_eq!(c.token().value(), (i + 1) as f32);
476 }
477 if n == 0 {
478 p.rewind(c)
479 }
480 }
481 let c = p.next();
482 assert_eq!(c.token(), Kind::Number);
483 assert_eq!(c.token().value(), 20.0);
484 let c = p.next();
485 assert_eq!(c.token(), Kind::Number);
486 assert_eq!(c.token().value(), 21.0);
487 let c = p.next();
488 assert_eq!(c.token(), Kind::Eof);
489}
490
491#[test]
492fn peek_and_next_with_whitsespace() {
493 let str = "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21";
494 let bump = bumpalo::Bump::default();
495 let lexer = css_lexer::Lexer::new(&css_lexer::EmptyAtomSet::ATOMS, &str);
496 let mut p = Parser::new(&bump, &str, lexer);
497 p.set_skip(KindSet::COMMENTS);
498 assert_eq!(p.at_end(), false);
499 assert_eq!(p.offset(), 0);
500 for n in 0..=1 {
501 let c = p.checkpoint();
502 for i in 0..=19 {
503 let c = p.peek_n(1);
504 assert_eq!(c.token(), Kind::Number);
505 assert_eq!(c.token().value(), i as f32);
506 let c = p.peek_n(2);
507 assert_eq!(c.token(), Kind::Whitespace);
508 let c = p.peek_n(3);
509 assert_eq!(c.token(), Kind::Number);
510 assert_eq!(c.token().value(), (i + 1) as f32);
511 let c = p.peek_n(4);
512 assert_eq!(c.token(), Kind::Whitespace);
513 let c = p.peek_n(5);
514 assert_eq!(c.token(), Kind::Number);
515 assert_eq!(c.token().value(), (i + 2) as f32);
516 let c = p.next();
517 assert_eq!(c.token().value(), i as f32);
518 let c = p.peek_n(1);
519 assert_eq!(c.token(), Kind::Whitespace);
520 let c = p.peek_n(2);
521 assert_eq!(c.token(), Kind::Number);
522 assert_eq!(c.token().value(), (i + 1) as f32);
523 p.next();
524 }
525 if n == 0 {
526 p.rewind(c);
527 }
528 }
529 let c = p.next();
530 assert_eq!(c.token(), Kind::Number);
531 assert_eq!(c.token().value(), 20.0);
532 let c = p.next();
533 assert_eq!(c.token(), Kind::Whitespace);
534 let c = p.next();
535 assert_eq!(c.token(), Kind::Number);
536 assert_eq!(c.token().value(), 21.0);
537 let c = p.next();
538 assert_eq!(c.token(), Kind::Eof);
539}