1use crate::{
2 Cursor, Diagnostic, Feature, Kind, KindSet, ParserCheckpoint, ParserReturn, Result, SourceOffset, ToCursors,
3 traits::{Parse, Peek},
4};
5use bitmask_enum::bitmask;
6use bumpalo::{Bump, collections::Vec};
7use css_lexer::{AtomSet, DynAtomSet, SourceCursor};
8use std::mem;
9
10const BUFFER_LEN: usize = 12;
15const BUFFER_REFILL_INDEX: usize = BUFFER_LEN - 5;
19
20#[derive(Debug)]
21pub struct Parser<'a, I: Iterator<Item = Cursor> + Clone> {
22 pub(crate) source_text: &'a str,
23
24 pub(crate) cursor_iter: I,
25
26 #[allow(dead_code)]
27 pub(crate) features: Feature,
28
29 pub(crate) errors: Vec<'a, Diagnostic>,
30
31 pub(crate) trivia: Vec<'a, (Vec<'a, Cursor>, Cursor)>,
32
33 pub(crate) state: State,
34
35 pub(crate) bump: &'a Bump,
36
37 skip: KindSet,
38
39 stop: KindSet,
40
41 buffer: [Cursor; BUFFER_LEN],
42 buffer_index: usize,
43
44 #[cfg(debug_assertions)]
45 pub(crate) last_cursor: Option<Cursor>,
46}
47
48#[bitmask(u8)]
49#[bitmask_config(vec_debug)]
50#[derive(Default)]
51pub enum State {
52 Nested = 0b0000_0001,
53}
54
55#[inline]
56fn eof_cursor(len: usize) -> Cursor {
57 let eof_offset = css_lexer::SourceOffset(len as u32);
58 Cursor::new(eof_offset, css_lexer::Token::EOF)
59}
60
61impl<'a, I> Parser<'a, I>
62where
63 I: Iterator<Item = Cursor> + Clone,
64{
65 pub fn new(bump: &'a Bump, source_text: &'a str, mut cursor_iter: I) -> Self {
67 let eof_cursor = eof_cursor(source_text.len());
68 let mut buffer = [eof_cursor; BUFFER_LEN];
69 buffer.fill_with(|| cursor_iter.next().unwrap_or(eof_cursor));
70
71 Self {
72 source_text,
73 cursor_iter,
74 features: Feature::none(),
75 errors: Vec::new_in(bump),
76 trivia: Vec::new_in(bump),
77 state: State::none(),
78 skip: KindSet::TRIVIA,
79 stop: KindSet::NONE,
80 buffer,
81 buffer_index: 0,
82 bump,
83 #[cfg(debug_assertions)]
84 last_cursor: None,
85 }
86 }
87
88 pub fn with_features(mut self, features: Feature) -> Self {
89 self.features = features;
90 self
91 }
92
93 fn fill_buffer(&mut self, from: usize) {
94 self.buffer.copy_within(from..BUFFER_LEN, 0);
96 let eof = eof_cursor(self.source_text.len());
98 for i in BUFFER_LEN - from..BUFFER_LEN {
99 self.buffer[i] = self.cursor_iter.next().unwrap_or(eof);
100 }
101 self.buffer_index = 0;
102 }
103
104 #[inline]
105 pub fn bump(&self) -> &'a Bump {
106 self.bump
107 }
108
109 #[inline]
110 pub fn enabled(&self, other: Feature) -> bool {
111 self.features.contains(other)
112 }
113
114 #[inline]
115 pub fn is(&self, state: State) -> bool {
116 self.state.contains(state)
117 }
118
119 #[inline]
120 pub fn set_state(&mut self, state: State) -> State {
121 let old = self.state;
122 self.state = state;
123 old
124 }
125
126 #[inline]
127 pub fn set_skip(&mut self, skip: KindSet) -> KindSet {
128 let old = self.skip;
129 self.skip = skip;
130 old
131 }
132
133 #[inline]
134 pub fn set_stop(&mut self, stop: KindSet) -> KindSet {
135 let old = self.stop;
136 self.stop = stop;
137 old
138 }
139
140 pub fn parse_entirely<T: Parse<'a> + ToCursors>(&mut self) -> ParserReturn<'a, T> {
141 let output = match T::parse(self) {
142 Ok(output) => Some(output),
143 Err(error) => {
144 self.errors.push(error);
145 None
146 }
147 };
148 let remaining_non_trivia = !self.at_end() && self.peek_n(1) != Kind::Eof;
149 let at_end = self.peek_n_with_skip(1, KindSet::NONE) == Kind::Eof;
150
151 if !at_end {
152 let start = self.peek_n_with_skip(1, KindSet::NONE);
153 let mut end;
154 loop {
155 end = self.next();
156 if end == Kind::Eof {
157 break;
158 }
159 }
160 if remaining_non_trivia {
161 self.errors.push(Diagnostic::new(start, Diagnostic::expected_end).with_end_cursor(end));
162 }
163 }
164 let errors = mem::replace(&mut self.errors, Vec::new_in(self.bump));
165 let trivia = mem::replace(&mut self.trivia, Vec::new_in(self.bump));
166 ParserReturn::new(output, self.source_text, errors, trivia)
167 }
168
169 pub fn parse<T: Parse<'a>>(&mut self) -> Result<T> {
170 T::parse(self)
171 }
172
173 pub fn peek<T: Peek<'a>>(&self) -> bool {
174 T::peek(self, self.peek_n(1))
175 }
176
177 pub fn parse_if_peek<T: Peek<'a> + Parse<'a>>(&mut self) -> Result<Option<T>> {
178 if T::peek(self, self.peek_n(1)) { T::parse(self).map(Some) } else { Ok(None) }
179 }
180
181 pub fn try_parse<T: Parse<'a>>(&mut self) -> Result<T> {
182 T::try_parse(self)
183 }
184
185 pub fn try_parse_if_peek<T: Peek<'a> + Parse<'a>>(&mut self) -> Result<Option<T>> {
186 if T::peek(self, self.peek_n(1)) { T::try_parse(self).map(Some) } else { Ok(None) }
187 }
188
189 pub fn equals_atom(&self, c: Cursor, atom: &'static dyn DynAtomSet) -> bool {
190 let mut cursor_bits = c.atom_bits();
191 if cursor_bits == 0 {
192 let source_cursor = self.to_source_cursor(c);
193 cursor_bits = atom.str_to_bits(&source_cursor.parse(self.bump));
194 }
195 cursor_bits == atom.bits()
196 }
197
198 pub fn to_atom<A: AtomSet + PartialEq>(&self, c: Cursor) -> A {
199 let bits = c.atom_bits();
200 if bits == 0 {
201 let source_cursor = self.to_source_cursor(c);
202 return A::from_str(&source_cursor.parse(self.bump));
203 }
204 #[cfg(debug_assertions)]
205 {
206 let source_cursor = self.to_source_cursor(c);
207 debug_assert!(
208 A::from_bits(bits) == A::from_str(&source_cursor.parse(self.bump)),
209 "{:?} -> {:?} != {:?} ({:?})",
210 c,
211 A::from_bits(bits),
212 A::from_str(&source_cursor.parse(self.bump)),
213 source_cursor.parse(self.bump)
214 );
215 }
216 A::from_bits(bits)
217 }
218
219 #[inline(always)]
220 pub fn offset(&self) -> SourceOffset {
221 self.buffer[self.buffer_index].offset()
222 }
223
224 #[inline(always)]
225 pub fn at_end(&self) -> bool {
226 self.buffer[self.buffer_index] == Kind::Eof
227 }
228
229 pub fn rewind(&mut self, checkpoint: ParserCheckpoint<I>) {
230 let ParserCheckpoint { iter, errors_pos, trivia_pos, buffer, buffer_index, .. } = checkpoint;
231
232 self.cursor_iter = iter;
233
234 self.errors.truncate(errors_pos as usize);
235 self.trivia.truncate(trivia_pos as usize);
236
237 self.buffer = buffer;
238 self.buffer_index = buffer_index;
239
240 #[cfg(debug_assertions)]
241 {
242 self.last_cursor = None;
243 }
244 }
245
246 #[inline]
247 pub fn checkpoint(&self) -> ParserCheckpoint<I> {
248 ParserCheckpoint {
249 cursor: self.buffer[self.buffer_index],
250 errors_pos: self.errors.len() as u8,
251 trivia_pos: self.trivia.len() as u16,
252 iter: self.cursor_iter.clone(),
253 buffer: self.buffer,
254 buffer_index: self.buffer_index,
255 }
256 }
257
258 #[inline]
259 pub fn next_is_stop(&self) -> bool {
260 for c in &self.buffer[self.buffer_index..BUFFER_LEN] {
261 if c != self.skip {
262 return c == self.stop;
263 }
264 }
265
266 let mut iter = self.cursor_iter.clone();
267 loop {
268 let Some(cursor) = iter.next() else {
269 return false;
270 };
271 if cursor != self.skip {
272 return cursor == self.stop;
273 }
274 }
275 }
276
277 #[inline]
278 pub(crate) fn peek_n_with_skip(&self, n: u8, skip: KindSet) -> Cursor {
279 let mut remaining = n;
280
281 for c in &self.buffer[self.buffer_index..BUFFER_LEN] {
282 if c == Kind::Eof {
283 return *c;
284 }
285 if c != skip {
286 remaining -= 1;
287 if remaining == 0 {
288 return *c;
289 }
290 }
291 }
292
293 let mut iter = self.cursor_iter.clone();
294 loop {
295 let Some(cursor) = iter.next() else {
296 return eof_cursor(self.source_text.len());
297 };
298 if cursor == Kind::Eof {
299 return cursor;
300 }
301 if cursor != skip {
302 remaining -= 1;
303 if remaining == 0 {
304 return cursor;
305 }
306 }
307 }
308 }
309
310 #[inline]
311 pub fn peek_n(&self, n: u8) -> Cursor {
312 self.peek_n_with_skip(n, self.skip)
313 }
314
315 pub fn to_source_cursor(&self, cursor: Cursor) -> SourceCursor<'a> {
316 SourceCursor::from(cursor, cursor.str_slice(self.source_text))
317 }
318
319 pub fn consume_trivia(&mut self) -> Vec<'a, Cursor> {
320 let mut trivia = Vec::new_in(self.bump);
321 for i in self.buffer_index..BUFFER_LEN {
322 let c = self.buffer[i];
323 if c == Kind::Eof {
324 return trivia;
325 } else if c == self.skip {
326 trivia.push(c)
327 } else {
328 self.fill_buffer(i);
329 return trivia;
330 }
331 }
332
333 loop {
334 let Some(c) = self.cursor_iter.next() else {
335 return trivia;
336 };
337 if c == Kind::Eof {
338 return trivia;
339 } else if c == self.skip {
340 trivia.push(c)
341 } else {
342 let eof = eof_cursor(self.source_text.len());
343 self.buffer[0] = c;
344 for i in 1..BUFFER_LEN {
345 self.buffer[i] = self.cursor_iter.next().unwrap_or(eof);
346 }
347 self.buffer_index = 0;
348 return trivia;
349 }
350 }
351 }
352
353 #[allow(clippy::should_implement_trait)]
354 pub fn next(&mut self) -> Cursor {
355 let mut pending_trivia = Vec::new_in(self.bump);
357
358 if self.buffer_index >= BUFFER_REFILL_INDEX {
359 self.fill_buffer(self.buffer_index);
360 }
361
362 for i in self.buffer_index..BUFFER_LEN {
363 let c = self.buffer[i];
364 if c == Kind::Eof {
365 self.buffer_index = i + 1;
366 if !pending_trivia.is_empty() {
368 self.trivia.push((pending_trivia.clone(), c));
369 }
370 return c;
371 } else if c == self.skip {
372 pending_trivia.push(c);
373 self.buffer_index = i + 1;
374 } else {
375 self.buffer_index = i + 1;
376 if !pending_trivia.is_empty() {
378 self.trivia.push((pending_trivia.clone(), c));
379 }
380 return c;
381 }
382 }
383
384 let c;
385 loop {
386 let Some(cursor) = self.cursor_iter.next() else {
387 let eof_cursor = eof_cursor(self.source_text.len());
388 if !pending_trivia.is_empty() {
389 self.trivia.push((pending_trivia.clone(), eof_cursor));
390 }
391 return eof_cursor;
392 };
393 if cursor == Kind::Eof || cursor != self.skip {
394 c = cursor;
395 break;
396 }
397 pending_trivia.push(cursor);
398 }
399
400 if !pending_trivia.is_empty() {
402 self.trivia.push((pending_trivia.clone(), c));
403 }
404
405 #[cfg(debug_assertions)]
406 if let Some(last_cursor) = self.last_cursor {
407 debug_assert!(last_cursor != c, "Detected a next loop, {c:?} was fetched twice");
408 }
409 #[cfg(debug_assertions)]
410 if c == Kind::Eof {
411 self.last_cursor = None;
412 } else {
413 self.last_cursor = Some(c);
414 }
415
416 c
417 }
418}
419
420#[test]
421fn peek_and_next() {
422 let str = "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21";
423 let bump = bumpalo::Bump::default();
424 let lexer = css_lexer::Lexer::new(&css_lexer::EmptyAtomSet::ATOMS, &str);
425 let mut p = Parser::new(&bump, &str, lexer);
426 assert_eq!(p.at_end(), false);
427 assert_eq!(p.offset(), 0);
428 for n in 0..=1 {
429 let c = p.checkpoint();
430 for i in 0..=19 {
431 let c = p.peek_n(1);
432 assert_eq!(c.token(), Kind::Number);
433 assert_eq!(c.token().value(), i as f32);
434 let c = p.peek_n(2);
435 assert_eq!(c.token(), Kind::Number);
436 assert_eq!(c.token().value(), (i + 1) as f32);
437 let c = p.peek_n(3);
438 assert_eq!(c.token(), Kind::Number);
439 assert_eq!(c.token().value(), (i + 2) as f32);
440 let c = p.next();
441 assert_eq!(c.token().value(), i as f32);
442 let c = p.peek_n(1);
443 assert_eq!(c.token(), Kind::Number);
444 assert_eq!(c.token().value(), (i + 1) as f32);
445 }
446 if n == 0 {
447 p.rewind(c)
448 }
449 }
450 let c = p.next();
451 assert_eq!(c.token(), Kind::Number);
452 assert_eq!(c.token().value(), 20.0);
453 let c = p.next();
454 assert_eq!(c.token(), Kind::Number);
455 assert_eq!(c.token().value(), 21.0);
456 let c = p.next();
457 assert_eq!(c.token(), Kind::Eof);
458}
459
460#[test]
461fn peek_and_next_with_whitsespace() {
462 let str = "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21";
463 let bump = bumpalo::Bump::default();
464 let lexer = css_lexer::Lexer::new(&css_lexer::EmptyAtomSet::ATOMS, &str);
465 let mut p = Parser::new(&bump, &str, lexer);
466 p.set_skip(KindSet::COMMENTS);
467 assert_eq!(p.at_end(), false);
468 assert_eq!(p.offset(), 0);
469 for n in 0..=1 {
470 let c = p.checkpoint();
471 for i in 0..=19 {
472 let c = p.peek_n(1);
473 assert_eq!(c.token(), Kind::Number);
474 assert_eq!(c.token().value(), i as f32);
475 let c = p.peek_n(2);
476 assert_eq!(c.token(), Kind::Whitespace);
477 let c = p.peek_n(3);
478 assert_eq!(c.token(), Kind::Number);
479 assert_eq!(c.token().value(), (i + 1) as f32);
480 let c = p.peek_n(4);
481 assert_eq!(c.token(), Kind::Whitespace);
482 let c = p.peek_n(5);
483 assert_eq!(c.token(), Kind::Number);
484 assert_eq!(c.token().value(), (i + 2) as f32);
485 let c = p.next();
486 assert_eq!(c.token().value(), i as f32);
487 let c = p.peek_n(1);
488 assert_eq!(c.token(), Kind::Whitespace);
489 let c = p.peek_n(2);
490 assert_eq!(c.token(), Kind::Number);
491 assert_eq!(c.token().value(), (i + 1) as f32);
492 p.next();
493 }
494 if n == 0 {
495 p.rewind(c);
496 }
497 }
498 let c = p.next();
499 assert_eq!(c.token(), Kind::Number);
500 assert_eq!(c.token().value(), 20.0);
501 let c = p.next();
502 assert_eq!(c.token(), Kind::Whitespace);
503 let c = p.next();
504 assert_eq!(c.token(), Kind::Number);
505 assert_eq!(c.token().value(), 21.0);
506 let c = p.next();
507 assert_eq!(c.token(), Kind::Eof);
508}