Skip to main content

css_parse/
parser.rs

1use crate::{
2	Cursor, Diagnostic, Feature, Kind, KindSet, ParserCheckpoint, ParserReturn, Result, SourceOffset, ToCursors,
3	traits::{Parse, Peek},
4};
5use bitmask_enum::bitmask;
6use bumpalo::{Bump, collections::Vec};
7use css_lexer::{AtomSet, DynAtomSet, SourceCursor};
8use std::mem;
9
10// This is chosen rather arbitrarily, but:
11// - It needs to be a number larger than BUFFER_REFILL_INDEX (the largest `peek_n` distance we currently peek).
12// - It would be nice to keep Parser aligned to 64. It's not moved/copied... ever, so struct size doesn't really matter
13//   but making it, say, 1000, doesn't really improve performance. Always benchmark when changing!
14const BUFFER_LEN: usize = 12;
15// This number is chosen specifically because we peek_n(5) at most. Ensuring the buffer is always full enough that
16// peeks only use the buffer and don't end up cloning the lexer. While cloning the lexer is quite cheap, it's definitely
17// cheaper to simply look into the buffer. If we ever peek more than 5 tokens, we should change this number.
18const BUFFER_REFILL_INDEX: usize = BUFFER_LEN - 5;
19
20#[derive(Debug)]
21pub struct Parser<'a, I: Iterator<Item = Cursor> + Clone> {
22	pub(crate) source_text: &'a str,
23
24	pub(crate) cursor_iter: I,
25
26	#[allow(dead_code)]
27	pub(crate) features: Feature,
28
29	pub(crate) errors: Vec<'a, Diagnostic>,
30
31	pub(crate) trivia: Vec<'a, (Vec<'a, Cursor>, Cursor)>,
32
33	pub(crate) state: State,
34
35	pub(crate) bump: &'a Bump,
36
37	skip: KindSet,
38
39	stop: KindSet,
40
41	buffer: [Cursor; BUFFER_LEN],
42	buffer_index: usize,
43
44	#[cfg(debug_assertions)]
45	pub(crate) last_cursor: Option<Cursor>,
46}
47
48#[bitmask(u8)]
49#[bitmask_config(vec_debug)]
50#[derive(Default)]
51pub enum State {
52	Nested = 0b0000_0001,
53	/// Disallow relative selectors (:has). Set when inside :has() since nested :has() is invalid.
54	DisallowRelativeSelector = 0b0000_0010,
55}
56
57#[inline]
58fn eof_cursor(len: usize) -> Cursor {
59	let eof_offset = css_lexer::SourceOffset(len as u32);
60	Cursor::new(eof_offset, css_lexer::Token::EOF)
61}
62
63impl<'a, I> Parser<'a, I>
64where
65	I: Iterator<Item = Cursor> + Clone,
66{
67	/// Create a new parser with an iterator over cursors
68	pub fn new(bump: &'a Bump, source_text: &'a str, mut cursor_iter: I) -> Self {
69		let eof_cursor = eof_cursor(source_text.len());
70		let mut buffer = [eof_cursor; BUFFER_LEN];
71		buffer.fill_with(|| cursor_iter.next().unwrap_or(eof_cursor));
72
73		Self {
74			source_text,
75			cursor_iter,
76			features: Feature::none(),
77			errors: Vec::new_in(bump),
78			trivia: Vec::new_in(bump),
79			state: State::none(),
80			skip: KindSet::TRIVIA,
81			stop: KindSet::NONE,
82			buffer,
83			buffer_index: 0,
84			bump,
85			#[cfg(debug_assertions)]
86			last_cursor: None,
87		}
88	}
89
90	pub fn with_features(mut self, features: Feature) -> Self {
91		self.features = features;
92		self
93	}
94
95	fn fill_buffer(&mut self, from: usize) {
96		// Shift remaining buffer cursors left to the start of the slice.
97		self.buffer.copy_within(from..BUFFER_LEN, 0);
98		// Re-fill the buffer with new cursors.
99		let eof = eof_cursor(self.source_text.len());
100		for i in BUFFER_LEN - from..BUFFER_LEN {
101			self.buffer[i] = self.cursor_iter.next().unwrap_or(eof);
102		}
103		self.buffer_index = 0;
104	}
105
106	#[inline]
107	pub fn bump(&self) -> &'a Bump {
108		self.bump
109	}
110
111	#[inline]
112	pub fn enabled(&self, other: Feature) -> bool {
113		self.features.contains(other)
114	}
115
116	#[inline]
117	pub fn is(&self, state: State) -> bool {
118		self.state.contains(state)
119	}
120
121	#[inline]
122	pub fn set_state(&mut self, state: State) -> State {
123		let old = self.state;
124		self.state = state;
125		old
126	}
127
128	#[inline]
129	pub fn set_skip(&mut self, skip: KindSet) -> KindSet {
130		let old = self.skip;
131		self.skip = skip;
132		old
133	}
134
135	#[inline]
136	pub fn set_stop(&mut self, stop: KindSet) -> KindSet {
137		let old = self.stop;
138		self.stop = stop;
139		old
140	}
141
142	pub fn parse_entirely<T: Parse<'a> + ToCursors>(&mut self) -> ParserReturn<'a, T> {
143		let output = match T::parse(self) {
144			Ok(output) => Some(output),
145			Err(error) => {
146				self.errors.push(error);
147				None
148			}
149		};
150		let remaining_non_trivia = !self.at_end() && self.peek_n(1) != Kind::Eof;
151		let at_end = self.peek_n_with_skip(1, KindSet::NONE) == Kind::Eof;
152
153		if !at_end {
154			let start = self.peek_n_with_skip(1, KindSet::NONE);
155			let mut end;
156			loop {
157				end = self.next();
158				if end == Kind::Eof {
159					break;
160				}
161			}
162			if remaining_non_trivia {
163				self.errors.push(Diagnostic::new(start, Diagnostic::expected_end).with_end_cursor(end));
164			}
165		}
166		let errors = mem::replace(&mut self.errors, Vec::new_in(self.bump));
167		let trivia = mem::replace(&mut self.trivia, Vec::new_in(self.bump));
168		ParserReturn::new(output, self.source_text, errors, trivia)
169	}
170
171	pub fn parse<T: Parse<'a>>(&mut self) -> Result<T> {
172		T::parse(self)
173	}
174
175	pub fn peek<T: Peek<'a>>(&self) -> bool {
176		T::peek(self, self.peek_n(1))
177	}
178
179	pub fn parse_if_peek<T: Peek<'a> + Parse<'a>>(&mut self) -> Result<Option<T>> {
180		if T::peek(self, self.peek_n(1)) { T::parse(self).map(Some) } else { Ok(None) }
181	}
182
183	pub fn try_parse<T: Parse<'a>>(&mut self) -> Result<T> {
184		T::try_parse(self)
185	}
186
187	pub fn try_parse_if_peek<T: Peek<'a> + Parse<'a>>(&mut self) -> Result<Option<T>> {
188		if T::peek(self, self.peek_n(1)) { T::try_parse(self).map(Some) } else { Ok(None) }
189	}
190
191	pub fn equals_atom(&self, c: Cursor, atom: &'static dyn DynAtomSet) -> bool {
192		let mut cursor_bits = c.atom_bits();
193		if cursor_bits == 0 {
194			if c != KindSet::ATOM_LIKE {
195				return false;
196			}
197			let source_cursor = self.to_source_cursor(c);
198			cursor_bits = atom.str_to_bits(&source_cursor.parse(self.bump));
199		}
200		cursor_bits == atom.bits()
201	}
202
203	pub fn to_atom<A: AtomSet + PartialEq>(&self, c: Cursor) -> A {
204		let bits = c.atom_bits();
205		if bits == 0 {
206			if c != KindSet::ATOM_LIKE {
207				return A::from_bits(0);
208			}
209			let source_cursor = self.to_source_cursor(c);
210			return A::from_str(&source_cursor.parse(self.bump));
211		}
212		#[cfg(debug_assertions)]
213		if c == KindSet::ATOM_LIKE && !((c == Kind::Ident || c == Kind::Function) && c.token().is_dashed_ident()) {
214			let source_cursor = self.to_source_cursor(c);
215			debug_assert!(
216				A::from_bits(bits) == A::from_str(&source_cursor.parse(self.bump)),
217				"{:?} -> {:?} != {:?} ({:?})",
218				c,
219				A::from_bits(bits),
220				A::from_str(&source_cursor.parse(self.bump)),
221				source_cursor.parse(self.bump)
222			);
223		}
224		A::from_bits(bits)
225	}
226
227	#[inline(always)]
228	pub fn offset(&self) -> SourceOffset {
229		self.buffer[self.buffer_index].offset()
230	}
231
232	#[inline(always)]
233	pub fn at_end(&self) -> bool {
234		self.buffer[self.buffer_index] == Kind::Eof
235	}
236
237	pub fn rewind(&mut self, checkpoint: ParserCheckpoint<I>) {
238		let ParserCheckpoint { iter, errors_pos, trivia_pos, buffer, buffer_index, skip, stop, state, .. } = checkpoint;
239
240		self.cursor_iter = iter;
241
242		self.errors.truncate(errors_pos as usize);
243		self.trivia.truncate(trivia_pos as usize);
244
245		self.buffer = buffer;
246		self.buffer_index = buffer_index;
247
248		self.skip = skip;
249		self.stop = stop;
250		self.state = state;
251
252		#[cfg(debug_assertions)]
253		{
254			self.last_cursor = None;
255		}
256	}
257
258	#[inline]
259	pub fn checkpoint(&self) -> ParserCheckpoint<I> {
260		ParserCheckpoint {
261			cursor: self.buffer[self.buffer_index],
262			errors_pos: self.errors.len() as u8,
263			trivia_pos: self.trivia.len() as u16,
264			iter: self.cursor_iter.clone(),
265			buffer: self.buffer,
266			buffer_index: self.buffer_index,
267			skip: self.skip,
268			stop: self.stop,
269			state: self.state,
270		}
271	}
272
273	#[inline]
274	pub fn next_is_stop(&self) -> bool {
275		for c in &self.buffer[self.buffer_index..BUFFER_LEN] {
276			if c != self.skip {
277				return c == self.stop;
278			}
279		}
280
281		let mut iter = self.cursor_iter.clone();
282		loop {
283			let Some(cursor) = iter.next() else {
284				return false;
285			};
286			if cursor != self.skip {
287				return cursor == self.stop;
288			}
289		}
290	}
291
292	#[inline]
293	pub(crate) fn peek_n_with_skip(&self, n: u8, skip: KindSet) -> Cursor {
294		let mut remaining = n;
295
296		for c in &self.buffer[self.buffer_index..BUFFER_LEN] {
297			if c == Kind::Eof {
298				return *c;
299			}
300			if c != skip {
301				remaining -= 1;
302				if remaining == 0 {
303					return *c;
304				}
305			}
306		}
307
308		let mut iter = self.cursor_iter.clone();
309		loop {
310			let Some(cursor) = iter.next() else {
311				return eof_cursor(self.source_text.len());
312			};
313			if cursor == Kind::Eof {
314				return cursor;
315			}
316			if cursor != skip {
317				remaining -= 1;
318				if remaining == 0 {
319					return cursor;
320				}
321			}
322		}
323	}
324
325	#[inline]
326	pub fn peek_n(&self, n: u8) -> Cursor {
327		self.peek_n_with_skip(n, self.skip)
328	}
329
330	#[inline]
331	pub fn peek_n_including_whitespace(&self, n: u8) -> Cursor {
332		self.peek_n_with_skip(n, self.skip.remove(Kind::Whitespace))
333	}
334
335	pub fn to_source_cursor(&self, cursor: Cursor) -> SourceCursor<'a> {
336		SourceCursor::from(cursor, cursor.str_slice(self.source_text))
337	}
338
339	pub fn consume_trivia(&mut self) -> Vec<'a, Cursor> {
340		let mut trivia = Vec::new_in(self.bump);
341		for i in self.buffer_index..BUFFER_LEN {
342			let c = self.buffer[i];
343			if c == Kind::Eof {
344				self.buffer_index = i;
345				return trivia;
346			} else if c == self.skip {
347				trivia.push(c)
348			} else {
349				self.buffer_index = i;
350				self.fill_buffer(i);
351				return trivia;
352			}
353		}
354
355		loop {
356			let Some(c) = self.cursor_iter.next() else {
357				return trivia;
358			};
359			if c == Kind::Eof {
360				return trivia;
361			} else if c == self.skip {
362				trivia.push(c)
363			} else {
364				let eof = eof_cursor(self.source_text.len());
365				self.buffer[0] = c;
366				for i in 1..BUFFER_LEN {
367					self.buffer[i] = self.cursor_iter.next().unwrap_or(eof);
368				}
369				self.buffer_index = 0;
370				return trivia;
371			}
372		}
373	}
374
375	/// Consume trivia and attach it to the next content token for output preservation.
376	/// This should be called when you want to consume whitespace/comments but preserve
377	/// them for round-trip output fidelity.
378	pub fn consume_trivia_as_leading(&mut self) {
379		let trivia = self.consume_trivia();
380		if !trivia.is_empty() {
381			// Peek the next content token to attach trivia to it
382			let next = self.peek_n(1);
383			self.trivia.push((trivia, next));
384		}
385	}
386
387	#[allow(clippy::should_implement_trait)]
388	pub fn next(&mut self) -> Cursor {
389		// Collect trivia that should be associated with the next content token
390		let mut pending_trivia = Vec::new_in(self.bump);
391
392		loop {
393			if self.buffer_index >= BUFFER_REFILL_INDEX {
394				self.fill_buffer(self.buffer_index);
395			}
396
397			for i in self.buffer_index..BUFFER_LEN {
398				let c = self.buffer[i];
399				if c == Kind::Eof {
400					self.buffer_index = i;
401					// Associate pending trivia with EOF if any
402					if !pending_trivia.is_empty() {
403						self.trivia.push((pending_trivia.clone(), c));
404					}
405					#[cfg(debug_assertions)]
406					{
407						self.last_cursor = None;
408					}
409					return c;
410				} else if c == self.skip {
411					pending_trivia.push(c);
412				} else {
413					self.buffer_index = i + 1;
414					if self.buffer_index >= BUFFER_REFILL_INDEX {
415						self.fill_buffer(self.buffer_index);
416					}
417					// Associate all pending trivia with this content token
418					if !pending_trivia.is_empty() {
419						self.trivia.push((pending_trivia.clone(), c));
420					}
421					#[cfg(debug_assertions)]
422					{
423						if let Some(last_cursor) = self.last_cursor {
424							debug_assert!(last_cursor != c, "Detected a next loop, {c:?} was fetched twice");
425						}
426						self.last_cursor = Some(c);
427					}
428					return c;
429				}
430			}
431
432			// Buffer exhausted with only skip tokens. Refill so buffer_index stays valid.
433			self.fill_buffer(BUFFER_LEN);
434		}
435	}
436}
437
438#[test]
439fn test_filling_buffer_with_skip_tokens() {
440	let str = "/*x*//*x*//*x*//*x*//*x*//*x*//*x*//*x*//*x*//*x*//*x*/a";
441	let bump = bumpalo::Bump::default();
442	let lexer = css_lexer::Lexer::new(&css_lexer::EmptyAtomSet::ATOMS, str);
443	let mut p = Parser::new(&bump, str, lexer);
444	let c = p.next();
445	assert_eq!(c.token(), Kind::Ident);
446	// Must not panic:
447	let _ = p.at_end();
448	let _ = p.offset();
449}
450
451#[test]
452fn peek_and_next() {
453	let str = "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21";
454	let bump = bumpalo::Bump::default();
455	let lexer = css_lexer::Lexer::new(&css_lexer::EmptyAtomSet::ATOMS, &str);
456	let mut p = Parser::new(&bump, &str, lexer);
457	assert_eq!(p.at_end(), false);
458	assert_eq!(p.offset(), 0);
459	for n in 0..=1 {
460		let c = p.checkpoint();
461		for i in 0..=19 {
462			let c = p.peek_n(1);
463			assert_eq!(c.token(), Kind::Number);
464			assert_eq!(c.token().value(), i as f32);
465			let c = p.peek_n(2);
466			assert_eq!(c.token(), Kind::Number);
467			assert_eq!(c.token().value(), (i + 1) as f32);
468			let c = p.peek_n(3);
469			assert_eq!(c.token(), Kind::Number);
470			assert_eq!(c.token().value(), (i + 2) as f32);
471			let c = p.next();
472			assert_eq!(c.token().value(), i as f32);
473			let c = p.peek_n(1);
474			assert_eq!(c.token(), Kind::Number);
475			assert_eq!(c.token().value(), (i + 1) as f32);
476		}
477		if n == 0 {
478			p.rewind(c)
479		}
480	}
481	let c = p.next();
482	assert_eq!(c.token(), Kind::Number);
483	assert_eq!(c.token().value(), 20.0);
484	let c = p.next();
485	assert_eq!(c.token(), Kind::Number);
486	assert_eq!(c.token().value(), 21.0);
487	let c = p.next();
488	assert_eq!(c.token(), Kind::Eof);
489}
490
491#[test]
492fn peek_and_next_with_whitsespace() {
493	let str = "0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21";
494	let bump = bumpalo::Bump::default();
495	let lexer = css_lexer::Lexer::new(&css_lexer::EmptyAtomSet::ATOMS, &str);
496	let mut p = Parser::new(&bump, &str, lexer);
497	p.set_skip(KindSet::COMMENTS);
498	assert_eq!(p.at_end(), false);
499	assert_eq!(p.offset(), 0);
500	for n in 0..=1 {
501		let c = p.checkpoint();
502		for i in 0..=19 {
503			let c = p.peek_n(1);
504			assert_eq!(c.token(), Kind::Number);
505			assert_eq!(c.token().value(), i as f32);
506			let c = p.peek_n(2);
507			assert_eq!(c.token(), Kind::Whitespace);
508			let c = p.peek_n(3);
509			assert_eq!(c.token(), Kind::Number);
510			assert_eq!(c.token().value(), (i + 1) as f32);
511			let c = p.peek_n(4);
512			assert_eq!(c.token(), Kind::Whitespace);
513			let c = p.peek_n(5);
514			assert_eq!(c.token(), Kind::Number);
515			assert_eq!(c.token().value(), (i + 2) as f32);
516			let c = p.next();
517			assert_eq!(c.token().value(), i as f32);
518			let c = p.peek_n(1);
519			assert_eq!(c.token(), Kind::Whitespace);
520			let c = p.peek_n(2);
521			assert_eq!(c.token(), Kind::Number);
522			assert_eq!(c.token().value(), (i + 1) as f32);
523			p.next();
524		}
525		if n == 0 {
526			p.rewind(c);
527		}
528	}
529	let c = p.next();
530	assert_eq!(c.token(), Kind::Number);
531	assert_eq!(c.token().value(), 20.0);
532	let c = p.next();
533	assert_eq!(c.token(), Kind::Whitespace);
534	let c = p.next();
535	assert_eq!(c.token(), Kind::Number);
536	assert_eq!(c.token().value(), 21.0);
537	let c = p.next();
538	assert_eq!(c.token(), Kind::Eof);
539}