css_parse/
cursor_ordered_sink.rs

1use crate::{Cursor, CursorSink, Kind, SourceOffset};
2use bumpalo::collections::Vec;
3
4/// This is a [CursorSink] that buffers cursors and emits them in source order. It uses contiguous coverage tracking to
5/// eagerly emit cursors as soon as gaps are filled.
6///
7/// Many CSS grammars allow constructing a tree in arbitrarily authored order, but have a canonical ordering, so for e.g
8/// a function like `foo(bar, baz)` could be represented as `foo(baz, bar)` and still be valid. AST nodes output their
9/// cursors in grammar order, which is most often desirable, but this sink will enforce source ordering.
10pub struct CursorOrderedSink<'a, S> {
11	sink: &'a mut S,
12	/// Sorted buffer of cursors by start position
13	buffer: Vec<'a, Cursor>,
14	/// How far we've committed (emitted contiguously from position 0)
15	committed_position: SourceOffset,
16	#[cfg(debug_assertions)]
17	seen_eof: bool,
18}
19
20impl<'a, S: CursorSink> CursorOrderedSink<'a, S> {
21	pub fn new(bump: &'a bumpalo::Bump, sink: &'a mut S) -> Self {
22		Self {
23			sink,
24			buffer: Vec::new_in(bump),
25			committed_position: SourceOffset(0),
26			#[cfg(debug_assertions)]
27			seen_eof: false,
28		}
29	}
30
31	/// Flush all remaining buffered cursors to the delegate sink in source order.
32	/// This is typically called when no more cursors will be added.
33	pub fn flush(&mut self) {
34		self.buffer.sort_by_key(|cursor| cursor.span().start());
35		for cursor in self.buffer.iter() {
36			self.sink.append(*cursor);
37		}
38		if let Some(last_cursor) = self.buffer.last() {
39			self.committed_position = last_cursor.end_offset();
40		}
41		self.buffer.clear();
42	}
43}
44
45impl<'a, S: CursorSink> CursorSink for CursorOrderedSink<'a, S> {
46	// Insert cursor into the buffer, maintaining sorted order efficiently
47	//
48	// The algorithm models cursor coverage as an "array with holes" that get filled over time:
49	//
50	// ```text
51	// Positions: [0][1][2][3][4][5][6]
52	// Initial:   [ ][ ][ ][ ][ ][ ][ ]    committed_position = 0
53	//
54	// Add cursor_4: [ ][ ][ ][ ][4][ ][ ]    buffer: [4], nothing emitted
55	// Add cursor_0: [0][ ][ ][ ][4][ ][ ]    emit [0], committed_position = 1
56	// Add cursor_2: [0][ ][2][ ][4][ ][ ]    buffer: [2,4], gap at 1
57	// Add cursor_1: [0][1][2][ ][4][ ][ ]    emit [1,2], committed_position = 3
58	// Add cursor_3: [0][1][2][3][4][ ][ ]    emit [3,4], committed_position = 5
59	// Add cursor_5: [0][1][2][3][4][5][ ]    emit [5], committed_position = 6
60	// ```
61	//
62	// Once a contiguous section from `committed_position` is complete, it's emitted immediately.
63	fn append(&mut self, cursor: Cursor) {
64		#[cfg(debug_assertions)]
65		{
66			debug_assert!(!self.seen_eof, "Received cursor after EOF: {:?}", cursor);
67			if cursor == Kind::Eof {
68				self.seen_eof = true;
69			}
70		}
71		let cursor_start = cursor.span().start();
72		if self.buffer.is_empty() || cursor_start.0 >= self.buffer.last().unwrap().span().start().0 {
73			self.buffer.push(cursor);
74		} else if cursor_start == self.committed_position {
75			// This cursor is the next in order
76			self.sink.append(cursor);
77			self.committed_position = cursor.end_offset();
78		} else {
79			// The cursor needs to be buffered.
80			// TODO: binary_search_by_key is giving BTreeMap which would be O(log n) instead of O(n), but
81			// for small enough numbers that's fine? Investigate more.
82			let insert_pos =
83				self.buffer.binary_search_by_key(&cursor_start, |c| c.span().start()).unwrap_or_else(|pos| pos);
84			self.buffer.insert(insert_pos, cursor);
85		}
86
87		// Check if a contiguous section from committed_position exists, and emit it if so
88		while !self.buffer.is_empty() {
89			// Remove any overlapping cursors first (those that start before committed_position)
90			let mut overlapping_count = 0;
91			for cursor in self.buffer.iter() {
92				if cursor.span().start().0 < self.committed_position.0 {
93					overlapping_count += 1;
94				} else {
95					break;
96				}
97			}
98			if overlapping_count > 0 {
99				self.buffer.drain(0..overlapping_count);
100			}
101
102			if self.buffer.is_empty() {
103				break;
104			}
105
106			// Find how many contiguous cursors can be emitted from the front
107			let mut current_pos = self.committed_position;
108			let mut emit_count = 0;
109
110			for cursor in self.buffer.iter() {
111				let cursor_start = cursor.span().start();
112
113				if cursor_start == current_pos {
114					current_pos = cursor.end_offset();
115					emit_count += 1;
116				} else {
117					// If this cursor starts after current_pos, stop, as there is a gap
118					break;
119				}
120			}
121
122			if emit_count > 0 {
123				for cursor in self.buffer.drain(0..emit_count) {
124					self.sink.append(cursor);
125				}
126				self.committed_position = current_pos;
127			} else {
128				// No contiguous section found, stop
129				break;
130			}
131		}
132
133		// If we just processed EOF, flush any remaining buffered cursors
134		if cursor == Kind::Eof {
135			self.flush();
136		}
137	}
138}
139
140#[cfg(test)]
141mod tests {
142	use super::*;
143	use crate::{ComponentValues, EmptyAtomSet, Parser, SourceCursor, ToCursors};
144	use bumpalo::{Bump, collections::Vec as BumpVec};
145	use css_lexer::Lexer;
146	use std::fmt::Write;
147
148	#[test]
149	fn test_basic() {
150		let source_text = "foo bar";
151		let bump = Bump::default();
152		let mut output = BumpVec::new_in(&bump);
153		{
154			let mut ordered_sink = CursorOrderedSink::new(&bump, &mut output);
155			let lexer = Lexer::new(&EmptyAtomSet::ATOMS, source_text);
156			let mut parser = Parser::new(&bump, source_text, lexer);
157			parser.parse_entirely::<ComponentValues>().output.unwrap().to_cursors(&mut ordered_sink);
158			ordered_sink.flush();
159		}
160		let mut result = String::new();
161		for c in output.iter() {
162			write!(&mut result, "{}", SourceCursor::from(*c, c.str_slice(source_text))).unwrap();
163		}
164		assert_eq!(result, "foo bar");
165	}
166
167	#[test]
168	fn test_manual_flush() {
169		use crate::{SourceOffset, Token};
170
171		let bump = Bump::default();
172		let mut output = BumpVec::new_in(&bump);
173		{
174			let mut ordered_sink = CursorOrderedSink::new(&bump, &mut output);
175			let cursor1 = Cursor::new(SourceOffset(0), Token::SPACE); // position 0, length 1
176			let cursor2 = Cursor::new(SourceOffset(10), Token::SPACE); // position 10, length 1
177			let cursor3 = Cursor::new(SourceOffset(4), Token::SPACE); // position 4, length 1
178
179			// Append cursors in non-source-order
180			ordered_sink.append(cursor2);
181			ordered_sink.append(cursor1);
182			ordered_sink.append(cursor3);
183			ordered_sink.flush();
184		}
185		assert_eq!(output.len(), 3);
186		assert_eq!(output[0].span().start(), SourceOffset(0));
187		assert_eq!(output[1].span().start(), SourceOffset(4));
188		assert_eq!(output[2].span().start(), SourceOffset(10));
189	}
190
191	#[test]
192	fn test_contiguous_eager_emission() {
193		use crate::{SourceOffset, Token};
194		let bump = Bump::default();
195		let mut output = BumpVec::new_in(&bump);
196		{
197			let mut ordered_sink = CursorOrderedSink::new(&bump, &mut output);
198			// Create cursors that form a contiguous sequence
199			let cursor_at_0 = Cursor::new(SourceOffset(0), Token::SPACE);
200			let cursor_at_1 = Cursor::new(SourceOffset(1), Token::SPACE);
201			ordered_sink.append(cursor_at_0);
202			ordered_sink.append(cursor_at_1);
203			// Avoid flushing as they should be emitted immediately
204		}
205		assert_eq!(output.len(), 2);
206		assert_eq!(output[0].span().start(), SourceOffset(0));
207		assert_eq!(output[1].span().start(), SourceOffset(1));
208	}
209
210	#[test]
211	fn test_gap_filling() {
212		use crate::{SourceOffset, Token};
213		let bump = Bump::default();
214		let mut output = BumpVec::new_in(&bump);
215
216		{
217			let mut ordered_sink = CursorOrderedSink::new(&bump, &mut output);
218			// Create cursors with a gap, then fill the gap
219			let cursor_at_0 = Cursor::new(SourceOffset(0), Token::SPACE); // ends at 1
220			let cursor_at_2 = Cursor::new(SourceOffset(2), Token::SPACE); // ends at 3
221			let cursor_at_1 = Cursor::new(SourceOffset(1), Token::SPACE); // ends at 2, fills the gap
222			ordered_sink.append(cursor_at_0);
223			ordered_sink.append(cursor_at_2);
224			ordered_sink.append(cursor_at_1);
225			// Avoid flushing as they should be emitted immediately
226		}
227		assert_eq!(output.len(), 3);
228		assert_eq!(output[0].span().start(), SourceOffset(0));
229		assert_eq!(output[1].span().start(), SourceOffset(1));
230		assert_eq!(output[2].span().start(), SourceOffset(2));
231	}
232
233	#[test]
234	fn test_sequential() {
235		use crate::{SourceOffset, Token};
236		let bump = Bump::default();
237		let mut output = BumpVec::new_in(&bump);
238		{
239			let mut ordered_sink = CursorOrderedSink::new(&bump, &mut output);
240			let cursor1 = Cursor::new(SourceOffset(0), Token::SPACE);
241			let cursor2 = Cursor::new(SourceOffset(1), Token::SPACE);
242			let cursor3 = Cursor::new(SourceOffset(2), Token::SPACE);
243			ordered_sink.append(cursor1);
244			ordered_sink.append(cursor2);
245			ordered_sink.append(cursor3);
246			// Avoid flushing as they should be emitted immediately
247		}
248		assert_eq!(output.len(), 3);
249		assert_eq!(output[0].span().start(), SourceOffset(0));
250		assert_eq!(output[1].span().start(), SourceOffset(1));
251		assert_eq!(output[2].span().start(), SourceOffset(2));
252	}
253
254	#[test]
255	fn test_varied_order() {
256		use crate::{SourceOffset, Token};
257		let bump = Bump::default();
258		let mut output = BumpVec::new_in(&bump);
259		{
260			let mut ordered_sink = CursorOrderedSink::new(&bump, &mut output);
261			let cursor_at_4 = Cursor::new(SourceOffset(4), Token::SPACE); // ends at 5
262			let cursor_at_0 = Cursor::new(SourceOffset(0), Token::SPACE); // ends at 1
263			let cursor_at_6 = Cursor::new(SourceOffset(6), Token::SPACE); // ends at 7
264			let cursor_at_2 = Cursor::new(SourceOffset(2), Token::SPACE); // ends at 3
265			let cursor_at_1 = Cursor::new(SourceOffset(1), Token::SPACE); // ends at 2
266			let cursor_at_3 = Cursor::new(SourceOffset(3), Token::SPACE); // ends at 4
267			let cursor_at_5 = Cursor::new(SourceOffset(5), Token::SPACE); // ends at 6
268			ordered_sink.append(cursor_at_4);
269			ordered_sink.append(cursor_at_0);
270			ordered_sink.append(cursor_at_6);
271			ordered_sink.append(cursor_at_2);
272			ordered_sink.append(cursor_at_1);
273			ordered_sink.append(cursor_at_3);
274			ordered_sink.append(cursor_at_5);
275			// Avoid flushing as they should be emitted immediately
276		}
277		assert_eq!(output.len(), 7);
278		for i in 0..7 {
279			assert_eq!(output[i].span().start(), SourceOffset(i as u32));
280		}
281	}
282}