css_lexer/
cursor.rs

1use crate::{
2	AssociatedWhitespaceRules, CommentStyle, DimensionUnit, Kind, KindSet, QuoteStyle, SourceOffset, Span, ToSpan,
3	Token,
4	syntax::{ParseEscape, is_newline},
5};
6use bumpalo::{Bump, collections::String};
7use std::{char::REPLACEMENT_CHARACTER, fmt};
8
/// Wraps a [Token] with a [SourceOffset], which allows it to reason about the character data of the source text.
///
/// The first field is the offset of the token in the source text, the second field is the token itself.
#[derive(Debug, Default, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Cursor(SourceOffset, Token);
12
13impl Cursor {
14	pub const DUMMY_SITE_NUMBER_ZERO: Self = Self(SourceOffset::DUMMY, Token::NUMBER_ZERO);
15	pub const EMPTY: Self = Self(SourceOffset::ZERO, Token::EMPTY);
16
17	#[inline(always)]
18	pub const fn new(offset: SourceOffset, token: Token) -> Self {
19		Self(offset, token)
20	}
21
22	#[inline(always)]
23	pub const fn dummy(token: Token) -> Self {
24		Self(SourceOffset::DUMMY, token)
25	}
26
27	#[inline(always)]
28	pub const fn token(&self) -> Token {
29		self.1
30	}
31
32	#[inline(always)]
33	pub const fn offset(&self) -> SourceOffset {
34		self.0
35	}
36
37	#[inline(always)]
38	pub fn end_offset(&self) -> SourceOffset {
39		if self.offset() == SourceOffset::DUMMY {
40			return self.offset();
41		}
42		SourceOffset(self.offset().0 + self.len())
43	}
44
45	#[inline(always)]
46	pub const fn is_empty(&self) -> bool {
47		self.token().is_empty()
48	}
49
50	#[inline(always)]
51	pub const fn len(&self) -> u32 {
52		self.token().len()
53	}
54
55	#[inline(always)]
56	pub fn span(&self) -> Span {
57		Span::new(self.offset(), self.end_offset())
58	}
59
60	pub fn write_str(&self, str: &str, f: &mut impl fmt::Write) -> fmt::Result {
61		match self.token().kind() {
62			Kind::Eof => {}
63			Kind::CdcOrCdo => {
64				if self.token().is_cdc() {
65					f.write_str("-->")?
66				} else {
67					f.write_str("<!--")?
68				}
69			}
70			Kind::Number => {
71				if self.token().has_sign() {
72					write!(f, "{:+}", self.token().value())?;
73				} else {
74					write!(f, "{}", self.token().value())?;
75				}
76			}
77			Kind::Dimension => match self.token().dimension_unit() {
78				DimensionUnit::Unknown => f.write_str(self.str_slice(str))?,
79				d => {
80					f.write_str(&self.token().value().to_string())?;
81					f.write_str(d.into())?;
82				}
83			},
84			// It is important to manually write out quotes for 2 reasons:
85			//  1. The quote style can be mutated from the source string (such as the case of normalising/switching quotes.
86			//  2. Some strings may not have the closing quote, which should be corrected.
87			Kind::String => match self.token().quote_style() {
88				QuoteStyle::Single => {
89					let inner =
90						&self.str_slice(str)[1..(self.len() as usize) - self.token().has_close_quote() as usize];
91					f.write_char('\'')?;
92					f.write_str(inner)?;
93					f.write_char('\'')?;
94				}
95				QuoteStyle::Double => {
96					let inner =
97						&self.str_slice(str)[1..(self.len() as usize) - self.token().has_close_quote() as usize];
98					f.write_char('"')?;
99					f.write_str(inner)?;
100					f.write_char('"')?;
101				}
102				// Strings must always be quoted!
103				QuoteStyle::None => unreachable!(),
104			},
105			Kind::Comment
106			| Kind::Whitespace
107			| Kind::BadString
108			| Kind::BadUrl
109			| Kind::Ident
110			| Kind::Function
111			| Kind::AtKeyword
112			| Kind::Hash
113			| Kind::Url => f.write_str(self.str_slice(str))?,
114			Kind::Delim
115			| Kind::Colon
116			| Kind::Semicolon
117			| Kind::Comma
118			| Kind::LeftSquare
119			| Kind::LeftParen
120			| Kind::RightSquare
121			| Kind::RightParen
122			| Kind::LeftCurly
123			| Kind::RightCurly => f.write_char(self.token().char().unwrap())?,
124		}
125		Ok(())
126	}
127
128	#[inline(always)]
129	pub fn str_slice<'a>(&self, str: &'a str) -> &'a str {
130		&str[(self.offset().0 as usize)..(self.end_offset().0 as usize)]
131	}
132
133	pub fn eq_ignore_ascii_case<'a>(&self, source: &'a str, other: &'a str) -> bool {
134		debug_assert!(self != Kind::Delim && self != Kind::Url);
135		debug_assert!(other.to_ascii_lowercase() == other);
136		let start = (self.offset().0 + self.token().leading_len()) as usize;
137		let end = (self.end_offset().0 - self.token().trailing_len()) as usize;
138		if !self.token().contains_escape_chars() {
139			if end - start != other.len() {
140				return false;
141			}
142			if self.token().is_lower_case() {
143				debug_assert!(source[start..end].to_ascii_lowercase() == source[start..end]);
144				return &source[start..end] == other;
145			}
146			return source[start..end].eq_ignore_ascii_case(other);
147		}
148		let mut chars = source[start..end].chars().peekable();
149		let mut other_chars = other.chars();
150		let mut i = 0;
151		while let Some(c) = chars.next() {
152			let o = other_chars.next();
153			if o.is_none() {
154				return false;
155			}
156			let o = o.unwrap();
157			if c == '\0' {
158				if REPLACEMENT_CHARACTER != o {
159					return false;
160				}
161				i += 1;
162			} else if c == '\\' {
163				// String has special rules
164				// https://drafts.csswg.org/css-syntax-3/#consume-string-cursor
165				if self.token().kind_bits() == Kind::String as u8 {
166					// When the token is a string, escaped EOF points are not consumed
167					// U+005C REVERSE SOLIDUS (\)
168					//   If the next input code point is EOF, do nothing.
169					//   Otherwise, if the next input code point is a newline, consume it.
170					let c = chars.peek();
171					if let Some(c) = c {
172						if is_newline(*c) {
173							chars.next();
174							if chars.peek() == Some(&'\n') {
175								i += 1;
176							}
177							i += 2;
178							chars = source[(start + i)..end].chars().peekable();
179							continue;
180						}
181					} else {
182						break;
183					}
184				}
185				i += 1;
186				let (ch, n) = source[(start + i)..].chars().parse_escape_sequence();
187				i += n as usize;
188				chars = source[(start + i)..end].chars().peekable();
189				if (ch == '\0' && REPLACEMENT_CHARACTER != o) || ch != o {
190					return false;
191				}
192			} else if c != o {
193				return false;
194			} else {
195				i += c.len_utf8();
196			}
197		}
198		other_chars.next().is_none()
199	}
200
201	pub fn parse_str<'a>(&self, source: &'a str, allocator: &'a Bump) -> &'a str {
202		debug_assert!(self != Kind::Delim);
203		let start = (self.offset().0 + self.token().leading_len()) as usize;
204		let end = (self.end_offset().0 - self.token().trailing_len()) as usize;
205		if !self.token().contains_escape_chars() {
206			return &source[start..end];
207		}
208		let mut chars = source[start..end].chars().peekable();
209		let mut i = 0;
210		let mut str: Option<String<'a>> = None;
211		while let Some(c) = chars.next() {
212			if c == '\0' {
213				if str.is_none() {
214					str = if i == 0 {
215						Some(String::new_in(allocator))
216					} else {
217						Some(String::from_str_in(&source[start..(start + i)], allocator))
218					}
219				}
220				str.as_mut().unwrap().push(REPLACEMENT_CHARACTER);
221				i += 1;
222			} else if c == '\\' {
223				if str.is_none() {
224					str = if i == 0 {
225						Some(String::new_in(allocator))
226					} else {
227						Some(String::from_str_in(&source[start..(start + i)], allocator))
228					}
229				}
230				// String has special rules
231				// https://drafts.csswg.org/css-syntax-3/#consume-string-cursor
232				if self.token().kind_bits() == Kind::String as u8 {
233					// When the token is a string, escaped EOF points are not consumed
234					// U+005C REVERSE SOLIDUS (\)
235					//   If the next input code point is EOF, do nothing.
236					//   Otherwise, if the next input code point is a newline, consume it.
237					let c = chars.peek();
238					if let Some(c) = c {
239						if is_newline(*c) {
240							chars.next();
241							if chars.peek() == Some(&'\n') {
242								i += 1;
243							}
244							i += 2;
245							chars = source[(start + i)..end].chars().peekable();
246							continue;
247						}
248					} else {
249						break;
250					}
251				}
252				i += 1;
253				let (ch, n) = source[(start + i)..].chars().parse_escape_sequence();
254				str.as_mut().unwrap().push(if ch == '\0' { REPLACEMENT_CHARACTER } else { ch });
255				i += n as usize;
256				chars = source[(start + i)..end].chars().peekable();
257			} else {
258				if let Some(text) = &mut str {
259					text.push(c);
260				}
261				i += c.len_utf8();
262			}
263		}
264		if str.is_some() { str.take().unwrap().into_bump_str() } else { &source[start..start + i] }
265	}
266
267	#[inline]
268	pub fn parse_str_lower<'a>(&self, source: &'a str, allocator: &'a Bump) -> &'a str {
269		debug_assert!(self != Kind::Delim);
270		if self.token().is_lower_case() {
271			return self.parse_str(source, allocator);
272		}
273		let start = (self.offset().0 + self.token().leading_len()) as usize;
274		let end = (self.end_offset().0 - self.token().trailing_len()) as usize;
275		if !self.token().contains_escape_chars() && self.token().is_lower_case() {
276			return &source[start..end];
277		}
278		let mut chars = source[start..end].chars().peekable();
279		let mut i = 0;
280		let mut str: String<'a> = String::new_in(allocator);
281		while let Some(c) = chars.next() {
282			if c == '\0' {
283				str.push(REPLACEMENT_CHARACTER);
284				i += 1;
285			} else if c == '\\' {
286				// String has special rules
287				// https://drafts.csswg.org/css-syntax-3/#consume-string-cursor
288				if self.token().kind_bits() == Kind::String as u8 {
289					// When the token is a string, escaped EOF points are not consumed
290					// U+005C REVERSE SOLIDUS (\)
291					//   If the next input code point is EOF, do nothing.
292					//   Otherwise, if the next input code point is a newline, consume it.
293					let c = chars.peek();
294					if let Some(c) = c {
295						if is_newline(*c) {
296							chars.next();
297							if chars.peek() == Some(&'\n') {
298								i += 1;
299							}
300							i += 2;
301							chars = source[(start + i)..end].chars().peekable();
302							continue;
303						}
304					} else {
305						break;
306					}
307				}
308				i += 1;
309				let (ch, n) = source[(start + i)..].chars().parse_escape_sequence();
310				str.push(if ch == '\0' { REPLACEMENT_CHARACTER } else { ch.to_ascii_lowercase() });
311				i += n as usize;
312				chars = source[(start + i)..end].chars().peekable();
313			} else {
314				str.push(c.to_ascii_lowercase());
315				i += c.len_utf8();
316			}
317		}
318		str.into_bump_str()
319	}
320
321	pub fn with_quotes(&self, quote_style: QuoteStyle) -> Self {
322		if *self == quote_style || *self != Kind::String {
323			return *self;
324		}
325		Self::new(self.offset(), self.token().with_quotes(quote_style))
326	}
327
328	pub fn with_associated_whitespace(&self, rules: AssociatedWhitespaceRules) -> Self {
329		debug_assert!(self.1 == KindSet::DELIM_LIKE);
330		if self.1.associated_whitespace().to_bits() == rules.to_bits() {
331			return *self;
332		}
333		Self::new(self.offset(), self.token().with_associated_whitespace(rules))
334	}
335}
336
337impl From<Cursor> for Token {
338	fn from(cursor: Cursor) -> Self {
339		cursor.token()
340	}
341}
342
343impl PartialEq<Token> for Cursor {
344	fn eq(&self, other: &Token) -> bool {
345		self.1 == *other
346	}
347}
348
349impl ToSpan for Cursor {
350	fn to_span(&self) -> Span {
351		self.span()
352	}
353}
354
355impl From<Cursor> for Span {
356	fn from(cursor: Cursor) -> Self {
357		cursor.span()
358	}
359}
360
361impl PartialEq<Span> for Cursor {
362	fn eq(&self, other: &Span) -> bool {
363		self.span() == *other
364	}
365}
366
367impl From<Cursor> for Kind {
368	fn from(cursor: Cursor) -> Self {
369		cursor.token().kind()
370	}
371}
372
373impl PartialEq<Kind> for Cursor {
374	fn eq(&self, other: &Kind) -> bool {
375		self.1 == *other
376	}
377}
378
379impl PartialEq<CommentStyle> for Cursor {
380	fn eq(&self, other: &CommentStyle) -> bool {
381		self.1 == *other
382	}
383}
384
385impl From<Cursor> for KindSet {
386	fn from(cursor: Cursor) -> Self {
387		cursor.token().into()
388	}
389}
390
391impl PartialEq<KindSet> for Cursor {
392	fn eq(&self, other: &KindSet) -> bool {
393		self.1 == *other
394	}
395}
396
397impl From<Cursor> for QuoteStyle {
398	fn from(cursor: Cursor) -> Self {
399		cursor.token().into()
400	}
401}
402
403impl PartialEq<QuoteStyle> for Cursor {
404	fn eq(&self, other: &QuoteStyle) -> bool {
405		self.1 == *other
406	}
407}
408
409impl PartialEq<AssociatedWhitespaceRules> for Cursor {
410	fn eq(&self, other: &AssociatedWhitespaceRules) -> bool {
411		self.1 == *other
412	}
413}
414
415impl PartialEq<char> for Cursor {
416	fn eq(&self, other: &char) -> bool {
417		self.1 == *other
418	}
419}
420
421impl From<Cursor> for DimensionUnit {
422	fn from(cursor: Cursor) -> Self {
423		cursor.token().into()
424	}
425}
426
427impl PartialEq<DimensionUnit> for Cursor {
428	fn eq(&self, other: &DimensionUnit) -> bool {
429		self.1 == *other
430	}
431}
432
433impl PartialEq<CommentStyle> for &Cursor {
434	fn eq(&self, other: &CommentStyle) -> bool {
435		self.1 == *other
436	}
437}
438
439impl PartialEq<Kind> for &Cursor {
440	fn eq(&self, other: &Kind) -> bool {
441		self.1 == *other
442	}
443}
444
445impl PartialEq<KindSet> for &Cursor {
446	fn eq(&self, other: &KindSet) -> bool {
447		self.1 == *other
448	}
449}
450
451impl PartialEq<QuoteStyle> for &Cursor {
452	fn eq(&self, other: &QuoteStyle) -> bool {
453		self.1 == *other
454	}
455}
456
457impl PartialEq<char> for &Cursor {
458	fn eq(&self, other: &char) -> bool {
459		self.1 == *other
460	}
461}
462
463impl PartialEq<DimensionUnit> for &Cursor {
464	fn eq(&self, other: &DimensionUnit) -> bool {
465		self.1 == *other
466	}
467}
468
/// Converts a cursor into a `miette::SourceSpan` by way of the [Span] it covers.
#[cfg(feature = "miette")]
impl From<Cursor> for miette::SourceSpan {
	fn from(val: Cursor) -> Self {
		val.span().into()
	}
}
476
/// Serializes a cursor as a struct with `kind`, `offset` and `len` fields, plus a `unit` field for dimension
/// tokens. A cursor wrapping [Token::EMPTY] is serialized as "none".
#[cfg(feature = "serde")]
impl serde::ser::Serialize for Cursor {
	fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
	where
		S: serde::ser::Serializer,
	{
		use serde::ser::SerializeStruct;
		if self.token() == Token::EMPTY {
			return serializer.serialize_none();
		}
		let is_dimension = self.token().kind_bits() == Kind::Dimension as u8;
		// The declared field count must match what is actually written: length-prefixed formats rely on it.
		let field_count = if is_dimension { 4 } else { 3 };
		let mut state = serializer.serialize_struct("Cursor", field_count)?;
		state.serialize_field("kind", self.token().kind().as_str())?;
		state.serialize_field("offset", &self.offset())?;
		state.serialize_field("len", &self.token().len())?;
		if is_dimension {
			state.serialize_field("unit", &self.token().dimension_unit())?;
		}
		state.end()
	}
}
497
#[test]
fn size_test() {
	// Guards against the cursor accidentally growing in size.
	let cursor_size = ::std::mem::size_of::<Cursor>();
	assert_eq!(cursor_size, 12);
}
502
#[test]
fn parse_str_lower() {
	let allocator = Bump::new();

	// Each group pairs one cursor with a table of `(source, expected)` rows.
	let c = Cursor::new(SourceOffset(0), Token::new_ident(true, false, false, 3));
	for &(source, expected) in [("FoO", "foo"), ("FOO", "foo"), ("foo", "foo")].iter() {
		assert_eq!(c.parse_str_lower(source, &allocator), expected);
	}

	let c = Cursor::new(SourceOffset(0), Token::new_string(QuoteStyle::Single, true, false, 5));
	for &(source, expected) in [("'FoO'", "foo"), ("'FOO'", "foo")].iter() {
		assert_eq!(c.parse_str_lower(source, &allocator), expected);
	}

	let c = Cursor::new(SourceOffset(0), Token::new_string(QuoteStyle::Single, false, false, 4));
	for &(source, expected) in [("'FoO", "foo"), ("'FOO", "foo"), ("'foo", "foo"), ("'foo", "foo")].iter() {
		assert_eq!(c.parse_str_lower(source, &allocator), expected);
	}

	let c = Cursor::new(SourceOffset(0), Token::new_url(true, false, false, 4, 1, 6));
	for &(source, expected) in [("url(a)", "a"), ("url(b)", "b")].iter() {
		assert_eq!(c.parse_str_lower(source, &allocator), expected);
	}

	let c = Cursor::new(SourceOffset(0), Token::new_url(true, false, false, 6, 1, 8));
	for &(source, expected) in [("\\75rl(A)", "a"), ("u\\52l(B)", "b"), ("ur\\6c(C)", "c")].iter() {
		assert_eq!(c.parse_str_lower(source, &allocator), expected);
	}

	let c = Cursor::new(SourceOffset(0), Token::new_url(true, false, false, 8, 1, 10));
	for &(source, expected) in [("\\75\\52l(A)", "a"), ("u\\52\\6c(B)", "b"), ("\\75r\\6c(C)", "c")].iter() {
		assert_eq!(c.parse_str_lower(source, &allocator), expected);
	}
}
535
#[test]
fn eq_ignore_ascii_case() {
	// Each group pairs one cursor with a table of `(source, other, expected)` rows.
	let c = Cursor::new(SourceOffset(0), Token::new_ident(false, false, false, 3));
	let rows = [
		("foo", "foo", true),
		("foo", "bar", false),
		("fo ", "foo", false),
		("foo", "fooo", false),
		("foo", "ғоо", false),
	];
	for &(source, other, expected) in rows.iter() {
		assert_eq!(c.eq_ignore_ascii_case(source, other), expected);
	}

	let c = Cursor::new(SourceOffset(0), Token::new_ident(true, false, false, 3));
	let rows = [
		("FoO", "foo", true),
		("FOO", "foo", true),
		("foo", "bar", false),
		("fo ", "foo", false),
		("foo", "fooo", false),
		("foo", "ғоо", false),
	];
	for &(source, other, expected) in rows.iter() {
		assert_eq!(c.eq_ignore_ascii_case(source, other), expected);
	}

	// The remaining cursors start at offset 3, so only the tail of the source is compared.
	let c = Cursor::new(SourceOffset(3), Token::new_ident(false, false, false, 3));
	assert!(c.eq_ignore_ascii_case("foobar", "bar"));

	let c = Cursor::new(SourceOffset(3), Token::new_ident(false, false, true, 3));
	assert!(c.eq_ignore_ascii_case("foobar", "bar"));

	let c = Cursor::new(SourceOffset(3), Token::new_ident(false, false, true, 5));
	assert!(c.eq_ignore_ascii_case("foob\\61r", "bar"));

	let c = Cursor::new(SourceOffset(3), Token::new_ident(false, false, true, 7));
	assert!(c.eq_ignore_ascii_case("foob\\61\\72", "bar"));
}
565
#[test]
fn write_str() {
	let bump = Bump::new();
	// Renders `cursor` against `source` into a fresh bump-allocated string.
	let render = |cursor: Cursor, source: &str| {
		let mut out = String::new_in(&bump);
		cursor.write_str(source, &mut out).unwrap();
		out
	};

	// The token's quote style wins over the quotes found in the source text.
	let c = Cursor::new(SourceOffset(0), Token::new_string(QuoteStyle::Double, true, false, 5));
	let written = render(c, "'foo'");
	assert_eq!(c.token().quote_style(), QuoteStyle::Double);
	assert_eq!(written, "\"foo\"");

	// A closing quote is written even when the source text lacks one.
	let c = Cursor::new(SourceOffset(0), Token::new_string(QuoteStyle::Double, false, false, 4));
	let written = render(c, "'foo");
	assert_eq!(c.token().quote_style(), QuoteStyle::Double);
	assert_eq!(written, "\"foo\"");

	let c = Cursor::new(SourceOffset(0), Token::new_string(QuoteStyle::Single, false, false, 4));
	let written = render(c, "\"foo");
	assert_eq!(c.token().quote_style(), QuoteStyle::Single);
	assert_eq!(written, "'foo'");
}
586}