1use crate::{
2 AssociatedWhitespaceRules, CommentStyle, CowStr, Cursor, Kind, KindSet, QuoteStyle, SourceOffset, Span, ToSpan,
3 Token,
4 syntax::{ParseEscape, is_newline},
5};
6use allocator_api2::{alloc::Allocator, boxed::Box, vec::Vec};
7use std::char::REPLACEMENT_CHARACTER;
8use std::fmt::{Display, Formatter, Result};
9
/// A [`Cursor`] paired with the slice of source text that the cursor's token covers.
///
/// [`SourceCursor::from`] debug-asserts that the text is exactly as long as the cursor, so the
/// methods on this type can slice `source` using the token's own length metadata.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct SourceCursor<'a> {
    /// The cursor (offset plus token) this text belongs to.
    cursor: Cursor,
    /// The raw, still-escaped text of the token, borrowed from the original source.
    source: &'a str,
}
16
/// A `SourceCursor` spans exactly what its underlying [`Cursor`] spans.
impl<'a> ToSpan for SourceCursor<'a> {
    /// Returns the span of the wrapped cursor; the borrowed text plays no part in it.
    fn to_span(&self) -> Span {
        self.cursor.to_span()
    }
}
22
23impl<'a> Display for SourceCursor<'a> {
24 fn fmt(&self, f: &mut Formatter<'_>) -> Result {
25 match self.token().kind() {
26 Kind::Eof => Ok(()),
27 Kind::String => match self.token().quote_style() {
31 QuoteStyle::Single => {
32 let inner =
33 &self.source[1..(self.token().len() as usize) - self.token().has_close_quote() as usize];
34 write!(f, "'{inner}'")
35 }
36 QuoteStyle::Double => {
37 let inner =
38 &self.source[1..(self.token().len() as usize) - self.token().has_close_quote() as usize];
39 write!(f, "\"{inner}\"")
40 }
41 QuoteStyle::None => unreachable!(),
43 },
44 Kind::Delim
45 | Kind::Colon
46 | Kind::Semicolon
47 | Kind::Comma
48 | Kind::LeftSquare
49 | Kind::LeftParen
50 | Kind::RightSquare
51 | Kind::RightParen
52 | Kind::LeftCurly
53 | Kind::RightCurly => self.token().char().unwrap().fmt(f),
54 _ => f.write_str(self.source),
55 }
56 }
57}
58
59impl<'a> SourceCursor<'a> {
60 pub const SPACE: SourceCursor<'static> = SourceCursor::from(Cursor::new(SourceOffset(0), Token::SPACE), " ");
61 pub const TAB: SourceCursor<'static> = SourceCursor::from(Cursor::new(SourceOffset(0), Token::TAB), "\t");
62 pub const NEWLINE: SourceCursor<'static> = SourceCursor::from(Cursor::new(SourceOffset(0), Token::NEWLINE), "\n");
63
64 #[inline(always)]
65 pub const fn from(cursor: Cursor, source: &'a str) -> Self {
66 debug_assert!(
67 (cursor.len() as usize) == source.len(),
68 "A SourceCursor should be constructed with a source that matches the length of the cursor!"
69 );
70 Self { cursor, source }
71 }
72
73 #[inline(always)]
74 pub const fn cursor(&self) -> Cursor {
75 self.cursor
76 }
77
78 #[inline(always)]
79 pub const fn token(&self) -> Token {
80 self.cursor.token()
81 }
82
83 #[inline(always)]
84 pub const fn source(&self) -> &'a str {
85 self.source
86 }
87
88 pub fn with_quotes(&self, quote_style: QuoteStyle) -> Self {
89 Self::from(self.cursor.with_quotes(quote_style), self.source)
90 }
91
92 pub fn with_associated_whitespace(&self, rules: AssociatedWhitespaceRules) -> Self {
93 Self::from(self.cursor.with_associated_whitespace(rules), self.source)
94 }
95
96 pub fn eq_ignore_ascii_case(&self, other: &str) -> bool {
97 debug_assert!(self.token() != Kind::Delim && self.token() != Kind::Url);
98 debug_assert!(other.to_ascii_lowercase() == other);
99 let start = self.token().leading_len() as usize;
100 let end = self.source.len() - self.token().trailing_len() as usize;
101 if !self.token().contains_escape_chars() {
102 if end - start != other.len() {
103 return false;
104 }
105 if self.token().is_lower_case() {
106 debug_assert!(self.source[start..end].to_ascii_lowercase() == self.source[start..end]);
107 return &self.source[start..end] == other;
108 }
109 return self.source[start..end].eq_ignore_ascii_case(other);
110 }
111 let mut chars = self.source[start..end].chars().peekable();
112 let mut other_chars = other.chars();
113 let mut i = 0;
114 while let Some(c) = chars.next() {
115 let o = other_chars.next();
116 if o.is_none() {
117 return false;
118 }
119 let o = o.unwrap();
120 if c == '\0' {
121 if REPLACEMENT_CHARACTER != o {
122 return false;
123 }
124 i += 1;
125 } else if c == '\\' {
126 if self.token().kind_bits() == Kind::String as u8 {
129 let c = chars.peek();
134 if let Some(c) = c {
135 if is_newline(*c) {
136 chars.next();
137 if chars.peek() == Some(&'\n') {
138 i += 1;
139 }
140 i += 2;
141 chars = self.source[(start + i)..end].chars().peekable();
142 continue;
143 }
144 } else {
145 break;
146 }
147 }
148 i += 1;
149 let (ch, n) = self.source[(start + i)..].chars().parse_escape_sequence();
150 i += n as usize;
151 chars = self.source[(start + i)..end].chars().peekable();
152 if (ch == '\0' && REPLACEMENT_CHARACTER != o) || ch != o {
153 return false;
154 }
155 } else if c != o {
156 return false;
157 } else {
158 i += c.len_utf8();
159 }
160 }
161 other_chars.next().is_none()
162 }
163
164 pub fn parse<A: Allocator + Clone + 'a>(&self, allocator: A) -> CowStr<'a, A> {
166 debug_assert!(self.token() != Kind::Delim);
167 let start = self.token().leading_len() as usize;
168 let end = self.source.len() - self.token().trailing_len() as usize;
169 if !self.token().contains_escape_chars() {
170 return CowStr::<A>::Borrowed(&self.source[start..end]);
171 }
172 let mut chars = self.source[start..end].chars().peekable();
173 let mut i = 0;
174 let mut vec: Option<Vec<u8, A>> = None;
175 while let Some(c) = chars.next() {
176 if c == '\0' {
177 if vec.is_none() {
178 vec = if i == 0 {
179 Some(Vec::new_in(allocator.clone()))
180 } else {
181 Some({
182 let mut v = Vec::new_in(allocator.clone());
183 v.extend(self.source[start..(start + i)].bytes());
184 v
185 })
186 }
187 }
188 let mut buf = [0; 4];
189 let bytes = REPLACEMENT_CHARACTER.encode_utf8(&mut buf).as_bytes();
190 vec.as_mut().unwrap().extend_from_slice(bytes);
191 i += 1;
192 } else if c == '\\' {
193 if vec.is_none() {
194 vec = if i == 0 {
195 Some(Vec::new_in(allocator.clone()))
196 } else {
197 Some({
198 let mut v = Vec::new_in(allocator.clone());
199 v.extend(self.source[start..(start + i)].bytes());
200 v
201 })
202 }
203 }
204 if self.token().kind_bits() == Kind::String as u8 {
207 let c = chars.peek();
212 if let Some(c) = c {
213 if is_newline(*c) {
214 chars.next();
215 if chars.peek() == Some(&'\n') {
216 i += 1;
217 }
218 i += 2;
219 chars = self.source[(start + i)..end].chars().peekable();
220 continue;
221 }
222 } else {
223 break;
224 }
225 }
226 i += 1;
227 let (ch, n) = self.source[(start + i)..].chars().parse_escape_sequence();
228 let char_to_push = if ch == '\0' { REPLACEMENT_CHARACTER } else { ch };
229 let mut buf = [0; 4];
230 let bytes = char_to_push.encode_utf8(&mut buf).as_bytes();
231 vec.as_mut().unwrap().extend_from_slice(bytes);
232 i += n as usize;
233 chars = self.source[(start + i)..end].chars().peekable();
234 } else {
235 if let Some(bytes) = &mut vec {
236 let mut buf = [0; 4];
237 let char_bytes = c.encode_utf8(&mut buf).as_bytes();
238 bytes.extend_from_slice(char_bytes);
239 }
240 i += c.len_utf8();
241 }
242 }
243 match vec {
244 Some(vec) => {
245 let boxed_slice = vec.into_boxed_slice();
246 unsafe { CowStr::Owned(Box::from_raw_in(Box::into_raw(boxed_slice) as *mut str, allocator)) }
248 }
249 None => CowStr::Borrowed(&self.source[start..start + i]),
250 }
251 }
252
253 pub fn parse_ascii_lower<A: Allocator + Clone + 'a>(&self, allocator: A) -> CowStr<'a, A> {
255 debug_assert!(self.token() != Kind::Delim);
256 let start = self.token().leading_len() as usize;
257 let end = self.source.len() - self.token().trailing_len() as usize;
258 if !self.token().contains_escape_chars() && self.token().is_lower_case() {
259 return CowStr::Borrowed(&self.source[start..end]);
260 }
261 let mut chars = self.source[start..end].chars().peekable();
262 let mut i = 0;
263 let mut vec: Vec<u8, A> = Vec::new_in(allocator.clone());
264 while let Some(c) = chars.next() {
265 if c == '\0' {
266 let mut buf = [0; 4];
267 let bytes = REPLACEMENT_CHARACTER.encode_utf8(&mut buf).as_bytes();
268 vec.extend_from_slice(bytes);
269 i += 1;
270 } else if c == '\\' {
271 if self.token().kind_bits() == Kind::String as u8 {
274 let c = chars.peek();
279 if let Some(c) = c {
280 if is_newline(*c) {
281 chars.next();
282 if chars.peek() == Some(&'\n') {
283 i += 1;
284 }
285 i += 2;
286 chars = self.source[(start + i)..end].chars().peekable();
287 continue;
288 }
289 } else {
290 break;
291 }
292 }
293 i += 1;
294 let (ch, n) = self.source[(start + i)..].chars().parse_escape_sequence();
295 let char_to_push = if ch == '\0' { REPLACEMENT_CHARACTER } else { ch.to_ascii_lowercase() };
296 let mut buf = [0; 4];
297 let bytes = char_to_push.encode_utf8(&mut buf).as_bytes();
298 vec.extend_from_slice(bytes);
299 i += n as usize;
300 chars = self.source[(start + i)..end].chars().peekable();
301 } else {
302 let mut buf = [0; 4];
303 let bytes = c.to_ascii_lowercase().encode_utf8(&mut buf).as_bytes();
304 vec.extend_from_slice(bytes);
305 i += c.len_utf8();
306 }
307 }
308 let boxed_slice = vec.into_boxed_slice();
309 unsafe { CowStr::Owned(Box::from_raw_in(Box::into_raw(boxed_slice) as *mut str, allocator)) }
311 }
312}
313
/// Lets a `SourceCursor` be compared directly against a [`Kind`].
impl PartialEq<Kind> for SourceCursor<'_> {
    /// Delegates to the token's own comparison with `Kind`; the source text is not consulted.
    fn eq(&self, other: &Kind) -> bool {
        self.token() == *other
    }
}
319
/// Lets a `SourceCursor` be compared directly against a [`CommentStyle`].
impl PartialEq<CommentStyle> for SourceCursor<'_> {
    /// Delegates to the token's own comparison with `CommentStyle`; the source text is not
    /// consulted.
    fn eq(&self, other: &CommentStyle) -> bool {
        self.token() == *other
    }
}
325
/// Converts a `SourceCursor` into a [`KindSet`] by way of its token.
impl From<SourceCursor<'_>> for KindSet {
    /// Uses the token's own `Into<KindSet>` conversion; the source text is discarded.
    fn from(cursor: SourceCursor<'_>) -> Self {
        cursor.token().into()
    }
}
331
/// Lets a `SourceCursor` be compared directly against a [`KindSet`].
impl PartialEq<KindSet> for SourceCursor<'_> {
    /// Delegates to the token's own comparison with `KindSet`; the source text is not consulted.
    fn eq(&self, other: &KindSet) -> bool {
        self.token() == *other
    }
}
337
// Unit tests for `SourceCursor`. Tokens are built by hand, so each source string below must be
// exactly as long as the token it is paired with (`SourceCursor::from` debug-asserts this).
#[cfg(test)]
mod test {
    use crate::{Cursor, QuoteStyle, SourceCursor, SourceOffset, Token};
    use allocator_api2::alloc::Global;
    use std::fmt::Write;

    /// `parse_ascii_lower` lower-cases the content of idents, strings and urls.
    #[test]
    fn parse_str_lower() {
        // Plain 3-character idents in various casings.
        let c = Cursor::new(SourceOffset(0), Token::new_ident(true, false, false, 0, 3));
        assert_eq!(SourceCursor::from(c, "FoO").parse_ascii_lower(Global), "foo");
        assert_eq!(SourceCursor::from(c, "FOO").parse_ascii_lower(Global), "foo");
        assert_eq!(SourceCursor::from(c, "foo").parse_ascii_lower(Global), "foo");

        // Closed single-quoted strings: both quotes are excluded from the result.
        let c = Cursor::new(SourceOffset(0), Token::new_string(QuoteStyle::Single, true, false, 5));
        assert_eq!(SourceCursor::from(c, "'FoO'").parse_ascii_lower(Global), "foo");
        assert_eq!(SourceCursor::from(c, "'FOO'").parse_ascii_lower(Global), "foo");

        // Unclosed single-quoted strings: only the opening quote is excluded.
        let c = Cursor::new(SourceOffset(0), Token::new_string(QuoteStyle::Single, false, false, 4));
        assert_eq!(SourceCursor::from(c, "'FoO").parse_ascii_lower(Global), "foo");
        assert_eq!(SourceCursor::from(c, "'FOO").parse_ascii_lower(Global), "foo");
        assert_eq!(SourceCursor::from(c, "'foo").parse_ascii_lower(Global), "foo");

        // Urls: only the text between `url(` and `)` is returned.
        // NOTE(review): the numeric arguments look like (leading length, trailing length, total
        // length) - confirm against `Token::new_url`.
        let c = Cursor::new(SourceOffset(0), Token::new_url(true, false, false, 4, 1, 6));
        assert_eq!(SourceCursor::from(c, "url(a)").parse_ascii_lower(Global), "a");
        assert_eq!(SourceCursor::from(c, "url(b)").parse_ascii_lower(Global), "b");

        // Urls whose `url` prefix contains one escape: the longer prefix is still skipped.
        let c = Cursor::new(SourceOffset(0), Token::new_url(true, false, false, 6, 1, 8));
        assert_eq!(SourceCursor::from(c, "\\75rl(A)").parse_ascii_lower(Global), "a");
        assert_eq!(SourceCursor::from(c, "u\\52l(B)").parse_ascii_lower(Global), "b");
        assert_eq!(SourceCursor::from(c, "ur\\6c(C)").parse_ascii_lower(Global), "c");

        // Urls whose `url` prefix contains two escapes.
        let c = Cursor::new(SourceOffset(0), Token::new_url(true, false, false, 8, 1, 10));
        assert_eq!(SourceCursor::from(c, "\\75\\52l(A)").parse_ascii_lower(Global), "a");
        assert_eq!(SourceCursor::from(c, "u\\52\\6c(B)").parse_ascii_lower(Global), "b");
        assert_eq!(SourceCursor::from(c, "\\75r\\6c(C)").parse_ascii_lower(Global), "c");
    }

    /// `eq_ignore_ascii_case` compares the token's content against a lower-case string.
    #[test]
    fn eq_ignore_ascii_case() {
        let c = Cursor::new(SourceOffset(0), Token::new_ident(false, false, false, 0, 3));
        assert!(SourceCursor::from(c, "foo").eq_ignore_ascii_case("foo"));
        assert!(!SourceCursor::from(c, "foo").eq_ignore_ascii_case("bar"));
        assert!(!SourceCursor::from(c, "fo ").eq_ignore_ascii_case("foo"));
        assert!(!SourceCursor::from(c, "foo").eq_ignore_ascii_case("fooo"));
        // Non-ASCII look-alike characters must not compare equal.
        assert!(!SourceCursor::from(c, "foo").eq_ignore_ascii_case("ғоо"));

        // Same checks with the token's first flag set, using mixed-case sources.
        let c = Cursor::new(SourceOffset(0), Token::new_ident(true, false, false, 0, 3));
        assert!(SourceCursor::from(c, "FoO").eq_ignore_ascii_case("foo"));
        assert!(SourceCursor::from(c, "FOO").eq_ignore_ascii_case("foo"));
        assert!(!SourceCursor::from(c, "foo").eq_ignore_ascii_case("bar"));
        assert!(!SourceCursor::from(c, "fo ").eq_ignore_ascii_case("foo"));
        assert!(!SourceCursor::from(c, "foo").eq_ignore_ascii_case("fooo"));
        assert!(!SourceCursor::from(c, "foo").eq_ignore_ascii_case("ғоо"));

        // The cursor's offset has no influence on the comparison.
        let c = Cursor::new(SourceOffset(3), Token::new_ident(false, false, false, 0, 3));
        assert!(SourceCursor::from(c, "bar").eq_ignore_ascii_case("bar"));

        // NOTE(review): the third flag appears to mark the token as containing escapes, which
        // would route these cases through the decoding path - confirm against `Token::new_ident`.
        let c = Cursor::new(SourceOffset(3), Token::new_ident(false, false, true, 0, 3));
        assert!(SourceCursor::from(c, "bar").eq_ignore_ascii_case("bar"));

        // One hex escape (`\61` is `a`).
        let c = Cursor::new(SourceOffset(3), Token::new_ident(false, false, true, 0, 5));
        assert!(SourceCursor::from(c, "b\\61r").eq_ignore_ascii_case("bar"));

        // Two consecutive hex escapes (`\61` is `a`, `\72` is `r`).
        let c = Cursor::new(SourceOffset(3), Token::new_ident(false, false, true, 0, 7));
        assert!(SourceCursor::from(c, "b\\61\\72").eq_ignore_ascii_case("bar"));
    }

    /// `Display` writes strings with the token's quote style, not the quotes found in the source.
    #[test]
    fn write_str() {
        // Source uses single quotes, token asks for double quotes.
        let c = Cursor::new(SourceOffset(0), Token::new_string(QuoteStyle::Double, true, false, 5));
        let mut str = String::new();
        write!(str, "{}", SourceCursor::from(c, "'foo'")).unwrap();
        assert_eq!(c.token().quote_style(), QuoteStyle::Double);
        assert_eq!(str, "\"foo\"");

        // An unclosed string is still written with a closing quote.
        let c = Cursor::new(SourceOffset(0), Token::new_string(QuoteStyle::Double, false, false, 4));
        let mut str = String::new();
        write!(str, "{}", SourceCursor::from(c, "'foo")).unwrap();
        assert_eq!(c.token().quote_style(), QuoteStyle::Double);
        assert_eq!(str, "\"foo\"");

        // The reverse: double-quoted source, single-quote token.
        let c = Cursor::new(SourceOffset(0), Token::new_string(QuoteStyle::Single, false, false, 4));
        let mut str = String::new();
        write!(str, "{}", SourceCursor::from(c, "\"foo")).unwrap();
        assert_eq!(c.token().quote_style(), QuoteStyle::Single);
        assert_eq!(str, "'foo'");
    }

    /// `parse` and `parse_ascii_lower` accept a `&Bump` as their allocator.
    #[test]
    #[cfg(feature = "bumpalo")]
    fn test_bumpalo_compatibility() {
        use bumpalo::Bump;

        let bump = Bump::new();
        let c = Cursor::new(SourceOffset(0), Token::new_ident(true, false, false, 0, 3));

        // The returned value can be compared with a `&str` directly...
        assert_eq!(SourceCursor::from(c, "FoO").parse(&bump), "FoO");
        assert_eq!(SourceCursor::from(c, "FoO").parse_ascii_lower(&bump), "foo");

        // ...and after dereferencing to `str`.
        assert_eq!(&*SourceCursor::from(c, "FoO").parse(&bump), "FoO");
        assert_eq!(&*SourceCursor::from(c, "FoO").parse_ascii_lower(&bump), "foo");

        // Escaped input exercises the path that builds the result inside the bump allocator.
        let c = Cursor::new(SourceOffset(0), Token::new_ident(false, false, true, 0, 7));
        assert_eq!(SourceCursor::from(c, "b\\61\\72").parse(&bump), "bar");
        assert_eq!(&*SourceCursor::from(c, "b\\61\\72").parse(&bump), "bar");
    }
}