csskit_source_finder/
lib.rs

1#![deny(warnings)]
2use std::collections::HashSet;
3use std::io;
4use std::path::PathBuf;
5use std::str::from_utf8;
6
7use glob::glob;
8use grep_matcher::{Captures, Matcher};
9use grep_regex::{RegexMatcher, RegexMatcherBuilder};
10use grep_searcher::{Searcher, SearcherBuilder, Sink, SinkError, SinkMatch};
11use syn::{DeriveInput, parse_str};
12
/// The way a discovered type takes part in visiting, as declared in its source.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum VisitMode {
	/// Declared with `#[visit]` or `#[visit(self)]`.
	Self_,
	/// Declared with `#[visit(all)]`.
	All,
	/// Declared with `#[visit(skip)]`.
	Skip,
	/// Declared with `#[visit(children)]`.
	Children,
	/// The type has a hand-written `impl VisitableTrait`.
	Manual,
}

impl VisitMode {
	/// Reports whether a node with this mode is queryable (has a `visit_self` call).
	///
	/// `Self_`, `All` and `Manual` are queryable; `Skip` and `Children` are not.
	pub fn is_queryable(&self) -> bool {
		// Exhaustive on purpose: a newly added variant must be classified explicitly.
		match self {
			VisitMode::Self_ | VisitMode::All | VisitMode::Manual => true,
			VisitMode::Skip | VisitMode::Children => false,
		}
	}
}
33
34#[derive(Debug, Clone, PartialEq, Eq, Hash)]
35pub struct VisitableNode {
36	pub input: DeriveInput,
37	pub visit_mode: VisitMode,
38}
39
40impl VisitableNode {
41	pub fn ident(&self) -> &syn::Ident {
42		&self.input.ident
43	}
44
45	pub fn generics(&self) -> &syn::Generics {
46		&self.input.generics
47	}
48}
49
50pub struct NodeMatcher<'a> {
51	matcher: &'a RegexMatcher,
52	matches: &'a mut HashSet<VisitableNode>,
53}
54
55impl Sink for NodeMatcher<'_> {
56	type Error = io::Error;
57
58	fn matched(&mut self, _searcher: &Searcher, mat: &SinkMatch<'_>) -> Result<bool, io::Error> {
59		let mut captures = self.matcher.new_captures()?;
60		let line = match from_utf8(mat.bytes()) {
61			Ok(matched) => matched,
62			Err(err) => return Err(io::Error::error_message(err)),
63		};
64		self.matcher.captures_iter(mat.bytes(), &mut captures, |captures| -> bool {
65			// Group 1 contains everything between derive and pub struct/enum
66			let attrs_section = &line[captures.get(1).unwrap()];
67
68			// Search for visit attribute in the captured section
69			// The default (no visit attr) is Children mode per derive macro semantics
70			let visit_mode = if attrs_section.contains("visit(skip)") {
71				VisitMode::Skip
72			} else if attrs_section.contains("visit(children)") {
73				VisitMode::Children
74			} else if attrs_section.contains("visit(all)") {
75				VisitMode::All
76			} else if attrs_section.contains("visit(self)") {
77				VisitMode::Self_
78			} else if attrs_section.contains("visit") {
79				// Just `visit` (or `visit()`) means visit self AND children
80				VisitMode::Self_
81			} else {
82				// No visit attribute found - default is children only (not queryable)
83				VisitMode::Children
84			};
85
86			let capture = format!("{} {} {{}}", &line[captures.get(2).unwrap()], &line[captures.get(5).unwrap()]);
87			match parse_str::<DeriveInput>(&capture) {
88				Ok(input) => {
89					self.matches.insert(VisitableNode { input, visit_mode });
90				}
91				Err(err) => {
92					panic!("#[visit] or unknown: {capture} {err}");
93				}
94			}
95			true
96		})?;
97		Ok(true)
98	}
99}
100
101fn build_visit_attr_matcher() -> RegexMatcher {
102	RegexMatcherBuilder::new()
103		.multi_line(true)
104		.dot_matches_new_line(true)
105		.ignore_whitespace(true)
106		.build(
107			r#"
108			^\s*\#\[
109			# Match any type with derive(Visitable)
110			cfg_attr\([^,]+,\s*derive\((?:csskit_derives::)?Visitable\)
111			# Capture everything from here until the type declaration to search for visit attr
112			# This captures the visit attr whether it's on same line or separate line
113			([^\{\}]*?)
114			# Match the type declaration
115			(pub\s*(?:struct|enum)\s*)
116			# munch any comments/attributes between this and our name (for macros)
117			(:?\n?\s*(:?\/\/|\#)[^\n]*)*
118			# finally grab the word (plus any generics)
119			\s*(\w*(:?<[^>]+>)?)"#,
120		)
121		.unwrap()
122}
123
124fn build_manual_impl_matcher() -> RegexMatcher {
125	RegexMatcherBuilder::new()
126		.multi_line(true)
127		.ignore_whitespace(true)
128		.build(
129			r#"
130			# Match manual impl VisitableTrait for Type
131			impl\s*(?:<[^>]+>\s*)?
132			VisitableTrait\s+for\s+
133			# Capture the type name with optional generics
134			(\w+)(?:<[^>]+>)?"#,
135		)
136		.unwrap()
137}
138
139pub struct ManualImplMatcher<'a> {
140	matcher: &'a RegexMatcher,
141	matches: &'a mut HashSet<VisitableNode>,
142}
143
144impl Sink for ManualImplMatcher<'_> {
145	type Error = io::Error;
146
147	fn matched(&mut self, _searcher: &Searcher, mat: &SinkMatch<'_>) -> Result<bool, io::Error> {
148		let mut captures = self.matcher.new_captures()?;
149		let line = match from_utf8(mat.bytes()) {
150			Ok(matched) => matched,
151			Err(err) => return Err(io::Error::error_message(err)),
152		};
153		self.matcher.captures_iter(mat.bytes(), &mut captures, |captures| -> bool {
154			let type_name = &line[captures.get(1).unwrap()];
155			// Skip if already found by attr matcher
156			if self.matches.iter().any(|n| n.input.ident == type_name) {
157				return true;
158			}
159			let capture = format!("pub struct {} {{}}", type_name);
160			match parse_str::<DeriveInput>(&capture) {
161				Ok(input) => {
162					self.matches.insert(VisitableNode { input, visit_mode: VisitMode::Manual });
163				}
164				Err(err) => {
165					panic!("manual impl VisitableTrait: {capture} {err}");
166				}
167			}
168			true
169		})?;
170		Ok(true)
171	}
172}
173
174/// Find all types with `#[visit]` attribute or manual VisitableTrait impl
175pub fn find_visitable_nodes(dir: &str, matches: &mut HashSet<VisitableNode>, path_callback: impl Fn(&PathBuf) + Copy) {
176	let attr_matcher = build_visit_attr_matcher();
177	let manual_matcher = build_manual_impl_matcher();
178	let mut searcher = SearcherBuilder::new().line_number(false).multi_line(true).build();
179	let entries: Vec<_> = glob(dir).unwrap().filter_map(|p| p.ok()).collect();
180	// First pass: find types with derive(Visitable)
181	for entry in &entries {
182		path_callback(entry);
183		let context = NodeMatcher { matcher: &attr_matcher, matches };
184		searcher.search_path(&attr_matcher, entry, context).unwrap();
185	}
186	// Second pass: find types with manual impl VisitableTrait
187	for entry in &entries {
188		let context = ManualImplMatcher { matcher: &manual_matcher, matches };
189		searcher.search_path(&manual_matcher, entry, context).unwrap();
190	}
191}
192
193/// Find types that are queryable (have `#[visit]`, `#[visit(self)]`, or `#[visit(all)]` - not skip/children)
194///
195/// Queryable nodes are those that get a NodeId and can be matched by selectors.
196pub fn find_queryable_nodes(dir: &str, matches: &mut HashSet<VisitableNode>, path_callback: impl Fn(&PathBuf) + Copy) {
197	let mut all_visitable = HashSet::new();
198	find_visitable_nodes(dir, &mut all_visitable, path_callback);
199	// Filter to only queryable modes
200	matches.extend(all_visitable.into_iter().filter(|node| node.visit_mode.is_queryable()));
201}
202
/// Snapshot test: scans the sources under `../css_ast/src` and compares the rendered
/// declarations of every visitable node against the `all_visitable_nodes` snapshot.
#[test]
fn test_find_visitable_nodes() {
	use itertools::Itertools;
	use quote::ToTokens;
	let mut matches = HashSet::new();
	// NOTE(review): the glob is relative, so this presumably expects to run from this
	// crate's directory — confirm against how the test is invoked.
	find_visitable_nodes("../css_ast/src/**/*.rs", &mut matches, |_| {});
	// The rendered declarations are sorted because `HashSet` iteration order is
	// unspecified and the snapshot needs a stable order.
	::insta::assert_ron_snapshot!(
		"all_visitable_nodes",
		matches.iter().map(|node| node.input.to_token_stream().to_string()).sorted().collect::<Vec<_>>()
	);
}