css_lexer/
dyn_atom_registry.rs

1use crate::token::ATOM_DYNAMIC_BIT;
2use crate::{AtomSet, DynAtomSet};
3use fnv::FnvHashMap;
4use std::fmt::{Debug, Formatter, Result};
5use std::marker::PhantomData;
6use std::sync::RwLock;
7
8/// A typed atom that belongs to a specific `DynAtomRegistry<T>`.
9#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
10pub struct Atom<T: AtomSet> {
11	bits: u32,
12	_phantom: PhantomData<T>,
13}
14
15impl<T: AtomSet> Atom<T> {
16	/// Creates an atom from raw bits.
17	#[inline]
18	const fn new(bits: u32) -> Self {
19		Self { bits, _phantom: PhantomData }
20	}
21
22	/// Returns the raw bit representation.
23	#[inline]
24	pub const fn as_bits(self) -> u32 {
25		self.bits
26	}
27
28	/// Returns true if this is a dynamic (runtime-interned) atom.
29	#[inline]
30	pub const fn is_dynamic(self) -> bool {
31		self.bits & (1 << ATOM_DYNAMIC_BIT) != 0
32	}
33
34	/// Returns true if this is a static atom.
35	#[inline]
36	pub const fn is_static(self) -> bool {
37		!self.is_dynamic()
38	}
39}
40
41/// Combines static atoms with dynamic (runtime-interned) atoms.
42///
43/// Each `DynAtomRegistry<T>` should be registered as a singleton using the `register_atom_set!` macro. This ensures
44///
45/// # Example
46///
47/// ```rust
48/// use css_lexer::{AtomSet, Atom, DynAtomRegistry, RegisteredAtomSet, register_atom_set};
49/// use derive_atom_set::AtomSet as DeriveAtomSet;
50///
51/// #[derive(Debug, Default, Copy, Clone, PartialEq, DeriveAtomSet)]
52/// enum MyAtoms {
53///     #[default]
54///     _None,
55///     Px,
56/// }
57///
58/// // Register singleton
59/// register_atom_set!(MyAtoms);
60///
61/// let atoms = MyAtoms::get_dyn_set();
62///
63/// // Work with typed atoms
64/// let px: Atom<MyAtoms> = atoms.atom_from_str("px");
65/// let custom: Atom<MyAtoms> = atoms.atom_from_str("custom-value");
66///
67/// assert!(px.is_static());
68/// assert!(custom.is_dynamic());
69/// ```
70pub struct DynAtomRegistry<T: AtomSet> {
71	static_atoms: T,
72	str_to_bits_map: RwLock<FnvHashMap<&'static str, u32>>,
73	bits_to_str_vec: RwLock<Vec<&'static str>>,
74}
75
76impl<T: AtomSet> DynAtomRegistry<T> {
77	#[doc(hidden)]
78	pub fn new() -> Self {
79		Self {
80			static_atoms: T::default(),
81			str_to_bits_map: RwLock::new(FnvHashMap::default()),
82			bits_to_str_vec: RwLock::new(Vec::new()),
83		}
84	}
85
86	/// Converts a string to a typed atom.
87	pub fn atom_from_str(&self, s: &str) -> Atom<T> {
88		Atom::new(self.str_to_bits(s))
89	}
90
91	/// Converts raw bits to a typed atom.
92	pub fn atom_from_bits(&self, bits: u32) -> Atom<T> {
93		Atom::new(bits)
94	}
95
96	/// Converts an atom back to its string representation. Returns `None` for invalid atoms.
97	pub fn atom_to_str(&self, atom: Atom<T>) -> Option<&'static str> {
98		let bits = atom.as_bits();
99		if DynAtomRegistry::<T>::is_dynamic(bits) { self.lookup(bits) } else { Some(T::from_bits(bits).to_str()) }
100	}
101
102	/// Interns a string, returning its dynamic atom bits.
103	fn atomize(&self, s: &str) -> u32 {
104		if let Some(&bits) = self.str_to_bits_map.read().unwrap().get(s) {
105			return bits;
106		}
107
108		let mut str_to_bits = self.str_to_bits_map.write().unwrap();
109		let mut bits_to_str = self.bits_to_str_vec.write().unwrap();
110
111		if let Some(&bits) = str_to_bits.get(s) {
112			return bits;
113		}
114
115		let id = bits_to_str.len() as u32;
116		let bits = id | (1 << ATOM_DYNAMIC_BIT); // Set dynamic bit
117
118		// Leak the string to get a 'static reference - this is an intern pool
119		// where strings live for the entire program lifetime anyway due to static
120		// lifetime of singleton instances.
121		let static_str: &'static str = Box::leak(s.into());
122		bits_to_str.push(static_str);
123		str_to_bits.insert(static_str, bits);
124
125		bits
126	}
127
128	/// Looks up a string by its dynamic atom bits.
129	fn lookup(&self, bits: u32) -> Option<&'static str> {
130		debug_assert!(Self::is_dynamic(bits), "lookup() called on static atom bits");
131		let index = (bits & ((1 << ATOM_DYNAMIC_BIT) - 1)) as usize; // Mask off dynamic bit to get index
132		self.bits_to_str_vec.read().unwrap().get(index).copied()
133	}
134
135	/// Returns true if the given bits represent a dynamic atom.
136	#[inline]
137	pub const fn is_dynamic(bits: u32) -> bool {
138		bits & (1 << ATOM_DYNAMIC_BIT) != 0
139	}
140}
141
142impl<T: AtomSet> Default for DynAtomRegistry<T> {
143	fn default() -> Self {
144		Self::new()
145	}
146}
147
148impl<T: AtomSet + Debug> Debug for DynAtomRegistry<T> {
149	fn fmt(&self, f: &mut Formatter<'_>) -> Result {
150		f.debug_struct("DynAtomRegistry").field("static_atoms", &self.static_atoms).finish_non_exhaustive()
151	}
152}
153
154impl<T: AtomSet> DynAtomSet for DynAtomRegistry<T> {
155	fn str_to_bits(&self, keyword: &str) -> u32 {
156		let static_atom = T::from_str(keyword);
157		let default_bits = T::default().as_bits();
158
159		if static_atom.as_bits() != default_bits {
160			let bits = static_atom.as_bits();
161			debug_assert!(bits & (1 << ATOM_DYNAMIC_BIT) == 0, "Static atoms must have dynamic bit = 0");
162			return bits;
163		}
164
165		let bits = self.atomize(keyword);
166		debug_assert!(bits & (1 << ATOM_DYNAMIC_BIT) != 0, "Dynamic atoms must have dynamic bit = 1");
167		bits
168	}
169
170	fn bits_to_str(&self, bits: u32) -> &'static str {
171		if Self::is_dynamic(bits) {
172			return self.lookup(bits).unwrap_or_else(|| T::from_bits(0).to_str());
173		}
174		T::from_bits(bits).to_str()
175	}
176
177	fn bits(&self) -> u32 {
178		self.static_atoms.as_bits()
179	}
180}
181
182/// Trait for atom types that have a registered singleton `DynAtomRegistry`.
183pub trait RegisteredAtomSet: AtomSet + 'static {
184	/// Returns the singleton instance of the `DynAtomRegistry` for this atom type.
185	fn get_dyn_set() -> &'static DynAtomRegistry<Self>;
186}
187
/// Registers a singleton `DynAtomRegistry<T>` for the given atom type.
///
/// This macro creates a lazily initialised static instance and implements the `RegisteredAtomSet` trait so that
/// `get_dyn_set()` returns a reference to it.
///
/// The static is declared inside an anonymous `const _` block, so it does not add a name to the surrounding scope
/// and the macro can be invoked for several atom types in the same module.
///
/// # Example
///
/// ```rust
/// use css_lexer::{AtomSet, RegisteredAtomSet, register_atom_set};
/// use derive_atom_set::AtomSet as DeriveAtomSet;
///
/// #[derive(Debug, Default, Copy, Clone, PartialEq, DeriveAtomSet)]
/// enum MyAtoms {
///     #[default]
///     _None,
///     Foo,
/// }
///
/// register_atom_set!(MyAtoms);
///
/// // Now you can use the singleton
/// let atoms = MyAtoms::get_dyn_set();
/// let atom = atoms.atom_from_str("foo");
/// ```
#[macro_export]
macro_rules! register_atom_set {
	($atom_type:ty) => {
		// Item names are not hygienic in `macro_rules!`: a bare `static` here would collide with the one produced
		// by any other invocation in the same module. The anonymous constant gives each expansion its own scope.
		const _: () = {
			static __ATOM_SET: ::std::sync::LazyLock<$crate::DynAtomRegistry<$atom_type>> =
				::std::sync::LazyLock::new(|| $crate::DynAtomRegistry::new());

			impl $crate::RegisteredAtomSet for $atom_type {
				#[inline]
				fn get_dyn_set() -> &'static $crate::DynAtomRegistry<Self> {
					&__ATOM_SET
				}
			}
		};
	};
}
225
#[cfg(test)]
mod tests {
	use super::*;

	/// Mask with only the dynamic bit set.
	const DYNAMIC_FLAG: u32 = 1 << ATOM_DYNAMIC_BIT;

	/// Hand-written atom set with two static keywords (`px`, `rem`) and `None` as the default.
	#[derive(Debug, Default, Copy, Clone, PartialEq)]
	enum TestAtomSet {
		#[default]
		None,
		Px,
		Rem,
	}

	impl AtomSet for TestAtomSet {
		fn from_str(keyword: &str) -> Self {
			if keyword == "px" {
				Self::Px
			} else if keyword == "rem" {
				Self::Rem
			} else {
				Self::None
			}
		}

		fn to_str(self) -> &'static str {
			match self {
				Self::Px => "px",
				Self::Rem => "rem",
				Self::None => "",
			}
		}

		fn len(&self) -> u32 {
			self.to_str().len() as u32
		}

		fn from_bits(bits: u32) -> Self {
			// Anything that is not the discriminant of `Px` or `Rem` (including 0) maps to `None`.
			match bits {
				1 => Self::Px,
				2 => Self::Rem,
				_ => Self::None,
			}
		}

		fn as_bits(&self) -> u32 {
			*self as u32
		}
	}

	/// Each test works on its own, freshly created registry.
	fn new_registry() -> DynAtomRegistry<TestAtomSet> {
		DynAtomRegistry::new()
	}

	#[test]
	fn test_static_atoms_have_dynamic_bit_zero() {
		let registry = new_registry();

		for keyword in ["px", "rem"] {
			let bits = registry.str_to_bits(keyword);
			assert_eq!(bits & DYNAMIC_FLAG, 0, "Static atoms should have dynamic bit = 0");
		}
	}

	#[test]
	fn test_dynamic_atoms_have_dynamic_bit_one() {
		let registry = new_registry();

		let custom_bits = registry.str_to_bits("custom");
		assert_eq!(custom_bits & DYNAMIC_FLAG, DYNAMIC_FLAG, "Dynamic atoms should have dynamic bit = 1");
	}

	#[test]
	fn test_dynamic_atom_interning() {
		let registry = new_registry();

		let first = registry.str_to_bits("custom");
		let second = registry.str_to_bits("custom");

		assert_eq!(first, second, "Same string should atomize to same bits");
	}

	#[test]
	fn test_lookup() {
		let registry = new_registry();

		let custom_bits = registry.str_to_bits("my-custom-value");

		assert_eq!(registry.lookup(custom_bits), Some("my-custom-value"));
	}

	#[test]
	fn test_different_dynamic_atoms_get_different_bits() {
		let registry = new_registry();

		let first = registry.str_to_bits("custom1");
		let second = registry.str_to_bits("custom2");

		assert_ne!(first, second, "Different strings should get different bits");
		assert_eq!(first & DYNAMIC_FLAG, DYNAMIC_FLAG);
		assert_eq!(second & DYNAMIC_FLAG, DYNAMIC_FLAG);
	}

	#[test]
	fn test_typed_atoms() {
		let registry = new_registry();

		// A keyword known to the static set.
		let px_atom: Atom<TestAtomSet> = registry.atom_from_str("px");
		assert!(px_atom.is_static());
		assert!(!px_atom.is_dynamic());
		assert_eq!(registry.atom_to_str(px_atom), Some("px"));

		// A keyword that has to be interned.
		let custom_atom: Atom<TestAtomSet> = registry.atom_from_str("custom");
		assert!(custom_atom.is_dynamic());
		assert!(!custom_atom.is_static());
		assert_eq!(registry.atom_to_str(custom_atom), Some("custom"));

		// Static and dynamic atoms never compare equal.
		assert_ne!(px_atom, custom_atom);

		// Interning the same string again yields the same atom.
		let custom_again = registry.atom_from_str("custom");
		assert_eq!(custom_atom, custom_again);
	}

	#[test]
	fn test_atom_from_bits() {
		let registry = new_registry();

		let px_atom = registry.atom_from_str("px");
		let rebuilt = registry.atom_from_bits(px_atom.as_bits());

		assert_eq!(px_atom, rebuilt);
		assert_eq!(registry.atom_to_str(rebuilt), Some("px"));
	}
}
360		assert_eq!(combined.atom_to_str(px_atom2), Some("px"));
361	}
362}