1use std::fmt;
2use std::iter::Peekable;
3use std::sync::atomic::{AtomicU32, Ordering};
4
5use super::{Byte, Ref, Tree, Uninhabited};
6use crate::{Map, Set};
7
8#[derive(PartialEq)]
9#[cfg_attr(test, derive(Clone))]
10pub(crate) struct Dfa<R>
11where
12 R: Ref,
13{
14 pub(crate) transitions: Map<State, Transitions<R>>,
15 pub(crate) start: State,
16 pub(crate) accept: State,
17}
18
19#[derive(PartialEq, Clone, Debug)]
20pub(crate) struct Transitions<R>
21where
22 R: Ref,
23{
24 byte_transitions: EdgeSet<State>,
25 ref_transitions: Map<R, State>,
26}
27
28impl<R> Default for Transitions<R>
29where
30 R: Ref,
31{
32 fn default() -> Self {
33 Self { byte_transitions: EdgeSet::empty(), ref_transitions: Map::default() }
34 }
35}
36
37#[derive(Hash, Eq, PartialEq, PartialOrd, Ord, Copy, Clone)]
39pub(crate) struct State(pub(crate) u32);
40
41impl State {
42 pub(crate) fn new() -> Self {
43 static COUNTER: AtomicU32 = AtomicU32::new(0);
44 Self(COUNTER.fetch_add(1, Ordering::SeqCst))
45 }
46}
47
48impl fmt::Debug for State {
49 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
50 write!(f, "S_{}", self.0)
51 }
52}
53
54impl<R> Dfa<R>
55where
56 R: Ref,
57{
58 #[cfg(test)]
59 pub(crate) fn bool() -> Self {
60 Self::from_transitions(|accept| Transitions {
61 byte_transitions: EdgeSet::new(Byte::new(0x00..=0x01), accept),
62 ref_transitions: Map::default(),
63 })
64 }
65
66 pub(crate) fn unit() -> Self {
67 let transitions: Map<State, Transitions<R>> = Map::default();
68 let start = State::new();
69 let accept = start;
70
71 Self { transitions, start, accept }
72 }
73
74 pub(crate) fn from_byte(byte: Byte) -> Self {
75 Self::from_transitions(|accept| Transitions {
76 byte_transitions: EdgeSet::new(byte, accept),
77 ref_transitions: Map::default(),
78 })
79 }
80
81 pub(crate) fn from_ref(r: R) -> Self {
82 Self::from_transitions(|accept| Transitions {
83 byte_transitions: EdgeSet::empty(),
84 ref_transitions: [(r, accept)].into_iter().collect(),
85 })
86 }
87
88 fn from_transitions(f: impl FnOnce(State) -> Transitions<R>) -> Self {
89 let start = State::new();
90 let accept = State::new();
91
92 Self { transitions: [(start, f(accept))].into_iter().collect(), start, accept }
93 }
94
95 pub(crate) fn from_tree(tree: Tree<!, R>) -> Result<Self, Uninhabited> {
96 Ok(match tree {
97 Tree::Byte(b) => Self::from_byte(b),
98 Tree::Ref(r) => Self::from_ref(r),
99 Tree::Alt(alts) => {
100 let mut alts = alts.into_iter().map(Self::from_tree).filter_map(Result::ok);
102 let dfa = alts.next().ok_or(Uninhabited)?;
104 alts.fold(dfa, |dfa, alt| dfa.union(alt, State::new))
106 }
107 Tree::Seq(elts) => {
108 let mut dfa = Self::unit();
109 for elt in elts.into_iter().map(Self::from_tree) {
110 dfa = dfa.concat(elt?);
111 }
112 dfa
113 }
114 })
115 }
116
117 pub(crate) fn concat(self, other: Self) -> Self {
119 if self.start == self.accept {
120 return other;
121 } else if other.start == other.accept {
122 return self;
123 }
124
125 let start = self.start;
126 let accept = other.accept;
127
128 let mut transitions: Map<State, Transitions<R>> = self.transitions;
129
130 for (source, transition) in other.transitions {
131 let fix_state = |state| if state == other.start { self.accept } else { state };
132 let byte_transitions = transition.byte_transitions.map_states(&fix_state);
133 let ref_transitions = transition
134 .ref_transitions
135 .into_iter()
136 .map(|(r, state)| (r, fix_state(state)))
137 .collect();
138
139 let old = transitions
140 .insert(fix_state(source), Transitions { byte_transitions, ref_transitions });
141 assert!(old.is_none());
142 }
143
144 Self { transitions, start, accept }
145 }
146
147 pub(crate) fn union(self, other: Self, mut new_state: impl FnMut() -> State) -> Self {
149 let a = self;
155 let b = other;
156
157 let accept = new_state();
158
159 let mut mapping: Map<(Option<State>, Option<State>), State> = Map::default();
160
161 let mut mapped = |(a_state, b_state)| {
162 if Some(a.accept) == a_state || Some(b.accept) == b_state {
163 accept
166 } else {
167 *mapping.entry((a_state, b_state)).or_insert_with(&mut new_state)
168 }
169 };
170
171 let start = mapped((Some(a.start), Some(b.start)));
172 let mut transitions: Map<State, Transitions<R>> = Map::default();
173 let empty_transitions = Transitions::default();
174
175 struct WorkQueue {
176 queue: Vec<(Option<State>, Option<State>)>,
177 enqueued: Set<(Option<State>, Option<State>)>,
181 }
182 impl WorkQueue {
183 fn enqueue(&mut self, a_state: Option<State>, b_state: Option<State>) {
184 if self.enqueued.insert((a_state, b_state)) {
185 self.queue.push((a_state, b_state));
186 }
187 }
188 }
189 let mut queue = WorkQueue { queue: Vec::new(), enqueued: Set::default() };
190 queue.enqueue(Some(a.start), Some(b.start));
191
192 while let Some((a_src, b_src)) = queue.queue.pop() {
193 let src = mapped((a_src, b_src));
194 if src == accept {
195 continue;
207 }
208
209 let a_transitions =
210 a_src.and_then(|a_src| a.transitions.get(&a_src)).unwrap_or(&empty_transitions);
211 let b_transitions =
212 b_src.and_then(|b_src| b.transitions.get(&b_src)).unwrap_or(&empty_transitions);
213
214 let byte_transitions = a_transitions.byte_transitions.union(
215 &b_transitions.byte_transitions,
216 |a_dst, b_dst| {
217 assert!(a_dst.is_some() || b_dst.is_some());
218
219 queue.enqueue(a_dst, b_dst);
220 mapped((a_dst, b_dst))
221 },
222 );
223
224 let ref_transitions =
225 a_transitions.ref_transitions.keys().chain(b_transitions.ref_transitions.keys());
226
227 let ref_transitions = ref_transitions
228 .map(|ref_transition| {
229 let a_dst = a_transitions.ref_transitions.get(ref_transition).copied();
230 let b_dst = b_transitions.ref_transitions.get(ref_transition).copied();
231
232 assert!(a_dst.is_some() || b_dst.is_some());
233
234 queue.enqueue(a_dst, b_dst);
235 (*ref_transition, mapped((a_dst, b_dst)))
236 })
237 .collect();
238
239 let old = transitions.insert(src, Transitions { byte_transitions, ref_transitions });
240 assert_eq!(old, None);
243 }
244
245 Self { transitions, start, accept }
246 }
247
248 pub(crate) fn get_uninit_edge_dst(&self, state: State) -> Option<State> {
249 let transitions = self.transitions.get(&state)?;
250 transitions.byte_transitions.get_uninit_edge_dst()
251 }
252
253 pub(crate) fn bytes_from(&self, start: State) -> impl Iterator<Item = (Byte, State)> {
254 self.transitions
255 .get(&start)
256 .into_iter()
257 .flat_map(|transitions| transitions.byte_transitions.iter())
258 }
259
260 pub(crate) fn refs_from(&self, start: State) -> impl Iterator<Item = (R, State)> {
261 self.transitions
262 .get(&start)
263 .into_iter()
264 .flat_map(|transitions| transitions.ref_transitions.iter())
265 .map(|(r, s)| (*r, *s))
266 }
267
268 #[cfg(test)]
269 pub(crate) fn from_edges<B: Clone + Into<Byte>>(
270 start: u32,
271 accept: u32,
272 edges: &[(u32, B, u32)],
273 ) -> Self {
274 let start = State(start);
275 let accept = State(accept);
276 let mut transitions: Map<State, Vec<(Byte, State)>> = Map::default();
277
278 for &(src, ref edge, dst) in edges.iter() {
279 transitions.entry(State(src)).or_default().push((edge.clone().into(), State(dst)));
280 }
281
282 let transitions = transitions
283 .into_iter()
284 .map(|(src, edges)| {
285 (
286 src,
287 Transitions {
288 byte_transitions: EdgeSet::from_edges(edges),
289 ref_transitions: Map::default(),
290 },
291 )
292 })
293 .collect();
294
295 Self { start, accept, transitions }
296 }
297}
298
299impl<R> fmt::Debug for Dfa<R>
301where
302 R: Ref,
303{
304 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
305 writeln!(f, "digraph {{")?;
306 writeln!(f, " {:?} [shape = doublecircle]", self.start)?;
307 writeln!(f, " {:?} [shape = doublecircle]", self.accept)?;
308
309 for (src, transitions) in self.transitions.iter() {
310 for (t, dst) in transitions.byte_transitions.iter() {
311 writeln!(f, " {src:?} -> {dst:?} [label=\"{t:?}\"]")?;
312 }
313
314 for (t, dst) in transitions.ref_transitions.iter() {
315 writeln!(f, " {src:?} -> {dst:?} [label=\"{t:?}\"]")?;
316 }
317 }
318
319 writeln!(f, "}}")
320 }
321}
322
323use edge_set::EdgeSet;
324mod edge_set {
325 use smallvec::SmallVec;
326
327 use super::*;
328
329 #[derive(Eq, PartialEq, Clone, Debug)]
331 pub(super) struct EdgeSet<S = State> {
332 runs: SmallVec<[(Byte, S); 1]>,
337 }
338
339 impl<S> EdgeSet<S> {
340 pub(crate) fn new(range: Byte, dst: S) -> Self {
341 let mut this = Self { runs: SmallVec::new() };
342 if !range.is_empty() {
343 this.runs.push((range, dst));
344 }
345 this
346 }
347
348 pub(crate) fn empty() -> Self {
349 Self { runs: SmallVec::new() }
350 }
351
352 #[cfg(test)]
353 pub(crate) fn from_edges(mut edges: Vec<(Byte, S)>) -> Self
354 where
355 S: Ord,
356 {
357 edges.sort();
358 Self { runs: edges.into() }
359 }
360
361 pub(crate) fn iter(&self) -> impl Iterator<Item = (Byte, S)>
362 where
363 S: Copy,
364 {
365 self.runs.iter().copied()
366 }
367
368 pub(crate) fn get_uninit_edge_dst(&self) -> Option<S>
369 where
370 S: Copy,
371 {
372 let &(range, dst) = self.runs.last()?;
374 if range.contains_uninit() { Some(dst) } else { None }
375 }
376
377 pub(crate) fn map_states<SS>(self, mut f: impl FnMut(S) -> SS) -> EdgeSet<SS> {
378 EdgeSet {
379 runs: self.runs.into_iter().map(|(b, s)| (b, f(s))).collect(),
386 }
387 }
388
389 pub(crate) fn union(
399 &self,
400 other: &Self,
401 mut join: impl FnMut(Option<S>, Option<S>) -> S,
402 ) -> EdgeSet<S>
403 where
404 S: Copy + Eq,
405 {
406 let mut runs: SmallVec<[(Byte, S); 1]> = SmallVec::new();
407 let xs = self.runs.iter().copied();
408 let ys = other.runs.iter().copied();
409 for (range, (x, y)) in union(xs, ys) {
410 let state = join(x, y);
411 match runs.last_mut() {
412 Some(&mut (ref mut last_range, ref mut last_state))
414 if last_range.end == range.start && *last_state == state =>
415 {
416 last_range.end = range.end
417 }
418 _ => runs.push((range, state)),
419 }
420 }
421 EdgeSet { runs }
422 }
423 }
424}
425
426pub(crate) fn union<S: Copy, X: Iterator<Item = (Byte, S)>, Y: Iterator<Item = (Byte, S)>>(
428 xs: X,
429 ys: Y,
430) -> UnionIter<X, Y> {
431 UnionIter { xs: xs.peekable(), ys: ys.peekable() }
432}
433
434pub(crate) struct UnionIter<X: Iterator, Y: Iterator> {
435 xs: Peekable<X>,
436 ys: Peekable<Y>,
437}
438
439impl<S: Copy, X: Iterator<Item = (Byte, S)>, Y: Iterator<Item = (Byte, S)>> Iterator
441 for UnionIter<X, Y>
442{
443 type Item = (Byte, (Option<S>, Option<S>));
444
445 fn next(&mut self) -> Option<Self::Item> {
446 use std::cmp::{self, Ordering};
447
448 let ret;
449 match (self.xs.peek_mut(), self.ys.peek_mut()) {
450 (None, None) => {
451 ret = None;
452 }
453 (Some(x), None) => {
454 ret = Some((x.0, (Some(x.1), None)));
455 self.xs.next();
456 }
457 (None, Some(y)) => {
458 ret = Some((y.0, (None, Some(y.1))));
459 self.ys.next();
460 }
461 (Some(x), Some(y)) => {
462 let start;
463 let end;
464 let dst;
465 match x.0.start.cmp(&y.0.start) {
466 Ordering::Less => {
467 start = x.0.start;
468 end = cmp::min(x.0.end, y.0.start);
469 dst = (Some(x.1), None);
470 }
471 Ordering::Greater => {
472 start = y.0.start;
473 end = cmp::min(x.0.start, y.0.end);
474 dst = (None, Some(y.1));
475 }
476 Ordering::Equal => {
477 start = x.0.start;
478 end = cmp::min(x.0.end, y.0.end);
479 dst = (Some(x.1), Some(y.1));
480 }
481 }
482 ret = Some((Byte { start, end }, dst));
483 if start == x.0.start {
484 x.0.start = end;
485 }
486 if start == y.0.start {
487 y.0.start = end;
488 }
489 if x.0.is_empty() {
490 self.xs.next();
491 }
492 if y.0.is_empty() {
493 self.ys.next();
494 }
495 }
496 }
497 ret
498 }
499}