rustc_transmute/layout/
dfa.rs

1use std::fmt;
2use std::iter::Peekable;
3use std::sync::atomic::{AtomicU32, Ordering};
4
5use super::{Byte, Ref, Tree, Uninhabited};
6use crate::{Map, Set};
7
8#[derive(PartialEq)]
9#[cfg_attr(test, derive(Clone))]
10pub(crate) struct Dfa<R>
11where
12    R: Ref,
13{
14    pub(crate) transitions: Map<State, Transitions<R>>,
15    pub(crate) start: State,
16    pub(crate) accept: State,
17}
18
19#[derive(PartialEq, Clone, Debug)]
20pub(crate) struct Transitions<R>
21where
22    R: Ref,
23{
24    byte_transitions: EdgeSet<State>,
25    ref_transitions: Map<R, State>,
26}
27
28impl<R> Default for Transitions<R>
29where
30    R: Ref,
31{
32    fn default() -> Self {
33        Self { byte_transitions: EdgeSet::empty(), ref_transitions: Map::default() }
34    }
35}
36
37/// The states in a [`Dfa`] represent byte offsets.
38#[derive(Hash, Eq, PartialEq, PartialOrd, Ord, Copy, Clone)]
39pub(crate) struct State(pub(crate) u32);
40
41impl State {
42    pub(crate) fn new() -> Self {
43        static COUNTER: AtomicU32 = AtomicU32::new(0);
44        Self(COUNTER.fetch_add(1, Ordering::SeqCst))
45    }
46}
47
48impl fmt::Debug for State {
49    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
50        write!(f, "S_{}", self.0)
51    }
52}
53
54impl<R> Dfa<R>
55where
56    R: Ref,
57{
58    #[cfg(test)]
59    pub(crate) fn bool() -> Self {
60        Self::from_transitions(|accept| Transitions {
61            byte_transitions: EdgeSet::new(Byte::new(0x00..=0x01), accept),
62            ref_transitions: Map::default(),
63        })
64    }
65
66    pub(crate) fn unit() -> Self {
67        let transitions: Map<State, Transitions<R>> = Map::default();
68        let start = State::new();
69        let accept = start;
70
71        Self { transitions, start, accept }
72    }
73
74    pub(crate) fn from_byte(byte: Byte) -> Self {
75        Self::from_transitions(|accept| Transitions {
76            byte_transitions: EdgeSet::new(byte, accept),
77            ref_transitions: Map::default(),
78        })
79    }
80
81    pub(crate) fn from_ref(r: R) -> Self {
82        Self::from_transitions(|accept| Transitions {
83            byte_transitions: EdgeSet::empty(),
84            ref_transitions: [(r, accept)].into_iter().collect(),
85        })
86    }
87
88    fn from_transitions(f: impl FnOnce(State) -> Transitions<R>) -> Self {
89        let start = State::new();
90        let accept = State::new();
91
92        Self { transitions: [(start, f(accept))].into_iter().collect(), start, accept }
93    }
94
95    pub(crate) fn from_tree(tree: Tree<!, R>) -> Result<Self, Uninhabited> {
96        Ok(match tree {
97            Tree::Byte(b) => Self::from_byte(b),
98            Tree::Ref(r) => Self::from_ref(r),
99            Tree::Alt(alts) => {
100                // Convert and filter the inhabited alternatives.
101                let mut alts = alts.into_iter().map(Self::from_tree).filter_map(Result::ok);
102                // If there are no alternatives, return `Uninhabited`.
103                let dfa = alts.next().ok_or(Uninhabited)?;
104                // Combine the remaining alternatives with `dfa`.
105                alts.fold(dfa, |dfa, alt| dfa.union(alt, State::new))
106            }
107            Tree::Seq(elts) => {
108                let mut dfa = Self::unit();
109                for elt in elts.into_iter().map(Self::from_tree) {
110                    dfa = dfa.concat(elt?);
111                }
112                dfa
113            }
114        })
115    }
116
117    /// Concatenate two `Dfa`s.
118    pub(crate) fn concat(self, other: Self) -> Self {
119        if self.start == self.accept {
120            return other;
121        } else if other.start == other.accept {
122            return self;
123        }
124
125        let start = self.start;
126        let accept = other.accept;
127
128        let mut transitions: Map<State, Transitions<R>> = self.transitions;
129
130        for (source, transition) in other.transitions {
131            let fix_state = |state| if state == other.start { self.accept } else { state };
132            let byte_transitions = transition.byte_transitions.map_states(&fix_state);
133            let ref_transitions = transition
134                .ref_transitions
135                .into_iter()
136                .map(|(r, state)| (r, fix_state(state)))
137                .collect();
138
139            let old = transitions
140                .insert(fix_state(source), Transitions { byte_transitions, ref_transitions });
141            assert!(old.is_none());
142        }
143
144        Self { transitions, start, accept }
145    }
146
147    /// Compute the union of two `Dfa`s.
148    pub(crate) fn union(self, other: Self, mut new_state: impl FnMut() -> State) -> Self {
149        // We implement `union` by lazily initializing a set of states
150        // corresponding to the product of states in `self` and `other`, and
151        // then add transitions between these states that correspond to where
152        // they exist between `self` and `other`.
153
154        let a = self;
155        let b = other;
156
157        let accept = new_state();
158
159        let mut mapping: Map<(Option<State>, Option<State>), State> = Map::default();
160
161        let mut mapped = |(a_state, b_state)| {
162            if Some(a.accept) == a_state || Some(b.accept) == b_state {
163                // If either `a_state` or `b_state` are accepting, map to a
164                // common `accept` state.
165                accept
166            } else {
167                *mapping.entry((a_state, b_state)).or_insert_with(&mut new_state)
168            }
169        };
170
171        let start = mapped((Some(a.start), Some(b.start)));
172        let mut transitions: Map<State, Transitions<R>> = Map::default();
173        let empty_transitions = Transitions::default();
174
175        struct WorkQueue {
176            queue: Vec<(Option<State>, Option<State>)>,
177            // Track all entries ever enqueued to avoid duplicating work. This
178            // gives us a guarantee that a given (a_state, b_state) pair will
179            // only ever be visited once.
180            enqueued: Set<(Option<State>, Option<State>)>,
181        }
182        impl WorkQueue {
183            fn enqueue(&mut self, a_state: Option<State>, b_state: Option<State>) {
184                if self.enqueued.insert((a_state, b_state)) {
185                    self.queue.push((a_state, b_state));
186                }
187            }
188        }
189        let mut queue = WorkQueue { queue: Vec::new(), enqueued: Set::default() };
190        queue.enqueue(Some(a.start), Some(b.start));
191
192        while let Some((a_src, b_src)) = queue.queue.pop() {
193            let src = mapped((a_src, b_src));
194            if src == accept {
195                // While it's possible to have a DFA whose accept state has
196                // out-edges, these do not affect the semantics of the DFA, and
197                // so there's no point in processing them. Continuing here also
198                // has the advantage of guaranteeing that we only ever process a
199                // given node in the output DFA once. In particular, with the
200                // exception of the accept state, we ensure that we only push a
201                // given node to the `queue` once. This allows the following
202                // code to assume that we're processing a node we've never
203                // processed before, which means we never need to merge two edge
204                // sets - we only ever need to construct a new edge set from
205                // whole cloth.
206                continue;
207            }
208
209            let a_transitions =
210                a_src.and_then(|a_src| a.transitions.get(&a_src)).unwrap_or(&empty_transitions);
211            let b_transitions =
212                b_src.and_then(|b_src| b.transitions.get(&b_src)).unwrap_or(&empty_transitions);
213
214            let byte_transitions = a_transitions.byte_transitions.union(
215                &b_transitions.byte_transitions,
216                |a_dst, b_dst| {
217                    assert!(a_dst.is_some() || b_dst.is_some());
218
219                    queue.enqueue(a_dst, b_dst);
220                    mapped((a_dst, b_dst))
221                },
222            );
223
224            let ref_transitions =
225                a_transitions.ref_transitions.keys().chain(b_transitions.ref_transitions.keys());
226
227            let ref_transitions = ref_transitions
228                .map(|ref_transition| {
229                    let a_dst = a_transitions.ref_transitions.get(ref_transition).copied();
230                    let b_dst = b_transitions.ref_transitions.get(ref_transition).copied();
231
232                    assert!(a_dst.is_some() || b_dst.is_some());
233
234                    queue.enqueue(a_dst, b_dst);
235                    (*ref_transition, mapped((a_dst, b_dst)))
236                })
237                .collect();
238
239            let old = transitions.insert(src, Transitions { byte_transitions, ref_transitions });
240            // See `if src == accept { ... }` above. The comment there explains
241            // why this assert is valid.
242            assert_eq!(old, None);
243        }
244
245        Self { transitions, start, accept }
246    }
247
248    pub(crate) fn get_uninit_edge_dst(&self, state: State) -> Option<State> {
249        let transitions = self.transitions.get(&state)?;
250        transitions.byte_transitions.get_uninit_edge_dst()
251    }
252
253    pub(crate) fn bytes_from(&self, start: State) -> impl Iterator<Item = (Byte, State)> {
254        self.transitions
255            .get(&start)
256            .into_iter()
257            .flat_map(|transitions| transitions.byte_transitions.iter())
258    }
259
260    pub(crate) fn refs_from(&self, start: State) -> impl Iterator<Item = (R, State)> {
261        self.transitions
262            .get(&start)
263            .into_iter()
264            .flat_map(|transitions| transitions.ref_transitions.iter())
265            .map(|(r, s)| (*r, *s))
266    }
267
268    #[cfg(test)]
269    pub(crate) fn from_edges<B: Clone + Into<Byte>>(
270        start: u32,
271        accept: u32,
272        edges: &[(u32, B, u32)],
273    ) -> Self {
274        let start = State(start);
275        let accept = State(accept);
276        let mut transitions: Map<State, Vec<(Byte, State)>> = Map::default();
277
278        for &(src, ref edge, dst) in edges.iter() {
279            transitions.entry(State(src)).or_default().push((edge.clone().into(), State(dst)));
280        }
281
282        let transitions = transitions
283            .into_iter()
284            .map(|(src, edges)| {
285                (
286                    src,
287                    Transitions {
288                        byte_transitions: EdgeSet::from_edges(edges),
289                        ref_transitions: Map::default(),
290                    },
291                )
292            })
293            .collect();
294
295        Self { start, accept, transitions }
296    }
297}
298
299/// Serialize the DFA using the Graphviz DOT format.
300impl<R> fmt::Debug for Dfa<R>
301where
302    R: Ref,
303{
304    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
305        writeln!(f, "digraph {{")?;
306        writeln!(f, "    {:?} [shape = doublecircle]", self.start)?;
307        writeln!(f, "    {:?} [shape = doublecircle]", self.accept)?;
308
309        for (src, transitions) in self.transitions.iter() {
310            for (t, dst) in transitions.byte_transitions.iter() {
311                writeln!(f, "    {src:?} -> {dst:?} [label=\"{t:?}\"]")?;
312            }
313
314            for (t, dst) in transitions.ref_transitions.iter() {
315                writeln!(f, "    {src:?} -> {dst:?} [label=\"{t:?}\"]")?;
316            }
317        }
318
319        writeln!(f, "}}")
320    }
321}
322
323use edge_set::EdgeSet;
324mod edge_set {
325    use smallvec::SmallVec;
326
327    use super::*;
328
329    /// The set of outbound byte edges associated with a DFA node.
330    #[derive(Eq, PartialEq, Clone, Debug)]
331    pub(super) struct EdgeSet<S = State> {
332        // A sequence of byte edges with contiguous byte values and a common
333        // destination is stored as a single run.
334        //
335        // Runs are non-empty, non-overlapping, and stored in ascending order.
336        runs: SmallVec<[(Byte, S); 1]>,
337    }
338
339    impl<S> EdgeSet<S> {
340        pub(crate) fn new(range: Byte, dst: S) -> Self {
341            let mut this = Self { runs: SmallVec::new() };
342            if !range.is_empty() {
343                this.runs.push((range, dst));
344            }
345            this
346        }
347
348        pub(crate) fn empty() -> Self {
349            Self { runs: SmallVec::new() }
350        }
351
352        #[cfg(test)]
353        pub(crate) fn from_edges(mut edges: Vec<(Byte, S)>) -> Self
354        where
355            S: Ord,
356        {
357            edges.sort();
358            Self { runs: edges.into() }
359        }
360
361        pub(crate) fn iter(&self) -> impl Iterator<Item = (Byte, S)>
362        where
363            S: Copy,
364        {
365            self.runs.iter().copied()
366        }
367
368        pub(crate) fn get_uninit_edge_dst(&self) -> Option<S>
369        where
370            S: Copy,
371        {
372            // Uninit is ordered last.
373            let &(range, dst) = self.runs.last()?;
374            if range.contains_uninit() { Some(dst) } else { None }
375        }
376
377        pub(crate) fn map_states<SS>(self, mut f: impl FnMut(S) -> SS) -> EdgeSet<SS> {
378            EdgeSet {
379                // NOTE: It appears as through `<Vec<_> as
380                // IntoIterator>::IntoIter` and `std::iter::Map` both implement
381                // `TrustedLen`, which in turn means that this `.collect()`
382                // allocates the correct number of elements once up-front [1].
383                //
384                // [1] https://doc.rust-lang.org/1.85.0/src/alloc/vec/spec_from_iter_nested.rs.html#47
385                runs: self.runs.into_iter().map(|(b, s)| (b, f(s))).collect(),
386            }
387        }
388
389        /// Unions two edge sets together.
390        ///
391        /// If `u = a.union(b)`, then for each byte value, `u` will have an edge
392        /// with that byte value and with the destination `join(Some(_), None)`,
393        /// `join(None, Some(_))`, or `join(Some(_), Some(_))` depending on whether `a`,
394        /// `b`, or both have an edge with that byte value.
395        ///
396        /// If neither `a` nor `b` have an edge with a particular byte value,
397        /// then no edge with that value will be present in `u`.
398        pub(crate) fn union(
399            &self,
400            other: &Self,
401            mut join: impl FnMut(Option<S>, Option<S>) -> S,
402        ) -> EdgeSet<S>
403        where
404            S: Copy + Eq,
405        {
406            let mut runs: SmallVec<[(Byte, S); 1]> = SmallVec::new();
407            let xs = self.runs.iter().copied();
408            let ys = other.runs.iter().copied();
409            for (range, (x, y)) in union(xs, ys) {
410                let state = join(x, y);
411                match runs.last_mut() {
412                    // Merge contiguous runs with a common destination.
413                    Some(&mut (ref mut last_range, ref mut last_state))
414                        if last_range.end == range.start && *last_state == state =>
415                    {
416                        last_range.end = range.end
417                    }
418                    _ => runs.push((range, state)),
419                }
420            }
421            EdgeSet { runs }
422        }
423    }
424}
425
426/// Merges two sorted sequences into one sorted sequence.
427pub(crate) fn union<S: Copy, X: Iterator<Item = (Byte, S)>, Y: Iterator<Item = (Byte, S)>>(
428    xs: X,
429    ys: Y,
430) -> UnionIter<X, Y> {
431    UnionIter { xs: xs.peekable(), ys: ys.peekable() }
432}
433
434pub(crate) struct UnionIter<X: Iterator, Y: Iterator> {
435    xs: Peekable<X>,
436    ys: Peekable<Y>,
437}
438
439// FIXME(jswrenn) we'd likely benefit from specializing try_fold here.
440impl<S: Copy, X: Iterator<Item = (Byte, S)>, Y: Iterator<Item = (Byte, S)>> Iterator
441    for UnionIter<X, Y>
442{
443    type Item = (Byte, (Option<S>, Option<S>));
444
445    fn next(&mut self) -> Option<Self::Item> {
446        use std::cmp::{self, Ordering};
447
448        let ret;
449        match (self.xs.peek_mut(), self.ys.peek_mut()) {
450            (None, None) => {
451                ret = None;
452            }
453            (Some(x), None) => {
454                ret = Some((x.0, (Some(x.1), None)));
455                self.xs.next();
456            }
457            (None, Some(y)) => {
458                ret = Some((y.0, (None, Some(y.1))));
459                self.ys.next();
460            }
461            (Some(x), Some(y)) => {
462                let start;
463                let end;
464                let dst;
465                match x.0.start.cmp(&y.0.start) {
466                    Ordering::Less => {
467                        start = x.0.start;
468                        end = cmp::min(x.0.end, y.0.start);
469                        dst = (Some(x.1), None);
470                    }
471                    Ordering::Greater => {
472                        start = y.0.start;
473                        end = cmp::min(x.0.start, y.0.end);
474                        dst = (None, Some(y.1));
475                    }
476                    Ordering::Equal => {
477                        start = x.0.start;
478                        end = cmp::min(x.0.end, y.0.end);
479                        dst = (Some(x.1), Some(y.1));
480                    }
481                }
482                ret = Some((Byte { start, end }, dst));
483                if start == x.0.start {
484                    x.0.start = end;
485                }
486                if start == y.0.start {
487                    y.0.start = end;
488                }
489                if x.0.is_empty() {
490                    self.xs.next();
491                }
492                if y.0.is_empty() {
493                    self.ys.next();
494                }
495            }
496        }
497        ret
498    }
499}