rustc_span/
span_encoding.rs

1use rustc_data_structures::fx::FxIndexSet;
2// This code is very hot and uses lots of arithmetic, avoid overflow checks for performance.
3// See https://github.com/rust-lang/rust/pull/119440#issuecomment-1874255727
4use rustc_serialize::int_overflow::DebugStrictAdd;
5
6use crate::def_id::{DefIndex, LocalDefId};
7use crate::hygiene::SyntaxContext;
8use crate::{BytePos, SPAN_TRACK, SpanData};
9
/// A compressed span.
///
/// [`SpanData`] is 16 bytes, which is too big to stick everywhere. `Span` only
/// takes up 8 bytes, with less space for the length, parent and context. The
/// vast majority (99.9%+) of `SpanData` instances can be made to fit within
/// those 8 bytes. Any `SpanData` whose fields don't fit into a `Span` are
/// stored in a separate interner table, and the `Span` will index into that
/// table. Interning is rare enough that the cost is low, but common enough
/// that the code is exercised regularly.
///
/// An earlier version of this code used only 4 bytes for `Span`, but that was
/// slower because only 80--90% of spans could be stored inline (even less in
/// very large crates) and so the interner was used a lot more. That version of
/// the code also predated the storage of parents.
///
/// There are four different span forms.
///
/// Inline-context format (requires non-huge length, non-huge context, and no parent):
/// - `span.lo_or_index == span_data.lo`
/// - `span.len_with_tag_or_marker == len == span_data.hi - span_data.lo` (must be `<= MAX_LEN`)
/// - `span.ctxt_or_parent_or_marker == span_data.ctxt` (must be `<= MAX_CTXT`)
///
/// Inline-parent format (requires non-huge length, root context, and non-huge parent):
/// - `span.lo_or_index == span_data.lo`
/// - `span.len_with_tag_or_marker & !PARENT_TAG == len == span_data.hi - span_data.lo`
///   (must be `<= MAX_LEN`)
/// - `span.len_with_tag_or_marker` has top bit (`PARENT_TAG`) set
/// - `span.ctxt_or_parent_or_marker == span_data.parent` (must be `<= MAX_CTXT`)
///
/// Partially-interned format (requires non-huge context):
/// - `span.lo_or_index == index` (indexes into the interner table)
/// - `span.len_with_tag_or_marker == BASE_LEN_INTERNED_MARKER`
/// - `span.ctxt_or_parent_or_marker == span_data.ctxt` (must be `<= MAX_CTXT`)
///
/// Fully-interned format (all cases not covered above):
/// - `span.lo_or_index == index` (indexes into the interner table)
/// - `span.len_with_tag_or_marker == BASE_LEN_INTERNED_MARKER`
/// - `span.ctxt_or_parent_or_marker == CTXT_INTERNED_MARKER`
///
/// The partially-interned form requires looking in the interning table for
/// lo and length, but the context is stored inline as well as interned.
/// This is useful because context lookups are often done in isolation, and
/// inline lookups are quicker.
///
/// Notes about the choice of field sizes:
/// - `lo` is 32 bits in both `Span` and `SpanData`, which means that `lo`
///   values never cause interning. The number of bits needed for `lo`
///   depends on the crate size. 32 bits allows up to 4 GiB of code in a crate.
///   Having no compression on this field means there is no performance cliff
///   if a crate exceeds a particular size.
/// - `len` is ~15 bits in `Span` (a u16, minus 1 bit for PARENT_TAG) and 32
///   bits in `SpanData`, which means that large `len` values will cause
///   interning. The number of bits needed for `len` does not depend on the
///   crate size. The most common numbers of bits for `len` are from 0 to 7,
///   with a peak usually at 3 or 4, and then it drops off quickly from 8
///   onwards. 15 bits is enough for 99.99%+ of cases, but larger values
///   (sometimes 20+ bits) might occur dozens of times in a typical crate.
/// - `ctxt_or_parent_or_marker` is 16 bits in `Span` and two 32 bit fields in
///   `SpanData`, which means interning will happen if `ctxt` is large, if
///   `parent` is large, or if both values are non-zero. The number of bits
///   needed for `ctxt` values depends partly on the crate size and partly on
///   the form of the code. No crates in `rustc-perf` need more than 15 bits
///   for `ctxt_or_parent_or_marker`, but larger crates might need more than 16
///   bits. The number of bits needed for `parent` hasn't been measured,
///   because `parent` isn't currently used by default.
///
/// In order to reliably use parented spans in incremental compilation,
/// accesses to `lo` and `hi` must introduce a dependency to the parent definition's span.
/// This is performed using the callback `SPAN_TRACK` to access the query engine.
#[derive(Clone, Copy, Eq, PartialEq, Hash)]
#[rustc_pass_by_value]
pub struct Span {
    // Start position (`lo`) in the two inline formats; index into the interner
    // table in the two interned formats.
    lo_or_index: u32,
    // Inline-context: the length. Inline-parent: `PARENT_TAG | length`.
    // Both interned formats: `BASE_LEN_INTERNED_MARKER`.
    len_with_tag_or_marker: u16,
    // Inline-context and partially-interned: the syntax context.
    // Inline-parent: the parent's def index. Fully-interned: `CTXT_INTERNED_MARKER`.
    ctxt_or_parent_or_marker: u16,
}
86
// Convenience structures for all span formats.
/// Decoded view of a `Span` stored in the inline-context format.
#[derive(Clone, Copy)]
struct InlineCtxt {
    // Start position, copied from `Span::lo_or_index`.
    lo: u32,
    // Span length, copied from `Span::len_with_tag_or_marker` (no tag bit set in this format).
    len: u16,
    // Syntax context, copied from `Span::ctxt_or_parent_or_marker`.
    ctxt: u16,
}
94
/// Decoded view of a `Span` stored in the inline-parent format.
#[derive(Clone, Copy)]
struct InlineParent {
    // Start position, copied from `Span::lo_or_index`.
    lo: u32,
    // Length with the `PARENT_TAG` bit still set; mask with `!PARENT_TAG` to get the length.
    len_with_tag: u16,
    // Def index of the parent, copied from `Span::ctxt_or_parent_or_marker`.
    parent: u16,
}
101
/// Decoded view of a `Span` stored in the partially-interned format.
#[derive(Clone, Copy)]
struct PartiallyInterned {
    // Index into the span interner table, copied from `Span::lo_or_index`.
    index: u32,
    // Syntax context kept inline, copied from `Span::ctxt_or_parent_or_marker`.
    ctxt: u16,
}
107
/// Decoded view of a `Span` stored in the fully-interned format.
#[derive(Clone, Copy)]
struct Interned {
    // Index into the span interner table, copied from `Span::lo_or_index`.
    index: u32,
}
112
113impl InlineCtxt {
114    #[inline]
115    fn data(self) -> SpanData {
116        let len = self.len as u32;
117        debug_assert!(len <= MAX_LEN);
118        SpanData {
119            lo: BytePos(self.lo),
120            hi: BytePos(self.lo.debug_strict_add(len)),
121            ctxt: SyntaxContext::from_u16(self.ctxt),
122            parent: None,
123        }
124    }
125    #[inline]
126    fn span(lo: u32, len: u16, ctxt: u16) -> Span {
127        Span { lo_or_index: lo, len_with_tag_or_marker: len, ctxt_or_parent_or_marker: ctxt }
128    }
129    #[inline]
130    fn from_span(span: Span) -> InlineCtxt {
131        let (lo, len, ctxt) =
132            (span.lo_or_index, span.len_with_tag_or_marker, span.ctxt_or_parent_or_marker);
133        InlineCtxt { lo, len, ctxt }
134    }
135}
136
137impl InlineParent {
138    #[inline]
139    fn data(self) -> SpanData {
140        let len = (self.len_with_tag & !PARENT_TAG) as u32;
141        debug_assert!(len <= MAX_LEN);
142        SpanData {
143            lo: BytePos(self.lo),
144            hi: BytePos(self.lo.debug_strict_add(len)),
145            ctxt: SyntaxContext::root(),
146            parent: Some(LocalDefId { local_def_index: DefIndex::from_u16(self.parent) }),
147        }
148    }
149    #[inline]
150    fn span(lo: u32, len: u16, parent: u16) -> Span {
151        let (lo_or_index, len_with_tag_or_marker, ctxt_or_parent_or_marker) =
152            (lo, PARENT_TAG | len, parent);
153        Span { lo_or_index, len_with_tag_or_marker, ctxt_or_parent_or_marker }
154    }
155    #[inline]
156    fn from_span(span: Span) -> InlineParent {
157        let (lo, len_with_tag, parent) =
158            (span.lo_or_index, span.len_with_tag_or_marker, span.ctxt_or_parent_or_marker);
159        InlineParent { lo, len_with_tag, parent }
160    }
161}
162
163impl PartiallyInterned {
164    #[inline]
165    fn data(self) -> SpanData {
166        SpanData {
167            ctxt: SyntaxContext::from_u16(self.ctxt),
168            ..with_span_interner(|interner| interner.spans[self.index as usize])
169        }
170    }
171    #[inline]
172    fn span(index: u32, ctxt: u16) -> Span {
173        let (lo_or_index, len_with_tag_or_marker, ctxt_or_parent_or_marker) =
174            (index, BASE_LEN_INTERNED_MARKER, ctxt);
175        Span { lo_or_index, len_with_tag_or_marker, ctxt_or_parent_or_marker }
176    }
177    #[inline]
178    fn from_span(span: Span) -> PartiallyInterned {
179        PartiallyInterned { index: span.lo_or_index, ctxt: span.ctxt_or_parent_or_marker }
180    }
181}
182
183impl Interned {
184    #[inline]
185    fn data(self) -> SpanData {
186        with_span_interner(|interner| interner.spans[self.index as usize])
187    }
188    #[inline]
189    fn span(index: u32) -> Span {
190        let (lo_or_index, len_with_tag_or_marker, ctxt_or_parent_or_marker) =
191            (index, BASE_LEN_INTERNED_MARKER, CTXT_INTERNED_MARKER);
192        Span { lo_or_index, len_with_tag_or_marker, ctxt_or_parent_or_marker }
193    }
194    #[inline]
195    fn from_span(span: Span) -> Interned {
196        Interned { index: span.lo_or_index }
197    }
198}
199
// This code is very hot, and converting span to an enum and matching on it doesn't optimize away
// properly. So we are using a macro emulating such a match, but expand it directly to an if-else
// chain.
//
// Usage: pass the span expression followed by exactly four arms, in this order and each with a
// trailing comma: `InlineCtxt(x) => ..`, `InlineParent(x) => ..`, `PartiallyInterned(x) => ..`,
// `Interned(x) => ..`. In each arm the identifier is bound to the matching convenience struct
// built with its `from_span`, and the arm expression becomes the value of the selected branch.
//
// `$span` is pasted into the expansion several times, so it should be a cheap expression without
// side effects (every use in this file passes `self`).
macro_rules! match_span_kind {
    (
        $span:expr,
        InlineCtxt($span1:ident) => $arm1:expr,
        InlineParent($span2:ident) => $arm2:expr,
        PartiallyInterned($span3:ident) => $arm3:expr,
        Interned($span4:ident) => $arm4:expr,
    ) => {
        // The length field is the first discriminator: anything other than the marker is inline.
        if $span.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
            // Among inline spans, the top bit of the length field tells context from parent.
            if $span.len_with_tag_or_marker & PARENT_TAG == 0 {
                // Inline-context format.
                let $span1 = InlineCtxt::from_span($span);
                $arm1
            } else {
                // Inline-parent format.
                let $span2 = InlineParent::from_span($span);
                $arm2
            }
        // Among interned spans, the context field tells partial from full interning.
        } else if $span.ctxt_or_parent_or_marker != CTXT_INTERNED_MARKER {
            // Partially-interned format.
            let $span3 = PartiallyInterned::from_span($span);
            $arm3
        } else {
            // Interned format.
            let $span4 = Interned::from_span($span);
            $arm4
        }
    };
}
232
// `MAX_LEN` is chosen so that `PARENT_TAG | MAX_LEN` is distinct from
// `BASE_LEN_INTERNED_MARKER`. (If `MAX_LEN` was 1 higher, this wouldn't be true.)
// Largest length that can be stored inline (0x7FFE).
const MAX_LEN: u32 = 0b0111_1111_1111_1110;
// Largest context value that can be stored inline (0x7FFE). `Span::new` and
// `Span::with_parent` also use it as the upper bound for an inline parent index.
const MAX_CTXT: u32 = 0b0111_1111_1111_1110;
// Top bit of `len_with_tag_or_marker`; set for the inline-parent format.
const PARENT_TAG: u16 = 0b1000_0000_0000_0000;
// Value of `len_with_tag_or_marker` in both interned formats (0xFFFF).
const BASE_LEN_INTERNED_MARKER: u16 = 0b1111_1111_1111_1111;
// Value of `ctxt_or_parent_or_marker` in the fully-interned format (0xFFFF).
const CTXT_INTERNED_MARKER: u16 = 0b1111_1111_1111_1111;

/// The dummy span has zero position, length, and context, and no parent.
/// With all three fields zero it is encoded in the inline-context format.
pub const DUMMY_SP: Span =
    Span { lo_or_index: 0, len_with_tag_or_marker: 0, ctxt_or_parent_or_marker: 0 };
244
245impl Span {
246    #[inline]
247    pub fn new(
248        mut lo: BytePos,
249        mut hi: BytePos,
250        ctxt: SyntaxContext,
251        parent: Option<LocalDefId>,
252    ) -> Self {
253        if lo > hi {
254            std::mem::swap(&mut lo, &mut hi);
255        }
256
257        // Small len and ctxt may enable one of fully inline formats (or may not).
258        let (len, ctxt32) = (hi.0 - lo.0, ctxt.as_u32());
259        if len <= MAX_LEN && ctxt32 <= MAX_CTXT {
260            match parent {
261                None => return InlineCtxt::span(lo.0, len as u16, ctxt32 as u16),
262                Some(parent) => {
263                    let parent32 = parent.local_def_index.as_u32();
264                    if ctxt32 == 0 && parent32 <= MAX_CTXT {
265                        return InlineParent::span(lo.0, len as u16, parent32 as u16);
266                    }
267                }
268            }
269        }
270
271        // Otherwise small ctxt may enable the partially inline format.
272        let index = |ctxt| {
273            with_span_interner(|interner| interner.intern(&SpanData { lo, hi, ctxt, parent }))
274        };
275        if ctxt32 <= MAX_CTXT {
276            // Interned ctxt should never be read, so it can use any value.
277            PartiallyInterned::span(index(SyntaxContext::from_u32(u32::MAX)), ctxt32 as u16)
278        } else {
279            Interned::span(index(ctxt))
280        }
281    }
282
283    #[inline]
284    pub fn data(self) -> SpanData {
285        let data = self.data_untracked();
286        if let Some(parent) = data.parent {
287            (*SPAN_TRACK)(parent);
288        }
289        data
290    }
291
292    /// Internal function to translate between an encoded span and the expanded representation.
293    /// This function must not be used outside the incremental engine.
294    #[inline]
295    pub fn data_untracked(self) -> SpanData {
296        match_span_kind! {
297            self,
298            InlineCtxt(span) => span.data(),
299            InlineParent(span) => span.data(),
300            PartiallyInterned(span) => span.data(),
301            Interned(span) => span.data(),
302        }
303    }
304
305    /// Returns `true` if this span comes from any kind of macro, desugaring or inlining.
306    #[inline]
307    pub fn from_expansion(self) -> bool {
308        let ctxt = match_span_kind! {
309            self,
310            // All branches here, except `InlineParent`, actually return `span.ctxt_or_parent_or_marker`.
311            // Since `Interned` is selected if the field contains `CTXT_INTERNED_MARKER` returning that value
312            // as the context allows the compiler to optimize out the branch that selects between either
313            // `Interned` and `PartiallyInterned`.
314            //
315            // Interned contexts can never be the root context and `CTXT_INTERNED_MARKER` has a different value
316            // than the root context so this works for checking is this is an expansion.
317            InlineCtxt(span) => SyntaxContext::from_u16(span.ctxt),
318            InlineParent(_span) => SyntaxContext::root(),
319            PartiallyInterned(span) => SyntaxContext::from_u16(span.ctxt),
320            Interned(_span) => SyntaxContext::from_u16(CTXT_INTERNED_MARKER),
321        };
322        !ctxt.is_root()
323    }
324
325    /// Returns `true` if this is a dummy span with any hygienic context.
326    #[inline]
327    pub fn is_dummy(self) -> bool {
328        if self.len_with_tag_or_marker != BASE_LEN_INTERNED_MARKER {
329            // Inline-context or inline-parent format.
330            let lo = self.lo_or_index;
331            let len = (self.len_with_tag_or_marker & !PARENT_TAG) as u32;
332            debug_assert!(len <= MAX_LEN);
333            lo == 0 && len == 0
334        } else {
335            // Fully-interned or partially-interned format.
336            let index = self.lo_or_index;
337            let data = with_span_interner(|interner| interner.spans[index as usize]);
338            data.lo == BytePos(0) && data.hi == BytePos(0)
339        }
340    }
341
342    #[inline]
343    pub fn map_ctxt(self, map: impl FnOnce(SyntaxContext) -> SyntaxContext) -> Span {
344        let data = match_span_kind! {
345            self,
346            InlineCtxt(span) => {
347                // This format occurs 1-2 orders of magnitude more often than others (#125017),
348                // so it makes sense to micro-optimize it to avoid `span.data()` and `Span::new()`.
349                let new_ctxt = map(SyntaxContext::from_u16(span.ctxt));
350                let new_ctxt32 = new_ctxt.as_u32();
351                return if new_ctxt32 <= MAX_CTXT {
352                    // Any small new context including zero will preserve the format.
353                    InlineCtxt::span(span.lo, span.len, new_ctxt32 as u16)
354                } else {
355                    span.data().with_ctxt(new_ctxt)
356                };
357            },
358            InlineParent(span) => span.data(),
359            PartiallyInterned(span) => span.data(),
360            Interned(span) => span.data(),
361        };
362
363        data.with_ctxt(map(data.ctxt))
364    }
365
366    // Returns either syntactic context, if it can be retrieved without taking the interner lock,
367    // or an index into the interner if it cannot.
368    #[inline]
369    fn inline_ctxt(self) -> Result<SyntaxContext, usize> {
370        match_span_kind! {
371            self,
372            InlineCtxt(span) => Ok(SyntaxContext::from_u16(span.ctxt)),
373            InlineParent(_span) => Ok(SyntaxContext::root()),
374            PartiallyInterned(span) => Ok(SyntaxContext::from_u16(span.ctxt)),
375            Interned(span) => Err(span.index as usize),
376        }
377    }
378
379    /// This function is used as a fast path when decoding the full `SpanData` is not necessary.
380    /// It's a cut-down version of `data_untracked`.
381    #[cfg_attr(not(test), rustc_diagnostic_item = "SpanCtxt")]
382    #[inline]
383    pub fn ctxt(self) -> SyntaxContext {
384        self.inline_ctxt()
385            .unwrap_or_else(|index| with_span_interner(|interner| interner.spans[index].ctxt))
386    }
387
388    #[inline]
389    pub fn eq_ctxt(self, other: Span) -> bool {
390        match (self.inline_ctxt(), other.inline_ctxt()) {
391            (Ok(ctxt1), Ok(ctxt2)) => ctxt1 == ctxt2,
392            // If `inline_ctxt` returns `Ok` the context is <= MAX_CTXT.
393            // If it returns `Err` the span is fully interned and the context is > MAX_CTXT.
394            // As these do not overlap an `Ok` and `Err` result cannot have an equal context.
395            (Ok(_), Err(_)) | (Err(_), Ok(_)) => false,
396            (Err(index1), Err(index2)) => with_span_interner(|interner| {
397                interner.spans[index1].ctxt == interner.spans[index2].ctxt
398            }),
399        }
400    }
401
402    #[inline]
403    pub fn with_parent(self, parent: Option<LocalDefId>) -> Span {
404        let data = match_span_kind! {
405            self,
406            InlineCtxt(span) => {
407                // This format occurs 1-2 orders of magnitude more often than others (#126544),
408                // so it makes sense to micro-optimize it to avoid `span.data()` and `Span::new()`.
409                // Copypaste from `Span::new`, the small len & ctxt conditions are known to hold.
410                match parent {
411                    None => return self,
412                    Some(parent) => {
413                        let parent32 = parent.local_def_index.as_u32();
414                        if span.ctxt == 0 && parent32 <= MAX_CTXT {
415                            return InlineParent::span(span.lo, span.len, parent32 as u16);
416                        }
417                    }
418                }
419                span.data()
420            },
421            InlineParent(span) => span.data(),
422            PartiallyInterned(span) => span.data(),
423            Interned(span) => span.data(),
424        };
425
426        if let Some(old_parent) = data.parent {
427            (*SPAN_TRACK)(old_parent);
428        }
429        data.with_parent(parent)
430    }
431
432    #[inline]
433    pub fn parent(self) -> Option<LocalDefId> {
434        let interned_parent =
435            |index: u32| with_span_interner(|interner| interner.spans[index as usize].parent);
436        match_span_kind! {
437            self,
438            InlineCtxt(_span) => None,
439            InlineParent(span) => Some(LocalDefId { local_def_index: DefIndex::from_u16(span.parent) }),
440            PartiallyInterned(span) => interned_parent(span.index),
441            Interned(span) => interned_parent(span.index),
442        }
443    }
444}
445
/// Table holding the `SpanData` values that do not fit into an inline `Span`.
/// Interned spans refer to their entry by its position in `spans`.
#[derive(Default)]
pub(crate) struct SpanInterner {
    // Insertion-ordered set: `intern` returns an entry's index, lookups index back into it.
    spans: FxIndexSet<SpanData>,
}
450
451impl SpanInterner {
452    fn intern(&mut self, span_data: &SpanData) -> u32 {
453        let (index, _) = self.spans.insert_full(*span_data);
454        index as u32
455    }
456}
457
458// If an interner exists, return it. Otherwise, prepare a fresh one.
459#[inline]
460fn with_span_interner<T, F: FnOnce(&mut SpanInterner) -> T>(f: F) -> T {
461    crate::with_session_globals(|session_globals| f(&mut session_globals.span_interner.lock()))
462}