rustc_ast/tokenstream.rs
1//! # Token Streams
2//!
3//! `TokenStream`s represent syntactic objects before they are converted into ASTs.
4//! A `TokenStream` is, roughly speaking, a sequence of [`TokenTree`]s,
5//! each of which is either a single [`Token`] or a `Delimited` subsequence of tokens.
6
7use std::borrow::Cow;
8use std::ops::Range;
9use std::sync::Arc;
10use std::{cmp, fmt, iter, mem};
11
12use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
13use rustc_data_structures::sync;
14use rustc_macros::{Decodable, Encodable, HashStable_Generic, Walkable};
15use rustc_serialize::{Decodable, Encodable};
16use rustc_span::{DUMMY_SP, Span, SpanDecoder, SpanEncoder, Symbol, sym};
17use thin_vec::ThinVec;
18
19use crate::ast::AttrStyle;
20use crate::ast_traits::{HasAttrs, HasTokens};
21use crate::token::{self, Delimiter, Token, TokenKind};
22use crate::{AttrVec, Attribute};
23
24/// Part of a `TokenStream`.
25#[derive(Debug, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
26pub enum TokenTree {
27 /// A single token. Should never be an open or close delimiter, because
28 /// delimiters are implicitly represented by `Delimited`.
29 Token(Token, Spacing),
30 /// A delimited sequence of token trees.
31 Delimited(DelimSpan, DelimSpacing, Delimiter, TokenStream),
32}
33
34// Ensure all fields of `TokenTree` are `DynSend` and `DynSync`.
35fn _dummy()
36where
37 Token: sync::DynSend + sync::DynSync,
38 Spacing: sync::DynSend + sync::DynSync,
39 DelimSpan: sync::DynSend + sync::DynSync,
40 Delimiter: sync::DynSend + sync::DynSync,
41 TokenStream: sync::DynSend + sync::DynSync,
42{
43}
44
45impl TokenTree {
46 /// Checks if this `TokenTree` is equal to the other, regardless of span/spacing information.
47 pub fn eq_unspanned(&self, other: &TokenTree) -> bool {
48 match (self, other) {
49 (TokenTree::Token(token, _), TokenTree::Token(token2, _)) => token.kind == token2.kind,
50 (TokenTree::Delimited(.., delim, tts), TokenTree::Delimited(.., delim2, tts2)) => {
51 delim == delim2
52 && tts.len() == tts2.len()
53 && tts.iter().zip(tts2.iter()).all(|(a, b)| a.eq_unspanned(b))
54 }
55 _ => false,
56 }
57 }
58
59 /// Retrieves the `TokenTree`'s span.
60 pub fn span(&self) -> Span {
61 match self {
62 TokenTree::Token(token, _) => token.span,
63 TokenTree::Delimited(sp, ..) => sp.entire(),
64 }
65 }
66
67 /// Create a `TokenTree::Token` with alone spacing.
68 pub fn token_alone(kind: TokenKind, span: Span) -> TokenTree {
69 TokenTree::Token(Token::new(kind, span), Spacing::Alone)
70 }
71
72 /// Create a `TokenTree::Token` with joint spacing.
73 pub fn token_joint(kind: TokenKind, span: Span) -> TokenTree {
74 TokenTree::Token(Token::new(kind, span), Spacing::Joint)
75 }
76
77 /// Create a `TokenTree::Token` with joint-hidden spacing.
78 pub fn token_joint_hidden(kind: TokenKind, span: Span) -> TokenTree {
79 TokenTree::Token(Token::new(kind, span), Spacing::JointHidden)
80 }
81
82 pub fn uninterpolate(&self) -> Cow<'_, TokenTree> {
83 match self {
84 TokenTree::Token(token, spacing) => match token.uninterpolate() {
85 Cow::Owned(token) => Cow::Owned(TokenTree::Token(token, *spacing)),
86 Cow::Borrowed(_) => Cow::Borrowed(self),
87 },
88 _ => Cow::Borrowed(self),
89 }
90 }
91}
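
// Illustrative sketch (not part of the original source): building token trees with
// the constructors above and comparing them with `eq_unspanned`, which ignores
// spans and spacing. Only items defined or imported in this file are used.
//
// ```ignore (illustrative)
// let a = TokenTree::token_alone(token::Comma, DUMMY_SP);
// let b = TokenTree::token_joint(token::Comma, DUMMY_SP);
// assert!(a.eq_unspanned(&b)); // same kind; spacing and spans are ignored
// ```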
92
93/// A lazy version of [`AttrTokenStream`], which defers creation of an actual
94/// `AttrTokenStream` until it is needed.
95#[derive(Clone)]
96pub struct LazyAttrTokenStream(Arc<LazyAttrTokenStreamInner>);
97
98impl LazyAttrTokenStream {
99 pub fn new_direct(stream: AttrTokenStream) -> LazyAttrTokenStream {
100 LazyAttrTokenStream(Arc::new(LazyAttrTokenStreamInner::Direct(stream)))
101 }
102
103 pub fn new_pending(
104 start_token: (Token, Spacing),
105 cursor_snapshot: TokenCursor,
106 num_calls: u32,
107 break_last_token: u32,
108 node_replacements: ThinVec<NodeReplacement>,
109 ) -> LazyAttrTokenStream {
110 LazyAttrTokenStream(Arc::new(LazyAttrTokenStreamInner::Pending {
111 start_token,
112 cursor_snapshot,
113 num_calls,
114 break_last_token,
115 node_replacements,
116 }))
117 }
118
119 pub fn to_attr_token_stream(&self) -> AttrTokenStream {
120 self.0.to_attr_token_stream()
121 }
122}
123
124impl fmt::Debug for LazyAttrTokenStream {
125 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
126 write!(f, "LazyAttrTokenStream({:?})", self.to_attr_token_stream())
127 }
128}
129
130impl<S: SpanEncoder> Encodable<S> for LazyAttrTokenStream {
131 fn encode(&self, _s: &mut S) {
132 panic!("Attempted to encode LazyAttrTokenStream");
133 }
134}
135
136impl<D: SpanDecoder> Decodable<D> for LazyAttrTokenStream {
137 fn decode(_d: &mut D) -> Self {
138 panic!("Attempted to decode LazyAttrTokenStream");
139 }
140}
141
142impl<CTX> HashStable<CTX> for LazyAttrTokenStream {
143 fn hash_stable(&self, _hcx: &mut CTX, _hasher: &mut StableHasher) {
144 panic!("Attempted to compute stable hash for LazyAttrTokenStream");
145 }
146}
147
148/// A token range within a `Parser`'s full token stream.
149#[derive(Clone, Debug)]
150pub struct ParserRange(pub Range<u32>);
151
152/// A token range within an individual AST node's (lazy) token stream, i.e.
153/// relative to that node's first token. Distinct from `ParserRange` so the two
154/// kinds of range can't be mixed up.
155#[derive(Clone, Debug)]
156pub struct NodeRange(pub Range<u32>);
157
158/// Indicates a range of tokens that should be replaced by an `AttrsTarget`
159/// (replacement) or removed entirely (deletion). This is used in two
160/// places during token collection.
161///
162/// 1. Replacement. During the parsing of an AST node that may have a
163/// `#[derive]` attribute, when we parse a nested AST node that has `#[cfg]`
164/// or `#[cfg_attr]`, we replace the entire inner AST node with
165/// `FlatToken::AttrsTarget`. This lets us perform eager cfg-expansion on an
166/// `AttrTokenStream`.
167///
168/// 2. Deletion. We delete inner attributes from all collected token streams,
169/// and instead track them through the `attrs` field on the AST node. This
170/// lets us manipulate them similarly to outer attributes. When we create a
171/// `TokenStream`, the inner attributes are inserted into the proper place
172/// in the token stream.
173///
174/// Each replacement starts off in `ParserReplacement` form but is converted to
175/// `NodeReplacement` form when it is attached to a single AST node, via
176/// `LazyAttrTokenStreamImpl`.
177pub type ParserReplacement = (ParserRange, Option<AttrsTarget>);
178
179/// See the comment on `ParserReplacement`.
180pub type NodeReplacement = (NodeRange, Option<AttrsTarget>);
181
182impl NodeRange {
183 // Converts a range within a parser's tokens to a range within a
184 // node's tokens beginning at `start_pos`.
185 //
186 // For example, imagine a parser with 50 tokens in its token stream, a
187 // function that spans `ParserRange(20..40)` and an inner attribute within
188 // that function that spans `ParserRange(30..35)`. We would find the inner
189 // attribute's range within the function's tokens by subtracting 20, which
190 // is the position of the function's start token. This gives
191 // `NodeRange(10..15)`.
192 pub fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange {
193 assert!(!parser_range.is_empty());
194 assert!(parser_range.start >= start_pos);
195 NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos))
196 }
197}
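
// Illustrative sketch (not part of the original source): the worked example from the
// comment above, expressed as code.
//
// ```ignore (illustrative)
// // An inner attribute spans parser tokens 30..35; its enclosing function's tokens
// // start at parser position 20.
// let node_range = NodeRange::new(ParserRange(30..35), 20);
// assert_eq!(node_range.0, 10..15);
// ```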
198
199enum LazyAttrTokenStreamInner {
200 // The token stream has already been produced.
201 Direct(AttrTokenStream),
202
203 // From a value of this type we can reconstruct the `TokenStream` seen by
204 // the `f` callback passed to a call to `Parser::collect_tokens`, by
205 // replaying how the tokens were retrieved. This saves us producing a
206 // `TokenStream` if it is never needed, e.g. a captured `macro_rules!`
207 // argument that is never passed to a proc macro. In practice, token stream
208 // creation happens rarely compared to calls to `collect_tokens` (see some
209 // statistics in #78736) so we are doing as little up-front work as
210 // possible.
211 //
212 // This also makes `Parser` very cheap to clone, since there is no
213 // intermediate collection buffer to clone.
214 Pending {
215 start_token: (Token, Spacing),
216 cursor_snapshot: TokenCursor,
217 num_calls: u32,
218 break_last_token: u32,
219 node_replacements: ThinVec<NodeReplacement>,
220 },
221}
222
223impl LazyAttrTokenStreamInner {
224 fn to_attr_token_stream(&self) -> AttrTokenStream {
225 match self {
226 LazyAttrTokenStreamInner::Direct(stream) => stream.clone(),
227 LazyAttrTokenStreamInner::Pending {
228 start_token,
229 cursor_snapshot,
230 num_calls,
231 break_last_token,
232 node_replacements,
233 } => {
234 // The token produced by the final call to `{,inlined_}next` was not
235 // actually consumed by the callback. The combination of chaining the
236 // initial token and using `take` produces the desired result - we
237 // produce an empty `TokenStream` if no calls were made, and omit the
238 // final token otherwise.
239 let mut cursor_snapshot = cursor_snapshot.clone();
240 let tokens = iter::once(FlatToken::Token(*start_token))
241 .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next())))
242 .take(*num_calls as usize);
243
244 if node_replacements.is_empty() {
245 make_attr_token_stream(tokens, *break_last_token)
246 } else {
247 let mut tokens: Vec<_> = tokens.collect();
248 let mut node_replacements = node_replacements.to_vec();
249 node_replacements.sort_by_key(|(range, _)| range.0.start);
250
251 #[cfg(debug_assertions)]
252 for [(node_range, tokens), (next_node_range, next_tokens)] in
253 node_replacements.array_windows()
254 {
255 assert!(
256 node_range.0.end <= next_node_range.0.start
257 || node_range.0.end >= next_node_range.0.end,
258 "Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
259 node_range,
260 tokens,
261 next_node_range,
262 next_tokens,
263 );
264 }
265
266 // Process the replace ranges, starting from the highest start
267 // position and working our way back. If we have tokens like:
268 //
269 // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
270 //
271 // Then we will generate replace ranges for both
272 // the `#[cfg(FALSE)] field: bool` and the entire
273 // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
274 //
275 // By starting processing from the replace range with the greatest
276 // start position, we ensure that any (outer) replace range which
277 // encloses another (inner) replace range will fully overwrite the
278 // inner range's replacement.
279 for (node_range, target) in node_replacements.into_iter().rev() {
280 assert!(
281 !node_range.0.is_empty(),
282 "Cannot replace an empty node range: {:?}",
283 node_range.0
284 );
285
286 // Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s,
287 // plus enough `FlatToken::Empty`s to fill up the rest of the range. This
288 // keeps the total length of `tokens` constant throughout the replacement
289 // process, allowing us to do all replacements without adjusting indices.
290 let target_len = target.is_some() as usize;
291 tokens.splice(
292 (node_range.0.start as usize)..(node_range.0.end as usize),
293 target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain(
294 iter::repeat(FlatToken::Empty)
295 .take(node_range.0.len() - target_len),
296 ),
297 );
298 }
299 make_attr_token_stream(tokens.into_iter(), *break_last_token)
300 }
301 }
302 }
303 }
304}
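
// Illustrative sketch (not part of the original source): the index-stable replacement
// technique used above, reduced to plain integers. Each range is spliced with its
// replacement plus enough padding that the vector keeps its length, so ranges that
// have not been processed yet still refer to valid positions.
//
// ```ignore (illustrative, simplified)
// fn replace_keeping_len(v: &mut Vec<Option<u32>>, range: std::ops::Range<usize>, new: Option<u32>) {
//     // Zero or one replacement elements, then padding to preserve the length.
//     let pad = range.len() - new.is_some() as usize;
//     v.splice(
//         range,
//         new.into_iter().map(Some).chain(std::iter::repeat(None).take(pad)),
//     );
// }
// ```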
305
306/// A helper type used when building an `AttrTokenStream` from
307/// a `LazyAttrTokenStream`. Both delimiter and non-delimiter tokens
308/// are stored as `FlatToken::Token`. A vector of `FlatToken`s
309/// is then 'parsed' to build up an `AttrTokenStream` with nested
310/// `AttrTokenTree::Delimited` tokens.
311#[derive(Debug, Clone)]
312enum FlatToken {
313 /// A token - this holds both delimiter (e.g. '{' and '}')
314 /// and non-delimiter tokens
315 Token((Token, Spacing)),
316 /// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted
317 /// directly into the constructed `AttrTokenStream` as an
318 /// `AttrTokenTree::AttrsTarget`.
319 AttrsTarget(AttrsTarget),
320 /// A special 'empty' token that is ignored during the conversion
321 /// to an `AttrTokenStream`. This is used to simplify the
322 /// handling of replace ranges.
323 Empty,
324}
325
326/// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
327/// information about the tokens for attribute targets. This is used
328/// during expansion to perform early cfg-expansion, and to process attributes
329/// during proc-macro invocations.
330#[derive(Clone, Debug, Default, Encodable, Decodable)]
331pub struct AttrTokenStream(pub Arc<Vec<AttrTokenTree>>);
332
333/// Converts a flattened iterator of tokens (including open and close delimiter tokens) into an
334/// `AttrTokenStream`, creating an `AttrTokenTree::Delimited` for each matching pair of open and
335/// close delims.
336fn make_attr_token_stream(
337 iter: impl Iterator<Item = FlatToken>,
338 break_last_token: u32,
339) -> AttrTokenStream {
340 #[derive(Debug)]
341 struct FrameData {
342 // This is `None` for the first frame, `Some` for all others.
343 open_delim_sp: Option<(Delimiter, Span, Spacing)>,
344 inner: Vec<AttrTokenTree>,
345 }
346 // The stack always has at least one element. Storing it separately makes for shorter code.
347 let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] };
348 let mut stack_rest = vec![];
349 for flat_token in iter {
350 match flat_token {
351 FlatToken::Token((token @ Token { kind, span }, spacing)) => {
352 if let Some(delim) = kind.open_delim() {
353 stack_rest.push(mem::replace(
354 &mut stack_top,
355 FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] },
356 ));
357 } else if let Some(delim) = kind.close_delim() {
358 let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap());
359 let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap();
360 assert!(
361 open_delim.eq_ignoring_invisible_origin(&delim),
362 "Mismatched open/close delims: open={open_delim:?} close={span:?}"
363 );
364 let dspan = DelimSpan::from_pair(open_sp, span);
365 let dspacing = DelimSpacing::new(open_spacing, spacing);
366 let stream = AttrTokenStream::new(frame_data.inner);
367 let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream);
368 stack_top.inner.push(delimited);
369 } else {
370 stack_top.inner.push(AttrTokenTree::Token(token, spacing))
371 }
372 }
373 FlatToken::AttrsTarget(target) => {
374 stack_top.inner.push(AttrTokenTree::AttrsTarget(target))
375 }
376 FlatToken::Empty => {}
377 }
378 }
379
380 if break_last_token > 0 {
381 let last_token = stack_top.inner.pop().unwrap();
382 if let AttrTokenTree::Token(last_token, spacing) = last_token {
383 let (unglued, _) = last_token.kind.break_two_token_op(break_last_token).unwrap();
384
385 // Tokens are always ASCII chars, so we can use byte arithmetic here.
386 let mut first_span = last_token.span.shrink_to_lo();
387 first_span =
388 first_span.with_hi(first_span.lo() + rustc_span::BytePos(break_last_token));
389
390 stack_top.inner.push(AttrTokenTree::Token(Token::new(unglued, first_span), spacing));
391 } else {
392 panic!("Unexpected last token {last_token:?}")
393 }
394 }
395 AttrTokenStream::new(stack_top.inner)
396}
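
// Illustrative sketch (not part of the original source): the same stack-based
// delimiter-matching technique as `make_attr_token_stream`, reduced to plain `char`s
// so it can stand alone. Each open delimiter pushes a new frame; each close delimiter
// pops a frame and nests the collected contents into the parent frame.
//
// ```ignore (illustrative, simplified)
// #[derive(Debug)]
// enum Tree {
//     Leaf(char),
//     Group(Vec<Tree>),
// }
//
// fn parse(flat: &str) -> Vec<Tree> {
//     let mut top: Vec<Tree> = vec![];        // the frame currently being filled
//     let mut rest: Vec<Vec<Tree>> = vec![];  // frames of enclosing groups
//     for c in flat.chars() {
//         match c {
//             '(' => rest.push(std::mem::take(&mut top)),
//             ')' => {
//                 let inner = std::mem::replace(&mut top, rest.pop().unwrap());
//                 top.push(Tree::Group(inner));
//             }
//             _ => top.push(Tree::Leaf(c)),
//         }
//     }
//     top
// }
// ```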
397
398/// Like `TokenTree`, but for `AttrTokenStream`.
399#[derive(Clone, Debug, Encodable, Decodable)]
400pub enum AttrTokenTree {
401 Token(Token, Spacing),
402 Delimited(DelimSpan, DelimSpacing, Delimiter, AttrTokenStream),
403 /// Stores the attributes for an attribute target,
404 /// along with the tokens for that attribute target.
405 /// See `AttrsTarget` for more information
406 AttrsTarget(AttrsTarget),
407}
408
409impl AttrTokenStream {
410 pub fn new(tokens: Vec<AttrTokenTree>) -> AttrTokenStream {
411 AttrTokenStream(Arc::new(tokens))
412 }
413
414 /// Converts this `AttrTokenStream` to a plain `Vec<TokenTree>`. During
415 /// conversion, any `AttrTokenTree::AttrsTarget` gets "flattened" back to a
416 /// `TokenStream`, as described in the comment on
417 /// `attrs_and_tokens_to_token_trees`.
418 pub fn to_token_trees(&self) -> Vec<TokenTree> {
419 let mut res = Vec::with_capacity(self.0.len());
420 for tree in self.0.iter() {
421 match tree {
422 AttrTokenTree::Token(inner, spacing) => {
423 res.push(TokenTree::Token(inner.clone(), *spacing));
424 }
425 AttrTokenTree::Delimited(span, spacing, delim, stream) => {
426 res.push(TokenTree::Delimited(
427 *span,
428 *spacing,
429 *delim,
430 TokenStream::new(stream.to_token_trees()),
431 ))
432 }
433 AttrTokenTree::AttrsTarget(target) => {
434 attrs_and_tokens_to_token_trees(&target.attrs, &target.tokens, &mut res);
435 }
436 }
437 }
438 res
439 }
440}
441
442// Converts multiple attributes and the tokens for a target AST node into token trees, and appends
443// them to `res`.
444//
445// Example: if the AST node is "fn f() { blah(); }", then:
446// - Simple if no attributes are present, e.g. "fn f() { blah(); }"
447// - Simple if only outer attributes are present, e.g. "#[outer1] #[outer2] fn f() { blah(); }"
448// - Trickier if inner attributes are present, because they must be moved within the AST node's
449//   tokens, e.g. "#[outer] fn f() { #![inner] blah(); }"
450fn attrs_and_tokens_to_token_trees(
451 attrs: &[Attribute],
452 target_tokens: &LazyAttrTokenStream,
453 res: &mut Vec<TokenTree>,
454) {
455 let idx = attrs.partition_point(|attr| matches!(attr.style, crate::AttrStyle::Outer));
456 let (outer_attrs, inner_attrs) = attrs.split_at(idx);
457
458 // Add outer attribute tokens.
459 for attr in outer_attrs {
460 res.extend(attr.token_trees());
461 }
462
463 // Add target AST node tokens.
464 res.extend(target_tokens.to_attr_token_stream().to_token_trees());
465
466 // Insert inner attribute tokens.
467 if !inner_attrs.is_empty() {
468 let found = insert_inner_attrs(inner_attrs, res);
469 assert!(found, "Failed to find trailing delimited group in: {res:?}");
470 }
471
472 // Inner attributes are only supported on blocks, functions, impls, and
473 // modules. All of these have their inner attributes placed at the
474 // beginning of the rightmost outermost braced group:
475 // e.g. `fn foo() { #![my_attr] }`. (Note: the braces may be within
476 // invisible delimiters.)
477 //
478 // Therefore, we can insert them back into the right location without
479 // needing to do any extra position tracking.
480 //
481 // Note: Outline modules are an exception - they can have attributes like
482 // `#![my_attr]` at the start of a file. Support for custom attributes in
483 // this position is not properly implemented - we always synthesize fake
484 // tokens, so we never reach this code.
485 fn insert_inner_attrs(inner_attrs: &[Attribute], tts: &mut Vec<TokenTree>) -> bool {
486 for tree in tts.iter_mut().rev() {
487 if let TokenTree::Delimited(span, spacing, Delimiter::Brace, stream) = tree {
488 // Found it: the rightmost, outermost braced group.
489 let mut tts = vec![];
490 for inner_attr in inner_attrs {
491 tts.extend(inner_attr.token_trees());
492 }
493 tts.extend(stream.0.iter().cloned());
494 let stream = TokenStream::new(tts);
495 *tree = TokenTree::Delimited(*span, *spacing, Delimiter::Brace, stream);
496 return true;
497 } else if let TokenTree::Delimited(span, spacing, Delimiter::Invisible(src), stream) =
498 tree
499 {
500 // Recurse inside invisible delimiters.
501 let mut vec: Vec<_> = stream.iter().cloned().collect();
502 if insert_inner_attrs(inner_attrs, &mut vec) {
503 *tree = TokenTree::Delimited(
504 *span,
505 *spacing,
506 Delimiter::Invisible(*src),
507 TokenStream::new(vec),
508 );
509 return true;
510 }
511 }
512 }
513 false
514 }
515}
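
// Illustrative sketch (not part of the original source): the resulting token tree
// order for `#[outer] fn f() { #![inner] blah(); }`:
//
// ```ignore (illustrative)
// # [outer]                    // outer attribute tokens come first
// fn f ( )                     // then the target's own tokens...
// { # ! [inner] blah ( ) ; }   // ...with the inner attribute re-inserted at the start
//                              // of the rightmost outermost braced group
// ```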
516
517/// Stores the tokens for an attribute target, along
518/// with its attributes.
519///
520/// This is constructed during parsing when we need to capture
521/// tokens, for `cfg` and `cfg_attr` attributes.
522///
523/// For example, `#[cfg(FALSE)] struct Foo {}` would
524/// have an `attrs` field containing the `#[cfg(FALSE)]` attr,
525/// and a `tokens` field storing the (unparsed) tokens `struct Foo {}`
526///
527/// The `cfg`/`cfg_attr` processing occurs in
528/// `StripUnconfigured::configure_tokens`.
529#[derive(Clone, Debug, Encodable, Decodable)]
530pub struct AttrsTarget {
531 /// Attributes, both outer and inner.
532 /// These are stored in the original order that they were parsed in.
533 pub attrs: AttrVec,
534 /// The underlying tokens for the attribute target that `attrs`
535 /// are applied to
536 pub tokens: LazyAttrTokenStream,
537}
538
539/// Indicates whether a token can join with the following token to form a
540/// compound token. Used for conversions to `proc_macro::Spacing`. Also used to
541/// guide pretty-printing, which is where the `JointHidden` value (which isn't
542/// part of `proc_macro::Spacing`) comes in useful.
543#[derive(Clone, Copy, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
544pub enum Spacing {
545 /// The token cannot join with the following token to form a compound
546 /// token.
547 ///
548 /// In token streams parsed from source code, the compiler will use `Alone`
549 /// for any token immediately followed by whitespace, a non-doc comment, or
550 /// EOF.
551 ///
552 /// When constructing token streams within the compiler, use this for each
553 /// token that (a) should be pretty-printed with a space after it, or (b)
554 /// is the last token in the stream. (In the latter case the choice of
555 /// spacing doesn't matter because it is never used for the last token. We
556 /// arbitrarily use `Alone`.)
557 ///
558 /// Converts to `proc_macro::Spacing::Alone`, and
559 /// `proc_macro::Spacing::Alone` converts back to this.
560 Alone,
561
562 /// The token can join with the following token to form a compound token.
563 ///
564 /// In token streams parsed from source code, the compiler will use `Joint`
565 /// for any token immediately followed by punctuation (as determined by
566 /// `Token::is_punct`).
567 ///
568 /// When constructing token streams within the compiler, use this for each
569 /// token that (a) should be pretty-printed without a space after it, and
570 /// (b) is followed by a punctuation token.
571 ///
572 /// Converts to `proc_macro::Spacing::Joint`, and
573 /// `proc_macro::Spacing::Joint` converts back to this.
574 Joint,
575
576 /// The token can join with the following token to form a compound token,
577 /// but this will not be visible at the proc macro level. (This is what the
578 /// `Hidden` means; see below.)
579 ///
580 /// In token streams parsed from source code, the compiler will use
581 /// `JointHidden` for any token immediately followed by anything not
582 /// covered by the `Alone` and `Joint` cases: an identifier, lifetime,
583 /// literal, delimiter, doc comment.
584 ///
585 /// When constructing token streams, use this for each token that (a)
586 /// should be pretty-printed without a space after it, and (b) is followed
587 /// by a non-punctuation token.
588 ///
589 /// Converts to `proc_macro::Spacing::Alone`, but
590 /// `proc_macro::Spacing::Alone` converts back to `token::Spacing::Alone`.
591 /// Because of that, pretty-printing of `TokenStream`s produced by proc
592 /// macros is unavoidably uglier (with more whitespace between tokens) than
593 /// pretty-printing of `TokenStream`s produced by other means (i.e. parsed
594 /// source code, internally constructed token streams, and token streams
595 /// produced by declarative macros).
596 JointHidden,
597}
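
// Illustrative sketch (not part of the original source): the spacing used when
// constructing the two tokens of `!=` by hand. The `!` is `Joint` because it is
// immediately followed by punctuation (`=`); the `=` is the last token, so its
// spacing is unused and `Alone` is chosen arbitrarily.
//
// ```ignore (illustrative)
// let stream: TokenStream = [
//     TokenTree::token_joint(token::Bang, DUMMY_SP),
//     TokenTree::token_alone(token::Eq, DUMMY_SP),
// ]
// .into_iter()
// .collect();
// ```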
598
599/// A `TokenStream` is an abstract sequence of tokens, organized into [`TokenTree`]s.
600#[derive(Clone, Debug, Default, Encodable, Decodable)]
601pub struct TokenStream(pub(crate) Arc<Vec<TokenTree>>);
602
603impl TokenStream {
604 pub fn new(tts: Vec<TokenTree>) -> TokenStream {
605 TokenStream(Arc::new(tts))
606 }
607
608 pub fn is_empty(&self) -> bool {
609 self.0.is_empty()
610 }
611
612 pub fn len(&self) -> usize {
613 self.0.len()
614 }
615
616 pub fn get(&self, index: usize) -> Option<&TokenTree> {
617 self.0.get(index)
618 }
619
620 pub fn iter(&self) -> TokenStreamIter<'_> {
621 TokenStreamIter::new(self)
622 }
623
624 /// Create a token stream containing a single token with alone spacing. The
625 /// spacing used for the final token in a constructed stream doesn't matter
626 /// because it's never used. In practice we arbitrarily use
627 /// `Spacing::Alone`.
628 pub fn token_alone(kind: TokenKind, span: Span) -> TokenStream {
629 TokenStream::new(vec![TokenTree::token_alone(kind, span)])
630 }
631
632 pub fn from_ast(node: &(impl HasAttrs + HasTokens + fmt::Debug)) -> TokenStream {
633 let tokens = node.tokens().unwrap_or_else(|| panic!("missing tokens for node: {:?}", node));
634 let mut tts = vec![];
635 attrs_and_tokens_to_token_trees(node.attrs(), tokens, &mut tts);
636 TokenStream::new(tts)
637 }
638
639 // If `vec` is not empty, try to glue `tt` onto its last token. The return
640 // value indicates if gluing took place.
641 fn try_glue_to_last(vec: &mut Vec<TokenTree>, tt: &TokenTree) -> bool {
642 if let Some(TokenTree::Token(last_tok, Spacing::Joint | Spacing::JointHidden)) = vec.last()
643 && let TokenTree::Token(tok, spacing) = tt
644 && let Some(glued_tok) = last_tok.glue(tok)
645 {
646 // ...then overwrite the last token tree in `vec` with the
647 // glued token. The caller must then avoid pushing `tt` separately.
648 *vec.last_mut().unwrap() = TokenTree::Token(glued_tok, *spacing);
649 true
650 } else {
651 false
652 }
653 }
654
655 /// Push `tt` onto the end of the stream, possibly gluing it to the last
656 /// token. Uses `make_mut` to maximize efficiency.
657 pub fn push_tree(&mut self, tt: TokenTree) {
658 let vec_mut = Arc::make_mut(&mut self.0);
659
660 if Self::try_glue_to_last(vec_mut, &tt) {
661 // nothing else to do
662 } else {
663 vec_mut.push(tt);
664 }
665 }
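
// Illustrative sketch (not part of the original source): if the stream ends with a
// `Joint` token and `Token::glue` can combine it with the incoming token (assumed
// here for two adjacent `=` tokens forming `==`), `push_tree` overwrites the last
// tree instead of appending a new one.
//
// ```ignore (illustrative)
// let mut stream = TokenStream::new(vec![TokenTree::token_joint(token::Eq, DUMMY_SP)]);
// stream.push_tree(TokenTree::token_alone(token::Eq, DUMMY_SP));
// assert_eq!(stream.len(), 1); // a single `==` token, not two `=` tokens
// ```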
666
667 /// Push `stream` onto the end of the stream, possibly gluing the first
668 /// token tree to the last token. (No other token trees will be glued.)
669 /// Uses `make_mut` to maximize efficiency.
670 pub fn push_stream(&mut self, stream: TokenStream) {
671 let vec_mut = Arc::make_mut(&mut self.0);
672
673 let stream_iter = stream.0.iter().cloned();
674
675 if let Some(first) = stream.0.first()
676 && Self::try_glue_to_last(vec_mut, first)
677 {
678 // Now skip the first token tree from `stream`.
679 vec_mut.extend(stream_iter.skip(1));
680 } else {
681 // Append all of `stream`.
682 vec_mut.extend(stream_iter);
683 }
684 }
685
686 pub fn chunks(&self, chunk_size: usize) -> core::slice::Chunks<'_, TokenTree> {
687 self.0.chunks(chunk_size)
688 }
689
690 /// Desugar doc comments like `/// foo` in the stream into `#[doc =
691 /// r"foo"]`. Modifies the `TokenStream` via `Arc::make_mut`, but as little
692 /// as possible.
693 pub fn desugar_doc_comments(&mut self) {
694 if let Some(desugared_stream) = desugar_inner(self.clone()) {
695 *self = desugared_stream;
696 }
697
698 // The return value is `None` if nothing in `stream` changed.
699 fn desugar_inner(mut stream: TokenStream) -> Option<TokenStream> {
700 let mut i = 0;
701 let mut modified = false;
702 while let Some(tt) = stream.0.get(i) {
703 match tt {
704 &TokenTree::Token(
705 Token { kind: token::DocComment(_, attr_style, data), span },
706 _spacing,
707 ) => {
708 let desugared = desugared_tts(attr_style, data, span);
709 let desugared_len = desugared.len();
710 Arc::make_mut(&mut stream.0).splice(i..i + 1, desugared);
711 modified = true;
712 i += desugared_len;
713 }
714
715 &TokenTree::Token(..) => i += 1,
716
717 &TokenTree::Delimited(sp, spacing, delim, ref delim_stream) => {
718 if let Some(desugared_delim_stream) = desugar_inner(delim_stream.clone()) {
719 let new_tt =
720 TokenTree::Delimited(sp, spacing, delim, desugared_delim_stream);
721 Arc::make_mut(&mut stream.0)[i] = new_tt;
722 modified = true;
723 }
724 i += 1;
725 }
726 }
727 }
728 if modified { Some(stream) } else { None }
729 }
730
731 fn desugared_tts(attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
732 // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
733 // required to wrap the text. E.g.
734 // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
735 // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
736 // - `abc "##d##"` is wrapped as `r###"abc ##"d"##"###` (num_of_hashes = 3)
737 let mut num_of_hashes = 0;
738 let mut count = 0;
739 for ch in data.as_str().chars() {
740 count = match ch {
741 '"' => 1,
742 '#' if count > 0 => count + 1,
743 _ => 0,
744 };
745 num_of_hashes = cmp::max(num_of_hashes, count);
746 }
747
748 // `/// foo` becomes `[doc = r"foo"]`.
749 let delim_span = DelimSpan::from_single(span);
750 let body = TokenTree::Delimited(
751 delim_span,
752 DelimSpacing::new(Spacing::JointHidden, Spacing::Alone),
753 Delimiter::Bracket,
754 [
755 TokenTree::token_alone(token::Ident(sym::doc, token::IdentIsRaw::No), span),
756 TokenTree::token_alone(token::Eq, span),
757 TokenTree::token_alone(
758 TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
759 span,
760 ),
761 ]
762 .into_iter()
763 .collect::<TokenStream>(),
764 );
765
766 if attr_style == AttrStyle::Inner {
767 vec![
768 TokenTree::token_joint(token::Pound, span),
769 TokenTree::token_joint_hidden(token::Bang, span),
770 body,
771 ]
772 } else {
773 vec![TokenTree::token_joint_hidden(token::Pound, span), body]
774 }
775 }
776 }
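
// Illustrative sketch (not part of the original source): the shape of the output for
// an outer doc comment whose text contains a quote. One `#` is needed to wrap the
// text in a raw string, so `/// say "hi"` desugars to token trees spelling:
//
// ```ignore (illustrative)
// # [doc = r#" say "hi""#]
// ```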
777
778 /// If this stream appears to be missing a comma between two adjacent arguments, return a
779 /// new `TokenStream` with the comma inserted, plus the insertion span, for diagnostic suggestions.
780 pub fn add_comma(&self) -> Option<(TokenStream, Span)> {
781 // Used to suggest if a user writes `foo!(a b);`
782 let mut suggestion = None;
783 let mut iter = self.0.iter().enumerate().peekable();
784 while let Some((pos, ts)) = iter.next() {
785 if let Some((_, next)) = iter.peek() {
786 let sp = match (&ts, &next) {
787 (_, TokenTree::Token(Token { kind: token::Comma, .. }, _)) => continue,
788 (
789 TokenTree::Token(token_left, Spacing::Alone),
790 TokenTree::Token(token_right, _),
791 ) if (token_left.is_non_reserved_ident() || token_left.is_lit())
792 && (token_right.is_non_reserved_ident() || token_right.is_lit()) =>
793 {
794 token_left.span
795 }
796 (TokenTree::Delimited(sp, ..), _) => sp.entire(),
797 _ => continue,
798 };
799 let sp = sp.shrink_to_hi();
800 let comma = TokenTree::token_alone(token::Comma, sp);
801 suggestion = Some((pos, comma, sp));
802 }
803 }
804 if let Some((pos, comma, sp)) = suggestion {
805 let mut new_stream = Vec::with_capacity(self.0.len() + 1);
806 let parts = self.0.split_at(pos + 1);
807 new_stream.extend_from_slice(parts.0);
808 new_stream.push(comma);
809 new_stream.extend_from_slice(parts.1);
810 return Some((TokenStream::new(new_stream), sp));
811 }
812 None
813 }
814}
815
816impl PartialEq<TokenStream> for TokenStream {
817 fn eq(&self, other: &TokenStream) -> bool {
818 self.iter().eq(other.iter())
819 }
820}
821
822impl Eq for TokenStream {}
823
824impl FromIterator<TokenTree> for TokenStream {
825 fn from_iter<I: IntoIterator<Item = TokenTree>>(iter: I) -> Self {
826 TokenStream::new(iter.into_iter().collect::<Vec<TokenTree>>())
827 }
828}
829
830impl<CTX> HashStable<CTX> for TokenStream
831where
832 CTX: crate::HashStableContext,
833{
834 fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
835 for sub_tt in self.iter() {
836 sub_tt.hash_stable(hcx, hasher);
837 }
838 }
839}
840
841#[derive(Clone)]
842pub struct TokenStreamIter<'t> {
843 stream: &'t TokenStream,
844 index: usize,
845}
846
847impl<'t> TokenStreamIter<'t> {
848 fn new(stream: &'t TokenStream) -> Self {
849 TokenStreamIter { stream, index: 0 }
850 }
851
852 // Peeking could be done via `Peekable`, but most uses of this iterator need
853 // peeking, so building it in here is simpler and avoids the need to wrap in
854 // `peekable`/`Peekable` at all the use sites.
855 pub fn peek(&self) -> Option<&'t TokenTree> {
856 self.stream.0.get(self.index)
857 }
858}
859
860impl<'t> Iterator for TokenStreamIter<'t> {
861 type Item = &'t TokenTree;
862
863 fn next(&mut self) -> Option<&'t TokenTree> {
864 self.stream.0.get(self.index).map(|tree| {
865 self.index += 1;
866 tree
867 })
868 }
869}
870
871#[derive(Clone, Debug)]
872pub struct TokenTreeCursor {
873 stream: TokenStream,
874 /// Points to the current token tree in the stream. In `TokenCursor::curr`,
875 /// this can be any token tree. In `TokenCursor::stack`, this is always a
876 /// `TokenTree::Delimited`.
877 index: usize,
878}
879
880impl TokenTreeCursor {
881 #[inline]
882 pub fn new(stream: TokenStream) -> Self {
883 TokenTreeCursor { stream, index: 0 }
884 }
885
886 #[inline]
887 pub fn curr(&self) -> Option<&TokenTree> {
888 self.stream.get(self.index)
889 }
890
891 pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
892 self.stream.get(self.index + n)
893 }
894
895 #[inline]
896 pub fn bump(&mut self) {
897 self.index += 1;
898 }
899
900 // For skipping ahead in rare circumstances.
901 #[inline]
902 pub fn bump_to_end(&mut self) {
903 self.index = self.stream.len();
904 }
905}
906
907/// A `TokenStream` cursor that produces `Token`s. It's a bit odd that
908/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
909/// use this type to emit them as a linear sequence. But a linear sequence is
910/// what the parser expects, for the most part.
911#[derive(Clone, Debug)]
912pub struct TokenCursor {
913 // Cursor for the current (innermost) token stream. The index within the
914 // cursor can point to any token tree in the stream (or one past the end).
915 // The delimiters for this token stream are found in `self.stack.last()`;
916 // if that is `None` we are in the outermost token stream which never has
917 // delimiters.
918 pub curr: TokenTreeCursor,
919
920 // Token streams surrounding the current one. The index within each cursor
921 // always points to a `TokenTree::Delimited`.
922 pub stack: Vec<TokenTreeCursor>,
923}
924
925impl TokenCursor {
926 pub fn next(&mut self) -> (Token, Spacing) {
927 self.inlined_next()
928 }
929
930 /// This always-inlined version should only be used on hot code paths.
931 #[inline(always)]
932 pub fn inlined_next(&mut self) -> (Token, Spacing) {
933 loop {
934 // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
935 // #67062 we will need to, whereupon the `delim.skip()` conditions
936 // below can be removed.
937 if let Some(tree) = self.curr.curr() {
938 match tree {
939 &TokenTree::Token(token, spacing) => {
940 debug_assert!(!token.kind.is_delim());
941 let res = (token, spacing);
942 self.curr.bump();
943 return res;
944 }
945 &TokenTree::Delimited(sp, spacing, delim, ref tts) => {
946 let trees = TokenTreeCursor::new(tts.clone());
947 self.stack.push(mem::replace(&mut self.curr, trees));
948 if !delim.skip() {
949 return (Token::new(delim.as_open_token_kind(), sp.open), spacing.open);
950 }
951 // No open delimiter to return; continue on to the next iteration.
952 }
953 };
954 } else if let Some(parent) = self.stack.pop() {
955 // We have exhausted this token stream. Move back to its parent token stream.
956 let Some(&TokenTree::Delimited(span, spacing, delim, _)) = parent.curr() else {
957 panic!("parent should be Delimited")
958 };
959 self.curr = parent;
960 self.curr.bump(); // move past the `Delimited`
961 if !delim.skip() {
962 return (Token::new(delim.as_close_token_kind(), span.close), spacing.close);
963 }
964 // No close delimiter to return; continue on to the next iteration.
965 } else {
966 // We have exhausted the outermost token stream. The use of
967 // `Spacing::Alone` is arbitrary and immaterial, because the
968 // `Eof` token's spacing is never used.
969 return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
970 }
971 }
972 }
973}
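
// Illustrative sketch (not part of the original source): a `TokenCursor` flattens the
// tree structure back into a linear token sequence, synthesizing open/close delimiter
// tokens as it enters and leaves each `Delimited` tree (visible delimiters such as
// braces are not skipped).
//
// ```ignore (illustrative)
// let inner = TokenStream::token_alone(token::Comma, DUMMY_SP);
// let stream = TokenStream::new(vec![TokenTree::Delimited(
//     DelimSpan::dummy(),
//     DelimSpacing::new(Spacing::Alone, Spacing::Alone),
//     Delimiter::Brace,
//     inner,
// )]);
// let mut cursor = TokenCursor { curr: TokenTreeCursor::new(stream), stack: vec![] };
// loop {
//     let (token, _spacing) = cursor.next();
//     if token.kind == token::Eof {
//         break;
//     }
//     // Yields `{`, then `,`, then `}` as a flat sequence.
// }
// ```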
974
975#[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic, Walkable)]
976pub struct DelimSpan {
977 pub open: Span,
978 pub close: Span,
979}
980
981impl DelimSpan {
982 pub fn from_single(sp: Span) -> Self {
983 DelimSpan { open: sp, close: sp }
984 }
985
986 pub fn from_pair(open: Span, close: Span) -> Self {
987 DelimSpan { open, close }
988 }
989
990 pub fn dummy() -> Self {
991 Self::from_single(DUMMY_SP)
992 }
993
994 pub fn entire(self) -> Span {
995 self.open.with_hi(self.close.hi())
996 }
997}
998
999#[derive(Copy, Clone, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
1000pub struct DelimSpacing {
1001 pub open: Spacing,
1002 pub close: Spacing,
1003}
1004
1005impl DelimSpacing {
1006 pub fn new(open: Spacing, close: Spacing) -> DelimSpacing {
1007 DelimSpacing { open, close }
1008 }
1009}
1010
1011// Some types are used a lot. Make sure they don't unintentionally get bigger.
1012#[cfg(target_pointer_width = "64")]
1013mod size_asserts {
1014 use rustc_data_structures::static_assert_size;
1015
1016 use super::*;
1017 // tidy-alphabetical-start
1018 static_assert_size!(AttrTokenStream, 8);
1019 static_assert_size!(AttrTokenTree, 32);
1020 static_assert_size!(LazyAttrTokenStream, 8);
1021 static_assert_size!(LazyAttrTokenStreamInner, 88);
1022 static_assert_size!(Option<LazyAttrTokenStream>, 8); // must be small, used in many AST nodes
1023 static_assert_size!(TokenStream, 8);
1024 static_assert_size!(TokenTree, 32);
1025 // tidy-alphabetical-end
1026}