rustc_expand/mbe/
transcribe.rs

1use std::mem;
2
3use rustc_ast::token::{
4    self, Delimiter, IdentIsRaw, InvisibleOrigin, Lit, LitKind, MetaVarKind, Token, TokenKind,
5};
6use rustc_ast::tokenstream::{DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree};
7use rustc_ast::{ExprKind, StmtKind, TyKind, UnOp};
8use rustc_data_structures::fx::FxHashMap;
9use rustc_errors::{Diag, DiagCtxtHandle, PResult, pluralize};
10use rustc_parse::lexer::nfc_normalize;
11use rustc_parse::parser::ParseNtResult;
12use rustc_session::parse::{ParseSess, SymbolGallery};
13use rustc_span::hygiene::{LocalExpnId, Transparency};
14use rustc_span::{
15    Ident, MacroRulesNormalizedIdent, Span, Symbol, SyntaxContext, sym, with_metavar_spans,
16};
17use smallvec::{SmallVec, smallvec};
18
19use crate::errors::{
20    CountRepetitionMisplaced, MetaVarExprUnrecognizedVar, MetaVarsDifSeqMatchers, MustRepeatOnce,
21    NoSyntaxVarsExprRepeat, VarStillRepeating,
22};
23use crate::mbe::macro_parser::NamedMatch;
24use crate::mbe::macro_parser::NamedMatch::*;
25use crate::mbe::metavar_expr::{MetaVarExprConcatElem, RAW_IDENT_ERR};
26use crate::mbe::{self, KleeneOp, MetaVarExpr};
27
28// A Marker adds the given mark to the syntax context.
29struct Marker(LocalExpnId, Transparency, FxHashMap<SyntaxContext, SyntaxContext>);
30
31impl Marker {
32    fn mark_span(&mut self, span: &mut Span) {
33        // `apply_mark` is a relatively expensive operation, both due to taking hygiene lock, and
34        // by itself. All tokens in a macro body typically have the same syntactic context, unless
35        // it's some advanced case with macro-generated macros. So if we cache the marked version
36        // of that context once, we'll typically have a 100% cache hit rate after that.
37        let Marker(expn_id, transparency, ref mut cache) = *self;
38        *span = span.map_ctxt(|ctxt| {
39            *cache
40                .entry(ctxt)
41                .or_insert_with(|| ctxt.apply_mark(expn_id.to_expn_id(), transparency))
42        });
43    }
44}
45
46/// An iterator over the token trees in a delimited token tree (`{ ... }`) or a sequence (`$(...)`).
47struct Frame<'a> {
48    tts: &'a [mbe::TokenTree],
49    idx: usize,
50    kind: FrameKind,
51}
52
53enum FrameKind {
54    Delimited { delim: Delimiter, span: DelimSpan, spacing: DelimSpacing },
55    Sequence { sep: Option<Token>, kleene_op: KleeneOp },
56}
57
58impl<'a> Frame<'a> {
59    fn new_delimited(src: &'a mbe::Delimited, span: DelimSpan, spacing: DelimSpacing) -> Frame<'a> {
60        Frame {
61            tts: &src.tts,
62            idx: 0,
63            kind: FrameKind::Delimited { delim: src.delim, span, spacing },
64        }
65    }
66
67    fn new_sequence(
68        src: &'a mbe::SequenceRepetition,
69        sep: Option<Token>,
70        kleene_op: KleeneOp,
71    ) -> Frame<'a> {
72        Frame { tts: &src.tts, idx: 0, kind: FrameKind::Sequence { sep, kleene_op } }
73    }
74}
75
76impl<'a> Iterator for Frame<'a> {
77    type Item = &'a mbe::TokenTree;
78
79    fn next(&mut self) -> Option<&'a mbe::TokenTree> {
80        let res = self.tts.get(self.idx);
81        self.idx += 1;
82        res
83    }
84}
85
/// This can do Macro-By-Example transcription.
/// - `interp` is a map of meta-variables to the tokens (non-terminals) they matched in the
///   invocation. We are assuming we already know there is a match.
/// - `src` is the RHS of the MBE, that is, the "example" we are filling in.
/// - `src_span` is the delimiter span stored on the top-level frame built from `src`.
/// - `transparency` and `expand_id` configure the `Marker` that re-marks the syntax context of
///   the spans emitted into the output.
///
/// For example,
///
/// ```rust
/// macro_rules! foo {
///     ($id:ident) => { println!("{}", stringify!($id)); }
/// }
///
/// foo!(bar);
/// ```
///
/// `interp` would contain `$id => bar` and `src` would contain `println!("{}", stringify!($id));`.
///
/// `transcribe` would return a `TokenStream` containing `println!("{}", stringify!(bar));`.
///
/// Along the way, we do some additional error checking. An `Err` is returned when:
/// - a repetition in the RHS mentions no meta-variable that constrains its length
///   (`NoSyntaxVarsExprRepeat`);
/// - meta-variables used in the same repetition repeat a different number of times
///   (`MetaVarsDifSeqMatchers`);
/// - a `+` repetition would have to be transcribed zero times (`MustRepeatOnce`);
/// - a meta-variable is used at a shallower repetition depth than it was matched at
///   (`VarStillRepeating`);
/// - `transcribe_metavar_expr` fails for a meta-variable expression.
///
/// Panics if `src` contains a `TokenTree::MetaVarDecl`.
pub(super) fn transcribe<'a>(
    psess: &'a ParseSess,
    interp: &FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
    src: &mbe::Delimited,
    src_span: DelimSpan,
    transparency: Transparency,
    expand_id: LocalExpnId,
) -> PResult<'a, TokenStream> {
    // Nothing for us to transcribe...
    if src.tts.is_empty() {
        return Ok(TokenStream::default());
    }

    // We descend into the RHS (`src`), expanding things as we go. This stack contains the things
    // we have yet to expand/are still expanding. We start the stack off with the whole RHS. The
    // choice of spacing values doesn't matter.
    let mut stack: SmallVec<[Frame<'_>; 1]> = smallvec![Frame::new_delimited(
        src,
        src_span,
        DelimSpacing::new(Spacing::Alone, Spacing::Alone)
    )];

    // As we descend in the RHS, we will need to be able to match nested sequences of matchers.
    // `repeats` keeps track of where we are in matching at each level, with the last element being
    // the most deeply nested sequence. This is used as a stack. Each entry is
    // `(index of the current repetition, total number of repetitions)`.
    let mut repeats: Vec<(usize, usize)> = Vec::new();

    // `result` contains resulting token stream from the TokenTree we just finished processing. At
    // the end, this will contain the full result of transcription, but at arbitrary points during
    // `transcribe`, `result` will contain subsets of the final result.
    //
    // Specifically, as we descend into each delimited TokenTree, we will push the existing
    // results onto the `result_stack` and clear `result`. We will then produce the results of
    // transcribing the TokenTree into `result`. Then, as we unwind back out of the `TokenTree`,
    // we will pop the `result_stack` and append `result` to it to produce the new `result` up to
    // that point.
    //
    // Thus, if we try to pop the `result_stack` and it is empty, we have reached the top-level
    // again, and we are done transcribing.
    let mut result: Vec<TokenTree> = Vec::new();
    let mut result_stack = Vec::new();
    let mut marker = Marker(expand_id, transparency, Default::default());

    let dcx = psess.dcx();
    loop {
        // Look at the last frame on the stack.
        // If it still has a TokenTree we have not looked at yet, use that tree.
        let Some(tree) = stack.last_mut().unwrap().next() else {
            // This else-case never produces a value for `tree` (it `continue`s or `return`s).

            // Otherwise, if we have just reached the end of a sequence and we can keep repeating,
            // go back to the beginning of the sequence.
            let frame = stack.last_mut().unwrap();
            if let FrameKind::Sequence { sep, .. } = &frame.kind {
                let (repeat_idx, repeat_len) = repeats.last_mut().unwrap();
                *repeat_idx += 1;
                if repeat_idx < repeat_len {
                    // Rewind the frame for the next repetition, emitting the separator (if any)
                    // between the previous repetition and the next one.
                    frame.idx = 0;
                    if let Some(sep) = sep {
                        result.push(TokenTree::Token(*sep, Spacing::Alone));
                    }
                    continue;
                }
            }

            // We are done with the top of the stack. Pop it. Depending on what it was, we do
            // different things. Note that the outermost item must be the delimited, wrapped RHS
            // that was passed in originally to `transcribe`.
            match stack.pop().unwrap().kind {
                // Done with a sequence. Pop from repeats.
                FrameKind::Sequence { .. } => {
                    repeats.pop();
                }

                // We are done processing a Delimited. If this is the top-level delimited, we are
                // done. Otherwise, we unwind the result_stack to append what we have produced to
                // any previous results.
                FrameKind::Delimited { delim, span, mut spacing, .. } => {
                    // Hack to force-insert a space after `]` in certain case.
                    // See discussion of the `hex-literal` crate in #114571.
                    if delim == Delimiter::Bracket {
                        spacing.close = Spacing::Alone;
                    }
                    if result_stack.is_empty() {
                        // No results left to compute! We are back at the top-level.
                        return Ok(TokenStream::new(result));
                    }

                    // Step back into the parent Delimited.
                    let tree = TokenTree::Delimited(span, spacing, delim, TokenStream::new(result));
                    result = result_stack.pop().unwrap();
                    result.push(tree);
                }
            }
            continue;
        };

        // At this point, we know we are in the middle of a TokenTree (the last one on `stack`).
        // `tree` contains the next `TokenTree` to be processed.
        match tree {
            // We are descending into a sequence. We first make sure that the matchers in the RHS
            // and the matches in `interp` have the same shape. Otherwise, either the caller or the
            // macro writer has made a mistake.
            seq @ mbe::TokenTree::Sequence(_, seq_rep) => {
                match lockstep_iter_size(seq, interp, &repeats) {
                    LockstepIterSize::Unconstrained => {
                        return Err(dcx.create_err(NoSyntaxVarsExprRepeat { span: seq.span() }));
                    }

                    LockstepIterSize::Contradiction(msg) => {
                        // FIXME: this really ought to be caught at macro definition time... It
                        // happens when two meta-variables are used in the same repetition in a
                        // sequence, but they come from different sequence matchers and repeat
                        // different amounts.
                        return Err(
                            dcx.create_err(MetaVarsDifSeqMatchers { span: seq.span(), msg })
                        );
                    }

                    LockstepIterSize::Constraint(len, _) => {
                        // We do this to avoid an extra clone above. We know that this is a
                        // sequence already.
                        let mbe::TokenTree::Sequence(sp, seq) = seq else { unreachable!() };

                        // Is the repetition empty?
                        if len == 0 {
                            if seq.kleene.op == KleeneOp::OneOrMore {
                                // FIXME: this really ought to be caught at macro definition
                                // time... It happens when the Kleene operator in the matcher and
                                // the body for the same meta-variable do not match.
                                return Err(dcx.create_err(MustRepeatOnce { span: sp.entire() }));
                            }
                        } else {
                            // 0 is the initial counter (we have done 0 repetitions so far). `len`
                            // is the total number of repetitions we should generate.
                            repeats.push((0, len));

                            // The first time we encounter the sequence we push it to the stack. It
                            // then gets reused (see the beginning of the loop) until we are done
                            // repeating.
                            stack.push(Frame::new_sequence(
                                seq_rep,
                                seq.separator.clone(),
                                seq.kleene.op,
                            ));
                        }
                    }
                }
            }

            // Replace the meta-var with the matched token tree from the invocation.
            &mbe::TokenTree::MetaVar(mut sp, mut original_ident) => {
                // Find the matched nonterminal from the macro invocation, and use it to replace
                // the meta-var.
                //
                // We use `Spacing::Alone` everywhere here, because that's the conservative choice
                // and spacing of declarative macros is tricky. E.g. in this macro:
                // ```
                // macro_rules! idents {
                //     ($($a:ident,)*) => { stringify!($($a)*) }
                // }
                // ```
                // `$a` has no whitespace after it and will be marked `JointHidden`. If you then
                // call `idents!(x,y,z,)`, each of `x`, `y`, and `z` will be marked as `Joint`. So
                // if you choose to use `$x`'s spacing or the identifier's spacing, you'll end up
                // producing "xyz", which is bad because it effectively merges tokens.
                // `Spacing::Alone` is the safer option. Fortunately, `space_between` will avoid
                // some of the unnecessary whitespace.
                let ident = MacroRulesNormalizedIdent::new(original_ident);
                if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) {
                    // We wrap the tokens in invisible delimiters, unless they are already wrapped
                    // in invisible delimiters with the same `MetaVarKind`. Because some proc
                    // macros can't handle multiple layers of invisible delimiters of the same
                    // `MetaVarKind`. This loses some span info, though it hopefully won't matter.
                    let mut mk_delimited = |mk_span, mv_kind, mut stream: TokenStream| {
                        if stream.len() == 1 {
                            let tree = stream.iter().next().unwrap();
                            if let TokenTree::Delimited(_, _, delim, inner) = tree
                                && let Delimiter::Invisible(InvisibleOrigin::MetaVar(mvk)) = delim
                                && mv_kind == *mvk
                            {
                                stream = inner.clone();
                            }
                        }

                        // Emit as a token stream within `Delimiter::Invisible` to maintain
                        // parsing priorities.
                        marker.mark_span(&mut sp);
                        with_metavar_spans(|mspans| mspans.insert(mk_span, sp));
                        // Both the open delim and close delim get the same span, which covers the
                        // `$foo` in the decl macro RHS.
                        TokenTree::Delimited(
                            DelimSpan::from_single(sp),
                            DelimSpacing::new(Spacing::Alone, Spacing::Alone),
                            Delimiter::Invisible(InvisibleOrigin::MetaVar(mv_kind)),
                            stream,
                        )
                    };
                    let tt = match cur_matched {
                        MatchedSingle(ParseNtResult::Tt(tt)) => {
                            // `tt`s are emitted into the output stream directly as "raw tokens",
                            // without wrapping them into groups. Other variables are emitted into
                            // the output stream as groups with `Delimiter::Invisible` to maintain
                            // parsing priorities.
                            maybe_use_metavar_location(psess, &stack, sp, tt, &mut marker)
                        }
                        // Identifiers and lifetimes are emitted as single `NtIdent`/`NtLifetime`
                        // tokens located at the (marked) meta-variable span.
                        MatchedSingle(ParseNtResult::Ident(ident, is_raw)) => {
                            marker.mark_span(&mut sp);
                            with_metavar_spans(|mspans| mspans.insert(ident.span, sp));
                            let kind = token::NtIdent(*ident, *is_raw);
                            TokenTree::token_alone(kind, sp)
                        }
                        MatchedSingle(ParseNtResult::Lifetime(ident, is_raw)) => {
                            marker.mark_span(&mut sp);
                            with_metavar_spans(|mspans| mspans.insert(ident.span, sp));
                            let kind = token::NtLifetime(*ident, *is_raw);
                            TokenTree::token_alone(kind, sp)
                        }
                        MatchedSingle(ParseNtResult::Item(item)) => {
                            mk_delimited(item.span, MetaVarKind::Item, TokenStream::from_ast(item))
                        }
                        MatchedSingle(ParseNtResult::Block(block)) => mk_delimited(
                            block.span,
                            MetaVarKind::Block,
                            TokenStream::from_ast(block),
                        ),
                        MatchedSingle(ParseNtResult::Stmt(stmt)) => {
                            let stream = if let StmtKind::Empty = stmt.kind {
                                // FIXME: Properly collect tokens for empty statements.
                                TokenStream::token_alone(token::Semi, stmt.span)
                            } else {
                                TokenStream::from_ast(stmt)
                            };
                            mk_delimited(stmt.span, MetaVarKind::Stmt, stream)
                        }
                        MatchedSingle(ParseNtResult::Pat(pat, pat_kind)) => mk_delimited(
                            pat.span,
                            MetaVarKind::Pat(*pat_kind),
                            TokenStream::from_ast(pat),
                        ),
                        MatchedSingle(ParseNtResult::Expr(expr, kind)) => {
                            // Record in the `MetaVarKind` whether the expression is a literal
                            // (`(true, true)`), a negated literal (`(true, false)`), or anything
                            // else (`(false, false)`).
                            let (can_begin_literal_maybe_minus, can_begin_string_literal) =
                                match &expr.kind {
                                    ExprKind::Lit(_) => (true, true),
                                    ExprKind::Unary(UnOp::Neg, e)
                                        if matches!(&e.kind, ExprKind::Lit(_)) =>
                                    {
                                        (true, false)
                                    }
                                    _ => (false, false),
                                };
                            mk_delimited(
                                expr.span,
                                MetaVarKind::Expr {
                                    kind: *kind,
                                    can_begin_literal_maybe_minus,
                                    can_begin_string_literal,
                                },
                                TokenStream::from_ast(expr),
                            )
                        }
                        MatchedSingle(ParseNtResult::Literal(lit)) => {
                            mk_delimited(lit.span, MetaVarKind::Literal, TokenStream::from_ast(lit))
                        }
                        MatchedSingle(ParseNtResult::Ty(ty)) => {
                            let is_path = matches!(&ty.kind, TyKind::Path(None, _path));
                            mk_delimited(
                                ty.span,
                                MetaVarKind::Ty { is_path },
                                TokenStream::from_ast(ty),
                            )
                        }
                        MatchedSingle(ParseNtResult::Meta(attr_item)) => {
                            let has_meta_form = attr_item.meta_kind().is_some();
                            mk_delimited(
                                attr_item.span(),
                                MetaVarKind::Meta { has_meta_form },
                                TokenStream::from_ast(attr_item),
                            )
                        }
                        MatchedSingle(ParseNtResult::Path(path)) => {
                            mk_delimited(path.span, MetaVarKind::Path, TokenStream::from_ast(path))
                        }
                        MatchedSingle(ParseNtResult::Vis(vis)) => {
                            mk_delimited(vis.span, MetaVarKind::Vis, TokenStream::from_ast(vis))
                        }
                        MatchedSeq(..) => {
                            // We were unable to descend far enough. This is an error.
                            return Err(dcx.create_err(VarStillRepeating { span: sp, ident }));
                        }
                    };
                    result.push(tt)
                } else {
                    // If we aren't able to match the meta-var, we push it back into the result but
                    // with modified syntax context. (I believe this supports nested macros).
                    marker.mark_span(&mut sp);
                    marker.mark_span(&mut original_ident.span);
                    result.push(TokenTree::token_joint_hidden(token::Dollar, sp));
                    result.push(TokenTree::Token(
                        Token::from_ast_ident(original_ident),
                        Spacing::Alone,
                    ));
                }
            }

            // Replace meta-variable expressions with the result of their expansion.
            mbe::TokenTree::MetaVarExpr(sp, expr) => {
                transcribe_metavar_expr(
                    dcx,
                    expr,
                    interp,
                    &mut marker,
                    &repeats,
                    &mut result,
                    sp,
                    &psess.symbol_gallery,
                )?;
            }

            // If we are entering a new delimiter, we push its contents to the `stack` to be
            // processed, and we push all of the currently produced results to the `result_stack`.
            // We will produce all of the results of the inside of the `Delimited` and then we will
            // jump back out of the Delimited, pop the result_stack and add the new results back to
            // the previous results (from outside the Delimited).
            &mbe::TokenTree::Delimited(mut span, ref spacing, ref delimited) => {
                marker.mark_span(&mut span.open);
                marker.mark_span(&mut span.close);
                stack.push(Frame::new_delimited(delimited, span, *spacing));
                // `mem::take` leaves `result` empty, ready to collect the group's contents.
                result_stack.push(mem::take(&mut result));
            }

            // Nothing much to do here. Just push the token to the result, being careful to
            // preserve syntax context.
            &mbe::TokenTree::Token(mut token) => {
                marker.mark_span(&mut token.span);
                // These token kinds carry an identifier with a span of its own; mark it as well.
                if let token::NtIdent(ident, _) | token::NtLifetime(ident, _) = &mut token.kind {
                    marker.mark_span(&mut ident.span);
                }
                let tt = TokenTree::Token(token, Spacing::Alone);
                result.push(tt);
            }

            // There should be no meta-var declarations in the invocation of a macro.
            mbe::TokenTree::MetaVarDecl(..) => panic!("unexpected `TokenTree::MetaVarDecl`"),
        }
    }
}
452
453/// Store the metavariable span for this original span into a side table.
454/// FIXME: Try to put the metavariable span into `SpanData` instead of a side table (#118517).
455/// An optimal encoding for inlined spans will need to be selected to minimize regressions.
456/// The side table approach is relatively good, but not perfect due to collisions.
457/// In particular, collisions happen when token is passed as an argument through several macro
458/// calls, like in recursive macros.
459/// The old heuristic below is used to improve spans in case of collisions, but diagnostics are
460/// still degraded sometimes in those cases.
461///
462/// The old heuristic:
463///
464/// Usually metavariables `$var` produce interpolated tokens, which have an additional place for
465/// keeping both the original span and the metavariable span. For `tt` metavariables that's not the
466/// case however, and there's no place for keeping a second span. So we try to give the single
467/// produced span a location that would be most useful in practice (the hygiene part of the span
468/// must not be changed).
469///
470/// Different locations are useful for different purposes:
471/// - The original location is useful when we need to report a diagnostic for the original token in
472///   isolation, without combining it with any surrounding tokens. This case occurs, but it is not
473///   very common in practice.
474/// - The metavariable location is useful when we need to somehow combine the token span with spans
475///   of its surrounding tokens. This is the most common way to use token spans.
476///
477/// So this function replaces the original location with the metavariable location in all cases
478/// except these two:
479/// - The metavariable is an element of undelimited sequence `$($tt)*`.
480///   These are typically used for passing larger amounts of code, and tokens in that code usually
481///   combine with each other and not with tokens outside of the sequence.
482/// - The metavariable span comes from a different crate, then we prefer the more local span.
483fn maybe_use_metavar_location(
484    psess: &ParseSess,
485    stack: &[Frame<'_>],
486    mut metavar_span: Span,
487    orig_tt: &TokenTree,
488    marker: &mut Marker,
489) -> TokenTree {
490    let undelimited_seq = matches!(
491        stack.last(),
492        Some(Frame {
493            tts: [_],
494            kind: FrameKind::Sequence {
495                sep: None,
496                kleene_op: KleeneOp::ZeroOrMore | KleeneOp::OneOrMore,
497                ..
498            },
499            ..
500        })
501    );
502    if undelimited_seq {
503        // Do not record metavar spans for tokens from undelimited sequences, for perf reasons.
504        return orig_tt.clone();
505    }
506
507    marker.mark_span(&mut metavar_span);
508    let no_collision = match orig_tt {
509        TokenTree::Token(token, ..) => {
510            with_metavar_spans(|mspans| mspans.insert(token.span, metavar_span))
511        }
512        TokenTree::Delimited(dspan, ..) => with_metavar_spans(|mspans| {
513            mspans.insert(dspan.open, metavar_span)
514                && mspans.insert(dspan.close, metavar_span)
515                && mspans.insert(dspan.entire(), metavar_span)
516        }),
517    };
518    if no_collision || psess.source_map().is_imported(metavar_span) {
519        return orig_tt.clone();
520    }
521
522    // Setting metavar spans for the heuristic spans gives better opportunities for combining them
523    // with neighboring spans even despite their different syntactic contexts.
524    match orig_tt {
525        TokenTree::Token(Token { kind, span }, spacing) => {
526            let span = metavar_span.with_ctxt(span.ctxt());
527            with_metavar_spans(|mspans| mspans.insert(span, metavar_span));
528            TokenTree::Token(Token { kind: kind.clone(), span }, *spacing)
529        }
530        TokenTree::Delimited(dspan, dspacing, delimiter, tts) => {
531            let open = metavar_span.with_ctxt(dspan.open.ctxt());
532            let close = metavar_span.with_ctxt(dspan.close.ctxt());
533            with_metavar_spans(|mspans| {
534                mspans.insert(open, metavar_span) && mspans.insert(close, metavar_span)
535            });
536            let dspan = DelimSpan::from_pair(open, close);
537            TokenTree::Delimited(dspan, *dspacing, *delimiter, tts.clone())
538        }
539    }
540}
541
542/// Lookup the meta-var named `ident` and return the matched token tree from the invocation using
543/// the set of matches `interpolations`.
544///
545/// See the definition of `repeats` in the `transcribe` function. `repeats` is used to descend
546/// into the right place in nested matchers. If we attempt to descend too far, the macro writer has
547/// made a mistake, and we return `None`.
548fn lookup_cur_matched<'a>(
549    ident: MacroRulesNormalizedIdent,
550    interpolations: &'a FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
551    repeats: &[(usize, usize)],
552) -> Option<&'a NamedMatch> {
553    interpolations.get(&ident).map(|mut matched| {
554        for &(idx, _) in repeats {
555            match matched {
556                MatchedSingle(_) => break,
557                MatchedSeq(ads) => matched = ads.get(idx).unwrap(),
558            }
559        }
560
561        matched
562    })
563}
564
565/// An accumulator over a TokenTree to be used with `fold`. During transcription, we need to make
566/// sure that the size of each sequence and all of its nested sequences are the same as the sizes
567/// of all the matched (nested) sequences in the macro invocation. If they don't match, somebody
568/// has made a mistake (either the macro writer or caller).
569#[derive(Clone)]
570enum LockstepIterSize {
571    /// No constraints on length of matcher. This is true for any TokenTree variants except a
572    /// `MetaVar` with an actual `MatchedSeq` (as opposed to a `MatchedNonterminal`).
573    Unconstrained,
574
575    /// A `MetaVar` with an actual `MatchedSeq`. The length of the match and the name of the
576    /// meta-var are returned.
577    Constraint(usize, MacroRulesNormalizedIdent),
578
579    /// Two `Constraint`s on the same sequence had different lengths. This is an error.
580    Contradiction(String),
581}
582
583impl LockstepIterSize {
584    /// Find incompatibilities in matcher/invocation sizes.
585    /// - `Unconstrained` is compatible with everything.
586    /// - `Contradiction` is incompatible with everything.
587    /// - `Constraint(len)` is only compatible with other constraints of the same length.
588    fn with(self, other: LockstepIterSize) -> LockstepIterSize {
589        match self {
590            LockstepIterSize::Unconstrained => other,
591            LockstepIterSize::Contradiction(_) => self,
592            LockstepIterSize::Constraint(l_len, l_id) => match other {
593                LockstepIterSize::Unconstrained => self,
594                LockstepIterSize::Contradiction(_) => other,
595                LockstepIterSize::Constraint(r_len, _) if l_len == r_len => self,
596                LockstepIterSize::Constraint(r_len, r_id) => {
597                    let msg = format!(
598                        "meta-variable `{}` repeats {} time{}, but `{}` repeats {} time{}",
599                        l_id,
600                        l_len,
601                        pluralize!(l_len),
602                        r_id,
603                        r_len,
604                        pluralize!(r_len),
605                    );
606                    LockstepIterSize::Contradiction(msg)
607                }
608            },
609        }
610    }
611}
612
613/// Given a `tree`, make sure that all sequences have the same length as the matches for the
614/// appropriate meta-vars in `interpolations`.
615///
616/// Note that if `repeats` does not match the exact correct depth of a meta-var,
617/// `lookup_cur_matched` will return `None`, which is why this still works even in the presence of
618/// multiple nested matcher sequences.
619///
620/// Example: `$($($x $y)+*);+` -- we need to make sure that `x` and `y` repeat the same amount as
621/// each other at the given depth when the macro was invoked. If they don't it might mean they were
622/// declared at depths which weren't equal or there was a compiler bug. For example, if we have 3 repetitions of
623/// the outer sequence and 4 repetitions of the inner sequence for `x`, we should have the same for
624/// `y`; otherwise, we can't transcribe them both at the given depth.
625fn lockstep_iter_size(
626    tree: &mbe::TokenTree,
627    interpolations: &FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
628    repeats: &[(usize, usize)],
629) -> LockstepIterSize {
630    use mbe::TokenTree;
631    match tree {
632        TokenTree::Delimited(.., delimited) => {
633            delimited.tts.iter().fold(LockstepIterSize::Unconstrained, |size, tt| {
634                size.with(lockstep_iter_size(tt, interpolations, repeats))
635            })
636        }
637        TokenTree::Sequence(_, seq) => {
638            seq.tts.iter().fold(LockstepIterSize::Unconstrained, |size, tt| {
639                size.with(lockstep_iter_size(tt, interpolations, repeats))
640            })
641        }
642        TokenTree::MetaVar(_, name) | TokenTree::MetaVarDecl(_, name, _) => {
643            let name = MacroRulesNormalizedIdent::new(*name);
644            match lookup_cur_matched(name, interpolations, repeats) {
645                Some(matched) => match matched {
646                    MatchedSingle(_) => LockstepIterSize::Unconstrained,
647                    MatchedSeq(ads) => LockstepIterSize::Constraint(ads.len(), name),
648                },
649                _ => LockstepIterSize::Unconstrained,
650            }
651        }
652        TokenTree::MetaVarExpr(_, expr) => {
653            expr.for_each_metavar(LockstepIterSize::Unconstrained, |lis, ident| {
654                lis.with(lockstep_iter_size(
655                    &TokenTree::MetaVar(ident.span, *ident),
656                    interpolations,
657                    repeats,
658                ))
659            })
660        }
661        TokenTree::Token(..) => LockstepIterSize::Unconstrained,
662    }
663}
664
665/// Used solely by the `count` meta-variable expression, counts the outermost repetitions at a
666/// given optional nested depth.
667///
668/// For example, a macro parameter of `$( { $( $foo:ident ),* } )*` called with `{ a, b } { c }`:
669///
670/// * `[ $( ${count(foo)} ),* ]` will return [2, 1] with a, b = 2 and c = 1
671/// * `[ $( ${count(foo, 0)} ),* ]` will be the same as `[ $( ${count(foo)} ),* ]`
672/// * `[ $( ${count(foo, 1)} ),* ]` will return an error because `${count(foo, 1)}` is
673///   declared inside a single repetition and the index `1` implies two nested repetitions.
674fn count_repetitions<'a>(
675    dcx: DiagCtxtHandle<'a>,
676    depth_user: usize,
677    mut matched: &NamedMatch,
678    repeats: &[(usize, usize)],
679    sp: &DelimSpan,
680) -> PResult<'a, usize> {
681    // Recursively count the number of matches in `matched` at given depth
682    // (or at the top-level of `matched` if no depth is given).
683    fn count<'a>(depth_curr: usize, depth_max: usize, matched: &NamedMatch) -> PResult<'a, usize> {
684        match matched {
685            MatchedSingle(_) => Ok(1),
686            MatchedSeq(named_matches) => {
687                if depth_curr == depth_max {
688                    Ok(named_matches.len())
689                } else {
690                    named_matches.iter().map(|elem| count(depth_curr + 1, depth_max, elem)).sum()
691                }
692            }
693        }
694    }
695
696    /// Maximum depth
697    fn depth(counter: usize, matched: &NamedMatch) -> usize {
698        match matched {
699            MatchedSingle(_) => counter,
700            MatchedSeq(named_matches) => {
701                let rslt = counter + 1;
702                if let Some(elem) = named_matches.first() { depth(rslt, elem) } else { rslt }
703            }
704        }
705    }
706
707    let depth_max = depth(0, matched)
708        .checked_sub(1)
709        .and_then(|el| el.checked_sub(repeats.len()))
710        .unwrap_or_default();
711    if depth_user > depth_max {
712        return Err(out_of_bounds_err(dcx, depth_max + 1, sp.entire(), "count"));
713    }
714
715    // `repeats` records all of the nested levels at which we are currently
716    // matching meta-variables. The meta-var-expr `count($x)` only counts
717    // matches that occur in this "subtree" of the `NamedMatch` where we
718    // are currently transcribing, so we need to descend to that subtree
719    // before we start counting. `matched` contains the various levels of the
720    // tree as we descend, and its final value is the subtree we are currently at.
721    for &(idx, _) in repeats {
722        if let MatchedSeq(ads) = matched {
723            matched = &ads[idx];
724        }
725    }
726
727    if let MatchedSingle(_) = matched {
728        return Err(dcx.create_err(CountRepetitionMisplaced { span: sp.entire() }));
729    }
730
731    count(depth_user, depth_max, matched)
732}
733
734/// Returns a `NamedMatch` item declared on the LHS given an arbitrary [Ident]
735fn matched_from_ident<'ctx, 'interp, 'rslt>(
736    dcx: DiagCtxtHandle<'ctx>,
737    ident: Ident,
738    interp: &'interp FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
739) -> PResult<'ctx, &'rslt NamedMatch>
740where
741    'interp: 'rslt,
742{
743    let span = ident.span;
744    let key = MacroRulesNormalizedIdent::new(ident);
745    interp.get(&key).ok_or_else(|| dcx.create_err(MetaVarExprUnrecognizedVar { span, key }))
746}
747
748/// Used by meta-variable expressions when an user input is out of the actual declared bounds. For
749/// example, index(999999) in an repetition of only three elements.
750fn out_of_bounds_err<'a>(dcx: DiagCtxtHandle<'a>, max: usize, span: Span, ty: &str) -> Diag<'a> {
751    let msg = if max == 0 {
752        format!(
753            "meta-variable expression `{ty}` with depth parameter \
754             must be called inside of a macro repetition"
755        )
756    } else {
757        format!(
758            "depth parameter of meta-variable expression `{ty}` \
759             must be less than {max}"
760        )
761    };
762    dcx.struct_span_err(span, msg)
763}
764
765fn transcribe_metavar_expr<'a>(
766    dcx: DiagCtxtHandle<'a>,
767    expr: &MetaVarExpr,
768    interp: &FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
769    marker: &mut Marker,
770    repeats: &[(usize, usize)],
771    result: &mut Vec<TokenTree>,
772    sp: &DelimSpan,
773    symbol_gallery: &SymbolGallery,
774) -> PResult<'a, ()> {
775    let mut visited_span = || {
776        let mut span = sp.entire();
777        marker.mark_span(&mut span);
778        span
779    };
780    match *expr {
781        MetaVarExpr::Concat(ref elements) => {
782            let mut concatenated = String::new();
783            for element in elements.into_iter() {
784                let symbol = match element {
785                    MetaVarExprConcatElem::Ident(elem) => elem.name,
786                    MetaVarExprConcatElem::Literal(elem) => *elem,
787                    MetaVarExprConcatElem::Var(ident) => {
788                        match matched_from_ident(dcx, *ident, interp)? {
789                            NamedMatch::MatchedSeq(named_matches) => {
790                                let Some((curr_idx, _)) = repeats.last() else {
791                                    return Err(dcx.struct_span_err(sp.entire(), "invalid syntax"));
792                                };
793                                match &named_matches[*curr_idx] {
794                                    // FIXME(c410-f3r) Nested repetitions are unimplemented
795                                    MatchedSeq(_) => unimplemented!(),
796                                    MatchedSingle(pnr) => {
797                                        extract_symbol_from_pnr(dcx, pnr, ident.span)?
798                                    }
799                                }
800                            }
801                            NamedMatch::MatchedSingle(pnr) => {
802                                extract_symbol_from_pnr(dcx, pnr, ident.span)?
803                            }
804                        }
805                    }
806                };
807                concatenated.push_str(symbol.as_str());
808            }
809            let symbol = nfc_normalize(&concatenated);
810            let concatenated_span = visited_span();
811            if !rustc_lexer::is_ident(symbol.as_str()) {
812                return Err(dcx.struct_span_err(
813                    concatenated_span,
814                    "`${concat(..)}` is not generating a valid identifier",
815                ));
816            }
817            symbol_gallery.insert(symbol, concatenated_span);
818            // The current implementation marks the span as coming from the macro regardless of
819            // contexts of the concatenated identifiers but this behavior may change in the
820            // future.
821            result.push(TokenTree::Token(
822                Token::from_ast_ident(Ident::new(symbol, concatenated_span)),
823                Spacing::Alone,
824            ));
825        }
826        MetaVarExpr::Count(original_ident, depth) => {
827            let matched = matched_from_ident(dcx, original_ident, interp)?;
828            let count = count_repetitions(dcx, depth, matched, repeats, sp)?;
829            let tt = TokenTree::token_alone(
830                TokenKind::lit(token::Integer, sym::integer(count), None),
831                visited_span(),
832            );
833            result.push(tt);
834        }
835        MetaVarExpr::Ignore(original_ident) => {
836            // Used to ensure that `original_ident` is present in the LHS
837            let _ = matched_from_ident(dcx, original_ident, interp)?;
838        }
839        MetaVarExpr::Index(depth) => match repeats.iter().nth_back(depth) {
840            Some((index, _)) => {
841                result.push(TokenTree::token_alone(
842                    TokenKind::lit(token::Integer, sym::integer(*index), None),
843                    visited_span(),
844                ));
845            }
846            None => return Err(out_of_bounds_err(dcx, repeats.len(), sp.entire(), "index")),
847        },
848        MetaVarExpr::Len(depth) => match repeats.iter().nth_back(depth) {
849            Some((_, length)) => {
850                result.push(TokenTree::token_alone(
851                    TokenKind::lit(token::Integer, sym::integer(*length), None),
852                    visited_span(),
853                ));
854            }
855            None => return Err(out_of_bounds_err(dcx, repeats.len(), sp.entire(), "len")),
856        },
857    }
858    Ok(())
859}
860
861/// Extracts an metavariable symbol that can be an identifier, a token tree or a literal.
862fn extract_symbol_from_pnr<'a>(
863    dcx: DiagCtxtHandle<'a>,
864    pnr: &ParseNtResult,
865    span_err: Span,
866) -> PResult<'a, Symbol> {
867    match pnr {
868        ParseNtResult::Ident(nt_ident, is_raw) => {
869            if let IdentIsRaw::Yes = is_raw {
870                Err(dcx.struct_span_err(span_err, RAW_IDENT_ERR))
871            } else {
872                Ok(nt_ident.name)
873            }
874        }
875        ParseNtResult::Tt(TokenTree::Token(
876            Token { kind: TokenKind::Ident(symbol, is_raw), .. },
877            _,
878        )) => {
879            if let IdentIsRaw::Yes = is_raw {
880                Err(dcx.struct_span_err(span_err, RAW_IDENT_ERR))
881            } else {
882                Ok(*symbol)
883            }
884        }
885        ParseNtResult::Tt(TokenTree::Token(
886            Token {
887                kind: TokenKind::Literal(Lit { kind: LitKind::Str, symbol, suffix: None }),
888                ..
889            },
890            _,
891        )) => Ok(*symbol),
892        ParseNtResult::Literal(expr)
893            if let ExprKind::Lit(Lit { kind: LitKind::Str, symbol, suffix: None }) = &expr.kind =>
894        {
895            Ok(*symbol)
896        }
897        _ => Err(dcx
898            .struct_err(
899                "metavariables of `${concat(..)}` must be of type `ident`, `literal` or `tt`",
900            )
901            .with_note("currently only string literals are supported")
902            .with_span(span_err)),
903    }
904}