rustc_expand/mbe/transcribe.rs
use std::mem;

use rustc_ast::token::{
    self, Delimiter, IdentIsRaw, InvisibleOrigin, Lit, LitKind, MetaVarKind, Token, TokenKind,
};
use rustc_ast::tokenstream::{DelimSpacing, DelimSpan, Spacing, TokenStream, TokenTree};
use rustc_ast::{ExprKind, StmtKind, TyKind, UnOp};
use rustc_data_structures::fx::FxHashMap;
use rustc_errors::{Diag, DiagCtxtHandle, PResult, pluralize};
use rustc_parse::lexer::nfc_normalize;
use rustc_parse::parser::ParseNtResult;
use rustc_session::parse::{ParseSess, SymbolGallery};
use rustc_span::hygiene::{LocalExpnId, Transparency};
use rustc_span::{
    Ident, MacroRulesNormalizedIdent, Span, Symbol, SyntaxContext, sym, with_metavar_spans,
};
use smallvec::{SmallVec, smallvec};

use crate::errors::{
    CountRepetitionMisplaced, MetaVarExprUnrecognizedVar, MetaVarsDifSeqMatchers, MustRepeatOnce,
    NoSyntaxVarsExprRepeat, VarStillRepeating,
};
use crate::mbe::macro_parser::NamedMatch;
use crate::mbe::macro_parser::NamedMatch::*;
use crate::mbe::metavar_expr::{MetaVarExprConcatElem, RAW_IDENT_ERR};
use crate::mbe::{self, KleeneOp, MetaVarExpr};

// A Marker adds the given mark to the syntax context.
struct Marker(LocalExpnId, Transparency, FxHashMap<SyntaxContext, SyntaxContext>);

impl Marker {
    fn mark_span(&mut self, span: &mut Span) {
        // `apply_mark` is a relatively expensive operation, both due to taking the hygiene lock,
        // and by itself. All tokens in a macro body typically have the same syntactic context,
        // unless it's some advanced case with macro-generated macros. So if we cache the marked
        // version of that context once, we'll typically have a 100% cache hit rate after that.
        let Marker(expn_id, transparency, ref mut cache) = *self;
        *span = span.map_ctxt(|ctxt| {
            *cache
                .entry(ctxt)
                .or_insert_with(|| ctxt.apply_mark(expn_id.to_expn_id(), transparency))
        });
    }
}

/// An iterator over the token trees in a delimited token tree (`{ ... }`) or a sequence (`$(...)`).
struct Frame<'a> {
    tts: &'a [mbe::TokenTree],
    idx: usize,
    kind: FrameKind,
}

enum FrameKind {
    Delimited { delim: Delimiter, span: DelimSpan, spacing: DelimSpacing },
    Sequence { sep: Option<Token>, kleene_op: KleeneOp },
}

impl<'a> Frame<'a> {
    fn new_delimited(src: &'a mbe::Delimited, span: DelimSpan, spacing: DelimSpacing) -> Frame<'a> {
        Frame {
            tts: &src.tts,
            idx: 0,
            kind: FrameKind::Delimited { delim: src.delim, span, spacing },
        }
    }

    fn new_sequence(
        src: &'a mbe::SequenceRepetition,
        sep: Option<Token>,
        kleene_op: KleeneOp,
    ) -> Frame<'a> {
        Frame { tts: &src.tts, idx: 0, kind: FrameKind::Sequence { sep, kleene_op } }
    }
}

impl<'a> Iterator for Frame<'a> {
    type Item = &'a mbe::TokenTree;

    fn next(&mut self) -> Option<&'a mbe::TokenTree> {
        let res = self.tts.get(self.idx);
        self.idx += 1;
        res
    }
}

/// This can do Macro-By-Example transcription.
/// - `interp` is a map of meta-variables to the tokens (non-terminals) they matched in the
///   invocation. We are assuming we already know there is a match.
/// - `src` is the RHS of the MBE, that is, the "example" we are filling in.
///
/// For example,
///
/// ```rust
/// macro_rules! foo {
///     ($id:ident) => { println!("{}", stringify!($id)); }
/// }
///
/// foo!(bar);
/// ```
///
/// `interp` would contain `$id => bar` and `src` would contain `println!("{}", stringify!($id));`.
///
/// `transcribe` would return a `TokenStream` containing `println!("{}", stringify!(bar));`.
///
/// Along the way, we do some additional error checking.
pub(super) fn transcribe<'a>(
    psess: &'a ParseSess,
    interp: &FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
    src: &mbe::Delimited,
    src_span: DelimSpan,
    transparency: Transparency,
    expand_id: LocalExpnId,
) -> PResult<'a, TokenStream> {
    // Nothing for us to transcribe...
    if src.tts.is_empty() {
        return Ok(TokenStream::default());
    }

    // We descend into the RHS (`src`), expanding things as we go. This stack contains the things
    // we have yet to expand/are still expanding. We start the stack off with the whole RHS. The
    // choice of spacing values doesn't matter.
    let mut stack: SmallVec<[Frame<'_>; 1]> = smallvec![Frame::new_delimited(
        src,
        src_span,
        DelimSpacing::new(Spacing::Alone, Spacing::Alone)
    )];

    // As we descend in the RHS, we will need to be able to match nested sequences of matchers.
    // `repeats` keeps track of where we are in matching at each level, with the last element being
    // the most deeply nested sequence. This is used as a stack.
    let mut repeats: Vec<(usize, usize)> = Vec::new();

    // `result` contains the resulting token stream from the TokenTree we just finished processing.
    // At the end, this will contain the full result of transcription, but at arbitrary points
    // during `transcribe`, `result` will contain subsets of the final result.
    //
    // Specifically, as we descend into each TokenTree, we will push the existing results onto the
    // `result_stack` and clear `result`. We will then produce the results of transcribing the
    // TokenTree into `result`. Then, as we unwind back out of the `TokenTree`, we will pop the
    // `result_stack` and append `result` to it to produce the new `result` up to that point.
    //
    // Thus, if we try to pop the `result_stack` and it is empty, we have reached the top-level
    // again, and we are done transcribing.
    let mut result: Vec<TokenTree> = Vec::new();
    let mut result_stack = Vec::new();
    let mut marker = Marker(expand_id, transparency, Default::default());

    let dcx = psess.dcx();
    loop {
        // Look at the last frame on the stack.
        // If it still has a TokenTree we have not looked at yet, use that tree.
        let Some(tree) = stack.last_mut().unwrap().next() else {
            // This else-case never produces a value for `tree` (it `continue`s or `return`s).

            // Otherwise, if we have just reached the end of a sequence and we can keep repeating,
            // go back to the beginning of the sequence.
            let frame = stack.last_mut().unwrap();
            if let FrameKind::Sequence { sep, .. } = &frame.kind {
                let (repeat_idx, repeat_len) = repeats.last_mut().unwrap();
                *repeat_idx += 1;
                if repeat_idx < repeat_len {
                    frame.idx = 0;
                    if let Some(sep) = sep {
                        result.push(TokenTree::Token(*sep, Spacing::Alone));
                    }
                    continue;
                }
            }

            // We are done with the top of the stack. Pop it. Depending on what it was, we do
            // different things. Note that the outermost item must be the delimited, wrapped RHS
            // that was passed in originally to `transcribe`.
            match stack.pop().unwrap().kind {
                // Done with a sequence. Pop from repeats.
                FrameKind::Sequence { .. } => {
                    repeats.pop();
                }

                // We are done processing a Delimited. If this is the top-level delimited, we are
                // done. Otherwise, we unwind the result_stack to append what we have produced to
                // any previous results.
                FrameKind::Delimited { delim, span, mut spacing, .. } => {
                    // Hack to force-insert a space after `]` in certain cases.
                    // See discussion of the `hex-literal` crate in #114571.
                    if delim == Delimiter::Bracket {
                        spacing.close = Spacing::Alone;
                    }
                    if result_stack.is_empty() {
                        // No results left to compute! We are back at the top-level.
                        return Ok(TokenStream::new(result));
                    }

                    // Step back into the parent Delimited.
                    let tree = TokenTree::Delimited(span, spacing, delim, TokenStream::new(result));
                    result = result_stack.pop().unwrap();
                    result.push(tree);
                }
            }
            continue;
        };

        // At this point, we know we are in the middle of a TokenTree (the last one on `stack`).
        // `tree` contains the next `TokenTree` to be processed.
        match tree {
            // We are descending into a sequence. We first make sure that the matchers in the RHS
            // and the matches in `interp` have the same shape. Otherwise, either the caller or the
            // macro writer has made a mistake.
            seq @ mbe::TokenTree::Sequence(_, seq_rep) => {
                match lockstep_iter_size(seq, interp, &repeats) {
                    LockstepIterSize::Unconstrained => {
                        return Err(dcx.create_err(NoSyntaxVarsExprRepeat { span: seq.span() }));
                    }

                    LockstepIterSize::Contradiction(msg) => {
                        // FIXME: this really ought to be caught at macro definition time... It
                        // happens when two meta-variables are used in the same repetition in a
                        // sequence, but they come from different sequence matchers and repeat
                        // different amounts.
                        return Err(
                            dcx.create_err(MetaVarsDifSeqMatchers { span: seq.span(), msg })
                        );
                    }

                    LockstepIterSize::Constraint(len, _) => {
                        // We do this to avoid an extra clone above. We know that this is a
                        // sequence already.
                        let mbe::TokenTree::Sequence(sp, seq) = seq else { unreachable!() };

                        // Is the repetition empty?
                        if len == 0 {
                            if seq.kleene.op == KleeneOp::OneOrMore {
                                // FIXME: this really ought to be caught at macro definition
                                // time... It happens when the Kleene operator in the matcher and
                                // the body for the same meta-variable do not match.
                                return Err(dcx.create_err(MustRepeatOnce { span: sp.entire() }));
                            }
                        } else {
                            // 0 is the initial counter (we have done 0 repetitions so far). `len`
                            // is the total number of repetitions we should generate.
                            repeats.push((0, len));

                            // The first time we encounter the sequence we push it to the stack. It
                            // then gets reused (see the beginning of the loop) until we are done
                            // repeating.
                            stack.push(Frame::new_sequence(
                                seq_rep,
                                seq.separator.clone(),
                                seq.kleene.op,
                            ));
                        }
                    }
                }
            }

            // Replace the meta-var with the matched token tree from the invocation.
            &mbe::TokenTree::MetaVar(mut sp, mut original_ident) => {
                // Find the matched nonterminal from the macro invocation, and use it to replace
                // the meta-var.
                //
                // We use `Spacing::Alone` everywhere here, because that's the conservative choice
                // and spacing of declarative macros is tricky. E.g. in this macro:
                // ```
                // macro_rules! idents {
                //     ($($a:ident,)*) => { stringify!($($a)*) }
                // }
                // ```
                // `$a` has no whitespace after it and will be marked `JointHidden`. If you then
                // call `idents!(x,y,z,)`, each of `x`, `y`, and `z` will be marked as `Joint`. So
                // if you choose to use `$a`'s spacing or the identifier's spacing, you'll end up
                // producing "xyz", which is bad because it effectively merges tokens.
                // `Spacing::Alone` is the safer option. Fortunately, `space_between` will avoid
                // some of the unnecessary whitespace.
                let ident = MacroRulesNormalizedIdent::new(original_ident);
                if let Some(cur_matched) = lookup_cur_matched(ident, interp, &repeats) {
                    // We wrap the tokens in invisible delimiters, unless they are already wrapped
                    // in invisible delimiters with the same `MetaVarKind`, because some proc
                    // macros can't handle multiple layers of invisible delimiters of the same
                    // `MetaVarKind`. This loses some span info, though it hopefully won't matter.
                    let mut mk_delimited = |mk_span, mv_kind, mut stream: TokenStream| {
                        if stream.len() == 1 {
                            let tree = stream.iter().next().unwrap();
                            if let TokenTree::Delimited(_, _, delim, inner) = tree
                                && let Delimiter::Invisible(InvisibleOrigin::MetaVar(mvk)) = delim
                                && mv_kind == *mvk
                            {
                                stream = inner.clone();
                            }
                        }

                        // Emit as a token stream within `Delimiter::Invisible` to maintain
                        // parsing priorities.
                        marker.mark_span(&mut sp);
                        with_metavar_spans(|mspans| mspans.insert(mk_span, sp));
                        // Both the open delim and close delim get the same span, which covers the
                        // `$foo` in the decl macro RHS.
                        TokenTree::Delimited(
                            DelimSpan::from_single(sp),
                            DelimSpacing::new(Spacing::Alone, Spacing::Alone),
                            Delimiter::Invisible(InvisibleOrigin::MetaVar(mv_kind)),
                            stream,
                        )
                    };
                    let tt = match cur_matched {
                        MatchedSingle(ParseNtResult::Tt(tt)) => {
                            // `tt`s are emitted into the output stream directly as "raw tokens",
                            // without wrapping them into groups. Other variables are emitted into
                            // the output stream as groups with `Delimiter::Invisible` to maintain
                            // parsing priorities.
                            maybe_use_metavar_location(psess, &stack, sp, tt, &mut marker)
                        }
                        MatchedSingle(ParseNtResult::Ident(ident, is_raw)) => {
                            marker.mark_span(&mut sp);
                            with_metavar_spans(|mspans| mspans.insert(ident.span, sp));
                            let kind = token::NtIdent(*ident, *is_raw);
                            TokenTree::token_alone(kind, sp)
                        }
                        MatchedSingle(ParseNtResult::Lifetime(ident, is_raw)) => {
                            marker.mark_span(&mut sp);
                            with_metavar_spans(|mspans| mspans.insert(ident.span, sp));
                            let kind = token::NtLifetime(*ident, *is_raw);
                            TokenTree::token_alone(kind, sp)
                        }
                        MatchedSingle(ParseNtResult::Item(item)) => {
                            mk_delimited(item.span, MetaVarKind::Item, TokenStream::from_ast(item))
                        }
                        MatchedSingle(ParseNtResult::Block(block)) => mk_delimited(
                            block.span,
                            MetaVarKind::Block,
                            TokenStream::from_ast(block),
                        ),
                        MatchedSingle(ParseNtResult::Stmt(stmt)) => {
                            let stream = if let StmtKind::Empty = stmt.kind {
                                // FIXME: Properly collect tokens for empty statements.
                                TokenStream::token_alone(token::Semi, stmt.span)
                            } else {
                                TokenStream::from_ast(stmt)
                            };
                            mk_delimited(stmt.span, MetaVarKind::Stmt, stream)
                        }
                        MatchedSingle(ParseNtResult::Pat(pat, pat_kind)) => mk_delimited(
                            pat.span,
                            MetaVarKind::Pat(*pat_kind),
                            TokenStream::from_ast(pat),
                        ),
                        MatchedSingle(ParseNtResult::Expr(expr, kind)) => {
                            let (can_begin_literal_maybe_minus, can_begin_string_literal) =
                                match &expr.kind {
                                    ExprKind::Lit(_) => (true, true),
                                    ExprKind::Unary(UnOp::Neg, e)
                                        if matches!(&e.kind, ExprKind::Lit(_)) =>
                                    {
                                        (true, false)
                                    }
                                    _ => (false, false),
                                };
                            mk_delimited(
                                expr.span,
                                MetaVarKind::Expr {
                                    kind: *kind,
                                    can_begin_literal_maybe_minus,
                                    can_begin_string_literal,
                                },
                                TokenStream::from_ast(expr),
                            )
                        }
                        MatchedSingle(ParseNtResult::Literal(lit)) => {
                            mk_delimited(lit.span, MetaVarKind::Literal, TokenStream::from_ast(lit))
                        }
                        MatchedSingle(ParseNtResult::Ty(ty)) => {
                            let is_path = matches!(&ty.kind, TyKind::Path(None, _path));
                            mk_delimited(
                                ty.span,
                                MetaVarKind::Ty { is_path },
                                TokenStream::from_ast(ty),
                            )
                        }
                        MatchedSingle(ParseNtResult::Meta(attr_item)) => {
                            let has_meta_form = attr_item.meta_kind().is_some();
                            mk_delimited(
                                attr_item.span(),
                                MetaVarKind::Meta { has_meta_form },
                                TokenStream::from_ast(attr_item),
                            )
                        }
                        MatchedSingle(ParseNtResult::Path(path)) => {
                            mk_delimited(path.span, MetaVarKind::Path, TokenStream::from_ast(path))
                        }
                        MatchedSingle(ParseNtResult::Vis(vis)) => {
                            mk_delimited(vis.span, MetaVarKind::Vis, TokenStream::from_ast(vis))
                        }
                        MatchedSeq(..) => {
                            // We were unable to descend far enough. This is an error.
                            return Err(dcx.create_err(VarStillRepeating { span: sp, ident }));
                        }
                    };
                    result.push(tt)
                } else {
                    // If we aren't able to match the meta-var, we push it back into the result but
                    // with modified syntax context. (I believe this supports nested macros).
                    marker.mark_span(&mut sp);
                    marker.mark_span(&mut original_ident.span);
                    result.push(TokenTree::token_joint_hidden(token::Dollar, sp));
                    result.push(TokenTree::Token(
                        Token::from_ast_ident(original_ident),
                        Spacing::Alone,
                    ));
                }
            }

            // Replace meta-variable expressions with the result of their expansion.
            mbe::TokenTree::MetaVarExpr(sp, expr) => {
                transcribe_metavar_expr(
                    dcx,
                    expr,
                    interp,
                    &mut marker,
                    &repeats,
                    &mut result,
                    sp,
                    &psess.symbol_gallery,
                )?;
            }

            // If we are entering a new delimiter, we push its contents to the `stack` to be
            // processed, and we push all of the currently produced results to the `result_stack`.
            // We will produce all of the results of the inside of the `Delimited` and then we will
            // jump back out of the Delimited, pop the result_stack and add the new results back to
            // the previous results (from outside the Delimited).
            &mbe::TokenTree::Delimited(mut span, ref spacing, ref delimited) => {
                marker.mark_span(&mut span.open);
                marker.mark_span(&mut span.close);
                stack.push(Frame::new_delimited(delimited, span, *spacing));
                result_stack.push(mem::take(&mut result));
            }

            // Nothing much to do here. Just push the token to the result, being careful to
            // preserve syntax context.
            &mbe::TokenTree::Token(mut token) => {
                marker.mark_span(&mut token.span);
                if let token::NtIdent(ident, _) | token::NtLifetime(ident, _) = &mut token.kind {
                    marker.mark_span(&mut ident.span);
                }
                let tt = TokenTree::Token(token, Spacing::Alone);
                result.push(tt);
            }

            // There should be no meta-var declarations in the invocation of a macro.
            mbe::TokenTree::MetaVarDecl(..) => panic!("unexpected `TokenTree::MetaVarDecl`"),
        }
    }
}

/// Store the metavariable span for this original span into a side table.
/// FIXME: Try to put the metavariable span into `SpanData` instead of a side table (#118517).
/// An optimal encoding for inlined spans will need to be selected to minimize regressions.
/// The side table approach is relatively good, but not perfect due to collisions.
/// In particular, collisions happen when a token is passed as an argument through several macro
/// calls, like in recursive macros.
/// The old heuristic below is used to improve spans in case of collisions, but diagnostics are
/// still degraded sometimes in those cases.
///
/// The old heuristic:
///
/// Usually metavariables `$var` produce interpolated tokens, which have an additional place for
/// keeping both the original span and the metavariable span. For `tt` metavariables that's not the
/// case however, and there's no place for keeping a second span. So we try to give the single
/// produced span a location that would be most useful in practice (the hygiene part of the span
/// must not be changed).
///
/// Different locations are useful for different purposes:
/// - The original location is useful when we need to report a diagnostic for the original token in
///   isolation, without combining it with any surrounding tokens. This case occurs, but it is not
///   very common in practice.
/// - The metavariable location is useful when we need to somehow combine the token span with spans
///   of its surrounding tokens. This is the most common way to use token spans.
///
/// So this function replaces the original location with the metavariable location in all cases
/// except these two:
/// - The metavariable is an element of an undelimited sequence `$($tt)*`.
///   These are typically used for passing larger amounts of code, and tokens in that code usually
///   combine with each other and not with tokens outside of the sequence.
/// - The metavariable span comes from a different crate, in which case we prefer the more local
///   span.
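///
/// For illustration (a hypothetical macro, not input taken from the compiler or its test suite):
///
/// ```rust
/// macro_rules! demo {
///     (one $e:tt) => { $e };
///     (many $($tt:tt)*) => { $($tt)* };
/// }
///
/// demo!(one { 1 + 1 });
/// demo!(many 1 + 1);
/// ```
///
/// In the first rule, the metavariable span (the `$e` in the RHS) is recorded for the braced
/// group substituted for `$e`, so it can be combined with the spans of surrounding tokens in the
/// expansion. In the second rule, the tokens are forwarded through an undelimited `$($tt)*`
/// sequence, so no metavariable span is recorded and they keep their original locations.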
fn maybe_use_metavar_location(
    psess: &ParseSess,
    stack: &[Frame<'_>],
    mut metavar_span: Span,
    orig_tt: &TokenTree,
    marker: &mut Marker,
) -> TokenTree {
    let undelimited_seq = matches!(
        stack.last(),
        Some(Frame {
            tts: [_],
            kind: FrameKind::Sequence {
                sep: None,
                kleene_op: KleeneOp::ZeroOrMore | KleeneOp::OneOrMore,
                ..
            },
            ..
        })
    );
    if undelimited_seq {
        // Do not record metavar spans for tokens from undelimited sequences, for perf reasons.
        return orig_tt.clone();
    }

    marker.mark_span(&mut metavar_span);
    let no_collision = match orig_tt {
        TokenTree::Token(token, ..) => {
            with_metavar_spans(|mspans| mspans.insert(token.span, metavar_span))
        }
        TokenTree::Delimited(dspan, ..) => with_metavar_spans(|mspans| {
            mspans.insert(dspan.open, metavar_span)
                && mspans.insert(dspan.close, metavar_span)
                && mspans.insert(dspan.entire(), metavar_span)
        }),
    };
    if no_collision || psess.source_map().is_imported(metavar_span) {
        return orig_tt.clone();
    }

    // Setting metavar spans for the heuristic spans gives better opportunities for combining them
    // with neighboring spans even despite their different syntactic contexts.
    match orig_tt {
        TokenTree::Token(Token { kind, span }, spacing) => {
            let span = metavar_span.with_ctxt(span.ctxt());
            with_metavar_spans(|mspans| mspans.insert(span, metavar_span));
            TokenTree::Token(Token { kind: kind.clone(), span }, *spacing)
        }
        TokenTree::Delimited(dspan, dspacing, delimiter, tts) => {
            let open = metavar_span.with_ctxt(dspan.open.ctxt());
            let close = metavar_span.with_ctxt(dspan.close.ctxt());
            with_metavar_spans(|mspans| {
                mspans.insert(open, metavar_span) && mspans.insert(close, metavar_span)
            });
            let dspan = DelimSpan::from_pair(open, close);
            TokenTree::Delimited(dspan, *dspacing, *delimiter, tts.clone())
        }
    }
}

/// Look up the meta-var named `ident` and return the matched token tree from the invocation using
/// the set of matches `interpolations`.
///
/// See the definition of `repeats` in the `transcribe` function. `repeats` is used to descend
/// into the right place in nested matchers. If we attempt to descend too far, the macro writer has
/// made a mistake, and we return `None`.
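///
/// For example (an illustrative shape, not real compiler data): for a matcher
/// `$( $( $x:ident )* );*` invoked with `a b; c`, the match for `x` is a `MatchedSeq` containing
/// two inner `MatchedSeq`s. With `repeats == [(1, 2), (0, 1)]` (second outer repetition, first
/// inner repetition) this returns the `MatchedSingle` for `c`; with an empty `repeats` it stops
/// at the outermost `MatchedSeq`, which the `MetaVar` arm of `transcribe` then reports as a
/// "variable still repeating" error.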
fn lookup_cur_matched<'a>(
    ident: MacroRulesNormalizedIdent,
    interpolations: &'a FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
    repeats: &[(usize, usize)],
) -> Option<&'a NamedMatch> {
    interpolations.get(&ident).map(|mut matched| {
        for &(idx, _) in repeats {
            match matched {
                MatchedSingle(_) => break,
                MatchedSeq(ads) => matched = ads.get(idx).unwrap(),
            }
        }

        matched
    })
}

/// An accumulator over a TokenTree to be used with `fold`. During transcription, we need to make
/// sure that the size of each sequence and all of its nested sequences are the same as the sizes
/// of all the matched (nested) sequences in the macro invocation. If they don't match, somebody
/// has made a mistake (either the macro writer or caller).
#[derive(Clone)]
enum LockstepIterSize {
    /// No constraints on length of matcher. This is true for any TokenTree variants except a
    /// `MetaVar` with an actual `MatchedSeq` (as opposed to a `MatchedNonterminal`).
    Unconstrained,

    /// A `MetaVar` with an actual `MatchedSeq`. The length of the match and the name of the
    /// meta-var are returned.
    Constraint(usize, MacroRulesNormalizedIdent),

    /// Two `Constraint`s on the same sequence had different lengths. This is an error.
    Contradiction(String),
}

impl LockstepIterSize {
    /// Find incompatibilities in matcher/invocation sizes.
    /// - `Unconstrained` is compatible with everything.
    /// - `Contradiction` is incompatible with everything.
    /// - `Constraint(len)` is only compatible with other constraints of the same length.
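    ///
    /// For example: `Unconstrained.with(Constraint(3, x))` yields `Constraint(3, x)`,
    /// `Constraint(3, x).with(Constraint(3, y))` stays `Constraint(3, x)`, and
    /// `Constraint(3, x).with(Constraint(4, y))` becomes a `Contradiction` whose message reports
    /// that `x` repeats 3 times while `y` repeats 4 times.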
    fn with(self, other: LockstepIterSize) -> LockstepIterSize {
        match self {
            LockstepIterSize::Unconstrained => other,
            LockstepIterSize::Contradiction(_) => self,
            LockstepIterSize::Constraint(l_len, l_id) => match other {
                LockstepIterSize::Unconstrained => self,
                LockstepIterSize::Contradiction(_) => other,
                LockstepIterSize::Constraint(r_len, _) if l_len == r_len => self,
                LockstepIterSize::Constraint(r_len, r_id) => {
                    let msg = format!(
                        "meta-variable `{}` repeats {} time{}, but `{}` repeats {} time{}",
                        l_id,
                        l_len,
                        pluralize!(l_len),
                        r_id,
                        r_len,
                        pluralize!(r_len),
                    );
                    LockstepIterSize::Contradiction(msg)
                }
            },
        }
    }
}

/// Given a `tree`, make sure that all sequences have the same length as the matches for the
/// appropriate meta-vars in `interpolations`.
///
/// Note that if `repeats` does not match the exact correct depth of a meta-var,
/// `lookup_cur_matched` will return `None`, which is why this still works even in the presence of
/// multiple nested matcher sequences.
///
/// Example: `$($($x $y)+*);+` -- we need to make sure that `x` and `y` repeat the same amount as
/// each other at the given depth when the macro was invoked. If they don't, it might mean they
/// were declared at depths which weren't equal, or that there was a compiler bug. For example, if
/// we have 3 repetitions of the outer sequence and 4 repetitions of the inner sequence for `x`,
/// we should have the same for `y`; otherwise, we can't transcribe them both at the given depth.
fn lockstep_iter_size(
    tree: &mbe::TokenTree,
    interpolations: &FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
    repeats: &[(usize, usize)],
) -> LockstepIterSize {
    use mbe::TokenTree;
    match tree {
        TokenTree::Delimited(.., delimited) => {
            delimited.tts.iter().fold(LockstepIterSize::Unconstrained, |size, tt| {
                size.with(lockstep_iter_size(tt, interpolations, repeats))
            })
        }
        TokenTree::Sequence(_, seq) => {
            seq.tts.iter().fold(LockstepIterSize::Unconstrained, |size, tt| {
                size.with(lockstep_iter_size(tt, interpolations, repeats))
            })
        }
        TokenTree::MetaVar(_, name) | TokenTree::MetaVarDecl(_, name, _) => {
            let name = MacroRulesNormalizedIdent::new(*name);
            match lookup_cur_matched(name, interpolations, repeats) {
                Some(matched) => match matched {
                    MatchedSingle(_) => LockstepIterSize::Unconstrained,
                    MatchedSeq(ads) => LockstepIterSize::Constraint(ads.len(), name),
                },
                _ => LockstepIterSize::Unconstrained,
            }
        }
        TokenTree::MetaVarExpr(_, expr) => {
            expr.for_each_metavar(LockstepIterSize::Unconstrained, |lis, ident| {
                lis.with(lockstep_iter_size(
                    &TokenTree::MetaVar(ident.span, *ident),
                    interpolations,
                    repeats,
                ))
            })
        }
        TokenTree::Token(..) => LockstepIterSize::Unconstrained,
    }
}

/// Used solely by the `count` meta-variable expression, counts the outermost repetitions at a
/// given optional nested depth.
///
/// For example, a macro parameter of `$( { $( $foo:ident ),* } )*` called with `{ a, b } { c }`:
///
/// * `[ $( ${count(foo)} ),* ]` will return [2, 1] with a, b = 2 and c = 1
/// * `[ $( ${count(foo, 0)} ),* ]` will be the same as `[ $( ${count(foo)} ),* ]`
/// * `[ $( ${count(foo, 1)} ),* ]` will return an error because `${count(foo, 1)}` is
///   declared inside a single repetition and the index `1` implies two nested repetitions.
fn count_repetitions<'a>(
    dcx: DiagCtxtHandle<'a>,
    depth_user: usize,
    mut matched: &NamedMatch,
    repeats: &[(usize, usize)],
    sp: &DelimSpan,
) -> PResult<'a, usize> {
    // Recursively count the number of matches in `matched` at given depth
    // (or at the top-level of `matched` if no depth is given).
    fn count<'a>(depth_curr: usize, depth_max: usize, matched: &NamedMatch) -> PResult<'a, usize> {
        match matched {
            MatchedSingle(_) => Ok(1),
            MatchedSeq(named_matches) => {
                if depth_curr == depth_max {
                    Ok(named_matches.len())
                } else {
                    named_matches.iter().map(|elem| count(depth_curr + 1, depth_max, elem)).sum()
                }
            }
        }
    }

    /// Maximum depth
    fn depth(counter: usize, matched: &NamedMatch) -> usize {
        match matched {
            MatchedSingle(_) => counter,
            MatchedSeq(named_matches) => {
                let rslt = counter + 1;
                if let Some(elem) = named_matches.first() { depth(rslt, elem) } else { rslt }
            }
        }
    }

    let depth_max = depth(0, matched)
        .checked_sub(1)
        .and_then(|el| el.checked_sub(repeats.len()))
        .unwrap_or_default();
    if depth_user > depth_max {
        return Err(out_of_bounds_err(dcx, depth_max + 1, sp.entire(), "count"));
    }

    // `repeats` records all of the nested levels at which we are currently
    // matching meta-variables. The meta-var-expr `count($x)` only counts
    // matches that occur in this "subtree" of the `NamedMatch` where we
    // are currently transcribing, so we need to descend to that subtree
    // before we start counting. `matched` contains the various levels of the
    // tree as we descend, and its final value is the subtree we are currently at.
    for &(idx, _) in repeats {
        if let MatchedSeq(ads) = matched {
            matched = &ads[idx];
        }
    }

    if let MatchedSingle(_) = matched {
        return Err(dcx.create_err(CountRepetitionMisplaced { span: sp.entire() }));
    }

    count(depth_user, depth_max, matched)
}

/// Returns a `NamedMatch` item declared on the LHS given an arbitrary [Ident]
fn matched_from_ident<'ctx, 'interp, 'rslt>(
    dcx: DiagCtxtHandle<'ctx>,
    ident: Ident,
    interp: &'interp FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
) -> PResult<'ctx, &'rslt NamedMatch>
where
    'interp: 'rslt,
{
    let span = ident.span;
    let key = MacroRulesNormalizedIdent::new(ident);
    interp.get(&key).ok_or_else(|| dcx.create_err(MetaVarExprUnrecognizedVar { span, key }))
}

/// Used by meta-variable expressions when a user input is out of the actual declared bounds. For
/// example, `index(999999)` in a repetition of only three elements.
fn out_of_bounds_err<'a>(dcx: DiagCtxtHandle<'a>, max: usize, span: Span, ty: &str) -> Diag<'a> {
    let msg = if max == 0 {
        format!(
            "meta-variable expression `{ty}` with depth parameter \
             must be called inside of a macro repetition"
        )
    } else {
        format!(
            "depth parameter of meta-variable expression `{ty}` \
             must be less than {max}"
        )
    };
    dcx.struct_span_err(span, msg)
}

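/// Transcribes a single meta-variable expression, such as `${count(..)}`, `${ignore(..)}`,
/// `${index()}`, `${len()}` or `${concat(..)}`, pushing the tokens it produces onto `result`.
///
/// As a rough sketch (an illustrative rule, not taken from real compiler input, and eliding the
/// feature gate this syntax requires): for a rule
/// `($($x:ident),*) => { [$( ($x, ${index()}, ${len()}) ),*] }` invoked with `a, b, c`,
/// `${index()}` transcribes to `0`, `1` and `2` on successive repetitions and `${len()}` to `3`
/// each time, so the expansion is `[(a, 0, 3), (b, 1, 3), (c, 2, 3)]`.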
fn transcribe_metavar_expr<'a>(
    dcx: DiagCtxtHandle<'a>,
    expr: &MetaVarExpr,
    interp: &FxHashMap<MacroRulesNormalizedIdent, NamedMatch>,
    marker: &mut Marker,
    repeats: &[(usize, usize)],
    result: &mut Vec<TokenTree>,
    sp: &DelimSpan,
    symbol_gallery: &SymbolGallery,
) -> PResult<'a, ()> {
    let mut visited_span = || {
        let mut span = sp.entire();
        marker.mark_span(&mut span);
        span
    };
    match *expr {
        MetaVarExpr::Concat(ref elements) => {
            let mut concatenated = String::new();
            for element in elements.into_iter() {
                let symbol = match element {
                    MetaVarExprConcatElem::Ident(elem) => elem.name,
                    MetaVarExprConcatElem::Literal(elem) => *elem,
                    MetaVarExprConcatElem::Var(ident) => {
                        match matched_from_ident(dcx, *ident, interp)? {
                            NamedMatch::MatchedSeq(named_matches) => {
                                let Some((curr_idx, _)) = repeats.last() else {
                                    return Err(dcx.struct_span_err(sp.entire(), "invalid syntax"));
                                };
                                match &named_matches[*curr_idx] {
                                    // FIXME(c410-f3r) Nested repetitions are unimplemented
                                    MatchedSeq(_) => unimplemented!(),
                                    MatchedSingle(pnr) => {
                                        extract_symbol_from_pnr(dcx, pnr, ident.span)?
                                    }
                                }
                            }
                            NamedMatch::MatchedSingle(pnr) => {
                                extract_symbol_from_pnr(dcx, pnr, ident.span)?
                            }
                        }
                    }
                };
                concatenated.push_str(symbol.as_str());
            }
            let symbol = nfc_normalize(&concatenated);
            let concatenated_span = visited_span();
            if !rustc_lexer::is_ident(symbol.as_str()) {
                return Err(dcx.struct_span_err(
                    concatenated_span,
                    "`${concat(..)}` is not generating a valid identifier",
                ));
            }
            symbol_gallery.insert(symbol, concatenated_span);
            // The current implementation marks the span as coming from the macro regardless of
            // contexts of the concatenated identifiers but this behavior may change in the
            // future.
            result.push(TokenTree::Token(
                Token::from_ast_ident(Ident::new(symbol, concatenated_span)),
                Spacing::Alone,
            ));
        }
        MetaVarExpr::Count(original_ident, depth) => {
            let matched = matched_from_ident(dcx, original_ident, interp)?;
            let count = count_repetitions(dcx, depth, matched, repeats, sp)?;
            let tt = TokenTree::token_alone(
                TokenKind::lit(token::Integer, sym::integer(count), None),
                visited_span(),
            );
            result.push(tt);
        }
        MetaVarExpr::Ignore(original_ident) => {
            // Used to ensure that `original_ident` is present in the LHS
            let _ = matched_from_ident(dcx, original_ident, interp)?;
        }
        MetaVarExpr::Index(depth) => match repeats.iter().nth_back(depth) {
            Some((index, _)) => {
                result.push(TokenTree::token_alone(
                    TokenKind::lit(token::Integer, sym::integer(*index), None),
                    visited_span(),
                ));
            }
            None => return Err(out_of_bounds_err(dcx, repeats.len(), sp.entire(), "index")),
        },
        MetaVarExpr::Len(depth) => match repeats.iter().nth_back(depth) {
            Some((_, length)) => {
                result.push(TokenTree::token_alone(
                    TokenKind::lit(token::Integer, sym::integer(*length), None),
                    visited_span(),
                ));
            }
            None => return Err(out_of_bounds_err(dcx, repeats.len(), sp.entire(), "len")),
        },
    }
    Ok(())
}

/// Extracts a metavariable symbol that can be an identifier, a token tree or a literal.
fn extract_symbol_from_pnr<'a>(
    dcx: DiagCtxtHandle<'a>,
    pnr: &ParseNtResult,
    span_err: Span,
) -> PResult<'a, Symbol> {
    match pnr {
        ParseNtResult::Ident(nt_ident, is_raw) => {
            if let IdentIsRaw::Yes = is_raw {
                Err(dcx.struct_span_err(span_err, RAW_IDENT_ERR))
            } else {
                Ok(nt_ident.name)
            }
        }
        ParseNtResult::Tt(TokenTree::Token(
            Token { kind: TokenKind::Ident(symbol, is_raw), .. },
            _,
        )) => {
            if let IdentIsRaw::Yes = is_raw {
                Err(dcx.struct_span_err(span_err, RAW_IDENT_ERR))
            } else {
                Ok(*symbol)
            }
        }
        ParseNtResult::Tt(TokenTree::Token(
            Token {
                kind: TokenKind::Literal(Lit { kind: LitKind::Str, symbol, suffix: None }),
                ..
            },
            _,
        )) => Ok(*symbol),
        ParseNtResult::Literal(expr)
            if let ExprKind::Lit(Lit { kind: LitKind::Str, symbol, suffix: None }) = &expr.kind =>
        {
            Ok(*symbol)
        }
        _ => Err(dcx
            .struct_err(
                "metavariables of `${concat(..)}` must be of type `ident`, `literal` or `tt`",
            )
            .with_note("currently only string literals are supported")
            .with_span(span_err)),
    }
}