rustc_ast/tokenstream.rs
1//! # Token Streams
2//!
3//! `TokenStream`s represent syntactic objects before they are converted into ASTs.
4//! A `TokenStream` is, roughly speaking, a sequence of [`TokenTree`]s,
5//! each of which is either a single [`Token`] or a `Delimited` subsequence of tokens.
6
7use std::borrow::Cow;
8use std::ops::Range;
9use std::sync::Arc;
10use std::{cmp, fmt, iter, mem};
11
12use rustc_data_structures::stable_hasher::{HashStable, StableHasher};
13use rustc_data_structures::sync;
14use rustc_macros::{Decodable, Encodable, HashStable_Generic, Walkable};
15use rustc_serialize::{Decodable, Encodable};
16use rustc_span::{DUMMY_SP, Span, SpanDecoder, SpanEncoder, Symbol, sym};
17use thin_vec::ThinVec;
18
19use crate::ast::AttrStyle;
20use crate::ast_traits::{HasAttrs, HasTokens};
21use crate::token::{self, Delimiter, Token, TokenKind};
22use crate::{AttrVec, Attribute};
23
24/// Part of a `TokenStream`.
25#[derive(Debug, Clone, PartialEq, Encodable, Decodable, HashStable_Generic)]
26pub enum TokenTree {
27 /// A single token. Should never be an open or close delimiter, because
28 /// delimiters are implicitly represented by `Delimited`.
29 Token(Token, Spacing),
30 /// A delimited sequence of token trees.
31 Delimited(DelimSpan, DelimSpacing, Delimiter, TokenStream),
32}
33
34// Ensure all fields of `TokenTree` are `DynSend` and `DynSync`.
35fn _dummy()
36where
37 Token: sync::DynSend + sync::DynSync,
38 Spacing: sync::DynSend + sync::DynSync,
39 DelimSpan: sync::DynSend + sync::DynSync,
40 Delimiter: sync::DynSend + sync::DynSync,
41 TokenStream: sync::DynSend + sync::DynSync,
42{
43}
44
45impl TokenTree {
46 /// Checks if this `TokenTree` is equal to the other, regardless of span/spacing information.
47 pub fn eq_unspanned(&self, other: &TokenTree) -> bool {
48 match (self, other) {
49 (TokenTree::Token(token, _), TokenTree::Token(token2, _)) => token.kind == token2.kind,
50 (TokenTree::Delimited(.., delim, tts), TokenTree::Delimited(.., delim2, tts2)) => {
51 delim == delim2
52 && tts.len() == tts2.len()
53 && tts.iter().zip(tts2.iter()).all(|(a, b)| a.eq_unspanned(b))
54 }
55 _ => false,
56 }
57 }
58
59 /// Retrieves the `TokenTree`'s span.
60 pub fn span(&self) -> Span {
61 match self {
62 TokenTree::Token(token, _) => token.span,
63 TokenTree::Delimited(sp, ..) => sp.entire(),
64 }
65 }
66
67 /// Create a `TokenTree::Token` with alone spacing.
68 pub fn token_alone(kind: TokenKind, span: Span) -> TokenTree {
69 TokenTree::Token(Token::new(kind, span), Spacing::Alone)
70 }
71
72 /// Create a `TokenTree::Token` with joint spacing.
73 pub fn token_joint(kind: TokenKind, span: Span) -> TokenTree {
74 TokenTree::Token(Token::new(kind, span), Spacing::Joint)
75 }
76
77 /// Create a `TokenTree::Token` with joint-hidden spacing.
78 pub fn token_joint_hidden(kind: TokenKind, span: Span) -> TokenTree {
79 TokenTree::Token(Token::new(kind, span), Spacing::JointHidden)
80 }
81
82 pub fn uninterpolate(&self) -> Cow<'_, TokenTree> {
83 match self {
84 TokenTree::Token(token, spacing) => match token.uninterpolate() {
85 Cow::Owned(token) => Cow::Owned(TokenTree::Token(token, *spacing)),
86 Cow::Borrowed(_) => Cow::Borrowed(self),
87 },
88 _ => Cow::Borrowed(self),
89 }
90 }
91}
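
// Illustrative sketch (not part of the original source): building token trees with
// the constructors above and comparing them with `eq_unspanned`, which ignores
// spans and spacing. Only items defined or imported in this file are used.
//
// ```ignore (illustrative)
// let a = TokenTree::token_alone(token::Comma, DUMMY_SP);
// let b = TokenTree::token_joint(token::Comma, DUMMY_SP);
// assert!(a.eq_unspanned(&b)); // same kind; spacing and spans are ignored
// ```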
92
93/// A lazy version of [`AttrTokenStream`], which defers creation of an actual
94/// `AttrTokenStream` until it is needed.
95#[derive(Clone)]
96pub struct LazyAttrTokenStream(Arc<LazyAttrTokenStreamInner>);
97
98impl LazyAttrTokenStream {
99 pub fn new_direct(stream: AttrTokenStream) -> LazyAttrTokenStream {
100 LazyAttrTokenStream(Arc::new(LazyAttrTokenStreamInner::Direct(stream)))
101 }
102
103 pub fn new_pending(
104 start_token: (Token, Spacing),
105 cursor_snapshot: TokenCursor,
106 num_calls: u32,
107 break_last_token: u32,
108 node_replacements: ThinVec<NodeReplacement>,
109 ) -> LazyAttrTokenStream {
110 LazyAttrTokenStream(Arc::new(LazyAttrTokenStreamInner::Pending {
111 start_token,
112 cursor_snapshot,
113 num_calls,
114 break_last_token,
115 node_replacements,
116 }))
117 }
118
119 pub fn to_attr_token_stream(&self) -> AttrTokenStream {
120 self.0.to_attr_token_stream()
121 }
122}
123
124impl fmt::Debug for LazyAttrTokenStream {
125 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
126 write!(f, "LazyAttrTokenStream({:?})", self.to_attr_token_stream())
127 }
128}
129
130impl<S: SpanEncoder> Encodable<S> for LazyAttrTokenStream {
131 fn encode(&self, _s: &mut S) {
132 panic!("Attempted to encode LazyAttrTokenStream");
133 }
134}
135
136impl<D: SpanDecoder> Decodable<D> for LazyAttrTokenStream {
137 fn decode(_d: &mut D) -> Self {
138 panic!("Attempted to decode LazyAttrTokenStream");
139 }
140}
141
142impl<CTX> HashStable<CTX> for LazyAttrTokenStream {
143 fn hash_stable(&self, _hcx: &mut CTX, _hasher: &mut StableHasher) {
144 panic!("Attempted to compute stable hash for LazyAttrTokenStream");
145 }
146}
147
148/// A token range within a `Parser`'s full token stream.
149#[derive(Clone, Debug)]
150pub struct ParserRange(pub Range<u32>);
151
152/// A token range within an individual AST node's (lazy) token stream, i.e.
153/// relative to that node's first token. Distinct from `ParserRange` so the two
154/// kinds of range can't be mixed up.
155#[derive(Clone, Debug)]
156pub struct NodeRange(pub Range<u32>);
157
158/// Indicates a range of tokens that should be replaced by an `AttrsTarget`
159/// (replacement) or removed entirely (deletion). This is used in two
160/// places during token collection.
161///
162/// 1. Replacement. During the parsing of an AST node that may have a
163/// `#[derive]` attribute, when we parse a nested AST node that has `#[cfg]`
164/// or `#[cfg_attr]`, we replace the entire inner AST node with
165/// `FlatToken::AttrsTarget`. This lets us perform eager cfg-expansion on an
166/// `AttrTokenStream`.
167///
168/// 2. Deletion. We delete inner attributes from all collected token streams,
169/// and instead track them through the `attrs` field on the AST node. This
170/// lets us manipulate them similarly to outer attributes. When we create a
171/// `TokenStream`, the inner attributes are inserted into the proper place
172/// in the token stream.
173///
174/// Each replacement starts off in `ParserReplacement` form but is converted to
175/// `NodeReplacement` form when it is attached to a single AST node, via
176/// `LazyAttrTokenStreamImpl`.
177pub type ParserReplacement = (ParserRange, Option<AttrsTarget>);
178
179/// See the comment on `ParserReplacement`.
180pub type NodeReplacement = (NodeRange, Option<AttrsTarget>);
181
182impl NodeRange {
183 // Converts a range within a parser's tokens to a range within a
184 // node's tokens beginning at `start_pos`.
185 //
186 // For example, imagine a parser with 50 tokens in its token stream, a
187 // function that spans `ParserRange(20..40)` and an inner attribute within
188 // that function that spans `ParserRange(30..35)`. We would find the inner
189 // attribute's range within the function's tokens by subtracting 20, which
190 // is the position of the function's start token. This gives
191 // `NodeRange(10..15)`.
192 pub fn new(ParserRange(parser_range): ParserRange, start_pos: u32) -> NodeRange {
193 assert!(!parser_range.is_empty());
194 assert!(parser_range.start >= start_pos);
195 NodeRange((parser_range.start - start_pos)..(parser_range.end - start_pos))
196 }
197}
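
// Illustrative sketch (not part of the original source): the worked example from the
// comment above, expressed as code.
//
// ```ignore (illustrative)
// // An inner attribute spans parser tokens 30..35; its enclosing function's tokens
// // start at parser position 20.
// let node_range = NodeRange::new(ParserRange(30..35), 20);
// assert_eq!(node_range.0, 10..15);
// ```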
198
199enum LazyAttrTokenStreamInner {
200 // The token stream has already been produced.
201 Direct(AttrTokenStream),
202
203 // From a value of this type we can reconstruct the `TokenStream` seen by
204 // the `f` callback passed to a call to `Parser::collect_tokens`, by
205 // replaying how the tokens were retrieved. This saves us producing a
206 // `TokenStream` if it is never needed, e.g. a captured `macro_rules!`
207 // argument that is never passed to a proc macro. In practice, token stream
208 // creation happens rarely compared to calls to `collect_tokens` (see some
209 // statistics in #78736) so we are doing as little up-front work as
210 // possible.
211 //
212 // This also makes `Parser` very cheap to clone, since there is no
213 // intermediate collection buffer to clone.
214 Pending {
215 start_token: (Token, Spacing),
216 cursor_snapshot: TokenCursor,
217 num_calls: u32,
218 break_last_token: u32,
219 node_replacements: ThinVec<NodeReplacement>,
220 },
221}
222
223impl LazyAttrTokenStreamInner {
224 fn to_attr_token_stream(&self) -> AttrTokenStream {
225 match self {
226 LazyAttrTokenStreamInner::Direct(stream) => stream.clone(),
227 LazyAttrTokenStreamInner::Pending {
228 start_token,
229 cursor_snapshot,
230 num_calls,
231 break_last_token,
232 node_replacements,
233 } => {
234 // The token produced by the final call to `{,inlined_}next` was not
235 // actually consumed by the callback. The combination of chaining the
236 // initial token and using `take` produces the desired result - we
237 // produce an empty `TokenStream` if no calls were made, and omit the
238 // final token otherwise.
239 let mut cursor_snapshot = cursor_snapshot.clone();
240 let tokens = iter::once(FlatToken::Token(*start_token))
241 .chain(iter::repeat_with(|| FlatToken::Token(cursor_snapshot.next())))
242 .take(*num_calls as usize);
243
244 if node_replacements.is_empty() {
245 make_attr_token_stream(tokens, *break_last_token)
246 } else {
247 let mut tokens: Vec<_> = tokens.collect();
248 let mut node_replacements = node_replacements.to_vec();
249 node_replacements.sort_by_key(|(range, _)| range.0.start);
250
251 #[cfg(debug_assertions)]
252 for [(node_range, tokens), (next_node_range, next_tokens)] in
253 node_replacements.array_windows()
254 {
255 assert!(
256 node_range.0.end <= next_node_range.0.start
257 || node_range.0.end >= next_node_range.0.end,
258 "Node ranges should be disjoint or nested: ({:?}, {:?}) ({:?}, {:?})",
259 node_range,
260 tokens,
261 next_node_range,
262 next_tokens,
263 );
264 }
265
266 // Process the replace ranges, starting from the highest start
267 // position and working our way back. If we have tokens like:
268 //
269 // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
270 //
271 // Then we will generate replace ranges for both
272 // the `#[cfg(FALSE)] field: bool` and the entire
273 // `#[cfg(FALSE)] struct Foo { #[cfg(FALSE)] field: bool }`
274 //
275 // By starting processing from the replace range with the greatest
276 // start position, we ensure that any (outer) replace range which
277 // encloses another (inner) replace range will fully overwrite the
278 // inner range's replacement.
279 for (node_range, target) in node_replacements.into_iter().rev() {
280 assert!(
281 !node_range.0.is_empty(),
282 "Cannot replace an empty node range: {:?}",
283 node_range.0
284 );
285
286 // Replace the tokens in range with zero or one `FlatToken::AttrsTarget`s,
287 // plus enough `FlatToken::Empty`s to fill up the rest of the range. This
288 // keeps the total length of `tokens` constant throughout the replacement
289 // process, allowing us to do all replacements without adjusting indices.
290 let target_len = target.is_some() as usize;
291 tokens.splice(
292 (node_range.0.start as usize)..(node_range.0.end as usize),
293 target.into_iter().map(|target| FlatToken::AttrsTarget(target)).chain(
294 iter::repeat(FlatToken::Empty)
295 .take(node_range.0.len() - target_len),
296 ),
297 );
298 }
299 make_attr_token_stream(tokens.into_iter(), *break_last_token)
300 }
301 }
302 }
303 }
304}
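
// Illustrative sketch (not part of the original source): the index-stable replacement
// technique used above, reduced to plain integers. Each range is spliced with its
// replacement plus enough padding that the vector keeps its length, so ranges that
// have not been processed yet still refer to valid positions.
//
// ```ignore (illustrative, simplified)
// fn replace_keeping_len(v: &mut Vec<Option<u32>>, range: std::ops::Range<usize>, new: Option<u32>) {
//     // Zero or one replacement elements, then padding to preserve the length.
//     let pad = range.len() - new.is_some() as usize;
//     v.splice(
//         range,
//         new.into_iter().map(Some).chain(std::iter::repeat(None).take(pad)),
//     );
// }
// ```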
305
306/// A helper type used when building an `AttrTokenStream` from
307/// a `LazyAttrTokenStream`. Both delimiter and non-delimiter tokens
308/// are stored as `FlatToken::Token`. A vector of `FlatToken`s
309/// is then 'parsed' to build up an `AttrTokenStream` with nested
310/// `AttrTokenTree::Delimited` tokens.
311#[derive(Debug, Clone)]
312enum FlatToken {
313 /// A token - this holds both delimiter (e.g. '{' and '}')
314 /// and non-delimiter tokens
315 Token((Token, Spacing)),
316 /// Holds the `AttrsTarget` for an AST node. The `AttrsTarget` is inserted
317 /// directly into the constructed `AttrTokenStream` as an
318 /// `AttrTokenTree::AttrsTarget`.
319 AttrsTarget(AttrsTarget),
320 /// A special 'empty' token that is ignored during the conversion
321 /// to an `AttrTokenStream`. This is used to simplify the
322 /// handling of replace ranges.
323 Empty,
324}
325
326/// An `AttrTokenStream` is similar to a `TokenStream`, but with extra
327/// information about the tokens for attribute targets. This is used
328/// during expansion to perform early cfg-expansion, and to process attributes
329/// during proc-macro invocations.
330#[derive(Clone, Debug, Default, Encodable, Decodable)]
331pub struct AttrTokenStream(pub Arc<Vec<AttrTokenTree>>);
332
333/// Converts a flattened iterator of tokens (including open and close delimiter tokens) into an
334/// `AttrTokenStream`, creating an `AttrTokenTree::Delimited` for each matching pair of open and
335/// close delims.
336fn make_attr_token_stream(
337 iter: impl Iterator<Item = FlatToken>,
338 break_last_token: u32,
339) -> AttrTokenStream {
340 #[derive(Debug)]
341 struct FrameData {
342 // This is `None` for the first frame, `Some` for all others.
343 open_delim_sp: Option<(Delimiter, Span, Spacing)>,
344 inner: Vec<AttrTokenTree>,
345 }
346 // The stack always has at least one element. Storing it separately makes for shorter code.
347 let mut stack_top = FrameData { open_delim_sp: None, inner: vec![] };
348 let mut stack_rest = vec![];
349 for flat_token in iter {
350 match flat_token {
351 FlatToken::Token((token @ Token { kind, span }, spacing)) => {
352 if let Some(delim) = kind.open_delim() {
353 stack_rest.push(mem::replace(
354 &mut stack_top,
355 FrameData { open_delim_sp: Some((delim, span, spacing)), inner: vec![] },
356 ));
357 } else if let Some(delim) = kind.close_delim() {
358 let frame_data = mem::replace(&mut stack_top, stack_rest.pop().unwrap());
359 let (open_delim, open_sp, open_spacing) = frame_data.open_delim_sp.unwrap();
360 assert!(
361 open_delim.eq_ignoring_invisible_origin(&delim),
362 "Mismatched open/close delims: open={open_delim:?} close={span:?}"
363 );
364 let dspan = DelimSpan::from_pair(open_sp, span);
365 let dspacing = DelimSpacing::new(open_spacing, spacing);
366 let stream = AttrTokenStream::new(frame_data.inner);
367 let delimited = AttrTokenTree::Delimited(dspan, dspacing, delim, stream);
368 stack_top.inner.push(delimited);
369 } else {
370 stack_top.inner.push(AttrTokenTree::Token(token, spacing))
371 }
372 }
373 FlatToken::AttrsTarget(target) => {
374 stack_top.inner.push(AttrTokenTree::AttrsTarget(target))
375 }
376 FlatToken::Empty => {}
377 }
378 }
379
380 if break_last_token > 0 {
381 let last_token = stack_top.inner.pop().unwrap();
382 if let AttrTokenTree::Token(last_token, spacing) = last_token {
383 let (unglued, _) = last_token.kind.break_two_token_op(break_last_token).unwrap();
384
385 // Tokens are always ASCII chars, so we can use byte arithmetic here.
386 let mut first_span = last_token.span.shrink_to_lo();
387 first_span =
388 first_span.with_hi(first_span.lo() + rustc_span::BytePos(break_last_token));
389
390 stack_top.inner.push(AttrTokenTree::Token(Token::new(unglued, first_span), spacing));
391 } else {
392 panic!("Unexpected last token {last_token:?}")
393 }
394 }
395 AttrTokenStream::new(stack_top.inner)
396}
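
// Illustrative sketch (not part of the original source): the same stack-based
// delimiter-matching technique as `make_attr_token_stream`, reduced to plain `char`s
// so it can stand alone. Each open delimiter pushes a new frame; each close delimiter
// pops a frame and nests the collected contents into the parent frame.
//
// ```ignore (illustrative, simplified)
// #[derive(Debug)]
// enum Tree {
//     Leaf(char),
//     Group(Vec<Tree>),
// }
//
// fn parse(flat: &str) -> Vec<Tree> {
//     let mut top: Vec<Tree> = vec![];        // the frame currently being filled
//     let mut rest: Vec<Vec<Tree>> = vec![];  // frames of enclosing groups
//     for c in flat.chars() {
//         match c {
//             '(' => rest.push(std::mem::take(&mut top)),
//             ')' => {
//                 let inner = std::mem::replace(&mut top, rest.pop().unwrap());
//                 top.push(Tree::Group(inner));
//             }
//             _ => top.push(Tree::Leaf(c)),
//         }
//     }
//     top
// }
// ```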
397
398/// Like `TokenTree`, but for `AttrTokenStream`.
399#[derive(Clone, Debug, Encodable, Decodable)]
400pub enum AttrTokenTree {
401 Token(Token, Spacing),
402 Delimited(DelimSpan, DelimSpacing, Delimiter, AttrTokenStream),
403 /// Stores the attributes for an attribute target,
404 /// along with the tokens for that attribute target.
405 /// See `AttrsTarget` for more information
406 AttrsTarget(AttrsTarget),
407}
408
409impl AttrTokenStream {
410 pub fn new(tokens: Vec<AttrTokenTree>) -> AttrTokenStream {
411 AttrTokenStream(Arc::new(tokens))
412 }
413
414 /// Converts this `AttrTokenStream` to a plain `Vec<TokenTree>`. During
415 /// conversion, any `AttrTokenTree::AttrsTarget` gets "flattened" back to a
416 /// `TokenStream`, as described in the comment on
417 /// `attrs_and_tokens_to_token_trees`.
418 pub fn to_token_trees(&self) -> Vec<TokenTree> {
419 let mut res = Vec::with_capacity(self.0.len());
420 for tree in self.0.iter() {
421 match tree {
422 AttrTokenTree::Token(inner, spacing) => {
423 res.push(TokenTree::Token(inner.clone(), *spacing));
424 }
425 AttrTokenTree::Delimited(span, spacing, delim, stream) => {
426 res.push(TokenTree::Delimited(
427 *span,
428 *spacing,
429 *delim,
430 TokenStream::new(stream.to_token_trees()),
431 ))
432 }
433 AttrTokenTree::AttrsTarget(target) => {
434 attrs_and_tokens_to_token_trees(&target.attrs, &target.tokens, &mut res);
435 }
436 }
437 }
438 res
439 }
440}
441
442// Converts multiple attributes and the tokens for a target AST node into token trees, and appends
443// them to `res`.
444//
445// Example: if the AST node is "fn f() { blah(); }", then:
446// - Simple if no attributes are present, e.g. "fn f() { blah(); }"
447// - Simple if only outer attributes are present, e.g. "#[outer1] #[outer2] fn f() { blah(); }"
448// - Trickier if inner attributes are present, because they must be moved within the AST node's
449//   tokens, e.g. "#[outer] fn f() { #![inner] blah(); }"
450fn attrs_and_tokens_to_token_trees(
451 attrs: &[Attribute],
452 target_tokens: &LazyAttrTokenStream,
453 res: &mut Vec<TokenTree>,
454) {
455 let idx = attrs.partition_point(|attr| matches!(attr.style, crate::AttrStyle::Outer));
456 let (outer_attrs, inner_attrs) = attrs.split_at(idx);
457
458 // Add outer attribute tokens.
459 for attr in outer_attrs {
460 res.extend(attr.token_trees());
461 }
462
463 // Add target AST node tokens.
464 res.extend(target_tokens.to_attr_token_stream().to_token_trees());
465
466 // Insert inner attribute tokens.
467 if !inner_attrs.is_empty() {
468 let found = insert_inner_attrs(inner_attrs, res);
469 assert!(found, "Failed to find trailing delimited group in: {res:?}");
470 }
471
472 // Inner attributes are only supported on blocks, functions, impls, and
473 // modules. All of these have their inner attributes placed at the
474 // beginning of the rightmost outermost braced group:
475 // e.g. `fn foo() { #![my_attr] }`. (Note: the braces may be within
476 // invisible delimiters.)
477 //
478 // Therefore, we can insert them back into the right location without
479 // needing to do any extra position tracking.
480 //
481 // Note: Outline modules are an exception - they can have attributes like
482 // `#![my_attr]` at the start of a file. Support for custom attributes in
483 // this position is not properly implemented - we always synthesize fake
484 // tokens, so we never reach this code.
485 fn insert_inner_attrs(inner_attrs: &[Attribute], tts: &mut Vec<TokenTree>) -> bool {
486 for tree in tts.iter_mut().rev() {
487 if let TokenTree::Delimited(span, spacing, Delimiter::Brace, stream) = tree {
488 // Found it: the rightmost, outermost braced group.
489 let mut tts = vec![];
490 for inner_attr in inner_attrs {
491 tts.extend(inner_attr.token_trees());
492 }
493 tts.extend(stream.0.iter().cloned());
494 let stream = TokenStream::new(tts);
495 *tree = TokenTree::Delimited(*span, *spacing, Delimiter::Brace, stream);
496 return true;
497 } else if let TokenTree::Delimited(span, spacing, Delimiter::Invisible(src), stream) =
498 tree
499 {
500 // Recurse inside invisible delimiters.
501 let mut vec: Vec<_> = stream.iter().cloned().collect();
502 if insert_inner_attrs(inner_attrs, &mut vec) {
503 *tree = TokenTree::Delimited(
504 *span,
505 *spacing,
506 Delimiter::Invisible(*src),
507 TokenStream::new(vec),
508 );
509 return true;
510 }
511 }
512 }
513 false
514 }
515}
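
// Illustrative sketch (not part of the original source): the resulting token tree
// order for `#[outer] fn f() { #![inner] blah(); }`:
//
// ```ignore (illustrative)
// # [outer]                    // outer attribute tokens come first
// fn f ( )                     // then the target's own tokens...
// { # ! [inner] blah ( ) ; }   // ...with the inner attribute re-inserted at the start
//                              // of the rightmost outermost braced group
// ```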
516
517/// Stores the tokens for an attribute target, along
518/// with its attributes.
519///
520/// This is constructed during parsing when we need to capture
521/// tokens, for `cfg` and `cfg_attr` attributes.
522///
523/// For example, `#[cfg(FALSE)] struct Foo {}` would
524/// have an `attrs` field containing the `#[cfg(FALSE)]` attr,
525/// and a `tokens` field storing the (unparsed) tokens `struct Foo {}`
526///
527/// The `cfg`/`cfg_attr` processing occurs in
528/// `StripUnconfigured::configure_tokens`.
529#[derive(Clone, Debug, Encodable, Decodable)]
530pub struct AttrsTarget {
531 /// Attributes, both outer and inner.
532 /// These are stored in the original order that they were parsed in.
533 pub attrs: AttrVec,
534 /// The underlying tokens for the attribute target that `attrs`
535 /// are applied to
536 pub tokens: LazyAttrTokenStream,
537}
538
539/// Indicates whether a token can join with the following token to form a
540/// compound token. Used for conversions to `proc_macro::Spacing`. Also used to
541/// guide pretty-printing, which is where the `JointHidden` value (which isn't
542/// part of `proc_macro::Spacing`) comes in useful.
543#[derive(Clone, Copy, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
544pub enum Spacing {
545 /// The token cannot join with the following token to form a compound
546 /// token.
547 ///
548 /// In token streams parsed from source code, the compiler will use `Alone`
549 /// for any token immediately followed by whitespace, a non-doc comment, or
550 /// EOF.
551 ///
552 /// When constructing token streams within the compiler, use this for each
553 /// token that (a) should be pretty-printed with a space after it, or (b)
554 /// is the last token in the stream. (In the latter case the choice of
555 /// spacing doesn't matter because it is never used for the last token. We
556 /// arbitrarily use `Alone`.)
557 ///
558 /// Converts to `proc_macro::Spacing::Alone`, and
559 /// `proc_macro::Spacing::Alone` converts back to this.
560 Alone,
561
562 /// The token can join with the following token to form a compound token.
563 ///
564 /// In token streams parsed from source code, the compiler will use `Joint`
565 /// for any token immediately followed by punctuation (as determined by
566 /// `Token::is_punct`).
567 ///
568 /// When constructing token streams within the compiler, use this for each
569 /// token that (a) should be pretty-printed without a space after it, and
570 /// (b) is followed by a punctuation token.
571 ///
572 /// Converts to `proc_macro::Spacing::Joint`, and
573 /// `proc_macro::Spacing::Joint` converts back to this.
574 Joint,
575
576 /// The token can join with the following token to form a compound token,
577 /// but this will not be visible at the proc macro level. (This is what the
578 /// `Hidden` means; see below.)
579 ///
580 /// In token streams parsed from source code, the compiler will use
581 /// `JointHidden` for any token immediately followed by anything not
582 /// covered by the `Alone` and `Joint` cases: an identifier, lifetime,
583 /// literal, delimiter, doc comment.
584 ///
585 /// When constructing token streams, use this for each token that (a)
586 /// should be pretty-printed without a space after it, and (b) is followed
587 /// by a non-punctuation token.
588 ///
589 /// Converts to `proc_macro::Spacing::Alone`, but
590 /// `proc_macro::Spacing::Alone` converts back to `token::Spacing::Alone`.
591 /// Because of that, pretty-printing of `TokenStream`s produced by proc
592 /// macros is unavoidably uglier (with more whitespace between tokens) than
593 /// pretty-printing of `TokenStream`s produced by other means (i.e. parsed
594 /// source code, internally constructed token streams, and token streams
595 /// produced by declarative macros).
596 JointHidden,
597}
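
// Illustrative sketch (not part of the original source): the spacing used when
// constructing the two tokens of `!=` by hand. The `!` is `Joint` because it is
// immediately followed by punctuation (`=`); the `=` is the last token, so its
// spacing is unused and `Alone` is chosen arbitrarily.
//
// ```ignore (illustrative)
// let stream: TokenStream = [
//     TokenTree::token_joint(token::Bang, DUMMY_SP),
//     TokenTree::token_alone(token::Eq, DUMMY_SP),
// ]
// .into_iter()
// .collect();
// ```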
598
599/// A `TokenStream` is an abstract sequence of tokens, organized into [`TokenTree`]s.
600#[derive(Clone, Debug, Default, Encodable, Decodable)]
601pub struct TokenStream(pub(crate) Arc<Vec<TokenTree>>);
602
603impl TokenStream {
604 pub fn new(tts: Vec<TokenTree>) -> TokenStream {
605 TokenStream(Arc::new(tts))
606 }
607
608 pub fn is_empty(&self) -> bool {
609 self.0.is_empty()
610 }
611
612 pub fn len(&self) -> usize {
613 self.0.len()
614 }
615
616 pub fn get(&self, index: usize) -> Option<&TokenTree> {
617 self.0.get(index)
618 }
619
620 pub fn iter(&self) -> TokenStreamIter<'_> {
621 TokenStreamIter::new(self)
622 }
623
624 /// Create a token stream containing a single token with alone spacing. The
625 /// spacing used for the final token in a constructed stream doesn't matter
626 /// because it's never used. In practice we arbitrarily use
627 /// `Spacing::Alone`.
628 pub fn token_alone(kind: TokenKind, span: Span) -> TokenStream {
629 TokenStream::new(vec![TokenTree::token_alone(kind, span)])
630 }
631
632 pub fn from_ast(node: &(impl HasAttrs + HasTokens + fmt::Debug)) -> TokenStream {
633 let tokens = node.tokens().unwrap_or_else(|| panic!("missing tokens for node: {:?}", node));
634 let mut tts = vec![];
635 attrs_and_tokens_to_token_trees(node.attrs(), tokens, &mut tts);
636 TokenStream::new(tts)
637 }
638
639 // If `vec` is not empty, try to glue `tt` onto its last token. The return
640 // value indicates if gluing took place.
641 fn try_glue_to_last(vec: &mut Vec<TokenTree>, tt: &TokenTree) -> bool {
642 if let Some(TokenTree::Token(last_tok, Spacing::Joint | Spacing::JointHidden)) = vec.last()
643 && let TokenTree::Token(tok, spacing) = tt
644 && let Some(glued_tok) = last_tok.glue(tok)
645 {
646 // ...then overwrite the last token tree in `vec` with the
647 // glued token. The caller must then avoid pushing `tt` separately.
648 *vec.last_mut().unwrap() = TokenTree::Token(glued_tok, *spacing);
649 true
650 } else {
651 false
652 }
653 }
654
655 /// Push `tt` onto the end of the stream, possibly gluing it to the last
656 /// token. Uses `make_mut` to maximize efficiency.
657 pub fn push_tree(&mut self, tt: TokenTree) {
658 let vec_mut = Arc::make_mut(&mut self.0);
659
660 if Self::try_glue_to_last(vec_mut, &tt) {
661 // nothing else to do
662 } else {
663 vec_mut.push(tt);
664 }
665 }
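
// Illustrative sketch (not part of the original source): if the stream ends with a
// `Joint` token and `Token::glue` can combine it with the incoming token (assumed
// here for two adjacent `=` tokens forming `==`), `push_tree` overwrites the last
// tree instead of appending a new one.
//
// ```ignore (illustrative)
// let mut stream = TokenStream::new(vec![TokenTree::token_joint(token::Eq, DUMMY_SP)]);
// stream.push_tree(TokenTree::token_alone(token::Eq, DUMMY_SP));
// assert_eq!(stream.len(), 1); // a single `==` token, not two `=` tokens
// ```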
666
667 /// Push `stream` onto the end of the stream, possibly gluing the first
668 /// token tree to the last token. (No other token trees will be glued.)
669 /// Uses `make_mut` to maximize efficiency.
670 pub fn push_stream(&mut self, stream: TokenStream) {
671 let vec_mut = Arc::make_mut(&mut self.0);
672
673 let stream_iter = stream.0.iter().cloned();
674
675 if let Some(first) = stream.0.first()
676 && Self::try_glue_to_last(vec_mut, first)
677 {
678 // Now skip the first token tree from `stream`.
679 vec_mut.extend(stream_iter.skip(1));
680 } else {
681 // Append all of `stream`.
682 vec_mut.extend(stream_iter);
683 }
684 }
685
686 pub fn chunks(&self, chunk_size: usize) -> core::slice::Chunks<'_, TokenTree> {
687 self.0.chunks(chunk_size)
688 }
689
690 /// Desugar doc comments like `/// foo` in the stream into `#[doc =
691 /// r"foo"]`. Modifies the `TokenStream` via `Arc::make_mut`, but as little
692 /// as possible.
693 pub fn desugar_doc_comments(&mut self) {
694 if let Some(desugared_stream) = desugar_inner(self.clone()) {
695 *self = desugared_stream;
696 }
697
698 // The return value is `None` if nothing in `stream` changed.
699 fn desugar_inner(mut stream: TokenStream) -> Option<TokenStream> {
700 let mut i = 0;
701 let mut modified = false;
702 while let Some(tt) = stream.0.get(i) {
703 match tt {
704 &TokenTree::Token(
705 Token { kind: token::DocComment(_, attr_style, data), span },
706 _spacing,
707 ) => {
708 let desugared = desugared_tts(attr_style, data, span);
709 let desugared_len = desugared.len();
710 Arc::make_mut(&mut stream.0).splice(i..i + 1, desugared);
711 modified = true;
712 i += desugared_len;
713 }
714
715 &TokenTree::Token(..) => i += 1,
716
717 &TokenTree::Delimited(sp, spacing, delim, ref delim_stream) => {
718 if let Some(desugared_delim_stream) = desugar_inner(delim_stream.clone()) {
719 let new_tt =
720 TokenTree::Delimited(sp, spacing, delim, desugared_delim_stream);
721 Arc::make_mut(&mut stream.0)[i] = new_tt;
722 modified = true;
723 }
724 i += 1;
725 }
726 }
727 }
728 if modified { Some(stream) } else { None }
729 }
730
731 fn desugared_tts(attr_style: AttrStyle, data: Symbol, span: Span) -> Vec<TokenTree> {
732 // Searches for the occurrences of `"#*` and returns the minimum number of `#`s
733 // required to wrap the text. E.g.
734 // - `abc d` is wrapped as `r"abc d"` (num_of_hashes = 0)
735 // - `abc "d"` is wrapped as `r#"abc "d""#` (num_of_hashes = 1)
736 // - `abc "##d##"` is wrapped as `r###"abc ##"d"##"###` (num_of_hashes = 3)
737 let mut num_of_hashes = 0;
738 let mut count = 0;
739 for ch in data.as_str().chars() {
740 count = match ch {
741 '"' => 1,
742 '#' if count > 0 => count + 1,
743 _ => 0,
744 };
745 num_of_hashes = cmp::max(num_of_hashes, count);
746 }
747
748 // `/// foo` becomes `[doc = r"foo"]`.
749 let delim_span = DelimSpan::from_single(span);
750 let body = TokenTree::Delimited(
751 delim_span,
752 DelimSpacing::new(Spacing::JointHidden, Spacing::Alone),
753 Delimiter::Bracket,
754 [
755 TokenTree::token_alone(token::Ident(sym::doc, token::IdentIsRaw::No), span),
756 TokenTree::token_alone(token::Eq, span),
757 TokenTree::token_alone(
758 TokenKind::lit(token::StrRaw(num_of_hashes), data, None),
759 span,
760 ),
761 ]
762 .into_iter()
763 .collect::<TokenStream>(),
764 );
765
766 if attr_style == AttrStyle::Inner {
767 vec![
768 TokenTree::token_joint(token::Pound, span),
769 TokenTree::token_joint_hidden(token::Bang, span),
770 body,
771 ]
772 } else {
773 vec![TokenTree::token_joint_hidden(token::Pound, span), body]
774 }
775 }
776 }
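
// Illustrative sketch (not part of the original source): the shape of the output for
// an outer doc comment whose text contains a quote. One `#` is needed to wrap the
// text in a raw string, so `/// say "hi"` desugars to token trees spelling:
//
// ```ignore (illustrative)
// # [doc = r#" say "hi""#]
// ```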
777
778 /// If this stream appears to be missing a comma between two adjacent arguments, return a
779 /// new `TokenStream` with the comma inserted, plus the insertion span, for diagnostic suggestions.
780 pub fn add_comma(&self) -> Option<(TokenStream, Span)> {
781 // Used to suggest if a user writes `foo!(a b);`
782 let mut suggestion = None;
783 let mut iter = self.0.iter().enumerate().peekable();
784 while let Some((pos, ts)) = iter.next() {
785 if let Some((_, next)) = iter.peek() {
786 let sp = match (&ts, &next) {
787 (_, TokenTree::Token(Token { kind: token::Comma, .. }, _)) => continue,
788 (
789 TokenTree::Token(token_left, Spacing::Alone),
790 TokenTree::Token(token_right, _),
791 ) if (token_left.is_non_reserved_ident() || token_left.is_lit())
792 && (token_right.is_non_reserved_ident() || token_right.is_lit()) =>
793 {
794 token_left.span
795 }
796 (TokenTree::Delimited(sp, ..), _) => sp.entire(),
797 _ => continue,
798 };
799 let sp = sp.shrink_to_hi();
800 let comma = TokenTree::token_alone(token::Comma, sp);
801 suggestion = Some((pos, comma, sp));
802 }
803 }
804 if let Some((pos, comma, sp)) = suggestion {
805 let mut new_stream = Vec::with_capacity(self.0.len() + 1);
806 let parts = self.0.split_at(pos + 1);
807 new_stream.extend_from_slice(parts.0);
808 new_stream.push(comma);
809 new_stream.extend_from_slice(parts.1);
810 return Some((TokenStream::new(new_stream), sp));
811 }
812 None
813 }
814}
815
816impl PartialEq<TokenStream> for TokenStream {
817 fn eq(&self, other: &TokenStream) -> bool {
818 self.iter().eq(other.iter())
819 }
820}
821
822impl Eq for TokenStream {}
823
824impl FromIterator<TokenTree> for TokenStream {
825 fn from_iter<I: IntoIterator<Item = TokenTree>>(iter: I) -> Self {
826 TokenStream::new(iter.into_iter().collect::<Vec<TokenTree>>())
827 }
828}
829
830impl<CTX> HashStable<CTX> for TokenStream
831where
832 CTX: crate::HashStableContext,
833{
834 fn hash_stable(&self, hcx: &mut CTX, hasher: &mut StableHasher) {
835 for sub_tt in self.iter() {
836 sub_tt.hash_stable(hcx, hasher);
837 }
838 }
839}
840
841#[derive(Clone)]
842pub struct TokenStreamIter<'t> {
843 stream: &'t TokenStream,
844 index: usize,
845}
846
847impl<'t> TokenStreamIter<'t> {
848 fn new(stream: &'t TokenStream) -> Self {
849 TokenStreamIter { stream, index: 0 }
850 }
851
852 // Peeking could be done via `Peekable`, but most uses of this iterator need
853 // peeking, so building it in here is simpler and avoids the need to wrap in
854 // `peekable`/`Peekable` at all the use sites.
855 pub fn peek(&self) -> Option<&'t TokenTree> {
856 self.stream.0.get(self.index)
857 }
858}
859
860impl<'t> Iterator for TokenStreamIter<'t> {
861 type Item = &'t TokenTree;
862
863 fn next(&mut self) -> Option<&'t TokenTree> {
864 self.stream.0.get(self.index).map(|tree| {
865 self.index += 1;
866 tree
867 })
868 }
869}
870
871#[derive(Clone, Debug)]
872pub struct TokenTreeCursor {
873 stream: TokenStream,
874 /// Points to the current token tree in the stream. In `TokenCursor::curr`,
875 /// this can be any token tree. In `TokenCursor::stack`, this is always a
876 /// `TokenTree::Delimited`.
877 index: usize,
878}
879
880impl TokenTreeCursor {
881 #[inline]
882 pub fn new(stream: TokenStream) -> Self {
883 TokenTreeCursor { stream, index: 0 }
884 }
885
886 #[inline]
887 pub fn curr(&self) -> Option<&TokenTree> {
888 self.stream.get(self.index)
889 }
890
891 pub fn look_ahead(&self, n: usize) -> Option<&TokenTree> {
892 self.stream.get(self.index + n)
893 }
894
895 #[inline]
896 pub fn bump(&mut self) {
897 self.index += 1;
898 }
899
900 // For skipping ahead in rare circumstances.
901 #[inline]
902 pub fn bump_to_end(&mut self) {
903 self.index = self.stream.len();
904 }
905}
906
907/// A `TokenStream` cursor that produces `Token`s. It's a bit odd that
908/// we (a) lex tokens into a nice tree structure (`TokenStream`), and then (b)
909/// use this type to emit them as a linear sequence. But a linear sequence is
910/// what the parser expects, for the most part.
911#[derive(Clone, Debug)]
912pub struct TokenCursor {
913 // Cursor for the current (innermost) token stream. The index within the
914 // cursor can point to any token tree in the stream (or one past the end).
915 // The delimiters for this token stream are found in `self.stack.last()`;
916 // if that is `None` we are in the outermost token stream which never has
917 // delimiters.
918 pub curr: TokenTreeCursor,
919
920 // Token streams surrounding the current one. The index within each cursor
921 // always points to a `TokenTree::Delimited`.
922 pub stack: Vec<TokenTreeCursor>,
923}
924
925impl TokenCursor {
926 pub fn next(&mut self) -> (Token, Spacing) {
927 self.inlined_next()
928 }
929
930 /// This always-inlined version should only be used on hot code paths.
931 #[inline(always)]
932 pub fn inlined_next(&mut self) -> (Token, Spacing) {
933 loop {
934 // FIXME: we currently don't return `Delimiter::Invisible` open/close delims. To fix
935 // #67062 we will need to, whereupon the `delim.skip()` conditions
936 // below can be removed.
937 if let Some(tree) = self.curr.curr() {
938 match tree {
939 &TokenTree::Token(token, spacing) => {
940 debug_assert!(!token.kind.is_delim());
941 let res = (token, spacing);
942 self.curr.bump();
943 return res;
944 }
945 &TokenTree::Delimited(sp, spacing, delim, ref tts) => {
946 let trees = TokenTreeCursor::new(tts.clone());
947 self.stack.push(mem::replace(&mut self.curr, trees));
948 if !delim.skip() {
949 return (Token::new(delim.as_open_token_kind(), sp.open), spacing.open);
950 }
951 // No open delimiter to return; continue on to the next iteration.
952 }
953 };
954 } else if let Some(parent) = self.stack.pop() {
955 // We have exhausted this token stream. Move back to its parent token stream.
956 let Some(&TokenTree::Delimited(span, spacing, delim, _)) = parent.curr() else {
957 panic!("parent should be Delimited")
958 };
959 self.curr = parent;
960 self.curr.bump(); // move past the `Delimited`
961 if !delim.skip() {
962 return (Token::new(delim.as_close_token_kind(), span.close), spacing.close);
963 }
964 // No close delimiter to return; continue on to the next iteration.
965 } else {
966 // We have exhausted the outermost token stream. The use of
967 // `Spacing::Alone` is arbitrary and immaterial, because the
968 // `Eof` token's spacing is never used.
969 return (Token::new(token::Eof, DUMMY_SP), Spacing::Alone);
970 }
971 }
972 }
973}
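
// Illustrative sketch (not part of the original source): a `TokenCursor` flattens the
// tree structure back into a linear token sequence, synthesizing open/close delimiter
// tokens as it enters and leaves each `Delimited` tree (visible delimiters such as
// braces are not skipped).
//
// ```ignore (illustrative)
// let inner = TokenStream::token_alone(token::Comma, DUMMY_SP);
// let stream = TokenStream::new(vec![TokenTree::Delimited(
//     DelimSpan::dummy(),
//     DelimSpacing::new(Spacing::Alone, Spacing::Alone),
//     Delimiter::Brace,
//     inner,
// )]);
// let mut cursor = TokenCursor { curr: TokenTreeCursor::new(stream), stack: vec![] };
// loop {
//     let (token, _spacing) = cursor.next();
//     if token.kind == token::Eof {
//         break;
//     }
//     // Yields `{`, then `,`, then `}` as a flat sequence.
// }
// ```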
974
975#[derive(Debug, Copy, Clone, PartialEq, Encodable, Decodable, HashStable_Generic, Walkable)]
976pub struct DelimSpan {
977 pub open: Span,
978 pub close: Span,
979}
980
981impl DelimSpan {
982 pub fn from_single(sp: Span) -> Self {
983 DelimSpan { open: sp, close: sp }
984 }
985
986 pub fn from_pair(open: Span, close: Span) -> Self {
987 DelimSpan { open, close }
988 }
989
990 pub fn dummy() -> Self {
991 Self::from_single(DUMMY_SP)
992 }
993
994 pub fn entire(self) -> Span {
995 self.open.with_hi(self.close.hi())
996 }
997}
998
999#[derive(Copy, Clone, Debug, PartialEq, Encodable, Decodable, HashStable_Generic)]
1000pub struct DelimSpacing {
1001 pub open: Spacing,
1002 pub close: Spacing,
1003}
1004
1005impl DelimSpacing {
1006 pub fn new(open: Spacing, close: Spacing) -> DelimSpacing {
1007 DelimSpacing { open, close }
1008 }
1009}
1010
1011// Some types are used a lot. Make sure they don't unintentionally get bigger.
1012#[cfg(target_pointer_width = "64")]
1013mod size_asserts {
1014 use rustc_data_structures::static_assert_size;
1015
1016 use super::*;
1017 // tidy-alphabetical-start
1018 static_assert_size!(AttrTokenStream, 8);
1019 static_assert_size!(AttrTokenTree, 32);
1020 static_assert_size!(LazyAttrTokenStream, 8);
1021 static_assert_size!(LazyAttrTokenStreamInner, 88);
1022 static_assert_size!(Option<LazyAttrTokenStream>, 8); // must be small, used in many AST nodes
1023 static_assert_size!(TokenStream, 8);
1024 static_assert_size!(TokenTree, 32);
1025 // tidy-alphabetical-end
1026}