// rustc_parse/lib.rs

1//! The main parser interface.
2
3// tidy-alphabetical-start
4#![allow(rustc::diagnostic_outside_of_impl)]
5#![allow(rustc::untranslatable_diagnostic)]
6#![feature(assert_matches)]
7#![feature(box_patterns)]
8#![feature(debug_closure_helpers)]
9#![feature(default_field_values)]
10#![feature(if_let_guard)]
11#![feature(iter_intersperse)]
12#![recursion_limit = "256"]
13// tidy-alphabetical-end
14
15use std::path::{Path, PathBuf};
16use std::str::Utf8Error;
17use std::sync::Arc;
18
19use rustc_ast as ast;
20use rustc_ast::tokenstream::{DelimSpan, TokenStream};
21use rustc_ast::{AttrItem, Attribute, MetaItemInner, token};
22use rustc_ast_pretty::pprust;
23use rustc_errors::{Diag, EmissionGuarantee, FatalError, PResult, pluralize};
24use rustc_session::parse::ParseSess;
25use rustc_span::source_map::SourceMap;
26use rustc_span::{FileName, SourceFile, Span};
27pub use unicode_normalization::UNICODE_VERSION as UNICODE_NORMALIZATION_VERSION;
28
// Name attached to token streams that originate from macro arguments —
// NOTE(review): presumably passed as the subparser name to `Parser::new`
// (cf. `parse_in` below); confirm against callers in other crates.
pub const MACRO_ARGUMENTS: Option<&str> = Some("macro arguments");
30
31#[macro_use]
32pub mod parser;
33use parser::Parser;
34use rustc_ast::token::Delimiter;
35
36use crate::lexer::StripTokens;
37
38pub mod lexer;
39
40mod errors;
41
// Pulls this crate's diagnostic messages in from the Fluent file `messages.ftl`.
rustc_fluent_macro::fluent_messages! { "../messages.ftl" }
43
44// Unwrap the result if `Ok`, otherwise emit the diagnostics and abort.
45pub fn unwrap_or_emit_fatal<T>(expr: Result<T, Vec<Diag<'_>>>) -> T {
46    match expr {
47        Ok(expr) => expr,
48        Err(errs) => {
49            for err in errs {
50                err.emit();
51            }
52            FatalError.raise()
53        }
54    }
55}
56
57/// Creates a new parser from a source string. On failure, the errors must be consumed via
58/// `unwrap_or_emit_fatal`, `emit`, `cancel`, etc., otherwise a panic will occur when they are
59/// dropped.
60pub fn new_parser_from_source_str(
61    psess: &ParseSess,
62    name: FileName,
63    source: String,
64) -> Result<Parser<'_>, Vec<Diag<'_>>> {
65    let source_file = psess.source_map().new_source_file(name, source);
66    new_parser_from_source_file(psess, source_file, StripTokens::ShebangAndFrontmatter)
67}
68
69/// Creates a new parser from a simple (no shebang, no frontmatter) source string.
70///
71/// On failure, the errors must be consumed via `unwrap_or_emit_fatal`, `emit`, `cancel`,
72/// etc., otherwise a panic will occur when they are dropped.
73pub fn new_parser_from_simple_source_str(
74    psess: &ParseSess,
75    name: FileName,
76    source: String,
77) -> Result<Parser<'_>, Vec<Diag<'_>>> {
78    let source_file = psess.source_map().new_source_file(name, source);
79    new_parser_from_source_file(psess, source_file, StripTokens::Nothing)
80}
81
82/// Creates a new parser from a filename. On failure, the errors must be consumed via
83/// `unwrap_or_emit_fatal`, `emit`, `cancel`, etc., otherwise a panic will occur when they are
84/// dropped.
85///
86/// If a span is given, that is used on an error as the source of the problem.
87pub fn new_parser_from_file<'a>(
88    psess: &'a ParseSess,
89    path: &Path,
90    sp: Option<Span>,
91) -> Result<Parser<'a>, Vec<Diag<'a>>> {
92    let sm = psess.source_map();
93    let source_file = sm.load_file(path).unwrap_or_else(|e| {
94        let msg = format!("couldn't read `{}`: {}", path.display(), e);
95        let mut err = psess.dcx().struct_fatal(msg);
96        if let Ok(contents) = std::fs::read(path)
97            && let Err(utf8err) = String::from_utf8(contents.clone())
98        {
99            utf8_error(
100                sm,
101                &path.display().to_string(),
102                sp,
103                &mut err,
104                utf8err.utf8_error(),
105                &contents,
106            );
107        }
108        if let Some(sp) = sp {
109            err.span(sp);
110        }
111        err.emit();
112    });
113    new_parser_from_source_file(psess, source_file, StripTokens::ShebangAndFrontmatter)
114}
115
116pub fn utf8_error<E: EmissionGuarantee>(
117    sm: &SourceMap,
118    path: &str,
119    sp: Option<Span>,
120    err: &mut Diag<'_, E>,
121    utf8err: Utf8Error,
122    contents: &[u8],
123) {
124    // The file exists, but it wasn't valid UTF-8.
125    let start = utf8err.valid_up_to();
126    let note = format!("invalid utf-8 at byte `{start}`");
127    let msg = if let Some(len) = utf8err.error_len() {
128        format!(
129            "byte{s} `{bytes}` {are} not valid utf-8",
130            bytes = if len == 1 {
131                format!("{:?}", contents[start])
132            } else {
133                format!("{:?}", &contents[start..start + len])
134            },
135            s = pluralize!(len),
136            are = if len == 1 { "is" } else { "are" },
137        )
138    } else {
139        note.clone()
140    };
141    let contents = String::from_utf8_lossy(contents).to_string();
142    let source = sm.new_source_file(PathBuf::from(path).into(), contents);
143    let span = Span::with_root_ctxt(
144        source.normalized_byte_pos(start as u32),
145        source.normalized_byte_pos(start as u32),
146    );
147    if span.is_dummy() {
148        err.note(note);
149    } else {
150        if sp.is_some() {
151            err.span_note(span, msg);
152        } else {
153            err.span(span);
154            err.span_label(span, msg);
155        }
156    }
157}
158
159/// Given a session and a `source_file`, return a parser. Returns any buffered errors from lexing
160/// the initial token stream.
161fn new_parser_from_source_file(
162    psess: &ParseSess,
163    source_file: Arc<SourceFile>,
164    strip_tokens: StripTokens,
165) -> Result<Parser<'_>, Vec<Diag<'_>>> {
166    let end_pos = source_file.end_position();
167    let stream = source_file_to_stream(psess, source_file, None, strip_tokens)?;
168    let mut parser = Parser::new(psess, stream, None);
169    if parser.token == token::Eof {
170        parser.token.span = Span::new(end_pos, end_pos, parser.token.span.ctxt(), None);
171    }
172    Ok(parser)
173}
174
175pub fn source_str_to_stream(
176    psess: &ParseSess,
177    name: FileName,
178    source: String,
179    override_span: Option<Span>,
180) -> Result<TokenStream, Vec<Diag<'_>>> {
181    let source_file = psess.source_map().new_source_file(name, source);
182    // used mainly for `proc_macro` and the likes, not for our parsing purposes, so don't parse
183    // frontmatters as frontmatters, but for compatibility reason still strip the shebang
184    source_file_to_stream(psess, source_file, override_span, StripTokens::Shebang)
185}
186
187/// Given a source file, produces a sequence of token trees. Returns any buffered errors from
188/// parsing the token stream.
189fn source_file_to_stream<'psess>(
190    psess: &'psess ParseSess,
191    source_file: Arc<SourceFile>,
192    override_span: Option<Span>,
193    strip_tokens: StripTokens,
194) -> Result<TokenStream, Vec<Diag<'psess>>> {
195    let src = source_file.src.as_ref().unwrap_or_else(|| {
196        psess.dcx().bug(format!(
197            "cannot lex `source_file` without source: {}",
198            psess.source_map().filename_for_diagnostics(&source_file.name)
199        ));
200    });
201
202    lexer::lex_token_trees(psess, src.as_str(), source_file.start_pos, override_span, strip_tokens)
203}
204
205/// Runs the given subparser `f` on the tokens of the given `attr`'s item.
206pub fn parse_in<'a, T>(
207    psess: &'a ParseSess,
208    tts: TokenStream,
209    name: &'static str,
210    mut f: impl FnMut(&mut Parser<'a>) -> PResult<'a, T>,
211) -> PResult<'a, T> {
212    let mut parser = Parser::new(psess, tts, Some(name));
213    let result = f(&mut parser)?;
214    if parser.token != token::Eof {
215        parser.unexpected()?;
216    }
217    Ok(result)
218}
219
220pub fn fake_token_stream_for_item(psess: &ParseSess, item: &ast::Item) -> TokenStream {
221    let source = pprust::item_to_string(item);
222    let filename = FileName::macro_expansion_source_code(&source);
223    unwrap_or_emit_fatal(source_str_to_stream(psess, filename, source, Some(item.span)))
224}
225
226pub fn fake_token_stream_for_crate(psess: &ParseSess, krate: &ast::Crate) -> TokenStream {
227    let source = pprust::crate_to_string_for_macros(krate);
228    let filename = FileName::macro_expansion_source_code(&source);
229    unwrap_or_emit_fatal(source_str_to_stream(
230        psess,
231        filename,
232        source,
233        Some(krate.spans.inner_span),
234    ))
235}
236
237pub fn parse_cfg_attr(
238    cfg_attr: &Attribute,
239    psess: &ParseSess,
240) -> Option<(MetaItemInner, Vec<(AttrItem, Span)>)> {
241    const CFG_ATTR_GRAMMAR_HELP: &str = "#[cfg_attr(condition, attribute, other_attribute, ...)]";
242    const CFG_ATTR_NOTE_REF: &str = "for more information, visit \
243        <https://doc.rust-lang.org/reference/conditional-compilation.html#the-cfg_attr-attribute>";
244
245    match cfg_attr.get_normal_item().args {
246        ast::AttrArgs::Delimited(ast::DelimArgs { dspan, delim, ref tokens })
247            if !tokens.is_empty() =>
248        {
249            check_cfg_attr_bad_delim(psess, dspan, delim);
250            match parse_in(psess, tokens.clone(), "`cfg_attr` input", |p| p.parse_cfg_attr()) {
251                Ok(r) => return Some(r),
252                Err(e) => {
253                    e.with_help(format!("the valid syntax is `{CFG_ATTR_GRAMMAR_HELP}`"))
254                        .with_note(CFG_ATTR_NOTE_REF)
255                        .emit();
256                }
257            }
258        }
259        _ => {
260            psess.dcx().emit_err(errors::MalformedCfgAttr {
261                span: cfg_attr.span,
262                sugg: CFG_ATTR_GRAMMAR_HELP,
263            });
264        }
265    }
266    None
267}
268
269fn check_cfg_attr_bad_delim(psess: &ParseSess, span: DelimSpan, delim: Delimiter) {
270    if let Delimiter::Parenthesis = delim {
271        return;
272    }
273    psess.dcx().emit_err(errors::CfgAttrBadDelim {
274        span: span.entire(),
275        sugg: errors::MetaBadDelimSugg { open: span.open, close: span.close },
276    });
277}