cargo/util/
frontmatter.rs

1use crate::CargoResult;
2
/// The pieces of a script file as split apart by [`ScriptSource::parse`].
///
/// Every field borrows from the text that was parsed; nothing is copied.
#[derive(Debug)]
pub struct ScriptSource<'s> {
    /// The leading `#!` line, when the input has one.
    shebang: Option<&'s str>,
    /// The non-empty text that followed the opening fence on the same line.
    info: Option<&'s str>,
    /// The text enclosed by the opening and closing fence lines.
    frontmatter: Option<&'s str>,
    /// What remains of the input after the shebang and frontmatter.
    content: &'s str,
}
10
11impl<'s> ScriptSource<'s> {
12    pub fn parse(input: &'s str) -> CargoResult<Self> {
13        let mut source = Self {
14            shebang: None,
15            info: None,
16            frontmatter: None,
17            content: input,
18        };
19
20        if let Some(shebang_end) = strip_shebang(source.content) {
21            let (shebang, content) = source.content.split_at(shebang_end);
22            source.shebang = Some(shebang);
23            source.content = content;
24        }
25
26        let mut rest = source.content;
27
28        // Whitespace may precede a frontmatter but must end with a newline
29        if let Some(nl_end) = strip_ws_lines(rest) {
30            rest = &rest[nl_end..];
31        }
32
33        // Opens with a line that starts with 3 or more `-` followed by an optional identifier
34        const FENCE_CHAR: char = '-';
35        let fence_length = rest
36            .char_indices()
37            .find_map(|(i, c)| (c != FENCE_CHAR).then_some(i))
38            .unwrap_or(rest.len());
39        match fence_length {
40            0 => {
41                return Ok(source);
42            }
43            1 | 2 => {
44                // either not a frontmatter or invalid frontmatter opening
45                anyhow::bail!(
46                    "found {fence_length} `{FENCE_CHAR}` in rust frontmatter, expected at least 3"
47                )
48            }
49            _ => {}
50        }
51        let (fence_pattern, rest) = rest.split_at(fence_length);
52        let Some(info_end_index) = rest.find('\n') else {
53            anyhow::bail!("no closing `{fence_pattern}` found for frontmatter");
54        };
55        let (info, rest) = rest.split_at(info_end_index);
56        let info = info.trim_matches(is_whitespace);
57        if !info.is_empty() {
58            source.info = Some(info);
59        }
60
61        // Ends with a line that starts with a matching number of `-` only followed by whitespace
62        let nl_fence_pattern = format!("\n{fence_pattern}");
63        let Some(frontmatter_nl) = rest.find(&nl_fence_pattern) else {
64            anyhow::bail!("no closing `{fence_pattern}` found for frontmatter");
65        };
66        let frontmatter = &rest[..frontmatter_nl + 1];
67        let frontmatter = frontmatter
68            .strip_prefix('\n')
69            .expect("earlier `found` + `split_at` left us here");
70        source.frontmatter = Some(frontmatter);
71        let rest = &rest[frontmatter_nl + nl_fence_pattern.len()..];
72
73        let (after_closing_fence, rest) = rest.split_once("\n").unwrap_or((rest, ""));
74        let after_closing_fence = after_closing_fence.trim_matches(is_whitespace);
75        if !after_closing_fence.is_empty() {
76            // extra characters beyond the original fence pattern, even if they are extra `-`
77            anyhow::bail!("trailing characters found after frontmatter close");
78        }
79
80        let frontmatter_len = input.len() - rest.len();
81        source.content = &input[frontmatter_len..];
82
83        let repeat = Self::parse(source.content)?;
84        if repeat.frontmatter.is_some() {
85            anyhow::bail!("only one frontmatter is supported");
86        }
87
88        Ok(source)
89    }
90
91    pub fn shebang(&self) -> Option<&'s str> {
92        self.shebang
93    }
94
95    pub fn info(&self) -> Option<&'s str> {
96        self.info
97    }
98
99    pub fn frontmatter(&self) -> Option<&'s str> {
100        self.frontmatter
101    }
102
103    pub fn content(&self) -> &'s str {
104        self.content
105    }
106}
107
/// Returns the index just past the shebang line, or `None` when `input` has no shebang.
///
/// The index is one past the first `\n`, or `input.len()` when the shebang line is unterminated.
pub fn strip_shebang(input: &str) -> Option<usize> {
    // Mirrors `strip_shebang` in rust-lang/rust's compiler/rustc_lexer/src/lib.rs.
    // A shebang must literally begin with `#!`, with no whitespace in front of it.
    // To keep things simple, every line beginning with `#!` counts as a shebang,
    // whatever restrictions individual platforms place on shebangs.
    let after_marker = input.strip_prefix("#!")?;

    // If the next non-whitespace character is `[`, this may be valid Rust code
    // (an inner attribute), so treat it as Rust code rather than a shebang.
    //
    // NOTE: rustc also treats line and block comments as whitespace here; that is
    // deliberately not replicated to avoid any further awareness of Rust grammar.
    if after_marker.trim_start().starts_with('[') {
        return None;
    }

    // No other choice than to consider this a shebang.
    let newline_end = match input.find('\n') {
        Some(pos) => pos + 1,
        None => input.len(),
    };
    Some(newline_end)
}
128
129/// Returns the index after any lines with only whitespace, if present
130pub fn strip_ws_lines(input: &str) -> Option<usize> {
131    let ws_end = input.find(|c| !is_whitespace(c)).unwrap_or(input.len());
132    if ws_end == 0 {
133        return None;
134    }
135
136    let nl_start = input[0..ws_end].rfind('\n')?;
137    let nl_end = nl_start + 1;
138    Some(nl_end)
139}
140
/// Reports whether `c` counts as whitespace under the Rust language definition.
/// The [Rust language reference](https://doc.rust-lang.org/reference/whitespace.html)
/// defines the character classes involved.
///
/// See rust-lang/rust's compiler/rustc_lexer/src/lib.rs `is_whitespace`
fn is_whitespace(c: char) -> bool {
    // This is the Pattern_White_Space set.
    //
    // The set is stable (it does not change between Unicode versions),
    // so hard-coding the code points is fine.
    match c {
        // ASCII: \t, \n, vertical tab, form feed, \r
        '\u{0009}'..='\u{000D}' => true,
        // ASCII space
        '\u{0020}' => true,
        // NEXT LINE from latin1
        '\u{0085}' => true,
        // Bidi markers: LEFT-TO-RIGHT MARK, RIGHT-TO-LEFT MARK
        '\u{200E}' | '\u{200F}' => true,
        // Dedicated Unicode whitespace: LINE SEPARATOR, PARAGRAPH SEPARATOR
        '\u{2028}' | '\u{2029}' => true,
        _ => false,
    }
}
174
#[cfg(test)]
mod test {
    use snapbox::assert_data_eq;
    use snapbox::prelude::*;
    use snapbox::str;

    use super::*;

    /// Parses `source`, renders each part of the result on its own line, and compares the
    /// rendering against the `expected` snapshot. Panics if parsing fails.
    #[track_caller]
    fn assert_source(source: &str, expected: impl IntoData) {
        use std::fmt::Write as _;

        let parsed = match ScriptSource::parse(source) {
            Err(err) => panic!("unexpected err: {err}"),
            Ok(parsed) => parsed,
        };

        let mut rendered = String::new();
        for (field, value) in [
            ("shebang", parsed.shebang()),
            ("info", parsed.info()),
            ("frontmatter", parsed.frontmatter()),
        ] {
            write_optional_field(&mut rendered, field, value);
        }
        writeln!(&mut rendered, "content: {:?}", parsed.content()).unwrap();
        assert_data_eq!(rendered, expected.raw());
    }

    /// Appends a `field: value` line to `writer`, debug-quoting a present value and writing a
    /// bare `None` for an absent one.
    fn write_optional_field(writer: &mut dyn std::fmt::Write, field: &str, value: Option<&str>) {
        let outcome = match value {
            Some(value) => writeln!(writer, "{field}: {value:?}"),
            None => writeln!(writer, "{field}: None"),
        };
        outcome.unwrap();
    }

    /// Asserts that `result` is an error whose display text matches the `err` snapshot.
    #[track_caller]
    fn assert_err(
        result: Result<impl std::fmt::Debug, impl std::fmt::Display>,
        err: impl IntoData,
    ) {
        let actual = match result {
            Err(actual) => actual,
            Ok(d) => panic!("unexpected Ok({d:#?})"),
        };
        snapbox::assert_data_eq!(actual.to_string(), err.raw());
    }

    /// Plain Rust without shebang or frontmatter comes back untouched as content.
    #[test]
    fn split_default() {
        assert_source(
            r#"fn main() {}
"#,
            str![[r#"
shebang: None
info: None
frontmatter: None
content: "fn main() {}\n"

"#]],
        );
    }

    /// A basic `---` fenced frontmatter is separated from the content.
    #[test]
    fn split_dependencies() {
        assert_source(
            r#"---
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r#"
shebang: None
info: None
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "fn main() {}\n"

"#]],
        );
    }

    /// Text directly after the opening fence is reported as the infostring.
    #[test]
    fn split_infostring() {
        assert_source(
            r#"---cargo
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r#"
shebang: None
info: "cargo"
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "fn main() {}\n"

"#]],
        );
    }

    /// Whitespace around the infostring is trimmed away.
    #[test]
    fn split_infostring_whitespace() {
        assert_source(
            r#"--- cargo 
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r#"
shebang: None
info: "cargo"
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "fn main() {}\n"

"#]],
        );
    }

    /// A shebang line ahead of the frontmatter is captured, newline included.
    #[test]
    fn split_shebang() {
        assert_source(
            r#"#!/usr/bin/env cargo
---
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "fn main() {}\n"

"##]],
        );
    }

    /// CRLF line endings are accepted and preserved in the extracted pieces.
    #[test]
    fn split_crlf() {
        assert_source(
            "#!/usr/bin/env cargo\r\n---\r\n[dependencies]\r\ntime=\"0.1.25\"\r\n---\r\nfn main() {}",
            str![[r##"
shebang: "#!/usr/bin/env cargo\r\n"
info: None
frontmatter: "[dependencies]\r\ntime=\"0.1.25\"\r\n"
content: "fn main() {}"

"##]],
        );
    }

    /// Whitespace-only lines before the opening fence are skipped; blank lines after the
    /// closing fence remain in the content.
    #[test]
    fn split_leading_newlines() {
        assert_source(
            r#"#!/usr/bin/env cargo
    


---
[dependencies]
time="0.1.25"
---


fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "\n\nfn main() {}\n"

"##]],
        );
    }

    /// A leading attribute is not a shebang, and a fence after code is not a frontmatter.
    #[test]
    fn split_attribute() {
        assert_source(
            r#"#[allow(dead_code)]
---
[dependencies]
time="0.1.25"
---
fn main() {}
"#,
            str![[r##"
shebang: None
info: None
frontmatter: None
content: "#[allow(dead_code)]\n---\n[dependencies]\ntime=\"0.1.25\"\n---\nfn main() {}\n"

"##]],
        );
    }

    /// Fences longer than three dashes work when opening and closing lengths match.
    #[test]
    fn split_extra_dash() {
        assert_source(
            r#"#!/usr/bin/env cargo
----------
[dependencies]
time="0.1.25"
----------

fn main() {}"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "[dependencies]\ntime=\"0.1.25\"\n"
content: "\nfn main() {}"

"##]],
        );
    }

    /// A two-dash opening is rejected with an error.
    #[test]
    fn split_too_few_dashes() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
--
[dependencies]
time="0.1.25"
--
fn main() {}
"#,
            ),
            str!["found 2 `-` in rust frontmatter, expected at least 3"],
        );
    }

    /// An indented fence is not recognized as a frontmatter.
    #[test]
    fn split_indent() {
        assert_source(
            r#"#!/usr/bin/env cargo
    ---
    [dependencies]
    time="0.1.25"
    ----

fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: None
content: "    ---\n    [dependencies]\n    time=\"0.1.25\"\n    ----\n\nfn main() {}\n"

"##]],
        );
    }

    /// A longer outer fence allows shorter dash runs inside the frontmatter.
    #[test]
    fn split_escaped() {
        assert_source(
            r#"#!/usr/bin/env cargo
-----
---
---
-----

fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "---\n---\n"
content: "\nfn main() {}\n"

"##]],
        );
    }

    /// Longer dash runs inside a shorter fence are rejected as trailing characters.
    #[test]
    fn split_invalid_escaped() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
---
-----
-----
---

fn main() {}
"#,
            ),
            str!["trailing characters found after frontmatter close"],
        );
    }

    /// Dashes that do not start a line do not close the frontmatter.
    #[test]
    fn split_dashes_in_body() {
        assert_source(
            r#"#!/usr/bin/env cargo
---
Hello---
World
---

fn main() {}
"#,
            str![[r##"
shebang: "#!/usr/bin/env cargo\n"
info: None
frontmatter: "Hello---\nWorld\n"
content: "\nfn main() {}\n"

"##]],
        );
    }

    /// A closing fence longer than the opening one is rejected.
    #[test]
    fn split_mismatched_dashes() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
---
[dependencies]
time="0.1.25"
----
fn main() {}
"#,
            ),
            str!["trailing characters found after frontmatter close"],
        );
    }

    /// An opening fence without a matching closing fence is rejected.
    #[test]
    fn split_missing_close() {
        assert_err(
            ScriptSource::parse(
                r#"#!/usr/bin/env cargo
---
[dependencies]
time="0.1.25"
fn main() {}
"#,
            ),
            str!["no closing `---` found for frontmatter"],
        );
    }
}
516}