cargo/ops/
vendor.rs

1use crate::core::shell::Verbosity;
2use crate::core::SourceId;
3use crate::core::{GitReference, Package, Workspace};
4use crate::ops;
5use crate::sources::path::PathSource;
6use crate::sources::RegistrySource;
7use crate::sources::SourceConfigMap;
8use crate::sources::CRATES_IO_REGISTRY;
9use crate::util::cache_lock::CacheLockMode;
10use crate::util::{try_canonicalize, CargoResult, GlobalContext};
11
12use anyhow::{bail, Context as _};
13use cargo_util::{paths, Sha256};
14use cargo_util_schemas::core::SourceKind;
15use serde::Serialize;
16use walkdir::WalkDir;
17
18use std::collections::HashSet;
19use std::collections::{BTreeMap, BTreeSet, HashMap};
20use std::ffi::OsStr;
21use std::fs::{self, File, OpenOptions};
22use std::io::{Read, Write};
23use std::path::{Path, PathBuf};
24
/// Options controlling a [`vendor`] run.
pub struct VendorOptions<'a> {
    /// When `true`, pre-existing entries in the destination directory are
    /// never scheduled for removal.
    pub no_delete: bool,
    /// When `true`, every vendored directory is named `<name>-<version>`
    /// rather than only the ones that are not the newest version of a package.
    pub versioned_dirs: bool,
    /// Directory the sources are vendored into.
    pub destination: &'a Path,
    /// Paths of additional workspaces to vendor alongside the main one;
    /// each is joined onto the current working directory before loading.
    pub extra: Vec<PathBuf>,
    /// When `true`, source replacement is loaded from configuration;
    /// otherwise an empty source-config map is used.
    pub respect_source_config: bool,
}
32
33pub fn vendor(ws: &Workspace<'_>, opts: &VendorOptions<'_>) -> CargoResult<()> {
34    let gctx = ws.gctx();
35    let mut extra_workspaces = Vec::new();
36    for extra in opts.extra.iter() {
37        let extra = gctx.cwd().join(extra);
38        let ws = Workspace::new(&extra, gctx)?;
39        extra_workspaces.push(ws);
40    }
41    let workspaces = extra_workspaces.iter().chain(Some(ws)).collect::<Vec<_>>();
42    let _lock = gctx.acquire_package_cache_lock(CacheLockMode::DownloadExclusive)?;
43    let vendor_config = sync(gctx, &workspaces, opts).context("failed to sync")?;
44
45    if gctx.shell().verbosity() != Verbosity::Quiet {
46        if vendor_config.source.is_empty() {
47            crate::drop_eprintln!(gctx, "There is no dependency to vendor in this project.");
48        } else {
49            crate::drop_eprint!(
50                gctx,
51                "To use vendored sources, add this to your .cargo/config.toml for this project:\n\n"
52            );
53            crate::drop_print!(gctx, "{}", &toml::to_string_pretty(&vendor_config).unwrap());
54        }
55    }
56
57    Ok(())
58}
59
/// Serializable source-replacement configuration produced by `sync`.
#[derive(Serialize)]
struct VendorConfig {
    /// Source entries keyed by source name, kept in sorted order.
    source: BTreeMap<String, VendorSource>,
}
64
/// One entry of the `source` table in [`VendorConfig`].
///
/// The enum is `untagged`, so only the fields of the chosen variant are
/// serialized.
#[derive(Serialize)]
#[serde(rename_all = "lowercase", untagged)]
enum VendorSource {
    /// The directory that holds the vendored sources.
    Directory {
        directory: String,
    },
    /// A registry source redirected to `replace_with`.
    Registry {
        /// Registry URL; `sync` leaves this `None` for crates.io.
        registry: Option<String>,
        #[serde(rename = "replace-with")]
        replace_with: String,
    },
    /// A git source redirected to `replace_with`.
    Git {
        git: String,
        // `sync` sets at most one of `branch`, `tag` and `rev`.
        branch: Option<String>,
        tag: Option<String>,
        rev: Option<String>,
        #[serde(rename = "replace-with")]
        replace_with: String,
    },
}
85
/// Cache for mapping replaced sources to replacements.
struct SourceReplacementCache<'gctx> {
    /// Source configuration consulted when an id is not cached yet.
    map: SourceConfigMap<'gctx>,
    /// Already-resolved ids: original source id -> replacement source id.
    cache: HashMap<SourceId, SourceId>,
}
91
92impl SourceReplacementCache<'_> {
93    fn new(
94        gctx: &GlobalContext,
95        respect_source_config: bool,
96    ) -> CargoResult<SourceReplacementCache<'_>> {
97        Ok(SourceReplacementCache {
98            map: if respect_source_config {
99                SourceConfigMap::new(gctx)
100            } else {
101                SourceConfigMap::empty(gctx)
102            }?,
103            cache: Default::default(),
104        })
105    }
106
107    fn get(&mut self, id: SourceId) -> CargoResult<SourceId> {
108        use std::collections::hash_map::Entry;
109        match self.cache.entry(id) {
110            Entry::Occupied(e) => Ok(e.get().clone()),
111            Entry::Vacant(e) => {
112                let replaced = self.map.load(id, &HashSet::new())?.replaced_source_id();
113                Ok(e.insert(replaced).clone())
114            }
115        }
116    }
117}
118
119fn sync(
120    gctx: &GlobalContext,
121    workspaces: &[&Workspace<'_>],
122    opts: &VendorOptions<'_>,
123) -> CargoResult<VendorConfig> {
124    let dry_run = false;
125    let vendor_dir = try_canonicalize(opts.destination);
126    let vendor_dir = vendor_dir.as_deref().unwrap_or(opts.destination);
127    let vendor_dir_already_exists = vendor_dir.exists();
128
129    paths::create_dir_all(&vendor_dir)?;
130    let mut to_remove = HashSet::new();
131    if !opts.no_delete {
132        for entry in vendor_dir.read_dir()? {
133            let entry = entry?;
134            if !entry
135                .file_name()
136                .to_str()
137                .map_or(false, |s| s.starts_with('.'))
138            {
139                to_remove.insert(entry.path());
140            }
141        }
142    }
143
144    let mut source_replacement_cache =
145        SourceReplacementCache::new(gctx, opts.respect_source_config)?;
146
147    let mut checksums = HashMap::new();
148    let mut ids = BTreeMap::new();
149
150    // Let's download all crates and start storing internal tables about them.
151    for ws in workspaces {
152        let (packages, resolve) = ops::resolve_ws(ws, dry_run)
153            .with_context(|| format!("failed to load lockfile for {}", ws.root().display()))?;
154
155        packages
156            .get_many(resolve.iter())
157            .with_context(|| format!("failed to download packages for {}", ws.root().display()))?;
158
159        for pkg in resolve.iter() {
160            let sid = source_replacement_cache.get(pkg.source_id())?;
161
162            // Don't vendor path crates since they're already in the repository
163            if sid.is_path() {
164                // And don't delete actual source code!
165                if let Ok(path) = sid.url().to_file_path() {
166                    if let Ok(path) = try_canonicalize(path) {
167                        to_remove.remove(&path);
168                    }
169                }
170                continue;
171            }
172
173            ids.insert(
174                pkg,
175                packages
176                    .get_one(pkg)
177                    .context("failed to fetch package")?
178                    .clone(),
179            );
180
181            checksums.insert(pkg, resolve.checksums().get(&pkg).cloned());
182        }
183    }
184
185    let mut versions = HashMap::new();
186    for id in ids.keys() {
187        let map = versions.entry(id.name()).or_insert_with(BTreeMap::default);
188        if let Some(prev) = map.get(&id.version()) {
189            bail!(
190                "found duplicate version of package `{} v{}` \
191                 vendored from two sources:\n\
192                 \n\
193                 \tsource 1: {}\n\
194                 \tsource 2: {}",
195                id.name(),
196                id.version(),
197                prev,
198                id.source_id()
199            );
200        }
201        map.insert(id.version(), id.source_id());
202    }
203
204    let mut sources = BTreeSet::new();
205    let mut tmp_buf = [0; 64 * 1024];
206    for (id, pkg) in ids.iter() {
207        // Next up, copy it to the vendor directory
208        let src = pkg.root();
209        let max_version = *versions[&id.name()].iter().rev().next().unwrap().0;
210        let dir_has_version_suffix = opts.versioned_dirs || id.version() != max_version;
211        let dst_name = if dir_has_version_suffix {
212            // Eg vendor/futures-0.1.13
213            format!("{}-{}", id.name(), id.version())
214        } else {
215            // Eg vendor/futures
216            id.name().to_string()
217        };
218
219        sources.insert(id.source_id());
220        let dst = vendor_dir.join(&dst_name);
221        to_remove.remove(&dst);
222        let cksum = dst.join(".cargo-checksum.json");
223        // Registries are the only immutable sources,
224        // path and git dependencies' versions cannot be trusted to mean "no change"
225        if dir_has_version_suffix && id.source_id().is_registry() && cksum.exists() {
226            // Don't re-copy directory with version suffix in case it comes from a registry
227            continue;
228        }
229
230        gctx.shell().status(
231            "Vendoring",
232            &format!("{} ({}) to {}", id, src.to_string_lossy(), dst.display()),
233        )?;
234
235        let _ = fs::remove_dir_all(&dst);
236
237        let mut file_cksums = BTreeMap::new();
238
239        // Need this mapping anyway because we will directly consult registry sources,
240        // otherwise builtin source replacement (sparse registry) won't be respected.
241        let sid = source_replacement_cache.get(id.source_id())?;
242
243        if sid.is_registry() {
244            // To keep the unpacked source from registry in a pristine state,
245            // we'll do a direct extraction into the vendor directory.
246            let registry = match sid.kind() {
247                SourceKind::Registry | SourceKind::SparseRegistry => {
248                    RegistrySource::remote(sid, &Default::default(), gctx)?
249                }
250                SourceKind::LocalRegistry => {
251                    let path = sid.url().to_file_path().expect("local path");
252                    RegistrySource::local(sid, &path, &Default::default(), gctx)
253                }
254                _ => unreachable!("not registry source: {sid}"),
255            };
256
257            let walkdir = |root| {
258                WalkDir::new(root)
259                    .into_iter()
260                    // It is safe to skip errors,
261                    // since we'll hit them during copying/reading later anyway.
262                    .filter_map(|e| e.ok())
263                    // There should be no symlink in tarballs on crates.io,
264                    // but might be wrong for local registries.
265                    // Hence here be conservative and include symlinks.
266                    .filter(|e| e.file_type().is_file() || e.file_type().is_symlink())
267            };
268            let mut compute_file_cksums = |root| {
269                for e in walkdir(root) {
270                    let path = e.path();
271                    let relative = path.strip_prefix(&dst).unwrap();
272                    let cksum = Sha256::new()
273                        .update_path(path)
274                        .map(Sha256::finish_hex)
275                        .with_context(|| format!("failed to checksum `{}`", path.display()))?;
276                    file_cksums.insert(relative.to_str().unwrap().replace("\\", "/"), cksum);
277                }
278                Ok::<_, anyhow::Error>(())
279            };
280            if dir_has_version_suffix {
281                registry.unpack_package_in(id, &vendor_dir, &vendor_this)?;
282                compute_file_cksums(&dst)?;
283            } else {
284                // Due to the extra sanity check in registry unpack
285                // (ensure it contain only one top-level directory with name `pkg-version`),
286                // we can only unpack a directory with version suffix,
287                // and move it to the no suffix directory.
288                let staging_dir = tempfile::Builder::new()
289                    .prefix(".vendor-staging")
290                    .tempdir_in(vendor_dir)?;
291                let unpacked_src =
292                    registry.unpack_package_in(id, staging_dir.path(), &vendor_this)?;
293                if let Err(e) = fs::rename(&unpacked_src, &dst) {
294                    // This fallback is mainly for Windows 10 versions earlier than 1607.
295                    // The destination of `fs::rename` can't be a diretory in older versions.
296                    // Can be removed once the minimal supported Windows version gets bumped.
297                    tracing::warn!("failed to `mv {unpacked_src:?} {dst:?}`: {e}");
298                    let paths: Vec<_> = walkdir(&unpacked_src).map(|e| e.into_path()).collect();
299                    cp_sources(pkg, src, &paths, &dst, &mut file_cksums, &mut tmp_buf, gctx)
300                        .with_context(|| format!("failed to copy vendored sources for {id}"))?;
301                } else {
302                    compute_file_cksums(&dst)?;
303                }
304            }
305        } else {
306            let paths = PathSource::new(src, sid, gctx)
307                .list_files(pkg)?
308                .into_iter()
309                .map(|p| p.into_path_buf())
310                .collect::<Vec<_>>();
311            cp_sources(pkg, src, &paths, &dst, &mut file_cksums, &mut tmp_buf, gctx)
312                .with_context(|| format!("failed to copy vendored sources for {id}"))?;
313        }
314
315        // Finally, emit the metadata about this package
316        let json = serde_json::json!({
317            "package": checksums.get(id),
318            "files": file_cksums,
319        });
320
321        paths::write(&cksum, json.to_string())?;
322    }
323
324    for path in to_remove {
325        if path.is_dir() {
326            paths::remove_dir_all(&path)?;
327        } else {
328            paths::remove_file(&path)?;
329        }
330    }
331
332    // add our vendored source
333    let mut config = BTreeMap::new();
334
335    let merged_source_name = "vendored-sources";
336
337    // replace original sources with vendor
338    for source_id in sources {
339        let name = if source_id.is_crates_io() {
340            CRATES_IO_REGISTRY.to_string()
341        } else {
342            // Remove `precise` since that makes the source name very long,
343            // and isn't needed to disambiguate multiple sources.
344            source_id.without_precise().as_url().to_string()
345        };
346
347        let source = if source_id.is_crates_io() {
348            VendorSource::Registry {
349                registry: None,
350                replace_with: merged_source_name.to_string(),
351            }
352        } else if source_id.is_remote_registry() {
353            let registry = source_id.url().to_string();
354            VendorSource::Registry {
355                registry: Some(registry),
356                replace_with: merged_source_name.to_string(),
357            }
358        } else if source_id.is_git() {
359            let mut branch = None;
360            let mut tag = None;
361            let mut rev = None;
362            if let Some(reference) = source_id.git_reference() {
363                match *reference {
364                    GitReference::Branch(ref b) => branch = Some(b.clone()),
365                    GitReference::Tag(ref t) => tag = Some(t.clone()),
366                    GitReference::Rev(ref r) => rev = Some(r.clone()),
367                    GitReference::DefaultBranch => {}
368                }
369            }
370            VendorSource::Git {
371                git: source_id.url().to_string(),
372                branch,
373                tag,
374                rev,
375                replace_with: merged_source_name.to_string(),
376            }
377        } else {
378            panic!("Invalid source ID: {}", source_id)
379        };
380        config.insert(name, source);
381    }
382
383    if !config.is_empty() {
384        config.insert(
385            merged_source_name.to_string(),
386            VendorSource::Directory {
387                // Windows-flavour paths are valid here on Windows but Unix.
388                // This backslash normalization is for making output paths more
389                // cross-platform compatible.
390                directory: opts.destination.to_string_lossy().replace("\\", "/"),
391            },
392        );
393    } else if !vendor_dir_already_exists {
394        // Nothing to vendor. Remove the destination dir we've just created.
395        paths::remove_dir(vendor_dir)?;
396    }
397
398    Ok(VendorConfig { source: config })
399}
400
401fn cp_sources(
402    pkg: &Package,
403    src: &Path,
404    paths: &[PathBuf],
405    dst: &Path,
406    cksums: &mut BTreeMap<String, String>,
407    tmp_buf: &mut [u8],
408    gctx: &GlobalContext,
409) -> CargoResult<()> {
410    for p in paths {
411        let relative = p.strip_prefix(&src).unwrap();
412
413        if !vendor_this(relative) {
414            continue;
415        }
416
417        // Join pathname components individually to make sure that the joined
418        // path uses the correct directory separators everywhere, since
419        // `relative` may use Unix-style and `dst` may require Windows-style
420        // backslashes.
421        let dst = relative
422            .iter()
423            .fold(dst.to_owned(), |acc, component| acc.join(&component));
424
425        paths::create_dir_all(dst.parent().unwrap())?;
426        let mut dst_opts = OpenOptions::new();
427        dst_opts.write(true).create(true).truncate(true);
428        // When vendoring git dependencies, the manifest has not been normalized like it would be
429        // when published. This causes issue when the manifest is using workspace inheritance.
430        // To get around this issue we use the "original" manifest after `{}.workspace = true`
431        // has been resolved for git dependencies.
432        let cksum = if dst.file_name() == Some(OsStr::new("Cargo.toml"))
433            && pkg.package_id().source_id().is_git()
434        {
435            let packaged_files = paths
436                .iter()
437                .map(|p| p.strip_prefix(src).unwrap().to_owned())
438                .collect::<Vec<_>>();
439            let vendored_pkg = prepare_for_vendor(pkg, &packaged_files, gctx)?;
440            let contents = vendored_pkg.manifest().to_normalized_contents()?;
441            copy_and_checksum(
442                &dst,
443                &mut dst_opts,
444                &mut contents.as_bytes(),
445                Path::new("Generated Cargo.toml"),
446                tmp_buf,
447            )?
448        } else {
449            let mut src = File::open(&p).with_context(|| format!("failed to open {:?}", &p))?;
450            #[cfg(unix)]
451            {
452                use std::os::unix::fs::{MetadataExt, OpenOptionsExt};
453                let src_metadata = src
454                    .metadata()
455                    .with_context(|| format!("failed to stat {:?}", p))?;
456                dst_opts.mode(src_metadata.mode());
457            }
458            copy_and_checksum(&dst, &mut dst_opts, &mut src, &p, tmp_buf)?
459        };
460
461        cksums.insert(relative.to_str().unwrap().replace("\\", "/"), cksum);
462    }
463    Ok(())
464}
465
466/// HACK: Perform the bare minimum of `prepare_for_publish` needed for #14348.
467///
468/// There are parts of `prepare_for_publish` that could be directly useful (e.g. stripping
469/// `[workspace]`) while other parts that require other filesystem operations (moving the README
470/// file) and ideally we'd reuse `cargo package` code to take care of all of this for us.
471fn prepare_for_vendor(
472    me: &Package,
473    packaged_files: &[PathBuf],
474    gctx: &GlobalContext,
475) -> CargoResult<Package> {
476    let contents = me.manifest().contents();
477    let document = me.manifest().document();
478    let original_toml = prepare_toml_for_vendor(
479        me.manifest().normalized_toml().clone(),
480        packaged_files,
481        gctx,
482    )?;
483    let normalized_toml = original_toml.clone();
484    let features = me.manifest().unstable_features().clone();
485    let workspace_config = me.manifest().workspace_config().clone();
486    let source_id = me.package_id().source_id();
487    let mut warnings = Default::default();
488    let mut errors = Default::default();
489    let manifest = crate::util::toml::to_real_manifest(
490        contents.to_owned(),
491        document.clone(),
492        original_toml,
493        normalized_toml,
494        features,
495        workspace_config,
496        source_id,
497        me.manifest_path(),
498        me.manifest().is_embedded(),
499        gctx,
500        &mut warnings,
501        &mut errors,
502    )?;
503    let new_pkg = Package::new(manifest, me.manifest_path());
504    Ok(new_pkg)
505}
506
507fn prepare_toml_for_vendor(
508    mut me: cargo_util_schemas::manifest::TomlManifest,
509    packaged_files: &[PathBuf],
510    gctx: &GlobalContext,
511) -> CargoResult<cargo_util_schemas::manifest::TomlManifest> {
512    let package = me
513        .package
514        .as_mut()
515        .expect("venedored manifests must have packages");
516    if let Some(cargo_util_schemas::manifest::StringOrBool::String(path)) = &package.build {
517        let path = paths::normalize_path(Path::new(path));
518        let included = packaged_files.contains(&path);
519        let build = if included {
520            let path = path
521                .into_os_string()
522                .into_string()
523                .map_err(|_err| anyhow::format_err!("non-UTF8 `package.build`"))?;
524            let path = crate::util::toml::normalize_path_string_sep(path);
525            cargo_util_schemas::manifest::StringOrBool::String(path)
526        } else {
527            gctx.shell().warn(format!(
528                "ignoring `package.build` as `{}` is not included in the published package",
529                path.display()
530            ))?;
531            cargo_util_schemas::manifest::StringOrBool::Bool(false)
532        };
533        package.build = Some(build);
534    }
535
536    let lib = if let Some(target) = &me.lib {
537        crate::util::toml::prepare_target_for_publish(
538            target,
539            Some(packaged_files),
540            "library",
541            gctx,
542        )?
543    } else {
544        None
545    };
546    let bin = crate::util::toml::prepare_targets_for_publish(
547        me.bin.as_ref(),
548        Some(packaged_files),
549        "binary",
550        gctx,
551    )?;
552    let example = crate::util::toml::prepare_targets_for_publish(
553        me.example.as_ref(),
554        Some(packaged_files),
555        "example",
556        gctx,
557    )?;
558    let test = crate::util::toml::prepare_targets_for_publish(
559        me.test.as_ref(),
560        Some(packaged_files),
561        "test",
562        gctx,
563    )?;
564    let bench = crate::util::toml::prepare_targets_for_publish(
565        me.bench.as_ref(),
566        Some(packaged_files),
567        "benchmark",
568        gctx,
569    )?;
570
571    me.lib = lib;
572    me.bin = bin;
573    me.example = example;
574    me.test = test;
575    me.bench = bench;
576
577    Ok(me)
578}
579
580fn copy_and_checksum<T: Read>(
581    dst_path: &Path,
582    dst_opts: &mut OpenOptions,
583    contents: &mut T,
584    contents_path: &Path,
585    buf: &mut [u8],
586) -> CargoResult<String> {
587    let mut dst = dst_opts
588        .open(dst_path)
589        .with_context(|| format!("failed to create {:?}", dst_path))?;
590    // Not going to bother setting mode on pre-existing files, since there
591    // shouldn't be any under normal conditions.
592    let mut cksum = Sha256::new();
593    loop {
594        let n = contents
595            .read(buf)
596            .with_context(|| format!("failed to read from {:?}", contents_path))?;
597        if n == 0 {
598            break Ok(cksum.finish_hex());
599        }
600        let data = &buf[..n];
601        cksum.update(data);
602        dst.write_all(data)
603            .with_context(|| format!("failed to write to {:?}", dst_path))?;
604    }
605}
606
/// Decides whether a file should be vendored.
///
/// `relative` is a path relative to the package root. Only an exact match
/// against one of the excluded names is rejected; paths that are not valid
/// UTF-8 are always kept.
fn vendor_this(relative: &Path) -> bool {
    // Git config files are skipped because they are mostly irrelevant to
    // builds, and honouring them (e.g. in git) would likely upset the
    // checksums once a vendor dir is checked into someone else's source
    // control. `.cargo-ok` is a temporary Cargo file.
    !matches!(
        relative.to_str(),
        Some(".gitattributes" | ".gitignore" | ".git" | ".cargo-ok")
    )
}