// rustc_codegen_llvm/consts.rs

use std::ops::Range;

use rustc_abi::{Align, HasDataLayout, Primitive, Scalar, Size, WrappingRange};
use rustc_codegen_ssa::common;
use rustc_codegen_ssa::traits::*;
use rustc_hir::LangItem;
use rustc_hir::attrs::Linkage;
use rustc_hir::def::DefKind;
use rustc_hir::def_id::{DefId, LOCAL_CRATE};
use rustc_middle::middle::codegen_fn_attrs::{CodegenFnAttrFlags, CodegenFnAttrs};
use rustc_middle::mir::interpret::{
    Allocation, ConstAllocation, ErrorHandled, InitChunk, Pointer, Scalar as InterpScalar,
    read_target_uint,
};
use rustc_middle::mir::mono::MonoItem;
use rustc_middle::ty::layout::{HasTypingEnv, LayoutOf};
use rustc_middle::ty::{self, Instance};
use rustc_middle::{bug, span_bug};
use tracing::{debug, instrument, trace};

use crate::common::CodegenCx;
use crate::errors::SymbolAlreadyDefined;
use crate::type_::Type;
use crate::type_of::LayoutLlvmExt;
use crate::value::Value;
use crate::{base, debuginfo, llvm};

pub(crate) fn const_alloc_to_llvm<'ll>(
    cx: &CodegenCx<'ll, '_>,
    alloc: &Allocation,
    is_static: bool,
) -> &'ll Value {
    // We expect that callers of const_alloc_to_llvm will instead directly codegen a pointer or
    // integer for any &ZST where the ZST is a constant (i.e. not a static). We should never be
    // producing empty LLVM allocations as they're just adding noise to binaries and forcing less
    // optimal codegen.
    //
    // Statics have a guaranteed meaningful address so it's less clear that we want to do
    // something like this; it's also harder.
    if !is_static {
        assert!(alloc.len() != 0);
    }
    let mut llvals = Vec::with_capacity(alloc.provenance().ptrs().len() + 1);
    let dl = cx.data_layout();
    let pointer_size = dl.pointer_size();
    let pointer_size_bytes = pointer_size.bytes() as usize;

    // Note: this function may call `inspect_with_uninit_and_ptr_outside_interpreter`, so `range`
    // must be within the bounds of `alloc` and not contain or overlap a pointer provenance.
    fn append_chunks_of_init_and_uninit_bytes<'ll, 'a, 'b>(
        llvals: &mut Vec<&'ll Value>,
        cx: &'a CodegenCx<'ll, 'b>,
        alloc: &'a Allocation,
        range: Range<usize>,
    ) {
        let chunks = alloc.init_mask().range_as_init_chunks(range.clone().into());

        let chunk_to_llval = move |chunk| match chunk {
            InitChunk::Init(range) => {
                let range = (range.start.bytes() as usize)..(range.end.bytes() as usize);
                let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(range);
                cx.const_bytes(bytes)
            }
            InitChunk::Uninit(range) => {
                let len = range.end.bytes() - range.start.bytes();
                cx.const_undef(cx.type_array(cx.type_i8(), len))
            }
        };

        // Generating partially-uninit consts is limited to small numbers of chunks,
        // to avoid the cost of generating large complex const expressions.
        // For example, `[(u32, u8); 1024 * 1024]` contains uninit padding in each element, and
        // would result in `{ [5 x i8] zeroinitializer, [3 x i8] undef, ...repeat 1M times... }`.
        let max = cx.sess().opts.unstable_opts.uninit_const_chunk_threshold;
        let allow_uninit_chunks = chunks.clone().take(max.saturating_add(1)).count() <= max;

        if allow_uninit_chunks {
            llvals.extend(chunks.map(chunk_to_llval));
        } else {
            // If this allocation contains any uninit bytes, codegen as if it was initialized
            // (using some arbitrary value for uninit bytes).
            let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(range);
            llvals.push(cx.const_bytes(bytes));
        }
    }
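
    // Illustrative sketch (hypothetical threshold value): with
    // `-Zuninit-const-chunk-threshold=16`, an allocation whose init mask splits the
    // range into at most 16 chunks keeps the precise `const_bytes`/`undef` chunk
    // structure, while the `[(u32, u8); 1024 * 1024]` example above (~2M chunks) is
    // flattened into a single `const_bytes` value with arbitrary bytes at the
    // uninit positions.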

    let mut next_offset = 0;
    for &(offset, prov) in alloc.provenance().ptrs().iter() {
        let offset = offset.bytes();
        assert_eq!(offset as usize as u64, offset);
        let offset = offset as usize;
        if offset > next_offset {
            // This `inspect` is okay since we have checked that there is no provenance, it
            // is within the bounds of the allocation, and it doesn't affect interpreter execution
            // (we inspect the result after interpreter execution).
            append_chunks_of_init_and_uninit_bytes(&mut llvals, cx, alloc, next_offset..offset);
        }
        let ptr_offset = read_target_uint(
            dl.endian,
            // This `inspect` is okay since it is within the bounds of the allocation, it doesn't
            // affect interpreter execution (we inspect the result after interpreter execution),
            // and we properly interpret the provenance as a relocation pointer offset.
            alloc.inspect_with_uninit_and_ptr_outside_interpreter(
                offset..(offset + pointer_size_bytes),
            ),
        )
        .expect("const_alloc_to_llvm: could not read relocation pointer")
            as u64;

        let address_space = cx.tcx.global_alloc(prov.alloc_id()).address_space(cx);

        llvals.push(cx.scalar_to_backend(
            InterpScalar::from_pointer(Pointer::new(prov, Size::from_bytes(ptr_offset)), &cx.tcx),
            Scalar::Initialized {
                value: Primitive::Pointer(address_space),
                valid_range: WrappingRange::full(pointer_size),
            },
            cx.type_ptr_ext(address_space),
        ));
        next_offset = offset + pointer_size_bytes;
    }
    if alloc.len() >= next_offset {
        let range = next_offset..alloc.len();
        // This `inspect` is okay since we have checked that it is after all provenance, it is
        // within the bounds of the allocation, and it doesn't affect interpreter execution (we
        // inspect the result after interpreter execution).
        append_chunks_of_init_and_uninit_bytes(&mut llvals, cx, alloc, range);
    }

    // Avoid wrapping in a struct if there is only a single value. This ensures
    // that LLVM is able to perform the string merging optimization if the constant
    // is a valid C string. LLVM only considers bare arrays for this optimization,
    // not arrays wrapped in a struct. LLVM handles this at:
    // https://github.com/rust-lang/llvm-project/blob/acaea3d2bb8f351b740db7ebce7d7a40b9e21488/llvm/lib/Target/TargetLoweringObjectFile.cpp#L249-L280
    if let &[data] = &*llvals { data } else { cx.const_struct(&llvals, true) }
}
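
// Illustrative sketch (hypothetical allocation): on a 64-bit little-endian target,
// a 16-byte allocation holding a pointer at offset 0, one initialized byte at
// offset 8, and 7 uninit padding bytes comes out of `const_alloc_to_llvm` roughly
// as the packed struct
//
//     <{ ptr @other_alloc, i8 7, [7 x i8] undef }>
//
// while a plain byte string like `b"hello\0"` is a single init chunk with no
// provenance, so it stays a bare `[6 x i8] c"hello\00"` and remains eligible for
// LLVM's C-string merging optimization.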

fn codegen_static_initializer<'ll, 'tcx>(
    cx: &CodegenCx<'ll, 'tcx>,
    def_id: DefId,
) -> Result<(&'ll Value, ConstAllocation<'tcx>), ErrorHandled> {
    let alloc = cx.tcx.eval_static_initializer(def_id)?;
    Ok((const_alloc_to_llvm(cx, alloc.inner(), /*static*/ true), alloc))
}

fn set_global_alignment<'ll>(cx: &CodegenCx<'ll, '_>, gv: &'ll Value, mut align: Align) {
    // The target may require greater alignment for globals than the type does.
    // Note: GCC and Clang also allow `__attribute__((aligned))` on variables,
    // which can force it to be smaller. Rust doesn't support this yet.
    if let Some(min_global) = cx.sess().target.min_global_align {
        align = Ord::max(align, min_global);
    }
    llvm::set_alignment(gv, align);
}
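
// Illustrative sketch (target-specific assumption): on a target such as s390x,
// where `min_global_align` is larger than 1, a `static X: u8 = 0;` whose type only
// requires align 1 is still emitted with the target's minimum global alignment,
// e.g. `@X = global i8 0, align 2`.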

fn check_and_apply_linkage<'ll, 'tcx>(
    cx: &CodegenCx<'ll, 'tcx>,
    attrs: &CodegenFnAttrs,
    llty: &'ll Type,
    sym: &str,
    def_id: DefId,
) -> &'ll Value {
    if let Some(linkage) = attrs.import_linkage {
        debug!("get_static: sym={} linkage={:?}", sym, linkage);

        // Declare a symbol `foo`. If `foo` is an extern_weak symbol, we declare
        // an extern_weak function; otherwise, a global with the desired linkage.
        let g1 = if matches!(attrs.import_linkage, Some(Linkage::ExternalWeak)) {
            // An `extern_weak` function is represented as an `Option<unsafe extern ...>`,
            // so we extract the function signature and declare it as an extern_weak function
            // instead of an extern_weak i8.
            let instance = Instance::mono(cx.tcx, def_id);
            if let ty::Adt(struct_def, args) = instance.ty(cx.tcx, cx.typing_env()).kind()
                && cx.tcx.is_lang_item(struct_def.did(), LangItem::Option)
                && let ty::FnPtr(sig, header) = args.type_at(0).kind()
            {
                let fn_sig = sig.with(*header);

                let fn_abi = cx.fn_abi_of_fn_ptr(fn_sig, ty::List::empty());
                cx.declare_fn(sym, &fn_abi, None)
            } else {
                cx.declare_global(sym, cx.type_i8())
            }
        } else {
            cx.declare_global(sym, cx.type_i8())
        };
        llvm::set_linkage(g1, base::linkage_to_llvm(linkage));

        // Declare an internal global `extern_with_linkage_foo` which
        // is initialized with the address of `foo`. If `foo` is
        // discarded during linking (for example, if `foo` has weak
        // linkage and there are no definitions), then
        // `extern_with_linkage_foo` will instead be initialized to
        // zero.
        let real_name =
            format!("_rust_extern_with_linkage_{:016x}_{sym}", cx.tcx.stable_crate_id(LOCAL_CRATE));
        let g2 = cx.define_global(&real_name, llty).unwrap_or_else(|| {
            cx.sess().dcx().emit_fatal(SymbolAlreadyDefined {
                span: cx.tcx.def_span(def_id),
                symbol_name: sym,
            })
        });
        llvm::set_linkage(g2, llvm::Linkage::InternalLinkage);
        llvm::set_initializer(g2, g1);
        g2
    } else if cx.tcx.sess.target.arch == "x86"
        && common::is_mingw_gnu_toolchain(&cx.tcx.sess.target)
        && let Some(dllimport) = crate::common::get_dllimport(cx.tcx, def_id, sym)
    {
        cx.declare_global(&common::i686_decorated_name(dllimport, true, true, false), llty)
    } else {
        // Generate an external declaration.
        // FIXME(nagisa): investigate whether it can be changed into define_global
        cx.declare_global(sym, llty)
    }
}
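
// Illustrative sketch (hypothetical symbol names) of the `import_linkage` path above
// for an `extern_weak` function `foo`:
//
//     declare extern_weak void @foo()
//     @_rust_extern_with_linkage_..._foo = internal global ptr @foo
//
// If no definition of `foo` is found at link time, the weak reference resolves to
// null, so a load of the wrapper global yields a null pointer (i.e. `None`).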

impl<'ll> CodegenCx<'ll, '_> {
    pub(crate) fn const_bitcast(&self, val: &'ll Value, ty: &'ll Type) -> &'ll Value {
        unsafe { llvm::LLVMConstBitCast(val, ty) }
    }

    pub(crate) fn const_pointercast(&self, val: &'ll Value, ty: &'ll Type) -> &'ll Value {
        unsafe { llvm::LLVMConstPointerCast(val, ty) }
    }

    /// Create a global variable.
    ///
    /// The returned global variable is a pointer in the default address space for globals.
    /// Fails if a symbol with the given name already exists.
    pub(crate) fn static_addr_of_mut(
        &self,
        cv: &'ll Value,
        align: Align,
        kind: Option<&str>,
    ) -> &'ll Value {
        let gv = match kind {
            Some(kind) if !self.tcx.sess.fewer_names() => {
                let name = self.generate_local_symbol_name(kind);
                let gv = self.define_global(&name, self.val_ty(cv)).unwrap_or_else(|| {
                    bug!("symbol `{}` is already defined", name);
                });
                llvm::set_linkage(gv, llvm::Linkage::PrivateLinkage);
                gv
            }
            _ => self.define_private_global(self.val_ty(cv)),
        };
        llvm::set_initializer(gv, cv);
        set_global_alignment(self, gv, align);
        llvm::set_unnamed_address(gv, llvm::UnnamedAddr::Global);
        gv
    }

    /// Create a global constant.
    ///
    /// The returned global variable is a pointer in the default address space for globals.
    pub(crate) fn static_addr_of_impl(
        &self,
        cv: &'ll Value,
        align: Align,
        kind: Option<&str>,
    ) -> &'ll Value {
        if let Some(&gv) = self.const_globals.borrow().get(&cv) {
            unsafe {
                // Upgrade the alignment in cases where the same constant is used with different
                // alignment requirements.
                let llalign = align.bytes() as u32;
                if llalign > llvm::LLVMGetAlignment(gv) {
                    llvm::LLVMSetAlignment(gv, llalign);
                }
            }
            return gv;
        }
        let gv = self.static_addr_of_mut(cv, align, kind);
        llvm::set_global_constant(gv, true);

        self.const_globals.borrow_mut().insert(cv, gv);
        gv
    }
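
    // Illustrative sketch (hypothetical alignments): if the same constant `cv` is
    // first interned here with align 4 and later requested with align 16, the cached
    // global is reused and its alignment is bumped in place to `align 16`; a later
    // request with a smaller alignment leaves the global untouched.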

    #[instrument(level = "debug", skip(self))]
    pub(crate) fn get_static(&self, def_id: DefId) -> &'ll Value {
        let instance = Instance::mono(self.tcx, def_id);
        trace!(?instance);

        let DefKind::Static { nested, .. } = self.tcx.def_kind(def_id) else { bug!() };
        // Nested statics do not have a type, so pick a dummy type and let `codegen_static` figure
        // out the llvm type from the actual evaluated initializer.
        let llty = if nested {
            self.type_i8()
        } else {
            let ty = instance.ty(self.tcx, self.typing_env());
            trace!(?ty);
            self.layout_of(ty).llvm_type(self)
        };
        self.get_static_inner(def_id, llty)
    }

    #[instrument(level = "debug", skip(self, llty))]
    fn get_static_inner(&self, def_id: DefId, llty: &'ll Type) -> &'ll Value {
        let instance = Instance::mono(self.tcx, def_id);
        if let Some(&g) = self.instances.borrow().get(&instance) {
            trace!("used cached value");
            return g;
        }

        let defined_in_current_codegen_unit =
            self.codegen_unit.items().contains_key(&MonoItem::Static(def_id));
        assert!(
            !defined_in_current_codegen_unit,
            "consts::get_static() should always hit the cache for \
                 statics defined in the same CGU, but did not for `{def_id:?}`"
        );

        let sym = self.tcx.symbol_name(instance).name;
        let fn_attrs = self.tcx.codegen_fn_attrs(def_id);

        debug!(?sym, ?fn_attrs);

        let g = if def_id.is_local() && !self.tcx.is_foreign_item(def_id) {
            if let Some(g) = self.get_declared_value(sym) {
                if self.val_ty(g) != self.type_ptr() {
                    span_bug!(self.tcx.def_span(def_id), "Conflicting types for static");
                }
            }

            let g = self.declare_global(sym, llty);

            if !self.tcx.is_reachable_non_generic(def_id) {
                llvm::set_visibility(g, llvm::Visibility::Hidden);
            }

            g
        } else {
            check_and_apply_linkage(self, fn_attrs, llty, sym, def_id)
        };

        // Thread-local statics in some other crate need to *always* be linked
        // against in a thread-local fashion, so we need to be sure to apply the
        // thread-local attribute locally if it was present remotely. If we
        // don't do this then linker errors can be generated where the linker
        // complains that one object file has a thread local version of the
        // symbol and another one doesn't.
        if fn_attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL) {
            llvm::set_thread_local_mode(g, self.tls_model);
        }

        let dso_local = self.assume_dso_local(g, true);

        if !def_id.is_local() {
            let needs_dll_storage_attr = self.use_dll_storage_attrs
                && !self.tcx.is_foreign_item(def_id)
                // Local definitions can never be imported, so we must not apply
                // the DLLImport annotation.
                && !dso_local
                // Linker plugin ThinLTO doesn't create the self-dllimport Rust uses for rlibs
                // as the code generation happens out of process. Instead we assume static linkage
                // and disallow dynamic linking when linker plugin based LTO is enabled.
                // Regular in-process ThinLTO doesn't need this workaround.
                && !self.tcx.sess.opts.cg.linker_plugin_lto.enabled();

            // If this assertion triggers, there's something wrong with commandline
            // argument validation.
            assert!(
                !(self.tcx.sess.opts.cg.linker_plugin_lto.enabled()
                    && self.tcx.sess.target.is_like_windows
                    && self.tcx.sess.opts.cg.prefer_dynamic)
            );

            if needs_dll_storage_attr {
                // This item is external but not foreign, i.e., it originates from an external Rust
                // crate. Since we don't know whether this crate will be linked dynamically or
                // statically in the final application, we always mark such symbols as 'dllimport'.
                // If final linkage happens to be static, we rely on compiler-emitted __imp_ stubs
                // to make things work.
                //
                // However, in some scenarios we defer emission of statics to downstream
                // crates, so there are cases where a static with an upstream DefId
                // is actually present in the current crate. We can find out via the
                // is_codegened_item query.
                if !self.tcx.is_codegened_item(def_id) {
                    llvm::set_dllimport_storage_class(g);
                }
            }
        }

        if self.use_dll_storage_attrs
            && let Some(library) = self.tcx.native_library(def_id)
            && library.kind.is_dllimport()
        {
            // For foreign (native) libs we know the exact storage type to use.
            llvm::set_dllimport_storage_class(g);
        }

        self.instances.borrow_mut().insert(instance, g);
        g
    }
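
    // Illustrative sketch (hypothetical symbol, target-specific assumption): on a
    // Windows target with `use_dll_storage_attrs`, a static imported from another
    // Rust crate that is not codegened locally ends up declared roughly as
    //
    //     @UPSTREAM_STATIC = external dllimport global <ty>
    //
    // whereas the same build with `-Clinker-plugin-lto` skips the `dllimport`
    // annotation entirely, per the checks above.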

    fn codegen_static_item(&mut self, def_id: DefId) {
        assert!(
            llvm::LLVMGetInitializer(
                self.instances.borrow().get(&Instance::mono(self.tcx, def_id)).unwrap()
            )
            .is_none()
        );
        let attrs = self.tcx.codegen_fn_attrs(def_id);

        let Ok((v, alloc)) = codegen_static_initializer(self, def_id) else {
            // Error has already been reported.
            return;
        };
        let alloc = alloc.inner();

        let val_llty = self.val_ty(v);

        let g = self.get_static_inner(def_id, val_llty);
        let llty = self.get_type_of_global(g);

        let g = if val_llty == llty {
            g
        } else {
            // codegen_static_initializer creates the global value just from the
            // `Allocation` data by generating one big struct value that is just
            // all the bytes and pointers after each other. This will almost never
            // match the type that the static was declared with. Unfortunately
            // we can't just LLVMConstBitCast our way out of it because that has very
            // specific rules on what can be cast. So instead of adding a new way to
            // generate static initializers that match the static's type, we picked
            // the easier option and retroactively change the type of the static item itself.
            let name = String::from_utf8(llvm::get_value_name(g))
                .expect("we declare our statics with a utf8-valid name");
            llvm::set_value_name(g, b"");

            let linkage = llvm::get_linkage(g);
            let visibility = llvm::get_visibility(g);

            let new_g = self.declare_global(&name, val_llty);

            llvm::set_linkage(new_g, linkage);
            llvm::set_visibility(new_g, visibility);

            // The old global has had its name removed but is returned by
            // get_static since it is in the instance cache. Provide an
            // alternative lookup that points to the new global so that
            // global_asm! can compute the correct mangled symbol name
            // for the global.
            self.renamed_statics.borrow_mut().insert(def_id, new_g);

            // To avoid breaking any invariants, we leave around the old
            // global for the moment; we'll replace all references to it
            // with the new global later. (See base::codegen_backend.)
            self.statics_to_rauw.borrow_mut().push((g, new_g));
            new_g
        };
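
        // Illustrative sketch (hypothetical static): `static X: (u32, u8) = (5, 7);`
        // is declared with its layout's LLVM type, but the initializer built from the
        // `Allocation` is a packed byte-struct, roughly
        //
        //     <{ [5 x i8] c"\05\00\00\00\07", [3 x i8] undef }>   ; little-endian
        //
        // so the declared global is renamed away and re-declared with the
        // initializer's type, with the RAUW pass fixing up old references later.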
        set_global_alignment(self, g, alloc.align);
        llvm::set_initializer(g, v);

        self.assume_dso_local(g, true);

        // Forward the allocation's mutability (picked by the const interner) to LLVM.
        if alloc.mutability.is_not() {
            llvm::set_global_constant(g, true);
        }

        debuginfo::build_global_var_di_node(self, def_id, g);

        if attrs.flags.contains(CodegenFnAttrFlags::THREAD_LOCAL) {
            llvm::set_thread_local_mode(g, self.tls_model);
        }

        // Wasm statics with custom link sections get special treatment as they
        // go into custom sections of the wasm executable. The exception to this
        // is the `.init_array` section, which is treated specially by the wasm linker.
        if self.tcx.sess.target.is_like_wasm
            && attrs
                .link_section
                .map(|link_section| !link_section.as_str().starts_with(".init_array"))
                .unwrap_or(true)
        {
            if let Some(section) = attrs.link_section {
                let section = self.create_metadata(section.as_str().as_bytes());
                assert!(alloc.provenance().ptrs().is_empty());

                // The `inspect` method is okay here because we checked for provenance, and
                // because we are doing this access to inspect the final interpreter state (not
                // as part of the interpreter execution).
                let bytes = alloc.inspect_with_uninit_and_ptr_outside_interpreter(0..alloc.len());
                let alloc = self.create_metadata(bytes);
                let data = [section, alloc];
                let meta =
                    unsafe { llvm::LLVMMDNodeInContext2(self.llcx, data.as_ptr(), data.len()) };
                let val = self.get_metadata_value(meta);
                unsafe {
                    llvm::LLVMAddNamedMetadataOperand(
                        self.llmod,
                        c"wasm.custom_sections".as_ptr(),
                        val,
                    )
                };
            }
        } else {
            base::set_link_section(g, attrs);
        }

        base::set_variable_sanitizer_attrs(g, attrs);

        if attrs.flags.contains(CodegenFnAttrFlags::USED_COMPILER) {
            // `USED` and `USED_LINKER` can't be used together.
            assert!(!attrs.flags.contains(CodegenFnAttrFlags::USED_LINKER));

            // The semantics of #[used] in Rust only require the symbol to make it into the
            // object file. It is explicitly allowed for the linker to strip the symbol if it
            // is dead, which means we are allowed to use `llvm.compiler.used` instead of
            // `llvm.used` here.
            //
            // Additionally, https://reviews.llvm.org/D97448 in LLVM 13 started emitting unique
            // sections with the SHF_GNU_RETAIN flag for llvm.used symbols, which may trigger bugs
            // in the handling of `.init_array` (the static constructor list) in versions of
            // the gold linker (prior to the one released with binutils 2.36).
            //
            // That said, we only ever emit these when `#[used(compiler)]` is explicitly
            // requested. This is to avoid similar breakage on other targets, in particular
            // MachO targets have *their* static constructor lists broken if `llvm.compiler.used`
            // is emitted rather than `llvm.used`. However, that check happens when assigning
            // the `CodegenFnAttrFlags` in the `codegen_fn_attrs` query, so we don't need to
            // take care of it here.
            self.add_compiler_used_global(g);
        }
        if attrs.flags.contains(CodegenFnAttrFlags::USED_LINKER) {
            // `USED` and `USED_LINKER` can't be used together.
            assert!(!attrs.flags.contains(CodegenFnAttrFlags::USED_COMPILER));

            self.add_used_global(g);
        }
    }

    /// Add a global value to a list to be stored in the `llvm.used` variable, an array of ptr.
    pub(crate) fn add_used_global(&mut self, global: &'ll Value) {
        self.used_statics.push(global);
    }

    /// Add a global value to a list to be stored in the `llvm.compiler.used` variable,
    /// an array of ptr.
    pub(crate) fn add_compiler_used_global(&mut self, global: &'ll Value) {
        self.compiler_used_statics.push(global);
    }
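
    // Illustrative sketch of the lists' eventual shape in the emitted module
    // (hypothetical global `@g`):
    //
    //     @llvm.used          = appending global [1 x ptr] [ptr @g], section "llvm.metadata"
    //     @llvm.compiler.used = appending global [1 x ptr] [ptr @g], section "llvm.metadata"
    //
    // `llvm.used` keeps `@g` alive through both compiler and (on some object formats)
    // linker GC, while `llvm.compiler.used` only stops the compiler itself from
    // dropping it.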
}

impl<'ll> StaticCodegenMethods for CodegenCx<'ll, '_> {
    /// Get a pointer to a global variable.
    ///
    /// The pointer will always be in the default address space. If global variables default to a
    /// different address space, an addrspacecast is inserted.
    fn static_addr_of(&self, cv: &'ll Value, align: Align, kind: Option<&str>) -> &'ll Value {
        let gv = self.static_addr_of_impl(cv, align, kind);
        // static_addr_of_impl returns the bare global variable, which might not be in the default
        // address space. Cast to the default address space if necessary.
        self.const_pointercast(gv, self.type_ptr())
    }
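
    // Illustrative sketch (target-specific assumption): on a target whose globals
    // live in a non-default address space, e.g. AVR with program memory in
    // addrspace(1), the cast above comes out roughly as
    //
    //     addrspacecast (ptr addrspace(1) @alloc to ptr)
    //
    // On targets where globals already sit in the default address space,
    // `LLVMConstPointerCast` folds away and simply returns the global.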

    fn codegen_static(&mut self, def_id: DefId) {
        self.codegen_static_item(def_id)
    }
}