ostd/arch/x86/boot/
smp.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
// SPDX-License-Identifier: MPL-2.0

//! Multiprocessor Boot Support
//!
//! The MP initialization protocol defines two classes of processors:
//! the bootstrap processor (BSP) and the application processors (APs).
//! Following a power-up or RESET of an MP system, system hardware dynamically
//! selects one of the processors on the system bus as the BSP. The remaining
//! processors are designated as APs.
//!
//! The BSP executes the BIOS's boot-strap code to configure the APIC environment,
//! sets up system-wide data structures. Up to now, BSP has completed most of the
//! initialization of the OS, but APs has not been awakened.
//!
//! Following a power-up or reset, the APs complete a minimal self-configuration,
//! then wait for a startup signal (a SIPI message) from the BSP processor.
//!
//! The wake-up of AP follows SNIT-SIPI-SIPI IPI sequence:
//!  - Broadcast INIT IPI (Initialize the APs to the wait-for-SIPI state)
//!  - Wait
//!  - Broadcast De-assert INIT IPI (Only older processors need this step)
//!  - Wait
//!  - Broadcast SIPI IPI (APs exits the wait-for-SIPI state and starts executing code)
//!  - Wait
//!  - Broadcast SIPI IPI (If an AP fails to start)
//!
//! This sequence does not need to be strictly followed, and there may be
//! different considerations in different systems.

use acpi::madt::MadtEntry;

use crate::{
    arch::{
        if_tdx_enabled,
        kernel::{
            acpi::get_acpi_tables,
            apic::{
                self, Apic, ApicId, DeliveryMode, DeliveryStatus, DestinationMode,
                DestinationShorthand, Icr, Level, TriggerMode,
            },
        },
    },
    boot::{
        memory_region::{MemoryRegion, MemoryRegionType},
        smp::PerApRawInfo,
    },
    mm::{Paddr, PAGE_SIZE},
    task::disable_preempt,
};

/// Counts the number of processors.
/// Safety:
/// This function needs to be called after the OS initializes the ACPI table.
pub(crate) unsafe fn count_processors() -> Option<u32> {
    let acpi_tables = unsafe { get_acpi_tables()? };
    let madt_table = acpi_tables.find_table::<acpi::madt::Madt>().ok()?;

    // According to ACPI spec [1], "If this bit [the Enabled bit] is set the processor is ready for
    // use. If this bit is clear and the Online Capable bit is set, system hardware supports
    // enabling this processor during OS runtime."
    // [1]: https://uefi.org/htmlspecs/ACPI_Spec_6_4_html/05_ACPI_Software_Programming_Model/ACPI_Software_Programming_Model.html#local-apic-flags
    fn is_usable(flags: u32) -> bool {
        const ENABLED: u32 = 0b01;
        const ONLINE_CAPABLE: u32 = 0b10;

        (flags & ENABLED) != 0 || (flags & ONLINE_CAPABLE) != 0
    }

    // According to ACPI spec [1], "Logical processors with APIC ID values less than 255 (whether
    // in XAPIC or X2APIC mode) must use the Processor Local APIC structure to convey their APIC
    // information to OSPM [..] Logical processors with APIC ID values 255 and greater must use the
    // Processor Local x2APIC structure [..]"
    // [1]: https://uefi.org/htmlspecs/ACPI_Spec_6_4_html/05_ACPI_Software_Programming_Model/ACPI_Software_Programming_Model.html#processor-local-x2apic-structure
    let is_dup_apic = |id: u32| -> bool {
        // Check if the APIC entry also shows up as an x2APIC entry.
        if madt_table.get().entries().any(|e| {
            matches!(e, MadtEntry::LocalX2Apic(e)
                if e.x2apic_id == id && is_usable(e.flags))
        }) {
            log::warn!(
                "Firmware bug: In MADT, APIC ID {} is also listed as an x2APIC ID",
                id,
            );
            true
        } else {
            false
        }
    };

    let local_apic_counts = madt_table
        .get()
        .entries()
        .filter(|e| match e {
            MadtEntry::LocalX2Apic(entry) => {
                log::trace!("Found a local x2APIC entry in MADT: {:?}", entry);
                is_usable(entry.flags)
            }
            MadtEntry::LocalApic(entry) => {
                log::trace!("Found a local APIC entry in MADT: {:?}", entry);
                is_usable(entry.flags) && !is_dup_apic(entry.apic_id as u32)
            }
            _ => false,
        })
        .count();

    Some(local_apic_counts as u32)
}

/// Brings up all application processors.
///
/// # Safety
///
/// The caller must ensure that
/// 1. we're in the boot context of the BSP,
/// 2. all APs have not yet been booted, and
/// 3. the arguments are valid to boot APs.
pub(crate) unsafe fn bringup_all_aps(info_ptr: *const PerApRawInfo, pt_ptr: Paddr, num_cpus: u32) {
    // SAFETY: The code and data to boot AP is valid to write because
    // there are no readers and we are the only writer at this point.
    unsafe {
        copy_ap_boot_code();
        fill_boot_info_ptr(info_ptr);
        fill_boot_pt_ptr(pt_ptr);
    }

    // SAFETY: We've properly prepared all the resources to boot APs.
    if_tdx_enabled!({
        unsafe { wake_up_aps_via_mailbox(num_cpus) };
    } else {
        unsafe { send_boot_ipis() };
    });
}

/// This is where the linker load the symbols in the `.ap_boot` section.
/// The BSP would copy the AP boot code to this address.
const AP_BOOT_START_PA: usize = 0x8000;

/// The size of the AP boot code (the `.ap_boot` section).
fn ap_boot_code_size() -> usize {
    __ap_boot_end as usize - __ap_boot_start as usize
}

pub(super) fn reclaimable_memory_region() -> MemoryRegion {
    MemoryRegion::new(
        AP_BOOT_START_PA,
        ap_boot_code_size(),
        MemoryRegionType::Reclaimable,
    )
}

/// # Safety
///
/// The caller must ensure the memory region to be filled with AP boot code is valid to write.
unsafe fn copy_ap_boot_code() {
    let ap_boot_start = __ap_boot_start as usize as *const u8;
    let len = __ap_boot_end as usize - __ap_boot_start as usize;

    // SAFETY:
    // 1. The source memory region is valid for reading because it's inside the kernel text.
    // 2. The destination memory region is valid for writing because the caller upholds this.
    // 3. The memory is aligned because the alignment of `u8` is 1.
    // 4. The two memory regions do not overlap because the kernel text is isolated with the AP
    //    boot region.
    unsafe {
        core::ptr::copy_nonoverlapping(
            ap_boot_start,
            crate::mm::paddr_to_vaddr(AP_BOOT_START_PA) as *mut u8,
            len,
        );
    }
}

/// # Safety
///
/// The caller must ensure the pointer to be filled is valid to write.
unsafe fn fill_boot_info_ptr(info_ptr: *const PerApRawInfo) {
    extern "C" {
        static mut __ap_boot_info_array_pointer: *const PerApRawInfo;
    }

    // SAFETY: The safety is upheld by the caller.
    unsafe {
        __ap_boot_info_array_pointer = info_ptr;
    }
}

/// # Safety
///
/// The caller must ensure the pointer to be filled is valid to write.
unsafe fn fill_boot_pt_ptr(pt_ptr: Paddr) {
    extern "C" {
        static mut __boot_page_table_pointer: u32;
    }

    let pt_ptr32 = pt_ptr.try_into().unwrap();

    // SAFETY: The safety is upheld by the caller.
    unsafe {
        __boot_page_table_pointer = pt_ptr32;
    }
}

// The symbols are defined in the linker script.
extern "C" {
    fn __ap_boot_start();
    fn __ap_boot_end();
}

/// Wakes up all application processors via the ACPI multiprocessor mailbox structure.
///
/// # Safety
///
/// The safety preconditions are the same as [`send_boot_ipis`].
#[cfg(feature = "cvm_guest")]
unsafe fn wake_up_aps_via_mailbox(num_cpus: u32) {
    use acpi::platform::wakeup_aps;

    use crate::arch::kernel::acpi::AcpiMemoryHandler;

    // The symbols are defined in `ap_boot.S`.
    extern "C" {
        fn ap_boot_from_real_mode();
        fn ap_boot_from_long_mode();
    }

    let offset = ap_boot_from_long_mode as usize - ap_boot_from_real_mode as usize;

    let acpi_tables = unsafe { get_acpi_tables().unwrap() };
    for ap_num in 1..num_cpus {
        wakeup_aps(
            &acpi_tables,
            AcpiMemoryHandler {},
            ap_num,
            (AP_BOOT_START_PA + offset) as u64,
            1000,
        )
        .unwrap();
    }
}

/// Sends IPIs to notify all application processors to boot.
///
/// Follow the INIT-SIPI-SIPI IPI sequence.
/// Here, we don't check whether there is an AP that failed to start,
/// but send the second SIPI directly (checking whether each core is
/// started successfully one by one will bring extra overhead). For
/// APs that have been started, this signal will not bring any cost.
///
/// # Safety
///
/// The caller must ensure that all application processors can be
/// safely booted by ensuring that:
/// 1. We're in the boot context of the BSP and all APs have not yet
///    been booted.
/// 2. We've properly prepared all the resources for the application
///    processors to boot successfully (e.g., each AP's page table
///    and stack).
unsafe fn send_boot_ipis() {
    let preempt_guard = disable_preempt();
    let apic = apic::get_or_init(&preempt_guard as _);

    // SAFETY: We're sending IPIs to boot all application processors.
    // The safety is upheld by the caller.
    unsafe {
        send_init_to_all_aps(apic);
        spin_wait_cycles(100_000_000);

        send_init_deassert(apic);
        spin_wait_cycles(20_000_000);

        send_startup_to_all_aps(apic);
        spin_wait_cycles(20_000_000);

        send_startup_to_all_aps(apic);
        spin_wait_cycles(20_000_000);
    }
}

/// # Safety
///
/// The caller should ensure it's valid to send STARTUP IPIs to all CPUs excluding self.
unsafe fn send_startup_to_all_aps(apic: &dyn Apic) {
    let icr = Icr::new(
        ApicId::from(0),
        DestinationShorthand::AllExcludingSelf,
        TriggerMode::Edge,
        Level::Assert,
        DeliveryStatus::Idle,
        DestinationMode::Physical,
        DeliveryMode::StartUp,
        (AP_BOOT_START_PA / PAGE_SIZE) as u8,
    );
    // SAFETY: The safety is upheld by the caller.
    unsafe { apic.send_ipi(icr) }
}

/// # Safety
///
/// The caller should ensure it's valid to send INIT IPIs to all CPUs excluding self.
unsafe fn send_init_to_all_aps(apic: &dyn Apic) {
    let icr = Icr::new(
        ApicId::from(0),
        DestinationShorthand::AllExcludingSelf,
        TriggerMode::Level,
        Level::Assert,
        DeliveryStatus::Idle,
        DestinationMode::Physical,
        DeliveryMode::Init,
        0,
    );
    // SAFETY: The safety is upheld by the caller.
    unsafe { apic.send_ipi(icr) };
}

/// # Safety
///
/// The caller should ensure it's valid to deassert INIT IPIs for all CPUs excluding self.
unsafe fn send_init_deassert(apic: &dyn Apic) {
    let icr = Icr::new(
        ApicId::from(0),
        DestinationShorthand::AllIncludingSelf,
        TriggerMode::Level,
        Level::Deassert,
        DeliveryStatus::Idle,
        DestinationMode::Physical,
        DeliveryMode::Init,
        0,
    );
    // SAFETY: The safety is upheld by the caller.
    unsafe { apic.send_ipi(icr) };
}

/// Spin wait approximately `c` cycles.
///
/// Since the timer requires CPU local storage to be initialized, we
/// can only wait by spinning.
fn spin_wait_cycles(c: u64) {
    fn duration(from: u64, to: u64) -> u64 {
        if to >= from {
            to - from
        } else {
            u64::MAX - from + to
        }
    }

    let start = crate::arch::read_tsc();
    while duration(start, crate::arch::read_tsc()) < c {
        core::hint::spin_loop();
    }
}