miri/shims/x86/sse41.rs
1use rustc_abi::CanonAbi;
2use rustc_middle::ty::Ty;
3use rustc_span::Symbol;
4use rustc_target::callconv::FnAbi;
5
6use super::{conditional_dot_product, mpsadbw, packusdw, round_all, round_first, test_bits_masked};
7use crate::*;
8
// Intentionally empty impl: `MiriInterpCx` picks up the default method bodies
// declared on `EvalContextExt` below without overriding any of them.
impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
10pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
11 fn emulate_x86_sse41_intrinsic(
12 &mut self,
13 link_name: Symbol,
14 abi: &FnAbi<'tcx, Ty<'tcx>>,
15 args: &[OpTy<'tcx>],
16 dest: &MPlaceTy<'tcx>,
17 ) -> InterpResult<'tcx, EmulateItemResult> {
18 let this = self.eval_context_mut();
19 this.expect_target_feature_for_intrinsic(link_name, "sse4.1")?;
20 // Prefix should have already been checked.
21 let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse41.").unwrap();
22
23 match unprefixed_name {
24 // Used to implement the _mm_insert_ps function.
25 // Takes one element of `right` and inserts it into `left` and
26 // optionally zero some elements. Source index is specified
27 // in bits `6..=7` of `imm`, destination index is specified in
28 // bits `4..=5` if `imm`, and `i`th bit specifies whether element
29 // `i` is zeroed.
30 "insertps" => {
31 let [left, right, imm] =
32 this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
33
34 let (left, left_len) = this.project_to_simd(left)?;
35 let (right, right_len) = this.project_to_simd(right)?;
36 let (dest, dest_len) = this.project_to_simd(dest)?;
37
38 assert_eq!(dest_len, left_len);
39 assert_eq!(dest_len, right_len);
40 assert!(dest_len <= 4);
41
42 let imm = this.read_scalar(imm)?.to_u8()?;
43 let src_index = u64::from((imm >> 6) & 0b11);
44 let dst_index = u64::from((imm >> 4) & 0b11);
45
46 let src_value = this.read_immediate(&this.project_index(&right, src_index)?)?;
47
48 for i in 0..dest_len {
49 let dest = this.project_index(&dest, i)?;
50
51 if imm & (1 << i) != 0 {
52 // zeroed
53 this.write_scalar(Scalar::from_u32(0), &dest)?;
54 } else if i == dst_index {
55 // copy from `right` at specified index
56 this.write_immediate(*src_value, &dest)?;
57 } else {
58 // copy from `left`
59 this.copy_op(&this.project_index(&left, i)?, &dest)?;
60 }
61 }
62 }
63 // Used to implement the _mm_packus_epi32 function.
64 // Concatenates two 32-bit signed integer vectors and converts
65 // the result to a 16-bit unsigned integer vector with saturation.
66 "packusdw" => {
67 let [left, right] =
68 this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
69
70 packusdw(this, left, right, dest)?;
71 }
72 // Used to implement the _mm_dp_ps and _mm_dp_pd functions.
73 // Conditionally multiplies the packed floating-point elements in
74 // `left` and `right` using the high 4 bits in `imm`, sums the four
75 // products, and conditionally stores the sum in `dest` using the low
76 // 4 bits of `imm`.
77 "dpps" | "dppd" => {
78 let [left, right, imm] =
79 this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
80
81 conditional_dot_product(this, left, right, imm, dest)?;
82 }
83 // Used to implement the _mm_floor_ss, _mm_ceil_ss and _mm_round_ss
84 // functions. Rounds the first element of `right` according to `rounding`
85 // and copies the remaining elements from `left`.
86 "round.ss" => {
87 let [left, right, rounding] =
88 this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
89
90 round_first::<rustc_apfloat::ieee::Single>(this, left, right, rounding, dest)?;
91 }
92 // Used to implement the _mm_floor_ps, _mm_ceil_ps and _mm_round_ps
93 // functions. Rounds the elements of `op` according to `rounding`.
94 "round.ps" => {
95 let [op, rounding] =
96 this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
97
98 round_all::<rustc_apfloat::ieee::Single>(this, op, rounding, dest)?;
99 }
100 // Used to implement the _mm_floor_sd, _mm_ceil_sd and _mm_round_sd
101 // functions. Rounds the first element of `right` according to `rounding`
102 // and copies the remaining elements from `left`.
103 "round.sd" => {
104 let [left, right, rounding] =
105 this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
106
107 round_first::<rustc_apfloat::ieee::Double>(this, left, right, rounding, dest)?;
108 }
109 // Used to implement the _mm_floor_pd, _mm_ceil_pd and _mm_round_pd
110 // functions. Rounds the elements of `op` according to `rounding`.
111 "round.pd" => {
112 let [op, rounding] =
113 this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
114
115 round_all::<rustc_apfloat::ieee::Double>(this, op, rounding, dest)?;
116 }
117 // Used to implement the _mm_minpos_epu16 function.
118 // Find the minimum unsinged 16-bit integer in `op` and
119 // returns its value and position.
120 "phminposuw" => {
121 let [op] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
122
123 let (op, op_len) = this.project_to_simd(op)?;
124 let (dest, dest_len) = this.project_to_simd(dest)?;
125
126 // Find minimum
127 let mut min_value = u16::MAX;
128 let mut min_index = 0;
129 for i in 0..op_len {
130 let op = this.read_scalar(&this.project_index(&op, i)?)?.to_u16()?;
131 if op < min_value {
132 min_value = op;
133 min_index = i;
134 }
135 }
136
137 // Write value and index
138 this.write_scalar(Scalar::from_u16(min_value), &this.project_index(&dest, 0)?)?;
139 this.write_scalar(
140 Scalar::from_u16(min_index.try_into().unwrap()),
141 &this.project_index(&dest, 1)?,
142 )?;
143 // Fill remainder with zeros
144 for i in 2..dest_len {
145 this.write_scalar(Scalar::from_u16(0), &this.project_index(&dest, i)?)?;
146 }
147 }
148 // Used to implement the _mm_mpsadbw_epu8 function.
149 // Compute the sum of absolute differences of quadruplets of unsigned
150 // 8-bit integers in `left` and `right`, and store the 16-bit results
151 // in `right`. Quadruplets are selected from `left` and `right` with
152 // offsets specified in `imm`.
153 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mpsadbw_epu8
154 "mpsadbw" => {
155 let [left, right, imm] =
156 this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
157
158 mpsadbw(this, left, right, imm, dest)?;
159 }
160 // Used to implement the _mm_testz_si128, _mm_testc_si128
161 // and _mm_testnzc_si128 functions.
162 // Tests `(op & mask) == 0`, `(op & mask) == mask` or
163 // `(op & mask) != 0 && (op & mask) != mask`
164 "ptestz" | "ptestc" | "ptestnzc" => {
165 let [op, mask] = this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
166
167 let (all_zero, masked_set) = test_bits_masked(this, op, mask)?;
168 let res = match unprefixed_name {
169 "ptestz" => all_zero,
170 "ptestc" => masked_set,
171 "ptestnzc" => !all_zero && !masked_set,
172 _ => unreachable!(),
173 };
174
175 this.write_scalar(Scalar::from_i32(res.into()), dest)?;
176 }
177 _ => return interp_ok(EmulateItemResult::NotSupported),
178 }
179 interp_ok(EmulateItemResult::NeedsReturn)
180 }
181}