miri/shims/x86/sse42.rs
1use rustc_abi::{CanonAbi, Size};
2use rustc_middle::mir;
3use rustc_middle::ty::Ty;
4use rustc_span::Symbol;
5use rustc_target::callconv::FnAbi;
6
7use crate::*;
8
9/// A bitmask constant for scrutinizing the immediate byte provided
10/// to the string comparison intrinsics. It distinuishes between
11/// 16-bit integers and 8-bit integers. See [`compare_strings`]
12/// for more details about the immediate byte.
13const USE_WORDS: u8 = 1;
14
15/// A bitmask constant for scrutinizing the immediate byte provided
16/// to the string comparison intrinsics. It distinuishes between
17/// signed integers and unsigned integers. See [`compare_strings`]
18/// for more details about the immediate byte.
19const USE_SIGNED: u8 = 2;
20
21/// The main worker for the string comparison intrinsics, where the given
22/// strings are analyzed according to the given immediate byte.
23///
24/// # Arguments
25///
26/// * `str1` - The first string argument. It is always a length 16 array of bytes
27/// or a length 8 array of two-byte words.
28/// * `str2` - The second string argument. It is always a length 16 array of bytes
29/// or a length 8 array of two-byte words.
30/// * `len` is the length values of the supplied strings. It is distinct from the operand length
31/// in that it describes how much of `str1` and `str2` will be used for the calculation and may
32/// be smaller than the array length of `str1` and `str2`. The string length is counted in bytes
33/// if using byte operands and in two-byte words when using two-byte word operands.
34/// If the value is `None`, the length of a string is determined by the first
35/// null value inside the string.
36/// * `imm` is the immediate byte argument supplied to the intrinsic. The byte influences
37/// the operation as follows:
38///
39/// ```text
40/// 0babccddef
41/// || | |||- Use of bytes vs use of two-byte words inside the operation.
42/// || | ||
43/// || | ||- Use of signed values versus use of unsigned values.
44/// || | |
45/// || | |- The comparison operation performed. A total of four operations are available.
46/// || | * Equal any: Checks which characters of `str2` are inside `str1`.
47/// || | * String ranges: Check if characters in `str2` are inside the provided character ranges.
48/// || | Adjacent characters in `str1` constitute one range.
49/// || | * String comparison: Mark positions where `str1` and `str2` have the same character.
50/// || | * Substring search: Mark positions where `str1` is a substring in `str2`.
51/// || |
52/// || |- Result Polarity. The result bits may be subjected to a bitwise complement
53/// || if these bits are set.
54/// ||
55/// ||- Output selection. This bit has two meanings depending on the instruction.
56/// | If the instruction is generating a mask, it distinguishes between a bit mask
57/// | and a byte mask. Otherwise it distinguishes between the most significand bit
58/// | and the least significand bit when generating an index.
59/// |
60/// |- This bit is ignored. It is expected that this bit is set to zero, but it is
61/// not a requirement.
62/// ```
63///
64/// # Returns
65///
66/// A result mask. The bit at index `i` inside the mask is set if 'str2' starting at `i`
67/// fulfills the test as defined inside the immediate byte.
68/// The mask may be negated if negation flags inside the immediate byte are set.
69///
70/// For more information, see the Intel Software Developer's Manual, Vol. 2b, Chapter 4.1.
71#[expect(clippy::arithmetic_side_effects)]
72fn compare_strings<'tcx>(
73 ecx: &mut MiriInterpCx<'tcx>,
74 str1: &OpTy<'tcx>,
75 str2: &OpTy<'tcx>,
76 len: Option<(u64, u64)>,
77 imm: u8,
78) -> InterpResult<'tcx, i32> {
79 let default_len = default_len::<u64>(imm);
80 let (len1, len2) = if let Some(t) = len {
81 t
82 } else {
83 let len1 = implicit_len(ecx, str1, imm)?.unwrap_or(default_len);
84 let len2 = implicit_len(ecx, str2, imm)?.unwrap_or(default_len);
85 (len1, len2)
86 };
87
88 let mut result = 0;
89 match (imm >> 2) & 3 {
90 0 => {
91 // Equal any: Checks which characters of `str2` are inside `str1`.
92 for i in 0..len2 {
93 let ch2 = ecx.read_immediate(&ecx.project_index(str2, i)?)?;
94
95 for j in 0..len1 {
96 let ch1 = ecx.read_immediate(&ecx.project_index(str1, j)?)?;
97
98 let eq = ecx.binary_op(mir::BinOp::Eq, &ch1, &ch2)?;
99 if eq.to_scalar().to_bool()? {
100 result |= 1 << i;
101 break;
102 }
103 }
104 }
105 }
106 1 => {
107 // String ranges: Check if characters in `str2` are inside the provided character ranges.
108 // Adjacent characters in `str1` constitute one range.
109 let len1 = len1 - (len1 & 1);
110 let get_ch = |ch: Scalar| -> InterpResult<'tcx, i32> {
111 let result = match (imm & USE_WORDS != 0, imm & USE_SIGNED != 0) {
112 (true, true) => i32::from(ch.to_i16()?),
113 (true, false) => i32::from(ch.to_u16()?),
114 (false, true) => i32::from(ch.to_i8()?),
115 (false, false) => i32::from(ch.to_u8()?),
116 };
117 interp_ok(result)
118 };
119
120 for i in 0..len2 {
121 for j in (0..len1).step_by(2) {
122 let ch2 = get_ch(ecx.read_scalar(&ecx.project_index(str2, i)?)?)?;
123 let ch1_1 = get_ch(ecx.read_scalar(&ecx.project_index(str1, j)?)?)?;
124 let ch1_2 = get_ch(ecx.read_scalar(&ecx.project_index(str1, j + 1)?)?)?;
125
126 if ch1_1 <= ch2 && ch2 <= ch1_2 {
127 result |= 1 << i;
128 }
129 }
130 }
131 }
132 2 => {
133 // String comparison: Mark positions where `str1` and `str2` have the same character.
134 result = (1 << default_len) - 1;
135 result ^= (1 << len1.max(len2)) - 1;
136
137 for i in 0..len1.min(len2) {
138 let ch1 = ecx.read_immediate(&ecx.project_index(str1, i)?)?;
139 let ch2 = ecx.read_immediate(&ecx.project_index(str2, i)?)?;
140 let eq = ecx.binary_op(mir::BinOp::Eq, &ch1, &ch2)?;
141 result |= i32::from(eq.to_scalar().to_bool()?) << i;
142 }
143 }
144 3 => {
145 // Substring search: Mark positions where `str1` is a substring in `str2`.
146 if len1 == 0 {
147 result = (1 << default_len) - 1;
148 } else if len1 <= len2 {
149 for i in 0..len2 {
150 if len1 > len2 - i {
151 break;
152 }
153
154 result |= 1 << i;
155
156 for j in 0..len1 {
157 let k = i + j;
158
159 if k >= default_len {
160 break;
161 } else {
162 let ch1 = ecx.read_immediate(&ecx.project_index(str1, j)?)?;
163 let ch2 = ecx.read_immediate(&ecx.project_index(str2, k)?)?;
164 let ne = ecx.binary_op(mir::BinOp::Ne, &ch1, &ch2)?;
165
166 if ne.to_scalar().to_bool()? {
167 result &= !(1 << i);
168 break;
169 }
170 }
171 }
172 }
173 }
174 }
175 _ => unreachable!(),
176 }
177
178 // Polarity: Possibly perform a bitwise complement on the result.
179 match (imm >> 4) & 3 {
180 3 => result ^= (1 << len1) - 1,
181 1 => result ^= (1 << default_len) - 1,
182 _ => (),
183 }
184
185 interp_ok(result)
186}
187
188/// Obtain the arguments of the intrinsic based on its name.
189/// The result is a tuple with the following values:
190/// * The first string argument.
191/// * The second string argument.
192/// * The string length values, if the intrinsic requires them.
193/// * The immediate instruction byte.
194///
195/// The string arguments will be transmuted into arrays of bytes
196/// or two-byte words, depending on the value of the immediate byte.
197/// Originally, they are [__m128i](https://doc.rust-lang.org/stable/core/arch/x86_64/struct.__m128i.html) values
198/// corresponding to the x86 128-bit integer SIMD type.
199fn deconstruct_args<'tcx>(
200 unprefixed_name: &str,
201 ecx: &mut MiriInterpCx<'tcx>,
202 link_name: Symbol,
203 abi: &FnAbi<'tcx, Ty<'tcx>>,
204 args: &[OpTy<'tcx>],
205) -> InterpResult<'tcx, (OpTy<'tcx>, OpTy<'tcx>, Option<(u64, u64)>, u8)> {
206 let array_layout_fn = |ecx: &mut MiriInterpCx<'tcx>, imm: u8| {
207 if imm & USE_WORDS != 0 {
208 ecx.layout_of(Ty::new_array(ecx.tcx.tcx, ecx.tcx.types.u16, 8))
209 } else {
210 ecx.layout_of(Ty::new_array(ecx.tcx.tcx, ecx.tcx.types.u8, 16))
211 }
212 };
213
214 // The fourth letter of each string comparison intrinsic is either 'e' for "explicit" or 'i' for "implicit".
215 // The distinction will correspond to the intrinsics type signature. In this constext, "explicit" and "implicit"
216 // refer to the way the string length is determined. The length is either passed explicitly in the "explicit"
217 // case or determined by a null terminator in the "implicit" case.
218 let is_explicit = match unprefixed_name.as_bytes().get(4) {
219 Some(&b'e') => true,
220 Some(&b'i') => false,
221 _ => unreachable!(),
222 };
223
224 if is_explicit {
225 let [str1, len1, str2, len2, imm] =
226 ecx.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
227 let imm = ecx.read_scalar(imm)?.to_u8()?;
228
229 let default_len = default_len::<u32>(imm);
230 let len1 = u64::from(ecx.read_scalar(len1)?.to_u32()?.min(default_len));
231 let len2 = u64::from(ecx.read_scalar(len2)?.to_u32()?.min(default_len));
232
233 let array_layout = array_layout_fn(ecx, imm)?;
234 let str1 = str1.transmute(array_layout, ecx)?;
235 let str2 = str2.transmute(array_layout, ecx)?;
236
237 interp_ok((str1, str2, Some((len1, len2)), imm))
238 } else {
239 let [str1, str2, imm] = ecx.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
240 let imm = ecx.read_scalar(imm)?.to_u8()?;
241
242 let array_layout = array_layout_fn(ecx, imm)?;
243 let str1 = str1.transmute(array_layout, ecx)?;
244 let str2 = str2.transmute(array_layout, ecx)?;
245
246 interp_ok((str1, str2, None, imm))
247 }
248}
249
250/// Calculate the c-style string length for a given string `str`.
251/// The string is either a length 16 array of bytes a length 8 array of two-byte words.
252fn implicit_len<'tcx>(
253 ecx: &mut MiriInterpCx<'tcx>,
254 str: &OpTy<'tcx>,
255 imm: u8,
256) -> InterpResult<'tcx, Option<u64>> {
257 let mut result = None;
258 let zero = ImmTy::from_int(0, str.layout.field(ecx, 0));
259
260 for i in 0..default_len::<u64>(imm) {
261 let ch = ecx.read_immediate(&ecx.project_index(str, i)?)?;
262 let is_zero = ecx.binary_op(mir::BinOp::Eq, &ch, &zero)?;
263 if is_zero.to_scalar().to_bool()? {
264 result = Some(i);
265 break;
266 }
267 }
268 interp_ok(result)
269}
270
271#[inline]
272fn default_len<T: From<u8>>(imm: u8) -> T {
273 if imm & USE_WORDS != 0 { T::from(8u8) } else { T::from(16u8) }
274}
275
276impl<'tcx> EvalContextExt<'tcx> for crate::MiriInterpCx<'tcx> {}
277pub(super) trait EvalContextExt<'tcx>: crate::MiriInterpCxExt<'tcx> {
278 fn emulate_x86_sse42_intrinsic(
279 &mut self,
280 link_name: Symbol,
281 abi: &FnAbi<'tcx, Ty<'tcx>>,
282 args: &[OpTy<'tcx>],
283 dest: &MPlaceTy<'tcx>,
284 ) -> InterpResult<'tcx, EmulateItemResult> {
285 let this = self.eval_context_mut();
286 this.expect_target_feature_for_intrinsic(link_name, "sse4.2")?;
287 // Prefix should have already been checked.
288 let unprefixed_name = link_name.as_str().strip_prefix("llvm.x86.sse42.").unwrap();
289
290 match unprefixed_name {
291 // Used to implement the `_mm_cmpestrm` and the `_mm_cmpistrm` functions.
292 // These functions compare the input strings and return the resulting mask.
293 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=1044,922
294 "pcmpistrm128" | "pcmpestrm128" => {
295 let (str1, str2, len, imm) =
296 deconstruct_args(unprefixed_name, this, link_name, abi, args)?;
297 let mask = compare_strings(this, &str1, &str2, len, imm)?;
298
299 // The sixth bit inside the immediate byte distiguishes
300 // between a bit mask or a byte mask when generating a mask.
301 if imm & 0b100_0000 != 0 {
302 let (array_layout, size) = if imm & USE_WORDS != 0 {
303 (this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u16, 8))?, 2)
304 } else {
305 (this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u8, 16))?, 1)
306 };
307 let size = Size::from_bytes(size);
308 let dest = dest.transmute(array_layout, this)?;
309
310 for i in 0..default_len::<u64>(imm) {
311 let result = helpers::bool_to_simd_element(mask & (1 << i) != 0, size);
312 this.write_scalar(result, &this.project_index(&dest, i)?)?;
313 }
314 } else {
315 let layout = this.layout_of(this.tcx.types.i128)?;
316 let dest = dest.transmute(layout, this)?;
317 this.write_scalar(Scalar::from_i128(i128::from(mask)), &dest)?;
318 }
319 }
320
321 // Used to implement the `_mm_cmpestra` and the `_mm_cmpistra` functions.
322 // These functions compare the input strings and return `1` if the end of the second
323 // input string is not reached and the resulting mask is zero, and `0` otherwise.
324 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=919,1041
325 "pcmpistria128" | "pcmpestria128" => {
326 let (str1, str2, len, imm) =
327 deconstruct_args(unprefixed_name, this, link_name, abi, args)?;
328 let result = if compare_strings(this, &str1, &str2, len, imm)? != 0 {
329 false
330 } else if let Some((_, len)) = len {
331 len >= default_len::<u64>(imm)
332 } else {
333 implicit_len(this, &str1, imm)?.is_some()
334 };
335
336 this.write_scalar(Scalar::from_i32(i32::from(result)), dest)?;
337 }
338
339 // Used to implement the `_mm_cmpestri` and the `_mm_cmpistri` functions.
340 // These functions compare the input strings and return the bit index
341 // for most significant or least significant bit of the resulting mask.
342 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=921,1043
343 "pcmpistri128" | "pcmpestri128" => {
344 let (str1, str2, len, imm) =
345 deconstruct_args(unprefixed_name, this, link_name, abi, args)?;
346 let mask = compare_strings(this, &str1, &str2, len, imm)?;
347
348 let len = default_len::<u32>(imm);
349 // The sixth bit inside the immediate byte distiguishes between the least
350 // significant bit and the most significant bit when generating an index.
351 let result = if imm & 0b100_0000 != 0 {
352 // most significant bit
353 31u32.wrapping_sub(mask.leading_zeros()).min(len)
354 } else {
355 // least significant bit
356 mask.trailing_zeros().min(len)
357 };
358 this.write_scalar(Scalar::from_i32(i32::try_from(result).unwrap()), dest)?;
359 }
360
361 // Used to implement the `_mm_cmpestro` and the `_mm_cmpistro` functions.
362 // These functions compare the input strings and return the lowest bit of the
363 // resulting mask.
364 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=923,1045
365 "pcmpistrio128" | "pcmpestrio128" => {
366 let (str1, str2, len, imm) =
367 deconstruct_args(unprefixed_name, this, link_name, abi, args)?;
368 let mask = compare_strings(this, &str1, &str2, len, imm)?;
369 this.write_scalar(Scalar::from_i32(mask & 1), dest)?;
370 }
371
372 // Used to implement the `_mm_cmpestrc` and the `_mm_cmpistrc` functions.
373 // These functions compare the input strings and return `1` if the resulting
374 // mask was non-zero, and `0` otherwise.
375 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=920,1042
376 "pcmpistric128" | "pcmpestric128" => {
377 let (str1, str2, len, imm) =
378 deconstruct_args(unprefixed_name, this, link_name, abi, args)?;
379 let mask = compare_strings(this, &str1, &str2, len, imm)?;
380 this.write_scalar(Scalar::from_i32(i32::from(mask != 0)), dest)?;
381 }
382
383 // Used to implement the `_mm_cmpistrz` and the `_mm_cmpistrs` functions.
384 // These functions return `1` if the string end has been reached and `0` otherwise.
385 // Since these functions define the string length implicitly, it is equal to a
386 // search for a null terminator (see `deconstruct_args` for more details).
387 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=924,925
388 "pcmpistriz128" | "pcmpistris128" => {
389 let [str1, str2, imm] =
390 this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
391 let imm = this.read_scalar(imm)?.to_u8()?;
392
393 let str = if unprefixed_name == "pcmpistris128" { str1 } else { str2 };
394 let array_layout = if imm & USE_WORDS != 0 {
395 this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u16, 8))?
396 } else {
397 this.layout_of(Ty::new_array(this.tcx.tcx, this.tcx.types.u8, 16))?
398 };
399 let str = str.transmute(array_layout, this)?;
400 let result = implicit_len(this, &str, imm)?.is_some();
401
402 this.write_scalar(Scalar::from_i32(i32::from(result)), dest)?;
403 }
404
405 // Used to implement the `_mm_cmpestrz` and the `_mm_cmpestrs` functions.
406 // These functions return 1 if the explicitly passed string length is smaller
407 // than 16 for byte-sized operands or 8 for word-sized operands.
408 // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#ig_expand=1046,1047
409 "pcmpestriz128" | "pcmpestris128" => {
410 let [_, len1, _, len2, imm] =
411 this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
412 let len = if unprefixed_name == "pcmpestris128" { len1 } else { len2 };
413 let len = this.read_scalar(len)?.to_i32()?;
414 let imm = this.read_scalar(imm)?.to_u8()?;
415 this.write_scalar(
416 Scalar::from_i32(i32::from(len < default_len::<i32>(imm))),
417 dest,
418 )?;
419 }
420
421 // Used to implement the `_mm_crc32_u{8, 16, 32, 64}` functions.
422 // These functions calculate a 32-bit CRC using `0x11EDC6F41`
423 // as the polynomial, also known as CRC32C.
424 // https://datatracker.ietf.org/doc/html/rfc3720#section-12.1
425 "crc32.32.8" | "crc32.32.16" | "crc32.32.32" | "crc32.64.64" => {
426 let bit_size = match unprefixed_name {
427 "crc32.32.8" => 8,
428 "crc32.32.16" => 16,
429 "crc32.32.32" => 32,
430 "crc32.64.64" => 64,
431 _ => unreachable!(),
432 };
433
434 if bit_size == 64 && this.tcx.sess.target.arch != "x86_64" {
435 return interp_ok(EmulateItemResult::NotSupported);
436 }
437
438 let [left, right] =
439 this.check_shim_sig_lenient(abi, CanonAbi::C, link_name, args)?;
440 let left = this.read_scalar(left)?;
441 let right = this.read_scalar(right)?;
442
443 let crc = if bit_size == 64 {
444 // The 64-bit version will only consider the lower 32 bits,
445 // while the upper 32 bits get discarded.
446 #[expect(clippy::as_conversions)]
447 u128::from((left.to_u64()? as u32).reverse_bits())
448 } else {
449 u128::from(left.to_u32()?.reverse_bits())
450 };
451 let v = match bit_size {
452 8 => u128::from(right.to_u8()?.reverse_bits()),
453 16 => u128::from(right.to_u16()?.reverse_bits()),
454 32 => u128::from(right.to_u32()?.reverse_bits()),
455 64 => u128::from(right.to_u64()?.reverse_bits()),
456 _ => unreachable!(),
457 };
458
459 // Perform polynomial division modulo 2.
460 // The algorithm for the division is an adapted version of the
461 // schoolbook division algorithm used for normal integer or polynomial
462 // division. In this context, the quotient is not calculated, since
463 // only the remainder is needed.
464 //
465 // The algorithm works as follows:
466 // 1. Pull down digits until division can be performed. In the context of division
467 // modulo 2 it means locating the most significant digit of the dividend and shifting
468 // the divisor such that the position of the divisors most significand digit and the
469 // dividends most significand digit match.
470 // 2. Perform a division and determine the remainder. Since it is arithmetic modulo 2,
471 // this operation is a simple bitwise exclusive or.
472 // 3. Repeat steps 1. and 2. until the full remainder is calculated. This is the case
473 // once the degree of the remainder polynomial is smaller than the degree of the
474 // divisor polynomial. In other words, the number of leading zeros of the remainder
475 // is larger than the number of leading zeros of the divisor. It is important to
476 // note that standard arithmetic comparison is not applicable here:
477 // 0b10011 / 0b11111 = 0b01100 is a valid division, even though the dividend is
478 // smaller than the divisor.
479 let mut dividend = (crc << bit_size) ^ (v << 32);
480 const POLYNOMIAL: u128 = 0x11EDC6F41;
481 while dividend.leading_zeros() <= POLYNOMIAL.leading_zeros() {
482 dividend ^=
483 (POLYNOMIAL << POLYNOMIAL.leading_zeros()) >> dividend.leading_zeros();
484 }
485
486 let result = u32::try_from(dividend).unwrap().reverse_bits();
487 let result = if bit_size == 64 {
488 Scalar::from_u64(u64::from(result))
489 } else {
490 Scalar::from_u32(result)
491 };
492
493 this.write_scalar(result, dest)?;
494 }
495 _ => return interp_ok(EmulateItemResult::NotSupported),
496 }
497 interp_ok(EmulateItemResult::NeedsReturn)
498 }
499}